From d2ab36bb115b720c9c738184d4007e1ca01c53da Mon Sep 17 00:00:00 2001 From: Erin Shepherd Date: Fri, 29 Nov 2024 14:38:00 +0100 Subject: [PATCH 1/6] pseudofs: add support for export_ops Pseudo-filesystems might reasonably wish to implement the export ops (particularly for name_to_handle_at/open_by_handle_at); plumb this through pseudo_fs_context Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Erin Shepherd Link: https://lore.kernel.org/r/20241113-pidfs_fh-v2-1-9a4d28155a37@e43.eu Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-1-87d803a42495@kernel.org Signed-off-by: Christian Brauner --- fs/libfs.c | 1 + include/linux/pseudo_fs.h | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index 748ac5923154..2890a9c4a414 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -673,6 +673,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = ctx->magic; s->s_op = ctx->ops ?: &simple_super_operations; + s->s_export_op = ctx->eops; s->s_xattr = ctx->xattr; s->s_time_gran = 1; root = new_inode(s); diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index 730f77381d55..2503f7625d65 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,6 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; + const struct export_operations *eops; const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; From f07c7cc4684a641032c6bd439d3b91ec336e8cb5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 29 Nov 2024 14:38:01 +0100 Subject: [PATCH 2/6] fhandle: simplify error handling Rely on our cleanup infrastructure. Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-2-87d803a42495@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fhandle.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index ec9145047dfc..c00d88fb14e1 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -261,19 +261,20 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, { int handle_dwords; struct vfsmount *mnt = ctx->root.mnt; + struct dentry *dentry; /* change the handle size to multiple of sizeof(u32) */ handle_dwords = handle->handle_bytes >> 2; - path->dentry = exportfs_decode_fh_raw(mnt, - (struct fid *)handle->f_handle, - handle_dwords, handle->handle_type, - ctx->fh_flags, - vfs_dentry_acceptable, ctx); - if (IS_ERR_OR_NULL(path->dentry)) { - if (path->dentry == ERR_PTR(-ENOMEM)) + dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle, + handle_dwords, handle->handle_type, + ctx->fh_flags, vfs_dentry_acceptable, + ctx); + if (IS_ERR_OR_NULL(dentry)) { + if (dentry == ERR_PTR(-ENOMEM)) return -ENOMEM; return -ESTALE; } + path->dentry = dentry; path->mnt = mntget(mnt); return 0; } @@ -398,29 +399,23 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag) { long retval = 0; - struct path path; + struct path path __free(path_put) = {}; struct file *file; - int fd; retval = handle_to_path(mountdirfd, ufh, &path, open_flag); if (retval) return retval; - fd = get_unused_fd_flags(open_flag); - if (fd < 0) { - path_put(&path); + CLASS(get_unused_fd, fd)(O_CLOEXEC); + if (fd < 0) return fd; - } + file = file_open_root(&path, "", open_flag, 0); - if (IS_ERR(file)) { - put_unused_fd(fd); - retval = PTR_ERR(file); - } else { - retval = fd; - fd_install(fd, file); - } - path_put(&path); - return retval; + if (IS_ERR(file)) + return PTR_ERR(file); + + fd_install(fd, file); + return take_fd(fd); } /** From 50166d57ea8c5042ecba0ee22532617d72ed085a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 29 Nov 2024 14:38:02 +0100 Subject: [PATCH 3/6] exportfs: add open method This allows filesystems such as pidfs to provide their custom open. Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-3-87d803a42495@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fhandle.c | 7 ++++++- include/linux/exportfs.h | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index c00d88fb14e1..f0b818f08aaa 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -401,6 +401,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, long retval = 0; struct path path __free(path_put) = {}; struct file *file; + const struct export_operations *eops; retval = handle_to_path(mountdirfd, ufh, &path, open_flag); if (retval) @@ -410,7 +411,11 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, if (fd < 0) return fd; - file = file_open_root(&path, "", open_flag, 0); + eops = path.mnt->mnt_sb->s_export_op; + if (eops->open) + file = eops->open(&path, open_flag); + else + file = file_open_root(&path, "", open_flag, 0); if (IS_ERR(file)) return PTR_ERR(file); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 4cc8801e50e3..c69b79b64466 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -10,6 +10,7 @@ struct inode; struct iomap; struct super_block; struct vfsmount; +struct path; /* limit the handle size to NFSv4 handle size now */ #define MAX_HANDLE_SZ 128 @@ -225,6 +226,9 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * open: + * Allow filesystems to specify a custom open function. + * * commit_metadata: * @commit_metadata should commit metadata changes to stable storage. * @@ -251,6 +255,7 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); + struct file * (*open)(struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ From 6ebb05b48e9c555f23a042dcbb45280a0f26def8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 29 Nov 2024 14:38:03 +0100 Subject: [PATCH 4/6] fhandle: pull CAP_DAC_READ_SEARCH check into may_decode_fh() There's no point in keeping it outside of that helper. This way we have all the permission pieces in one place. Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-4-87d803a42495@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fhandle.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index f0b818f08aaa..e17029b1dc44 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -279,28 +279,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, return 0; } -/* - * Allow relaxed permissions of file handles if the caller has the - * ability to mount the filesystem or create a bind-mount of the - * provided @mountdirfd. - * - * In both cases the caller may be able to get an unobstructed way to - * the encoded file handle. If the caller is only able to create a - * bind-mount we need to verify that there are no locked mounts on top - * of it that could prevent us from getting to the encoded file. - * - * In principle, locked mounts can prevent the caller from mounting the - * filesystem but that only applies to procfs and sysfs neither of which - * support decoding file handles. - */ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, unsigned int o_flags) { struct path *root = &ctx->root; + if (capable(CAP_DAC_READ_SEARCH)) + return true; + /* - * Restrict to O_DIRECTORY to provide a deterministic API that avoids a - * confusing api in the face of disconnected non-dir dentries. + * Allow relaxed permissions of file handles if the caller has + * the ability to mount the filesystem or create a bind-mount of + * the provided @mountdirfd. + * + * In both cases the caller may be able to get an unobstructed + * way to the encoded file handle. If the caller is only able to + * create a bind-mount we need to verify that there are no + * locked mounts on top of it that could prevent us from getting + * to the encoded file. + * + * In principle, locked mounts can prevent the caller from + * mounting the filesystem but that only applies to procfs and + * sysfs neither of which support decoding file handles. + * + * Restrict to O_DIRECTORY to provide a deterministic API that + * avoids a confusing api in the face of disconnected non-dir + * dentries. * * There's only one dentry for each directory inode (VFS rule)... */ @@ -337,7 +341,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, if (retval) goto out_err; - if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) { + if (!may_decode_fh(&ctx, o_flags)) { retval = -EPERM; goto out_path; } From c220e216d6bcd52cc7333e38edf43dc66ba0dd13 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 29 Nov 2024 14:38:04 +0100 Subject: [PATCH 5/6] exportfs: add permission method This allows filesystems such as pidfs to provide their custom permission checks. Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-5-87d803a42495@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fhandle.c | 35 ++++++++++++++--------------------- include/linux/exportfs.h | 17 ++++++++++++++++- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index e17029b1dc44..3e092ae6d142 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -187,17 +187,6 @@ static int get_path_from_fd(int fd, struct path *root) return 0; } -enum handle_to_path_flags { - HANDLE_CHECK_PERMS = (1 << 0), - HANDLE_CHECK_SUBTREE = (1 << 1), -}; - -struct handle_to_path_ctx { - struct path root; - enum handle_to_path_flags flags; - unsigned int fh_flags; -}; - static int vfs_dentry_acceptable(void *context, struct dentry *dentry) { struct handle_to_path_ctx *ctx = context; @@ -279,13 +268,13 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, return 0; } -static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, - unsigned int o_flags) +static inline int may_decode_fh(struct handle_to_path_ctx *ctx, + unsigned int o_flags) { struct path *root = &ctx->root; if (capable(CAP_DAC_READ_SEARCH)) - return true; + return 0; /* * Allow relaxed permissions of file handles if the caller has @@ -309,7 +298,7 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, * There's only one dentry for each directory inode (VFS rule)... */ if (!(o_flags & O_DIRECTORY)) - return false; + return -EPERM; if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) ctx->flags = HANDLE_CHECK_PERMS; @@ -319,14 +308,14 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, !has_locked_children(real_mount(root->mnt), root->dentry)) ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; else - return false; + return -EPERM; /* Are we able to override DAC permissions? */ if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH)) - return false; + return -EPERM; ctx->fh_flags = EXPORT_FH_DIR_ONLY; - return true; + return 0; } static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, @@ -336,15 +325,19 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, struct file_handle f_handle; struct file_handle *handle = NULL; struct handle_to_path_ctx ctx = {}; + const struct export_operations *eops; retval = get_path_from_fd(mountdirfd, &ctx.root); if (retval) goto out_err; - if (!may_decode_fh(&ctx, o_flags)) { - retval = -EPERM; + eops = ctx.root.mnt->mnt_sb->s_export_op; + if (eops && eops->permission) + retval = eops->permission(&ctx, o_flags); + else + retval = may_decode_fh(&ctx, o_flags); + if (retval) goto out_path; - } if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { retval = -EFAULT; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index c69b79b64466..a087606ace19 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -3,6 +3,7 @@ #define LINUX_EXPORTFS_H 1 #include +#include struct dentry; struct iattr; @@ -10,7 +11,6 @@ struct inode; struct iomap; struct super_block; struct vfsmount; -struct path; /* limit the handle size to NFSv4 handle size now */ #define MAX_HANDLE_SZ 128 @@ -157,6 +157,17 @@ struct fid { }; }; +enum handle_to_path_flags { + HANDLE_CHECK_PERMS = (1 << 0), + HANDLE_CHECK_SUBTREE = (1 << 1), +}; + +struct handle_to_path_ctx { + struct path root; + enum handle_to_path_flags flags; + unsigned int fh_flags; +}; + #define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */ #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ #define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ @@ -226,6 +237,9 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * permission: + * Allow filesystems to specify a custom permission function. + * * open: * Allow filesystems to specify a custom open function. * @@ -255,6 +269,7 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); + int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); struct file * (*open)(struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ From b3caba8f7a34a2bbaf45ffc6ff3a49b70afeb192 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 29 Nov 2024 14:38:05 +0100 Subject: [PATCH 6/6] pidfs: implement file handle support On 64-bit platforms, userspace can read the pidfd's inode in order to get a never-repeated PID identifier. On 32-bit platforms this identifier is not exposed, as inodes are limited to 32 bits. Instead expose the identifier via export_fh, which makes it available to userspace via name_to_handle_at. In addition we implement fh_to_dentry, which allows userspace to recover a pidfd from a pidfs file handle. Signed-off-by: Erin Shepherd [brauner: patch heavily rewritten] Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-6-87d803a42495@kernel.org Reviewed-by: Amir Goldstein Co-Developed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/pidfs.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/fs/pidfs.c b/fs/pidfs.c index 8d62d900d20d..cc773875e9e4 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -473,6 +474,118 @@ static const struct dentry_operations pidfs_dentry_operations = { .d_prune = stashed_dentry_prune, }; +static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) +{ + const struct pid *pid = inode->i_private; + + if (*max_len < 2) { + *max_len = 2; + return FILEID_INVALID; + } + + *max_len = 2; + *(u64 *)fh = pid->ino; + return FILEID_KERNFS; +} + +/* Find a struct pid based on the inode number. */ +static struct pid *pidfs_ino_get_pid(u64 ino) +{ + unsigned long pid_ino = pidfs_ino(ino); + u32 gen = pidfs_gen(ino); + struct pid *pid; + + guard(rcu)(); + + pid = idr_find(&pidfs_ino_idr, lower_32_bits(pid_ino)); + if (!pid) + return NULL; + + if (pidfs_ino(pid->ino) != pid_ino) + return NULL; + + if (pidfs_gen(pid->ino) != gen) + return NULL; + + /* Within our pid namespace hierarchy? */ + if (pid_vnr(pid) == 0) + return NULL; + + return get_pid(pid); +} + +static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, + int fh_type) +{ + int ret; + u64 pid_ino; + struct path path; + struct pid *pid; + + if (fh_len < 2) + return NULL; + + switch (fh_type) { + case FILEID_KERNFS: + pid_ino = *(u64 *)fid; + break; + default: + return NULL; + } + + pid = pidfs_ino_get_pid(pid_ino); + if (!pid) + return NULL; + + ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); + if (ret < 0) + return ERR_PTR(ret); + + mntput(path.mnt); + return path.dentry; +} + +/* + * Make sure that we reject any nonsensical flags that users pass via + * open_by_handle_at(). Note that PIDFD_THREAD is defined as O_EXCL, and + * PIDFD_NONBLOCK as O_NONBLOCK. + */ +#define VALID_FILE_HANDLE_OPEN_FLAGS \ + (O_RDONLY | O_WRONLY | O_RDWR | O_NONBLOCK | O_CLOEXEC | O_EXCL) + +static int pidfs_export_permission(struct handle_to_path_ctx *ctx, + unsigned int oflags) +{ + if (oflags & ~(VALID_FILE_HANDLE_OPEN_FLAGS | O_LARGEFILE)) + return -EINVAL; + + /* + * pidfd_ino_get_pid() will verify that the struct pid is part + * of the caller's pid namespace hierarchy. No further + * permission checks are needed. + */ + return 0; +} + +static struct file *pidfs_export_open(struct path *path, unsigned int oflags) +{ + /* + * Clear O_LARGEFILE as open_by_handle_at() forces it and raise + * O_RDWR as pidfds always are. + */ + oflags &= ~O_LARGEFILE; + return dentry_open(path, oflags | O_RDWR, current_cred()); +} + +static const struct export_operations pidfs_export_operations = { + .encode_fh = pidfs_encode_fh, + .fh_to_dentry = pidfs_fh_to_dentry, + .open = pidfs_export_open, + .permission = pidfs_export_permission, +}; + static int pidfs_init_inode(struct inode *inode, void *data) { const struct pid *pid = data; @@ -507,6 +620,7 @@ static int pidfs_init_fs_context(struct fs_context *fc) return -ENOMEM; ctx->ops = &pidfs_sops; + ctx->eops = &pidfs_export_operations; ctx->dops = &pidfs_dentry_operations; fc->s_fs_info = (void *)&pidfs_stashed_ops; return 0;