From bdfd66788349acc43cd3f1298718ad491663cfcc Mon Sep 17 00:00:00 2001 From: Misono Tomohiro Date: Thu, 27 Feb 2020 14:59:27 +0900 Subject: [PATCH] virtiofsd: Fix xattr operations Current virtiofsd has problems about xattr operations and they does not work properly for directory/symlink/special file. The fundamental cause is that virtiofsd uses openat() + f...xattr() systemcalls for xattr operation but we should not open symlink/special file in the daemon. Therefore the function is restricted. Fix this problem by: 1. during setup of each thread, call unshare(CLONE_FS) 2. in xattr operations (i.e. lo_getxattr), if inode is not a regular file or directory, use fchdir(proc_loot_fd) + ...xattr() + fchdir(root.fd) instead of openat() + f...xattr() (Note: for a regular file/directory openat() + f...xattr() is still used for performance reason) With this patch, xfstests generic/062 passes on virtiofs. This fix is suggested by Miklos Szeredi and Stefan Hajnoczi. The original discussion can be found here: https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html Signed-off-by: Misono Tomohiro Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com> Acked-by: Vivek Goyal Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 13 ++++ tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++-------------- tools/virtiofsd/seccomp.c | 6 ++ 3 files changed, 77 insertions(+), 47 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index dd1c605dbf..3b6d16a041 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -426,6 +426,8 @@ err: return ret; } +static __thread bool clone_fs_called; + /* Process one FVRequest in a thread pool */ static void fv_queue_worker(gpointer data, gpointer user_data) { @@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data) assert(se->bufsize > sizeof(struct fuse_in_header)); + if (!clone_fs_called) { + int ret; + + /* unshare FS for xattr operation */ + ret = unshare(CLONE_FS); + /* should not fail */ + assert(ret == 0); + + clone_fs_called = true; + } + /* * An element contains one request and the space to send our response * They're spread over multiple descriptors in a scatter/gather set diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 4690a97947..4f259aac70 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -123,7 +123,7 @@ struct lo_inode { pthread_mutex_t plock_mutex; GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ - bool is_symlink; + mode_t filetype; }; struct lo_cred { @@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, struct lo_inode *parent; char path[PATH_MAX]; - if (inode->is_symlink) { + if (S_ISLNK(inode->filetype)) { res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); if (res == -1 && errno == EINVAL) { /* Sorry, no race free way to set times on symlink. */ @@ -928,7 +928,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, goto out_err; } - inode->is_symlink = S_ISLNK(e->attr.st_mode); + /* cache only filetype */ + inode->filetype = (e->attr.st_mode & S_IFMT); /* * One for the caller and one for nlookup (released in @@ -1135,7 +1136,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, struct lo_inode *parent; char path[PATH_MAX]; - if (inode->is_symlink) { + if (S_ISLNK(inode->filetype)) { res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); if (res == -1 && (errno == ENOENT || errno == EINVAL)) { /* Sorry, no race free way to hard-link a symlink. */ @@ -2189,12 +2190,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, size); - if (inode->is_symlink) { - /* Sorry, no race free way to getxattr on symlink. */ - saverr = EPERM; - goto out; - } - if (size) { value = malloc(size); if (!value) { @@ -2203,12 +2198,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, } sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDONLY); - if (fd < 0) { - goto out_err; + /* + * It is not safe to open() non-regular/non-dir files in file server + * unless O_PATH is used, so use that method for regular files/dir + * only (as it seems giving less performance overhead). + * Otherwise, call fchdir() to avoid open(). + */ + if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + goto out_err; + } + ret = fgetxattr(fd, name, value, size); + } else { + /* fchdir should not fail here */ + assert(fchdir(lo->proc_self_fd) == 0); + ret = getxattr(procname, name, value, size); + assert(fchdir(lo->root.fd) == 0); } - ret = fgetxattr(fd, name, value, size); if (ret == -1) { goto out_err; } @@ -2262,12 +2270,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, size); - if (inode->is_symlink) { - /* Sorry, no race free way to listxattr on symlink. */ - saverr = EPERM; - goto out; - } - if (size) { value = malloc(size); if (!value) { @@ -2276,12 +2278,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) } sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDONLY); - if (fd < 0) { - goto out_err; + if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + goto out_err; + } + ret = flistxattr(fd, value, size); + } else { + /* fchdir should not fail here */ + assert(fchdir(lo->proc_self_fd) == 0); + ret = listxattr(procname, value, size); + assert(fchdir(lo->root.fd) == 0); } - ret = flistxattr(fd, value, size); if (ret == -1) { goto out_err; } @@ -2335,20 +2344,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", ino, name, value, size); - if (inode->is_symlink) { - /* Sorry, no race free way to setxattr on symlink. */ - saverr = EPERM; - goto out; - } - sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDWR); - if (fd < 0) { - saverr = errno; - goto out; + if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + saverr = errno; + goto out; + } + ret = fsetxattr(fd, name, value, size, flags); + } else { + /* fchdir should not fail here */ + assert(fchdir(lo->proc_self_fd) == 0); + ret = setxattr(procname, name, value, size, flags); + assert(fchdir(lo->root.fd) == 0); } - ret = fsetxattr(fd, name, value, size, flags); saverr = ret == -1 ? errno : 0; out: @@ -2383,20 +2393,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, name); - if (inode->is_symlink) { - /* Sorry, no race free way to setxattr on symlink. */ - saverr = EPERM; - goto out; - } - sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDWR); - if (fd < 0) { - saverr = errno; - goto out; + if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + saverr = errno; + goto out; + } + ret = fremovexattr(fd, name); + } else { + /* fchdir should not fail here */ + assert(fchdir(lo->proc_self_fd) == 0); + ret = removexattr(procname, name); + assert(fchdir(lo->root.fd) == 0); } - ret = fremovexattr(fd, name); saverr = ret == -1 ? errno : 0; out: @@ -2796,7 +2807,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) exit(1); } - root->is_symlink = false; + root->filetype = S_IFDIR; root->fd = fd; root->key.ino = stat.st_ino; root->key.dev = stat.st_dev; diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c index 2d9d4a7ec0..bd9e7b083c 100644 --- a/tools/virtiofsd/seccomp.c +++ b/tools/virtiofsd/seccomp.c @@ -41,6 +41,7 @@ static const int syscall_whitelist[] = { SCMP_SYS(exit), SCMP_SYS(exit_group), SCMP_SYS(fallocate), + SCMP_SYS(fchdir), SCMP_SYS(fchmodat), SCMP_SYS(fchownat), SCMP_SYS(fcntl), @@ -62,7 +63,9 @@ static const int syscall_whitelist[] = { SCMP_SYS(getpid), SCMP_SYS(gettid), SCMP_SYS(gettimeofday), + SCMP_SYS(getxattr), SCMP_SYS(linkat), + SCMP_SYS(listxattr), SCMP_SYS(lseek), SCMP_SYS(madvise), SCMP_SYS(mkdirat), @@ -85,6 +88,7 @@ static const int syscall_whitelist[] = { SCMP_SYS(recvmsg), SCMP_SYS(renameat), SCMP_SYS(renameat2), + SCMP_SYS(removexattr), SCMP_SYS(rt_sigaction), SCMP_SYS(rt_sigprocmask), SCMP_SYS(rt_sigreturn), @@ -98,10 +102,12 @@ static const int syscall_whitelist[] = { SCMP_SYS(setresuid32), #endif SCMP_SYS(set_robust_list), + SCMP_SYS(setxattr), SCMP_SYS(symlinkat), SCMP_SYS(time), /* Rarely needed, except on static builds */ SCMP_SYS(tgkill), SCMP_SYS(unlinkat), + SCMP_SYS(unshare), SCMP_SYS(utimensat), SCMP_SYS(write), SCMP_SYS(writev),