diff --git a/MAINTAINERS b/MAINTAINERS index ce46c0a552..c7717df720 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1613,6 +1613,7 @@ S: Supported F: tools/virtiofsd/* F: hw/virtio/vhost-user-fs* F: include/hw/virtio/vhost-user-fs.h +F: docs/interop/virtiofsd.rst virtio-input M: Gerd Hoffmann diff --git a/Makefile b/Makefile index 461d40bea6..f0e1a2fc1d 100644 --- a/Makefile +++ b/Makefile @@ -348,6 +348,9 @@ DOCS=qemu-doc.html qemu-doc.txt qemu.1 DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-img.1 DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-nbd.8 DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-ga.8 +ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) +DOCS+=$(MANUAL_BUILDDIR)/interop/virtiofsd.1 +endif DOCS+=$(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7 DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7 @@ -861,6 +864,9 @@ ifdef CONFIG_VIRTFS $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" $(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/virtfs-proxy-helper.1 "$(DESTDIR)$(mandir)/man1" endif +ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) + $(INSTALL_DATA) docs/interop/virtiofsd.1 "$(DESTDIR)$(mandir)/man1" +endif install-datadir: $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)" @@ -1051,7 +1057,8 @@ $(MANUAL_BUILDDIR)/system/index.html: $(call manual-deps,system) $(call build-manual,system,html) $(call define-manpage-rule,interop,\ - qemu-ga.8 qemu-img.1 qemu-nbd.8 qemu-trace-stap.1 virtfs-proxy-helper.1,\ + qemu-ga.8 qemu-img.1 qemu-nbd.8 qemu-trace-stap.1\ + virtiofsd.1 virtfs-proxy-helper.1,\ $(SRC_PATH/qemu-img-cmds.hx)) $(call define-manpage-rule,system,qemu-block-drivers.7) diff --git a/docs/interop/conf.py b/docs/interop/conf.py index b0f322207c..b3cda17042 100644 --- a/docs/interop/conf.py +++ b/docs/interop/conf.py @@ -27,5 +27,8 @@ man_pages = [ [], 1), ('virtfs-proxy-helper', 'virtfs-proxy-helper', u'QEMU 9p virtfs proxy filesystem helper', - ['M. Mohan Kumar'], 1) + ['M. Mohan Kumar'], 1), + ('virtiofsd', 'virtiofsd', u'QEMU virtio-fs shared file system daemon', + ['Stefan Hajnoczi ', + 'Masayoshi Mizuma '], 1), ] diff --git a/docs/interop/index.rst b/docs/interop/index.rst index 3b763b1eeb..e8455b4270 100644 --- a/docs/interop/index.rst +++ b/docs/interop/index.rst @@ -24,3 +24,4 @@ Contents: vhost-user vhost-user-gpu virtfs-proxy-helper + virtiofsd diff --git a/docs/interop/virtiofsd.rst b/docs/interop/virtiofsd.rst new file mode 100644 index 0000000000..378594c422 --- /dev/null +++ b/docs/interop/virtiofsd.rst @@ -0,0 +1,120 @@ +QEMU virtio-fs shared file system daemon +======================================== + +Synopsis +-------- + +**virtiofsd** [*OPTIONS*] + +Description +----------- + +Share a host directory tree with a guest through a virtio-fs device. This +program is a vhost-user backend that implements the virtio-fs device. Each +virtio-fs device instance requires its own virtiofsd process. + +This program is designed to work with QEMU's ``--device vhost-user-fs-pci`` +but should work with any virtual machine monitor (VMM) that supports +vhost-user. See the Examples section below. + +This program must be run as the root user. Upon startup the program will +switch into a new file system namespace with the shared directory tree as its +root. This prevents "file system escapes" due to symlinks and other file +system objects that might lead to files outside the shared directory. The +program also sandboxes itself using seccomp(2) to prevent ptrace(2) and other +vectors that could allow an attacker to compromise the system after gaining +control of the virtiofsd process. + +Options +------- + +.. program:: virtiofsd + +.. option:: -h, --help + + Print help. + +.. option:: -V, --version + + Print version. + +.. option:: -d + + Enable debug output. + +.. option:: --syslog + + Print log messages to syslog instead of stderr. + +.. option:: -o OPTION + + * debug - + Enable debug output. + + * flock|no_flock - + Enable/disable flock. The default is ``no_flock``. + + * log_level=LEVEL - + Print only log messages matching LEVEL or more severe. LEVEL is one of + ``err``, ``warn``, ``info``, or ``debug``. The default is ``info``. + + * norace - + Disable racy fallback. The default is false. + + * posix_lock|no_posix_lock - + Enable/disable remote POSIX locks. The default is ``posix_lock``. + + * readdirplus|no_readdirplus - + Enable/disable readdirplus. The default is ``readdirplus``. + + * source=PATH - + Share host directory tree located at PATH. This option is required. + + * timeout=TIMEOUT - + I/O timeout in seconds. The default depends on cache= option. + + * writeback|no_writeback - + Enable/disable writeback cache. The cache alows the FUSE client to buffer + and merge write requests. The default is ``no_writeback``. + + * xattr|no_xattr - + Enable/disable extended attributes (xattr) on files and directories. The + default is ``no_xattr``. + +.. option:: --socket-path=PATH + + Listen on vhost-user UNIX domain socket at PATH. + +.. option:: --fd=FDNUM + + Accept connections from vhost-user UNIX domain socket file descriptor FDNUM. + The file descriptor must already be listening for connections. + +.. option:: --thread-pool-size=NUM + + Restrict the number of worker threads per request queue to NUM. The default + is 64. + +.. option:: --cache=none|auto|always + + Select the desired trade-off between coherency and performance. ``none`` + forbids the FUSE client from caching to achieve best coherency at the cost of + performance. ``auto`` acts similar to NFS with a 1 second metadata cache + timeout. ``always`` sets a long cache lifetime at the expense of coherency. + +Examples +-------- + +Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket +``/var/run/vm001-vhost-fs.sock``: + +:: + + host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 + host# qemu-system-x86_64 \ + -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \ + -device vhost-user-fs-pci,chardev=char0,tag=myfs \ + -object memory-backend-memfd,id=mem,size=4G,share=on \ + -numa node,memdev=mem \ + ... + guest# mount -t virtiofs myfs /mnt diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h index 7a4c713559..aba13fef2d 100644 --- a/tools/virtiofsd/fuse.h +++ b/tools/virtiofsd/fuse.h @@ -1006,26 +1006,6 @@ void fuse_exit(struct fuse *f); */ struct fuse_context *fuse_get_context(void); -/** - * Get the current supplementary group IDs for the current request - * - * Similar to the getgroups(2) system call, except the return value is - * always the total number of group IDs, even if it is larger than the - * specified size. - * - * The current fuse kernel module in linux (as of 2.6.30) doesn't pass - * the group list to userspace, hence this function needs to parse - * "/proc/$TID/task/$TID/status" to get the group IDs. - * - * This feature may not be supported on all operating systems. In - * such a case this function will return -ENOSYS. - * - * @param size size of given array - * @param list array of group IDs to be filled in - * @return the total number of supplementary group IDs or -errno on failure - */ -int fuse_getgroups(int size, gid_t list[]); - /** * Check if the current request has already been interrupted * diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index de2e2e0c65..704c0369b2 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -1116,6 +1116,10 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, struct fuse_file_info fi; arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); + if (!arg) { + fuse_reply_err(req, EINVAL); + return; + } memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; @@ -2667,83 +2671,6 @@ int fuse_lowlevel_is_virtio(struct fuse_session *se) return !!se->virtio_dev; } -#ifdef linux -int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) -{ - char *buf; - size_t bufsize = 1024; - char path[128]; - int ret; - int fd; - unsigned long pid = req->ctx.pid; - char *s; - - sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); - -retry: - buf = malloc(bufsize); - if (buf == NULL) { - return -ENOMEM; - } - - ret = -EIO; - fd = open(path, O_RDONLY); - if (fd == -1) { - goto out_free; - } - - ret = read(fd, buf, bufsize); - close(fd); - if (ret < 0) { - ret = -EIO; - goto out_free; - } - - if ((size_t)ret == bufsize) { - free(buf); - bufsize *= 4; - goto retry; - } - - ret = -EIO; - s = strstr(buf, "\nGroups:"); - if (s == NULL) { - goto out_free; - } - - s += 8; - ret = 0; - while (1) { - char *end; - unsigned long val = strtoul(s, &end, 0); - if (end == s) { - break; - } - - s = end; - if (ret < size) { - list[ret] = val; - } - ret++; - } - -out_free: - free(buf); - return ret; -} -#else /* linux */ -/* - * This is currently not implemented on other than Linux... - */ -int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) -{ - (void)req; - (void)size; - (void)list; - return -ENOSYS; -} -#endif - void fuse_session_exit(struct fuse_session *se) { se->exited = 1; diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h index 138041e5f1..8f6d705b5c 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h @@ -1704,27 +1704,6 @@ void *fuse_req_userdata(fuse_req_t req); */ const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); -/** - * Get the current supplementary group IDs for the specified request - * - * Similar to the getgroups(2) system call, except the return value is - * always the total number of group IDs, even if it is larger than the - * specified size. - * - * The current fuse kernel module in linux (as of 2.6.30) doesn't pass - * the group list to userspace, hence this function needs to parse - * "/proc/$TID/task/$TID/status" to get the group IDs. - * - * This feature may not be supported on all operating systems. In - * such a case this function will return -ENOSYS. - * - * @param req request handle - * @param size size of given array - * @param list array of group IDs to be filled in - * @return the total number of supplementary group IDs or -errno on failure - */ -int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); - /** * Callback function for an interrupt * diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 80a6e929df..dd1c605dbf 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -916,6 +916,7 @@ static int fv_create_listen_socket(struct fuse_session *se) old_umask = umask(0077); if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); + close(listen_sock); umask(old_umask); return -1; } @@ -923,6 +924,7 @@ static int fv_create_listen_socket(struct fuse_session *se) if (listen(listen_sock, 1) == -1) { fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); + close(listen_sock); return -1; } diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index e6f2399efc..c635fc8820 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -232,6 +232,7 @@ static int load_capng(void) */ cap.saved = capng_save_state(); if (!cap.saved) { + pthread_mutex_unlock(&cap.mutex); fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); return -EINVAL; }