virtiofsd pull 2020-10-26

Misono
    Set default log level to info
    Explicit build option for virtiofsd
 
 Me
    xattr name mapping
 
 Stefan
   Alternative chroot sandbox method
 
 Max
   Submount mechanism
 
 Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEERfXHG0oMt/uXep+pBRYzHrxb/ecFAl+XGGAACgkQBRYzHrxb
 /efeDw/8Dz+yKjdV0mqzdOJ/2lg4etlDRqG7P65W9G79ZIthAOZVIH5p3yv+qzdN
 a+QTGWquCA/gCCfl29QHU2zC78PcBjP/ugm6icqW126MmJmHR/rOZx+RYC+0W4+0
 2YA2HzwtKpolrMuzKoddHwsYoIF2Uw6l1oZK1QOl1hjqL2q47VRDvCs6H7vpvGn1
 dGd+xkXMSYCVL4Lq+zAaIg6ZfTtCIlwJ+LMCoT/Wy7eZeB338T9Bz5iLl7BTqF2x
 GBv2eDw0xibYw+3d8zX9k76irKdLYPgJiaskjsGNWxLgtSYEOmCrDPzIMDbe34lS
 u4JlqRdmc62YoE5X1oI6tF8XSaD+O/PS1CT9O9IttDuHNVctg0zCqjTxONJn9IQk
 CmszvoGScnaH4PqWueR47wDjKdFq8p5nryODtmuYILjvAXPYJp0Vt6JDJ6ZZcS+t
 YwRYltCK7ToKiteTuosusQ/Vzk2kq4U6znWZsZH1LcyNEVaJNUIoIjYZrxKugG/F
 yuAeWinoG37N6gwx5GfUoIO/eRd8UyBmKEaeY7RpJfo+UnFpErg4+uEfZ5gbD1/0
 YtQmBXHrXnsZB//wTw0gUob0sDKoII21H5EcA4QiDpgci9Q5saL/XG55TDLNZ62d
 uUH7PbIna8KpoKRBjEzz5e71FBTF0sKshzrZFHjfmhZ9HiM6XeE=
 =dTCm
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20201026' into staging

virtiofsd pull 2020-10-26

Misono
   Set default log level to info
   Explicit build option for virtiofsd

Me
   xattr name mapping

Stefan
  Alternative chroot sandbox method

Max
  Submount mechanism

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# gpg: Signature made Mon 26 Oct 2020 18:41:36 GMT
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert-gitlab/tags/pull-virtiofs-20201026:
  tests/acceptance: Add virtiofs_submounts.py
  tests/acceptance/boot_linux: Accept SSH pubkey
  virtiofsd: Announce sub-mount points
  virtiofsd: Store every lo_inode's parent_dev
  virtiofsd: Add fuse_reply_attr_with_flags()
  virtiofsd: Add attr_flags to fuse_entry_param
  virtiofsd: Announce FUSE_ATTR_FLAGS
  linux/fuse.h: Pull in from Linux
  tools/virtiofsd: xattr name mappings: Simple 'map'
  tools/virtiofsd: xattr name mapping examples
  tools/virtiofsd: xattr name mappings: Map server xattr names
  tools/virtiofsd: xattr name mappings: Map client xattr names
  tools/virtiofsd: xattr name mappings: Add option
  virtiofsd: add container-friendly -o sandbox=chroot option
  virtiofsd: passthrough_ll: set FUSE_LOG_INFO as default log_level
  configure: add option for virtiofsd

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2020-10-27 14:29:52 +00:00
commit 725ca3313a
17 changed files with 1528 additions and 40 deletions

8
configure vendored
View File

@ -302,6 +302,7 @@ fdt="auto"
netmap="no"
sdl="auto"
sdl_image="auto"
virtiofsd="auto"
virtfs=""
libudev="auto"
mpath="auto"
@ -999,6 +1000,10 @@ for opt do
;;
--enable-libudev) libudev="enabled"
;;
--disable-virtiofsd) virtiofsd="disabled"
;;
--enable-virtiofsd) virtiofsd="enabled"
;;
--disable-mpath) mpath="disabled"
;;
--enable-mpath) mpath="enabled"
@ -1758,6 +1763,7 @@ disabled with --disable-FEATURE, default is enabled if available:
vnc-png PNG compression for VNC server
cocoa Cocoa UI (Mac OS X only)
virtfs VirtFS
virtiofsd build virtiofs daemon (virtiofsd)
libudev Use libudev to enumerate host devices
mpath Multipath persistent reservation passthrough
xen xen backend driver support
@ -6972,7 +6978,7 @@ NINJA=$ninja $meson setup \
-Dxen=$xen -Dxen_pci_passthrough=$xen_pci_passthrough -Dtcg=$tcg \
-Dcocoa=$cocoa -Dmpath=$mpath -Dsdl=$sdl -Dsdl_image=$sdl_image \
-Dvnc=$vnc -Dvnc_sasl=$vnc_sasl -Dvnc_jpeg=$vnc_jpeg -Dvnc_png=$vnc_png \
-Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f \
-Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f -Dvirtiofsd=$virtiofsd \
-Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt \
-Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\
-Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \

View File

@ -17,13 +17,24 @@ This program is designed to work with QEMU's ``--device vhost-user-fs-pci``
but should work with any virtual machine monitor (VMM) that supports
vhost-user. See the Examples section below.
This program must be run as the root user. Upon startup the program will
switch into a new file system namespace with the shared directory tree as its
root. This prevents "file system escapes" due to symlinks and other file
system objects that might lead to files outside the shared directory. The
program also sandboxes itself using seccomp(2) to prevent ptrace(2) and other
vectors that could allow an attacker to compromise the system after gaining
control of the virtiofsd process.
This program must be run as the root user. The program drops privileges where
possible during startup although it must be able to create and access files
with any uid/gid:
* The ability to invoke syscalls is limited using seccomp(2).
* Linux capabilities(7) are dropped.
In "namespace" sandbox mode the program switches into a new file system
namespace and invokes pivot_root(2) to make the shared directory tree its root.
A new pid and net namespace is also created to isolate the process.
In "chroot" sandbox mode the program invokes chroot(2) to make the shared
directory tree its root. This mode is intended for container environments where
the container runtime has already set up the namespaces and the program does
not have permission to create namespaces itself.
Both sandbox modes prevent "file system escapes" due to symlinks and other file
system objects that might lead to files outside the shared directory.
Options
-------
@ -69,6 +80,13 @@ Options
* readdirplus|no_readdirplus -
Enable/disable readdirplus. The default is ``readdirplus``.
* sandbox=namespace|chroot -
Sandbox mode:
- namespace: Create mount, pid, and net namespaces and pivot_root(2) into
the shared directory.
- chroot: chroot(2) into shared directory (use in containers).
The default is "namespace".
* source=PATH -
Share host directory tree located at PATH. This option is required.
@ -109,6 +127,167 @@ Options
timeout. ``always`` sets a long cache lifetime at the expense of coherency.
The default is ``auto``.
xattr-mapping
-------------
By default the names of xattrs used by the client are passed through to the server
file system. This can be a problem where either those xattr names are used
by something on the server (e.g. selinux client/server confusion) or if the
virtiofsd is running in a container with restricted privileges where it cannot
access some attributes.
A mapping of xattr names can be made using -o xattrmap=mapping where the ``mapping``
string consists of a series of rules.
The first matching rule terminates the mapping.
The set of rules must include a terminating rule to match any remaining attributes
at the end.
Each rule consists of a number of fields separated with a separator that is the
first non-white space character in the rule. This separator must then be used
for the whole rule.
White space may be added before and after each rule.
Using ':' as the separator a rule is of the form:
``:type:scope:key:prepend:``
**scope** is:
- 'client' - match 'key' against a xattr name from the client for
setxattr/getxattr/removexattr
- 'server' - match 'prepend' against a xattr name from the server
for listxattr
- 'all' - can be used to make a single rule where both the server
and client matches are triggered.
**type** is one of:
- 'prefix' - is designed to prepend and strip a prefix; the modified
attributes then being passed on to the client/server.
- 'ok' - Causes the rule set to be terminated when a match is found
while allowing matching xattr's through unchanged.
It is intended both as a way of explicitly terminating
the list of rules, and to allow some xattr's to skip following rules.
- 'bad' - If a client tries to use a name matching 'key' it's
denied using EPERM; when the server passes an attribute
name matching 'prepend' it's hidden. In many ways its use is very like
'ok' as either an explicit terminator or for special handling of certain
patterns.
**key** is a string tested as a prefix on an attribute name originating
on the client. It may be empty in which case a 'client' rule
will always match on client names.
**prepend** is a string tested as a prefix on an attribute name originating
on the server, and used as a new prefix. It may be empty
in which case a 'server' rule will always match on all names from
the server.
e.g.:
``:prefix:client:trusted.:user.virtiofs.:``
will match 'trusted.' attributes in client calls and prefix them before
passing them to the server.
``:prefix:server::user.virtiofs.:``
will strip 'user.virtiofs.' from all server replies.
``:prefix:all:trusted.:user.virtiofs.:``
combines the previous two cases into a single rule.
``:ok:client:user.::``
will allow get/set xattr for 'user.' xattr's and ignore
following rules.
``:ok:server::security.:``
will pass 'security.' xattr's in listxattr from the server
and ignore following rules.
``:ok:all:::``
will terminate the rule search passing any remaining attributes
in both directions.
``:bad:server::security.:``
would hide 'security.' xattr's in listxattr from the server.
A simpler 'map' type provides a shorter syntax for the common case:
``:map:key:prepend:``
The 'map' type adds a number of separate rules to add **prepend** as a prefix
to the matched **key** (or all attributes if **key** is empty).
There may be at most one 'map' rule and it must be the last rule in the set.
xattr-mapping Examples
----------------------
1) Prefix all attributes with 'user.virtiofs.'
::
-o xattrmap=":prefix:all::user.virtiofs.::bad:all:::"
This uses two rules, using : as the field separator;
the first rule prefixes and strips 'user.virtiofs.',
the second rule hides any non-prefixed attributes that
the host set.
This is equivalent to the 'map' rule:
::
-o xattrmap=":map::user.virtiofs.:"
2) Prefix 'trusted.' attributes, allow others through
::
"/prefix/all/trusted./user.virtiofs./
/bad/server//trusted./
/bad/client/user.virtiofs.//
/ok/all///"
Here there are four rules, using / as the field
separator, and also demonstrating that new lines can
be included between rules.
The first rule is the prefixing of 'trusted.' and
stripping of 'user.virtiofs.'.
The second rule hides unprefixed 'trusted.' attributes
on the host.
The third rule stops a guest from explicitly setting
the 'user.virtiofs.' path directly.
Finally, the fourth rule lets all remaining attributes
through.
This is equivalent to the 'map' rule:
::
-o xattrmap="/map/trusted./user.virtiofs./"
3) Hide 'security.' attributes, and allow everything else
::
"/bad/all/security./security./
/ok/all///"
The first rule combines what could be separate client and server
rules into a single 'all' rule, matching 'security.' in either
client arguments or lists returned from the host. This stops
the client seeing any 'security.' attributes on the server and
stops it setting any.
Examples
--------

View File

@ -227,7 +227,7 @@ struct fuse_attr {
uint32_t gid;
uint32_t rdev;
uint32_t blksize;
uint32_t padding;
uint32_t flags;
};
struct fuse_kstatfs {
@ -310,6 +310,7 @@ struct fuse_file_lock {
* FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
* FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
* FUSE_MAP_ALIGNMENT: map_alignment field is valid
* FUSE_ATTR_FLAGS: fuse_attr.flags is present and valid
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@ -338,6 +339,7 @@ struct fuse_file_lock {
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
#define FUSE_MAP_ALIGNMENT (1 << 26)
#define FUSE_ATTR_FLAGS (1 << 27)
/**
* CUSE INIT request/reply flags
@ -413,6 +415,13 @@ struct fuse_file_lock {
*/
#define FUSE_FSYNC_FDATASYNC (1 << 0)
/**
* fuse_attr flags
*
* FUSE_ATTR_SUBMOUNT: File/directory is a submount point
*/
#define FUSE_ATTR_SUBMOUNT (1 << 0)
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */

View File

@ -2045,6 +2045,7 @@ summary_info += {'Audio drivers': config_host['CONFIG_AUDIO_DRIVERS']}
summary_info += {'Block whitelist (rw)': config_host['CONFIG_BDRV_RW_WHITELIST']}
summary_info += {'Block whitelist (ro)': config_host['CONFIG_BDRV_RO_WHITELIST']}
summary_info += {'VirtFS support': config_host.has_key('CONFIG_VIRTFS')}
summary_info += {'build virtiofs daemon': have_virtiofsd}
summary_info += {'Multipath support': mpathpersist.found()}
summary_info += {'VNC support': vnc.found()}
if vnc.found()

View File

@ -62,6 +62,8 @@ option('vnc_sasl', type : 'feature', value : 'auto',
description: 'SASL authentication for VNC server')
option('xkbcommon', type : 'feature', value : 'auto',
description: 'xkbcommon support')
option('virtiofsd', type: 'feature', value: 'auto',
description: 'build virtiofs daemon (virtiofsd)')
option('capstone', type: 'combo', value: 'auto',
choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],

View File

@ -57,7 +57,7 @@ class BootLinuxBase(Test):
self.cancel('Failed to download/prepare boot image')
return boot.path
def download_cloudinit(self):
def download_cloudinit(self, ssh_pubkey=None):
self.log.info('Preparing cloudinit image')
try:
cloudinit_iso = os.path.join(self.workdir, 'cloudinit.iso')
@ -67,7 +67,8 @@ class BootLinuxBase(Test):
password='password',
# QEMU's hard coded usermode router address
phone_home_host='10.0.2.2',
phone_home_port=self.phone_home_port)
phone_home_port=self.phone_home_port,
authorized_key=ssh_pubkey)
except Exception:
self.cancel('Failed to prepared cloudinit image')
return cloudinit_iso
@ -80,19 +81,19 @@ class BootLinux(BootLinuxBase):
timeout = 900
chksum = None
def setUp(self):
def setUp(self, ssh_pubkey=None):
super(BootLinux, self).setUp()
self.vm.add_args('-smp', '2')
self.vm.add_args('-m', '1024')
self.prepare_boot()
self.prepare_cloudinit()
self.prepare_cloudinit(ssh_pubkey)
def prepare_boot(self):
path = self.download_boot()
self.vm.add_args('-drive', 'file=%s' % path)
def prepare_cloudinit(self):
cloudinit_iso = self.download_cloudinit()
def prepare_cloudinit(self, ssh_pubkey=None):
cloudinit_iso = self.download_cloudinit(ssh_pubkey)
self.vm.add_args('-drive', 'file=%s,format=raw' % cloudinit_iso)
def launch_and_wait(self):

View File

@ -0,0 +1,289 @@
import logging
import re
import os
import subprocess
import time
from avocado import skipUnless
from avocado_qemu import Test, BUILD_DIR
from avocado_qemu import wait_for_console_pattern
from avocado.utils import ssh
from qemu.accel import kvm_available
from boot_linux import BootLinux
def run_cmd(args):
    """
    Run a command on the host and wait for it to finish.

    :param args: sequence holding the program and its arguments
    :returns: tuple ``(stdout, stderr, returncode)``; both streams are
              captured and decoded as text
    """
    finished = subprocess.run(args,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True)
    return (finished.stdout, finished.stderr, finished.returncode)
def has_passwordless_sudo():
    """
    Probe whether ``sudo -n true`` succeeds, i.e. sudo works without a
    password prompt.

    The result is shaped for unpacking into @avocado.skipUnless, e.g.:

        @skipUnless(*has_passwordless_sudo())
        def test_something_that_needs_sudo(self):
            ...

    :returns: ``(True, '')`` on success, otherwise ``(False, message)``
              where the message carries sudo's stripped stderr output
    """
    sudo_result = run_cmd(('sudo', '-n', 'true'))
    error_text, status = sudo_result[1], sudo_result[2]

    if status == 0:
        return (True, '')
    return (False, f'Failed to use sudo -n: {error_text.strip()}')
class VirtiofsSubmountsTest(BootLinux):
    """
    Boot a Linux guest with a virtiofsd-backed vhost-user-fs device and
    check, from inside the guest over SSH, the nested mounts created on
    the host below the shared directory.

    :avocado: tags=arch:x86_64
    """

    def get_portfwd(self):
        """
        Return (as a string) the host port forwarded to the guest, parsed
        from the 'info usernet' monitor output.  Fails the test if no
        forwarding line is found.
        """
        port = None

        res = self.vm.command('human-monitor-command',
                              command_line='info usernet')
        for line in res.split('\r\n'):
            match = \
                re.search(r'TCP.HOST_FORWARD.*127\.0\.0\.1\s*(\d+)\s+10\.',
                          line)
            if match is not None:
                port = match[1]
                break

        self.assertIsNotNone(port)
        self.log.debug('sshd listening on port: ' + port)
        return port

    def ssh_connect(self, username, keyfile):
        """
        Open self.ssh_session to the guest via the forwarded port, trying
        up to 10 times with a 4 second pause after each failed attempt.
        """
        self.ssh_logger = logging.getLogger('ssh')
        port = self.get_portfwd()
        self.ssh_session = ssh.Session('127.0.0.1', port=int(port),
                                       user=username, key=keyfile)
        for i in range(10):
            try:
                self.ssh_session.connect()
                return
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit must still be able to abort the test.
            # NOTE(review): a retry only happens when connect() raises;
            # confirm it does not report failure via its return value.
            except Exception:
                time.sleep(4)

        self.fail('sshd timeout')

    def ssh_command(self, command):
        """
        Run a command in the guest, log its output (stdout as info, stderr
        as warning) and assert that it exited with status 0.

        :returns: tuple ``(stdout_lines, stderr_lines)`` of right-stripped
                  lines
        """
        self.ssh_logger.info(command)
        result = self.ssh_session.cmd(command)
        stdout_lines = [line.rstrip() for line
                        in result.stdout_text.splitlines()]
        for line in stdout_lines:
            self.ssh_logger.info(line)
        stderr_lines = [line.rstrip() for line
                        in result.stderr_text.splitlines()]
        for line in stderr_lines:
            self.ssh_logger.warning(line)

        self.assertEqual(result.exit_status, 0,
                         f'Guest command failed: {command}')
        return stdout_lines, stderr_lines

    def run(self, args, ignore_error=False):
        """
        Run a host command through run_cmd().

        A non-zero exit status fails the test, unless ignore_error is set,
        in which case it is only logged as a warning.

        :returns: tuple ``(stdout, stderr, returncode)``
        """
        stdout, stderr, ret = run_cmd(args)

        if ret != 0:
            cmdline = ' '.join(args)
            if not ignore_error:
                self.fail(f'{cmdline}: Returned {ret}: {stderr}')
            else:
                # Logger.warn() is a deprecated alias of warning()
                self.log.warning(f'{cmdline}: Returned {ret}: {stderr}')

        return (stdout, stderr, ret)

    def set_up_shared_dir(self):
        """
        Create the directory shared with the guest and copy the guest-side
        check and cleanup scripts into it.
        """
        atwd = os.getenv('AVOCADO_TEST_WORKDIR')
        self.shared_dir = os.path.join(atwd, 'virtiofs-shared')

        os.mkdir(self.shared_dir)

        self.run(('cp', self.get_data('guest.sh'),
                 os.path.join(self.shared_dir, 'check.sh')))

        self.run(('cp', self.get_data('guest-cleanup.sh'),
                 os.path.join(self.shared_dir, 'cleanup.sh')))

    def set_up_virtiofs(self):
        """
        Start virtiofsd (through sudo) on the shared directory, wait for
        its socket to appear and add the matching vhost-user-fs device and
        shared memory backend to the VM command line.
        """
        attmp = os.getenv('AVOCADO_TESTS_COMMON_TMPDIR')
        self.vfsdsock = os.path.join(attmp, 'vfsdsock')

        # Remove any stale socket so the wait loop below sees the new one
        self.run(('sudo', '-n', 'rm', '-f', self.vfsdsock), ignore_error=True)

        self.virtiofsd = \
            subprocess.Popen(('sudo', '-n',
                              'tools/virtiofsd/virtiofsd',
                              f'--socket-path={self.vfsdsock}',
                              '-o', f'source={self.shared_dir}',
                              '-o', 'cache=always',
                              '-o', 'xattr',
                              '-o', 'announce_submounts',
                              '-f'),
                             stdout=subprocess.DEVNULL,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)

        while not os.path.exists(self.vfsdsock):
            if self.virtiofsd.poll() is not None:
                self.fail('virtiofsd exited prematurely: ' +
                          self.virtiofsd.communicate()[1])
            time.sleep(0.1)

        # The socket is created by the sudo'ed daemon; open it up so the
        # QEMU process started by this test can connect to it.
        self.run(('sudo', '-n', 'chmod', 'go+rw', self.vfsdsock))

        self.vm.add_args('-chardev',
                         f'socket,id=vfsdsock,path={self.vfsdsock}',
                         '-device',
                         'vhost-user-fs-pci,queue-size=1024,chardev=vfsdsock'
                         ',tag=host',
                         '-object',
                         'memory-backend-file,id=mem,size=1G,'
                         'mem-path=/dev/shm,share=on',
                         '-numa',
                         'node,memdev=mem')

    def launch_vm(self):
        """Boot the guest, wait for it to come up and connect over SSH."""
        self.launch_and_wait()
        self.ssh_connect('root', self.ssh_key)

    def set_up_nested_mounts(self):
        """
        Run host.sh on the 'scratch' directory inside the shared directory
        (passing the 'seed' test parameter if set) and log the seed line
        it prints.
        """
        scratch_dir = os.path.join(self.shared_dir, 'scratch')
        try:
            os.mkdir(scratch_dir)
        except FileExistsError:
            pass

        args = ['bash', self.get_data('host.sh'), scratch_dir]
        if self.seed:
            args += [self.seed]

        out, _, _ = self.run(args)
        seed = re.search(r'^Seed: \d+', out)
        self.log.info(seed[0])

    def mount_in_guest(self):
        """Mount the virtiofs share (tag 'host') on /mnt/host in the guest."""
        self.ssh_command('mkdir -p /mnt/host')
        self.ssh_command('mount -t virtiofs host /mnt/host')

    def check_in_guest(self):
        """Run the guest-side check script on the shared tree."""
        self.ssh_command('bash /mnt/host/check.sh /mnt/host/scratch/share')

    def live_cleanup(self):
        """
        Tear down the nested mounts while the guest keeps running: unmount
        in the guest, remount the share, then run cleanup.sh on the host.
        """
        self.ssh_command('bash /mnt/host/cleanup.sh /mnt/host/scratch')

        # It would be nice if the above was sufficient to make virtiofsd clear
        # all references to the mounted directories (so they can be unmounted
        # on the host), but unfortunately it is not.  To do so, we have to
        # resort to a remount.
        self.ssh_command('mount -o remount /mnt/host')

        scratch_dir = os.path.join(self.shared_dir, 'scratch')
        self.run(('bash', self.get_data('cleanup.sh'), scratch_dir))

    @skipUnless(*has_passwordless_sudo())
    def setUp(self):
        """
        Read the 'vmlinuz' and 'seed' parameters, generate an SSH key pair
        for the guest, run the BootLinux set-up with the public key and
        add the kernel, user-mode network and KVM arguments.  Cancels the
        test when 'vmlinuz' is unset or KVM is not available.
        """
        vmlinuz = self.params.get('vmlinuz')
        if vmlinuz is None:
            self.cancel('vmlinuz parameter not set; you must point it to a '
                        'Linux kernel binary to test (to run this test with '
                        'the on-image kernel, set it to an empty string)')

        self.seed = self.params.get('seed')

        atwd = os.getenv('AVOCADO_TEST_WORKDIR')
        self.ssh_key = os.path.join(atwd, 'id_ed25519')

        # -N '' requests an empty passphrase; without it ssh-keygen asks
        # for one interactively, which an automated test cannot answer.
        self.run(('ssh-keygen', '-N', '', '-t', 'ed25519',
                  '-f', self.ssh_key))

        with open(self.ssh_key + '.pub') as pubkey_file:
            pubkey = pubkey_file.read()

        super(VirtiofsSubmountsTest, self).setUp(pubkey)

        if len(vmlinuz) > 0:
            self.vm.add_args('-kernel', vmlinuz,
                             '-append', 'console=ttyS0 root=/dev/sda1')

        # Allow us to connect to SSH
        self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22',
                         '-device', 'e1000,netdev=vnet')

        if not kvm_available(self.arch, self.qemu_bin):
            # Spelled out here because this module neither defines nor
            # imports a KVM_NOT_AVAILABLE constant.
            self.cancel('KVM accelerator does not seem to be available')
        self.vm.add_args('-accel', 'kvm')

    def tearDown(self):
        """
        Best-effort cleanup: shut the VM down and run cleanup.sh on the
        host scratch directory, ignoring errors from either step.
        """
        try:
            self.vm.shutdown()
        except Exception:
            pass

        # shared_dir is only assigned by set_up_shared_dir(); when the test
        # was cancelled before that ran there is nothing to clean up.
        shared_dir = getattr(self, 'shared_dir', None)
        if shared_dir is None:
            return

        scratch_dir = os.path.join(shared_dir, 'scratch')
        self.run(('bash', self.get_data('cleanup.sh'), scratch_dir),
                 ignore_error=True)

    def test_pre_virtiofsd_set_up(self):
        """Nested mounts are created before virtiofsd is started."""
        self.set_up_shared_dir()

        self.set_up_nested_mounts()

        self.set_up_virtiofs()
        self.launch_vm()
        self.mount_in_guest()
        self.check_in_guest()

    def test_pre_launch_set_up(self):
        """Nested mounts are created after virtiofsd, before the VM boots."""
        self.set_up_shared_dir()
        self.set_up_virtiofs()

        self.set_up_nested_mounts()

        self.launch_vm()
        self.mount_in_guest()
        self.check_in_guest()

    def test_post_launch_set_up(self):
        """Nested mounts are created after boot, before the guest mounts."""
        self.set_up_shared_dir()
        self.set_up_virtiofs()
        self.launch_vm()

        self.set_up_nested_mounts()

        self.mount_in_guest()
        self.check_in_guest()

    def test_post_mount_set_up(self):
        """Nested mounts are created after the guest mounted the share."""
        self.set_up_shared_dir()
        self.set_up_virtiofs()
        self.launch_vm()
        self.mount_in_guest()

        self.set_up_nested_mounts()

        self.check_in_guest()

    def test_two_runs(self):
        """Check, clean up while the guest is live, set up and check again."""
        self.set_up_shared_dir()

        self.set_up_nested_mounts()

        self.set_up_virtiofs()
        self.launch_vm()
        self.mount_in_guest()
        self.check_in_guest()

        self.live_cleanup()
        self.set_up_nested_mounts()

        self.check_in_guest()

View File

@ -0,0 +1,46 @@
#!/bin/bash
# Print the usage line, preceded by an error message when one is given.
#   $1 - program name to show in the usage line
#   $2 - optional error text; when non-empty it is printed first,
#        followed by an empty line
function print_usage()
{
    [ -z "$2" ] || printf 'Error: %s\n\n' "$2"
    printf 'Usage: %s <scratch dir>\n' "$1"
}
# Host-side cleanup: unmount and delete every mnt* below <scratch>/share,
# remove the share directory, then detach and delete the fs*.img images.

scratch_dir=$1
if [ -z "$scratch_dir" ]; then
    print_usage "$0" 'Scratch dir not given' >&2
    exit 1
fi

# Make unmatched globs expand to nothing; otherwise an already-clean
# directory would hand the literal strings 'mnt*' / 'fs*.img' to
# umount, rm and losetup below.
shopt -s nullglob

cd "$scratch_dir/share" || exit 1

mps=(mnt*)
mp_i=0
for mp in "${mps[@]}"; do
    mp_i=$((mp_i + 1))
    printf "Unmounting %i/%i...\r" "$mp_i" "${#mps[@]}"

    sudo umount -R "$mp"
    rm -rf "$mp"
done
echo

rm some-file
cd ..
rmdir share

imgs=(fs*.img)
img_i=0
for img in "${imgs[@]}"; do
    img_i=$((img_i + 1))
    printf "Detaching and deleting %i/%i...\r" "$img_i" "${#imgs[@]}"

    dev=$(losetup -j "$img" | sed -e 's/:.*//')
    # losetup -j prints nothing when the image is not attached; do not
    # call "losetup -d" with an empty device name in that case.
    if [ -n "$dev" ]; then
        sudo losetup -d "$dev"
    fi
    rm -f "$img"
done
echo

echo 'Done.'

View File

@ -0,0 +1,30 @@
#!/bin/bash
# Print the usage line, preceded by an error message when one is given.
#   $1 - program name to show in the usage line
#   $2 - optional error text; when non-empty it is printed first,
#        followed by an empty line
function print_usage()
{
    [ -z "$2" ] || printf 'Error: %s\n\n' "$2"
    printf 'Usage: %s <scratch dir>\n' "$1"
}
# Guest-side cleanup: recursively unmount every mnt* below <scratch>/share.

scratch_dir=$1
if [ -z "$scratch_dir" ]; then
    print_usage "$0" 'Scratch dir not given' >&2
    exit 1
fi

# Make an unmatched glob expand to nothing; otherwise the loop below
# would try to unmount the literal name 'mnt*'.
shopt -s nullglob

cd "$scratch_dir/share" || exit 1

mps=(mnt*)
mp_i=0
for mp in "${mps[@]}"; do
    mp_i=$((mp_i + 1))
    printf "Unmounting %i/%i...\r" "$mp_i" "${#mps[@]}"

    sudo umount -R "$mp"
done
echo

echo 'Done.'

View File

@ -0,0 +1,138 @@
#!/bin/bash
# Print the usage text, preceded by an error message when one is given.
#   $1 - program name to show in the usage line
#   $2 - optional error text; when non-empty it is printed first,
#        followed by an empty line
function print_usage()
{
    [ -z "$2" ] || printf 'Error: %s\n\n' "$2"
    printf 'Usage: %s <shared dir>\n' "$1"
    echo '(The shared directory is the "share" directory in the scratch directory)'
}
shared_dir=$1
if [ -z "$shared_dir" ]; then
    print_usage "$0" 'Shared dir not given' >&2
    exit 1
fi

# Every check below uses paths relative to the share root, so stop here
# if the directory cannot be entered.
cd "$shared_dir" || exit 1
# FIXME: This should not be necessary, but it is. In order for all
# submounts to be proper mount points, we need to visit them.
# (Before we visit them, they will not be auto-mounted, and so just
# appear as normal directories, with the catch that their st_ino will
# be the st_ino of the filesystem they host, while the st_dev will
# still be the st_dev of the parent.)
# `find` does not work, because it will refuse to touch the mount
# points as long as they are not mounted; their st_dev being shared
# with the parent and st_ino just being the root node's inode ID
# will practically ensure that this node exists elsewhere on the
# filesystem, and `find` is required to recognize loops and not to
# follow them.
# Thus, we have to manually visit all nodes first.
mnt_i=0
# Walk the directory tree below $1, entering every directory once.
# Each entry whose name starts with "mnt" bumps the global counter
# mnt_i and updates the progress line.
#   $1 - directory to visit (relative to the current directory)
function recursively_visit()
{
    local entry

    # If the directory cannot be entered we would otherwise scan the
    # current directory again (and recurse into the same names without
    # end) and later pop a directory that was never pushed.
    pushd "$1" >/dev/null || return

    for entry in *; do
        if [[ "$entry" == mnt* ]]; then
            mnt_i=$((mnt_i + 1))
            printf 'Triggering auto-mount %i...\r' "$mnt_i"
        fi

        if [ -d "$entry" ]; then
            recursively_visit "$entry"
        fi
    done

    popd >/dev/null
}
recursively_visit .
echo
if [ -n "$(find -name not-mounted)" ]; then
echo "Error: not-mounted files visible on mount points:" >&2
find -name not-mounted >&2
exit 1
fi
if [ ! -f some-file -o "$(cat some-file)" != 'root' ]; then
echo "Error: Bad file in the share root" >&2
exit 1
fi
shopt -s nullglob
# Verify every "mnt<N>" directory in the current directory, recursively.
#
#   $1 - path of the current directory relative to the share root: either
#        empty or ending in '/'.  Used to build the path expected on the
#        second line of each some-file.
#
# For each mnt<N>, its device ID is stored in the global array devs[N];
# seeing the same N twice is an error.  The function then follows the
# chain of "sub" directories, checking at every level that some-file
# exists, that its content is "<N>" and the current path on two lines,
# and that some-file and sub carry the mount point's device ID.  At the
# end of the chain it recurses into nested mnt* directories, if any.
#
# Relies on nullglob being set by the caller.  Exits the whole script with
# status 1 on the first failed check.
function check_submounts()
{
    local base_path=$1
    # Keep the loop state private to this invocation: the function calls
    # itself and must not share these with the nested call.
    local mp mp_i dev path expected_content

    for mp in mnt*; do
        printf "Checking submount %i...\r" "$((${#devs[@]} + 1))"

        mp_i=$(echo "$mp" | sed -e 's/mnt//')
        dev=$(stat -c '%D' "$mp")

        if [ -n "${devs[mp_i]}" ]; then
            echo "Error: $mp encountered twice" >&2
            exit 1
        fi
        devs[mp_i]=$dev

        pushd "$mp" >/dev/null
        path="$base_path$mp"
        while true; do
            expected_content="$(printf '%s\n%s\n' "$mp_i" "$path")"
            if [ ! -f some-file ]; then
                echo "Error: $PWD/some-file does not exist" >&2
                exit 1
            fi

            if [ "$(cat some-file)" != "$expected_content" ]; then
                # The whole diagnostic goes to stderr, like the headline
                {
                    echo "Error: Bad content in $PWD/some-file:"
                    echo '--- found ---'
                    cat some-file
                    echo '--- expected ---'
                    echo "$expected_content"
                } >&2
                exit 1
            fi
            if [ "$(stat -c '%D' some-file)" != "$dev" ]; then
                echo "Error: $PWD/some-file has the wrong device ID" >&2
                exit 1
            fi

            if [ -d sub ]; then
                if [ "$(stat -c '%D' sub)" != "$dev" ]; then
                    echo "Error: $PWD/sub has the wrong device ID" >&2
                    exit 1
                fi
                cd sub
                path="$path/sub"
            else
                if [ -n "$(echo mnt*)" ]; then
                    check_submounts "$path/"
                fi
                break
            fi
        done
        popd >/dev/null
    done
}
root_dev=$(stat -c '%D' some-file)
devs=()
check_submounts ''
echo
reused_devs=$(echo "$root_dev ${devs[@]}" | tr ' ' '\n' | sort | uniq -d)
if [ -n "$reused_devs" ]; then
echo "Error: Reused device IDs: $reused_devs" >&2
exit 1
fi
echo "Test passed for ${#devs[@]} submounts."

View File

@ -0,0 +1,127 @@
#!/bin/bash
mount_count=128
# Print the usage text, preceded by an error message when one is given.
#   $1 - program name to show in the usage line
#   $2 - optional error text; when non-empty it is printed first,
#        followed by an empty line
function print_usage()
{
    [ -z "$2" ] || printf 'Error: %s\n\n' "$2"
    printf 'Usage: %s <scratch dir> [seed]\n' "$1"
    printf '%s\n' "(If no seed is given, it will be randomly generated.)"
}
scratch_dir=$1
if [ -z "$scratch_dir" ]; then
print_usage "$0" 'No scratch dir given' >&2
exit 1
fi
if [ ! -d "$scratch_dir" ]; then
print_usage "$0" "$scratch_dir is not a directory" >&2
exit 1
fi
seed=$2
if [ -z "$seed" ]; then
seed=$RANDOM
fi
RANDOM=$seed
echo "Seed: $seed"
set -e
shopt -s nullglob
cd "$scratch_dir"
if [ -d share ]; then
echo 'Error: This directory seems to be in use already' >&2
exit 1
fi
for ((i = 0; i < $mount_count; i++)); do
printf "Setting up fs %i/%i...\r" "$((i + 1))" "$mount_count"
rm -f fs$i.img
truncate -s 512M fs$i.img
mkfs.xfs -q fs$i.img
devs[i]=$(sudo losetup -f --show fs$i.img)
done
echo
top_level_mounts=$((RANDOM % mount_count + 1))
mkdir -p share
echo 'root' > share/some-file
for ((i = 0; i < $top_level_mounts; i++)); do
printf "Mounting fs %i/%i...\r" "$((i + 1))" "$mount_count"
mkdir -p share/mnt$i
touch share/mnt$i/not-mounted
sudo mount "${devs[i]}" share/mnt$i
sudo chown "$(id -u):$(id -g)" share/mnt$i
pushd share/mnt$i >/dev/null
path=mnt$i
nesting=$((RANDOM % 4))
for ((j = 0; j < $nesting; j++)); do
cat > some-file <<EOF
$i
$path
EOF
mkdir sub
cd sub
path="$path/sub"
done
cat > some-file <<EOF
$i
$path
EOF
popd >/dev/null
done
for ((; i < $mount_count; i++)); do
printf "Mounting fs %i/%i...\r" "$((i + 1))" "$mount_count"
mp_i=$((i % top_level_mounts))
pushd share/mnt$mp_i >/dev/null
path=mnt$mp_i
while true; do
sub_mp="$(echo mnt*)"
if cd sub 2>/dev/null; then
path="$path/sub"
elif [ -n "$sub_mp" ] && cd "$sub_mp" 2>/dev/null; then
path="$path/$sub_mp"
else
break
fi
done
mkdir mnt$i
touch mnt$i/not-mounted
sudo mount "${devs[i]}" mnt$i
sudo chown "$(id -u):$(id -g)" mnt$i
cd mnt$i
path="$path/mnt$i"
nesting=$((RANDOM % 4))
for ((j = 0; j < $nesting; j++)); do
cat > some-file <<EOF
$i
$path
EOF
mkdir sub
cd sub
path="$path/sub"
done
cat > some-file <<EOF
$i
$path
EOF
popd >/dev/null
done
echo
echo 'Done.'

View File

@ -1,10 +1,23 @@
have_virtiofsd = (have_system and
have_virtiofsd = (targetos == 'linux' and
have_tools and
'CONFIG_LINUX' in config_host and
'CONFIG_SECCOMP' in config_host and
'CONFIG_LIBCAP_NG' in config_host and
'CONFIG_VHOST_USER' in config_host)
if get_option('virtiofsd').enabled()
if not have_virtiofsd
if targetos != 'linux'
error('virtiofsd requires Linux')
elif 'CONFIG_SECCOMP' not in config_host or 'CONFIG_LIBCAP_NG' not in config_host
error('virtiofsd requires libcap-ng-devel and seccomp-devel')
elif not have_tools or 'CONFIG_VHOST_USER' not in config_host
error('virtiofsd needs tools and vhost-user support')
endif
endif
elif get_option('virtiofsd').disabled() or not have_system
have_virtiofsd = false
endif
if have_virtiofsd
subdir('virtiofsd')
endif

View File

@ -352,6 +352,14 @@ struct fuse_file_info {
*/
#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24)
/**
* Indicates that the client will provide fuse_attr.flags, and the kernel will
* interpret it.
*
* This feature is enabled by default when supported by the kernel.
*/
#define FUSE_CAP_ATTR_FLAGS (1 << 27)
/**
* Ioctl flags
*

View File

@ -329,7 +329,8 @@ static unsigned int calc_timeout_nsec(double t)
}
}
static void fill_entry(struct fuse_entry_out *arg,
static void fill_entry(struct fuse_session *se,
struct fuse_entry_out *arg,
const struct fuse_entry_param *e)
{
*arg = (struct fuse_entry_out){
@ -341,6 +342,10 @@ static void fill_entry(struct fuse_entry_out *arg,
.attr_valid_nsec = calc_timeout_nsec(e->attr_timeout),
};
convert_stat(&e->attr, &arg->attr);
if (se->conn.capable & FUSE_CAP_ATTR_FLAGS) {
arg->attr.flags = e->attr_flags;
}
}
/*
@ -365,7 +370,7 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
struct fuse_direntplus *dp = (struct fuse_direntplus *)buf;
memset(&dp->entry_out, 0, sizeof(dp->entry_out));
fill_entry(&dp->entry_out, e);
fill_entry(req->se, &dp->entry_out, e);
struct fuse_dirent *dirent = &dp->dirent;
*dirent = (struct fuse_dirent){
@ -403,7 +408,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
size_t size = sizeof(arg);
memset(&arg, 0, sizeof(arg));
fill_entry(&arg, e);
fill_entry(req->se, &arg, e);
return send_reply_ok(req, &arg, size);
}
@ -416,13 +421,13 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize);
memset(buf, 0, sizeof(buf));
fill_entry(earg, e);
fill_entry(req->se, earg, e);
fill_open(oarg, f);
return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out));
}
int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
double attr_timeout)
int fuse_reply_attr_with_flags(fuse_req_t req, const struct stat *attr,
double attr_timeout, uint32_t attr_flags)
{
struct fuse_attr_out arg;
size_t size = sizeof(arg);
@ -432,9 +437,19 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout);
convert_stat(attr, &arg.attr);
if (req->se->conn.capable & FUSE_CAP_ATTR_FLAGS) {
arg.attr.flags = attr_flags;
}
return send_reply_ok(req, &arg, size);
}
/*
 * Reply with attributes without supplying any attribute flags.
 *
 * Forwards to fuse_reply_attr_with_flags() with attr_flags set to 0 and
 * returns that function's result unchanged.
 */
int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
                    double attr_timeout)
{
    const uint32_t no_attr_flags = 0;

    return fuse_reply_attr_with_flags(req, attr, attr_timeout, no_attr_flags);
}
/*
 * Reply with the contents of a symbolic link.
 *
 * Sends strlen(linkname) bytes of linkname via send_reply_ok() and
 * returns its result.
 */
int fuse_reply_readlink(fuse_req_t req, const char *linkname)
{
    size_t link_len = strlen(linkname);

    return send_reply_ok(req, linkname, link_len);
}
bufsize = max_bufsize;
}
}
if (arg->flags & FUSE_ATTR_FLAGS) {
se->conn.capable |= FUSE_CAP_ATTR_FLAGS;
}
#ifdef HAVE_SPLICE
#ifdef HAVE_VMSPLICE
se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
@ -2014,6 +2032,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO);
LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR);
LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC);
LL_SET_DEFAULT(1, FUSE_CAP_ATTR_FLAGS);
LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ);
LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS);
LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS);
@ -2103,6 +2122,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
if (se->conn.want & FUSE_CAP_POSIX_ACL) {
outarg.flags |= FUSE_POSIX_ACL;
}
if (se->conn.want & FUSE_CAP_ATTR_FLAGS) {
outarg.flags |= FUSE_ATTR_FLAGS;
}
outarg.max_readahead = se->conn.max_readahead;
outarg.max_write = se->conn.max_write;
if (se->conn.max_background >= (1 << 16)) {

View File

@ -102,6 +102,11 @@ struct fuse_entry_param {
* large value.
*/
double entry_timeout;
/**
* Flags for fuse_attr.flags that do not fit into attr.
*/
uint32_t attr_flags;
};
/**
@ -1308,6 +1313,21 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
double attr_timeout);
/**
* Reply with attributes and set fuse_attr.flags
*
* Possible requests:
* getattr, setattr
*
* @param req request handle
* @param attr the attributes
* @param attr_timeout validity timeout (in seconds) for the attributes
* @param attr_flags flags to put into fuse_attr.flags
* @return zero for success, -errno for failure to send reply
*/
int fuse_reply_attr_with_flags(fuse_req_t req, const struct stat *attr,
double attr_timeout, uint32_t attr_flags);
/**
* Reply with the contents of a symbolic link
*

View File

@ -166,10 +166,19 @@ void fuse_cmdline_help(void)
" enable/disable readirplus\n"
" default: readdirplus except with "
"cache=none\n"
" -o sandbox=namespace|chroot\n"
" sandboxing mode:\n"
" - namespace: mount, pid, and net\n"
" namespaces with pivot_root(2)\n"
" into shared directory\n"
" - chroot: chroot(2) into shared\n"
" directory (use in containers)\n"
" default: namespace\n"
" -o timeout=<number> I/O timeout (seconds)\n"
" default: depends on cache= option.\n"
" -o writeback|no_writeback enable/disable writeback cache\n"
" default: no_writeback\n"
" -o announce_submounts Announce sub-mount points to the guest\n"
" -o xattr|no_xattr enable/disable xattr\n"
" default: no_xattr\n"
" -o modcaps=CAPLIST Modify the list of capabilities\n"

View File

@ -40,6 +40,7 @@
#include "fuse_virtio.h"
#include "fuse_log.h"
#include "fuse_lowlevel.h"
#include "standard-headers/linux/fuse.h"
#include <assert.h>
#include <cap-ng.h>
#include <dirent.h>
@ -64,6 +65,7 @@
#include <syslog.h>
#include <unistd.h>
#include "qemu/cutils.h"
#include "passthrough_helpers.h"
#include "passthrough_seccomp.h"
@ -124,6 +126,14 @@ struct lo_inode {
GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
mode_t filetype;
/*
* So we can detect crossmount roots
* (As such, this only needs to be valid for directories. Note
* that files can have multiple parents due to hard links, and so
* their parent_dev may fluctuate.)
*/
dev_t parent_dev;
};
struct lo_cred {
@ -137,13 +147,26 @@ enum {
CACHE_ALWAYS,
};
/*
 * Sandboxing modes stored in lo_data.sandbox (selected with -o sandbox=...).
 * setup_sandbox() uses setup_namespaces()/setup_mounts() for
 * SANDBOX_NAMESPACE and setup_chroot() otherwise.
 */
enum {
SANDBOX_NAMESPACE,
SANDBOX_CHROOT,
};
/*
 * One rule of the xattr name map built by parse_xattrmap().
 * Both strings are heap allocated and released by free_xattrmap().
 */
typedef struct xattr_map_entry {
/* Prefix compared against the client-supplied name in xattr_map_client() */
char *key;
/*
 * Prefix compared against the server-side name in xattr_map_server();
 * for XATTR_MAP_FLAG_PREFIX rules xattr_map_client() also prepends it to
 * the client-supplied name.
 */
char *prepend;
/* Bitwise OR of XATTR_MAP_FLAG_* type and scope bits */
unsigned int flags;
} XattrMapEntry;
struct lo_data {
pthread_mutex_t mutex;
int sandbox;
int debug;
int writeback;
int flock;
int posix_lock;
int xattr;
char *xattrmap;
char *source;
char *modcaps;
double timeout;
@ -151,18 +174,27 @@ struct lo_data {
int timeout_set;
int readdirplus_set;
int readdirplus_clear;
int announce_submounts;
int allow_direct_io;
struct lo_inode root;
GHashTable *inodes; /* protected by lo->mutex */
struct lo_map ino_map; /* protected by lo->mutex */
struct lo_map dirp_map; /* protected by lo->mutex */
struct lo_map fd_map; /* protected by lo->mutex */
XattrMapEntry *xattr_map_list;
size_t xattr_map_nentries;
/* An O_PATH file descriptor to /proc/self/fd/ */
int proc_self_fd;
};
static const struct fuse_opt lo_opts[] = {
{ "sandbox=namespace",
offsetof(struct lo_data, sandbox),
SANDBOX_NAMESPACE },
{ "sandbox=chroot",
offsetof(struct lo_data, sandbox),
SANDBOX_CHROOT },
{ "writeback", offsetof(struct lo_data, writeback), 1 },
{ "no_writeback", offsetof(struct lo_data, writeback), 0 },
{ "source=%s", offsetof(struct lo_data, source), 0 },
@ -172,6 +204,7 @@ static const struct fuse_opt lo_opts[] = {
{ "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 },
{ "xattr", offsetof(struct lo_data, xattr), 1 },
{ "no_xattr", offsetof(struct lo_data, xattr), 0 },
{ "xattrmap=%s", offsetof(struct lo_data, xattrmap), 0 },
{ "modcaps=%s", offsetof(struct lo_data, modcaps), 0 },
{ "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
{ "timeout=", offsetof(struct lo_data, timeout_set), 1 },
@ -180,6 +213,7 @@ static const struct fuse_opt lo_opts[] = {
{ "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
{ "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
{ "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
{ "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 },
{ "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 },
{ "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 },
FUSE_OPT_END
@ -577,22 +611,52 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
}
}
/**
 * Call fstatat() and flag sub-mount roots.
 *
 * When the call succeeds, the result is a directory, and its st_dev differs
 * from the parent's st_dev (@parent_dev), FUSE_ATTR_SUBMOUNT is OR-ed into
 * *@fuse_attr_flags.  @fuse_attr_flags may be NULL, in which case nothing
 * is flagged.  Callers decide whether the flag is actually forwarded to the
 * FUSE client.
 *
 * Returns 0 on success, or -1 (with errno set by fstatat()) on failure.
 */
static int do_fstatat(int dirfd, const char *pathname, struct stat *statbuf,
                      int flags, dev_t parent_dev, uint32_t *fuse_attr_flags)
{
    if (fstatat(dirfd, pathname, statbuf, flags) == -1) {
        return -1;
    }

    /* Nowhere to report the flag to */
    if (!fuse_attr_flags) {
        return 0;
    }

    if (S_ISDIR(statbuf->st_mode) && statbuf->st_dev != parent_dev) {
        *fuse_attr_flags |= FUSE_ATTR_SUBMOUNT;
    }

    return 0;
}
/*
 * FUSE getattr handler.
 *
 * Stats the inode behind @ino through its O_PATH fd and sends exactly one
 * reply: the attributes (plus fuse_attr flags) on success, or an error.
 * FUSE_ATTR_SUBMOUNT is only forwarded when announce_submounts is enabled.
 *
 * @fi is unused.
 */
static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
                       struct fuse_file_info *fi)
{
    struct lo_data *lo = lo_data(req);
    struct lo_inode *inode;
    struct stat buf;
    uint32_t fuse_attr_flags = 0;
    int saverr;
    int res;

    (void)fi;

    /* lo_inode() may fail to resolve @ino; do not dereference NULL */
    inode = lo_inode(req, ino);
    if (!inode) {
        fuse_reply_err(req, EBADF);
        return;
    }

    res = do_fstatat(inode->fd, "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
                     inode->parent_dev, &fuse_attr_flags);
    /* Capture errno before lo_inode_put() gets a chance to clobber it */
    saverr = errno;
    lo_inode_put(lo, &inode);
    if (res == -1) {
        fuse_reply_err(req, saverr);
        return;
    }

    if (!lo->announce_submounts) {
        fuse_attr_flags &= ~FUSE_ATTR_SUBMOUNT;
    }

    fuse_reply_attr_with_flags(req, &buf, lo->timeout, fuse_attr_flags);
}
static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
@ -788,11 +852,16 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
goto out_err;
}
res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
res = do_fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
dir->key.dev, &e->attr_flags);
if (res == -1) {
goto out_err;
}
if (!lo->announce_submounts) {
e->attr_flags &= ~FUSE_ATTR_SUBMOUNT;
}
inode = lo_find(lo, &e->attr);
if (inode) {
close(newfd);
@ -824,6 +893,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
g_hash_table_insert(lo->inodes, &inode->key, inode);
pthread_mutex_unlock(&lo->mutex);
}
inode->parent_dev = dir->key.dev;
e->ino = inode->fuse_ino;
lo_inode_put(lo, &inode);
lo_inode_put(lo, &dir);
@ -1037,11 +1107,17 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
goto out_err;
}
res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
res = do_fstatat(inode->fd, "", &e.attr,
AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
parent_inode->key.dev, &e.attr_flags);
if (res == -1) {
goto out_err;
}
if (!lo->announce_submounts) {
e.attr_flags &= ~FUSE_ATTR_SUBMOUNT;
}
pthread_mutex_lock(&lo->mutex);
inode->nlookup++;
pthread_mutex_unlock(&lo->mutex);
@ -1050,6 +1126,14 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
name, (unsigned long long)e.ino);
/*
* No need to update inode->parent_dev, because
* (1) We cannot, the inode now has more than one parent,
* (2) Directories cannot have more than one parent, so link()
* does not work for them; but parent_dev only needs to be
* valid for directories.
*/
fuse_reply_entry(req, &e);
lo_inode_put(lo, &parent_inode);
lo_inode_put(lo, &inode);
@ -1068,14 +1152,21 @@ static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
{
int res;
struct stat attr;
struct lo_data *lo = lo_data(req);
struct lo_inode *dir = lo_inode(req, parent);
res = fstatat(lo_fd(req, parent), name, &attr,
AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
if (!dir) {
return NULL;
}
res = do_fstatat(dir->fd, name, &attr,
AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, dir->key.dev, NULL);
lo_inode_put(lo, &dir);
if (res == -1) {
return NULL;
}
return lo_find(lo_data(req), &attr);
return lo_find(lo, &attr);
}
static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
@ -2010,20 +2101,383 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
fuse_reply_err(req, res == -1 ? errno : 0);
}
static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
/* types */
/*
* Exit; process attribute unmodified if matched.
* An empty key applies to all.
*/
#define XATTR_MAP_FLAG_OK (1 << 0)
/*
* The attribute is unwanted;
* EPERM on write, hidden on read.
*/
#define XATTR_MAP_FLAG_BAD (1 << 1)
/*
* For attr that start with 'key' prepend 'prepend'
* 'key' may be empty to prepend for all attrs
* key is defined from set/remove point of view.
* Automatically reversed on read
*/
#define XATTR_MAP_FLAG_PREFIX (1 << 2)
/* scopes */
/* Apply rule to get/set/remove */
#define XATTR_MAP_FLAG_CLIENT (1 << 16)
/* Apply rule to list */
#define XATTR_MAP_FLAG_SERVER (1 << 17)
/* Apply rule to all */
#define XATTR_MAP_FLAG_ALL (XATTR_MAP_FLAG_SERVER | XATTR_MAP_FLAG_CLIENT)
/*
 * Append *new_entry to lo->xattr_map_list, growing the array by one slot.
 *
 * The entry is copied by value, so the key/prepend pointers it carries now
 * belong to the list (free_xattrmap() releases them).
 */
static void add_xattrmap_entry(struct lo_data *lo,
                               const XattrMapEntry *new_entry)
{
    size_t slot = lo->xattr_map_nentries;

    lo->xattr_map_list = g_realloc_n(lo->xattr_map_list, slot + 1,
                                     sizeof(XattrMapEntry));
    lo->xattr_map_list[slot] = *new_entry;
    lo->xattr_map_nentries = slot + 1;
}
/*
 * Release the parsed xattr map: every entry's key and prepend strings,
 * then the array itself.
 *
 * Afterwards the map is in the same state as an empty one (NULL list,
 * zero entries), so a later walk over xattr_map_nentries cannot index
 * into the freed array.  Does nothing if no map was ever built.
 */
static void free_xattrmap(struct lo_data *lo)
{
    XattrMapEntry *map = lo->xattr_map_list;
    size_t i;

    if (!map) {
        return;
    }

    for (i = 0; i < lo->xattr_map_nentries; i++) {
        g_free(map[i].key);
        g_free(map[i].prepend);
    }

    g_free(map);
    lo->xattr_map_list = NULL;
    /* The counter is a size_t; -1 would wrap to SIZE_MAX */
    lo->xattr_map_nentries = 0;
}
/*
 * Append one rule to the xattr map, duplicating both strings so the
 * caller keeps ownership of @key and @prepend.
 */
static void xattrmap_add_rule(struct lo_data *lo, unsigned int flags,
                              const char *key, const char *prepend)
{
    XattrMapEntry entry;

    entry.flags = flags;
    entry.key = g_strdup(key);
    entry.prepend = g_strdup(prepend);
    add_xattrmap_entry(lo, &entry);
}

/*
 * Handle the 'map' type, which is shorthand for a fixed set of rules
 * covering the common case of prefixing a subset of (or all) attributes
 * while letting everything that is not prefixed through.
 * It must be the last entry in the stream, although other entries may
 * come before it.
 * The form is:
 *    :map:key:prefix:
 *
 * key may be empty, in which case all entries are prefixed.
 *
 * @rule points just past the 'map' keyword and @sep is the separator
 * character of this rule.  Any syntax error is logged and terminates the
 * process with exit(1).
 */
static void parse_xattrmap_map(struct lo_data *lo,
                               const char *rule, char sep)
{
    const char *field_end;
    char *key;
    char *prefix;

    if (*rule != sep) {
        fuse_log(FUSE_LOG_ERR,
                 "%s: Expecting '%c' after 'map' keyword, found '%c'\n",
                 __func__, sep, *rule);
        exit(1);
    }
    rule++;

    /* 'key' field */
    field_end = strchr(rule, sep);
    if (!field_end) {
        fuse_log(FUSE_LOG_ERR,
                 "%s: Missing '%c' at end of key field in map rule\n",
                 __func__, sep);
        exit(1);
    }
    key = g_strndup(rule, field_end - rule);
    rule = field_end + 1;

    /* 'prefix' field */
    field_end = strchr(rule, sep);
    if (!field_end) {
        fuse_log(FUSE_LOG_ERR,
                 "%s: Missing '%c' at end of prefix field in map rule\n",
                 __func__, sep);
        exit(1);
    }
    prefix = g_strndup(rule, field_end - rule);
    rule = field_end + 1;

    /* Nothing may follow a 'map' rule, so the string has to end here */
    if (*rule) {
        fuse_log(FUSE_LOG_ERR,
                 "%s: Expecting end of command after map, found '%c'\n",
                 __func__, *rule);
        exit(1);
    }

    /* 1st: prefix whatever matches the key (everything if key is empty) */
    xattrmap_add_rule(lo, XATTR_MAP_FLAG_PREFIX | XATTR_MAP_FLAG_ALL,
                      key, prefix);

    if (*key) {
        /* Only names matching the key get prefixed */
        /* 2nd: hide non-prefixed but matching entries on the host */
        xattrmap_add_rule(lo, XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_SERVER,
                          "" /* not used */, key);
        /* 3rd: stop the client accessing prefixed attributes directly */
        xattrmap_add_rule(lo, XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_CLIENT,
                          prefix, "" /* not used */);
        /* 4th: everything else is OK */
        xattrmap_add_rule(lo, XATTR_MAP_FLAG_OK | XATTR_MAP_FLAG_ALL, "", "");
    } else {
        /* Everything gets prefixed */
        /* 2nd: hide any non-prefixed entries on the host */
        xattrmap_add_rule(lo, XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_ALL, "", "");
    }

    g_free(key);
    g_free(prefix);
}
/*
 * Parse the -o xattrmap= string (lo->xattrmap) into lo->xattr_map_list.
 *
 * The string is a sequence of rules, optionally separated by whitespace.
 * The first non-space character of a rule is its separator, and a rule
 * has the form:
 *    <sep>type<sep>scope<sep>key<sep>prepend<sep>
 * with type one of 'prefix', 'ok', 'bad' and scope one of 'client',
 * 'server', 'all'.  A 'map' rule is handed to parse_xattrmap_map() and
 * ends parsing, since it must be the last rule.
 *
 * Any syntax error, or a string without any rule, is logged and
 * terminates the process with exit(1).
 */
static void parse_xattrmap(struct lo_data *lo)
{
    const char *map = lo->xattrmap;
    const char *tmp;

    lo->xattr_map_nentries = 0;
    while (*map) {
        XattrMapEntry tmp_entry;
        char sep;

        /* <ctype.h> functions require an unsigned char value */
        if (isspace((unsigned char)*map)) {
            map++;
            continue;
        }
        /*
         * The separator is the first non-space of the rule; the loop
         * condition guarantees it is not the terminating NUL.
         */
        sep = *map++;

        tmp_entry.flags = 0;
        /* Start of 'type' */
        if (strstart(map, "prefix", &map)) {
            tmp_entry.flags |= XATTR_MAP_FLAG_PREFIX;
        } else if (strstart(map, "ok", &map)) {
            tmp_entry.flags |= XATTR_MAP_FLAG_OK;
        } else if (strstart(map, "bad", &map)) {
            tmp_entry.flags |= XATTR_MAP_FLAG_BAD;
        } else if (strstart(map, "map", &map)) {
            /*
             * map is sugar that adds a number of rules, and must be
             * the last entry.
             */
            parse_xattrmap_map(lo, map, sep);
            return;
        } else {
            fuse_log(FUSE_LOG_ERR,
                     "%s: Unexpected type;"
                     " Expecting 'prefix', 'ok', 'bad' or 'map' in rule %zu\n",
                     __func__, lo->xattr_map_nentries);
            exit(1);
        }

        /*
         * Test before advancing: stepping past a mismatching character
         * could move beyond the end of the string.
         */
        if (*map != sep) {
            fuse_log(FUSE_LOG_ERR,
                     "%s: Missing '%c' at end of type field of rule %zu\n",
                     __func__, sep, lo->xattr_map_nentries);
            exit(1);
        }
        map++;

        /* Start of 'scope' */
        if (strstart(map, "client", &map)) {
            tmp_entry.flags |= XATTR_MAP_FLAG_CLIENT;
        } else if (strstart(map, "server", &map)) {
            tmp_entry.flags |= XATTR_MAP_FLAG_SERVER;
        } else if (strstart(map, "all", &map)) {
            tmp_entry.flags |= XATTR_MAP_FLAG_ALL;
        } else {
            fuse_log(FUSE_LOG_ERR,
                     "%s: Unexpected scope;"
                     " Expecting 'client', 'server', or 'all', in rule %zu\n",
                     __func__, lo->xattr_map_nentries);
            exit(1);
        }

        /* Report the character actually found, not the one after it */
        if (*map != sep) {
            fuse_log(FUSE_LOG_ERR,
                     "%s: Expecting '%c' found '%c'"
                     " after scope in rule %zu\n",
                     __func__, sep, *map, lo->xattr_map_nentries);
            exit(1);
        }
        map++;

        /* At start of 'key' field */
        tmp = strchr(map, sep);
        if (!tmp) {
            fuse_log(FUSE_LOG_ERR,
                     "%s: Missing '%c' at end of key field of rule %zu\n",
                     __func__, sep, lo->xattr_map_nentries);
            exit(1);
        }
        tmp_entry.key = g_strndup(map, tmp - map);
        map = tmp + 1;

        /* At start of 'prepend' field */
        tmp = strchr(map, sep);
        if (!tmp) {
            fuse_log(FUSE_LOG_ERR,
                     "%s: Missing '%c' at end of prepend field of rule %zu\n",
                     __func__, sep, lo->xattr_map_nentries);
            exit(1);
        }
        tmp_entry.prepend = g_strndup(map, tmp - map);
        map = tmp + 1;

        /* The list takes over the two strings just allocated */
        add_xattrmap_entry(lo, &tmp_entry);
        /* End of rule - go around again for another rule */
    }

    if (!lo->xattr_map_nentries) {
        fuse_log(FUSE_LOG_ERR, "Empty xattr map\n");
        exit(1);
    }
}
/*
 * Translate an xattr name supplied by the client (getxattr/setxattr/
 * removexattr) into the name to use on the server side.
 *
 * The rules are tried in order; the first client-scope rule whose key is
 * a prefix of @client_name decides the outcome:
 *   - 'bad':    returns -EPERM, nothing is allocated
 *   - 'ok':     returns 0, *out_name is left untouched
 *   - 'prefix': returns 0 with *out_name pointing at a newly allocated
 *               "<prepend><client_name>" string, or -ENOMEM if that
 *               allocation fails
 * If no rule decides, the attribute is blocked and -EPERM is returned.
 */
static int xattr_map_client(const struct lo_data *lo, const char *client_name,
                            char **out_name)
{
    size_t idx;

    for (idx = 0; idx < lo->xattr_map_nentries; idx++) {
        const XattrMapEntry *rule = &lo->xattr_map_list[idx];

        /* Skip rules outside client scope or with a non-matching key */
        if (!(rule->flags & XATTR_MAP_FLAG_CLIENT)) {
            continue;
        }
        if (!strstart(client_name, rule->key, NULL)) {
            continue;
        }

        if (rule->flags & XATTR_MAP_FLAG_BAD) {
            return -EPERM;
        }

        if (rule->flags & XATTR_MAP_FLAG_OK) {
            /* Name is used as is */
            return 0;
        }

        if (rule->flags & XATTR_MAP_FLAG_PREFIX) {
            size_t needed = strlen(client_name) + strlen(rule->prepend) + 1;

            *out_name = g_try_malloc(needed);
            if (!*out_name) {
                return -ENOMEM;
            }
            sprintf(*out_name, "%s%s", rule->prepend, client_name);
            return 0;
        }
    }

    return -EPERM;
}
/*
 * Translate an xattr name reported by the server file system (listxattr)
 * into the name shown to the client.
 *
 * The rules are tried in order; the first server-scope rule whose prepend
 * string is a prefix of @server_name decides the outcome:
 *   - 'bad':    returns -ENODATA, the name should be dropped from the list
 *   - 'ok':     returns 0 with *out_name set to @server_name
 *   - 'prefix': returns 0 with *out_name pointing into @server_name just
 *               past the matched prefix
 * No memory is allocated; the result always points into @server_name.
 * If no rule decides, -ENODATA is returned.
 */
static int xattr_map_server(const struct lo_data *lo, const char *server_name,
                            const char **out_name)
{
    const char *after_prefix;
    size_t idx;

    for (idx = 0; idx < lo->xattr_map_nentries; idx++) {
        const XattrMapEntry *rule = &lo->xattr_map_list[idx];

        /* Skip rules outside server scope or with a non-matching prefix */
        if (!(rule->flags & XATTR_MAP_FLAG_SERVER)) {
            continue;
        }
        if (!strstart(server_name, rule->prepend, &after_prefix)) {
            continue;
        }

        if (rule->flags & XATTR_MAP_FLAG_BAD) {
            return -ENODATA;
        }

        if (rule->flags & XATTR_MAP_FLAG_OK) {
            *out_name = server_name;
            return 0;
        }

        if (rule->flags & XATTR_MAP_FLAG_PREFIX) {
            /* Strip the prefix */
            *out_name = after_prefix;
            return 0;
        }
    }

    return -ENODATA;
}
static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name,
size_t size)
{
struct lo_data *lo = lo_data(req);
char *value = NULL;
char procname[64];
const char *name;
char *mapped_name;
struct lo_inode *inode;
ssize_t ret;
int saverr;
int fd = -1;
mapped_name = NULL;
name = in_name;
if (lo->xattrmap) {
ret = xattr_map_client(lo, in_name, &mapped_name);
if (ret < 0) {
if (ret == -EPERM) {
ret = -ENODATA;
}
fuse_reply_err(req, -ret);
return;
}
if (mapped_name) {
name = mapped_name;
}
}
inode = lo_inode(req, ino);
if (!inode) {
fuse_reply_err(req, EBADF);
g_free(mapped_name);
return;
}
@ -2088,6 +2542,7 @@ out_err:
saverr = errno;
out:
fuse_reply_err(req, saverr);
g_free(mapped_name);
goto out_free;
}
@ -2144,8 +2599,60 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
if (ret == 0) {
goto out;
}
if (lo->xattr_map_list) {
/*
* Map the names back, some attributes might be dropped,
* some shortened, but not increased, so we shouldn't
* run out of room.
*/
size_t out_index, in_index;
out_index = 0;
in_index = 0;
while (in_index < ret) {
const char *map_out;
char *in_ptr = value + in_index;
/* Length of current attribute name */
size_t in_len = strlen(value + in_index) + 1;
int mapret = xattr_map_server(lo, in_ptr, &map_out);
if (mapret != -ENODATA && mapret != 0) {
/* Shouldn't happen */
saverr = -mapret;
goto out;
}
if (mapret == 0) {
/* Either unchanged, or truncated */
size_t out_len;
if (map_out != in_ptr) {
/* +1 copies the NIL */
out_len = strlen(map_out) + 1;
} else {
/* No change */
out_len = in_len;
}
/*
* Move result along, may still be needed for an unchanged
* entry if a previous entry was changed.
*/
memmove(value + out_index, map_out, out_len);
out_index += out_len;
}
in_index += in_len;
}
ret = out_index;
if (ret == 0) {
goto out;
}
}
fuse_reply_buf(req, value, ret);
} else {
/*
* xattrmap only ever shortens the result,
* so we don't need to do anything clever with the
* allocation length here.
*/
fuse_reply_xattr(req, ret);
}
out_free:
@ -2165,19 +2672,35 @@ out:
goto out_free;
}
static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name,
const char *value, size_t size, int flags)
{
char procname[64];
const char *name;
char *mapped_name;
struct lo_data *lo = lo_data(req);
struct lo_inode *inode;
ssize_t ret;
int saverr;
int fd = -1;
mapped_name = NULL;
name = in_name;
if (lo->xattrmap) {
ret = xattr_map_client(lo, in_name, &mapped_name);
if (ret < 0) {
fuse_reply_err(req, -ret);
return;
}
if (mapped_name) {
name = mapped_name;
}
}
inode = lo_inode(req, ino);
if (!inode) {
fuse_reply_err(req, EBADF);
g_free(mapped_name);
return;
}
@ -2212,21 +2735,38 @@ out:
}
lo_inode_put(lo, &inode);
g_free(mapped_name);
fuse_reply_err(req, saverr);
}
static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name)
{
char procname[64];
const char *name;
char *mapped_name;
struct lo_data *lo = lo_data(req);
struct lo_inode *inode;
ssize_t ret;
int saverr;
int fd = -1;
mapped_name = NULL;
name = in_name;
if (lo->xattrmap) {
ret = xattr_map_client(lo, in_name, &mapped_name);
if (ret < 0) {
fuse_reply_err(req, -ret);
return;
}
if (mapped_name) {
name = mapped_name;
}
}
inode = lo_inode(req, ino);
if (!inode) {
fuse_reply_err(req, EBADF);
g_free(mapped_name);
return;
}
@ -2261,6 +2801,7 @@ out:
}
lo_inode_put(lo, &inode);
g_free(mapped_name);
fuse_reply_err(req, saverr);
}
@ -2660,6 +3201,41 @@ static void setup_capabilities(char *modcaps_in)
pthread_mutex_unlock(&cap.mutex);
}
/*
 * chroot-based sandbox: weaker than the namespace sandbox, meant for
 * environments where the process is launched without CAP_SYS_ADMIN.
 *
 * Any failure is logged and terminates the process with exit(1).
 */
static void setup_chroot(struct lo_data *lo)
{
    /* Must be opened before the chroot below */
    lo->proc_self_fd = open("/proc/self/fd", O_PATH);
    if (lo->proc_self_fd == -1) {
        fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n");
        exit(1);
    }

    /*
     * Turn the shared directory into the file system root, so that
     * FUSE_OPEN (lo_open()) cannot leave the shared directory by opening
     * a symlink.
     *
     * seccomp later disables the chroot(2) syscall and the CAP_SYS_CHROOT
     * capability is dropped, so the chroot cannot be tampered with.
     *
     * Escaping through lo->proc_self_fd remains possible, but only after
     * gaining control of the process.
     */
    if (chroot(lo->source)) {
        fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source);
        exit(1);
    }

    /* Make the new root the working directory */
    if (chdir("/")) {
        fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n");
        exit(1);
    }
}
/*
* Lock down this process to prevent access to other processes or files outside
* source directory. This reduces the impact of arbitrary code execution bugs.
@ -2667,8 +3243,13 @@ static void setup_capabilities(char *modcaps_in)
/*
 * Apply the sandbox selected by lo->sandbox, then install the seccomp
 * filter and adjust capabilities.
 *
 * SANDBOX_NAMESPACE uses setup_namespaces() and setup_mounts(); any other
 * mode uses setup_chroot().  Only one of the two mechanisms is applied,
 * so that chroot mode does not depend on the namespace setup.
 */
static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
                          bool enable_syslog)
{
    if (lo->sandbox == SANDBOX_NAMESPACE) {
        setup_namespaces(lo, se);
        setup_mounts(lo->source);
    } else {
        setup_chroot(lo);
    }

    setup_seccomp(enable_syslog);
    setup_capabilities(g_strdup(lo->modcaps));
}
@ -2806,6 +3387,8 @@ static void fuse_lo_data_cleanup(struct lo_data *lo)
close(lo->root.fd);
}
free(lo->xattrmap);
free_xattrmap(lo);
free(lo->source);
}
@ -2815,6 +3398,7 @@ int main(int argc, char *argv[])
struct fuse_session *se;
struct fuse_cmdline_opts opts;
struct lo_data lo = {
.sandbox = SANDBOX_NAMESPACE,
.debug = 0,
.writeback = 0,
.posix_lock = 0,
@ -2878,12 +3462,11 @@ int main(int argc, char *argv[])
goto err_out1;
}
/*
* log_level is 0 if not configured via cmd options (0 is LOG_EMERG,
* and we don't use this log level).
*/
if (opts.log_level != 0) {
current_log_level = opts.log_level;
} else {
/* default log level is INFO */
current_log_level = FUSE_LOG_INFO;
}
lo.debug = opts.debug;
if (lo.debug) {
@ -2906,6 +3489,11 @@ int main(int argc, char *argv[])
} else {
lo.source = strdup("/");
}
if (lo.xattrmap) {
parse_xattrmap(&lo);
}
if (!lo.timeout_set) {
switch (lo.cache) {
case CACHE_NONE: