qemu-e2k/tools/virtiofsd/passthrough_seccomp.c

173 lines
4.1 KiB
C
Raw Normal View History

/*
* Seccomp sandboxing for virtiofsd
*
* Copyright (C) 2019 Red Hat, Inc.
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "passthrough_seccomp.h"
#include "fuse_i.h"
#include "fuse_log.h"
#include <errno.h>
#include <glib.h>
#include <seccomp.h>
#include <stdlib.h>
/* Bodge for libseccomp 2.4.2 which broke ppoll */
#if !defined(__SNR_ppoll) && defined(__SNR_brk)
#ifdef __NR_ppoll
#define __SNR_ppoll __NR_ppoll
#else
#define __SNR_ppoll __PNR_ppoll
#endif
#endif
static const int syscall_whitelist[] = {
/* TODO ireg sem*() syscalls */
SCMP_SYS(brk),
SCMP_SYS(capget), /* For CAP_FSETID */
SCMP_SYS(capset),
SCMP_SYS(clock_gettime),
SCMP_SYS(clone),
#ifdef __NR_clone3
SCMP_SYS(clone3),
#endif
SCMP_SYS(close),
SCMP_SYS(copy_file_range),
SCMP_SYS(dup),
SCMP_SYS(eventfd2),
SCMP_SYS(exit),
SCMP_SYS(exit_group),
SCMP_SYS(fallocate),
SCMP_SYS(fchdir),
virtiofsd: Whitelist fchmod lo_setattr() invokes fchmod() in a rarely used code path, so it should be whitelisted or virtiofsd will crash with EBADSYS. Said code path can be triggered for example as follows: On the host, in the shared directory, create a file with the sticky bit set and a security.capability xattr: (1) # touch foo (2) # chmod u+s foo (3) # setcap '' foo Then in the guest let some process truncate that file after it has dropped all of its capabilities (at least CAP_FSETID): int main(int argc, char *argv[]) { capng_setpid(getpid()); capng_clear(CAPNG_SELECT_BOTH); capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE, 0); capng_apply(CAPNG_SELECT_BOTH); ftruncate(open(argv[1], O_RDWR), 0); } This will cause the guest kernel to drop the sticky bit (i.e. perform a mode change) as part of the truncate (where FATTR_FH is set), and that will cause virtiofsd to invoke fchmod() instead of fchmodat(). (A similar configuration exists further below with futimens() vs. utimensat(), but the former is not a syscall but just a wrapper for the latter, so no further whitelisting is required.) Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1842667 Reported-by: Qian Cai <caiqian@redhat.com> Cc: qemu-stable@nongnu.org Signed-off-by: Max Reitz <mreitz@redhat.com> Message-Id: <20200608093111.14942-1-mreitz@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Reviewed-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2020-06-08 11:31:11 +02:00
SCMP_SYS(fchmod),
SCMP_SYS(fchmodat),
SCMP_SYS(fchownat),
SCMP_SYS(fcntl),
SCMP_SYS(fdatasync),
SCMP_SYS(fgetxattr),
SCMP_SYS(flistxattr),
SCMP_SYS(flock),
SCMP_SYS(fremovexattr),
SCMP_SYS(fsetxattr),
SCMP_SYS(fstat),
SCMP_SYS(fstatfs),
SCMP_SYS(fsync),
SCMP_SYS(ftruncate),
SCMP_SYS(futex),
SCMP_SYS(getdents),
SCMP_SYS(getdents64),
SCMP_SYS(getegid),
SCMP_SYS(geteuid),
SCMP_SYS(getpid),
SCMP_SYS(gettid),
SCMP_SYS(gettimeofday),
SCMP_SYS(getxattr),
SCMP_SYS(linkat),
SCMP_SYS(listxattr),
SCMP_SYS(lseek),
SCMP_SYS(madvise),
SCMP_SYS(mkdirat),
SCMP_SYS(mknodat),
SCMP_SYS(mmap),
SCMP_SYS(mprotect),
SCMP_SYS(mremap),
SCMP_SYS(munmap),
SCMP_SYS(newfstatat),
SCMP_SYS(open),
SCMP_SYS(openat),
SCMP_SYS(ppoll),
SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
SCMP_SYS(preadv),
SCMP_SYS(pread64),
SCMP_SYS(pwritev),
SCMP_SYS(pwrite64),
SCMP_SYS(read),
SCMP_SYS(readlinkat),
SCMP_SYS(recvmsg),
SCMP_SYS(renameat),
SCMP_SYS(renameat2),
SCMP_SYS(removexattr),
SCMP_SYS(rt_sigaction),
SCMP_SYS(rt_sigprocmask),
SCMP_SYS(rt_sigreturn),
SCMP_SYS(sendmsg),
SCMP_SYS(setresgid),
SCMP_SYS(setresuid),
#ifdef __NR_setresgid32
SCMP_SYS(setresgid32),
#endif
#ifdef __NR_setresuid32
SCMP_SYS(setresuid32),
#endif
SCMP_SYS(set_robust_list),
SCMP_SYS(setxattr),
SCMP_SYS(symlinkat),
SCMP_SYS(time), /* Rarely needed, except on static builds */
SCMP_SYS(tgkill),
SCMP_SYS(unlinkat),
SCMP_SYS(unshare),
SCMP_SYS(utimensat),
SCMP_SYS(write),
SCMP_SYS(writev),
};
/* Syscalls used when --syslog is enabled */
static const int syscall_whitelist_syslog[] = {
SCMP_SYS(sendto),
};
static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len)
{
size_t i;
for (i = 0; i < len; i++) {
if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) {
fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n",
syscalls[i]);
exit(1);
}
}
}
void setup_seccomp(bool enable_syslog)
{
scmp_filter_ctx ctx;
#ifdef SCMP_ACT_KILL_PROCESS
ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
/* Handle a newer libseccomp but an older kernel */
if (!ctx && errno == EOPNOTSUPP) {
ctx = seccomp_init(SCMP_ACT_TRAP);
}
#else
ctx = seccomp_init(SCMP_ACT_TRAP);
#endif
if (!ctx) {
fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
exit(1);
}
add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist));
if (enable_syslog) {
add_whitelist(ctx, syscall_whitelist_syslog,
G_N_ELEMENTS(syscall_whitelist_syslog));
}
/* libvhost-user calls this for post-copy migration, we don't need it */
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
SCMP_SYS(userfaultfd), 0) != 0) {
fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
exit(1);
}
if (seccomp_load(ctx) < 0) {
fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
exit(1);
}
seccomp_release(ctx);
}