diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h index 215138a372..4a9e63c7cd 100644 --- a/include/sysemu/seccomp.h +++ b/include/sysemu/seccomp.h @@ -17,6 +17,7 @@ #define QEMU_SECCOMP_SET_DEFAULT (1 << 0) #define QEMU_SECCOMP_SET_OBSOLETE (1 << 1) +#define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2) #include diff --git a/qemu-options.hx b/qemu-options.hx index 72150c6b84..5c1b163fb5 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4017,20 +4017,26 @@ Old param mode (ARM only). ETEXI DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ - "-sandbox on[,obsolete=allow|deny]\n" \ + "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \ " Enable seccomp mode 2 system call filter (default 'off').\n" \ " use 'obsolete' to allow obsolete system calls that are provided\n" \ " by the kernel, but typically no longer used by modern\n" \ - " C library implementations.\n", + " C library implementations.\n" \ + " use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \ + " its privileges by blacklisting all set*uid|gid system calls.\n" \ + " The value 'children' will deny set*uid|gid system calls for\n" \ + " main QEMU process but will allow forks and execves to run unprivileged\n", QEMU_ARCH_ALL) STEXI -@item -sandbox @var{arg}[,obsolete=@var{string}] +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}] @findex -sandbox Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will disable it. The default is 'off'. 
@table @option @item obsolete=@var{string} Enable Obsolete system calls +@item elevateprivileges=@var{string} +Control the set*uid|gid system calls: 'allow' (the default) permits them, 'deny' blocks them, and 'children' blocks them for the main QEMU process while still letting forked and exec'd children run unprivileged @end table ETEXI diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 8a5fbd2ff1..978d66bd28 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -67,6 +67,17 @@ static const struct QemuSeccompSyscall blacklist[] = { { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE }, { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE }, { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE }, + /* privileged */ + { SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setpgid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED }, }; diff --git a/vl.c b/vl.c index 57c5e93c1a..d59b560276 100644 --- a/vl.c +++ b/vl.c @@ -29,6 +29,7 @@ #ifdef CONFIG_SECCOMP #include "sysemu/seccomp.h" +#include "sys/prctl.h" #endif #if defined(CONFIG_VDE) @@ -275,6 +276,10 @@ static QemuOptsList qemu_sandbox_opts = { .name = "obsolete", .type = QEMU_OPT_STRING, }, + { + .name = "elevateprivileges", + .type = QEMU_OPT_STRING, + }, { /* end of list */ } }, }; @@ -1056,6 +1061,28 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) } } + value = qemu_opt_get(opts, "elevateprivileges"); + if (value) { + if (g_str_equal(value, "deny")) { + seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; + } else if (g_str_equal(value, "children")) { + seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; + + /* calling prctl directly because we're + * not sure if host has CAP_SYS_ADMIN set */ + if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { + error_report("failed to set no_new_privs " + 
"aborting"); + return -1; + } + } else if (g_str_equal(value, "allow")) { + /* default value */ + } else { + error_report("invalid argument for elevateprivileges"); + return -1; + } + } + if (seccomp_start(seccomp_opts) < 0) { error_report("failed to install seccomp syscall filter " "in the kernel");