From 6f2231e9b0931e1998d9ed0c509adf7aedc02db2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 22 Aug 2018 19:02:47 +0200 Subject: [PATCH 1/4] seccomp: use SIGSYS signal instead of killing the thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The seccomp action SCMP_ACT_KILL results in immediate termination of the thread that made the bad system call. However, qemu being multi-threaded, it keeps running. There is no easy way for parent process / management layer (libvirt) to know about that situation. Instead, the default SIGSYS handler when invoked with SCMP_ACT_TRAP will terminate the program and core dump. This may not be the most secure solution, but probably better than just killing the offending thread. SCMP_ACT_KILL_PROCESS has been added in Linux 4.14 to improve the situation, which I propose to use by default if available in the next patch. Related to: https://bugzilla.redhat.com/show_bug.cgi?id=1594456 Signed-off-by: Marc-André Lureau Reviewed-by: Daniel P. Berrangé Acked-by: Eduardo Otubo --- qemu-seccomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 9cd8eb9499..b117a92559 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -125,7 +125,7 @@ static int seccomp_start(uint32_t seccomp_opts) continue; } - rc = seccomp_rule_add_array(ctx, SCMP_ACT_KILL, blacklist[i].num, + rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, blacklist[i].narg, blacklist[i].arg_cmp); if (rc < 0) { goto seccomp_return; From bda08a5764d470f101fa38635d30b41179a313e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 22 Aug 2018 19:02:48 +0200 Subject: [PATCH 2/4] seccomp: prefer SCMP_ACT_KILL_PROCESS if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The upcoming libseccomp release should have SCMP_ACT_KILL_PROCESS action (https://github.com/seccomp/libseccomp/issues/96). SCMP_ACT_KILL_PROCESS is preferable to immediately terminate the offending process, rather than having the SIGSYS handler running. Use SECCOMP_GET_ACTION_AVAIL to check availability of kernel support, as libseccomp will fallback on SCMP_ACT_KILL otherwise, and we still prefer SCMP_ACT_TRAP. Signed-off-by: Marc-André Lureau Reviewed-by: Daniel P. Berrangé Acked-by: Eduardo Otubo --- qemu-seccomp.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/qemu-seccomp.c b/qemu-seccomp.c index b117a92559..f0c833f3ca 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -20,6 +20,7 @@ #include #include #include "sysemu/seccomp.h" +#include /* For some architectures (notably ARM) cacheflush is not supported until * libseccomp 2.2.3, but configure enforces that we are using a more recent @@ -107,12 +108,40 @@ static const struct QemuSeccompSyscall blacklist[] = { { SCMP_SYS(sched_get_priority_min), QEMU_SECCOMP_SET_RESOURCECTL }, }; +static inline __attribute__((unused)) int +qemu_seccomp(unsigned int operation, unsigned int flags, void *args) +{ +#ifdef __NR_seccomp + return syscall(__NR_seccomp, operation, flags, args); +#else + errno = ENOSYS; + return -1; +#endif +} + +static uint32_t qemu_seccomp_get_kill_action(void) +{ +#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ + defined(SECCOMP_RET_KILL_PROCESS) + { + uint32_t action = SECCOMP_RET_KILL_PROCESS; + + if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { + return SCMP_ACT_KILL_PROCESS; + } + } +#endif + + return SCMP_ACT_TRAP; +} + static int seccomp_start(uint32_t seccomp_opts) { int rc = 0; unsigned int i = 0; scmp_filter_ctx ctx; + uint32_t action = qemu_seccomp_get_kill_action(); ctx = seccomp_init(SCMP_ACT_ALLOW); if (ctx == NULL) { @@ -125,7 +154,7 @@ static int seccomp_start(uint32_t seccomp_opts) continue; } - rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, + rc = seccomp_rule_add_array(ctx, action, blacklist[i].num, blacklist[i].narg, blacklist[i].arg_cmp); if (rc < 0) { goto seccomp_return; From d0699bd37c48067cffbd80383172efc29da6d2f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 22 Aug 2018 19:02:49 +0200 Subject: [PATCH 3/4] configure: require libseccomp 2.2.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following patch is going to require TSYNC, which is only available since libseccomp 2.2.0. libseccomp 2.2.0 was released February 12, 2015. According to repology, libseccomp version in different distros: RHEL-7: 2.3.1 Debian (Stretch): 2.3.1 OpenSUSE Leap 15: 2.3.2 Ubuntu (Xenial): 2.3.1 This will drop support for -sandbox on: Debian (Jessie): 2.1.1 (but 2.2.3 in backports) Signed-off-by: Marc-André Lureau Acked-by: Eduardo Otubo --- configure | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/configure b/configure index e7bddc04b0..5fc2915096 100755 --- a/configure +++ b/configure @@ -2228,13 +2228,10 @@ fi ########################################## # libseccomp check +libseccomp_minver="2.2.0" if test "$seccomp" != "no" ; then case "$cpu" in - i386|x86_64) - libseccomp_minver="2.1.0" - ;; - mips) - libseccomp_minver="2.2.0" + i386|x86_64|mips) ;; arm|aarch64) libseccomp_minver="2.2.3" From 70dfabeaa79ba4d7a3b699abe1a047c8012db114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 22 Aug 2018 19:02:50 +0200 Subject: [PATCH 4/4] seccomp: set the seccomp filter to all threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using "-seccomp on", the seccomp policy is only applied to the main thread, the vcpu worker thread and other worker threads created after seccomp policy is applied; the seccomp policy is not applied to e.g. the RCU thread because it is created before the seccomp policy is applied and SECCOMP_FILTER_FLAG_TSYNC isn't used. This can be verified with for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done Seccomp: 2 Seccomp: 0 Seccomp: 0 Seccomp: 2 Seccomp: 2 Seccomp: 2 Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use seccomp_attr_set(ctx, > SCMP_FLTATR_CTL_TSYNC, 1) to update the policy on all threads. libseccomp requirement was bumped to 2.2.0 in previous patch. libseccomp should fail to set the filter if it can't honour SCMP_FLTATR_CTL_TSYNC (untested), and thus -sandbox will now fail on kernel < 3.17. Signed-off-by: Marc-André Lureau Acked-by: Eduardo Otubo --- qemu-seccomp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/qemu-seccomp.c b/qemu-seccomp.c index f0c833f3ca..4729eb107f 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -149,6 +149,11 @@ static int seccomp_start(uint32_t seccomp_opts) goto seccomp_return; } + rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); + if (rc != 0) { + goto seccomp_return; + } + for (i = 0; i < ARRAY_SIZE(blacklist); i++) { if (!(seccomp_opts & blacklist[i].set)) { continue;