Add seccomp syscall filtering to kore.

With this commit all Kore processes (minus the parent) are running
under seccomp.

The worker processes get the bare minimum allowed syscalls while each module
like curl, pgsql, etc will add their own filters to allow what they require.

New API functions:
    int kore_seccomp_filter(const char *name, void *filter, size_t len);

    Adds a filter into the seccomp system (must be called before
    seccomp is enabled).

New helpful macro:
    #define KORE_SYSCALL_ALLOW(name)

    Allow the syscall with a given name, should be used in
    a sock_filter data structure.

New hooks:
    void kore_seccomp_hook(void);

    Called before seccomp is enabled, allows developers to add their
    own BPF filters into seccomp.
This commit is contained in:
Joris Vink 2019-09-25 12:25:49 +00:00
parent d99d0b2d77
commit cd9971247c
16 changed files with 483 additions and 9 deletions

1
.gitignore vendored
View File

@ -10,3 +10,4 @@ obj
kodev/kodev
kore.features
src/version.c
src/platform.h

View File

@ -12,6 +12,7 @@ SHARE_DIR=$(PREFIX)/share/kore
INCLUDE_DIR=$(PREFIX)/include/kore
VERSION=src/version.c
PLATFORM=src/platform.h
S_SRC= src/kore.c src/buf.c src/config.c src/connection.c \
src/domain.c src/filemap.c src/fileref.c src/mem.c src/msg.c \
@ -23,7 +24,7 @@ FEATURES_INC=
CFLAGS+=-Wall -Werror -Wstrict-prototypes -Wmissing-prototypes
CFLAGS+=-Wmissing-declarations -Wshadow -Wpointer-arith -Wcast-qual
CFLAGS+=-Wsign-compare -Iinclude/kore -std=c99 -pedantic
CFLAGS+=-Wsign-compare -Iinclude/kore --std=c99 -pedantic
CFLAGS+=-DPREFIX='"$(PREFIX)"' -fstack-protector-all
ifneq ("$(OPENSSL_PATH)", "")
@ -133,7 +134,7 @@ ifeq ("$(OSNAME)", "darwin")
else ifeq ("$(OSNAME)", "linux")
CFLAGS+=-D_GNU_SOURCE=1 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
LDFLAGS+=-ldl
S_SRC+=src/linux.c
S_SRC+=src/linux.c src/seccomp.c
else
S_SRC+=src/bsd.c
ifneq ("$(JSONRPC)", "")
@ -144,7 +145,12 @@ endif
S_OBJS= $(S_SRC:src/%.c=$(OBJDIR)/%.o)
all: $(VERSION) $(KORE) $(KODEV)
all: $(PLATFORM) $(VERSION) $(KORE) $(KODEV)
$(PLATFORM): force
@if [ -f misc/$(OSNAME)-platform.sh ]; then \
misc/$(OSNAME)-platform.sh > $(PLATFORM) ; \
fi
$(VERSION): force
@if [ -d .git ]; then \

View File

@ -6,7 +6,7 @@
# set kore_source together with kore_flavor.
single_binary=yes
kore_source=../../
kore_flavor=PYTHON=1 CURL=1 NOTLS=1
kore_flavor=PYTHON=1 CURL=1 NOTLS=1 DEBUG=1
# The flags below are shared between flavors
cflags=-Wall -Wmissing-declarations -Wshadow

View File

@ -26,6 +26,7 @@ async def httpclient(req):
client = kore.httpclient("https://kore.io")
# Do a simple GET request.
print("firing off request")
status, body = await client.get()
print("status: %d, body: '%s'" % (status, body))

View File

@ -595,6 +595,7 @@ void kore_worker_privdrop(const char *, const char *);
struct kore_worker *kore_worker_data(u_int8_t);
void kore_platform_init(void);
void kore_platform_sandbox(void);
void kore_platform_event_init(void);
void kore_platform_event_cleanup(void);
void kore_platform_proctitle(char *);
@ -852,6 +853,7 @@ void kore_buf_replace_string(struct kore_buf *,
void kore_keymgr_run(void);
void kore_keymgr_cleanup(int);
void kore_seccomp_hook(void);
void kore_worker_teardown(void);
void kore_parent_teardown(void);
void kore_worker_configure(void);

47
include/kore/seccomp.h Normal file
View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2019 Joris Vink <joris@coders.se>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef __H_SECCOMP_H
#define __H_SECCOMP_H
#include <sys/syscall.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
/*
 * Allow a system call by comparing the accumulator value (which will contain
 * the system call number) with the value of SYS_##_name.
 *
 * If the value is equal the true branch (first) is taken, otherwise the
 * false branch (second) is taken. Both offsets are emitted as 0 here: a
 * false offset of 0 falls through to the next instruction, so a filter is
 * simply a list of these entries.
 *
 * The true branch offset is filled in by kore_seccomp_enable() when the
 * final program is assembled, so that it lands on the allow instruction
 * in the program epilogue.
 */
#define KORE_SYSCALL_ALLOW(_name) \
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SYS_##_name, 0, 0)
/*
 * The length of a filter, expressed as its number of instructions.
 * Only valid on real arrays, not on pointers.
 */
#define KORE_FILTER_LEN(x)		(sizeof(x) / sizeof((x)[0]))
/* Initialise the list of registered filters. */
void kore_seccomp_init(void);
/* Remove and free every filter that was registered so far. */
void kore_seccomp_drop(void);
/* Assemble all registered filters into one program and install it. */
void kore_seccomp_enable(void);
/*
 * Register a filter under a unique name. The size_t argument is the number
 * of instructions. Returns KORE_RESULT_ERROR if the name is already in use,
 * KORE_RESULT_OK otherwise.
 */
int kore_seccomp_filter(const char *, void *, size_t);
#endif

23
misc/linux-platform.sh Executable file
View File

@ -0,0 +1,23 @@
#!/bin/sh
#
# Writes the platform specific header to stdout. It defines
# SECCOMP_AUDIT_ARCH based on the machine name reported by uname -m.
#
# Exits non-zero, without producing a header, when the machine is unknown
# so the build stops here instead of on an empty #define later.

PLATFORM=$(uname -m)

case "$PLATFORM" in
	x86_64*)
		seccomp_audit_arch=AUDIT_ARCH_X86_64
		;;
	i*86*)
		seccomp_audit_arch=AUDIT_ARCH_I386
		;;
	arm*)
		seccomp_audit_arch=AUDIT_ARCH_ARM
		;;
	aarch64*)
		seccomp_audit_arch=AUDIT_ARCH_AARCH64
		;;
	*)
		echo "$PLATFORM is not supported by the seccomp sandbox" >&2
		exit 1
		;;
esac

cat << __EOF
/* Auto generated by linux-platform.sh - DO NOT EDIT */
#define SECCOMP_AUDIT_ARCH $seccomp_audit_arch
__EOF

View File

@ -272,6 +272,14 @@ kore_platform_sendfile(struct connection *c, struct netbuf *nb)
}
#endif
/*
 * Platform sandbox entry point. Calls kore_platform_pledge() when built
 * with KORE_USE_PLATFORM_PLEDGE, otherwise it does nothing.
 */
void
kore_platform_sandbox(void)
{
#if defined(KORE_USE_PLATFORM_PLEDGE)
kore_platform_pledge();
#endif
}
#if defined(KORE_USE_PLATFORM_PLEDGE)
void
kore_platform_pledge(void)

View File

@ -22,6 +22,28 @@
#include "http.h"
#include "curl.h"
#if defined(__linux__)
#include "seccomp.h"
/*
 * Additional system calls allowed when the curl module is used. Registered
 * with the seccomp system under the name "curl" by kore_curl_sysinit().
 */
static struct sock_filter filter_curl[] = {
/* Allow sockets and libcurl to call connect. */
KORE_SYSCALL_ALLOW(bind),
KORE_SYSCALL_ALLOW(socket),
KORE_SYSCALL_ALLOW(connect),
/* Threading related. */
KORE_SYSCALL_ALLOW(clone),
KORE_SYSCALL_ALLOW(set_robust_list),
/* Other */
KORE_SYSCALL_ALLOW(ioctl),
KORE_SYSCALL_ALLOW(madvise),
KORE_SYSCALL_ALLOW(recvmsg),
KORE_SYSCALL_ALLOW(sendmmsg),
KORE_SYSCALL_ALLOW(getpeername),
};
#endif
#define FD_CACHE_BUCKETS 2048
struct fd_cache {
@ -82,6 +104,10 @@ kore_curl_sysinit(void)
len = snprintf(user_agent, sizeof(user_agent), "kore/%s", kore_version);
if (len == -1 || (size_t)len >= sizeof(user_agent))
fatal("user-agent string too long");
#if defined(__linux__)
kore_seccomp_filter("curl", filter_curl, KORE_FILTER_LEN(filter_curl));
#endif
}
int

View File

@ -52,6 +52,27 @@
#define RAND_POLL_INTERVAL (1800 * 1000)
#define RAND_FILE_SIZE 1024
#if defined(__linux__)
#include "seccomp.h"
/*
 * The syscalls our keymgr is allowed to perform, only.
 *
 * Registered under the name "keymgr" by kore_keymgr_run() after all other
 * filters have been dropped. Each syscall is listed once; a repeated entry
 * would only add a redundant comparison to the program.
 */
static struct sock_filter filter_keymgr[] = {
	/* Required to deal with private keys and certs. */
	KORE_SYSCALL_ALLOW(open),
	KORE_SYSCALL_ALLOW(read),
	KORE_SYSCALL_ALLOW(close),

	/* Allow it to read/write messages (read is allowed above). */
	KORE_SYSCALL_ALLOW(write),

	/* Process things. */
	KORE_SYSCALL_ALLOW(exit),
	KORE_SYSCALL_ALLOW(sigaltstack),
	KORE_SYSCALL_ALLOW(rt_sigaction),
};
#endif
struct key {
EVP_PKEY *pkey;
struct kore_domain *dom;
@ -102,6 +123,13 @@ kore_keymgr_run(void)
kore_msg_register(KORE_MSG_ENTROPY_REQ, keymgr_entropy_request);
kore_msg_register(KORE_MSG_CERTIFICATE_REQ, keymgr_certificate_request);
#if defined(__linux__)
/* Drop all enabled seccomp filters, and add only ours. */
kore_seccomp_drop();
kore_seccomp_filter("keymgr", filter_keymgr,
KORE_FILTER_LEN(filter_keymgr));
#endif
kore_worker_privdrop(keymgr_runas_user, keymgr_root_path);
if (rand_file != NULL) {

View File

@ -226,6 +226,7 @@ main(int argc, char *argv[])
nlisteners = 0;
LIST_INIT(&listeners);
kore_platform_init();
kore_log_init();
#if !defined(KORE_NO_HTTP)
http_parent_init();
@ -268,8 +269,6 @@ main(int argc, char *argv[])
}
#endif
kore_platform_init();
#if !defined(KORE_NO_HTTP)
if (http_body_disk_offload > 0) {
if (mkdir(http_body_disk_path, 0700) == -1 && errno != EEXIST) {

View File

@ -16,12 +16,13 @@
#include <sys/param.h>
#include <sys/epoll.h>
#include <sys/prctl.h>
#include <sys/sendfile.h>
#include <sys/syscall.h>
#include <sched.h>
#include "kore.h"
#include "seccomp.h"
#if defined(KORE_USE_PGSQL)
#include "pgsql.h"
@ -40,6 +41,8 @@ kore_platform_init(void)
{
long n;
kore_seccomp_init();
if ((n = sysconf(_SC_NPROCESSORS_ONLN)) == -1) {
kore_debug("could not get number of cpu's falling back to 1");
cpu_count = 1;
@ -248,3 +251,9 @@ resend:
return (KORE_RESULT_OK);
}
#endif
/*
 * Platform sandbox entry point: installs the seccomp filters by calling
 * kore_seccomp_enable().
 */
void
kore_platform_sandbox(void)
{
kore_seccomp_enable();
}

View File

@ -28,6 +28,19 @@
#include "pgsql.h"
#if defined(__linux__)
#include "seccomp.h"
/*
 * Additional system calls allowed when pgsql support is used. Registered
 * with the seccomp system under the name "pgsql" by kore_pgsql_sys_init().
 */
static struct sock_filter filter_pgsql[] = {
KORE_SYSCALL_ALLOW(socket),
KORE_SYSCALL_ALLOW(connect),
KORE_SYSCALL_ALLOW(sendto),
KORE_SYSCALL_ALLOW(recvfrom),
KORE_SYSCALL_ALLOW(getsockopt),
KORE_SYSCALL_ALLOW(getsockname),
};
#endif
struct pgsql_wait {
struct kore_pgsql *pgsql;
TAILQ_ENTRY(pgsql_wait) list;
@ -79,6 +92,11 @@ kore_pgsql_sys_init(void)
sizeof(struct pgsql_job), 100);
kore_pool_init(&pgsql_wait_pool, "pgsql_wait_pool",
sizeof(struct pgsql_wait), pgsql_queue_limit);
#if defined(__linux__)
kore_seccomp_filter("pgsql", filter_pgsql,
KORE_FILTER_LEN(filter_pgsql));
#endif
}
void

View File

@ -189,6 +189,25 @@ static PyMemAllocatorEx allocator = {
.free = python_free
};
#if defined(__linux__)
#include "seccomp.h"
/*
 * Additional system calls allowed when the Python runtime is used.
 * Registered with the seccomp system under the name "python" by
 * kore_python_init().
 */
static struct sock_filter filter_python[] = {
/* Required for kore.proc */
KORE_SYSCALL_ALLOW(dup2),
KORE_SYSCALL_ALLOW(pipe),
KORE_SYSCALL_ALLOW(wait4),
KORE_SYSCALL_ALLOW(execve),
/* Socket related. */
KORE_SYSCALL_ALLOW(sendto),
KORE_SYSCALL_ALLOW(recvfrom),
KORE_SYSCALL_ALLOW(getsockopt),
KORE_SYSCALL_ALLOW(setsockopt),
KORE_SYSCALL_ALLOW(getsockname),
};
#endif
static TAILQ_HEAD(, pyproc) procs;
static struct reqcall_list prereq;
@ -258,6 +277,11 @@ kore_python_init(void)
}
Py_Initialize();
#if defined(__linux__)
kore_seccomp_filter("python", filter_python,
KORE_FILTER_LEN(filter_python));
#endif
}
void

271
src/seccomp.c Normal file
View File

@ -0,0 +1,271 @@
/*
* Copyright (c) 2019 Joris Vink <joris@coders.se>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/param.h>
#include <sys/epoll.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <stddef.h>
#include <sched.h>
#include "kore.h"
#include "seccomp.h"
#include "platform.h"
#if defined(KORE_DEBUG)
#define SECCOMP_KILL_POLICY SECCOMP_RET_TRAP
#else
#define SECCOMP_KILL_POLICY SECCOMP_RET_KILL
#endif
/*
 * The bare minimum to be able to run kore.
 *
 * Registered under the name "worker" by kore_seccomp_enable() right before
 * the final program is assembled.
 */
static struct sock_filter filter_kore[] = {
/* File related. */
KORE_SYSCALL_ALLOW(open),
KORE_SYSCALL_ALLOW(read),
KORE_SYSCALL_ALLOW(stat),
KORE_SYSCALL_ALLOW(fstat),
KORE_SYSCALL_ALLOW(write),
KORE_SYSCALL_ALLOW(fcntl),
KORE_SYSCALL_ALLOW(lseek),
KORE_SYSCALL_ALLOW(close),
KORE_SYSCALL_ALLOW(access),
KORE_SYSCALL_ALLOW(getcwd),
KORE_SYSCALL_ALLOW(openat),
KORE_SYSCALL_ALLOW(unlink),
/* Process related. */
KORE_SYSCALL_ALLOW(exit),
KORE_SYSCALL_ALLOW(kill),
KORE_SYSCALL_ALLOW(getpid),
KORE_SYSCALL_ALLOW(getuid),
KORE_SYSCALL_ALLOW(geteuid),
KORE_SYSCALL_ALLOW(exit_group),
/* Memory related. */
KORE_SYSCALL_ALLOW(brk),
KORE_SYSCALL_ALLOW(mmap),
KORE_SYSCALL_ALLOW(munmap),
KORE_SYSCALL_ALLOW(mprotect),
/* Net related. */
KORE_SYSCALL_ALLOW(poll),
KORE_SYSCALL_ALLOW(accept),
KORE_SYSCALL_ALLOW(sendfile),
KORE_SYSCALL_ALLOW(epoll_ctl),
KORE_SYSCALL_ALLOW(setsockopt),
KORE_SYSCALL_ALLOW(epoll_wait),
/* "Other" without clear category. */
KORE_SYSCALL_ALLOW(futex),
KORE_SYSCALL_ALLOW(getrandom),
KORE_SYSCALL_ALLOW(sigaltstack),
KORE_SYSCALL_ALLOW(rt_sigreturn),
KORE_SYSCALL_ALLOW(rt_sigaction),
KORE_SYSCALL_ALLOW(clock_gettime),
};
/*
 * bpf program prologue: verifies the architecture and leaves the system
 * call number in the accumulator for the filters that follow.
 */
static struct sock_filter filter_prologue[] = {
/* Load arch member into accumulator (A) (arch is __u32). */
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, arch)),
/*
 * Compare accumulator against the expected arch. On a match (true branch,
 * offset 1) the kill below is skipped, otherwise we fall through into it.
 */
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
/* Load system call member into accumulator (nr is int). */
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)),
};
/*
 * bpf program epilogue.
 *
 * FILTER_EPILOGUE_ALLOW_OFFSET is the index of the allow instruction inside
 * filter_epilogue; kore_seccomp_enable() uses it to compute the destination
 * of the true branch of every filter instruction.
 */
#define FILTER_EPILOGUE_ALLOW_OFFSET 1
static struct sock_filter filter_epilogue[] = {
/* Return hit if no system calls matched our list. */
BPF_STMT(BPF_RET+BPF_K, SECCOMP_KILL_POLICY),
/* Final destination for syscalls that are accepted. */
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
};
#define filter_prologue_len KORE_FILTER_LEN(filter_prologue)
#define filter_epilogue_len KORE_FILTER_LEN(filter_epilogue)
#if defined(KORE_DEBUG)
static void seccomp_trap(int sig, siginfo_t *, void *);
#endif
/* A filter registered through kore_seccomp_filter(). */
struct filter {
/* Private copy of the name the filter was registered under. */
char *name;
/* Caller supplied instructions; the pointer is stored, not copied. */
struct sock_filter *prog;
/* Number of instructions in prog. */
size_t instructions;
TAILQ_ENTRY(filter) list;
};
/* All registered filters, in the order they were registered. */
static TAILQ_HEAD(, filter) filters;
/*
 * Initialise the list of registered seccomp filters.
 */
void
kore_seccomp_init(void)
{
TAILQ_INIT(&filters);
}
/*
 * Forget about every filter registered so far. Each entry is logged,
 * unlinked and released together with its name; the instructions
 * themselves are owned by whoever registered the filter and are left alone.
 */
void
kore_seccomp_drop(void)
{
	struct filter	*entry;

	for (;;) {
		entry = TAILQ_FIRST(&filters);
		if (entry == NULL)
			break;

		kore_log(LOG_INFO, "seccomp filter '%s' dropped", entry->name);

		TAILQ_REMOVE(&filters, entry, list);
		kore_free(entry->name);
		kore_free(entry);
	}

	TAILQ_INIT(&filters);
}
/*
 * Assemble the final BPF program (prologue, every registered filter,
 * epilogue) and install it for the calling process.
 *
 * Before assembling, the application's kore_seccomp_hook() is executed if
 * it exists so it can register its own filters, and the base "worker"
 * filter is registered.
 *
 * The true branch of every filter instruction is rewritten in place so it
 * jumps to the allow instruction in the epilogue. That offset is an 8-bit
 * field, so the combined filters may hold at most 255 instructions; going
 * over that limit is fatal rather than silently producing wrapped jumps.
 *
 * Any other failure is fatal as well.
 */
void
kore_seccomp_enable(void)
{
#if defined(KORE_DEBUG)
	struct sigaction		sa;
#endif
	struct sock_filter		*sf;
	struct sock_fprog		prog;
	struct kore_runtime_call	*rcall;
	struct filter			*filter;
	size_t				prog_len, pos, jmp_off, i;

#if defined(KORE_DEBUG)
	/* Debug builds trap instead of kill, report violations via SIGSYS. */
	memset(&sa, 0, sizeof(sa));

	sa.sa_flags = SA_SIGINFO;
	sa.sa_sigaction = seccomp_trap;

	if (sigfillset(&sa.sa_mask) == -1)
		fatal("sigfillset: %s", errno_s);

	if (sigaction(SIGSYS, &sa, NULL) == -1)
		fatal("sigaction: %s", errno_s);
#endif

	/* Allow application to add its own filters. */
	if ((rcall = kore_runtime_getcall("kore_seccomp_hook")) != NULL) {
		kore_runtime_execute(rcall);
		kore_free(rcall);
	}

	/* Add worker required syscalls. */
	kore_seccomp_filter("worker", filter_kore,
	    KORE_FILTER_LEN(filter_kore));

	/*
	 * Construct the entire BPF program by adding all relevant parts
	 * together. While doing so remember where the jmp_off is going to be
	 * so we can resolve the true branch for all comparisons.
	 */

	/* Start with the prologue. */
	prog_len = filter_prologue_len;
	jmp_off = prog_len;

	/* Now account for all enabled filters. */
	TAILQ_FOREACH(filter, &filters, list) {
		prog_len += filter->instructions;
		jmp_off += filter->instructions;
	}

	/* Finally add the epilogue. */
	prog_len += filter_epilogue_len;

	/* Finalize the jump position. */
	jmp_off += FILTER_EPILOGUE_ALLOW_OFFSET;

	/*
	 * Initial filter position is immediately after prologue. Jump
	 * offsets are relative to the instruction following the jump,
	 * hence the + 1.
	 */
	pos = filter_prologue_len + 1;

	/*
	 * The first filter instruction needs the longest jump. If that one
	 * fits into the 8-bit jt field then all the others do as well.
	 */
	if (jmp_off - pos > 0xff) {
		fatal("seccomp filters too large: %zu instructions (max 255)",
		    jmp_off - pos);
	}

	/* Iterate over all filters and fixup the true branch. */
	TAILQ_FOREACH(filter, &filters, list) {
		for (i = 0; i < filter->instructions; i++) {
			filter->prog[i].jt = (u_int8_t)(jmp_off - pos);
			pos++;
		}
	}

	/* Build the entire bpf program now. */
	if ((sf = calloc(prog_len, sizeof(*sf))) == NULL)
		fatal("calloc");

	/* From here on pos is the write position inside sf. */
	pos = 0;

	for (i = 0; i < filter_prologue_len; i++)
		sf[pos++] = filter_prologue[i];

	TAILQ_FOREACH(filter, &filters, list) {
		for (i = 0; i < filter->instructions; i++)
			sf[pos++] = filter->prog[i];
		kore_log(LOG_INFO, "seccomp filter '%s' added", filter->name);
	}

	for (i = 0; i < filter_epilogue_len; i++)
		sf[pos++] = filter_epilogue[i];

	/* Lock and load it. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1)
		fatal("prctl: %s", errno_s);

	prog.filter = sf;
	prog.len = prog_len;

	/*
	 * NOTE(review): sf is not released afterwards. Freeing it here
	 * would run under the freshly installed filter - confirm that is
	 * acceptable before adding a free().
	 */
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) == -1)
		fatal("prctl: %s", errno_s);

	kore_log(LOG_INFO, "seccomp sandbox activated");
}
/*
 * Register a filter with the seccomp system.
 *
 * name is copied and must be unique among the registered filters. prog
 * points to len instructions; only the pointer is stored, so the
 * instructions must stay valid for as long as the filter is registered.
 *
 * Returns KORE_RESULT_ERROR when a filter with the same name already
 * exists and KORE_RESULT_OK once the filter was appended to the list.
 */
int
kore_seccomp_filter(const char *name, void *prog, size_t len)
{
	struct filter	*entry;

	/* Refuse a second registration under the same name. */
	TAILQ_FOREACH(entry, &filters, list) {
		if (strcmp(entry->name, name) == 0)
			return (KORE_RESULT_ERROR);
	}

	entry = kore_calloc(1, sizeof(*entry));

	entry->name = kore_strdup(name);
	entry->instructions = len;
	entry->prog = prog;

	TAILQ_INSERT_TAIL(&filters, entry, list);

	return (KORE_RESULT_OK);
}
#if defined(KORE_DEBUG)
/*
 * SIGSYS handler installed by kore_seccomp_enable() in KORE_DEBUG builds.
 * Logs the number of the system call that caused the trap; sig and
 * ucontext are not used.
 *
 * NOTE(review): kore_log() is called from signal context here - confirm
 * it is safe to use inside a signal handler.
 */
static void
seccomp_trap(int sig, siginfo_t *info, void *ucontext)
{
kore_log(LOG_INFO, "sandbox violation - syscall=%d", info->si_syscall);
}
#endif

View File

@ -310,10 +310,14 @@ kore_worker_privdrop(const char *runas, const char *root)
fatalx("cannot drop privileges");
}
#if defined(KORE_USE_PLATFORM_PLEDGE)
kore_platform_pledge();
#if defined(__linux__) && !defined(KORE_NO_TLS)
/* keymgr gets its own privileges. */
if (worker->id == KORE_WORKER_KEYMGR)
return;
#endif
kore_platform_sandbox();
}
void
@ -569,6 +573,13 @@ kore_worker_reap(void)
"worker %d (pid: %d) (hdlr: %s) gone",
kw->id, kw->pid, func);
#if defined(__linux__)
if (WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS) {
kore_log(LOG_NOTICE,
"worker %d died from sandbox violation", kw->id);
}
#endif
#if !defined(KORE_NO_TLS)
if (id == KORE_WORKER_KEYMGR) {
kore_log(LOG_CRIT, "keymgr gone, stopping");