mirror of https://git.kore.io/kore.git
272 lines
6.9 KiB
C
272 lines
6.9 KiB
C
/*
|
|
* Copyright (c) 2019 Joris Vink <joris@coders.se>
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
* copyright notice and this permission notice appear in all copies.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/epoll.h>
|
|
#include <sys/prctl.h>
|
|
#include <sys/syscall.h>
|
|
|
|
#include <linux/seccomp.h>
|
|
#include <linux/filter.h>
|
|
#include <linux/audit.h>
|
|
|
|
#include <stddef.h>
|
|
#include <sched.h>
|
|
|
|
#include "kore.h"
|
|
#include "seccomp.h"
|
|
#include "platform.h"
|
|
|
|
/*
 * Action returned by the filter for a system call that is not allowed:
 * SECCOMP_RET_TRAP when built with KORE_DEBUG, SECCOMP_RET_KILL otherwise.
 */
#if !defined(KORE_DEBUG)
#define SECCOMP_KILL_POLICY		SECCOMP_RET_KILL
#else
#define SECCOMP_KILL_POLICY		SECCOMP_RET_TRAP
#endif
|
|
|
|
/* The bare minimum to be able to run kore. */
|
|
static struct sock_filter filter_kore[] = {
|
|
/* File related. */
|
|
KORE_SYSCALL_ALLOW(open),
|
|
KORE_SYSCALL_ALLOW(read),
|
|
KORE_SYSCALL_ALLOW(stat),
|
|
KORE_SYSCALL_ALLOW(fstat),
|
|
KORE_SYSCALL_ALLOW(write),
|
|
KORE_SYSCALL_ALLOW(fcntl),
|
|
KORE_SYSCALL_ALLOW(lseek),
|
|
KORE_SYSCALL_ALLOW(close),
|
|
KORE_SYSCALL_ALLOW(access),
|
|
KORE_SYSCALL_ALLOW(getcwd),
|
|
KORE_SYSCALL_ALLOW(openat),
|
|
KORE_SYSCALL_ALLOW(unlink),
|
|
|
|
/* Process related. */
|
|
KORE_SYSCALL_ALLOW(exit),
|
|
KORE_SYSCALL_ALLOW(kill),
|
|
KORE_SYSCALL_ALLOW(getpid),
|
|
KORE_SYSCALL_ALLOW(getuid),
|
|
KORE_SYSCALL_ALLOW(geteuid),
|
|
KORE_SYSCALL_ALLOW(exit_group),
|
|
|
|
/* Memory related. */
|
|
KORE_SYSCALL_ALLOW(brk),
|
|
KORE_SYSCALL_ALLOW(mmap),
|
|
KORE_SYSCALL_ALLOW(munmap),
|
|
KORE_SYSCALL_ALLOW(mprotect),
|
|
|
|
/* Net related. */
|
|
KORE_SYSCALL_ALLOW(poll),
|
|
KORE_SYSCALL_ALLOW(accept),
|
|
KORE_SYSCALL_ALLOW(sendfile),
|
|
KORE_SYSCALL_ALLOW(epoll_ctl),
|
|
KORE_SYSCALL_ALLOW(setsockopt),
|
|
KORE_SYSCALL_ALLOW(epoll_wait),
|
|
|
|
/* "Other" without clear category. */
|
|
KORE_SYSCALL_ALLOW(futex),
|
|
KORE_SYSCALL_ALLOW(getrandom),
|
|
KORE_SYSCALL_ALLOW(sigaltstack),
|
|
KORE_SYSCALL_ALLOW(rt_sigreturn),
|
|
KORE_SYSCALL_ALLOW(rt_sigaction),
|
|
KORE_SYSCALL_ALLOW(clock_gettime),
|
|
};
|
|
|
|
/* bpf program prologue. */
|
|
static struct sock_filter filter_prologue[] = {
|
|
/* Load arch member into accumulator (A) (arch is __u32). */
|
|
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, arch)),
|
|
|
|
/* Compare accumulator against constant, if false jump over kill. */
|
|
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
|
|
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
|
|
|
|
/* Load system call member into accumulator (nr is int). */
|
|
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)),
|
|
};
|
|
|
|
/* bpf program epilogue. */
|
|
#define FILTER_EPILOGUE_ALLOW_OFFSET 1
|
|
|
|
static struct sock_filter filter_epilogue[] = {
|
|
/* Return hit if no system calls matched our list. */
|
|
BPF_STMT(BPF_RET+BPF_K, SECCOMP_KILL_POLICY),
|
|
|
|
/* Final destination for syscalls that are accepted. */
|
|
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
|
|
};
|
|
|
|
/* Lengths of the fixed prologue and epilogue, via KORE_FILTER_LEN(). */
#define filter_prologue_len	KORE_FILTER_LEN(filter_prologue)
#define filter_epilogue_len	KORE_FILTER_LEN(filter_epilogue)

#if defined(KORE_DEBUG)
/* SIGSYS handler, only present in debug builds. */
static void	seccomp_trap(int, siginfo_t *, void *);
#endif
|
|
|
|
/* One named set of BPF instructions, linked into the filters list. */
struct filter {
	/* Name of the filter; must be unique within the list. */
	char			*name;
	/* The instructions themselves, as handed to kore_seccomp_filter(). */
	struct sock_filter	*prog;
	/* Number of instructions in prog. */
	size_t			instructions;
	/* Linkage for the filters list. */
	TAILQ_ENTRY(filter)	list;
};

/* Every registered filter, in the order it was registered. */
static TAILQ_HEAD(, filter)	filters;
|
|
|
|
void
|
|
kore_seccomp_init(void)
|
|
{
|
|
TAILQ_INIT(&filters);
|
|
}
|
|
|
|
void
|
|
kore_seccomp_drop(void)
|
|
{
|
|
struct filter *filter;
|
|
|
|
while ((filter = TAILQ_FIRST(&filters)) != NULL) {
|
|
kore_log(LOG_INFO, "seccomp filter '%s' dropped", filter->name);
|
|
TAILQ_REMOVE(&filters, filter, list);
|
|
kore_free(filter->name);
|
|
kore_free(filter);
|
|
}
|
|
|
|
TAILQ_INIT(&filters);
|
|
}
|
|
|
|
void
|
|
kore_seccomp_enable(void)
|
|
{
|
|
#if defined(KORE_DEBUG)
|
|
struct sigaction sa;
|
|
#endif
|
|
struct sock_filter *sf;
|
|
struct sock_fprog prog;
|
|
struct kore_runtime_call *rcall;
|
|
struct filter *filter;
|
|
size_t prog_len, pos, jmp_off, i;
|
|
|
|
#if defined(KORE_DEBUG)
|
|
memset(&sa, 0, sizeof(sa));
|
|
|
|
sa.sa_flags = SA_SIGINFO;
|
|
sa.sa_sigaction = seccomp_trap;
|
|
|
|
if (sigfillset(&sa.sa_mask) == -1)
|
|
fatal("sigfillset: %s", errno_s);
|
|
if (sigaction(SIGSYS, &sa, NULL) == -1)
|
|
fatal("sigaction: %s", errno_s);
|
|
#endif
|
|
|
|
/* Allow application to add its own filters. */
|
|
if ((rcall = kore_runtime_getcall("kore_seccomp_hook")) != NULL) {
|
|
kore_runtime_execute(rcall);
|
|
kore_free(rcall);
|
|
}
|
|
|
|
/* Add worker required syscalls. */
|
|
kore_seccomp_filter("worker", filter_kore,
|
|
KORE_FILTER_LEN(filter_kore));
|
|
|
|
/*
|
|
* Construct the entire BPF program by adding all relevant parts
|
|
* together. While doing so remember where the jmp_off is going to be
|
|
* so we can resolve the true branch for all comparisons.
|
|
*/
|
|
|
|
/* Start with the prologue. */
|
|
prog_len = filter_prologue_len;
|
|
jmp_off = prog_len;
|
|
|
|
/* Now account for all enabled filters. */
|
|
TAILQ_FOREACH(filter, &filters, list) {
|
|
prog_len += filter->instructions;
|
|
jmp_off += filter->instructions;
|
|
}
|
|
|
|
/* Finally add the epilogue. */
|
|
prog_len += filter_epilogue_len;
|
|
|
|
/* Finalize the jump position. */
|
|
jmp_off += FILTER_EPILOGUE_ALLOW_OFFSET;
|
|
|
|
/* Initial filter position is immediately after prologue. */
|
|
pos = filter_prologue_len + 1;
|
|
|
|
/* Iterate over all filters and fixup the true branch. */
|
|
TAILQ_FOREACH(filter, &filters, list) {
|
|
for (i = 0; i < filter->instructions; i++) {
|
|
filter->prog[i].jt = (u_int8_t)jmp_off - pos;
|
|
pos++;
|
|
}
|
|
}
|
|
|
|
/* Build the entire bpf program now. */
|
|
if ((sf = calloc(prog_len, sizeof(*sf))) == NULL)
|
|
fatal("calloc");
|
|
|
|
jmp_off = 0;
|
|
for (i = 0; i < filter_prologue_len; i++)
|
|
sf[jmp_off++] = filter_prologue[i];
|
|
|
|
TAILQ_FOREACH(filter, &filters, list) {
|
|
for (i = 0; i < filter->instructions; i++)
|
|
sf[jmp_off++] = filter->prog[i];
|
|
kore_log(LOG_INFO, "seccomp filter '%s' added", filter->name);
|
|
}
|
|
|
|
for (i = 0; i < filter_epilogue_len; i++)
|
|
sf[jmp_off++] = filter_epilogue[i];
|
|
|
|
/* Lock and load it. */
|
|
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1)
|
|
fatal("prctl: %s", errno_s);
|
|
|
|
prog.filter = sf;
|
|
prog.len = prog_len;
|
|
|
|
if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) == -1)
|
|
fatal("prctl: %s", errno_s);
|
|
|
|
kore_log(LOG_INFO, "seccomp sandbox activated");
|
|
}
|
|
|
|
int
|
|
kore_seccomp_filter(const char *name, void *prog, size_t len)
|
|
{
|
|
struct filter *filter;
|
|
|
|
TAILQ_FOREACH(filter, &filters, list) {
|
|
if (!strcmp(filter->name, name))
|
|
return (KORE_RESULT_ERROR);
|
|
}
|
|
|
|
filter = kore_calloc(1, sizeof(*filter));
|
|
|
|
filter->prog = prog;
|
|
filter->instructions = len;
|
|
filter->name = kore_strdup(name);
|
|
|
|
TAILQ_INSERT_TAIL(&filters, filter, list);
|
|
|
|
return (KORE_RESULT_OK);
|
|
}
|
|
|
|
#if defined(KORE_DEBUG)
|
|
static void
|
|
seccomp_trap(int sig, siginfo_t *info, void *ucontext)
|
|
{
|
|
kore_log(LOG_INFO, "sandbox violation - syscall=%d", info->si_syscall);
|
|
}
|
|
#endif
|