From 9c3997601d51069ec08d7d06cf31a17884056cc2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 28 Oct 2014 15:11:41 -0700 Subject: [PATCH 1/3] bpf: reduce verifier memory consumption verifier keeps track of register state spilled to stack. registers are 8-byte wide and always aligned, so instead of tracking them in every byte-sized stack slot, use MAX_BPF_STACK / 8 array to track spilled register state. Though verifier runs in user context and its state freed immediately after verification, it makes sense to reduce its memory usage. This optimization reduces sizeof(struct verifier_state) from 12464 to 1712 on 64-bit and from 6232 to 1112 on 32-bit. Note, this patch doesn't change existing limits, which are there to bound time and memory during verification: 4k total number of insns in a program, 1k number of jumps (states to visit) and 32k number of processed insn (since an insn may be visited multiple times). Theoretical worst case memory during verification is 1712 * 1k = 17Mbyte. Out-of-memory situation triggers cleanup and rejects the program. Suggested-by: Andy Lutomirski Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 101 ++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9f81818f2941..b6a1f7c14a67 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -153,22 +153,19 @@ struct reg_state { enum bpf_stack_slot_type { STACK_INVALID, /* nothing was stored in this stack slot */ - STACK_SPILL, /* 1st byte of register spilled into stack */ - STACK_SPILL_PART, /* other 7 bytes of register spill */ + STACK_SPILL, /* register spilled into stack */ STACK_MISC /* BPF program wrote some data into this slot */ }; -struct bpf_stack_slot { - enum bpf_stack_slot_type stype; - struct reg_state reg_st; -}; +#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ /* state of the program: * type of all registers and stack info */ struct verifier_state { struct reg_state regs[MAX_BPF_REG]; - struct bpf_stack_slot stack[MAX_BPF_STACK]; + u8 stack_slot_type[MAX_BPF_STACK]; + struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; }; /* linked list of verifier states used to prune search */ @@ -259,10 +256,10 @@ static void print_verifier_state(struct verifier_env *env) env->cur_state.regs[i].map_ptr->key_size, env->cur_state.regs[i].map_ptr->value_size); } - for (i = 0; i < MAX_BPF_STACK; i++) { - if (env->cur_state.stack[i].stype == STACK_SPILL) + for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { + if (env->cur_state.stack_slot_type[i] == STACK_SPILL) verbose(" fp%d=%s", -MAX_BPF_STACK + i, - reg_type_str[env->cur_state.stack[i].reg_st.type]); + reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]); } verbose("\n"); } @@ -539,8 +536,10 @@ static int bpf_size_to_bytes(int bpf_size) static int check_stack_write(struct verifier_state *state, int off, int size, int value_regno) { - struct bpf_stack_slot *slot; int i; + /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, + * so it's aligned access and [off, off + size) are within stack limits + */ if (value_regno >= 0 && (state->regs[value_regno].type == PTR_TO_MAP_VALUE || @@ -548,30 +547,24 @@ static int check_stack_write(struct verifier_state *state, int off, int size, state->regs[value_regno].type == PTR_TO_CTX)) { /* register containing pointer is being spilled into stack */ - if (size != 8) { + if (size != BPF_REG_SIZE) { verbose("invalid size of register spill\n"); return -EACCES; } - slot = &state->stack[MAX_BPF_STACK + off]; - slot->stype = STACK_SPILL; /* save register state */ - slot->reg_st = state->regs[value_regno]; - for (i = 1; i < 8; i++) { - slot = &state->stack[MAX_BPF_STACK + off + i]; - slot->stype = STACK_SPILL_PART; - slot->reg_st.type = UNKNOWN_VALUE; - slot->reg_st.map_ptr = NULL; - } - } else { + state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = + state->regs[value_regno]; + for (i = 0; i < BPF_REG_SIZE; i++) + state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + } else { /* regular write of data into stack */ - for (i = 0; i < size; i++) { - slot = &state->stack[MAX_BPF_STACK + off + i]; - slot->stype = STACK_MISC; - slot->reg_st.type = UNKNOWN_VALUE; - slot->reg_st.map_ptr = NULL; - } + state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = + (struct reg_state) {}; + + for (i = 0; i < size; i++) + state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; } return 0; } @@ -579,19 +572,18 @@ static int check_stack_write(struct verifier_state *state, int off, int size, static int check_stack_read(struct verifier_state *state, int off, int size, int value_regno) { + u8 *slot_type; int i; - struct bpf_stack_slot *slot; - slot = &state->stack[MAX_BPF_STACK + off]; + slot_type = &state->stack_slot_type[MAX_BPF_STACK + off]; - if (slot->stype == STACK_SPILL) { - if (size != 8) { + if (slot_type[0] == STACK_SPILL) { + if (size != BPF_REG_SIZE) { verbose("invalid size of register spill\n"); return -EACCES; } - for (i = 1; i < 8; i++) { - if (state->stack[MAX_BPF_STACK + off + i].stype != - STACK_SPILL_PART) { + for (i = 1; i < BPF_REG_SIZE; i++) { + if (slot_type[i] != STACK_SPILL) { verbose("corrupted spill memory\n"); return -EACCES; } @@ -599,12 +591,12 @@ static int check_stack_read(struct verifier_state *state, int off, int size, if (value_regno >= 0) /* restore register state from stack */ - state->regs[value_regno] = slot->reg_st; + state->regs[value_regno] = + state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE]; return 0; } else { for (i = 0; i < size; i++) { - if (state->stack[MAX_BPF_STACK + off + i].stype != - STACK_MISC) { + if (slot_type[i] != STACK_MISC) { verbose("invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -747,7 +739,7 @@ static int check_stack_boundary(struct verifier_env *env, } for (i = 0; i < access_size; i++) { - if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) { + if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { verbose("invalid indirect read from stack off %d+%d size %d\n", off, i, access_size); return -EACCES; @@ -1417,12 +1409,33 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) } for (i = 0; i < MAX_BPF_STACK; i++) { - if (memcmp(&old->stack[i], &cur->stack[i], - sizeof(old->stack[0])) != 0) { - if (old->stack[i].stype == STACK_INVALID) - continue; + if (old->stack_slot_type[i] == STACK_INVALID) + continue; + if (old->stack_slot_type[i] != cur->stack_slot_type[i]) + /* Ex: old explored (safe) state has STACK_SPILL in + * this stack slot, but current has has STACK_MISC -> + * this verifier states are not equivalent, + * return false to continue verification of this path + */ return false; - } + if (i % BPF_REG_SIZE) + continue; + if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE], + &cur->spilled_regs[i / BPF_REG_SIZE], + sizeof(old->spilled_regs[0]))) + /* when explored and current stack slot types are + * the same, check that stored pointers types + * are the same as well. + * Ex: explored safe path could have stored + * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} + * but current path has stored: + * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} + * such verifier states are not equivalent. + * return false to continue verification of this path + */ + return false; + else + continue; } return true; } From 342ded409608f84fe929216efe0ca2d45e2656be Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 28 Oct 2014 15:11:42 -0700 Subject: [PATCH 2/3] samples: bpf: add a verifier test and summary line - add a test specifically targeting verifier state pruning. It checks state propagation between registers, storing that state into stack and state pruning algorithm recognizing equivalent stack and register states. - add summary line to spot failures easier Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/test_verifier.c | 46 ++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index eb4bec0ad8af..63402742345e 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -602,6 +602,45 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, }, + { + "jump test 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -630,7 +669,7 @@ static int create_map(void) static int test(void) { - int prog_fd, i; + int prog_fd, i, pass_cnt = 0, err_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_insn *prog = tests[i].insns; @@ -657,21 +696,25 @@ static int test(void) printf("FAIL\nfailed to load prog '%s'\n", strerror(errno)); printf("%s", bpf_log_buf); + err_cnt++; goto fail; } } else { if (prog_fd >= 0) { printf("FAIL\nunexpected success to load\n"); printf("%s", bpf_log_buf); + err_cnt++; goto fail; } if (strstr(bpf_log_buf, tests[i].errstr) == 0) { printf("FAIL\nunexpected error message: %s", bpf_log_buf); + err_cnt++; goto fail; } } + pass_cnt++; printf("OK\n"); fail: if (map_fd >= 0) @@ -679,6 +722,7 @@ fail: close(prog_fd); } + printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt); return 0; } From e21ab36a80c2e011a14149fecdd654b8b47d3e8c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 28 Oct 2014 15:11:43 -0700 Subject: [PATCH 3/3] test: bpf: add a testcase reduced from nmap nmap generates classic BPF programs to filter ARP packets with given target MAC which triggered a bug in eBPF x64 JIT. The bug was fixed in commit e0ee9c12157d ("x86: bpf_jit: fix two bugs in eBPF JIT compiler") This patch is adding a testcase in eBPF instructions (those that were generated by classic->eBPF converter) to be processed by JIT. The test is primarily targeting JIT compiler. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- lib/test_bpf.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 23e070bcf72d..3f167d2eeb94 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1756,6 +1756,49 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } } }, + { + "nmap reduced", + .u.insns_int = { + BPF_MOV64_REG(R6, R1), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, R0, 0x806, 28), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, R0, 0x806, 26), + BPF_MOV32_IMM(R0, 18), + BPF_STX_MEM(BPF_W, R10, R0, -64), + BPF_LDX_MEM(BPF_W, R7, R10, -64), + BPF_LD_IND(BPF_W, R7, 14), + BPF_STX_MEM(BPF_W, R10, R0, -60), + BPF_MOV32_IMM(R0, 280971478), + BPF_STX_MEM(BPF_W, R10, R0, -56), + BPF_LDX_MEM(BPF_W, R7, R10, -56), + BPF_LDX_MEM(BPF_W, R0, R10, -60), + BPF_ALU32_REG(BPF_SUB, R0, R7), + BPF_JMP_IMM(BPF_JNE, R0, 0, 15), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, R0, 0x806, 13), + BPF_MOV32_IMM(R0, 22), + BPF_STX_MEM(BPF_W, R10, R0, -56), + BPF_LDX_MEM(BPF_W, R7, R10, -56), + BPF_LD_IND(BPF_H, R7, 14), + BPF_STX_MEM(BPF_W, R10, R0, -52), + BPF_MOV32_IMM(R0, 17366), + BPF_STX_MEM(BPF_W, R10, R0, -48), + BPF_LDX_MEM(BPF_W, R7, R10, -48), + BPF_LDX_MEM(BPF_W, R0, R10, -52), + BPF_ALU32_REG(BPF_SUB, R0, R7), + BPF_JMP_IMM(BPF_JNE, R0, 0, 2), + BPF_MOV32_IMM(R0, 256), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, + { { 38, 256 } } + }, }; static struct net_device dev;