From e90004d56bf805db7d4401fe51a80a93c311cfe6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Dec 2017 14:03:12 +0000 Subject: [PATCH 01/16] bpf: fix spelling mistake: "funcation"-> "function" Trivial fix to spelling mistake in error message text. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 48b2901cf483..56ef21c71bd3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -772,7 +772,7 @@ static int check_subprogs(struct bpf_verifier_env *env) return -EPERM; } if (bpf_prog_is_dev_bound(env->prog->aux)) { - verbose(env, "funcation calls in offloaded programs are not supported yet\n"); + verbose(env, "function calls in offloaded programs are not supported yet\n"); return -EINVAL; } ret = add_subprog(env, i + insn[i].imm + 1); From fa2d41adb953235c4acaa98f6c980fd9eabe0062 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Dec 2017 17:47:07 +0000 Subject: [PATCH 02/16] bpf: make function skip_callee static and return NULL rather than 0 Function skip_callee is local to the source and does not need to be in global scope, so make it static. Also return NULL rather than 0. Cleans up two sparse warnings: symbol 'skip_callee' was not declared. Should it be static? Using plain integer as NULL pointer Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 56ef21c71bd3..52c9b32d58bf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -823,6 +823,7 @@ next: return 0; } +static struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env, const struct bpf_verifier_state *state, struct bpf_verifier_state *parent, @@ -867,7 +868,7 @@ bug: verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp); verbose(env, "regno %d parent frame %d current frame %d\n", regno, parent->curframe, state->curframe); - return 0; + return NULL; } static int mark_reg_read(struct bpf_verifier_env *env, From 5ee7f784cd32e63fbf54275b2b4f991ca085db61 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 18 Dec 2017 10:09:44 -0800 Subject: [PATCH 03/16] bpf: arm64: fix uninitialized variable fix the following issue: arch/arm64/net/bpf_jit_comp.c: In function 'bpf_int_jit_compile': arch/arm64/net/bpf_jit_comp.c:982:18: error: 'image_size' may be used uninitialized in this function [-Werror=maybe-uninitialized] Fixes: db496944fdaa ("bpf: arm64: add JIT support for multi-function programs") Reported-by: Arnd Bergmann Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- arch/arm64/net/bpf_jit_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 396490cf7316..acaa935ed977 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -897,6 +897,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image_ptr = jit_data->image; header = jit_data->header; extra_pass = true; + image_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; } memset(&ctx, 0, sizeof(ctx)); From 4ca998fe46b1fce4988005851df2c85b7bf2addf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Dec 2017 15:11:30 -0800 Subject: [PATCH 04/16] selftests/bpf: add netdevsim to config BPF offload tests (test_offload.py) will require netdevsim to be built, add it to config. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 9d4897317c77..983dd25d49f4 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -4,3 +4,4 @@ CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y +CONFIG_NETDEVSIM=m From c060bc6115b6a204cb60e6b03fe64135731bc6c8 Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Tue, 19 Dec 2017 07:17:15 +0800 Subject: [PATCH 05/16] bpf: make function xdp_do_generic_redirect_map() static The function xdp_do_generic_redirect_map() is only used in this file, so make it static. Clean up sparse warning: net/core/filter.c:2687:5: warning: no previous prototype for 'xdp_do_generic_redirect_map' [-Wmissing-prototypes] Signed-off-by: Xiongwei Song Signed-off-by: Daniel Borkmann --- net/core/filter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 754abe1041b7..130b842c3a15 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2684,8 +2684,9 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) return 0; } -int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog) +static int xdp_do_generic_redirect_map(struct net_device *dev, + struct sk_buff *skb, + struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); unsigned long map_owner = ri->map_owner; From 06ef0ccb5a36e1feba9b413ff59a04ecc4407c1c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 18 Dec 2017 10:13:44 -0800 Subject: [PATCH 06/16] bpf/cgroup: fix a verification error for a CGROUP_DEVICE type prog The tools/testing/selftests/bpf test program test_dev_cgroup fails with the following error when compiled with llvm 6.0. (I did not try with earlier versions.) libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (61) r2 = *(u32 *)(r1 +4) 1: (b7) r0 = 0 2: (55) if r2 != 0x1 goto pc+8 R0=inv0 R1=ctx(id=0,off=0,imm=0) R2=inv1 R10=fp0 3: (69) r2 = *(u16 *)(r1 +0) invalid bpf_context access off=0 size=2 ... The culprit is the following statement in dev_cgroup.c: short type = ctx->access_type & 0xFFFF; This code is typical as the ctx->access_type is assigned as below in kernel/bpf/cgroup.c: struct bpf_cgroup_dev_ctx ctx = { .access_type = (access << 16) | dev_type, .major = major, .minor = minor, }; The compiler converts it to u16 access while the verifier cgroup_dev_is_valid_access rejects any non u32 access. This patch permits the field access_type to be accessible with type u16 and u8 as well. Signed-off-by: Yonghong Song Tested-by: Roman Gushchin Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 ++- kernel/bpf/cgroup.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d01f1cb3cfc0..69eabfcb9bdb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1012,7 +1012,8 @@ struct bpf_perf_event_value { #define BPF_DEVCG_DEV_CHAR (1ULL << 1) struct bpf_cgroup_dev_ctx { - __u32 access_type; /* (access << 16) | type */ + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; __u32 major; __u32 minor; }; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index b789ab78d28f..c1c0b60d3f2f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -568,6 +568,8 @@ static bool cgroup_dev_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + if (type == BPF_WRITE) return false; @@ -576,8 +578,17 @@ static bool cgroup_dev_is_valid_access(int off, int size, /* The verifier guarantees that size > 0. */ if (off % size != 0) return false; - if (size != sizeof(__u32)) - return false; + + switch (off) { + case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type): + bpf_ctx_record_field_size(info, size_default); + if (!bpf_ctx_narrow_access_ok(off, size, size_default)) + return false; + break; + default: + if (size != size_default) + return false; + } return true; } From 7d9890ef505a8c2a778d304535e26e827d58c466 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 19 Dec 2017 15:53:11 -0500 Subject: [PATCH 07/16] libbpf: Fix build errors. These elf object pieces are of type Elf64_Xword and therefore could be "long long" on some builds. Cast to "long long" and use printf format %lld to deal with this since we are building with -Werror=format. Signed-off-by: David S. Miller Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5b83875b3594..e9c4b7cabcf2 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -910,8 +910,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - pr_debug("relo for %ld value %ld name %d\n", - rel.r_info >> 32, sym.st_value, sym.st_name); + pr_debug("relo for %lld value %lld name %d\n", + (long long) (rel.r_info >> 32), + (long long) sym.st_value, sym.st_name); if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", From 4f74d80971bce93d9e608c40324d662c70eb4664 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Dec 2017 13:42:56 +0100 Subject: [PATCH 08/16] bpf: fix kallsyms handling for subprogs Right now kallsyms handling is not working with JITed subprogs. The reason is that when in 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs") in jit_subprogs() they are passed to bpf_prog_kallsyms_add(), then their prog type is 0, which BPF core will think it's a cBPF program as only cBPF programs have a 0 type. Thus, they need to inherit the type from the main prog. Once that is fixed, they are indeed added to the BPF kallsyms infra, but their tag is 0. Therefore, since intention is to add them as bpf_prog_F_, we need to pass them to bpf_prog_calc_tag() first. And once this is resolved, there is a use-after-free on prog cleanup: we remove the kallsyms entry from the main prog, later walk all subprogs and call bpf_jit_free() on them. However, the kallsyms linkage was never released on them. Thus, do that for all subprogs right in __bpf_prog_put() when refcount hits 0. Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 6 ++++++ kernel/bpf/verifier.c | 3 +++ 2 files changed, 9 insertions(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e2e1c78ce1dc..30e728dcd35d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -937,10 +937,16 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic_dec_and_test(&prog->aux->refcnt)) { + int i; + trace_bpf_prog_put_rcu(prog); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); + + for (i = 0; i < prog->aux->func_cnt; i++) + bpf_prog_kallsyms_del(prog->aux->func[i]); bpf_prog_kallsyms_del(prog); + call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 52c9b32d58bf..3c3eec58b3e8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5063,7 +5063,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) goto out_free; memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], len * sizeof(struct bpf_insn)); + func[i]->type = prog->type; func[i]->len = len; + if (bpf_prog_calc_tag(func[i])) + goto out_free; func[i]->is_func = 1; /* Use bpf_prog_F_tag to indicate functions in stack traces. * Long term would need debug info to populate names From 7105e828c087de970fcb5a9509db51bfe6bd7894 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Dec 2017 13:42:57 +0100 Subject: [PATCH 09/16] bpf: allow for correlation of maps and helpers in dump Currently a dump of an xlated prog (post verifier stage) doesn't correlate used helpers as well as maps. The prog info lists involved map ids, however there's no correlation of where in the program they are used as of today. Likewise, bpftool does not correlate helper calls with the target functions. The latter can be done w/o any kernel changes through kallsyms, and also has the advantage that this works with inlined helpers and BPF calls. Example, via interpreter: # tc filter show dev foo ingress filter protocol all pref 49152 bpf chain 0 filter protocol all pref 49152 bpf chain 0 handle 0x1 foo.o:[ingress] \ direct-action not_in_hw id 1 tag c74773051b364165 <-- prog id:1 * Output before patch (calls/maps remain unclear): # bpftool prog dump xlated id 1 <-- dump prog id:1 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = 0xffff95c47a8d4800 6: (85) call unknown#73040 7: (15) if r0 == 0x0 goto pc+18 8: (bf) r2 = r10 9: (07) r2 += -4 10: (bf) r1 = r0 11: (85) call unknown#73040 12: (15) if r0 == 0x0 goto pc+23 [...] * Output after patch: # bpftool prog dump xlated id 1 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] <-- map id:2 6: (85) call bpf_map_lookup_elem#73424 <-- helper call 7: (15) if r0 == 0x0 goto pc+18 8: (bf) r2 = r10 9: (07) r2 += -4 10: (bf) r1 = r0 11: (85) call bpf_map_lookup_elem#73424 12: (15) if r0 == 0x0 goto pc+23 [...] # bpftool map show id 2 <-- show/dump/etc map id:2 2: hash_of_maps flags 0x0 key 4B value 4B max_entries 3 memlock 4096B Example, JITed, same prog: # tc filter show dev foo ingress filter protocol all pref 49152 bpf chain 0 filter protocol all pref 49152 bpf chain 0 handle 0x1 foo.o:[ingress] \ direct-action not_in_hw id 3 tag c74773051b364165 jited # bpftool prog show id 3 3: sched_cls tag c74773051b364165 loaded_at Dec 19/13:48 uid 0 xlated 384B jited 257B memlock 4096B map_ids 2 # bpftool prog dump xlated id 3 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] <-- map id:2 6: (85) call __htab_map_lookup_elem#77408 <-+ inlined rewrite 7: (15) if r0 == 0x0 goto pc+2 | 8: (07) r0 += 56 | 9: (79) r0 = *(u64 *)(r0 +0) <-+ 10: (15) if r0 == 0x0 goto pc+24 11: (bf) r2 = r10 12: (07) r2 += -4 [...] Example, same prog, but kallsyms disabled (in that case we are also not allowed to pass any relative offsets, etc, so prog becomes pointer sanitized on dump): # sysctl kernel.kptr_restrict=2 kernel.kptr_restrict = 2 # bpftool prog dump xlated id 3 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] 6: (85) call bpf_unspec#0 7: (15) if r0 == 0x0 goto pc+2 [...] Example, BPF calls via interpreter: # bpftool prog dump xlated id 1 0: (85) call pc+2#__bpf_prog_run_args32 1: (b7) r0 = 1 2: (95) exit 3: (b7) r0 = 2 4: (95) exit Example, BPF calls via JIT: # sysctl net.core.bpf_jit_enable=1 net.core.bpf_jit_enable = 1 # sysctl net.core.bpf_jit_kallsyms=1 net.core.bpf_jit_kallsyms = 1 # bpftool prog dump xlated id 1 0: (85) call pc+2#bpf_prog_3b185187f1855c4c_F 1: (b7) r0 = 1 2: (95) exit 3: (b7) r0 = 2 4: (95) exit And finally, an example for tail calls that is now working as well wrt correlation: # bpftool prog dump xlated id 2 [...] 10: (b7) r2 = 8 11: (85) call bpf_trace_printk#-41312 12: (bf) r1 = r6 13: (18) r2 = map[id:1] 15: (b7) r3 = 0 16: (85) call bpf_tail_call#12 17: (b7) r1 = 42 18: (6b) *(u16 *)(r6 +46) = r1 19: (b7) r0 = 0 20: (95) exit # bpftool map show id 1 1: prog_array flags 0x0 key 4B value 4B max_entries 1 memlock 4096B Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 9 ++ kernel/bpf/core.c | 4 +- kernel/bpf/disasm.c | 65 +++++++++++--- kernel/bpf/disasm.h | 29 +++++-- kernel/bpf/syscall.c | 87 +++++++++++++++++-- kernel/bpf/verifier.c | 30 ++++++- tools/bpf/bpftool/prog.c | 181 +++++++++++++++++++++++++++++++++++++-- 7 files changed, 370 insertions(+), 35 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index e872b4ebaa57..2b0df2703671 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -724,6 +725,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_pkt_data(void *func); +static inline bool bpf_dump_raw_ok(void) +{ + /* Reconstruction of call-sites is dependent on kallsyms, + * thus make dump the same restriction. + */ + return kallsyms_show_value() == 1; +} + struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 768e0a02d8c8..70a534549cd3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -771,7 +771,9 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs - * anyway later on, so do not let the compiler omit it. + * anyway later on, so do not let the compiler omit it. This also needs + * to go into kallsyms for correlation from e.g. bpftool, so naming + * must not change. */ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 883f88fa5bfc..8740406df2cd 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -21,10 +21,39 @@ static const char * const func_id_str[] = { }; #undef __BPF_FUNC_STR_FN -const char *func_id_name(int id) +static const char *__func_get_name(const struct bpf_insn_cbs *cbs, + const struct bpf_insn *insn, + char *buff, size_t len) { BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); + if (insn->src_reg != BPF_PSEUDO_CALL && + insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && + func_id_str[insn->imm]) + return func_id_str[insn->imm]; + + if (cbs && cbs->cb_call) + return cbs->cb_call(cbs->private_data, insn); + + if (insn->src_reg == BPF_PSEUDO_CALL) + snprintf(buff, len, "%+d", insn->imm); + + return buff; +} + +static const char *__func_imm_name(const struct bpf_insn_cbs *cbs, + const struct bpf_insn *insn, + u64 full_imm, char *buff, size_t len) +{ + if (cbs && cbs->cb_imm) + return cbs->cb_imm(cbs->private_data, insn, full_imm); + + snprintf(buff, len, "0x%llx", (unsigned long long)full_imm); + return buff; +} + +const char *func_id_name(int id) +{ if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id]) return func_id_str[id]; else @@ -83,7 +112,7 @@ static const char *const bpf_jmp_string[16] = { [BPF_EXIT >> 4] = "exit", }; -static void print_bpf_end_insn(bpf_insn_print_cb verbose, +static void print_bpf_end_insn(bpf_insn_print_t verbose, struct bpf_verifier_env *env, const struct bpf_insn *insn) { @@ -92,9 +121,12 @@ static void print_bpf_end_insn(bpf_insn_print_cb verbose, insn->imm, insn->dst_reg); } -void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, - const struct bpf_insn *insn, bool allow_ptr_leaks) +void print_bpf_insn(const struct bpf_insn_cbs *cbs, + struct bpf_verifier_env *env, + const struct bpf_insn *insn, + bool allow_ptr_leaks) { + const bpf_insn_print_t verbose = cbs->cb_print; u8 class = BPF_CLASS(insn->code); if (class == BPF_ALU || class == BPF_ALU64) { @@ -175,12 +207,15 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, */ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; + char tmp[64]; if (map_ptr && !allow_ptr_leaks) imm = 0; - verbose(env, "(%02x) r%d = 0x%llx\n", insn->code, - insn->dst_reg, (unsigned long long)imm); + verbose(env, "(%02x) r%d = %s\n", + insn->code, insn->dst_reg, + __func_imm_name(cbs, insn, imm, + tmp, sizeof(tmp))); } else { verbose(env, "BUG_ld_%02x\n", insn->code); return; @@ -189,12 +224,20 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { - if (insn->src_reg == BPF_PSEUDO_CALL) - verbose(env, "(%02x) call pc%+d\n", insn->code, - insn->imm); - else + char tmp[64]; + + if (insn->src_reg == BPF_PSEUDO_CALL) { + verbose(env, "(%02x) call pc%s\n", + insn->code, + __func_get_name(cbs, insn, + tmp, sizeof(tmp))); + } else { + strcpy(tmp, "unknown"); verbose(env, "(%02x) call %s#%d\n", insn->code, - func_id_name(insn->imm), insn->imm); + __func_get_name(cbs, insn, + tmp, sizeof(tmp)), + insn->imm); + } } else if (insn->code == (BPF_JMP | BPF_JA)) { verbose(env, "(%02x) goto pc%+d\n", insn->code, insn->off); diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h index 8de977e420b6..e0857d016f89 100644 --- a/kernel/bpf/disasm.h +++ b/kernel/bpf/disasm.h @@ -17,16 +17,35 @@ #include #include #include +#ifndef __KERNEL__ +#include +#include +#endif + +struct bpf_verifier_env; extern const char *const bpf_alu_string[16]; extern const char *const bpf_class_string[8]; const char *func_id_name(int id); -struct bpf_verifier_env; -typedef void (*bpf_insn_print_cb)(struct bpf_verifier_env *env, - const char *, ...); -void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, - const struct bpf_insn *insn, bool allow_ptr_leaks); +typedef void (*bpf_insn_print_t)(struct bpf_verifier_env *env, + const char *, ...); +typedef const char *(*bpf_insn_revmap_call_t)(void *private_data, + const struct bpf_insn *insn); +typedef const char *(*bpf_insn_print_imm_t)(void *private_data, + const struct bpf_insn *insn, + __u64 full_imm); +struct bpf_insn_cbs { + bpf_insn_print_t cb_print; + bpf_insn_revmap_call_t cb_call; + bpf_insn_print_imm_t cb_imm; + void *private_data; +}; + +void print_bpf_insn(const struct bpf_insn_cbs *cbs, + struct bpf_verifier_env *env, + const struct bpf_insn *insn, + bool allow_ptr_leaks); #endif diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 30e728dcd35d..007802c5ca7d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1558,6 +1558,67 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) return fd; } +static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, + unsigned long addr) +{ + int i; + + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (prog->aux->used_maps[i] == (void *)addr) + return prog->aux->used_maps[i]; + return NULL; +} + +static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) +{ + const struct bpf_map *map; + struct bpf_insn *insns; + u64 imm; + int i; + + insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), + GFP_USER); + if (!insns) + return insns; + + for (i = 0; i < prog->len; i++) { + if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) { + insns[i].code = BPF_JMP | BPF_CALL; + insns[i].imm = BPF_FUNC_tail_call; + /* fall-through */ + } + if (insns[i].code == (BPF_JMP | BPF_CALL) || + insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) { + if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) + insns[i].code = BPF_JMP | BPF_CALL; + if (!bpf_dump_raw_ok()) + insns[i].imm = 0; + continue; + } + + if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW)) + continue; + + imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; + map = bpf_map_from_imm(prog, imm); + if (map) { + insns[i].src_reg = BPF_PSEUDO_MAP_FD; + insns[i].imm = map->id; + insns[i + 1].imm = 0; + continue; + } + + if (!bpf_dump_raw_ok() && + imm == (unsigned long)prog->aux) { + insns[i].imm = 0; + insns[i + 1].imm = 0; + continue; + } + } + + return insns; +} + static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -1608,18 +1669,34 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ulen = info.jited_prog_len; info.jited_prog_len = prog->jited_len; if (info.jited_prog_len && ulen) { - uinsns = u64_to_user_ptr(info.jited_prog_insns); - ulen = min_t(u32, info.jited_prog_len, ulen); - if (copy_to_user(uinsns, prog->bpf_func, ulen)) - return -EFAULT; + if (bpf_dump_raw_ok()) { + uinsns = u64_to_user_ptr(info.jited_prog_insns); + ulen = min_t(u32, info.jited_prog_len, ulen); + if (copy_to_user(uinsns, prog->bpf_func, ulen)) + return -EFAULT; + } else { + info.jited_prog_insns = 0; + } } ulen = info.xlated_prog_len; info.xlated_prog_len = bpf_prog_insn_size(prog); if (info.xlated_prog_len && ulen) { + struct bpf_insn *insns_sanitized; + bool fault; + + if (prog->blinded && !bpf_dump_raw_ok()) { + info.xlated_prog_insns = 0; + goto done; + } + insns_sanitized = bpf_insn_prepare_dump(prog); + if (!insns_sanitized) + return -ENOMEM; uinsns = u64_to_user_ptr(info.xlated_prog_insns); ulen = min_t(u32, info.xlated_prog_len, ulen); - if (copy_to_user(uinsns, prog->insnsi, ulen)) + fault = copy_to_user(uinsns, insns_sanitized, ulen); + kfree(insns_sanitized); + if (fault) return -EFAULT; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3c3eec58b3e8..4ae46b2cba88 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4427,9 +4427,12 @@ static int do_check(struct bpf_verifier_env *env) } if (env->log.level) { + const struct bpf_insn_cbs cbs = { + .cb_print = verbose, + }; + verbose(env, "%d: ", insn_idx); - print_bpf_insn(verbose, env, insn, - env->allow_ptr_leaks); + print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks); } err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); @@ -5017,14 +5020,14 @@ static int jit_subprogs(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog, **func, *tmp; int i, j, subprog_start, subprog_end = 0, len, subprog; - struct bpf_insn *insn = prog->insnsi; + struct bpf_insn *insn; void *old_bpf_func; int err = -ENOMEM; if (env->subprog_cnt == 0) return 0; - for (i = 0; i < prog->len; i++, insn++) { + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) continue; @@ -5116,6 +5119,25 @@ static int jit_subprogs(struct bpf_verifier_env *env) bpf_prog_lock_ro(func[i]); bpf_prog_kallsyms_add(func[i]); } + + /* Last step: make now unused interpreter insns from main + * prog consistent for later dump requests, so they can + * later look the same as if they were interpreted only. + */ + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + unsigned long addr; + + if (insn->code != (BPF_JMP | BPF_CALL) || + insn->src_reg != BPF_PSEUDO_CALL) + continue; + insn->off = env->insn_aux_data[i].call_imm; + subprog = find_subprog(env, i + insn->off + 1); + addr = (unsigned long)func[subprog + 1]->bpf_func; + addr &= PAGE_MASK; + insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) + addr - __bpf_call_base; + } + prog->jited = 1; prog->bpf_func = func[0]->bpf_func; prog->aux->func = func; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 037484ceaeaf..42ee8892549c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -401,6 +401,88 @@ static int do_show(int argc, char **argv) return err; } +#define SYM_MAX_NAME 256 + +struct kernel_sym { + unsigned long address; + char name[SYM_MAX_NAME]; +}; + +struct dump_data { + unsigned long address_call_base; + struct kernel_sym *sym_mapping; + __u32 sym_count; + char scratch_buff[SYM_MAX_NAME]; +}; + +static int kernel_syms_cmp(const void *sym_a, const void *sym_b) +{ + return ((struct kernel_sym *)sym_a)->address - + ((struct kernel_sym *)sym_b)->address; +} + +static void kernel_syms_load(struct dump_data *dd) +{ + struct kernel_sym *sym; + char buff[256]; + void *tmp, *address; + FILE *fp; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) + return; + + while (!feof(fp)) { + if (!fgets(buff, sizeof(buff), fp)) + break; + tmp = realloc(dd->sym_mapping, + (dd->sym_count + 1) * + sizeof(*dd->sym_mapping)); + if (!tmp) { +out: + free(dd->sym_mapping); + dd->sym_mapping = NULL; + fclose(fp); + return; + } + dd->sym_mapping = tmp; + sym = &dd->sym_mapping[dd->sym_count]; + if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2) + continue; + sym->address = (unsigned long)address; + if (!strcmp(sym->name, "__bpf_call_base")) { + dd->address_call_base = sym->address; + /* sysctl kernel.kptr_restrict was set */ + if (!sym->address) + goto out; + } + if (sym->address) + dd->sym_count++; + } + + fclose(fp); + + qsort(dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp); +} + +static void kernel_syms_destroy(struct dump_data *dd) +{ + free(dd->sym_mapping); +} + +static struct kernel_sym *kernel_syms_search(struct dump_data *dd, + unsigned long key) +{ + struct kernel_sym sym = { + .address = key, + }; + + return dd->sym_mapping ? + bsearch(&sym, dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL; +} + static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) { va_list args; @@ -410,8 +492,71 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) +static const char *print_call_pcrel(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address, + const struct bpf_insn *insn) { + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#%s", insn->off, sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#0x%lx", insn->off, address); + return dd->scratch_buff; +} + +static const char *print_call_helper(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address) +{ + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%s", sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%lx", address); + return dd->scratch_buff; +} + +static const char *print_call(void *private_data, + const struct bpf_insn *insn) +{ + struct dump_data *dd = private_data; + unsigned long address = dd->address_call_base + insn->imm; + struct kernel_sym *sym; + + sym = kernel_syms_search(dd, address); + if (insn->src_reg == BPF_PSEUDO_CALL) + return print_call_pcrel(dd, sym, address, insn); + else + return print_call_helper(dd, sym, address); +} + +static const char *print_imm(void *private_data, + const struct bpf_insn *insn, + __u64 full_imm) +{ + struct dump_data *dd = private_data; + + if (insn->src_reg == BPF_PSEUDO_MAP_FD) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "map[id:%u]", insn->imm); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%llx", (unsigned long long)full_imm); + return dd->scratch_buff; +} + +static void dump_xlated_plain(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) +{ + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -425,7 +570,7 @@ static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); - print_bpf_insn(print_insn, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { printf(" "); @@ -454,8 +599,15 @@ static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) +static void dump_xlated_json(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) { + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn_json, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -470,7 +622,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) jsonw_start_object(json_wtr); jsonw_name(json_wtr, "disasm"); - print_bpf_insn(print_insn_json, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { jsonw_name(json_wtr, "opcodes"); @@ -505,6 +657,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) static int do_dump(int argc, char **argv) { struct bpf_prog_info info = {}; + struct dump_data dd = {}; __u32 len = sizeof(info); unsigned int buf_size; char *filepath = NULL; @@ -592,6 +745,14 @@ static int do_dump(int argc, char **argv) goto err_free; } + if ((member_len == &info.jited_prog_len && + info.jited_prog_insns == 0) || + (member_len == &info.xlated_prog_len && + info.xlated_prog_insns == 0)) { + p_err("error retrieving insn dump: kernel.kptr_restrict set?"); + goto err_free; + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -608,17 +769,19 @@ static int do_dump(int argc, char **argv) goto err_free; } } else { - if (member_len == &info.jited_prog_len) + if (member_len == &info.jited_prog_len) { disasm_print_insn(buf, *member_len, opcodes); - else + } else { + kernel_syms_load(&dd); if (json_output) - dump_xlated_json(buf, *member_len, opcodes); + dump_xlated_json(&dd, buf, *member_len, opcodes); else - dump_xlated_plain(buf, *member_len, opcodes); + dump_xlated_plain(&dd, buf, *member_len, opcodes); + kernel_syms_destroy(&dd); + } } free(buf); - return 0; err_free: From c475ffad58a8a2f1d3a2bd433eaa491471981b49 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 20 Dec 2017 10:37:08 -0800 Subject: [PATCH 10/16] tools/bpf: adjust rlimit RLIMIT_MEMLOCK for test_dev_cgroup The default rlimit RLIMIT_MEMLOCK is 64KB. In certain cases, e.g. in a test machine mimicking our production system, this test may fail due to unable to charge the required memory for prog load: $ ./test_dev_cgroup libbpf: load bpf program failed: Operation not permitted libbpf: failed to load program 'cgroup/dev' libbpf: failed to load object './dev_cgroup.o' Failed to load DEV_CGROUP program ... Changing the default rlimit RLIMIT_MEMLOCK to unlimited makes the test pass. This patch also fixed a problem where when bpf_prog_load fails, cleanup_cgroup_environment() should not be called since setup_cgroup_environment() has not been invoked. Otherwise, the following confusing message will appear: ... (/home/yhs/local/linux/tools/testing/selftests/bpf/cgroup_helpers.c:95: errno: No such file or directory) Opening Cgroup Procs: /mnt/cgroup.procs ... Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_dev_cgroup.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index 02c85d6c89b0..c1535b34f14f 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include @@ -23,15 +25,19 @@ int main(int argc, char **argv) { + struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; struct bpf_object *obj; int error = EXIT_FAILURE; int prog_fd, cgroup_fd; __u32 prog_cnt; + if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) + perror("Unable to lift memlock rlimit"); + if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); - goto err; + goto out; } if (setup_cgroup_environment()) { @@ -89,5 +95,6 @@ int main(int argc, char **argv) err: cleanup_cgroup_environment(); +out: return error; } From 5f5a641116391f6c8c5fb4dad717895d2da96678 Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 22 Dec 2017 09:42:41 -0500 Subject: [PATCH 11/16] bpf: sparc64: Add JIT support for multi-function programs. Modelled strongly upon the arm64 implementation. Signed-off-by: David S. Miller Signed-off-by: Daniel Borkmann --- arch/sparc/net/bpf_jit_comp_64.c | 44 +++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index a2f1b5e774a7..991a3ab76e7a 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1507,11 +1507,19 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = 0x91d02005; /* ta 5 */ } +struct sparc64_jit_data { + struct bpf_binary_header *header; + u8 *image; + struct jit_ctx ctx; +}; + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; + struct sparc64_jit_data *jit_data; struct bpf_binary_header *header; bool tmp_blinded = false; + bool extra_pass = false; struct jit_ctx ctx; u32 image_size; u8 *image_ptr; @@ -1531,13 +1539,31 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = tmp; } + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + if (jit_data->ctx.offset) { + ctx = jit_data->ctx; + image_ptr = jit_data->image; + header = jit_data->header; + extra_pass = true; + image_size = sizeof(u32) * ctx.idx; + goto skip_init_ctx; + } + memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; - goto out; + goto out_off; } /* Fake pass to detect features used, and get an accurate assessment @@ -1560,7 +1586,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.image = (u32 *)image_ptr; - +skip_init_ctx: for (pass = 1; pass < 3; pass++) { ctx.idx = 0; @@ -1591,14 +1617,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, (u8 *)header + (header->pages * PAGE_SIZE)); - bpf_jit_binary_lock_ro(header); + if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(header); + } else { + jit_data->ctx = ctx; + jit_data->image = image_ptr; + jit_data->header = header; + } prog->bpf_func = (void *)ctx.image; prog->jited = 1; prog->jited_len = image_size; + if (!prog->is_func || extra_pass) { out_off: - kfree(ctx.offset); + kfree(ctx.offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } out: if (tmp_blinded) bpf_jit_prog_release_other(prog, prog == orig_prog ? From fd05e57bb35ad5eb7e261b64e5cf46445250f842 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Sat, 23 Dec 2017 10:09:55 +0000 Subject: [PATCH 12/16] bpf: fix stacksafe exploration when comparing states Commit cc2b14d51053 ("bpf: teach verifier to recognize zero initialized stack") introduced a very relaxed check when comparing stacks of different states, effectively returning a positive result in many cases where it shouldn't. This can create problems in cases such as this following C pseudocode: long var; long *x = bpf_map_lookup(...); if (!x) return; if (*x != 0xbeef) var = 0; else var = 1; /* This is the key part, calling a helper causes an explored state * to be saved with the information that "var" is on the stack as * STACK_ZERO, since the helper is first met by the verifier after * the "var = 0" assignment. This state will however be wrongly used * also for the "var = 1" case, so the verifier assumes "var" is always * 0 and will replace the NULL assignment with nops, because the * search pruning prevents it from exploring the faulty branch. */ bpf_ktime_get_ns(); if (var) *(long *)0 = 0xbeef; Fix the issue by making sure that the stack is fully explored before returning a positive comparison result. Also attach a couple tests that highlight the bad behavior. In the first test, without this fix instructions 16 and 17 are replaced with nops instead of being rejected by the verifier. The second test, instead, allows a program to make a potentially illegal read from the stack. Fixes: cc2b14d51053 ("bpf: teach verifier to recognize zero initialized stack") Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- tools/testing/selftests/bpf/test_verifier.c | 51 +++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4ae46b2cba88..82ae580440b8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4107,7 +4107,7 @@ static bool stacksafe(struct bpf_func_state *old, if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) /* explored state didn't use this */ - return true; + continue; if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3bacff0d6f91..5e79515d10c5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -9715,6 +9715,57 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, + { + "search pruning: all branches should be verified (nop operation)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_A(1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R6 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "search pruning: all branches should be verified (invalid stack access)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_JMP_A(1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "invalid read from stack off -16+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) From 6b80ad299208b44ba33cb6df80bdaa3f63cf03e2 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 22 Dec 2017 19:12:35 +0100 Subject: [PATCH 13/16] bpf: selftest for late caller stack size increase This checks that it is not possible to bypass the total stack size check in update_stack_depth() by calling a function that uses a large amount of stack memory *before* using a large amount of stack memory in the caller. Currently, the first added testcase causes a rejection as expected, but the second testcase is (AFAICS incorrectly) accepted: [...] #483/p calls: stack overflow using two frames (post-call access) FAIL Unexpected success to load! 0: (85) call pc+2 caller: R10=fp0,call_-1 callee: frame1: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_0 3: (72) *(u8 *)(r10 -300) = 0 4: (b7) r0 = 0 5: (95) exit returning from callee: frame1: R0_w=inv0 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_0 to caller at 1: R0_w=inv0 R10=fp0,call_-1 from 5 to 1: R0=inv0 R10=fp0,call_-1 1: (72) *(u8 *)(r10 -300) = 0 2: (95) exit processed 6 insns, stack depth 300+300 [...] Summary: 704 PASSED, 1 FAILED AFAICS the JIT-generated code for the second testcase shows that this really causes the stack pointer to be decremented by 300+300: first function: 00000000 55 push rbp 00000001 4889E5 mov rbp,rsp 00000004 4881EC58010000 sub rsp,0x158 0000000B 4883ED28 sub rbp,byte +0x28 [...] 00000025 E89AB3AFE5 call 0xffffffffe5afb3c4 0000002A C685D4FEFFFF00 mov byte [rbp-0x12c],0x0 [...] 00000041 4883C528 add rbp,byte +0x28 00000045 C9 leave 00000046 C3 ret second function: 00000000 55 push rbp 00000001 4889E5 mov rbp,rsp 00000004 4881EC58010000 sub rsp,0x158 0000000B 4883ED28 sub rbp,byte +0x28 [...] 00000025 C685D4FEFFFF00 mov byte [rbp-0x12c],0x0 [...] 0000003E 4883C528 add rbp,byte +0x28 00000042 C9 leave 00000043 C3 ret Signed-off-by: Jann Horn Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 5e79515d10c5..41dcc7dbba42 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8729,6 +8729,40 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_XDP, .result = ACCEPT, }, + { + "calls: stack overflow using two frames (pre-call access)", + .insns = { + /* prog 1 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack overflow using two frames (post-call access)", + .insns = { + /* prog 1 */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, { "calls: spill into caller stack frame", .insns = { From 70a87ffea8acc390ae315fa1930e849f656bdb88 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Dec 2017 13:15:40 -0800 Subject: [PATCH 14/16] bpf: fix maximum stack depth tracking logic Instead of computing max stack depth for current call chain during the main verifier pass track stack depth of each function independently and after do_check() is done do another pass over all instructions analyzing depth of all possible call stacks. Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 82 +++++++++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index aaac589e490c..94a02ceb1246 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -194,6 +194,7 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ struct bpf_verifer_log log; u32 subprog_starts[BPF_MAX_SUBPROGS]; + /* computes the stack depth of each bpf function */ u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1]; u32 subprog_cnt; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 82ae580440b8..738e919efdf0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1430,33 +1430,80 @@ static int update_stack_depth(struct bpf_verifier_env *env, const struct bpf_func_state *func, int off) { - u16 stack = env->subprog_stack_depth[func->subprogno], total = 0; - struct bpf_verifier_state *cur = env->cur_state; - int i; + u16 stack = env->subprog_stack_depth[func->subprogno]; if (stack >= -off) return 0; /* update known max for given subprogram */ env->subprog_stack_depth[func->subprogno] = -off; + return 0; +} - /* compute the total for current call chain */ - for (i = 0; i <= cur->curframe; i++) { - u32 depth = env->subprog_stack_depth[cur->frame[i]->subprogno]; +/* starting from main bpf function walk all instructions of the function + * and recursively walk all callees that given function can call. + * Ignore jump and exit insns. + * Since recursion is prevented by check_cfg() this algorithm + * only needs a local stack of MAX_CALL_FRAMES to remember callsites + */ +static int check_max_stack_depth(struct bpf_verifier_env *env) +{ + int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + int ret_insn[MAX_CALL_FRAMES]; + int ret_prog[MAX_CALL_FRAMES]; - /* round up to 32-bytes, since this is granularity - * of interpreter stack sizes - */ - depth = round_up(depth, 32); - total += depth; - } - - if (total > MAX_BPF_STACK) { +process_func: + /* round up to 32-bytes, since this is granularity + * of interpreter stack size + */ + depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); + if (depth > MAX_BPF_STACK) { verbose(env, "combined stack size of %d calls is %d. Too large\n", - cur->curframe, total); + frame + 1, depth); return -EACCES; } - return 0; +continue_func: + if (env->subprog_cnt == subprog) + subprog_end = insn_cnt; + else + subprog_end = env->subprog_starts[subprog]; + for (; i < subprog_end; i++) { + if (insn[i].code != (BPF_JMP | BPF_CALL)) + continue; + if (insn[i].src_reg != BPF_PSEUDO_CALL) + continue; + /* remember insn and function to return to */ + ret_insn[frame] = i + 1; + ret_prog[frame] = subprog; + + /* find the callee */ + i = i + insn[i].imm + 1; + subprog = find_subprog(env, i); + if (subprog < 0) { + WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", + i); + return -EFAULT; + } + subprog++; + frame++; + if (frame >= MAX_CALL_FRAMES) { + WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); + return -EFAULT; + } + goto process_func; + } + /* end of for() loop means the last insn of the 'subprog' + * was reached. Doesn't matter whether it was JA or EXIT + */ + if (frame == 0) + return 0; + depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); + frame--; + i = ret_insn[frame]; + subprog = ret_prog[frame]; + goto continue_func; } static int get_callee_stack_depth(struct bpf_verifier_env *env, @@ -5403,6 +5450,9 @@ skip_full_check: if (ret == 0) sanitize_dead_code(env); + if (ret == 0) + ret = check_max_stack_depth(env); + if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); From 6b86c4217c231cbd268bd8c6fda025b27047d3ed Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Dec 2017 13:15:41 -0800 Subject: [PATCH 15/16] selftests/bpf: additional stack depth tests to test inner logic of stack depth tracking Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 121 ++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 41dcc7dbba42..b5a7a6c530dc 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8763,6 +8763,127 @@ static struct bpf_test tests[] = { .errstr = "combined stack size", .result = REJECT, }, + { + "calls: stack depth check using three frames. test1", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=256, stack_B=64 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test2", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=64, stack_B=256 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test3", + .insns = { + /* main */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + /* B */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=64, stack_A=224, stack_B=256 + * and max(main+A, main+A+B) > 512 + */ + .errstr = "combined stack", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test4", + /* void main(void) { + * func1(0); + * func1(1); + * func2(1); + * } + * void func1(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } else { + * func2(alloc_or_recurse); + * } + * } + * void func2(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } + * } + */ + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "combined stack", + }, { "calls: spill into caller stack frame", .insns = { From aada9ce644e53410954daa6beb1f7c4ca158abd7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Dec 2017 13:15:42 -0800 Subject: [PATCH 16/16] bpf: fix max call depth check fix off by one error in max call depth check and add a test Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 4 +-- tools/testing/selftests/bpf/test_verifier.c | 35 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 738e919efdf0..52ad60b3b8be 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2126,9 +2126,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_func_state *caller, *callee; int i, subprog, target_insn; - if (state->curframe >= MAX_CALL_FRAMES) { + if (state->curframe + 1 >= MAX_CALL_FRAMES) { verbose(env, "the call stack of %d frames is too deep\n", - state->curframe); + state->curframe + 2); return -E2BIG; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b5a7a6c530dc..5d0a574ce270 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8884,6 +8884,41 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "combined stack", }, + { + "calls: stack depth check using three frames. test5", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, + }, { "calls: spill into caller stack frame", .insns = {