diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 8bf1b6351716..16a49819da9a 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -26,6 +26,8 @@ #include #include +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs + /* * Overload regs->result to specify whether we should use the MSR (result * is zero) or the SIAR (result is non zero). diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index a658091a19f9..3712152206f3 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,7 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -generic-y += bpf_perf_event.h generic-y += param.h generic-y += poll.h generic-y += resource.h diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..b551b741653d --- /dev/null +++ b/arch/powerpc/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 795ff0b869bb..a8b9d90a8042 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -861,7 +861,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; -extern int bpf_jit_limit; +extern long bpf_jit_limit; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b1a3545d0ec8..b2890c268cb3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -365,13 +365,11 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) } #ifdef CONFIG_BPF_JIT -# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) - /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; -int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; +long bpf_jit_limit __read_mostly; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -580,16 +578,27 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, static atomic_long_t bpf_jit_current; +/* Can be overridden by an arch's JIT compiler if it has a custom, + * dedicated BPF backend memory area, or if neither of the two + * below apply. + */ +u64 __weak bpf_jit_alloc_exec_limit(void) +{ #if defined(MODULES_VADDR) + return MODULES_END - MODULES_VADDR; +#else + return VMALLOC_END - VMALLOC_START; +#endif +} + static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, - PAGE_SIZE), INT_MAX); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + PAGE_SIZE), LONG_MAX); return 0; } pure_initcall(bpf_jit_charge_init); -#endif static int bpf_jit_charge_modmem(u32 pages) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fc760d00a38c..51ba84d4d34a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5102,9 +5102,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) } new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; - /* connect new state to parentage chain */ - for (i = 0; i < BPF_REG_FP; i++) - cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i]; + /* connect new state to parentage chain. Current frame needs all + * registers connected. Only r6 - r9 of the callers are alive (pushed + * to the stack implicitly by JITs) so in callers' frames connect just + * r6 - r9 as an optimization. Callers will have r1 - r5 connected to + * the state of the call instruction (with WRITTEN set), and r0 comes + * from callee with its full parentage chain, anyway. + */ + for (j = 0; j <= cur->curframe; j++) + for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) + cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; /* clear write marks in current state: the writes we did are not writes * our child did, so they don't screen off its reads from us. * (There are no read marks in current state, because reads always mark diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 588f475019d4..af68207ee56c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -783,6 +783,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, /* Pass parameters to the BPF program */ cb->qdisc_cb.flow_keys = &flow_keys; flow_keys.nhoff = nhoff; + flow_keys.thoff = nhoff; bpf_compute_data_pointers((struct sk_buff *)skb); result = BPF_PROG_RUN(attached, skb); @@ -790,9 +791,12 @@ bool __skb_flow_dissect(const struct sk_buff *skb, /* Restore state */ memcpy(cb, &cb_saved, sizeof(cb_saved)); + flow_keys.nhoff = clamp_t(u16, flow_keys.nhoff, 0, skb->len); + flow_keys.thoff = clamp_t(u16, flow_keys.thoff, + flow_keys.nhoff, skb->len); + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); - key_control->thoff = min_t(u16, key_control->thoff, skb->len); rcu_read_unlock(); return result == BPF_OK; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 37b4667128a3..d67ec17f2cc8 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -28,6 +28,8 @@ static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; +static long long_one __maybe_unused = 1; +static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -289,6 +291,17 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } + +static int +proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} #endif static struct ctl_table net_core_table[] = { @@ -398,10 +411,11 @@ static struct ctl_table net_core_table[] = { { .procname = "bpf_jit_limit", .data = &bpf_jit_limit, - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0600, - .proc_handler = proc_dointvec_minmax_bpf_restricted, - .extra1 = &one, + .proc_handler = proc_dolongvec_minmax_bpf_restricted, + .extra1 = &long_one, + .extra2 = &long_max, }, #endif { diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c index 107350a7821d..df9d32fd2055 100644 --- a/tools/testing/selftests/bpf/bpf_flow.c +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -70,18 +70,18 @@ static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; - __u16 nhoff = skb->flow_keys->nhoff; + __u16 thoff = skb->flow_keys->thoff; __u8 *hdr; /* Verifies this variable offset does not overflow */ - if (nhoff > (USHRT_MAX - hdr_size)) + if (thoff > (USHRT_MAX - hdr_size)) return NULL; - hdr = data + nhoff; + hdr = data + thoff; if (hdr + hdr_size <= data_end) return hdr; - if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size)) + if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size)) return NULL; return buffer; @@ -158,13 +158,13 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) /* Only inspect standard GRE packets with version 0 */ return BPF_OK; - keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ if (GRE_IS_CSUM(gre->flags)) - keys->nhoff += 4; /* Step over chksum and Padding */ + keys->thoff += 4; /* Step over chksum and Padding */ if (GRE_IS_KEY(gre->flags)) - keys->nhoff += 4; /* Step over key */ + keys->thoff += 4; /* Step over key */ if (GRE_IS_SEQ(gre->flags)) - keys->nhoff += 4; /* Step over sequence number */ + keys->thoff += 4; /* Step over sequence number */ keys->is_encap = true; @@ -174,7 +174,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if (!eth) return BPF_DROP; - keys->nhoff += sizeof(*eth); + keys->thoff += sizeof(*eth); return parse_eth_proto(skb, eth->h_proto); } else { @@ -191,7 +191,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if ((__u8 *)tcp + (tcp->doff << 2) > data_end) return BPF_DROP; - keys->thoff = keys->nhoff; keys->sport = tcp->source; keys->dport = tcp->dest; return BPF_OK; @@ -201,7 +200,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if (!udp) return BPF_DROP; - keys->thoff = keys->nhoff; keys->sport = udp->source; keys->dport = udp->dest; return BPF_OK; @@ -252,8 +250,8 @@ PROG(IP)(struct __sk_buff *skb) keys->ipv4_src = iph->saddr; keys->ipv4_dst = iph->daddr; - keys->nhoff += iph->ihl << 2; - if (data + keys->nhoff > data_end) + keys->thoff += iph->ihl << 2; + if (data + keys->thoff > data_end) return BPF_DROP; if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { @@ -285,7 +283,7 @@ PROG(IPV6)(struct __sk_buff *skb) keys->addr_proto = ETH_P_IPV6; memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); - keys->nhoff += sizeof(struct ipv6hdr); + keys->thoff += sizeof(struct ipv6hdr); return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -301,7 +299,7 @@ PROG(IPV6OP)(struct __sk_buff *skb) /* hlen is in 8-octets and does not include the first 8 bytes * of the header */ - skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3; + skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3; return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -315,7 +313,7 @@ PROG(IPV6FR)(struct __sk_buff *skb) if (!fragh) return BPF_DROP; - keys->nhoff += sizeof(*fragh); + keys->thoff += sizeof(*fragh); keys->is_frag = true; if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) keys->is_first_frag = true; @@ -341,7 +339,7 @@ PROG(VLAN)(struct __sk_buff *skb) __be16 proto; /* Peek back to see if single or double-tagging */ - if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto, + if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto, sizeof(proto))) return BPF_DROP; @@ -354,14 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb) if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) return BPF_DROP; - keys->nhoff += sizeof(*vlan); + keys->thoff += sizeof(*vlan); } vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) return BPF_DROP; - keys->nhoff += sizeof(*vlan); + keys->thoff += sizeof(*vlan); /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index df6f751cc1e8..f8eac4a544f4 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -13915,6 +13915,34 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "calls: cross frame pruning", + .insns = { + /* r8 = !!random(); + * call pruner() + * if (r8) + * do something bad; + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -13940,7 +13968,7 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } -static int create_prog_dummy1(enum bpf_map_type prog_type) +static int create_prog_dummy1(enum bpf_prog_type prog_type) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_0, 42), @@ -13951,7 +13979,7 @@ static int create_prog_dummy1(enum bpf_map_type prog_type) ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) +static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_3, idx), @@ -13966,7 +13994,7 @@ static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem, +static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, int p1key) { int p2key = 1; @@ -14037,7 +14065,7 @@ static int create_cgroup_storage(bool percpu) static char bpf_vlog[UINT_MAX >> 8]; -static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, +static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, struct bpf_insn *prog, int *map_fds) { int *fixup_map_hash_8b = test->fixup_map_hash_8b; @@ -14166,7 +14194,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, do { prog[*fixup_map_stacktrace].imm = map_fds[12]; fixup_map_stacktrace++; - } while (fixup_map_stacktrace); + } while (*fixup_map_stacktrace); } }