2014-09-26 09:17:02 +02:00
|
|
|
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
|
2016-05-06 04:49:10 +02:00
|
|
|
* Copyright (c) 2016 Facebook
|
2014-09-26 09:17:02 +02:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
|
|
* License as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/bpf.h>
|
2016-09-21 12:43:57 +02:00
|
|
|
#include <linux/bpf_verifier.h>
|
2014-09-26 09:17:02 +02:00
|
|
|
#include <linux/filter.h>
|
|
|
|
#include <net/netlink.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
|
|
|
|
/* bpf_check() is a static code analyzer that walks eBPF program
|
|
|
|
* instruction by instruction and updates register/stack state.
|
|
|
|
* All paths of conditional branches are analyzed until 'bpf_exit' insn.
|
|
|
|
*
|
|
|
|
* The first pass is depth-first-search to check that the program is a DAG.
|
|
|
|
* It rejects the following programs:
|
|
|
|
* - larger than BPF_MAXINSNS insns
|
|
|
|
* - if loop is present (detected via back-edge)
|
|
|
|
* - unreachable insns exist (shouldn't be a forest. program = one function)
|
|
|
|
* - out of bounds or malformed jumps
|
|
|
|
* The second pass is all possible path descent from the 1st insn.
|
|
|
|
* Since it's analyzing all pathes through the program, the length of the
|
|
|
|
* analysis is limited to 32k insn, which may be hit even if total number of
|
|
|
|
* insn is less then 4K, but there are too many branches that change stack/regs.
|
|
|
|
* Number of 'branches to be analyzed' is limited to 1k
|
|
|
|
*
|
|
|
|
* On entry to each instruction, each register has a type, and the instruction
|
|
|
|
* changes the types of the registers depending on instruction semantics.
|
|
|
|
* If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
|
|
|
|
* copied to R1.
|
|
|
|
*
|
|
|
|
* All registers are 64-bit.
|
|
|
|
* R0 - return register
|
|
|
|
* R1-R5 argument passing registers
|
|
|
|
* R6-R9 callee saved registers
|
|
|
|
* R10 - frame pointer read-only
|
|
|
|
*
|
|
|
|
* At the start of BPF program the register R1 contains a pointer to bpf_context
|
|
|
|
* and has type PTR_TO_CTX.
|
|
|
|
*
|
|
|
|
* Verifier tracks arithmetic operations on pointers in case:
|
|
|
|
* BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
|
|
|
|
* BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
|
|
|
|
* 1st insn copies R10 (which has FRAME_PTR) type into R1
|
|
|
|
* and 2nd arithmetic instruction is pattern matched to recognize
|
|
|
|
* that it wants to construct a pointer to some element within stack.
|
|
|
|
* So after 2nd insn, the register R1 has type PTR_TO_STACK
|
|
|
|
* (and -20 constant is saved for further stack bounds checking).
|
|
|
|
* Meaning that this reg is a pointer to stack plus known immediate constant.
|
|
|
|
*
|
|
|
|
* Most of the time the registers have UNKNOWN_VALUE type, which
|
|
|
|
* means the register has some value, but it's not a valid pointer.
|
|
|
|
* (like pointer plus pointer becomes UNKNOWN_VALUE type)
|
|
|
|
*
|
|
|
|
* When verifier sees load or store instructions the type of base register
|
|
|
|
* can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
|
|
|
|
* types recognized by check_mem_access() function.
|
|
|
|
*
|
|
|
|
* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
|
|
|
|
* and the range of [ptr, ptr + map's value_size) is accessible.
|
|
|
|
*
|
|
|
|
* registers used to pass values to function calls are checked against
|
|
|
|
* function argument constraints.
|
|
|
|
*
|
|
|
|
* ARG_PTR_TO_MAP_KEY is one of such argument constraints.
|
|
|
|
* It means that the register type passed to this function must be
|
|
|
|
* PTR_TO_STACK and it will be used inside the function as
|
|
|
|
* 'pointer to map element key'
|
|
|
|
*
|
|
|
|
* For example the argument constraints for bpf_map_lookup_elem():
|
|
|
|
* .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
|
|
|
|
* .arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
* .arg2_type = ARG_PTR_TO_MAP_KEY,
|
|
|
|
*
|
|
|
|
* ret_type says that this function returns 'pointer to map elem value or null'
|
|
|
|
* function expects 1st argument to be a const pointer to 'struct bpf_map' and
|
|
|
|
* 2nd argument should be a pointer to stack, which will be used inside
|
|
|
|
* the helper function as a pointer to map element key.
|
|
|
|
*
|
|
|
|
* On the kernel side the helper function looks like:
|
|
|
|
* u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
|
|
|
* {
|
|
|
|
* struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
|
|
|
|
* void *key = (void *) (unsigned long) r2;
|
|
|
|
* void *value;
|
|
|
|
*
|
|
|
|
* here kernel can access 'key' and 'map' pointers safely, knowing that
|
|
|
|
* [key, key + map->key_size) bytes are valid and were initialized on
|
|
|
|
* the stack of eBPF program.
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* Corresponding eBPF program may look like:
|
|
|
|
* BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
|
|
|
|
* BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
|
|
|
|
* BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
|
|
|
|
* BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
|
|
|
|
* here verifier looks at prototype of map_lookup_elem() and sees:
|
|
|
|
* .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
|
|
|
|
* Now verifier knows that this map has key of R1->map_ptr->key_size bytes
|
|
|
|
*
|
|
|
|
* Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
|
|
|
|
* Now verifier checks that [R2, R2 + map's key_size) are within stack limits
|
|
|
|
* and were initialized prior to this call.
|
|
|
|
* If it's ok, then verifier allows this BPF_CALL insn and looks at
|
|
|
|
* .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
|
|
|
|
* R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
|
|
|
|
* returns ether pointer to map value or NULL.
|
|
|
|
*
|
|
|
|
* When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
|
|
|
|
* insn, the register holding that pointer in the true branch changes state to
|
|
|
|
* PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
|
|
|
|
* branch. See check_cond_jmp_op().
|
|
|
|
*
|
|
|
|
* After the call R0 is set to return type of the function and registers R1-R5
|
|
|
|
* are set to NOT_INIT to indicate that they are no longer readable.
|
|
|
|
*/
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_stack_elem {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* verifer state is 'st'
|
|
|
|
* before processing instruction 'insn_idx'
|
|
|
|
* and after processing instruction 'prev_insn_idx'
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state st;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int insn_idx;
|
|
|
|
int prev_insn_idx;
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_stack_elem *next;
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
};
|
|
|
|
|
bpf, verifier: further improve search pruning
The verifier needs to go through every path of the program in
order to check that it terminates safely, which can be quite a
lot of instructions that need to be processed f.e. in cases with
more branchy programs. With search pruning from f1bca824dabb ("bpf:
add search pruning optimization to verifier") the search space can
already be reduced significantly when the verifier detects that
a previously walked path with same register and stack contents
terminated already (see verifier's states_equal()), so the search
can skip walking those states.
When working with larger programs of > ~2000 (out of max 4096)
insns, we found that the current limit of 32k instructions is easily
hit. For example, a case we ran into is that the search space cannot
be pruned due to branches at the beginning of the program that make
use of certain stack space slots (STACK_MISC), which are never used
in the remaining program (STACK_INVALID). Therefore, the verifier
needs to walk paths for the slots in STACK_INVALID state, but also
all remaining paths with a stack structure, where the slots are in
STACK_MISC, which can nearly double the search space needed. After
various experiments, we find that a limit of 64k processed insns is
a more reasonable choice when dealing with larger programs in practice.
This still allows to reject extreme crafted cases that can have a
much higher complexity (f.e. > ~300k) within the 4096 insns limit
due to search pruning not being able to take effect.
Furthermore, we found that a lot of states can be pruned after a
call instruction, f.e. we were able to reduce the search state by
~35% in some cases with this heuristic, trade-off is to keep a bit
more states in env->explored_states. Usually, call instructions
have a number of preceding register assignments and/or stack stores,
where search pruning has a better chance to suceed in states_equal()
test. The current code marks the branch targets with STATE_LIST_MARK
in case of conditional jumps, and the next (t + 1) instruction in
case of unconditional jump so that f.e. a backjump will walk it. We
also did experiments with using t + insns[t].off + 1 as a marker in
the unconditionally jump case instead of t + 1 with the rationale
that these two branches of execution that converge after the label
might have more potential of pruning. We found that it was a bit
better, but not necessarily significantly better than the current
state, perhaps also due to clang not generating back jumps often.
Hence, we left that as is for now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-05 22:33:17 +02:00
|
|
|
#define BPF_COMPLEXITY_LIMIT_INSNS 65536
|
|
|
|
#define BPF_COMPLEXITY_LIMIT_STACK 1024
|
|
|
|
|
2016-04-13 00:10:50 +02:00
|
|
|
struct bpf_call_arg_meta {
|
|
|
|
struct bpf_map *map_ptr;
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
bool raw_mode;
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
bool pkt_access;
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
int regno;
|
|
|
|
int access_size;
|
2016-04-13 00:10:50 +02:00
|
|
|
};
|
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
/* verbose verifier prints what it's seeing
|
|
|
|
* bpf_check() is called under lock, so no race to access these global vars
|
|
|
|
*/
|
|
|
|
static u32 log_level, log_size, log_len;
|
|
|
|
static char *log_buf;
|
|
|
|
|
|
|
|
static DEFINE_MUTEX(bpf_verifier_lock);
|
|
|
|
|
|
|
|
/* log_level controls verbosity level of eBPF verifier.
|
|
|
|
* verbose() is used to dump the verification trace to the log, so the user
|
|
|
|
* can figure out what's wrong with the program
|
|
|
|
*/
|
2015-11-03 11:39:20 +01:00
|
|
|
static __printf(1, 2) void verbose(const char *fmt, ...)
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (log_level == 0 || log_len >= log_size - 1)
|
|
|
|
return;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* string representation of 'enum bpf_reg_type' */
|
|
|
|
static const char * const reg_type_str[] = {
|
|
|
|
[NOT_INIT] = "?",
|
|
|
|
[UNKNOWN_VALUE] = "inv",
|
|
|
|
[PTR_TO_CTX] = "ctx",
|
|
|
|
[CONST_PTR_TO_MAP] = "map_ptr",
|
|
|
|
[PTR_TO_MAP_VALUE] = "map_value",
|
|
|
|
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
|
|
|
|
[FRAME_PTR] = "fp",
|
|
|
|
[PTR_TO_STACK] = "fp",
|
|
|
|
[CONST_IMM] = "imm",
|
2016-05-06 04:49:10 +02:00
|
|
|
[PTR_TO_PACKET] = "pkt",
|
|
|
|
[PTR_TO_PACKET_END] = "pkt_end",
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
};
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static void print_verifier_state(struct bpf_verifier_state *state)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *reg;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
enum bpf_reg_type t;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_REG; i++) {
|
2016-05-06 04:49:09 +02:00
|
|
|
reg = &state->regs[i];
|
|
|
|
t = reg->type;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (t == NOT_INIT)
|
|
|
|
continue;
|
|
|
|
verbose(" R%d=%s", i, reg_type_str[t]);
|
|
|
|
if (t == CONST_IMM || t == PTR_TO_STACK)
|
2016-05-06 04:49:10 +02:00
|
|
|
verbose("%lld", reg->imm);
|
|
|
|
else if (t == PTR_TO_PACKET)
|
|
|
|
verbose("(id=%d,off=%d,r=%d)",
|
|
|
|
reg->id, reg->off, reg->range);
|
|
|
|
else if (t == UNKNOWN_VALUE && reg->imm)
|
|
|
|
verbose("%lld", reg->imm);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
|
|
|
|
t == PTR_TO_MAP_VALUE_OR_NULL)
|
|
|
|
verbose("(ks=%d,vs=%d)",
|
2016-05-06 04:49:09 +02:00
|
|
|
reg->map_ptr->key_size,
|
|
|
|
reg->map_ptr->value_size);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
}
|
2014-10-28 23:11:41 +01:00
|
|
|
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
|
2016-05-06 04:49:09 +02:00
|
|
|
if (state->stack_slot_type[i] == STACK_SPILL)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose(" fp%d=%s", -MAX_BPF_STACK + i,
|
2016-05-06 04:49:09 +02:00
|
|
|
reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
}
|
|
|
|
verbose("\n");
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
static const char *const bpf_class_string[] = {
|
|
|
|
[BPF_LD] = "ld",
|
|
|
|
[BPF_LDX] = "ldx",
|
|
|
|
[BPF_ST] = "st",
|
|
|
|
[BPF_STX] = "stx",
|
|
|
|
[BPF_ALU] = "alu",
|
|
|
|
[BPF_JMP] = "jmp",
|
|
|
|
[BPF_RET] = "BUG",
|
|
|
|
[BPF_ALU64] = "alu64",
|
|
|
|
};
|
|
|
|
|
2015-09-08 22:40:01 +02:00
|
|
|
static const char *const bpf_alu_string[16] = {
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
[BPF_ADD >> 4] = "+=",
|
|
|
|
[BPF_SUB >> 4] = "-=",
|
|
|
|
[BPF_MUL >> 4] = "*=",
|
|
|
|
[BPF_DIV >> 4] = "/=",
|
|
|
|
[BPF_OR >> 4] = "|=",
|
|
|
|
[BPF_AND >> 4] = "&=",
|
|
|
|
[BPF_LSH >> 4] = "<<=",
|
|
|
|
[BPF_RSH >> 4] = ">>=",
|
|
|
|
[BPF_NEG >> 4] = "neg",
|
|
|
|
[BPF_MOD >> 4] = "%=",
|
|
|
|
[BPF_XOR >> 4] = "^=",
|
|
|
|
[BPF_MOV >> 4] = "=",
|
|
|
|
[BPF_ARSH >> 4] = "s>>=",
|
|
|
|
[BPF_END >> 4] = "endian",
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char *const bpf_ldst_string[] = {
|
|
|
|
[BPF_W >> 3] = "u32",
|
|
|
|
[BPF_H >> 3] = "u16",
|
|
|
|
[BPF_B >> 3] = "u8",
|
|
|
|
[BPF_DW >> 3] = "u64",
|
|
|
|
};
|
|
|
|
|
2015-09-08 22:40:01 +02:00
|
|
|
static const char *const bpf_jmp_string[16] = {
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
[BPF_JA >> 4] = "jmp",
|
|
|
|
[BPF_JEQ >> 4] = "==",
|
|
|
|
[BPF_JGT >> 4] = ">",
|
|
|
|
[BPF_JGE >> 4] = ">=",
|
|
|
|
[BPF_JSET >> 4] = "&",
|
|
|
|
[BPF_JNE >> 4] = "!=",
|
|
|
|
[BPF_JSGT >> 4] = "s>",
|
|
|
|
[BPF_JSGE >> 4] = "s>=",
|
|
|
|
[BPF_CALL >> 4] = "call",
|
|
|
|
[BPF_EXIT >> 4] = "exit",
|
|
|
|
};
|
|
|
|
|
|
|
|
static void print_bpf_insn(struct bpf_insn *insn)
|
|
|
|
{
|
|
|
|
u8 class = BPF_CLASS(insn->code);
|
|
|
|
|
|
|
|
if (class == BPF_ALU || class == BPF_ALU64) {
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X)
|
|
|
|
verbose("(%02x) %sr%d %s %sr%d\n",
|
|
|
|
insn->code, class == BPF_ALU ? "(u32) " : "",
|
|
|
|
insn->dst_reg,
|
|
|
|
bpf_alu_string[BPF_OP(insn->code) >> 4],
|
|
|
|
class == BPF_ALU ? "(u32) " : "",
|
|
|
|
insn->src_reg);
|
|
|
|
else
|
|
|
|
verbose("(%02x) %sr%d %s %s%d\n",
|
|
|
|
insn->code, class == BPF_ALU ? "(u32) " : "",
|
|
|
|
insn->dst_reg,
|
|
|
|
bpf_alu_string[BPF_OP(insn->code) >> 4],
|
|
|
|
class == BPF_ALU ? "(u32) " : "",
|
|
|
|
insn->imm);
|
|
|
|
} else if (class == BPF_STX) {
|
|
|
|
if (BPF_MODE(insn->code) == BPF_MEM)
|
|
|
|
verbose("(%02x) *(%s *)(r%d %+d) = r%d\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->dst_reg,
|
|
|
|
insn->off, insn->src_reg);
|
|
|
|
else if (BPF_MODE(insn->code) == BPF_XADD)
|
|
|
|
verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->dst_reg, insn->off,
|
|
|
|
insn->src_reg);
|
|
|
|
else
|
|
|
|
verbose("BUG_%02x\n", insn->code);
|
|
|
|
} else if (class == BPF_ST) {
|
|
|
|
if (BPF_MODE(insn->code) != BPF_MEM) {
|
|
|
|
verbose("BUG_st_%02x\n", insn->code);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
verbose("(%02x) *(%s *)(r%d %+d) = %d\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->dst_reg,
|
|
|
|
insn->off, insn->imm);
|
|
|
|
} else if (class == BPF_LDX) {
|
|
|
|
if (BPF_MODE(insn->code) != BPF_MEM) {
|
|
|
|
verbose("BUG_ldx_%02x\n", insn->code);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
verbose("(%02x) r%d = *(%s *)(r%d %+d)\n",
|
|
|
|
insn->code, insn->dst_reg,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->src_reg, insn->off);
|
|
|
|
} else if (class == BPF_LD) {
|
|
|
|
if (BPF_MODE(insn->code) == BPF_ABS) {
|
|
|
|
verbose("(%02x) r0 = *(%s *)skb[%d]\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->imm);
|
|
|
|
} else if (BPF_MODE(insn->code) == BPF_IND) {
|
|
|
|
verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->src_reg, insn->imm);
|
|
|
|
} else if (BPF_MODE(insn->code) == BPF_IMM) {
|
|
|
|
verbose("(%02x) r%d = 0x%x\n",
|
|
|
|
insn->code, insn->dst_reg, insn->imm);
|
|
|
|
} else {
|
|
|
|
verbose("BUG_ld_%02x\n", insn->code);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else if (class == BPF_JMP) {
|
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
|
|
|
|
if (opcode == BPF_CALL) {
|
|
|
|
verbose("(%02x) call %d\n", insn->code, insn->imm);
|
|
|
|
} else if (insn->code == (BPF_JMP | BPF_JA)) {
|
|
|
|
verbose("(%02x) goto pc%+d\n",
|
|
|
|
insn->code, insn->off);
|
|
|
|
} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
|
|
|
|
verbose("(%02x) exit\n", insn->code);
|
|
|
|
} else if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
verbose("(%02x) if r%d %s r%d goto pc%+d\n",
|
|
|
|
insn->code, insn->dst_reg,
|
|
|
|
bpf_jmp_string[BPF_OP(insn->code) >> 4],
|
|
|
|
insn->src_reg, insn->off);
|
|
|
|
} else {
|
|
|
|
verbose("(%02x) if r%d %s 0x%x goto pc%+d\n",
|
|
|
|
insn->code, insn->dst_reg,
|
|
|
|
bpf_jmp_string[BPF_OP(insn->code) >> 4],
|
|
|
|
insn->imm, insn->off);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
verbose("(%02x) %s\n", insn->code, bpf_class_string[class]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_stack_elem *elem;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int insn_idx;
|
|
|
|
|
|
|
|
if (env->head == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
|
|
|
|
insn_idx = env->head->insn_idx;
|
|
|
|
if (prev_insn_idx)
|
|
|
|
*prev_insn_idx = env->head->prev_insn_idx;
|
|
|
|
elem = env->head->next;
|
|
|
|
kfree(env->head);
|
|
|
|
env->head = elem;
|
|
|
|
env->stack_size--;
|
|
|
|
return insn_idx;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
|
|
|
|
int insn_idx, int prev_insn_idx)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_stack_elem *elem;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (!elem)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
|
|
|
|
elem->insn_idx = insn_idx;
|
|
|
|
elem->prev_insn_idx = prev_insn_idx;
|
|
|
|
elem->next = env->head;
|
|
|
|
env->head = elem;
|
|
|
|
env->stack_size++;
|
bpf, verifier: further improve search pruning
The verifier needs to go through every path of the program in
order to check that it terminates safely, which can be quite a
lot of instructions that need to be processed f.e. in cases with
more branchy programs. With search pruning from f1bca824dabb ("bpf:
add search pruning optimization to verifier") the search space can
already be reduced significantly when the verifier detects that
a previously walked path with same register and stack contents
terminated already (see verifier's states_equal()), so the search
can skip walking those states.
When working with larger programs of > ~2000 (out of max 4096)
insns, we found that the current limit of 32k instructions is easily
hit. For example, a case we ran into is that the search space cannot
be pruned due to branches at the beginning of the program that make
use of certain stack space slots (STACK_MISC), which are never used
in the remaining program (STACK_INVALID). Therefore, the verifier
needs to walk paths for the slots in STACK_INVALID state, but also
all remaining paths with a stack structure, where the slots are in
STACK_MISC, which can nearly double the search space needed. After
various experiments, we find that a limit of 64k processed insns is
a more reasonable choice when dealing with larger programs in practice.
This still allows to reject extreme crafted cases that can have a
much higher complexity (f.e. > ~300k) within the 4096 insns limit
due to search pruning not being able to take effect.
Furthermore, we found that a lot of states can be pruned after a
call instruction, f.e. we were able to reduce the search state by
~35% in some cases with this heuristic, trade-off is to keep a bit
more states in env->explored_states. Usually, call instructions
have a number of preceding register assignments and/or stack stores,
where search pruning has a better chance to suceed in states_equal()
test. The current code marks the branch targets with STATE_LIST_MARK
in case of conditional jumps, and the next (t + 1) instruction in
case of unconditional jump so that f.e. a backjump will walk it. We
also did experiments with using t + insns[t].off + 1 as a marker in
the unconditionally jump case instead of t + 1 with the rationale
that these two branches of execution that converge after the label
might have more potential of pruning. We found that it was a bit
better, but not necessarily significantly better than the current
state, perhaps also due to clang not generating back jumps often.
Hence, we left that as is for now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-05 22:33:17 +02:00
|
|
|
if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("BPF program is too complex\n");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
return &elem->st;
|
|
|
|
err:
|
|
|
|
/* pop all elements and return */
|
|
|
|
while (pop_stack(env, NULL) >= 0);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define CALLER_SAVED_REGS 6
|
|
|
|
static const int caller_saved[CALLER_SAVED_REGS] = {
|
|
|
|
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
|
|
|
|
};
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static void init_reg_state(struct bpf_reg_state *regs)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_REG; i++) {
|
|
|
|
regs[i].type = NOT_INIT;
|
|
|
|
regs[i].imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* frame pointer */
|
|
|
|
regs[BPF_REG_FP].type = FRAME_PTR;
|
|
|
|
|
|
|
|
/* 1st arg to a function */
|
|
|
|
regs[BPF_REG_1].type = PTR_TO_CTX;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
|
|
|
BUG_ON(regno >= MAX_BPF_REG);
|
|
|
|
regs[regno].type = UNKNOWN_VALUE;
|
|
|
|
regs[regno].imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
enum reg_arg_type {
|
|
|
|
SRC_OP, /* register is used as source operand */
|
|
|
|
DST_OP, /* register is used as destination operand */
|
|
|
|
DST_OP_NO_MARK /* same as above, check only, don't mark */
|
|
|
|
};
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
enum reg_arg_type t)
|
|
|
|
{
|
|
|
|
if (regno >= MAX_BPF_REG) {
|
|
|
|
verbose("R%d is invalid\n", regno);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (t == SRC_OP) {
|
|
|
|
/* check whether register used as source operand can be read */
|
|
|
|
if (regs[regno].type == NOT_INIT) {
|
|
|
|
verbose("R%d !read_ok\n", regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* check whether register used as dest operand can be written to */
|
|
|
|
if (regno == BPF_REG_FP) {
|
|
|
|
verbose("frame pointer is read only\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
if (t == DST_OP)
|
|
|
|
mark_reg_unknown_value(regs, regno);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int bpf_size_to_bytes(int bpf_size)
|
|
|
|
{
|
|
|
|
if (bpf_size == BPF_W)
|
|
|
|
return 4;
|
|
|
|
else if (bpf_size == BPF_H)
|
|
|
|
return 2;
|
|
|
|
else if (bpf_size == BPF_B)
|
|
|
|
return 1;
|
|
|
|
else if (bpf_size == BPF_DW)
|
|
|
|
return 8;
|
|
|
|
else
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
static bool is_spillable_regtype(enum bpf_reg_type type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case PTR_TO_MAP_VALUE:
|
|
|
|
case PTR_TO_MAP_VALUE_OR_NULL:
|
|
|
|
case PTR_TO_STACK:
|
|
|
|
case PTR_TO_CTX:
|
2016-05-06 04:49:10 +02:00
|
|
|
case PTR_TO_PACKET:
|
|
|
|
case PTR_TO_PACKET_END:
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
case FRAME_PTR:
|
|
|
|
case CONST_PTR_TO_MAP:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* check_stack_read/write functions track spill/fill of registers,
|
|
|
|
* stack boundary and alignment are checked in check_mem_access()
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_stack_write(struct bpf_verifier_state *state, int off,
|
|
|
|
int size, int value_regno)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
|
|
|
int i;
|
2014-10-28 23:11:41 +01:00
|
|
|
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
|
|
|
|
* so it's aligned access and [off, off + size) are within stack limits
|
|
|
|
*/
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
|
|
|
if (value_regno >= 0 &&
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
is_spillable_regtype(state->regs[value_regno].type)) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
|
|
|
/* register containing pointer is being spilled into stack */
|
2014-10-28 23:11:41 +01:00
|
|
|
if (size != BPF_REG_SIZE) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("invalid size of register spill\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* save register state */
|
2014-10-28 23:11:41 +01:00
|
|
|
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
|
|
|
|
state->regs[value_regno];
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
2014-10-28 23:11:41 +01:00
|
|
|
for (i = 0; i < BPF_REG_SIZE; i++)
|
|
|
|
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
|
|
|
|
} else {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* regular write of data into stack */
|
2014-10-28 23:11:41 +01:00
|
|
|
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
|
2016-09-21 12:43:57 +02:00
|
|
|
(struct bpf_reg_state) {};
|
2014-10-28 23:11:41 +01:00
|
|
|
|
|
|
|
for (i = 0; i < size; i++)
|
|
|
|
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int value_regno)
|
|
|
|
{
|
2014-10-28 23:11:41 +01:00
|
|
|
u8 *slot_type;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int i;
|
|
|
|
|
2014-10-28 23:11:41 +01:00
|
|
|
slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
2014-10-28 23:11:41 +01:00
|
|
|
if (slot_type[0] == STACK_SPILL) {
|
|
|
|
if (size != BPF_REG_SIZE) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("invalid size of register spill\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2014-10-28 23:11:41 +01:00
|
|
|
for (i = 1; i < BPF_REG_SIZE; i++) {
|
|
|
|
if (slot_type[i] != STACK_SPILL) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("corrupted spill memory\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (value_regno >= 0)
|
|
|
|
/* restore register state from stack */
|
2014-10-28 23:11:41 +01:00
|
|
|
state->regs[value_regno] =
|
|
|
|
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE];
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
for (i = 0; i < size; i++) {
|
2014-10-28 23:11:41 +01:00
|
|
|
if (slot_type[i] != STACK_MISC) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("invalid read from stack off %d+%d size %d\n",
|
|
|
|
off, i, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (value_regno >= 0)
|
|
|
|
/* have read misc data from the stack */
|
|
|
|
mark_reg_unknown_value(state->regs, value_regno);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check read/write into map element returned by bpf_map_lookup_elem() */
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int size)
|
|
|
|
{
|
|
|
|
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
|
|
|
|
|
|
|
|
if (off < 0 || off + size > map->value_size) {
|
|
|
|
verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
|
|
|
|
map->value_size, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-05-06 04:49:10 +02:00
|
|
|
#define MAX_PACKET_OFF 0xffff
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
const struct bpf_call_arg_meta *meta)
|
2016-07-19 21:16:56 +02:00
|
|
|
{
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
switch (env->prog->type) {
|
|
|
|
case BPF_PROG_TYPE_SCHED_CLS:
|
|
|
|
case BPF_PROG_TYPE_SCHED_ACT:
|
2016-07-19 21:16:56 +02:00
|
|
|
case BPF_PROG_TYPE_XDP:
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
if (meta)
|
|
|
|
return meta->pkt_access;
|
|
|
|
|
|
|
|
env->seen_direct_write = true;
|
2016-07-19 21:16:56 +02:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
|
2016-05-06 04:49:10 +02:00
|
|
|
int size)
|
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs;
|
|
|
|
struct bpf_reg_state *reg = ®s[regno];
|
2016-05-06 04:49:10 +02:00
|
|
|
|
2016-05-20 03:17:13 +02:00
|
|
|
off += reg->off;
|
bpf, verifier: enforce larger zero range for pkt on overloading stack buffs
Current contract for the following two helper argument types is:
* ARG_CONST_STACK_SIZE: passed argument pair must be (ptr, >0).
* ARG_CONST_STACK_SIZE_OR_ZERO: passed argument pair can be either
(NULL, 0) or (ptr, >0).
With 6841de8b0d03 ("bpf: allow helpers access the packet directly"), we can
pass also raw packet data to helpers, so depending on the argument type
being PTR_TO_PACKET, we now either assert memory via check_packet_access()
or check_stack_boundary(). As a result, the tests in check_packet_access()
currently allow more than intended with regards to reg->imm.
Back in 969bf05eb3ce ("bpf: direct packet access"), check_packet_access()
was fine to ignore size argument since in check_mem_access() size was
bpf_size_to_bytes() derived and prior to the call to check_packet_access()
guaranteed to be larger than zero.
However, for the above two argument types, it currently means, we can have
a <= 0 size and thus breaking current guarantees for helpers. Enforce a
check for size <= 0 and bail out if so.
check_stack_boundary() doesn't have such an issue since it already tests
for access_size <= 0 and bails out, resp. access_size == 0 in case of NULL
pointer passed when allowed.
Fixes: 6841de8b0d03 ("bpf: allow helpers access the packet directly")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:12 +02:00
|
|
|
if (off < 0 || size <= 0 || off + size > reg->range) {
|
2016-05-20 03:17:13 +02:00
|
|
|
verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
|
|
|
|
off, size, regno, reg->id, reg->off, reg->range);
|
2016-05-06 04:49:10 +02:00
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* check access to 'struct bpf_context' fields */
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
|
2016-06-16 03:25:38 +02:00
|
|
|
enum bpf_access_type t, enum bpf_reg_type *reg_type)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
|
|
|
if (env->prog->aux->ops->is_valid_access &&
|
2016-06-16 03:25:38 +02:00
|
|
|
env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
|
2016-04-07 03:43:28 +02:00
|
|
|
/* remember the offset of last byte accessed in ctx */
|
|
|
|
if (env->prog->aux->max_ctx_offset < off + size)
|
|
|
|
env->prog->aux->max_ctx_offset = off + size;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return 0;
|
2016-04-07 03:43:28 +02:00
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
|
|
|
verbose("invalid bpf_context access off=%d size=%d\n", off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
{
|
|
|
|
if (env->allow_ptr_leaks)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
switch (env->cur_state.regs[regno].type) {
|
|
|
|
case UNKNOWN_VALUE:
|
|
|
|
case CONST_IMM:
|
|
|
|
return false;
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_ptr_alignment(struct bpf_verifier_env *env,
|
|
|
|
struct bpf_reg_state *reg, int off, int size)
|
2016-05-06 04:49:10 +02:00
|
|
|
{
|
|
|
|
if (reg->type != PTR_TO_PACKET) {
|
|
|
|
if (off % size != 0) {
|
2016-09-21 12:43:57 +02:00
|
|
|
verbose("misaligned access off %d size %d\n",
|
|
|
|
off, size);
|
2016-05-06 04:49:10 +02:00
|
|
|
return -EACCES;
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (env->prog->type) {
|
|
|
|
case BPF_PROG_TYPE_SCHED_CLS:
|
|
|
|
case BPF_PROG_TYPE_SCHED_ACT:
|
2016-07-19 21:16:47 +02:00
|
|
|
case BPF_PROG_TYPE_XDP:
|
2016-05-06 04:49:10 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
verbose("verifier is misconfigured\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
|
|
|
|
/* misaligned access to packet is ok on x86,arm,arm64 */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (reg->id && size != 1) {
|
|
|
|
verbose("Unknown packet alignment. Only byte-sized access allowed\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* skb->data is NET_IP_ALIGN-ed */
|
|
|
|
if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
|
|
|
|
verbose("misaligned packet access off %d+%d+%d size %d\n",
|
|
|
|
NET_IP_ALIGN, reg->off, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* check whether memory at (regno + off) is accessible for t = (read | write)
|
|
|
|
* if t==write, value_regno is a register which value is stored into memory
|
|
|
|
* if t==read, value_regno is a register which will receive the value from memory
|
|
|
|
* if t==write && value_regno==-1, some unknown value is stored into memory
|
|
|
|
* if t==read && value_regno==-1, don't care what we read from memory
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int bpf_size, enum bpf_access_type t,
|
|
|
|
int value_regno)
|
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state *state = &env->cur_state;
|
|
|
|
struct bpf_reg_state *reg = &state->regs[regno];
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int size, err = 0;
|
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
if (reg->type == PTR_TO_STACK)
|
|
|
|
off += reg->imm;
|
2015-07-23 23:24:40 +02:00
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
size = bpf_size_to_bytes(bpf_size);
|
|
|
|
if (size < 0)
|
|
|
|
return size;
|
|
|
|
|
2016-05-06 04:49:10 +02:00
|
|
|
err = check_ptr_alignment(env, reg, off, size);
|
|
|
|
if (err)
|
|
|
|
return err;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
if (reg->type == PTR_TO_MAP_VALUE) {
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
if (t == BPF_WRITE && value_regno >= 0 &&
|
|
|
|
is_pointer_value(env, value_regno)) {
|
|
|
|
verbose("R%d leaks addr into map\n", value_regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
err = check_map_access(env, regno, off, size);
|
|
|
|
if (!err && t == BPF_READ && value_regno >= 0)
|
|
|
|
mark_reg_unknown_value(state->regs, value_regno);
|
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
} else if (reg->type == PTR_TO_CTX) {
|
2016-06-16 03:25:38 +02:00
|
|
|
enum bpf_reg_type reg_type = UNKNOWN_VALUE;
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
if (t == BPF_WRITE && value_regno >= 0 &&
|
|
|
|
is_pointer_value(env, value_regno)) {
|
|
|
|
verbose("R%d leaks addr into ctx\n", value_regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2016-06-16 03:25:38 +02:00
|
|
|
err = check_ctx_access(env, off, size, t, ®_type);
|
2016-05-06 04:49:10 +02:00
|
|
|
if (!err && t == BPF_READ && value_regno >= 0) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
mark_reg_unknown_value(state->regs, value_regno);
|
2016-06-16 03:25:38 +02:00
|
|
|
if (env->allow_ptr_leaks)
|
2016-05-06 04:49:10 +02:00
|
|
|
/* note that reg.[id|off|range] == 0 */
|
2016-06-16 03:25:38 +02:00
|
|
|
state->regs[value_regno].type = reg_type;
|
2016-05-06 04:49:10 +02:00
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (off >= 0 || off < -MAX_BPF_STACK) {
|
|
|
|
verbose("invalid stack off=%d size=%d\n", off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
if (t == BPF_WRITE) {
|
|
|
|
if (!env->allow_ptr_leaks &&
|
|
|
|
state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
|
|
|
|
size != BPF_REG_SIZE) {
|
|
|
|
verbose("attempt to corrupt spilled pointer on stack\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
err = check_stack_write(state, off, size, value_regno);
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
} else {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
err = check_stack_read(state, off, size, value_regno);
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
}
|
2016-05-06 04:49:10 +02:00
|
|
|
} else if (state->regs[regno].type == PTR_TO_PACKET) {
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
|
2016-05-06 04:49:10 +02:00
|
|
|
verbose("cannot write into packet\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2016-07-19 21:16:56 +02:00
|
|
|
if (t == BPF_WRITE && value_regno >= 0 &&
|
|
|
|
is_pointer_value(env, value_regno)) {
|
|
|
|
verbose("R%d leaks addr into packet\n", value_regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2016-05-06 04:49:10 +02:00
|
|
|
err = check_packet_access(env, regno, off, size);
|
|
|
|
if (!err && t == BPF_READ && value_regno >= 0)
|
|
|
|
mark_reg_unknown_value(state->regs, value_regno);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else {
|
|
|
|
verbose("R%d invalid mem access '%s'\n",
|
2016-05-06 04:49:09 +02:00
|
|
|
regno, reg_type_str[reg->type]);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return -EACCES;
|
|
|
|
}
|
2016-05-06 04:49:10 +02:00
|
|
|
|
|
|
|
if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks &&
|
|
|
|
state->regs[value_regno].type == UNKNOWN_VALUE) {
|
|
|
|
/* 1 or 2 byte load zero-extends, determine the number of
|
|
|
|
* zero upper bits. Not doing it fo 4 byte load, since
|
|
|
|
* such values cannot be added to ptr_to_packet anyway.
|
|
|
|
*/
|
|
|
|
state->regs[value_regno].imm = 64 - size * 8;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int err;
|
|
|
|
|
|
|
|
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
|
|
|
|
insn->imm != 0) {
|
|
|
|
verbose("BPF_XADD uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src1 operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* check src2 operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* check whether atomic_add can read the memory */
|
|
|
|
err = check_mem_access(env, insn->dst_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_READ, -1);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* check whether atomic_add can write into the same memory */
|
|
|
|
return check_mem_access(env, insn->dst_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_WRITE, -1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* when register 'regno' is passed into function that will read 'access_size'
|
|
|
|
* bytes from that pointer, make sure that it's within stack boundary
|
|
|
|
* and all elements of stack are initialized
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
int access_size, bool zero_size_allowed,
|
|
|
|
struct bpf_call_arg_meta *meta)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state *state = &env->cur_state;
|
|
|
|
struct bpf_reg_state *regs = state->regs;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int off, i;
|
|
|
|
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-19 23:05:22 +01:00
|
|
|
if (regs[regno].type != PTR_TO_STACK) {
|
|
|
|
if (zero_size_allowed && access_size == 0 &&
|
|
|
|
regs[regno].type == CONST_IMM &&
|
|
|
|
regs[regno].imm == 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
verbose("R%d type=%s expected=%s\n", regno,
|
|
|
|
reg_type_str[regs[regno].type],
|
|
|
|
reg_type_str[PTR_TO_STACK]);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return -EACCES;
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-19 23:05:22 +01:00
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
|
|
|
off = regs[regno].imm;
|
|
|
|
if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
|
|
|
|
access_size <= 0) {
|
|
|
|
verbose("invalid stack type R%d off=%d access_size=%d\n",
|
|
|
|
regno, off, access_size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
if (meta && meta->raw_mode) {
|
|
|
|
meta->access_size = access_size;
|
|
|
|
meta->regno = regno;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
for (i = 0; i < access_size; i++) {
|
2014-10-28 23:11:41 +01:00
|
|
|
if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("invalid indirect read from stack off %d+%d size %d\n",
|
|
|
|
off, i, access_size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
|
2016-04-13 00:10:50 +02:00
|
|
|
enum bpf_arg_type arg_type,
|
|
|
|
struct bpf_call_arg_meta *meta)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno];
|
2016-08-12 03:17:16 +02:00
|
|
|
enum bpf_reg_type expected_type, type = reg->type;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int err = 0;
|
|
|
|
|
2015-03-12 17:21:42 +01:00
|
|
|
if (arg_type == ARG_DONTCARE)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return 0;
|
|
|
|
|
2016-08-12 03:17:16 +02:00
|
|
|
if (type == NOT_INIT) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("R%d !read_ok\n", regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
if (arg_type == ARG_ANYTHING) {
|
|
|
|
if (is_pointer_value(env, regno)) {
|
|
|
|
verbose("R%d leaks addr into helper function\n", regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2015-03-12 17:21:42 +01:00
|
|
|
return 0;
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
}
|
2015-03-12 17:21:42 +01:00
|
|
|
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
|
|
|
|
verbose("helper access to the packet is not allowed\n");
|
2016-08-12 03:17:16 +02:00
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-19 23:05:22 +01:00
|
|
|
if (arg_type == ARG_PTR_TO_MAP_KEY ||
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
arg_type == ARG_PTR_TO_MAP_VALUE) {
|
|
|
|
expected_type = PTR_TO_STACK;
|
2016-08-12 03:17:16 +02:00
|
|
|
if (type != PTR_TO_PACKET && type != expected_type)
|
|
|
|
goto err_type;
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-19 23:05:22 +01:00
|
|
|
} else if (arg_type == ARG_CONST_STACK_SIZE ||
|
|
|
|
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
expected_type = CONST_IMM;
|
2016-08-12 03:17:16 +02:00
|
|
|
if (type != expected_type)
|
|
|
|
goto err_type;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else if (arg_type == ARG_CONST_MAP_PTR) {
|
|
|
|
expected_type = CONST_PTR_TO_MAP;
|
2016-08-12 03:17:16 +02:00
|
|
|
if (type != expected_type)
|
|
|
|
goto err_type;
|
2015-03-27 03:53:57 +01:00
|
|
|
} else if (arg_type == ARG_PTR_TO_CTX) {
|
|
|
|
expected_type = PTR_TO_CTX;
|
2016-08-12 03:17:16 +02:00
|
|
|
if (type != expected_type)
|
|
|
|
goto err_type;
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
} else if (arg_type == ARG_PTR_TO_STACK ||
|
|
|
|
arg_type == ARG_PTR_TO_RAW_STACK) {
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-19 23:05:22 +01:00
|
|
|
expected_type = PTR_TO_STACK;
|
|
|
|
/* One exception here. In case function allows for NULL to be
|
|
|
|
* passed in as argument, it's a CONST_IMM type. Final test
|
|
|
|
* happens during stack boundary checking.
|
|
|
|
*/
|
2016-08-12 03:17:16 +02:00
|
|
|
if (type == CONST_IMM && reg->imm == 0)
|
|
|
|
/* final test in check_stack_boundary() */;
|
|
|
|
else if (type != PTR_TO_PACKET && type != expected_type)
|
|
|
|
goto err_type;
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else {
|
|
|
|
verbose("unsupported arg_type %d\n", arg_type);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (arg_type == ARG_CONST_MAP_PTR) {
|
|
|
|
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
|
2016-04-13 00:10:50 +02:00
|
|
|
meta->map_ptr = reg->map_ptr;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
|
|
|
|
/* bpf_map_xxx(..., map_ptr, ..., key) call:
|
|
|
|
* check that [key, key + map->key_size) are within
|
|
|
|
* stack limits and initialized
|
|
|
|
*/
|
2016-04-13 00:10:50 +02:00
|
|
|
if (!meta->map_ptr) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* in function declaration map_ptr must come before
|
|
|
|
* map_key, so that it's verified and known before
|
|
|
|
* we have to check map_key here. Otherwise it means
|
|
|
|
* that kernel subsystem misconfigured verifier
|
|
|
|
*/
|
|
|
|
verbose("invalid map_ptr to access map->key\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2016-08-12 03:17:16 +02:00
|
|
|
if (type == PTR_TO_PACKET)
|
|
|
|
err = check_packet_access(env, regno, 0,
|
|
|
|
meta->map_ptr->key_size);
|
|
|
|
else
|
|
|
|
err = check_stack_boundary(env, regno,
|
|
|
|
meta->map_ptr->key_size,
|
|
|
|
false, NULL);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
|
|
|
|
/* bpf_map_xxx(..., map_ptr, ..., value) call:
|
|
|
|
* check [value, value + map->value_size) validity
|
|
|
|
*/
|
2016-04-13 00:10:50 +02:00
|
|
|
if (!meta->map_ptr) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* kernel subsystem misconfigured verifier */
|
|
|
|
verbose("invalid map_ptr to access map->value\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2016-08-12 03:17:16 +02:00
|
|
|
if (type == PTR_TO_PACKET)
|
|
|
|
err = check_packet_access(env, regno, 0,
|
|
|
|
meta->map_ptr->value_size);
|
|
|
|
else
|
|
|
|
err = check_stack_boundary(env, regno,
|
|
|
|
meta->map_ptr->value_size,
|
|
|
|
false, NULL);
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-19 23:05:22 +01:00
|
|
|
} else if (arg_type == ARG_CONST_STACK_SIZE ||
|
|
|
|
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
|
|
|
|
bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
|
|
|
/* bpf_xxx(..., buf, len) call will access 'len' bytes
|
|
|
|
* from stack pointer 'buf'. Check it
|
|
|
|
* note: regno == len, regno - 1 == buf
|
|
|
|
*/
|
|
|
|
if (regno == 0) {
|
|
|
|
/* kernel subsystem misconfigured verifier */
|
|
|
|
verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2016-08-12 03:17:16 +02:00
|
|
|
if (regs[regno - 1].type == PTR_TO_PACKET)
|
|
|
|
err = check_packet_access(env, regno - 1, 0, reg->imm);
|
|
|
|
else
|
|
|
|
err = check_stack_boundary(env, regno - 1, reg->imm,
|
|
|
|
zero_size_allowed, meta);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
2016-08-12 03:17:16 +02:00
|
|
|
err_type:
|
|
|
|
verbose("R%d type=%s expected=%s\n", regno,
|
|
|
|
reg_type_str[type], reg_type_str[expected_type]);
|
|
|
|
return -EACCES;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
}
|
|
|
|
|
2015-08-06 09:02:35 +02:00
|
|
|
static int check_map_func_compatibility(struct bpf_map *map, int func_id)
|
|
|
|
{
|
|
|
|
if (!map)
|
|
|
|
return 0;
|
|
|
|
|
2016-04-28 03:56:21 +02:00
|
|
|
/* We need a two way check, first is from map perspective ... */
|
|
|
|
switch (map->map_type) {
|
|
|
|
case BPF_MAP_TYPE_PROG_ARRAY:
|
|
|
|
if (func_id != BPF_FUNC_tail_call)
|
|
|
|
goto error;
|
|
|
|
break;
|
|
|
|
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
|
|
|
|
if (func_id != BPF_FUNC_perf_event_read &&
|
|
|
|
func_id != BPF_FUNC_perf_event_output)
|
|
|
|
goto error;
|
|
|
|
break;
|
|
|
|
case BPF_MAP_TYPE_STACK_TRACE:
|
|
|
|
if (func_id != BPF_FUNC_get_stackid)
|
|
|
|
goto error;
|
|
|
|
break;
|
2016-06-30 19:28:43 +02:00
|
|
|
case BPF_MAP_TYPE_CGROUP_ARRAY:
|
2016-08-18 07:17:32 +02:00
|
|
|
if (func_id != BPF_FUNC_skb_under_cgroup &&
|
2016-08-12 17:56:52 +02:00
|
|
|
func_id != BPF_FUNC_current_task_under_cgroup)
|
2016-06-30 19:28:44 +02:00
|
|
|
goto error;
|
|
|
|
break;
|
2016-04-28 03:56:21 +02:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ... and second from the function itself. */
|
|
|
|
switch (func_id) {
|
|
|
|
case BPF_FUNC_tail_call:
|
|
|
|
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
|
|
|
|
goto error;
|
|
|
|
break;
|
|
|
|
case BPF_FUNC_perf_event_read:
|
|
|
|
case BPF_FUNC_perf_event_output:
|
|
|
|
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
|
|
|
|
goto error;
|
|
|
|
break;
|
|
|
|
case BPF_FUNC_get_stackid:
|
|
|
|
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
|
|
|
|
goto error;
|
|
|
|
break;
|
2016-08-12 17:56:52 +02:00
|
|
|
case BPF_FUNC_current_task_under_cgroup:
|
2016-08-12 22:17:17 +02:00
|
|
|
case BPF_FUNC_skb_under_cgroup:
|
2016-06-30 19:28:44 +02:00
|
|
|
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
|
|
|
|
goto error;
|
|
|
|
break;
|
2016-04-28 03:56:21 +02:00
|
|
|
default:
|
|
|
|
break;
|
2015-08-06 09:02:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2016-04-28 03:56:21 +02:00
|
|
|
error:
|
|
|
|
verbose("cannot pass map_type %d into func %d\n",
|
|
|
|
map->map_type, func_id);
|
|
|
|
return -EINVAL;
|
2015-08-06 09:02:35 +02:00
|
|
|
}
|
|
|
|
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
static int check_raw_mode(const struct bpf_func_proto *fn)
|
|
|
|
{
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
if (fn->arg1_type == ARG_PTR_TO_RAW_STACK)
|
|
|
|
count++;
|
|
|
|
if (fn->arg2_type == ARG_PTR_TO_RAW_STACK)
|
|
|
|
count++;
|
|
|
|
if (fn->arg3_type == ARG_PTR_TO_RAW_STACK)
|
|
|
|
count++;
|
|
|
|
if (fn->arg4_type == ARG_PTR_TO_RAW_STACK)
|
|
|
|
count++;
|
|
|
|
if (fn->arg5_type == ARG_PTR_TO_RAW_STACK)
|
|
|
|
count++;
|
|
|
|
|
|
|
|
return count > 1 ? -EINVAL : 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
|
2016-05-06 04:49:10 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state *state = &env->cur_state;
|
|
|
|
struct bpf_reg_state *regs = state->regs, *reg;
|
2016-05-06 04:49:10 +02:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_REG; i++)
|
|
|
|
if (regs[i].type == PTR_TO_PACKET ||
|
|
|
|
regs[i].type == PTR_TO_PACKET_END)
|
|
|
|
mark_reg_unknown_value(regs, i);
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
|
|
|
|
if (state->stack_slot_type[i] != STACK_SPILL)
|
|
|
|
continue;
|
|
|
|
reg = &state->spilled_regs[i / BPF_REG_SIZE];
|
|
|
|
if (reg->type != PTR_TO_PACKET &&
|
|
|
|
reg->type != PTR_TO_PACKET_END)
|
|
|
|
continue;
|
|
|
|
reg->type = UNKNOWN_VALUE;
|
|
|
|
reg->imm = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_call(struct bpf_verifier_env *env, int func_id)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state *state = &env->cur_state;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
const struct bpf_func_proto *fn = NULL;
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = state->regs;
|
|
|
|
struct bpf_reg_state *reg;
|
2016-04-13 00:10:50 +02:00
|
|
|
struct bpf_call_arg_meta meta;
|
2016-05-06 04:49:10 +02:00
|
|
|
bool changes_data;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int i, err;
|
|
|
|
|
|
|
|
/* find function prototype */
|
|
|
|
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
|
|
|
|
verbose("invalid func %d\n", func_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (env->prog->aux->ops->get_func_proto)
|
|
|
|
fn = env->prog->aux->ops->get_func_proto(func_id);
|
|
|
|
|
|
|
|
if (!fn) {
|
|
|
|
verbose("unknown func %d\n", func_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* eBPF programs must be GPL compatible to use GPL-ed functions */
|
2015-03-01 12:31:47 +01:00
|
|
|
if (!env->prog->gpl_compatible && fn->gpl_only) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("cannot call GPL only function from proprietary program\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-05-06 04:49:10 +02:00
|
|
|
changes_data = bpf_helper_changes_skb_data(fn->func);
|
|
|
|
|
2016-04-13 00:10:50 +02:00
|
|
|
memset(&meta, 0, sizeof(meta));
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
meta.pkt_access = fn->pkt_access;
|
2016-04-13 00:10:50 +02:00
|
|
|
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
/* We only support one arg being in raw mode at the moment, which
|
|
|
|
* is sufficient for the helper functions we have right now.
|
|
|
|
*/
|
|
|
|
err = check_raw_mode(fn);
|
|
|
|
if (err) {
|
|
|
|
verbose("kernel subsystem misconfigured func %d\n", func_id);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* check args */
|
2016-04-13 00:10:50 +02:00
|
|
|
err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (err)
|
|
|
|
return err;
|
2016-04-13 00:10:50 +02:00
|
|
|
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (err)
|
|
|
|
return err;
|
2016-04-13 00:10:50 +02:00
|
|
|
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (err)
|
|
|
|
return err;
|
2016-04-13 00:10:50 +02:00
|
|
|
err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (err)
|
|
|
|
return err;
|
2016-04-13 00:10:50 +02:00
|
|
|
err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
bpf, verifier: add ARG_PTR_TO_RAW_STACK type
When passing buffers from eBPF stack space into a helper function, we have
ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure
that such buffers are initialized, within boundaries, etc.
However, the downside with this is that we have a couple of helper functions
such as bpf_skb_load_bytes() that fill out the passed buffer in the expected
success case anyway, so zero initializing them prior to the helper call is
unneeded/wasted instructions in the eBPF program that can be avoided.
Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK.
The idea is to skip the STACK_MISC check in check_stack_boundary() and color
the related stack slots as STACK_MISC after we checked all call arguments.
Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of
the helper function will fill the provided buffer area, so that we cannot leak
any uninitialized stack memory. This f.e. means that error paths need to
memset() the buffers, but the expected fast-path doesn't have to do this
anymore.
Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK
argument, we can keep it simple and don't need to check for multiple areas.
Should in future such a use-case really appear, we have check_raw_mode() that
will make sure we implement support for it first.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-13 00:10:51 +02:00
|
|
|
/* Mark slots with STACK_MISC in case of raw mode, stack offset
|
|
|
|
* is inferred from register state.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < meta.access_size; i++) {
|
|
|
|
err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* reset caller saved regs */
|
|
|
|
for (i = 0; i < CALLER_SAVED_REGS; i++) {
|
|
|
|
reg = regs + caller_saved[i];
|
|
|
|
reg->type = NOT_INIT;
|
|
|
|
reg->imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* update return register */
|
|
|
|
if (fn->ret_type == RET_INTEGER) {
|
|
|
|
regs[BPF_REG_0].type = UNKNOWN_VALUE;
|
|
|
|
} else if (fn->ret_type == RET_VOID) {
|
|
|
|
regs[BPF_REG_0].type = NOT_INIT;
|
|
|
|
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
|
|
|
|
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
|
|
|
|
/* remember map_ptr, so that check_map_access()
|
|
|
|
* can check 'value_size' boundary of memory access
|
|
|
|
* to map element returned from bpf_map_lookup_elem()
|
|
|
|
*/
|
2016-04-13 00:10:50 +02:00
|
|
|
if (meta.map_ptr == NULL) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("kernel subsystem misconfigured verifier\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2016-04-13 00:10:50 +02:00
|
|
|
regs[BPF_REG_0].map_ptr = meta.map_ptr;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else {
|
|
|
|
verbose("unknown return type %d of func %d\n",
|
|
|
|
fn->ret_type, func_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
bpf: allow bpf programs to tail-call other bpf programs
introduce bpf_tail_call(ctx, &jmp_table, index) helper function
which can be used from BPF programs like:
int bpf_prog(struct pt_regs *ctx)
{
...
bpf_tail_call(ctx, &jmp_table, index);
...
}
that is roughly equivalent to:
int bpf_prog(struct pt_regs *ctx)
{
...
if (jmp_table[index])
return (*jmp_table[index])(ctx);
...
}
The important detail that it's not a normal call, but a tail call.
The kernel stack is precious, so this helper reuses the current
stack frame and jumps into another BPF program without adding
extra call frame.
It's trivially done in interpreter and a bit trickier in JITs.
In case of x64 JIT the bigger part of generated assembler prologue
is common for all programs, so it is simply skipped while jumping.
Other JITs can do similar prologue-skipping optimization or
do stack unwind before jumping into the next program.
bpf_tail_call() arguments:
ctx - context pointer
jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table
index - index in the jump table
Since all BPF programs are idenitified by file descriptor, user space
need to populate the jmp_table with FDs of other BPF programs.
If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere
and program execution continues as normal.
New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can
populate this jmp_table array with FDs of other bpf programs.
Programs can share the same jmp_table array or use multiple jmp_tables.
The chain of tail calls can form unpredictable dynamic loops therefore
tail_call_cnt is used to limit the number of calls and currently is set to 32.
Use cases:
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
==========
- simplify complex programs by splitting them into a sequence of small programs
- dispatch routine
For tracing and future seccomp the program may be triggered on all system
calls, but processing of syscall arguments will be different. It's more
efficient to implement them as:
int syscall_entry(struct seccomp_data *ctx)
{
bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */);
... default: process unknown syscall ...
}
int sys_write_event(struct seccomp_data *ctx) {...}
int sys_read_event(struct seccomp_data *ctx) {...}
syscall_jmp_table[__NR_write] = sys_write_event;
syscall_jmp_table[__NR_read] = sys_read_event;
For networking the program may call into different parsers depending on
packet format, like:
int packet_parser(struct __sk_buff *skb)
{
... parse L2, L3 here ...
__u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol));
bpf_tail_call(skb, &ipproto_jmp_table, ipproto);
... default: process unknown protocol ...
}
int parse_tcp(struct __sk_buff *skb) {...}
int parse_udp(struct __sk_buff *skb) {...}
ipproto_jmp_table[IPPROTO_TCP] = parse_tcp;
ipproto_jmp_table[IPPROTO_UDP] = parse_udp;
- for TC use case, bpf_tail_call() allows to implement reclassify-like logic
- bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table
are atomic, so user space can build chains of BPF programs on the fly
Implementation details:
=======================
- high performance of bpf_tail_call() is the goal.
It could have been implemented without JIT changes as a wrapper on top of
BPF_PROG_RUN() macro, but with two downsides:
. all programs would have to pay performance penalty for this feature and
tail call itself would be slower, since mandatory stack unwind, return,
stack allocate would be done for every tailcall.
. tailcall would be limited to programs running preempt_disabled, since
generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would
need to be either global per_cpu variable accessed by helper and by wrapper
or global variable protected by locks.
In this implementation x64 JIT bypasses stack unwind and jumps into the
callee program after prologue.
- bpf_prog_array_compatible() ensures that prog_type of callee and caller
are the same and JITed/non-JITed flag is the same, since calling JITed
program from non-JITed is invalid, since stack frames are different.
Similarly calling kprobe type program from socket type program is invalid.
- jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map'
abstraction, its user space API and all of verifier logic.
It's in the existing arraymap.c file, since several functions are
shared with regular array map.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-20 01:59:03 +02:00
|
|
|
|
2016-04-13 00:10:50 +02:00
|
|
|
err = check_map_func_compatibility(meta.map_ptr, func_id);
|
2015-08-06 09:02:35 +02:00
|
|
|
if (err)
|
|
|
|
return err;
|
bpf: allow bpf programs to tail-call other bpf programs
introduce bpf_tail_call(ctx, &jmp_table, index) helper function
which can be used from BPF programs like:
int bpf_prog(struct pt_regs *ctx)
{
...
bpf_tail_call(ctx, &jmp_table, index);
...
}
that is roughly equivalent to:
int bpf_prog(struct pt_regs *ctx)
{
...
if (jmp_table[index])
return (*jmp_table[index])(ctx);
...
}
The important detail that it's not a normal call, but a tail call.
The kernel stack is precious, so this helper reuses the current
stack frame and jumps into another BPF program without adding
extra call frame.
It's trivially done in interpreter and a bit trickier in JITs.
In case of x64 JIT the bigger part of generated assembler prologue
is common for all programs, so it is simply skipped while jumping.
Other JITs can do similar prologue-skipping optimization or
do stack unwind before jumping into the next program.
bpf_tail_call() arguments:
ctx - context pointer
jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table
index - index in the jump table
Since all BPF programs are idenitified by file descriptor, user space
need to populate the jmp_table with FDs of other BPF programs.
If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere
and program execution continues as normal.
New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can
populate this jmp_table array with FDs of other bpf programs.
Programs can share the same jmp_table array or use multiple jmp_tables.
The chain of tail calls can form unpredictable dynamic loops therefore
tail_call_cnt is used to limit the number of calls and currently is set to 32.
Use cases:
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
==========
- simplify complex programs by splitting them into a sequence of small programs
- dispatch routine
For tracing and future seccomp the program may be triggered on all system
calls, but processing of syscall arguments will be different. It's more
efficient to implement them as:
int syscall_entry(struct seccomp_data *ctx)
{
bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */);
... default: process unknown syscall ...
}
int sys_write_event(struct seccomp_data *ctx) {...}
int sys_read_event(struct seccomp_data *ctx) {...}
syscall_jmp_table[__NR_write] = sys_write_event;
syscall_jmp_table[__NR_read] = sys_read_event;
For networking the program may call into different parsers depending on
packet format, like:
int packet_parser(struct __sk_buff *skb)
{
... parse L2, L3 here ...
__u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol));
bpf_tail_call(skb, &ipproto_jmp_table, ipproto);
... default: process unknown protocol ...
}
int parse_tcp(struct __sk_buff *skb) {...}
int parse_udp(struct __sk_buff *skb) {...}
ipproto_jmp_table[IPPROTO_TCP] = parse_tcp;
ipproto_jmp_table[IPPROTO_UDP] = parse_udp;
- for TC use case, bpf_tail_call() allows to implement reclassify-like logic
- bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table
are atomic, so user space can build chains of BPF programs on the fly
Implementation details:
=======================
- high performance of bpf_tail_call() is the goal.
It could have been implemented without JIT changes as a wrapper on top of
BPF_PROG_RUN() macro, but with two downsides:
. all programs would have to pay performance penalty for this feature and
tail call itself would be slower, since mandatory stack unwind, return,
stack allocate would be done for every tailcall.
. tailcall would be limited to programs running preempt_disabled, since
generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would
need to be either global per_cpu variable accessed by helper and by wrapper
or global variable protected by locks.
In this implementation x64 JIT bypasses stack unwind and jumps into the
callee program after prologue.
- bpf_prog_array_compatible() ensures that prog_type of callee and caller
are the same and JITed/non-JITed flag is the same, since calling JITed
program from non-JITed is invalid, since stack frames are different.
Similarly calling kprobe type program from socket type program is invalid.
- jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map'
abstraction, its user space API and all of verifier logic.
It's in the existing arraymap.c file, since several functions are
shared with regular array map.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-20 01:59:03 +02:00
|
|
|
|
2016-05-06 04:49:10 +02:00
|
|
|
if (changes_data)
|
|
|
|
clear_all_pkt_pointers(env);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_packet_ptr_add(struct bpf_verifier_env *env,
|
|
|
|
struct bpf_insn *insn)
|
2016-05-06 04:49:10 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs;
|
|
|
|
struct bpf_reg_state *dst_reg = ®s[insn->dst_reg];
|
|
|
|
struct bpf_reg_state *src_reg = ®s[insn->src_reg];
|
|
|
|
struct bpf_reg_state tmp_reg;
|
2016-05-06 04:49:10 +02:00
|
|
|
s32 imm;
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_K) {
|
|
|
|
/* pkt_ptr += imm */
|
|
|
|
imm = insn->imm;
|
|
|
|
|
|
|
|
add_imm:
|
|
|
|
if (imm <= 0) {
|
|
|
|
verbose("addition of negative constant to packet pointer is not allowed\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
if (imm >= MAX_PACKET_OFF ||
|
|
|
|
imm + dst_reg->off >= MAX_PACKET_OFF) {
|
|
|
|
verbose("constant %d is too large to add to packet pointer\n",
|
|
|
|
imm);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
/* a constant was added to pkt_ptr.
|
|
|
|
* Remember it while keeping the same 'id'
|
|
|
|
*/
|
|
|
|
dst_reg->off += imm;
|
|
|
|
} else {
|
2016-05-20 03:17:14 +02:00
|
|
|
if (src_reg->type == PTR_TO_PACKET) {
|
|
|
|
/* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
|
|
|
|
tmp_reg = *dst_reg; /* save r7 state */
|
|
|
|
*dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */
|
|
|
|
src_reg = &tmp_reg; /* pretend it's src_reg state */
|
|
|
|
/* if the checks below reject it, the copy won't matter,
|
|
|
|
* since we're rejecting the whole program. If all ok,
|
|
|
|
* then imm22 state will be added to r7
|
|
|
|
* and r7 will be pkt(id=0,off=22,r=62) while
|
|
|
|
* r6 will stay as pkt(id=0,off=0,r=62)
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
2016-05-06 04:49:10 +02:00
|
|
|
if (src_reg->type == CONST_IMM) {
|
|
|
|
/* pkt_ptr += reg where reg is known constant */
|
|
|
|
imm = src_reg->imm;
|
|
|
|
goto add_imm;
|
|
|
|
}
|
|
|
|
/* disallow pkt_ptr += reg
|
|
|
|
* if reg is not uknown_value with guaranteed zero upper bits
|
|
|
|
* otherwise pkt_ptr may overflow and addition will become
|
|
|
|
* subtraction which is not allowed
|
|
|
|
*/
|
|
|
|
if (src_reg->type != UNKNOWN_VALUE) {
|
|
|
|
verbose("cannot add '%s' to ptr_to_packet\n",
|
|
|
|
reg_type_str[src_reg->type]);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
if (src_reg->imm < 48) {
|
|
|
|
verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n",
|
|
|
|
src_reg->imm);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
/* dst_reg stays as pkt_ptr type and since some positive
|
|
|
|
* integer value was added to the pointer, increment its 'id'
|
|
|
|
*/
|
bpf: fix method of PTR_TO_PACKET reg id generation
Using per-register incrementing ID can lead to
find_good_pkt_pointers() confusing registers which
have completely different values. Consider example:
0: (bf) r6 = r1
1: (61) r8 = *(u32 *)(r6 +76)
2: (61) r0 = *(u32 *)(r6 +80)
3: (bf) r7 = r8
4: (07) r8 += 32
5: (2d) if r8 > r0 goto pc+9
R0=pkt_end R1=ctx R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=0,off=32,r=32) R10=fp
6: (bf) r8 = r7
7: (bf) r9 = r7
8: (71) r1 = *(u8 *)(r7 +0)
9: (0f) r8 += r1
10: (71) r1 = *(u8 *)(r7 +1)
11: (0f) r9 += r1
12: (07) r8 += 32
13: (2d) if r8 > r0 goto pc+1
R0=pkt_end R1=inv56 R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=1,off=32,r=32) R9=pkt(id=1,off=0,r=32) R10=fp
14: (71) r1 = *(u8 *)(r9 +16)
15: (b7) r7 = 0
16: (bf) r0 = r7
17: (95) exit
We need to get a UNKNOWN_VALUE with imm to force id
generation so lines 0-5 make r7 a valid packet pointer.
We then read two different bytes from the packet and
add them to copies of the constructed packet pointer.
r8 (line 9) and r9 (line 11) will get the same id of 1,
independently. When either of them is validated (line
13) - find_good_pkt_pointers() will also mark the other
as safe. This leads to access on line 14 being mistakenly
considered safe.
Fixes: 969bf05eb3ce ("bpf: direct packet access")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-02 17:12:14 +02:00
|
|
|
dst_reg->id = ++env->id_gen;
|
2016-05-06 04:49:10 +02:00
|
|
|
|
|
|
|
/* something was added to pkt_ptr, set range and off to zero */
|
|
|
|
dst_reg->off = 0;
|
|
|
|
dst_reg->range = 0;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
2016-05-06 04:49:10 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs;
|
|
|
|
struct bpf_reg_state *dst_reg = ®s[insn->dst_reg];
|
2016-05-06 04:49:10 +02:00
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
s64 imm_log2;
|
|
|
|
|
|
|
|
/* for type == UNKNOWN_VALUE:
|
|
|
|
* imm > 0 -> number of zero upper bits
|
|
|
|
* imm == 0 -> don't track which is the same as all bits can be non-zero
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *src_reg = ®s[insn->src_reg];
|
2016-05-06 04:49:10 +02:00
|
|
|
|
|
|
|
if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
|
|
|
|
dst_reg->imm && opcode == BPF_ADD) {
|
|
|
|
/* dreg += sreg
|
|
|
|
* where both have zero upper bits. Adding them
|
|
|
|
* can only result making one more bit non-zero
|
|
|
|
* in the larger value.
|
|
|
|
* Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
|
|
|
|
* 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
|
|
|
|
*/
|
|
|
|
dst_reg->imm = min(dst_reg->imm, src_reg->imm);
|
|
|
|
dst_reg->imm--;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (src_reg->type == CONST_IMM && src_reg->imm > 0 &&
|
|
|
|
dst_reg->imm && opcode == BPF_ADD) {
|
|
|
|
/* dreg += sreg
|
|
|
|
* where dreg has zero upper bits and sreg is const.
|
|
|
|
* Adding them can only result making one more bit
|
|
|
|
* non-zero in the larger value.
|
|
|
|
*/
|
|
|
|
imm_log2 = __ilog2_u64((long long)src_reg->imm);
|
|
|
|
dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
|
|
|
|
dst_reg->imm--;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* all other cases non supported yet, just mark dst_reg */
|
|
|
|
dst_reg->imm = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* sign extend 32-bit imm into 64-bit to make sure that
|
|
|
|
* negative values occupy bit 63. Note ilog2() would have
|
|
|
|
* been incorrect, since sizeof(insn->imm) == 4
|
|
|
|
*/
|
|
|
|
imm_log2 = __ilog2_u64((long long)insn->imm);
|
|
|
|
|
|
|
|
if (dst_reg->imm && opcode == BPF_LSH) {
|
|
|
|
/* reg <<= imm
|
|
|
|
* if reg was a result of 2 byte load, then its imm == 48
|
|
|
|
* which means that upper 48 bits are zero and shifting this reg
|
|
|
|
* left by 4 would mean that upper 44 bits are still zero
|
|
|
|
*/
|
|
|
|
dst_reg->imm -= insn->imm;
|
|
|
|
} else if (dst_reg->imm && opcode == BPF_MUL) {
|
|
|
|
/* reg *= imm
|
|
|
|
* if multiplying by 14 subtract 4
|
|
|
|
* This is conservative calculation of upper zero bits.
|
|
|
|
* It's not trying to special case insn->imm == 1 or 0 cases
|
|
|
|
*/
|
|
|
|
dst_reg->imm -= imm_log2 + 1;
|
|
|
|
} else if (opcode == BPF_AND) {
|
|
|
|
/* reg &= imm */
|
|
|
|
dst_reg->imm = 63 - imm_log2;
|
|
|
|
} else if (dst_reg->imm && opcode == BPF_ADD) {
|
|
|
|
/* reg += imm */
|
|
|
|
dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
|
|
|
|
dst_reg->imm--;
|
|
|
|
} else if (opcode == BPF_RSH) {
|
|
|
|
/* reg >>= imm
|
|
|
|
* which means that after right shift, upper bits will be zero
|
|
|
|
* note that verifier already checked that
|
|
|
|
* 0 <= imm < 64 for shift insn
|
|
|
|
*/
|
|
|
|
dst_reg->imm += insn->imm;
|
|
|
|
if (unlikely(dst_reg->imm > 64))
|
|
|
|
/* some dumb code did:
|
|
|
|
* r2 = *(u32 *)mem;
|
|
|
|
* r2 >>= 32;
|
|
|
|
* and all bits are zero now */
|
|
|
|
dst_reg->imm = 64;
|
|
|
|
} else {
|
|
|
|
/* all other alu ops, means that we don't know what will
|
|
|
|
* happen to the value, mark it with unknown number of zero bits
|
|
|
|
*/
|
|
|
|
dst_reg->imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dst_reg->imm < 0) {
|
|
|
|
/* all 64 bits of the register can contain non-zero bits
|
|
|
|
* and such value cannot be added to ptr_to_packet, since it
|
|
|
|
* may overflow, mark it as unknown to avoid further eval
|
|
|
|
*/
|
|
|
|
dst_reg->imm = 0;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
|
|
|
|
struct bpf_insn *insn)
|
2016-05-06 04:49:10 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs;
|
|
|
|
struct bpf_reg_state *dst_reg = ®s[insn->dst_reg];
|
|
|
|
struct bpf_reg_state *src_reg = ®s[insn->src_reg];
|
2016-05-06 04:49:10 +02:00
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
|
|
|
|
/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
|
|
|
|
* Don't care about overflow or negative values, just add them
|
|
|
|
*/
|
|
|
|
if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K)
|
|
|
|
dst_reg->imm += insn->imm;
|
|
|
|
else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
|
|
|
|
src_reg->type == CONST_IMM)
|
|
|
|
dst_reg->imm += src_reg->imm;
|
|
|
|
else
|
|
|
|
mark_reg_unknown_value(regs, insn->dst_reg);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check validity of 32-bit and 64-bit arithmetic operations */
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (opcode == BPF_END || opcode == BPF_NEG) {
|
|
|
|
if (opcode == BPF_NEG) {
|
|
|
|
if (BPF_SRC(insn->code) != 0 ||
|
|
|
|
insn->src_reg != BPF_REG_0 ||
|
|
|
|
insn->off != 0 || insn->imm != 0) {
|
|
|
|
verbose("BPF_NEG uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
|
|
|
|
(insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
|
|
|
|
verbose("BPF_END uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
if (is_pointer_value(env, insn->dst_reg)) {
|
|
|
|
verbose("R%d pointer arithmetic prohibited\n",
|
|
|
|
insn->dst_reg);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* check dest operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
} else if (opcode == BPF_MOV) {
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
if (insn->imm != 0 || insn->off != 0) {
|
|
|
|
verbose("BPF_MOV uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
} else {
|
|
|
|
if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
|
|
|
|
verbose("BPF_MOV uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check dest operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
if (BPF_CLASS(insn->code) == BPF_ALU64) {
|
|
|
|
/* case: R1 = R2
|
|
|
|
* copy register state to dest reg
|
|
|
|
*/
|
|
|
|
regs[insn->dst_reg] = regs[insn->src_reg];
|
|
|
|
} else {
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
if (is_pointer_value(env, insn->src_reg)) {
|
|
|
|
verbose("R%d partial copy of pointer\n",
|
|
|
|
insn->src_reg);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
regs[insn->dst_reg].type = UNKNOWN_VALUE;
|
|
|
|
regs[insn->dst_reg].map_ptr = NULL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* case: R = imm
|
|
|
|
* remember the value we stored into this reg
|
|
|
|
*/
|
|
|
|
regs[insn->dst_reg].type = CONST_IMM;
|
|
|
|
regs[insn->dst_reg].imm = insn->imm;
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (opcode > BPF_END) {
|
|
|
|
verbose("invalid BPF_ALU opcode %x\n", opcode);
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
} else { /* all other ALU ops: and, sub, xor, add, ... */
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
if (insn->imm != 0 || insn->off != 0) {
|
|
|
|
verbose("BPF_ALU uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* check src1 operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
} else {
|
|
|
|
if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
|
|
|
|
verbose("BPF_ALU uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src2 operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
|
|
|
|
BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
|
|
|
|
verbose("div by zero\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-01-12 20:17:08 +01:00
|
|
|
if ((opcode == BPF_LSH || opcode == BPF_RSH ||
|
|
|
|
opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
|
|
|
|
int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
|
|
|
|
|
|
|
|
if (insn->imm < 0 || insn->imm >= size) {
|
|
|
|
verbose("invalid shift %d\n", insn->imm);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
/* check dest operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
dst_reg = ®s[insn->dst_reg];
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* pattern match 'bpf_add Rx, imm' instruction */
|
|
|
|
if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
|
2016-05-06 04:49:09 +02:00
|
|
|
dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) {
|
|
|
|
dst_reg->type = PTR_TO_STACK;
|
|
|
|
dst_reg->imm = insn->imm;
|
|
|
|
return 0;
|
2016-05-06 04:49:10 +02:00
|
|
|
} else if (opcode == BPF_ADD &&
|
|
|
|
BPF_CLASS(insn->code) == BPF_ALU64 &&
|
2016-05-20 03:17:14 +02:00
|
|
|
(dst_reg->type == PTR_TO_PACKET ||
|
|
|
|
(BPF_SRC(insn->code) == BPF_X &&
|
|
|
|
regs[insn->src_reg].type == PTR_TO_PACKET))) {
|
2016-05-06 04:49:10 +02:00
|
|
|
/* ptr_to_packet += K|X */
|
|
|
|
return check_packet_ptr_add(env, insn);
|
|
|
|
} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
|
|
|
|
dst_reg->type == UNKNOWN_VALUE &&
|
|
|
|
env->allow_ptr_leaks) {
|
|
|
|
/* unknown += K|X */
|
|
|
|
return evaluate_reg_alu(env, insn);
|
|
|
|
} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
|
|
|
|
dst_reg->type == CONST_IMM &&
|
|
|
|
env->allow_ptr_leaks) {
|
|
|
|
/* reg_imm += K|X */
|
|
|
|
return evaluate_reg_imm_alu(env, insn);
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
} else if (is_pointer_value(env, insn->dst_reg)) {
|
|
|
|
verbose("R%d pointer arithmetic prohibited\n",
|
|
|
|
insn->dst_reg);
|
|
|
|
return -EACCES;
|
|
|
|
} else if (BPF_SRC(insn->code) == BPF_X &&
|
|
|
|
is_pointer_value(env, insn->src_reg)) {
|
|
|
|
verbose("R%d pointer arithmetic prohibited\n",
|
|
|
|
insn->src_reg);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
/* mark dest operand */
|
|
|
|
mark_reg_unknown_value(regs, insn->dst_reg);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static void find_good_pkt_pointers(struct bpf_verifier_state *state,
|
|
|
|
struct bpf_reg_state *dst_reg)
|
2016-05-06 04:49:10 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = state->regs, *reg;
|
2016-05-06 04:49:10 +02:00
|
|
|
int i;
|
bpf: fix range propagation on direct packet access
LLVM can generate code that tests for direct packet access via
skb->data/data_end in a way that currently gets rejected by the
verifier, example:
[...]
7: (61) r3 = *(u32 *)(r6 +80)
8: (61) r9 = *(u32 *)(r6 +76)
9: (bf) r2 = r9
10: (07) r2 += 54
11: (3d) if r3 >= r2 goto pc+12
R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx
R9=pkt(id=0,off=0,r=0) R10=fp
12: (18) r4 = 0xffffff7a
14: (05) goto pc+430
[...]
from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv
R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp
24: (7b) *(u64 *)(r10 -40) = r1
25: (b7) r1 = 0
26: (63) *(u32 *)(r6 +56) = r1
27: (b7) r2 = 40
28: (71) r8 = *(u8 *)(r9 +20)
invalid access to packet, off=20 size=1, R9(id=0,off=0,r=0)
The reason why this gets rejected despite a proper test is that we
currently call find_good_pkt_pointers() only in case where we detect
tests like rX > pkt_end, where rX is of type pkt(id=Y,off=Z,r=0) and
derived, for example, from a register of type pkt(id=Y,off=0,r=0)
pointing to skb->data. find_good_pkt_pointers() then fills the range
in the current branch to pkt(id=Y,off=0,r=Z) on success.
For above case, we need to extend that to recognize pkt_end >= rX
pattern and mark the other branch that is taken on success with the
appropriate pkt(id=Y,off=0,r=Z) type via find_good_pkt_pointers().
Since eBPF operates on BPF_JGT (>) and BPF_JGE (>=), these are the
only two practical options to test for from what LLVM could have
generated, since there's no such thing as BPF_JLT (<) or BPF_JLE (<=)
that we would need to take into account as well.
After the fix:
[...]
7: (61) r3 = *(u32 *)(r6 +80)
8: (61) r9 = *(u32 *)(r6 +76)
9: (bf) r2 = r9
10: (07) r2 += 54
11: (3d) if r3 >= r2 goto pc+12
R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx
R9=pkt(id=0,off=0,r=0) R10=fp
12: (18) r4 = 0xffffff7a
14: (05) goto pc+430
[...]
from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=54) R3=pkt_end R4=inv
R6=ctx R9=pkt(id=0,off=0,r=54) R10=fp
24: (7b) *(u64 *)(r10 -40) = r1
25: (b7) r1 = 0
26: (63) *(u32 *)(r6 +56) = r1
27: (b7) r2 = 40
28: (71) r8 = *(u8 *)(r9 +20)
29: (bf) r1 = r8
30: (25) if r8 > 0x3c goto pc+47
R1=inv56 R2=imm40 R3=pkt_end R4=inv R6=ctx R8=inv56
R9=pkt(id=0,off=0,r=54) R10=fp
31: (b7) r1 = 1
[...]
Verifier test cases are also added in this work, one that demonstrates
the mentioned example here and one that tries a bad packet access for
the current/fall-through branch (the one with types pkt(id=X,off=Y,r=0),
pkt(id=X,off=0,r=0)), then a case with good and bad accesses, and two
with both test variants (>, >=).
Fixes: 969bf05eb3ce ("bpf: direct packet access")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-08 01:03:42 +02:00
|
|
|
|
|
|
|
/* LLVM can generate two kind of checks:
|
|
|
|
*
|
|
|
|
* Type 1:
|
|
|
|
*
|
|
|
|
* r2 = r3;
|
|
|
|
* r2 += 8;
|
|
|
|
* if (r2 > pkt_end) goto <handle exception>
|
|
|
|
* <access okay>
|
|
|
|
*
|
|
|
|
* Where:
|
|
|
|
* r2 == dst_reg, pkt_end == src_reg
|
|
|
|
* r2=pkt(id=n,off=8,r=0)
|
|
|
|
* r3=pkt(id=n,off=0,r=0)
|
|
|
|
*
|
|
|
|
* Type 2:
|
|
|
|
*
|
|
|
|
* r2 = r3;
|
|
|
|
* r2 += 8;
|
|
|
|
* if (pkt_end >= r2) goto <access okay>
|
|
|
|
* <handle exception>
|
|
|
|
*
|
|
|
|
* Where:
|
|
|
|
* pkt_end == dst_reg, r2 == src_reg
|
|
|
|
* r2=pkt(id=n,off=8,r=0)
|
|
|
|
* r3=pkt(id=n,off=0,r=0)
|
|
|
|
*
|
|
|
|
* Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
|
|
|
|
* so that range of bytes [r3, r3 + 8) is safe to access.
|
2016-05-06 04:49:10 +02:00
|
|
|
*/
|
bpf: fix range propagation on direct packet access
LLVM can generate code that tests for direct packet access via
skb->data/data_end in a way that currently gets rejected by the
verifier, example:
[...]
7: (61) r3 = *(u32 *)(r6 +80)
8: (61) r9 = *(u32 *)(r6 +76)
9: (bf) r2 = r9
10: (07) r2 += 54
11: (3d) if r3 >= r2 goto pc+12
R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx
R9=pkt(id=0,off=0,r=0) R10=fp
12: (18) r4 = 0xffffff7a
14: (05) goto pc+430
[...]
from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv
R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp
24: (7b) *(u64 *)(r10 -40) = r1
25: (b7) r1 = 0
26: (63) *(u32 *)(r6 +56) = r1
27: (b7) r2 = 40
28: (71) r8 = *(u8 *)(r9 +20)
invalid access to packet, off=20 size=1, R9(id=0,off=0,r=0)
The reason why this gets rejected despite a proper test is that we
currently call find_good_pkt_pointers() only in case where we detect
tests like rX > pkt_end, where rX is of type pkt(id=Y,off=Z,r=0) and
derived, for example, from a register of type pkt(id=Y,off=0,r=0)
pointing to skb->data. find_good_pkt_pointers() then fills the range
in the current branch to pkt(id=Y,off=0,r=Z) on success.
For above case, we need to extend that to recognize pkt_end >= rX
pattern and mark the other branch that is taken on success with the
appropriate pkt(id=Y,off=0,r=Z) type via find_good_pkt_pointers().
Since eBPF operates on BPF_JGT (>) and BPF_JGE (>=), these are the
only two practical options to test for from what LLVM could have
generated, since there's no such thing as BPF_JLT (<) or BPF_JLE (<=)
that we would need to take into account as well.
After the fix:
[...]
7: (61) r3 = *(u32 *)(r6 +80)
8: (61) r9 = *(u32 *)(r6 +76)
9: (bf) r2 = r9
10: (07) r2 += 54
11: (3d) if r3 >= r2 goto pc+12
R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx
R9=pkt(id=0,off=0,r=0) R10=fp
12: (18) r4 = 0xffffff7a
14: (05) goto pc+430
[...]
from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=54) R3=pkt_end R4=inv
R6=ctx R9=pkt(id=0,off=0,r=54) R10=fp
24: (7b) *(u64 *)(r10 -40) = r1
25: (b7) r1 = 0
26: (63) *(u32 *)(r6 +56) = r1
27: (b7) r2 = 40
28: (71) r8 = *(u8 *)(r9 +20)
29: (bf) r1 = r8
30: (25) if r8 > 0x3c goto pc+47
R1=inv56 R2=imm40 R3=pkt_end R4=inv R6=ctx R8=inv56
R9=pkt(id=0,off=0,r=54) R10=fp
31: (b7) r1 = 1
[...]
Verifier test cases are also added in this work, one that demonstrates
the mentioned example here and one that tries a bad packet access for
the current/fall-through branch (the one with types pkt(id=X,off=Y,r=0),
pkt(id=X,off=0,r=0)), then a case with good and bad accesses, and two
with both test variants (>, >=).
Fixes: 969bf05eb3ce ("bpf: direct packet access")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-08 01:03:42 +02:00
|
|
|
|
2016-05-06 04:49:10 +02:00
|
|
|
for (i = 0; i < MAX_BPF_REG; i++)
|
|
|
|
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
|
|
|
|
regs[i].range = dst_reg->off;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
|
|
|
|
if (state->stack_slot_type[i] != STACK_SPILL)
|
|
|
|
continue;
|
|
|
|
reg = &state->spilled_regs[i / BPF_REG_SIZE];
|
|
|
|
if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
|
|
|
|
reg->range = dst_reg->off;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_cond_jmp_op(struct bpf_verifier_env *env,
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
struct bpf_insn *insn, int *insn_idx)
|
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
|
|
|
|
struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (opcode > BPF_EXIT) {
|
|
|
|
verbose("invalid BPF_JMP opcode %x\n", opcode);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
if (insn->imm != 0) {
|
|
|
|
verbose("BPF_JMP uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src1 operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
|
|
|
|
if (is_pointer_value(env, insn->src_reg)) {
|
|
|
|
verbose("R%d pointer comparison prohibited\n",
|
|
|
|
insn->src_reg);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else {
|
|
|
|
if (insn->src_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_JMP uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src2 operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
dst_reg = ®s[insn->dst_reg];
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* detect if R == 0 where R was initialized to zero earlier */
|
|
|
|
if (BPF_SRC(insn->code) == BPF_K &&
|
|
|
|
(opcode == BPF_JEQ || opcode == BPF_JNE) &&
|
2016-05-06 04:49:09 +02:00
|
|
|
dst_reg->type == CONST_IMM && dst_reg->imm == insn->imm) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (opcode == BPF_JEQ) {
|
|
|
|
/* if (imm == imm) goto pc+off;
|
|
|
|
* only follow the goto, ignore fall-through
|
|
|
|
*/
|
|
|
|
*insn_idx += insn->off;
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
/* if (imm != imm) goto pc+off;
|
|
|
|
* only follow fall-through branch, since
|
|
|
|
* that's where the program will go
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
|
|
|
|
if (!other_branch)
|
|
|
|
return -EFAULT;
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (BPF_SRC(insn->code) == BPF_K &&
|
2016-05-06 04:49:09 +02:00
|
|
|
insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
|
|
|
|
dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (opcode == BPF_JEQ) {
|
|
|
|
/* next fallthrough insn can access memory via
|
|
|
|
* this register
|
|
|
|
*/
|
|
|
|
regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
|
|
|
|
/* branch targer cannot access it, since reg == 0 */
|
2016-05-06 04:49:11 +02:00
|
|
|
mark_reg_unknown_value(other_branch->regs,
|
|
|
|
insn->dst_reg);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else {
|
|
|
|
other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
|
2016-05-06 04:49:11 +02:00
|
|
|
mark_reg_unknown_value(regs, insn->dst_reg);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
}
|
2016-05-06 04:49:10 +02:00
|
|
|
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
|
|
|
|
dst_reg->type == PTR_TO_PACKET &&
|
|
|
|
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
|
bpf: fix range propagation on direct packet access
LLVM can generate code that tests for direct packet access via
skb->data/data_end in a way that currently gets rejected by the
verifier, example:
[...]
7: (61) r3 = *(u32 *)(r6 +80)
8: (61) r9 = *(u32 *)(r6 +76)
9: (bf) r2 = r9
10: (07) r2 += 54
11: (3d) if r3 >= r2 goto pc+12
R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx
R9=pkt(id=0,off=0,r=0) R10=fp
12: (18) r4 = 0xffffff7a
14: (05) goto pc+430
[...]
from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv
R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp
24: (7b) *(u64 *)(r10 -40) = r1
25: (b7) r1 = 0
26: (63) *(u32 *)(r6 +56) = r1
27: (b7) r2 = 40
28: (71) r8 = *(u8 *)(r9 +20)
invalid access to packet, off=20 size=1, R9(id=0,off=0,r=0)
The reason why this gets rejected despite a proper test is that we
currently call find_good_pkt_pointers() only in case where we detect
tests like rX > pkt_end, where rX is of type pkt(id=Y,off=Z,r=0) and
derived, for example, from a register of type pkt(id=Y,off=0,r=0)
pointing to skb->data. find_good_pkt_pointers() then fills the range
in the current branch to pkt(id=Y,off=0,r=Z) on success.
For above case, we need to extend that to recognize pkt_end >= rX
pattern and mark the other branch that is taken on success with the
appropriate pkt(id=Y,off=0,r=Z) type via find_good_pkt_pointers().
Since eBPF operates on BPF_JGT (>) and BPF_JGE (>=), these are the
only two practical options to test for from what LLVM could have
generated, since there's no such thing as BPF_JLT (<) or BPF_JLE (<=)
that we would need to take into account as well.
After the fix:
[...]
7: (61) r3 = *(u32 *)(r6 +80)
8: (61) r9 = *(u32 *)(r6 +76)
9: (bf) r2 = r9
10: (07) r2 += 54
11: (3d) if r3 >= r2 goto pc+12
R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx
R9=pkt(id=0,off=0,r=0) R10=fp
12: (18) r4 = 0xffffff7a
14: (05) goto pc+430
[...]
from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=54) R3=pkt_end R4=inv
R6=ctx R9=pkt(id=0,off=0,r=54) R10=fp
24: (7b) *(u64 *)(r10 -40) = r1
25: (b7) r1 = 0
26: (63) *(u32 *)(r6 +56) = r1
27: (b7) r2 = 40
28: (71) r8 = *(u8 *)(r9 +20)
29: (bf) r1 = r8
30: (25) if r8 > 0x3c goto pc+47
R1=inv56 R2=imm40 R3=pkt_end R4=inv R6=ctx R8=inv56
R9=pkt(id=0,off=0,r=54) R10=fp
31: (b7) r1 = 1
[...]
Verifier test cases are also added in this work, one that demonstrates
the mentioned example here and one that tries a bad packet access for
the current/fall-through branch (the one with types pkt(id=X,off=Y,r=0),
pkt(id=X,off=0,r=0)), then a case with good and bad accesses, and two
with both test variants (>, >=).
Fixes: 969bf05eb3ce ("bpf: direct packet access")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-08 01:03:42 +02:00
|
|
|
find_good_pkt_pointers(this_branch, dst_reg);
|
|
|
|
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
|
|
|
|
dst_reg->type == PTR_TO_PACKET_END &&
|
|
|
|
regs[insn->src_reg].type == PTR_TO_PACKET) {
|
|
|
|
find_good_pkt_pointers(other_branch, ®s[insn->src_reg]);
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
} else if (is_pointer_value(env, insn->dst_reg)) {
|
|
|
|
verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
|
|
|
|
return -EACCES;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
}
|
|
|
|
if (log_level)
|
bpf: fix range propagation on direct packet access
LLVM can generate code that tests for direct packet access via
skb->data/data_end in a way that currently gets rejected by the
verifier, example:
[...]
7: (61) r3 = *(u32 *)(r6 +80)
8: (61) r9 = *(u32 *)(r6 +76)
9: (bf) r2 = r9
10: (07) r2 += 54
11: (3d) if r3 >= r2 goto pc+12
R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx
R9=pkt(id=0,off=0,r=0) R10=fp
12: (18) r4 = 0xffffff7a
14: (05) goto pc+430
[...]
from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv
R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp
24: (7b) *(u64 *)(r10 -40) = r1
25: (b7) r1 = 0
26: (63) *(u32 *)(r6 +56) = r1
27: (b7) r2 = 40
28: (71) r8 = *(u8 *)(r9 +20)
invalid access to packet, off=20 size=1, R9(id=0,off=0,r=0)
The reason why this gets rejected despite a proper test is that we
currently call find_good_pkt_pointers() only in case where we detect
tests like rX > pkt_end, where rX is of type pkt(id=Y,off=Z,r=0) and
derived, for example, from a register of type pkt(id=Y,off=0,r=0)
pointing to skb->data. find_good_pkt_pointers() then fills the range
in the current branch to pkt(id=Y,off=0,r=Z) on success.
For above case, we need to extend that to recognize pkt_end >= rX
pattern and mark the other branch that is taken on success with the
appropriate pkt(id=Y,off=0,r=Z) type via find_good_pkt_pointers().
Since eBPF operates on BPF_JGT (>) and BPF_JGE (>=), these are the
only two practical options to test for from what LLVM could have
generated, since there's no such thing as BPF_JLT (<) or BPF_JLE (<=)
that we would need to take into account as well.
After the fix:
[...]
7: (61) r3 = *(u32 *)(r6 +80)
8: (61) r9 = *(u32 *)(r6 +76)
9: (bf) r2 = r9
10: (07) r2 += 54
11: (3d) if r3 >= r2 goto pc+12
R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx
R9=pkt(id=0,off=0,r=0) R10=fp
12: (18) r4 = 0xffffff7a
14: (05) goto pc+430
[...]
from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=54) R3=pkt_end R4=inv
R6=ctx R9=pkt(id=0,off=0,r=54) R10=fp
24: (7b) *(u64 *)(r10 -40) = r1
25: (b7) r1 = 0
26: (63) *(u32 *)(r6 +56) = r1
27: (b7) r2 = 40
28: (71) r8 = *(u8 *)(r9 +20)
29: (bf) r1 = r8
30: (25) if r8 > 0x3c goto pc+47
R1=inv56 R2=imm40 R3=pkt_end R4=inv R6=ctx R8=inv56
R9=pkt(id=0,off=0,r=54) R10=fp
31: (b7) r1 = 1
[...]
Verifier test cases are also added in this work, one that demonstrates
the mentioned example here and one that tries a bad packet access for
the current/fall-through branch (the one with types pkt(id=X,off=Y,r=0),
pkt(id=X,off=0,r=0)), then a case with good and bad accesses, and two
with both test variants (>, >=).
Fixes: 969bf05eb3ce ("bpf: direct packet access")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-08 01:03:42 +02:00
|
|
|
print_verifier_state(this_branch);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-26 09:17:04 +02:00
|
|
|
/* return the map pointer stored inside BPF_LD_IMM64 instruction */
|
|
|
|
static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
|
|
|
|
{
|
|
|
|
u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
|
|
|
|
|
|
|
|
return (struct bpf_map *) (unsigned long) imm64;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* verify BPF_LD_IMM64 instruction */
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int err;
|
|
|
|
|
|
|
|
if (BPF_SIZE(insn->code) != BPF_DW) {
|
|
|
|
verbose("invalid BPF_LD_IMM insn\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (insn->off != 0) {
|
|
|
|
verbose("BPF_LD_IMM64 uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (insn->src_reg == 0)
|
|
|
|
/* generic move 64-bit immediate into a register */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
|
|
|
|
BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
|
|
|
|
|
|
|
|
regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
|
|
|
|
regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
ebpf: add sched_cls_type and map it to sk_filter's verifier ops
As discussed recently and at netconf/netdev01, we want to prevent making
bpf_verifier_ops registration available for modules, but have them at a
controlled place inside the kernel instead.
The reason for this is, that out-of-tree modules can go crazy and define
and register any verfifier ops they want, doing all sorts of crap, even
bypassing available GPLed eBPF helper functions. We don't want to offer
such a shiny playground, of course, but keep strict control to ourselves
inside the core kernel.
This also encourages us to design eBPF user helpers carefully and
generically, so they can be shared among various subsystems using eBPF.
For the eBPF traffic classifier (cls_bpf), it's a good start to share
the same helper facilities as we currently do in eBPF for socket filters.
That way, we have BPF_PROG_TYPE_SCHED_CLS look like it's own type, thus
one day if there's a good reason to diverge the set of helper functions
from the set available to socket filters, we keep ABI compatibility.
In future, we could place all bpf_prog_type_list at a central place,
perhaps.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-01 12:31:46 +01:00
|
|
|
static bool may_access_skb(enum bpf_prog_type type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case BPF_PROG_TYPE_SOCKET_FILTER:
|
|
|
|
case BPF_PROG_TYPE_SCHED_CLS:
|
2015-03-20 15:11:11 +01:00
|
|
|
case BPF_PROG_TYPE_SCHED_ACT:
|
ebpf: add sched_cls_type and map it to sk_filter's verifier ops
As discussed recently and at netconf/netdev01, we want to prevent making
bpf_verifier_ops registration available for modules, but have them at a
controlled place inside the kernel instead.
The reason for this is, that out-of-tree modules can go crazy and define
and register any verfifier ops they want, doing all sorts of crap, even
bypassing available GPLed eBPF helper functions. We don't want to offer
such a shiny playground, of course, but keep strict control to ourselves
inside the core kernel.
This also encourages us to design eBPF user helpers carefully and
generically, so they can be shared among various subsystems using eBPF.
For the eBPF traffic classifier (cls_bpf), it's a good start to share
the same helper facilities as we currently do in eBPF for socket filters.
That way, we have BPF_PROG_TYPE_SCHED_CLS look like it's own type, thus
one day if there's a good reason to diverge the set of helper functions
from the set available to socket filters, we keep ABI compatibility.
In future, we could place all bpf_prog_type_list at a central place,
perhaps.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-01 12:31:46 +01:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-12-02 00:06:34 +01:00
|
|
|
/* verify safety of LD_ABS|LD_IND instructions:
|
|
|
|
* - they can only appear in the programs where ctx == skb
|
|
|
|
* - since they are wrappers of function calls, they scratch R1-R5 registers,
|
|
|
|
* preserve R6-R9, and store return value into R0
|
|
|
|
*
|
|
|
|
* Implicit input:
|
|
|
|
* ctx == skb == R6 == CTX
|
|
|
|
*
|
|
|
|
* Explicit input:
|
|
|
|
* SRC == any register
|
|
|
|
* IMM == 32-bit immediate
|
|
|
|
*
|
|
|
|
* Output:
|
|
|
|
* R0 - 8/16/32-bit skb data converted to cpu endianness
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
2014-12-02 00:06:34 +01:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = env->cur_state.regs;
|
2014-12-02 00:06:34 +01:00
|
|
|
u8 mode = BPF_MODE(insn->code);
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *reg;
|
2014-12-02 00:06:34 +01:00
|
|
|
int i, err;
|
|
|
|
|
2015-03-01 12:31:47 +01:00
|
|
|
if (!may_access_skb(env->prog->type)) {
|
2016-05-06 04:49:09 +02:00
|
|
|
verbose("BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
|
2014-12-02 00:06:34 +01:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
|
2016-04-12 19:26:19 +02:00
|
|
|
BPF_SIZE(insn->code) == BPF_DW ||
|
2014-12-02 00:06:34 +01:00
|
|
|
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
|
2016-05-06 04:49:09 +02:00
|
|
|
verbose("BPF_LD_[ABS|IND] uses reserved fields\n");
|
2014-12-02 00:06:34 +01:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check whether implicit source operand (register R6) is readable */
|
|
|
|
err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (regs[BPF_REG_6].type != PTR_TO_CTX) {
|
|
|
|
verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mode == BPF_IND) {
|
|
|
|
/* check explicit source operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* reset caller saved regs to unreadable */
|
|
|
|
for (i = 0; i < CALLER_SAVED_REGS; i++) {
|
|
|
|
reg = regs + caller_saved[i];
|
|
|
|
reg->type = NOT_INIT;
|
|
|
|
reg->imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mark destination R0 register as readable, since it contains
|
|
|
|
* the value fetched from the packet
|
|
|
|
*/
|
|
|
|
regs[BPF_REG_0].type = UNKNOWN_VALUE;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-26 09:17:05 +02:00
|
|
|
/* non-recursive DFS pseudo code
|
|
|
|
* 1 procedure DFS-iterative(G,v):
|
|
|
|
* 2 label v as discovered
|
|
|
|
* 3 let S be a stack
|
|
|
|
* 4 S.push(v)
|
|
|
|
* 5 while S is not empty
|
|
|
|
* 6 t <- S.pop()
|
|
|
|
* 7 if t is what we're looking for:
|
|
|
|
* 8 return t
|
|
|
|
* 9 for all edges e in G.adjacentEdges(t) do
|
|
|
|
* 10 if edge e is already labelled
|
|
|
|
* 11 continue with the next edge
|
|
|
|
* 12 w <- G.adjacentVertex(t,e)
|
|
|
|
* 13 if vertex w is not discovered and not explored
|
|
|
|
* 14 label e as tree-edge
|
|
|
|
* 15 label w as discovered
|
|
|
|
* 16 S.push(w)
|
|
|
|
* 17 continue at 5
|
|
|
|
* 18 else if vertex w is discovered
|
|
|
|
* 19 label e as back-edge
|
|
|
|
* 20 else
|
|
|
|
* 21 // vertex w is explored
|
|
|
|
* 22 label e as forward- or cross-edge
|
|
|
|
* 23 label t as explored
|
|
|
|
* 24 S.pop()
|
|
|
|
*
|
|
|
|
* convention:
|
|
|
|
* 0x10 - discovered
|
|
|
|
* 0x11 - discovered and fall-through edge labelled
|
|
|
|
* 0x12 - discovered and fall-through and branch edges labelled
|
|
|
|
* 0x20 - explored
|
|
|
|
*/
|
|
|
|
|
|
|
|
enum {
|
|
|
|
DISCOVERED = 0x10,
|
|
|
|
EXPLORED = 0x20,
|
|
|
|
FALLTHROUGH = 1,
|
|
|
|
BRANCH = 2,
|
|
|
|
};
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
|
2014-09-26 09:17:05 +02:00
|
|
|
static int *insn_stack; /* stack of insns to process */
|
|
|
|
static int cur_stack; /* current stack index */
|
|
|
|
static int *insn_state;
|
|
|
|
|
|
|
|
/* t, w, e - match pseudo-code above:
|
|
|
|
* t - index of current instruction
|
|
|
|
* w - next instruction
|
|
|
|
* e - edge
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
|
2014-09-26 09:17:05 +02:00
|
|
|
{
|
|
|
|
if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (w < 0 || w >= env->prog->len) {
|
|
|
|
verbose("jump out of range from insn %d to %d\n", t, w);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
if (e == BRANCH)
|
|
|
|
/* mark branch target for state pruning */
|
|
|
|
env->explored_states[w] = STATE_LIST_MARK;
|
|
|
|
|
2014-09-26 09:17:05 +02:00
|
|
|
if (insn_state[w] == 0) {
|
|
|
|
/* tree-edge */
|
|
|
|
insn_state[t] = DISCOVERED | e;
|
|
|
|
insn_state[w] = DISCOVERED;
|
|
|
|
if (cur_stack >= env->prog->len)
|
|
|
|
return -E2BIG;
|
|
|
|
insn_stack[cur_stack++] = w;
|
|
|
|
return 1;
|
|
|
|
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
|
|
|
|
verbose("back-edge from insn %d to %d\n", t, w);
|
|
|
|
return -EINVAL;
|
|
|
|
} else if (insn_state[w] == EXPLORED) {
|
|
|
|
/* forward- or cross-edge */
|
|
|
|
insn_state[t] = DISCOVERED | e;
|
|
|
|
} else {
|
|
|
|
verbose("insn state internal bug\n");
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* non-recursive depth-first-search to detect loops in BPF program
|
|
|
|
* loop == back-edge in directed graph
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static int check_cfg(struct bpf_verifier_env *env)
|
2014-09-26 09:17:05 +02:00
|
|
|
{
|
|
|
|
struct bpf_insn *insns = env->prog->insnsi;
|
|
|
|
int insn_cnt = env->prog->len;
|
|
|
|
int ret = 0;
|
|
|
|
int i, t;
|
|
|
|
|
|
|
|
insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
|
|
|
|
if (!insn_state)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
|
|
|
|
if (!insn_stack) {
|
|
|
|
kfree(insn_state);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
|
|
|
|
insn_stack[0] = 0; /* 0 is the first instruction */
|
|
|
|
cur_stack = 1;
|
|
|
|
|
|
|
|
peek_stack:
|
|
|
|
if (cur_stack == 0)
|
|
|
|
goto check_state;
|
|
|
|
t = insn_stack[cur_stack - 1];
|
|
|
|
|
|
|
|
if (BPF_CLASS(insns[t].code) == BPF_JMP) {
|
|
|
|
u8 opcode = BPF_OP(insns[t].code);
|
|
|
|
|
|
|
|
if (opcode == BPF_EXIT) {
|
|
|
|
goto mark_explored;
|
|
|
|
} else if (opcode == BPF_CALL) {
|
|
|
|
ret = push_insn(t, t + 1, FALLTHROUGH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
bpf, verifier: further improve search pruning
The verifier needs to go through every path of the program in
order to check that it terminates safely, which can be quite a
lot of instructions that need to be processed f.e. in cases with
more branchy programs. With search pruning from f1bca824dabb ("bpf:
add search pruning optimization to verifier") the search space can
already be reduced significantly when the verifier detects that
a previously walked path with same register and stack contents
terminated already (see verifier's states_equal()), so the search
can skip walking those states.
When working with larger programs of > ~2000 (out of max 4096)
insns, we found that the current limit of 32k instructions is easily
hit. For example, a case we ran into is that the search space cannot
be pruned due to branches at the beginning of the program that make
use of certain stack space slots (STACK_MISC), which are never used
in the remaining program (STACK_INVALID). Therefore, the verifier
needs to walk paths for the slots in STACK_INVALID state, but also
all remaining paths with a stack structure, where the slots are in
STACK_MISC, which can nearly double the search space needed. After
various experiments, we find that a limit of 64k processed insns is
a more reasonable choice when dealing with larger programs in practice.
This still allows to reject extreme crafted cases that can have a
much higher complexity (f.e. > ~300k) within the 4096 insns limit
due to search pruning not being able to take effect.
Furthermore, we found that a lot of states can be pruned after a
call instruction, f.e. we were able to reduce the search state by
~35% in some cases with this heuristic, trade-off is to keep a bit
more states in env->explored_states. Usually, call instructions
have a number of preceding register assignments and/or stack stores,
where search pruning has a better chance to suceed in states_equal()
test. The current code marks the branch targets with STATE_LIST_MARK
in case of conditional jumps, and the next (t + 1) instruction in
case of unconditional jump so that f.e. a backjump will walk it. We
also did experiments with using t + insns[t].off + 1 as a marker in
the unconditionally jump case instead of t + 1 with the rationale
that these two branches of execution that converge after the label
might have more potential of pruning. We found that it was a bit
better, but not necessarily significantly better than the current
state, perhaps also due to clang not generating back jumps often.
Hence, we left that as is for now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-05 22:33:17 +02:00
|
|
|
if (t + 1 < insn_cnt)
|
|
|
|
env->explored_states[t + 1] = STATE_LIST_MARK;
|
2014-09-26 09:17:05 +02:00
|
|
|
} else if (opcode == BPF_JA) {
|
|
|
|
if (BPF_SRC(insns[t].code) != BPF_K) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
/* unconditional jump with single edge */
|
|
|
|
ret = push_insn(t, t + insns[t].off + 1,
|
|
|
|
FALLTHROUGH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
/* tell verifier to check for equivalent states
|
|
|
|
* after every call and jump
|
|
|
|
*/
|
2015-04-15 00:57:13 +02:00
|
|
|
if (t + 1 < insn_cnt)
|
|
|
|
env->explored_states[t + 1] = STATE_LIST_MARK;
|
2014-09-26 09:17:05 +02:00
|
|
|
} else {
|
|
|
|
/* conditional jump with two edges */
|
|
|
|
ret = push_insn(t, t + 1, FALLTHROUGH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
|
|
|
|
|
|
|
ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* all other non-branch instructions with single
|
|
|
|
* fall-through edge
|
|
|
|
*/
|
|
|
|
ret = push_insn(t, t + 1, FALLTHROUGH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
mark_explored:
|
|
|
|
insn_state[t] = EXPLORED;
|
|
|
|
if (cur_stack-- <= 0) {
|
|
|
|
verbose("pop stack internal bug\n");
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
goto peek_stack;
|
|
|
|
|
|
|
|
check_state:
|
|
|
|
for (i = 0; i < insn_cnt; i++) {
|
|
|
|
if (insn_state[i] != EXPLORED) {
|
|
|
|
verbose("unreachable insn %d\n", i);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ret = 0; /* cfg looks good */
|
|
|
|
|
|
|
|
err_free:
|
|
|
|
kfree(insn_state);
|
|
|
|
kfree(insn_stack);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-05-06 04:49:10 +02:00
|
|
|
/* the following conditions reduce the number of explored insns
|
|
|
|
* from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
|
|
|
|
struct bpf_reg_state *cur)
|
2016-05-06 04:49:10 +02:00
|
|
|
{
|
|
|
|
if (old->id != cur->id)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* old ptr_to_packet is more conservative, since it allows smaller
|
|
|
|
* range. Ex:
|
|
|
|
* old(off=0,r=10) is equal to cur(off=0,r=20), because
|
|
|
|
* old(off=0,r=10) means that with range=10 the verifier proceeded
|
|
|
|
* further and found no issues with the program. Now we're in the same
|
|
|
|
* spot with cur(off=0,r=20), so we're safe too, since anything further
|
|
|
|
* will only be looking at most 10 bytes after this pointer.
|
|
|
|
*/
|
|
|
|
if (old->off == cur->off && old->range < cur->range)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* old(off=20,r=10) is equal to cur(off=22,re=22 or 5 or 0)
|
|
|
|
* since both cannot be used for packet access and safe(old)
|
|
|
|
* pointer has smaller off that could be used for further
|
|
|
|
* 'if (ptr > data_end)' check
|
|
|
|
* Ex:
|
|
|
|
* old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean
|
|
|
|
* that we cannot access the packet.
|
|
|
|
* The safe range is:
|
|
|
|
* [ptr, ptr + range - off)
|
|
|
|
* so whenever off >=range, it means no safe bytes from this pointer.
|
|
|
|
* When comparing old->off <= cur->off, it means that older code
|
|
|
|
* went with smaller offset and that offset was later
|
|
|
|
* used to figure out the safe range after 'if (ptr > data_end)' check
|
|
|
|
* Say, 'old' state was explored like:
|
|
|
|
* ... R3(off=0, r=0)
|
|
|
|
* R4 = R3 + 20
|
|
|
|
* ... now R4(off=20,r=0) <-- here
|
|
|
|
* if (R4 > data_end)
|
|
|
|
* ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access.
|
|
|
|
* ... the code further went all the way to bpf_exit.
|
|
|
|
* Now the 'cur' state at the mark 'here' has R4(off=30,r=0).
|
|
|
|
* old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier
|
|
|
|
* goes further, such cur_R4 will give larger safe packet range after
|
|
|
|
* 'if (R4 > data_end)' and all further insn were already good with r=20,
|
|
|
|
* so they will be good with r=30 and we can prune the search.
|
|
|
|
*/
|
|
|
|
if (old->off <= cur->off &&
|
|
|
|
old->off >= old->range && cur->off >= cur->range)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
/* compare two verifier states
|
|
|
|
*
|
|
|
|
* all states stored in state_list are known to be valid, since
|
|
|
|
* verifier reached 'bpf_exit' instruction through them
|
|
|
|
*
|
|
|
|
* this function is called when verifier exploring different branches of
|
|
|
|
* execution popped from the state stack. If it sees an old state that has
|
|
|
|
* more strict register state and more strict stack state then this execution
|
|
|
|
* branch doesn't need to be explored further, since verifier already
|
|
|
|
* concluded that more strict state leads to valid finish.
|
|
|
|
*
|
|
|
|
* Therefore two states are equivalent if register state is more conservative
|
|
|
|
* and explored stack state is more conservative than the current one.
|
|
|
|
* Example:
|
|
|
|
* explored current
|
|
|
|
* (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
|
|
|
|
* (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
|
|
|
|
*
|
|
|
|
* In other words if current stack state (one being explored) has more
|
|
|
|
* valid slots than old one that already passed validation, it means
|
|
|
|
* the verifier can stop exploring and conclude that current state is valid too
|
|
|
|
*
|
|
|
|
* Similarly with registers. If explored state has register type as invalid
|
|
|
|
* whereas register type in current state is meaningful, it means that
|
|
|
|
* the current state will reach 'bpf_exit' instruction safely
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static bool states_equal(struct bpf_verifier_state *old,
|
|
|
|
struct bpf_verifier_state *cur)
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *rold, *rcur;
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_REG; i++) {
|
2016-05-06 04:49:09 +02:00
|
|
|
rold = &old->regs[i];
|
|
|
|
rcur = &cur->regs[i];
|
|
|
|
|
|
|
|
if (memcmp(rold, rcur, sizeof(*rold)) == 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (rold->type == NOT_INIT ||
|
|
|
|
(rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT))
|
|
|
|
continue;
|
|
|
|
|
2016-05-06 04:49:10 +02:00
|
|
|
if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
|
|
|
|
compare_ptrs_to_packet(rold, rcur))
|
|
|
|
continue;
|
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
return false;
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_STACK; i++) {
|
2014-10-28 23:11:41 +01:00
|
|
|
if (old->stack_slot_type[i] == STACK_INVALID)
|
|
|
|
continue;
|
|
|
|
if (old->stack_slot_type[i] != cur->stack_slot_type[i])
|
|
|
|
/* Ex: old explored (safe) state has STACK_SPILL in
|
|
|
|
* this stack slot, but current has has STACK_MISC ->
|
|
|
|
* this verifier states are not equivalent,
|
|
|
|
* return false to continue verification of this path
|
|
|
|
*/
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
return false;
|
2014-10-28 23:11:41 +01:00
|
|
|
if (i % BPF_REG_SIZE)
|
|
|
|
continue;
|
|
|
|
if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE],
|
|
|
|
&cur->spilled_regs[i / BPF_REG_SIZE],
|
|
|
|
sizeof(old->spilled_regs[0])))
|
|
|
|
/* when explored and current stack slot types are
|
|
|
|
* the same, check that stored pointers types
|
|
|
|
* are the same as well.
|
|
|
|
* Ex: explored safe path could have stored
|
2016-09-21 12:43:57 +02:00
|
|
|
* (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
|
2014-10-28 23:11:41 +01:00
|
|
|
* but current path has stored:
|
2016-09-21 12:43:57 +02:00
|
|
|
* (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
|
2014-10-28 23:11:41 +01:00
|
|
|
* such verifier states are not equivalent.
|
|
|
|
* return false to continue verification of this path
|
|
|
|
*/
|
|
|
|
return false;
|
|
|
|
else
|
|
|
|
continue;
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state_list *new_sl;
|
|
|
|
struct bpf_verifier_state_list *sl;
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
|
|
|
|
sl = env->explored_states[insn_idx];
|
|
|
|
if (!sl)
|
|
|
|
/* this 'insn_idx' instruction wasn't marked, so we will not
|
|
|
|
* be doing state search here
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
while (sl != STATE_LIST_MARK) {
|
|
|
|
if (states_equal(&sl->state, &env->cur_state))
|
|
|
|
/* reached equivalent register/stack state,
|
|
|
|
* prune the search
|
|
|
|
*/
|
|
|
|
return 1;
|
|
|
|
sl = sl->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* there were no equivalent states, remember current one.
|
|
|
|
* technically the current state is not proven to be safe yet,
|
|
|
|
* but it will either reach bpf_exit (which means it's safe) or
|
|
|
|
* it will be rejected. Since there are no loops, we won't be
|
|
|
|
* seeing this 'insn_idx' instruction again on the way to bpf_exit
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
if (!new_sl)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
/* add new state to the head of linked list */
|
|
|
|
memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state));
|
|
|
|
new_sl->next = env->explored_states[insn_idx];
|
|
|
|
env->explored_states[insn_idx] = new_sl;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static int do_check(struct bpf_verifier_env *env)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state *state = &env->cur_state;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
struct bpf_insn *insns = env->prog->insnsi;
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_reg_state *regs = state->regs;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
int insn_cnt = env->prog->len;
|
|
|
|
int insn_idx, prev_insn_idx = 0;
|
|
|
|
int insn_processed = 0;
|
|
|
|
bool do_print_state = false;
|
|
|
|
|
|
|
|
init_reg_state(regs);
|
|
|
|
insn_idx = 0;
|
|
|
|
for (;;) {
|
|
|
|
struct bpf_insn *insn;
|
|
|
|
u8 class;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (insn_idx >= insn_cnt) {
|
|
|
|
verbose("invalid insn idx %d insn_cnt %d\n",
|
|
|
|
insn_idx, insn_cnt);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
insn = &insns[insn_idx];
|
|
|
|
class = BPF_CLASS(insn->code);
|
|
|
|
|
bpf, verifier: further improve search pruning
The verifier needs to go through every path of the program in
order to check that it terminates safely, which can be quite a
lot of instructions that need to be processed f.e. in cases with
more branchy programs. With search pruning from f1bca824dabb ("bpf:
add search pruning optimization to verifier") the search space can
already be reduced significantly when the verifier detects that
a previously walked path with same register and stack contents
terminated already (see verifier's states_equal()), so the search
can skip walking those states.
When working with larger programs of > ~2000 (out of max 4096)
insns, we found that the current limit of 32k instructions is easily
hit. For example, a case we ran into is that the search space cannot
be pruned due to branches at the beginning of the program that make
use of certain stack space slots (STACK_MISC), which are never used
in the remaining program (STACK_INVALID). Therefore, the verifier
needs to walk paths for the slots in STACK_INVALID state, but also
all remaining paths with a stack structure, where the slots are in
STACK_MISC, which can nearly double the search space needed. After
various experiments, we find that a limit of 64k processed insns is
a more reasonable choice when dealing with larger programs in practice.
This still allows to reject extreme crafted cases that can have a
much higher complexity (f.e. > ~300k) within the 4096 insns limit
due to search pruning not being able to take effect.
Furthermore, we found that a lot of states can be pruned after a
call instruction, f.e. we were able to reduce the search state by
~35% in some cases with this heuristic, trade-off is to keep a bit
more states in env->explored_states. Usually, call instructions
have a number of preceding register assignments and/or stack stores,
where search pruning has a better chance to suceed in states_equal()
test. The current code marks the branch targets with STATE_LIST_MARK
in case of conditional jumps, and the next (t + 1) instruction in
case of unconditional jump so that f.e. a backjump will walk it. We
also did experiments with using t + insns[t].off + 1 as a marker in
the unconditionally jump case instead of t + 1 with the rationale
that these two branches of execution that converge after the label
might have more potential of pruning. We found that it was a bit
better, but not necessarily significantly better than the current
state, perhaps also due to clang not generating back jumps often.
Hence, we left that as is for now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-05 22:33:17 +02:00
|
|
|
if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
verbose("BPF program is too large. Proccessed %d insn\n",
|
|
|
|
insn_processed);
|
|
|
|
return -E2BIG;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
err = is_state_visited(env, insn_idx);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
if (err == 1) {
|
|
|
|
/* found equivalent state, can prune the search */
|
|
|
|
if (log_level) {
|
|
|
|
if (do_print_state)
|
|
|
|
verbose("\nfrom %d to %d: safe\n",
|
|
|
|
prev_insn_idx, insn_idx);
|
|
|
|
else
|
|
|
|
verbose("%d: safe\n", insn_idx);
|
|
|
|
}
|
|
|
|
goto process_bpf_exit;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (log_level && do_print_state) {
|
|
|
|
verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
|
2016-05-06 04:49:09 +02:00
|
|
|
print_verifier_state(&env->cur_state);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
do_print_state = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (log_level) {
|
|
|
|
verbose("%d: ", insn_idx);
|
|
|
|
print_bpf_insn(insn);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (class == BPF_ALU || class == BPF_ALU64) {
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
err = check_alu_op(env, insn);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
} else if (class == BPF_LDX) {
|
2016-09-21 12:43:56 +02:00
|
|
|
enum bpf_reg_type *prev_src_type, src_reg_type;
|
2015-03-13 19:57:42 +01:00
|
|
|
|
|
|
|
/* check for reserved fields is already done */
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* check src operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2015-04-16 01:19:33 +02:00
|
|
|
src_reg_type = regs[insn->src_reg].type;
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* check that memory (src_reg + off) is readable,
|
|
|
|
* the state of dst_reg will be updated by this func
|
|
|
|
*/
|
|
|
|
err = check_mem_access(env, insn->src_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_READ,
|
|
|
|
insn->dst_reg);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2016-09-02 03:37:21 +02:00
|
|
|
if (BPF_SIZE(insn->code) != BPF_W &&
|
|
|
|
BPF_SIZE(insn->code) != BPF_DW) {
|
2015-04-16 01:19:33 +02:00
|
|
|
insn_idx++;
|
|
|
|
continue;
|
|
|
|
}
|
2015-03-13 19:57:42 +01:00
|
|
|
|
2016-09-21 12:43:56 +02:00
|
|
|
prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
|
|
|
|
|
|
|
|
if (*prev_src_type == NOT_INIT) {
|
2015-03-13 19:57:42 +01:00
|
|
|
/* saw a valid insn
|
|
|
|
* dst_reg = *(u32 *)(src_reg + off)
|
2016-09-21 12:43:56 +02:00
|
|
|
* save type to validate intersecting paths
|
2015-03-13 19:57:42 +01:00
|
|
|
*/
|
2016-09-21 12:43:56 +02:00
|
|
|
*prev_src_type = src_reg_type;
|
2015-03-13 19:57:42 +01:00
|
|
|
|
2016-09-21 12:43:56 +02:00
|
|
|
} else if (src_reg_type != *prev_src_type &&
|
2015-03-13 19:57:42 +01:00
|
|
|
(src_reg_type == PTR_TO_CTX ||
|
2016-09-21 12:43:56 +02:00
|
|
|
*prev_src_type == PTR_TO_CTX)) {
|
2015-03-13 19:57:42 +01:00
|
|
|
/* ABuser program is trying to use the same insn
|
|
|
|
* dst_reg = *(u32*) (src_reg + off)
|
|
|
|
* with different pointer types:
|
|
|
|
* src_reg == ctx in one branch and
|
|
|
|
* src_reg == stack|map in some other branch.
|
|
|
|
* Reject it.
|
|
|
|
*/
|
|
|
|
verbose("same insn cannot be used with different pointers\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else if (class == BPF_STX) {
|
2016-09-21 12:43:56 +02:00
|
|
|
enum bpf_reg_type *prev_dst_type, dst_reg_type;
|
2015-06-04 19:11:54 +02:00
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
if (BPF_MODE(insn->code) == BPF_XADD) {
|
|
|
|
err = check_xadd(env, insn);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
insn_idx++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src1 operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
/* check src2 operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2015-06-04 19:11:54 +02:00
|
|
|
dst_reg_type = regs[insn->dst_reg].type;
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
/* check that memory (dst_reg + off) is writeable */
|
|
|
|
err = check_mem_access(env, insn->dst_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_WRITE,
|
|
|
|
insn->src_reg);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2016-09-21 12:43:56 +02:00
|
|
|
prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
|
|
|
|
|
|
|
|
if (*prev_dst_type == NOT_INIT) {
|
|
|
|
*prev_dst_type = dst_reg_type;
|
|
|
|
} else if (dst_reg_type != *prev_dst_type &&
|
2015-06-04 19:11:54 +02:00
|
|
|
(dst_reg_type == PTR_TO_CTX ||
|
2016-09-21 12:43:56 +02:00
|
|
|
*prev_dst_type == PTR_TO_CTX)) {
|
2015-06-04 19:11:54 +02:00
|
|
|
verbose("same insn cannot be used with different pointers\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else if (class == BPF_ST) {
|
|
|
|
if (BPF_MODE(insn->code) != BPF_MEM ||
|
|
|
|
insn->src_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_ST uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* check src operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* check that memory (dst_reg + off) is writeable */
|
|
|
|
err = check_mem_access(env, insn->dst_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_WRITE,
|
|
|
|
-1);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
} else if (class == BPF_JMP) {
|
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
|
|
|
|
if (opcode == BPF_CALL) {
|
|
|
|
if (BPF_SRC(insn->code) != BPF_K ||
|
|
|
|
insn->off != 0 ||
|
|
|
|
insn->src_reg != BPF_REG_0 ||
|
|
|
|
insn->dst_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_CALL uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = check_call(env, insn->imm);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
} else if (opcode == BPF_JA) {
|
|
|
|
if (BPF_SRC(insn->code) != BPF_K ||
|
|
|
|
insn->imm != 0 ||
|
|
|
|
insn->src_reg != BPF_REG_0 ||
|
|
|
|
insn->dst_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_JA uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
insn_idx += insn->off + 1;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
} else if (opcode == BPF_EXIT) {
|
|
|
|
if (BPF_SRC(insn->code) != BPF_K ||
|
|
|
|
insn->imm != 0 ||
|
|
|
|
insn->src_reg != BPF_REG_0 ||
|
|
|
|
insn->dst_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_EXIT uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* eBPF calling convetion is such that R0 is used
|
|
|
|
* to return the value from eBPF program.
|
|
|
|
* Make sure that it's readable at this time
|
|
|
|
* of bpf_exit, which means that program wrote
|
|
|
|
* something into it earlier
|
|
|
|
*/
|
|
|
|
err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
if (is_pointer_value(env, BPF_REG_0)) {
|
|
|
|
verbose("R0 leaks addr as return value\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
process_bpf_exit:
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
insn_idx = pop_stack(env, &prev_insn_idx);
|
|
|
|
if (insn_idx < 0) {
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
do_print_state = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
err = check_cond_jmp_op(env, insn, &insn_idx);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
} else if (class == BPF_LD) {
|
|
|
|
u8 mode = BPF_MODE(insn->code);
|
|
|
|
|
|
|
|
if (mode == BPF_ABS || mode == BPF_IND) {
|
2014-12-02 00:06:34 +01:00
|
|
|
err = check_ld_abs(env, insn);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
} else if (mode == BPF_IMM) {
|
|
|
|
err = check_ld_imm(env, insn);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
insn_idx++;
|
|
|
|
} else {
|
|
|
|
verbose("invalid BPF_LD mode\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
verbose("unknown insn class %d\n", class);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
insn_idx++;
|
|
|
|
}
|
|
|
|
|
2016-05-06 04:49:09 +02:00
|
|
|
verbose("processed %d insns\n", insn_processed);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-02 03:37:23 +02:00
|
|
|
static int check_map_prog_compatibility(struct bpf_map *map,
|
|
|
|
struct bpf_prog *prog)
|
|
|
|
|
|
|
|
{
|
|
|
|
if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
|
|
|
|
(map->map_type == BPF_MAP_TYPE_HASH ||
|
|
|
|
map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
|
|
|
|
(map->map_flags & BPF_F_NO_PREALLOC)) {
|
|
|
|
verbose("perf_event programs can only use preallocated hash map\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-26 09:17:04 +02:00
|
|
|
/* look for pseudo eBPF instructions that access map FDs and
|
|
|
|
* replace them with actual map pointers
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
|
2014-09-26 09:17:04 +02:00
|
|
|
{
|
|
|
|
struct bpf_insn *insn = env->prog->insnsi;
|
|
|
|
int insn_cnt = env->prog->len;
|
2016-09-02 03:37:23 +02:00
|
|
|
int i, j, err;
|
2014-09-26 09:17:04 +02:00
|
|
|
|
|
|
|
for (i = 0; i < insn_cnt; i++, insn++) {
|
2015-03-13 19:57:42 +01:00
|
|
|
if (BPF_CLASS(insn->code) == BPF_LDX &&
|
2015-06-04 19:11:54 +02:00
|
|
|
(BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
|
2015-03-13 19:57:42 +01:00
|
|
|
verbose("BPF_LDX uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2015-06-04 19:11:54 +02:00
|
|
|
if (BPF_CLASS(insn->code) == BPF_STX &&
|
|
|
|
((BPF_MODE(insn->code) != BPF_MEM &&
|
|
|
|
BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
|
|
|
|
verbose("BPF_STX uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2014-09-26 09:17:04 +02:00
|
|
|
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
|
|
|
|
struct bpf_map *map;
|
|
|
|
struct fd f;
|
|
|
|
|
|
|
|
if (i == insn_cnt - 1 || insn[1].code != 0 ||
|
|
|
|
insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
|
|
|
|
insn[1].off != 0) {
|
|
|
|
verbose("invalid bpf_ld_imm64 insn\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (insn->src_reg == 0)
|
|
|
|
/* valid generic load 64-bit imm */
|
|
|
|
goto next_insn;
|
|
|
|
|
|
|
|
if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
|
|
|
|
verbose("unrecognized bpf_ld_imm64 insn\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
f = fdget(insn->imm);
|
2015-10-29 14:58:07 +01:00
|
|
|
map = __bpf_map_get(f);
|
2014-09-26 09:17:04 +02:00
|
|
|
if (IS_ERR(map)) {
|
|
|
|
verbose("fd %d is not pointing to valid bpf_map\n",
|
|
|
|
insn->imm);
|
|
|
|
return PTR_ERR(map);
|
|
|
|
}
|
|
|
|
|
2016-09-02 03:37:23 +02:00
|
|
|
err = check_map_prog_compatibility(map, env->prog);
|
|
|
|
if (err) {
|
|
|
|
fdput(f);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2014-09-26 09:17:04 +02:00
|
|
|
/* store map pointer inside BPF_LD_IMM64 instruction */
|
|
|
|
insn[0].imm = (u32) (unsigned long) map;
|
|
|
|
insn[1].imm = ((u64) (unsigned long) map) >> 32;
|
|
|
|
|
|
|
|
/* check whether we recorded this map already */
|
|
|
|
for (j = 0; j < env->used_map_cnt; j++)
|
|
|
|
if (env->used_maps[j] == map) {
|
|
|
|
fdput(f);
|
|
|
|
goto next_insn;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (env->used_map_cnt >= MAX_USED_MAPS) {
|
|
|
|
fdput(f);
|
|
|
|
return -E2BIG;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* hold the map. If the program is rejected by verifier,
|
|
|
|
* the map will be released by release_maps() or it
|
|
|
|
* will be used by the valid program until it's unloaded
|
|
|
|
* and all maps are released in free_bpf_prog_info()
|
|
|
|
*/
|
2016-04-28 03:56:20 +02:00
|
|
|
map = bpf_map_inc(map, false);
|
|
|
|
if (IS_ERR(map)) {
|
|
|
|
fdput(f);
|
|
|
|
return PTR_ERR(map);
|
|
|
|
}
|
|
|
|
env->used_maps[env->used_map_cnt++] = map;
|
|
|
|
|
2014-09-26 09:17:04 +02:00
|
|
|
fdput(f);
|
|
|
|
next_insn:
|
|
|
|
insn++;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* now all pseudo BPF_LD_IMM64 instructions load valid
|
|
|
|
* 'struct bpf_map *' into a register instead of user map_fd.
|
|
|
|
* These pointers will be used later by verifier to validate map access.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* drop refcnt of maps used by the rejected program */
|
2016-09-21 12:43:57 +02:00
|
|
|
static void release_maps(struct bpf_verifier_env *env)
|
2014-09-26 09:17:04 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < env->used_map_cnt; i++)
|
|
|
|
bpf_map_put(env->used_maps[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
|
2016-09-21 12:43:57 +02:00
|
|
|
static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
|
2014-09-26 09:17:04 +02:00
|
|
|
{
|
|
|
|
struct bpf_insn *insn = env->prog->insnsi;
|
|
|
|
int insn_cnt = env->prog->len;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < insn_cnt; i++, insn++)
|
|
|
|
if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
|
|
|
|
insn->src_reg = 0;
|
|
|
|
}
|
|
|
|
|
2015-03-13 19:57:42 +01:00
|
|
|
/* convert load instructions that access fields of 'struct __sk_buff'
|
|
|
|
* into sequence of instructions that access fields of 'struct sk_buff'
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
static int convert_ctx_accesses(struct bpf_verifier_env *env)
|
2015-03-13 19:57:42 +01:00
|
|
|
{
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
const struct bpf_verifier_ops *ops = env->prog->aux->ops;
|
2016-09-21 12:43:56 +02:00
|
|
|
const int insn_cnt = env->prog->len;
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
struct bpf_insn insn_buf[16], *insn;
|
2015-03-13 19:57:42 +01:00
|
|
|
struct bpf_prog *new_prog;
|
2015-06-04 19:11:54 +02:00
|
|
|
enum bpf_access_type type;
|
2016-09-21 12:43:56 +02:00
|
|
|
int i, cnt, delta = 0;
|
2015-03-13 19:57:42 +01:00
|
|
|
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
if (ops->gen_prologue) {
|
|
|
|
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
|
|
|
|
env->prog);
|
|
|
|
if (cnt >= ARRAY_SIZE(insn_buf)) {
|
|
|
|
verbose("bpf verifier is misconfigured\n");
|
|
|
|
return -EINVAL;
|
|
|
|
} else if (cnt) {
|
|
|
|
new_prog = bpf_patch_insn_single(env->prog, 0,
|
|
|
|
insn_buf, cnt);
|
|
|
|
if (!new_prog)
|
|
|
|
return -ENOMEM;
|
|
|
|
env->prog = new_prog;
|
2016-09-21 12:43:56 +02:00
|
|
|
delta += cnt - 1;
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ops->convert_ctx_access)
|
2015-03-13 19:57:42 +01:00
|
|
|
return 0;
|
|
|
|
|
2016-09-21 12:43:56 +02:00
|
|
|
insn = env->prog->insnsi + delta;
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
|
2015-03-13 19:57:42 +01:00
|
|
|
for (i = 0; i < insn_cnt; i++, insn++) {
|
2016-09-02 03:37:21 +02:00
|
|
|
if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
|
|
|
|
insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
|
2015-06-04 19:11:54 +02:00
|
|
|
type = BPF_READ;
|
2016-09-02 03:37:21 +02:00
|
|
|
else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
|
|
|
|
insn->code == (BPF_STX | BPF_MEM | BPF_DW))
|
2015-06-04 19:11:54 +02:00
|
|
|
type = BPF_WRITE;
|
|
|
|
else
|
2015-03-13 19:57:42 +01:00
|
|
|
continue;
|
|
|
|
|
2016-09-21 12:43:56 +02:00
|
|
|
if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
|
2015-03-13 19:57:42 +01:00
|
|
|
continue;
|
|
|
|
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 00:26:13 +02:00
|
|
|
cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
|
|
|
|
insn->off, insn_buf, env->prog);
|
2015-03-13 19:57:42 +01:00
|
|
|
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
|
|
|
|
verbose("bpf verifier is misconfigured\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:56 +02:00
|
|
|
new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
|
|
|
|
cnt);
|
2015-03-13 19:57:42 +01:00
|
|
|
if (!new_prog)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2016-09-21 12:43:56 +02:00
|
|
|
delta += cnt - 1;
|
2015-03-13 19:57:42 +01:00
|
|
|
|
|
|
|
/* keep walking new program and skip insns we just inserted */
|
|
|
|
env->prog = new_prog;
|
2016-09-21 12:43:56 +02:00
|
|
|
insn = new_prog->insnsi + i + delta;
|
2015-03-13 19:57:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
static void free_states(struct bpf_verifier_env *env)
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
{
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_state_list *sl, *sln;
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!env->explored_states)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < env->prog->len; i++) {
|
|
|
|
sl = env->explored_states[i];
|
|
|
|
|
|
|
|
if (sl)
|
|
|
|
while (sl != STATE_LIST_MARK) {
|
|
|
|
sln = sl->next;
|
|
|
|
kfree(sl);
|
|
|
|
sl = sln;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
kfree(env->explored_states);
|
|
|
|
}
|
|
|
|
|
2015-03-13 19:57:42 +01:00
|
|
|
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
|
2014-09-26 09:17:02 +02:00
|
|
|
{
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
char __user *log_ubuf = NULL;
|
2016-09-21 12:43:57 +02:00
|
|
|
struct bpf_verifier_env *env;
|
2014-09-26 09:17:02 +02:00
|
|
|
int ret = -EINVAL;
|
|
|
|
|
2015-03-13 19:57:42 +01:00
|
|
|
if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
return -E2BIG;
|
|
|
|
|
2016-09-21 12:43:57 +02:00
|
|
|
/* 'struct bpf_verifier_env' can be global, but since it's not small,
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
* allocate/free it every time bpf_check() is called
|
|
|
|
*/
|
2016-09-21 12:43:57 +02:00
|
|
|
env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
if (!env)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2016-09-21 12:43:56 +02:00
|
|
|
env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
|
|
|
|
(*prog)->len);
|
|
|
|
ret = -ENOMEM;
|
|
|
|
if (!env->insn_aux_data)
|
|
|
|
goto err_free_env;
|
2015-03-13 19:57:42 +01:00
|
|
|
env->prog = *prog;
|
2014-09-26 09:17:04 +02:00
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
/* grab the mutex to protect few globals used by verifier */
|
|
|
|
mutex_lock(&bpf_verifier_lock);
|
|
|
|
|
|
|
|
if (attr->log_level || attr->log_buf || attr->log_size) {
|
|
|
|
/* user requested verbose verifier output
|
|
|
|
* and supplied buffer to store the verification trace
|
|
|
|
*/
|
|
|
|
log_level = attr->log_level;
|
|
|
|
log_ubuf = (char __user *) (unsigned long) attr->log_buf;
|
|
|
|
log_size = attr->log_size;
|
|
|
|
log_len = 0;
|
|
|
|
|
|
|
|
ret = -EINVAL;
|
|
|
|
/* log_* values have to be sane */
|
|
|
|
if (log_size < 128 || log_size > UINT_MAX >> 8 ||
|
|
|
|
log_level == 0 || log_ubuf == NULL)
|
2016-09-21 12:43:56 +02:00
|
|
|
goto err_unlock;
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
|
|
|
|
ret = -ENOMEM;
|
|
|
|
log_buf = vmalloc(log_size);
|
|
|
|
if (!log_buf)
|
2016-09-21 12:43:56 +02:00
|
|
|
goto err_unlock;
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
} else {
|
|
|
|
log_level = 0;
|
|
|
|
}
|
|
|
|
|
2014-09-26 09:17:04 +02:00
|
|
|
ret = replace_map_fd_with_map_ptr(env);
|
|
|
|
if (ret < 0)
|
|
|
|
goto skip_full_check;
|
|
|
|
|
2015-03-13 19:57:42 +01:00
|
|
|
env->explored_states = kcalloc(env->prog->len,
|
2016-09-21 12:43:57 +02:00
|
|
|
sizeof(struct bpf_verifier_state_list *),
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
GFP_USER);
|
|
|
|
ret = -ENOMEM;
|
|
|
|
if (!env->explored_states)
|
|
|
|
goto skip_full_check;
|
|
|
|
|
2014-09-26 09:17:05 +02:00
|
|
|
ret = check_cfg(env);
|
|
|
|
if (ret < 0)
|
|
|
|
goto skip_full_check;
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:23:21 +02:00
|
|
|
env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
ret = do_check(env);
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
|
2014-09-26 09:17:04 +02:00
|
|
|
skip_full_check:
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:06 +02:00
|
|
|
while (pop_stack(env, NULL) >= 0);
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 03:50:01 +02:00
|
|
|
free_states(env);
|
2014-09-26 09:17:04 +02:00
|
|
|
|
2015-03-13 19:57:42 +01:00
|
|
|
if (ret == 0)
|
|
|
|
/* program is valid, convert *(u32*)(ctx + off) accesses */
|
|
|
|
ret = convert_ctx_accesses(env);
|
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
if (log_level && log_len >= log_size - 1) {
|
|
|
|
BUG_ON(log_len >= log_size);
|
|
|
|
/* verifier log exceeded user supplied buffer */
|
|
|
|
ret = -ENOSPC;
|
|
|
|
/* fall through to return what was recorded */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* copy verifier log back to user space including trailing zero */
|
|
|
|
if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto free_log_buf;
|
|
|
|
}
|
|
|
|
|
2014-09-26 09:17:04 +02:00
|
|
|
if (ret == 0 && env->used_map_cnt) {
|
|
|
|
/* if program passed verifier, update used_maps in bpf_prog_info */
|
2015-03-13 19:57:42 +01:00
|
|
|
env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
|
|
|
|
sizeof(env->used_maps[0]),
|
|
|
|
GFP_KERNEL);
|
2014-09-26 09:17:04 +02:00
|
|
|
|
2015-03-13 19:57:42 +01:00
|
|
|
if (!env->prog->aux->used_maps) {
|
2014-09-26 09:17:04 +02:00
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_log_buf;
|
|
|
|
}
|
|
|
|
|
2015-03-13 19:57:42 +01:00
|
|
|
memcpy(env->prog->aux->used_maps, env->used_maps,
|
2014-09-26 09:17:04 +02:00
|
|
|
sizeof(env->used_maps[0]) * env->used_map_cnt);
|
2015-03-13 19:57:42 +01:00
|
|
|
env->prog->aux->used_map_cnt = env->used_map_cnt;
|
2014-09-26 09:17:04 +02:00
|
|
|
|
|
|
|
/* program is valid. Convert pseudo bpf_ld_imm64 into generic
|
|
|
|
* bpf_ld_imm64 instructions
|
|
|
|
*/
|
|
|
|
convert_pseudo_ld_imm64(env);
|
|
|
|
}
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
|
|
|
|
free_log_buf:
|
|
|
|
if (log_level)
|
|
|
|
vfree(log_buf);
|
2015-03-13 19:57:42 +01:00
|
|
|
if (!env->prog->aux->used_maps)
|
2014-09-26 09:17:04 +02:00
|
|
|
/* if we didn't copy map pointers into bpf_prog_info, release
|
|
|
|
* them now. Otherwise free_bpf_prog_info() will release them.
|
|
|
|
*/
|
|
|
|
release_maps(env);
|
2015-03-13 19:57:42 +01:00
|
|
|
*prog = env->prog;
|
2016-09-21 12:43:56 +02:00
|
|
|
err_unlock:
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 09:17:03 +02:00
|
|
|
mutex_unlock(&bpf_verifier_lock);
|
2016-09-21 12:43:56 +02:00
|
|
|
vfree(env->insn_aux_data);
|
|
|
|
err_free_env:
|
|
|
|
kfree(env);
|
2014-09-26 09:17:02 +02:00
|
|
|
return ret;
|
|
|
|
}
|