From f292b87d3ac020418644d8a4bbf29814890505cb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 6 Jul 2018 14:34:29 -0700
Subject: [PATCH 01/11] bpf: include errno.h from bpf-cgroup.h

Commit fdb5c4531c1e ("bpf: fix attach type BPF_LIRC_MODE2 dependency
wrt CONFIG_CGROUP_BPF") caused some build issues, detected by 0-DAY
kernel test infrastructure.

The problem is that cgroup_bpf_prog_attach/detach/query() functions
can return -EINVAL error code, which is not defined. Fix this adding
errno.h to includes.

Fixes: fdb5c4531c1e ("bpf: fix attach type BPF_LIRC_MODE2 dependency wrt CONFIG_CGROUP_BPF")
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Sean Young <sean@mess.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 79795c5fa7c3..d50c2f0a655a 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -2,6 +2,7 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H
 
+#include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <uapi/linux/bpf.h>
 

From b9626f45abccd044f8048269c67720f0719f2d4e Mon Sep 17 00:00:00 2001
From: Taeung Song <treeze.taeung@gmail.com>
Date: Tue, 10 Jul 2018 02:51:21 +0900
Subject: [PATCH 02/11] samples/bpf: Fix tc and ip paths in xdp2skb_meta.sh

The below path error can occur:

  # ./xdp2skb_meta.sh --dev eth0 --list
  ./xdp2skb_meta.sh: line 61: /usr/sbin/tc: No such file or directory

So just use command names instead of absolute paths of tc and ip.
In addition, it allow callers to redefine $TC and $IP paths

Fixes: 36e04a2d78d9 ("samples/bpf: xdp2skb_meta shows transferring info from XDP to SKB")
Reviewed-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Taeung Song <treeze.taeung@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/xdp2skb_meta.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh
index b9c9549c4c27..4bde9d066c46 100755
--- a/samples/bpf/xdp2skb_meta.sh
+++ b/samples/bpf/xdp2skb_meta.sh
@@ -16,8 +16,8 @@
 BPF_FILE=xdp2skb_meta_kern.o
 DIR=$(dirname $0)
 
-export TC=/usr/sbin/tc
-export IP=/usr/sbin/ip
+[ -z "$TC" ] && TC=tc
+[ -z "$IP" ] && IP=ip
 
 function usage() {
     echo ""
@@ -53,7 +53,7 @@ function _call_cmd() {
     local allow_fail="$2"
     shift 2
     if [[ -n "$VERBOSE" ]]; then
-	echo "$(basename $cmd) $@"
+	echo "$cmd $@"
     fi
     if [[ -n "$DRYRUN" ]]; then
 	return

From 59ee4129a279070d8e2f9dc1660330f6593c7808 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 10 Jul 2018 00:43:22 +0200
Subject: [PATCH 03/11] bpf: fix ldx in ld_abs rewrite for large offsets

Mark reported that syzkaller triggered a KASAN detected slab-out-of-bounds
bug in ___bpf_prog_run() with a BPF_LD | BPF_ABS word load at offset 0x8001.
After further investigation it became clear that the issue was the
BPF_LDX_MEM() which takes offset as an argument whereas it cannot encode
larger than S16_MAX offsets into it. For this synthetical case we need to
move the full address into tmp register instead and do the LDX without
immediate value.

Fixes: e0cea7ce988c ("bpf: implement ld_abs/ld_ind in native bpf")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 5fa66a33927f..a13f5b1f1636 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -459,11 +459,21 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
 	     (!unaligned_ok && offset >= 0 &&
 	      offset + ip_align >= 0 &&
 	      offset + ip_align % size == 0))) {
+		bool ldx_off_ok = offset <= S16_MAX;
+
 		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
 		*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
-		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian);
-		*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, BPF_REG_D,
-				      offset);
+		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
+				      size, 2 + endian + (!ldx_off_ok * 2));
+		if (ldx_off_ok) {
+			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
+					      BPF_REG_D, offset);
+		} else {
+			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
+			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
+			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
+					      BPF_REG_TMP, 0);
+		}
 		if (endian)
 			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
 		*insn++ = BPF_JMP_A(8);

From 61d769807f273fda962866f3d4c677cda9974d3c Mon Sep 17 00:00:00 2001
From: Mathieu Xhonneux <m.xhonneux@gmail.com>
Date: Tue, 10 Jul 2018 16:54:02 +0000
Subject: [PATCH 04/11] bpf: fix availability probing for seg6 helpers

bpf_lwt_seg6_* helpers require CONFIG_IPV6_SEG6_BPF, and currently
return -EOPNOTSUPP to indicate unavailability. This patch forces the
BPF verifier to reject programs using these helpers when
!CONFIG_IPV6_SEG6_BPF, allowing users to more easily probe if they are
available or not.

Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/filter.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index a13f5b1f1636..06da770f543f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4536,10 +4536,10 @@ static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
 	.arg4_type	= ARG_CONST_SIZE
 };
 
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
 	   const void *, from, u32, len)
 {
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 	struct seg6_bpf_srh_state *srh_state =
 		this_cpu_ptr(&seg6_bpf_srh_states);
 	void *srh_tlvs, *srh_end, *ptr;
@@ -4565,9 +4565,6 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
 
 	memcpy(skb->data + offset, from, len);
 	return 0;
-#else /* CONFIG_IPV6_SEG6_BPF */
-	return -EOPNOTSUPP;
-#endif
 }
 
 static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
@@ -4583,7 +4580,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
 BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
 	   u32, action, void *, param, u32, param_len)
 {
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 	struct seg6_bpf_srh_state *srh_state =
 		this_cpu_ptr(&seg6_bpf_srh_states);
 	struct ipv6_sr_hdr *srh;
@@ -4631,9 +4627,6 @@ BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
 	default:
 		return -EINVAL;
 	}
-#else /* CONFIG_IPV6_SEG6_BPF */
-	return -EOPNOTSUPP;
-#endif
 }
 
 static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
@@ -4649,7 +4642,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
 BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
 	   s32, len)
 {
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 	struct seg6_bpf_srh_state *srh_state =
 		this_cpu_ptr(&seg6_bpf_srh_states);
 	void *srh_end, *srh_tlvs, *ptr;
@@ -4693,9 +4685,6 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
 	srh_state->hdrlen += len;
 	srh_state->valid = 0;
 	return 0;
-#else /* CONFIG_IPV6_SEG6_BPF */
-	return -EOPNOTSUPP;
-#endif
 }
 
 static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
@@ -4706,6 +4695,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 	.arg2_type	= ARG_ANYTHING,
 	.arg3_type	= ARG_ANYTHING,
 };
+#endif /* CONFIG_IPV6_SEG6_BPF */
 
 bool bpf_helper_changes_pkt_data(void *func)
 {
@@ -4727,11 +4717,12 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_xdp_adjust_meta ||
 	    func == bpf_msg_pull_data ||
 	    func == bpf_xdp_adjust_tail ||
-	    func == bpf_lwt_push_encap ||
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 	    func == bpf_lwt_seg6_store_bytes ||
 	    func == bpf_lwt_seg6_adjust_srh ||
-	    func == bpf_lwt_seg6_action
-	    )
+	    func == bpf_lwt_seg6_action ||
+#endif
+	    func == bpf_lwt_push_encap)
 		return true;
 
 	return false;
@@ -5066,12 +5057,14 @@ static const struct bpf_func_proto *
 lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 	case BPF_FUNC_lwt_seg6_store_bytes:
 		return &bpf_lwt_seg6_store_bytes_proto;
 	case BPF_FUNC_lwt_seg6_action:
 		return &bpf_lwt_seg6_action_proto;
 	case BPF_FUNC_lwt_seg6_adjust_srh:
 		return &bpf_lwt_seg6_adjust_srh_proto;
+#endif
 	default:
 		return lwt_out_func_proto(func_id, prog);
 	}

From b65f370d0671c4980ffe866c41e327b88893245c Mon Sep 17 00:00:00 2001
From: Okash Khawaja <osk@fb.com>
Date: Tue, 10 Jul 2018 14:33:07 -0700
Subject: [PATCH 05/11] bpf: btf: Fix bitfield extraction for big endian

When extracting bitfield from a number, btf_int_bits_seq_show() builds
a mask and accesses least significant byte of the number in a way
specific to little-endian. This patch fixes that by checking endianness
of the machine and then shifting left and right the unneeded bits.

Thanks to Martin Lau for the help in navigating potential pitfalls when
dealing with endianess and for the final solution.

Fixes: b00b8daec828 ("bpf: btf: Add pretty print capability for data with BTF type info")
Signed-off-by: Okash Khawaja <osk@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/btf.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 2d49d18b793a..e016ac3afa24 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -991,16 +991,13 @@ static void btf_int_bits_seq_show(const struct btf *btf,
 				  void *data, u8 bits_offset,
 				  struct seq_file *m)
 {
+	u16 left_shift_bits, right_shift_bits;
 	u32 int_data = btf_type_int(t);
 	u16 nr_bits = BTF_INT_BITS(int_data);
 	u16 total_bits_offset;
 	u16 nr_copy_bytes;
 	u16 nr_copy_bits;
-	u8 nr_upper_bits;
-	union {
-		u64 u64_num;
-		u8  u8_nums[8];
-	} print_num;
+	u64 print_num;
 
 	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
 	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
@@ -1008,21 +1005,20 @@ static void btf_int_bits_seq_show(const struct btf *btf,
 	nr_copy_bits = nr_bits + bits_offset;
 	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
 
-	print_num.u64_num = 0;
-	memcpy(&print_num.u64_num, data, nr_copy_bytes);
+	print_num = 0;
+	memcpy(&print_num, data, nr_copy_bytes);
 
-	/* Ditch the higher order bits */
-	nr_upper_bits = BITS_PER_BYTE_MASKED(nr_copy_bits);
-	if (nr_upper_bits) {
-		/* We need to mask out some bits of the upper byte. */
-		u8 mask = (1 << nr_upper_bits) - 1;
+#ifdef __BIG_ENDIAN_BITFIELD
+	left_shift_bits = bits_offset;
+#else
+	left_shift_bits = BITS_PER_U64 - nr_copy_bits;
+#endif
+	right_shift_bits = BITS_PER_U64 - nr_bits;
 
-		print_num.u8_nums[nr_copy_bytes - 1] &= mask;
-	}
+	print_num <<= left_shift_bits;
+	print_num >>= right_shift_bits;
 
-	print_num.u64_num >>= bits_offset;
-
-	seq_printf(m, "0x%llx", print_num.u64_num);
+	seq_printf(m, "0x%llx", print_num);
 }
 
 static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,

From 6e6fddc78323533be570873abb728b7e0ba7e024 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Jul 2018 15:30:14 +0200
Subject: [PATCH 06/11] bpf: fix panic due to oob in bpf_prog_test_run_skb

sykzaller triggered several panics similar to the below:

  [...]
  [  248.851531] BUG: KASAN: use-after-free in _copy_to_user+0x5c/0x90
  [  248.857656] Read of size 985 at addr ffff8808017ffff2 by task a.out/1425
  [...]
  [  248.865902] CPU: 1 PID: 1425 Comm: a.out Not tainted 4.18.0-rc4+ #13
  [  248.865903] Hardware name: Supermicro SYS-5039MS-H12TRF/X11SSE-F, BIOS 2.1a 03/08/2018
  [  248.865905] Call Trace:
  [  248.865910]  dump_stack+0xd6/0x185
  [  248.865911]  ? show_regs_print_info+0xb/0xb
  [  248.865913]  ? printk+0x9c/0xc3
  [  248.865915]  ? kmsg_dump_rewind_nolock+0xe4/0xe4
  [  248.865919]  print_address_description+0x6f/0x270
  [  248.865920]  kasan_report+0x25b/0x380
  [  248.865922]  ? _copy_to_user+0x5c/0x90
  [  248.865924]  check_memory_region+0x137/0x190
  [  248.865925]  kasan_check_read+0x11/0x20
  [  248.865927]  _copy_to_user+0x5c/0x90
  [  248.865930]  bpf_test_finish.isra.8+0x4f/0xc0
  [  248.865932]  bpf_prog_test_run_skb+0x6a0/0xba0
  [...]

After scrubbing the BPF prog a bit from the noise, turns out it called
bpf_skb_change_head() for the lwt_xmit prog with headroom of 2. Nothing
wrong in that, however, this was run with repeat >> 0 in bpf_prog_test_run_skb()
and the same skb thus keeps changing until the pskb_expand_head() called
from skb_cow() keeps bailing out in atomic alloc context with -ENOMEM.
So upon return we'll basically have 0 headroom left yet blindly do the
__skb_push() of 14 bytes and keep copying data from there in bpf_test_finish()
out of bounds. Fix to check if we have enough headroom and if pskb_expand_head()
fails, bail out with error.

Another bug independent of this fix (but related in triggering above) is
that BPF_PROG_TEST_RUN should be reworked to reset the skb/xdp buffer to
it's original state from input as otherwise repeating the same test in a
loop won't work for benchmarking when underlying input buffer is getting
changed by the prog each time and reused for the next run leading to
unexpected results.

Fixes: 1cf1cae963c2 ("bpf: introduce BPF_PROG_TEST_RUN command")
Reported-by: syzbot+709412e651e55ed96498@syzkaller.appspotmail.com
Reported-by: syzbot+54f39d6ab58f39720a55@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c                          | 17 ++++++++++++---
 tools/testing/selftests/bpf/test_verifier.c | 23 ++++++++++++++++++++-
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 68c3578343b4..22a78eedf4b1 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -96,6 +96,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	u32 size = kattr->test.data_size_in;
 	u32 repeat = kattr->test.repeat;
 	u32 retval, duration;
+	int hh_len = ETH_HLEN;
 	struct sk_buff *skb;
 	void *data;
 	int ret;
@@ -131,12 +132,22 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	skb_reset_network_header(skb);
 
 	if (is_l2)
-		__skb_push(skb, ETH_HLEN);
+		__skb_push(skb, hh_len);
 	if (is_direct_pkt_access)
 		bpf_compute_data_pointers(skb);
 	retval = bpf_test_run(prog, skb, repeat, &duration);
-	if (!is_l2)
-		__skb_push(skb, ETH_HLEN);
+	if (!is_l2) {
+		if (skb_headroom(skb) < hh_len) {
+			int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
+
+			if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
+				kfree_skb(skb);
+				return -ENOMEM;
+			}
+		}
+		memset(__skb_push(skb, hh_len), 0, hh_len);
+	}
+
 	size = skb->len;
 	/* bpf program can never convert linear skb to non-linear */
 	if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 2ecd27b670d7..f5f7bcc96046 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4974,6 +4974,24 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
 	},
+	{
+		"make headroom for LWT_XMIT",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 34),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+			/* split for s390 to succeed */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 42),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
+	},
 	{
 		"invalid access of tc_classid for LWT_IN",
 		.insns = {
@@ -12554,8 +12572,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	}
 
 	if (fd_prog >= 0) {
+		__u8 tmp[TEST_DATA_LEN << 2];
+		__u32 size_tmp = sizeof(tmp);
+
 		err = bpf_prog_test_run(fd_prog, 1, test->data,
-					sizeof(test->data), NULL, NULL,
+					sizeof(test->data), tmp, &size_tmp,
 					&retval, NULL);
 		if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
 			printf("Unexpected bpf_prog_test_run error\n");

From c7a897843224a92209f306c984975b704969b89d Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 12 Jul 2018 21:44:28 +0200
Subject: [PATCH 07/11] bpf: don't leave partial mangled prog in jit_subprogs
 error path

syzkaller managed to trigger the following bug through fault injection:

  [...]
  [  141.043668] verifier bug. No program starts at insn 3
  [  141.044648] WARNING: CPU: 3 PID: 4072 at kernel/bpf/verifier.c:1613
                 get_callee_stack_depth kernel/bpf/verifier.c:1612 [inline]
  [  141.044648] WARNING: CPU: 3 PID: 4072 at kernel/bpf/verifier.c:1613
                 fixup_call_args kernel/bpf/verifier.c:5587 [inline]
  [  141.044648] WARNING: CPU: 3 PID: 4072 at kernel/bpf/verifier.c:1613
                 bpf_check+0x525e/0x5e60 kernel/bpf/verifier.c:5952
  [  141.047355] CPU: 3 PID: 4072 Comm: a.out Not tainted 4.18.0-rc4+ #51
  [  141.048446] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),BIOS 1.10.2-1 04/01/2014
  [  141.049877] Call Trace:
  [  141.050324]  __dump_stack lib/dump_stack.c:77 [inline]
  [  141.050324]  dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
  [  141.050950]  ? dump_stack_print_info.cold.2+0x52/0x52 lib/dump_stack.c:60
  [  141.051837]  panic+0x238/0x4e7 kernel/panic.c:184
  [  141.052386]  ? add_taint.cold.5+0x16/0x16 kernel/panic.c:385
  [  141.053101]  ? __warn.cold.8+0x148/0x1ba kernel/panic.c:537
  [  141.053814]  ? __warn.cold.8+0x117/0x1ba kernel/panic.c:530
  [  141.054506]  ? get_callee_stack_depth kernel/bpf/verifier.c:1612 [inline]
  [  141.054506]  ? fixup_call_args kernel/bpf/verifier.c:5587 [inline]
  [  141.054506]  ? bpf_check+0x525e/0x5e60 kernel/bpf/verifier.c:5952
  [  141.055163]  __warn.cold.8+0x163/0x1ba kernel/panic.c:538
  [  141.055820]  ? get_callee_stack_depth kernel/bpf/verifier.c:1612 [inline]
  [  141.055820]  ? fixup_call_args kernel/bpf/verifier.c:5587 [inline]
  [  141.055820]  ? bpf_check+0x525e/0x5e60 kernel/bpf/verifier.c:5952
  [...]

What happens in jit_subprogs() is that kcalloc() for the subprog func
buffer is failing with NULL where we then bail out. Latter is a plain
return -ENOMEM, and this is definitely not okay since earlier in the
loop we are walking all subprogs and temporarily rewrite insn->off to
remember the subprog id as well as insn->imm to temporarily point the
call to __bpf_call_base + 1 for the initial JIT pass. Thus, bailing
out in such state and handing this over to the interpreter is troublesome
since later/subsequent e.g. find_subprog() lookups are based on wrong
insn->imm.

Therefore, once we hit this point, we need to jump to out_free path
where we undo all changes from earlier loop, so that interpreter can
work on unmodified insn->{off,imm}.

Another point is that should find_subprog() fail in jit_subprogs() due
to a verifier bug, then we also should not simply defer the program to
the interpreter since also here we did partial modifications. Instead
we should just bail out entirely and return an error to the user who is
trying to load the program.

Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs")
Reported-by: syzbot+7d427828b2ea6e592804@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9e2bf834f13a..63aaac52a265 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5430,6 +5430,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		if (insn->code != (BPF_JMP | BPF_CALL) ||
 		    insn->src_reg != BPF_PSEUDO_CALL)
 			continue;
+		/* Upon error here we cannot fall back to interpreter but
+		 * need a hard reject of the program. Thus -EFAULT is
+		 * propagated in any case.
+		 */
 		subprog = find_subprog(env, i + insn->imm + 1);
 		if (subprog < 0) {
 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
@@ -5450,7 +5454,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 
 	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
 	if (!func)
-		return -ENOMEM;
+		goto out_undo_insn;
 
 	for (i = 0; i < env->subprog_cnt; i++) {
 		subprog_start = subprog_end;
@@ -5515,7 +5519,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		tmp = bpf_int_jit_compile(func[i]);
 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
-			err = -EFAULT;
+			err = -ENOTSUPP;
 			goto out_free;
 		}
 		cond_resched();
@@ -5552,6 +5556,7 @@ out_free:
 		if (func[i])
 			bpf_jit_free(func[i]);
 	kfree(func);
+out_undo_insn:
 	/* cleanup main prog to be interpreted */
 	prog->jit_requested = 0;
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
@@ -5578,6 +5583,8 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 		err = jit_subprogs(env);
 		if (err == 0)
 			return 0;
+		if (err == -EFAULT)
+			return err;
 	}
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	for (i = 0; i < prog->len; i++, insn++) {

From 509d7648135f914a3dd64c17484b33df5dd0a19c Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jul 2018 10:12:49 +0200
Subject: [PATCH 08/11] xsk: do not return ENXIO from TX copy mode

This patch removes the ENXIO return code from TX copy-mode when
someone has forcefully changed the number of queues on the device so
that the queue bound to the socket is no longer available. Just
silently stop sending anything as in zero-copy mode so the error
reporting gets consistent between the two modes.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 7d220cbd09b6..08d09115093e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -244,10 +244,8 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 			goto out;
 		}
 
-		if (xs->queue_id >= xs->dev->real_num_tx_queues) {
-			err = -ENXIO;
+		if (xs->queue_id >= xs->dev->real_num_tx_queues)
 			goto out;
-		}
 
 		skb = sock_alloc_send_skb(sk, len, 1, &err);
 		if (unlikely(!skb)) {

From 9684f5e7c8cdf076aeec81344d4893a30f7aa6a1 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jul 2018 10:12:50 +0200
Subject: [PATCH 09/11] xsk: do not return EAGAIN from sendmsg when completion
 queue is full

This patch stops returning EAGAIN in TX copy mode when the completion
queue is full as zero-copy does not do this. Instead this situation
can be detected by comparing the head and tail pointers of the
completion queue in both modes. In any case, EAGAIN was not the
correct error code here since no amount of calling sendmsg will solve
the problem. Only consuming one or more messages on the completion
queue will fix this.

With this patch, the error reporting becomes consistent between copy
mode and zero-copy mode.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 08d09115093e..87567232d0f8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -233,10 +233,8 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 			goto out;
 		}
 
-		if (xskq_reserve_addr(xs->umem->cq)) {
-			err = -EAGAIN;
+		if (xskq_reserve_addr(xs->umem->cq))
 			goto out;
-		}
 
 		len = desc.len;
 		if (unlikely(len > xs->dev->mtu)) {

From 6efb4436f7fcc50cc3fb9a113d0f16e3968172b1 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jul 2018 10:12:51 +0200
Subject: [PATCH 10/11] xsk: always return ENOBUFS from sendmsg if there is no
 TX queue

This patch makes sure ENOBUFS is always returned from sendmsg if there
is no TX queue configured. This was not the case for zero-copy
mode. With this patch this error reporting is consistent between copy
mode and zero-copy mode.

Fixes: ac98d8aab61b ("xsk: wire upp Tx zero-copy functions")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 87567232d0f8..9c784307f7b0 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -218,9 +218,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 	struct sk_buff *skb;
 	int err = 0;
 
-	if (unlikely(!xs->tx))
-		return -ENOBUFS;
-
 	mutex_lock(&xs->mutex);
 
 	while (xskq_peek_desc(xs->tx, &desc)) {
@@ -296,6 +293,8 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 		return -ENXIO;
 	if (unlikely(!(xs->dev->flags & IFF_UP)))
 		return -ENETDOWN;
+	if (unlikely(!xs->tx))
+		return -ENOBUFS;
 	if (need_wait)
 		return -EOPNOTSUPP;
 

From 09210c4bcc065d9d91ef3c051902ad18252cd3c0 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jul 2018 10:12:52 +0200
Subject: [PATCH 11/11] xsk: do not return EMSGSIZE in copy mode for packets
 larger than MTU

This patch stops returning EMSGSIZE from sendmsg in copy mode when the
size of the packet is larger than the MTU. Just send it to the device
so that it will drop it as in zero-copy mode. This makes the error
reporting consistent between copy mode and zero-copy mode.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9c784307f7b0..72335c2e8108 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -233,15 +233,10 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 		if (xskq_reserve_addr(xs->umem->cq))
 			goto out;
 
-		len = desc.len;
-		if (unlikely(len > xs->dev->mtu)) {
-			err = -EMSGSIZE;
-			goto out;
-		}
-
 		if (xs->queue_id >= xs->dev->real_num_tx_queues)
 			goto out;
 
+		len = desc.len;
 		skb = sock_alloc_send_skb(sk, len, 1, &err);
 		if (unlikely(!skb)) {
 			err = -EAGAIN;