From 66293c46c9314d2b3e80be829a48fed17a848146 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Apr 2019 11:09:25 +0200 Subject: [PATCH 01/48] netfilter: nf_tables: delay chain policy update until transaction is complete When we process a long ruleset of the form chain input { type filter hook input priority filter; policy drop; ... } Then the base chain gets registered early on, we then continue to process/validate the next messages coming in the same transaction. Problem is that if the base chain policy is 'drop', it will take effect immediately, which causes all traffic to get blocked until the transaction completes or is aborted. Fix this by deferring the policy until the transaction has been processed and all of the rules have been flagged as active. Reported-by: Jann Haber Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 50 +++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1606eaa5ae0d..f2e12ae50544 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -214,33 +214,33 @@ static int nft_deltable(struct nft_ctx *ctx) return err; } -static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWCHAIN) nft_activate_next(ctx->net, ctx->chain); list_add_tail(&trans->list, &ctx->net->nft.commit_list); - return 0; + return trans; } static int nft_delchain(struct nft_ctx *ctx) { - int err; + struct nft_trans *trans; - err = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN); - if (err < 0) - return err; + trans = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN); + if (IS_ERR(trans)) + return PTR_ERR(trans); ctx->table->use--; nft_deactivate_next(ctx->net, ctx->chain); - return err; + return 0; } static void nft_rule_expr_activate(const struct nft_ctx *ctx, @@ -1615,6 +1615,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_base_chain *basechain; struct nft_stats __percpu *stats; struct net *net = ctx->net; + struct nft_trans *trans; struct nft_chain *chain; struct nft_rule **rules; int err; @@ -1662,7 +1663,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, ops->dev = hook.dev; chain->flags |= NFT_BASE_CHAIN; - basechain->policy = policy; + basechain->policy = NF_ACCEPT; } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@ -1698,13 +1699,18 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err) goto err2; - err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); - if (err < 0) { + trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); rhltable_remove(&table->chains_ht, &chain->rhlhead, nft_chain_ht_params); goto err2; } + nft_trans_chain_policy(trans) = -1; + if (nft_is_base_chain(chain)) + nft_trans_chain_policy(trans) = policy; + table->use++; list_add_tail_rcu(&chain->list, &table->chains); @@ -6311,6 +6317,27 @@ static int nf_tables_validate(struct net *net) return 0; } +/* a drop policy has to be deferred until all rules have been activated, + * otherwise a large ruleset that contains a drop-policy base chain will + * cause all packets to get dropped until the full transaction has been + * processed. + * + * We defer the drop policy until the transaction has been finalized. + */ +static void nft_chain_commit_drop_policy(struct nft_trans *trans) +{ + struct nft_base_chain *basechain; + + if (nft_trans_chain_policy(trans) != NF_DROP) + return; + + if (!nft_is_base_chain(trans->ctx.chain)) + return; + + basechain = nft_base_chain(trans->ctx.chain); + basechain->policy = NF_DROP; +} + static void nft_chain_commit_update(struct nft_trans *trans) { struct nft_base_chain *basechain; @@ -6632,6 +6659,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); /* trans destroyed after rcu grace period */ } else { + nft_chain_commit_drop_policy(trans); nft_clear(net, trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); nft_trans_destroy(trans); From 270a8a297f42ecff82060aaa53118361f09c1f7d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Apr 2019 11:54:56 +0200 Subject: [PATCH 02/48] netfilter: nft_flow_offload: add entry to flowtable after confirmation This is fixing flow offload for UDP traffic where packets only follow one single direction. The flow_offload_fixup_tcp() mechanism works fine in case that the offloaded entry remains in SYN_RECV state, given sequence tracking is reset and that conntrack handles syn+ack packets as a retransmission, ie. sES + synack => sIG for reply traffic. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 6e6b9adf7d38..8968c7f5a72e 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -94,8 +94,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (help) goto out; - if (ctinfo == IP_CT_NEW || - ctinfo == IP_CT_RELATED) + if (!nf_ct_is_confirmed(ct)) goto out; if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) From 26a302afbe328ecb7507cae2035d938e6635131b Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 30 Apr 2019 01:55:29 +0900 Subject: [PATCH 03/48] netfilter: nf_flow_table: fix netdev refcnt leak flow_offload_alloc() calls nf_route() to get a dst_entry. Internally, nf_route() calls ip_route_output_key() that allocates a dst_entry and holds it. So, a dst_entry should be released by dst_release() if nf_route() is successful. Otherwise, netns exit routine cannot be finished and the following message is printed: [ 257.490952] unregister_netdevice: waiting for lo to become free. Usage count = 1 Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 8968c7f5a72e..69d7a8439c7a 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -112,6 +112,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (ret < 0) goto err_flow_add; + dst_release(route.tuple[!dir].dst); return; err_flow_add: From 33cc3c0cfa64c86b6c4bbee86997aea638534931 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 30 Apr 2019 01:55:54 +0900 Subject: [PATCH 04/48] netfilter: nf_flow_table: check ttl value in flow offload data path nf_flow_offload_ip_hook() and nf_flow_offload_ipv6_hook() do not check ttl value. So, ttl value overflow may occur. Fixes: 97add9f0d66d ("netfilter: flow table support for IPv4") Fixes: 0995210753a2 ("netfilter: flow table support for IPv6") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 1d291a51cd45..46022a2867d7 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -181,6 +181,9 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, iph->protocol != IPPROTO_UDP) return -1; + if (iph->ttl <= 1) + return -1; + thoff = iph->ihl * 4; if (!pskb_may_pull(skb, thoff + sizeof(*ports))) return -1; @@ -411,6 +414,9 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, ip6h->nexthdr != IPPROTO_UDP) return -1; + if (ip6h->hop_limit <= 1) + return -1; + thoff = sizeof(*ip6h); if (!pskb_may_pull(skb, thoff + sizeof(*ports))) return -1; From f5e85ce8e733c2547827f6268136b70b802eabdb Mon Sep 17 00:00:00 2001 From: Jakub Jankowski Date: Thu, 25 Apr 2019 23:46:50 +0200 Subject: [PATCH 05/48] netfilter: nf_conntrack_h323: restore boundary check correctness Since commit bc7d811ace4a ("netfilter: nf_ct_h323: Convert CHECK_BOUND macro to function"), NAT traversal for H.323 doesn't work, failing to parse H323-UserInformation. nf_h323_error_boundary() compares contents of the bitstring, not the addresses, preventing valid H.323 packets from being conntrack'd. This looks like an oversight from when CHECK_BOUND macro was converted to a function. To fix it, stop dereferencing bs->cur and bs->end. Fixes: bc7d811ace4a ("netfilter: nf_ct_h323: Convert CHECK_BOUND macro to function") Signed-off-by: Jakub Jankowski Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 1601275efe2d..4c2ef42e189c 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -172,7 +172,7 @@ static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits) if (bits % BITS_PER_BYTE > 0) bytes++; - if (*bs->cur + bytes > *bs->end) + if (bs->cur + bytes > bs->end) return 1; return 0; From edbd82c5fba009f68d20b5db585be1e667c605f6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Apr 2019 14:33:22 +0200 Subject: [PATCH 06/48] netfilter: nf_tables: fix base chain stat rcu_dereference usage Following splat gets triggered when nfnetlink monitor is running while xtables-nft selftests are running: net/netfilter/nf_tables_api.c:1272 suspicious rcu_dereference_check() usage! other info that might help us debug this: 1 lock held by xtables-nft-mul/27006: #0: 00000000e0f85be9 (&net->nft.commit_mutex){+.+.}, at: nf_tables_valid_genid+0x1a/0x50 Call Trace: nf_tables_fill_chain_info.isra.45+0x6cc/0x6e0 nf_tables_chain_notify+0xf8/0x1a0 nf_tables_commit+0x165c/0x1740 nf_tables_fill_chain_info() can be called both from dumps (rcu read locked) or from the transaction path if a userspace process subscribed to nftables notifications. In the 'table dump' case, rcu_access_pointer() cannot be used: We do not hold transaction mutex so the pointer can be NULLed right after the check. Just unconditionally fetch the value, then have the helper return immediately if its NULL. In the notification case we don't hold the rcu read lock, but updates are prevented due to transaction mutex. Use rcu_dereference_check() to make lockdep aware of this. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f2e12ae50544..e4f6ecac48c3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1190,6 +1190,9 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) u64 pkts, bytes; int cpu; + if (!stats) + return 0; + memset(&total, 0, sizeof(total)); for_each_possible_cpu(cpu) { cpu_stats = per_cpu_ptr(stats, cpu); @@ -1247,6 +1250,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); const struct nf_hook_ops *ops = &basechain->ops; + struct nft_stats __percpu *stats; struct nlattr *nest; nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); @@ -1268,8 +1272,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; - if (rcu_access_pointer(basechain->stats) && - nft_dump_stats(skb, rcu_dereference(basechain->stats))) + stats = rcu_dereference_check(basechain->stats, + lockdep_commit_lock_is_held(net)); + if (nft_dump_stats(skb, stats)) goto nla_put_failure; } From 43c8f131184faf20c07221f3e09724611c6525d8 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 3 May 2019 01:56:38 +0900 Subject: [PATCH 07/48] netfilter: nf_flow_table: fix missing error check for rhashtable_insert_fast rhashtable_insert_fast() may return an error value when memory allocation fails, but flow_offload_add() does not check for errors. This patch just adds missing error checking. Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 7aabfd4b1e50..a9e4f74b1ff6 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -185,14 +185,25 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = { int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) { - flow->timeout = (u32)jiffies; + int err; - rhashtable_insert_fast(&flow_table->rhashtable, - &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, - nf_flow_offload_rhash_params); - rhashtable_insert_fast(&flow_table->rhashtable, - &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, - nf_flow_offload_rhash_params); + err = rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[0].node, + nf_flow_offload_rhash_params); + if (err < 0) + return err; + + err = rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[1].node, + nf_flow_offload_rhash_params); + if (err < 0) { + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[0].node, + nf_flow_offload_rhash_params); + return err; + } + + flow->timeout = (u32)jiffies; return 0; } EXPORT_SYMBOL_GPL(flow_offload_add); From f8e608982022fad035160870f5b06086d3cba54d Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Fri, 3 May 2019 17:40:07 +0200 Subject: [PATCH 08/48] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression Commit 59c08c69c278 ("netfilter: ctnetlink: Support L3 protocol-filter on flush") introduced a user-space regression when flushing connection track entries. Before this commit, the nfgen_family field was not used by the kernel and all entries were removed. Since this commit, nfgen_family is used to filter out entries that should not be removed. One example a broken tool is conntrack. conntrack always sets nfgen_family to AF_INET, so after 59c08c69c278 only IPv4 entries were removed with the -F parameter. Pablo Neira Ayuso suggested using nfgenmsg->version to resolve the regression, and this commit implements his suggestion. nfgenmsg->version is so far set to zero, so it is well-suited to be used as a flag for selecting old or new flush behavior. If version is 0, nfgen_family is ignored and all entries are used. If user-space sets the version to one (or any other value than 0), then the new behavior is used. As version only can have two valid values, I chose not to add a new NFNETLINK_VERSION-constant. Fixes: 59c08c69c278 ("netfilter: ctnetlink: Support L3 protocol-filter on flush") Reported-by: Nicolas Dichtel Suggested-by: Pablo Neira Ayuso Signed-off-by: Kristian Evensen Tested-by: Nicolas Dichtel Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d7f61b0547c6..d2715b4d2e72 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1254,7 +1254,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, struct nf_conntrack_tuple tuple; struct nf_conn *ct; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - u_int8_t u3 = nfmsg->nfgen_family; + u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC; struct nf_conntrack_zone zone; int err; From b33c448c4f920d5399acea9ccbb508baec272f6f Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 3 May 2019 12:39:08 -0600 Subject: [PATCH 09/48] netfilter: nf_conntrack_h323: Remove deprecated config check CONFIG_NF_CONNTRACK_IPV6 has been deprecated so replace it with a check for IPV6 instead. Use nf_ip6_route6() instead of v6ops->route() and keep the IS_MODULE() in nf_ipv6_ops as mentioned by Florian so that direct calls are used when IPV6 is builtin and indirect calls are used only when IPV6 is a module. Fixes: a0ae2562c6c4b2 ("netfilter: conntrack: remove l3proto abstraction") Signed-off-by: Subash Abhinov Kasiviswanathan Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 005589c6d0f6..12de40390e97 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -748,24 +748,19 @@ static int callforward_do_filter(struct net *net, } break; } -#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { - const struct nf_ipv6_ops *v6ops; struct rt6_info *rt1, *rt2; struct flowi6 fl1, fl2; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return 0; - memset(&fl1, 0, sizeof(fl1)); fl1.daddr = src->in6; memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->in6; - if (!v6ops->route(net, (struct dst_entry **)&rt1, + if (!nf_ip6_route(net, (struct dst_entry **)&rt1, flowi6_to_flowi(&fl1), false)) { - if (!v6ops->route(net, (struct dst_entry **)&rt2, + if (!nf_ip6_route(net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr), rt6_nexthop(rt2, &fl2.daddr)) && From 8cd2bc981c5335cacc432cba7666c2741c3e912f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 30 Apr 2019 22:56:14 +0900 Subject: [PATCH 10/48] netfilter: nf_flow_table: do not flow offload deleted conntrack entries Conntrack entries can be deleted by the masquerade module. In that case, flow offload should be deleted too, but GC and data-path of flow offload do not check for conntrack status bits, hence flow offload entries will be removed only by the timeout. Update garbage collector and data-path to check for ct->status. If IPS_DYING_BIT is set, garbage collector removes flow offload entries and data-path routine ignores them. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index a9e4f74b1ff6..4469519a4879 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -243,6 +243,7 @@ flow_offload_lookup(struct nf_flowtable *flow_table, { struct flow_offload_tuple_rhash *tuplehash; struct flow_offload *flow; + struct flow_offload_entry *e; int dir; tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple, @@ -255,6 +256,10 @@ flow_offload_lookup(struct nf_flowtable *flow_table, if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)) return NULL; + e = container_of(flow, struct flow_offload_entry, flow); + if (unlikely(nf_ct_is_dying(e->ct))) + return NULL; + return tuplehash; } EXPORT_SYMBOL_GPL(flow_offload_lookup); @@ -301,8 +306,10 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow) static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { struct nf_flowtable *flow_table = data; + struct flow_offload_entry *e; - if (nf_flow_has_expired(flow) || + e = container_of(flow, struct flow_offload_entry, flow); + if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) || (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) flow_offload_del(flow_table, flow); } From 680f6af5337c98d116e4f127cea7845339dba8da Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 5 May 2019 18:47:33 +0200 Subject: [PATCH 11/48] netfilter: ebtables: CONFIG_COMPAT: reject trailing data after last rule If userspace provides a rule blob with trailing data after last target, we trigger a splat, then convert ruleset to 64bit format (with trailing data), then pass that to do_replace_finish() which then returns -EINVAL. Erroring out right away avoids the splat plus unneeded translation and error unwind. Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support") Reported-by: Tetsuo Handa Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3cad01ac64e4..3a1b94b5c0e5 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2158,7 +2158,9 @@ static int compat_copy_entries(unsigned char *data, unsigned int size_user, if (ret < 0) return ret; - WARN_ON(size_remaining); + if (size_remaining) + return -EINVAL; + return state->buf_kern_offset; } From c6c9c0596c21d30169c7a37b242685dc95fedb07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 7 May 2019 18:21:26 +0200 Subject: [PATCH 12/48] netfilter: nf_tables: remove NFT_CT_TIMEOUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Never used anywhere in the code. Fixes: 7e0b2b57f01d ("netfilter: nft_ct: add ct timeout support") Reported-by: Stéphane Veyret Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a66c8de006cc..92bb1e2b2425 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -966,7 +966,6 @@ enum nft_socket_keys { * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address) * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address) * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address) - * @NFT_CT_TIMEOUT: connection tracking timeout policy assigned to conntrack */ enum nft_ct_keys { NFT_CT_STATE, @@ -992,7 +991,6 @@ enum nft_ct_keys { NFT_CT_DST_IP, NFT_CT_SRC_IP6, NFT_CT_DST_IP6, - NFT_CT_TIMEOUT, __NFT_CT_MAX }; #define NFT_CT_MAX (__NFT_CT_MAX - 1) From e711ab936a44ee9f63f1746c09029543f1b29dd2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 10 May 2019 11:37:58 +0200 Subject: [PATCH 13/48] Revert "selinux: do not report error on connect(AF_UNSPEC)" This reverts commit c7e0d6cca86581092cbbf2cd868b3601495554cf. It was agreed a slightly different fix via the selinux tree. v1 -> v2: - use the correct reverted commit hash Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- security/selinux/hooks.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d82b87c16b0a..c61787b15f27 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4649,7 +4649,7 @@ static int selinux_socket_connect_helper(struct socket *sock, struct lsm_network_audit net = {0,}; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; - unsigned short snum = 0; + unsigned short snum; u32 sid, perm; /* sctp_connectx(3) calls via selinux_sctp_bind_connect() @@ -4674,12 +4674,12 @@ static int selinux_socket_connect_helper(struct socket *sock, break; default: /* Note that SCTP services expect -EINVAL, whereas - * others must handle this at the protocol level: - * connect(AF_UNSPEC) on a connected socket is - * a documented way disconnect the socket. + * others expect -EAFNOSUPPORT. */ if (sksec->sclass == SECCLASS_SCTP_SOCKET) return -EINVAL; + else + return -EAFNOSUPPORT; } err = sel_netport_sid(sk->sk_protocol, snum, &sid); From bdfad5aec1392b93495b77b864d58d7f101dc1c1 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 10 May 2019 12:52:12 +1000 Subject: [PATCH 14/48] bridge: Fix error path for kobject_init_and_add() Currently error return from kobject_init_and_add() is not followed by a call to kobject_put(). This means there is a memory leak. We currently set p to NULL so that kfree() may be called on it as a noop, the code is arguably clearer if we move the kfree() up closer to where it is called (instead of after goto jump). Remove a goto label 'err1' and jump to call to kobject_put() in error return from kobject_init_and_add() fixing the memory leak. Re-name goto label 'put_back' to 'err1' now that we don't use err1, following current nomenclature (err1, err2 ...). Move call to kfree out of the error code at bottom of function up to closer to where memory was allocated. Add comment to clarify call to kfree(). Signed-off-by: Tobin C. Harding Signed-off-by: David S. Miller --- net/bridge/br_if.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4a9aaa3fac8f..6d4a24a7534b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -602,13 +602,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, call_netdevice_notifiers(NETDEV_JOIN, dev); err = dev_set_allmulti(dev, 1); - if (err) - goto put_back; + if (err) { + kfree(p); /* kobject not yet init'd, manually free */ + goto err1; + } err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), SYSFS_BRIDGE_PORT_ATTR); if (err) - goto err1; + goto err2; err = br_sysfs_addif(p); if (err) @@ -700,12 +702,9 @@ err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: kobject_put(&p->kobj); - p = NULL; /* kobject_put frees */ -err1: dev_set_allmulti(dev, -1); -put_back: +err1: dev_put(dev); - kfree(p); return err; } From b96a54154be10f8d4f79d5d1cbfee7176947ad64 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 10 May 2019 11:00:28 +0800 Subject: [PATCH 15/48] dsa: tag_brcm: Fix build error without CONFIG_NET_DSA_TAG_BRCM_PREPEND Fix gcc build error: net/dsa/tag_brcm.c:211:16: error: brcm_prepend_netdev_ops undeclared here (not in a function); did you mean brcm_netdev_ops? DSA_TAG_DRIVER(brcm_prepend_netdev_ops); ^ ./include/net/dsa.h:708:10: note: in definition of macro DSA_TAG_DRIVER .ops = &__ops, \ ^~~~~ ./include/net/dsa.h:701:36: warning: dsa_tag_driver_brcm_prepend_netdev_ops defined but not used [-Wunused-variable] #define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops ^ ./include/net/dsa.h:707:30: note: in expansion of macro DSA_TAG_DRIVER_NAME static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \ ^~~~~~~~~~~~~~~~~~~ net/dsa/tag_brcm.c:211:1: note: in expansion of macro DSA_TAG_DRIVER DSA_TAG_DRIVER(brcm_prepend_netdev_ops); Like the CONFIG_NET_DSA_TAG_BRCM case, brcm_prepend_netdev_ops and DSA_TAG_PROTO_BRCM_PREPEND should be wrappeed by CONFIG_NET_DSA_TAG_BRCM_PREPEND. Reported-by: Hulk Robot Fixes: b74b70c44986 ("net: dsa: Support prepended Broadcom tag") Signed-off-by: YueHaibing Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/tag_brcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index d52db5f2c721..9c3114179690 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -206,10 +206,10 @@ static const struct dsa_device_ops brcm_prepend_netdev_ops = { .rcv = brcm_tag_rcv_prepend, .overhead = BRCM_TAG_LEN, }; -#endif DSA_TAG_DRIVER(brcm_prepend_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_PREPEND); +#endif static struct dsa_tag_driver *dsa_tag_driver_array[] = { #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) From 62740e97881c78b45a117a358a866fb32975def6 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 9 May 2019 23:13:43 -0500 Subject: [PATCH 16/48] net/ibmvnic: Update MAC address settings after adapter reset It was discovered in testing that the underlying hardware MAC address will revert to initial settings following a device reset, but the driver fails to resend the current OS MAC settings. This oversight can result in dropped packets should the scenario occur. Fix this by informing hardware of current MAC address settings following any adapter initialization or resets. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 53 ++++++++++++++++-------------- drivers/net/ethernet/ibm/ibmvnic.h | 2 -- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b398d6c94dbd..2be3bcd0192f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -118,7 +118,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); static int ibmvnic_init(struct ibmvnic_adapter *); static int ibmvnic_reset_init(struct ibmvnic_adapter *); static void release_crq_queue(struct ibmvnic_adapter *); -static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p); +static int __ibmvnic_set_mac(struct net_device *, u8 *); static int init_crq_queue(struct ibmvnic_adapter *adapter); static int send_query_phys_parms(struct ibmvnic_adapter *adapter); @@ -849,11 +849,7 @@ static int ibmvnic_login(struct net_device *netdev) } } while (retry); - /* handle pending MAC address changes after successful login */ - if (adapter->mac_change_pending) { - __ibmvnic_set_mac(netdev, &adapter->desired.mac); - adapter->mac_change_pending = false; - } + __ibmvnic_set_mac(netdev, adapter->mac_addr); return 0; } @@ -1686,28 +1682,40 @@ static void ibmvnic_set_multi(struct net_device *netdev) } } -static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p) +static int __ibmvnic_set_mac(struct net_device *netdev, u8 *dev_addr) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - struct sockaddr *addr = p; union ibmvnic_crq crq; int rc; - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; + if (!is_valid_ether_addr(dev_addr)) { + rc = -EADDRNOTAVAIL; + goto err; + } memset(&crq, 0, sizeof(crq)); crq.change_mac_addr.first = IBMVNIC_CRQ_CMD; crq.change_mac_addr.cmd = CHANGE_MAC_ADDR; - ether_addr_copy(&crq.change_mac_addr.mac_addr[0], addr->sa_data); + ether_addr_copy(&crq.change_mac_addr.mac_addr[0], dev_addr); init_completion(&adapter->fw_done); rc = ibmvnic_send_crq(adapter, &crq); - if (rc) - return rc; + if (rc) { + rc = -EIO; + goto err; + } + wait_for_completion(&adapter->fw_done); /* netdev->dev_addr is changed in handle_change_mac_rsp function */ - return adapter->fw_done_rc ? -EIO : 0; + if (adapter->fw_done_rc) { + rc = -EIO; + goto err; + } + + return 0; +err: + ether_addr_copy(adapter->mac_addr, netdev->dev_addr); + return rc; } static int ibmvnic_set_mac(struct net_device *netdev, void *p) @@ -1716,13 +1724,10 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) struct sockaddr *addr = p; int rc; - if (adapter->state == VNIC_PROBED) { - memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr)); - adapter->mac_change_pending = true; - return 0; - } - - rc = __ibmvnic_set_mac(netdev, addr); + rc = 0; + ether_addr_copy(adapter->mac_addr, addr->sa_data); + if (adapter->state != VNIC_PROBED) + rc = __ibmvnic_set_mac(netdev, addr->sa_data); return rc; } @@ -3937,8 +3942,8 @@ static int handle_change_mac_rsp(union ibmvnic_crq *crq, dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc); goto out; } - memcpy(netdev->dev_addr, &crq->change_mac_addr_rsp.mac_addr[0], - ETH_ALEN); + ether_addr_copy(netdev->dev_addr, + &crq->change_mac_addr_rsp.mac_addr[0]); out: complete(&adapter->fw_done); return rc; @@ -4852,8 +4857,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) init_completion(&adapter->init_done); adapter->resetting = false; - adapter->mac_change_pending = false; - do { rc = init_crq_queue(adapter); if (rc) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index cffdac372a33..dcf2eb6d9290 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -969,7 +969,6 @@ struct ibmvnic_tunables { u64 rx_entries; u64 tx_entries; u64 mtu; - struct sockaddr mac; }; struct ibmvnic_adapter { @@ -1091,7 +1090,6 @@ struct ibmvnic_adapter { bool resetting; bool napi_enabled, from_passive_init; - bool mac_change_pending; bool failover_pending; bool force_reset_recovery; From 0655f9943df2f2d71f406fd77b51d05548134fc2 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 9 May 2019 23:13:44 -0500 Subject: [PATCH 17/48] net/ibmvnic: Update carrier state after link state change Only set the device carrier state to on after receiving an up link state indication from the underlying adapter. Likewise, if a down link indication is receieved, update the carrier state accordingly. This fix ensures that accurate carrier state is reported by the driver following a link state update by the underlying adapter. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2be3bcd0192f..3dcd9c3d8781 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1111,7 +1111,6 @@ static int ibmvnic_open(struct net_device *netdev) } rc = __ibmvnic_open(netdev); - netif_carrier_on(netdev); return rc; } @@ -1864,8 +1863,6 @@ static int do_reset(struct ibmvnic_adapter *adapter, adapter->reset_reason != VNIC_RESET_CHANGE_PARAM) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); - netif_carrier_on(netdev); - return 0; } @@ -1935,8 +1932,6 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, return 0; } - netif_carrier_on(netdev); - return 0; } @@ -4480,6 +4475,10 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, crq->link_state_indication.phys_link_state; adapter->logical_link_state = crq->link_state_indication.logical_link_state; + if (adapter->phys_link_state && adapter->logical_link_state) + netif_carrier_on(netdev); + else + netif_carrier_off(netdev); break; case CHANGE_MAC_ADDR_RSP: netdev_dbg(netdev, "Got MAC address change Response\n"); From 265749861a2483263e6cd4c5e305640e4651c110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 10 May 2019 11:35:14 +0200 Subject: [PATCH 18/48] of_net: remove nvmem-mac-address property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit d01f449c008a ("of_net: add NVMEM support to of_get_mac_address") I've added `nvmem-mac-address` property which was wrong idea as I've allocated the property with devm_kzalloc and then added it to DT, so then 2 entities would be refcounting the allocation. So if the driver unbinds, the buffer is freed, but DT code would be still referencing that memory. I'm removing this property completely instead of fixing it, as it's not needed to have it. Fixes: d01f449c008a ("of_net: add NVMEM support to of_get_mac_address") Suggested-by: Rob Herring Signed-off-by: Petr Štetiar Signed-off-by: David S. Miller --- drivers/of/of_net.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c index 9649cd53e955..a4b392a5406b 100644 --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c @@ -52,39 +52,22 @@ static const void *of_get_mac_addr(struct device_node *np, const char *name) static const void *of_get_mac_addr_nvmem(struct device_node *np) { int ret; - u8 mac[ETH_ALEN]; - struct property *pp; + const void *mac; + u8 nvmem_mac[ETH_ALEN]; struct platform_device *pdev = of_find_device_by_node(np); if (!pdev) return ERR_PTR(-ENODEV); - ret = nvmem_get_mac_address(&pdev->dev, &mac); + ret = nvmem_get_mac_address(&pdev->dev, &nvmem_mac); if (ret) return ERR_PTR(ret); - pp = devm_kzalloc(&pdev->dev, sizeof(*pp), GFP_KERNEL); - if (!pp) + mac = devm_kmemdup(&pdev->dev, nvmem_mac, ETH_ALEN, GFP_KERNEL); + if (!mac) return ERR_PTR(-ENOMEM); - pp->name = "nvmem-mac-address"; - pp->length = ETH_ALEN; - pp->value = devm_kmemdup(&pdev->dev, mac, ETH_ALEN, GFP_KERNEL); - if (!pp->value) { - ret = -ENOMEM; - goto free; - } - - ret = of_add_property(np, pp); - if (ret) - goto free; - - return pp->value; -free: - devm_kfree(&pdev->dev, pp->value); - devm_kfree(&pdev->dev, pp); - - return ERR_PTR(ret); + return mac; } /** From 51828950272d192ecab06da399d58941d2fd4952 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 10 May 2019 11:35:15 +0200 Subject: [PATCH 19/48] dt-bindings: doc: net: remove Linux API references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 687e3d5550c7 ("dt-bindings: doc: reflect new NVMEM of_get_mac_address behaviour") I've kept or added references to Linux of_get_mac_address API which is unwanted so this patch fixes that by removing those references. Fixes: 687e3d5550c7 ("dt-bindings: doc: reflect new NVMEM of_get_mac_address behaviour") Suggested-by: Rob Herring Signed-off-by: Petr Štetiar Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/keystone-netcp.txt | 6 +++--- .../devicetree/bindings/net/wireless/mediatek,mt76.txt | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt index 3a65aabc76a2..6262c2f293b0 100644 --- a/Documentation/devicetree/bindings/net/keystone-netcp.txt +++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt @@ -139,9 +139,9 @@ Optional properties: sub-module attached to this interface. The MAC address will be determined using the optional properties defined in -ethernet.txt, as provided by the of_get_mac_address API and only if efuse-mac -is set to 0. If any of the optional MAC address properties are not present, -then the driver will use random MAC address. +ethernet.txt and only if efuse-mac is set to 0. If all of the optional MAC +address properties are not present, then the driver will use a random MAC +address. Example binding: diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt index 74665502f4cf..7e675dafc256 100644 --- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt +++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt @@ -16,8 +16,8 @@ Optional properties: - ieee80211-freq-limit: See ieee80211.txt - mediatek,mtd-eeprom: Specify a MTD partition + offset containing EEPROM data -The driver is using of_get_mac_address API, so the MAC address can be as well -be set with corresponding optional properties defined in net/ethernet.txt. +The MAC address can as well be set with corresponding optional properties +defined in net/ethernet.txt. Optional nodes: - led: Properties for a connected LED From 1be91314532c4a2779fa17665b6d0368972b570b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 10 May 2019 11:35:16 +0200 Subject: [PATCH 20/48] powerpc: tsi108: fix similar warning reported by kbuild test robot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes following (similar) warning reported by kbuild test robot: In function ‘memcpy’, inlined from ‘smsc75xx_init_mac_address’ at drivers/net/usb/smsc75xx.c:778:3, inlined from ‘smsc75xx_bind’ at drivers/net/usb/smsc75xx.c:1501:2: ./include/linux/string.h:355:9: warning: argument 2 null where non-null expected [-Wnonnull] return __builtin_memcpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/usb/smsc75xx.c: In function ‘smsc75xx_bind’: ./include/linux/string.h:355:9: note: in a call to built-in function ‘__builtin_memcpy’ I've replaced the offending memcpy with ether_addr_copy, because I'm 100% sure, that of_get_mac_address can't return NULL as it returns valid pointer or ERR_PTR encoded value, nothing else. I'm hesitant to just change IS_ERR into IS_ERR_OR_NULL check, as this would make the warning disappear also, but it would be confusing to check for impossible return value just to make a compiler happy. I'm now changing all occurencies of memcpy to ether_addr_copy after the of_get_mac_address call, as it's very likely, that we're going to get similar reports from kbuild test robot in the future. Fixes: ea168cdf1299 ("powerpc: tsi108: support of_get_mac_address new ERR_PTR error") Reported-by: kbuild test robot Signed-off-by: Petr Štetiar Signed-off-by: David S. Miller --- arch/powerpc/sysdev/tsi108_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c index c92dcac85231..026619c9a8cb 100644 --- a/arch/powerpc/sysdev/tsi108_dev.c +++ b/arch/powerpc/sysdev/tsi108_dev.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -106,7 +107,7 @@ static int __init tsi108_eth_of_init(void) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) - memcpy(tsi_eth_data.mac_addr, mac_addr, 6); + ether_addr_copy(tsi_eth_data.mac_addr, mac_addr); ph = of_get_property(np, "mdio-handle", NULL); mdio = of_find_node_by_phandle(*ph); From 2d2924af96881f3300239156d6ff316a1a3e48e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 10 May 2019 11:35:17 +0200 Subject: [PATCH 21/48] net: ethernet: fix similar warning reported by kbuild test robot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes following (similar) warning reported by kbuild test robot: In function ‘memcpy’, inlined from ‘smsc75xx_init_mac_address’ at drivers/net/usb/smsc75xx.c:778:3, inlined from ‘smsc75xx_bind’ at drivers/net/usb/smsc75xx.c:1501:2: ./include/linux/string.h:355:9: warning: argument 2 null where non-null expected [-Wnonnull] return __builtin_memcpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/usb/smsc75xx.c: In function ‘smsc75xx_bind’: ./include/linux/string.h:355:9: note: in a call to built-in function ‘__builtin_memcpy’ I've replaced the offending memcpy with ether_addr_copy, because I'm 100% sure, that of_get_mac_address can't return NULL as it returns valid pointer or ERR_PTR encoded value, nothing else. I'm hesitant to just change IS_ERR into IS_ERR_OR_NULL check, as this would make the warning disappear also, but it would be confusing to check for impossible return value just to make a compiler happy. I'm now changing all occurencies of memcpy to ether_addr_copy after the of_get_mac_address call, as it's very likely, that we're going to get similar reports from kbuild test robot in the future. Fixes: a51645f70f63 ("net: ethernet: support of_get_mac_address new ERR_PTR error") Reported-by: kbuild test robot Signed-off-by: Petr Štetiar Signed-off-by: David S. Miller --- drivers/net/ethernet/allwinner/sun4i-emac.c | 2 +- drivers/net/ethernet/arc/emac_main.c | 2 +- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 2 +- drivers/net/ethernet/davicom/dm9000.c | 2 +- drivers/net/ethernet/freescale/fec_mpc52xx.c | 2 +- drivers/net/ethernet/freescale/fman/mac.c | 2 +- drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 2 +- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/micrel/ks8851.c | 2 +- drivers/net/ethernet/micrel/ks8851_mll.c | 2 +- drivers/net/ethernet/nxp/lpc_eth.c | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 +- drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/xilinx/ll_temac_main.c | 2 +- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 37ebd890ef51..9e06dff619c3 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -871,7 +871,7 @@ static int emac_probe(struct platform_device *pdev) /* Read MAC-address from DT */ mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) - memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); + ether_addr_copy(ndev->dev_addr, mac_addr); /* Check if the MAC address is valid, if not get a random one */ if (!is_valid_ether_addr(ndev->dev_addr)) { diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 7f89ad5c336d..13a1d99b29c6 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -961,7 +961,7 @@ int arc_emac_probe(struct net_device *ndev, int interface) mac_addr = of_get_mac_address(dev->of_node); if (!IS_ERR(mac_addr)) - memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); + ether_addr_copy(ndev->dev_addr, mac_addr); else eth_hw_addr_random(ndev); diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 15b1130aa4ae..0e5de88fd6e8 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1504,7 +1504,7 @@ static int octeon_mgmt_probe(struct platform_device *pdev) mac = of_get_mac_address(pdev->dev.of_node); if (!IS_ERR(mac)) - memcpy(netdev->dev_addr, mac, ETH_ALEN); + ether_addr_copy(netdev->dev_addr, mac); else eth_hw_addr_random(netdev); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 953ee5616801..5e1aff9a5fd6 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1413,7 +1413,7 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) - memcpy(pdata->dev_addr, mac_addr, sizeof(pdata->dev_addr)); + ether_addr_copy(pdata->dev_addr, mac_addr); return pdata; } diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 7b7e526869a7..30cdb246d020 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -903,7 +903,7 @@ static int mpc52xx_fec_probe(struct platform_device *op) */ mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) { - memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); + ether_addr_copy(ndev->dev_addr, mac_addr); } else { struct mpc52xx_fec __iomem *fec = priv->fec; diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 9cd2c28d17df..7ab8095db192 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -729,7 +729,7 @@ static int mac_probe(struct platform_device *_of_dev) err = -EINVAL; goto _return_of_get_parent; } - memcpy(mac_dev->addr, mac_addr, sizeof(mac_dev->addr)); + ether_addr_copy(mac_dev->addr, mac_addr); /* Get the port handles */ nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL); diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 90ea7a115d0f..5fad73b2e123 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1015,7 +1015,7 @@ static int fs_enet_probe(struct platform_device *ofdev) mac_addr = of_get_mac_address(ofdev->dev.of_node); if (!IS_ERR(mac_addr)) - memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); + ether_addr_copy(ndev->dev_addr, mac_addr); ret = fep->ops->allocate_bd(ndev); if (ret) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index df13c693b038..e670cd293dba 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -873,7 +873,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) - memcpy(dev->dev_addr, mac_addr, ETH_ALEN); + ether_addr_copy(dev->dev_addr, mac_addr); if (model && !strcasecmp(model, "TSEC")) priv->device_flags |= FSL_GIANFAR_DEV_HAS_GIGABIT | diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 216e99af2b5a..4d6892d2f0a4 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3911,7 +3911,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) - memcpy(dev->dev_addr, mac_addr, ETH_ALEN); + ether_addr_copy(dev->dev_addr, mac_addr); ugeth->ug_info = ug_info; ugeth->dev = device; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 07e254fc96ef..409b69fd4374 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2750,7 +2750,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, mac_addr = of_get_mac_address(pnp); if (!IS_ERR(mac_addr)) - memcpy(ppd.mac_addr, mac_addr, ETH_ALEN); + ether_addr_copy(ppd.mac_addr, mac_addr); mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 8186135883ed..e758650b2c26 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4565,7 +4565,7 @@ static int mvneta_probe(struct platform_device *pdev) dt_mac_addr = of_get_mac_address(dn); if (!IS_ERR(dt_mac_addr)) { mac_from = "device tree"; - memcpy(dev->dev_addr, dt_mac_addr, ETH_ALEN); + ether_addr_copy(dev->dev_addr, dt_mac_addr); } else { mvneta_get_mac_addr(pp, hw_mac_addr); if (is_valid_ether_addr(hw_mac_addr)) { diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 9d070cca3e9e..5adf307fbbfd 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4805,7 +4805,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, */ iap = of_get_mac_address(hw->pdev->dev.of_node); if (!IS_ERR(iap)) - memcpy(dev->dev_addr, iap, ETH_ALEN); + ether_addr_copy(dev->dev_addr, iap); else memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8, ETH_ALEN); diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index b44172a901ed..ba4fdf1b0dea 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -426,7 +426,7 @@ static void ks8851_init_mac(struct ks8851_net *ks) mac_addr = of_get_mac_address(ks->spidev->dev.of_node); if (!IS_ERR(mac_addr)) { - memcpy(dev->dev_addr, mac_addr, ETH_ALEN); + ether_addr_copy(dev->dev_addr, mac_addr); ks8851_write_mac_addr(dev); return; } diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index dc76b0d15234..e5c8412c08c1 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -1328,7 +1328,7 @@ static int ks8851_probe(struct platform_device *pdev) if (pdev->dev.of_node) { mac = of_get_mac_address(pdev->dev.of_node); if (!IS_ERR(mac)) - memcpy(ks->mac_addr, mac, ETH_ALEN); + ether_addr_copy(ks->mac_addr, mac); } else { struct ks8851_mll_platform_data *pdata; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index da138edddd32..fec604c4c0d3 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1369,7 +1369,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) if (!is_valid_ether_addr(ndev->dev_addr)) { const char *macaddr = of_get_mac_address(np); if (!IS_ERR(macaddr)) - memcpy(ndev->dev_addr, macaddr, ETH_ALEN); + ether_addr_copy(ndev->dev_addr, macaddr); } if (!is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 7c4e282242d5..6354f19a31eb 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3193,7 +3193,7 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) - memcpy(pdata->mac_addr, mac_addr, ETH_ALEN); + ether_addr_copy(pdata->mac_addr, mac_addr); pdata->no_ether_link = of_property_read_bool(np, "renesas,no-ether-link"); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b18eeb05b993..634fc484a0b3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2233,7 +2233,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, no_phy_slave: mac_addr = of_get_mac_address(slave_node); if (!IS_ERR(mac_addr)) { - memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); + ether_addr_copy(slave_data->mac_addr, mac_addr); } else { ret = ti_cm_get_macid(&pdev->dev, i, slave_data->mac_addr); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 997475c209c0..47c45152132e 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -361,7 +361,7 @@ static void temac_do_set_mac_address(struct net_device *ndev) static int temac_init_mac_address(struct net_device *ndev, const void *address) { - memcpy(ndev->dev_addr, address, ETH_ALEN); + ether_addr_copy(ndev->dev_addr, address); if (!is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); temac_do_set_mac_address(ndev); diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 691170753563..6886270da695 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1167,7 +1167,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev) if (!IS_ERR(mac_address)) { /* Set the MAC address. */ - memcpy(ndev->dev_addr, mac_address, ETH_ALEN); + ether_addr_copy(ndev->dev_addr, mac_address); } else { dev_warn(dev, "No MAC address found, using random\n"); eth_hw_addr_random(ndev); From 1b9705d9713b67996608763e2317673d8378b03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 10 May 2019 11:35:18 +0200 Subject: [PATCH 22/48] net: wireless: mt76: fix similar warning reported by kbuild test robot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes following (similar) warning reported by kbuild test robot: In function ‘memcpy’, inlined from ‘smsc75xx_init_mac_address’ at drivers/net/usb/smsc75xx.c:778:3, inlined from ‘smsc75xx_bind’ at drivers/net/usb/smsc75xx.c:1501:2: ./include/linux/string.h:355:9: warning: argument 2 null where non-null expected [-Wnonnull] return __builtin_memcpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/usb/smsc75xx.c: In function ‘smsc75xx_bind’: ./include/linux/string.h:355:9: note: in a call to built-in function ‘__builtin_memcpy’ I've replaced the offending memcpy with ether_addr_copy, because I'm 100% sure, that of_get_mac_address can't return NULL as it returns valid pointer or ERR_PTR encoded value, nothing else. I'm hesitant to just change IS_ERR into IS_ERR_OR_NULL check, as this would make the warning disappear also, but it would be confusing to check for impossible return value just to make a compiler happy. I'm now changing all occurencies of memcpy to ether_addr_copy after the of_get_mac_address call, as it's very likely, that we're going to get similar reports from kbuild test robot in the future. Fixes: d31a36b5f407 ("net: wireless: support of_get_mac_address new ERR_PTR error") Reported-by: kbuild test robot Signed-off-by: Petr Štetiar Signed-off-by: David S. Miller --- drivers/net/wireless/mediatek/mt76/eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/eeprom.c b/drivers/net/wireless/mediatek/mt76/eeprom.c index 04964937a3af..b7a49ae6b327 100644 --- a/drivers/net/wireless/mediatek/mt76/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/eeprom.c @@ -95,7 +95,7 @@ mt76_eeprom_override(struct mt76_dev *dev) mac = of_get_mac_address(np); if (!IS_ERR(mac)) - memcpy(dev->macaddr, mac, ETH_ALEN); + ether_addr_copy(dev->macaddr, mac); #endif if (!is_valid_ether_addr(dev->macaddr)) { From daf3ddbe11a2ff74c95bc814df8e5fe3201b4cb5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 10 May 2019 22:11:26 +0200 Subject: [PATCH 23/48] net: phy: realtek: add missing page operations Add missing page operation callbacks to few Realtek drivers. This also fixes a NPE after the referenced commit added code to the RTL8211E driver that uses phy_select_page(). Fixes: f81dadbcf7fd ("net: phy: realtek: Add rtl8211e rx/tx delays config") Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Reported-by: Vicente Bergas Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 761ce3b1e7bd..29ce07312509 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -275,6 +275,8 @@ static struct phy_driver realtek_drvs[] = { .config_aneg = rtl8211_config_aneg, .read_mmd = &genphy_read_mmd_unsupported, .write_mmd = &genphy_write_mmd_unsupported, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc912), .name = "RTL8211B Gigabit Ethernet", @@ -284,12 +286,16 @@ static struct phy_driver realtek_drvs[] = { .write_mmd = &genphy_write_mmd_unsupported, .suspend = rtl8211b_suspend, .resume = rtl8211b_resume, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc913), .name = "RTL8211C Gigabit Ethernet", .config_init = rtl8211c_config_init, .read_mmd = &genphy_read_mmd_unsupported, .write_mmd = &genphy_write_mmd_unsupported, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc914), .name = "RTL8211DN Gigabit Ethernet", @@ -297,6 +303,8 @@ static struct phy_driver realtek_drvs[] = { .config_intr = rtl8211e_config_intr, .suspend = genphy_suspend, .resume = genphy_resume, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc915), .name = "RTL8211E Gigabit Ethernet", @@ -305,6 +313,8 @@ static struct phy_driver realtek_drvs[] = { .config_intr = &rtl8211e_config_intr, .suspend = genphy_suspend, .resume = genphy_resume, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc916), .name = "RTL8211F Gigabit Ethernet", @@ -322,6 +332,8 @@ static struct phy_driver realtek_drvs[] = { .resume = genphy_resume, .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc961), .name = "RTL8366RB Gigabit Ethernet", From af959b18fd447170a10865283ba691af4353cc7f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 May 2019 03:03:09 +0200 Subject: [PATCH 24/48] bpf: fix out of bounds backwards jmps due to dead code removal systemtap folks reported the following splat recently: [ 7790.862212] WARNING: CPU: 3 PID: 26759 at arch/x86/kernel/kprobes/core.c:1022 kprobe_fault_handler+0xec/0xf0 [...] [ 7790.864113] CPU: 3 PID: 26759 Comm: sshd Not tainted 5.1.0-0.rc7.git1.1.fc31.x86_64 #1 [ 7790.864198] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS[...] [ 7790.864314] RIP: 0010:kprobe_fault_handler+0xec/0xf0 [ 7790.864375] Code: 48 8b 50 [...] [ 7790.864714] RSP: 0018:ffffc06800bdbb48 EFLAGS: 00010082 [ 7790.864812] RAX: ffff9e2b75a16320 RBX: 0000000000000000 RCX: 0000000000000000 [ 7790.865306] RDX: ffffffffffffffff RSI: 000000000000000e RDI: ffffc06800bdbbf8 [ 7790.865514] RBP: ffffc06800bdbbf8 R08: 0000000000000000 R09: 0000000000000000 [ 7790.865960] R10: 0000000000000000 R11: 0000000000000000 R12: ffffc06800bdbbf8 [ 7790.866037] R13: ffff9e2ab56a0418 R14: ffff9e2b6d0bb400 R15: ffff9e2b6d268000 [ 7790.866114] FS: 00007fde49937d80(0000) GS:ffff9e2b75a00000(0000) knlGS:0000000000000000 [ 7790.866193] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7790.866318] CR2: 0000000000000000 CR3: 000000012f312000 CR4: 00000000000006e0 [ 7790.866419] Call Trace: [ 7790.866677] do_user_addr_fault+0x64/0x480 [ 7790.867513] do_page_fault+0x33/0x210 [ 7790.868002] async_page_fault+0x1e/0x30 [ 7790.868071] RIP: 0010: (null) [ 7790.868144] Code: Bad RIP value. [ 7790.868229] RSP: 0018:ffffc06800bdbca8 EFLAGS: 00010282 [ 7790.868362] RAX: ffff9e2b598b60f8 RBX: ffffc06800bdbe48 RCX: 0000000000000004 [ 7790.868629] RDX: 0000000000000004 RSI: ffffc06800bdbc6c RDI: ffff9e2b598b60f0 [ 7790.868834] RBP: ffffc06800bdbcf8 R08: 0000000000000000 R09: 0000000000000004 [ 7790.870432] R10: 00000000ff6f7a03 R11: 0000000000000000 R12: 0000000000000001 [ 7790.871859] R13: ffffc06800bdbcb8 R14: 0000000000000000 R15: ffff9e2acd0a5310 [ 7790.873455] ? vfs_read+0x5/0x170 [ 7790.874639] ? vfs_read+0x1/0x170 [ 7790.875834] ? trace_call_bpf+0xf6/0x260 [ 7790.877044] ? vfs_read+0x1/0x170 [ 7790.878208] ? vfs_read+0x5/0x170 [ 7790.879345] ? kprobe_perf_func+0x233/0x260 [ 7790.880503] ? vfs_read+0x1/0x170 [ 7790.881632] ? vfs_read+0x5/0x170 [ 7790.882751] ? kprobe_ftrace_handler+0x92/0xf0 [ 7790.883926] ? __vfs_read+0x30/0x30 [ 7790.885050] ? ftrace_ops_assist_func+0x94/0x100 [ 7790.886183] ? vfs_read+0x1/0x170 [ 7790.887283] ? vfs_read+0x5/0x170 [ 7790.888348] ? ksys_read+0x5a/0xe0 [ 7790.889389] ? do_syscall_64+0x5c/0xa0 [ 7790.890401] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe After some debugging, turns out that the logic in 2cbd95a5c4fb ("bpf: change parameters of call/branch offset adjustment") has a bug that is exposed after 52875a04f4b2 ("bpf: verifier: remove dead code") in that we miss some of the jump offset adjustments after code patching when we remove dead code, more concretely, upon backward jump spanning over the area that is being removed. BPF insns of a case that was hit pre 52875a04f4b2: [...] 676: (85) call bpf_perf_event_output#-47616 677: (05) goto pc-636 678: (62) *(u32 *)(r10 -64) = 0 679: (bf) r7 = r10 680: (07) r7 += -64 681: (05) goto pc-44 682: (05) goto pc-1 683: (05) goto pc-1 BPF insns afterwards: [...] 618: (85) call bpf_perf_event_output#-47616 619: (05) goto pc-638 620: (62) *(u32 *)(r10 -64) = 0 621: (bf) r7 = r10 622: (07) r7 += -64 623: (05) goto pc-44 To illustrate the bug, situation looks as follows: ____ 0 | | <-- foo: [...] 1 |____| 2 |____| <-- pos / end_new ^ 3 | | | 4 | | | len 5 |____| | (remove region) 6 | | <-- end_old v 7 | | 8 | | <-- curr (jmp foo) 9 |____| The condition curr >= end_new && curr + off + 1 < end_new in the branch delta adjustments is never hit because curr + off + 1 < end_new is compared as unsigned and therefore curr + off + 1 > end_new in unsigned realm as curr + off + 1 becomes negative since the insns are memmove()'d before the offset adjustments. Correct BPF insns after this fix: [...] 618: (85) call bpf_perf_event_output#-47216 619: (05) goto pc-578 620: (62) *(u32 *)(r10 -64) = 0 621: (bf) r7 = r10 622: (07) r7 += -64 623: (05) goto pc-44 Note that unprivileged case is not affected from this. Fixes: 52875a04f4b2 ("bpf: verifier: remove dead code") Fixes: 2cbd95a5c4fb ("bpf: change parameters of call/branch offset adjustment") Reported-by: Frank Ch. Eigler Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 3ba56e73c90e..242a643af82f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -338,7 +338,7 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) } static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, - s32 end_new, u32 curr, const bool probe_pass) + s32 end_new, s32 curr, const bool probe_pass) { const s64 imm_min = S32_MIN, imm_max = S32_MAX; s32 delta = end_new - end_old; @@ -356,7 +356,7 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, } static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, - s32 end_new, u32 curr, const bool probe_pass) + s32 end_new, s32 curr, const bool probe_pass) { const s32 off_min = S16_MIN, off_max = S16_MAX; s32 delta = end_new - end_old; From 8f5e24514cbd63c352b663636f955fb286f2c3bd Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 11 May 2019 02:56:33 +0200 Subject: [PATCH 25/48] net: qrtr: use protocol endiannes variable sparse was unable to verify endiannes correctness due to reassignment from le32_to_cpu to the same variable - fix this warning up by providing a proper __le32 type and initializing it. This is not actually fixing any bug - rather just addressing the sparse warning. Signed-off-by: Nicholas Mc Guire Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index dd0e97f4f6c0..801872a2e7aa 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -728,12 +728,13 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int, struct sockaddr_qrtr *, struct sockaddr_qrtr *); + __le32 qrtr_type = cpu_to_le32(QRTR_TYPE_DATA); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; struct qrtr_node *node; struct sk_buff *skb; + u32 type = 0; size_t plen; - u32 type = QRTR_TYPE_DATA; int rc; if (msg->msg_flags & ~(MSG_DONTWAIT)) @@ -807,8 +808,8 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } /* control messages already require the type as 'command' */ - skb_copy_bits(skb, 0, &type, 4); - type = le32_to_cpu(type); + skb_copy_bits(skb, 0, &qrtr_type, 4); + type = le32_to_cpu(qrtr_type); } rc = enqueue_fn(node, skb, type, &ipc->us, addr); From 8f779443b440f975f0eea80a68e9adccc15050c0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 11 May 2019 07:44:48 +0200 Subject: [PATCH 26/48] net: phy: realtek: fix double page ops in generic Realtek driver When adding missing callbacks I missed that one had them set already. Interesting that the compiler didn't complain. Fixes: daf3ddbe11a2 ("net: phy: realtek: add missing page operations") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 29ce07312509..4988ccea684a 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -332,8 +332,6 @@ static struct phy_driver realtek_drvs[] = { .resume = genphy_resume, .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, - .read_page = rtl821x_read_page, - .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc961), .name = "RTL8366RB Gigabit Ethernet", From 92285a079eedfe104a773a7c4293f77a01f456fb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 12 May 2019 20:42:37 +0200 Subject: [PATCH 27/48] netfilter: nf_tables: correct NFT_LOGLEVEL_MAX value should be same as NFT_LOGLEVEL_AUDIT, so use -, not +. Fixes: 7eced5ab5a73 ("netfilter: nf_tables: add NFT_LOGLEVEL_* enumeration and use it") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 92bb1e2b2425..7bdb234f3d8c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1134,7 +1134,7 @@ enum nft_log_level { NFT_LOGLEVEL_AUDIT, __NFT_LOGLEVEL_MAX }; -#define NFT_LOGLEVEL_MAX (__NFT_LOGLEVEL_MAX + 1) +#define NFT_LOGLEVEL_MAX (__NFT_LOGLEVEL_MAX - 1) /** * enum nft_queue_attributes - nf_tables queue expression netlink attributes From 87671375108625bb7f8a09f0809a369d460ebe43 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 11 May 2019 23:14:45 +0300 Subject: [PATCH 28/48] net: dsa: Initialize DSA_SKB_CB(skb)->deferred_xmit variable The sk_buff control block can have any contents on xmit put there by the stack, so initialization is mandatory, since we are checking its value after the actual DSA xmit (the tagger may have changed it). The DSA_SKB_ZERO() macro could have been used for this purpose, but: - Zeroizing a 48-byte memory region in the hotpath is best avoided. - It would have triggered a warning with newer compilers since __dsa_skb_cb contains a structure within a structure, and the {0} initializer was incorrect for that purpose. So simply remove the DSA_SKB_ZERO() macro and initialize the deferred_xmit variable by hand (which should be done for all further dsa_skb_cb variables which need initialization - currently none - to avoid the performance penalty). Fixes: 97a69a0dea9a ("net: dsa: Add support for deferred xmit") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 3 --- net/dsa/slave.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 6aaaadd6a413..35ca1f2c6e28 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -102,9 +102,6 @@ struct __dsa_skb_cb { #define DSA_SKB_CB_COPY(nskb, skb) \ { *__DSA_SKB_CB(nskb) = *__DSA_SKB_CB(skb); } -#define DSA_SKB_CB_ZERO(skb) \ - { *__DSA_SKB_CB(skb) = (struct __dsa_skb_cb) {0}; } - #define DSA_SKB_CB_PRIV(skb) \ ((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv)) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fe7b6a62e8f1..9892ca1f6859 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -463,6 +463,8 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) s->tx_bytes += skb->len; u64_stats_update_end(&s->syncp); + DSA_SKB_CB(skb)->deferred_xmit = false; + /* Identify PTP protocol packets, clone them, and pass them to the * switch driver */ From 506f0e09ce36477ac610b6e9b52cbd4d5ead0c01 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 11 May 2019 23:14:46 +0300 Subject: [PATCH 29/48] net: dsa: Remove dangerous DSA_SKB_CLONE() macro This does not cause any bug now because it has no users, but its body contains two pointer definitions within a code block: struct sk_buff *clone = _clone; \ struct sk_buff *skb = _skb; \ When calling the macro as DSA_SKB_CLONE(clone, skb), these variables would obscure the arguments that the macro was called with, and the initializers would be a no-op instead of doing their job (undefined behavior, by the way, but GCC nicely puts NULL pointers instead). So simply remove this broken macro and leave users to simply call "DSA_SKB_CB(skb)->clone = clone" by hand when needed. There is one functional difference when doing what I just suggested above: the control block won't be transferred from the original skb into the clone. Since there's no foreseen need for the control block in the clone ATM, this is ok. Fixes: b68b0dd0fb2d ("net: dsa: Keep private info in the skb->cb") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 35ca1f2c6e28..1f6b8608b0b7 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -105,15 +105,6 @@ struct __dsa_skb_cb { #define DSA_SKB_CB_PRIV(skb) \ ((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv)) -#define DSA_SKB_CB_CLONE(_clone, _skb) \ - { \ - struct sk_buff *clone = _clone; \ - struct sk_buff *skb = _skb; \ - \ - DSA_SKB_CB_COPY(clone, skb); \ - DSA_SKB_CB(skb)->clone = clone; \ - } - struct dsa_switch_tree { struct list_head list; From 1c9b1420ac137a0060dc67d3840bdaae9bcf4bae Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 11 May 2019 23:14:47 +0300 Subject: [PATCH 30/48] net: dsa: Remove the now unused DSA_SKB_CB_COPY() macro It's best to not expose this, due to the performance hit it may cause when calling it. Fixes: b68b0dd0fb2d ("net: dsa: Keep private info in the skb->cb") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 1f6b8608b0b7..685294817712 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -99,9 +99,6 @@ struct __dsa_skb_cb { #define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb)) -#define DSA_SKB_CB_COPY(nskb, skb) \ - { *__DSA_SKB_CB(nskb) = *__DSA_SKB_CB(skb); } - #define DSA_SKB_CB_PRIV(skb) \ ((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv)) From 3285a9aa6570df84d0c1ea8f1691fbee45c8d821 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sun, 12 May 2019 16:09:49 +0530 Subject: [PATCH 31/48] net: dccp : proto: remove Unneeded variable "err" Fix below issue reported by coccicheck net/dccp/proto.c:266:5-8: Unneeded variable: "err". Return "0" on line 310 Signed-off-by: Hariprasad Kelam Signed-off-by: David S. Miller --- net/dccp/proto.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0e2f71ab8367..5dd85ec51bfe 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -263,7 +263,6 @@ int dccp_disconnect(struct sock *sk, int flags) struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - int err = 0; const int old_state = sk->sk_state; if (old_state != DCCP_CLOSED) @@ -307,7 +306,7 @@ int dccp_disconnect(struct sock *sk, int flags) WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); - return err; + return 0; } EXPORT_SYMBOL_GPL(dccp_disconnect); From 9858381253acad69a4538a448eb9aa674c4f70d6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 May 2019 22:51:33 +0000 Subject: [PATCH 32/48] bpf: add various test cases for backward jumps Add a couple of tests to make sure branch(/call) offset adjustments are correctly performed. Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/verifier/jump.c | 195 ++++++++++++++++++++ 1 file changed, 195 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/jump.c b/tools/testing/selftests/bpf/verifier/jump.c index 8e6fcc8940f0..6f951d1ff0a4 100644 --- a/tools/testing/selftests/bpf/verifier/jump.c +++ b/tools/testing/selftests/bpf/verifier/jump.c @@ -178,3 +178,198 @@ .result_unpriv = REJECT, .result = ACCEPT, }, +{ + "jump test 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_1, 16), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -20), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jump test 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 2, 16), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, -20), + }, + .result = ACCEPT, + .retval = 3, +}, +{ + "jump test 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_1, 16), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, -20), + }, + .result = ACCEPT, + .retval = 3, +}, +{ + "jump/call test 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 2, 16), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -20), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "jump out of range from insn 1 to 4", +}, +{ + "jump/call test 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 2, 16), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -20), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "last insn is not an exit or jmp", +}, +{ + "jump/call test 11", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 2, 26), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -31), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 3, +}, From 748c7c821aca5e32fab5676193365fc2705af366 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 10 May 2019 15:51:22 +0100 Subject: [PATCH 33/48] bpf: fix script for generating man page on BPF helpers The script broke on parsing function prototype for bpf_strtoul(). This is because the last argument for the function is a pointer to an "unsigned long". The current version of the script only accepts "const" and "struct", but not "unsigned", at the beginning of argument types made of several words. One solution could be to add "unsigned" to the list, but the issue could come up again in the future (what about "long int"?). It turns out we do not need to have such restrictions on the words: so let's simply accept any series of words instead. Reported-by: Yonghong Song Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- scripts/bpf_helpers_doc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 5010a4d5bfba..894cc58c1a03 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 # SPDX-License-Identifier: GPL-2.0-only # -# Copyright (C) 2018 Netronome Systems, Inc. +# Copyright (C) 2018-2019 Netronome Systems, Inc. # In case user attempts to run with Python 2. from __future__ import print_function @@ -39,7 +39,7 @@ class Helper(object): Break down helper function protocol into smaller chunks: return type, name, distincts arguments. """ - arg_re = re.compile('((const )?(struct )?(\w+|...))( (\**)(\w+))?$') + arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$') res = {} proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$') @@ -54,8 +54,8 @@ class Helper(object): capture = arg_re.match(a) res['args'].append({ 'type' : capture.group(1), - 'star' : capture.group(6), - 'name' : capture.group(7) + 'star' : capture.group(5), + 'name' : capture.group(6) }) return res From 32e7dc281cdfd36959fa35f210b45d204ec786f3 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 10 May 2019 15:51:23 +0100 Subject: [PATCH 34/48] bpf: fix recurring typo in documentation for BPF helpers "Underlaying packet buffer" should be an "underlying" one, in the warning about invalidated data and data_end pointers. Through copy-and-paste, the typo occurred no fewer than 19 times in the documentation. Let's fix it. Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 72336bac7573..989ef6b1c269 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -629,7 +629,7 @@ union bpf_attr { * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ * **->swhash** and *skb*\ **->l4hash** to 0). * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -654,7 +654,7 @@ union bpf_attr { * flexibility and can handle sizes larger than 2 or 4 for the * checksum to update. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -686,7 +686,7 @@ union bpf_attr { * flexibility and can handle sizes larger than 2 or 4 for the * checksum to update. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -741,7 +741,7 @@ union bpf_attr { * efficient, but it is handled through an action code where the * redirection happens only after the eBPF program has returned. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -806,7 +806,7 @@ union bpf_attr { * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to * be **ETH_P_8021Q**. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -818,7 +818,7 @@ union bpf_attr { * Description * Pop a VLAN header from the packet associated to *skb*. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1168,7 +1168,7 @@ union bpf_attr { * All values for *flags* are reserved for future usage, and must * be left at zero. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1281,7 +1281,7 @@ union bpf_attr { * implicitly linearizes, unclones and drops offloads from the * *skb*. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1317,7 +1317,7 @@ union bpf_attr { * **bpf_skb_pull_data()** to effectively unclone the *skb* from * the very beginning in case it is indeed cloned. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1369,7 +1369,7 @@ union bpf_attr { * All values for *flags* are reserved for future usage, and must * be left at zero. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1384,7 +1384,7 @@ union bpf_attr { * can be used to prepare the packet for pushing or popping * headers. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1531,7 +1531,7 @@ union bpf_attr { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; **len** is the length of the inner MAC header. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1610,7 +1610,7 @@ union bpf_attr { * more flexibility as the user is free to store whatever meta * data they need. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1852,7 +1852,7 @@ union bpf_attr { * copied if necessary (i.e. if data was not linear and if start * and end pointers do not point to the same chunk). * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1886,7 +1886,7 @@ union bpf_attr { * only possible to shrink the packet as of this writing, * therefore *delta* must be a negative integer. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2072,7 +2072,7 @@ union bpf_attr { * by bpf programs of types BPF_PROG_TYPE_LWT_IN and * BPF_PROG_TYPE_LWT_XMIT. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2087,7 +2087,7 @@ union bpf_attr { * inside the outermost IPv6 Segment Routing Header can be * modified through this helper. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2103,7 +2103,7 @@ union bpf_attr { * after the segments are accepted. *delta* can be as well * positive (growing) as negative (shrinking). * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2132,7 +2132,7 @@ union bpf_attr { * encapsulation policy. * Type of param: **struct ipv6_sr_hdr**. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with From 80867c5e3c0275c7208816faac768a44fa3747be Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 10 May 2019 15:51:24 +0100 Subject: [PATCH 35/48] bpf: fix minor issues in documentation for BPF helpers. This commit brings many minor fixes to the documentation for BPF helper functions. Mostly, this is limited to formatting fixes and improvements. In particular, fix broken formatting for bpf_skb_adjust_room(). Besides formatting, replace the mention of "bpf_fullsock()" (that is not associated with any function or type exposed to the user) in the description of bpf_sk_storage_get() by "full socket". Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 103 ++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 989ef6b1c269..63e0cf66f01a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1518,18 +1518,18 @@ union bpf_attr { * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. * Adjusting mss in this way is not allowed for datagrams. * - * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **: - * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **: + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: * Any new space is reserved to hold a tunnel header. * Configure skb offsets and other fields accordingly. * - * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **: - * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **: + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, + * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: * Use with ENCAP_L3 flags to further specify the tunnel type. * - * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **: + * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): * Use with ENCAP_L3/L4 flags to further specify the tunnel - * type; **len** is the length of the inner MAC header. + * type; *len* is the length of the inner MAC header. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -2061,16 +2061,16 @@ union bpf_attr { * **BPF_LWT_ENCAP_IP** * IP encapsulation (GRE/GUE/IPIP/etc). The outer header * must be IPv4 or IPv6, followed by zero or more - * additional headers, up to LWT_BPF_MAX_HEADROOM total - * bytes in all prepended headers. Please note that - * if skb_is_gso(skb) is true, no more than two headers - * can be prepended, and the inner header, if present, - * should be either GRE or UDP/GUE. + * additional headers, up to **LWT_BPF_MAX_HEADROOM** + * total bytes in all prepended headers. Please note that + * if **skb_is_gso**\ (*skb*) is true, no more than two + * headers can be prepended, and the inner header, if + * present, should be either GRE or UDP/GUE. * - * BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of - * type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called - * by bpf programs of types BPF_PROG_TYPE_LWT_IN and - * BPF_PROG_TYPE_LWT_XMIT. + * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs + * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can + * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and + * **BPF_PROG_TYPE_LWT_XMIT**. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -2126,11 +2126,11 @@ union bpf_attr { * Type of *param*: **int**. * **SEG6_LOCAL_ACTION_END_B6** * End.B6 action: Endpoint bound to an SRv6 policy. - * Type of param: **struct ipv6_sr_hdr**. + * Type of *param*: **struct ipv6_sr_hdr**. * **SEG6_LOCAL_ACTION_END_B6_ENCAP** * End.B6.Encap action: Endpoint bound to an SRv6 * encapsulation policy. - * Type of param: **struct ipv6_sr_hdr**. + * Type of *param*: **struct ipv6_sr_hdr**. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -2285,7 +2285,8 @@ union bpf_attr { * Return * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** - * result is from **reuse->socks**\ [] using the hash of the tuple. + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description @@ -2321,7 +2322,8 @@ union bpf_attr { * Return * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** - * result is from **reuse->socks**\ [] using the hash of the tuple. + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. * * int bpf_sk_release(struct bpf_sock *sock) * Description @@ -2490,31 +2492,34 @@ union bpf_attr { * network namespace *netns*. The return value must be checked, * and if non-**NULL**, released via **bpf_sk_release**\ (). * - * This function is identical to bpf_sk_lookup_tcp, except that it - * also returns timewait or request sockets. Use bpf_sk_fullsock - * or bpf_tcp_socket to access the full structure. + * This function is identical to **bpf_sk_lookup_tcp**\ (), except + * that it also returns timewait or request sockets. Use + * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the + * full structure. * * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** - * result is from **reuse->socks**\ [] using the hash of the tuple. + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. * * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description - * Check whether iph and th contain a valid SYN cookie ACK for - * the listening socket in sk. + * Check whether *iph* and *th* contain a valid SYN cookie ACK for + * the listening socket in *sk*. * - * iph points to the start of the IPv4 or IPv6 header, while - * iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr). + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). * - * th points to the start of the TCP header, while th_len contains - * sizeof(struct tcphdr). + * *th* points to the start of the TCP header, while *th_len* + * contains **sizeof**\ (**struct tcphdr**). * * Return - * 0 if iph and th are a valid SYN cookie ACK, or a negative error - * otherwise. + * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative + * error otherwise. * * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description @@ -2592,17 +2597,17 @@ union bpf_attr { * and save the result in *res*. * * The string may begin with an arbitrary amount of white space - * (as determined by isspace(3)) followed by a single optional '-' - * sign. + * (as determined by **isspace**\ (3)) followed by a single + * optional '**-**' sign. * * Five least significant bits of *flags* encode base, other bits * are currently unused. * * Base must be either 8, 10, 16 or 0 to detect it automatically - * similar to user space strtol(3). + * similar to user space **strtol**\ (3). * Return * Number of characters consumed on success. Must be positive but - * no more than buf_len. + * no more than *buf_len*. * * **-EINVAL** if no valid digits were found or unsupported base * was provided. @@ -2616,16 +2621,16 @@ union bpf_attr { * given base and save the result in *res*. * * The string may begin with an arbitrary amount of white space - * (as determined by isspace(3)). + * (as determined by **isspace**\ (3)). * * Five least significant bits of *flags* encode base, other bits * are currently unused. * * Base must be either 8, 10, 16 or 0 to detect it automatically - * similar to user space strtoul(3). + * similar to user space **strtoul**\ (3). * Return * Number of characters consumed on success. Must be positive but - * no more than buf_len. + * no more than *buf_len*. * * **-EINVAL** if no valid digits were found or unsupported base * was provided. @@ -2634,26 +2639,26 @@ union bpf_attr { * * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) * Description - * Get a bpf-local-storage from a sk. + * Get a bpf-local-storage from a *sk*. * * Logically, it could be thought of getting the value from * a *map* with *sk* as the **key**. From this * perspective, the usage is not much different from - * **bpf_map_lookup_elem(map, &sk)** except this - * helper enforces the key must be a **bpf_fullsock()** - * and the map must be a BPF_MAP_TYPE_SK_STORAGE also. + * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this + * helper enforces the key must be a full socket and the map must + * be a **BPF_MAP_TYPE_SK_STORAGE** also. * * Underneath, the value is stored locally at *sk* instead of - * the map. The *map* is used as the bpf-local-storage **type**. - * The bpf-local-storage **type** (i.e. the *map*) is searched - * against all bpf-local-storages residing at sk. + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf-local-storages residing at *sk*. * - * An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be + * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used - * together with BPF_SK_STORAGE_GET_F_CREATE to specify + * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify * the initial value of a bpf-local-storage. If *value* is - * NULL, the new bpf-local-storage will be zero initialized. + * **NULL**, the new bpf-local-storage will be zero initialized. * Return * A bpf-local-storage pointer is returned on success. * @@ -2662,7 +2667,7 @@ union bpf_attr { * * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description - * Delete a bpf-local-storage from a sk. + * Delete a bpf-local-storage from a *sk*. * Return * 0 on success. * From c1fe1e701ee3344c7c9bcb8e14c4b8a40cad019e Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 10 May 2019 15:51:25 +0100 Subject: [PATCH 36/48] tools: bpf: synchronise BPF UAPI header with tools Synchronise the bpf.h header under tools, to report the fixes and additions recently brought to the documentation for the BPF helpers. Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 141 +++++++++++++++++---------------- 1 file changed, 73 insertions(+), 68 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 72336bac7573..63e0cf66f01a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -629,7 +629,7 @@ union bpf_attr { * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ * **->swhash** and *skb*\ **->l4hash** to 0). * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -654,7 +654,7 @@ union bpf_attr { * flexibility and can handle sizes larger than 2 or 4 for the * checksum to update. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -686,7 +686,7 @@ union bpf_attr { * flexibility and can handle sizes larger than 2 or 4 for the * checksum to update. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -741,7 +741,7 @@ union bpf_attr { * efficient, but it is handled through an action code where the * redirection happens only after the eBPF program has returned. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -806,7 +806,7 @@ union bpf_attr { * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to * be **ETH_P_8021Q**. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -818,7 +818,7 @@ union bpf_attr { * Description * Pop a VLAN header from the packet associated to *skb*. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1168,7 +1168,7 @@ union bpf_attr { * All values for *flags* are reserved for future usage, and must * be left at zero. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1281,7 +1281,7 @@ union bpf_attr { * implicitly linearizes, unclones and drops offloads from the * *skb*. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1317,7 +1317,7 @@ union bpf_attr { * **bpf_skb_pull_data()** to effectively unclone the *skb* from * the very beginning in case it is indeed cloned. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1369,7 +1369,7 @@ union bpf_attr { * All values for *flags* are reserved for future usage, and must * be left at zero. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1384,7 +1384,7 @@ union bpf_attr { * can be used to prepare the packet for pushing or popping * headers. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1518,20 +1518,20 @@ union bpf_attr { * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. * Adjusting mss in this way is not allowed for datagrams. * - * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **: - * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **: + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: * Any new space is reserved to hold a tunnel header. * Configure skb offsets and other fields accordingly. * - * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **: - * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **: + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, + * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: * Use with ENCAP_L3 flags to further specify the tunnel type. * - * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **: + * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): * Use with ENCAP_L3/L4 flags to further specify the tunnel - * type; **len** is the length of the inner MAC header. + * type; *len* is the length of the inner MAC header. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1610,7 +1610,7 @@ union bpf_attr { * more flexibility as the user is free to store whatever meta * data they need. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1852,7 +1852,7 @@ union bpf_attr { * copied if necessary (i.e. if data was not linear and if start * and end pointers do not point to the same chunk). * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -1886,7 +1886,7 @@ union bpf_attr { * only possible to shrink the packet as of this writing, * therefore *delta* must be a negative integer. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2061,18 +2061,18 @@ union bpf_attr { * **BPF_LWT_ENCAP_IP** * IP encapsulation (GRE/GUE/IPIP/etc). The outer header * must be IPv4 or IPv6, followed by zero or more - * additional headers, up to LWT_BPF_MAX_HEADROOM total - * bytes in all prepended headers. Please note that - * if skb_is_gso(skb) is true, no more than two headers - * can be prepended, and the inner header, if present, - * should be either GRE or UDP/GUE. + * additional headers, up to **LWT_BPF_MAX_HEADROOM** + * total bytes in all prepended headers. Please note that + * if **skb_is_gso**\ (*skb*) is true, no more than two + * headers can be prepended, and the inner header, if + * present, should be either GRE or UDP/GUE. * - * BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of - * type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called - * by bpf programs of types BPF_PROG_TYPE_LWT_IN and - * BPF_PROG_TYPE_LWT_XMIT. + * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs + * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can + * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and + * **BPF_PROG_TYPE_LWT_XMIT**. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2087,7 +2087,7 @@ union bpf_attr { * inside the outermost IPv6 Segment Routing Header can be * modified through this helper. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2103,7 +2103,7 @@ union bpf_attr { * after the segments are accepted. *delta* can be as well * positive (growing) as negative (shrinking). * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2126,13 +2126,13 @@ union bpf_attr { * Type of *param*: **int**. * **SEG6_LOCAL_ACTION_END_B6** * End.B6 action: Endpoint bound to an SRv6 policy. - * Type of param: **struct ipv6_sr_hdr**. + * Type of *param*: **struct ipv6_sr_hdr**. * **SEG6_LOCAL_ACTION_END_B6_ENCAP** * End.B6.Encap action: Endpoint bound to an SRv6 * encapsulation policy. - * Type of param: **struct ipv6_sr_hdr**. + * Type of *param*: **struct ipv6_sr_hdr**. * - * A call to this helper is susceptible to change the underlaying + * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with @@ -2285,7 +2285,8 @@ union bpf_attr { * Return * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** - * result is from **reuse->socks**\ [] using the hash of the tuple. + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description @@ -2321,7 +2322,8 @@ union bpf_attr { * Return * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** - * result is from **reuse->socks**\ [] using the hash of the tuple. + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. * * int bpf_sk_release(struct bpf_sock *sock) * Description @@ -2490,31 +2492,34 @@ union bpf_attr { * network namespace *netns*. The return value must be checked, * and if non-**NULL**, released via **bpf_sk_release**\ (). * - * This function is identical to bpf_sk_lookup_tcp, except that it - * also returns timewait or request sockets. Use bpf_sk_fullsock - * or bpf_tcp_socket to access the full structure. + * This function is identical to **bpf_sk_lookup_tcp**\ (), except + * that it also returns timewait or request sockets. Use + * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the + * full structure. * * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** - * result is from **reuse->socks**\ [] using the hash of the tuple. + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. * * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description - * Check whether iph and th contain a valid SYN cookie ACK for - * the listening socket in sk. + * Check whether *iph* and *th* contain a valid SYN cookie ACK for + * the listening socket in *sk*. * - * iph points to the start of the IPv4 or IPv6 header, while - * iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr). + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). * - * th points to the start of the TCP header, while th_len contains - * sizeof(struct tcphdr). + * *th* points to the start of the TCP header, while *th_len* + * contains **sizeof**\ (**struct tcphdr**). * * Return - * 0 if iph and th are a valid SYN cookie ACK, or a negative error - * otherwise. + * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative + * error otherwise. * * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description @@ -2592,17 +2597,17 @@ union bpf_attr { * and save the result in *res*. * * The string may begin with an arbitrary amount of white space - * (as determined by isspace(3)) followed by a single optional '-' - * sign. + * (as determined by **isspace**\ (3)) followed by a single + * optional '**-**' sign. * * Five least significant bits of *flags* encode base, other bits * are currently unused. * * Base must be either 8, 10, 16 or 0 to detect it automatically - * similar to user space strtol(3). + * similar to user space **strtol**\ (3). * Return * Number of characters consumed on success. Must be positive but - * no more than buf_len. + * no more than *buf_len*. * * **-EINVAL** if no valid digits were found or unsupported base * was provided. @@ -2616,16 +2621,16 @@ union bpf_attr { * given base and save the result in *res*. * * The string may begin with an arbitrary amount of white space - * (as determined by isspace(3)). + * (as determined by **isspace**\ (3)). * * Five least significant bits of *flags* encode base, other bits * are currently unused. * * Base must be either 8, 10, 16 or 0 to detect it automatically - * similar to user space strtoul(3). + * similar to user space **strtoul**\ (3). * Return * Number of characters consumed on success. Must be positive but - * no more than buf_len. + * no more than *buf_len*. * * **-EINVAL** if no valid digits were found or unsupported base * was provided. @@ -2634,26 +2639,26 @@ union bpf_attr { * * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) * Description - * Get a bpf-local-storage from a sk. + * Get a bpf-local-storage from a *sk*. * * Logically, it could be thought of getting the value from * a *map* with *sk* as the **key**. From this * perspective, the usage is not much different from - * **bpf_map_lookup_elem(map, &sk)** except this - * helper enforces the key must be a **bpf_fullsock()** - * and the map must be a BPF_MAP_TYPE_SK_STORAGE also. + * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this + * helper enforces the key must be a full socket and the map must + * be a **BPF_MAP_TYPE_SK_STORAGE** also. * * Underneath, the value is stored locally at *sk* instead of - * the map. The *map* is used as the bpf-local-storage **type**. - * The bpf-local-storage **type** (i.e. the *map*) is searched - * against all bpf-local-storages residing at sk. + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf-local-storages residing at *sk*. * - * An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be + * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used - * together with BPF_SK_STORAGE_GET_F_CREATE to specify + * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify * the initial value of a bpf-local-storage. If *value* is - * NULL, the new bpf-local-storage will be zero initialized. + * **NULL**, the new bpf-local-storage will be zero initialized. * Return * A bpf-local-storage pointer is returned on success. * @@ -2662,7 +2667,7 @@ union bpf_attr { * * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description - * Delete a bpf-local-storage from a sk. + * Delete a bpf-local-storage from a *sk*. * Return * 0 on success. * From ff1f28c03f6a7cb5ee5802288258c2fc07ed9b07 Mon Sep 17 00:00:00 2001 From: Kelsey Skunberg Date: Sun, 12 May 2019 01:29:18 -0600 Subject: [PATCH 37/48] selftests: bpf: Add files generated after build to .gitignore The following files are generated after building /selftests/bpf/ and should be added to .gitignore: - libbpf.pc - libbpf.so.* Signed-off-by: Kelsey Skunberg Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 41e8a689aa77..a877803e4ba8 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -32,3 +32,5 @@ test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user alu32 +libbpf.pc +libbpf.so.* From d7c4b3980c18e81c0470f5df6d96d832f446d26f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 10 May 2019 14:13:15 -0700 Subject: [PATCH 38/48] libbpf: detect supported kernel BTF features and sanitize BTF Depending on used versions of libbpf, Clang, and kernel, it's possible to have valid BPF object files with valid BTF information, that still won't load successfully due to Clang emitting newer BTF features (e.g., BTF_KIND_FUNC, .BTF.ext's line_info/func_info, BTF_KIND_DATASEC, etc), that are not yet supported by older kernel. This patch adds detection of BTF features and sanitizes BPF object's BTF by substituting various supported BTF kinds, which have compatible layout: - BTF_KIND_FUNC -> BTF_KIND_TYPEDEF - BTF_KIND_FUNC_PROTO -> BTF_KIND_ENUM - BTF_KIND_VAR -> BTF_KIND_INT - BTF_KIND_DATASEC -> BTF_KIND_STRUCT Replacement is done in such a way as to preserve as much information as possible (names, sizes, etc) where possible without violating kernel's validation rules. v2->v3: - remove duplicate #defines from libbpf_util.h v1->v2: - add internal libbpf_internal.h w/ common stuff - switch SK storage BTF to use new libbpf__probe_raw_btf() Reported-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 130 +++++++++++++++++++++++++++++++- tools/lib/bpf/libbpf_internal.h | 27 +++++++ tools/lib/bpf/libbpf_probes.c | 71 +++++++++-------- 3 files changed, 196 insertions(+), 32 deletions(-) create mode 100644 tools/lib/bpf/libbpf_internal.h diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 11a65db4b93f..7e3b79d7c25f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -44,6 +44,7 @@ #include "btf.h" #include "str_error.h" #include "libbpf_util.h" +#include "libbpf_internal.h" #ifndef EM_BPF #define EM_BPF 247 @@ -128,6 +129,10 @@ struct bpf_capabilities { __u32 name:1; /* v5.2: kernel support for global data sections. */ __u32 global_data:1; + /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ + __u32 btf_func:1; + /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ + __u32 btf_datasec:1; }; /* @@ -1021,6 +1026,74 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) return false; } +static void bpf_object__sanitize_btf(struct bpf_object *obj) +{ + bool has_datasec = obj->caps.btf_datasec; + bool has_func = obj->caps.btf_func; + struct btf *btf = obj->btf; + struct btf_type *t; + int i, j, vlen; + __u16 kind; + + if (!obj->btf || (has_func && has_datasec)) + return; + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + t = (struct btf_type *)btf__type_by_id(btf, i); + kind = BTF_INFO_KIND(t->info); + + if (!has_datasec && kind == BTF_KIND_VAR) { + /* replace VAR with INT */ + t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); + t->size = sizeof(int); + *(int *)(t+1) = BTF_INT_ENC(0, 0, 32); + } else if (!has_datasec && kind == BTF_KIND_DATASEC) { + /* replace DATASEC with STRUCT */ + struct btf_var_secinfo *v = (void *)(t + 1); + struct btf_member *m = (void *)(t + 1); + struct btf_type *vt; + char *name; + + name = (char *)btf__name_by_offset(btf, t->name_off); + while (*name) { + if (*name == '.') + *name = '_'; + name++; + } + + vlen = BTF_INFO_VLEN(t->info); + t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); + for (j = 0; j < vlen; j++, v++, m++) { + /* order of field assignments is important */ + m->offset = v->offset * 8; + m->type = v->type; + /* preserve variable name as member name */ + vt = (void *)btf__type_by_id(btf, v->type); + m->name_off = vt->name_off; + } + } else if (!has_func && kind == BTF_KIND_FUNC_PROTO) { + /* replace FUNC_PROTO with ENUM */ + vlen = BTF_INFO_VLEN(t->info); + t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); + t->size = sizeof(__u32); /* kernel enforced */ + } else if (!has_func && kind == BTF_KIND_FUNC) { + /* replace FUNC with TYPEDEF */ + t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); + } + } +} + +static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) +{ + if (!obj->btf_ext) + return; + + if (!obj->caps.btf_func) { + btf_ext__free(obj->btf_ext); + obj->btf_ext = NULL; + } +} + static int bpf_object__elf_collect(struct bpf_object *obj, int flags) { Elf *elf = obj->efile.elf; @@ -1164,8 +1237,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->btf = NULL; } else { err = btf__finalize_data(obj, obj->btf); - if (!err) + if (!err) { + bpf_object__sanitize_btf(obj); err = btf__load(obj->btf); + } if (err) { pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n", BTF_ELF_SEC, err); @@ -1187,6 +1262,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); obj->btf_ext = NULL; + } else { + bpf_object__sanitize_btf_ext(obj); } } } @@ -1556,12 +1633,63 @@ bpf_object__probe_global_data(struct bpf_object *obj) return 0; } +static int bpf_object__probe_btf_func(struct bpf_object *obj) +{ + const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + int res; + + res = libbpf__probe_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); + if (res < 0) + return res; + if (res > 0) + obj->caps.btf_func = 1; + return 0; +} + +static int bpf_object__probe_btf_datasec(struct bpf_object *obj) +{ + const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + int res; + + res = libbpf__probe_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); + if (res < 0) + return res; + if (res > 0) + obj->caps.btf_datasec = 1; + return 0; +} + static int bpf_object__probe_caps(struct bpf_object *obj) { int (*probe_fn[])(struct bpf_object *obj) = { bpf_object__probe_name, bpf_object__probe_global_data, + bpf_object__probe_btf_func, + bpf_object__probe_btf_datasec, }; int i, ret; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h new file mode 100644 index 000000000000..789e435b5900 --- /dev/null +++ b/tools/lib/bpf/libbpf_internal.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Internal libbpf helpers. + * + * Copyright (c) 2019 Facebook + */ + +#ifndef __LIBBPF_LIBBPF_INTERNAL_H +#define __LIBBPF_LIBBPF_INTERNAL_H + +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset) +#define BTF_PARAM_ENC(name, type) (name), (type) +#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) + +int libbpf__probe_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len); + +#endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index a2c64a9ce1a6..5e2aa83f637a 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -15,6 +15,7 @@ #include "bpf.h" #include "libbpf.h" +#include "libbpf_internal.h" static bool grep(const char *buffer, const char *pattern) { @@ -132,21 +133,43 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) return errno != EINVAL && errno != EOPNOTSUPP; } -static int load_btf(void) +int libbpf__probe_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len) { -#define BTF_INFO_ENC(kind, kind_flag, vlen) \ - ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) -#define BTF_TYPE_ENC(name, info, size_or_type) \ - (name), (info), (size_or_type) -#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ - ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) -#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ - BTF_INT_ENC(encoding, bits_offset, bits) -#define BTF_MEMBER_ENC(name, type, bits_offset) \ - (name), (type), (bits_offset) + struct btf_header hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = types_len, + .str_off = types_len, + .str_len = str_len, + }; + int btf_fd, btf_len; + __u8 *raw_btf; - const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; + btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len; + raw_btf = malloc(btf_len); + if (!raw_btf) + return -ENOMEM; + + memcpy(raw_btf, &hdr, sizeof(hdr)); + memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); + memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); + + btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false); + if (btf_fd < 0) { + free(raw_btf); + return 0; + } + + close(btf_fd); + free(raw_btf); + return 1; +} + +static int load_sk_storage_btf(void) +{ + const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; /* struct bpf_spin_lock { * int val; * }; @@ -155,7 +178,7 @@ static int load_btf(void) * struct bpf_spin_lock l; * }; */ - __u32 btf_raw_types[] = { + __u32 types[] = { /* int */ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ /* struct bpf_spin_lock */ /* [2] */ @@ -166,23 +189,9 @@ static int load_btf(void) BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ }; - struct btf_header btf_hdr = { - .magic = BTF_MAGIC, - .version = BTF_VERSION, - .hdr_len = sizeof(struct btf_header), - .type_len = sizeof(btf_raw_types), - .str_off = sizeof(btf_raw_types), - .str_len = sizeof(btf_str_sec), - }; - __u8 raw_btf[sizeof(struct btf_header) + sizeof(btf_raw_types) + - sizeof(btf_str_sec)]; - memcpy(raw_btf, &btf_hdr, sizeof(btf_hdr)); - memcpy(raw_btf + sizeof(btf_hdr), btf_raw_types, sizeof(btf_raw_types)); - memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), - btf_str_sec, sizeof(btf_str_sec)); - - return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); + return libbpf__probe_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); } bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) @@ -222,7 +231,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) value_size = 8; max_entries = 0; map_flags = BPF_F_NO_PREALLOC; - btf_fd = load_btf(); + btf_fd = load_sk_storage_btf(); if (btf_fd < 0) return false; break; From e2f7fc0ac6957cabff4cecf6c721979b571af208 Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Wed, 8 May 2019 18:08:58 +0200 Subject: [PATCH 39/48] bpf: fix undefined behavior in narrow load handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 31fd85816dbe ("bpf: permits narrower load from bpf program context fields") made the verifier add AND instructions to clear the unwanted bits with a mask when doing a narrow load. The mask is computed with (1 << size * 8) - 1 where "size" is the size of the narrow load. When doing a 4 byte load of a an 8 byte field the verifier shifts the literal 1 by 32 places to the left. This results in an overflow of a signed integer, which is an undefined behavior. Typically, the computed mask was zero, so the result of the narrow load ended up being zero too. Cast the literal to long long to avoid overflows. Note that narrow load of the 4 byte fields does not have the undefined behavior, because the load size can only be either 1 or 2 bytes, so shifting 1 by 8 or 16 places will not overflow it. And reading 4 bytes would not be a narrow load of a 4 bytes field. Fixes: 31fd85816dbe ("bpf: permits narrower load from bpf program context fields") Reviewed-by: Alban Crequy Reviewed-by: Iago López Galeiras Signed-off-by: Krzesimir Nowak Cc: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7b05e8938d5c..95f9354495ad 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7599,7 +7599,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->dst_reg, shift); insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, - (1 << size * 8) - 1); + (1ULL << size * 8) - 1); } } From da86f59f170df3f46ea5366a519522a2234d1094 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 13 May 2019 09:30:33 +0200 Subject: [PATCH 40/48] net: mvpp2: cls: Add missing NETIF_F_NTUPLE flag Now that the mvpp2 driver supports classification offloading, we must add the NETIF_F_NTUPLE to the features list. Since the current code doesn't allow disabling the feature, we don't set the flag in dev->hw_features. Fixes: 90b509b39ac9 ("net: mvpp2: cls: Add Classification offload support") Reported-by: Jakub Kicinski Acked-by: Jakub Kicinski Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 56d43d9b43ef..d38952eb7aa9 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5058,8 +5058,10 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO | NETIF_F_HW_VLAN_CTAG_FILTER; - if (mvpp22_rss_is_supported()) + if (mvpp22_rss_is_supported()) { dev->hw_features |= NETIF_F_RXHASH; + dev->features |= NETIF_F_NTUPLE; + } if (port->pool_long->id == MVPP2_BM_JUMBO && port->id != 0) { dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); From 3ee9ae74ba4a85c3b378a0e49ec1481655f54500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Mon, 13 May 2019 11:41:39 +0200 Subject: [PATCH 41/48] of_net: Fix missing of_find_device_by_node ref count drop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit of_find_device_by_node takes a reference to the embedded struct device which needs to be dropped after use. Fixes: d01f449c008a ("of_net: add NVMEM support to of_get_mac_address") Reported-by: kbuild test robot Reported-by: Julia Lawall Signed-off-by: Petr Štetiar Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/of/of_net.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c index a4b392a5406b..6f1be80e8c4e 100644 --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c @@ -60,10 +60,13 @@ static const void *of_get_mac_addr_nvmem(struct device_node *np) return ERR_PTR(-ENODEV); ret = nvmem_get_mac_address(&pdev->dev, &nvmem_mac); - if (ret) + if (ret) { + put_device(&pdev->dev); return ERR_PTR(ret); + } mac = devm_kmemdup(&pdev->dev, nvmem_mac, ETH_ALEN, GFP_KERNEL); + put_device(&pdev->dev); if (!mac) return ERR_PTR(-ENOMEM); From 5afcd14cfc7fed1bcc8abcee2cef82732772bfc2 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 13 May 2019 13:15:17 +0200 Subject: [PATCH 42/48] net: seeq: fix crash caused by not set dev.parent The old MIPS implementation of dma_cache_sync() didn't use the dev argument, but commit c9eb6172c328 ("dma-mapping: turn dma_cache_sync into a dma_map_ops method") changed that, so we now need to set dev.parent. Signed-off-by: Thomas Bogendoerfer Signed-off-by: David S. Miller --- drivers/net/ethernet/seeq/sgiseeq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 70cce63a6081..696037d5ac3d 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -735,6 +735,7 @@ static int sgiseeq_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); sp = netdev_priv(dev); /* Make private data page aligned */ From dffe7d2e04670ff98e4dacf258df30446e2e80d4 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Mon, 13 May 2019 15:41:45 +0900 Subject: [PATCH 43/48] net: phy: realtek: Replace phy functions with non-locked version in rtl8211e_config_init() After calling phy_select_page() and until calling phy_restore_page(), the mutex 'mdio_lock' is already locked, so the driver should use non-locked version of phy functions. Or there will be a deadlock with 'mdio_lock'. This replaces phy functions called from rtl8211e_config_init() to avoid the deadlock issue. Fixes: f81dadbcf7fd ("net: phy: realtek: Add rtl8211e rx/tx delays config") Signed-off-by: Kunihiko Hayashi Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 4988ccea684a..a669945eb829 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -217,12 +217,12 @@ static int rtl8211e_config_init(struct phy_device *phydev) if (oldpage < 0) goto err_restore_page; - ret = phy_write(phydev, RTL821x_EXT_PAGE_SELECT, 0xa4); + ret = __phy_write(phydev, RTL821x_EXT_PAGE_SELECT, 0xa4); if (ret) goto err_restore_page; - ret = phy_modify(phydev, 0x1c, RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, - val); + ret = __phy_modify(phydev, 0x1c, RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, + val); err_restore_page: return phy_restore_page(phydev, oldpage, ret); From 0ecfc7e1ff67899eec3020b4b6d5126f82319ed6 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Sun, 12 May 2019 23:12:37 +0200 Subject: [PATCH 44/48] net: meson: fixup g12a glue ephy id The phy id chosen by Amlogic is incorrectly set in the mdio mux and does not match the phy driver. It was not detected before because DT forces the use the correct driver for the internal PHY. Fixes: 7090425104db ("net: phy: add amlogic g12a mdio mux support") Reported-by: Qi Duan Signed-off-by: Jerome Brunet Signed-off-by: David S. Miller --- drivers/net/phy/mdio-mux-meson-g12a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio-mux-meson-g12a.c b/drivers/net/phy/mdio-mux-meson-g12a.c index 6fa29ea8e2a3..6644762ff2ab 100644 --- a/drivers/net/phy/mdio-mux-meson-g12a.c +++ b/drivers/net/phy/mdio-mux-meson-g12a.c @@ -33,7 +33,7 @@ #define ETH_PLL_CTL7 0x60 #define ETH_PHY_CNTL0 0x80 -#define EPHY_G12A_ID 0x33000180 +#define EPHY_G12A_ID 0x33010180 #define ETH_PHY_CNTL1 0x84 #define PHY_CNTL1_ST_MODE GENMASK(2, 0) #define PHY_CNTL1_ST_PHYADD GENMASK(7, 3) From a9b8a2b39ce65df45687cf9ef648885c2a99fe75 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 10 May 2019 17:57:09 -0400 Subject: [PATCH 45/48] bonding: fix arp_validate toggling in active-backup mode There's currently a problem with toggling arp_validate on and off with an active-backup bond. At the moment, you can start up a bond, like so: modprobe bonding mode=1 arp_interval=100 arp_validate=0 arp_ip_targets=192.168.1.1 ip link set bond0 down echo "ens4f0" > /sys/class/net/bond0/bonding/slaves echo "ens4f1" > /sys/class/net/bond0/bonding/slaves ip link set bond0 up ip addr add 192.168.1.2/24 dev bond0 Pings to 192.168.1.1 work just fine. Now turn on arp_validate: echo 1 > /sys/class/net/bond0/bonding/arp_validate Pings to 192.168.1.1 continue to work just fine. Now when you go to turn arp_validate off again, the link falls flat on it's face: echo 0 > /sys/class/net/bond0/bonding/arp_validate dmesg ... [133191.911987] bond0: Setting arp_validate to none (0) [133194.257793] bond0: bond_should_notify_peers: slave ens4f0 [133194.258031] bond0: link status definitely down for interface ens4f0, disabling it [133194.259000] bond0: making interface ens4f1 the new active one [133197.330130] bond0: link status definitely down for interface ens4f1, disabling it [133197.331191] bond0: now running without any active interface! The problem lies in bond_options.c, where passing in arp_validate=0 results in bond->recv_probe getting set to NULL. This flies directly in the face of commit 3fe68df97c7f, which says we need to set recv_probe = bond_arp_recv, even if we're not using arp_validate. Said commit fixed this in bond_option_arp_interval_set, but missed that we can get to that same state in bond_option_arp_validate_set as well. One solution would be to universally set recv_probe = bond_arp_recv here as well, but I don't think bond_option_arp_validate_set has any business touching recv_probe at all, and that should be left to the arp_interval code, so we can just make things much tidier here. Fixes: 3fe68df97c7f ("bonding: always set recv_probe to bond_arp_rcv in arp monitor") CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: "David S. Miller" CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index da1fc17295d9..b996967af8d9 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1098,13 +1098,6 @@ static int bond_option_arp_validate_set(struct bonding *bond, { netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n", newval->string, newval->value); - - if (bond->dev->flags & IFF_UP) { - if (!newval->value) - bond->recv_probe = NULL; - else if (bond->params.arp_interval) - bond->recv_probe = bond_arp_rcv; - } bond->params.arp_validate = newval->value; return 0; From b1c17a9a353878602fd5bfe9103e4afe5e9a3f96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 May 2019 09:38:55 -0700 Subject: [PATCH 46/48] flow_dissector: disable preemption around BPF calls Various things in eBPF really require us to disable preemption before running an eBPF program. syzbot reported : BUG: assuming atomic context at net/core/flow_dissector.c:737 in_atomic(): 0, irqs_disabled(): 0, pid: 24710, name: syz-executor.3 2 locks held by syz-executor.3/24710: #0: 00000000e81a4bf1 (&tfile->napi_mutex){+.+.}, at: tun_get_user+0x168e/0x3ff0 drivers/net/tun.c:1850 #1: 00000000254afebd (rcu_read_lock){....}, at: __skb_flow_dissect+0x1e1/0x4bb0 net/core/flow_dissector.c:822 CPU: 1 PID: 24710 Comm: syz-executor.3 Not tainted 5.1.0+ #6 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 __cant_sleep kernel/sched/core.c:6165 [inline] __cant_sleep.cold+0xa3/0xbb kernel/sched/core.c:6142 bpf_flow_dissect+0xfe/0x390 net/core/flow_dissector.c:737 __skb_flow_dissect+0x362/0x4bb0 net/core/flow_dissector.c:853 skb_flow_dissect_flow_keys_basic include/linux/skbuff.h:1322 [inline] skb_probe_transport_header include/linux/skbuff.h:2500 [inline] skb_probe_transport_header include/linux/skbuff.h:2493 [inline] tun_get_user+0x2cfe/0x3ff0 drivers/net/tun.c:1940 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2037 call_write_iter include/linux/fs.h:1872 [inline] do_iter_readv_writev+0x5fd/0x900 fs/read_write.c:693 do_iter_write fs/read_write.c:970 [inline] do_iter_write+0x184/0x610 fs/read_write.c:951 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1015 do_writev+0x15b/0x330 fs/read_write.c:1058 __do_sys_writev fs/read_write.c:1131 [inline] __se_sys_writev fs/read_write.c:1128 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1128 do_syscall_64+0x103/0x670 arch/x86/entry/common.c:298 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Petar Penkov Cc: Stanislav Fomichev Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9ca784c592ac..548f39dde307 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -734,7 +734,9 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; + preempt_disable(); result = BPF_PROG_RUN(prog, ctx); + preempt_enable(); flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, From a8577e1312668a1d0fc996a0e5b31b3233fd79b3 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 13 May 2019 16:16:36 +0300 Subject: [PATCH 47/48] net: ethernet: ti: netcp_ethss: fix build Fix reported build fail: ERROR: "cpsw_ale_flush_multicast" [drivers/net/ethernet/ti/keystone_netcp_ethss.ko] undefined! ERROR: "cpsw_ale_create" [drivers/net/ethernet/ti/keystone_netcp_ethss.ko] undefined! ERROR: "cpsw_ale_add_vlan" [drivers/net/ethernet/ti/keystone_netcp_ethss.ko] undefined! Fixes: 16f54164828b ("net: ethernet: ti: cpsw: drop CONFIG_TI_CPSW_ALE config option") Reported-by: kbuild test robot Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index c3f53a40b48f..ed12e1e5df2f 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -19,4 +19,4 @@ ti_cpsw-y := cpsw.o davinci_cpdma.o cpsw_ale.o cpsw_priv.o cpsw_sl.o cpsw_ethtoo obj-$(CONFIG_TI_KEYSTONE_NETCP) += keystone_netcp.o keystone_netcp-y := netcp_core.o cpsw_ale.o obj-$(CONFIG_TI_KEYSTONE_NETCP_ETHSS) += keystone_netcp_ethss.o -keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o +keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale.o From d4c26eb6e721683a0f93e346ce55bc8dc3cbb175 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 13 May 2019 13:06:39 +0000 Subject: [PATCH 48/48] net: ethernet: stmmac: dwmac-sun8i: enable support of unicast filtering When adding more MAC addresses to a dwmac-sun8i interface, the device goes directly in promiscuous mode. This is due to IFF_UNICAST_FLT missing flag. So since the hardware support unicast filtering, let's add IFF_UNICAST_FLT. Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 195669f550f0..ba124a4da793 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1015,6 +1015,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) mac->mac = &sun8i_dwmac_ops; mac->dma = &sun8i_dwmac_dma_ops; + priv->dev->priv_flags |= IFF_UNICAST_FLT; + /* The loopback bit seems to be re-set when link change * Simply mask it each time * Speed 10/100/1000 are set in BIT(2)/BIT(3)