From c055d5b03bb4cb69d349d787c9787c0383abd8b2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 05:08:19 +0100 Subject: [PATCH 1/8] netfilter: bridge: query conntrack about skb dnat ask conntrack instead of storing ipv4 address in nf_bridge_info->data. Ths avoids the need to use ->data during NF_PRE_ROUTING. Only two functions that need ->data remain. These will be addressed in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 6 ------ net/bridge/br_netfilter.c | 27 +++++++++++++++++++++------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index bb39113ea596..de123d769ffc 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -54,12 +54,6 @@ static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) return 0; } -struct bridge_skb_cb { - union { - __be32 ipv4; - } daddr; -}; - static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index b260a97275db..261fcd5a42d6 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -37,17 +37,16 @@ #include #include +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + #include #include "br_private.h" #ifdef CONFIG_SYSCTL #include #endif -#define skb_origaddr(skb) (((struct bridge_skb_cb *) \ - (skb->nf_bridge->data))->daddr.ipv4) -#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr) -#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr) - #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables __read_mostly = 1; @@ -322,6 +321,22 @@ free_skb: return 0; } +static bool dnat_took_place(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_untracked(ct)) + return false; + + return test_bit(IPS_DST_NAT_BIT, &ct->status); +#else + return false; +#endif +} + /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. * @@ -625,7 +640,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_DROP; if (!setup_pre_routing(skb)) return NF_DROP; - store_orig_dstaddr(skb); + skb->protocol = htons(ETH_P_IP); NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, From e4bb9bcbfb7d67431dfd49860f62770a7f40193b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 10:36:48 +0100 Subject: [PATCH 2/8] netfilter: bridge: remove BRNF_STATE_BRIDGED flag Its not needed anymore since 2bf540b73ed5b ([NETFILTER]: bridge-netfilter: remove deferred hooks). Before this it was possible to have physoutdev set for locally generated packets -- this isn't the case anymore: BRNF_STATE_BRIDGED flag is set when we assign nf_bridge->physoutdev, so physoutdev != NULL means BRNF_STATE_BRIDGED is set. If physoutdev is NULL, then we are looking at locally-delivered and routed packet. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 1 - net/bridge/br_netfilter.c | 9 ++++++--- net/netfilter/xt_physdev.c | 3 +-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index de123d769ffc..ed0d3bf953c3 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -19,7 +19,6 @@ enum nf_br_hook_priorities { #define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 -#define BRNF_BRIDGED 0x04 #define BRNF_NF_BRIDGE_PREROUTING 0x08 #define BRNF_8021Q 0x10 #define BRNF_PPPoE 0x20 diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 261fcd5a42d6..bd2d24d1ff21 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -736,8 +736,6 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) return NF_DROP; - /* The physdev module checks on this */ - nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); @@ -857,7 +855,12 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; - if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) + /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in + * on a bridge, but was delivered locally and is now being routed: + * + * POST_ROUTING was already invoked from the ip stack. + */ + if (!nf_bridge || !nf_bridge->physoutdev) return NF_ACCEPT; if (!realoutdev) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index f440f57a452f..50a52043650f 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -56,8 +56,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) /* This only makes sense in the FORWARD and POSTROUTING chains */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && - (!!(nf_bridge->mask & BRNF_BRIDGED) ^ - !(info->invert & XT_PHYSDEV_OP_BRIDGED))) + (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) return false; if ((info->bitmask & XT_PHYSDEV_OP_ISIN && From 1ca9e41770cba46dcc7c2a9c6ac28350ed866695 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 16 Mar 2015 11:25:17 -0700 Subject: [PATCH 3/8] netfilter: Remove uses of seq_ return values The seq_printf/seq_puts/seq_putc return values, because they are frequently misused, will eventually be converted to void. See: commit 1f33c41c03da ("seq_file: Rename seq_overflow() to seq_has_overflowed() and make public") Miscellanea: o realign arguments Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +++- net/netfilter/nf_conntrack_acct.c | 8 +++++--- net/netfilter/nf_conntrack_expect.c | 4 +++- net/netfilter/nfnetlink_log.c | 12 +++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index a460a87e14f8..f0dfe92a00d6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -300,7 +300,9 @@ static int exp_seq_show(struct seq_file *s, void *v) __nf_ct_l3proto_find(exp->tuple.src.l3num), __nf_ct_l4proto_find(exp->tuple.src.l3num, exp->tuple.dst.protonum)); - return seq_putc(s, '\n'); + seq_putc(s, '\n'); + + return 0; } static const struct seq_operations exp_seq_ops = { diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index a4b5e2a435ac..45da11afa785 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -47,9 +47,11 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) return 0; counter = acct->counter; - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&counter[dir].packets), - (unsigned long long)atomic64_read(&counter[dir].bytes)); + seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); + + return 0; }; EXPORT_SYMBOL_GPL(seq_print_acct); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 91a1837acd0e..7a17070c5dab 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -561,7 +561,9 @@ static int exp_seq_show(struct seq_file *s, void *v) helper->expect_policy[expect->class].name); } - return seq_putc(s, '\n'); + seq_putc(s, '\n'); + + return 0; } static const struct seq_operations exp_seq_ops = { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 11d85b3813f2..db5e3a80fc6d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -998,11 +998,13 @@ static int seq_show(struct seq_file *s, void *v) { const struct nfulnl_instance *inst = v; - return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", - inst->group_num, - inst->peer_portid, inst->qlen, - inst->copy_mode, inst->copy_range, - inst->flushtimeout, atomic_read(&inst->use)); + seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", + inst->group_num, + inst->peer_portid, inst->qlen, + inst->copy_mode, inst->copy_range, + inst->flushtimeout, atomic_read(&inst->use)); + + return 0; } static const struct seq_operations nful_seq_ops = { From ffdb210eb415501c289f6becafb54fe2f4535efa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 17 Mar 2015 19:53:23 +0100 Subject: [PATCH 4/8] netfilter: nf_tables: consolidate error path of nf_tables_newtable() Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ea51833c8f5a..a072d8769b9b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -687,11 +687,10 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, if (!try_module_get(afi->owner)) return -EAFNOSUPPORT; + err = -ENOMEM; table = kzalloc(sizeof(*table), GFP_KERNEL); - if (table == NULL) { - module_put(afi->owner); - return -ENOMEM; - } + if (table == NULL) + goto err1; nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); @@ -700,13 +699,16 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); - if (err < 0) { - kfree(table); - module_put(afi->owner); - return err; - } + if (err < 0) + goto err2; + list_add_tail_rcu(&table->list, &afi->tables); return 0; +err2: + kfree(table); +err1: + module_put(afi->owner); + return err; } static int nft_flush_table(struct nft_ctx *ctx) From 8d0451638ad3f7ccd5250c1dd90e06ad487b2703 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Mar 2015 20:55:31 +0100 Subject: [PATCH 5/8] netfilter: bridge: kill nf_bridge_pad The br_netfilter frag output function calls skb_cow_head() so in case it needs a larger headroom to e.g. re-add a previously stripped PPPOE or VLAN header things will still work (at cost of reallocation). We can then move nf_bridge_encap_header_len to br_netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 22 ---------------------- net/bridge/br_netfilter.c | 12 ++++++++++++ net/ipv4/ip_output.c | 5 +---- 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ed0d3bf953c3..2734977199ca 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -23,18 +23,6 @@ enum nf_br_hook_priorities { #define BRNF_8021Q 0x10 #define BRNF_PPPoE 0x20 -static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) -{ - switch (skb->protocol) { - case __cpu_to_be16(ETH_P_8021Q): - return VLAN_HLEN; - case __cpu_to_be16(ETH_P_PPP_SES): - return PPPOE_SES_HLEN; - default: - return 0; - } -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) @@ -44,15 +32,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) int br_handle_frame_finish(struct sk_buff *skb); -/* This is called by the IP fragmenting code and it ensures there is - * enough room for the encapsulating header (if there is one). */ -static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) -{ - if (skb->nf_bridge) - return nf_bridge_encap_header_len(skb); - return 0; -} - static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -62,7 +41,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) } #else -#define nf_bridge_pad(skb) (0) #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bd2d24d1ff21..f3884a1b942f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -153,6 +153,18 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) return nf_bridge; } +static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __cpu_to_be16(ETH_P_8021Q): + return VLAN_HLEN; + case __cpu_to_be16(ETH_P_PPP_SES): + return PPPOE_SES_HLEN; + default: + return 0; + } +} + static inline void nf_bridge_push_encap_header(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a7aea2048a0d..90b49e88e84a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -636,10 +636,7 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ - /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header - */ - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); + ll_rs = LL_RESERVED_SPACE(rt->dst.dev); /* * Fragment the datagram. From 16c45eda96038aae848b6cfd42e2bf4b5e80f365 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 21 Mar 2015 15:19:14 +0000 Subject: [PATCH 6/8] netfilter: nft_rbtree: fix locking Fix a race condition and unnecessary locking: * the root rb_node must only be accessed under the lock in nft_rbtree_lookup() * the lock is not needed in lookup functions in netlink context Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_rbtree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 46214f245665..2c75361077f7 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -37,10 +37,11 @@ static bool nft_rbtree_lookup(const struct nft_set *set, { const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; - const struct rb_node *parent = priv->root.rb_node; + const struct rb_node *parent; int d; spin_lock_bh(&nft_rbtree_lock); + parent = priv->root.rb_node; while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -158,7 +159,6 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) struct nft_rbtree_elem *rbe; int d; - spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -173,11 +173,9 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(&elem->data, rbe->data); elem->flags = rbe->flags; - spin_unlock_bh(&nft_rbtree_lock); return 0; } } - spin_unlock_bh(&nft_rbtree_lock); return -ENOENT; } From 55df35d22fe3433032d82b8c67dfd283cb071953 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 21 Mar 2015 15:19:16 +0000 Subject: [PATCH 7/8] netfilter: nf_tables: reject NFT_SET_ELEM_INTERVAL_END flag for non-interval sets Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a072d8769b9b..f7e3371ce856 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3138,6 +3138,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) return -EINVAL; + if (!(set->flags & NFT_SET_INTERVAL) && + elem.flags & NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; } if (set->flags & NFT_SET_MAP) { From e35158e40110270600698f19bda5e21d8ce709d7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 21 Mar 2015 20:20:23 +0100 Subject: [PATCH 8/8] netfilter: ip6t_REJECT: check for IP6T_F_PROTO Make sure IP6T_F_PROTO is set to enforce layer 4 protocol matching from the ip6_tables core. Suggested-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_REJECT.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 544b0a9da1b5..12331efd49cf 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -83,7 +83,8 @@ static int reject_tg6_check(const struct xt_tgchk_param *par) return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ - if (e->ipv6.proto != IPPROTO_TCP || + if (!(e->ipv6.flags & IP6T_F_PROTO) || + e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { pr_info("TCP_RESET illegal for non-tcp\n"); return -EINVAL;