From e4781421e883340b796da5a724bda7226817990b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Dec 2016 21:57:02 +0100 Subject: [PATCH 01/27] netfilter: merge udp and udplite conntrack helpers udplite was copied from udp, they are virtually 100% identical. This adds udplite tracker to udp instead, removes udplite module, and then makes the udplite tracker builtin. udplite will then simply re-use udp timeout settings. It makes little sense to add separate sysctls, nowadays we have fine-grained timeout policy support via the CT target. old: text data bss dec hex filename 1633 672 0 2305 901 nf_conntrack_proto_udp.o 1756 672 0 2428 97c nf_conntrack_proto_udplite.o 69526 17937 268 87731 156b3 nf_conntrack.ko new: text data bss dec hex filename 2442 1184 0 3626 e2a nf_conntrack_proto_udp.o 68565 17721 268 86554 1521a nf_conntrack.ko Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../net/netfilter/ipv4/nf_conntrack_ipv4.h | 1 + .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 1 + include/net/netns/conntrack.h | 16 - net/netfilter/Makefile | 1 - net/netfilter/nf_conntrack_proto_udp.c | 123 +++++++ net/netfilter/nf_conntrack_proto_udplite.c | 324 ------------------ 6 files changed, 125 insertions(+), 341 deletions(-) delete mode 100644 net/netfilter/nf_conntrack_proto_udplite.c diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 919e4e8af327..6ff32815641b 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -14,6 +14,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index eaea968f8657..c59b82456f89 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -5,6 +5,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index cf799fc3fdec..17724c62de97 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -69,19 +69,6 @@ struct nf_sctp_net { }; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -enum udplite_conntrack { - UDPLITE_CT_UNREPLIED, - UDPLITE_CT_REPLIED, - UDPLITE_CT_MAX -}; - -struct nf_udplite_net { - struct nf_proto_net pn; - unsigned int timeouts[UDPLITE_CT_MAX]; -}; -#endif - struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -94,9 +81,6 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - struct nf_udplite_net udplite; -#endif }; struct ct_pcpu { diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ca30d1960f1d..bf5c577113b6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -7,7 +7,6 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o -nf_conntrack-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 20f35ed68030..ae63944c9dc4 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -108,6 +108,59 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +static int udplite_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + u8 pf, unsigned int hooknum) +{ + unsigned int udplen = skb->len - dataoff; + const struct udphdr *hdr; + struct udphdr _hdr; + unsigned int cscov; + + /* Header is too small? */ + hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (!hdr) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: short packet "); + return -NF_ACCEPT; + } + + cscov = ntohs(hdr->len); + if (cscov == 0) { + cscov = udplen; + } else if (cscov < sizeof(*hdr) || cscov > udplen) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: invalid checksum coverage "); + return -NF_ACCEPT; + } + + /* UDPLITE mandates checksums */ + if (!hdr->check) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: checksum missing "); + return -NF_ACCEPT; + } + + /* Checksum invalid? Ignore. */ + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && + nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, + pf)) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: bad UDPLite checksum "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} +#endif + static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, @@ -290,6 +343,41 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +{ + .l3proto = PF_INET, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .allow_clash = true, + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .packet = udp_packet, + .get_timeouts = udp_get_timeouts, + .new = udp_new, + .error = udplite_error, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, + .nla_policy = nf_ct_port_nla_policy, +#endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, +}; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); +#endif + struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = { .l3proto = PF_INET6, @@ -322,3 +410,38 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .get_net_proto = udp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); + +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +{ + .l3proto = PF_INET6, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .allow_clash = true, + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .packet = udp_packet, + .get_timeouts = udp_get_timeouts, + .new = udp_new, + .error = udplite_error, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, + .nla_policy = nf_ct_port_nla_policy, +#endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, +}; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); +#endif diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c deleted file mode 100644 index c35f7bf05d8c..000000000000 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ /dev/null @@ -1,324 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team - * (C) 2007 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = { - [UDPLITE_CT_UNREPLIED] = 30*HZ, - [UDPLITE_CT_REPLIED] = 180*HZ, -}; - -static inline struct nf_udplite_net *udplite_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.udplite; -} - -static bool udplite_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct net *net, - struct nf_conntrack_tuple *tuple) -{ - const struct udphdr *hp; - struct udphdr _hdr; - - /* Actually only need first 4 bytes to get ports. */ - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); - if (hp == NULL) - return false; - - tuple->src.u.udp.port = hp->source; - tuple->dst.u.udp.port = hp->dest; - return true; -} - -static bool udplite_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) -{ - tuple->src.u.udp.port = orig->dst.u.udp.port; - tuple->dst.u.udp.port = orig->src.u.udp.port; - return true; -} - -/* Print out the per-protocol part of the tuple. */ -static void udplite_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.udp.port), - ntohs(tuple->dst.u.udp.port)); -} - -static unsigned int *udplite_get_timeouts(struct net *net) -{ - return udplite_pernet(net)->timeouts; -} - -/* Returns verdict for packet, and may modify conntracktype */ -static int udplite_packet(struct nf_conn *ct, - const struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, - unsigned int *timeouts) -{ - /* If we've seen traffic both ways, this is some kind of UDP - stream. Extend timeout. */ - if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDPLITE_CT_REPLIED]); - /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } else { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDPLITE_CT_UNREPLIED]); - } - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff, unsigned int *timeouts) -{ - return true; -} - -static int udplite_error(struct net *net, struct nf_conn *tmpl, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info *ctinfo, - u_int8_t pf, - unsigned int hooknum) -{ - unsigned int udplen = skb->len - dataoff; - const struct udphdr *hdr; - struct udphdr _hdr; - unsigned int cscov; - - /* Header is too small? */ - hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); - if (hdr == NULL) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: short packet "); - return -NF_ACCEPT; - } - - cscov = ntohs(hdr->len); - if (cscov == 0) - cscov = udplen; - else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: invalid checksum coverage "); - return -NF_ACCEPT; - } - - /* UDPLITE mandates checksums */ - if (!hdr->check) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: checksum missing "); - return -NF_ACCEPT; - } - - /* Checksum invalid? Ignore. */ - if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && - nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, - pf)) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: bad UDPLite checksum "); - return -NF_ACCEPT; - } - - return NF_ACCEPT; -} - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - -#include -#include - -static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], - struct net *net, void *data) -{ - unsigned int *timeouts = data; - struct nf_udplite_net *un = udplite_pernet(net); - - /* set default timeouts for UDPlite. */ - timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; - timeouts[UDPLITE_CT_REPLIED] = un->timeouts[UDPLITE_CT_REPLIED]; - - if (tb[CTA_TIMEOUT_UDPLITE_UNREPLIED]) { - timeouts[UDPLITE_CT_UNREPLIED] = - ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_UNREPLIED])) * HZ; - } - if (tb[CTA_TIMEOUT_UDPLITE_REPLIED]) { - timeouts[UDPLITE_CT_REPLIED] = - ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_REPLIED])) * HZ; - } - return 0; -} - -static int -udplite_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) -{ - const unsigned int *timeouts = data; - - if (nla_put_be32(skb, CTA_TIMEOUT_UDPLITE_UNREPLIED, - htonl(timeouts[UDPLITE_CT_UNREPLIED] / HZ)) || - nla_put_be32(skb, CTA_TIMEOUT_UDPLITE_REPLIED, - htonl(timeouts[UDPLITE_CT_REPLIED] / HZ))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -ENOSPC; -} - -static const struct nla_policy -udplite_timeout_nla_policy[CTA_TIMEOUT_UDPLITE_MAX+1] = { - [CTA_TIMEOUT_UDPLITE_UNREPLIED] = { .type = NLA_U32 }, - [CTA_TIMEOUT_UDPLITE_REPLIED] = { .type = NLA_U32 }, -}; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - -#ifdef CONFIG_SYSCTL -static struct ctl_table udplite_sysctl_table[] = { - { - .procname = "nf_conntrack_udplite_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_udplite_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_udplite_net *un) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(udplite_sysctl_table, - sizeof(udplite_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED]; - pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED]; -#endif - return 0; -} - -static int udplite_init_net(struct net *net, u_int16_t proto) -{ - struct nf_udplite_net *un = udplite_pernet(net); - struct nf_proto_net *pn = &un->pn; - - if (!pn->users) { - int i; - - for (i = 0 ; i < UDPLITE_CT_MAX; i++) - un->timeouts[i] = udplite_timeouts[i]; - } - - return udplite_kmemdup_sysctl_table(pn, un); -} - -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = -{ - .l3proto = PF_INET, - .l4proto = IPPROTO_UDPLITE, - .name = "udplite", - .allow_clash = true, - .pkt_to_tuple = udplite_pkt_to_tuple, - .invert_tuple = udplite_invert_tuple, - .print_tuple = udplite_print_tuple, - .packet = udplite_packet, - .get_timeouts = udplite_get_timeouts, - .new = udplite_new, - .error = udplite_error, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nla_policy = nf_ct_port_nla_policy, -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - .ctnl_timeout = { - .nlattr_to_obj = udplite_timeout_nlattr_to_obj, - .obj_to_nlattr = udplite_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, - .obj_size = sizeof(unsigned int) * - CTA_TIMEOUT_UDPLITE_MAX, - .nla_policy = udplite_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = udplite_init_net, -}; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); - -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = -{ - .l3proto = PF_INET6, - .l4proto = IPPROTO_UDPLITE, - .name = "udplite", - .allow_clash = true, - .pkt_to_tuple = udplite_pkt_to_tuple, - .invert_tuple = udplite_invert_tuple, - .print_tuple = udplite_print_tuple, - .packet = udplite_packet, - .get_timeouts = udplite_get_timeouts, - .new = udplite_new, - .error = udplite_error, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nla_policy = nf_ct_port_nla_policy, -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - .ctnl_timeout = { - .nlattr_to_obj = udplite_timeout_nlattr_to_obj, - .obj_to_nlattr = udplite_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, - .obj_size = sizeof(unsigned int) * - CTA_TIMEOUT_UDPLITE_MAX, - .nla_policy = udplite_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = udplite_init_net, -}; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); From 9700ba805b4f58d26c4621ad6ba6b0e632e7a04b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Dec 2016 21:57:03 +0100 Subject: [PATCH 02/27] netfilter: nat: merge udp and udplite helpers udplite nat was copied from udp nat, they are virtually 100% identical. Not really surprising given udplite is just udp with partial csum coverage. old: text data bss dec hex filename 11606 1457 210 13273 33d9 nf_nat.ko 330 0 2 332 14c nf_nat_proto_udp.o 276 0 2 278 116 nf_nat_proto_udplite.o new: text data bss dec hex filename 11598 1457 210 13265 33d1 nf_nat.ko 640 0 4 644 284 nf_nat_proto_udp.o Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Makefile | 1 - net/netfilter/nf_nat_proto_udp.c | 78 +++++++++++++++++++++++----- net/netfilter/nf_nat_proto_udplite.c | 73 -------------------------- 3 files changed, 66 insertions(+), 86 deletions(-) delete mode 100644 net/netfilter/nf_nat_proto_udplite.c diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index bf5c577113b6..6b3034f12661 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -46,7 +46,6 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ # NAT protocols (nf_nat) nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o -nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index b1e627227b6e..edd4a77dc09a 100644 --- a/net/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -30,20 +30,15 @@ udp_unique_tuple(const struct nf_nat_l3proto *l3proto, &udp_port_rover); } -static bool -udp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) +static void +__udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, struct udphdr *hdr, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, bool do_csum) { - struct udphdr *hdr; __be16 *portptr, newport; - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - hdr = (struct udphdr *)(skb->data + hdroff); - if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src port */ newport = tuple->src.u.udp.port; @@ -53,7 +48,7 @@ udp_manip_pkt(struct sk_buff *skb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { + if (do_csum) { l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, @@ -62,9 +57,68 @@ udp_manip_pkt(struct sk_buff *skb, hdr->check = CSUM_MANGLED_0; } *portptr = newport; +} + +static bool udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + bool do_csum; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; + + __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum); return true; } +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE +static u16 udplite_port_rover; + +static bool udplite_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true); + return true; +} + +static void +udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &udplite_port_rover); +} + +const struct nf_nat_l4proto nf_nat_l4proto_udplite = { + .l4proto = IPPROTO_UDPLITE, + .manip_pkt = udplite_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = udplite_unique_tuple, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; +#endif /* CONFIG_NF_NAT_PROTO_UDPLITE */ + const struct nf_nat_l4proto nf_nat_l4proto_udp = { .l4proto = IPPROTO_UDP, .manip_pkt = udp_manip_pkt, diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c deleted file mode 100644 index 366bfbfd82a1..000000000000 --- a/net/netfilter/nf_nat_proto_udplite.c +++ /dev/null @@ -1,73 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include -#include -#include -#include - -static u16 udplite_port_rover; - -static void -udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &udplite_port_rover); -} - -static bool -udplite_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct udphdr *hdr; - __be16 *portptr, newport; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct udphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of source port */ - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst port */ - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - - l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false); - if (!hdr->check) - hdr->check = CSUM_MANGLED_0; - - *portptr = newport; - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_udplite = { - .l4proto = IPPROTO_UDPLITE, - .manip_pkt = udplite_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = udplite_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; From 237bab6611c607a9e63d50164609923feb8b83b3 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 25 Dec 2016 19:58:58 +0800 Subject: [PATCH 03/27] netfilter: nf_tables: add missing descriptions in nft_ct_keys We missed to add descriptions about NFT_CT_LABELS, NFT_CT_PKTS and NFT_CT_BYTES, now add it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 881d49e94569..5726f90bfc2f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -860,6 +860,9 @@ enum nft_rt_attributes { * @NFT_CT_PROTOCOL: conntrack layer 4 protocol * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + * @NFT_CT_LABELS: conntrack labels + * @NFT_CT_PKTS: conntrack packets + * @NFT_CT_BYTES: conntrack bytes */ enum nft_ct_keys { NFT_CT_STATE, From 949a358418aae397d7cf1622aa6515eca766b9e7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 25 Dec 2016 19:58:59 +0800 Subject: [PATCH 04/27] netfilter: nft_ct: add average bytes per packet support Similar to xt_connbytes, user can match how many average bytes per packet a connection has transferred so far. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_ct.c | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5726f90bfc2f..b00a05d1ee56 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -863,6 +863,7 @@ enum nft_rt_attributes { * @NFT_CT_LABELS: conntrack labels * @NFT_CT_PKTS: conntrack packets * @NFT_CT_BYTES: conntrack bytes + * @NFT_CT_AVGPKT: conntrack average bytes per packet */ enum nft_ct_keys { NFT_CT_STATE, @@ -881,6 +882,7 @@ enum nft_ct_keys { NFT_CT_LABELS, NFT_CT_PKTS, NFT_CT_BYTES, + NFT_CT_AVGPKT, }; /** diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index e6baeaebe653..d774d7823688 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -129,6 +129,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr, memcpy(dest, &count, sizeof(count)); return; } + case NFT_CT_AVGPKT: { + const struct nf_conn_acct *acct = nf_conn_acct_find(ct); + u64 avgcnt = 0, bcnt = 0, pcnt = 0; + + if (acct) { + pcnt = nft_ct_get_eval_counter(acct->counter, + NFT_CT_PKTS, priv->dir); + bcnt = nft_ct_get_eval_counter(acct->counter, + NFT_CT_BYTES, priv->dir); + if (pcnt != 0) + avgcnt = div64_u64(bcnt, pcnt); + } + + memcpy(dest, &avgcnt, sizeof(avgcnt)); + return; + } case NFT_CT_L3PROTOCOL: *dest = nf_ct_l3num(ct); return; @@ -316,6 +332,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, break; case NFT_CT_BYTES: case NFT_CT_PKTS: + case NFT_CT_AVGPKT: /* no direction? return sum of original + reply */ if (tb[NFTA_CT_DIRECTION] == NULL) priv->dir = IP_CT_DIR_MAX; @@ -346,7 +363,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS) + if (priv->key == NFT_CT_BYTES || + priv->key == NFT_CT_PKTS || + priv->key == NFT_CT_AVGPKT) nf_ct_set_acct(ctx->net, true); return 0; @@ -445,6 +464,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) break; case NFT_CT_BYTES: case NFT_CT_PKTS: + case NFT_CT_AVGPKT: if (priv->dir < IP_CT_DIR_MAX && nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) goto nla_put_failure; From 300ae149468f440a2629fa8b33d0ce1e860d479f Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 2 Jan 2017 13:29:40 +0100 Subject: [PATCH 05/27] netfilter: select LIBCRC32C together with SCTP conntrack nf_conntrack needs to compute crc32c when dealing with SCTP packets. Moreover, NF_NAT_PROTO_SCTP (currently selecting LIBCRC32C) can be enabled only if conntrack support for SCTP is enabled. Therefore, move enabling of kernel support for crc32c so that it is selected when NF_CT_PROTO_SCTP=y. Signed-off-by: Davide Caratti Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 63729b489c2c..6d425e355bf5 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -162,6 +162,7 @@ config NF_CT_PROTO_SCTP bool 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED default y + select LIBCRC32C help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. @@ -397,7 +398,6 @@ config NF_NAT_PROTO_SCTP bool default NF_NAT && NF_CT_PROTO_SCTP depends on NF_NAT && NF_CT_PROTO_SCTP - select LIBCRC32C config NF_NAT_AMANDA tristate From cf6e007eef83476c5d541453d84e08b07befe124 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 2 Jan 2017 13:29:41 +0100 Subject: [PATCH 06/27] netfilter: conntrack: validate SCTP crc32c in PREROUTING implement sctp_error to let nf_conntrack_in validate crc32c on the packet transport header. Assign skb->ip_summed to CHECKSUM_UNNECESSARY and return NF_ACCEPT in case of successful validation; otherwise, return -NF_ACCEPT to let netfilter skip connection tracking, like other protocols do. Besides preventing corrupted packets from matching conntrack entries, this fixes functionality of REJECT target: it was not generating any ICMP upon reception of SCTP packets, because it was computing RFC 1624 checksum on the packet and systematically mismatching crc32c in the SCTP header. Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_sctp.c | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index a0efde38da44..44a647418948 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -22,7 +22,9 @@ #include #include #include +#include +#include #include #include #include @@ -505,6 +507,34 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, + u8 pf, unsigned int hooknum) +{ + const struct sctphdr *sh; + struct sctphdr _sctph; + const char *logmsg; + + sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); + if (!sh) { + logmsg = "nf_ct_sctp: short packet "; + goto out_invalid; + } + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && + skb->ip_summed == CHECKSUM_NONE) { + if (sh->checksum != sctp_compute_cksum(skb, dataoff)) { + logmsg = "nf_ct_sctp: bad CRC "; + goto out_invalid; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + return NF_ACCEPT; +out_invalid: + if (LOG_INVALID(net, IPPROTO_SCTP)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg); + return -NF_ACCEPT; +} + #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include @@ -752,6 +782,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, .new = sctp_new, + .error = sctp_error, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, @@ -786,6 +817,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, .new = sctp_new, + .error = sctp_error, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, From 4cc4b72c136a45aeccd86f66b0859b148b47d881 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 20 Dec 2016 22:02:13 +0800 Subject: [PATCH 07/27] netfilter: xt_connlimit: use rb_entry() To make the code clearer, use rb_entry() instead of container_of() to deal with rbtree. Signed-off-by: Geliang Tang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connlimit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 2aff2b7c4689..660b61dbd776 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -218,7 +218,7 @@ count_tree(struct net *net, struct rb_root *root, int diff; bool addit; - rbconn = container_of(*rbnode, struct xt_connlimit_rb, node); + rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); parent = *rbnode; diff = same_source_net(addr, mask, &rbconn->addr, family); @@ -398,7 +398,7 @@ static void destroy_tree(struct rb_root *r) struct rb_node *node; while ((node = rb_first(r)) != NULL) { - rbconn = container_of(node, struct xt_connlimit_rb, node); + rbconn = rb_entry(node, struct xt_connlimit_rb, node); rb_erase(node, r); From f32815d21d4d8287336fb9cef4d2d9e0866214c2 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 2 Jan 2017 17:19:40 -0500 Subject: [PATCH 08/27] xtables: add xt_match, xt_target and data copy_to_user functions xt_entry_target, xt_entry_match and their private data may contain kernel data. Introduce helper functions xt_match_to_user, xt_target_to_user and xt_data_to_user that copy only the expected fields. These replace existing logic that calls copy_to_user on entire structs, then overwrites select fields. Private data is defined in xt_match and xt_target. All matches and targets that maintain kernel data store this at the tail of their private structure. Extend xt_match and xt_target with .usersize to limit how many bytes of data are copied. The remainder is cleared. If compatsize is specified, usersize can only safely be used if all fields up to usersize use platform-independent types. Otherwise, the compat_to_user callback must be defined. This patch does not yet enable the support logic. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 9 +++++ net/netfilter/x_tables.c | 54 ++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5117e4d2ddfa..be378cf47fcc 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -167,6 +167,7 @@ struct xt_match { const char *table; unsigned int matchsize; + unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif @@ -207,6 +208,7 @@ struct xt_target { const char *table; unsigned int targetsize; + unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif @@ -287,6 +289,13 @@ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +int xt_match_to_user(const struct xt_entry_match *m, + struct xt_entry_match __user *u); +int xt_target_to_user(const struct xt_entry_target *t, + struct xt_entry_target __user *u); +int xt_data_to_user(void __user *dst, const void *src, + int usersize, int size); + void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2ff499680cc6..feccf527abdd 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -262,6 +262,60 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) } EXPORT_SYMBOL_GPL(xt_request_find_target); + +static int xt_obj_to_user(u16 __user *psize, u16 size, + void __user *pname, const char *name, + u8 __user *prev, u8 rev) +{ + if (put_user(size, psize)) + return -EFAULT; + if (copy_to_user(pname, name, strlen(name) + 1)) + return -EFAULT; + if (put_user(rev, prev)) + return -EFAULT; + + return 0; +} + +#define XT_OBJ_TO_USER(U, K, TYPE, C_SIZE) \ + xt_obj_to_user(&U->u.TYPE##_size, C_SIZE ? : K->u.TYPE##_size, \ + U->u.user.name, K->u.kernel.TYPE->name, \ + &U->u.user.revision, K->u.kernel.TYPE->revision) + +int xt_data_to_user(void __user *dst, const void *src, + int usersize, int size) +{ + usersize = usersize ? : size; + if (copy_to_user(dst, src, usersize)) + return -EFAULT; + if (usersize != size && clear_user(dst + usersize, size - usersize)) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL_GPL(xt_data_to_user); + +#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ + xt_data_to_user(U->data, K->data, \ + K->u.kernel.TYPE->usersize, \ + C_SIZE ? : K->u.kernel.TYPE->TYPE##size) + +int xt_match_to_user(const struct xt_entry_match *m, + struct xt_entry_match __user *u) +{ + return XT_OBJ_TO_USER(u, m, match, 0) || + XT_DATA_TO_USER(u, m, match, 0); +} +EXPORT_SYMBOL_GPL(xt_match_to_user); + +int xt_target_to_user(const struct xt_entry_target *t, + struct xt_entry_target __user *u) +{ + return XT_OBJ_TO_USER(u, t, target, 0) || + XT_DATA_TO_USER(u, t, target, 0); +} +EXPORT_SYMBOL_GPL(xt_target_to_user); + static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_match *m; From f77bc5b23fb1af51fc0faa8a479dea8969eb5079 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 2 Jan 2017 17:19:41 -0500 Subject: [PATCH 09/27] iptables: use match, target and data copy_to_user helpers Convert iptables to copying entries, matches and targets one by one, using the xt_match_to_user and xt_target_to_user helper functions. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ip_tables.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 91656a1d8fbd..384b85713e06 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -826,10 +826,6 @@ copy_entries_to_user(unsigned int total_size, return PTR_ERR(counters); loc_cpu_entry = private->entries; - if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { - ret = -EFAULT; - goto free_counters; - } /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ @@ -839,6 +835,10 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = (struct ipt_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } if (copy_to_user(userptr + off + offsetof(struct ipt_entry, counters), &counters[num], @@ -852,23 +852,14 @@ copy_entries_to_user(unsigned int total_size, i += m->u.match_size) { m = (void *)e + i; - if (copy_to_user(userptr + off + i - + offsetof(struct xt_entry_match, - u.user.name), - m->u.kernel.match->name, - strlen(m->u.kernel.match->name)+1) - != 0) { + if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ipt_get_target_c(e); - if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct xt_entry_target, - u.user.name), - t->u.kernel.target->name, - strlen(t->u.kernel.target->name)+1) != 0) { + if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } From e47ddb2c4691fd2bd8d25745ecb6848408899757 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 2 Jan 2017 17:19:42 -0500 Subject: [PATCH 10/27] ip6tables: use match, target and data copy_to_user helpers Convert ip6tables to copying entries, matches and targets one by one, using the xt_match_to_user and xt_target_to_user helper functions. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6_tables.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 25a022d41a70..1e15c54fd5e2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -855,10 +855,6 @@ copy_entries_to_user(unsigned int total_size, return PTR_ERR(counters); loc_cpu_entry = private->entries; - if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { - ret = -EFAULT; - goto free_counters; - } /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ @@ -868,6 +864,10 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = (struct ip6t_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], @@ -881,23 +881,14 @@ copy_entries_to_user(unsigned int total_size, i += m->u.match_size) { m = (void *)e + i; - if (copy_to_user(userptr + off + i - + offsetof(struct xt_entry_match, - u.user.name), - m->u.kernel.match->name, - strlen(m->u.kernel.match->name)+1) - != 0) { + if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ip6t_get_target_c(e); - if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct xt_entry_target, - u.user.name), - t->u.kernel.target->name, - strlen(t->u.kernel.target->name)+1) != 0) { + if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } From 244b531bee2bdcfcd35ca2fff9cc7d073b3aa060 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 2 Jan 2017 17:19:43 -0500 Subject: [PATCH 11/27] arptables: use match, target and data copy_to_user helpers Convert arptables to copying entries, matches and targets one by one, using the xt_match_to_user and xt_target_to_user helper functions. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a467e1236c43..6241a81fd7f5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -677,11 +677,6 @@ static int copy_entries_to_user(unsigned int total_size, return PTR_ERR(counters); loc_cpu_entry = private->entries; - /* ... then copy entire thing ... */ - if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { - ret = -EFAULT; - goto free_counters; - } /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ @@ -689,6 +684,10 @@ static int copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = (struct arpt_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } if (copy_to_user(userptr + off + offsetof(struct arpt_entry, counters), &counters[num], @@ -698,11 +697,7 @@ static int copy_entries_to_user(unsigned int total_size, } t = arpt_get_target_c(e); - if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct xt_entry_target, - u.user.name), - t->u.kernel.target->name, - strlen(t->u.kernel.target->name)+1) != 0) { + if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } From b5040f6c33a51e6e1fddab75baf0f45d4943cde4 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 2 Jan 2017 17:19:44 -0500 Subject: [PATCH 12/27] ebtables: use match, target and data copy_to_user helpers Convert ebtables to copying entries, matches and targets one by one. The solution is analogous to that of generic xt_(match|target)_to_user helpers, but is applied to different structs. Convert existing helpers ebt_make_XXXname helpers that overwrite fields of an already copy_to_user'd struct with ebt_XXX_to_user helpers that copy all relevant fields of the struct from scratch. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 80 ++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 537e3d506fc2..79b69917f521 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1346,56 +1346,72 @@ static int update_counters(struct net *net, const void __user *user, hlp.num_counters, user, len); } -static inline int ebt_make_matchname(const struct ebt_entry_match *m, - const char *base, char __user *ubase) +static inline int ebt_obj_to_user(char __user *um, const char *_name, + const char *data, int entrysize, + int usersize, int datasize) { - char __user *hlp = ubase + ((char *)m - base); - char name[EBT_FUNCTION_MAXNAMELEN] = {}; + char name[EBT_FUNCTION_MAXNAMELEN] = {0}; /* ebtables expects 32 bytes long names but xt_match names are 29 bytes * long. Copy 29 bytes and fill remaining bytes with zeroes. */ - strlcpy(name, m->u.match->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) + strlcpy(name, _name, sizeof(name)); + if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) || + put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) || + xt_data_to_user(um + entrysize, data, usersize, datasize)) return -EFAULT; + return 0; } -static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, - const char *base, char __user *ubase) +static inline int ebt_match_to_user(const struct ebt_entry_match *m, + const char *base, char __user *ubase) { - char __user *hlp = ubase + ((char *)w - base); - char name[EBT_FUNCTION_MAXNAMELEN] = {}; - - strlcpy(name, w->u.watcher->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) - return -EFAULT; - return 0; + return ebt_obj_to_user(ubase + ((char *)m - base), + m->u.match->name, m->data, sizeof(*m), + m->u.match->usersize, m->match_size); } -static inline int ebt_make_names(struct ebt_entry *e, const char *base, - char __user *ubase) +static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w, + const char *base, char __user *ubase) +{ + return ebt_obj_to_user(ubase + ((char *)w - base), + w->u.watcher->name, w->data, sizeof(*w), + w->u.watcher->usersize, w->watcher_size); +} + +static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base, + char __user *ubase) { int ret; char __user *hlp; const struct ebt_entry_target *t; - char name[EBT_FUNCTION_MAXNAMELEN] = {}; - if (e->bitmask == 0) + if (e->bitmask == 0) { + /* special case !EBT_ENTRY_OR_ENTRIES */ + if (copy_to_user(ubase + ((char *)e - base), e, + sizeof(struct ebt_entries))) + return -EFAULT; return 0; + } + + if (copy_to_user(ubase + ((char *)e - base), e, sizeof(*e))) + return -EFAULT; hlp = ubase + (((char *)e + e->target_offset) - base); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - ret = EBT_MATCH_ITERATE(e, ebt_make_matchname, base, ubase); + ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase); if (ret != 0) return ret; - ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); + ret = EBT_WATCHER_ITERATE(e, ebt_watcher_to_user, base, ubase); if (ret != 0) return ret; - strlcpy(name, t->u.target->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) - return -EFAULT; + ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t), + t->u.target->usersize, t->target_size); + if (ret != 0) + return ret; + return 0; } @@ -1475,13 +1491,9 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, if (ret) return ret; - if (copy_to_user(tmp.entries, entries, entries_size)) { - BUGPRINT("Couldn't copy entries to userspace\n"); - return -EFAULT; - } /* set the match/watcher/target names right */ return EBT_ENTRY_ITERATE(entries, entries_size, - ebt_make_names, entries, tmp.entries); + ebt_entry_to_user, entries, tmp.entries); } static int do_ebt_set_ctl(struct sock *sk, @@ -1630,8 +1642,10 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, if (match->compat_to_user) { if (match->compat_to_user(cm->data, m->data)) return -EFAULT; - } else if (copy_to_user(cm->data, m->data, msize)) + } else { + if (xt_data_to_user(cm->data, m->data, match->usersize, msize)) return -EFAULT; + } *size -= ebt_compat_entry_padsize() + off; *dstptr = cm->data; @@ -1657,8 +1671,10 @@ static int compat_target_to_user(struct ebt_entry_target *t, if (target->compat_to_user) { if (target->compat_to_user(cm->data, t->data)) return -EFAULT; - } else if (copy_to_user(cm->data, t->data, tsize)) - return -EFAULT; + } else { + if (xt_data_to_user(cm->data, t->data, target->usersize, tsize)) + return -EFAULT; + } *size -= ebt_compat_entry_padsize() + off; *dstptr = cm->data; From 4915f7bbc402071539ec0cb8c75aad878c982402 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 2 Jan 2017 17:19:45 -0500 Subject: [PATCH 13/27] xtables: use match, target and data copy_to_user helpers in compat Convert compat to copying entries, matches and targets one by one, using the xt_match_to_user and xt_target_to_user helper functions. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index feccf527abdd..016db6be94b9 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -619,17 +619,14 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, int off = xt_compat_match_offset(match); u_int16_t msize = m->u.user.match_size - off; - if (copy_to_user(cm, m, sizeof(*cm)) || - put_user(msize, &cm->u.user.match_size) || - copy_to_user(cm->u.user.name, m->u.kernel.match->name, - strlen(m->u.kernel.match->name) + 1)) + if (XT_OBJ_TO_USER(cm, m, match, msize)) return -EFAULT; if (match->compat_to_user) { if (match->compat_to_user((void __user *)cm->data, m->data)) return -EFAULT; } else { - if (copy_to_user(cm->data, m->data, msize - sizeof(*cm))) + if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) return -EFAULT; } @@ -977,17 +974,14 @@ int xt_compat_target_to_user(const struct xt_entry_target *t, int off = xt_compat_target_offset(target); u_int16_t tsize = t->u.user.target_size - off; - if (copy_to_user(ct, t, sizeof(*ct)) || - put_user(tsize, &ct->u.user.target_size) || - copy_to_user(ct->u.user.name, t->u.kernel.target->name, - strlen(t->u.kernel.target->name) + 1)) + if (XT_OBJ_TO_USER(ct, t, target, tsize)) return -EFAULT; if (target->compat_to_user) { if (target->compat_to_user((void __user *)ct->data, t->data)) return -EFAULT; } else { - if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct))) + if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) return -EFAULT; } From ec23189049651b16dc2ffab35a4371dc1f491aca Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 2 Jan 2017 17:19:46 -0500 Subject: [PATCH 14/27] xtables: extend matches and targets with .usersize In matches and targets that define a kernel-only tail to their xt_match and xt_target data structs, add a field .usersize that specifies up to where data is to be shared with userspace. Performed a search for comment "Used internally by the kernel" to find relevant matches and targets. Manually inspected the structs to derive a valid offsetof. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_limit.c | 1 + net/ipv4/netfilter/ipt_CLUSTERIP.c | 1 + net/ipv6/netfilter/ip6t_NPT.c | 2 ++ net/netfilter/xt_CT.c | 3 +++ net/netfilter/xt_RATEEST.c | 1 + net/netfilter/xt_TEE.c | 2 ++ net/netfilter/xt_bpf.c | 2 ++ net/netfilter/xt_cgroup.c | 1 + net/netfilter/xt_connlimit.c | 1 + net/netfilter/xt_hashlimit.c | 4 ++++ net/netfilter/xt_limit.c | 2 ++ net/netfilter/xt_quota.c | 1 + net/netfilter/xt_rateest.c | 1 + net/netfilter/xt_string.c | 1 + 14 files changed, 23 insertions(+) diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 517e78befcb2..61a9f1be1263 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -105,6 +105,7 @@ static struct xt_match ebt_limit_mt_reg __read_mostly = { .match = ebt_limit_mt, .checkentry = ebt_limit_mt_check, .matchsize = sizeof(struct ebt_limit_info), + .usersize = offsetof(struct ebt_limit_info, prev), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct ebt_compat_limit_info), #endif diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 21db00d0362b..8a3d20ebb815 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -468,6 +468,7 @@ static struct xt_target clusterip_tg_reg __read_mostly = { .checkentry = clusterip_tg_check, .destroy = clusterip_tg_destroy, .targetsize = sizeof(struct ipt_clusterip_tgt_info), + .usersize = offsetof(struct ipt_clusterip_tgt_info, config), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), #endif /* CONFIG_COMPAT */ diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 590f767db5d4..a379d2f79b19 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -112,6 +112,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), + .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_LOCAL_IN) | @@ -123,6 +124,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), + .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_PRE_ROUTING) | diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 95c750358747..26b0bccfa0c5 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -373,6 +373,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .name = "CT", .family = NFPROTO_UNSPEC, .targetsize = sizeof(struct xt_ct_target_info), + .usersize = offsetof(struct xt_ct_target_info, ct), .checkentry = xt_ct_tg_check_v0, .destroy = xt_ct_tg_destroy_v0, .target = xt_ct_target_v0, @@ -384,6 +385,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_UNSPEC, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), + .usersize = offsetof(struct xt_ct_target_info, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -395,6 +397,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_UNSPEC, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), + .usersize = offsetof(struct xt_ct_target_info, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 91a373a3f534..498b54fd04d7 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -162,6 +162,7 @@ static struct xt_target xt_rateest_tg_reg __read_mostly = { .checkentry = xt_rateest_tg_checkentry, .destroy = xt_rateest_tg_destroy, .targetsize = sizeof(struct xt_rateest_target_info), + .usersize = offsetof(struct xt_rateest_target_info, est), .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 1c57ace75ae6..86b0580b2216 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -133,6 +133,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .target = tee_tg4, .targetsize = sizeof(struct xt_tee_tginfo), + .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, @@ -144,6 +145,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .target = tee_tg6, .targetsize = sizeof(struct xt_tee_tginfo), + .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 2dedaa23ab0a..38986a95216c 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -110,6 +110,7 @@ static struct xt_match bpf_mt_reg[] __read_mostly = { .match = bpf_mt, .destroy = bpf_mt_destroy, .matchsize = sizeof(struct xt_bpf_info), + .usersize = offsetof(struct xt_bpf_info, filter), .me = THIS_MODULE, }, { @@ -120,6 +121,7 @@ static struct xt_match bpf_mt_reg[] __read_mostly = { .match = bpf_mt_v1, .destroy = bpf_mt_destroy_v1, .matchsize = sizeof(struct xt_bpf_info_v1), + .usersize = offsetof(struct xt_bpf_info_v1, filter), .me = THIS_MODULE, }, }; diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index a086a914865f..1db1ce59079f 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -122,6 +122,7 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = { .checkentry = cgroup_mt_check_v1, .match = cgroup_mt_v1, .matchsize = sizeof(struct xt_cgroup_info_v1), + .usersize = offsetof(struct xt_cgroup_info_v1, priv), .destroy = cgroup_mt_destroy_v1, .me = THIS_MODULE, .hooks = (1 << NF_INET_LOCAL_OUT) | diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 660b61dbd776..b8fd4ab762ed 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -431,6 +431,7 @@ static struct xt_match connlimit_mt_reg __read_mostly = { .checkentry = connlimit_mt_check, .match = connlimit_mt, .matchsize = sizeof(struct xt_connlimit_info), + .usersize = offsetof(struct xt_connlimit_info, data), .destroy = connlimit_mt_destroy, .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 10063408141d..26ef70c50e3b 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -838,6 +838,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), + .usersize = offsetof(struct xt_hashlimit_mtinfo1, hinfo), .checkentry = hashlimit_mt_check_v1, .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, @@ -848,6 +849,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .match = hashlimit_mt, .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo), .checkentry = hashlimit_mt_check, .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, @@ -859,6 +861,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .family = NFPROTO_IPV6, .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), + .usersize = offsetof(struct xt_hashlimit_mtinfo1, hinfo), .checkentry = hashlimit_mt_check_v1, .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, @@ -869,6 +872,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .family = NFPROTO_IPV6, .match = hashlimit_mt, .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo), .checkentry = hashlimit_mt_check, .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index bef850596558..dab962df1787 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -192,6 +192,8 @@ static struct xt_match limit_mt_reg __read_mostly = { .compatsize = sizeof(struct compat_xt_rateinfo), .compat_from_user = limit_mt_compat_from_user, .compat_to_user = limit_mt_compat_to_user, +#else + .usersize = offsetof(struct xt_rateinfo, prev), #endif .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 44c8eb4c9d66..10d61a6eed71 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -73,6 +73,7 @@ static struct xt_match quota_mt_reg __read_mostly = { .checkentry = quota_mt_check, .destroy = quota_mt_destroy, .matchsize = sizeof(struct xt_quota_info), + .usersize = offsetof(struct xt_quota_info, master), .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 1db02f6fca54..755d2f6693a2 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -133,6 +133,7 @@ static struct xt_match xt_rateest_mt_reg __read_mostly = { .checkentry = xt_rateest_mt_checkentry, .destroy = xt_rateest_mt_destroy, .matchsize = sizeof(struct xt_rateest_match_info), + .usersize = offsetof(struct xt_rateest_match_info, est1), .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 0bc3460319c8..423293ee57c2 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -77,6 +77,7 @@ static struct xt_match xt_string_mt_reg __read_mostly = { .match = string_mt, .destroy = string_mt_destroy, .matchsize = sizeof(struct xt_string_info), + .usersize = offsetof(struct xt_string_info, config), .me = THIS_MODULE, }; From 9a6d87626252496311cd122aaa9fbf21ae19e449 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 7 Jan 2017 21:33:54 +0800 Subject: [PATCH 15/27] netfilter: pkttype: unnecessary to check ipv6 multicast address Since there's no broadcast address in IPV6, so in ipv6 family, the PACKET_LOOPBACK must be multicast packets, there's no need to check it again. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 5 +---- net/netfilter/xt_pkttype.c | 3 +-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 66c7f4b4c49b..9a22b24346b8 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -154,10 +154,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = PACKET_BROADCAST; break; case NFPROTO_IPV6: - if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) - *dest = PACKET_MULTICAST; - else - *dest = PACKET_BROADCAST; + *dest = PACKET_MULTICAST; break; default: WARN_ON(1); diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 57efb703ff18..1ef99151b3ba 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -33,8 +33,7 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) else if (xt_family(par) == NFPROTO_IPV4 && ipv4_is_multicast(ip_hdr(skb)->daddr)) type = PACKET_MULTICAST; - else if (xt_family(par) == NFPROTO_IPV6 && - ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) + else if (xt_family(par) == NFPROTO_IPV6) type = PACKET_MULTICAST; else type = PACKET_BROADCAST; From f169fd695b192dd7b23aff8e69d25a1bc881bbfa Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 7 Jan 2017 21:33:55 +0800 Subject: [PATCH 16/27] netfilter: nft_meta: deal with PACKET_LOOPBACK in netdev family After adding the following nft rule, then ping 224.0.0.1: # nft add rule netdev t c pkttype host counter The warning complain message will be printed out again and again: WARNING: CPU: 0 PID: 10182 at net/netfilter/nft_meta.c:163 \ nft_meta_get_eval+0x3fe/0x460 [nft_meta] [...] Call Trace: dump_stack+0x85/0xc2 __warn+0xcb/0xf0 warn_slowpath_null+0x1d/0x20 nft_meta_get_eval+0x3fe/0x460 [nft_meta] nft_do_chain+0xff/0x5e0 [nf_tables] So we should deal with PACKET_LOOPBACK in netdev family too. For ipv4, convert it to PACKET_BROADCAST/MULTICAST according to the destination address's type; For ipv6, convert it to PACKET_MULTICAST directly. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9a22b24346b8..e1f5ca9b423b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -156,8 +156,34 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFPROTO_IPV6: *dest = PACKET_MULTICAST; break; + case NFPROTO_NETDEV: + switch (skb->protocol) { + case htons(ETH_P_IP): { + int noff = skb_network_offset(skb); + struct iphdr *iph, _iph; + + iph = skb_header_pointer(skb, noff, + sizeof(_iph), &_iph); + if (!iph) + goto err; + + if (ipv4_is_multicast(iph->daddr)) + *dest = PACKET_MULTICAST; + else + *dest = PACKET_BROADCAST; + + break; + } + case htons(ETH_P_IPV6): + *dest = PACKET_MULTICAST; + break; + default: + WARN_ON_ONCE(1); + goto err; + } + break; default: - WARN_ON(1); + WARN_ON_ONCE(1); goto err; } break; From 1a28ad74ebd8f9d3c7eae0d781f72a6d30545e17 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 16 Jan 2017 22:02:57 +0800 Subject: [PATCH 17/27] netfilter: nf_tables: eliminate useless condition checks The return value of nf_tables_table_lookup() is valid pointer or one pointer error. There are two cases: 1) IS_ERR(table) is true, it would return the error or reset the table as NULL, it is unnecessary to perform the latter check "table != NULL". 2) IS_ERR(obj) is false, the table is one valid pointer. It is also unnecessary to perform that check. The nf_tables_newset() and nf_tables_newobj() have same logic codes. In summary, we could move the block of condition check "table != NULL" in the else block to eliminate the original condition checks. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a019a87e58ee..6e07c214c208 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -696,10 +696,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); - table = NULL; - } - - if (table != NULL) { + } else { if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -2963,10 +2960,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (IS_ERR(set)) { if (PTR_ERR(set) != -ENOENT) return PTR_ERR(set); - set = NULL; - } - - if (set != NULL) { + } else { if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -4153,10 +4147,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, if (err != -ENOENT) return err; - obj = NULL; - } - - if (obj != NULL) { + } else { if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; From 10435c1192d06bdb0bac7666452d8219d7e7c477 Mon Sep 17 00:00:00 2001 From: Feng Date: Fri, 20 Jan 2017 21:40:43 +0800 Subject: [PATCH 18/27] netfilter: nf_tables: Eliminate duplicated code in nf_tables_table_enable() If something fails in nf_tables_table_enable(), it unregisters the chains. But the rollback code is the same as nf_tables_table_disable() almostly, except there is one counter check. Now create one wrapper function to eliminate the duplicated codes. Signed-off-by: Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 48 ++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6e07c214c208..e6741ac4ccc1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -576,6 +576,28 @@ err: return err; } +static void _nf_tables_table_disable(struct net *net, + const struct nft_af_info *afi, + struct nft_table *table, + u32 cnt) +{ + struct nft_chain *chain; + u32 i = 0; + + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + + if (cnt && i++ == cnt) + break; + + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); + } +} + static int nf_tables_table_enable(struct net *net, const struct nft_af_info *afi, struct nft_table *table) @@ -598,18 +620,8 @@ static int nf_tables_table_enable(struct net *net, } return 0; err: - list_for_each_entry(chain, &table->chains, list) { - if (!nft_is_active_next(net, chain)) - continue; - if (!(chain->flags & NFT_BASE_CHAIN)) - continue; - - if (i-- <= 0) - break; - - nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, - afi->nops); - } + if (i) + _nf_tables_table_disable(net, afi, table, i); return err; } @@ -617,17 +629,7 @@ static void nf_tables_table_disable(struct net *net, const struct nft_af_info *afi, struct nft_table *table) { - struct nft_chain *chain; - - list_for_each_entry(chain, &table->chains, list) { - if (!nft_is_active_next(net, chain)) - continue; - if (!(chain->flags & NFT_BASE_CHAIN)) - continue; - - nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, - afi->nops); - } + _nf_tables_table_disable(net, afi, table, 0); } static int nf_tables_updtable(struct nft_ctx *ctx) From 11df4b760f11ca7528c62b1c4b870735d1c62116 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:53 +0100 Subject: [PATCH 19/27] netfilter: conntrack: no need to pass ctinfo to error handler It is never accessed for reading and the only places that write to it are the icmp(6) handlers, which also set skb->nfct (and skb->nfctinfo). The conntrack core specifically checks for attached skb->nfct after ->error() invocation and returns early in this case. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 12 ++++++------ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 12 ++++++------ net/netfilter/nf_conntrack_core.c | 3 +-- net/netfilter/nf_conntrack_proto_dccp.c | 1 - net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 1 - net/netfilter/nf_conntrack_proto_udp.c | 3 +-- 8 files changed, 16 insertions(+), 20 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index e7b836590f0b..85e993e278d5 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -55,7 +55,7 @@ struct nf_conntrack_l4proto { void (*destroy)(struct nf_conn *ct); int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index d075b3cf2400..566afac98a88 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -128,13 +128,13 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; NF_CT_ASSERT(skb->nfct == NULL); @@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return -NF_ACCEPT; } - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { @@ -169,11 +169,11 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, } if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; + skb->nfctinfo = ctinfo; return NF_ACCEPT; } @@ -181,7 +181,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, static int icmp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) + u8 pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; @@ -225,7 +225,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); + return icmp_error_message(net, tmpl, skb, hooknum); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index f5a61bc3ec2b..44b9af3f813e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -145,12 +145,12 @@ static int icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int icmp6off, - enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; NF_CT_ASSERT(skb->nfct == NULL); @@ -176,7 +176,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), &intuple); @@ -185,19 +185,19 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } else { if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + ctinfo += IP_CT_IS_REPLY; } /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; + skb->nfctinfo = ctinfo; return NF_ACCEPT; } static int icmpv6_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) + u8 pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; @@ -232,7 +232,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3a073cd9fcf4..86186a2e2715 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1326,8 +1326,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL) { - ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, - pf, hooknum); + ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum); if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index b68ce6ac13b3..93dd1c5b7bff 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -561,7 +561,6 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, static int dccp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 44a647418948..33279aab583d 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -508,7 +508,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, } static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u8 pf, unsigned int hooknum) { const struct sctphdr *sh; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 69f687740c76..b122e9dacfed 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -750,7 +750,6 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index ae63944c9dc4..f6ebce6178ca 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -112,7 +112,6 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u8 pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; @@ -162,7 +161,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, #endif static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u_int8_t pf, unsigned int hooknum) { From 6e10148c5c85629832d9156f337cbf67e96b69fe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:54 +0100 Subject: [PATCH 20/27] netfilter: reset netfilter state when duplicating packet We should also toss nf_bridge_info, if any -- packet is leaving via ip_local_out, also, this skb isn't bridged -- it is a locally generated copy. Also this avoids the need to touch this later when skb->nfct is replaced with 'unsigned long _nfct' in followup patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_dup_ipv4.c | 2 +- net/ipv6/netfilter/nf_dup_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index cf986e1c7bbd..a981ef7151ca 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -68,7 +68,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ - nf_conntrack_put(skb->nfct); + nf_reset(skb); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 4a84b5ad9ecb..5f52e5f90e7e 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -57,7 +57,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, return; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_conntrack_put(skb->nfct); + nf_reset(skb); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); From 97a6ad13decc16c5adbf181283932daba7e17faf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:55 +0100 Subject: [PATCH 21/27] netfilter: reduce direct skb->nfct usage Next patch makes direct skb->nfct access illegal, reduce noise in next patch by using accessors we already have. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 9 ++++++--- net/netfilter/nf_conntrack_core.c | 15 +++++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cd6018a9ee24..2a344ebd7ebe 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1554,10 +1554,13 @@ static inline void ip_vs_notrack(struct sk_buff *skb) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; + struct nf_conn *untracked; + + nf_conntrack_put(&ct->ct_general); + untracked = nf_ct_untracked_get(); + nf_conntrack_get(&untracked->ct_general); + skb->nfct = &untracked->ct_general; skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); } #endif } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 86186a2e2715..adb7af3a4c4c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -686,8 +686,11 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, !nfct_nat(ct) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { - nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct); - nf_conntrack_put(skb->nfct); + enum ip_conntrack_info oldinfo; + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); + + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_conntrack_put(&loser_ct->ct_general); /* Assign conntrack already in hashes to this skbuff. Don't * modify skb->nfctinfo to ensure consistent stateful filtering. */ @@ -1288,7 +1291,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct, *tmpl = NULL; + struct nf_conn *ct, *tmpl; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -1298,9 +1301,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - if (skb->nfct) { + tmpl = nf_ct_get(skb, &ctinfo); + if (tmpl) { /* Previously seen (loopback or untracked)? Ignore. */ - tmpl = (struct nf_conn *)skb->nfct; if (!nf_ct_is_template(tmpl)) { NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; @@ -1364,7 +1367,7 @@ repeat: /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put(skb->nfct); + nf_conntrack_put(&ct->ct_general); skb->nfct = NULL; NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) From cb9c68363efb6d1f950ec55fb06e031ee70db5fc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:56 +0100 Subject: [PATCH 22/27] skbuff: add and use skb_nfct helper Followup patch renames skb->nfct and changes its type so add a helper to avoid intrusive rename change later. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 13 ++++++++++--- include/net/netfilter/nf_conntrack_core.h | 2 +- net/core/skbuff.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 8 ++++---- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 4 ++-- net/ipv4/netfilter/nf_dup_ipv4.c | 2 +- net/ipv6/netfilter/ip6t_SYNPROXY.c | 8 ++++---- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 ++-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 4 ++-- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_nat_helper.c | 2 +- net/netfilter/xt_CT.c | 2 +- net/openvswitch/conntrack.c | 6 +++--- net/sched/cls_flow.c | 2 +- 15 files changed, 36 insertions(+), 29 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b53c0cfd417e..276431e047af 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3553,6 +3553,15 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, skb->csum = csum_add(skb->csum, delta); } +static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + return skb->nfct; +#else + return NULL; +#endif +} + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) @@ -3652,9 +3661,7 @@ static inline bool skb_irq_freeable(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_XFRM) !skb->sp && #endif -#if IS_ENABLED(CONFIG_NF_CONNTRACK) - !skb->nfct && -#endif + !skb_nfct(skb) && !skb->_skb_refdst && !skb_has_frag_list(skb); } diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 62e17d1319ff..84ec7ca5f195 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -62,7 +62,7 @@ int __nf_conntrack_confirm(struct sk_buff *skb); /* Confirm a connection: returns NF_DROP if packet must be dropped. */ static inline int nf_conntrack_confirm(struct sk_buff *skb) { - struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); int ret = NF_ACCEPT; if (ct && !nf_ct_is_untracked(ct)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5a03730fbc1a..cac3ebfb4b45 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -655,7 +655,7 @@ static void skb_release_head_state(struct sk_buff *skb) skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_conntrack_put(skb->nfct); + nf_conntrack_put(skb_nfct(skb)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 30c0de53e254..a12d4f0aa674 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -107,8 +107,8 @@ synproxy_send_client_synack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void @@ -230,8 +230,8 @@ synproxy_send_client_ack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 566afac98a88..478a025909fc 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -137,7 +137,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(skb->nfct == NULL); + NF_CT_ASSERT(!skb_nfct(skb)); zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 49bd6a54404f..346bf7ccac08 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -45,7 +45,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) { + if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -75,7 +75,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv, #if !IS_ENABLED(CONFIG_NF_NAT) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif #endif diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index a981ef7151ca..1a5e1f53ceaa 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -71,7 +71,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, nf_reset(skb); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_conntrack_get(skb_nfct(skb)); #endif /* * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 98c8dd38575a..2dc01d2c6ec0 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -121,8 +121,8 @@ synproxy_send_client_synack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void @@ -244,8 +244,8 @@ synproxy_send_client_ack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 44b9af3f813e..09f1661a4e88 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -153,7 +153,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(skb->nfct == NULL); + NF_CT_ASSERT(!skb_nfct(skb)); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuplepr(skb, @@ -224,7 +224,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, noct_valid_new[type]) { skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_conntrack_get(skb_nfct(skb)); return NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 8e0bdd058787..ada60d1a991b 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -37,7 +37,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) { + if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -61,7 +61,7 @@ static unsigned int ipv6_defrag(void *priv, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index adb7af3a4c4c..78aebf0ee6e3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1357,7 +1357,7 @@ repeat: goto out; } - NF_CT_ASSERT(skb->nfct); + NF_CT_ASSERT(skb_nfct(skb)); /* Decide what timeout policy we want to apply to this flow. */ timeouts = nf_ct_timeout_lookup(net, ct, l4proto); @@ -1528,7 +1528,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) /* Attach to new skbuff, and increment count */ nskb->nfct = &ct->ct_general; nskb->nfctinfo = ctinfo; - nf_conntrack_get(nskb->nfct); + nf_conntrack_get(skb_nfct(nskb)); } /* Bring out ya dead! */ diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 2840abb5bb99..211661cb2c90 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -60,7 +60,7 @@ static void mangle_contents(struct sk_buff *skb, __skb_trim(skb, skb->len + rep_len - match_len); } - if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) { + if (nf_ct_l3num((struct nf_conn *)skb_nfct(skb)) == NFPROTO_IPV4) { /* fix IP hdr checksum information */ ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 26b0bccfa0c5..cd7e29910ae1 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -415,7 +415,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_conntrack_get(skb_nfct(skb)); return XT_CONTINUE; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 6b78bab27755..452557946147 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -721,8 +721,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, /* Associate skb with specified zone. */ if (tmpl) { - if (skb->nfct) - nf_conntrack_put(skb->nfct); + if (skb_nfct(skb)) + nf_conntrack_put(skb_nfct(skb)); nf_conntrack_get(&tmpl->ct_general); skb->nfct = &tmpl->ct_general; skb->nfctinfo = IP_CT_NEW; @@ -819,7 +819,7 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, if (err) return err; - ct = (struct nf_conn *)skb->nfct; + ct = (struct nf_conn *)skb_nfct(skb); if (ct) nf_ct_deliver_cached_events(ct); } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 6575aba87630..3d6b9286c203 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -129,7 +129,7 @@ static u32 flow_get_mark(const struct sk_buff *skb) static u32 flow_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - return addr_fold(skb->nfct); + return addr_fold(skb_nfct(skb)); #else return 0; #endif From c74454fadd5ea6fc866ffe2c417a0dba56b2bf1c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:57 +0100 Subject: [PATCH 23/27] netfilter: add and use nf_ct_set helper Add a helper to assign a nf_conn entry and the ctinfo bits to an sk_buff. This avoids changing code in followup patch that merges skb->nfct and skb->nfctinfo into skb->_nfct. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 3 +-- include/net/netfilter/nf_conntrack.h | 8 ++++++++ net/ipv4/netfilter/ipt_SYNPROXY.c | 3 +-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 3 +-- net/ipv4/netfilter/nf_dup_ipv4.c | 3 +-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 3 +-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 6 ++---- net/ipv6/netfilter/nf_dup_ipv6.c | 3 +-- net/netfilter/nf_conntrack_core.c | 11 +++-------- net/netfilter/nft_ct.c | 3 +-- net/netfilter/xt_CT.c | 6 ++---- net/openvswitch/conntrack.c | 6 ++---- 12 files changed, 24 insertions(+), 34 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2a344ebd7ebe..4b46c591b542 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1559,8 +1559,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb) nf_conntrack_put(&ct->ct_general); untracked = nf_ct_untracked_get(); nf_conntrack_get(&untracked->ct_general); - skb->nfct = &untracked->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, untracked, IP_CT_NEW); } #endif } diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5916aa9ab3f0..d704aed11684 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -34,6 +34,7 @@ union nf_conntrack_proto { struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct nf_ct_gre gre; + unsigned int tmpl_padto; }; union nf_conntrack_expect_proto { @@ -341,6 +342,13 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); +static inline void +nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) +{ + skb->nfct = &ct->ct_general; + skb->nfctinfo = info; +} + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index a12d4f0aa674..3240a2614e82 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -57,8 +57,7 @@ synproxy_send_tcp(struct net *net, goto free_nskb; if (nfct) { - nskb->nfct = nfct; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 478a025909fc..73c591d8a9a8 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -172,8 +172,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = ctinfo; + nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index 1a5e1f53ceaa..f0dbff05fc28 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -69,8 +69,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ nf_reset(skb); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); nf_conntrack_get(skb_nfct(skb)); #endif /* diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 2dc01d2c6ec0..4ef1ddd4bbbd 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -71,8 +71,7 @@ synproxy_send_tcp(struct net *net, skb_dst_set(nskb, dst); if (nfct) { - nskb->nfct = nfct; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 09f1661a4e88..d2c2ccbfbe72 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -189,8 +189,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, } /* Update skb to refer to this connection */ - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = ctinfo; + nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); return NF_ACCEPT; } @@ -222,8 +221,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); nf_conntrack_get(skb_nfct(skb)); return NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 5f52e5f90e7e..ff04f6a7f45b 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -58,8 +58,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset(skb); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); nf_conntrack_get(skb->nfct); #endif if (hooknum == NF_INET_PRE_ROUTING || diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 78aebf0ee6e3..c9bd10747864 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -691,10 +691,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_conntrack_put(&loser_ct->ct_general); - /* Assign conntrack already in hashes to this skbuff. Don't - * modify skb->nfctinfo to ensure consistent stateful filtering. - */ - skb->nfct = &ct->ct_general; + nf_ct_set(skb, ct, oldinfo); return NF_ACCEPT; } NF_CT_STAT_INC(net, drop); @@ -1282,8 +1279,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } *set_reply = 0; } - skb->nfct = &ct->ct_general; - skb->nfctinfo = *ctinfo; + nf_ct_set(skb, ct, *ctinfo); return ct; } @@ -1526,8 +1522,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) ctinfo = IP_CT_RELATED; /* Attach to new skbuff, and increment count */ - nskb->nfct = &ct->ct_general; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, ct, ctinfo); nf_conntrack_get(skb_nfct(nskb)); } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d774d7823688..66a2377510e1 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -554,8 +554,7 @@ static void nft_notrack_eval(const struct nft_expr *expr, ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, ct, IP_CT_NEW); } static struct nft_expr_type nft_notrack_type; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index cd7e29910ae1..51f00e1e1208 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -30,8 +30,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (!ct) ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, ct, IP_CT_NEW); return XT_CONTINUE; } @@ -413,8 +412,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) if (skb->nfct != NULL) return XT_CONTINUE; - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); nf_conntrack_get(skb_nfct(skb)); return XT_CONTINUE; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 452557946147..d1fbfcaa009a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -460,8 +460,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ct = nf_ct_tuplehash_to_ctrack(h); - skb->nfct = &ct->ct_general; - skb->nfctinfo = ovs_ct_get_info(h); + nf_ct_set(skb, ct, ovs_ct_get_info(h)); return ct; } @@ -724,8 +723,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, if (skb_nfct(skb)) nf_conntrack_put(skb_nfct(skb)); nf_conntrack_get(&tmpl->ct_general); - skb->nfct = &tmpl->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, tmpl, IP_CT_NEW); } err = nf_conntrack_in(net, info->family, From 303223092081963513494b4377fa1ac9e362ed4b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:58 +0100 Subject: [PATCH 24/27] netfilter: guarantee 8 byte minalign for template addresses The next change will merge skb->nfct pointer and skb->nfctinfo status bits into single skb->_nfct (unsigned long) area. For this to work nf_conn addresses must always be aligned at least on an 8 byte boundary since we will need the lower 3bits to store nfctinfo. Conntrack templates are allocated via kmalloc. kbuild test robot reported BUILD_BUG_ON failed: NFCT_INFOMASK >= ARCH_KMALLOC_MINALIGN on v1 of this patchset, so not all platforms meet this requirement. Do manual alignment if needed, the alignment offset is stored in the nf_conn entry protocol area. This works because templates are not handed off to L4 protocol trackers. Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 29 +++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d704aed11684..06d3d2d24fe0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -163,6 +163,8 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); +#define NFCT_INFOMASK 7UL + /* Return conntrack_info and tuple hash for given skb. */ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c9bd10747864..768968fba7f6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -350,16 +350,31 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) spin_unlock(&pcpu->lock); } +#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) + /* Released via destroy_conntrack() */ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags) { - struct nf_conn *tmpl; + struct nf_conn *tmpl, *p; - tmpl = kzalloc(sizeof(*tmpl), flags); - if (tmpl == NULL) - return NULL; + if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) { + tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags); + if (!tmpl) + return NULL; + + p = tmpl; + tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); + if (tmpl != p) { + tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); + tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p; + } + } else { + tmpl = kzalloc(sizeof(*tmpl), flags); + if (!tmpl) + return NULL; + } tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); @@ -374,7 +389,11 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl) { nf_ct_ext_destroy(tmpl); nf_ct_ext_free(tmpl); - kfree(tmpl); + + if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) + kfree((char *)tmpl - tmpl->proto.tmpl_padto); + else + kfree(tmpl); } EXPORT_SYMBOL_GPL(nf_ct_tmpl_free); From a9e419dc7be6997409dca6d1b9daf3cc7046902f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:59 +0100 Subject: [PATCH 25/27] netfilter: merge ctinfo into nfct pointer storage area After this change conntrack operations (lookup, creation, matching from ruleset) only access one instead of two sk_buff cache lines. This works for normal conntracks because those are allocated from a slab that guarantees hw cacheline or 8byte alignment (whatever is larger) so the 3 bits needed for ctinfo won't overlap with nf_conn addresses. Template allocation now does manual address alignment (see previous change) on arches that don't have sufficent kmalloc min alignment. Some spots intentionally use skb->_nfct instead of skb_nfct() helpers, this is to avoid undoing the skb_nfct() use when we remove untracked conntrack object in the future. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 21 +++++++++------------ include/net/netfilter/nf_conntrack.h | 11 ++++++----- net/ipv6/netfilter/nf_dup_ipv6.c | 2 +- net/netfilter/core.c | 2 +- net/netfilter/nf_conntrack_core.c | 11 ++++++----- net/netfilter/nf_conntrack_standalone.c | 3 +++ net/netfilter/xt_CT.c | 4 ++-- 7 files changed, 28 insertions(+), 26 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 276431e047af..ac0bc085b139 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -585,7 +585,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header - * @nfctinfo: Relationship of this skb to the connection * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs @@ -594,7 +593,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function - * @nfct: Associated connection, if any + * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index @@ -668,7 +667,7 @@ struct sk_buff { struct sec_path *sp; #endif #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; + unsigned long _nfct; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info *nf_bridge; @@ -721,7 +720,6 @@ struct sk_buff { __u8 pkt_type:3; __u8 pfmemalloc:1; __u8 ignore_df:1; - __u8 nfctinfo:3; __u8 nf_trace:1; __u8 ip_summed:2; @@ -836,6 +834,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb) #define SKB_DST_NOREF 1UL #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) +#define SKB_NFCT_PTRMASK ~(7UL) /** * skb_dst - returns skb dst_entry * @skb: buffer @@ -3556,7 +3555,7 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - return skb->nfct; + return (void *)(skb->_nfct & SKB_NFCT_PTRMASK); #else return NULL; #endif @@ -3590,8 +3589,8 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) static inline void nf_reset(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; + nf_conntrack_put(skb_nfct(skb)); + skb->_nfct = 0; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); @@ -3611,10 +3610,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - dst->nfct = src->nfct; - nf_conntrack_get(src->nfct); - if (copy) - dst->nfctinfo = src->nfctinfo; + dst->_nfct = src->_nfct; + nf_conntrack_get(skb_nfct(src)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dst->nf_bridge = src->nf_bridge; @@ -3629,7 +3626,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(dst->nfct); + nf_conntrack_put(skb_nfct(dst)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(dst->nf_bridge); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 06d3d2d24fe0..f540f9ad2af4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -76,7 +76,7 @@ struct nf_conn { /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * - * Hint, SKB address this struct and refcnt via skb->nfct and + * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, * beware nf_ct_get() is different and don't inc refcnt. @@ -164,13 +164,15 @@ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); #define NFCT_INFOMASK 7UL +#define NFCT_PTRMASK ~(NFCT_INFOMASK) /* Return conntrack_info and tuple hash for given skb. */ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { - *ctinfo = skb->nfctinfo; - return (struct nf_conn *)skb->nfct; + *ctinfo = skb->_nfct & NFCT_INFOMASK; + + return (struct nf_conn *)(skb->_nfct & NFCT_PTRMASK); } /* decrement reference count on a conntrack */ @@ -347,8 +349,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl); static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { - skb->nfct = &ct->ct_general; - skb->nfctinfo = info; + skb->_nfct = (unsigned long)ct | info; } #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index ff04f6a7f45b..888ecd106e5f 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -59,7 +59,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset(skb); nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb->nfct); + nf_conntrack_get(skb_nfct(skb)); #endif if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index ce6adfae521a..a87a6f8a74d8 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -375,7 +375,7 @@ void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) { void (*attach)(struct sk_buff *, const struct sk_buff *); - if (skb->nfct) { + if (skb->_nfct) { rcu_read_lock(); attach = rcu_dereference(ip_ct_attach); if (attach) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 768968fba7f6..47c4ea53daa6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1239,7 +1239,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; } -/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */ static inline struct nf_conn * resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, @@ -1323,7 +1323,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } - skb->nfct = NULL; + skb->_nfct = 0; } /* rcu_read_lock()ed by nf_hook_thresh */ @@ -1352,7 +1352,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } /* ICMP[v6] protocol trackers may assign one conntrack. */ - if (skb->nfct) + if (skb->_nfct) goto out; } repeat: @@ -1383,7 +1383,7 @@ repeat: * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(&ct->ct_general); - skb->nfct = NULL; + skb->_nfct = 0; NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); @@ -1878,7 +1878,8 @@ int nf_conntrack_init_start(void) nf_conntrack_max = max_factor * nf_conntrack_htable_size; nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), 0, + sizeof(struct nf_conn), + NFCT_INFOMASK + 1, SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index d009ae663453..2256147dcaad 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -642,6 +642,9 @@ static int __init nf_conntrack_standalone_init(void) if (ret < 0) goto out_start; + BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK); + BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER); + #ifdef CONFIG_SYSCTL nf_ct_netfilter_header = register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 51f00e1e1208..b008db0184b8 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -23,7 +23,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) + if (skb->_nfct != 0) return XT_CONTINUE; /* special case the untracked ct : we want the percpu object */ @@ -409,7 +409,7 @@ static unsigned int notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) { /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) + if (skb->_nfct != 0) return XT_CONTINUE; nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); From 90c1aff702d449a1a248c4829d51c0bc677f968e Mon Sep 17 00:00:00 2001 From: David Windsor Date: Mon, 23 Jan 2017 22:24:29 -0500 Subject: [PATCH 26/27] ipvs: free ip_vs_dest structs when refcnt=0 Currently, the ip_vs_dest cache frees ip_vs_dest objects when their reference count becomes < 0. Aside from not being semantically sound, this is problematic for the new type refcount_t, which will be introduced shortly in a separate patch. refcount_t is the new kernel type for holding reference counts, and provides overflow protection and a constrained interface relative to atomic_t (the type currently being used for kernel reference counts). Per Julian Anastasov: "The problem is that dest_trash currently holds deleted dests (unlinked from RCU lists) with refcnt=0." Changing dest_trash to hold dest with refcnt=1 will allow us to free ip_vs_dest structs when their refcnt=0, in ip_vs_dest_put_and_free(). Signed-off-by: David Windsor Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4b46c591b542..7bdfa7d78363 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1421,7 +1421,7 @@ static inline void ip_vs_dest_put(struct ip_vs_dest *dest) static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) { - if (atomic_dec_return(&dest->refcnt) < 0) + if (atomic_dec_and_test(&dest->refcnt)) kfree(dest); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 55e0169caa4c..5fc4836e7c79 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -711,7 +711,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, dest->vport == svc->port))) { /* HIT */ list_del(&dest->t_list); - ip_vs_dest_hold(dest); goto out; } } @@ -741,7 +740,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) * When the ip_vs_control_clearup is activated by ipvs module exit, * the service tables must have been flushed and all the connections * are expired, and the refcnt of each destination in the trash must - * be 0, so we simply release them here. + * be 1, so we simply release them here. */ static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) { @@ -1080,11 +1079,10 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, if (list_empty(&ipvs->dest_trash) && !cleanup) mod_timer(&ipvs->dest_trash_timer, jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); - /* dest lives in trash without reference */ + /* dest lives in trash with reference */ list_add(&dest->t_list, &ipvs->dest_trash); dest->idle_start = 0; spin_unlock_bh(&ipvs->dest_trash_lock); - ip_vs_dest_put(dest); } @@ -1160,7 +1158,7 @@ static void ip_vs_dest_trash_expire(unsigned long data) spin_lock(&ipvs->dest_trash_lock); list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { - if (atomic_read(&dest->refcnt) > 0) + if (atomic_read(&dest->refcnt) > 1) continue; if (dest->idle_start) { if (time_before(now, dest->idle_start + From 2851940ffee313e0ff12540a8e11a8c54dea9c65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Tue, 31 Jan 2017 10:30:06 +0100 Subject: [PATCH 27/27] netfilter: allow logging from non-init namespaces Commit 69b34fb996b2 ("netfilter: xt_LOG: add net namespace support for xt_LOG") disabled logging packets using the LOG target from non-init namespaces. The motivation was to prevent containers from flooding kernel log of the host. The plan was to keep it that way until syslog namespace implementation allows containers to log in a safe way. However, the work on syslog namespace seems to have hit a dead end somewhere in 2013 and there are users who want to use xt_LOG in all network namespaces. This patch allows to do so by setting /proc/sys/net/netfilter/nf_log_all_netns to a nonzero value. This sysctl is only accessible from init_net so that one cannot switch the behaviour from inside a container. Signed-off-by: Michal Kubecek Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/netfilter-sysctl.txt | 10 ++++++++ include/net/netfilter/nf_log.h | 3 +++ net/bridge/netfilter/ebt_log.c | 2 +- net/ipv4/netfilter/nf_log_arp.c | 2 +- net/ipv4/netfilter/nf_log_ipv4.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 2 +- net/netfilter/nf_log.c | 24 +++++++++++++++++++ 7 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 Documentation/networking/netfilter-sysctl.txt diff --git a/Documentation/networking/netfilter-sysctl.txt b/Documentation/networking/netfilter-sysctl.txt new file mode 100644 index 000000000000..55791e50e169 --- /dev/null +++ b/Documentation/networking/netfilter-sysctl.txt @@ -0,0 +1,10 @@ +/proc/sys/net/netfilter/* Variables: + +nf_log_all_netns - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + By default, only init_net namespace can log packets into kernel log + with LOG target; this aims to prevent containers from flooding host + kernel log. If enabled, this target also works in other network + namespaces. This variable is only accessible from init_net. diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 450f87f95415..42e0696f38d8 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -51,6 +51,9 @@ struct nf_logger { struct module *me; }; +/* sysctl_nf_log_all_netns - allow LOG target in all network namespaces */ +extern int sysctl_nf_log_all_netns; + /* Function to register/unregister log function. */ int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index e88bd4827ac1..98b9c8e8615e 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,7 +78,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, unsigned int bitmask; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; spin_lock_bh(&ebt_log_lock); diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index b24795e2ee6d..f6f713376e6e 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -87,7 +87,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 856648966f4c..c83a9963269b 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -319,7 +319,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 57d86066a13b..055c51b80f5d 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -351,7 +351,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3dca90dc24ad..0a034f52b912 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,6 +16,9 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 +int sysctl_nf_log_all_netns __read_mostly; +EXPORT_SYMBOL(sysctl_nf_log_all_netns); + static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); @@ -414,6 +417,18 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; +static struct ctl_table_header *nf_log_sysctl_fhdr; + +static struct ctl_table nf_log_sysctl_ftable[] = { + { + .procname = "nf_log_all_netns", + .data = &sysctl_nf_log_all_netns, + .maxlen = sizeof(sysctl_nf_log_all_netns), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; static int nf_log_proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -483,6 +498,10 @@ static int netfilter_log_sysctl_init(struct net *net) nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; } + nf_log_sysctl_fhdr = register_net_sysctl(net, "net/netfilter", + nf_log_sysctl_ftable); + if (!nf_log_sysctl_fhdr) + goto err_freg; } for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) @@ -499,6 +518,9 @@ static int netfilter_log_sysctl_init(struct net *net) err_reg: if (!net_eq(net, &init_net)) kfree(table); + else + unregister_net_sysctl_table(nf_log_sysctl_fhdr); +err_freg: err_alloc: return -ENOMEM; } @@ -511,6 +533,8 @@ static void netfilter_log_sysctl_exit(struct net *net) unregister_net_sysctl_table(net->nf.nf_log_dir_header); if (!net_eq(net, &init_net)) kfree(table); + else + unregister_net_sysctl_table(nf_log_sysctl_fhdr); } #else static int netfilter_log_sysctl_init(struct net *net)