From 51b0a5d8c21a91801bbef9bcc8639dc0b206c6cd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 26 Sep 2014 14:35:14 +0200 Subject: [PATCH 1/9] netfilter: nft_reject: introduce icmp code abstraction for inet and bridge This patch introduces the NFT_REJECT_ICMPX_UNREACH type which provides an abstraction to the ICMP and ICMPv6 codes that you can use from the inet and bridge tables, they are: * NFT_REJECT_ICMPX_NO_ROUTE: no route to host - network unreachable * NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable * NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable * NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratevely prohibited You can still use the specific codes when restricting the rule to match the corresponding layer 3 protocol. I decided to not overload the existing NFT_REJECT_ICMP_UNREACH to have different semantics depending on the table family and to allow the user to specify ICMP family specific codes if they restrict it to the corresponding family. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_reject.h | 1 + include/net/netfilter/nft_reject.h | 9 +-- include/uapi/linux/netfilter/nf_tables.h | 21 ++++++ net/bridge/netfilter/nft_reject_bridge.c | 95 ++++++++++++++++++++++-- net/ipv4/netfilter/nft_reject_ipv4.c | 1 - net/netfilter/nft_reject.c | 37 +++++++++ net/netfilter/nft_reject_inet.c | 94 ++++++++++++++++++++++- 7 files changed, 241 insertions(+), 17 deletions(-) diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index f713b5a31d62..8ce06385a552 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -5,6 +5,7 @@ #include #include #include +#include static inline void nf_send_unreach(struct sk_buff *skb_in, int code) { diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index 36b0da2d55bb..60fa1530006b 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -14,12 +14,7 @@ int nft_reject_init(const struct nft_ctx *ctx, int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr); -void nft_reject_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt); - -void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt); +int nft_reject_icmp_code(u8 code); +int nft_reject_icmpv6_code(u8 code); #endif diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b72ccfeaf865..c26df6787fb0 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -749,12 +749,33 @@ enum nft_queue_attributes { * * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable * @NFT_REJECT_TCP_RST: reject using TCP RST + * @NFT_REJECT_ICMPX_UNREACH: abstracted ICMP unreachable for bridge and inet */ enum nft_reject_types { NFT_REJECT_ICMP_UNREACH, NFT_REJECT_TCP_RST, + NFT_REJECT_ICMPX_UNREACH, }; +/** + * enum nft_reject_code - Generic reject codes for IPv4/IPv6 + * + * @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable + * @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable + * @NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable + * @NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratively prohibited + * + * These codes are mapped to real ICMP and ICMPv6 codes. + */ +enum nft_reject_inet_code { + NFT_REJECT_ICMPX_NO_ROUTE = 0, + NFT_REJECT_ICMPX_PORT_UNREACH, + NFT_REJECT_ICMPX_HOST_UNREACH, + NFT_REJECT_ICMPX_ADMIN_PROHIBITED, + __NFT_REJECT_ICMPX_MAX +}; +#define NFT_REJECT_ICMPX_MAX (__NFT_REJECT_ICMPX_MAX + 1) + /** * enum nft_reject_attributes - nf_tables reject expression netlink attributes * diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index ee3ffe93e14e..a76479535df2 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -14,21 +14,106 @@ #include #include #include +#include +#include static void nft_reject_bridge_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt) { + struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + switch (eth_hdr(pkt->skb)->h_proto) { case htons(ETH_P_IP): - return nft_reject_ipv4_eval(expr, data, pkt); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(pkt->skb, pkt->ops->hooknum); + break; + case NFT_REJECT_ICMPX_UNREACH: + nf_send_unreach(pkt->skb, + nft_reject_icmp_code(priv->icmp_code)); + break; + } + break; case htons(ETH_P_IPV6): - return nft_reject_ipv6_eval(expr, data, pkt); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->ops->hooknum); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + break; + case NFT_REJECT_ICMPX_UNREACH: + nf_send_unreach6(net, pkt->skb, + nft_reject_icmpv6_code(priv->icmp_code), + pkt->ops->hooknum); + break; + } + break; default: /* No explicit way to reject this protocol, drop it. */ - data[NFT_REG_VERDICT].verdict = NF_DROP; break; } + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static int nft_reject_bridge_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + int icmp_code; + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + + icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + if (priv->type == NFT_REJECT_ICMPX_UNREACH && + icmp_code > NFT_REJECT_ICMPX_MAX) + return -EINVAL; + + priv->icmp_code = icmp_code; + break; + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; + } + return 0; +} + +static int nft_reject_bridge_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + } + + return 0; + +nla_put_failure: + return -1; } static struct nft_expr_type nft_reject_bridge_type; @@ -36,8 +121,8 @@ static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), .eval = nft_reject_bridge_eval, - .init = nft_reject_init, - .dump = nft_reject_dump, + .init = nft_reject_bridge_init, + .dump = nft_reject_bridge_dump, }; static struct nft_expr_type nft_reject_bridge_type __read_mostly = { diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index e79718a382f2..ed33299c56d1 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index f3448c296446..ec8a456092a7 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, @@ -70,5 +72,40 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_reject_dump); +static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = { + [NFT_REJECT_ICMPX_NO_ROUTE] = ICMP_NET_UNREACH, + [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMP_PORT_UNREACH, + [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMP_HOST_UNREACH, + [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMP_PKT_FILTERED, +}; + +int nft_reject_icmp_code(u8 code) +{ + if (code > NFT_REJECT_ICMPX_MAX) + return -EINVAL; + + return icmp_code_v4[code]; +} + +EXPORT_SYMBOL_GPL(nft_reject_icmp_code); + + +static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = { + [NFT_REJECT_ICMPX_NO_ROUTE] = ICMPV6_NOROUTE, + [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMPV6_PORT_UNREACH, + [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMPV6_ADDR_UNREACH, + [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMPV6_ADM_PROHIBITED, +}; + +int nft_reject_icmpv6_code(u8 code) +{ + if (code > NFT_REJECT_ICMPX_MAX) + return -EINVAL; + + return icmp_code_v6[code]; +} + +EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index b718a52a4654..7b5f9d58680a 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -14,17 +14,103 @@ #include #include #include +#include +#include static void nft_reject_inet_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt) { + struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + switch (pkt->ops->pf) { case NFPROTO_IPV4: - return nft_reject_ipv4_eval(expr, data, pkt); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(pkt->skb, pkt->ops->hooknum); + break; + case NFT_REJECT_ICMPX_UNREACH: + nf_send_unreach(pkt->skb, + nft_reject_icmp_code(priv->icmp_code)); + break; + } + break; case NFPROTO_IPV6: - return nft_reject_ipv6_eval(expr, data, pkt); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->ops->hooknum); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + break; + case NFT_REJECT_ICMPX_UNREACH: + nf_send_unreach6(net, pkt->skb, + nft_reject_icmpv6_code(priv->icmp_code), + pkt->ops->hooknum); + break; + } + break; } + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static int nft_reject_inet_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + int icmp_code; + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + + icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + if (priv->type == NFT_REJECT_ICMPX_UNREACH && + icmp_code > NFT_REJECT_ICMPX_MAX) + return -EINVAL; + + priv->icmp_code = icmp_code; + break; + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; + } + return 0; +} + +static int nft_reject_inet_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + } + + return 0; + +nla_put_failure: + return -1; } static struct nft_expr_type nft_reject_inet_type; @@ -32,8 +118,8 @@ static const struct nft_expr_ops nft_reject_inet_ops = { .type = &nft_reject_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), .eval = nft_reject_inet_eval, - .init = nft_reject_init, - .dump = nft_reject_dump, + .init = nft_reject_inet_init, + .dump = nft_reject_inet_dump, }; static struct nft_expr_type nft_reject_inet_type __read_mostly = { From c8d7b98bec43faaa6583c3135030be5eb4693acb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 26 Sep 2014 14:35:15 +0200 Subject: [PATCH 2/9] netfilter: move nf_send_resetX() code to nf_reject_ipvX modules Move nf_send_reset() and nf_send_reset6() to nf_reject_ipv4 and nf_reject_ipv6 respectively. This code is shared by x_tables and nf_tables. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_reject.h | 118 +----------------- net/ipv4/netfilter/Kconfig | 6 + net/ipv4/netfilter/Makefile | 3 + net/ipv4/netfilter/nf_reject_ipv4.c | 127 +++++++++++++++++++ net/ipv6/netfilter/Kconfig | 6 + net/ipv6/netfilter/Makefile | 3 + net/ipv6/netfilter/nf_reject_ipv6.c | 163 +++++++++++++++++++++++++ 7 files changed, 309 insertions(+), 117 deletions(-) create mode 100644 net/ipv4/netfilter/nf_reject_ipv4.c create mode 100644 net/ipv6/netfilter/nf_reject_ipv6.c diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index 8ce06385a552..e8427193c777 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -1,10 +1,6 @@ #ifndef _IPV4_NF_REJECT_H #define _IPV4_NF_REJECT_H -#include -#include -#include -#include #include static inline void nf_send_unreach(struct sk_buff *skb_in, int code) @@ -12,118 +8,6 @@ static inline void nf_send_unreach(struct sk_buff *skb_in, int code) icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); } -/* Send RST reply */ -static void nf_send_reset(struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; - const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; - - /* IP header checks: fragment. */ - if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; - - oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); - if (oth == NULL) - return; - - /* No RST for RST. */ - if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; - - /* Check checksum */ - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); - - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; - - skb_reserve(nskb, LL_MAX_HEADER); - - skb_reset_network_header(nskb); - niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); - niph->version = 4; - niph->ihl = sizeof(struct iphdr) / 4; - niph->tos = 0; - niph->id = 0; - niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; - niph->check = 0; - niph->saddr = oiph->daddr; - niph->daddr = oiph->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - memset(tcph, 0, sizeof(*tcph)); - tcph->source = oth->dest; - tcph->dest = oth->source; - tcph->doff = sizeof(struct tcphdr) / 4; - - if (oth->ack) - tcph->seq = oth->ack_seq; - else { - tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + - oldskb->len - ip_hdrlen(oldskb) - - (oth->doff << 2)); - tcph->ack = 1; - } - - tcph->rst = 1; - tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, - niph->daddr, 0); - nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = (unsigned char *)tcph - nskb->head; - nskb->csum_offset = offsetof(struct tcphdr, check); - - /* ip_route_me_harder expects skb->dst to be set */ - skb_dst_set_noref(nskb, skb_dst(oldskb)); - - nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) - goto free_nskb; - - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - - /* "Never happens" */ - if (nskb->len > dst_mtu(skb_dst(nskb))) - goto free_nskb; - - nf_ct_attach(nskb, oldskb); - -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - /* If we use ip_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - niph->tot_len = htons(nskb->len); - ip_send_check(niph); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - goto free_nskb; - dev_queue_xmit(nskb); - } else -#endif - ip_local_out(nskb); - - return; - - free_nskb: - kfree_skb(nskb); -} - +void nf_send_reset(struct sk_buff *oldskb, int hook); #endif /* _IPV4_NF_REJECT_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 345242a79db6..4c019d5c3f57 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -61,8 +61,13 @@ config NFT_CHAIN_ROUTE_IPV4 fields such as the source, destination, type of service and the packet mark. +config NF_REJECT_IPV4 + tristate "IPv4 packet rejection" + default m if NETFILTER_ADVANCED=n + config NFT_REJECT_IPV4 depends on NF_TABLES_IPV4 + select NF_REJECT_IPV4 default NFT_REJECT tristate @@ -208,6 +213,7 @@ config IP_NF_FILTER config IP_NF_TARGET_REJECT tristate "REJECT target support" depends on IP_NF_FILTER + select NF_REJECT_IPV4 default m if NETFILTER_ADVANCED=n help The REJECT target allows a filtering rule to specify that an ICMP diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 14488cc5fd2c..f4cef5af0969 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -23,6 +23,9 @@ obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o +# reject +obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c new file mode 100644 index 000000000000..b023b4eb1a96 --- /dev/null +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -0,0 +1,127 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +/* Send RST reply */ +void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _otcph, *tcph; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), + sizeof(_otcph), &_otcph); + if (oth == NULL) + return; + + /* No RST for RST. */ + if (oth->rst) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + /* Check checksum */ + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) + return; + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + + skb_reset_network_header(nskb); + niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); + niph->version = 4; + niph->ihl = sizeof(struct iphdr) / 4; + niph->tos = 0; + niph->id = 0; + niph->frag_off = htons(IP_DF); + niph->protocol = IPPROTO_TCP; + niph->check = 0; + niph->saddr = oiph->daddr; + niph->daddr = oiph->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + memset(tcph, 0, sizeof(*tcph)); + tcph->source = oth->dest; + tcph->dest = oth->source; + tcph->doff = sizeof(struct tcphdr) / 4; + + if (oth->ack) + tcph->seq = oth->ack_seq; + else { + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + oldskb->len - ip_hdrlen(oldskb) - + (oth->doff << 2)); + tcph->ack = 1; + } + + tcph->rst = 1; + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); + + /* ip_route_me_harder expects skb->dst to be set */ + skb_dst_set_noref(nskb, skb_dst(oldskb)); + + nskb->protocol = htons(ETH_P_IP); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) + goto free_nskb; + + niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); + + /* "Never happens" */ + if (nskb->len > dst_mtu(skb_dst(nskb))) + goto free_nskb; + + nf_ct_attach(nskb, oldskb); + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + goto free_nskb; + dev_queue_xmit(nskb); + } else +#endif + ip_local_out(nskb); + + return; + + free_nskb: + kfree_skb(nskb); +} +EXPORT_SYMBOL_GPL(nf_send_reset); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index bb1a40db7be1..6af874fc187f 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -40,8 +40,13 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. +config NF_REJECT_IPV6 + tristate "IPv6 packet rejection" + default m if NETFILTER_ADVANCED=n + config NFT_REJECT_IPV6 depends on NF_TABLES_IPV6 + select NF_REJECT_IPV6 default NFT_REJECT tristate @@ -208,6 +213,7 @@ config IP6_NF_FILTER config IP6_NF_TARGET_REJECT tristate "REJECT target support" depends on IP6_NF_FILTER + select NF_REJECT_IPV6 default m if NETFILTER_ADVANCED=n help The REJECT target allows a filtering rule to specify that an ICMPv6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 0f7e5b3f328d..fbb25f01143c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,6 +27,9 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # logging obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o +# reject +obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o + # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c new file mode 100644 index 000000000000..5f5f0438d74d --- /dev/null +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -0,0 +1,163 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, hh_len; + int tcphoff, needs_ack; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; + struct dst_entry *dst = NULL; + u8 proto; + __be16 frag_off; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + proto = oip6h->nexthdr; + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + pr_debug("Cannot get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP, " + "or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) + BUG(); + + /* No RST for RST. */ + if (otcph.rst) { + pr_debug("RST is set\n"); + return; + } + + /* Check checksum. */ + if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { + pr_debug("TCP checksum is invalid\n"); + return; + } + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph.dest; + fl6.fl6_dport = otcph.source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + + skb_put(nskb, sizeof(struct ipv6hdr)); + skb_reset_network_header(nskb); + ip6h = ipv6_hdr(nskb); + ip6_flow_hdr(ip6h, tclass, 0); + ip6h->hop_limit = ip6_dst_hoplimit(dst); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->saddr = oip6h->daddr; + ip6h->daddr = oip6h->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ + tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, + &ipv6_hdr(nskb)->daddr, + sizeof(struct tcphdr), IPPROTO_TCP, + csum_partial(tcph, + sizeof(struct tcphdr), 0)); + + nf_ct_attach(nskb, oldskb); + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip6_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + nskb->protocol = htons(ETH_P_IPV6); + ip6h->payload_len = htons(sizeof(struct tcphdr)); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + return; + dev_queue_xmit(nskb); + } else +#endif + ip6_local_out(nskb); +} +EXPORT_SYMBOL_GPL(nf_send_reset6); From 1109a90c01177e8f4a5fd95c5b685ad02f1fe9bb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Oct 2014 11:19:17 +0200 Subject: [PATCH 3/9] netfilter: use IS_ENABLED(CONFIG_BRIDGE_NETFILTER) In 34666d4 ("netfilter: bridge: move br_netfilter out of the core"), the bridge netfilter code has been modularized. Use IS_ENABLED instead of ifdef to cover the module case. Fixes: 34666d4 ("netfilter: bridge: move br_netfilter out of the core") Signed-off-by: Pablo Neira Ayuso --- net/core/skbuff.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 2 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 2 +- net/netfilter/ipset/ip_set_hash_netiface.c | 4 ++-- net/netfilter/nf_log_common.c | 2 +- net/netfilter/nf_queue.c | 4 ++-- net/netfilter/nfnetlink_log.c | 8 ++++---- net/netfilter/nfnetlink_queue_core.c | 12 ++++++------ 10 files changed, 20 insertions(+), 20 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4be570a4ab21..7de3d679f3e0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -566,7 +566,7 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb->nfct); #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); #endif /* XXX: IS this still necessary? - JHS */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c8fa62476461..e35b71289156 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -516,7 +516,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) hlen = iph->ihl * 4; mtu = mtu - hlen; /* Size of data space */ -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (skb->nf_bridge) mtu -= nf_bridge_mtu_reduction(skb); #endif diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 5b6e0df4ccff..8f48f5517e33 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -20,7 +20,7 @@ #include #include #include -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include #endif diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 76bd1aef257f..7e5ca6f2d0cd 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -50,7 +50,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 7b9a748c6bac..e70382e4dfb5 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -40,7 +40,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 03cdb69ac9bf..35dd35873442 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -237,7 +237,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const struct nf_bridge_info *nf_bridge = skb->nf_bridge; if (!nf_bridge) @@ -474,7 +474,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, ip6_netmask(&e.ip, e.cidr); if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const struct nf_bridge_info *nf_bridge = skb->nf_bridge; if (!nf_bridge) diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index eeb8ef4ff1a3..a2233e77cf39 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -158,7 +158,7 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (skb->nf_bridge) { const struct net_device *physindev; const struct net_device *physoutdev; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5d24b1fdb593..4c8b68e5fa16 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -52,7 +52,7 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(entry->indev); if (entry->outdev) dev_put(entry->outdev); -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; @@ -77,7 +77,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(entry->indev); if (entry->outdev) dev_hold(entry->outdev); -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; struct net_device *physdev; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index a11c5ff2f720..b1e3a0579416 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -36,7 +36,7 @@ #include -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include "../bridge/br_private.h" #endif @@ -429,7 +429,7 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; if (indev) { -#ifndef CONFIG_BRIDGE_NETFILTER +#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; @@ -460,7 +460,7 @@ __build_packet_message(struct nfnl_log_net *log, } if (outdev) { -#ifndef CONFIG_BRIDGE_NETFILTER +#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; @@ -640,7 +640,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 108120f216b1..a82077d9f59b 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -36,7 +36,7 @@ #include -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include "../bridge/br_private.h" #endif @@ -302,7 +302,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif @@ -380,7 +380,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, indev = entry->indev; if (indev) { -#ifndef CONFIG_BRIDGE_NETFILTER +#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; #else @@ -410,7 +410,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } if (outdev) { -#ifndef CONFIG_BRIDGE_NETFILTER +#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; #else @@ -569,7 +569,7 @@ nf_queue_entry_dup(struct nf_queue_entry *e) return NULL; } -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* When called from bridge netfilter, skb->data must point to MAC header * before calling skb_gso_segment(). Else, original MAC header is lost * and segmented skbs will be sent to wrong destination. @@ -763,7 +763,7 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) if (entry->outdev) if (entry->outdev->ifindex == ifindex) return 1; -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { if (entry->skb->nf_bridge->physindev && entry->skb->nf_bridge->physindev->ifindex == ifindex) From 1b1bc49c0fc0501bf0d1366a2a5e5c1f8dcf9cb1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Oct 2014 13:53:20 +0200 Subject: [PATCH 4/9] netfilter: nf_tables: wait for call_rcu completion on module removal Make sure the objects have been released before the nf_tables modules is removed. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 19e79f0d9ad2..556a0dfa4abc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4163,6 +4163,7 @@ static void __exit nf_tables_module_exit(void) { unregister_pernet_subsys(&nf_tables_net_ops); nfnetlink_subsys_unregister(&nf_tables_subsys); + rcu_barrier(); nf_tables_core_module_exit(); kfree(info); } From 756c1b1a7f20a42a559b40b3b77db5afcbb719d6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 17 Jun 2014 21:18:44 +0200 Subject: [PATCH 5/9] netfilter: nft_compat: remove incomplete 32/64 bits arch compat code This code was based on the wrong asumption that you can probe based on the match/target private size that we get from userspace. This doesn't work at all when you have to dump the info back to userspace since you don't know what word size the userspace utility is using. Currently, the extensions that require arch compat are limit match and the ebt_mark match/target. The standard targets are not used by the nft-xt compat layer, so they are not affected. We can work around this limitation with a new revision that uses arch agnostic types. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 114 +++++-------------------------------- 1 file changed, 14 insertions(+), 100 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 1840989092ed..7e2683c8a44a 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -101,26 +101,12 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, static void target_compat_from_user(struct xt_target *t, void *in, void *out) { -#ifdef CONFIG_COMPAT - if (t->compat_from_user) { - int pad; + int pad; - t->compat_from_user(out, in); - pad = XT_ALIGN(t->targetsize) - t->targetsize; - if (pad > 0) - memset(out + t->targetsize, 0, pad); - } else -#endif - memcpy(out, in, XT_ALIGN(t->targetsize)); -} - -static inline int nft_compat_target_offset(struct xt_target *target) -{ -#ifdef CONFIG_COMPAT - return xt_compat_target_offset(target); -#else - return 0; -#endif + memcpy(out, in, t->targetsize); + pad = XT_ALIGN(t->targetsize) - t->targetsize; + if (pad > 0) + memset(out + t->targetsize, 0, pad); } static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = { @@ -208,34 +194,6 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) module_put(target->me); } -static int -target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in) -{ - int ret; - -#ifdef CONFIG_COMPAT - if (t->compat_to_user) { - mm_segment_t old_fs; - void *out; - - out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC); - if (out == NULL) - return -ENOMEM; - - /* We want to reuse existing compat_to_user */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - t->compat_to_user(out, in); - set_fs(old_fs); - ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out); - kfree(out); - } else -#endif - ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in); - - return ret; -} - static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct xt_target *target = expr->ops->data; @@ -243,7 +201,7 @@ static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || - target_dump_info(skb, target, info)) + nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(target->targetsize), info)) goto nla_put_failure; return 0; @@ -341,17 +299,12 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, static void match_compat_from_user(struct xt_match *m, void *in, void *out) { -#ifdef CONFIG_COMPAT - if (m->compat_from_user) { - int pad; + int pad; - m->compat_from_user(out, in); - pad = XT_ALIGN(m->matchsize) - m->matchsize; - if (pad > 0) - memset(out + m->matchsize, 0, pad); - } else -#endif - memcpy(out, in, XT_ALIGN(m->matchsize)); + memcpy(out, in, m->matchsize); + pad = XT_ALIGN(m->matchsize) - m->matchsize; + if (pad > 0) + memset(out + m->matchsize, 0, pad); } static int @@ -404,43 +357,6 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) module_put(match->me); } -static int -match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in) -{ - int ret; - -#ifdef CONFIG_COMPAT - if (m->compat_to_user) { - mm_segment_t old_fs; - void *out; - - out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC); - if (out == NULL) - return -ENOMEM; - - /* We want to reuse existing compat_to_user */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - m->compat_to_user(out, in); - set_fs(old_fs); - ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out); - kfree(out); - } else -#endif - ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in); - - return ret; -} - -static inline int nft_compat_match_offset(struct xt_match *match) -{ -#ifdef CONFIG_COMPAT - return xt_compat_match_offset(match); -#else - return 0; -#endif -} - static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) { void *info = nft_expr_priv(expr); @@ -448,7 +364,7 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || - match_dump_info(skb, match, info)) + nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(match->matchsize), info)) goto nla_put_failure; return 0; @@ -643,8 +559,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-ENOMEM); nft_match->ops.type = &nft_match_type; - nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) + - nft_compat_match_offset(match)); + nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; @@ -714,8 +629,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-ENOMEM); nft_target->ops.type = &nft_target_type; - nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) + - nft_compat_target_offset(target)); + nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.eval = nft_target_eval; nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; From 36d2af5998258344993dd43729997a7a3baa9d99 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Oct 2014 20:34:37 +0200 Subject: [PATCH 6/9] netfilter: nf_tables: allow to filter from prerouting and postrouting This allows us to emulate the NAT table in ebtables, which is actually a plain filter chain that hooks at prerouting, output and postrouting. Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_tables_bridge.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 5bcc0d8b31f2..da17a5eab8b4 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -34,9 +34,11 @@ static struct nft_af_info nft_af_bridge __read_mostly = { .owner = THIS_MODULE, .nops = 1, .hooks = { + [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, [NF_BR_LOCAL_IN] = nft_do_chain_bridge, [NF_BR_FORWARD] = nft_do_chain_bridge, [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + [NF_BR_POST_ROUTING] = nft_do_chain_bridge, }, }; From 4b7fd5d97ee6e599247b4a55122ca6ba80c8148d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 2 Oct 2014 11:13:21 +0200 Subject: [PATCH 7/9] netfilter: explicit module dependency between br_netfilter and physdev You can use physdev to match the physical interface enslaved to the bridge device. This information is stored in skb->nf_bridge and it is set up by br_netfilter. So, this is only available when iptables is used from the bridge netfilter path. Since 34666d4 ("netfilter: bridge: move br_netfilter out of the core"), the br_netfilter code is modular. To reduce the impact of this change, we can autoload the br_netfilter if the physdev match is used since we assume that the users need br_netfilter in place. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/br_netfilter.h | 6 ++++++ net/bridge/br_netfilter.c | 5 +++++ net/netfilter/xt_physdev.c | 3 +++ 3 files changed, 14 insertions(+) create mode 100644 include/net/netfilter/br_netfilter.h diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h new file mode 100644 index 000000000000..2aa6048a55c1 --- /dev/null +++ b/include/net/netfilter/br_netfilter.h @@ -0,0 +1,6 @@ +#ifndef _BR_NETFILTER_H_ +#define _BR_NETFILTER_H_ + +void br_netfilter_enable(void); + +#endif /* _BR_NETFILTER_H_ */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 97e43937aaca..fa1270cc5086 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -856,6 +856,11 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, return NF_ACCEPT; } +void br_netfilter_enable(void) +{ +} +EXPORT_SYMBOL_GPL(br_netfilter_enable); + /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ static struct nf_hook_ops br_nf_ops[] __read_mostly = { diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index d7ca16b8b8df..f440f57a452f 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -13,6 +13,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer "); @@ -87,6 +88,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; + br_netfilter_enable(); + if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return -EINVAL; From 07dcc686fa8f6667dec4696804cdb43a90267b9a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 30 Sep 2014 10:50:06 +0900 Subject: [PATCH 8/9] ipvs: Clean up comment style in ip_vs.h * Consistently use the multi-line comment style for networking code: /* This * That * The other thing */ * Use single-line comment style for comments with only one line of text. * In general follow the leading '*' of each line of a comment with a single space and then text. * Add missing line break between functions, remove double line break, align comments to previous lines whenever possible. Reported-by: Sergei Shtylyov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 214 ++++++++++++++++---------------------------- 1 file changed, 75 insertions(+), 139 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 576d7f0bed5d..615b20b58545 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1,6 +1,5 @@ -/* - * IP Virtual Server - * data structure and functionality definitions +/* IP Virtual Server + * data structure and functionality definitions */ #ifndef _NET_IP_VS_H @@ -12,7 +11,7 @@ #include /* for struct list_head */ #include /* for struct rwlock_t */ -#include /* for struct atomic_t */ +#include /* for struct atomic_t */ #include #include #include @@ -30,15 +29,13 @@ #endif #include /* Netw namespace */ -/* - * Generic access of ipvs struct - */ +/* Generic access of ipvs struct */ static inline struct netns_ipvs *net_ipvs(struct net* net) { return net->ipvs; } -/* - * Get net ptr from skb in traffic cases + +/* Get net ptr from skb in traffic cases * use skb_sknet when call is from userland (ioctl or netlink) */ static inline struct net *skb_net(const struct sk_buff *skb) @@ -90,8 +87,8 @@ static inline struct net *skb_sknet(const struct sk_buff *skb) return &init_net; #endif } -/* - * This one needed for single_open_net since net is stored directly in + +/* This one needed for single_open_net since net is stored directly in * private not as a struct i.e. seq_file_net can't be used. */ static inline struct net *seq_file_single_net(struct seq_file *seq) @@ -108,7 +105,7 @@ extern int ip_vs_conn_tab_size; struct ip_vs_iphdr { __u32 len; /* IPv4 simply where L4 starts - IPv6 where L4 Transport Header starts */ + * IPv6 where L4 Transport Header starts */ __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/ __s16 protocol; __s32 flags; @@ -304,16 +301,11 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, #define LeaveFunction(level) do {} while (0) #endif - -/* - * The port number of FTP service (in network order). - */ +/* The port number of FTP service (in network order). */ #define FTPPORT cpu_to_be16(21) #define FTPDATA cpu_to_be16(20) -/* - * TCP State Values - */ +/* TCP State Values */ enum { IP_VS_TCP_S_NONE = 0, IP_VS_TCP_S_ESTABLISHED, @@ -329,25 +321,19 @@ enum { IP_VS_TCP_S_LAST }; -/* - * UDP State Values - */ +/* UDP State Values */ enum { IP_VS_UDP_S_NORMAL, IP_VS_UDP_S_LAST, }; -/* - * ICMP State Values - */ +/* ICMP State Values */ enum { IP_VS_ICMP_S_NORMAL, IP_VS_ICMP_S_LAST, }; -/* - * SCTP State Values - */ +/* SCTP State Values */ enum ip_vs_sctp_states { IP_VS_SCTP_S_NONE, IP_VS_SCTP_S_INIT1, @@ -366,21 +352,18 @@ enum ip_vs_sctp_states { IP_VS_SCTP_S_LAST }; -/* - * Delta sequence info structure - * Each ip_vs_conn has 2 (output AND input seq. changes). - * Only used in the VS/NAT. +/* Delta sequence info structure + * Each ip_vs_conn has 2 (output AND input seq. changes). + * Only used in the VS/NAT. */ struct ip_vs_seq { __u32 init_seq; /* Add delta from this seq */ __u32 delta; /* Delta in sequence numbers */ __u32 previous_delta; /* Delta in sequence numbers - before last resized pkt */ + * before last resized pkt */ }; -/* - * counters per cpu - */ +/* counters per cpu */ struct ip_vs_counters { __u32 conns; /* connections scheduled */ __u32 inpkts; /* incoming packets */ @@ -388,17 +371,13 @@ struct ip_vs_counters { __u64 inbytes; /* incoming bytes */ __u64 outbytes; /* outgoing bytes */ }; -/* - * Stats per cpu - */ +/* Stats per cpu */ struct ip_vs_cpu_stats { struct ip_vs_counters ustats; struct u64_stats_sync syncp; }; -/* - * IPVS statistics objects - */ +/* IPVS statistics objects */ struct ip_vs_estimator { struct list_head list; @@ -491,9 +470,7 @@ struct ip_vs_protocol { void (*timeout_change)(struct ip_vs_proto_data *pd, int flags); }; -/* - * protocol data per netns - */ +/* protocol data per netns */ struct ip_vs_proto_data { struct ip_vs_proto_data *next; struct ip_vs_protocol *pp; @@ -520,9 +497,7 @@ struct ip_vs_conn_param { __u8 pe_data_len; }; -/* - * IP_VS structure allocated for each dynamically scheduled connection - */ +/* IP_VS structure allocated for each dynamically scheduled connection */ struct ip_vs_conn { struct hlist_node c_list; /* hashed list heads */ /* Protocol, addresses and port numbers */ @@ -561,17 +536,18 @@ struct ip_vs_conn { struct ip_vs_dest *dest; /* real server */ atomic_t in_pkts; /* incoming packet counter */ - /* packet transmitter for different forwarding methods. If it - mangles the packet, it must return NF_DROP or better NF_STOLEN, - otherwise this must be changed to a sk_buff **. - NF_ACCEPT can be returned when destination is local. + /* Packet transmitter for different forwarding methods. If it + * mangles the packet, it must return NF_DROP or better NF_STOLEN, + * otherwise this must be changed to a sk_buff **. + * NF_ACCEPT can be returned when destination is local. */ int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); /* Note: we can group the following members into a structure, - in order to save more space, and the following members are - only used in VS/NAT anyway */ + * in order to save more space, and the following members are + * only used in VS/NAT anyway + */ struct ip_vs_app *app; /* bound ip_vs_app object */ void *app_data; /* Application private data */ struct ip_vs_seq in_seq; /* incoming seq. struct */ @@ -584,9 +560,7 @@ struct ip_vs_conn { struct rcu_head rcu_head; }; -/* - * To save some memory in conn table when name space is disabled. - */ +/* To save some memory in conn table when name space is disabled. */ static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp) { #ifdef CONFIG_NET_NS @@ -595,6 +569,7 @@ static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp) return &init_net; #endif } + static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net) { #ifdef CONFIG_NET_NS @@ -612,13 +587,12 @@ static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp, #endif } -/* - * Extended internal versions of struct ip_vs_service_user and - * ip_vs_dest_user for IPv6 support. +/* Extended internal versions of struct ip_vs_service_user and ip_vs_dest_user + * for IPv6 support. * - * We need these to conveniently pass around service and destination - * options, but unfortunately, we also need to keep the old definitions to - * maintain userspace backwards compatibility for the setsockopt interface. + * We need these to conveniently pass around service and destination + * options, but unfortunately, we also need to keep the old definitions to + * maintain userspace backwards compatibility for the setsockopt interface. */ struct ip_vs_service_user_kern { /* virtual service addresses */ @@ -656,8 +630,8 @@ struct ip_vs_dest_user_kern { /* - * The information about the virtual service offered to the net - * and the forwarding entries + * The information about the virtual service offered to the net and the + * forwarding entries. */ struct ip_vs_service { struct hlist_node s_list; /* for normal service table */ @@ -697,9 +671,8 @@ struct ip_vs_dest_dst { struct rcu_head rcu_head; }; -/* - * The real server destination forwarding entry - * with ip address, port number, and so on. +/* The real server destination forwarding entry with ip address, port number, + * and so on. */ struct ip_vs_dest { struct list_head n_list; /* for the dests in the service */ @@ -738,10 +711,7 @@ struct ip_vs_dest { unsigned int in_rs_table:1; /* we are in rs_table */ }; - -/* - * The scheduler object - */ +/* The scheduler object */ struct ip_vs_scheduler { struct list_head n_list; /* d-linked list head */ char *name; /* scheduler name */ @@ -781,9 +751,7 @@ struct ip_vs_pe { int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); }; -/* - * The application module object (a.k.a. app incarnation) - */ +/* The application module object (a.k.a. app incarnation) */ struct ip_vs_app { struct list_head a_list; /* member in app list */ int type; /* IP_VS_APP_TYPE_xxx */ @@ -799,16 +767,14 @@ struct ip_vs_app { atomic_t usecnt; /* usage counter */ struct rcu_head rcu_head; - /* - * output hook: Process packet in inout direction, diff set for TCP. + /* output hook: Process packet in inout direction, diff set for TCP. * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, * 2=Mangled but checksum was not updated */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); - /* - * input hook: Process packet in outin direction, diff set for TCP. + /* input hook: Process packet in outin direction, diff set for TCP. * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, * 2=Mangled but checksum was not updated */ @@ -867,9 +833,7 @@ struct ipvs_master_sync_state { struct netns_ipvs { int gen; /* Generation */ int enable; /* enable like nf_hooks do */ - /* - * Hash table: for real service lookups - */ + /* Hash table: for real service lookups */ #define IP_VS_RTAB_BITS 4 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) @@ -903,7 +867,7 @@ struct netns_ipvs { struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; #endif /* ip_vs_conn */ - atomic_t conn_count; /* connection counter */ + atomic_t conn_count; /* connection counter */ /* ip_vs_ctl */ struct ip_vs_stats tot_stats; /* Statistics & est. */ @@ -990,9 +954,9 @@ struct netns_ipvs { char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; /* net name space ptr */ struct net *net; /* Needed by timer routines */ - /* Number of heterogeneous destinations, needed because - * heterogeneous are not supported when synchronization is - * enabled */ + /* Number of heterogeneous destinations, needed becaus heterogeneous + * are not supported when synchronization is enabled. + */ unsigned int mixed_address_family_dests; }; @@ -1147,9 +1111,8 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs) #endif -/* - * IPVS core functions - * (from ip_vs_core.c) +/* IPVS core functions + * (from ip_vs_core.c) */ const char *ip_vs_proto_name(unsigned int proto); void ip_vs_init_hash_table(struct list_head *table, int rows); @@ -1157,11 +1120,9 @@ void ip_vs_init_hash_table(struct list_head *table, int rows); #define IP_VS_APP_TYPE_FTP 1 -/* - * ip_vs_conn handling functions - * (from ip_vs_conn.c) +/* ip_vs_conn handling functions + * (from ip_vs_conn.c) */ - enum { IP_VS_DIR_INPUT = 0, IP_VS_DIR_OUTPUT, @@ -1292,9 +1253,7 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) atomic_inc(&ctl_cp->n_control); } -/* - * IPVS netns init & cleanup functions - */ +/* IPVS netns init & cleanup functions */ int ip_vs_estimator_net_init(struct net *net); int ip_vs_control_net_init(struct net *net); int ip_vs_protocol_net_init(struct net *net); @@ -1309,9 +1268,8 @@ void ip_vs_estimator_net_cleanup(struct net *net); void ip_vs_sync_net_cleanup(struct net *net); void ip_vs_service_net_cleanup(struct net *net); -/* - * IPVS application functions - * (from ip_vs_app.c) +/* IPVS application functions + * (from ip_vs_app.c) */ #define IP_VS_APP_MAX_PORTS 8 struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app); @@ -1331,9 +1289,7 @@ int unregister_ip_vs_pe(struct ip_vs_pe *pe); struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name); -/* - * Use a #define to avoid all of module.h just for these trivial ops - */ +/* Use a #define to avoid all of module.h just for these trivial ops */ #define ip_vs_pe_get(pe) \ if (pe && pe->module) \ __module_get(pe->module); @@ -1342,9 +1298,7 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name); if (pe && pe->module) \ module_put(pe->module); -/* - * IPVS protocol functions (from ip_vs_proto.c) - */ +/* IPVS protocol functions (from ip_vs_proto.c) */ int ip_vs_protocol_init(void); void ip_vs_protocol_cleanup(void); void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); @@ -1362,9 +1316,8 @@ extern struct ip_vs_protocol ip_vs_protocol_esp; extern struct ip_vs_protocol ip_vs_protocol_ah; extern struct ip_vs_protocol ip_vs_protocol_sctp; -/* - * Registering/unregistering scheduler functions - * (from ip_vs_sched.c) +/* Registering/unregistering scheduler functions + * (from ip_vs_sched.c) */ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); @@ -1383,10 +1336,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg); - -/* - * IPVS control data and functions (from ip_vs_ctl.c) - */ +/* IPVS control data and functions (from ip_vs_ctl.c) */ extern struct ip_vs_stats ip_vs_stats; extern int sysctl_ip_vs_sync_ver; @@ -1427,26 +1377,21 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) kfree(dest); } -/* - * IPVS sync daemon data and function prototypes - * (from ip_vs_sync.c) +/* IPVS sync daemon data and function prototypes + * (from ip_vs_sync.c) */ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid); int stop_sync_thread(struct net *net, int state); void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts); -/* - * IPVS rate estimator prototypes (from ip_vs_est.c) - */ +/* IPVS rate estimator prototypes (from ip_vs_est.c) */ void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats); void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats); void ip_vs_zero_estimator(struct ip_vs_stats *stats); void ip_vs_read_estimator(struct ip_vs_stats_user *dst, struct ip_vs_stats *stats); -/* - * Various IPVS packet transmitters (from ip_vs_xmit.c) - */ +/* Various IPVS packet transmitters (from ip_vs_xmit.c) */ int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, @@ -1477,12 +1422,10 @@ int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, #endif #ifdef CONFIG_SYSCTL -/* - * This is a simple mechanism to ignore packets when - * we are loaded. Just set ip_vs_drop_rate to 'n' and - * we start to drop 1/rate of the packets +/* This is a simple mechanism to ignore packets when + * we are loaded. Just set ip_vs_drop_rate to 'n' and + * we start to drop 1/rate of the packets */ - static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { if (!ipvs->drop_rate) @@ -1496,9 +1439,7 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs) static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; } #endif -/* - * ip_vs_fwd_tag returns the forwarding tag of the connection - */ +/* ip_vs_fwd_tag returns the forwarding tag of the connection */ #define IP_VS_FWD_METHOD(cp) (cp->flags & IP_VS_CONN_F_FWD_MASK) static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) @@ -1557,9 +1498,7 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } -/* - * Forget current conntrack (unconfirmed) and attach notrack entry - */ +/* Forget current conntrack (unconfirmed) and attach notrack entry */ static inline void ip_vs_notrack(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -1576,9 +1515,8 @@ static inline void ip_vs_notrack(struct sk_buff *skb) } #ifdef CONFIG_IP_VS_NFCT -/* - * Netfilter connection tracking - * (from ip_vs_nfct.c) +/* Netfilter connection tracking + * (from ip_vs_nfct.c) */ static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) { @@ -1617,14 +1555,12 @@ static inline int ip_vs_confirm_conntrack(struct sk_buff *skb) static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) { } -/* CONFIG_IP_VS_NFCT */ -#endif +#endif /* CONFIG_IP_VS_NFCT */ static inline int ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) { - /* - * We think the overhead of processing active connections is 256 + /* We think the overhead of processing active connections is 256 * times higher than that of inactive connections in average. (This * 256 times might not be accurate, we will change it later) We * use the following formula to estimate the overhead now: From 8da4cc1b10c1aeba090d1d862b17174e4dbd50a4 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Fri, 3 Oct 2014 14:13:36 +0200 Subject: [PATCH 9/9] netfilter: nft_masq: register/unregister notifiers on module init/exit We have to register the notifiers in the masquerade expression from the the module _init and _exit path. This fixes crashes when removing the masquerade rule with no ipt_MASQUERADE support in place (which was masking the problem). Fixes: 9ba1f72 ("netfilter: nf_tables: add new nft_masq expression") Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_masq_ipv4.c | 34 ++++++++++-------------------- net/ipv6/netfilter/nft_masq_ipv6.c | 34 ++++++++++-------------------- 2 files changed, 22 insertions(+), 46 deletions(-) diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 6ea1d207b6a5..1c636d6b5b50 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -32,33 +32,12 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = verdict; } -static int nft_masq_ipv4_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - int err; - - err = nft_masq_init(ctx, expr, tb); - if (err < 0) - return err; - - nf_nat_masquerade_ipv4_register_notifier(); - return 0; -} - -static void nft_masq_ipv4_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - nf_nat_masquerade_ipv4_unregister_notifier(); -} - static struct nft_expr_type nft_masq_ipv4_type; static const struct nft_expr_ops nft_masq_ipv4_ops = { .type = &nft_masq_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), .eval = nft_masq_ipv4_eval, - .init = nft_masq_ipv4_init, - .destroy = nft_masq_ipv4_destroy, + .init = nft_masq_init, .dump = nft_masq_dump, }; @@ -73,12 +52,21 @@ static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { static int __init nft_masq_ipv4_module_init(void) { - return nft_register_expr(&nft_masq_ipv4_type); + int ret; + + ret = nft_register_expr(&nft_masq_ipv4_type); + if (ret < 0) + return ret; + + nf_nat_masquerade_ipv4_register_notifier(); + + return ret; } static void __exit nft_masq_ipv4_module_exit(void) { nft_unregister_expr(&nft_masq_ipv4_type); + nf_nat_masquerade_ipv4_unregister_notifier(); } module_init(nft_masq_ipv4_module_init); diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 4e51334ef6b7..556262f40761 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -32,33 +32,12 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = verdict; } -static int nft_masq_ipv6_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - int err; - - err = nft_masq_init(ctx, expr, tb); - if (err < 0) - return err; - - nf_nat_masquerade_ipv6_register_notifier(); - return 0; -} - -static void nft_masq_ipv6_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - nf_nat_masquerade_ipv6_unregister_notifier(); -} - static struct nft_expr_type nft_masq_ipv6_type; static const struct nft_expr_ops nft_masq_ipv6_ops = { .type = &nft_masq_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), .eval = nft_masq_ipv6_eval, - .init = nft_masq_ipv6_init, - .destroy = nft_masq_ipv6_destroy, + .init = nft_masq_init, .dump = nft_masq_dump, }; @@ -73,12 +52,21 @@ static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { static int __init nft_masq_ipv6_module_init(void) { - return nft_register_expr(&nft_masq_ipv6_type); + int ret; + + ret = nft_register_expr(&nft_masq_ipv6_type); + if (ret < 0) + return ret; + + nf_nat_masquerade_ipv6_register_notifier(); + + return ret; } static void __exit nft_masq_ipv6_module_exit(void) { nft_unregister_expr(&nft_masq_ipv6_type); + nf_nat_masquerade_ipv6_unregister_notifier(); } module_init(nft_masq_ipv6_module_init);