Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next
tree:

1) No need to set ttl from reject action for the bridge family, from
   Taehee Yoo.

2) Use a fixed timeout for flows that are passed up from the flowtable
   to conntrack, from Florian Westphal.

3) More preparation patches for tproxy support for nf_tables, from Mate
   Eckl.

4) Remove unnecessary indirection in core IPv6 checksum function, from
   Florian Westphal.

5) Use nf_ct_get_tuplepr() from openvswitch, instead of open-coding it.
   From Florian Westphal.

6) socket match now selects socket infrastructure, instead of depending
   on it. From Mate Eckl.

7) Patch series to simplify conntrack tuple building/parsing from packet
   path and ctnetlink, from Florian Westphal.

8) Fetch timeout policy from protocol helpers, instead of doing it from
   core, from Florian Westphal.

9) Merge IPv4 and IPv6 protocol trackers into conntrack core, from
   Florian Westphal.

10) Depend on CONFIG_NF_TABLES_IPV6 and CONFIG_IP6_NF_IPTABLES
    respectively, instead of IPV6. Patch from Mate Eckl.

11) Add specific function for garbage collection in conncount,
    from Yi-Hung Wei.

12) Keep track of the number of elements in the connlimit list, from Yi-Hung Wei.

13) Move locking to nf_conncount, from Yi-Hung Wei.

14) Series of patches to add lockless tree traversal in nf_conncount,
    from Yi-Hung Wei.

15) Resolve clash in matching conntracks when race happens, from
    Martynas Pumputis.

16) If connection entry times out, remove template entry from the
    ip_vs_conn_tab table to improve behaviour under flood, from
    Julian Anastasov.

17) Remove useless parameter from nf_ct_helper_ext_add(), from Gao feng.

18) Call abort from 2-phase commit protocol before requesting modules,
    make sure this is done under the mutex, from Florian Westphal.

19) Grab module reference when starting transaction, also from Florian.

20) Dynamically allocate expression info array for pre-parsing, from
    Florian.

21) Add per netns mutex for nf_tables, from Florian Westphal.

22) A couple of patches to simplify and refactor nf_osf code to prepare
    for nft_osf support.

23) Break evaluation on missing socket, from Mate Eckl.

24) Allow matching the socket mark from nft_socket, from Mate Eckl.

25) Remove dependency on nf_defrag_ipv6, now that the IPv6 tracker is
    built into nf_conntrack. From Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-07-20 22:28:28 -07:00
commit 99d20a461c
72 changed files with 2035 additions and 2414 deletions

View File

@ -29,6 +29,7 @@ struct nfnetlink_subsystem {
__u8 subsys_id; /* nfnetlink subsystem ID */
__u8 cb_count; /* number of callbacks */
const struct nfnl_callback *cb; /* callback for individual types */
struct module *owner;
int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb);
void (*cleanup)(struct net *net);

View File

@ -23,9 +23,6 @@ struct nf_queue_entry;
#ifdef CONFIG_INET
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol);
int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict);
int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
@ -35,14 +32,6 @@ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
{
return 0;
}
static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb,
unsigned int hook,
unsigned int dataoff,
unsigned int len,
u_int8_t protocol)
{
return 0;
}
static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
struct flowi *fl, bool strict)
{

View File

@ -30,11 +30,6 @@ struct nf_ipv6_ops {
void (*route_input)(struct sk_buff *skb);
int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
__sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
__sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol);
int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict);
int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);

View File

@ -335,6 +335,11 @@ enum ip_vs_sctp_states {
IP_VS_SCTP_S_LAST
};
/* Connection templates use bits from state */
#define IP_VS_CTPL_S_NONE 0x0000
#define IP_VS_CTPL_S_ASSURED 0x0001
#define IP_VS_CTPL_S_LAST 0x0002
/* Delta sequence info structure
* Each ip_vs_conn has 2 (output AND input seq. changes).
* Only used in the VS/NAT.
@ -1221,7 +1226,7 @@ struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
struct ip_vs_dest *dest, __u32 fwmark);
void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
const char *ip_vs_state_name(__u16 proto, int state);
const char *ip_vs_state_name(const struct ip_vs_conn *cp);
void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest);
@ -1289,6 +1294,17 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
atomic_inc(&ctl_cp->n_control);
}
/* Set IP_VS_CTPL_S_ASSURED on the controlling connection of @cp.
 * Nothing is changed when @cp has no controlling connection, when the
 * bit is already set, or when the controlling connection does not carry
 * IP_VS_CONN_F_TEMPLATE.
 */
static inline void
ip_vs_control_assure_ct(struct ip_vs_conn *cp)
{
	struct ip_vs_conn *tmpl = cp->control;

	if (!tmpl)
		return;
	if (tmpl->state & IP_VS_CTPL_S_ASSURED)
		return;
	if (!(tmpl->flags & IP_VS_CONN_F_TEMPLATE))
		return;

	tmpl->state |= IP_VS_CTPL_S_ASSURED;
}
/* IPVS netns init & cleanup functions */
int ip_vs_estimator_net_init(struct netns_ipvs *ipvs);
int ip_vs_control_net_init(struct netns_ipvs *ipvs);

View File

@ -574,34 +574,6 @@ static inline bool ipv6_prefix_equal(const struct in6_addr *addr1,
}
#endif
struct inet_frag_queue;
enum ip6_defrag_users {
IP6_DEFRAG_LOCAL_DELIVER,
IP6_DEFRAG_CONNTRACK_IN,
__IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_OUT,
__IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
};
void ip6_frag_init(struct inet_frag_queue *q, const void *a);
extern const struct rhashtable_params ip6_rhash_params;
/*
* Equivalent of ipv4 struct ip
*/
struct frag_queue {
struct inet_frag_queue q;
int iif;
__u16 nhoffset;
u8 ecn;
};
void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
static inline bool ipv6_addr_any(const struct in6_addr *a)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64

104
include/net/ipv6_frag.h Normal file
View File

@ -0,0 +1,104 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _IPV6_FRAG_H
#define _IPV6_FRAG_H
#include <linux/kernel.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>
/* Users of IPv6 defragmentation.
 * Each CONNTRACK user is followed by a double-underscore enumerator placed
 * USHRT_MAX above it, so the next user starts numbering past that value.
 * NOTE(review): presumably this reserves a USHRT_MAX-wide sub-range per
 * conntrack user - confirm against the code that consumes these values.
 */
enum ip6_defrag_users {
IP6_DEFRAG_LOCAL_DELIVER,
IP6_DEFRAG_CONNTRACK_IN,
__IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_OUT,
__IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
};
/*
* Equivalent of ipv4 struct ip
*
* IPv6 reassembly queue: the generic inet_frag_queue plus IPv6 state.
*/
struct frag_queue {
/* generic queue; ip6frag_init() gets back to this struct via container_of() */
struct inet_frag_queue q;
/* interface index, resolved with dev_get_by_index_rcu() on expiry */
int iif;
/* NOTE(review): presumably the offset of the next-header field - not used in this header */
__u16 nhoffset;
/* reset to 0 by ip6frag_init() */
u8 ecn;
};
#if IS_ENABLED(CONFIG_IPV6)
/* Set up the IPv6 part of a fragment queue.
 * @q: generic queue embedded in a struct frag_queue
 * @a: lookup key, must point to a struct frag_v6_compare_key
 *
 * Copies the key into q->key.v6 and resets the ecn field to 0.
 */
static inline void ip6frag_init(struct inet_frag_queue *q, const void *a)
{
	const struct frag_v6_compare_key *src_key = a;
	struct frag_queue *v6q;

	v6q = container_of(q, struct frag_queue, q);
	v6q->ecn = 0;
	q->key.v6 = *src_key;
}
/* Hash a bare struct frag_v6_compare_key with jhash2().
 * @len is ignored: the number of 32-bit words hashed is always derived
 * from sizeof(struct frag_v6_compare_key).
 */
static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 nwords = sizeof(struct frag_v6_compare_key) / sizeof(u32);

	return jhash2(data, nwords, seed);
}
/* Hash the IPv6 key stored inside an inet_frag_queue.
 * @data points to the queue; @len is ignored. The same word count and
 * hash function as ip6frag_key_hashfn() are used.
 */
static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct inet_frag_queue *queue = data;
	const u32 *words = (const u32 *)&queue->key.v6;

	return jhash2(words,
		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
}
/* Compare the key of a stored queue with the key being looked up.
 * @arg: compare argument; arg->key is a struct frag_v6_compare_key
 * @ptr: candidate struct inet_frag_queue
 *
 * Returns 0 when the keys are bytewise identical, 1 otherwise.
 */
static inline int
ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct inet_frag_queue *queue = ptr;
	const struct frag_v6_compare_key *wanted = arg->key;

	return memcmp(&queue->key, wanted, sizeof(*wanted)) != 0;
}
/* Expire an IPv6 reassembly queue.
 * @net: network namespace used for the device lookup and the statistics
 * @fq:  queue to expire
 *
 * Runs under rcu_read_lock() with fq->q.lock held. A queue already flagged
 * INET_FRAG_COMPLETE is left alone; otherwise it is killed, the REASMFAILS
 * and REASMTIMEOUT counters are bumped, and, if the first fragment was
 * received, an ICMPv6 "time exceeded / fragment reassembly" error is sent
 * for the head fragment. Always ends with inet_frag_put() on the queue.
 */
static inline void
ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
struct sk_buff *head;
rcu_read_lock();
spin_lock(&fq->q.lock);
/* nothing to expire if the queue has already completed */
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
inet_frag_kill(&fq->q);
/* the device may be gone; without it neither stats nor ICMP are possible */
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
head = fq->q.fragments;
if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
goto out;
head->dev = dev;
/* skb_get() before unlocking, paired with the kfree_skb() after sending */
skb_get(head);
/* the queue lock is dropped before calling icmpv6_send() */
spin_unlock(&fq->q.lock);
icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
kfree_skb(head);
/* lock already released on this path, skip the unlock at "out" */
goto out_rcu_unlock;
out:
spin_unlock(&fq->q.lock);
out_rcu_unlock:
rcu_read_unlock();
inet_frag_put(&fq->q);
}
#endif
#endif

View File

@ -10,9 +10,6 @@
#ifndef _NF_CONNTRACK_IPV4_H
#define _NF_CONNTRACK_IPV4_H
const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;

View File

@ -41,6 +41,11 @@ union nf_conntrack_expect_proto {
/* insert expect proto private data here */
};
/* Two unsigned counters, one per address family.
 * NOTE(review): presumably per-netns user counts for the IPv4 and IPv6
 * conntrack hooks, replacing per-family structures such as
 * struct conntrack4_net - confirm against the code that uses them.
 */
struct nf_conntrack_net {
unsigned int users4;
unsigned int users6;
};
#include <linux/types.h>
#include <linux/skbuff.h>

View File

@ -14,7 +14,6 @@
#define _NF_CONNTRACK_CORE_H
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
@ -40,16 +39,8 @@ void nf_conntrack_cleanup_start(void);
void nf_conntrack_init_end(void);
void nf_conntrack_cleanup_end(void);
bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff,
unsigned int dataoff, u_int16_t l3num, u_int8_t protonum,
struct net *net,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
/* Find a connection corresponding to a tuple. */
@ -75,10 +66,8 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
return ret;
}
void
print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *proto);
void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *proto);
#define CONNTRACK_LOCKS 1024

View File

@ -1,8 +1,23 @@
#ifndef _NF_CONNTRACK_COUNT_H
#define _NF_CONNTRACK_COUNT_H
#include <linux/list.h>
struct nf_conncount_data;
/* Result codes returned by nf_conncount_add(). */
enum nf_conncount_list_add {
NF_CONNCOUNT_ADDED, /* list add was ok */
NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */
NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */
};
/* List of connections sharing one filtering key, together with its own
 * lock and a running element count.
 */
struct nf_conncount_list {
spinlock_t list_lock;
struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */
/* NOTE(review): presumably set once gc has reclaimed the list (see
 * NF_CONNCOUNT_SKIP) - confirm in the implementation
 */
bool dead;
};
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
unsigned int keylen);
void nf_conncount_destroy(struct net *net, unsigned int family,
@ -14,15 +29,21 @@ unsigned int nf_conncount_count(struct net *net,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit);
void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit);
bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
void nf_conncount_list_init(struct nf_conncount_list *list);
void nf_conncount_cache_free(struct hlist_head *hhead);
enum nf_conncount_list_add
nf_conncount_add(struct nf_conncount_list *list,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_list *list);
void nf_conncount_cache_free(struct nf_conncount_list *list);
#endif

View File

@ -103,9 +103,7 @@ int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int);
void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *,
unsigned int);
struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct,
struct nf_conntrack_helper *helper,
gfp_t gfp);
struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
gfp_t flags);

View File

@ -1,84 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C)2003,2004 USAGI/WIDE Project
*
* Header for use in defining a given L3 protocol for connection tracking.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*
* Derived from include/netfilter_ipv4/ip_conntrack_protocol.h
*/
#ifndef _NF_CONNTRACK_L3PROTO_H
#define _NF_CONNTRACK_L3PROTO_H
#include <linux/netlink.h>
#include <net/netlink.h>
#include <linux/seq_file.h>
#include <net/netfilter/nf_conntrack.h>
/* Operations and metadata describing one layer-3 protocol to the
 * connection tracking core. Registered instances are looked up through
 * nf_ct_l3protos[], indexed by the l3proto number.
 */
struct nf_conntrack_l3proto {
/* L3 Protocol Family number. ex) PF_INET */
u_int16_t l3proto;
/* size of tuple nlattr, fills a hole */
u16 nla_size;
/*
* Try to fill in the third arg: nhoff is offset of l3 proto
* hdr. Return true if possible.
*/
bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple);
/*
* Invert the per-proto part of the tuple: ie. turn xmit into reply.
* Some packets can't be inverted: return 0 in that case.
*/
bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
/*
* Called before tracking.
* *dataoff: offset of protocol header (TCP, UDP,...) in skb
* *protonum: protocol number
*/
int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
/* netlink (de)serialisation of the L3 part of a tuple, plus its policy */
int (*tuple_to_nlattr)(struct sk_buff *skb,
const struct nf_conntrack_tuple *t);
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
const struct nla_policy *nla_policy;
#endif
/* Called when netns wants to use connection tracking */
int (*net_ns_get)(struct net *);
void (*net_ns_put)(struct net *);
/* Module (if any) which this is connected to. */
struct module *me;
};
extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
/* Protocol global registration. */
int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
/* Existing built-in protocols */
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
/* Look up the L3 protocol handler for @l3proto.
 * Numbers at or beyond NFPROTO_NUMPROTO yield the generic handler;
 * otherwise the nf_ct_l3protos[] slot is read with rcu_dereference().
 */
static inline struct nf_conntrack_l3proto *
__nf_ct_l3proto_find(u_int16_t l3proto)
{
	const bool out_of_range = l3proto >= NFPROTO_NUMPROTO;

	if (unlikely(out_of_range))
		return &nf_conntrack_l3proto_generic;

	return rcu_dereference(nf_ct_l3protos[l3proto]);
}
#endif /*_NF_CONNTRACK_L3PROTO_H*/

View File

@ -36,7 +36,7 @@ struct nf_conntrack_l4proto {
struct net *net, struct nf_conntrack_tuple *tuple);
/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
* Some packets can't be inverted: return 0 in that case.
* Only used by icmp, most protocols use a generic version.
*/
bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
@ -45,13 +45,12 @@ struct nf_conntrack_l4proto {
int (*packet)(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeouts);
enum ip_conntrack_info ctinfo);
/* Called when a new connection for this protocol found;
* returns TRUE if it's OK. If so, packet() called next. */
bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts);
unsigned int dataoff);
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct nf_conn *ct);
@ -63,9 +62,6 @@ struct nf_conntrack_l4proto {
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
/* Return the array of timeouts for this protocol. */
unsigned int *(*get_timeouts)(struct net *net);
/* convert protoinfo to nfnetlink attributes */
int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
struct nf_conn *ct);
@ -134,10 +130,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
/* Protocol global registration. */
int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
unsigned int num_proto);
void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
unsigned int num_proto);
/* Generic netlink helpers */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,

View File

@ -67,27 +67,17 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
#endif
};
static inline unsigned int *
nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
const struct nf_conntrack_l4proto *l4proto)
static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
{
unsigned int *timeouts = NULL;
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
struct nf_conn_timeout *timeout_ext;
unsigned int *timeouts;
timeout_ext = nf_ct_timeout_find(ct);
if (timeout_ext) {
if (timeout_ext)
timeouts = nf_ct_timeout_data(timeout_ext);
if (unlikely(!timeouts))
timeouts = l4proto->get_timeouts(net);
} else {
timeouts = l4proto->get_timeouts(net);
}
return timeouts;
#else
return l4proto->get_timeouts(net);
#endif
return timeouts;
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT

View File

@ -17,6 +17,14 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
return false;
}
/* Make @sk the socket of @skb -- consumes sk.
 * The skb is orphaned first, then sock_edemux is installed as its
 * destructor alongside the new socket pointer.
 */
static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);

	skb->destructor = sock_edemux;
	skb->sk = sk;
}
__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
/**

View File

@ -7,6 +7,7 @@
struct netns_nftables {
struct list_head tables;
struct list_head commit_list;
struct mutex commit_mutex;
unsigned int base_seq;
u8 gencursor;
u8 validate_state;

View File

@ -16,9 +16,14 @@
#define NF_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */
/* Check if ip TTL is less than fingerprint one */
#define NF_OSF_TTL_LESS 1
/* Do not compare ip and fingerprint TTL at all */
#define NF_OSF_TTL_NOCHECK 2
#define NF_OSF_FLAGMASK (NF_OSF_GENRE | NF_OSF_TTL | \
NF_OSF_LOG | NF_OSF_INVERT)
/* Wildcard MSS (kind of).
* It is used to implement a state machine for the different wildcard values
* of the MSS and window sizes.
@ -83,4 +88,10 @@ enum iana_options {
OSFOPT_EMPTY = 255,
};
/* Attribute type numbers; xt_osf.h maps xt_osf_attr_type onto this enum.
 * NOTE(review): presumably the netlink attribute carrying a fingerprint
 * in add/remove messages - confirm against the nfnetlink_osf code.
 */
enum nf_osf_attr_type {
OSF_ATTR_UNSPEC,
OSF_ATTR_FINGER,
OSF_ATTR_MAX,
};
#endif /* _NF_OSF_H */

View File

@ -921,10 +921,12 @@ enum nft_socket_attributes {
/*
* enum nft_socket_keys - nf_tables socket expression keys
*
* @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option_
* @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
* @NFT_SOCKET_MARK: Value of the socket mark
*/
enum nft_socket_keys {
NFT_SOCKET_TRANSPARENT,
NFT_SOCKET_MARK,
__NFT_SOCKET_MAX
};
#define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)

View File

@ -37,8 +37,7 @@
#define XT_OSF_TTL_TRUE NF_OSF_TTL_TRUE
#define XT_OSF_TTL_NOCHECK NF_OSF_TTL_NOCHECK
#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */
#define XT_OSF_TTL_LESS NF_OSF_TTL_LESS
#define xt_osf_wc nf_osf_wc
#define xt_osf_opt nf_osf_opt
@ -47,6 +46,7 @@
#define xt_osf_finger nf_osf_finger
#define xt_osf_nlmsg nf_osf_nlmsg
#define xt_osf_attr_type nf_osf_attr_type
/*
* Add/remove fingerprint from the kernel.
*/
@ -56,10 +56,4 @@ enum xt_osf_msg_types {
OSF_MSG_MAX,
};
enum xt_osf_attr_type {
OSF_ATTR_UNSPEC,
OSF_ATTR_FINGER,
OSF_ATTR_MAX,
};
#endif /* _XT_OSF_H */

View File

@ -89,8 +89,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
net->ipv4.sysctl_ip_default_ttl);
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->ttl = net->ipv4.sysctl_ip_default_ttl;
niph->tot_len = htons(nskb->len);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
nft_reject_br_push_etherhdr(oldskb, nskb);

View File

@ -25,7 +25,7 @@
#include <net/ieee802154_netdev.h>
#include <net/6lowpan.h>
#include <net/ipv6.h>
#include <net/ipv6_frag.h>
#include <net/inet_frag.h>
#include "6lowpan_i.h"

View File

@ -98,59 +98,6 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_ip_reroute);
/*
 * Check the checksum of the data at @dataoff in an IPv4 packet.
 *
 * @skb:      packet; ip_hdr(skb) supplies the pseudo-header addresses
 * @hook:     netfilter hook the packet is being processed on
 * @dataoff:  offset of the checksummed data within @skb
 * @protocol: protocol number for the pseudo-header; 0 means no
 *            pseudo-header is applied
 *
 * Returns 0 when skb->ip_summed is neither CHECKSUM_COMPLETE nor
 * CHECKSUM_NONE, or when the CHECKSUM_COMPLETE value is not consulted or
 * verifies; otherwise returns the result of __skb_checksum_complete().
 */
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
/* skb->csum is only consulted on the PRE_ROUTING and LOCAL_IN hooks */
if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
break;
/* the precomputed sum verifies: record that no further check is needed */
if ((protocol == 0 && !csum_fold(skb->csum)) ||
!csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - dataoff, protocol,
skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
/* fall through */
case CHECKSUM_NONE:
/* seed skb->csum (pseudo-header unless protocol is 0), then sum the packet */
if (protocol == 0)
skb->csum = 0;
else
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
skb->len - dataoff,
protocol, 0);
csum = __skb_checksum_complete(skb);
}
return csum;
}
EXPORT_SYMBOL(nf_ip_checksum);
/*
 * Check a checksum that covers only the first @len bytes of the data at
 * @dataoff in an IPv4 packet.
 *
 * @skb:      packet; ip_hdr(skb) supplies the pseudo-header addresses
 * @hook:     netfilter hook, passed on to nf_ip_checksum()
 * @dataoff:  offset of the checksummed data within @skb
 * @len:      number of bytes after @dataoff covered by the checksum
 * @protocol: protocol number for the pseudo-header
 *
 * Returns 0 when skb->ip_summed is neither CHECKSUM_COMPLETE nor
 * CHECKSUM_NONE; otherwise the result of nf_ip_checksum() (full coverage
 * with CHECKSUM_COMPLETE) or of __skb_checksum_complete_head().
 */
__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
			       unsigned int dataoff, unsigned int len,
			       u_int8_t protocol)
{
	const struct iphdr *iph = ip_hdr(skb);
	__sum16 csum = 0;

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		/* coverage spans the whole payload: use the ordinary check */
		if (len == skb->len - dataoff)
			return nf_ip_checksum(skb, hook, dataoff, protocol);
		/* fall through */
	case CHECKSUM_NONE:
		/*
		 * Argument order is (saddr, daddr, len, proto, sum), matching
		 * the call in nf_ip_checksum(). Passing the length in the
		 * proto slot would feed it through the narrower protocol
		 * parameter.
		 */
		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
					       skb->len - dataoff,
					       protocol, 0);
		skb->ip_summed = CHECKSUM_NONE;
		return __skb_checksum_complete_head(skb, dataoff + len);
	}
	return csum;
}
EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict __always_unused)
{

View File

@ -9,22 +9,6 @@ config NF_DEFRAG_IPV4
tristate
default n
config NF_CONNTRACK_IPV4
tristate "IPv4 connection tracking support (required for NAT)"
depends on NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
select NF_DEFRAG_IPV4
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections.
This is IPv4 support on Layer 3 independent connection tracking.
Layer 3 independent connection tracking is experimental scheme
which generalize ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
@ -112,7 +96,7 @@ config NF_REJECT_IPV4
config NF_NAT_IPV4
tristate "IPv4 NAT"
depends on NF_CONNTRACK_IPV4
depends on NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
select NF_NAT
help
@ -279,7 +263,7 @@ config IP_NF_TARGET_SYNPROXY
# NAT + specific targets: nf_conntrack
config IP_NF_NAT
tristate "iptables NAT support"
depends on NF_CONNTRACK_IPV4
depends on NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NF_NAT_IPV4
@ -340,7 +324,7 @@ config IP_NF_MANGLE
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support"
depends on IP_NF_MANGLE
depends on NF_CONNTRACK_IPV4
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_MARK
select NETFILTER_FAMILY_ARP

View File

@ -3,12 +3,6 @@
# Makefile for the netfilter modules on top of IPv4.
#
# objects for l3 independent conntrack
nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o

View File

@ -1,472 +0,0 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/route.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#include <net/netfilter/nf_log.h>
/* id passed to net_generic() to find this module's struct conntrack4_net */
static int conntrack4_net_id __read_mostly;
/* held while ipv4_hooks_register()/ipv4_hooks_unregister() update the user count */
static DEFINE_MUTEX(register_ipv4_hooks);
/* Per-netns state: count of ipv4_hooks_register() calls not yet undone. */
struct conntrack4_net {
unsigned int users;
};
/* Fill the L3 addresses of @tuple from the IPv4 header at @nhoff.
 * Returns false when the address fields cannot be read from @skb.
 */
static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
			      struct nf_conntrack_tuple *tuple)
{
	__be32 addr_buf[2];
	const __be32 *addrs;

	/* read two 32-bit words starting at saddr: source, then destination */
	addrs = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
				   sizeof(addr_buf), addr_buf);
	if (!addrs)
		return false;

	tuple->src.u3.ip = addrs[0];
	tuple->dst.u3.ip = addrs[1];

	return true;
}
/* Build the reply-direction L3 addresses: @tuple gets @orig's addresses
 * with source and destination exchanged. Always returns true.
 */
static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u3.ip = orig->dst.u3.ip;
tuple->dst.u3.ip = orig->src.u3.ip;
return true;
}
/* Locate the L4 header of the IPv4 packet whose header starts at @nhoff.
 * On success stores the L4 offset in *@dataoff and the protocol number in
 * *@protonum and returns NF_ACCEPT. Returns -NF_ACCEPT when the header is
 * unreadable, the packet is a non-first fragment, or the computed offset
 * lies beyond the end of the skb.
 */
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			    unsigned int *dataoff, u_int8_t *protonum)
{
	struct iphdr hdr_buf;
	const struct iphdr *ip4;

	ip4 = skb_header_pointer(skb, nhoff, sizeof(hdr_buf), &hdr_buf);
	if (!ip4)
		return -NF_ACCEPT;

	/* Conntrack defragments packets, we might still see fragments
	 * inside ICMP packets though.
	 */
	if (ip4->frag_off & htons(IP_OFFSET))
		return -NF_ACCEPT;

	*dataoff = nhoff + (ip4->ihl << 2);
	*protonum = ip4->protocol;

	if (*dataoff <= skb->len)
		return NF_ACCEPT;

	/* header length points past the packet: bogus IP header */
	pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
		 "nhoff %u, ihl %u, skblen %u\n",
		 nhoff, ip4->ihl << 2, skb->len);
	return -NF_ACCEPT;
}
/* Hook: run the connection's helper, if any, on the packet.
 * Packets without a conntrack entry, with ctinfo IP_CT_RELATED_REPLY, or
 * whose conntrack has no helper attached are accepted untouched; otherwise
 * the helper's verdict is returned.
 */
static unsigned int ipv4_helper(void *priv,
				struct sk_buff *skb,
				const struct nf_hook_state *state)
{
	const struct nf_conntrack_helper *hlp;
	const struct nf_conn_help *ext;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	/* This is where we call the helper: as the packet goes out. */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		return NF_ACCEPT;

	ext = nfct_help(ct);
	if (ext) {
		/* rcu_read_lock()ed by nf_hook_thresh */
		hlp = rcu_dereference(ext->helper);
		if (hlp)
			return hlp->help(skb,
					 skb_network_offset(skb) + ip_hdrlen(skb),
					 ct, ctinfo);
	}

	return NF_ACCEPT;
}
/* Hook: apply pending sequence adjustment, then confirm the conntrack.
 * Returns NF_DROP (and bumps the drop statistic) when sequence adjustment
 * fails; otherwise the verdict of nf_conntrack_confirm().
 */
static unsigned int ipv4_confirm(void *priv,
				 struct sk_buff *skb,
				 const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	/* adjust seqs for loopback traffic only in outgoing direction */
	if (ct && ctinfo != IP_CT_RELATED_REPLY &&
	    test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
	    !nf_is_loopback_packet(skb) &&
	    !nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
		NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
		return NF_DROP;
	}

	/* We've seen it coming out the other side: confirm it */
	return nf_conntrack_confirm(skb);
}
/* Hook: pass the packet to nf_conntrack_in() as PF_INET traffic, using
 * the netns and hook number from @state, and return its verdict.
 */
static unsigned int ipv4_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct net *net = state->net;

	return nf_conntrack_in(net, PF_INET, state->hook, skb);
}
/* Hook for locally generated packets.
 * Fragments are accepted without tracking, after dropping any template
 * conntrack attached to the skb; everything else goes through
 * nf_conntrack_in().
 */
static unsigned int ipv4_conntrack_local(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *tmpl;

	if (!ip_is_fragment(ip_hdr(skb)))
		return nf_conntrack_in(state->net, PF_INET, state->hook, skb);

	/* IP_NODEFRAG setsockopt set */
	tmpl = nf_ct_get(skb, &ctinfo);
	if (tmpl && nf_ct_is_template(tmpl)) {
		/* when skipping ct, clear templates to avoid fooling
		 * later targets/matches
		 */
		skb->_nfct = 0;
		nf_ct_put(tmpl);
	}

	return NF_ACCEPT;
}
/* Connection tracking may drop packets, but never alters them, so
make it the first hook. */
/* Hook table registered and unregistered as a unit by
 * ipv4_hooks_register() / ipv4_hooks_unregister(): tracking on PRE_ROUTING
 * and LOCAL_OUT, helper plus confirm on POST_ROUTING and LOCAL_IN.
 */
static const struct nf_hook_ops ipv4_conntrack_ops[] = {
{
.hook = ipv4_conntrack_in,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_conntrack_local,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_helper,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
.hook = ipv4_helper,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
};
/* SO_ORIGINAL_DST getsockopt handler: a fast alternative to parsing /proc.
 *
 * A tuple is built from the socket's own addresses and ports; seen from
 * the socket these describe the reply direction, so the conntrack entry
 * found for it yields the original destination.
 *
 * @sk:     socket being queried (TCP or SCTP only)
 * @optval: unused
 * @user:   user buffer receiving a struct sockaddr_in
 * @len:    size of @user; must be at least sizeof(struct sockaddr_in)
 *
 * Returns 0 on success, -ENOPROTOOPT for other protocols, -EINVAL when the
 * buffer is too small, -ENOENT when no conntrack entry matches and -EFAULT
 * when the copy to user space fails. *@len is not updated.
 */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct nf_conntrack_tuple_hash *h;
	const struct nf_conntrack_tuple *orig;
	struct nf_conntrack_tuple tuple;
	struct sockaddr_in sin;
	struct nf_conn *ct;

	memset(&tuple, 0, sizeof(tuple));

	lock_sock(sk);
	tuple.src.u3.ip = inet->inet_rcv_saddr;
	tuple.src.u.tcp.port = inet->inet_sport;
	tuple.dst.u3.ip = inet->inet_daddr;
	tuple.dst.u.tcp.port = inet->inet_dport;
	tuple.src.l3num = PF_INET;
	tuple.dst.protonum = sk->sk_protocol;
	release_sock(sk);

	/* We only do TCP and SCTP at the moment: is there a better way? */
	switch (tuple.dst.protonum) {
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		break;
	default:
		pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
			 *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
	if (!h) {
		pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
			 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
			 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
		return -ENOENT;
	}

	ct = nf_ct_tuplehash_to_ctrack(h);
	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;

	sin.sin_family = AF_INET;
	sin.sin_port = orig->dst.u.tcp.port;
	sin.sin_addr.s_addr = orig->dst.u3.ip;
	memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

	pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
		 &sin.sin_addr.s_addr, ntohs(sin.sin_port));

	/* everything needed has been copied into sin; release the entry */
	nf_ct_put(ct);

	return copy_to_user(user, &sin, sizeof(sin)) != 0 ? -EFAULT : 0;
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
/* Emit the tuple's IPv4 source and destination addresses as
 * CTA_IP_V4_SRC / CTA_IP_V4_DST attributes.
 * Returns 0 on success, -1 as soon as one attribute cannot be added.
 */
static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
				const struct nf_conntrack_tuple *tuple)
{
	if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip))
		return -1;

	if (nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
		return -1;

	return 0;
}
/* Netlink attribute policy for the IPv4 part of a tuple: both the source
 * and the destination address attributes are declared as NLA_U32.
 */
static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
[CTA_IP_V4_SRC] = { .type = NLA_U32 },
[CTA_IP_V4_DST] = { .type = NLA_U32 },
};
/* Fill the IPv4 source/destination addresses of @t from the parsed
 * attribute table @tb.
 * Returns 0 on success, -EINVAL if either CTA_IP_V4_SRC or CTA_IP_V4_DST
 * is absent.
 */
static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
				struct nf_conntrack_tuple *t)
{
	const struct nlattr *src_attr = tb[CTA_IP_V4_SRC];
	const struct nlattr *dst_attr = tb[CTA_IP_V4_DST];

	if (!(src_attr && dst_attr))
		return -EINVAL;

	t->src.u3.ip = nla_get_in_addr(src_attr);
	t->dst.u3.ip = nla_get_in_addr(dst_attr);

	return 0;
}
#endif
/* Socket-option registration record that routes SO_ORIGINAL_DST getsockopt
 * requests on PF_INET sockets to getorigdst().
 * NOTE(review): get_optmax is SO_ORIGINAL_DST+1, which suggests the range is
 * half-open and covers this single option only; confirm against the
 * nf_sockopt lookup code.
 */
static struct nf_sockopt_ops so_getorigdst = {
.pf = PF_INET,
.get_optmin = SO_ORIGINAL_DST,
.get_optmax = SO_ORIGINAL_DST+1,
.get = getorigdst,
.owner = THIS_MODULE,
};
/* Take one user reference on the IPv4 conntrack hooks of @net.
 * The first user enables IPv4 defragmentation and registers
 * ipv4_conntrack_ops; later users only bump the counter. If either step
 * fails the user count is reset to 0 and the error is returned.
 * Serialized by register_ipv4_hooks.
 */
static int ipv4_hooks_register(struct net *net)
{
	struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
	int err = 0;

	mutex_lock(&register_ipv4_hooks);

	cnet->users++;
	if (cnet->users <= 1) {
		/* First user: bring up defrag, then the hooks. */
		err = nf_defrag_ipv4_enable(net);
		if (!err)
			err = nf_register_net_hooks(net, ipv4_conntrack_ops,
						    ARRAY_SIZE(ipv4_conntrack_ops));
		if (err)
			cnet->users = 0;
	}

	mutex_unlock(&register_ipv4_hooks);
	return err;
}
/* Drop one user reference on the IPv4 conntrack hooks of @net and
 * unregister ipv4_conntrack_ops when the count reaches zero. A count that
 * is already zero is left untouched. Serialized by register_ipv4_hooks.
 */
static void ipv4_hooks_unregister(struct net *net)
{
	struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);

	mutex_lock(&register_ipv4_hooks);
	if (cnet->users) {
		cnet->users--;
		if (!cnet->users)
			nf_unregister_net_hooks(net, ipv4_conntrack_ops,
						ARRAY_SIZE(ipv4_conntrack_ops));
	}
	mutex_unlock(&register_ipv4_hooks);
}
/* IPv4 layer-3 protocol descriptor. It bundles the IPv4 tuple helpers, the
 * ctnetlink attribute handlers (only when CONFIG_NF_CT_NETLINK is enabled)
 * and the per-netns hook get/put callbacks.
 */
const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
.l3proto = PF_INET,
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
.get_l4proto = ipv4_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
/* Space for two u32 attributes, each with its own header and alignment. */
.nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */
#endif
.net_ns_get = ipv4_hooks_register,
.net_ns_put = ipv4_hooks_unregister,
.me = THIS_MODULE,
};
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
/* Layer-4 trackers registered together with IPv4 conntrack: TCP, UDP and
 * ICMP always; DCCP, SCTP and UDP-Lite only when the matching
 * CONFIG_NF_CT_PROTO_* option is defined.
 */
static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = {
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_icmp,
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp4,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
&nf_conntrack_l4proto_sctp4,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
&nf_conntrack_l4proto_udplite4,
#endif
};
/* Per-netns init: register every entry of builtin_l4proto4 for @net and
 * return the result of nf_ct_l4proto_pernet_register().
 */
static int ipv4_net_init(struct net *net)
{
return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
}
/* Per-netns exit: unregister every entry of builtin_l4proto4 for @net. */
static void ipv4_net_exit(struct net *net)
{
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
}
/* Per-netns operations for IPv4 conntrack. .id/.size request a
 * struct conntrack4_net sized per-netns area addressed through
 * conntrack4_net_id (it is fetched with net_generic() by the hook
 * register/unregister helpers).
 */
static struct pernet_operations ipv4_net_ops = {
.init = ipv4_net_init,
.exit = ipv4_net_exit,
.id = &conntrack4_net_id,
.size = sizeof(struct conntrack4_net),
};
/* Module init. Registers, in this order: the SO_ORIGINAL_DST socket option,
 * the per-netns operations, the built-in L4 trackers and finally the IPv4
 * L3 protocol. When a step fails, the cleanup_* labels undo the earlier
 * steps in reverse order and the error code is returned.
 */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
int ret = 0;
need_conntrack();
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
/* The advertised nla_size must agree with what the policy table implies. */
if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
nf_conntrack_l3proto_ipv4.nla_size))
return -EINVAL;
#endif
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
return ret;
}
ret = register_pernet_subsys(&ipv4_net_ops);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
goto cleanup_sockopt;
}
ret = nf_ct_l4proto_register(builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
if (ret < 0)
goto cleanup_pernet;
ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
goto cleanup_l4proto;
}
return ret;
/* Error unwind: each label falls through to the ones below it. */
cleanup_l4proto:
nf_ct_l4proto_unregister(builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
cleanup_pernet:
unregister_pernet_subsys(&ipv4_net_ops);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst);
return ret;
}
/* Module exit. Calls synchronize_net() first, then unregisters the L3
 * protocol, the L4 trackers, the per-netns operations and the socket
 * option, i.e. the reverse of the registration order used at init.
 */
static void __exit nf_conntrack_l3proto_ipv4_fini(void)
{
synchronize_net();
nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
nf_ct_l4proto_unregister(builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
unregister_pernet_subsys(&ipv4_net_ops);
nf_unregister_sockopt(&so_getorigdst);
}
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);

View File

@ -15,7 +15,6 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/xfrm.h>
#include <net/ip6_checksum.h>
#include <net/netfilter/nf_queue.h>
int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
@ -106,71 +105,10 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
return err;
}
/* Check the checksum of the upper-layer data that starts at @dataoff in
 * @skb, including the IPv6 pseudo-header for @protocol.
 *
 * Returns 0 when no verification is performed (ip_summed is neither
 * CHECKSUM_COMPLETE nor CHECKSUM_NONE, or CHECKSUM_COMPLETE on a hook other
 * than PRE_ROUTING/LOCAL_IN) or when the device-provided sum already
 * validates; otherwise returns what __skb_checksum_complete() reports.
 */
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
/* skb->csum is only consulted on the two input hooks. */
if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
break;
/* Remove the bytes before @dataoff from skb->csum, then fold in the
 * pseudo-header; a zero result means the sum is good.
 */
if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
skb->len - dataoff, protocol,
csum_sub(skb->csum,
skb_checksum(skb, 0,
dataoff, 0)))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
/* fall through */
case CHECKSUM_NONE:
/* Seed skb->csum with the pseudo-header minus the leading bytes and
 * let __skb_checksum_complete() do the full check.
 */
skb->csum = ~csum_unfold(
csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
skb->len - dataoff,
protocol,
csum_sub(0,
skb_checksum(skb, 0,
dataoff, 0))));
csum = __skb_checksum_complete(skb);
}
return csum;
}
EXPORT_SYMBOL(nf_ip6_checksum);
/* Check the checksum over only the first @len bytes of upper-layer data
 * starting at @dataoff in @skb, including the IPv6 pseudo-header for
 * @protocol.
 *
 * With CHECKSUM_COMPLETE and @len covering everything after @dataoff, the
 * work is handed to nf_ip6_checksum(). Otherwise (and for CHECKSUM_NONE)
 * skb->csum is seeded from the pseudo-header, ip_summed is forced to
 * CHECKSUM_NONE and the result of __skb_checksum_complete_head() over
 * @dataoff + @len bytes is returned. Any other ip_summed value returns 0.
 */
static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
				       unsigned int dataoff, unsigned int len,
				       u_int8_t protocol)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	__wsum hsum;
	__sum16 csum = 0;

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (len == skb->len - dataoff)
			return nf_ip6_checksum(skb, hook, dataoff, protocol);
		/* fall through */
	case CHECKSUM_NONE:
		/* Sum of the bytes that precede the upper-layer data. */
		hsum = skb_checksum(skb, 0, dataoff, 0);
		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
							 &ip6h->daddr,
							 skb->len - dataoff,
							 protocol,
							 csum_sub(0, hsum)));
		skb->ip_summed = CHECKSUM_NONE;
		return __skb_checksum_complete_head(skb, dataoff + len);
	}
	return csum;
}
/* Table of IPv6 helper callbacks (address check, route input, fragmentation,
 * checksum verification, route lookup and reroute) gathered in one
 * struct nf_ipv6_ops instance.
 */
static const struct nf_ipv6_ops ipv6ops = {
.chk_addr = ipv6_chk_addr,
.route_input = ip6_route_input,
.fragment = ip6_fragment,
.checksum = nf_ip6_checksum,
.checksum_partial = nf_ip6_checksum_partial,
.route = nf_ip6_route,
.reroute = nf_ip6_reroute,
};

View File

@ -5,26 +5,6 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
config NF_DEFRAG_IPV6
tristate
default n
config NF_CONNTRACK_IPV6
tristate "IPv6 connection tracking support"
depends on INET && IPV6 && NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
select NF_DEFRAG_IPV6
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections.
This is IPv6 support on Layer 3 independent connection tracking.
Layer 3 independent connection tracking is an experimental scheme
which generalizes ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
@ -128,7 +108,7 @@ config NF_LOG_IPV6
config NF_NAT_IPV6
tristate "IPv6 NAT"
depends on NF_CONNTRACK_IPV6
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
help
@ -328,7 +308,7 @@ config IP6_NF_SECURITY
config IP6_NF_NAT
tristate "ip6tables NAT support"
depends on NF_CONNTRACK_IPV6
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
select NF_NAT_IPV6
@ -365,6 +345,7 @@ config IP6_NF_TARGET_NPT
endif # IP6_NF_NAT
endif # IP6_NF_IPTABLES
endmenu
config NF_DEFRAG_IPV6
tristate

View File

@ -11,12 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o

View File

@ -1,460 +0,0 @@
/*
* Copyright (C)2004 USAGI/WIDE Project
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*/
#include <linux/types.h>
#include <linux/ipv6.h>
#include <linux/in6.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_log.h>
/* Id used with net_generic() to reach the per-netns struct conntrack6_net. */
static int conntrack6_net_id;
/* Serializes updates of conntrack6_net.users and hook (un)registration. */
static DEFINE_MUTEX(register_ipv6_hooks);
/* Per-netns state: number of users of the IPv6 conntrack hooks. */
struct conntrack6_net {
unsigned int users;
};
/* Copy the IPv6 source and destination addresses of the header located at
 * @nhoff in @skb into @tuple.
 * Returns false if skb_header_pointer() cannot provide the 32 address
 * bytes, true otherwise.
 */
static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
			      struct nf_conntrack_tuple *tuple)
{
	u_int32_t addr_buf[8];
	const u_int32_t *addrs;

	/* saddr and daddr are fetched as one run of eight 32-bit words. */
	addrs = skb_header_pointer(skb,
				   nhoff + offsetof(struct ipv6hdr, saddr),
				   sizeof(addr_buf), addr_buf);
	if (!addrs)
		return false;

	memcpy(tuple->src.u3.ip6, &addrs[0], sizeof(tuple->src.u3.ip6));
	memcpy(tuple->dst.u3.ip6, &addrs[4], sizeof(tuple->dst.u3.ip6));

	return true;
}
/* Build the address part of the reply tuple: @tuple gets @orig's
 * destination as source and @orig's source as destination.
 * Always returns true.
 */
static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
			      const struct nf_conntrack_tuple *orig)
{
	const size_t src_len = sizeof(tuple->src.u3.ip6);
	const size_t dst_len = sizeof(tuple->dst.u3.ip6);

	memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, src_len);
	memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, dst_len);

	return true;
}
/* Locate the upper-layer header of the IPv6 packet whose header starts at
 * @nhoff.
 * On success stores the header offset in *@dataoff and the protocol number
 * in *@protonum and returns NF_ACCEPT. Returns -NF_ACCEPT when the next
 * header byte cannot be read, when ipv6_skip_exthdr() fails, or when the
 * packet is a fragment with a non-zero fragment offset.
 */
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			    unsigned int *dataoff, u_int8_t *protonum)
{
	const unsigned int nexthdr_off = nhoff + offsetof(struct ipv6hdr, nexthdr);
	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
	__be16 frag_off;
	int protoff;
	u8 nexthdr;

	if (skb_copy_bits(skb, nexthdr_off, &nexthdr, sizeof(nexthdr))) {
		pr_debug("ip6_conntrack_core: can't get nexthdr\n");
		return -NF_ACCEPT;
	}

	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
	/*
	 * protoff == skb->len means the packet carries no data, only the
	 * IPv6 header and possibly extension headers; it is tracked anyway.
	 */
	if (protoff < 0 || (frag_off & htons(~0x7))) {
		pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
		return -NF_ACCEPT;
	}

	*protonum = nexthdr;
	*dataoff = protoff;

	return NF_ACCEPT;
}
static unsigned int ipv6_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
__be16 frag_off;
int protoff;
u8 nexthdr;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
nexthdr = ipv6_hdr(skb)->nexthdr;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
return helper->help(skb, protoff, ct, ctinfo);
}
/* Netfilter hook that confirms the packet's conntrack entry.
 * For tracked packets that are not IP_CT_RELATED_REPLY and whose
 * upper-layer header can be found, sequence numbers are adjusted first when
 * IPS_SEQ_ADJUST_BIT is set and the packet is not loopback traffic; if that
 * adjustment fails the drop statistic is bumped and NF_DROP is returned.
 * In every other case the verdict of nf_conntrack_confirm() is returned.
 */
static unsigned int ipv6_confirm(void *priv,
				 struct sk_buff *skb,
				 const struct nf_hook_state *state)
{
	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	__be16 frag_off;
	int protoff;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct && ctinfo != IP_CT_RELATED_REPLY) {
		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
					   &frag_off);
		if (protoff < 0 || (frag_off & htons(~0x7))) {
			pr_debug("proto header not found\n");
		} else if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
			   !nf_is_loopback_packet(skb) &&
			   !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
			/* adjust seqs for loopback traffic only in outgoing direction */
			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
			return NF_DROP;
		}
	}

	/* We've seen it coming out the other side: confirm it */
	return nf_conntrack_confirm(skb);
}
/* Hook entry point: hands @skb to nf_conntrack_in() with PF_INET6 and the
 * netns and hook number taken from @state. @priv is not used.
 */
static unsigned int ipv6_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
/* Hook entry point used for NF_INET_LOCAL_OUT in ipv6_conntrack_ops; the
 * body is the same nf_conntrack_in() call as ipv6_conntrack_in().
 * @priv is not used.
 */
static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
/* IPv6 conntrack hook table:
 *  - PRE_ROUTING and LOCAL_OUT feed packets into conntrack
 *    (NF_IP6_PRI_CONNTRACK);
 *  - POST_ROUTING and LOCAL_IN run the helper (NF_IP6_PRI_CONNTRACK_HELPER)
 *    and then confirm the entry (NF_IP6_PRI_LAST, or NF_IP6_PRI_LAST-1 on
 *    LOCAL_IN).
 */
static const struct nf_hook_ops ipv6_conntrack_ops[] = {
{
.hook = ipv6_conntrack_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK,
},
{
.hook = ipv6_conntrack_local,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK,
},
{
.hook = ipv6_helper,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_LAST,
},
{
.hook = ipv6_helper,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_LAST-1,
},
};
/*
 * ipv6_getorigdst - IP6T_SO_ORIGINAL_DST getsockopt handler.
 * @sk:     socket whose addresses and ports form the lookup tuple
 * @optval: option number (not used by this handler)
 * @user:   user-space buffer that receives a struct sockaddr_in6
 * @len:    size of @user; must be at least sizeof(struct sockaddr_in6)
 *
 * Returns 0 on success, -ENOPROTOOPT for sockets that are neither TCP nor
 * SCTP, -EINVAL for a negative or too small length, -ENOENT if no conntrack
 * entry matches, -EFAULT if the copy to user space fails.
 */
static int
ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
	const struct ipv6_pinfo *inet6 = inet6_sk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct nf_conntrack_tuple_hash *hash;
	const struct nf_conntrack_tuple *orig;
	struct sockaddr_in6 sin6;
	struct nf_conn *ct;
	__be32 flow_label;
	int bound_dev_if;

	/* Read everything needed from the socket under its lock. */
	lock_sock(sk);
	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
	tuple.src.u.tcp.port = inet->inet_sport;
	tuple.dst.u3.in6 = sk->sk_v6_daddr;
	tuple.dst.u.tcp.port = inet->inet_dport;
	tuple.dst.protonum = sk->sk_protocol;
	bound_dev_if = sk->sk_bound_dev_if;
	flow_label = inet6->flow_label;
	release_sock(sk);

	if (!(tuple.dst.protonum == IPPROTO_TCP ||
	      tuple.dst.protonum == IPPROTO_SCTP))
		return -ENOPROTOOPT;

	if (*len < 0)
		return -EINVAL;
	if ((unsigned int) *len < sizeof(sin6))
		return -EINVAL;

	hash = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
	if (hash == NULL) {
		pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
			 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
			 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
		return -ENOENT;
	}

	ct = nf_ct_tuplehash_to_ctrack(hash);
	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;

	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = orig->dst.u.tcp.port;
	sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
	memcpy(&sin6.sin6_addr, &orig->dst.u3.in6, sizeof(sin6.sin6_addr));

	/* The original tuple is not touched past this point. */
	nf_ct_put(ct);

	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);

	if (copy_to_user(user, &sin6, sizeof(sin6)))
		return -EFAULT;

	return 0;
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
/* Emit the IPv6 source and destination addresses of @tuple into @skb as
 * CTA_IP_V6_SRC and CTA_IP_V6_DST attributes.
 * Returns 0 on success, -1 as soon as one nla_put_in6_addr() call reports
 * a non-zero result.
 */
static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
				const struct nf_conntrack_tuple *tuple)
{
	if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6))
		return -1;
	if (nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
		return -1;

	return 0;
}
/* Netlink attribute policy for the IPv6 part of a tuple: each address
 * attribute is declared with a length of four 32-bit words (16 bytes).
 */
static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = {
[CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 },
[CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 },
};
/* Fill the IPv6 source/destination addresses of @t from the parsed
 * attribute table @tb.
 * Returns 0 on success, -EINVAL if either CTA_IP_V6_SRC or CTA_IP_V6_DST
 * is absent.
 */
static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
				struct nf_conntrack_tuple *t)
{
	const struct nlattr *src_attr = tb[CTA_IP_V6_SRC];
	const struct nlattr *dst_attr = tb[CTA_IP_V6_DST];

	if (!(src_attr && dst_attr))
		return -EINVAL;

	t->src.u3.in6 = nla_get_in6_addr(src_attr);
	t->dst.u3.in6 = nla_get_in6_addr(dst_attr);

	return 0;
}
#endif
/* Take one user reference on the IPv6 conntrack hooks of @net.
 * The first user enables IPv6 defragmentation and registers
 * ipv6_conntrack_ops; later users only bump the counter. If defrag enabling
 * returns a negative value or hook registration returns non-zero, the user
 * count is reset to 0 and that value is returned.
 * Serialized by register_ipv6_hooks.
 */
static int ipv6_hooks_register(struct net *net)
{
	struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
	int err = 0;

	mutex_lock(&register_ipv6_hooks);

	cnet->users++;
	if (cnet->users <= 1) {
		/* First user: bring up defrag, then the hooks. */
		err = nf_defrag_ipv6_enable(net);
		if (err < 0) {
			cnet->users = 0;
		} else {
			err = nf_register_net_hooks(net, ipv6_conntrack_ops,
						    ARRAY_SIZE(ipv6_conntrack_ops));
			if (err)
				cnet->users = 0;
		}
	}

	mutex_unlock(&register_ipv6_hooks);
	return err;
}
/* Drop one user reference on the IPv6 conntrack hooks of @net and
 * unregister ipv6_conntrack_ops when the count reaches zero. A count that
 * is already zero is left untouched. Serialized by register_ipv6_hooks.
 */
static void ipv6_hooks_unregister(struct net *net)
{
	struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);

	mutex_lock(&register_ipv6_hooks);
	if (cnet->users) {
		cnet->users--;
		if (!cnet->users)
			nf_unregister_net_hooks(net, ipv6_conntrack_ops,
						ARRAY_SIZE(ipv6_conntrack_ops));
	}
	mutex_unlock(&register_ipv6_hooks);
}
/* IPv6 layer-3 protocol descriptor. It bundles the IPv6 tuple helpers, the
 * ctnetlink attribute handlers (only when CONFIG_NF_CT_NETLINK is enabled)
 * and the per-netns hook get/put callbacks.
 */
const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.l3proto = PF_INET6,
.pkt_to_tuple = ipv6_pkt_to_tuple,
.invert_tuple = ipv6_invert_tuple,
.get_l4proto = ipv6_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
.nla_policy = ipv6_nla_policy,
/* Space for two 16-byte attributes, each with header and alignment. */
.nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
#endif
.net_ns_get = ipv6_hooks_register,
.net_ns_put = ipv6_hooks_unregister,
.me = THIS_MODULE,
};
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
/* Socket-option registration record that routes IP6T_SO_ORIGINAL_DST
 * getsockopt requests for NFPROTO_IPV6 to ipv6_getorigdst().
 * NOTE(review): get_optmax is IP6T_SO_ORIGINAL_DST + 1, which suggests a
 * half-open range covering this single option; confirm against the
 * nf_sockopt lookup code.
 */
static struct nf_sockopt_ops so_getorigdst6 = {
.pf = NFPROTO_IPV6,
.get_optmin = IP6T_SO_ORIGINAL_DST,
.get_optmax = IP6T_SO_ORIGINAL_DST + 1,
.get = ipv6_getorigdst,
.owner = THIS_MODULE,
};
/* Layer-4 trackers registered together with IPv6 conntrack: TCP, UDP and
 * ICMPv6 always; DCCP, SCTP and UDP-Lite only when the matching
 * CONFIG_NF_CT_PROTO_* option is defined.
 */
static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
&nf_conntrack_l4proto_sctp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
&nf_conntrack_l4proto_udplite6,
#endif
};
/* Per-netns init: register every entry of builtin_l4proto6 for @net and
 * return the result of nf_ct_l4proto_pernet_register().
 */
static int ipv6_net_init(struct net *net)
{
return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
}
/* Per-netns exit: unregister every entry of builtin_l4proto6 for @net. */
static void ipv6_net_exit(struct net *net)
{
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
}
/* Per-netns operations for IPv6 conntrack. .id/.size request a
 * struct conntrack6_net sized per-netns area addressed through
 * conntrack6_net_id (it is fetched with net_generic() by the hook
 * register/unregister helpers).
 */
static struct pernet_operations ipv6_net_ops = {
.init = ipv6_net_init,
.exit = ipv6_net_exit,
.id = &conntrack6_net_id,
.size = sizeof(struct conntrack6_net),
};
/* Module init. Registers, in this order: the IP6T_SO_ORIGINAL_DST socket
 * option, the per-netns operations, the built-in L4 trackers and finally
 * the IPv6 L3 protocol. When a step fails, the cleanup_* labels undo the
 * earlier steps in reverse order and the error code is returned.
 */
static int __init nf_conntrack_l3proto_ipv6_init(void)
{
int ret = 0;
need_conntrack();
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
/* The advertised nla_size must agree with what the policy table implies. */
if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
nf_conntrack_l3proto_ipv6.nla_size))
return -EINVAL;
#endif
ret = nf_register_sockopt(&so_getorigdst6);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
return ret;
}
ret = register_pernet_subsys(&ipv6_net_ops);
if (ret < 0)
goto cleanup_sockopt;
ret = nf_ct_l4proto_register(builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
if (ret < 0)
goto cleanup_pernet;
ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
if (ret < 0) {
pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
goto cleanup_l4proto;
}
return ret;
/* Error unwind: each label falls through to the ones below it. */
cleanup_l4proto:
nf_ct_l4proto_unregister(builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
cleanup_pernet:
unregister_pernet_subsys(&ipv6_net_ops);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst6);
return ret;
}
/* Module exit. Calls synchronize_net() first, then unregisters the L3
 * protocol, the L4 trackers, the per-netns operations and the socket
 * option, i.e. the reverse of the registration order used at init.
 */
static void __exit nf_conntrack_l3proto_ipv6_fini(void)
{
synchronize_net();
nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
nf_ct_l4proto_unregister(builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
unregister_pernet_subsys(&ipv6_net_ops);
nf_unregister_sockopt(&so_getorigdst6);
}
module_init(nf_conntrack_l3proto_ipv6_init);
module_exit(nf_conntrack_l3proto_ipv6_fini);

View File

@ -33,9 +33,8 @@
#include <net/sock.h>
#include <net/snmp.h>
#include <net/inet_frag.h>
#include <net/ipv6_frag.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
@ -151,7 +150,7 @@ static void nf_ct_frag6_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
ip6_expire_frag_queue(net, fq);
ip6frag_expire_frag_queue(net, fq);
}
/* Creation primitives. */
@ -624,16 +623,24 @@ static struct pernet_operations nf_ct_net_ops = {
.exit = nf_ct_net_exit,
};
/* rhashtable parameters for the conntrack IPv6 fragment queues: entries are
 * linked through inet_frag_queue.node and hashed/compared with the
 * ip6frag_* helpers; automatic shrinking is enabled.
 */
static const struct rhashtable_params nfct_rhash_params = {
.head_offset = offsetof(struct inet_frag_queue, node),
.hashfn = ip6frag_key_hashfn,
.obj_hashfn = ip6frag_obj_hashfn,
.obj_cmpfn = ip6frag_obj_cmpfn,
.automatic_shrinking = true,
};
int nf_ct_frag6_init(void)
{
int ret = 0;
nf_frags.constructor = ip6_frag_init;
nf_frags.constructor = ip6frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
nf_frags.rhash_params = ip6_rhash_params;
nf_frags.rhash_params = nfct_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;

View File

@ -14,8 +14,7 @@
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>
#include <net/ipv6_frag.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
@ -23,7 +22,6 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#endif

View File

@ -57,7 +57,7 @@
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
#include <net/ipv6_frag.h>
#include <net/inet_ecn.h>
static const char ip6_frag_cache_name[] = "ip6-frags";
@ -72,61 +72,6 @@ static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
/* Initialise a new IPv6 fragment queue: store the lookup key pointed to by
 * @a (a struct frag_v6_compare_key) in @q and clear the ECN state of the
 * enclosing struct frag_queue.
 */
void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
	const struct frag_v6_compare_key *lookup_key = a;
	struct frag_queue *queue = container_of(q, struct frag_queue, q);

	queue->ecn = 0;
	q->key.v6 = *lookup_key;
}
EXPORT_SYMBOL(ip6_frag_init);
/* Expire the fragment queue @fq belonging to namespace @net.
 *
 * Under rcu_read_lock() and fq->q.lock: unless the queue is already flagged
 * INET_FRAG_COMPLETE it is killed, the REASMFAILS and REASMTIMEOUT counters
 * are bumped for the device found via fq->iif, and, if the first fragment
 * was received, an ICMPv6 time-exceeded (fragment reassembly) error is sent
 * for the head fragment. Every path ends with inet_frag_put() on the queue.
 */
void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
struct sk_buff *head;
rcu_read_lock();
spin_lock(&fq->q.lock);
/* Nothing to do if the queue has already been completed. */
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
inet_frag_kill(&fq->q);
/* Look the device up by index; no counters or ICMP if it is gone. */
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
head = fq->q.fragments;
if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
goto out;
/* But use as source device the one on which the LAST ARRIVED
 * segment was received. And do not use the fq->dev
 * pointer directly, the device might already have disappeared.
 */
head->dev = dev;
/* Hold head across the unlock; the reference is dropped by kfree_skb(). */
skb_get(head);
spin_unlock(&fq->q.lock);
icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
kfree_skb(head);
/* The queue lock was already released above, so skip the "out" unlock. */
goto out_rcu_unlock;
out:
spin_unlock(&fq->q.lock);
out_rcu_unlock:
rcu_read_unlock();
inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
static void ip6_frag_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
@ -136,7 +81,7 @@ static void ip6_frag_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
ip6_expire_frag_queue(net, fq);
ip6frag_expire_frag_queue(net, fq);
}
static struct frag_queue *
@ -696,42 +641,19 @@ static struct pernet_operations ip6_frags_ops = {
.exit = ipv6_frags_exit_net,
};
/* Hash a lookup key: @data is hashed with jhash2() as an array of 32-bit
 * words covering one struct frag_v6_compare_key. @len is not used.
 */
static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 nwords = sizeof(struct frag_v6_compare_key) / sizeof(u32);

	return jhash2(data, nwords, seed);
}
/* Hash a stored object: @data is an inet_frag_queue whose key.v6 member is
 * hashed with jhash2() over the same number of 32-bit words as a
 * struct frag_v6_compare_key. @len is not used.
 */
static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 nwords = sizeof(struct frag_v6_compare_key) / sizeof(u32);
	const struct inet_frag_queue *queue = data;
	const u32 *words = (const u32 *)&queue->key.v6;

	return jhash2(words, nwords, seed);
}
/* Compare the key in @arg with the key stored in the inet_frag_queue @ptr.
 * Returns 0 when the two keys are byte-wise equal, 1 otherwise.
 */
static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct inet_frag_queue *queue = ptr;
	const struct frag_v6_compare_key *wanted = arg->key;

	return memcmp(&queue->key, wanted, sizeof(*wanted)) != 0;
}
const struct rhashtable_params ip6_rhash_params = {
static const struct rhashtable_params ip6_rhash_params = {
.head_offset = offsetof(struct inet_frag_queue, node),
.hashfn = ip6_key_hashfn,
.obj_hashfn = ip6_obj_hashfn,
.obj_cmpfn = ip6_obj_cmpfn,
.hashfn = ip6frag_key_hashfn,
.obj_hashfn = ip6frag_obj_hashfn,
.obj_cmpfn = ip6frag_obj_cmpfn,
.automatic_shrinking = true,
};
EXPORT_SYMBOL(ip6_rhash_params);
int __init ipv6_frag_init(void)
{
int ret;
ip6_frags.constructor = ip6_frag_init;
ip6_frags.constructor = ip6frag_init;
ip6_frags.destructor = NULL;
ip6_frags.qsize = sizeof(struct frag_queue);
ip6_frags.frag_expire = ip6_frag_expire;

View File

@ -49,6 +49,8 @@ config NETFILTER_NETLINK_LOG
config NF_CONNTRACK
tristate "Netfilter connection tracking support"
default m if NETFILTER_ADVANCED=n
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IPV6 != n
help
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
@ -615,7 +617,7 @@ config NFT_SOCKET
tristate "Netfilter nf_tables socket match support"
depends on IPV6 || IPV6=n
select NF_SOCKET_IPV4
select NF_SOCKET_IPV6 if IPV6
select NF_SOCKET_IPV6 if NF_TABLES_IPV6
help
This option allows matching for the presence or absence of a
corresponding socket and its attributes.
@ -881,7 +883,7 @@ config NETFILTER_XT_TARGET_LOG
tristate "LOG target support"
select NF_LOG_COMMON
select NF_LOG_IPV4
select NF_LOG_IPV6 if IPV6
select NF_LOG_IPV6 if IP6_NF_IPTABLES
default m if NETFILTER_ADVANCED=n
help
This option adds a `LOG' target, which allows you to create rules in
@ -973,7 +975,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
select NF_DUP_IPV6 if IPV6
select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
@ -1481,8 +1483,8 @@ config NETFILTER_XT_MATCH_SOCKET
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on NF_SOCKET_IPV4
depends on NF_SOCKET_IPV6
select NF_SOCKET_IPV4
select NF_SOCKET_IPV6 if IP6_NF_IPTABLES
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help

View File

@ -1,7 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \
nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \
nf_conntrack_proto_icmp.o \
nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o

View File

@ -825,12 +825,23 @@ static void ip_vs_conn_expire(struct timer_list *t)
/* Unlink conn if not referenced anymore */
if (likely(ip_vs_conn_unlink(cp))) {
struct ip_vs_conn *ct = cp->control;
/* delete the timer if it is activated by other users */
del_timer(&cp->timer);
/* does anybody control me? */
if (cp->control)
if (ct) {
ip_vs_control_del(cp);
/* Drop CTL or non-assured TPL if not used anymore */
if (!cp->timeout && !atomic_read(&ct->n_control) &&
(!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
!(ct->state & IP_VS_CTPL_S_ASSURED))) {
IP_VS_DBG(4, "drop controlling connection\n");
ct->timeout = 0;
ip_vs_conn_expire_now(ct);
}
}
if ((cp->flags & IP_VS_CONN_F_NFCT) &&
!(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
@ -872,6 +883,10 @@ static void ip_vs_conn_expire(struct timer_list *t)
/* Modify timer, so that it expires as soon as possible.
* Can be called without reference only if under RCU lock.
* We can have such chain of conns linked with ->control: DATA->CTL->TPL
* - DATA (eg. FTP) and TPL (persistence) can be present depending on setup
* - cp->timeout=0 indicates all conns from chain should be dropped but
* TPL is not dropped if in assured state
*/
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
{
@ -1107,7 +1122,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_state_name(cp),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
@ -1118,7 +1133,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_state_name(cp),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
return 0;
@ -1169,7 +1184,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
else
@ -1181,7 +1196,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
}
@ -1197,8 +1212,11 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
#endif
/*
* Randomly drop connection entries before running out of memory
/* Randomly drop connection entries before running out of memory
* Can be used for DATA and CTL conns. For TPL conns there are exceptions:
* - traffic for services in OPS mode increases ct->in_pkts, so it is supported
* - traffic for services not in OPS mode does not increase ct->in_pkts in
* all cases, so it is not supported
*/
static inline int todrop_entry(struct ip_vs_conn *cp)
{
@ -1242,7 +1260,7 @@ static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_conn *cp, *cp_c;
struct ip_vs_conn *cp;
rcu_read_lock();
/*
@ -1254,13 +1272,15 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->ipvs != ipvs)
continue;
if (atomic_read(&cp->n_control))
continue;
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
if (atomic_read(&cp->n_control) ||
!ip_vs_conn_ops_mode(cp))
continue;
else
/* connection template of OPS */
/* connection template of OPS */
if (ip_vs_conn_ops_mode(cp))
goto try_drop;
if (!(cp->state & IP_VS_CTPL_S_ASSURED))
goto drop;
continue;
}
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
@ -1294,15 +1314,10 @@ try_drop:
continue;
}
IP_VS_DBG(4, "del connection\n");
drop:
IP_VS_DBG(4, "drop connection\n");
cp->timeout = 0;
ip_vs_conn_expire_now(cp);
cp_c = cp->control;
/* cp->control is valid only with reference to cp */
if (cp_c && __ip_vs_conn_get(cp)) {
IP_VS_DBG(4, "del conn template\n");
ip_vs_conn_expire_now(cp_c);
__ip_vs_conn_put(cp);
}
}
cond_resched_rcu();
}
@ -1325,15 +1340,19 @@ flush_again:
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
if (cp->ipvs != ipvs)
continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
/* As timers are expired in LIFO order, restart
* the timer of controlling connection first, so
* that it is expired after us.
*/
cp_c = cp->control;
/* cp->control is valid only with reference to cp */
if (cp_c && __ip_vs_conn_get(cp)) {
IP_VS_DBG(4, "del conn template\n");
IP_VS_DBG(4, "del controlling connection\n");
ip_vs_conn_expire_now(cp_c);
__ip_vs_conn_put(cp);
}
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
}
cond_resched_rcu();
}

View File

@ -42,6 +42,11 @@
static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
/* States for conn templates: NONE or words separated with ",", max 15 chars.
 *
 * Indexed by the IP_VS_CTPL_S_* value.  Slots without a designated
 * initializer are NULL, so readers must cope with a NULL entry.
 * Neither the strings nor the pointer slots are ever written, hence the
 * second const.
 */
static const char * const ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = {
	[IP_VS_CTPL_S_NONE]	= "NONE",
	[IP_VS_CTPL_S_ASSURED]	= "ASSURED",
};
/*
* register an ipvs protocol
@ -193,12 +198,20 @@ ip_vs_create_timeout_table(int *table, int size)
}
const char * ip_vs_state_name(__u16 proto, int state)
const char *ip_vs_state_name(const struct ip_vs_conn *cp)
{
struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
unsigned int state = cp->state;
struct ip_vs_protocol *pp;
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
if (state >= IP_VS_CTPL_S_LAST)
return "ERR!";
return ip_vs_ctpl_state_name_table[state] ? : "?";
}
pp = ip_vs_proto_get(cp->protocol);
if (pp == NULL || pp->state_name == NULL)
return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
return pp->state_name(state);
}

View File

@ -461,6 +461,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
}
if (next_state == IP_VS_SCTP_S_ESTABLISHED)
ip_vs_control_assure_ct(cp);
}
if (likely(pd))
cp->timeout = pd->timeout_table[cp->state = next_state];

View File

@ -569,6 +569,8 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
}
if (new_state == IP_VS_TCP_S_ESTABLISHED)
ip_vs_control_assure_ct(cp);
}
if (likely(pd))

View File

@ -460,6 +460,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
}
cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
if (direction == IP_VS_DIR_OUTPUT)
ip_vs_control_assure_ct(cp);
}
static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)

View File

@ -1003,12 +1003,9 @@ static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer
continue;
}
} else {
/* protocol in templates is not used for state/timeout */
if (state > 0) {
IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
state);
state = 0;
}
if (state >= IP_VS_CTPL_S_LAST)
IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n",
state);
}
ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
@ -1166,12 +1163,9 @@ static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *m
goto out;
}
} else {
/* protocol in templates is not used for state/timeout */
if (state > 0) {
IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
state);
state = 0;
}
if (state >= IP_VS_CTPL_S_LAST)
IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n",
state);
}
if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
pe_data_len, pe_name, pe_name_len)) {

View File

@ -44,17 +44,19 @@
/* we will save the tuples of all connections we care about */
struct nf_conncount_tuple {
struct hlist_node node;
struct list_head node;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
struct rcu_head rcu_head;
};
struct nf_conncount_rb {
struct rb_node node;
struct hlist_head hhead; /* connections/hosts in same subnet */
struct nf_conncount_list list;
u32 key[MAX_KEYLEN];
struct rcu_head rcu_head;
};
static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
@ -62,6 +64,10 @@ static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_i
struct nf_conncount_data {
unsigned int keylen;
struct rb_root root[CONNCOUNT_SLOTS];
struct net *net;
struct work_struct gc_work;
unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
unsigned int gc_tree;
};
static u_int32_t conncount_rnd __read_mostly;
@ -82,26 +88,70 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
enum nf_conncount_list_add
nf_conncount_add(struct nf_conncount_list *list,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
struct nf_conncount_tuple *conn;
if (WARN_ON_ONCE(list->count > INT_MAX))
return NF_CONNCOUNT_ERR;
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
if (conn == NULL)
return false;
return NF_CONNCOUNT_ERR;
conn->tuple = *tuple;
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
hlist_add_head(&conn->node, head);
return true;
spin_lock(&list->list_lock);
if (list->dead == true) {
kmem_cache_free(conncount_conn_cachep, conn);
spin_unlock(&list->list_lock);
return NF_CONNCOUNT_SKIP;
}
list_add_tail(&conn->node, &list->head);
list->count++;
spin_unlock(&list->list_lock);
return NF_CONNCOUNT_ADDED;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
/* RCU callback (queued via call_rcu() in conn_free()): return the
 * nf_conncount_tuple that embeds 'h' to its slab cache.
 */
static void __conn_free(struct rcu_head *h)
{
	kmem_cache_free(conncount_conn_cachep,
			container_of(h, struct nf_conncount_tuple, rcu_head));
}
/* Unlink 'conn' from 'list' and queue it for freeing via call_rcu().
 *
 * Returns true when this removal dropped list->count to zero, false
 * otherwise.  If the list already reports a count of zero nothing is
 * unlinked or freed and false is returned.
 */
static bool conn_free(struct nf_conncount_list *list,
		      struct nf_conncount_tuple *conn)
{
	bool now_empty;

	spin_lock(&list->list_lock);

	if (!list->count) {
		/* nothing accounted on this list: leave 'conn' alone */
		spin_unlock(&list->list_lock);
		return false;
	}

	list->count--;
	list_del_rcu(&conn->node);
	now_empty = (list->count == 0);

	spin_unlock(&list->list_lock);

	/* the actual kmem_cache_free() happens in __conn_free() */
	call_rcu(&conn->rcu_head, __conn_free);

	return now_empty;
}
static const struct nf_conntrack_tuple_hash *
find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
find_or_evict(struct net *net, struct nf_conncount_list *list,
struct nf_conncount_tuple *conn, bool *free_entry)
{
const struct nf_conntrack_tuple_hash *found;
unsigned long a, b;
@ -121,34 +171,37 @@ find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
*/
age = a - b;
if (conn->cpu == cpu || age >= 2) {
hlist_del(&conn->node);
kmem_cache_free(conncount_conn_cachep, conn);
*free_entry = conn_free(list, conn);
return ERR_PTR(-ENOENT);
}
return ERR_PTR(-EAGAIN);
}
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit)
void nf_conncount_lookup(struct net *net,
struct nf_conncount_list *list,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn;
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
struct hlist_node *n;
unsigned int length = 0;
unsigned int collect = 0;
bool free_entry = false;
/* best effort only */
*addit = tuple ? true : false;
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
found = find_or_evict(net, conn);
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
break;
found = find_or_evict(net, list, conn, &free_entry);
if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) {
length++;
if (!tuple)
continue;
@ -156,7 +209,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir))
*addit = false;
}
} else if (PTR_ERR(found) == -ENOENT)
collect++;
continue;
}
@ -165,9 +219,10 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
/*
* Just to be sure we have it only once in the list.
* We should not see tuples twice unless someone hooks
* this into a table without "-p tcp --syn".
*
* Attempt to avoid a re-add in this case.
*/
*addit = false;
} else if (already_closed(found_ct)) {
@ -176,19 +231,75 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
* closed already -> ditch it
*/
nf_ct_put(found_ct);
hlist_del(&conn->node);
kmem_cache_free(conncount_conn_cachep, conn);
conn_free(list, conn);
collect++;
continue;
}
nf_ct_put(found_ct);
length++;
}
return length;
}
EXPORT_SYMBOL_GPL(nf_conncount_lookup);
/* Prepare 'list' for use: empty list head, unlocked spinlock, not dead.
 *
 * NOTE: the element counter starts at 1, not 0.  insert_tree() relies on
 * this: it links the first tuple with a bare list_add() right after
 * calling this function and does not increment list->count itself.
 * NOTE(review): callers that start from an empty list and then use
 * nf_conncount_add() would see a count that is one too high -- confirm
 * against the other users of this helper.
 */
void nf_conncount_list_init(struct nf_conncount_list *list)
{
	INIT_LIST_HEAD(&list->head);
	spin_lock_init(&list->list_lock);
	list->dead = false;
	list->count = 1;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
/* Walk 'list' and drop entries whose conntrack is gone or already closed.
 *
 * Returns true as soon as a removal reports that the list became empty
 * (see conn_free()); returns false otherwise, including when the walk
 * stops early because more than CONNCOUNT_GC_MAX_NODES entries have been
 * collected.
 */
bool nf_conncount_gc_list(struct net *net,
			  struct nf_conncount_list *list)
{
	struct nf_conncount_tuple *entry, *next;
	unsigned int evicted = 0;
	bool list_emptied = false;

	list_for_each_entry_safe(entry, next, &list->head, node) {
		const struct nf_conntrack_tuple_hash *hash;
		struct nf_conn *ct;

		hash = find_or_evict(net, list, entry, &list_emptied);
		if (IS_ERR(hash)) {
			/* only -ENOENT means the entry was evicted */
			if (PTR_ERR(hash) != -ENOENT)
				continue;
			if (list_emptied)
				return true;
			evicted++;
			continue;
		}

		ct = nf_ct_tuplehash_to_ctrack(hash);
		if (!already_closed(ct)) {
			/* live connection: keep it; the collection budget
			 * is only checked on this path
			 */
			nf_ct_put(ct);
			if (evicted > CONNCOUNT_GC_MAX_NODES)
				return false;
			continue;
		}

		/*
		 * we do not care about connections which are
		 * closed already -> ditch it
		 */
		nf_ct_put(ct);
		if (conn_free(list, entry))
			return true;
		evicted++;
	}

	return false;
}
EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
/* RCU callback (queued via call_rcu() in tree_nodes_free()): return the
 * nf_conncount_rb that embeds 'h' to its slab cache.
 */
static void __tree_nodes_free(struct rcu_head *h)
{
	kmem_cache_free(conncount_rb_cachep,
			container_of(h, struct nf_conncount_rb, rcu_head));
}
static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[],
unsigned int gc_count)
@ -197,32 +308,46 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
rb_erase(&rbconn->node, root);
kmem_cache_free(conncount_rb_cachep, rbconn);
spin_lock(&rbconn->list.list_lock);
if (rbconn->list.count == 0 && rbconn->list.dead == false) {
rbconn->list.dead = true;
rb_erase(&rbconn->node, root);
call_rcu(&rbconn->rcu_head, __tree_nodes_free);
}
spin_unlock(&rbconn->list.list_lock);
}
}
static unsigned int
count_tree(struct net *net, struct rb_root *root,
const u32 *key, u8 keylen,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
/* Flag tree slot 'tree' in data->pending_trees and queue data->gc_work
 * (handled by tree_gc_worker()) on the system workqueue.
 */
static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
{
	/* publish the pending bit before the work item is queued */
	set_bit(tree, data->pending_trees);

	schedule_work(&data->gc_work);
}
static unsigned int
insert_tree(struct net *net,
struct nf_conncount_data *data,
struct rb_root *root,
unsigned int hash,
const u32 *key,
u8 keylen,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
enum nf_conncount_list_add ret;
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
unsigned int gc_count;
bool no_gc = false;
unsigned int count = 0, gc_count = 0;
bool node_found = false;
spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
restart:
gc_count = 0;
parent = NULL;
rbnode = &(root->rb_node);
while (*rbnode) {
int diff;
bool addit;
rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
parent = *rbnode;
@ -232,33 +357,30 @@ count_tree(struct net *net, struct rb_root *root,
} else if (diff > 0) {
rbnode = &((*rbnode)->rb_right);
} else {
/* same source network -> be counted! */
unsigned int count;
count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
zone, &addit);
tree_nodes_free(root, gc_nodes, gc_count);
if (!addit)
return count;
if (!nf_conncount_add(&rbconn->hhead, tuple, zone))
return 0; /* hotdrop */
return count + 1;
/* unlikely: other cpu added node already */
node_found = true;
ret = nf_conncount_add(&rbconn->list, tuple, zone);
if (ret == NF_CONNCOUNT_ERR) {
count = 0; /* hotdrop */
} else if (ret == NF_CONNCOUNT_ADDED) {
count = rbconn->list.count;
} else {
/* NF_CONNCOUNT_SKIP, rbconn is already
* reclaimed by gc, insert a new tree node
*/
node_found = false;
}
break;
}
if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
if (gc_count >= ARRAY_SIZE(gc_nodes))
continue;
/* only used for GC on hhead, retval and 'addit' ignored */
nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit);
if (hlist_empty(&rbconn->hhead))
if (nf_conncount_gc_list(net, &rbconn->list))
gc_nodes[gc_count++] = rbconn;
}
if (gc_count) {
no_gc = true;
tree_nodes_free(root, gc_nodes, gc_count);
/* tree_node_free before new allocation permits
* allocator to re-use newly free'd object.
@ -266,37 +388,138 @@ count_tree(struct net *net, struct rb_root *root,
* This is a rare event; in most cases we will find
* existing node to re-use. (or gc_count is 0).
*/
goto restart;
if (gc_count >= ARRAY_SIZE(gc_nodes))
schedule_gc_worker(data, hash);
}
if (!tuple)
return 0;
if (node_found)
goto out_unlock;
/* no match, need to insert new node */
/* expected case: match, insert new node */
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
if (rbconn == NULL)
return 0;
goto out_unlock;
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
if (conn == NULL) {
kmem_cache_free(conncount_rb_cachep, rbconn);
return 0;
goto out_unlock;
}
conn->tuple = *tuple;
conn->zone = *zone;
memcpy(rbconn->key, key, sizeof(u32) * keylen);
INIT_HLIST_HEAD(&rbconn->hhead);
hlist_add_head(&conn->node, &rbconn->hhead);
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
count = 1;
rb_link_node(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
return 1;
out_unlock:
spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
return count;
}
static unsigned int
count_tree(struct net *net,
struct nf_conncount_data *data,
const u32 *key,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
enum nf_conncount_list_add ret;
struct rb_root *root;
struct rb_node *parent;
struct nf_conncount_rb *rbconn;
unsigned int hash;
u8 keylen = data->keylen;
hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
root = &data->root[hash];
parent = rcu_dereference_raw(root->rb_node);
while (parent) {
int diff;
bool addit;
rbconn = rb_entry(parent, struct nf_conncount_rb, node);
diff = key_diff(key, rbconn->key, keylen);
if (diff < 0) {
parent = rcu_dereference_raw(parent->rb_left);
} else if (diff > 0) {
parent = rcu_dereference_raw(parent->rb_right);
} else {
/* same source network -> be counted! */
nf_conncount_lookup(net, &rbconn->list, tuple, zone,
&addit);
if (!addit)
return rbconn->list.count;
ret = nf_conncount_add(&rbconn->list, tuple, zone);
if (ret == NF_CONNCOUNT_ERR) {
return 0; /* hotdrop */
} else if (ret == NF_CONNCOUNT_ADDED) {
return rbconn->list.count;
} else {
/* NF_CONNCOUNT_SKIP, rbconn is already
* reclaimed by gc, insert a new tree node
*/
break;
}
}
}
if (!tuple)
return 0;
return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
}
static void tree_gc_worker(struct work_struct *work)
{
struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
struct rb_root *root;
struct rb_node *node;
unsigned int tree, next_tree, gc_count = 0;
tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
root = &data->root[tree];
rcu_read_lock();
for (node = rb_first(root); node != NULL; node = rb_next(node)) {
rbconn = rb_entry(node, struct nf_conncount_rb, node);
if (nf_conncount_gc_list(data->net, &rbconn->list))
gc_nodes[gc_count++] = rbconn;
}
rcu_read_unlock();
spin_lock_bh(&nf_conncount_locks[tree]);
if (gc_count) {
tree_nodes_free(root, gc_nodes, gc_count);
}
clear_bit(tree, data->pending_trees);
next_tree = (tree + 1) % CONNCOUNT_SLOTS;
next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
if (next_tree < CONNCOUNT_SLOTS) {
data->gc_tree = next_tree;
schedule_work(work);
}
spin_unlock_bh(&nf_conncount_locks[tree]);
}
/* Count and return number of conntrack entries in 'net' with particular 'key'.
* If 'tuple' is not null, insert it into the accounting data structure.
* Call with RCU read lock.
*/
unsigned int nf_conncount_count(struct net *net,
struct nf_conncount_data *data,
@ -304,20 +527,7 @@ unsigned int nf_conncount_count(struct net *net,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
struct rb_root *root;
int count;
u32 hash;
hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
root = &data->root[hash];
spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
count = count_tree(net, root, key, data->keylen, tuple, zone);
spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
return count;
return count_tree(net, data, key, tuple, zone);
}
EXPORT_SYMBOL_GPL(nf_conncount_count);
@ -348,17 +558,18 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
data->root[i] = RB_ROOT;
data->keylen = keylen / sizeof(u32);
data->net = net;
INIT_WORK(&data->gc_work, tree_gc_worker);
return data;
}
EXPORT_SYMBOL_GPL(nf_conncount_init);
void nf_conncount_cache_free(struct hlist_head *hhead)
void nf_conncount_cache_free(struct nf_conncount_list *list)
{
struct nf_conncount_tuple *conn;
struct hlist_node *n;
struct nf_conncount_tuple *conn, *conn_n;
hlist_for_each_entry_safe(conn, n, hhead, node)
list_for_each_entry_safe(conn, conn_n, &list->head, node)
kmem_cache_free(conncount_conn_cachep, conn);
}
EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
@ -373,7 +584,7 @@ static void destroy_tree(struct rb_root *r)
rb_erase(node, r);
nf_conncount_cache_free(&rbconn->hhead);
nf_conncount_cache_free(&rbconn->list);
kmem_cache_free(conncount_rb_cachep, rbconn);
}
@ -384,6 +595,7 @@ void nf_conncount_destroy(struct net *net, unsigned int family,
{
unsigned int i;
cancel_work_sync(&data->gc_work);
nf_ct_netns_put(net, family);
for (i = 0; i < ARRAY_SIZE(data->root); ++i)

View File

@ -37,7 +37,6 @@
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
@ -55,6 +54,7 @@
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
#include <net/ip.h>
#include "nf_internals.h"
@ -222,7 +222,7 @@ static u32 hash_conntrack(const struct net *net,
return scale_hash(hash_conntrack_raw(tuple, net));
}
bool
static bool
nf_ct_get_tuple(const struct sk_buff *skb,
unsigned int nhoff,
unsigned int dataoff,
@ -230,37 +230,151 @@ nf_ct_get_tuple(const struct sk_buff *skb,
u_int8_t protonum,
struct net *net,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
unsigned int size;
const __be32 *ap;
__be32 _addrs[8];
struct {
__be16 sport;
__be16 dport;
} _inet_hdr, *inet_hdr;
memset(tuple, 0, sizeof(*tuple));
tuple->src.l3num = l3num;
if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
switch (l3num) {
case NFPROTO_IPV4:
nhoff += offsetof(struct iphdr, saddr);
size = 2 * sizeof(__be32);
break;
case NFPROTO_IPV6:
nhoff += offsetof(struct ipv6hdr, saddr);
size = sizeof(_addrs);
break;
default:
return true;
}
ap = skb_header_pointer(skb, nhoff, size, _addrs);
if (!ap)
return false;
switch (l3num) {
case NFPROTO_IPV4:
tuple->src.u3.ip = ap[0];
tuple->dst.u3.ip = ap[1];
break;
case NFPROTO_IPV6:
memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
break;
}
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
if (unlikely(l4proto->pkt_to_tuple))
return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
/* Actually only need first 4 bytes to get ports. */
inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
if (!inet_hdr)
return false;
tuple->src.u.udp.port = inet_hdr->sport;
tuple->dst.u.udp.port = inet_hdr->dport;
return true;
}
/* Locate the layer 4 header of the IPv4 packet whose IP header starts at
 * 'nhoff'.
 *
 * On success stores iph->protocol in *protonum and returns the offset of
 * the layer 4 header (nhoff + ihl * 4).  Returns -1 if the IP header
 * cannot be read, the packet has a non-zero fragment offset, or the
 * computed offset lies beyond skb->len.  Note that *protonum is already
 * written when the last check fails.
 */
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			    u_int8_t *protonum)
{
	struct iphdr hdr_buf;
	const struct iphdr *ip4;
	int l4off;

	ip4 = skb_header_pointer(skb, nhoff, sizeof(hdr_buf), &hdr_buf);
	if (ip4 == NULL)
		return -1;

	/* Conntrack defragments packets, we might still see fragments
	 * inside ICMP packets though.
	 */
	if (ip4->frag_off & htons(IP_OFFSET))
		return -1;

	*protonum = ip4->protocol;
	l4off = nhoff + (ip4->ihl << 2);

	/* Check bogus IP headers */
	if (l4off > skb->len) {
		pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
			 nhoff, ip4->ihl << 2, skb->len);
		return -1;
	}

	return l4off;
}
#if IS_ENABLED(CONFIG_IPV6)
/* Locate the layer 4 header of the IPv6 packet whose IPv6 header starts
 * at 'nhoff', skipping extension headers via ipv6_skip_exthdr().
 *
 * On success stores the final next-header value in *protonum and returns
 * the offset reported by ipv6_skip_exthdr().  Returns -1 if the nexthdr
 * byte cannot be copied, ipv6_skip_exthdr() fails, or frag_off has any
 * bit set that is selected by htons(~0x7).
 */
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			    u8 *protonum)
{
	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
	__be16 frag_off;
	u8 nexthdr;
	int l4off;

	if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
			  &nexthdr, sizeof(nexthdr)) != 0) {
		pr_debug("can't get nexthdr\n");
		return -1;
	}

	l4off = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);

	/*
	 * (l4off == skb->len) means the packet has not data, just
	 * IPv6 and possibly extensions headers, but it is tracked anyway
	 */
	if (l4off < 0)
		goto no_proto;
	if ((frag_off & htons(~0x7)) != 0)
		goto no_proto;

	*protonum = nexthdr;
	return l4off;

no_proto:
	pr_debug("can't find proto in pkt\n");
	return -1;
}
#endif
/* Dispatch on the protocol family 'pf' to find the layer 4 header.
 *
 * NFPROTO_IPV4 and (when CONFIG_IPV6 is enabled) NFPROTO_IPV6 are handed
 * to the per-family helpers and their result is returned unchanged.  For
 * any other family *l4num is set to 0 and -1 is returned.
 */
static int get_l4proto(const struct sk_buff *skb,
		       unsigned int nhoff, u8 pf, u8 *l4num)
{
	if (pf == NFPROTO_IPV4)
		return ipv4_get_l4proto(skb, nhoff, l4num);
#if IS_ENABLED(CONFIG_IPV6)
	if (pf == NFPROTO_IPV6)
		return ipv6_get_l4proto(skb, nhoff, l4num);
#endif
	/* unsupported family */
	*l4num = 0;
	return -1;
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
u8 protonum;
int protoff;
int ret;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(l3num);
ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
if (ret != NF_ACCEPT) {
protoff = get_l4proto(skb, nhoff, l3num, &protonum);
if (protoff <= 0) {
rcu_read_unlock();
return false;
}
@ -268,7 +382,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
l4proto = __nf_ct_l4proto_find(l3num, protonum);
ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
l3proto, l4proto);
l4proto);
rcu_read_unlock();
return ret;
@ -278,19 +392,35 @@ EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
memset(inverse, 0, sizeof(*inverse));
inverse->src.l3num = orig->src.l3num;
if (l3proto->invert_tuple(inverse, orig) == 0)
return false;
switch (orig->src.l3num) {
case NFPROTO_IPV4:
inverse->src.u3.ip = orig->dst.u3.ip;
inverse->dst.u3.ip = orig->src.u3.ip;
break;
case NFPROTO_IPV6:
inverse->src.u3.in6 = orig->dst.u3.in6;
inverse->dst.u3.in6 = orig->src.u3.in6;
break;
default:
break;
}
inverse->dst.dir = !orig->dst.dir;
inverse->dst.protonum = orig->dst.protonum;
return l4proto->invert_tuple(inverse, orig);
if (unlikely(l4proto->invert_tuple))
return l4proto->invert_tuple(inverse, orig);
inverse->src.u.all = orig->dst.u.all;
inverse->dst.u.all = orig->src.u.all;
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
@ -502,6 +632,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
static inline bool
nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
{
return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
&ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
&ct2->tuplehash[IP_CT_DIR_REPLY].tuple) &&
nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) &&
nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) &&
net_eq(nf_ct_net(ct1), nf_ct_net(ct2));
}
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
@ -695,19 +837,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
const struct nf_conntrack_l4proto *l4proto;
enum ip_conntrack_info oldinfo;
struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
enum ip_conntrack_info oldinfo;
struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_conntrack_put(&loser_ct->ct_general);
nf_ct_set(skb, ct, oldinfo);
return NF_ACCEPT;
if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
nf_ct_match(ct, loser_ct)) {
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_conntrack_put(&loser_ct->ct_general);
nf_ct_set(skb, ct, oldinfo);
return NF_ACCEPT;
}
nf_ct_put(ct);
}
NF_CT_STAT_INC(net, drop);
return NF_DROP;
@ -1195,7 +1339,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
@ -1208,9 +1351,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
struct nf_conntrack_zone tmp;
unsigned int *timeouts;
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
pr_debug("Can't invert tuple.\n");
return NULL;
}
@ -1227,15 +1369,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
if (timeout_ext) {
timeouts = nf_ct_timeout_data(timeout_ext);
if (unlikely(!timeouts))
timeouts = l4proto->get_timeouts(net);
} else {
timeouts = l4proto->get_timeouts(net);
}
if (!l4proto->new(ct, skb, dataoff, timeouts)) {
if (!l4proto->new(ct, skb, dataoff)) {
nf_conntrack_free(ct);
pr_debug("can't track with proto module\n");
return NULL;
@ -1266,8 +1401,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
ct->master = exp->master;
if (exp->helper) {
help = nf_ct_helper_ext_add(ct, exp->helper,
GFP_ATOMIC);
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help)
rcu_assign_pointer(help->helper, exp->helper);
}
@ -1307,7 +1441,6 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
@ -1319,8 +1452,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
dataoff, l3num, protonum, net, &tuple, l4proto)) {
pr_debug("Can't get tuple\n");
return 0;
}
@ -1330,7 +1462,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
hash = hash_conntrack_raw(&tuple, net);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
h = init_conntrack(net, tmpl, &tuple, l4proto,
skb, dataoff, hash);
if (!h)
return 0;
@ -1363,14 +1495,11 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
int ret;
int dataoff, ret;
tmpl = nf_ct_get(skb, &ctinfo);
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
@ -1384,14 +1513,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
}
/* rcu_read_lock()ed by nf_hook_thresh */
l3proto = __nf_ct_l3proto_find(pf);
ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
&dataoff, &protonum);
if (ret <= 0) {
dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
if (dataoff <= 0) {
pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
ret = -ret;
ret = NF_ACCEPT;
goto out;
}
@ -1413,8 +1540,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
l3proto, l4proto);
ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
@ -1430,10 +1556,7 @@ repeat:
goto out;
}
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
ret = l4proto->packet(ct, skb, dataoff, ctinfo);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@ -1471,7 +1594,6 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
rcu_read_lock();
ret = nf_ct_invert_tuple(inverse, orig,
__nf_ct_l3proto_find(orig->src.l3num),
__nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum));
rcu_read_unlock();
@ -1609,14 +1731,14 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct nf_nat_hook *nat_hook;
unsigned int dataoff, status;
unsigned int status;
struct nf_conn *ct;
int dataoff;
u16 l3num;
u8 l4num;
@ -1625,16 +1747,15 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
return 0;
l3num = nf_ct_l3num(ct);
l3proto = nf_ct_l3proto_find_get(l3num);
if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
&l4num) <= 0)
dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
if (dataoff <= 0)
return -1;
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
l4num, net, &tuple, l3proto, l4proto))
l4num, net, &tuple, l4proto))
return -1;
if (ct->status & IPS_SRC_NAT) {
@ -2089,9 +2210,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
static __always_inline unsigned int total_extension_size(void)
{
/* remember to add new extensions below */

View File

@ -610,7 +610,6 @@ static int exp_seq_show(struct seq_file *s, void *v)
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
__nf_ct_l3proto_find(expect->tuple.src.l3num),
__nf_ct_l4proto_find(expect->tuple.src.l3num,
expect->tuple.dst.protonum));

View File

@ -24,7 +24,6 @@
#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
@ -193,8 +192,7 @@ void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
struct nf_conn_help *
nf_ct_helper_ext_add(struct nf_conn *ct,
struct nf_conntrack_helper *helper, gfp_t gfp)
nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
{
struct nf_conn_help *help;
@ -263,7 +261,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
}
if (help == NULL) {
help = nf_ct_helper_ext_add(ct, helper, flags);
help = nf_ct_helper_ext_add(ct, flags);
if (help == NULL)
return -ENOMEM;
} else {

View File

@ -1,66 +0,0 @@
/*
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
*
* Based largely upon the original ip_conntrack code which
* had the following copyright information:
*
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*/
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
/* Generic L3 tuple extraction: no addresses are read from the packet.
 * Both address unions of @tuple are cleared; @skb and @nhoff are unused.
 * Always returns true.
 */
static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
				 struct nf_conntrack_tuple *tuple)
{
	memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
	memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));

	return true;
}
/* Generic L3 tuple inversion: @orig is not consulted, the address unions
 * of @tuple are simply cleared.  Always returns true.
 */
static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
				 const struct nf_conntrack_tuple *orig)
{
	memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
	memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));

	return true;
}
/* Generic L3 has no notion of an L4 header: never track such packets.
 * None of the arguments are used; @dataoff and @protonum are left
 * untouched.  Returns -NF_ACCEPT unconditionally.
 */
static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			       unsigned int *dataoff, u_int8_t *protonum)
{
	const int never_track = -NF_ACCEPT;

	return never_track;
}
/* L3 tracker descriptor for PF_UNSPEC, wired to the generic_* callbacks
 * defined above in this file (cleared addresses, never-track
 * get_l4proto).
 */
struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
.l3proto = PF_UNSPEC,
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
.get_l4proto = generic_get_l4proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);

View File

@ -38,7 +38,6 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_acct.h>
@ -81,9 +80,26 @@ nla_put_failure:
return -1;
}
/* Append the IPv4 source and destination address of @tuple to @skb as
 * CTA_IP_V4_SRC / CTA_IP_V4_DST attributes.
 *
 * Returns 0 on success, -EMSGSIZE if either attribute could not be added
 * (the destination is not attempted when the source fails).
 */
static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
				const struct nf_conntrack_tuple *tuple)
{
	if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip))
		return -EMSGSIZE;

	if (nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
		return -EMSGSIZE;

	return 0;
}
/* Append the IPv6 source and destination address of @tuple to @skb as
 * CTA_IP_V6_SRC / CTA_IP_V6_DST attributes.
 *
 * Returns 0 on success, -EMSGSIZE if either attribute could not be added
 * (the destination is not attempted when the source fails).
 */
static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
				const struct nf_conntrack_tuple *tuple)
{
	if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6))
		return -EMSGSIZE;

	if (nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
		return -EMSGSIZE;

	return 0;
}
static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto)
const struct nf_conntrack_tuple *tuple)
{
int ret = 0;
struct nlattr *nest_parms;
@ -92,8 +108,14 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
if (!nest_parms)
goto nla_put_failure;
if (likely(l3proto->tuple_to_nlattr))
ret = l3proto->tuple_to_nlattr(skb, tuple);
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
ret = ipv4_tuple_to_nlattr(skb, tuple);
break;
case NFPROTO_IPV6:
ret = ipv6_tuple_to_nlattr(skb, tuple);
break;
}
nla_nest_end(skb, nest_parms);
@ -106,13 +128,11 @@ nla_put_failure:
static int ctnetlink_dump_tuples(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
int ret;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto);
ret = ctnetlink_dump_tuples_ip(skb, tuple);
if (ret >= 0) {
l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
@ -556,15 +576,20 @@ nla_put_failure:
return -1;
}
/* Netlink policy for the nested CTA_IP_* address attributes.  The IPv4
 * entries are typed NLA_U32; the IPv6 entries only specify a length of
 * four __be32 words.
 */
static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = {
[CTA_IP_V4_SRC] = { .type = NLA_U32 },
[CTA_IP_V4_DST] = { .type = NLA_U32 },
[CTA_IP_V6_SRC] = { .len = sizeof(__be32) * 4 },
[CTA_IP_V6_DST] = { .len = sizeof(__be32) * 4 },
};
#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
static size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
size_t len, len4 = 0;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
len = l3proto->nla_size;
len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@ -936,29 +961,54 @@ out:
return skb->len;
}
/* Fill the IPv4 addresses of @t from the parsed attribute table @tb.
 *
 * Returns 0 on success, -EINVAL when either CTA_IP_V4_SRC or
 * CTA_IP_V4_DST is absent (@t is not modified in that case).
 */
static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
				struct nf_conntrack_tuple *t)
{
	const struct nlattr *src = tb[CTA_IP_V4_SRC];
	const struct nlattr *dst = tb[CTA_IP_V4_DST];

	if (!(src && dst))
		return -EINVAL;

	t->src.u3.ip = nla_get_in_addr(src);
	t->dst.u3.ip = nla_get_in_addr(dst);

	return 0;
}
/* Fill the IPv6 addresses of @t from the parsed attribute table @tb.
 *
 * Returns 0 on success, -EINVAL when either CTA_IP_V6_SRC or
 * CTA_IP_V6_DST is absent (@t is not modified in that case).
 */
static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
				struct nf_conntrack_tuple *t)
{
	const struct nlattr *src = tb[CTA_IP_V6_SRC];
	const struct nlattr *dst = tb[CTA_IP_V6_DST];

	if (!(src && dst))
		return -EINVAL;

	t->src.u3.in6 = nla_get_in6_addr(src);
	t->dst.u3.in6 = nla_get_in6_addr(dst);

	return 0;
}
static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nf_conntrack_tuple *tuple)
{
struct nlattr *tb[CTA_IP_MAX+1];
struct nf_conntrack_l3proto *l3proto;
int ret = 0;
ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
if (ret < 0)
return ret;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
ret = nla_validate_nested(attr, CTA_IP_MAX,
cta_ip_nla_policy, NULL);
if (ret)
return ret;
if (likely(l3proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_IP_MAX,
l3proto->nla_policy, NULL);
if (ret == 0)
ret = l3proto->nlattr_to_tuple(tb, tuple);
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
ret = ipv4_nlattr_to_tuple(tb, tuple);
break;
case NFPROTO_IPV6:
ret = ipv6_nlattr_to_tuple(tb, tuple);
break;
}
rcu_read_unlock();
return ret;
}
@ -1897,7 +1947,7 @@ ctnetlink_create_conntrack(struct net *net,
} else {
struct nf_conn_help *help;
help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC);
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help == NULL) {
err = -ENOMEM;
goto err2;
@ -2581,7 +2631,6 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple_mask *mask)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple m;
struct nlattr *nest_parms;
@ -2597,8 +2646,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
goto nla_put_failure;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto);
ret = ctnetlink_dump_tuples_ip(skb, &m);
if (ret >= 0) {
l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
tuple->dst.protonum);

View File

@ -1,14 +1,4 @@
/* L3/L4 protocol support for nf_conntrack. */
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
* (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/netfilter.h>
@ -24,14 +14,36 @@
#include <linux/netdevice.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_log.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/route.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <linux/ipv6.h>
#include <linux/in6.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>
extern unsigned int nf_conntrack_net_id;
static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_l3protos);
static DEFINE_MUTEX(nf_ct_proto_mutex);
@ -122,137 +134,6 @@ __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
}
EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
/* Look up the L3 tracker for @l3proto and take a reference on its module.
 *
 * Never returns NULL: when the module reference cannot be taken the
 * generic tracker (&nf_conntrack_l3proto_generic) is handed back instead.
 */
const struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
	struct nf_conntrack_l3proto *found;

	rcu_read_lock();
	found = __nf_ct_l3proto_find(l3proto);
	found = try_module_get(found->me) ? found
					  : &nf_conntrack_l3proto_generic;
	rcu_read_unlock();

	return found;
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
/* Take a module reference on the L3 tracker for @l3proto, requesting the
 * "nf_conntrack-<l3proto>" module when only the generic tracker is found.
 *
 * Returns 0 once a non-generic tracker has been obtained, -EPROTOTYPE
 * when request_module() reports a non-zero result.
 */
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
	for (;;) {
		const struct nf_conntrack_l3proto *found;

		found = nf_ct_l3proto_find_get(l3proto);
		if (found != &nf_conntrack_l3proto_generic)
			return 0;

		/* Retry the lookup only after a zero request_module(). */
		if (request_module("nf_conntrack-%d", l3proto))
			return -EPROTOTYPE;
	}
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
void nf_ct_l3proto_module_put(unsigned short l3proto)
{
struct nf_conntrack_l3proto *p;
/* rcu_read_lock not necessary since the caller holds a reference, but
* taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
*/
rcu_read_lock();
p = __nf_ct_l3proto_find(l3proto);
module_put(p->me);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
/* Pin the L3 tracker module for @nfproto and run its optional per-netns
 * "get" callback for @net.
 *
 * Returns a negative errno if the module reference cannot be taken or
 * the callback fails (the module reference is dropped again in the
 * latter case); otherwise 0 or the callback's result.
 */
static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
	const struct nf_conntrack_l3proto *tracker;
	int err;

	might_sleep();

	err = nf_ct_l3proto_try_module_get(nfproto);
	if (err < 0)
		return err;

	/* A reference is already held, so the lookup cannot fail. */
	rcu_read_lock();
	tracker = __nf_ct_l3proto_find(nfproto);
	rcu_read_unlock();

	if (tracker->net_ns_get) {
		err = tracker->net_ns_get(net);
		if (err < 0)
			nf_ct_l3proto_module_put(nfproto);
		return err;
	}

	return 0;
}
/* Enable connection tracking for family @nfproto in @net.
 *
 * NFPROTO_INET means "IPv4 and IPv6": both are acquired, and the IPv4
 * side is released again if IPv6 fails.  Returns 0 on success or the
 * negative errno of the failing step.
 */
int nf_ct_netns_get(struct net *net, u8 nfproto)
{
	int err;

	if (nfproto != NFPROTO_INET) {
		err = nf_ct_netns_do_get(net, nfproto);
		return err < 0 ? err : 0;
	}

	err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
	if (err < 0)
		return err;

	err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
	if (err < 0) {
		nf_ct_netns_put(net, NFPROTO_IPV4);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
/* Counterpart of nf_ct_netns_do_get(): run the tracker's optional
 * per-netns "put" callback for @net, then drop the module reference.
 * Warns and returns early if no tracker is found for @nfproto.
 */
static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
{
	const struct nf_conntrack_l3proto *tracker;

	might_sleep();

	/* As in the get path, a reference is assumed to be held. */
	rcu_read_lock();
	tracker = __nf_ct_l3proto_find(nfproto);
	rcu_read_unlock();

	if (WARN_ON(tracker == NULL))
		return;

	if (tracker->net_ns_put != NULL)
		tracker->net_ns_put(net);

	nf_ct_l3proto_module_put(nfproto);
}
/* Release what nf_ct_netns_get() acquired for @nfproto in @net.
 * NFPROTO_INET releases IPv4 first, then IPv6.
 */
void nf_ct_netns_put(struct net *net, uint8_t nfproto)
{
	if (nfproto != NFPROTO_INET) {
		nf_ct_netns_do_put(net, nfproto);
		return;
	}

	nf_ct_netns_do_put(net, NFPROTO_IPV4);
	nf_ct_netns_do_put(net, NFPROTO_IPV6);
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
@ -274,11 +155,6 @@ void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
/* Iterator predicate: non-zero when conntrack @i uses the L3 family of
 * the tracker passed in @data (a struct nf_conntrack_l3proto).
 */
static int kill_l3proto(struct nf_conn *i, void *data)
{
	const struct nf_conntrack_l3proto *doomed = data;

	return nf_ct_l3num(i) == doomed->l3proto;
}
static int kill_l4proto(struct nf_conn *i, void *data)
{
const struct nf_conntrack_l4proto *l4proto;
@ -287,52 +163,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
nf_ct_l3num(i) == l4proto->l3proto;
}
/* Install @proto as the L3 tracker for its family.
 *
 * Returns 0 on success, -EBUSY if the family number is out of range or
 * the slot no longer holds the generic tracker, and (with ctnetlink
 * enabled) -EINVAL if @proto has tuple_to_nlattr but a zero nla_size.
 */
int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
{
	struct nf_conntrack_l3proto *cur;
	int err = -EBUSY;

	if (proto->l3proto >= NFPROTO_NUMPROTO)
		return -EBUSY;
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
	if (proto->tuple_to_nlattr && proto->nla_size == 0)
		return -EINVAL;
#endif

	mutex_lock(&nf_ct_proto_mutex);
	cur = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
					lockdep_is_held(&nf_ct_proto_mutex));
	/* Only a slot still occupied by the generic tracker is free. */
	if (cur == &nf_conntrack_l3proto_generic) {
		rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
		err = 0;
	}
	mutex_unlock(&nf_ct_proto_mutex);

	return err;
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
/* Remove @proto from the L3 tracker table, putting the generic tracker
 * back in its slot, then destroy every conntrack entry of that family.
 * BUGs if the family is out of range or @proto is not the registered
 * tracker.
 */
void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
{
	struct nf_conntrack_l3proto *cur;

	BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);

	mutex_lock(&nf_ct_proto_mutex);
	cur = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
					lockdep_is_held(&nf_ct_proto_mutex));
	BUG_ON(cur != proto);
	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
			   &nf_conntrack_l3proto_generic);
	mutex_unlock(&nf_ct_proto_mutex);

	synchronize_rcu();

	/* Remove all conntrack entries for this protocol */
	nf_ct_iterate_destroy(kill_l3proto, (void *)proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
const struct nf_conntrack_l4proto *l4proto)
{
@ -499,8 +329,23 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
/* Unregister the @num_proto L4 trackers listed in @l4proto[] and destroy
 * the conntrack entries that belong to them.
 *
 * The trackers are unregistered in reverse array order under
 * nf_ct_proto_mutex; after synchronize_net() the conntrack table is
 * flushed once per tracker.
 */
static void
nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
			 unsigned int num_proto)
{
	unsigned int i;

	mutex_lock(&nf_ct_proto_mutex);
	for (i = num_proto; i-- != 0; )
		__nf_ct_l4proto_unregister_one(l4proto[i]);
	mutex_unlock(&nf_ct_proto_mutex);

	synchronize_net();

	/* Remove all conntrack entries for these protocols.
	 *
	 * kill_l4proto() dereferences its cookie as ONE
	 * struct nf_conntrack_l4proto, so it must be handed each tracker in
	 * turn; passing the array itself would make it compare against the
	 * array's pointer slots instead of a tracker.
	 */
	for (i = 0; i < num_proto; i++)
		nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
}
static int
nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL, ver;
unsigned int i;
@ -518,7 +363,6 @@ int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
}
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
const struct nf_conntrack_l4proto *const l4proto[],
@ -542,20 +386,6 @@ int nf_ct_l4proto_pernet_register(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
/* Unregister the @num_proto L4 trackers listed in @l4proto[] and destroy
 * the conntrack entries that belong to them.
 *
 * The trackers are unregistered in reverse array order under
 * nf_ct_proto_mutex; after synchronize_net() the conntrack table is
 * flushed once per tracker.
 */
void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
			      unsigned int num_proto)
{
	unsigned int i;

	mutex_lock(&nf_ct_proto_mutex);
	for (i = num_proto; i-- != 0; )
		__nf_ct_l4proto_unregister_one(l4proto[i]);
	mutex_unlock(&nf_ct_proto_mutex);

	synchronize_net();

	/* Remove all conntrack entries for these protocols.
	 *
	 * kill_l4proto() dereferences its cookie as ONE
	 * struct nf_conntrack_l4proto, so it must be handed each tracker in
	 * turn; passing the array itself would make it compare against the
	 * array's pointer slots instead of a tracker.
	 */
	for (i = 0; i < num_proto; i++)
		nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
void nf_ct_l4proto_pernet_unregister(struct net *net,
const struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
@ -565,6 +395,563 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
static unsigned int ipv4_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo);
}
/* IPv4 hook: apply pending sequence-number adjustment, then confirm the
 * connection.
 *
 * Returns NF_DROP (and bumps the "drop" stat) if the sequence adjustment
 * fails; otherwise the verdict of nf_conntrack_confirm().
 */
static unsigned int ipv4_confirm(void *priv,
				 struct sk_buff *skb,
				 const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	/* adjust seqs for loopback traffic only in outgoing direction */
	if (ct && ctinfo != IP_CT_RELATED_REPLY &&
	    test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
	    !nf_is_loopback_packet(skb) &&
	    !nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
		NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
		return NF_DROP;
	}

	/* We've seen it coming out the other side: confirm it */
	return nf_conntrack_confirm(skb);
}
/* IPv4 hook entry: hand @skb to the conntrack core for PF_INET using the
 * netns and hook number carried in @state.
 */
static unsigned int ipv4_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct net *net = state->net;

	return nf_conntrack_in(net, PF_INET, state->hook, skb);
}
/* IPv4 hook for locally generated packets.
 *
 * Non-fragments go through nf_conntrack_in().  Fragments (possible when
 * the IP_NODEFRAG setsockopt is set) are accepted untracked; a template
 * conntrack attached to such a packet is detached and released first.
 */
static unsigned int ipv4_conntrack_local(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *tmpl;

	if (!ip_is_fragment(ip_hdr(skb)))
		return nf_conntrack_in(state->net, PF_INET, state->hook, skb);

	/* IP_NODEFRAG setsockopt set */
	tmpl = nf_ct_get(skb, &ctinfo);
	if (tmpl && nf_ct_is_template(tmpl)) {
		/* when skipping ct, clear templates to avoid fooling
		 * later targets/matches
		 */
		skb->_nfct = 0;
		nf_ct_put(tmpl);
	}

	return NF_ACCEPT;
}
/* Connection tracking may drop packets, but never alters them, so
 * make it the first hook.
 *
 * Hook table registered per netns for IPv4: tracking entry points at
 * PRE_ROUTING and LOCAL_OUT, and a helper + confirm pair at both
 * POST_ROUTING and LOCAL_IN.
 */
static const struct nf_hook_ops ipv4_conntrack_ops[] = {
/* tracking entry for received packets */
{
.hook = ipv4_conntrack_in,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
/* tracking entry for locally generated packets */
{
.hook = ipv4_conntrack_local,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
},
/* helper + confirm on the way out */
{
.hook = ipv4_helper,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
/* helper + confirm for locally delivered packets */
{
.hook = ipv4_helper,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
};
/* SO_ORIGINAL_DST getsockopt handler for IPv4 sockets.
 *
 * Builds a tuple from the socket's own addresses/ports (i.e. the reply
 * direction as seen by conntrack), looks it up in the default zone and
 * copies the ORIGINAL direction's destination to @user as a
 * struct sockaddr_in.
 *
 * Returns 0 on success, -ENOPROTOOPT for sockets that are neither TCP
 * nor SCTP, -EINVAL if *@len is smaller than a sockaddr_in, -ENOENT if
 * no conntrack entry matches, -EFAULT if the copy to user space fails.
 */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct nf_conntrack_tuple_hash *h;
	struct nf_conntrack_tuple tuple;
	struct sockaddr_in sin;
	struct nf_conn *ct;

	memset(&tuple, 0, sizeof(tuple));

	lock_sock(sk);
	tuple.src.u3.ip = inet->inet_rcv_saddr;
	tuple.src.u.tcp.port = inet->inet_sport;
	tuple.dst.u3.ip = inet->inet_daddr;
	tuple.dst.u.tcp.port = inet->inet_dport;
	tuple.src.l3num = PF_INET;
	tuple.dst.protonum = sk->sk_protocol;
	release_sock(sk);

	/* We only do TCP and SCTP at the moment: is there a better way? */
	switch (tuple.dst.protonum) {
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		break;
	default:
		pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
		pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
			 *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
	if (!h) {
		pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
			 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
			 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
		return -ENOENT;
	}

	ct = nf_ct_tuplehash_to_ctrack(h);

	sin.sin_family = AF_INET;
	sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
		.tuple.dst.u.tcp.port;
	sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
		.tuple.dst.u3.ip;
	memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

	pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
		 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
	nf_ct_put(ct);

	return copy_to_user(user, &sin, sizeof(sin)) != 0 ? -EFAULT : 0;
}
/* Socket-option descriptor routing SO_ORIGINAL_DST getsockopt calls on
 * PF_INET sockets to getorigdst().
 * NOTE(review): get_optmax = SO_ORIGINAL_DST + 1 suggests a half-open
 * [get_optmin, get_optmax) range - confirm against the nf_sockopt lookup.
 */
static struct nf_sockopt_ops so_getorigdst = {
.pf = PF_INET,
.get_optmin = SO_ORIGINAL_DST,
.get_optmax = SO_ORIGINAL_DST + 1,
.get = getorigdst,
.owner = THIS_MODULE,
};
#if IS_ENABLED(CONFIG_IPV6)
/* IP6T_SO_ORIGINAL_DST getsockopt handler for IPv6 sockets.
 *
 * Builds a tuple from the socket's own addresses/ports, looks it up in
 * the default zone and copies the ORIGINAL direction's destination to
 * @user as a struct sockaddr_in6 (flow label and scope id are taken from
 * the socket).
 *
 * Returns 0 on success, -ENOPROTOOPT for sockets that are neither TCP
 * nor SCTP, -EINVAL if *@len is negative or too small, -ENOENT if no
 * conntrack entry matches, -EFAULT if the copy to user space fails.
 */
static int
ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
	const struct ipv6_pinfo *inet6 = inet6_sk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct nf_conntrack_tuple_hash *h;
	struct sockaddr_in6 sin6;
	struct nf_conn *ct;
	__be32 flow_label;
	int bound_dev_if;

	/* Snapshot everything needed from the socket under its lock. */
	lock_sock(sk);
	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
	tuple.src.u.tcp.port = inet->inet_sport;
	tuple.dst.u3.in6 = sk->sk_v6_daddr;
	tuple.dst.u.tcp.port = inet->inet_dport;
	tuple.dst.protonum = sk->sk_protocol;
	bound_dev_if = sk->sk_bound_dev_if;
	flow_label = inet6->flow_label;
	release_sock(sk);

	switch (tuple.dst.protonum) {
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (*len < 0 || (unsigned int)*len < sizeof(sin6))
		return -EINVAL;

	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
	if (h == NULL) {
		pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
			 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
			 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
		return -ENOENT;
	}

	ct = nf_ct_tuplehash_to_ctrack(h);

	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
	sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
	memcpy(&sin6.sin6_addr,
	       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
	       sizeof(sin6.sin6_addr));

	nf_ct_put(ct);
	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);

	if (copy_to_user(user, &sin6, sizeof(sin6)))
		return -EFAULT;

	return 0;
}
/* Socket-option descriptor routing IP6T_SO_ORIGINAL_DST getsockopt calls
 * on NFPROTO_IPV6 sockets to ipv6_getorigdst().
 * NOTE(review): get_optmax = IP6T_SO_ORIGINAL_DST + 1 suggests a
 * half-open range - confirm against the nf_sockopt lookup.
 */
static struct nf_sockopt_ops so_getorigdst6 = {
.pf = NFPROTO_IPV6,
.get_optmin = IP6T_SO_ORIGINAL_DST,
.get_optmax = IP6T_SO_ORIGINAL_DST + 1,
.get = ipv6_getorigdst,
.owner = THIS_MODULE,
};
static unsigned int ipv6_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned char pnum = ipv6_hdr(skb)->nexthdr;
int protoff;
__be16 frag_off;
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
goto out;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
goto out;
}
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
}
out:
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
/* IPv6 hook entry: hand @skb to the conntrack core for PF_INET6 using the
 * netns and hook number carried in @state.
 */
static unsigned int ipv6_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct net *net = state->net;

	return nf_conntrack_in(net, PF_INET6, state->hook, skb);
}
/* IPv6 hook for locally generated packets: same processing as
 * ipv6_conntrack_in(), i.e. a direct call into nf_conntrack_in() for
 * PF_INET6.
 */
static unsigned int ipv6_conntrack_local(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	struct net *net = state->net;

	return nf_conntrack_in(net, PF_INET6, state->hook, skb);
}
static unsigned int ipv6_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
__be16 frag_off;
int protoff;
u8 nexthdr;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
nexthdr = ipv6_hdr(skb)->nexthdr;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
return helper->help(skb, protoff, ct, ctinfo);
}
/* Hook table registered per netns for IPv6: tracking entry points at
 * PRE_ROUTING and LOCAL_OUT, and a helper + confirm pair at both
 * POST_ROUTING and LOCAL_IN.  Note the confirm hook uses NF_IP6_PRI_LAST
 * at POST_ROUTING but NF_IP6_PRI_LAST - 1 at LOCAL_IN.
 */
static const struct nf_hook_ops ipv6_conntrack_ops[] = {
/* tracking entry for received packets */
{
.hook = ipv6_conntrack_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK,
},
/* tracking entry for locally generated packets */
{
.hook = ipv6_conntrack_local,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK,
},
/* helper + confirm on the way out */
{
.hook = ipv6_helper,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_LAST,
},
/* helper + confirm for locally delivered packets */
{
.hook = ipv6_helper,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_LAST - 1,
},
};
#endif
/* Take one conntrack user reference for family @nfproto in @net.
 *
 * The first user of a family enables defragmentation and registers that
 * family's hook table; later users only bump the counter.  If either
 * setup step fails the counter is reset to 0.
 *
 * Returns 0 on success, -EPROTO for an unsupported family, or the error
 * from the defrag/hook registration step.  Serialized by
 * nf_ct_proto_mutex.
 */
static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
	int err = 0;

	mutex_lock(&nf_ct_proto_mutex);

	switch (nfproto) {
	case NFPROTO_IPV4:
		if (++cnet->users4 > 1)
			break;	/* hooks already in place */

		err = nf_defrag_ipv4_enable(net);
		if (!err)
			err = nf_register_net_hooks(net, ipv4_conntrack_ops,
						    ARRAY_SIZE(ipv4_conntrack_ops));
		if (err)
			cnet->users4 = 0;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case NFPROTO_IPV6:
		if (++cnet->users6 > 1)
			break;	/* hooks already in place */

		err = nf_defrag_ipv6_enable(net);
		if (err < 0) {
			cnet->users6 = 0;
			break;
		}

		err = nf_register_net_hooks(net, ipv6_conntrack_ops,
					    ARRAY_SIZE(ipv6_conntrack_ops));
		if (err)
			cnet->users6 = 0;
		break;
#endif
	default:
		err = -EPROTO;
		break;
	}

	mutex_unlock(&nf_ct_proto_mutex);
	return err;
}
/* Drop one conntrack user reference for family @nfproto in @net.
 *
 * When the last user of a family goes away its hook table is
 * unregistered.  A put with a zero counter, or for an unsupported
 * family, does nothing.  Serialized by nf_ct_proto_mutex.
 */
static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
{
	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);

	mutex_lock(&nf_ct_proto_mutex);

	switch (nfproto) {
	case NFPROTO_IPV4:
		if (cnet->users4 == 0)
			break;
		cnet->users4--;
		if (cnet->users4 == 0)
			nf_unregister_net_hooks(net, ipv4_conntrack_ops,
						ARRAY_SIZE(ipv4_conntrack_ops));
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case NFPROTO_IPV6:
		if (cnet->users6 == 0)
			break;
		cnet->users6--;
		if (cnet->users6 == 0)
			nf_unregister_net_hooks(net, ipv6_conntrack_ops,
						ARRAY_SIZE(ipv6_conntrack_ops));
		break;
#endif
	}

	mutex_unlock(&nf_ct_proto_mutex);
}
/* Enable connection tracking for family @nfproto in @net.
 *
 * NFPROTO_INET means "IPv4 and IPv6": both are acquired, and the IPv4
 * side is released again if IPv6 fails.  Returns 0 on success or the
 * negative errno of the failing step.
 */
int nf_ct_netns_get(struct net *net, u8 nfproto)
{
	int err;

	if (nfproto != NFPROTO_INET) {
		err = nf_ct_netns_do_get(net, nfproto);
		return err < 0 ? err : 0;
	}

	err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
	if (err < 0)
		return err;

	err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
	if (err < 0) {
		nf_ct_netns_put(net, NFPROTO_IPV4);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
/* Release what nf_ct_netns_get() acquired for @nfproto in @net.
 * NFPROTO_INET releases IPv4 first, then IPv6.
 */
void nf_ct_netns_put(struct net *net, uint8_t nfproto)
{
	if (nfproto != NFPROTO_INET) {
		nf_ct_netns_do_put(net, nfproto);
		return;
	}

	nf_ct_netns_do_put(net, NFPROTO_IPV4);
	nf_ct_netns_do_put(net, NFPROTO_IPV6);
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
/* L4 trackers compiled into the conntrack core.  This array is what
 * nf_conntrack_proto_init()/_fini() and the pernet init/fini paths in
 * this file register and unregister.  DCCP, SCTP and UDPLITE entries
 * depend on their CONFIG_NF_CT_PROTO_* option; all IPv6 entries depend
 * on CONFIG_IPV6.
 */
static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
/* IPv4 trackers */
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_icmp,
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp4,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
&nf_conntrack_l4proto_sctp4,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
&nf_conntrack_l4proto_udplite4,
#endif
#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 trackers */
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
&nf_conntrack_l4proto_sctp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
&nf_conntrack_l4proto_udplite6,
#endif
#endif /* CONFIG_IPV6 */
};
/* Module-wide protocol setup: register the original-destination sockopt
 * handlers (IPv4, and IPv6 when enabled), then the built-in L4 trackers.
 *
 * Returns a negative errno when any step fails; otherwise the
 * non-negative result of nf_ct_l4proto_register().  On failure everything
 * registered so far is unregistered again, in reverse order of
 * registration.
 */
int nf_conntrack_proto_init(void)
{
	int ret;

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret < 0)
		return ret;

#if IS_ENABLED(CONFIG_IPV6)
	ret = nf_register_sockopt(&so_getorigdst6);
	if (ret < 0)
		goto cleanup_sockopt;
#endif
	ret = nf_ct_l4proto_register(builtin_l4proto,
				     ARRAY_SIZE(builtin_l4proto));
	if (ret < 0)
		goto cleanup_sockopt2;

	return ret;

cleanup_sockopt2:
#if IS_ENABLED(CONFIG_IPV6)
	/* Only reached after so_getorigdst6 was registered successfully. */
	nf_unregister_sockopt(&so_getorigdst6);
cleanup_sockopt:
#endif
	/* so_getorigdst is registered on every path that gets here. */
	nf_unregister_sockopt(&so_getorigdst);
	return ret;
}
/* Module-wide protocol teardown: unregister the built-in L4 trackers and
 * the original-destination sockopt handlers, then free the per-family
 * tracker tables in nf_ct_protos[].
 */
void nf_conntrack_proto_fini(void)
{
	unsigned int family = 0;

	nf_ct_l4proto_unregister(builtin_l4proto,
				 ARRAY_SIZE(builtin_l4proto));

	nf_unregister_sockopt(&so_getorigdst);
#if IS_ENABLED(CONFIG_IPV6)
	nf_unregister_sockopt(&so_getorigdst6);
#endif

	/* free l3proto protocol tables */
	while (family < ARRAY_SIZE(nf_ct_protos)) {
		kfree(nf_ct_protos[family]);
		family++;
	}
}
int nf_conntrack_proto_pernet_init(struct net *net)
{
int err;
@ -581,6 +968,14 @@ int nf_conntrack_proto_pernet_init(struct net *net)
if (err < 0)
return err;
err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
ARRAY_SIZE(builtin_l4proto));
if (err < 0) {
nf_ct_l4proto_unregister_sysctl(net, pn,
&nf_conntrack_l4proto_generic);
return err;
}
pn->users++;
return 0;
}
@ -590,25 +985,19 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
struct nf_proto_net *pn = nf_ct_l4proto_net(net,
&nf_conntrack_l4proto_generic);
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
ARRAY_SIZE(builtin_l4proto));
pn->users--;
nf_ct_l4proto_unregister_sysctl(net,
pn,
&nf_conntrack_l4proto_generic);
}
/* Point every slot of nf_ct_l3protos[] at the generic L3 tracker.
 * Always returns 0.
 */
int nf_conntrack_proto_init(void)
{
	unsigned int family = 0;

	while (family < NFPROTO_NUMPROTO) {
		rcu_assign_pointer(nf_ct_l3protos[family],
				   &nf_conntrack_l3proto_generic);
		family++;
	}

	return 0;
}
/* Free the per-family tracker tables held in nf_ct_protos[]. */
void nf_conntrack_proto_fini(void)
{
	unsigned int family = 0;

	/* free l3proto protocol tables */
	while (family < ARRAY_SIZE(nf_ct_protos)) {
		kfree(nf_ct_protos[family]);
		family++;
	}
}
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
MODULE_ALIAS("ip_conntrack");
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");

View File

@ -23,6 +23,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_log.h>
/* Timeouts are based on values from RFC4340:
@ -388,31 +389,8 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
return &net->ct.nf_ct_proto.dccp;
}
static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
struct dccp_hdr _hdr, *dh;
/* Actually only need first 4 bytes to get ports. */
dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (dh == NULL)
return false;
tuple->src.u.dccp.port = dh->dccph_sport;
tuple->dst.u.dccp.port = dh->dccph_dport;
return true;
}
/* Build the reverse-direction DCCP ports in @inv by swapping the source
 * and destination ports of @tuple.  Always returns true.
 */
static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
			      const struct nf_conntrack_tuple *tuple)
{
	inv->dst.u.dccp.port = tuple->src.u.dccp.port;
	inv->src.u.dccp.port = tuple->dst.u.dccp.port;

	return true;
}
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
struct net *net = nf_ct_net(ct);
struct nf_dccp_net *dn;
@ -460,19 +438,14 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
ntohl(dhack->dccph_ack_nr_low);
}
static unsigned int *dccp_get_timeouts(struct net *net)
{
return dccp_pernet(net)->dccp_timeout;
}
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
unsigned int *timeouts)
unsigned int dataoff, enum ip_conntrack_info ctinfo)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
u_int8_t type, old_state, new_state;
enum ct_dccp_roles role;
unsigned int *timeouts;
dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
BUG_ON(dh == NULL);
@ -546,6 +519,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
if (new_state != old_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
return NF_ACCEPT;
@ -864,11 +840,8 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
.packet = dccp_packet,
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
@ -900,11 +873,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
.packet = dccp_packet,
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS

View File

@ -11,6 +11,7 @@
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_timeout.h>
static const unsigned int nf_ct_generic_timeout = 600*HZ;
@ -41,34 +42,24 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
return true;
}
/*
 * The generic tracker carries no per-protocol addressing, so the inverted
 * tuple simply gets both protocol parts cleared. @orig is not consulted.
 * Always returns true.
 */
static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
				 const struct nf_conntrack_tuple *orig)
{
	tuple->src.u.all = 0;
	tuple->dst.u.all = 0;

	return true;
}
static unsigned int *generic_get_timeouts(struct net *net)
{
return &(generic_pernet(net)->timeout);
}
/* Returns verdict for packet, or -1 for invalid. */
static int generic_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeout)
enum ip_conntrack_info ctinfo)
{
const unsigned int *timeout = nf_ct_timeout_lookup(ct);
if (!timeout)
timeout = &generic_pernet(nf_ct_net(ct))->timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
bool ret;
@ -87,8 +78,11 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeout = data;
struct nf_generic_net *gn = generic_pernet(net);
unsigned int *timeout = data;
if (!timeout)
timeout = &gn->timeout;
if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT])
*timeout =
@ -168,9 +162,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
.l3proto = PF_UNSPEC,
.l4proto = 255,
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
.packet = generic_packet,
.get_timeouts = generic_get_timeouts,
.new = generic_new,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
.ctnl_timeout = {

View File

@ -39,6 +39,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
@ -179,15 +180,6 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
/*
 * Invert the GRE part of a tuple: the destination key of @tuple is taken
 * from the source key of @orig and the source key of @tuple from the
 * destination key of @orig. Only the GRE key fields are written.
 * Always returns true.
 */
static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->dst.u.gre.key = orig->src.u.gre.key;
tuple->src.u.gre.key = orig->dst.u.gre.key;
return true;
}
/* gre hdr info to tuple */
static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
@ -243,8 +235,7 @@ static unsigned int *gre_get_timeouts(struct net *net)
static int gre_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeouts)
enum ip_conntrack_info ctinfo)
{
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
@ -263,8 +254,13 @@ static int gre_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
unsigned int *timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = gre_get_timeouts(nf_ct_net(ct));
pr_debug(": ");
nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
@ -300,6 +296,8 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
unsigned int *timeouts = data;
struct netns_proto_gre *net_gre = gre_pernet(net);
if (!timeouts)
timeouts = gre_get_timeouts(net);
/* set default timeouts for GRE. */
timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
@ -356,11 +354,9 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
.get_timeouts = gre_get_timeouts,
.packet = gre_packet,
.new = gre_new,
.destroy = gre_destroy,

View File

@ -19,6 +19,7 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_log.h>
@ -80,12 +81,16 @@ static unsigned int *icmp_get_timeouts(struct net *net)
static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeout)
enum ip_conntrack_info ctinfo)
{
/* Do not immediately delete the connection after the first
successful reply to avoid excessive conntrackd traffic
and also to handle correctly ICMP echo reply duplicates. */
unsigned int *timeout = nf_ct_timeout_lookup(ct);
if (!timeout)
timeout = icmp_get_timeouts(nf_ct_net(ct));
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
@ -93,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
[ICMP_ECHO] = 1,
@ -142,8 +147,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!nf_ct_invert_tuple(&innertuple, &origtuple,
&nf_conntrack_l3proto_ipv4, innerproto)) {
if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
}
@ -281,9 +285,11 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct nf_icmp_net *in = icmp_pernet(net);
if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
if (!timeout)
timeout = &in->timeout;
*timeout =
ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
} else {
} else if (timeout) {
/* Set default ICMP timeout. */
*timeout = in->timeout;
}
@ -358,7 +364,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
.packet = icmp_packet,
.get_timeouts = icmp_get_timeouts,
.new = icmp_new,
.error = icmp_error,
.destroy = NULL,

View File

@ -23,6 +23,7 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
#include <net/netfilter/nf_log.h>
@ -93,9 +94,13 @@ static unsigned int *icmpv6_get_timeouts(struct net *net)
static int icmpv6_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeout)
enum ip_conntrack_info ctinfo)
{
unsigned int *timeout = nf_ct_timeout_lookup(ct);
if (!timeout)
timeout = icmpv6_get_timeouts(nf_ct_net(ct));
/* Do not immediately delete the connection after the first
successful reply to avoid excessive conntrackd traffic
and also to handle correctly ICMP echo reply duplicates. */
@ -106,7 +111,7 @@ static int icmpv6_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
[ICMPV6_ECHO_REQUEST - 128] = 1,
@ -152,8 +157,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!nf_ct_invert_tuple(&intuple, &origtuple,
&nf_conntrack_l3proto_ipv6, inproto)) {
if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
pr_debug("icmpv6_error: Can't invert tuple\n");
return -NF_ACCEPT;
}
@ -281,6 +285,8 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
unsigned int *timeout = data;
struct nf_icmp_net *in = icmpv6_pernet(net);
if (!timeout)
timeout = icmpv6_get_timeouts(net);
if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
*timeout =
ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
@ -359,7 +365,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
.packet = icmpv6_packet,
.get_timeouts = icmpv6_get_timeouts,
.new = icmpv6_new,
.error = icmpv6_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)

View File

@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
@ -150,30 +151,6 @@ static inline struct nf_sctp_net *sctp_pernet(struct net *net)
return &net->ct.nf_ct_proto.sctp;
}
static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct sctphdr *hp;
struct sctphdr _hdr;
/* Actually only need first 4 bytes to get ports. */
hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
tuple->src.u.sctp.port = hp->source;
tuple->dst.u.sctp.port = hp->dest;
return true;
}
/*
 * Fill the SCTP ports of @tuple with the ports of @orig swapped: the source
 * port of @tuple is the destination port of @orig and vice versa.
 * Only the SCTP port fields are written. Always returns true.
 */
static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u.sctp.port = orig->dst.u.sctp.port;
tuple->dst.u.sctp.port = orig->src.u.sctp.port;
return true;
}
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@ -296,17 +273,11 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
return sctp_conntracks[dir][i][cur_state];
}
static unsigned int *sctp_get_timeouts(struct net *net)
{
return sctp_pernet(net)->timeouts;
}
/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
static int sctp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeouts)
enum ip_conntrack_info ctinfo)
{
enum sctp_conntrack new_state, old_state;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@ -315,6 +286,7 @@ static int sctp_packet(struct nf_conn *ct,
const struct sctp_chunkhdr *sch;
struct sctp_chunkhdr _sch;
u_int32_t offset, count;
unsigned int *timeouts;
unsigned long map[256 / sizeof(unsigned long)] = { 0 };
sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
@ -403,6 +375,10 @@ static int sctp_packet(struct nf_conn *ct,
}
spin_unlock_bh(&ct->lock);
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
@ -423,7 +399,7 @@ out:
/* Called when a new connection for this protocol found. */
static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
enum sctp_conntrack new_state;
const struct sctphdr *sh;
@ -780,13 +756,10 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
.error = sctp_error,
.can_early_drop = sctp_can_early_drop,
@ -817,13 +790,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
.error = sctp_error,
.can_early_drop = sctp_can_early_drop,

View File

@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@ -276,31 +277,6 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net)
return &net->ct.nf_ct_proto.tcp;
}
static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct tcphdr *hp;
struct tcphdr _hdr;
/* Actually only need first 4 bytes to get ports. */
hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
tuple->src.u.tcp.port = hp->source;
tuple->dst.u.tcp.port = hp->dest;
return true;
}
/*
 * Fill the TCP ports of @tuple with the ports of @orig swapped: the source
 * port of @tuple is the destination port of @orig and vice versa.
 * Only the TCP port fields are written. Always returns true.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u.tcp.port = orig->dst.u.tcp.port;
tuple->dst.u.tcp.port = orig->src.u.tcp.port;
return true;
}
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@ -793,27 +769,21 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
}
static unsigned int *tcp_get_timeouts(struct net *net)
{
return tcp_pernet(net)->timeouts;
}
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeouts)
enum ip_conntrack_info ctinfo)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = tcp_pernet(net);
struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
unsigned int index, *timeouts;
enum ip_conntrack_dir dir;
const struct tcphdr *th;
struct tcphdr _tcph;
unsigned long timeout;
unsigned int index;
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
BUG_ON(th == NULL);
@ -1046,6 +1016,10 @@ static int tcp_packet(struct nf_conn *ct,
&& new_state == TCP_CONNTRACK_FIN_WAIT)
ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = tn->timeouts;
if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
timeout = timeouts[TCP_CONNTRACK_RETRANS];
@ -1095,7 +1069,7 @@ static int tcp_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
enum tcp_conntrack new_state;
const struct tcphdr *th;
@ -1313,10 +1287,12 @@ static unsigned int tcp_nlattr_tuple_size(void)
static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
struct nf_tcp_net *tn = tcp_pernet(net);
unsigned int *timeouts = data;
int i;
if (!timeouts)
timeouts = tn->timeouts;
/* set default TCP timeouts. */
for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
timeouts[i] = tn->timeouts[i];
@ -1559,13 +1535,10 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
.error = tcp_error,
.can_early_drop = tcp_can_early_drop,
@ -1597,13 +1570,10 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
.error = tcp_error,
.can_early_drop = tcp_can_early_drop,

View File

@ -22,6 +22,7 @@
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@ -36,33 +37,6 @@ static inline struct nf_udp_net *udp_pernet(struct net *net)
return &net->ct.nf_ct_proto.udp;
}
static bool udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct net *net,
struct nf_conntrack_tuple *tuple)
{
const struct udphdr *hp;
struct udphdr _hdr;
/* Actually only need first 4 bytes to get ports. */
hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
tuple->src.u.udp.port = hp->source;
tuple->dst.u.udp.port = hp->dest;
return true;
}
/*
 * Fill the UDP ports of @tuple with the ports of @orig swapped: the source
 * port of @tuple is the destination port of @orig and vice versa.
 * Only the UDP port fields are written. Always returns true.
 */
static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u.udp.port = orig->dst.u.udp.port;
tuple->dst.u.udp.port = orig->src.u.udp.port;
return true;
}
static unsigned int *udp_get_timeouts(struct net *net)
{
return udp_pernet(net)->timeouts;
@ -72,9 +46,14 @@ static unsigned int *udp_get_timeouts(struct net *net)
static int udp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeouts)
enum ip_conntrack_info ctinfo)
{
unsigned int *timeouts;
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@ -92,7 +71,7 @@ static int udp_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
return true;
}
@ -203,6 +182,9 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
unsigned int *timeouts = data;
struct nf_udp_net *un = udp_pernet(net);
if (!timeouts)
timeouts = un->timeouts;
/* set default timeouts for UDP. */
timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED];
timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED];
@ -301,10 +283,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
.error = udp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@ -333,10 +312,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
.error = udplite_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@ -365,10 +341,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
.error = udp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@ -397,10 +370,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
.error = udplite_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@ -423,3 +393,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
#endif
#include <net/netfilter/nf_conntrack_timeout.h>

View File

@ -1,12 +1,4 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2005-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/slab.h>
@ -24,7 +16,6 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
@ -33,15 +24,14 @@
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <linux/rculist_nulls.h>
MODULE_LICENSE("GPL");
unsigned int nf_conntrack_net_id __read_mostly;
#ifdef CONFIG_NF_CONNTRACK_PROCFS
void
print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
switch (l3proto->l3proto) {
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
seq_printf(s, "src=%pI4 dst=%pI4 ",
&tuple->src.u3.ip, &tuple->dst.u3.ip);
@ -282,7 +272,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_tuple_hash *hash = v;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct net *net = seq_file_net(s);
int ret = 0;
@ -303,14 +292,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!net_eq(nf_ct_net(ct), net))
goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
WARN_ON(!l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
WARN_ON(!l4proto);
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u ",
l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct),
l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
@ -320,7 +307,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
l4proto->print_conntrack(s, ct);
print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, l4proto);
l4proto);
ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
@ -333,8 +320,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
seq_puts(s, "[UNREPLIED] ");
print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto);
print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto);
ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
@ -680,6 +666,8 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
/*
 * Per-network-namespace hooks for conntrack: one init callback and a
 * batched exit callback.
 * NOTE(review): .id/.size presumably ask the netns core to allocate a
 * struct nf_conntrack_net per namespace and publish its slot in
 * nf_conntrack_net_id - confirm against the pernet_operations documentation.
 */
static struct pernet_operations nf_conntrack_net_ops = {
.init = nf_conntrack_pernet_init,
.exit_batch = nf_conntrack_pernet_exit,
.id = &nf_conntrack_net_id,
.size = sizeof(struct nf_conntrack_net),
};
static int __init nf_conntrack_standalone_init(void)

View File

@ -107,11 +107,12 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
tcp->seen[1].td_maxwin = 0;
}
/*
 * Fixed conntrack timeouts, in jiffies (120 and 30 seconds), that
 * flow_offload_fixup_ct_state() assigns for TCP and UDP flows instead of
 * the per-protocol timeout tables.
 */
#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
unsigned int *timeouts;
unsigned int timeout;
int l4num;
@ -123,14 +124,10 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
if (!l4proto)
return;
timeouts = l4proto->get_timeouts(net);
if (!timeouts)
return;
if (l4num == IPPROTO_TCP)
timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
else if (l4num == IPPROTO_UDP)
timeout = timeouts[UDP_CT_REPLIED];
timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
else
return;

View File

@ -28,7 +28,6 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
@ -743,12 +742,6 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
{
int err;
err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
if (err < 0)
return err;
mutex_lock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
&nf_nat_l4proto_tcp);
@ -781,7 +774,6 @@ void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
synchronize_rcu();
nf_nat_l3proto_clean(l3proto->l3proto);
nf_ct_l3proto_module_put(l3proto->l3proto);
}
EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);

View File

@ -21,15 +21,14 @@
#include <linux/netfilter/nf_osf.h>
static inline int nf_osf_ttl(const struct sk_buff *skb,
const struct nf_osf_info *info,
unsigned char f_ttl)
int ttl_check, unsigned char f_ttl)
{
const struct iphdr *ip = ip_hdr(skb);
if (info->flags & NF_OSF_TTL) {
if (info->ttl == NF_OSF_TTL_TRUE)
if (ttl_check != -1) {
if (ttl_check == NF_OSF_TTL_TRUE)
return ip->ttl == f_ttl;
if (info->ttl == NF_OSF_TTL_NOCHECK)
if (ttl_check == NF_OSF_TTL_NOCHECK)
return 1;
else if (ip->ttl <= f_ttl)
return 1;
@ -52,139 +51,174 @@ static inline int nf_osf_ttl(const struct sk_buff *skb,
return ip->ttl == f_ttl;
}
/*
 * Values extracted once per packet by nf_osf_hdr_ctx_init() and then
 * compared against every fingerprint.
 */
struct nf_osf_hdr_ctx {
/* IP_DF bit of the IPv4 header; also selects the fingerprint list */
bool df;
/* TCP window, host byte order */
u16 window;
/* IPv4 total length, host byte order */
u16 totlen;
/* first TCP option byte; stays NULL when the header carries no options */
const unsigned char *optp;
/* number of TCP option bytes (doff * 4 - sizeof(struct tcphdr)) */
unsigned int optsize;
};
/*
 * Test a single fingerprint @f against the packet summarised in @ctx.
 *
 * A fingerprint matches when the IP total length and the TTL agree, the
 * TCP option kinds appear in the fingerprint's order with the same overall
 * size, and the TCP window satisfies the fingerprint's window-size rule.
 *
 * @skb:       packet being classified (only handed to the TTL check)
 * @f:         fingerprint supplied by userspace
 * @ttl_check: TTL comparison mode passed through to nf_osf_ttl()
 * @ctx:       per-packet header summary; it is only read, never modified,
 *             so the same context can be reused for the next fingerprint
 *
 * Returns true on a match, false otherwise.
 */
static bool nf_osf_match_one(const struct sk_buff *skb,
			     const struct nf_osf_user_finger *f,
			     int ttl_check,
			     struct nf_osf_hdr_ctx *ctx)
{
	/*
	 * Walk the options with a private cursor. Advancing ctx->optp itself
	 * would make every later fingerprint start in the middle (or past the
	 * end) of the option block.
	 */
	const unsigned char *optp = ctx->optp;
	unsigned int left = ctx->optsize;
	unsigned int check_WSS = 0;
	int fmatch = FMATCH_WRONG;
	int foptsize, optnum;
	u16 mss = 0;

	if (ctx->totlen != f->ss || !nf_osf_ttl(skb, ttl_check, f->ttl))
		return false;

	/*
	 * Should not happen if userspace parser was written correctly.
	 */
	if (f->wss.wc >= OSF_WSS_MAX)
		return false;

	/* Check options */

	foptsize = 0;
	for (optnum = 0; optnum < f->opt_num; ++optnum)
		foptsize += f->opt[optnum].length;

	if (foptsize > MAX_IPOPTLEN ||
	    ctx->optsize > MAX_IPOPTLEN ||
	    ctx->optsize != foptsize)
		return false;

	/* The fingerprint lists options but there are no bytes to compare. */
	if (f->opt_num && !optp)
		return false;

	check_WSS = f->wss.wc;

	for (optnum = 0; optnum < f->opt_num; ++optnum) {
		__u32 len = f->opt[optnum].length;

		/* Never look at a byte beyond the packet's option block. */
		if (!left || f->opt[optnum].kind != *optp) {
			fmatch = FMATCH_OPT_WRONG;
			break;
		}

		fmatch = FMATCH_OK;

		/*
		 * The MSS value is the 16-bit big-endian field that follows
		 * the kind and length bytes; assemble it byte by byte so the
		 * result is the same on every host endianness.
		 */
		if (*optp == OSFOPT_MSS && len >= 4)
			mss = (optp[2] << 8) | optp[3];

		optp += len;
		left -= len;
	}

	if (fmatch != FMATCH_OPT_WRONG) {
		fmatch = FMATCH_WRONG;

		switch (check_WSS) {
		case OSF_WSS_PLAIN:
			/* A fingerprint value of 0 accepts any window. */
			if (f->wss.val == 0 || ctx->window == f->wss.val)
				fmatch = FMATCH_OK;
			break;
		case OSF_WSS_MSS:
			/*
			 * Some smart modems mangle (decrease) the MSS to
			 * SMART_MSS_2, so we check the standard value, the
			 * decreased value and the MSS found in the packet.
			 */
#define SMART_MSS_1 1460
#define SMART_MSS_2 1448
			if (ctx->window == f->wss.val * mss ||
			    ctx->window == f->wss.val * SMART_MSS_1 ||
			    ctx->window == f->wss.val * SMART_MSS_2)
				fmatch = FMATCH_OK;
			break;
		case OSF_WSS_MTU:
			if (ctx->window == f->wss.val * (mss + 40) ||
			    ctx->window == f->wss.val * (SMART_MSS_1 + 40) ||
			    ctx->window == f->wss.val * (SMART_MSS_2 + 40))
				fmatch = FMATCH_OK;
			break;
		case OSF_WSS_MODULO:
			/* A zero modulus can never match; do not divide by it. */
			if (f->wss.val != 0 &&
			    (ctx->window % f->wss.val) == 0)
				fmatch = FMATCH_OK;
			break;
		default:
			break;
		}
	}

	return fmatch == FMATCH_OK;
}
/*
 * Extract from @skb the values that fingerprint matching needs.
 *
 * @ctx:  filled with total length, DF flag, window and, when the TCP header
 *        carries options, their size and location; the option fields are
 *        left untouched otherwise, so the caller must pre-initialise @ctx
 * @skb:  packet to inspect
 * @ip:   IPv4 header of @skb
 * @opts: scratch buffer that skb_header_pointer() may use for the option
 *        bytes
 *
 * Returns the TCP header, or NULL when the TCP header or its options cannot
 * be read or the segment is not a SYN.
 *
 * NOTE(review): skb_header_pointer() may hand back the address of the local
 * _tcph copy instead of a pointer into the packet; in that case the returned
 * pointer is only safe to compare against NULL. Confirm no caller
 * dereferences it.
 */
static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
						const struct sk_buff *skb,
						const struct iphdr *ip,
						unsigned char *opts)
{
	const struct tcphdr *tcp;
	struct tcphdr _tcph;

	tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
	if (!tcp)
		return NULL;

	if (!tcp->syn)
		return NULL;

	ctx->totlen = ntohs(ip->tot_len);
	ctx->df = ntohs(ip->frag_off) & IP_DF;
	ctx->window = ntohs(tcp->window);

	if (tcp->doff * 4 > sizeof(struct tcphdr)) {
		ctx->optsize = tcp->doff * 4 - sizeof(struct tcphdr);

		ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) +
				sizeof(struct tcphdr), ctx->optsize, opts);
		/*
		 * A truncated packet would otherwise leave a non-zero optsize
		 * paired with a NULL optp, which the matcher dereferences.
		 */
		if (!ctx->optp)
			return NULL;
	}

	return tcp;
}
bool
nf_osf_match(const struct sk_buff *skb, u_int8_t family,
int hooknum, struct net_device *in, struct net_device *out,
const struct nf_osf_info *info, struct net *net,
const struct list_head *nf_osf_fingers)
{
const unsigned char *optp = NULL, *_optp = NULL;
unsigned int optsize = 0, check_WSS = 0;
int fmatch = FMATCH_WRONG, fcount = 0;
const struct iphdr *ip = ip_hdr(skb);
const struct nf_osf_user_finger *f;
unsigned char opts[MAX_IPOPTLEN];
const struct nf_osf_finger *kf;
u16 window, totlen, mss = 0;
int fcount = 0, ttl_check;
int fmatch = FMATCH_WRONG;
struct nf_osf_hdr_ctx ctx;
const struct tcphdr *tcp;
struct tcphdr _tcph;
bool df;
tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
memset(&ctx, 0, sizeof(ctx));
tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
if (!tcp)
return false;
if (!tcp->syn)
return false;
ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1;
totlen = ntohs(ip->tot_len);
df = ntohs(ip->frag_off) & IP_DF;
window = ntohs(tcp->window);
if (tcp->doff * 4 > sizeof(struct tcphdr)) {
optsize = tcp->doff * 4 - sizeof(struct tcphdr);
_optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
sizeof(struct tcphdr), optsize, opts);
}
list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) {
int foptsize, optnum;
list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
f = &kf->finger;
if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
continue;
optp = _optp;
fmatch = FMATCH_WRONG;
if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl))
if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
continue;
/*
* Should not happen if userspace parser was written correctly.
*/
if (f->wss.wc >= OSF_WSS_MAX)
continue;
/* Check options */
foptsize = 0;
for (optnum = 0; optnum < f->opt_num; ++optnum)
foptsize += f->opt[optnum].length;
if (foptsize > MAX_IPOPTLEN ||
optsize > MAX_IPOPTLEN ||
optsize != foptsize)
continue;
check_WSS = f->wss.wc;
for (optnum = 0; optnum < f->opt_num; ++optnum) {
if (f->opt[optnum].kind == (*optp)) {
__u32 len = f->opt[optnum].length;
const __u8 *optend = optp + len;
fmatch = FMATCH_OK;
switch (*optp) {
case OSFOPT_MSS:
mss = optp[3];
mss <<= 8;
mss |= optp[2];
mss = ntohs((__force __be16)mss);
break;
case OSFOPT_TS:
break;
}
optp = optend;
} else
fmatch = FMATCH_OPT_WRONG;
if (fmatch != FMATCH_OK)
break;
}
if (fmatch != FMATCH_OPT_WRONG) {
fmatch = FMATCH_WRONG;
switch (check_WSS) {
case OSF_WSS_PLAIN:
if (f->wss.val == 0 || window == f->wss.val)
fmatch = FMATCH_OK;
break;
case OSF_WSS_MSS:
/*
* Some smart modems decrease mangle MSS to
* SMART_MSS_2, so we check standard, decreased
* and the one provided in the fingerprint MSS
* values.
*/
#define SMART_MSS_1 1460
#define SMART_MSS_2 1448
if (window == f->wss.val * mss ||
window == f->wss.val * SMART_MSS_1 ||
window == f->wss.val * SMART_MSS_2)
fmatch = FMATCH_OK;
break;
case OSF_WSS_MTU:
if (window == f->wss.val * (mss + 40) ||
window == f->wss.val * (SMART_MSS_1 + 40) ||
window == f->wss.val * (SMART_MSS_2 + 40))
fmatch = FMATCH_OK;
break;
case OSF_WSS_MODULO:
if ((window % f->wss.val) == 0)
fmatch = FMATCH_OK;
break;
}
}
if (fmatch != FMATCH_OK)
continue;
fmatch = FMATCH_OK;
fcount++;

View File

@ -455,20 +455,59 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
return NULL;
}
/*
* Loading a module requires dropping the mutex that guards the
* transaction.
* We first need to abort any pending transaction, because once the
* mutex is unlocked a different client could start a new
* transaction. That client must not see any 'future generation'
* changes, as these changes will never happen.
*/
#ifdef CONFIG_MODULES
static int __nf_tables_abort(struct net *net);
/*
 * Abort the pending transaction and try to load a module whose name is
 * built from the printf-style @fmt and its arguments.
 *
 * The caller holds net->nft.commit_mutex; it is released only around the
 * request_module() call and re-acquired before returning.  The abort is
 * done first, while the mutex is still held.
 *
 * If the formatted name does not fit in MODULE_NAME_LEN bytes a warning
 * is emitted and no module is requested (the abort has already happened
 * and the mutex is never dropped on that path).
 */
static void nft_request_module(struct net *net, const char *fmt, ...)
{
	char name[MODULE_NAME_LEN];
	va_list ap;
	int written;

	__nf_tables_abort(net);

	va_start(ap, fmt);
	written = vsnprintf(name, MODULE_NAME_LEN, fmt, ap);
	va_end(ap);

	if (!WARN(written >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", name, written)) {
		/* request_module() runs without the commit mutex held */
		mutex_unlock(&net->nft.commit_mutex);
		request_module("%s", name);
		mutex_lock(&net->nft.commit_mutex);
	}
}
#endif
/*
 * Debug-only sanity check: when CONFIG_PROVE_LOCKING is enabled, warn
 * (once) if lockdep_nfnl_is_held() reports the NFNL_SUBSYS_NFTABLES
 * lock as held.  Without CONFIG_PROVE_LOCKING the body is empty.
 */
static void lockdep_nfnl_nft_mutex_not_held(void)
{
#ifdef CONFIG_PROVE_LOCKING
WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
#endif
}
static const struct nft_chain_type *
nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
u8 family, bool autoload)
{
const struct nft_chain_type *type;
type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return type;
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (autoload) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nft-chain-%u-%.*s", family,
nla_len(nla), (const char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
nft_request_module(net, "nft-chain-%u-%.*s", family,
nla_len(nla), (const char *)nla_data(nla));
type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return ERR_PTR(-EAGAIN);
@ -772,6 +811,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct nft_ctx ctx;
int err;
lockdep_assert_held(&net->nft.commit_mutex);
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask);
if (IS_ERR(table)) {
@ -1012,7 +1052,17 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
return ERR_PTR(-ENOENT);
}
static struct nft_chain *nft_chain_lookup(struct nft_table *table,
/*
 * Report whether net->nft.commit_mutex is held, for use in lock
 * assertions.
 *
 * With CONFIG_PROVE_LOCKING the answer comes from lockdep_is_held().
 * Without it the function unconditionally returns true, so assertions
 * built on it never trigger in such builds.
 */
static bool lockdep_commit_lock_is_held(struct net *net)
{
#ifdef CONFIG_PROVE_LOCKING
return lockdep_is_held(&net->nft.commit_mutex);
#else
/* no lock tracking available: claim the mutex is held */
return true;
#endif
}
static struct nft_chain *nft_chain_lookup(struct net *net,
struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
char search[NFT_CHAIN_MAXNAMELEN + 1];
@ -1025,7 +1075,7 @@ static struct nft_chain *nft_chain_lookup(struct nft_table *table,
nla_strlcpy(search, nla, sizeof(search));
WARN_ON(!rcu_read_lock_held() &&
!lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
!lockdep_commit_lock_is_held(net));
chain = ERR_PTR(-ENOENT);
rcu_read_lock();
@ -1265,7 +1315,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return PTR_ERR(chain);
@ -1398,6 +1448,9 @@ static int nft_chain_parse_hook(struct net *net,
struct net_device *dev;
int err;
lockdep_assert_held(&net->nft.commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
nft_hook_policy, NULL);
if (err < 0)
@ -1412,7 +1465,7 @@ static int nft_chain_parse_hook(struct net *net,
type = chain_type[family][NFT_CHAIN_T_DEFAULT];
if (nla[NFTA_CHAIN_TYPE]) {
type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
family, create);
if (IS_ERR(type))
return PTR_ERR(type);
@ -1632,7 +1685,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nla[NFTA_CHAIN_NAME]) {
struct nft_chain *chain2;
chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
chain2 = nft_chain_lookup(ctx->net, table,
nla[NFTA_CHAIN_NAME], genmask);
if (!IS_ERR(chain2))
return -EEXIST;
}
@ -1694,6 +1748,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
lockdep_assert_held(&net->nft.commit_mutex);
table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
@ -1712,7 +1768,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
}
attr = nla[NFTA_CHAIN_HANDLE];
} else {
chain = nft_chain_lookup(table, attr, genmask);
chain = nft_chain_lookup(net, table, attr, genmask);
if (IS_ERR(chain)) {
if (PTR_ERR(chain) != -ENOENT) {
NL_SET_BAD_ATTR(extack, attr);
@ -1790,7 +1846,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
chain = nft_chain_lookup_byhandle(table, handle, genmask);
} else {
attr = nla[NFTA_CHAIN_NAME];
chain = nft_chain_lookup(table, attr, genmask);
chain = nft_chain_lookup(net, table, attr, genmask);
}
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, attr);
@ -1875,7 +1931,8 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
return NULL;
}
static const struct nft_expr_type *nft_expr_type_get(u8 family,
static const struct nft_expr_type *nft_expr_type_get(struct net *net,
u8 family,
struct nlattr *nla)
{
const struct nft_expr_type *type;
@ -1887,19 +1944,16 @@ static const struct nft_expr_type *nft_expr_type_get(u8 family,
if (type != NULL && try_module_get(type->owner))
return type;
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nft-expr-%u-%.*s", family,
nla_len(nla), (char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
nft_request_module(net, "nft-expr-%u-%.*s", family,
nla_len(nla), (char *)nla_data(nla));
if (__nft_expr_type_get(family, nla))
return ERR_PTR(-EAGAIN);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nft-expr-%.*s",
nla_len(nla), (char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
nft_request_module(net, "nft-expr-%.*s",
nla_len(nla), (char *)nla_data(nla));
if (__nft_expr_type_get(family, nla))
return ERR_PTR(-EAGAIN);
}
@ -1968,7 +2022,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
if (err < 0)
return err;
type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]);
if (IS_ERR(type))
return PTR_ERR(type);
@ -2325,7 +2379,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
@ -2359,6 +2413,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
{
struct nft_expr *expr;
lockdep_assert_held(&ctx->net->nft.commit_mutex);
/*
* Careful: some expressions might not be initialized in case this
* is called on error from nf_tables_newrule().
@ -2427,8 +2482,6 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
#define NFT_RULE_MAXEXPRS 128
static struct nft_expr_info *info;
static int nf_tables_newrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@ -2436,6 +2489,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
struct nft_expr_info *info = NULL;
int family = nfmsg->nfgen_family;
struct nft_table *table;
struct nft_chain *chain;
@ -2450,6 +2504,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
bool create;
u64 handle, pos_handle;
lockdep_assert_held(&net->nft.commit_mutex);
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
@ -2458,7 +2514,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
@ -2506,6 +2562,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
n = 0;
size = 0;
if (nla[NFTA_RULE_EXPRESSIONS]) {
info = kvmalloc_array(NFT_RULE_MAXEXPRS,
sizeof(struct nft_expr_info),
GFP_KERNEL);
if (!info)
return -ENOMEM;
nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
err = -EINVAL;
if (nla_type(tmp) != NFTA_LIST_ELEM)
@ -2598,6 +2660,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
list_add_rcu(&rule->list, &chain->rules);
}
}
kvfree(info);
chain->use++;
if (net->nft.validate_state == NFT_VALIDATE_DO)
@ -2611,6 +2674,7 @@ err1:
if (info[i].ops != NULL)
module_put(info[i].ops->type->owner);
}
kvfree(info);
return err;
}
@ -2650,7 +2714,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
}
if (nla[NFTA_RULE_CHAIN]) {
chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
@ -2742,11 +2807,11 @@ nft_select_set_ops(const struct nft_ctx *ctx,
const struct nft_set_type *type;
u32 flags = 0;
lockdep_assert_held(&ctx->net->nft.commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (list_empty(&nf_tables_set_types)) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nft-set");
nfnl_lock(NFNL_SUBSYS_NFTABLES);
nft_request_module(ctx->net, "nft-set");
if (!list_empty(&nf_tables_set_types))
return ERR_PTR(-EAGAIN);
}
@ -4779,7 +4844,8 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
return NULL;
}
static const struct nft_object_type *nft_obj_type_get(u32 objtype)
static const struct nft_object_type *
nft_obj_type_get(struct net *net, u32 objtype)
{
const struct nft_object_type *type;
@ -4787,11 +4853,10 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype)
if (type != NULL && try_module_get(type->owner))
return type;
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nft-obj-%u", objtype);
nfnl_lock(NFNL_SUBSYS_NFTABLES);
nft_request_module(net, "nft-obj-%u", objtype);
if (__nft_obj_type_get(objtype))
return ERR_PTR(-EAGAIN);
}
@ -4843,7 +4908,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
type = nft_obj_type_get(objtype);
type = nft_obj_type_get(net, objtype);
if (IS_ERR(type))
return PTR_ERR(type);
@ -5339,7 +5404,8 @@ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
return NULL;
}
static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
static const struct nf_flowtable_type *
nft_flowtable_type_get(struct net *net, u8 family)
{
const struct nf_flowtable_type *type;
@ -5347,11 +5413,10 @@ static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
if (type != NULL && try_module_get(type->owner))
return type;
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nf-flowtable-%u", family);
nfnl_lock(NFNL_SUBSYS_NFTABLES);
nft_request_module(net, "nf-flowtable-%u", family);
if (__nft_flowtable_type_get(family))
return ERR_PTR(-EAGAIN);
}
@ -5431,7 +5496,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err1;
}
type = nft_flowtable_type_get(family);
type = nft_flowtable_type_get(net, family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err2;
@ -6202,9 +6267,9 @@ static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha
next_genbit = nft_gencursor_next(net);
g0 = rcu_dereference_protected(chain->rules_gen_0,
lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
lockdep_commit_lock_is_held(net));
g1 = rcu_dereference_protected(chain->rules_gen_1,
lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
lockdep_commit_lock_is_held(net));
/* No changes to this chain? */
if (chain->rules_next == NULL) {
@ -6412,6 +6477,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_commit_release(net);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
mutex_unlock(&net->nft.commit_mutex);
return 0;
}
@ -6563,12 +6629,25 @@ static void nf_tables_cleanup(struct net *net)
static int nf_tables_abort(struct net *net, struct sk_buff *skb)
{
return __nf_tables_abort(net);
int ret = __nf_tables_abort(net);
mutex_unlock(&net->nft.commit_mutex);
return ret;
}
static bool nf_tables_valid_genid(struct net *net, u32 genid)
{
return net->nft.base_seq == genid;
bool genid_ok;
mutex_lock(&net->nft.commit_mutex);
genid_ok = genid == 0 || net->nft.base_seq == genid;
if (!genid_ok)
mutex_unlock(&net->nft.commit_mutex);
/* else, commit mutex has to be released by commit or abort function */
return genid_ok;
}
static const struct nfnetlink_subsystem nf_tables_subsys = {
@ -6580,6 +6659,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
.abort = nf_tables_abort,
.cleanup = nf_tables_cleanup,
.valid_genid = nf_tables_valid_genid,
.owner = THIS_MODULE,
};
int nft_chain_validate_dependency(const struct nft_chain *chain,
@ -6906,8 +6986,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
case NFT_GOTO:
if (!tb[NFTA_VERDICT_CHAIN])
return -EINVAL;
chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN],
genmask);
chain = nft_chain_lookup(ctx->net, ctx->table,
tb[NFTA_VERDICT_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
@ -7152,6 +7232,7 @@ static int __net_init nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.tables);
INIT_LIST_HEAD(&net->nft.commit_list);
mutex_init(&net->nft.commit_mutex);
net->nft.base_seq = 1;
net->nft.validate_state = NFT_VALIDATE_SKIP;
@ -7160,11 +7241,11 @@ static int __net_init nf_tables_init_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
mutex_lock(&net->nft.commit_mutex);
if (!list_empty(&net->nft.commit_list))
__nf_tables_abort(net);
__nft_release_tables(net);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
}
@ -7179,29 +7260,19 @@ static int __init nf_tables_module_init(void)
nft_chain_filter_init();
info = kmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info),
GFP_KERNEL);
if (info == NULL) {
err = -ENOMEM;
goto err1;
}
err = nf_tables_core_module_init();
if (err < 0)
goto err2;
return err;
err = nfnetlink_subsys_register(&nf_tables_subsys);
if (err < 0)
goto err3;
goto err;
register_netdevice_notifier(&nf_tables_flowtable_notifier);
return register_pernet_subsys(&nf_tables_net_ops);
err3:
err:
nf_tables_core_module_exit();
err2:
kfree(info);
err1:
return err;
}
@ -7213,7 +7284,6 @@ static void __exit nf_tables_module_exit(void)
unregister_pernet_subsys(&nf_tables_net_ops);
rcu_barrier();
nf_tables_core_module_exit();
kfree(info);
}
module_init(nf_tables_module_init);

View File

@ -331,18 +331,27 @@ replay:
}
}
if (!ss->commit || !ss->abort) {
if (!ss->valid_genid || !ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}
if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) {
if (!try_module_get(ss->owner)) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}
if (!ss->valid_genid(net, genid)) {
module_put(ss->owner);
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -ERESTART, NULL);
return kfree_skb(skb);
}
nfnl_unlock(subsys_id);
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
@ -464,14 +473,10 @@ ack:
}
done:
if (status & NFNL_BATCH_REPLAY) {
const struct nfnetlink_subsystem *ss2;
ss2 = nfnl_dereference_protected(subsys_id);
if (ss2 == ss)
ss->abort(net, oskb);
ss->abort(net, oskb);
nfnl_err_reset(&err_list);
nfnl_unlock(subsys_id);
kfree_skb(skb);
module_put(ss->owner);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
err = ss->commit(net, oskb);
@ -489,8 +494,8 @@ done:
ss->cleanup(net);
nfnl_err_deliver(&err_list, oskb);
nfnl_unlock(subsys_id);
kfree_skb(skb);
module_put(ss->owner);
}
static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {

View File

@ -26,7 +26,6 @@
#include <net/sock.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_timeout.h>
@ -47,7 +46,7 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
};
static int
ctnl_timeout_parse_policy(void *timeouts,
ctnl_timeout_parse_policy(void *timeout,
const struct nf_conntrack_l4proto *l4proto,
struct net *net, const struct nlattr *attr)
{
@ -68,7 +67,7 @@ ctnl_timeout_parse_policy(void *timeouts,
if (ret < 0)
goto err;
ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);
err:
kfree(tb);
@ -373,7 +372,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
struct netlink_ext_ack *extack)
{
const struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
__u16 l3num;
__u8 l4num;
int ret;
@ -393,9 +391,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
goto err;
}
timeouts = l4proto->get_timeouts(net);
ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
cda[CTA_TIMEOUT_DATA]);
if (ret < 0)
goto err;
@ -432,7 +428,6 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
struct nlattr *nest_parms;
unsigned int *timeouts = l4proto->get_timeouts(net);
int ret;
nest_parms = nla_nest_start(skb,
@ -440,7 +435,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
if (!nest_parms)
goto nla_put_failure;
ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
if (ret < 0)
goto nla_put_failure;

View File

@ -322,7 +322,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
if (!ctx.net)
return NOTIFY_DONE;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
mutex_lock(&ctx.net->nft.commit_mutex);
list_for_each_entry(table, &ctx.net->nft.tables, list) {
if (table->family != NFPROTO_NETDEV)
continue;
@ -337,7 +337,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
nft_netdev_event(event, dev, &ctx);
}
}
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
mutex_unlock(&ctx.net->nft.commit_mutex);
put_net(ctx.net);
return NOTIFY_DONE;

View File

@ -14,10 +14,9 @@
#include <net/netfilter/nf_conntrack_zones.h>
struct nft_connlimit {
spinlock_t lock;
struct hlist_head hhead;
u32 limit;
bool invert;
struct nf_conncount_list list;
u32 limit;
bool invert;
};
static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
@ -45,21 +44,19 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
return;
}
spin_lock_bh(&priv->lock);
count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone,
&addit);
nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
&addit);
count = priv->list.count;
if (!addit)
goto out;
if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) {
if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
regs->verdict.code = NF_DROP;
spin_unlock_bh(&priv->lock);
return;
}
count++;
out:
spin_unlock_bh(&priv->lock);
if ((count > priv->limit) ^ priv->invert) {
regs->verdict.code = NFT_BREAK;
@ -87,8 +84,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
invert = true;
}
spin_lock_init(&priv->lock);
INIT_HLIST_HEAD(&priv->hhead);
nf_conncount_list_init(&priv->list);
priv->limit = limit;
priv->invert = invert;
@ -99,7 +95,7 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
struct nft_connlimit *priv)
{
nf_ct_netns_put(ctx->net, ctx->family);
nf_conncount_cache_free(&priv->hhead);
nf_conncount_cache_free(&priv->list);
}
static int nft_connlimit_do_dump(struct sk_buff *skb,
@ -212,8 +208,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
spin_lock_init(&priv_dst->lock);
INIT_HLIST_HEAD(&priv_dst->hhead);
nf_conncount_list_init(&priv_dst->list);
priv_dst->limit = priv_src->limit;
priv_dst->invert = priv_src->invert;
@ -225,21 +220,14 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
{
struct nft_connlimit *priv = nft_expr_priv(expr);
nf_conncount_cache_free(&priv->hhead);
nf_conncount_cache_free(&priv->list);
}
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
bool addit, ret;
spin_lock_bh(&priv->lock);
nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit);
ret = hlist_empty(&priv->hhead);
spin_unlock_bh(&priv->lock);
return ret;
return nf_conncount_gc_list(net, &priv->list);
}
static struct nft_expr_type nft_connlimit_type;

View File

@ -870,7 +870,7 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj,
if (test_bit(IPS_HELPER_BIT, &ct->status))
return;
help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help) {
rcu_assign_pointer(help->helper, to_assign);
set_bit(IPS_HELPER_BIT, &ct->status);

View File

@ -118,6 +118,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
u64 timeout;
int err;
lockdep_assert_held(&ctx->net->nft.commit_mutex);
if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
tb[NFTA_DYNSET_OP] == NULL ||
tb[NFTA_DYNSET_SREG_KEY] == NULL)

View File

@ -31,7 +31,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
case NFPROTO_IPV4:
sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
break;
#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
break;
@ -43,7 +43,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
}
if (!sk) {
nft_reg_store8(dest, 0);
regs->verdict.code = NFT_BREAK;
return;
}
@ -54,6 +54,14 @@ static void nft_socket_eval(const struct nft_expr *expr,
case NFT_SOCKET_TRANSPARENT:
nft_reg_store8(dest, inet_sk_transparent(sk));
break;
case NFT_SOCKET_MARK:
if (sk_fullsock(sk)) {
*dest = sk->sk_mark;
} else {
regs->verdict.code = NFT_BREAK;
return;
}
break;
default:
WARN_ON(1);
regs->verdict.code = NFT_BREAK;
@ -77,7 +85,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
switch(ctx->family) {
case NFPROTO_IPV4:
#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
#endif
case NFPROTO_INET:
@ -91,6 +99,9 @@ static int nft_socket_init(const struct nft_ctx *ctx,
case NFT_SOCKET_TRANSPARENT:
len = sizeof(u8);
break;
case NFT_SOCKET_MARK:
len = sizeof(u32);
break;
default:
return -EOPNOTSUPP;
}

View File

@ -1,14 +1,128 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_queue.h>
#include <net/ip6_checksum.h>
#ifdef CONFIG_INET
/*
 * Check the transport checksum of an IPv4 packet.
 *
 * @skb:      packet; skb->csum and skb->ip_summed may be updated
 * @hook:     netfilter hook the packet is traversing
 * @dataoff:  offset of the transport header within @skb
 * @protocol: transport protocol number; 0 means no pseudo-header is used
 *
 * CHECKSUM_COMPLETE packets are only examined on the PRE_ROUTING and
 * LOCAL_IN hooks; if the checksum already stored in skb->csum verifies,
 * the skb is marked CHECKSUM_UNNECESSARY.  Otherwise, and for
 * CHECKSUM_NONE packets, skb->csum is seeded (with the pseudo-header sum
 * unless @protocol is 0) and __skb_checksum_complete() does the check.
 *
 * Returns 0 when nothing had to be verified or the stored checksum
 * verified, else whatever __skb_checksum_complete() returns.
 */
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
		       unsigned int dataoff, u8 protocol)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		/* a stored checksum is only examined on the input hooks */
		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
			return 0;

		if ((protocol == 0 && !csum_fold(skb->csum)) ||
		    !csum_tcpudp_magic(iph->saddr, iph->daddr,
				       skb->len - dataoff, protocol,
				       skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
		/* stored checksum did not verify: recompute below */
	} else if (skb->ip_summed != CHECKSUM_NONE) {
		/* any other ip_summed state: nothing to verify here */
		return 0;
	}

	if (protocol == 0)
		skb->csum = 0;
	else
		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
					       skb->len - dataoff,
					       protocol, 0);

	return __skb_checksum_complete(skb);
}
EXPORT_SYMBOL(nf_ip_checksum);
#endif
/*
 * Check an IPv4 transport checksum that covers only the first @len bytes
 * after @dataoff.
 *
 * @skb:      packet; skb->csum and skb->ip_summed may be updated
 * @hook:     netfilter hook the packet is traversing
 * @dataoff:  offset of the transport header within @skb
 * @len:      number of bytes, starting at @dataoff, covered by the checksum
 * @protocol: transport protocol number for the pseudo-header
 *
 * When the skb is CHECKSUM_COMPLETE and the coverage equals the whole
 * transport payload, this defers to nf_ip_checksum().  Otherwise the
 * pseudo-header sum is seeded into skb->csum, the skb is downgraded to
 * CHECKSUM_NONE and __skb_checksum_complete_head() checks the first
 * dataoff + len bytes.
 *
 * Returns 0 for any other ip_summed state, else the result of the
 * function that did the check.
 */
static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
				      unsigned int dataoff, unsigned int len,
				      u8 protocol)
{
	const struct iphdr *iph = ip_hdr(skb);
	__sum16 csum = 0;

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (len == skb->len - dataoff)
			return nf_ip_checksum(skb, hook, dataoff, protocol);
		/* fall through */
	case CHECKSUM_NONE:
		/*
		 * Length comes before protocol, matching the argument order
		 * nf_ip_checksum() uses for the same helper; the previous
		 * code passed them swapped.
		 */
		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
					       skb->len - dataoff,
					       protocol, 0);
		skb->ip_summed = CHECKSUM_NONE;
		return __skb_checksum_complete_head(skb, dataoff + len);
	}
	return csum;
}
/*
 * Check the transport checksum of an IPv6 packet.
 *
 * @skb:      packet; skb->csum and skb->ip_summed may be updated
 * @hook:     netfilter hook the packet is traversing
 * @dataoff:  offset of the transport header within @skb
 * @protocol: transport protocol number for the pseudo-header
 *
 * CHECKSUM_COMPLETE packets are only examined on the PRE_ROUTING and
 * LOCAL_IN hooks: the sum of the first @dataoff bytes is subtracted from
 * skb->csum and, if the result verifies against the pseudo-header, the
 * skb is marked CHECKSUM_UNNECESSARY.  Otherwise, and for CHECKSUM_NONE
 * packets, skb->csum is seeded and __skb_checksum_complete() does the
 * check.
 *
 * Returns 0 when nothing had to be verified or the stored checksum
 * verified, else whatever __skb_checksum_complete() returns.
 */
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
			unsigned int dataoff, u8 protocol)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	__sum16 folded;

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__wsum payload_sum;

		/* a stored checksum is only examined on the input hooks */
		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
			return 0;

		/* remove the bytes before the transport header from the sum */
		payload_sum = csum_sub(skb->csum,
				       skb_checksum(skb, 0, dataoff, 0));
		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
				     skb->len - dataoff, protocol,
				     payload_sum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
		/* stored checksum did not verify: recompute below */
	} else if (skb->ip_summed != CHECKSUM_NONE) {
		/* any other ip_summed state: nothing to verify here */
		return 0;
	}

	folded = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
				 skb->len - dataoff, protocol,
				 csum_sub(0, skb_checksum(skb, 0, dataoff, 0)));
	skb->csum = ~csum_unfold(folded);

	return __skb_checksum_complete(skb);
}
EXPORT_SYMBOL(nf_ip6_checksum);
/*
 * Check an IPv6 transport checksum that covers only the first @len bytes
 * after @dataoff.
 *
 * @skb:      packet; skb->csum and skb->ip_summed may be updated
 * @hook:     netfilter hook the packet is traversing
 * @dataoff:  offset of the transport header within @skb
 * @len:      number of bytes, starting at @dataoff, covered by the checksum
 * @protocol: transport protocol number for the pseudo-header
 *
 * When the skb is CHECKSUM_COMPLETE and the coverage equals the whole
 * transport payload, this defers to nf_ip6_checksum().  Otherwise
 * skb->csum is seeded from the pseudo-header minus the sum of the first
 * @dataoff bytes, the skb is downgraded to CHECKSUM_NONE and
 * __skb_checksum_complete_head() checks the first dataoff + len bytes.
 *
 * Returns 0 for any other ip_summed state, else the result of the
 * function that did the check.
 */
static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
				       unsigned int dataoff, unsigned int len,
				       u8 protocol)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	__wsum hsum;
	__sum16 csum = 0;

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (len == skb->len - dataoff)
			return nf_ip6_checksum(skb, hook, dataoff, protocol);
		/* fall through */
	case CHECKSUM_NONE:
		/* sum of the bytes that precede the transport header */
		hsum = skb_checksum(skb, 0, dataoff, 0);
		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
							 &ip6h->daddr,
							 skb->len - dataoff,
							 protocol,
							 csum_sub(0, hsum)));
		skb->ip_summed = CHECKSUM_NONE;
		return __skb_checksum_complete_head(skb, dataoff + len);
	}
	return csum;
}
__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol,
unsigned int dataoff, u8 protocol,
unsigned short family)
{
const struct nf_ipv6_ops *v6ops;
__sum16 csum = 0;
switch (family) {
@ -16,9 +130,7 @@ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
csum = nf_ip_checksum(skb, hook, dataoff, protocol);
break;
case AF_INET6:
v6ops = rcu_dereference(nf_ipv6_ops);
if (v6ops)
csum = v6ops->checksum(skb, hook, dataoff, protocol);
csum = nf_ip6_checksum(skb, hook, dataoff, protocol);
break;
}
@ -28,9 +140,8 @@ EXPORT_SYMBOL_GPL(nf_checksum);
__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol, unsigned short family)
u8 protocol, unsigned short family)
{
const struct nf_ipv6_ops *v6ops;
__sum16 csum = 0;
switch (family) {
@ -39,10 +150,8 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
protocol);
break;
case AF_INET6:
v6ops = rcu_dereference(nf_ipv6_ops);
if (v6ops)
csum = v6ops->checksum_partial(skb, hook, dataoff, len,
protocol);
csum = nf_ip6_checksum_partial(skb, hook, dataoff, len,
protocol);
break;
}

View File

@ -93,7 +93,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
return -ENOENT;
}
help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
if (help == NULL) {
nf_conntrack_helper_put(helper);
return -ENOMEM;

View File

@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
#if IS_ENABLED(CONFIG_IPV6)
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
@ -141,7 +141,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
},
#if IS_ENABLED(CONFIG_IPV6)
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "TEE",
.revision = 1,

View File

@ -36,15 +36,6 @@
#include <net/netfilter/nf_tproxy.h>
#include <linux/netfilter/xt_TPROXY.h>
/* assign a socket to the skb -- consumes sk */
static void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
skb->sk = sk;
skb->destructor = sock_edemux;
}
static unsigned int
tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value)

View File

@ -26,6 +26,7 @@
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/ipv6_frag.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <linux/netfilter/nf_nat.h>
@ -607,23 +608,12 @@ static struct nf_conn *
ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
u8 l3num, struct sk_buff *skb, bool natted)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
unsigned int dataoff;
u8 protonum;
l3proto = __nf_ct_l3proto_find(l3num);
if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
&protonum) <= 0) {
pr_debug("ovs_ct_find_existing: Can't get protonum\n");
return NULL;
}
l4proto = __nf_ct_l4proto_find(l3num, protonum);
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
protonum, net, &tuple, l3proto, l4proto)) {
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num,
net, &tuple)) {
pr_debug("ovs_ct_find_existing: Can't get tuple\n");
return NULL;
}
@ -632,7 +622,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
if (natted) {
struct nf_conntrack_tuple inverse;
if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
if (!nf_ct_invert_tuplepr(&inverse, &tuple)) {
pr_debug("ovs_ct_find_existing: Inversion failed!\n");
return NULL;
}
@ -1314,7 +1304,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
return -EINVAL;
}
help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
if (!help) {
nf_conntrack_helper_put(helper);
return -ENOMEM;