0838aa7fcf
Quoting Daniel Borkmann:

"When adding connection tracking template rules to a netns, f.e. to configure netfilter zones, the kernel will endlessly busy-loop as soon as we try to delete the given netns in case there's at least one template present, which is problematic i.e. if there is such bravery that the privileged user inside the netns is assumed untrusted.

Minimal example:

  ip netns add foo
  ip netns exec foo iptables -t raw -A PREROUTING -d 1.2.3.4 -j CT --zone 1
  ip netns del foo

What happens is that when nf_ct_iterate_cleanup() is being called from nf_conntrack_cleanup_net_list() for a provided netns, we always end up with a net->ct.count > 0 and thus jump back to i_see_dead_people. We don't get a soft-lockup as we still have a schedule() point, but the serving CPU spins at 100% from that point onwards.

Since templates are normally allocated with nf_conntrack_alloc(), we also bump net->ct.count. The reason they have not yet been nf_ct_put() is that the per netns .exit() handler from x_tables (which would eventually invoke xt_CT's xt_ct_tg_destroy() that drops the reference on info->ct) is called in the dependency chain at a *later* point in time than the per netns .exit() handler for the connection tracker.

This is clearly a chicken'n'egg problem: after the connection tracker .exit() handler, we've torn down all the connection tracking infrastructure already, so rightfully, xt_ct_tg_destroy() cannot be invoked at a later point in time during the netns cleanup, as that would lead to a use-after-free. At the same time, we cannot make x_tables depend on the connection tracker module, so that xt_ct_tg_destroy() would be invoked earlier in the cleanup chain."

Daniel confirms this has to do with the order in which modules are loaded, or with nf_conntrack being compiled as a module while x_tables is built-in. So we have no guarantees regarding the order in which netns callbacks are executed.

Fix this by allocating the templates through kmalloc() from the respective SYNPROXY and CT targets, so they don't depend on the conntrack kmem cache. Then, release them via nf_ct_tmpl_free() from destroy_conntrack(). This branch is marked as unlikely since conntrack templates are rarely allocated, and only from the configuration plane path.

Note that templates are no longer kept in any list, to avoid further dependencies on nf_conntrack; thus, the tmpl larval list is removed.

Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
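As a rough sketch of the shape of this change: only nf_ct_tmpl_free() and the unlikely() early exit in destroy_conntrack() are named by the message above; the allocation-side helper (ct_template_alloc) and its exact details here are illustrative, not the verbatim patch.

#include <net/netfilter/nf_conntrack.h>

/* Illustrative allocation side: the template comes from kzalloc(), not from
 * net->ct.nf_conntrack_cachep, so destroying it no longer depends on the
 * relative ordering of the x_tables and nf_conntrack netns .exit() handlers. */
static struct nf_conn *ct_template_alloc(struct net *net, gfp_t flags)
{
	struct nf_conn *tmpl = kzalloc(sizeof(*tmpl), flags);

	if (!tmpl)
		return NULL;
	tmpl->status = IPS_TEMPLATE;
	write_pnet(&tmpl->ct_net, net);
	atomic_set(&tmpl->ct_general.use, 0);
	return tmpl;
}

/* Free side, reached from destroy_conntrack() once the last reference on the
 * template is dropped (e.g. by xt_CT's destroy callback). */
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
	nf_ct_ext_destroy(tmpl);
	kfree(tmpl);
}

static void destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;

	/* Templates bypass the regular teardown; unlikely() because they are
	 * only created from the configuration plane, never per packet. */
	if (unlikely(nf_ct_is_template(ct))) {
		nf_ct_tmpl_free(ct);
		return;
	}
	/* ... regular conntrack destruction (hash removal, cache free) ... */
}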
116 lines
2.7 KiB
C
#ifndef __NETNS_CONNTRACK_H
#define __NETNS_CONNTRACK_H

#include <linux/list.h>
#include <linux/list_nulls.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/seqlock.h>

struct ctl_table_header;
struct nf_conntrack_ecache;

struct nf_proto_net {
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *ctl_table_header;
	struct ctl_table	*ctl_table;
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	struct ctl_table_header *ctl_compat_header;
	struct ctl_table	*ctl_compat_table;
#endif
#endif
	unsigned int		users;
};

struct nf_generic_net {
	struct nf_proto_net pn;
	unsigned int timeout;
};

struct nf_tcp_net {
	struct nf_proto_net pn;
	unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
	unsigned int tcp_loose;
	unsigned int tcp_be_liberal;
	unsigned int tcp_max_retrans;
};

enum udp_conntrack {
	UDP_CT_UNREPLIED,
	UDP_CT_REPLIED,
	UDP_CT_MAX
};

struct nf_udp_net {
	struct nf_proto_net pn;
	unsigned int timeouts[UDP_CT_MAX];
};

struct nf_icmp_net {
	struct nf_proto_net pn;
	unsigned int timeout;
};

struct nf_ip_net {
	struct nf_generic_net	generic;
	struct nf_tcp_net	tcp;
	struct nf_udp_net	udp;
	struct nf_icmp_net	icmp;
	struct nf_icmp_net	icmpv6;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	struct ctl_table_header *ctl_table_header;
	struct ctl_table	*ctl_table;
#endif
};

struct ct_pcpu {
	spinlock_t		lock;
	struct hlist_nulls_head unconfirmed;
	struct hlist_nulls_head dying;
};

struct netns_ct {
	atomic_t		count;
	unsigned int		expect_count;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
	struct delayed_work	ecache_dwork;
	bool			ecache_dwork_pending;
#endif
#ifdef CONFIG_SYSCTL
	struct ctl_table_header	*sysctl_header;
	struct ctl_table_header	*acct_sysctl_header;
	struct ctl_table_header	*tstamp_sysctl_header;
	struct ctl_table_header	*event_sysctl_header;
	struct ctl_table_header	*helper_sysctl_header;
#endif
	char			*slabname;

	unsigned int		sysctl_log_invalid; /* Log invalid packets */
	int			sysctl_events;
	int			sysctl_acct;
	int			sysctl_auto_assign_helper;
	bool			auto_assign_helper_warned;
	int			sysctl_tstamp;
	int			sysctl_checksum;

	unsigned int		htable_size;
	seqcount_t		generation;
	struct kmem_cache	*nf_conntrack_cachep;
	struct hlist_nulls_head	*hash;
	struct hlist_head	*expect_hash;
	struct ct_pcpu __percpu *pcpu_lists;
	struct ip_conntrack_stat __percpu *stat;
	struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
	struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
	struct nf_ip_net	nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
	unsigned int		labels_used;
	u8			label_words;
#endif
#ifdef CONFIG_NF_NAT_NEEDED
	struct hlist_head	*nat_bysource;
	unsigned int		nat_htable_size;
#endif
};
#endif
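For context on the busy-loop described in the commit message: the netns cleanup path keeps re-walking the exit list while the atomic count field of struct netns_ct above stays non-zero, which is exactly what a lingering template reference causes. A simplified sketch of nf_conntrack_cleanup_net_list() (not the verbatim kernel source):

/* Simplified sketch of the cleanup loop the commit message refers to.
 * While any netns on the exit list still has ct.count > 0 (e.g. because a
 * template still holds a reference), the loop reschedules and retries forever. */
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
{
	int busy;
	struct net *net;

	synchronize_net();
i_see_dead_people:
	busy = 0;
	list_for_each_entry(net, net_exit_list, exit_list) {
		nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
		if (atomic_read(&net->ct.count) != 0)
			busy = 1;
	}
	if (busy) {
		schedule();	/* avoids a soft-lockup splat, but the CPU spins at 100% */
		goto i_see_dead_people;
	}
	/* ... per-netns teardown of hashes, per-cpu lists, sysctls continues ... */
}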