diff --git a/Documentation/networking/netfilter-sysctl.txt b/Documentation/networking/netfilter-sysctl.txt new file mode 100644 index 000000000000..55791e50e169 --- /dev/null +++ b/Documentation/networking/netfilter-sysctl.txt @@ -0,0 +1,10 @@ +/proc/sys/net/netfilter/* Variables: + +nf_log_all_netns - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + By default, only init_net namespace can log packets into kernel log + with LOG target; this aims to prevent containers from flooding host + kernel log. If enabled, this target also works in other network + namespaces. This variable is only accessible from init_net. diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5117e4d2ddfa..be378cf47fcc 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -167,6 +167,7 @@ struct xt_match { const char *table; unsigned int matchsize; + unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif @@ -207,6 +208,7 @@ struct xt_target { const char *table; unsigned int targetsize; + unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif @@ -287,6 +289,13 @@ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +int xt_match_to_user(const struct xt_entry_match *m, + struct xt_entry_match __user *u); +int xt_target_to_user(const struct xt_entry_target *t, + struct xt_entry_target __user *u); +int xt_data_to_user(void __user *dst, const void *src, + int usersize, int size); + void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f63b7e2ac11..c6a78e1892b6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -585,7 +585,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header - * @nfctinfo: Relationship of this skb to the connection * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs @@ -598,7 +597,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function - * @nfct: Associated connection, if any + * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index @@ -671,7 +670,7 @@ struct sk_buff { struct sec_path *sp; #endif #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; + unsigned long _nfct; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info *nf_bridge; @@ -724,7 +723,6 @@ struct sk_buff { __u8 pkt_type:3; __u8 pfmemalloc:1; __u8 ignore_df:1; - __u8 nfctinfo:3; __u8 nf_trace:1; __u8 ip_summed:2; @@ -841,6 +839,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb) #define SKB_DST_NOREF 1UL #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) +#define SKB_NFCT_PTRMASK ~(7UL) /** * skb_dst - returns skb dst_entry * @skb: buffer @@ -3558,6 +3557,15 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, skb->csum = csum_add(skb->csum, delta); } +static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + return (void *)(skb->_nfct & SKB_NFCT_PTRMASK); +#else + return NULL; +#endif +} + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) @@ -3586,8 +3594,8 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) static inline void nf_reset(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; + nf_conntrack_put(skb_nfct(skb)); + skb->_nfct = 0; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); @@ -3607,10 +3615,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - dst->nfct = src->nfct; - nf_conntrack_get(src->nfct); - if (copy) - dst->nfctinfo = src->nfctinfo; + dst->_nfct = src->_nfct; + nf_conntrack_get(skb_nfct(src)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dst->nf_bridge = src->nf_bridge; @@ -3625,7 +3631,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(dst->nfct); + nf_conntrack_put(skb_nfct(dst)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(dst->nf_bridge); @@ -3657,9 +3663,7 @@ static inline bool skb_irq_freeable(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_XFRM) !skb->sp && #endif -#if IS_ENABLED(CONFIG_NF_CONNTRACK) - !skb->nfct && -#endif + !skb_nfct(skb) && !skb->_skb_refdst && !skb_has_frag_list(skb); } diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cd6018a9ee24..7bdfa7d78363 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1421,7 +1421,7 @@ static inline void ip_vs_dest_put(struct ip_vs_dest *dest) static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) { - if (atomic_dec_return(&dest->refcnt) < 0) + if (atomic_dec_and_test(&dest->refcnt)) kfree(dest); } @@ -1554,10 +1554,12 @@ static inline void ip_vs_notrack(struct sk_buff *skb) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + struct nf_conn *untracked; + + nf_conntrack_put(&ct->ct_general); + untracked = nf_ct_untracked_get(); + nf_conntrack_get(&untracked->ct_general); + nf_ct_set(skb, untracked, IP_CT_NEW); } #endif } diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 919e4e8af327..6ff32815641b 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -14,6 +14,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index eaea968f8657..c59b82456f89 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -5,6 +5,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5916aa9ab3f0..f540f9ad2af4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -34,6 +34,7 @@ union nf_conntrack_proto { struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct nf_ct_gre gre; + unsigned int tmpl_padto; }; union nf_conntrack_expect_proto { @@ -75,7 +76,7 @@ struct nf_conn { /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * - * Hint, SKB address this struct and refcnt via skb->nfct and + * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, * beware nf_ct_get() is different and don't inc refcnt. @@ -162,12 +163,16 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); +#define NFCT_INFOMASK 7UL +#define NFCT_PTRMASK ~(NFCT_INFOMASK) + /* Return conntrack_info and tuple hash for given skb. */ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { - *ctinfo = skb->nfctinfo; - return (struct nf_conn *)skb->nfct; + *ctinfo = skb->_nfct & NFCT_INFOMASK; + + return (struct nf_conn *)(skb->_nfct & NFCT_PTRMASK); } /* decrement reference count on a conntrack */ @@ -341,6 +346,12 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); +static inline void +nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) +{ + skb->_nfct = (unsigned long)ct | info; +} + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 62e17d1319ff..84ec7ca5f195 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -62,7 +62,7 @@ int __nf_conntrack_confirm(struct sk_buff *skb); /* Confirm a connection: returns NF_DROP if packet must be dropped. */ static inline int nf_conntrack_confirm(struct sk_buff *skb) { - struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); int ret = NF_ACCEPT; if (ct && !nf_ct_is_untracked(ct)) { diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index e7b836590f0b..85e993e278d5 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -55,7 +55,7 @@ struct nf_conntrack_l4proto { void (*destroy)(struct nf_conn *ct); int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 450f87f95415..42e0696f38d8 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -51,6 +51,9 @@ struct nf_logger { struct module *me; }; +/* sysctl_nf_log_all_netns - allow LOG target in all network namespaces */ +extern int sysctl_nf_log_all_netns; + /* Function to register/unregister log function. */ int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index cf799fc3fdec..17724c62de97 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -69,19 +69,6 @@ struct nf_sctp_net { }; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -enum udplite_conntrack { - UDPLITE_CT_UNREPLIED, - UDPLITE_CT_REPLIED, - UDPLITE_CT_MAX -}; - -struct nf_udplite_net { - struct nf_proto_net pn; - unsigned int timeouts[UDPLITE_CT_MAX]; -}; -#endif - struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -94,9 +81,6 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - struct nf_udplite_net udplite; -#endif }; struct ct_pcpu { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e3f27e09eb2b..7b730cab99bd 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -860,6 +860,10 @@ enum nft_rt_attributes { * @NFT_CT_PROTOCOL: conntrack layer 4 protocol * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + * @NFT_CT_LABELS: conntrack labels + * @NFT_CT_PKTS: conntrack packets + * @NFT_CT_BYTES: conntrack bytes + * @NFT_CT_AVGPKT: conntrack average bytes per packet */ enum nft_ct_keys { NFT_CT_STATE, @@ -878,6 +882,7 @@ enum nft_ct_keys { NFT_CT_LABELS, NFT_CT_PKTS, NFT_CT_BYTES, + NFT_CT_AVGPKT, }; /** diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 517e78befcb2..61a9f1be1263 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -105,6 +105,7 @@ static struct xt_match ebt_limit_mt_reg __read_mostly = { .match = ebt_limit_mt, .checkentry = ebt_limit_mt_check, .matchsize = sizeof(struct ebt_limit_info), + .usersize = offsetof(struct ebt_limit_info, prev), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct ebt_compat_limit_info), #endif diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index e88bd4827ac1..98b9c8e8615e 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,7 +78,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, unsigned int bitmask; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; spin_lock_bh(&ebt_log_lock); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 537e3d506fc2..79b69917f521 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1346,56 +1346,72 @@ static int update_counters(struct net *net, const void __user *user, hlp.num_counters, user, len); } -static inline int ebt_make_matchname(const struct ebt_entry_match *m, - const char *base, char __user *ubase) +static inline int ebt_obj_to_user(char __user *um, const char *_name, + const char *data, int entrysize, + int usersize, int datasize) { - char __user *hlp = ubase + ((char *)m - base); - char name[EBT_FUNCTION_MAXNAMELEN] = {}; + char name[EBT_FUNCTION_MAXNAMELEN] = {0}; /* ebtables expects 32 bytes long names but xt_match names are 29 bytes * long. Copy 29 bytes and fill remaining bytes with zeroes. */ - strlcpy(name, m->u.match->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) + strlcpy(name, _name, sizeof(name)); + if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) || + put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) || + xt_data_to_user(um + entrysize, data, usersize, datasize)) return -EFAULT; + return 0; } -static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, - const char *base, char __user *ubase) +static inline int ebt_match_to_user(const struct ebt_entry_match *m, + const char *base, char __user *ubase) { - char __user *hlp = ubase + ((char *)w - base); - char name[EBT_FUNCTION_MAXNAMELEN] = {}; - - strlcpy(name, w->u.watcher->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) - return -EFAULT; - return 0; + return ebt_obj_to_user(ubase + ((char *)m - base), + m->u.match->name, m->data, sizeof(*m), + m->u.match->usersize, m->match_size); } -static inline int ebt_make_names(struct ebt_entry *e, const char *base, - char __user *ubase) +static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w, + const char *base, char __user *ubase) +{ + return ebt_obj_to_user(ubase + ((char *)w - base), + w->u.watcher->name, w->data, sizeof(*w), + w->u.watcher->usersize, w->watcher_size); +} + +static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base, + char __user *ubase) { int ret; char __user *hlp; const struct ebt_entry_target *t; - char name[EBT_FUNCTION_MAXNAMELEN] = {}; - if (e->bitmask == 0) + if (e->bitmask == 0) { + /* special case !EBT_ENTRY_OR_ENTRIES */ + if (copy_to_user(ubase + ((char *)e - base), e, + sizeof(struct ebt_entries))) + return -EFAULT; return 0; + } + + if (copy_to_user(ubase + ((char *)e - base), e, sizeof(*e))) + return -EFAULT; hlp = ubase + (((char *)e + e->target_offset) - base); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - ret = EBT_MATCH_ITERATE(e, ebt_make_matchname, base, ubase); + ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase); if (ret != 0) return ret; - ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); + ret = EBT_WATCHER_ITERATE(e, ebt_watcher_to_user, base, ubase); if (ret != 0) return ret; - strlcpy(name, t->u.target->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) - return -EFAULT; + ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t), + t->u.target->usersize, t->target_size); + if (ret != 0) + return ret; + return 0; } @@ -1475,13 +1491,9 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, if (ret) return ret; - if (copy_to_user(tmp.entries, entries, entries_size)) { - BUGPRINT("Couldn't copy entries to userspace\n"); - return -EFAULT; - } /* set the match/watcher/target names right */ return EBT_ENTRY_ITERATE(entries, entries_size, - ebt_make_names, entries, tmp.entries); + ebt_entry_to_user, entries, tmp.entries); } static int do_ebt_set_ctl(struct sock *sk, @@ -1630,8 +1642,10 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, if (match->compat_to_user) { if (match->compat_to_user(cm->data, m->data)) return -EFAULT; - } else if (copy_to_user(cm->data, m->data, msize)) + } else { + if (xt_data_to_user(cm->data, m->data, match->usersize, msize)) return -EFAULT; + } *size -= ebt_compat_entry_padsize() + off; *dstptr = cm->data; @@ -1657,8 +1671,10 @@ static int compat_target_to_user(struct ebt_entry_target *t, if (target->compat_to_user) { if (target->compat_to_user(cm->data, t->data)) return -EFAULT; - } else if (copy_to_user(cm->data, t->data, tsize)) - return -EFAULT; + } else { + if (xt_data_to_user(cm->data, t->data, target->usersize, tsize)) + return -EFAULT; + } *size -= ebt_compat_entry_padsize() + off; *dstptr = cm->data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4f8f2a1a66b5..f3557958e9bf 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -654,7 +654,7 @@ static void skb_release_head_state(struct sk_buff *skb) skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_conntrack_put(skb->nfct); + nf_conntrack_put(skb_nfct(skb)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a467e1236c43..6241a81fd7f5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -677,11 +677,6 @@ static int copy_entries_to_user(unsigned int total_size, return PTR_ERR(counters); loc_cpu_entry = private->entries; - /* ... then copy entire thing ... */ - if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { - ret = -EFAULT; - goto free_counters; - } /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ @@ -689,6 +684,10 @@ static int copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = (struct arpt_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } if (copy_to_user(userptr + off + offsetof(struct arpt_entry, counters), &counters[num], @@ -698,11 +697,7 @@ static int copy_entries_to_user(unsigned int total_size, } t = arpt_get_target_c(e); - if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct xt_entry_target, - u.user.name), - t->u.kernel.target->name, - strlen(t->u.kernel.target->name)+1) != 0) { + if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 91656a1d8fbd..384b85713e06 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -826,10 +826,6 @@ copy_entries_to_user(unsigned int total_size, return PTR_ERR(counters); loc_cpu_entry = private->entries; - if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { - ret = -EFAULT; - goto free_counters; - } /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ @@ -839,6 +835,10 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = (struct ipt_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } if (copy_to_user(userptr + off + offsetof(struct ipt_entry, counters), &counters[num], @@ -852,23 +852,14 @@ copy_entries_to_user(unsigned int total_size, i += m->u.match_size) { m = (void *)e + i; - if (copy_to_user(userptr + off + i - + offsetof(struct xt_entry_match, - u.user.name), - m->u.kernel.match->name, - strlen(m->u.kernel.match->name)+1) - != 0) { + if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ipt_get_target_c(e); - if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct xt_entry_target, - u.user.name), - t->u.kernel.target->name, - strlen(t->u.kernel.target->name)+1) != 0) { + if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 0a783cd73faf..52f26459efc3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -485,6 +485,7 @@ static struct xt_target clusterip_tg_reg __read_mostly = { .checkentry = clusterip_tg_check, .destroy = clusterip_tg_destroy, .targetsize = sizeof(struct ipt_clusterip_tgt_info), + .usersize = offsetof(struct ipt_clusterip_tgt_info, config), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), #endif /* CONFIG_COMPAT */ diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 30c0de53e254..3240a2614e82 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -57,8 +57,7 @@ synproxy_send_tcp(struct net *net, goto free_nskb; if (nfct) { - nskb->nfct = nfct; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } @@ -107,8 +106,8 @@ synproxy_send_client_synack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void @@ -230,8 +229,8 @@ synproxy_send_client_ack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index d075b3cf2400..73c591d8a9a8 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -128,16 +128,16 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(skb->nfct == NULL); + NF_CT_ASSERT(!skb_nfct(skb)); zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ @@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return -NF_ACCEPT; } - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { @@ -169,11 +169,10 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, } if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; + nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); return NF_ACCEPT; } @@ -181,7 +180,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, static int icmp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) + u8 pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; @@ -225,7 +224,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); + return icmp_error_message(net, tmpl, skb, hooknum); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 49bd6a54404f..346bf7ccac08 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -45,7 +45,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) { + if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -75,7 +75,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv, #if !IS_ENABLED(CONFIG_NF_NAT) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif #endif diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index cf986e1c7bbd..f0dbff05fc28 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -68,10 +68,9 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_reset(skb); + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); + nf_conntrack_get(skb_nfct(skb)); #endif /* * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index b24795e2ee6d..f6f713376e6e 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -87,7 +87,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 856648966f4c..c83a9963269b 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -319,7 +319,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 25a022d41a70..1e15c54fd5e2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -855,10 +855,6 @@ copy_entries_to_user(unsigned int total_size, return PTR_ERR(counters); loc_cpu_entry = private->entries; - if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { - ret = -EFAULT; - goto free_counters; - } /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ @@ -868,6 +864,10 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = (struct ip6t_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], @@ -881,23 +881,14 @@ copy_entries_to_user(unsigned int total_size, i += m->u.match_size) { m = (void *)e + i; - if (copy_to_user(userptr + off + i - + offsetof(struct xt_entry_match, - u.user.name), - m->u.kernel.match->name, - strlen(m->u.kernel.match->name)+1) - != 0) { + if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ip6t_get_target_c(e); - if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct xt_entry_target, - u.user.name), - t->u.kernel.target->name, - strlen(t->u.kernel.target->name)+1) != 0) { + if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 590f767db5d4..a379d2f79b19 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -112,6 +112,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), + .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_LOCAL_IN) | @@ -123,6 +124,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), + .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_PRE_ROUTING) | diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 98c8dd38575a..4ef1ddd4bbbd 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -71,8 +71,7 @@ synproxy_send_tcp(struct net *net, skb_dst_set(nskb, dst); if (nfct) { - nskb->nfct = nfct; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } @@ -121,8 +120,8 @@ synproxy_send_client_synack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void @@ -244,8 +243,8 @@ synproxy_send_client_ack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index f5a61bc3ec2b..d2c2ccbfbe72 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -145,15 +145,15 @@ static int icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int icmp6off, - enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(skb->nfct == NULL); + NF_CT_ASSERT(!skb_nfct(skb)); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuplepr(skb, @@ -176,7 +176,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), &intuple); @@ -185,19 +185,18 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } else { if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + ctinfo += IP_CT_IS_REPLY; } /* Update skb to refer to this connection */ - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; + nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); return NF_ACCEPT; } static int icmpv6_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) + u8 pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; @@ -222,9 +221,8 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); + nf_conntrack_get(skb_nfct(skb)); return NF_ACCEPT; } @@ -232,7 +230,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 8e0bdd058787..ada60d1a991b 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -37,7 +37,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) { + if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -61,7 +61,7 @@ static unsigned int ipv6_defrag(void *priv, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 4a84b5ad9ecb..888ecd106e5f 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -57,10 +57,9 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, return; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_reset(skb); + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); + nf_conntrack_get(skb_nfct(skb)); #endif if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 57d86066a13b..055c51b80f5d 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -351,7 +351,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index bbc45f8a7b2d..dfbe9deeb8c4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -162,6 +162,7 @@ config NF_CT_PROTO_SCTP bool 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED default y + select LIBCRC32C help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. @@ -397,7 +398,6 @@ config NF_NAT_PROTO_SCTP bool default NF_NAT && NF_CT_PROTO_SCTP depends on NF_NAT && NF_CT_PROTO_SCTP - select LIBCRC32C config NF_NAT_AMANDA tristate diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ca30d1960f1d..6b3034f12661 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -7,7 +7,6 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o -nf_conntrack-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -47,7 +46,6 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ # NAT protocols (nf_nat) nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o -nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index ce6adfae521a..a87a6f8a74d8 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -375,7 +375,7 @@ void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) { void (*attach)(struct sk_buff *, const struct sk_buff *); - if (skb->nfct) { + if (skb->_nfct) { rcu_read_lock(); attach = rcu_dereference(ip_ct_attach); if (attach) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 8b7416f4e01a..5aeb0dde6ccc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -710,7 +710,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, dest->vport == svc->port))) { /* HIT */ list_del(&dest->t_list); - ip_vs_dest_hold(dest); goto out; } } @@ -740,7 +739,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) * When the ip_vs_control_clearup is activated by ipvs module exit, * the service tables must have been flushed and all the connections * are expired, and the refcnt of each destination in the trash must - * be 0, so we simply release them here. + * be 1, so we simply release them here. */ static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) { @@ -1079,11 +1078,10 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, if (list_empty(&ipvs->dest_trash) && !cleanup) mod_timer(&ipvs->dest_trash_timer, jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); - /* dest lives in trash without reference */ + /* dest lives in trash with reference */ list_add(&dest->t_list, &ipvs->dest_trash); dest->idle_start = 0; spin_unlock_bh(&ipvs->dest_trash_lock); - ip_vs_dest_put(dest); } @@ -1159,7 +1157,7 @@ static void ip_vs_dest_trash_expire(unsigned long data) spin_lock(&ipvs->dest_trash_lock); list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { - if (atomic_read(&dest->refcnt) > 0) + if (atomic_read(&dest->refcnt) > 1) continue; if (dest->idle_start) { if (time_before(now, dest->idle_start + diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 4e8083c5e01d..071b97fcbefb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -350,16 +350,31 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) spin_unlock(&pcpu->lock); } +#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) + /* Released via destroy_conntrack() */ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags) { - struct nf_conn *tmpl; + struct nf_conn *tmpl, *p; - tmpl = kzalloc(sizeof(*tmpl), flags); - if (tmpl == NULL) - return NULL; + if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) { + tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags); + if (!tmpl) + return NULL; + + p = tmpl; + tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); + if (tmpl != p) { + tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); + tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p; + } + } else { + tmpl = kzalloc(sizeof(*tmpl), flags); + if (!tmpl) + return NULL; + } tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); @@ -374,7 +389,11 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl) { nf_ct_ext_destroy(tmpl); nf_ct_ext_free(tmpl); - kfree(tmpl); + + if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) + kfree((char *)tmpl - tmpl->proto.tmpl_padto); + else + kfree(tmpl); } EXPORT_SYMBOL_GPL(nf_ct_tmpl_free); @@ -686,12 +705,12 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, !nfct_nat(ct) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { - nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct); - nf_conntrack_put(skb->nfct); - /* Assign conntrack already in hashes to this skbuff. Don't - * modify skb->nfctinfo to ensure consistent stateful filtering. - */ - skb->nfct = &ct->ct_general; + enum ip_conntrack_info oldinfo; + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); + + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_conntrack_put(&loser_ct->ct_general); + nf_ct_set(skb, ct, oldinfo); return NF_ACCEPT; } NF_CT_STAT_INC(net, drop); @@ -1218,7 +1237,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; } -/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */ static inline struct nf_conn * resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, @@ -1277,8 +1296,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } *set_reply = 0; } - skb->nfct = &ct->ct_general; - skb->nfctinfo = *ctinfo; + nf_ct_set(skb, ct, *ctinfo); return ct; } @@ -1286,7 +1304,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct, *tmpl = NULL; + struct nf_conn *ct, *tmpl; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -1296,14 +1314,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - if (skb->nfct) { + tmpl = nf_ct_get(skb, &ctinfo); + if (tmpl) { /* Previously seen (loopback or untracked)? Ignore. */ - tmpl = (struct nf_conn *)skb->nfct; if (!nf_ct_is_template(tmpl)) { NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } - skb->nfct = NULL; + skb->_nfct = 0; } /* rcu_read_lock()ed by nf_hook_thresh */ @@ -1324,8 +1342,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL) { - ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, - pf, hooknum); + ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum); if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); @@ -1333,7 +1350,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } /* ICMP[v6] protocol trackers may assign one conntrack. */ - if (skb->nfct) + if (skb->_nfct) goto out; } repeat: @@ -1353,7 +1370,7 @@ repeat: goto out; } - NF_CT_ASSERT(skb->nfct); + NF_CT_ASSERT(skb_nfct(skb)); /* Decide what timeout policy we want to apply to this flow. */ timeouts = nf_ct_timeout_lookup(net, ct, l4proto); @@ -1363,8 +1380,8 @@ repeat: /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; + nf_conntrack_put(&ct->ct_general); + skb->_nfct = 0; NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); @@ -1522,9 +1539,8 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) ctinfo = IP_CT_RELATED; /* Attach to new skbuff, and increment count */ - nskb->nfct = &ct->ct_general; - nskb->nfctinfo = ctinfo; - nf_conntrack_get(nskb->nfct); + nf_ct_set(nskb, ct, ctinfo); + nf_conntrack_get(skb_nfct(nskb)); } /* Bring out ya dead! */ @@ -1860,7 +1876,8 @@ int nf_conntrack_init_start(void) nf_conntrack_max = max_factor * nf_conntrack_htable_size; nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), 0, + sizeof(struct nf_conn), + NFCT_INFOMASK + 1, SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index b68ce6ac13b3..93dd1c5b7bff 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -561,7 +561,6 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, static int dccp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index a0efde38da44..33279aab583d 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -22,7 +22,9 @@ #include #include #include +#include +#include #include #include #include @@ -505,6 +507,34 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, + unsigned int dataoff, + u8 pf, unsigned int hooknum) +{ + const struct sctphdr *sh; + struct sctphdr _sctph; + const char *logmsg; + + sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); + if (!sh) { + logmsg = "nf_ct_sctp: short packet "; + goto out_invalid; + } + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && + skb->ip_summed == CHECKSUM_NONE) { + if (sh->checksum != sctp_compute_cksum(skb, dataoff)) { + logmsg = "nf_ct_sctp: bad CRC "; + goto out_invalid; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + return NF_ACCEPT; +out_invalid: + if (LOG_INVALID(net, IPPROTO_SCTP)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg); + return -NF_ACCEPT; +} + #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include @@ -752,6 +782,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, .new = sctp_new, + .error = sctp_error, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, @@ -786,6 +817,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, .new = sctp_new, + .error = sctp_error, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 69f687740c76..b122e9dacfed 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -750,7 +750,6 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 20f35ed68030..f6ebce6178ca 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -108,8 +108,60 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +static int udplite_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + u8 pf, unsigned int hooknum) +{ + unsigned int udplen = skb->len - dataoff; + const struct udphdr *hdr; + struct udphdr _hdr; + unsigned int cscov; + + /* Header is too small? */ + hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (!hdr) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: short packet "); + return -NF_ACCEPT; + } + + cscov = ntohs(hdr->len); + if (cscov == 0) { + cscov = udplen; + } else if (cscov < sizeof(*hdr) || cscov > udplen) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: invalid checksum coverage "); + return -NF_ACCEPT; + } + + /* UDPLITE mandates checksums */ + if (!hdr->check) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: checksum missing "); + return -NF_ACCEPT; + } + + /* Checksum invalid? Ignore. */ + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && + nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, + pf)) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: bad UDPLite checksum "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} +#endif + static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u_int8_t pf, unsigned int hooknum) { @@ -290,6 +342,41 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +{ + .l3proto = PF_INET, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .allow_clash = true, + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .packet = udp_packet, + .get_timeouts = udp_get_timeouts, + .new = udp_new, + .error = udplite_error, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, + .nla_policy = nf_ct_port_nla_policy, +#endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, +}; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); +#endif + struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = { .l3proto = PF_INET6, @@ -322,3 +409,38 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .get_net_proto = udp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); + +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +{ + .l3proto = PF_INET6, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .allow_clash = true, + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .packet = udp_packet, + .get_timeouts = udp_get_timeouts, + .new = udp_new, + .error = udplite_error, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, + .nla_policy = nf_ct_port_nla_policy, +#endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, +}; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); +#endif diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c deleted file mode 100644 index c35f7bf05d8c..000000000000 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ /dev/null @@ -1,324 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team - * (C) 2007 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = { - [UDPLITE_CT_UNREPLIED] = 30*HZ, - [UDPLITE_CT_REPLIED] = 180*HZ, -}; - -static inline struct nf_udplite_net *udplite_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.udplite; -} - -static bool udplite_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct net *net, - struct nf_conntrack_tuple *tuple) -{ - const struct udphdr *hp; - struct udphdr _hdr; - - /* Actually only need first 4 bytes to get ports. */ - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); - if (hp == NULL) - return false; - - tuple->src.u.udp.port = hp->source; - tuple->dst.u.udp.port = hp->dest; - return true; -} - -static bool udplite_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) -{ - tuple->src.u.udp.port = orig->dst.u.udp.port; - tuple->dst.u.udp.port = orig->src.u.udp.port; - return true; -} - -/* Print out the per-protocol part of the tuple. */ -static void udplite_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.udp.port), - ntohs(tuple->dst.u.udp.port)); -} - -static unsigned int *udplite_get_timeouts(struct net *net) -{ - return udplite_pernet(net)->timeouts; -} - -/* Returns verdict for packet, and may modify conntracktype */ -static int udplite_packet(struct nf_conn *ct, - const struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, - unsigned int *timeouts) -{ - /* If we've seen traffic both ways, this is some kind of UDP - stream. Extend timeout. */ - if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDPLITE_CT_REPLIED]); - /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } else { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDPLITE_CT_UNREPLIED]); - } - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff, unsigned int *timeouts) -{ - return true; -} - -static int udplite_error(struct net *net, struct nf_conn *tmpl, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info *ctinfo, - u_int8_t pf, - unsigned int hooknum) -{ - unsigned int udplen = skb->len - dataoff; - const struct udphdr *hdr; - struct udphdr _hdr; - unsigned int cscov; - - /* Header is too small? */ - hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); - if (hdr == NULL) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: short packet "); - return -NF_ACCEPT; - } - - cscov = ntohs(hdr->len); - if (cscov == 0) - cscov = udplen; - else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: invalid checksum coverage "); - return -NF_ACCEPT; - } - - /* UDPLITE mandates checksums */ - if (!hdr->check) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: checksum missing "); - return -NF_ACCEPT; - } - - /* Checksum invalid? Ignore. */ - if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && - nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, - pf)) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: bad UDPLite checksum "); - return -NF_ACCEPT; - } - - return NF_ACCEPT; -} - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - -#include -#include - -static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], - struct net *net, void *data) -{ - unsigned int *timeouts = data; - struct nf_udplite_net *un = udplite_pernet(net); - - /* set default timeouts for UDPlite. */ - timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; - timeouts[UDPLITE_CT_REPLIED] = un->timeouts[UDPLITE_CT_REPLIED]; - - if (tb[CTA_TIMEOUT_UDPLITE_UNREPLIED]) { - timeouts[UDPLITE_CT_UNREPLIED] = - ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_UNREPLIED])) * HZ; - } - if (tb[CTA_TIMEOUT_UDPLITE_REPLIED]) { - timeouts[UDPLITE_CT_REPLIED] = - ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_REPLIED])) * HZ; - } - return 0; -} - -static int -udplite_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) -{ - const unsigned int *timeouts = data; - - if (nla_put_be32(skb, CTA_TIMEOUT_UDPLITE_UNREPLIED, - htonl(timeouts[UDPLITE_CT_UNREPLIED] / HZ)) || - nla_put_be32(skb, CTA_TIMEOUT_UDPLITE_REPLIED, - htonl(timeouts[UDPLITE_CT_REPLIED] / HZ))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -ENOSPC; -} - -static const struct nla_policy -udplite_timeout_nla_policy[CTA_TIMEOUT_UDPLITE_MAX+1] = { - [CTA_TIMEOUT_UDPLITE_UNREPLIED] = { .type = NLA_U32 }, - [CTA_TIMEOUT_UDPLITE_REPLIED] = { .type = NLA_U32 }, -}; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - -#ifdef CONFIG_SYSCTL -static struct ctl_table udplite_sysctl_table[] = { - { - .procname = "nf_conntrack_udplite_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_udplite_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_udplite_net *un) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(udplite_sysctl_table, - sizeof(udplite_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED]; - pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED]; -#endif - return 0; -} - -static int udplite_init_net(struct net *net, u_int16_t proto) -{ - struct nf_udplite_net *un = udplite_pernet(net); - struct nf_proto_net *pn = &un->pn; - - if (!pn->users) { - int i; - - for (i = 0 ; i < UDPLITE_CT_MAX; i++) - un->timeouts[i] = udplite_timeouts[i]; - } - - return udplite_kmemdup_sysctl_table(pn, un); -} - -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = -{ - .l3proto = PF_INET, - .l4proto = IPPROTO_UDPLITE, - .name = "udplite", - .allow_clash = true, - .pkt_to_tuple = udplite_pkt_to_tuple, - .invert_tuple = udplite_invert_tuple, - .print_tuple = udplite_print_tuple, - .packet = udplite_packet, - .get_timeouts = udplite_get_timeouts, - .new = udplite_new, - .error = udplite_error, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nla_policy = nf_ct_port_nla_policy, -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - .ctnl_timeout = { - .nlattr_to_obj = udplite_timeout_nlattr_to_obj, - .obj_to_nlattr = udplite_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, - .obj_size = sizeof(unsigned int) * - CTA_TIMEOUT_UDPLITE_MAX, - .nla_policy = udplite_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = udplite_init_net, -}; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); - -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = -{ - .l3proto = PF_INET6, - .l4proto = IPPROTO_UDPLITE, - .name = "udplite", - .allow_clash = true, - .pkt_to_tuple = udplite_pkt_to_tuple, - .invert_tuple = udplite_invert_tuple, - .print_tuple = udplite_print_tuple, - .packet = udplite_packet, - .get_timeouts = udplite_get_timeouts, - .new = udplite_new, - .error = udplite_error, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nla_policy = nf_ct_port_nla_policy, -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - .ctnl_timeout = { - .nlattr_to_obj = udplite_timeout_nlattr_to_obj, - .obj_to_nlattr = udplite_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, - .obj_size = sizeof(unsigned int) * - CTA_TIMEOUT_UDPLITE_MAX, - .nla_policy = udplite_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = udplite_init_net, -}; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index d009ae663453..2256147dcaad 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -642,6 +642,9 @@ static int __init nf_conntrack_standalone_init(void) if (ret < 0) goto out_start; + BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK); + BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER); + #ifdef CONFIG_SYSCTL nf_ct_netfilter_header = register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index ffb9e8ada899..8d85a0598b60 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -15,6 +15,9 @@ #define NFLOGGER_NAME_LEN 64 +int sysctl_nf_log_all_netns __read_mostly; +EXPORT_SYMBOL(sysctl_nf_log_all_netns); + static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); @@ -413,6 +416,18 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; +static struct ctl_table_header *nf_log_sysctl_fhdr; + +static struct ctl_table nf_log_sysctl_ftable[] = { + { + .procname = "nf_log_all_netns", + .data = &sysctl_nf_log_all_netns, + .maxlen = sizeof(sysctl_nf_log_all_netns), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; static int nf_log_proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -482,6 +497,10 @@ static int netfilter_log_sysctl_init(struct net *net) nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; } + nf_log_sysctl_fhdr = register_net_sysctl(net, "net/netfilter", + nf_log_sysctl_ftable); + if (!nf_log_sysctl_fhdr) + goto err_freg; } for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) @@ -498,6 +517,9 @@ static int netfilter_log_sysctl_init(struct net *net) err_reg: if (!net_eq(net, &init_net)) kfree(table); + else + unregister_net_sysctl_table(nf_log_sysctl_fhdr); +err_freg: err_alloc: return -ENOMEM; } @@ -510,6 +532,8 @@ static void netfilter_log_sysctl_exit(struct net *net) unregister_net_sysctl_table(net->nf.nf_log_dir_header); if (!net_eq(net, &init_net)) kfree(table); + else + unregister_net_sysctl_table(nf_log_sysctl_fhdr); } #else static int netfilter_log_sysctl_init(struct net *net) diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 2840abb5bb99..211661cb2c90 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -60,7 +60,7 @@ static void mangle_contents(struct sk_buff *skb, __skb_trim(skb, skb->len + rep_len - match_len); } - if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) { + if (nf_ct_l3num((struct nf_conn *)skb_nfct(skb)) == NFPROTO_IPV4) { /* fix IP hdr checksum information */ ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index b1e627227b6e..edd4a77dc09a 100644 --- a/net/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -30,20 +30,15 @@ udp_unique_tuple(const struct nf_nat_l3proto *l3proto, &udp_port_rover); } -static bool -udp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) +static void +__udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, struct udphdr *hdr, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, bool do_csum) { - struct udphdr *hdr; __be16 *portptr, newport; - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - hdr = (struct udphdr *)(skb->data + hdroff); - if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src port */ newport = tuple->src.u.udp.port; @@ -53,7 +48,7 @@ udp_manip_pkt(struct sk_buff *skb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { + if (do_csum) { l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, @@ -62,9 +57,68 @@ udp_manip_pkt(struct sk_buff *skb, hdr->check = CSUM_MANGLED_0; } *portptr = newport; +} + +static bool udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + bool do_csum; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; + + __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum); return true; } +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE +static u16 udplite_port_rover; + +static bool udplite_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true); + return true; +} + +static void +udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &udplite_port_rover); +} + +const struct nf_nat_l4proto nf_nat_l4proto_udplite = { + .l4proto = IPPROTO_UDPLITE, + .manip_pkt = udplite_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = udplite_unique_tuple, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; +#endif /* CONFIG_NF_NAT_PROTO_UDPLITE */ + const struct nf_nat_l4proto nf_nat_l4proto_udp = { .l4proto = IPPROTO_UDP, .manip_pkt = udp_manip_pkt, diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c deleted file mode 100644 index 366bfbfd82a1..000000000000 --- a/net/netfilter/nf_nat_proto_udplite.c +++ /dev/null @@ -1,73 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include -#include -#include -#include - -static u16 udplite_port_rover; - -static void -udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &udplite_port_rover); -} - -static bool -udplite_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct udphdr *hdr; - __be16 *portptr, newport; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct udphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of source port */ - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst port */ - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - - l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false); - if (!hdr->check) - hdr->check = CSUM_MANGLED_0; - - *portptr = newport; - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_udplite = { - .l4proto = IPPROTO_UDPLITE, - .manip_pkt = udplite_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = udplite_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1b913760f205..57eeae63f597 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -576,6 +576,28 @@ err: return err; } +static void _nf_tables_table_disable(struct net *net, + const struct nft_af_info *afi, + struct nft_table *table, + u32 cnt) +{ + struct nft_chain *chain; + u32 i = 0; + + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + + if (cnt && i++ == cnt) + break; + + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); + } +} + static int nf_tables_table_enable(struct net *net, const struct nft_af_info *afi, struct nft_table *table) @@ -598,18 +620,8 @@ static int nf_tables_table_enable(struct net *net, } return 0; err: - list_for_each_entry(chain, &table->chains, list) { - if (!nft_is_active_next(net, chain)) - continue; - if (!(chain->flags & NFT_BASE_CHAIN)) - continue; - - if (i-- <= 0) - break; - - nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, - afi->nops); - } + if (i) + _nf_tables_table_disable(net, afi, table, i); return err; } @@ -617,17 +629,7 @@ static void nf_tables_table_disable(struct net *net, const struct nft_af_info *afi, struct nft_table *table) { - struct nft_chain *chain; - - list_for_each_entry(chain, &table->chains, list) { - if (!nft_is_active_next(net, chain)) - continue; - if (!(chain->flags & NFT_BASE_CHAIN)) - continue; - - nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, - afi->nops); - } + _nf_tables_table_disable(net, afi, table, 0); } static int nf_tables_updtable(struct nft_ctx *ctx) @@ -696,10 +698,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); - table = NULL; - } - - if (table != NULL) { + } else { if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -2966,10 +2965,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (IS_ERR(set)) { if (PTR_ERR(set) != -ENOENT) return PTR_ERR(set); - set = NULL; - } - - if (set != NULL) { + } else { if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -4163,10 +4159,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, if (err != -ENOENT) return err; - obj = NULL; - } - - if (obj != NULL) { + } else { if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index e6baeaebe653..66a2377510e1 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -129,6 +129,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr, memcpy(dest, &count, sizeof(count)); return; } + case NFT_CT_AVGPKT: { + const struct nf_conn_acct *acct = nf_conn_acct_find(ct); + u64 avgcnt = 0, bcnt = 0, pcnt = 0; + + if (acct) { + pcnt = nft_ct_get_eval_counter(acct->counter, + NFT_CT_PKTS, priv->dir); + bcnt = nft_ct_get_eval_counter(acct->counter, + NFT_CT_BYTES, priv->dir); + if (pcnt != 0) + avgcnt = div64_u64(bcnt, pcnt); + } + + memcpy(dest, &avgcnt, sizeof(avgcnt)); + return; + } case NFT_CT_L3PROTOCOL: *dest = nf_ct_l3num(ct); return; @@ -316,6 +332,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, break; case NFT_CT_BYTES: case NFT_CT_PKTS: + case NFT_CT_AVGPKT: /* no direction? return sum of original + reply */ if (tb[NFTA_CT_DIRECTION] == NULL) priv->dir = IP_CT_DIR_MAX; @@ -346,7 +363,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS) + if (priv->key == NFT_CT_BYTES || + priv->key == NFT_CT_PKTS || + priv->key == NFT_CT_AVGPKT) nf_ct_set_acct(ctx->net, true); return 0; @@ -445,6 +464,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) break; case NFT_CT_BYTES: case NFT_CT_PKTS: + case NFT_CT_AVGPKT: if (priv->dir < IP_CT_DIR_MAX && nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) goto nla_put_failure; @@ -534,8 +554,7 @@ static void nft_notrack_eval(const struct nft_expr *expr, ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, ct, IP_CT_NEW); } static struct nft_expr_type nft_notrack_type; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 66c7f4b4c49b..e1f5ca9b423b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -154,13 +154,36 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = PACKET_BROADCAST; break; case NFPROTO_IPV6: - if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) + *dest = PACKET_MULTICAST; + break; + case NFPROTO_NETDEV: + switch (skb->protocol) { + case htons(ETH_P_IP): { + int noff = skb_network_offset(skb); + struct iphdr *iph, _iph; + + iph = skb_header_pointer(skb, noff, + sizeof(_iph), &_iph); + if (!iph) + goto err; + + if (ipv4_is_multicast(iph->daddr)) + *dest = PACKET_MULTICAST; + else + *dest = PACKET_BROADCAST; + + break; + } + case htons(ETH_P_IPV6): *dest = PACKET_MULTICAST; - else - *dest = PACKET_BROADCAST; + break; + default: + WARN_ON_ONCE(1); + goto err; + } break; default: - WARN_ON(1); + WARN_ON_ONCE(1); goto err; } break; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2ff499680cc6..016db6be94b9 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -262,6 +262,60 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) } EXPORT_SYMBOL_GPL(xt_request_find_target); + +static int xt_obj_to_user(u16 __user *psize, u16 size, + void __user *pname, const char *name, + u8 __user *prev, u8 rev) +{ + if (put_user(size, psize)) + return -EFAULT; + if (copy_to_user(pname, name, strlen(name) + 1)) + return -EFAULT; + if (put_user(rev, prev)) + return -EFAULT; + + return 0; +} + +#define XT_OBJ_TO_USER(U, K, TYPE, C_SIZE) \ + xt_obj_to_user(&U->u.TYPE##_size, C_SIZE ? : K->u.TYPE##_size, \ + U->u.user.name, K->u.kernel.TYPE->name, \ + &U->u.user.revision, K->u.kernel.TYPE->revision) + +int xt_data_to_user(void __user *dst, const void *src, + int usersize, int size) +{ + usersize = usersize ? : size; + if (copy_to_user(dst, src, usersize)) + return -EFAULT; + if (usersize != size && clear_user(dst + usersize, size - usersize)) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL_GPL(xt_data_to_user); + +#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ + xt_data_to_user(U->data, K->data, \ + K->u.kernel.TYPE->usersize, \ + C_SIZE ? : K->u.kernel.TYPE->TYPE##size) + +int xt_match_to_user(const struct xt_entry_match *m, + struct xt_entry_match __user *u) +{ + return XT_OBJ_TO_USER(u, m, match, 0) || + XT_DATA_TO_USER(u, m, match, 0); +} +EXPORT_SYMBOL_GPL(xt_match_to_user); + +int xt_target_to_user(const struct xt_entry_target *t, + struct xt_entry_target __user *u) +{ + return XT_OBJ_TO_USER(u, t, target, 0) || + XT_DATA_TO_USER(u, t, target, 0); +} +EXPORT_SYMBOL_GPL(xt_target_to_user); + static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_match *m; @@ -565,17 +619,14 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, int off = xt_compat_match_offset(match); u_int16_t msize = m->u.user.match_size - off; - if (copy_to_user(cm, m, sizeof(*cm)) || - put_user(msize, &cm->u.user.match_size) || - copy_to_user(cm->u.user.name, m->u.kernel.match->name, - strlen(m->u.kernel.match->name) + 1)) + if (XT_OBJ_TO_USER(cm, m, match, msize)) return -EFAULT; if (match->compat_to_user) { if (match->compat_to_user((void __user *)cm->data, m->data)) return -EFAULT; } else { - if (copy_to_user(cm->data, m->data, msize - sizeof(*cm))) + if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) return -EFAULT; } @@ -923,17 +974,14 @@ int xt_compat_target_to_user(const struct xt_entry_target *t, int off = xt_compat_target_offset(target); u_int16_t tsize = t->u.user.target_size - off; - if (copy_to_user(ct, t, sizeof(*ct)) || - put_user(tsize, &ct->u.user.target_size) || - copy_to_user(ct->u.user.name, t->u.kernel.target->name, - strlen(t->u.kernel.target->name) + 1)) + if (XT_OBJ_TO_USER(ct, t, target, tsize)) return -EFAULT; if (target->compat_to_user) { if (target->compat_to_user((void __user *)ct->data, t->data)) return -EFAULT; } else { - if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct))) + if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) return -EFAULT; } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 95c750358747..b008db0184b8 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -23,15 +23,14 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) + if (skb->_nfct != 0) return XT_CONTINUE; /* special case the untracked ct : we want the percpu object */ if (!ct) ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, ct, IP_CT_NEW); return XT_CONTINUE; } @@ -373,6 +372,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .name = "CT", .family = NFPROTO_UNSPEC, .targetsize = sizeof(struct xt_ct_target_info), + .usersize = offsetof(struct xt_ct_target_info, ct), .checkentry = xt_ct_tg_check_v0, .destroy = xt_ct_tg_destroy_v0, .target = xt_ct_target_v0, @@ -384,6 +384,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_UNSPEC, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), + .usersize = offsetof(struct xt_ct_target_info, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -395,6 +396,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_UNSPEC, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), + .usersize = offsetof(struct xt_ct_target_info, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -407,12 +409,11 @@ static unsigned int notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) { /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) + if (skb->_nfct != 0) return XT_CONTINUE; - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); + nf_conntrack_get(skb_nfct(skb)); return XT_CONTINUE; } diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 91a373a3f534..498b54fd04d7 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -162,6 +162,7 @@ static struct xt_target xt_rateest_tg_reg __read_mostly = { .checkentry = xt_rateest_tg_checkentry, .destroy = xt_rateest_tg_destroy, .targetsize = sizeof(struct xt_rateest_target_info), + .usersize = offsetof(struct xt_rateest_target_info, est), .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 1c57ace75ae6..86b0580b2216 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -133,6 +133,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .target = tee_tg4, .targetsize = sizeof(struct xt_tee_tginfo), + .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, @@ -144,6 +145,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .target = tee_tg6, .targetsize = sizeof(struct xt_tee_tginfo), + .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 2dedaa23ab0a..38986a95216c 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -110,6 +110,7 @@ static struct xt_match bpf_mt_reg[] __read_mostly = { .match = bpf_mt, .destroy = bpf_mt_destroy, .matchsize = sizeof(struct xt_bpf_info), + .usersize = offsetof(struct xt_bpf_info, filter), .me = THIS_MODULE, }, { @@ -120,6 +121,7 @@ static struct xt_match bpf_mt_reg[] __read_mostly = { .match = bpf_mt_v1, .destroy = bpf_mt_destroy_v1, .matchsize = sizeof(struct xt_bpf_info_v1), + .usersize = offsetof(struct xt_bpf_info_v1, filter), .me = THIS_MODULE, }, }; diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index a086a914865f..1db1ce59079f 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -122,6 +122,7 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = { .checkentry = cgroup_mt_check_v1, .match = cgroup_mt_v1, .matchsize = sizeof(struct xt_cgroup_info_v1), + .usersize = offsetof(struct xt_cgroup_info_v1, priv), .destroy = cgroup_mt_destroy_v1, .me = THIS_MODULE, .hooks = (1 << NF_INET_LOCAL_OUT) | diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 2aff2b7c4689..b8fd4ab762ed 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -218,7 +218,7 @@ count_tree(struct net *net, struct rb_root *root, int diff; bool addit; - rbconn = container_of(*rbnode, struct xt_connlimit_rb, node); + rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); parent = *rbnode; diff = same_source_net(addr, mask, &rbconn->addr, family); @@ -398,7 +398,7 @@ static void destroy_tree(struct rb_root *r) struct rb_node *node; while ((node = rb_first(r)) != NULL) { - rbconn = container_of(node, struct xt_connlimit_rb, node); + rbconn = rb_entry(node, struct xt_connlimit_rb, node); rb_erase(node, r); @@ -431,6 +431,7 @@ static struct xt_match connlimit_mt_reg __read_mostly = { .checkentry = connlimit_mt_check, .match = connlimit_mt, .matchsize = sizeof(struct xt_connlimit_info), + .usersize = offsetof(struct xt_connlimit_info, data), .destroy = connlimit_mt_destroy, .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 10063408141d..26ef70c50e3b 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -838,6 +838,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), + .usersize = offsetof(struct xt_hashlimit_mtinfo1, hinfo), .checkentry = hashlimit_mt_check_v1, .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, @@ -848,6 +849,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .match = hashlimit_mt, .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo), .checkentry = hashlimit_mt_check, .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, @@ -859,6 +861,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .family = NFPROTO_IPV6, .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), + .usersize = offsetof(struct xt_hashlimit_mtinfo1, hinfo), .checkentry = hashlimit_mt_check_v1, .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, @@ -869,6 +872,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .family = NFPROTO_IPV6, .match = hashlimit_mt, .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo), .checkentry = hashlimit_mt_check, .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index bef850596558..dab962df1787 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -192,6 +192,8 @@ static struct xt_match limit_mt_reg __read_mostly = { .compatsize = sizeof(struct compat_xt_rateinfo), .compat_from_user = limit_mt_compat_from_user, .compat_to_user = limit_mt_compat_to_user, +#else + .usersize = offsetof(struct xt_rateinfo, prev), #endif .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 57efb703ff18..1ef99151b3ba 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -33,8 +33,7 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) else if (xt_family(par) == NFPROTO_IPV4 && ipv4_is_multicast(ip_hdr(skb)->daddr)) type = PACKET_MULTICAST; - else if (xt_family(par) == NFPROTO_IPV6 && - ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) + else if (xt_family(par) == NFPROTO_IPV6) type = PACKET_MULTICAST; else type = PACKET_BROADCAST; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 44c8eb4c9d66..10d61a6eed71 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -73,6 +73,7 @@ static struct xt_match quota_mt_reg __read_mostly = { .checkentry = quota_mt_check, .destroy = quota_mt_destroy, .matchsize = sizeof(struct xt_quota_info), + .usersize = offsetof(struct xt_quota_info, master), .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 1db02f6fca54..755d2f6693a2 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -133,6 +133,7 @@ static struct xt_match xt_rateest_mt_reg __read_mostly = { .checkentry = xt_rateest_mt_checkentry, .destroy = xt_rateest_mt_destroy, .matchsize = sizeof(struct xt_rateest_match_info), + .usersize = offsetof(struct xt_rateest_match_info, est1), .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 0bc3460319c8..423293ee57c2 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -77,6 +77,7 @@ static struct xt_match xt_string_mt_reg __read_mostly = { .match = string_mt, .destroy = string_mt_destroy, .matchsize = sizeof(struct xt_string_info), + .usersize = offsetof(struct xt_string_info, config), .me = THIS_MODULE, }; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 54253ea5976e..fbffe0ea4c4f 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -460,8 +460,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ct = nf_ct_tuplehash_to_ctrack(h); - skb->nfct = &ct->ct_general; - skb->nfctinfo = ovs_ct_get_info(h); + nf_ct_set(skb, ct, ovs_ct_get_info(h)); return ct; } @@ -722,11 +721,10 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, /* Associate skb with specified zone. */ if (tmpl) { - if (skb->nfct) - nf_conntrack_put(skb->nfct); + if (skb_nfct(skb)) + nf_conntrack_put(skb_nfct(skb)); nf_conntrack_get(&tmpl->ct_general); - skb->nfct = &tmpl->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, tmpl, IP_CT_NEW); } err = nf_conntrack_in(net, info->family, @@ -820,7 +818,7 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, if (err) return err; - ct = (struct nf_conn *)skb->nfct; + ct = (struct nf_conn *)skb_nfct(skb); if (ct) nf_ct_deliver_cached_events(ct); } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 6575aba87630..3d6b9286c203 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -129,7 +129,7 @@ static u32 flow_get_mark(const struct sk_buff *skb) static u32 flow_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - return addr_fold(skb->nfct); + return addr_fold(skb_nfct(skb)); #else return 0; #endif