diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 3fea7709a441..fc8c15a24a43 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -101,12 +101,16 @@ enum ip_conntrack_status { IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), + /* Conntrack has been offloaded to flow table. */ + IPS_OFFLOAD_BIT = 14, + IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), + /* Be careful here, modifying these bits can make things messy, * so don't let users modify them directly. */ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | - IPS_SEQ_ADJUST | IPS_TEMPLATE), + IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), __IPS_MAX_BIT = 14, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 85f643c1e227..6a64d528d076 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -901,6 +901,9 @@ static unsigned int early_drop_list(struct net *net, hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) + continue; + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); continue; @@ -975,6 +978,18 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) return false; } +#define DAY (86400 * HZ) + +/* Set an arbitrary timeout large enough not to ever expire, this save + * us a check for the IPS_OFFLOAD_BIT from the packet path via + * nf_ct_is_expired(). + */ +static void nf_ct_offload_timeout(struct nf_conn *ct) +{ + if (nf_ct_expires(ct) < DAY / 2) + ct->timeout = nfct_time_stamp + DAY; +} + static void gc_worker(struct work_struct *work) { unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); @@ -1011,6 +1026,11 @@ static void gc_worker(struct work_struct *work) tmp = nf_ct_tuplehash_to_ctrack(h); scanned++; + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { + nf_ct_offload_timeout(tmp); + continue; + } + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); expired_count++; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 316bbdc4a158..7c7921a53b13 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1110,6 +1110,14 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, }; +static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) +{ + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + return 0; + + return ctnetlink_filter_match(ct, data); +} + static int ctnetlink_flush_conntrack(struct net *net, const struct nlattr * const cda[], u32 portid, int report) @@ -1122,7 +1130,7 @@ static int ctnetlink_flush_conntrack(struct net *net, return PTR_ERR(filter); } - nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter, + nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter, portid, report); kfree(filter); @@ -1168,6 +1176,11 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, ct = nf_ct_tuplehash_to_ctrack(h); + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { + nf_ct_put(ct); + return -EBUSY; + } + if (cda[CTA_ID]) { u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); if (id != (u32)(unsigned long)ct) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 684cc29010a0..e97cdc1cf98c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + return; + seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); } #endif diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5a101caa3e12..46d32baad095 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *s, void *v) WARN_ON(!l4proto); ret = -ENOSPC; - seq_printf(s, "%-8s %u %-8s %u %ld ", + seq_printf(s, "%-8s %u %-8s %u ", l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), - l4proto_name(l4proto->l4proto), nf_ct_protonum(ct), - nf_ct_expires(ct) / HZ); + l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); + + if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) + seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) l4proto->print_conntrack(s, ct); @@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; - if (test_bit(IPS_ASSURED_BIT, &ct->status)) + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + seq_puts(s, "[OFFLOAD] "); + else if (test_bit(IPS_ASSURED_BIT, &ct->status)) seq_puts(s, "[ASSURED] "); if (seq_has_overflowed(s))