2007-02-12 20:15:49 +01:00
|
|
|
/* netfilter.c: look after the filters for various protocols.
|
2005-08-10 05:21:49 +02:00
|
|
|
* Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
|
|
|
|
*
|
|
|
|
* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
|
|
|
|
* way.
|
|
|
|
*
|
|
|
|
* Rusty Russell (C)2000 -- This code is GPL.
|
2013-04-06 15:24:29 +02:00
|
|
|
* Patrick McHardy (c) 2006-2012
|
2005-08-10 05:21:49 +02:00
|
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/netfilter.h>
|
|
|
|
#include <net/protocol.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/wait.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/if.h>
|
|
|
|
#include <linux/netdevice.h>
|
2014-11-13 10:04:16 +01:00
|
|
|
#include <linux/netfilter_ipv6.h>
|
2005-08-10 05:21:49 +02:00
|
|
|
#include <linux/inetdevice.h>
|
|
|
|
#include <linux/proc_fs.h>
|
2007-02-12 20:09:55 +01:00
|
|
|
#include <linux/mutex.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surroundings. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 09:04:11 +01:00
|
|
|
#include <linux/slab.h>
|
2016-09-21 17:35:07 +02:00
|
|
|
#include <linux/rcupdate.h>
|
2007-09-12 12:01:34 +02:00
|
|
|
#include <net/net_namespace.h>
|
2005-08-10 05:21:49 +02:00
|
|
|
#include <net/sock.h>
|
|
|
|
|
|
|
|
#include "nf_internals.h"
|
|
|
|
|
2007-02-12 20:09:55 +01:00
|
|
|
/* Serializes writes to the nf_afinfo[] table; taken by
 * nf_register_afinfo() and nf_unregister_afinfo().
 */
static DEFINE_MUTEX(afinfo_mutex);
|
2006-04-06 23:18:09 +02:00
|
|
|
|
2010-03-09 20:59:15 +01:00
|
|
|
/* Per-protocol-family helper tables, indexed by afinfo->family.  Slots are
 * RCU pointers: they are written under afinfo_mutex by
 * nf_register_afinfo()/nf_unregister_afinfo(), and the unregister path
 * waits for an RCU grace period after clearing a slot.
 */
const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
|
2006-04-06 23:18:09 +02:00
|
|
|
EXPORT_SYMBOL(nf_afinfo);
|
2013-05-17 05:56:10 +02:00
|
|
|
/* RCU-annotated pointer to the IPv6 helper operations.  Nothing in the
 * visible part of this file assigns it -- presumably the IPv6 code does;
 * TODO confirm against the setter.
 */
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
|
|
|
|
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
|
2006-04-06 23:18:09 +02:00
|
|
|
|
2015-07-14 17:51:07 +02:00
|
|
|
/* Per-CPU boolean flag, exported for use by other modules.  It is not
 * read or written in the visible part of this file.
 * NOTE(review): the name suggests a guard for skb duplication paths --
 * confirm against its users.
 */
DEFINE_PER_CPU(bool, nf_skb_duplicated);
|
|
|
|
EXPORT_SYMBOL_GPL(nf_skb_duplicated);
|
|
|
|
|
2007-12-18 07:42:27 +01:00
|
|
|
int nf_register_afinfo(const struct nf_afinfo *afinfo)
|
2006-04-06 23:18:09 +02:00
|
|
|
{
|
2014-07-31 20:38:46 +02:00
|
|
|
mutex_lock(&afinfo_mutex);
|
2011-08-01 18:19:00 +02:00
|
|
|
RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
|
2007-02-12 20:09:55 +01:00
|
|
|
mutex_unlock(&afinfo_mutex);
|
2006-04-06 23:18:09 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(nf_register_afinfo);
|
|
|
|
|
2007-12-18 07:42:27 +01:00
|
|
|
void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
|
2006-04-06 23:18:09 +02:00
|
|
|
{
|
2007-02-12 20:09:55 +01:00
|
|
|
mutex_lock(&afinfo_mutex);
|
2011-08-01 18:19:00 +02:00
|
|
|
RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
|
2007-02-12 20:09:55 +01:00
|
|
|
mutex_unlock(&afinfo_mutex);
|
2006-04-06 23:18:09 +02:00
|
|
|
synchronize_rcu();
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
|
|
|
|
|
2014-08-22 04:40:15 +02:00
|
|
|
#ifdef HAVE_JUMP_LABEL
|
2012-02-24 08:31:31 +01:00
|
|
|
/* One static key per (protocol family, hook number) pair; incremented in
 * nf_register_net_hook() and decremented in nf_unregister_net_hook().
 */
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
2011-11-18 18:32:46 +01:00
|
|
|
EXPORT_SYMBOL(nf_hooks_needed);
|
|
|
|
#endif
|
|
|
|
|
2007-02-12 20:10:14 +01:00
|
|
|
/* Serializes modifications of the hook entry lists; held while
 * nf_register_net_hook()/nf_unregister_net_hook() walk and relink them.
 */
static DEFINE_MUTEX(nf_hook_mutex);
|
2016-09-21 17:35:07 +02:00
|
|
|
/* Update-side dereference of a hook entry pointer: the caller is expected
 * to hold nf_hook_mutex, which is what the lockdep condition checks.
 */
#define nf_entry_dereference(entry_ptr)					\
	rcu_dereference_protected((entry_ptr),				\
				  lockdep_is_held(&nf_hook_mutex))
|
2005-08-10 05:21:49 +02:00
|
|
|
|
2016-09-21 17:35:07 +02:00
|
|
|
static struct nf_hook_entry *nf_hook_entry_head(struct net *net,
|
|
|
|
const struct nf_hook_ops *reg)
|
2005-08-10 05:21:49 +02:00
|
|
|
{
|
2016-09-21 17:35:07 +02:00
|
|
|
struct nf_hook_entry *hook_head = NULL;
|
2005-08-10 05:21:49 +02:00
|
|
|
|
2015-07-11 01:14:30 +02:00
|
|
|
if (reg->pf != NFPROTO_NETDEV)
|
2016-09-21 17:35:07 +02:00
|
|
|
hook_head = nf_entry_dereference(net->nf.hooks[reg->pf]
|
|
|
|
[reg->hooknum]);
|
2015-07-11 01:14:30 +02:00
|
|
|
else if (reg->hooknum == NF_NETDEV_INGRESS) {
|
netfilter: add netfilter ingress hook after handle_ing() under unique static key
This patch adds the Netfilter ingress hook just after the existing tc ingress
hook, that seems to be the consensus solution for this.
Note that the Netfilter hook resides under the global static key that enables
ingress filtering. Nonetheless, Netfilter still also has its own static key for
minimal impact on the existing handle_ing().
* Without this patch:
Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags)
16086246pps 7721Mb/sec (7721398080bps) errors: 100000000
42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch:
Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags)
16090536pps 7723Mb/sec (7723457280bps) errors: 100000000
41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* Without this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags)
10788648pps 5178Mb/sec (5178551040bps) errors: 100000000
40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.77% kpktgend_0 [cls_u32] [k] u32_classify
5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker
3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify
2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags)
10743194pps 5156Mb/sec (5156733120bps) errors: 100000000
42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.70% kpktgend_0 [cls_u32] [k] u32_classify
5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker
2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify
1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
Note that the results are very similar before and after.
I can see gcc gets the code under the ingress static key out of the hot path.
Then, on that cold branch, it generates the code to accomodate the netfilter
ingress static key. My explanation for this is that this reduces the pressure
on the instruction cache for non-users as the new code is out of the hot path,
and it comes with minimal impact for tc ingress users.
Using gcc version 4.8.4 on:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 8
[...]
L1d cache: 16K
L1i cache: 64K
L2 cache: 2048K
L3 cache: 8192K
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-13 18:19:38 +02:00
|
|
|
#ifdef CONFIG_NETFILTER_INGRESS
|
2015-07-11 01:15:06 +02:00
|
|
|
if (reg->dev && dev_net(reg->dev) == net)
|
2016-09-21 17:35:07 +02:00
|
|
|
hook_head =
|
|
|
|
nf_entry_dereference(
|
|
|
|
reg->dev->nf_hooks_ingress);
|
netfilter: add netfilter ingress hook after handle_ing() under unique static key
This patch adds the Netfilter ingress hook just after the existing tc ingress
hook, that seems to be the consensus solution for this.
Note that the Netfilter hook resides under the global static key that enables
ingress filtering. Nonetheless, Netfilter still also has its own static key for
minimal impact on the existing handle_ing().
* Without this patch:
Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags)
16086246pps 7721Mb/sec (7721398080bps) errors: 100000000
42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch:
Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags)
16090536pps 7723Mb/sec (7723457280bps) errors: 100000000
41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* Without this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags)
10788648pps 5178Mb/sec (5178551040bps) errors: 100000000
40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.77% kpktgend_0 [cls_u32] [k] u32_classify
5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker
3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify
2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags)
10743194pps 5156Mb/sec (5156733120bps) errors: 100000000
42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.70% kpktgend_0 [cls_u32] [k] u32_classify
5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker
2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify
1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
Note that the results are very similar before and after.
I can see gcc gets the code under the ingress static key out of the hot path.
Then, on that cold branch, it generates the code to accomodate the netfilter
ingress static key. My explanation for this is that this reduces the pressure
on the instruction cache for non-users as the new code is out of the hot path,
and it comes with minimal impact for tc ingress users.
Using gcc version 4.8.4 on:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 8
[...]
L1d cache: 16K
L1i cache: 64K
L2 cache: 2048K
L3 cache: 8192K
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-13 18:19:38 +02:00
|
|
|
#endif
|
|
|
|
}
|
2016-09-21 17:35:07 +02:00
|
|
|
return hook_head;
|
2015-07-11 01:14:30 +02:00
|
|
|
}
|
netfilter: add netfilter ingress hook after handle_ing() under unique static key
This patch adds the Netfilter ingress hook just after the existing tc ingress
hook, that seems to be the consensus solution for this.
Note that the Netfilter hook resides under the global static key that enables
ingress filtering. Nonetheless, Netfilter still also has its own static key for
minimal impact on the existing handle_ing().
* Without this patch:
Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags)
16086246pps 7721Mb/sec (7721398080bps) errors: 100000000
42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch:
Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags)
16090536pps 7723Mb/sec (7723457280bps) errors: 100000000
41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* Without this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags)
10788648pps 5178Mb/sec (5178551040bps) errors: 100000000
40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.77% kpktgend_0 [cls_u32] [k] u32_classify
5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker
3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify
2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags)
10743194pps 5156Mb/sec (5156733120bps) errors: 100000000
42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.70% kpktgend_0 [cls_u32] [k] u32_classify
5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker
2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify
1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
Note that the results are very similar before and after.
I can see gcc gets the code under the ingress static key out of the hot path.
Then, on that cold branch, it generates the code to accommodate the netfilter
ingress static key. My explanation for this is that this reduces the pressure
on the instruction cache for non-users as the new code is out of the hot path,
and it comes with minimal impact for tc ingress users.
Using gcc version 4.8.4 on:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 8
[...]
L1d cache: 16K
L1i cache: 64K
L2 cache: 2048K
L3 cache: 8192K
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-13 18:19:38 +02:00
|
|
|
|
2016-09-21 17:35:07 +02:00
|
|
|
/* must hold nf_hook_mutex */
|
|
|
|
static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg,
|
|
|
|
struct nf_hook_entry *entry)
|
|
|
|
{
|
|
|
|
switch (reg->pf) {
|
|
|
|
case NFPROTO_NETDEV:
|
|
|
|
/* We already checked in nf_register_net_hook() that this is
|
|
|
|
* used from ingress.
|
|
|
|
*/
|
|
|
|
rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum],
|
|
|
|
entry);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2015-07-20 09:31:25 +02:00
|
|
|
|
2015-07-11 01:15:06 +02:00
|
|
|
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
|
2015-07-11 01:14:30 +02:00
|
|
|
{
|
2016-09-21 17:35:07 +02:00
|
|
|
struct nf_hook_entry *hooks_entry;
|
2015-07-20 09:31:25 +02:00
|
|
|
struct nf_hook_entry *entry;
|
2015-07-11 01:15:06 +02:00
|
|
|
|
2016-09-21 17:35:05 +02:00
|
|
|
if (reg->pf == NFPROTO_NETDEV &&
|
|
|
|
(reg->hooknum != NF_NETDEV_INGRESS ||
|
|
|
|
!reg->dev || dev_net(reg->dev) != net))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2015-07-20 09:31:25 +02:00
|
|
|
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
|
|
|
|
if (!entry)
|
2015-07-11 01:15:06 +02:00
|
|
|
return -ENOMEM;
|
2015-07-11 01:14:30 +02:00
|
|
|
|
2015-07-20 09:31:25 +02:00
|
|
|
entry->orig_ops = reg;
|
|
|
|
entry->ops = *reg;
|
2016-09-21 17:35:07 +02:00
|
|
|
entry->next = NULL;
|
|
|
|
|
|
|
|
mutex_lock(&nf_hook_mutex);
|
|
|
|
hooks_entry = nf_hook_entry_head(net, reg);
|
2015-07-11 01:15:06 +02:00
|
|
|
|
2016-09-21 17:35:07 +02:00
|
|
|
if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) {
|
|
|
|
/* This is the case where we need to insert at the head */
|
|
|
|
entry->next = hooks_entry;
|
|
|
|
hooks_entry = NULL;
|
2015-07-18 17:21:14 +02:00
|
|
|
}
|
2015-07-11 01:14:30 +02:00
|
|
|
|
2016-09-21 17:35:07 +02:00
|
|
|
while (hooks_entry &&
|
|
|
|
reg->priority >= hooks_entry->orig_ops->priority &&
|
|
|
|
nf_entry_dereference(hooks_entry->next)) {
|
|
|
|
hooks_entry = nf_entry_dereference(hooks_entry->next);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hooks_entry) {
|
|
|
|
entry->next = nf_entry_dereference(hooks_entry->next);
|
|
|
|
rcu_assign_pointer(hooks_entry->next, entry);
|
|
|
|
} else {
|
|
|
|
nf_set_hooks_head(net, reg, entry);
|
2005-08-10 05:21:49 +02:00
|
|
|
}
|
2016-09-21 17:35:07 +02:00
|
|
|
|
2007-02-12 20:10:14 +01:00
|
|
|
mutex_unlock(&nf_hook_mutex);
|
2015-07-11 01:13:58 +02:00
|
|
|
#ifdef CONFIG_NETFILTER_INGRESS
|
|
|
|
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
|
|
|
|
net_inc_ingress_queue();
|
|
|
|
#endif
|
2014-08-22 04:40:15 +02:00
|
|
|
#ifdef HAVE_JUMP_LABEL
|
2012-02-24 08:31:31 +01:00
|
|
|
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
|
2011-11-18 18:32:46 +01:00
|
|
|
#endif
|
2005-08-10 05:21:49 +02:00
|
|
|
return 0;
|
|
|
|
}
|
2015-07-11 01:15:06 +02:00
|
|
|
EXPORT_SYMBOL(nf_register_net_hook);
|
2005-08-10 05:21:49 +02:00
|
|
|
|
2015-07-11 01:15:06 +02:00
|
|
|
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
|
2005-08-10 05:21:49 +02:00
|
|
|
{
|
2016-09-21 17:35:07 +02:00
|
|
|
struct nf_hook_entry *hooks_entry;
|
2015-07-11 01:15:06 +02:00
|
|
|
|
2007-02-12 20:10:14 +01:00
|
|
|
mutex_lock(&nf_hook_mutex);
|
2016-09-21 17:35:07 +02:00
|
|
|
hooks_entry = nf_hook_entry_head(net, reg);
|
2016-09-28 17:35:14 +02:00
|
|
|
if (hooks_entry && hooks_entry->orig_ops == reg) {
|
2016-09-21 17:35:07 +02:00
|
|
|
nf_set_hooks_head(net, reg,
|
|
|
|
nf_entry_dereference(hooks_entry->next));
|
|
|
|
goto unlock;
|
|
|
|
}
|
|
|
|
while (hooks_entry && nf_entry_dereference(hooks_entry->next)) {
|
|
|
|
struct nf_hook_entry *next =
|
|
|
|
nf_entry_dereference(hooks_entry->next);
|
|
|
|
struct nf_hook_entry *nnext;
|
|
|
|
|
|
|
|
if (next->orig_ops != reg) {
|
|
|
|
hooks_entry = next;
|
|
|
|
continue;
|
2015-07-11 01:15:06 +02:00
|
|
|
}
|
2016-09-21 17:35:07 +02:00
|
|
|
nnext = nf_entry_dereference(next->next);
|
|
|
|
rcu_assign_pointer(hooks_entry->next, nnext);
|
|
|
|
hooks_entry = next;
|
|
|
|
break;
|
2015-07-11 01:15:06 +02:00
|
|
|
}
|
2016-09-21 17:35:07 +02:00
|
|
|
|
|
|
|
unlock:
|
2007-02-12 20:10:14 +01:00
|
|
|
mutex_unlock(&nf_hook_mutex);
|
2016-09-21 17:35:07 +02:00
|
|
|
if (!hooks_entry) {
|
2015-07-11 01:15:06 +02:00
|
|
|
WARN(1, "nf_unregister_net_hook: hook not found!\n");
|
|
|
|
return;
|
|
|
|
}
|
netfilter: add netfilter ingress hook after handle_ing() under unique static key
This patch adds the Netfilter ingress hook just after the existing tc ingress
hook, that seems to be the consensus solution for this.
Note that the Netfilter hook resides under the global static key that enables
ingress filtering. Nonetheless, Netfilter still also has its own static key for
minimal impact on the existing handle_ing().
* Without this patch:
Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags)
16086246pps 7721Mb/sec (7721398080bps) errors: 100000000
42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch:
Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags)
16090536pps 7723Mb/sec (7723457280bps) errors: 100000000
41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* Without this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags)
10788648pps 5178Mb/sec (5178551040bps) errors: 100000000
40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.77% kpktgend_0 [cls_u32] [k] u32_classify
5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker
3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify
2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags)
10743194pps 5156Mb/sec (5156733120bps) errors: 100000000
42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.70% kpktgend_0 [cls_u32] [k] u32_classify
5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker
2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify
1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
Note that the results are very similar before and after.
I can see gcc gets the code under the ingress static key out of the hot path.
Then, on that cold branch, it generates the code to accomodate the netfilter
ingress static key. My explanation for this is that this reduces the pressure
on the instruction cache for non-users as the new code is out of the hot path,
and it comes with minimal impact for tc ingress users.
Using gcc version 4.8.4 on:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 8
[...]
L1d cache: 16K
L1i cache: 64K
L2 cache: 2048K
L3 cache: 8192K
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-13 18:19:38 +02:00
|
|
|
#ifdef CONFIG_NETFILTER_INGRESS
|
2015-07-11 01:13:58 +02:00
|
|
|
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
|
|
|
|
net_dec_ingress_queue();
|
netfilter: add netfilter ingress hook after handle_ing() under unique static key
This patch adds the Netfilter ingress hook just after the existing tc ingress
hook, that seems to be the consensus solution for this.
Note that the Netfilter hook resides under the global static key that enables
ingress filtering. Nonetheless, Netfilter still also has its own static key for
minimal impact on the existing handle_ing().
* Without this patch:
Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags)
16086246pps 7721Mb/sec (7721398080bps) errors: 100000000
42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch:
Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags)
16090536pps 7723Mb/sec (7723457280bps) errors: 100000000
41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker
5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* Without this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags)
10788648pps 5178Mb/sec (5178551040bps) errors: 100000000
40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.77% kpktgend_0 [cls_u32] [k] u32_classify
5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker
3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify
2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb
* With this patch + tc ingress:
tc filter add dev eth4 parent ffff: protocol ip prio 1 \
u32 match ip dst 4.3.2.1/32
Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags)
10743194pps 5156Mb/sec (5156733120bps) errors: 100000000
42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core
17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb
11.70% kpktgend_0 [cls_u32] [k] u32_classify
5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat
5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker
2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv
2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify
1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal
1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk
Note that the results are very similar before and after.
I can see gcc gets the code under the ingress static key out of the hot path.
Then, on that cold branch, it generates the code to accomodate the netfilter
ingress static key. My explanation for this is that this reduces the pressure
on the instruction cache for non-users as the new code is out of the hot path,
and it comes with minimal impact for tc ingress users.
Using gcc version 4.8.4 on:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 8
[...]
L1d cache: 16K
L1i cache: 64K
L2 cache: 2048K
L3 cache: 8192K
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-13 18:19:38 +02:00
|
|
|
#endif
|
2014-08-22 04:40:15 +02:00
|
|
|
#ifdef HAVE_JUMP_LABEL
|
2012-02-24 08:31:31 +01:00
|
|
|
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
|
2011-11-18 18:32:46 +01:00
|
|
|
#endif
|
2005-08-10 05:21:49 +02:00
|
|
|
synchronize_net();
|
2016-09-21 17:35:07 +02:00
|
|
|
nf_queue_nf_hook_drop(net, hooks_entry);
|
2015-10-08 23:38:07 +02:00
|
|
|
/* other cpu might still process nfqueue verdict that used reg */
|
|
|
|
synchronize_net();
|
2016-09-21 17:35:07 +02:00
|
|
|
kfree(hooks_entry);
|
2015-07-11 01:15:06 +02:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(nf_unregister_net_hook);
|
|
|
|
|
|
|
|
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
|
|
|
|
unsigned int n)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
err = nf_register_net_hook(net, ®[i]);
|
|
|
|
if (err)
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (i > 0)
|
|
|
|
nf_unregister_net_hooks(net, reg, i);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(nf_register_net_hooks);
|
|
|
|
|
|
|
|
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
|
|
|
|
unsigned int n)
|
|
|
|
{
|
|
|
|
while (n-- > 0)
|
|
|
|
nf_unregister_net_hook(net, ®[n]);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(nf_unregister_net_hooks);
|
|
|
|
|
|
|
|
static LIST_HEAD(nf_hook_list);
|
|
|
|
|
2016-09-16 21:59:13 +02:00
|
|
|
static int _nf_register_hook(struct nf_hook_ops *reg)
|
2015-07-11 01:15:06 +02:00
|
|
|
{
|
|
|
|
struct net *net, *last;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
for_each_net(net) {
|
|
|
|
ret = nf_register_net_hook(net, reg);
|
|
|
|
if (ret && ret != -ENOENT)
|
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
list_add_tail(®->list, &nf_hook_list);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
rollback:
|
|
|
|
last = net;
|
|
|
|
for_each_net(net) {
|
|
|
|
if (net == last)
|
|
|
|
break;
|
|
|
|
nf_unregister_net_hook(net, reg);
|
|
|
|
}
|
2016-09-16 21:59:13 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Locked front end for _nf_register_hook(): takes rtnl_lock around the
 * registration and hands back its result unchanged.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	int err;

	rtnl_lock();
	err = _nf_register_hook(reg);
	rtnl_unlock();

	return err;
}
|
|
|
|
EXPORT_SYMBOL(nf_register_hook);
|
|
|
|
|
2016-09-16 21:59:13 +02:00
|
|
|
static void _nf_unregister_hook(struct nf_hook_ops *reg)
|
2015-07-11 01:15:06 +02:00
|
|
|
{
|
|
|
|
struct net *net;
|
|
|
|
|
|
|
|
list_del(®->list);
|
|
|
|
for_each_net(net)
|
|
|
|
nf_unregister_net_hook(net, reg);
|
2016-09-16 21:59:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Locked front end for _nf_unregister_hook(): the removal runs with
 * rtnl_lock held.
 */
void nf_unregister_hook(struct nf_hook_ops *reg)
{
	rtnl_lock();
	_nf_unregister_hook(reg);
	rtnl_unlock();
}
|
|
|
|
EXPORT_SYMBOL(nf_unregister_hook);
|
|
|
|
|
2006-04-06 23:09:12 +02:00
|
|
|
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
err = nf_register_hook(®[i]);
|
|
|
|
if (err)
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (i > 0)
|
|
|
|
nf_unregister_hooks(reg, i);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(nf_register_hooks);
|
|
|
|
|
2016-09-16 21:59:13 +02:00
|
|
|
/* Caller MUST take rtnl_lock() */
|
|
|
|
int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
err = _nf_register_hook(®[i]);
|
|
|
|
if (err)
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (i > 0)
|
|
|
|
_nf_unregister_hooks(reg, i);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(_nf_register_hooks);
|
|
|
|
|
2006-04-06 23:09:12 +02:00
|
|
|
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
|
|
|
|
{
|
2010-10-04 22:24:12 +02:00
|
|
|
while (n-- > 0)
|
|
|
|
nf_unregister_hook(®[n]);
|
2006-04-06 23:09:12 +02:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(nf_unregister_hooks);
|
|
|
|
|
2016-09-16 21:59:13 +02:00
|
|
|
/* Caller MUST take rtnl_lock */
|
|
|
|
void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
|
|
|
|
{
|
|
|
|
while (n-- > 0)
|
|
|
|
_nf_unregister_hook(®[n]);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(_nf_unregister_hooks);
|
|
|
|
|
2016-09-21 17:35:07 +02:00
|
|
|
unsigned int nf_iterate(struct sk_buff *skb,
|
2015-04-03 22:23:58 +02:00
|
|
|
struct nf_hook_state *state,
|
2016-09-21 17:35:07 +02:00
|
|
|
struct nf_hook_entry **entryp)
|
2005-08-10 05:21:49 +02:00
|
|
|
{
|
|
|
|
unsigned int verdict;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The caller must not block between calls to this
|
|
|
|
* function because of risk of continuing from deleted element.
|
|
|
|
*/
|
2016-09-21 17:35:07 +02:00
|
|
|
while (*entryp) {
|
|
|
|
if (state->thresh > (*entryp)->ops.priority) {
|
|
|
|
*entryp = rcu_dereference((*entryp)->next);
|
2005-08-10 05:21:49 +02:00
|
|
|
continue;
|
2016-09-21 17:35:07 +02:00
|
|
|
}
|
2005-08-10 05:21:49 +02:00
|
|
|
|
|
|
|
/* Optimization: we don't need to hold module
|
2007-02-12 20:15:49 +01:00
|
|
|
reference here, since function can't sleep. --RR */
|
2011-02-14 17:35:07 +01:00
|
|
|
repeat:
|
2016-09-21 17:35:07 +02:00
|
|
|
verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
|
2005-08-10 05:21:49 +02:00
|
|
|
if (verdict != NF_ACCEPT) {
|
|
|
|
#ifdef CONFIG_NETFILTER_DEBUG
|
|
|
|
if (unlikely((verdict & NF_VERDICT_MASK)
|
|
|
|
> NF_MAX_VERDICT)) {
|
|
|
|
NFDEBUG("Evil return from %p(%u).\n",
|
2016-09-21 17:35:07 +02:00
|
|
|
(*entryp)->ops.hook, state->hook);
|
|
|
|
*entryp = rcu_dereference((*entryp)->next);
|
2005-08-10 05:21:49 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
2012-08-22 21:59:57 +02:00
|
|
|
if (verdict != NF_REPEAT)
|
2005-08-10 05:21:49 +02:00
|
|
|
return verdict;
|
2011-02-14 17:35:07 +01:00
|
|
|
goto repeat;
|
2005-08-10 05:21:49 +02:00
|
|
|
}
|
2016-09-21 17:35:07 +02:00
|
|
|
*entryp = rcu_dereference((*entryp)->next);
|
2005-08-10 05:21:49 +02:00
|
|
|
}
|
|
|
|
return NF_ACCEPT;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Returns 1 if okfn() needs to be executed by the caller,
|
2016-09-21 17:35:04 +02:00
|
|
|
* -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
|
2015-04-03 22:23:58 +02:00
|
|
|
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
|
2005-08-10 05:21:49 +02:00
|
|
|
{
|
2016-09-21 17:35:07 +02:00
|
|
|
struct nf_hook_entry *entry;
|
2005-08-10 05:21:49 +02:00
|
|
|
unsigned int verdict;
|
|
|
|
int ret = 0;
|
|
|
|
|
2016-09-21 17:35:07 +02:00
|
|
|
entry = rcu_dereference(state->hook_entries);
|
2005-08-10 05:21:49 +02:00
|
|
|
next_hook:
|
2016-09-21 17:35:07 +02:00
|
|
|
verdict = nf_iterate(skb, state, &entry);
|
2005-08-10 05:21:49 +02:00
|
|
|
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
|
|
|
|
ret = 1;
|
2010-11-16 12:52:38 +01:00
|
|
|
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
|
2007-10-15 09:53:15 +02:00
|
|
|
kfree_skb(skb);
|
2011-01-18 15:52:14 +01:00
|
|
|
ret = NF_DROP_GETERR(verdict);
|
2010-11-16 12:52:38 +01:00
|
|
|
if (ret == 0)
|
|
|
|
ret = -EPERM;
|
2007-12-05 10:27:46 +01:00
|
|
|
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
|
2016-09-21 17:35:07 +02:00
|
|
|
int err;
|
|
|
|
|
|
|
|
RCU_INIT_POINTER(state->hook_entries, entry);
|
|
|
|
err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
|
2011-10-31 12:20:16 +01:00
|
|
|
if (err < 0) {
|
|
|
|
if (err == -ESRCH &&
|
2011-01-18 16:08:30 +01:00
|
|
|
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
|
|
|
|
goto next_hook;
|
2011-01-18 15:28:38 +01:00
|
|
|
kfree_skb(skb);
|
|
|
|
}
|
2005-08-10 05:21:49 +02:00
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(nf_hook_slow);
|
|
|
|
|
|
|
|
|
2007-10-14 09:39:18 +02:00
|
|
|
int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
|
2005-08-10 05:21:49 +02:00
|
|
|
{
|
2007-10-14 09:39:18 +02:00
|
|
|
if (writable_len > skb->len)
|
2005-08-10 05:21:49 +02:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Not exclusive use of packet? Must copy. */
|
2007-10-14 09:39:18 +02:00
|
|
|
if (!skb_cloned(skb)) {
|
|
|
|
if (writable_len <= skb_headlen(skb))
|
|
|
|
return 1;
|
|
|
|
} else if (skb_clone_writable(skb, writable_len))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (writable_len <= skb_headlen(skb))
|
|
|
|
writable_len = 0;
|
|
|
|
else
|
|
|
|
writable_len -= skb_headlen(skb);
|
|
|
|
|
|
|
|
return !!__pskb_pull_tail(skb, writable_len);
|
2005-08-10 05:21:49 +02:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(skb_make_writable);
|
|
|
|
|
2015-09-30 23:53:44 +02:00
|
|
|
/* This needs to be compiled in any case to avoid dependencies between the
|
|
|
|
* nfnetlink_queue code and nf_conntrack.
|
|
|
|
*/
|
2015-10-05 04:47:13 +02:00
|
|
|
struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
|
|
|
|
EXPORT_SYMBOL_GPL(nfnl_ct_hook);
|
2015-09-30 23:53:44 +02:00
|
|
|
|
2011-12-12 03:58:24 +01:00
|
|
|
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
|
2005-08-10 05:21:49 +02:00
|
|
|
/* This does not belong here, but locally generated errors need it if connection
|
|
|
|
tracking in use: without this, connection may not be in hash table, and hence
|
|
|
|
manufactured ICMP or RST packets will not be associated with it. */
|
2013-07-28 22:54:08 +02:00
|
|
|
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
|
|
|
|
__rcu __read_mostly;
|
2005-08-10 05:21:49 +02:00
|
|
|
EXPORT_SYMBOL(ip_ct_attach);
|
|
|
|
|
2013-07-28 22:54:08 +02:00
|
|
|
void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
|
2005-08-10 05:21:49 +02:00
|
|
|
{
|
2013-07-28 22:54:08 +02:00
|
|
|
void (*attach)(struct sk_buff *, const struct sk_buff *);
|
2005-08-10 05:21:49 +02:00
|
|
|
|
2007-02-12 20:09:19 +01:00
|
|
|
if (skb->nfct) {
|
|
|
|
rcu_read_lock();
|
|
|
|
attach = rcu_dereference(ip_ct_attach);
|
|
|
|
if (attach)
|
|
|
|
attach(new, skb);
|
|
|
|
rcu_read_unlock();
|
2005-08-10 05:21:49 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(nf_ct_attach);
|
2007-03-23 19:17:27 +01:00
|
|
|
|
2010-11-15 18:17:21 +01:00
|
|
|
void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
|
2007-03-23 19:17:27 +01:00
|
|
|
EXPORT_SYMBOL(nf_ct_destroy);
|
|
|
|
|
|
|
|
void nf_conntrack_destroy(struct nf_conntrack *nfct)
|
|
|
|
{
|
|
|
|
void (*destroy)(struct nf_conntrack *);
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
destroy = rcu_dereference(nf_ct_destroy);
|
|
|
|
BUG_ON(destroy == NULL);
|
|
|
|
destroy(nfct);
|
|
|
|
rcu_read_unlock();
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(nf_conntrack_destroy);
|
2012-06-07 12:13:39 +02:00
|
|
|
|
2015-09-03 01:26:07 +02:00
|
|
|
/* Built-in default zone used e.g. by modules. */
|
|
|
|
const struct nf_conntrack_zone nf_ct_zone_dflt = {
|
|
|
|
.id = NF_CT_DEFAULT_ZONE_ID,
|
|
|
|
.dir = NF_CT_DEFAULT_ZONE_DIR,
|
|
|
|
};
|
|
|
|
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
|
2007-03-23 19:17:27 +01:00
|
|
|
#endif /* CONFIG_NF_CONNTRACK */
|
2005-08-10 05:21:49 +02:00
|
|
|
|
2012-08-26 19:14:06 +02:00
|
|
|
#ifdef CONFIG_NF_NAT_NEEDED
|
|
|
|
void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
|
|
|
|
EXPORT_SYMBOL(nf_nat_decode_session_hook);
|
|
|
|
#endif
|
|
|
|
|
2015-07-11 01:15:06 +02:00
|
|
|
static int nf_register_hook_list(struct net *net)
|
|
|
|
{
|
|
|
|
struct nf_hook_ops *elem;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
rtnl_lock();
|
|
|
|
list_for_each_entry(elem, &nf_hook_list, list) {
|
|
|
|
ret = nf_register_net_hook(net, elem);
|
|
|
|
if (ret && ret != -ENOENT)
|
|
|
|
goto out_undo;
|
|
|
|
}
|
|
|
|
rtnl_unlock();
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_undo:
|
|
|
|
list_for_each_entry_continue_reverse(elem, &nf_hook_list, list)
|
|
|
|
nf_unregister_net_hook(net, elem);
|
|
|
|
rtnl_unlock();
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nf_unregister_hook_list(struct net *net)
|
|
|
|
{
|
|
|
|
struct nf_hook_ops *elem;
|
|
|
|
|
|
|
|
rtnl_lock();
|
|
|
|
list_for_each_entry(elem, &nf_hook_list, list)
|
|
|
|
nf_unregister_net_hook(net, elem);
|
|
|
|
rtnl_unlock();
|
|
|
|
}
|
|
|
|
|
2013-03-25 00:50:39 +01:00
|
|
|
/*
 * Per-namespace initialisation (the .init callback of netfilter_net_ops).
 *
 * Clears every hook chain head of @net, creates the "netfilter" proc
 * directory when CONFIG_PROC_FS is enabled, and registers the hooks
 * already on nf_hook_list with the namespace.
 *
 * Returns 0 on success, -ENOMEM if the proc directory cannot be created,
 * or the error from nf_register_hook_list() (in which case the proc
 * directory is removed again).
 */
static int __net_init netfilter_net_init(struct net *net)
{
	int i, h, ret;

	for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
		for (h = 0; h < NF_MAX_HOOKS; h++)
			RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
	}

#ifdef CONFIG_PROC_FS
	net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
						net->proc_net);
	if (!net->nf.proc_netfilter) {
		/* The message is only printed for namespaces other than init_net. */
		if (!net_eq(net, &init_net))
			pr_err("cannot create netfilter proc entry\n");

		return -ENOMEM;
	}
#endif
	ret = nf_register_hook_list(net);
	if (ret)
		remove_proc_entry("netfilter", net->proc_net);

	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Per-namespace teardown (the .exit callback of netfilter_net_ops):
 * unregister the nf_hook_list hooks from @net, then remove its
 * "netfilter" proc directory.
 */
static void __net_exit netfilter_net_exit(struct net *net)
{
	nf_unregister_hook_list(net);
	remove_proc_entry("netfilter", net->proc_net);
}
|
|
|
|
|
|
|
|
static struct pernet_operations netfilter_net_ops = {
|
|
|
|
.init = netfilter_net_init,
|
|
|
|
.exit = netfilter_net_exit,
|
|
|
|
};
|
|
|
|
|
2013-05-23 00:42:36 +02:00
|
|
|
/*
 * Boot-time initialisation: register the per-namespace operations, then
 * initialise netfilter logging.  If the logging setup fails, the
 * per-namespace operations are unregistered again.
 *
 * Returns 0 on success or the negative error of the step that failed.
 */
int __init netfilter_init(void)
{
	int err;

	err = register_pernet_subsys(&netfilter_net_ops);
	if (err < 0)
		return err;

	err = netfilter_log_init();
	if (err < 0) {
		unregister_pernet_subsys(&netfilter_net_ops);
		return err;
	}

	return 0;
}
|