diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 86334b6f8238..9f73587219e8 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -336,3 +336,13 @@ When: After the only user (hal) has seen a release with the patches Why: Over 1K .text/.data size reduction, data is available in other ways (ioctls) Who: Johannes Berg + +--------------------------- + +What: CONFIG_NF_CT_ACCT +When: 2.6.29 +Why: Accounting can now be enabled/disabled without kernel recompilation. + Currently used only to set a default value for a feature that is also + controlled by a kernel/module/sysfs/sysctl parameter. +Who: Krzysztof Piotr Oledzki + diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 25e88cf5d84e..30d44b78171a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1279,6 +1279,13 @@ and is between 256 and 4096 characters. It is defined in the file This usage is only documented in each driver source file if at all. + nf_conntrack.acct= + [NETFILTER] Enable connection tracking flow accounting + 0 to disable accounting + 1 to enable accounting + Default value depends on CONFIG_NF_CT_ACCT that is + going to be removed in 2.6.29. + nfsaddrs= [NFS] See Documentation/filesystems/nfsroot.txt. diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 3e22e7817bc3..f12e3d12474b 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -1308,7 +1308,6 @@ static u32 atl1_check_link(struct atl1_adapter *adapter) dev_info(&adapter->pdev->dev, "link is down\n"); adapter->link_speed = SPEED_0; netif_carrier_off(netdev); - netif_stop_queue(netdev); } return 0; } @@ -1358,7 +1357,6 @@ static u32 atl1_check_link(struct atl1_adapter *adapter) if (!netif_carrier_ok(netdev)) { /* Link down -> Up */ netif_carrier_on(netdev); - netif_wake_queue(netdev); } return 0; } @@ -2627,6 +2625,7 @@ static s32 atl1_up(struct atl1_adapter *adapter) mod_timer(&adapter->watchdog_timer, jiffies); atlx_irq_enable(adapter); atl1_check_link(adapter); + netif_start_queue(netdev); return 0; err_up: diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 45a63172852f..39b45e901be6 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -1725,7 +1725,6 @@ static void adjust_link(struct net_device *dev) if (!priv->oldlink) { new_state = 1; priv->oldlink = 1; - netif_tx_schedule_all(dev); } } else if (priv->oldlink) { new_state = 1; diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index ae9629fa6882..c258a0586e61 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -88,6 +88,7 @@ static inline void append_crc_ccitt(unsigned char *buffer, int len) { unsigned int crc = crc_ccitt(0xffff, buffer, len) ^ 0xffff; + buffer += len; *buffer++ = crc; *buffer++ = crc >> 8; } diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index 1aa425be3067..b79d5f018f79 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -2377,8 +2377,6 @@ static void happy_meal_set_multicast(struct net_device *dev) spin_lock_irq(&hp->happy_lock); - netif_stop_queue(dev); - if ((dev->flags & IFF_ALLMULTI) || (dev->mc_count > 64)) { hme_write32(hp, bregs + BMAC_HTABLE0, 0xffff); hme_write32(hp, bregs + BMAC_HTABLE1, 0xffff); @@ -2410,8 +2408,6 @@ static void happy_meal_set_multicast(struct net_device *dev) hme_write32(hp, bregs + BMAC_HTABLE3, hash_table[3]); } - netif_wake_queue(dev); - spin_unlock_irq(&hp->happy_lock); } diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index 756ba10b79d6..8f944e57fd55 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -1588,7 +1588,6 @@ static void adjust_link(struct net_device *dev) if (!ugeth->oldlink) { new_state = 1; ugeth->oldlink = 1; - netif_tx_schedule_all(dev); } } else if (ugeth->oldlink) { new_state = 1; diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index bad1eb760f61..885cbe282260 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -122,7 +122,7 @@ enum ip_conntrack_events IPCT_NATINFO_BIT = 10, IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), - /* Counter highest bit has been set */ + /* Counter highest bit has been set, unused */ IPCT_COUNTER_FILLING_BIT = 11, IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), @@ -145,12 +145,6 @@ enum ip_conntrack_expect_events { }; #ifdef __KERNEL__ -struct ip_conntrack_counter -{ - u_int32_t packets; - u_int32_t bytes; -}; - struct ip_conntrack_stat { unsigned int searched; diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 759bc043dc65..c19595c89304 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -115,10 +115,10 @@ enum ctattr_protoinfo_sctp { enum ctattr_counters { CTA_COUNTERS_UNSPEC, - CTA_COUNTERS_PACKETS, /* old 64bit counters */ - CTA_COUNTERS_BYTES, /* old 64bit counters */ - CTA_COUNTERS32_PACKETS, - CTA_COUNTERS32_BYTES, + CTA_COUNTERS_PACKETS, /* 64bit counters */ + CTA_COUNTERS_BYTES, /* 64bit counters */ + CTA_COUNTERS32_PACKETS, /* old 32bit counters, unused */ + CTA_COUNTERS32_BYTES, /* old 32bit counters, unused */ __CTA_COUNTERS_MAX }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index a85721332924..f661731f3cb1 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -48,6 +48,9 @@ enum nfulnl_attr_type { NFULA_SEQ, /* instance-local sequence number */ NFULA_SEQ_GLOBAL, /* global sequence number */ NFULA_GID, /* group id of socket */ + NFULA_HWTYPE, /* hardware type */ + NFULA_HWHEADER, /* hardware header */ + NFULA_HWLEN, /* hardware header length */ __NFULA_MAX }; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8f5b75734dd0..0741ad592da0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -88,7 +88,6 @@ struct nf_conn_help { u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; }; - #include #include @@ -111,11 +110,6 @@ struct nf_conn /* Timer function; drops refcnt when it goes off. */ struct timer_list timeout; -#ifdef CONFIG_NF_CT_ACCT - /* Accounting Information (same cache line as other written members) */ - struct ip_conntrack_counter counters[IP_CT_DIR_MAX]; -#endif - #if defined(CONFIG_NF_CONNTRACK_MARK) u_int32_t mark; #endif diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h new file mode 100644 index 000000000000..5d5ae55d54c4 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -0,0 +1,51 @@ +/* + * (C) 2008 Krzysztof Piotr Oledzki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _NF_CONNTRACK_ACCT_H +#define _NF_CONNTRACK_ACCT_H +#include +#include +#include +#include + +struct nf_conn_counter { + u_int64_t packets; + u_int64_t bytes; +}; + +extern int nf_ct_acct; + +static inline +struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_ACCT); +} + +static inline +struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) +{ + struct nf_conn_counter *acct; + + if (!nf_ct_acct) + return NULL; + + acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp); + if (!acct) + pr_debug("failed to add accounting extension area"); + + + return acct; +}; + +extern unsigned int +seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir); + +extern int nf_conntrack_acct_init(void); +extern void nf_conntrack_acct_fini(void); + +#endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index f80c0ed6d870..da8ee52613a5 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -7,11 +7,13 @@ enum nf_ct_ext_id { NF_CT_EXT_HELPER, NF_CT_EXT_NAT, + NF_CT_EXT_ACCT, NF_CT_EXT_NUM, }; #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat +#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netlink.h b/include/net/netlink.h index dfc3701dfcc3..18024b8cecb8 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -896,6 +896,9 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, #define NLA_PUT_U64(skb, attrtype, value) \ NLA_PUT_TYPE(skb, u64, attrtype, value) +#define NLA_PUT_BE64(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, __be64, attrtype, value) + #define NLA_PUT_STRING(skb, attrtype, value) \ NLA_PUT(skb, attrtype, strlen(value) + 1, value) diff --git a/net/core/dev.c b/net/core/dev.c index 2eed17bcb2dd..cbc34c0db376 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -124,6 +124,8 @@ #include #include #include +#include +#include #include "net-sysfs.h" @@ -1325,7 +1327,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) void __netif_schedule(struct Qdisc *q) { - BUG_ON(q == &noop_qdisc); + if (WARN_ON_ONCE(q == &noop_qdisc)) + return; if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) { struct softnet_data *sd; @@ -1668,34 +1671,37 @@ out_kfree_skb: * --BLG */ +static u32 simple_tx_hashrnd; +static int simple_tx_hashrnd_initialized = 0; + static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) { - u32 *addr, *ports, hash, ihl; + u32 addr1, addr2, ports; + u32 hash, ihl; u8 ip_proto; - int alen; + + if (unlikely(!simple_tx_hashrnd_initialized)) { + get_random_bytes(&simple_tx_hashrnd, 4); + simple_tx_hashrnd_initialized = 1; + } switch (skb->protocol) { case __constant_htons(ETH_P_IP): ip_proto = ip_hdr(skb)->protocol; - addr = &ip_hdr(skb)->saddr; + addr1 = ip_hdr(skb)->saddr; + addr2 = ip_hdr(skb)->daddr; ihl = ip_hdr(skb)->ihl; - alen = 2; break; case __constant_htons(ETH_P_IPV6): ip_proto = ipv6_hdr(skb)->nexthdr; - addr = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; + addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; ihl = (40 >> 2); - alen = 8; break; default: return 0; } - ports = (u32 *) (skb_network_header(skb) + (ihl * 4)); - - hash = 0; - while (alen--) - hash ^= *addr++; switch (ip_proto) { case IPPROTO_TCP: @@ -1705,14 +1711,17 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) case IPPROTO_AH: case IPPROTO_SCTP: case IPPROTO_UDPLITE: - hash ^= *ports; + ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); break; default: + ports = 0; break; } - return hash % dev->real_num_tx_queues; + hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + + return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } static struct netdev_queue *dev_pick_tx(struct net_device *dev, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 40a46d482490..3a020720e40b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -18,19 +18,7 @@ #include #include #include - -#ifdef CONFIG_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif +#include struct ct_iter_state { unsigned int bucket; @@ -127,7 +115,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) return -ENOSPC; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) @@ -138,7 +126,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) return -ENOSPC; if (test_bit(IPS_ASSURED_BIT, &ct->status)) diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index d2a887fc8d9b..6c6a3cba8d50 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -240,12 +240,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, This is only required for source (ie. NAT/masq) mappings. So far, we don't do local source mappings, so multiple manips not an issue. */ - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == IP_NAT_MANIP_SRC && + !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { if (find_appropriate_src(orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); - if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) - if (!nf_nat_used_tuple(tuple, ct)) - return; + if (!nf_nat_used_tuple(tuple, ct)) + return; } } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4334d5cabc5b..14544320c545 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -318,11 +318,11 @@ static int mangle_content_len(struct sk_buff *skb, buffer, buflen); } -static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, unsigned int *datalen, - enum sdp_header_types type, - enum sdp_header_types term, - char *buffer, int buflen) +static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, + unsigned int dataoff, unsigned int *datalen, + enum sdp_header_types type, + enum sdp_header_types term, + char *buffer, int buflen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -330,9 +330,9 @@ static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, &matchoff, &matchlen) <= 0) - return 0; + return -ENOENT; return mangle_packet(skb, dptr, datalen, matchoff, matchlen, - buffer, buflen); + buffer, buflen) ? 0 : -EINVAL; } static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, @@ -346,8 +346,8 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, unsigned int buflen; buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, - buffer, buflen)) + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, + buffer, buflen)) return 0; return mangle_content_len(skb, dptr, datalen); @@ -381,15 +381,27 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, /* Mangle session description owner and contact addresses */ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) return 0; - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, - SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, - buffer, buflen)) + switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, + SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, + buffer, buflen)) { + case 0: + /* + * RFC 2327: + * + * Session description + * + * c=* (connection information - not required if included in all media) + */ + case -ENOENT: + break; + default: return 0; + } return mangle_content_len(skb, dptr, datalen); } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 316c7af1d2b1..ee898e74808d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -49,6 +49,15 @@ config NF_CT_ACCT Those counters can be used for flow-based accounting or the `connbytes' match. + Please note that currently this option only sets a default state. + You may change it at boot time with nf_conntrack.acct=0/1 kernel + paramater or by loading the nf_conntrack module with acct=0/1. + + You may also disable/enable it on a running system with: + sysctl net.netfilter.nf_conntrack_acct=0/1 + + This option will be removed in 2.6.29. + If unsure, say `N'. config NF_CONNTRACK_MARK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5c4b183f6422..3bd2cc556aea 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,6 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c new file mode 100644 index 000000000000..59bd8b903a19 --- /dev/null +++ b/net/netfilter/nf_conntrack_acct.c @@ -0,0 +1,104 @@ +/* Accouting handling for netfilter. */ + +/* + * (C) 2008 Krzysztof Piotr Oledzki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_NF_CT_ACCT +#define NF_CT_ACCT_DEFAULT 1 +#else +#define NF_CT_ACCT_DEFAULT 0 +#endif + +int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; +EXPORT_SYMBOL_GPL(nf_ct_acct); + +module_param_named(acct, nf_ct_acct, bool, 0644); +MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *acct_sysctl_header; +static struct ctl_table acct_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_acct", + .data = &nf_ct_acct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + {} +}; +#endif /* CONFIG_SYSCTL */ + +unsigned int +seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) +{ + struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + return seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)acct[dir].packets, + (unsigned long long)acct[dir].bytes); +}; +EXPORT_SYMBOL_GPL(seq_print_acct); + +static struct nf_ct_ext_type acct_extend __read_mostly = { + .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]), + .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]), + .id = NF_CT_EXT_ACCT, +}; + +int nf_conntrack_acct_init(void) +{ + int ret; + +#ifdef CONFIG_NF_CT_ACCT + printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); + printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); + printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); +#endif + + ret = nf_ct_extend_register(&acct_extend); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); + return ret; + } + +#ifdef CONFIG_SYSCTL + acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, + acct_sysctl_table); + + if (!acct_sysctl_header) { + nf_ct_extend_unregister(&acct_extend); + + printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); + return -ENOMEM; + } +#endif + + return 0; +} + +void nf_conntrack_acct_fini(void) +{ +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(acct_sysctl_header); +#endif + nf_ct_extend_unregister(&acct_extend); +} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 28d03e64200b..c519d090bdb9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -37,6 +37,7 @@ #include #include #include +#include #define NF_CONNTRACK_VERSION "0.5.0" @@ -555,6 +556,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } + nf_ct_acct_ext_add(ct, GFP_ATOMIC); + spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(tuple); if (exp) { @@ -828,17 +831,16 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, } acct: -#ifdef CONFIG_NF_CT_ACCT if (do_acct) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + struct nf_conn_counter *acct; - if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) - || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) - event |= IPCT_COUNTER_FILLING; + acct = nf_conn_acct_find(ct); + if (acct) { + acct[CTINFO2DIR(ctinfo)].packets++; + acct[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + } } -#endif spin_unlock_bh(&nf_conntrack_lock); @@ -853,15 +855,19 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, const struct sk_buff *skb, int do_acct) { -#ifdef CONFIG_NF_CT_ACCT if (do_acct) { + struct nf_conn_counter *acct; + spin_lock_bh(&nf_conntrack_lock); - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + acct = nf_conn_acct_find(ct); + if (acct) { + acct[CTINFO2DIR(ctinfo)].packets++; + acct[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + } spin_unlock_bh(&nf_conntrack_lock); } -#endif + if (del_timer(&ct->timeout)) { ct->timeout.function((unsigned long)ct); return true; @@ -1029,6 +1035,7 @@ void nf_conntrack_cleanup(void) nf_conntrack_proto_fini(); nf_conntrack_helper_fini(); nf_conntrack_expect_fini(); + nf_conntrack_acct_fini(); } struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced) @@ -1168,6 +1175,10 @@ int __init nf_conntrack_init(void) if (ret < 0) goto out_fini_expect; + ret = nf_conntrack_acct_init(); + if (ret < 0) + goto out_fini_helper; + /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); @@ -1180,6 +1191,8 @@ int __init nf_conntrack_init(void) return ret; +out_fini_helper: + nf_conntrack_helper_fini(); out_fini_expect: nf_conntrack_expect_fini(); out_fini_proto: diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 95a7967731f9..105a616c5c78 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -206,22 +207,26 @@ nla_put_failure: return -1; } -#ifdef CONFIG_NF_CT_ACCT static int ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nlattr *nest_count; + const struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; nest_count = nla_nest_start(skb, type | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; - NLA_PUT_BE32(skb, CTA_COUNTERS32_PACKETS, - htonl(ct->counters[dir].packets)); - NLA_PUT_BE32(skb, CTA_COUNTERS32_BYTES, - htonl(ct->counters[dir].bytes)); + NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, + cpu_to_be64(acct[dir].packets)); + NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, + cpu_to_be64(acct[dir].bytes)); nla_nest_end(skb, nest_count); @@ -230,9 +235,6 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, nla_put_failure: return -1; } -#else -#define ctnetlink_dump_counters(a, b, c) (0) -#endif #ifdef CONFIG_NF_CONNTRACK_MARK static inline int @@ -501,11 +503,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, goto nla_put_failure; #endif - if (events & IPCT_COUNTER_FILLING && - (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) - goto nla_put_failure; - if (events & IPCT_RELATED && ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; @@ -576,11 +573,15 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } -#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) - memset(&ct->counters, 0, sizeof(ct->counters)); -#endif + IPCTNL_MSG_CT_GET_CTRZERO) { + struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (acct) + memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); + } } if (cb->args[1]) { cb->args[1] = 0; @@ -832,14 +833,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nlh->nlmsg_flags & NLM_F_DUMP) { -#ifndef CONFIG_NF_CT_ACCT - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) - return -ENOTSUPP; -#endif + if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); - } if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); @@ -1152,6 +1148,8 @@ ctnetlink_create_conntrack(struct nlattr *cda[], goto err; } + nf_ct_acct_ext_add(ct, GFP_KERNEL); + #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 41183a4d2d62..30aa5b94a771 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -482,11 +482,11 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, NLA_PUT_BE32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, - htonl(ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL])); + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]); NLA_PUT_BE32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY, - htonl(ct->proto.sctp.vtag[IP_CT_DIR_REPLY])); + ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); read_unlock_bh(&sctp_lock); @@ -530,9 +530,9 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) write_lock_bh(&sctp_lock); ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = - ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL])); + nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = - ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])); + nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); write_unlock_bh(&sctp_lock); return 0; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46ea542d0df9..869ef9349d0f 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -25,6 +25,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); @@ -38,19 +39,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(print_tuple); -#ifdef CONFIG_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif - struct ct_iter_state { unsigned int bucket; }; @@ -146,7 +134,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) return -ENOSPC; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) @@ -157,7 +145,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) return -ENOSPC; if (test_bit(IPS_ASSURED_BIT, &ct->status)) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b8173af8c24a..9a35b57ab76d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -453,6 +453,14 @@ __build_packet_message(struct nfulnl_instance *inst, } } + if (indev && skb_mac_header_was_set(skb)) { + NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)); + NLA_PUT_BE16(inst->skb, NFULA_HWLEN, + htons(skb->dev->hard_header_len)); + NLA_PUT(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, + skb_mac_header(skb)); + } + if (skb->tstamp.tv64) { struct nfulnl_msg_packet_timestamp ts; struct timeval tv = ktime_to_timeval(skb->tstamp); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 217e2b686322..beb5094703cb 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -147,17 +147,21 @@ tcpmss_mangle_packet(struct sk_buff *skb, return TCPOLEN_MSS; } -static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph) +static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, + unsigned int family) { - struct flowi fl = { - .fl4_dst = iph->saddr, - }; + struct flowi fl = {}; const struct nf_afinfo *ai; struct rtable *rt = NULL; u_int32_t mtu = ~0U; + if (family == PF_INET) + fl.fl4_dst = ip_hdr(skb)->saddr; + else + fl.fl6_dst = ipv6_hdr(skb)->saddr; + rcu_read_lock(); - ai = nf_get_afinfo(AF_INET); + ai = nf_get_afinfo(family); if (ai != NULL) ai->route((struct dst_entry **)&rt, &fl); rcu_read_unlock(); @@ -178,7 +182,8 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, __be16 newlen; int ret; - ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu4(iph), + ret = tcpmss_mangle_packet(skb, targinfo, + tcpmss_reverse_mtu(skb, PF_INET), iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) @@ -193,28 +198,6 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static u_int32_t tcpmss_reverse_mtu6(const struct ipv6hdr *iph) -{ - struct flowi fl = { - .fl6_dst = iph->saddr, - }; - const struct nf_afinfo *ai; - struct rtable *rt = NULL; - u_int32_t mtu = ~0U; - - rcu_read_lock(); - ai = nf_get_afinfo(AF_INET6); - if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); - rcu_read_unlock(); - - if (rt != NULL) { - mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); - } - return mtu; -} - static unsigned int tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -229,7 +212,8 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 0) return NF_DROP; - ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu6(ipv6h), + ret = tcpmss_mangle_packet(skb, targinfo, + tcpmss_reverse_mtu(skb, PF_INET6), tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d7e8983cd37f..3e39c4fe1931 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -8,6 +8,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); @@ -27,12 +28,15 @@ connbytes_mt(const struct sk_buff *skb, const struct net_device *in, u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; - const struct ip_conntrack_counter *counters; + const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; - counters = ct->counters; + + counters = nf_conn_acct_find(ct); + if (!counters) + return false; switch (sinfo->what) { case XT_CONNBYTES_PKTS: diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index ed76baab4734..9f328593287e 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -173,7 +173,7 @@ time_mt(const struct sk_buff *skb, const struct net_device *in, __net_timestamp((struct sk_buff *)skb); stamp = ktime_to_ns(skb->tstamp); - do_div(stamp, NSEC_PER_SEC); + stamp = div_s64(stamp, NSEC_PER_SEC); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0ddf69286f92..cb625b4d6da5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -356,44 +356,99 @@ static struct Qdisc noqueue_qdisc = { }; -static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - struct sk_buff_head *list = &qdisc->q; +static const u8 prio2band[TC_PRIO_MAX+1] = + { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; - if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) +/* 3-band FIFO queue: old style, but should be a bit faster than + generic prio+fifo combination. + */ + +#define PFIFO_FAST_BANDS 3 + +static inline struct sk_buff_head *prio2list(struct sk_buff *skb, + struct Qdisc *qdisc) +{ + struct sk_buff_head *list = qdisc_priv(qdisc); + return list + prio2band[skb->priority & TC_PRIO_MAX]; +} + +static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +{ + struct sk_buff_head *list = prio2list(skb, qdisc); + + if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { + qdisc->q.qlen++; return __qdisc_enqueue_tail(skb, qdisc, list); + } return qdisc_drop(skb, qdisc); } -static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc) +static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) { - struct sk_buff_head *list = &qdisc->q; + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); - if (!skb_queue_empty(list)) - return __qdisc_dequeue_head(qdisc, list); + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { + if (!skb_queue_empty(list + prio)) { + qdisc->q.qlen--; + return __qdisc_dequeue_head(qdisc, list + prio); + } + } return NULL; } -static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) { - return __qdisc_requeue(skb, qdisc, &qdisc->q); + qdisc->q.qlen++; + return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); } -static void fifo_fast_reset(struct Qdisc* qdisc) +static void pfifo_fast_reset(struct Qdisc* qdisc) { - __qdisc_reset_queue(qdisc, &qdisc->q); + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) + __qdisc_reset_queue(qdisc, list + prio); + qdisc->qstats.backlog = 0; + qdisc->q.qlen = 0; } -static struct Qdisc_ops fifo_fast_ops __read_mostly = { - .id = "fifo_fast", - .priv_size = 0, - .enqueue = fifo_fast_enqueue, - .dequeue = fifo_fast_dequeue, - .requeue = fifo_fast_requeue, - .reset = fifo_fast_reset, +static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) +{ + struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; + + memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + return skb->len; + +nla_put_failure: + return -1; +} + +static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) +{ + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) + skb_queue_head_init(list + prio); + + return 0; +} + +static struct Qdisc_ops pfifo_fast_ops __read_mostly = { + .id = "pfifo_fast", + .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), + .enqueue = pfifo_fast_enqueue, + .dequeue = pfifo_fast_dequeue, + .requeue = pfifo_fast_requeue, + .init = pfifo_fast_init, + .reset = pfifo_fast_reset, + .dump = pfifo_fast_dump, .owner = THIS_MODULE, }; @@ -522,7 +577,7 @@ static void attach_one_default_qdisc(struct net_device *dev, if (dev->tx_queue_len) { qdisc = qdisc_create_dflt(dev, dev_queue, - &fifo_fast_ops, TC_H_ROOT); + &pfifo_fast_ops, TC_H_ROOT); if (!qdisc) { printk(KERN_INFO "%s: activation failed\n", dev->name); return; @@ -550,9 +605,9 @@ void dev_activate(struct net_device *dev) int need_watchdog; /* No queueing discipline is attached to device; - * create default one i.e. fifo_fast for devices, - * which need queueing and noqueue_qdisc for - * virtual interfaces. + create default one i.e. pfifo_fast for devices, + which need queueing and noqueue_qdisc for + virtual interfaces */ if (dev_all_qdisc_sleeping_noop(dev)) @@ -576,7 +631,6 @@ static void dev_deactivate_queue(struct net_device *dev, void *_qdisc_default) { struct Qdisc *qdisc_default = _qdisc_default; - struct sk_buff *skb = NULL; struct Qdisc *qdisc; qdisc = dev_queue->qdisc; @@ -588,8 +642,6 @@ static void dev_deactivate_queue(struct net_device *dev, spin_unlock_bh(qdisc_lock(qdisc)); } - - kfree_skb(skb); } static bool some_qdisc_is_running(struct net_device *dev, int lock)