From 00fe1ae91e0d69e52e8212d23cd3ecc74a7259a0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 16:24:46 +0200 Subject: [PATCH 01/62] netfilter: xt_rateest: fix xt_rateest_mt_checkentry() commit 4a5a5c73b7cfee (slightly better error reporting) added some useless code in xt_rateest_mt_checkentry(). Fix this so that different error codes can really be returned. Signed-off-by: Eric Dumazet CC: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_rateest.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 76a083184d8e..ed0db15ab00e 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; - int ret = false; + int ret = -EINVAL; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) @@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) if (!est1) goto err1; + est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { est2 = xt_rateest_lookup(info->name2); if (!est2) goto err2; - } else - est2 = NULL; - + } info->est1 = est1; info->est2 = est2; @@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) err2: xt_rateest_put(est1); err1: - return -EINVAL; + return ret; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) From 91c66c6893a3e2bb8a88a30cb76007d5d49d32c9 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 29 Jul 2011 16:38:49 +0200 Subject: [PATCH 02/62] netfilter: ip_queue: Fix small leak in ipq_build_packet_message() ipq_build_packet_message() in net/ipv4/netfilter/ip_queue.c and net/ipv6/netfilter/ip6_queue.c contain a small potential mem leak as far as I can tell. We allocate memory for 'skb' with alloc_skb() annd then call nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); NLMSG_PUT is a macro NLMSG_PUT(skb, pid, seq, type, len) \ NLMSG_NEW(skb, pid, seq, type, len, 0) that expands to NLMSG_NEW, which is also a macro which expands to: NLMSG_NEW(skb, pid, seq, type, len, flags) \ ({ if (unlikely(skb_tailroom(skb) < (int)NLMSG_SPACE(len))) \ goto nlmsg_failure; \ __nlmsg_put(skb, pid, seq, type, len, flags); }) If we take the true branch of the 'if' statement and 'goto nlmsg_failure', then we'll, at that point, return from ipq_build_packet_message() without having assigned 'skb' to anything and we'll leak the memory we allocated for it when it goes out of scope. Fix this by placing a 'kfree(skb)' at 'nlmsg_failure'. I admit that I do not know how likely this to actually happen or even if there's something that guarantees that it will never happen - I'm not that familiar with this code, but if that is so, I've not been able to spot it. Signed-off-by: Jesper Juhl Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 1 + net/ipv6/netfilter/ip6_queue.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5c9b9d963918..48f7d5b4ff37 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 249394863284..87b243a25afa 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; From 9823d9ff483af4ce8804a9eb69600ca739cd1f58 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Fri, 29 Jul 2011 16:40:30 +0200 Subject: [PATCH 03/62] netfilter: ebtables: fix ebtables build dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The configuration of ebtables shouldn't depend on CONFIG_BRIDGE_NETFILTER, only on CONFIG_NETFILTER. Reported-by: Sébastien Laveze Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index ba6f73eb06c6..a9aff9c7d027 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,7 +4,7 @@ menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" - depends on BRIDGE && BRIDGE_NETFILTER + depends on BRIDGE && NETFILTER select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification From 1c1bdd324cd50ac55f7ebf95ef249d946c6e4361 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 26 Aug 2011 12:42:11 +0530 Subject: [PATCH 04/62] ath9k_hw: Fix init mode register regression The commit 172805ad46b78717a738ca5c7908c68f0326d3a9 overwirtes additional clock settings of AR9330 to all AR9300 chips. Cc: stable@kernel.org Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9003_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 1baca8e4715d..fcafec0605f4 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -671,7 +671,7 @@ static int ar9003_hw_process_ini(struct ath_hw *ah, REG_WRITE_ARRAY(&ah->iniModesAdditional, modesIndex, regWrites); - if (AR_SREV_9300(ah)) + if (AR_SREV_9330(ah)) REG_WRITE_ARRAY(&ah->iniModesAdditional, 1, regWrites); if (AR_SREV_9340(ah) && !ah->is_clk_25mhz) From 7c2510120e9b43b0caf32c3786a6ab831f7d9e87 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 26 Aug 2011 17:24:59 +0200 Subject: [PATCH 05/62] iwlegacy: fix BUG_ON(info->control.rates[0].idx < 0) When trying to connect to 5GHz we can provide negative index to mac80211 what trigger BUG_ON. Reason of iwl-3945-rs malfunction on 5GHz is unknown and needs further investigation. For now, to do not trigger a bug, correct value and just print WARNING. Address bug: https://bugzilla.redhat.com/show_bug.cgi?id=730653 Reported-and-tested-by: Jan Teichmann Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/iwlegacy/iwl-3945-rs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/iwlegacy/iwl-3945-rs.c b/drivers/net/wireless/iwlegacy/iwl-3945-rs.c index 977bd2477c6a..164bcae821f8 100644 --- a/drivers/net/wireless/iwlegacy/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlegacy/iwl-3945-rs.c @@ -822,12 +822,15 @@ static void iwl3945_rs_get_rate(void *priv_r, struct ieee80211_sta *sta, out: - rs_sta->last_txrate_idx = index; - if (sband->band == IEEE80211_BAND_5GHZ) - info->control.rates[0].idx = rs_sta->last_txrate_idx - - IWL_FIRST_OFDM_RATE; - else + if (sband->band == IEEE80211_BAND_5GHZ) { + if (WARN_ON_ONCE(index < IWL_FIRST_OFDM_RATE)) + index = IWL_FIRST_OFDM_RATE; + rs_sta->last_txrate_idx = index; + info->control.rates[0].idx = index - IWL_FIRST_OFDM_RATE; + } else { + rs_sta->last_txrate_idx = index; info->control.rates[0].idx = rs_sta->last_txrate_idx; + } IWL_DEBUG_RATE(priv, "leave: %d\n", index); } From c6675233f9015d3c0460c8aab53ed9b99d915c64 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Aug 2011 15:01:20 +0200 Subject: [PATCH 06/62] netfilter: nf_queue: reject NF_STOLEN verdicts from userspace A userspace listener may send (bogus) NF_STOLEN verdict, which causes skb leak. This problem was previously fixed via 64507fdbc29c3a622180378210ecea8659b14e40 (netfilter: nf_queue: fix NF_STOLEN skb leak) but this had to be reverted because NF_STOLEN can also be returned by a netfilter hook when iterating the rules in nf_reinject. Reject userspace NF_STOLEN verdict, as suggested by Michal Miroslaw. This is complementary to commit fad54440438a7c231a6ae347738423cbabc936d9 (netfilter: avoid double free in nf_reinject). Cc: Julian Anastasov Cc: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 11 ++++------- net/ipv6/netfilter/ip6_queue.c | 11 ++++------- net/netfilter/nfnetlink_queue.c | 4 ++-- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 48f7d5b4ff37..e59aabd0eae4 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -314,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct nf_queue_entry *entry; - if (vmsg->value > NF_MAX_VERDICT) + if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) return -EINVAL; entry = ipq_find_dequeue_entry(vmsg->id); @@ -359,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, break; case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; + status = ipq_set_verdict(&pmsg->msg.verdict, + len - sizeof(*pmsg)); + break; default: status = -EINVAL; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 87b243a25afa..e63c3972a739 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -314,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct nf_queue_entry *entry; - if (vmsg->value > NF_MAX_VERDICT) + if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) return -EINVAL; entry = ipq_find_dequeue_entry(vmsg->id); @@ -359,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, break; case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; + status = ipq_set_verdict(&pmsg->msg.verdict, + len - sizeof(*pmsg)); + break; default: status = -EINVAL; } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 00bd475eab4b..a80b0cb03f17 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -646,8 +646,8 @@ verdicthdr_get(const struct nlattr * const nfqa[]) return NULL; vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); - verdict = ntohl(vhdr->verdict); - if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; + if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) return NULL; return vhdr; } From 4c6e4209662b2a4147cde16c2144a253a7430a49 Mon Sep 17 00:00:00 2001 From: Sanket Shah Date: Tue, 30 Aug 2011 15:23:03 +0200 Subject: [PATCH 07/62] netfilter: nf_ct_pptp: fix DNATed PPTP connection address translation When both the server and the client are NATed, the set-link-info control packet containing the peer's call-id field is not properly translated. I have verified that it was working in 2.6.16.13 kernel previously but due to rewrite, this scenario stopped working (Not knowing exact version when it stopped working). Signed-off-by: Sanket Shah Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_pptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 2fd4565144de..31d56b23b9e9 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -364,6 +364,7 @@ pptp_inbound_pkt(struct sk_buff *skb, break; case PPTP_WAN_ERROR_NOTIFY: + case PPTP_SET_LINK_INFO: case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ From 4a5cc84ae7e19fb7a72a30332ba67af43e0ad1ad Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Aug 2011 15:45:10 +0200 Subject: [PATCH 08/62] netfilter: nf_ct_tcp: fix incorrect handling of invalid TCP option Michael M. Builov reported that in the tcp_options and tcp_sack functions of netfilter TCP conntrack the incorrect handling of invalid TCP option with too big opsize may lead to read access beyond tcp-packet or buffer allocated on stack (netfilter bugzilla #738). The fix is to stop parsing the options at detecting the broken option. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 37bf94394be0..afc4ab7cfe01 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -409,7 +409,7 @@ static void tcp_options(const struct sk_buff *skb, if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ if (opcode == TCPOPT_SACK_PERM && opsize == TCPOLEN_SACK_PERM) @@ -469,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ if (opcode == TCPOPT_SACK && opsize >= (TCPOLEN_SACK_BASE From bb9fc37358ffa9de1cc2b2b6f1a559b926ef50d9 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Aug 2011 15:46:13 +0200 Subject: [PATCH 09/62] netfilter: nf_ct_tcp: wrong multiplication of TCPOLEN_TSTAMP_ALIGNED in tcp_sack skips fastpath The wrong multiplication of TCPOLEN_TSTAMP_ALIGNED by 4 skips the fast path for the timestamp-only option. Bug reported by Michael M. Builov (netfilter bugzilla #738). Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index afc4ab7cfe01..8235b86b4e87 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -447,7 +447,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, BUG_ON(ptr == NULL); /* Fast path for timestamp-only option */ - if (length == TCPOLEN_TSTAMP_ALIGNED*4 + if (length == TCPOLEN_TSTAMP_ALIGNED && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) From 768b1031dc9f3eabe3e9f603e33617a0215c12a8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 30 Aug 2011 16:33:04 +0200 Subject: [PATCH 10/62] netfilter: update netfilter git URL Netfilter git trees are moving to a directory shared by Pablo and myself, update git URLs. Signed-off-by: Patrick McHardy --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1d2e79db0f58..a6669b2a77e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4386,7 +4386,8 @@ L: netfilter@vger.kernel.org L: coreteam@netfilter.org W: http://www.netfilter.org/ W: http://www.iptables.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next-2.6.git S: Supported F: include/linux/netfilter* F: include/linux/netfilter/ From 0e4660cbe51276e86dbdab17228733dbcdb49249 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 29 Aug 2011 10:06:14 +0200 Subject: [PATCH 11/62] ath9k_hw: fix calibration on 5 ghz ADC calibrations cannot run on 5 GHz with fast clock enabled. They need to be disabled, otherwise they'll hang and IQ mismatch calibration will not be run either. Signed-off-by: Felix Fietkau Reported-by: Adrian Chadd Cc: stable@kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9002_calib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c index 2d4c0910295b..2d394af82171 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c @@ -41,7 +41,8 @@ static bool ar9002_hw_is_cal_supported(struct ath_hw *ah, case ADC_DC_CAL: /* Run ADC Gain Cal for non-CCK & non 2GHz-HT20 only */ if (!IS_CHAN_B(chan) && - !(IS_CHAN_2GHZ(chan) && IS_CHAN_HT20(chan))) + !((IS_CHAN_2GHZ(chan) || IS_CHAN_A_FAST_CLOCK(ah, chan)) && + IS_CHAN_HT20(chan))) supported = true; break; } From ec0506dbe4e240ecd4c32bf74c84a88ce1ddb414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sun, 28 Aug 2011 12:35:31 +0000 Subject: [PATCH 12/62] net: relax PKTINFO non local ipv6 udp xmit check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow transparent sockets to be less restrictive about the source ip of ipv6 udp packets being sent. Google-Bug-Id: 5018138 Signed-off-by: Maciej Å»enczykowski CC: "Erik Kline" CC: "Lorenzo Colitti" Signed-off-by: David S. Miller --- include/net/transp_v6.h | 1 + net/ipv6/datagram.c | 5 +++-- net/ipv6/ip6_flowlabel.c | 8 ++++---- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 5271a741c3a3..498433dd067d 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -39,6 +39,7 @@ extern int datagram_recv_ctl(struct sock *sk, struct sk_buff *skb); extern int datagram_send_ctl(struct net *net, + struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9ef1831746ef..b46e9f88ce37 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -599,7 +599,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct net *net, +int datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, int *hlimit, int *tclass, int *dontfrag) @@ -658,7 +658,8 @@ int datagram_send_ctl(struct net *net, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; - if (!ipv6_chk_addr(net, &src_info->ipi6_addr, + if (!inet_sk(sk)->transparent && + !ipv6_chk_addr(net, &src_info->ipi6_addr, strict ? dev : NULL, 0)) err = -EINVAL; else diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f3caf1b8d572..543039450193 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -322,8 +322,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo } static struct ip6_flowlabel * -fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, - int optlen, int *err_p) +fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, + char __user *optval, int optlen, int *err_p) { struct ip6_flowlabel *fl = NULL; int olen; @@ -360,7 +360,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk, + err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk, &junk, &junk); if (err) goto done; @@ -528,7 +528,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; - fl = fl_create(net, &freq, optval, optlen, &err); + fl = fl_create(net, sk, &freq, optval, optlen, &err); if (fl == NULL) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 147ede38ab48..2fbda5fc4cc4 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -475,7 +475,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk, + retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, &junk); if (retv) goto done; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6a79f3081bdb..343852e5c703 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -817,8 +817,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, - &tclass, &dontfrag); + err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29213b51c499..bb95e8e1c6f9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1090,8 +1090,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, - &tclass, &dontfrag); + err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; From 29c486df6a208432b370bd4be99ae1369ede28d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2011 18:57:00 -0400 Subject: [PATCH 13/62] net: ipv4: relax AF_INET check in bind() commit d0733d2e29b65 (Check for mistakenly passed in non-IPv4 address) added regression on legacy apps that use bind() with AF_UNSPEC family. Relax the check, but make sure the bind() is done on INADDR_ANY addresses, as AF_UNSPEC has probably no sane meaning for other addresses. Bugzilla reference : https://bugzilla.kernel.org/show_bug.cgi?id=42012 Signed-off-by: Eric Dumazet Reported-and-bisected-by: Rene Meier CC: Marcus Meissner Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1b745d412cf6..dd2b9478ddd1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -466,8 +466,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; if (addr->sin_family != AF_INET) { + /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) + * only if s_addr is INADDR_ANY. + */ err = -EAFNOSUPPORT; - goto out; + if (addr->sin_family != AF_UNSPEC || + addr->sin_addr.s_addr != htonl(INADDR_ANY)) + goto out; } chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); From bfbed02ff54b3f97c8a00ef1657049448f1f172a Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 29 Aug 2011 23:34:47 +0000 Subject: [PATCH 14/62] MAINTAINERS: Update ATLX driver maintainers jie.yang@atheros.com bounces and I get a 550 "Unknown address error". Perhaps they have moved on? Signed-off-by: Ian Campbell Cc: Jay Cliburn Cc: Chris Snook Cc: Jie Yang Cc: Andrew Morton Cc: Joe Perches Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 261232d8adf9..2829f69484ef 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1278,7 +1278,6 @@ F: drivers/input/misc/ati_remote2.c ATLX ETHERNET DRIVERS M: Jay Cliburn M: Chris Snook -M: Jie Yang L: netdev@vger.kernel.org W: http://sourceforge.net/projects/atl1 W: http://atl1.sourceforge.net From 40a9f52e584936267228740bf7c16c5343166d11 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 29 Aug 2011 23:41:09 +0000 Subject: [PATCH 15/62] MAINTAINERS: Update Cisco VIC driver maintainers vkolluri@cisco.com bounces and I get "Unknown address error 550". Signed-off-by: Ian Campbell Cc: Christian Benvenuti Cc: Roopa Prabhu Cc: David Wang Cc: Andrew Morton Cc: Joe Perches Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2829f69484ef..735bdf90d15c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1757,7 +1757,6 @@ F: Documentation/zh_CN/ CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti -M: Vasanthy Kolluri M: Roopa Prabhu M: David Wang S: Supported From 31a0479546a7f91ec959560be01a0b1243a2b20c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 29 Aug 2011 23:42:58 +0000 Subject: [PATCH 16/62] MAINTAINERS: Update BNA 10G Maintainer ddutt@brocade.com bounces with 550 "RESOLVER.ADR.RecipNotFound" Signed-off-by: Ian Campbell Cc: Rasesh Mody Cc: Andrew Morton Cc: Joe Perches Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 735bdf90d15c..959ff41a14c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1573,7 +1573,6 @@ F: drivers/scsi/bfa/ BROCADE BNA 10 GIGABIT ETHERNET DRIVER M: Rasesh Mody -M: Debashis Dutt L: netdev@vger.kernel.org S: Supported F: drivers/net/bna/ From e2faeec2de9e2c73958e6ea6065dde1e8cd6f3a2 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Tue, 30 Aug 2011 20:58:56 -0400 Subject: [PATCH 17/62] e1000: Fix driver to be used on PA RISC C8000 workstations The checksum field in the EEPROM on HPPA is really not a checksum but a signature (0x16d6). So allow 0x16d6 as the matching checksum on HPPA systems. This issue is present on longterm/stable kernels, I have verified that this patch is applicable back to at least 2.6.32.y kernels. v2- changed ifdef to use CONFIG_PARISC instead of __hppa__ CC: Guy Martin CC: Rolf Eike Beer CC: Matt Turner Reported-by: Mikulas Patocka Signed-off-by: Jeff Kirsher Acked-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/e1000/e1000_hw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c index 8545c7aa93eb..a5a89ecb6f36 100644 --- a/drivers/net/e1000/e1000_hw.c +++ b/drivers/net/e1000/e1000_hw.c @@ -4026,6 +4026,12 @@ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw) checksum += eeprom_data; } +#ifdef CONFIG_PARISC + /* This is a signature and not a checksum on HP c8000 */ + if ((hw->subsystem_vendor_id == 0x103C) && (eeprom_data == 0x16d6)) + return E1000_SUCCESS; + +#endif if (checksum == (u16) EEPROM_SUM) return E1000_SUCCESS; else { From 3401dc6eba788ebc7c14ce51018d775b1c263399 Mon Sep 17 00:00:00 2001 From: George Date: Sat, 3 Sep 2011 10:58:47 -0500 Subject: [PATCH 18/62] rtlwifi: rtl8192su: Fix problem connecting to HT-enabled AP The driver fails to connect to 802.11n-enabled APs. The patch fixes Bug #42262. Signed-off-by: George Signed-off-by: Larry Finger Cc: Stable [2.6.39+] Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192cu/trx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c index 906e7aa55bc3..3e52a5496224 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c @@ -549,15 +549,16 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw, (tcb_desc->rts_use_shortpreamble ? 1 : 0) : (tcb_desc->rts_use_shortgi ? 1 : 0))); if (mac->bw_40) { - if (tcb_desc->packet_bw) { + if (rate_flag & IEEE80211_TX_RC_DUP_DATA) { SET_TX_DESC_DATA_BW(txdesc, 1); SET_TX_DESC_DATA_SC(txdesc, 3); + } else if(rate_flag & IEEE80211_TX_RC_40_MHZ_WIDTH){ + SET_TX_DESC_DATA_BW(txdesc, 1); + SET_TX_DESC_DATA_SC(txdesc, mac->cur_40_prime_sc); } else { SET_TX_DESC_DATA_BW(txdesc, 0); - if (rate_flag & IEEE80211_TX_RC_DUP_DATA) - SET_TX_DESC_DATA_SC(txdesc, - mac->cur_40_prime_sc); - } + SET_TX_DESC_DATA_SC(txdesc, 0); + } } else { SET_TX_DESC_DATA_BW(txdesc, 0); SET_TX_DESC_DATA_SC(txdesc, 0); From bac2555c6d86387132930af4d14cb47c4dd3f4f7 Mon Sep 17 00:00:00 2001 From: George Date: Sat, 3 Sep 2011 10:58:48 -0500 Subject: [PATCH 19/62] rtlwifi: Fix problem when switching connections The driver fails to clear encryption keys making it impossible to switch connections. Signed-off-by: George Signed-off-by: Larry Finger Cc: Stable [2.6.39+] Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index 1bdc1aa305c0..04c4e9eb6ee6 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -610,6 +610,11 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, mac->link_state = MAC80211_NOLINK; memset(mac->bssid, 0, 6); + + /* reset sec info */ + rtl_cam_reset_sec_info(hw); + + rtl_cam_reset_all_entry(hw); mac->vendor = PEER_UNKNOWN; RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG, @@ -1063,6 +1068,9 @@ static int rtl_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, *or clear all entry here. */ rtl_cam_delete_one_entry(hw, mac_addr, key_idx); + + rtl_cam_reset_sec_info(hw); + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, From 4bae7d976976fa52d345805ba686934cd548343e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Sep 2011 12:47:39 +0200 Subject: [PATCH 20/62] mac80211: fix missing sta_lock in __sta_info_destroy Since my commit 34e895075e21be3e21e71d6317440d1ee7969ad0 ("mac80211: allow station add/remove to sleep") there is a race in mac80211 when it clears the TIM bit because a sleeping station disconnected, the spinlock isn't held around the relevant code any more. Use the right API to acquire the spinlock correctly. Cc: stable@kernel.org [2.6.34+] Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3db78b696c5c..21070e9bc8d0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -665,7 +665,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) BUG_ON(!sdata->bss); atomic_dec(&sdata->bss->num_sta_ps); - __sta_info_clear_tim_bit(sdata->bss, sta); + sta_info_clear_tim_bit(sta); } local->num_sta--; From 6a6b3f3e13decfc4b97263a83ea4e80ac8cc89ae Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Fri, 9 Sep 2011 10:41:08 +0530 Subject: [PATCH 21/62] ath9k: Fix kernel panic on unplugging the device when the device is yanked out ath_pci_remove starts doing the cleanups, unregistering the hardware etc. so we should bail out immediately when we get drv_flush callback from mac80211 when the card is being unplugged. the panic occurs after we had associated to an AP. EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at ath_reset+0xa0/0x1c0 [ath9k] EAX: 00000000 EBX: 000697c0 ECX: 00000002 EDX: f3c3ccf0 ESI: 00000000 EDI: 00000000 EBP: f43e7b78 ESP: f43e7b50 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process kworker/u:2 (pid: 182, ti=f43e6000 task=f3c3c7c0 task.ti=f43e6000) Stack: 0000002a 00000000 00000000 003e7b78 0000000f eaaa8500 ffffffea eaaa97c0 eaaaa000 00000001 f43e7ba8 fb315d23 f99e7721 ecece680 eaaac738 eaaa8500 eaaaa020 000000c8 000000c8 00000000 eaaa8d58 eaaa8500 f43e7bd0 fb080b29 Call Trace: [] ath9k_flush+0x103/0x170 [ath9k] [] __ieee80211_recalc_idle+0x2c9/0x400 [mac80211] [] ieee80211_recalc_idle+0x2e/0x60 [mac80211] [] ieee80211_mgd_deauth+0x173/0x210 [mac80211] [] ieee80211_deauth+0x19/0x20 [mac80211] [] __cfg80211_mlme_deauth+0xf3/0x140 [cfg80211] [] ? __mutex_lock_common+0x1f0/0x380 [] __cfg80211_disconnect+0x18d/0x1f0 [cfg80211] [] cfg80211_netdev_notifier_call+0x159/0x5c0 [cfg80211] [] ? packet_notifier+0x174/0x1f0 [] notifier_call_chain+0x82/0xb0 [] raw_notifier_call_chain+0x1f/0x30 [] call_netdevice_notifiers+0x2c/0x60 [] ? trace_hardirqs_on_caller+0xf4/0x180 [] __dev_close_many+0x4c/0xd0 [] dev_close_many+0x6d/0xc0 [] rollback_registered_many+0x93/0x1c0 [] ? trace_hardirqs_on+0xb/0x10 [] unregister_netdevice_many+0x15/0x50 [] ieee80211_remove_interfaces+0x7b/0xb0 [mac80211] [] ieee80211_unregister_hw+0x4b/0x110 [mac80211] [] ath9k_deinit_device+0x3a/0x60 [ath9k] [] ath_pci_remove+0x46/0x90 [ath9k] [] pci_device_remove+0x44/0x100 [] __device_release_driver+0x64/0xb0 [] device_release_driver+0x27/0x40 [] bus_remove_device+0x7b/0xa0 [] device_del+0xf1/0x180 [] device_unregister+0x10/0x20 [] pci_stop_bus_device+0x6e/0x80 [] pci_remove_bus_device+0x12/0xa0 [] pciehp_unconfigure_device+0x89/0x180 [] ? mark_held_locks+0x64/0x100 [] ? __mutex_unlock_slowpath+0xaf/0x140 [] pciehp_disable_slot+0x64/0x1b0 [] pciehp_power_thread+0xd0/0x100 [] ? process_one_work+0x100/0x4d0 [] process_one_work+0x17c/0x4d0 [] ? process_one_work+0x100/0x4d0 [] ? queue_interrupt_event+0xa0/0xa0 [] worker_thread+0x13b/0x320 [] ? trace_hardirqs_on+0xb/0x10 [] ? manage_workers+0x1e0/0x1e0 [] kthread+0x84/0x90 [] ? __init_kthread_worker+0x60/0x60 [] kernel_thread_helper+0x6/0x10 Cc: Rajkumar Manoharan Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 6530694a59ae..722967b86cf1 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2303,6 +2303,12 @@ static void ath9k_flush(struct ieee80211_hw *hw, bool drop) mutex_lock(&sc->mutex); cancel_delayed_work_sync(&sc->tx_complete_work); + if (ah->ah_flags & AH_UNPLUGGED) { + ath_dbg(common, ATH_DBG_ANY, "Device has been unplugged!\n"); + mutex_unlock(&sc->mutex); + return; + } + if (sc->sc_flags & SC_OP_INVALID) { ath_dbg(common, ATH_DBG_ANY, "Device not present\n"); mutex_unlock(&sc->mutex); From 456fc37e4519f3f551830ce01c58ddaa35807204 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 12 Sep 2011 21:08:25 +0200 Subject: [PATCH 22/62] iwlagn: fix stack corruption Alexander reported a strange crash in iwlagn that Meenakshi and Wey couldn't reproduce. I just ran into the same issue and tracked it down to stack corruption. This fixes it. The problem was introduced in commit 4b8b99b6e650d0527f3a123744b7459976581d14 Author: Wey-Yi Guy Date: Fri Jul 8 14:29:48 2011 -0700 iwlagn: radio sensor offset in le16 format Cc: Wey-Yi Guy Cc: Meenakshi Venkataraman Reported-by: Alexander Diewald Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-agn-ucode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c index a895a099d086..56211006a182 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c @@ -167,7 +167,7 @@ static int iwlagn_set_temperature_offset_calib(struct iwl_priv *priv) memset(&cmd, 0, sizeof(cmd)); iwl_set_calib_hdr(&cmd.hdr, IWL_PHY_CALIBRATE_TEMP_OFFSET_CMD); - memcpy(&cmd.radio_sensor_offset, offset_calib, sizeof(offset_calib)); + memcpy(&cmd.radio_sensor_offset, offset_calib, sizeof(*offset_calib)); if (!(cmd.radio_sensor_offset)) cmd.radio_sensor_offset = DEFAULT_RADIO_SENSOR_OFFSET; From 282cdb325aea4ebbc42ce753b47cc96145eb54bc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 12 Sep 2011 12:09:10 -0700 Subject: [PATCH 23/62] iwlagn: fix command queue timeout If the command queue is constantly busy, which can happen in P2P, the hangcheck timer will frequently find a command in it and will eventually reset the device because nothing sets the timestamp for this queue when commands are processed. Fix this by setting the timestamp when a command completes. Cc: stable@kernel.org #2.6.39, #3.0.0 #3.1.0 Signed-off-by: Johannes Berg SIgned-off-by: Wey-Yi Guy Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c index a6b2b1db0b1d..222d410c586e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c +++ b/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c @@ -771,6 +771,8 @@ void iwl_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb) cmd = txq->cmd[cmd_index]; meta = &txq->meta[cmd_index]; + txq->time_stamp = jiffies; + iwlagn_unmap_tfd(priv, meta, &txq->tfds[index], DMA_BIDIRECTIONAL); /* Input error checking is done when commands are added to queue. */ From dfacf1387ceb6d7d6df614b18016fd1f347a1996 Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Tue, 30 Aug 2011 00:08:39 +0000 Subject: [PATCH 24/62] bnx2x: fix BRB thresholds for dropless_fc mode Fix the thresholds according to 5778x HW and increase rx_ring size to suit new thresholds in dropless_fc mode. Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x.h | 84 ++++++++++++++++++++++++++++++---- drivers/net/bnx2x/bnx2x_cmn.c | 10 ++-- drivers/net/bnx2x/bnx2x_main.c | 33 +++++++++---- 3 files changed, 102 insertions(+), 25 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h index c423504a755f..85297326506c 100644 --- a/drivers/net/bnx2x/bnx2x.h +++ b/drivers/net/bnx2x/bnx2x.h @@ -315,6 +315,14 @@ union db_prod { u32 raw; }; +/* dropless fc FW/HW related params */ +#define BRB_SIZE(bp) (CHIP_IS_E3(bp) ? 1024 : 512) +#define MAX_AGG_QS(bp) (CHIP_IS_E1(bp) ? \ + ETH_MAX_AGGREGATION_QUEUES_E1 :\ + ETH_MAX_AGGREGATION_QUEUES_E1H_E2) +#define FW_DROP_LEVEL(bp) (3 + MAX_SPQ_PENDING + MAX_AGG_QS(bp)) +#define FW_PREFETCH_CNT 16 +#define DROPLESS_FC_HEADROOM 100 /* MC hsi */ #define BCM_PAGE_SHIFT 12 @@ -331,15 +339,35 @@ union db_prod { /* SGE ring related macros */ #define NUM_RX_SGE_PAGES 2 #define RX_SGE_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_sge)) -#define MAX_RX_SGE_CNT (RX_SGE_CNT - 2) +#define NEXT_PAGE_SGE_DESC_CNT 2 +#define MAX_RX_SGE_CNT (RX_SGE_CNT - NEXT_PAGE_SGE_DESC_CNT) /* RX_SGE_CNT is promised to be a power of 2 */ #define RX_SGE_MASK (RX_SGE_CNT - 1) #define NUM_RX_SGE (RX_SGE_CNT * NUM_RX_SGE_PAGES) #define MAX_RX_SGE (NUM_RX_SGE - 1) #define NEXT_SGE_IDX(x) ((((x) & RX_SGE_MASK) == \ - (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1) + (MAX_RX_SGE_CNT - 1)) ? \ + (x) + 1 + NEXT_PAGE_SGE_DESC_CNT : \ + (x) + 1) #define RX_SGE(x) ((x) & MAX_RX_SGE) +/* + * Number of required SGEs is the sum of two: + * 1. Number of possible opened aggregations (next packet for + * these aggregations will probably consume SGE immidiatelly) + * 2. Rest of BRB blocks divided by 2 (block will consume new SGE only + * after placement on BD for new TPA aggregation) + * + * Takes into account NEXT_PAGE_SGE_DESC_CNT "next" elements on each page + */ +#define NUM_SGE_REQ (MAX_AGG_QS(bp) + \ + (BRB_SIZE(bp) - MAX_AGG_QS(bp)) / 2) +#define NUM_SGE_PG_REQ ((NUM_SGE_REQ + MAX_RX_SGE_CNT - 1) / \ + MAX_RX_SGE_CNT) +#define SGE_TH_LO(bp) (NUM_SGE_REQ + \ + NUM_SGE_PG_REQ * NEXT_PAGE_SGE_DESC_CNT) +#define SGE_TH_HI(bp) (SGE_TH_LO(bp) + DROPLESS_FC_HEADROOM) + /* Manipulate a bit vector defined as an array of u64 */ /* Number of bits in one sge_mask array element */ @@ -551,24 +579,43 @@ struct bnx2x_fastpath { #define NUM_TX_RINGS 16 #define TX_DESC_CNT (BCM_PAGE_SIZE / sizeof(union eth_tx_bd_types)) -#define MAX_TX_DESC_CNT (TX_DESC_CNT - 1) +#define NEXT_PAGE_TX_DESC_CNT 1 +#define MAX_TX_DESC_CNT (TX_DESC_CNT - NEXT_PAGE_TX_DESC_CNT) #define NUM_TX_BD (TX_DESC_CNT * NUM_TX_RINGS) #define MAX_TX_BD (NUM_TX_BD - 1) #define MAX_TX_AVAIL (MAX_TX_DESC_CNT * NUM_TX_RINGS - 2) #define NEXT_TX_IDX(x) ((((x) & MAX_TX_DESC_CNT) == \ - (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1) + (MAX_TX_DESC_CNT - 1)) ? \ + (x) + 1 + NEXT_PAGE_TX_DESC_CNT : \ + (x) + 1) #define TX_BD(x) ((x) & MAX_TX_BD) #define TX_BD_POFF(x) ((x) & MAX_TX_DESC_CNT) /* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */ #define NUM_RX_RINGS 8 #define RX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd)) -#define MAX_RX_DESC_CNT (RX_DESC_CNT - 2) +#define NEXT_PAGE_RX_DESC_CNT 2 +#define MAX_RX_DESC_CNT (RX_DESC_CNT - NEXT_PAGE_RX_DESC_CNT) #define RX_DESC_MASK (RX_DESC_CNT - 1) #define NUM_RX_BD (RX_DESC_CNT * NUM_RX_RINGS) #define MAX_RX_BD (NUM_RX_BD - 1) #define MAX_RX_AVAIL (MAX_RX_DESC_CNT * NUM_RX_RINGS - 2) -#define MIN_RX_AVAIL 128 + +/* dropless fc calculations for BDs + * + * Number of BDs should as number of buffers in BRB: + * Low threshold takes into account NEXT_PAGE_RX_DESC_CNT + * "next" elements on each page + */ +#define NUM_BD_REQ BRB_SIZE(bp) +#define NUM_BD_PG_REQ ((NUM_BD_REQ + MAX_RX_DESC_CNT - 1) / \ + MAX_RX_DESC_CNT) +#define BD_TH_LO(bp) (NUM_BD_REQ + \ + NUM_BD_PG_REQ * NEXT_PAGE_RX_DESC_CNT + \ + FW_DROP_LEVEL(bp)) +#define BD_TH_HI(bp) (BD_TH_LO(bp) + DROPLESS_FC_HEADROOM) + +#define MIN_RX_AVAIL ((bp)->dropless_fc ? BD_TH_HI(bp) + 128 : 128) #define MIN_RX_SIZE_TPA_HW (CHIP_IS_E1(bp) ? \ ETH_MIN_RX_CQES_WITH_TPA_E1 : \ @@ -579,7 +626,9 @@ struct bnx2x_fastpath { MIN_RX_AVAIL)) #define NEXT_RX_IDX(x) ((((x) & RX_DESC_MASK) == \ - (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1) + (MAX_RX_DESC_CNT - 1)) ? \ + (x) + 1 + NEXT_PAGE_RX_DESC_CNT : \ + (x) + 1) #define RX_BD(x) ((x) & MAX_RX_BD) /* @@ -589,14 +638,31 @@ struct bnx2x_fastpath { #define CQE_BD_REL (sizeof(union eth_rx_cqe) / sizeof(struct eth_rx_bd)) #define NUM_RCQ_RINGS (NUM_RX_RINGS * CQE_BD_REL) #define RCQ_DESC_CNT (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe)) -#define MAX_RCQ_DESC_CNT (RCQ_DESC_CNT - 1) +#define NEXT_PAGE_RCQ_DESC_CNT 1 +#define MAX_RCQ_DESC_CNT (RCQ_DESC_CNT - NEXT_PAGE_RCQ_DESC_CNT) #define NUM_RCQ_BD (RCQ_DESC_CNT * NUM_RCQ_RINGS) #define MAX_RCQ_BD (NUM_RCQ_BD - 1) #define MAX_RCQ_AVAIL (MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2) #define NEXT_RCQ_IDX(x) ((((x) & MAX_RCQ_DESC_CNT) == \ - (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1) + (MAX_RCQ_DESC_CNT - 1)) ? \ + (x) + 1 + NEXT_PAGE_RCQ_DESC_CNT : \ + (x) + 1) #define RCQ_BD(x) ((x) & MAX_RCQ_BD) +/* dropless fc calculations for RCQs + * + * Number of RCQs should be as number of buffers in BRB: + * Low threshold takes into account NEXT_PAGE_RCQ_DESC_CNT + * "next" elements on each page + */ +#define NUM_RCQ_REQ BRB_SIZE(bp) +#define NUM_RCQ_PG_REQ ((NUM_BD_REQ + MAX_RCQ_DESC_CNT - 1) / \ + MAX_RCQ_DESC_CNT) +#define RCQ_TH_LO(bp) (NUM_RCQ_REQ + \ + NUM_RCQ_PG_REQ * NEXT_PAGE_RCQ_DESC_CNT + \ + FW_DROP_LEVEL(bp)) +#define RCQ_TH_HI(bp) (RCQ_TH_LO(bp) + DROPLESS_FC_HEADROOM) + /* This is needed for determining of last_max */ #define SUB_S16(a, b) (s16)((s16)(a) - (s16)(b)) diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c index 37e5790681ad..2a33d2433c31 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.c +++ b/drivers/net/bnx2x/bnx2x_cmn.c @@ -987,8 +987,6 @@ void __bnx2x_link_report(struct bnx2x *bp) void bnx2x_init_rx_rings(struct bnx2x *bp) { int func = BP_FUNC(bp); - int max_agg_queues = CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 : - ETH_MAX_AGGREGATION_QUEUES_E1H_E2; u16 ring_prod; int i, j; @@ -1001,7 +999,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) if (!fp->disable_tpa) { /* Fill the per-aggregtion pool */ - for (i = 0; i < max_agg_queues; i++) { + for (i = 0; i < MAX_AGG_QS(bp); i++) { struct bnx2x_agg_info *tpa_info = &fp->tpa_info[i]; struct sw_rx_bd *first_buf = @@ -1041,7 +1039,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) bnx2x_free_rx_sge_range(bp, fp, ring_prod); bnx2x_free_tpa_pool(bp, fp, - max_agg_queues); + MAX_AGG_QS(bp)); fp->disable_tpa = 1; ring_prod = 0; break; @@ -1137,9 +1135,7 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp) bnx2x_free_rx_bds(fp); if (!fp->disable_tpa) - bnx2x_free_tpa_pool(bp, fp, CHIP_IS_E1(bp) ? - ETH_MAX_AGGREGATION_QUEUES_E1 : - ETH_MAX_AGGREGATION_QUEUES_E1H_E2); + bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp)); } } diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c index f74582a22c68..3f93e8666104 100644 --- a/drivers/net/bnx2x/bnx2x_main.c +++ b/drivers/net/bnx2x/bnx2x_main.c @@ -2756,8 +2756,14 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp, u16 tpa_agg_size = 0; if (!fp->disable_tpa) { - pause->sge_th_hi = 250; - pause->sge_th_lo = 150; + pause->sge_th_lo = SGE_TH_LO(bp); + pause->sge_th_hi = SGE_TH_HI(bp); + + /* validate SGE ring has enough to cross high threshold */ + WARN_ON(bp->dropless_fc && + pause->sge_th_hi + FW_PREFETCH_CNT > + MAX_RX_SGE_CNT * NUM_RX_SGE_PAGES); + tpa_agg_size = min_t(u32, (min_t(u32, 8, MAX_SKB_FRAGS) * SGE_PAGE_SIZE * PAGES_PER_SGE), 0xffff); @@ -2771,10 +2777,21 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp, /* pause - not for e1 */ if (!CHIP_IS_E1(bp)) { - pause->bd_th_hi = 350; - pause->bd_th_lo = 250; - pause->rcq_th_hi = 350; - pause->rcq_th_lo = 250; + pause->bd_th_lo = BD_TH_LO(bp); + pause->bd_th_hi = BD_TH_HI(bp); + + pause->rcq_th_lo = RCQ_TH_LO(bp); + pause->rcq_th_hi = RCQ_TH_HI(bp); + /* + * validate that rings have enough entries to cross + * high thresholds + */ + WARN_ON(bp->dropless_fc && + pause->bd_th_hi + FW_PREFETCH_CNT > + bp->rx_ring_size); + WARN_ON(bp->dropless_fc && + pause->rcq_th_hi + FW_PREFETCH_CNT > + NUM_RCQ_RINGS * MAX_RCQ_DESC_CNT); pause->pri_map = 1; } @@ -2802,9 +2819,7 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp, * For PF Clients it should be the maximum avaliable number. * VF driver(s) may want to define it to a smaller value. */ - rxq_init->max_tpa_queues = - (CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 : - ETH_MAX_AGGREGATION_QUEUES_E1H_E2); + rxq_init->max_tpa_queues = MAX_AGG_QS(bp); rxq_init->cache_line_log = BNX2X_RX_ALIGN_SHIFT; rxq_init->fw_sb_id = fp->fw_sb_id; From 5f837363457a2280530373267f86092625d15a4d Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Tue, 30 Aug 2011 00:08:40 +0000 Subject: [PATCH 25/62] bnx2x: decrease print level to debug It may happen every link toggle. Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_stats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c index 771f6803b238..f5d9b4213cad 100644 --- a/drivers/net/bnx2x/bnx2x_stats.c +++ b/drivers/net/bnx2x/bnx2x_stats.c @@ -710,7 +710,8 @@ static int bnx2x_hw_stats_update(struct bnx2x *bp) break; case MAC_TYPE_NONE: /* unreached */ - BNX2X_ERR("stats updated by DMAE but no MAC active\n"); + DP(BNX2X_MSG_STATS, + "stats updated by DMAE but no MAC active\n"); return -1; default: /* unreached */ From c2188952fc7d2ca54bb8aca1bc502618a7488baf Mon Sep 17 00:00:00 2001 From: Vladislav Zolotarov Date: Tue, 30 Aug 2011 00:08:41 +0000 Subject: [PATCH 26/62] bnx2x: fix rx ring size report Store the size in bp, read from bp when queried. Signed-off-by: Dmitry Kravkov Signed-off-by: Vladislav Zolotarov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_cmn.c | 17 +++++++++++------ drivers/net/bnx2x/bnx2x_ethtool.c | 5 +---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c index 2a33d2433c31..c4cbf9736414 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.c +++ b/drivers/net/bnx2x/bnx2x_cmn.c @@ -3091,15 +3091,20 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) struct bnx2x_fastpath *fp = &bp->fp[index]; int ring_size = 0; u8 cos; + int rx_ring_size = 0; /* if rx_ring_size specified - use it */ - int rx_ring_size = bp->rx_ring_size ? bp->rx_ring_size : - MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp); + if (!bp->rx_ring_size) { - /* allocate at least number of buffers required by FW */ - rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA : - MIN_RX_SIZE_TPA, - rx_ring_size); + rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp); + + /* allocate at least number of buffers required by FW */ + rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA : + MIN_RX_SIZE_TPA, rx_ring_size); + + bp->rx_ring_size = rx_ring_size; + } else + rx_ring_size = bp->rx_ring_size; /* Common */ sb = &bnx2x_fp(bp, index, status_blk); diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c index 221863059dae..0ceb6c7b1238 100644 --- a/drivers/net/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/bnx2x/bnx2x_ethtool.c @@ -1310,10 +1310,7 @@ static void bnx2x_get_ringparam(struct net_device *dev, if (bp->rx_ring_size) ering->rx_pending = bp->rx_ring_size; else - if (bp->state == BNX2X_STATE_OPEN && bp->num_queues) - ering->rx_pending = MAX_RX_AVAIL/bp->num_queues; - else - ering->rx_pending = MAX_RX_AVAIL; + ering->rx_pending = MAX_RX_AVAIL; ering->rx_mini_pending = 0; ering->rx_jumbo_pending = 0; From 3395a033a7c2f1a089fae7e89bf108764b59529c Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Tue, 30 Aug 2011 00:08:42 +0000 Subject: [PATCH 27/62] bnx2x: fix MF for 4-port devices Number of VNs for 4-port devices is 2 instead of 4 Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x.h | 15 ++++++------ drivers/net/bnx2x/bnx2x_main.c | 43 ++++++++++++++++++++------------- drivers/net/bnx2x/bnx2x_stats.c | 4 +-- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h index 85297326506c..2621a1c56358 100644 --- a/drivers/net/bnx2x/bnx2x.h +++ b/drivers/net/bnx2x/bnx2x.h @@ -1166,11 +1166,12 @@ struct bnx2x { #define BP_PORT(bp) (bp->pfid & 1) #define BP_FUNC(bp) (bp->pfid) #define BP_ABS_FUNC(bp) (bp->pf_num) -#define BP_E1HVN(bp) (bp->pfid >> 1) -#define BP_VN(bp) (BP_E1HVN(bp)) /*remove when approved*/ -#define BP_L_ID(bp) (BP_E1HVN(bp) << 2) -#define BP_FW_MB_IDX(bp) (BP_PORT(bp) +\ - BP_VN(bp) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2 : 1)) +#define BP_VN(bp) ((bp)->pfid >> 1) +#define BP_MAX_VN_NUM(bp) (CHIP_MODE_IS_4_PORT(bp) ? 2 : 4) +#define BP_L_ID(bp) (BP_VN(bp) << 2) +#define BP_FW_MB_IDX_VN(bp, vn) (BP_PORT(bp) +\ + (vn) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2 : 1)) +#define BP_FW_MB_IDX(bp) BP_FW_MB_IDX_VN(bp, BP_VN(bp)) struct net_device *dev; struct pci_dev *pdev; @@ -1833,7 +1834,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, #define MAX_DMAE_C_PER_PORT 8 #define INIT_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \ - BP_E1HVN(bp)) + BP_VN(bp)) #define PMF_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \ E1HVN_MAX) @@ -1859,7 +1860,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, /* must be used on a CID before placing it on a HW ring */ #define HW_CID(bp, x) ((BP_PORT(bp) << 23) | \ - (BP_E1HVN(bp) << BNX2X_SWCID_SHIFT) | \ + (BP_VN(bp) << BNX2X_SWCID_SHIFT) | \ (x)) #define SP_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_spe)) diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c index 3f93e8666104..9633e9b6853c 100644 --- a/drivers/net/bnx2x/bnx2x_main.c +++ b/drivers/net/bnx2x/bnx2x_main.c @@ -407,8 +407,8 @@ u32 bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type, opcode |= (DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET); opcode |= (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0); - opcode |= ((BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT) | - (BP_E1HVN(bp) << DMAE_COMMAND_DST_VN_SHIFT)); + opcode |= ((BP_VN(bp) << DMAE_CMD_E1HVN_SHIFT) | + (BP_VN(bp) << DMAE_COMMAND_DST_VN_SHIFT)); opcode |= (DMAE_COM_SET_ERR << DMAE_COMMAND_ERR_POLICY_SHIFT); #ifdef __BIG_ENDIAN @@ -1419,7 +1419,7 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp) if (!CHIP_IS_E1(bp)) { /* init leading/trailing edge */ if (IS_MF(bp)) { - val = (0xee0f | (1 << (BP_E1HVN(bp) + 4))); + val = (0xee0f | (1 << (BP_VN(bp) + 4))); if (bp->port.pmf) /* enable nig and gpio3 attention */ val |= 0x1100; @@ -1471,7 +1471,7 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp) /* init leading/trailing edge */ if (IS_MF(bp)) { - val = (0xee0f | (1 << (BP_E1HVN(bp) + 4))); + val = (0xee0f | (1 << (BP_VN(bp) + 4))); if (bp->port.pmf) /* enable nig and gpio3 attention */ val |= 0x1100; @@ -2287,7 +2287,7 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp) int vn; bp->vn_weight_sum = 0; - for (vn = VN_0; vn < E1HVN_MAX; vn++) { + for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) { u32 vn_cfg = bp->mf_config[vn]; u32 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT) * 100; @@ -2320,12 +2320,18 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp) CMNG_FLAGS_PER_PORT_FAIRNESS_VN; } +/* returns func by VN for current port */ +static inline int func_by_vn(struct bnx2x *bp, int vn) +{ + return 2 * vn + BP_PORT(bp); +} + static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn) { struct rate_shaping_vars_per_vn m_rs_vn; struct fairness_vars_per_vn m_fair_vn; u32 vn_cfg = bp->mf_config[vn]; - int func = 2*vn + BP_PORT(bp); + int func = func_by_vn(bp, vn); u16 vn_min_rate, vn_max_rate; int i; @@ -2422,7 +2428,7 @@ void bnx2x_read_mf_cfg(struct bnx2x *bp) * * and there are 2 functions per port */ - for (vn = VN_0; vn < E1HVN_MAX; vn++) { + for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) { int /*abs*/func = n * (2 * vn + BP_PORT(bp)) + BP_PATH(bp); if (func >= E1H_FUNC_MAX) @@ -2454,7 +2460,7 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type) /* calculate and set min-max rate for each vn */ if (bp->port.pmf) - for (vn = VN_0; vn < E1HVN_MAX; vn++) + for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) bnx2x_init_vn_minmax(bp, vn); /* always enable rate shaping and fairness */ @@ -2473,16 +2479,15 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type) static inline void bnx2x_link_sync_notify(struct bnx2x *bp) { - int port = BP_PORT(bp); int func; int vn; /* Set the attention towards other drivers on the same port */ - for (vn = VN_0; vn < E1HVN_MAX; vn++) { - if (vn == BP_E1HVN(bp)) + for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) { + if (vn == BP_VN(bp)) continue; - func = ((vn << 1) | port); + func = func_by_vn(bp, vn); REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 + (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1); } @@ -2577,7 +2582,7 @@ static void bnx2x_pmf_update(struct bnx2x *bp) bnx2x_dcbx_pmf_update(bp); /* enable nig attention */ - val = (0xff0f | (1 << (BP_E1HVN(bp) + 4))); + val = (0xff0f | (1 << (BP_VN(bp) + 4))); if (bp->common.int_block == INT_BLOCK_HC) { REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val); REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val); @@ -6686,12 +6691,16 @@ static int bnx2x_init_hw_func(struct bnx2x *bp) if (CHIP_MODE_IS_4_PORT(bp)) dsb_idx = BP_FUNC(bp); else - dsb_idx = BP_E1HVN(bp); + dsb_idx = BP_VN(bp); prod_offset = (CHIP_INT_MODE_IS_BC(bp) ? IGU_BC_BASE_DSB_PROD + dsb_idx : IGU_NORM_BASE_DSB_PROD + dsb_idx); + /* + * igu prods come in chunks of E1HVN_MAX (4) - + * does not matters what is the current chip mode + */ for (i = 0; i < (num_segs * E1HVN_MAX); i += E1HVN_MAX) { addr = IGU_REG_PROD_CONS_MEMORY + @@ -7585,7 +7594,7 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode) u32 val; /* The mac address is written to entries 1-4 to preserve entry 0 which is used by the PMF */ - u8 entry = (BP_E1HVN(bp) + 1)*8; + u8 entry = (BP_VN(bp) + 1)*8; val = (mac_addr[0] << 8) | mac_addr[1]; EMAC_WR(bp, EMAC_REG_EMAC_MAC_MATCH + entry, val); @@ -8792,13 +8801,13 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp) static void __devinit bnx2x_get_igu_cam_info(struct bnx2x *bp) { int pfid = BP_FUNC(bp); - int vn = BP_E1HVN(bp); int igu_sb_id; u32 val; u8 fid, igu_sb_cnt = 0; bp->igu_base_sb = 0xff; if (CHIP_INT_MODE_IS_BC(bp)) { + int vn = BP_VN(bp); igu_sb_cnt = bp->igu_sb_cnt; bp->igu_base_sb = (CHIP_MODE_IS_4_PORT(bp) ? pfid : vn) * FP_SB_MAX_E1x; @@ -9488,7 +9497,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp) bp->mf_ov = 0; bp->mf_mode = 0; - vn = BP_E1HVN(bp); + vn = BP_VN(bp); if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) { BNX2X_DEV_INFO("shmem2base 0x%x, size %d, mfcfg offset %d\n", diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c index f5d9b4213cad..9908f2bbcf73 100644 --- a/drivers/net/bnx2x/bnx2x_stats.c +++ b/drivers/net/bnx2x/bnx2x_stats.c @@ -1392,7 +1392,7 @@ static void bnx2x_port_stats_base_init(struct bnx2x *bp) static void bnx2x_func_stats_base_init(struct bnx2x *bp) { - int vn, vn_max = IS_MF(bp) ? E1HVN_MAX : E1VN_MAX; + int vn, vn_max = IS_MF(bp) ? BP_MAX_VN_NUM(bp) : E1VN_MAX; u32 func_stx; /* sanity */ @@ -1405,7 +1405,7 @@ static void bnx2x_func_stats_base_init(struct bnx2x *bp) func_stx = bp->func_stx; for (vn = VN_0; vn < vn_max; vn++) { - int mb_idx = CHIP_IS_E1x(bp) ? 2*vn + BP_PORT(bp) : vn; + int mb_idx = BP_FW_MB_IDX_VN(bp, vn); bp->func_stx = SHMEM_RD(bp, func_mb[mb_idx].fw_mb_param); bnx2x_func_stats_init(bp); From 7a06a122322c89544774e789a11aa671423e9362 Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Tue, 30 Aug 2011 00:08:43 +0000 Subject: [PATCH 28/62] bnx2x: don't reset device while reading its configuration. Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_main.c | 24 +++++++++++++++--------- drivers/net/bnx2x/bnx2x_reg.h | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c index 9633e9b6853c..00dc8f0fc3af 100644 --- a/drivers/net/bnx2x/bnx2x_main.c +++ b/drivers/net/bnx2x/bnx2x_main.c @@ -5822,7 +5822,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp) * take the UNDI lock to protect undi_unload flow from accessing * registers while we're resetting the chip */ - bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); + bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET); bnx2x_reset_common(bp); REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff); @@ -5834,7 +5834,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp) } REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, val); - bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); + bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET); bnx2x_init_block(bp, BLOCK_MISC, PHASE_COMMON); @@ -8570,10 +8570,12 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp) /* Check if there is any driver already loaded */ val = REG_RD(bp, MISC_REG_UNPREPARED); if (val == 0x1) { - /* Check if it is the UNDI driver + + bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET); + /* + * Check if it is the UNDI driver * UNDI driver initializes CID offset for normal bell to 0x7 */ - bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); val = REG_RD(bp, DORQ_REG_NORM_CID_OFST); if (val == 0x7) { u32 reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; @@ -8611,9 +8613,6 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp) bnx2x_fw_command(bp, reset_code, 0); } - /* now it's safe to release the lock */ - bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); - bnx2x_undi_int_disable(bp); port = BP_PORT(bp); @@ -8663,8 +8662,10 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp) bp->fw_seq = (SHMEM_RD(bp, func_mb[bp->pf_num].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); - } else - bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); + } + + /* now it's safe to release the lock */ + bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET); } } @@ -9440,6 +9441,10 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp) bp->igu_base_sb = 0; } else { bp->common.int_block = INT_BLOCK_IGU; + + /* do not allow device reset during IGU info preocessing */ + bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET); + val = REG_RD(bp, IGU_REG_BLOCK_CONFIGURATION); if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) { @@ -9471,6 +9476,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp) bnx2x_get_igu_cam_info(bp); + bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET); } /* diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h index 40266c14e6dc..dac217d478f2 100644 --- a/drivers/net/bnx2x/bnx2x_reg.h +++ b/drivers/net/bnx2x/bnx2x_reg.h @@ -5766,7 +5766,7 @@ #define HW_LOCK_RESOURCE_RECOVERY_LEADER_0 8 #define HW_LOCK_RESOURCE_RECOVERY_LEADER_1 9 #define HW_LOCK_RESOURCE_SPIO 2 -#define HW_LOCK_RESOURCE_UNDI 5 +#define HW_LOCK_RESOURCE_RESET 5 #define AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT (0x1<<4) #define AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR (0x1<<5) #define AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR (0x1<<18) From 0735f2fc8c49f1fbbbb245d038582922984ed3d5 Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Tue, 30 Aug 2011 00:08:44 +0000 Subject: [PATCH 29/62] bnx2x: init fw_seq after undi_unload is done Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_main.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c index 00dc8f0fc3af..94382a78c951 100644 --- a/drivers/net/bnx2x/bnx2x_main.c +++ b/drivers/net/bnx2x/bnx2x_main.c @@ -9623,13 +9623,6 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp) /* port info */ bnx2x_get_port_hwinfo(bp); - if (!BP_NOMCP(bp)) { - bp->fw_seq = - (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & - DRV_MSG_SEQ_NUMBER_MASK); - BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq); - } - /* Get MAC addresses */ bnx2x_get_mac_hwinfo(bp); @@ -9795,6 +9788,14 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp) if (!BP_NOMCP(bp)) bnx2x_undi_unload(bp); + /* init fw_seq after undi_unload! */ + if (!BP_NOMCP(bp)) { + bp->fw_seq = + (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & + DRV_MSG_SEQ_NUMBER_MASK); + BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq); + } + if (CHIP_REV_IS_FPGA(bp)) dev_err(&bp->pdev->dev, "FPGA detected\n"); From a5c53dbcde9a156e8303acc6ecb2296bf609fe38 Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Tue, 30 Aug 2011 00:08:45 +0000 Subject: [PATCH 30/62] bnx2x: don't access removed registers on 57712 and above Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_main.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c index 94382a78c951..0b68d02fe455 100644 --- a/drivers/net/bnx2x/bnx2x_main.c +++ b/drivers/net/bnx2x/bnx2x_main.c @@ -10290,17 +10290,21 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev, /* clean indirect addresses */ pci_write_config_dword(bp->pdev, PCICFG_GRC_ADDRESS, PCICFG_VENDOR_ID_OFFSET); - /* Clean the following indirect addresses for all functions since it + /* + * Clean the following indirect addresses for all functions since it * is not used by the driver. */ REG_WR(bp, PXP2_REG_PGL_ADDR_88_F0, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F0, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_90_F0, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_94_F0, 0); - REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0); - REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0); - REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0); - REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0); + + if (CHIP_IS_E1x(bp)) { + REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0); + REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0); + REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0); + REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0); + } /* * Enable internal target-read (in case we are probed after PF FLR). From 150966ad56291776a1f3fed86000a027e0794922 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Tue, 30 Aug 2011 00:08:46 +0000 Subject: [PATCH 31/62] bnx2x: Fix for a host coalescing bug which impared latency. Seperated Rx and Tx coalescing to different state machines. Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: Dmitry Kravkov Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x.h | 23 ++++++++--------------- drivers/net/bnx2x/bnx2x_main.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h index 2621a1c56358..e46df5331c55 100644 --- a/drivers/net/bnx2x/bnx2x.h +++ b/drivers/net/bnx2x/bnx2x.h @@ -751,25 +751,18 @@ struct bnx2x_fastpath { #define FP_CSB_FUNC_OFF \ offsetof(struct cstorm_status_block_c, func) -#define HC_INDEX_TOE_RX_CQ_CONS 0 /* Formerly Ustorm TOE CQ index */ - /* (HC_INDEX_U_TOE_RX_CQ_CONS) */ -#define HC_INDEX_ETH_RX_CQ_CONS 1 /* Formerly Ustorm ETH CQ index */ - /* (HC_INDEX_U_ETH_RX_CQ_CONS) */ -#define HC_INDEX_ETH_RX_BD_CONS 2 /* Formerly Ustorm ETH BD index */ - /* (HC_INDEX_U_ETH_RX_BD_CONS) */ +#define HC_INDEX_ETH_RX_CQ_CONS 1 -#define HC_INDEX_TOE_TX_CQ_CONS 4 /* Formerly Cstorm TOE CQ index */ - /* (HC_INDEX_C_TOE_TX_CQ_CONS) */ -#define HC_INDEX_ETH_TX_CQ_CONS_COS0 5 /* Formerly Cstorm ETH CQ index */ - /* (HC_INDEX_C_ETH_TX_CQ_CONS) */ -#define HC_INDEX_ETH_TX_CQ_CONS_COS1 6 /* Formerly Cstorm ETH CQ index */ - /* (HC_INDEX_C_ETH_TX_CQ_CONS) */ -#define HC_INDEX_ETH_TX_CQ_CONS_COS2 7 /* Formerly Cstorm ETH CQ index */ - /* (HC_INDEX_C_ETH_TX_CQ_CONS) */ +#define HC_INDEX_OOO_TX_CQ_CONS 4 + +#define HC_INDEX_ETH_TX_CQ_CONS_COS0 5 + +#define HC_INDEX_ETH_TX_CQ_CONS_COS1 6 + +#define HC_INDEX_ETH_TX_CQ_CONS_COS2 7 #define HC_INDEX_ETH_FIRST_TX_CQ_CONS HC_INDEX_ETH_TX_CQ_CONS_COS0 - #define BNX2X_RX_SB_INDEX \ (&fp->sb_index_values[HC_INDEX_ETH_RX_CQ_CONS]) diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c index 0b68d02fe455..c027e9341a1a 100644 --- a/drivers/net/bnx2x/bnx2x_main.c +++ b/drivers/net/bnx2x/bnx2x_main.c @@ -4828,6 +4828,37 @@ void bnx2x_setup_ndsb_state_machine(struct hc_status_block_sm *hc_sm, hc_sm->time_to_expire = 0xFFFFFFFF; } + +/* allocates state machine ids. */ +static inline +void bnx2x_map_sb_state_machines(struct hc_index_data *index_data) +{ + /* zero out state machine indices */ + /* rx indices */ + index_data[HC_INDEX_ETH_RX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; + + /* tx indices */ + index_data[HC_INDEX_OOO_TX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags &= ~HC_INDEX_DATA_SM_ID; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags &= ~HC_INDEX_DATA_SM_ID; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags &= ~HC_INDEX_DATA_SM_ID; + + /* map indices */ + /* rx indices */ + index_data[HC_INDEX_ETH_RX_CQ_CONS].flags |= + SM_RX_ID << HC_INDEX_DATA_SM_ID_SHIFT; + + /* tx indices */ + index_data[HC_INDEX_OOO_TX_CQ_CONS].flags |= + SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags |= + SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags |= + SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags |= + SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT; +} + static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid, u8 vf_valid, int fw_sb_id, int igu_sb_id) { @@ -4859,6 +4890,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid, hc_sm_p = sb_data_e2.common.state_machine; sb_data_p = (u32 *)&sb_data_e2; data_size = sizeof(struct hc_status_block_data_e2)/sizeof(u32); + bnx2x_map_sb_state_machines(sb_data_e2.index_data); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); @@ -4873,6 +4905,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid, hc_sm_p = sb_data_e1x.common.state_machine; sb_data_p = (u32 *)&sb_data_e1x; data_size = sizeof(struct hc_status_block_data_e1x)/sizeof(u32); + bnx2x_map_sb_state_machines(sb_data_e1x.index_data); } bnx2x_setup_ndsb_state_machine(&hc_sm_p[SM_RX_ID], From 27e95a8c670e0c587990ec5b9a87a7ea17873d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Maravi=C4=87?= Date: Tue, 30 Aug 2011 03:12:55 +0000 Subject: [PATCH 32/62] pkt_sched: cls_rsvp.h was outdated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this file. [ tb[] array should be indexed by X not X-1 -DaveM ] Signed-off-by: Igor Maravić Signed-off-by: David S. Miller --- net/sched/cls_rsvp.h | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index be4505ee67a9..b01427924f81 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, struct rsvp_filter *f, **fp; struct rsvp_session *s, **sp; struct tc_rsvp_pinfo *pinfo = NULL; - struct nlattr *opt = tca[TCA_OPTIONS-1]; + struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_RSVP_MAX + 1]; struct tcf_exts e; unsigned int h1, h2; @@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map); + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); if (err < 0) return err; @@ -449,8 +449,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, if (f->handle != handle && handle) goto errout2; - if (tb[TCA_RSVP_CLASSID-1]) { - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); + if (tb[TCA_RSVP_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); tcf_bind_filter(tp, &f->res, base); } @@ -462,7 +462,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, err = -EINVAL; if (handle) goto errout2; - if (tb[TCA_RSVP_DST-1] == NULL) + if (tb[TCA_RSVP_DST] == NULL) goto errout2; err = -ENOBUFS; @@ -471,19 +471,19 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, goto errout2; h2 = 16; - if (tb[TCA_RSVP_SRC-1]) { - memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src)); + if (tb[TCA_RSVP_SRC]) { + memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); h2 = hash_src(f->src); } - if (tb[TCA_RSVP_PINFO-1]) { - pinfo = nla_data(tb[TCA_RSVP_PINFO-1]); + if (tb[TCA_RSVP_PINFO]) { + pinfo = nla_data(tb[TCA_RSVP_PINFO]); f->spi = pinfo->spi; f->tunnelhdr = pinfo->tunnelhdr; } - if (tb[TCA_RSVP_CLASSID-1]) - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); + if (tb[TCA_RSVP_CLASSID]) + f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - dst = nla_data(tb[TCA_RSVP_DST-1]); + dst = nla_data(tb[TCA_RSVP_DST]); h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); err = -ENOMEM; @@ -642,8 +642,7 @@ nla_put_failure: return -1; } -static struct tcf_proto_ops RSVP_OPS = { - .next = NULL, +static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, .classify = rsvp_classify, .init = rsvp_init, From 946cedccbd7387488d2cee5da92cdfeb28d2e670 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2011 03:21:44 +0000 Subject: [PATCH 33/62] tcp: Change possible SYN flooding messages "Possible SYN flooding on port xxxx " messages can fill logs on servers. Change logic to log the message only once per listener, and add two new SNMP counters to track : TCPReqQFullDoCookies : number of times a SYNCOOKIE was replied to client TCPReqQFullDrop : number of times a SYN request was dropped because syncookies were not enabled. Based on a prior patch from Tom Herbert, and suggestions from David. Signed-off-by: Eric Dumazet CC: Tom Herbert Signed-off-by: David S. Miller --- include/linux/snmp.h | 2 ++ include/net/request_sock.h | 3 ++- include/net/tcp.h | 3 +++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_ipv4.c | 49 ++++++++++++++++++++++---------------- net/ipv6/tcp_ipv6.c | 31 +++--------------------- 6 files changed, 40 insertions(+), 50 deletions(-) diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 12b2b18e50c1..e16557a357e5 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -231,6 +231,8 @@ enum LINUX_MIB_TCPDEFERACCEPTDROP, LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */ LINUX_MIB_TCPTIMEWAITOVERFLOW, /* TCPTimeWaitOverflow */ + LINUX_MIB_TCPREQQFULLDOCOOKIES, /* TCPReqQFullDoCookies */ + LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ __LINUX_MIB_MAX }; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 99e6e19b57c2..4c0766e201e3 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -96,7 +96,8 @@ extern int sysctl_max_syn_backlog; */ struct listen_sock { u8 max_qlen_log; - /* 3 bytes hole, try to use */ + u8 synflood_warned; + /* 2 bytes hole, try to use */ int qlen; int qlen_young; int clock_hand; diff --git a/include/net/tcp.h b/include/net/tcp.h index 149a415d1e0a..e9b48b094683 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -460,6 +460,9 @@ extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); extern int tcp_send_synack(struct sock *); +extern int tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b14ec7d03b6e..4bfad5da94f4 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), + SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES), + SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1c12b8ec849d..c34f01513945 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -808,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static void syn_flood_warning(const struct sk_buff *skb) +/* + * Return 1 if a syncookie should be sent + */ +int tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) { - const char *msg; + const char *msg = "Dropping request"; + int want_cookie = 0; + struct listen_sock *lopt; + + #ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) + if (sysctl_tcp_syncookies) { msg = "Sending cookies"; - else + want_cookie = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else #endif - msg = "Dropping request"; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - pr_info("TCP: Possible SYN flooding on port %d. %s.\n", - ntohs(tcp_hdr(skb)->dest), msg); + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. " + " Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; } +EXPORT_SYMBOL(tcp_syn_flood_action); /* * Save and compile IPv4 options into the request_sock if needed. @@ -1235,11 +1253,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; -#ifdef CONFIG_SYN_COOKIES int want_cookie = 0; -#else -#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ -#endif /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -1250,14 +1264,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * evidently real one. */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { - if (net_ratelimit()) - syn_flood_warning(skb); -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - want_cookie = 1; - } else -#endif - goto drop; + want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); + if (!want_cookie) + goto drop; } /* Accept backlog is full. If we have already queued enough @@ -1303,9 +1312,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; -#ifdef CONFIG_SYN_COOKIES want_cookie = 0; /* not our kind of cookie */ -#endif tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fb63f4aeb7..3c9fa618b69d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -531,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, return tcp_v6_send_synack(sk, req, rvp); } -static inline void syn_flood_warning(struct sk_buff *skb) -{ -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) - printk(KERN_INFO - "TCPv6: Possible SYN flooding on port %d. " - "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest)); - else -#endif - printk(KERN_INFO - "TCPv6: Possible SYN flooding on port %d. " - "Dropping request.\n", ntohs(tcp_hdr(skb)->dest)); -} - static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree_skb(inet6_rsk(req)->pktopts); @@ -1179,11 +1165,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; -#ifdef CONFIG_SYN_COOKIES int want_cookie = 0; -#else -#define want_cookie 0 -#endif if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1192,14 +1174,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; if (inet_csk_reqsk_queue_is_full(sk) && !isn) { - if (net_ratelimit()) - syn_flood_warning(skb); -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) - want_cookie = 1; - else -#endif - goto drop; + want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); + if (!want_cookie) + goto drop; } if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) @@ -1249,9 +1226,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; -#ifdef CONFIG_SYN_COOKIES want_cookie = 0; /* not our kind of cookie */ -#endif tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { From 6b59e3191daade2b975eeec1c71c591eb5c86b7b Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 30 Aug 2011 05:33:57 +0000 Subject: [PATCH 34/62] Documentation: networking: dmfe.txt: Remove the maintainer of orphan networking driver The dmfe module is a orphan driver, and with this was removed the maintainer of the documentation. Signed-off-by: Marcos Paulo de Souza Signed-off-by: David S. Miller --- Documentation/networking/dmfe.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/dmfe.txt b/Documentation/networking/dmfe.txt index 8006c227fda2..25320bf19c86 100644 --- a/Documentation/networking/dmfe.txt +++ b/Documentation/networking/dmfe.txt @@ -1,3 +1,5 @@ +Note: This driver doesn't have a maintainer. + Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux. This program is free software; you can redistribute it and/or @@ -55,7 +57,6 @@ Test and make sure PCI latency is now correct for all cases. Authors: Sten Wang : Original Author -Tobias Ringstrom : Current Maintainer Contributors: From d9e64f83ebb8f563810b10536b23516d3bd30e80 Mon Sep 17 00:00:00 2001 From: "rajan.aggarwal85@gmail.com" Date: Tue, 30 Aug 2011 23:57:38 +0000 Subject: [PATCH 35/62] net/can/af_can.c: Change del_timer to del_timer_sync This is important for SMP platform to check if timer function is executing on other CPU with deleting the timer. Signed-off-by: Rajan Aggarwal Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 8ce926d3b2cb..9b0c32a2690c 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -857,7 +857,7 @@ static __exit void can_exit(void) struct net_device *dev; if (stats_timer) - del_timer(&can_stattimer); + del_timer_sync(&can_stattimer); can_remove_proc(); From 02009afc223aae43b8e18918fc816e4520791537 Mon Sep 17 00:00:00 2001 From: Kavan Smith Date: Wed, 31 Aug 2011 05:12:05 +0000 Subject: [PATCH 36/62] ipheth: iPhone 4 Verizon CDMA USB Product ID add Add USB product ID for iPhone 4 CDMA Verizon Tested on at least 2 devices Signed-off-by: Kavan Smith Signed-off-by: David S. Miller --- drivers/net/usb/ipheth.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 15772b1b6a91..13c1f044b40d 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -59,6 +59,7 @@ #define USB_PRODUCT_IPHONE_3G 0x1292 #define USB_PRODUCT_IPHONE_3GS 0x1294 #define USB_PRODUCT_IPHONE_4 0x1297 +#define USB_PRODUCT_IPHONE_4_VZW 0x129c #define IPHETH_USBINTF_CLASS 255 #define IPHETH_USBINTF_SUBCLASS 253 @@ -98,6 +99,10 @@ static struct usb_device_id ipheth_table[] = { USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4, IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, IPHETH_USBINTF_PROTO) }, + { USB_DEVICE_AND_INTERFACE_INFO( + USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW, + IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, + IPHETH_USBINTF_PROTO) }, { } }; MODULE_DEVICE_TABLE(usb, ipheth_table); From 0542b69e2c57fc9668ce6a03155bea6e1f557901 Mon Sep 17 00:00:00 2001 From: dpward Date: Wed, 31 Aug 2011 06:05:27 +0000 Subject: [PATCH 37/62] net: Make flow cache namespace-aware flow_cache_lookup will return a cached object (or null pointer) that the resolver (i.e. xfrm_policy_lookup) previously found for another namespace using the same key/family/dir. Instead, make the namespace part of what identifies entries in the cache. As before, flow_entry_valid will return 0 for entries where the namespace has been deleted, and they will be removed from the cache the next time flow_cache_gc_task is run. Reported-by: Andrew Dickinson Signed-off-by: David Ward Signed-off-by: David S. Miller --- net/core/flow.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/flow.c b/net/core/flow.c index bf32c33cad3b..47b6d26c2afb 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -30,6 +30,7 @@ struct flow_cache_entry { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -232,7 +233,8 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, hash = flow_hash_code(fc, fcp, key); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { - if (tfle->family == family && + if (tfle->net == net && + tfle->family == family && tfle->dir == dir && flow_key_compare(key, &tfle->key) == 0) { fle = tfle; @@ -246,6 +248,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { + fle->net = net; fle->family = family; fle->dir = dir; memcpy(&fle->key, key, sizeof(*key)); From 48c830120f2a20b44220aa26feda9ed15f49eaab Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 31 Aug 2011 08:03:29 +0000 Subject: [PATCH 38/62] net: copy userspace buffers on device forwarding dev_forward_skb loops an skb back into host networking stack which might hang on the memory indefinitely. In particular, this can happen in macvtap in bridged mode. Copy the userspace fragments to avoid blocking the sender in that case. As this patch makes skb_copy_ubufs extern now, I also added some documentation and made it clear the SKBTX_DEV_ZEROCOPY flag automatically instead of doing it in all callers. This can be made into a separate patch if people feel it's worth it. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/dev.c | 8 ++++++++ net/core/skbuff.c | 22 +++++++++++++++++----- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7b996ed86d5b..8bd383caa363 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -524,6 +524,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 17d67b579beb..b10ff0a71855 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1515,6 +1515,14 @@ static inline bool is_skb_forwardable(struct net_device *dev, */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + skb_orphan(skb); nf_reset(skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 27002dffe7ed..387703f56fce 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -611,8 +611,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); -/* skb frags copy userspace buffers to kernel */ -static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +/* skb_copy_ubufs - copy userspace skb frags buffers to kernel + * @skb: the skb to modify + * @gfp_mask: allocation priority + * + * This must be called on SKBTX_DEV_ZEROCOPY skb. + * It will copy all frags into kernel and drop the reference + * to userspace pages. + * + * If this function is called from an interrupt gfp_mask() must be + * %GFP_ATOMIC. + * + * Returns 0 on success or a negative error code on failure + * to allocate kernel memory to copy to. + */ +int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int i; int num_frags = skb_shinfo(skb)->nr_frags; @@ -652,6 +665,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(skb)->frags[i - 1].page = head; head = (struct page *)head->private; } + + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; return 0; } @@ -677,7 +692,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) return NULL; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } n = skb + 1; @@ -803,7 +817,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) n = NULL; goto out; } - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; @@ -896,7 +909,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) goto nofrags; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); From 31dda0ae933bb9fea9cfe000b698c41af0417cac Mon Sep 17 00:00:00 2001 From: nhorman Date: Wed, 14 Sep 2011 03:05:02 +0000 Subject: [PATCH 39/62] net: don't clear IFF_XMIT_DST_RELEASE in ether_setup d88733150 introduced the IFF_SKB_TX_SHARING flag, which I unilaterally set in ether_setup. In doing this I didn't realize that other flags (such as IFF_XMIT_DST_RELEASE) might be set prior to calling the ether_setup routine. This patch changes ether_setup to or in SKB_TX_SHARING so as not to inadvertently clear other existing flags. Thanks to Pekka Riikonen for pointing out my error Signed-off-by: Neil Horman Reported-by: Pekka Riikonen CC: "David S. Miller" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ethernet/eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 27997d35ebd3..a2468363978e 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -340,7 +340,7 @@ void ether_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; - dev->priv_flags = IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_TX_SKB_SHARING; memset(dev->broadcast, 0xFF, ETH_ALEN); From c482e6c064613b3fd40758ef6c33318462b83789 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 7 Sep 2011 00:47:49 +0000 Subject: [PATCH 40/62] bnx2x: Fix ETS bandwidth ETS bandwidth of 0% is not allowed by driver, so provide alternative HW configuration for this case. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index d45b1555a602..9d381db16516 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -778,9 +778,9 @@ static int bnx2x_ets_e3b0_set_cos_bw(struct bnx2x *bp, { u32 nig_reg_adress_crd_weight = 0; u32 pbf_reg_adress_crd_weight = 0; - /* Calculate and set BW for this COS*/ - const u32 cos_bw_nig = (bw * min_w_val_nig) / total_bw; - const u32 cos_bw_pbf = (bw * min_w_val_pbf) / total_bw; + /* Calculate and set BW for this COS - use 1 instead of 0 for BW */ + const u32 cos_bw_nig = ((bw ? bw : 1) * min_w_val_nig) / total_bw; + const u32 cos_bw_pbf = ((bw ? bw : 1) * min_w_val_pbf) / total_bw; switch (cos_entry) { case 0: @@ -852,18 +852,12 @@ static int bnx2x_ets_e3b0_get_total_bw( /* Calculate total BW requested */ for (cos_idx = 0; cos_idx < ets_params->num_of_cos; cos_idx++) { if (bnx2x_cos_state_bw == ets_params->cos[cos_idx].state) { - - if (0 == ets_params->cos[cos_idx].params.bw_params.bw) { - DP(NETIF_MSG_LINK, "bnx2x_ets_E3B0_config BW" - "was set to 0\n"); - return -EINVAL; + *total_bw += + ets_params->cos[cos_idx].params.bw_params.bw; } - *total_bw += - ets_params->cos[cos_idx].params.bw_params.bw; - } } - /*Check taotl BW is valid */ + /* Check total BW is valid */ if ((100 != *total_bw) || (0 == *total_bw)) { if (0 == *total_bw) { DP(NETIF_MSG_LINK, "bnx2x_ets_E3B0_config toatl BW" From 6b1f3900fc0909fbf3bd672242378015f76b3df8 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 7 Sep 2011 00:47:54 +0000 Subject: [PATCH 41/62] bnx2x: Enable FEC for 57810-KR Enable FEC(Forward Error Correction) for 57810-KR to reduce link errors. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 6 ++++++ drivers/net/bnx2x/bnx2x_reg.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index 9d381db16516..f7a7ac3e889c 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -3624,6 +3624,12 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1, val16); + /* Advertised and set FEC (Forward Error Correction) */ + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, + MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2, + (MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY | + MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ)); + /* Enable CL37 BAM */ if (REG_RD(bp, params->shmem_base + offsetof(struct shmem_region, dev_info. diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h index dac217d478f2..057738623ba4 100644 --- a/drivers/net/bnx2x/bnx2x_reg.h +++ b/drivers/net/bnx2x/bnx2x_reg.h @@ -6853,6 +6853,9 @@ Theotherbitsarereservedandshouldbezero*/ #define MDIO_WC_REG_IEEE0BLK_AUTONEGNP 0x7 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT0 0x10 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1 0x11 +#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2 0x12 +#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY 0x4000 +#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ 0x8000 #define MDIO_WC_REG_PMD_IEEE9BLK_TENGBASE_KR_PMD_CONTROL_REGISTER_150 0x96 #define MDIO_WC_REG_XGXSBLK0_XGXSCONTROL 0x8000 #define MDIO_WC_REG_XGXSBLK0_MISCCONTROL1 0x800e From 0582242049c67d59c3a95cd1cba8995fa955c858 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 7 Sep 2011 00:47:58 +0000 Subject: [PATCH 42/62] bnx2x: Remove fiber remote fault detection Remove remote fault detection as a tactic retreat due to link issues involved with it. Once issue is resolved, this feature will be restored again. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index f7a7ac3e889c..db5913da5527 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -10638,8 +10638,7 @@ static struct bnx2x_phy phy_warpcore = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT, .addr = 0xff, .def_md_devad = 0, - .flags = (FLAGS_HW_LOCK_REQUIRED | - FLAGS_TX_ERROR_CHECK), + .flags = FLAGS_HW_LOCK_REQUIRED, .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10765,8 +10764,7 @@ static struct bnx2x_phy phy_8706 = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8706, .addr = 0xff, .def_md_devad = 0, - .flags = (FLAGS_INIT_XGXS_FIRST | - FLAGS_TX_ERROR_CHECK), + .flags = FLAGS_INIT_XGXS_FIRST, .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10797,8 +10795,7 @@ static struct bnx2x_phy phy_8726 = { .addr = 0xff, .def_md_devad = 0, .flags = (FLAGS_HW_LOCK_REQUIRED | - FLAGS_INIT_XGXS_FIRST | - FLAGS_TX_ERROR_CHECK), + FLAGS_INIT_XGXS_FIRST), .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10829,8 +10826,7 @@ static struct bnx2x_phy phy_8727 = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727, .addr = 0xff, .def_md_devad = 0, - .flags = (FLAGS_FAN_FAILURE_DET_REQ | - FLAGS_TX_ERROR_CHECK), + .flags = FLAGS_FAN_FAILURE_DET_REQ, .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, From 4d7e25d6cc4312b1f949123fea7026fd56441513 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 7 Sep 2011 00:48:03 +0000 Subject: [PATCH 43/62] bnx2x: Fix XMAC loopback test Change XMAC loopback type from CORE LOCAL to LINE LOCAL for the BCM578xx due to intermittent problem with the loopback with this configuration. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 2 +- drivers/net/bnx2x/bnx2x_reg.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index db5913da5527..342807585c2b 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -1720,7 +1720,7 @@ static int bnx2x_xmac_enable(struct link_params *params, /* Check loopback mode */ if (lb) - val |= XMAC_CTRL_REG_CORE_LOCAL_LPBK; + val |= XMAC_CTRL_REG_LINE_LOCAL_LPBK; REG_WR(bp, xmac_base + XMAC_REG_CTRL, val); bnx2x_set_xumac_nig(params, ((vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) != 0), 1); diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h index 057738623ba4..750e8445dac4 100644 --- a/drivers/net/bnx2x/bnx2x_reg.h +++ b/drivers/net/bnx2x/bnx2x_reg.h @@ -5320,7 +5320,7 @@ #define XCM_REG_XX_OVFL_EVNT_ID 0x20058 #define XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_LOCAL_FAULT_STATUS (0x1<<0) #define XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_REMOTE_FAULT_STATUS (0x1<<1) -#define XMAC_CTRL_REG_CORE_LOCAL_LPBK (0x1<<3) +#define XMAC_CTRL_REG_LINE_LOCAL_LPBK (0x1<<2) #define XMAC_CTRL_REG_RX_EN (0x1<<1) #define XMAC_CTRL_REG_SOFT_RESET (0x1<<6) #define XMAC_CTRL_REG_TX_EN (0x1<<0) From ab505dec96340946079d1288f49041bea9f259ff Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 7 Sep 2011 00:48:06 +0000 Subject: [PATCH 44/62] bnx2x: Fix 578xx link LED Fix 1G link LED for the BCM578xx-SFI/KR. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index 342807585c2b..ba15bdc5a1a9 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -5924,7 +5924,7 @@ int bnx2x_set_led(struct link_params *params, (tmp | EMAC_LED_OVERRIDE)); /* * return here without enabling traffic - * LED blink andsetting rate in ON mode. + * LED blink and setting rate in ON mode. * In oper mode, enabling LED blink * and setting rate is needed. */ @@ -5936,7 +5936,11 @@ int bnx2x_set_led(struct link_params *params, * This is a work-around for HW issue found when link * is up in CL73 */ - REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1); + if ((!CHIP_IS_E3(bp)) || + (CHIP_IS_E3(bp) && + mode == LED_MODE_ON)) + REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1); + if (CHIP_IS_E1x(bp) || CHIP_IS_E2(bp) || (mode == LED_MODE_ON)) From 8d661637407963d1990e53c36d53ace123219da3 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 7 Sep 2011 00:48:11 +0000 Subject: [PATCH 45/62] bnx2x: Fix ethtool advertisement Enable changing advertisement settings via ethtool. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_ethtool.c | 43 ++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c index 0ceb6c7b1238..cf3e47914dd7 100644 --- a/drivers/net/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/bnx2x/bnx2x_ethtool.c @@ -363,13 +363,50 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) } /* advertise the requested speed and duplex if supported */ - cmd->advertising &= bp->port.supported[cfg_idx]; + if (cmd->advertising & ~(bp->port.supported[cfg_idx])) { + DP(NETIF_MSG_LINK, "Advertisement parameters " + "are not supported\n"); + return -EINVAL; + } bp->link_params.req_line_speed[cfg_idx] = SPEED_AUTO_NEG; - bp->link_params.req_duplex[cfg_idx] = DUPLEX_FULL; - bp->port.advertising[cfg_idx] |= (ADVERTISED_Autoneg | + bp->link_params.req_duplex[cfg_idx] = cmd->duplex; + bp->port.advertising[cfg_idx] = (ADVERTISED_Autoneg | cmd->advertising); + if (cmd->advertising) { + bp->link_params.speed_cap_mask[cfg_idx] = 0; + if (cmd->advertising & ADVERTISED_10baseT_Half) { + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF; + } + if (cmd->advertising & ADVERTISED_10baseT_Full) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL; + + if (cmd->advertising & ADVERTISED_100baseT_Full) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL; + + if (cmd->advertising & ADVERTISED_100baseT_Half) { + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF; + } + if (cmd->advertising & ADVERTISED_1000baseT_Half) { + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_1G; + } + if (cmd->advertising & (ADVERTISED_1000baseT_Full | + ADVERTISED_1000baseKX_Full)) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_1G; + + if (cmd->advertising & (ADVERTISED_10000baseT_Full | + ADVERTISED_10000baseKX4_Full | + ADVERTISED_10000baseKR_Full)) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_10G; + } } else { /* forced speed */ /* advertise the requested speed and duplex if supported */ switch (speed) { From 86c432ca5d6da90a26ac8d3e680f2268b502d9c5 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 1 Sep 2011 12:09:29 +0000 Subject: [PATCH 46/62] Revert "sfc: Use write-combining to reduce TX latency" and follow-ups This reverts commits 65f0b417dee94f779ce9b77102b7d73c93723b39, d88d6b05fee3cc78e5b0273eb58c31201dcc6b76, fcfa060468a4edcf776f0c1211d826d5de1668c1, 747df2258b1b9a2e25929ef496262c339c380009 and 867955f5682f7157fdafe8670804b9f8ea077bc7. Depending on the processor model, write-combining may result in reordering that the NIC will not tolerate. This typically results in a DMA error event and reset by the driver, logged as: sfc 0000:0e:00.0: eth2: TX DMA Q reports TX_EV_PKT_ERR. sfc 0000:0e:00.0: eth2: resetting (ALL) Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/sfc/efx.c | 18 ++------------ drivers/net/sfc/io.h | 15 +++--------- drivers/net/sfc/mcdi.c | 46 +++++++++++++---------------------- drivers/net/sfc/nic.c | 7 ------ drivers/net/sfc/nic.h | 2 -- drivers/net/sfc/siena.c | 25 +++---------------- drivers/net/sfc/workarounds.h | 2 -- 7 files changed, 27 insertions(+), 88 deletions(-) diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index faca764aa21b..b59abc706d93 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -1050,7 +1050,6 @@ static int efx_init_io(struct efx_nic *efx) { struct pci_dev *pci_dev = efx->pci_dev; dma_addr_t dma_mask = efx->type->max_dma_mask; - bool use_wc; int rc; netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); @@ -1101,21 +1100,8 @@ static int efx_init_io(struct efx_nic *efx) rc = -EIO; goto fail3; } - - /* bug22643: If SR-IOV is enabled then tx push over a write combined - * mapping is unsafe. We need to disable write combining in this case. - * MSI is unsupported when SR-IOV is enabled, and the firmware will - * have removed the MSI capability. So write combining is safe if - * there is an MSI capability. - */ - use_wc = (!EFX_WORKAROUND_22643(efx) || - pci_find_capability(pci_dev, PCI_CAP_ID_MSI)); - if (use_wc) - efx->membase = ioremap_wc(efx->membase_phys, - efx->type->mem_map_size); - else - efx->membase = ioremap_nocache(efx->membase_phys, - efx->type->mem_map_size); + efx->membase = ioremap_nocache(efx->membase_phys, + efx->type->mem_map_size); if (!efx->membase) { netif_err(efx, probe, efx->net_dev, "could not map memory BAR at %llx+%x\n", diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h index cc978803d484..dc45110b2456 100644 --- a/drivers/net/sfc/io.h +++ b/drivers/net/sfc/io.h @@ -48,9 +48,9 @@ * replacing the low 96 bits with zero does not affect functionality. * - If the host writes to the last dword address of such a register * (i.e. the high 32 bits) the underlying register will always be - * written. If the collector and the current write together do not - * provide values for all 128 bits of the register, the low 96 bits - * will be written as zero. + * written. If the collector does not hold values for the low 96 + * bits of the register, they will be written as zero. Writing to + * the last qword does not have this effect and must not be done. * - If the host writes to the address of any other part of such a * register while the collector already holds values for some other * register, the write is discarded and the collector maintains its @@ -103,7 +103,6 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value, _efx_writed(efx, value->u32[2], reg + 8); _efx_writed(efx, value->u32[3], reg + 12); #endif - wmb(); mmiowb(); spin_unlock_irqrestore(&efx->biu_lock, flags); } @@ -126,7 +125,6 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase, __raw_writel((__force u32)value->u32[0], membase + addr); __raw_writel((__force u32)value->u32[1], membase + addr + 4); #endif - wmb(); mmiowb(); spin_unlock_irqrestore(&efx->biu_lock, flags); } @@ -141,7 +139,6 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value, /* No lock required */ _efx_writed(efx, value->u32[0], reg); - wmb(); } /* Read a 128-bit CSR, locking as appropriate. */ @@ -152,7 +149,6 @@ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value, spin_lock_irqsave(&efx->biu_lock, flags); value->u32[0] = _efx_readd(efx, reg + 0); - rmb(); value->u32[1] = _efx_readd(efx, reg + 4); value->u32[2] = _efx_readd(efx, reg + 8); value->u32[3] = _efx_readd(efx, reg + 12); @@ -175,7 +171,6 @@ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase, value->u64[0] = (__force __le64)__raw_readq(membase + addr); #else value->u32[0] = (__force __le32)__raw_readl(membase + addr); - rmb(); value->u32[1] = (__force __le32)__raw_readl(membase + addr + 4); #endif spin_unlock_irqrestore(&efx->biu_lock, flags); @@ -242,14 +237,12 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, #ifdef EFX_USE_QWORD_IO _efx_writeq(efx, value->u64[0], reg + 0); - _efx_writeq(efx, value->u64[1], reg + 8); #else _efx_writed(efx, value->u32[0], reg + 0); _efx_writed(efx, value->u32[1], reg + 4); +#endif _efx_writed(efx, value->u32[2], reg + 8); _efx_writed(efx, value->u32[3], reg + 12); -#endif - wmb(); } #define efx_writeo_page(efx, value, reg, page) \ _efx_writeo_page(efx, value, \ diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c index 3dd45ed61f0a..81a425397468 100644 --- a/drivers/net/sfc/mcdi.c +++ b/drivers/net/sfc/mcdi.c @@ -50,20 +50,6 @@ static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) return &nic_data->mcdi; } -static inline void -efx_mcdi_readd(struct efx_nic *efx, efx_dword_t *value, unsigned reg) -{ - struct siena_nic_data *nic_data = efx->nic_data; - value->u32[0] = (__force __le32)__raw_readl(nic_data->mcdi_smem + reg); -} - -static inline void -efx_mcdi_writed(struct efx_nic *efx, const efx_dword_t *value, unsigned reg) -{ - struct siena_nic_data *nic_data = efx->nic_data; - __raw_writel((__force u32)value->u32[0], nic_data->mcdi_smem + reg); -} - void efx_mcdi_init(struct efx_nic *efx) { struct efx_mcdi_iface *mcdi; @@ -84,8 +70,8 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, size_t inlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - unsigned pdu = MCDI_PDU(efx); - unsigned doorbell = MCDI_DOORBELL(efx); + unsigned pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); + unsigned doorbell = FR_CZ_MC_TREG_SMEM + MCDI_DOORBELL(efx); unsigned int i; efx_dword_t hdr; u32 xflags, seqno; @@ -106,28 +92,29 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, MCDI_HEADER_SEQ, seqno, MCDI_HEADER_XFLAGS, xflags); - efx_mcdi_writed(efx, &hdr, pdu); + efx_writed(efx, &hdr, pdu); for (i = 0; i < inlen; i += 4) - efx_mcdi_writed(efx, (const efx_dword_t *)(inbuf + i), - pdu + 4 + i); + _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i); + + /* Ensure the payload is written out before the header */ + wmb(); /* ring the doorbell with a distinctive value */ - EFX_POPULATE_DWORD_1(hdr, EFX_DWORD_0, 0x45789abc); - efx_mcdi_writed(efx, &hdr, doorbell); + _efx_writed(efx, (__force __le32) 0x45789abc, doorbell); } static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - unsigned int pdu = MCDI_PDU(efx); + unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); int i; BUG_ON(atomic_read(&mcdi->state) == MCDI_STATE_QUIESCENT); BUG_ON(outlen & 3 || outlen >= 0x100); for (i = 0; i < outlen; i += 4) - efx_mcdi_readd(efx, (efx_dword_t *)(outbuf + i), pdu + 4 + i); + *((__le32 *)(outbuf + i)) = _efx_readd(efx, pdu + 4 + i); } static int efx_mcdi_poll(struct efx_nic *efx) @@ -135,7 +122,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) struct efx_mcdi_iface *mcdi = efx_mcdi(efx); unsigned int time, finish; unsigned int respseq, respcmd, error; - unsigned int pdu = MCDI_PDU(efx); + unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); unsigned int rc, spins; efx_dword_t reg; @@ -161,7 +148,8 @@ static int efx_mcdi_poll(struct efx_nic *efx) time = get_seconds(); - efx_mcdi_readd(efx, ®, pdu); + rmb(); + efx_readd(efx, ®, pdu); /* All 1's indicates that shared memory is in reset (and is * not a valid header). Wait for it to come out reset before @@ -188,7 +176,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) respseq, mcdi->seqno); rc = EIO; } else if (error) { - efx_mcdi_readd(efx, ®, pdu + 4); + efx_readd(efx, ®, pdu + 4); switch (EFX_DWORD_FIELD(reg, EFX_DWORD_0)) { #define TRANSLATE_ERROR(name) \ case MC_CMD_ERR_ ## name: \ @@ -222,21 +210,21 @@ out: /* Test and clear MC-rebooted flag for this port/function */ int efx_mcdi_poll_reboot(struct efx_nic *efx) { - unsigned int addr = MCDI_REBOOT_FLAG(efx); + unsigned int addr = FR_CZ_MC_TREG_SMEM + MCDI_REBOOT_FLAG(efx); efx_dword_t reg; uint32_t value; if (efx_nic_rev(efx) < EFX_REV_SIENA_A0) return false; - efx_mcdi_readd(efx, ®, addr); + efx_readd(efx, ®, addr); value = EFX_DWORD_FIELD(reg, EFX_DWORD_0); if (value == 0) return 0; EFX_ZERO_DWORD(reg); - efx_mcdi_writed(efx, ®, addr); + efx_writed(efx, ®, addr); if (value == MC_STATUS_DWORD_ASSERT) return -EINTR; diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c index bafa23a6874c..3edfbaf5f022 100644 --- a/drivers/net/sfc/nic.c +++ b/drivers/net/sfc/nic.c @@ -1936,13 +1936,6 @@ void efx_nic_get_regs(struct efx_nic *efx, void *buf) size = min_t(size_t, table->step, 16); - if (table->offset >= efx->type->mem_map_size) { - /* No longer mapped; return dummy data */ - memcpy(buf, "\xde\xc0\xad\xde", 4); - buf += table->rows * size; - continue; - } - for (i = 0; i < table->rows; i++) { switch (table->step) { case 4: /* 32-bit register or SRAM */ diff --git a/drivers/net/sfc/nic.h b/drivers/net/sfc/nic.h index 4bd1f2839dfe..7443f99c977f 100644 --- a/drivers/net/sfc/nic.h +++ b/drivers/net/sfc/nic.h @@ -143,12 +143,10 @@ static inline struct falcon_board *falcon_board(struct efx_nic *efx) /** * struct siena_nic_data - Siena NIC state * @mcdi: Management-Controller-to-Driver Interface - * @mcdi_smem: MCDI shared memory mapping. The mapping is always uncacheable. * @wol_filter_id: Wake-on-LAN packet filter id */ struct siena_nic_data { struct efx_mcdi_iface mcdi; - void __iomem *mcdi_smem; int wol_filter_id; }; diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c index 5735e84c69de..2c3bd93fab54 100644 --- a/drivers/net/sfc/siena.c +++ b/drivers/net/sfc/siena.c @@ -250,26 +250,12 @@ static int siena_probe_nic(struct efx_nic *efx) efx_reado(efx, ®, FR_AZ_CS_DEBUG); efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; - /* Initialise MCDI */ - nic_data->mcdi_smem = ioremap_nocache(efx->membase_phys + - FR_CZ_MC_TREG_SMEM, - FR_CZ_MC_TREG_SMEM_STEP * - FR_CZ_MC_TREG_SMEM_ROWS); - if (!nic_data->mcdi_smem) { - netif_err(efx, probe, efx->net_dev, - "could not map MCDI at %llx+%x\n", - (unsigned long long)efx->membase_phys + - FR_CZ_MC_TREG_SMEM, - FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS); - rc = -ENOMEM; - goto fail1; - } efx_mcdi_init(efx); /* Recover from a failed assertion before probing */ rc = efx_mcdi_handle_assertion(efx); if (rc) - goto fail2; + goto fail1; /* Let the BMC know that the driver is now in charge of link and * filter settings. We must do this before we reset the NIC */ @@ -324,7 +310,6 @@ fail4: fail3: efx_mcdi_drv_attach(efx, false, NULL); fail2: - iounmap(nic_data->mcdi_smem); fail1: kfree(efx->nic_data); return rc; @@ -404,8 +389,6 @@ static int siena_init_nic(struct efx_nic *efx) static void siena_remove_nic(struct efx_nic *efx) { - struct siena_nic_data *nic_data = efx->nic_data; - efx_nic_free_buffer(efx, &efx->irq_status); siena_reset_hw(efx, RESET_TYPE_ALL); @@ -415,8 +398,7 @@ static void siena_remove_nic(struct efx_nic *efx) efx_mcdi_drv_attach(efx, false, NULL); /* Tear down the private nic state */ - iounmap(nic_data->mcdi_smem); - kfree(nic_data); + kfree(efx->nic_data); efx->nic_data = NULL; } @@ -656,7 +638,8 @@ const struct efx_nic_type siena_a0_nic_type = { .default_mac_ops = &efx_mcdi_mac_operations, .revision = EFX_REV_SIENA_A0, - .mem_map_size = FR_CZ_MC_TREG_SMEM, /* MC_TREG_SMEM mapped separately */ + .mem_map_size = (FR_CZ_MC_TREG_SMEM + + FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS), .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL, .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL, .buf_tbl_base = FR_BZ_BUF_FULL_TBL, diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h index 99ff11400cef..e4dd3a7f304b 100644 --- a/drivers/net/sfc/workarounds.h +++ b/drivers/net/sfc/workarounds.h @@ -38,8 +38,6 @@ #define EFX_WORKAROUND_15783 EFX_WORKAROUND_ALWAYS /* Legacy interrupt storm when interrupt fifo fills */ #define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA -/* Write combining and sriov=enabled are incompatible */ -#define EFX_WORKAROUND_22643 EFX_WORKAROUND_SIENA /* Spurious parity errors in TSORT buffers */ #define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A From 483f97f8b2b7f0ab09e14c06fe327d5e346fac28 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 1 Sep 2011 12:09:59 +0000 Subject: [PATCH 47/62] sfc: Use 64-bit writes for TX push where possible This was originally done as part of commit 65f0b417dee94f779ce9b77102b7d73c93723b39 ("sfc: Use write-combining to reduce TX latency"), but that had to be reverted. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/sfc/io.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h index dc45110b2456..751d1ec112cc 100644 --- a/drivers/net/sfc/io.h +++ b/drivers/net/sfc/io.h @@ -48,9 +48,9 @@ * replacing the low 96 bits with zero does not affect functionality. * - If the host writes to the last dword address of such a register * (i.e. the high 32 bits) the underlying register will always be - * written. If the collector does not hold values for the low 96 - * bits of the register, they will be written as zero. Writing to - * the last qword does not have this effect and must not be done. + * written. If the collector and the current write together do not + * provide values for all 128 bits of the register, the low 96 bits + * will be written as zero. * - If the host writes to the address of any other part of such a * register while the collector already holds values for some other * register, the write is discarded and the collector maintains its @@ -237,12 +237,13 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, #ifdef EFX_USE_QWORD_IO _efx_writeq(efx, value->u64[0], reg + 0); + _efx_writeq(efx, value->u64[1], reg + 8); #else _efx_writed(efx, value->u32[0], reg + 0); _efx_writed(efx, value->u32[1], reg + 4); -#endif _efx_writed(efx, value->u32[2], reg + 8); _efx_writed(efx, value->u32[3], reg + 12); +#endif } #define efx_writeo_page(efx, value, reg, page) \ _efx_writeo_page(efx, value, \ From 5229d87edcd80a3bceb0708ebd767faff2e589a9 Mon Sep 17 00:00:00 2001 From: Toshiharu Okada Date: Thu, 1 Sep 2011 14:20:07 +0000 Subject: [PATCH 48/62] pch_gbe: fixed the issue which receives an unnecessary packet. This patch fixed the issue which receives an unnecessary packet before link When using PHY of GMII, an unnecessary packet is received, And it becomes impossible to receive a packet after link up. Signed-off-by: Toshiharu Okada Signed-off-by: David S. Miller --- drivers/net/pch_gbe/pch_gbe_main.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index eac3c5ca9731..48ff87c455ae 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -717,13 +717,6 @@ static void pch_gbe_configure_rx(struct pch_gbe_adapter *adapter) iowrite32(rdba, &hw->reg->RX_DSC_BASE); iowrite32(rdlen, &hw->reg->RX_DSC_SIZE); iowrite32((rdba + rdlen), &hw->reg->RX_DSC_SW_P); - - /* Enables Receive DMA */ - rxdma = ioread32(&hw->reg->DMA_CTRL); - rxdma |= PCH_GBE_RX_DMA_EN; - iowrite32(rxdma, &hw->reg->DMA_CTRL); - /* Enables Receive */ - iowrite32(PCH_GBE_MRE_MAC_RX_EN, &hw->reg->MAC_RX_EN); } /** @@ -1097,6 +1090,19 @@ void pch_gbe_update_stats(struct pch_gbe_adapter *adapter) spin_unlock_irqrestore(&adapter->stats_lock, flags); } +static void pch_gbe_start_receive(struct pch_gbe_hw *hw) +{ + u32 rxdma; + + /* Enables Receive DMA */ + rxdma = ioread32(&hw->reg->DMA_CTRL); + rxdma |= PCH_GBE_RX_DMA_EN; + iowrite32(rxdma, &hw->reg->DMA_CTRL); + /* Enables Receive */ + iowrite32(PCH_GBE_MRE_MAC_RX_EN, &hw->reg->MAC_RX_EN); + return; +} + /** * pch_gbe_intr - Interrupt Handler * @irq: Interrupt number @@ -1717,6 +1723,7 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter) pch_gbe_alloc_tx_buffers(adapter, tx_ring); pch_gbe_alloc_rx_buffers(adapter, rx_ring, rx_ring->count); adapter->tx_queue_len = netdev->tx_queue_len; + pch_gbe_start_receive(&adapter->hw); mod_timer(&adapter->watchdog_timer, jiffies); From 124d770a6459be21b84445f6ebf7dbfb60d43585 Mon Sep 17 00:00:00 2001 From: Toshiharu Okada Date: Thu, 1 Sep 2011 14:20:08 +0000 Subject: [PATCH 49/62] pch_gbe: added the process of FIFO over run error This patch added the processing which should be done to hardware, when a FIFO over run error occurred. Signed-off-by: Toshiharu Okada Signed-off-by: David S. Miller --- drivers/net/pch_gbe/pch_gbe.h | 12 +- drivers/net/pch_gbe/pch_gbe_main.c | 269 ++++++++++++++++++----------- 2 files changed, 178 insertions(+), 103 deletions(-) diff --git a/drivers/net/pch_gbe/pch_gbe.h b/drivers/net/pch_gbe/pch_gbe.h index 59fac77d0dbb..a09a07197eb5 100644 --- a/drivers/net/pch_gbe/pch_gbe.h +++ b/drivers/net/pch_gbe/pch_gbe.h @@ -127,8 +127,8 @@ struct pch_gbe_regs { /* Reset */ #define PCH_GBE_ALL_RST 0x80000000 /* All reset */ -#define PCH_GBE_TX_RST 0x40000000 /* TX MAC, TX FIFO, TX DMA reset */ -#define PCH_GBE_RX_RST 0x04000000 /* RX MAC, RX FIFO, RX DMA reset */ +#define PCH_GBE_TX_RST 0x00008000 /* TX MAC, TX FIFO, TX DMA reset */ +#define PCH_GBE_RX_RST 0x00004000 /* RX MAC, RX FIFO, RX DMA reset */ /* TCP/IP Accelerator Control */ #define PCH_GBE_EX_LIST_EN 0x00000008 /* External List Enable */ @@ -276,6 +276,9 @@ struct pch_gbe_regs { #define PCH_GBE_RX_DMA_EN 0x00000002 /* Enables Receive DMA */ #define PCH_GBE_TX_DMA_EN 0x00000001 /* Enables Transmission DMA */ +/* RX DMA STATUS */ +#define PCH_GBE_IDLE_CHECK 0xFFFFFFFE + /* Wake On LAN Status */ #define PCH_GBE_WLS_BR 0x00000008 /* Broadcas Address */ #define PCH_GBE_WLS_MLT 0x00000004 /* Multicast Address */ @@ -471,6 +474,7 @@ struct pch_gbe_tx_desc { struct pch_gbe_buffer { struct sk_buff *skb; dma_addr_t dma; + unsigned char *rx_buffer; unsigned long time_stamp; u16 length; bool mapped; @@ -511,6 +515,9 @@ struct pch_gbe_tx_ring { struct pch_gbe_rx_ring { struct pch_gbe_rx_desc *desc; dma_addr_t dma; + unsigned char *rx_buff_pool; + dma_addr_t rx_buff_pool_logic; + unsigned int rx_buff_pool_size; unsigned int size; unsigned int count; unsigned int next_to_use; @@ -622,6 +629,7 @@ struct pch_gbe_adapter { unsigned long rx_buffer_len; unsigned long tx_queue_len; bool have_msi; + bool rx_stop_flag; }; extern const char pch_driver_version[]; diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 48ff87c455ae..39ce0ee44ad7 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -20,7 +20,6 @@ #include "pch_gbe.h" #include "pch_gbe_api.h" -#include #define DRV_VERSION "1.00" const char pch_driver_version[] = DRV_VERSION; @@ -34,6 +33,7 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_WATCHDOG_PERIOD (1 * HZ) /* watchdog time */ #define PCH_GBE_COPYBREAK_DEFAULT 256 #define PCH_GBE_PCI_BAR 1 +#define PCH_GBE_RESERVE_MEMORY 0x200000 /* 2MB */ /* Macros for ML7223 */ #define PCI_VENDOR_ID_ROHM 0x10db @@ -52,6 +52,7 @@ const char pch_driver_version[] = DRV_VERSION; ) /* Ethertype field values */ +#define PCH_GBE_MAX_RX_BUFFER_SIZE 0x2880 #define PCH_GBE_MAX_JUMBO_FRAME_SIZE 10318 #define PCH_GBE_FRAME_SIZE_2048 2048 #define PCH_GBE_FRAME_SIZE_4096 4096 @@ -83,10 +84,12 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_INT_ENABLE_MASK ( \ PCH_GBE_INT_RX_DMA_CMPLT | \ PCH_GBE_INT_RX_DSC_EMP | \ + PCH_GBE_INT_RX_FIFO_ERR | \ PCH_GBE_INT_WOL_DET | \ PCH_GBE_INT_TX_CMPLT \ ) +#define PCH_GBE_INT_DISABLE_ALL 0 static unsigned int copybreak __read_mostly = PCH_GBE_COPYBREAK_DEFAULT; @@ -138,6 +141,27 @@ static void pch_gbe_wait_clr_bit(void *reg, u32 bit) if (!tmp) pr_err("Error: busy bit is not cleared\n"); } + +/** + * pch_gbe_wait_clr_bit_irq - Wait to clear a bit for interrupt context + * @reg: Pointer of register + * @busy: Busy bit + */ +static int pch_gbe_wait_clr_bit_irq(void *reg, u32 bit) +{ + u32 tmp; + int ret = -1; + /* wait busy */ + tmp = 20; + while ((ioread32(reg) & bit) && --tmp) + udelay(5); + if (!tmp) + pr_err("Error: busy bit is not cleared\n"); + else + ret = 0; + return ret; +} + /** * pch_gbe_mac_mar_set - Set MAC address register * @hw: Pointer to the HW structure @@ -189,6 +213,17 @@ static void pch_gbe_mac_reset_hw(struct pch_gbe_hw *hw) return; } +static void pch_gbe_mac_reset_rx(struct pch_gbe_hw *hw) +{ + /* Read the MAC address. and store to the private data */ + pch_gbe_mac_read_mac_addr(hw); + iowrite32(PCH_GBE_RX_RST, &hw->reg->RESET); + pch_gbe_wait_clr_bit_irq(&hw->reg->RESET, PCH_GBE_RX_RST); + /* Setup the MAC address */ + pch_gbe_mac_mar_set(hw, hw->mac.addr, 0); + return; +} + /** * pch_gbe_mac_init_rx_addrs - Initialize receive address's * @hw: Pointer to the HW structure @@ -671,13 +706,8 @@ static void pch_gbe_setup_rctl(struct pch_gbe_adapter *adapter) tcpip = ioread32(&hw->reg->TCPIP_ACC); - if (netdev->features & NETIF_F_RXCSUM) { - tcpip &= ~PCH_GBE_RX_TCPIPACC_OFF; - tcpip |= PCH_GBE_RX_TCPIPACC_EN; - } else { - tcpip |= PCH_GBE_RX_TCPIPACC_OFF; - tcpip &= ~PCH_GBE_RX_TCPIPACC_EN; - } + tcpip |= PCH_GBE_RX_TCPIPACC_OFF; + tcpip &= ~PCH_GBE_RX_TCPIPACC_EN; iowrite32(tcpip, &hw->reg->TCPIP_ACC); return; } @@ -1090,6 +1120,35 @@ void pch_gbe_update_stats(struct pch_gbe_adapter *adapter) spin_unlock_irqrestore(&adapter->stats_lock, flags); } +static void pch_gbe_stop_receive(struct pch_gbe_adapter *adapter) +{ + struct pch_gbe_hw *hw = &adapter->hw; + u32 rxdma; + u16 value; + int ret; + + /* Disable Receive DMA */ + rxdma = ioread32(&hw->reg->DMA_CTRL); + rxdma &= ~PCH_GBE_RX_DMA_EN; + iowrite32(rxdma, &hw->reg->DMA_CTRL); + /* Wait Rx DMA BUS is IDLE */ + ret = pch_gbe_wait_clr_bit_irq(&hw->reg->RX_DMA_ST, PCH_GBE_IDLE_CHECK); + if (ret) { + /* Disable Bus master */ + pci_read_config_word(adapter->pdev, PCI_COMMAND, &value); + value &= ~PCI_COMMAND_MASTER; + pci_write_config_word(adapter->pdev, PCI_COMMAND, value); + /* Stop Receive */ + pch_gbe_mac_reset_rx(hw); + /* Enable Bus master */ + value |= PCI_COMMAND_MASTER; + pci_write_config_word(adapter->pdev, PCI_COMMAND, value); + } else { + /* Stop Receive */ + pch_gbe_mac_reset_rx(hw); + } +} + static void pch_gbe_start_receive(struct pch_gbe_hw *hw) { u32 rxdma; @@ -1129,7 +1188,15 @@ static irqreturn_t pch_gbe_intr(int irq, void *data) if (int_st & PCH_GBE_INT_RX_FRAME_ERR) adapter->stats.intr_rx_frame_err_count++; if (int_st & PCH_GBE_INT_RX_FIFO_ERR) - adapter->stats.intr_rx_fifo_err_count++; + if (!adapter->rx_stop_flag) { + adapter->stats.intr_rx_fifo_err_count++; + pr_debug("Rx fifo over run\n"); + adapter->rx_stop_flag = true; + int_en = ioread32(&hw->reg->INT_EN); + iowrite32((int_en & ~PCH_GBE_INT_RX_FIFO_ERR), + &hw->reg->INT_EN); + pch_gbe_stop_receive(adapter); + } if (int_st & PCH_GBE_INT_RX_DMA_ERR) adapter->stats.intr_rx_dma_err_count++; if (int_st & PCH_GBE_INT_TX_FIFO_ERR) @@ -1141,7 +1208,7 @@ static irqreturn_t pch_gbe_intr(int irq, void *data) /* When Rx descriptor is empty */ if ((int_st & PCH_GBE_INT_RX_DSC_EMP)) { adapter->stats.intr_rx_dsc_empty_count++; - pr_err("Rx descriptor is empty\n"); + pr_debug("Rx descriptor is empty\n"); int_en = ioread32(&hw->reg->INT_EN); iowrite32((int_en & ~PCH_GBE_INT_RX_DSC_EMP), &hw->reg->INT_EN); if (hw->mac.tx_fc_enable) { @@ -1191,29 +1258,23 @@ pch_gbe_alloc_rx_buffers(struct pch_gbe_adapter *adapter, unsigned int i; unsigned int bufsz; - bufsz = adapter->rx_buffer_len + PCH_GBE_DMA_ALIGN; + bufsz = adapter->rx_buffer_len + NET_IP_ALIGN; i = rx_ring->next_to_use; while ((cleaned_count--)) { buffer_info = &rx_ring->buffer_info[i]; - skb = buffer_info->skb; - if (skb) { - skb_trim(skb, 0); - } else { - skb = netdev_alloc_skb(netdev, bufsz); - if (unlikely(!skb)) { - /* Better luck next round */ - adapter->stats.rx_alloc_buff_failed++; - break; - } - /* 64byte align */ - skb_reserve(skb, PCH_GBE_DMA_ALIGN); - - buffer_info->skb = skb; - buffer_info->length = adapter->rx_buffer_len; + skb = netdev_alloc_skb(netdev, bufsz); + if (unlikely(!skb)) { + /* Better luck next round */ + adapter->stats.rx_alloc_buff_failed++; + break; } + /* align */ + skb_reserve(skb, NET_IP_ALIGN); + buffer_info->skb = skb; + buffer_info->dma = dma_map_single(&pdev->dev, - skb->data, + buffer_info->rx_buffer, buffer_info->length, DMA_FROM_DEVICE); if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { @@ -1246,6 +1307,36 @@ pch_gbe_alloc_rx_buffers(struct pch_gbe_adapter *adapter, return; } +static int +pch_gbe_alloc_rx_buffers_pool(struct pch_gbe_adapter *adapter, + struct pch_gbe_rx_ring *rx_ring, int cleaned_count) +{ + struct pci_dev *pdev = adapter->pdev; + struct pch_gbe_buffer *buffer_info; + unsigned int i; + unsigned int bufsz; + unsigned int size; + + bufsz = adapter->rx_buffer_len; + + size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY; + rx_ring->rx_buff_pool = dma_alloc_coherent(&pdev->dev, size, + &rx_ring->rx_buff_pool_logic, + GFP_KERNEL); + if (!rx_ring->rx_buff_pool) { + pr_err("Unable to allocate memory for the receive poll buffer\n"); + return -ENOMEM; + } + memset(rx_ring->rx_buff_pool, 0, size); + rx_ring->rx_buff_pool_size = size; + for (i = 0; i < rx_ring->count; i++) { + buffer_info = &rx_ring->buffer_info[i]; + buffer_info->rx_buffer = rx_ring->rx_buff_pool + bufsz * i; + buffer_info->length = bufsz; + } + return 0; +} + /** * pch_gbe_alloc_tx_buffers - Allocate transmit buffers * @adapter: Board private structure @@ -1386,7 +1477,7 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, unsigned int i; unsigned int cleaned_count = 0; bool cleaned = false; - struct sk_buff *skb, *new_skb; + struct sk_buff *skb; u8 dma_status; u16 gbec_status; u32 tcp_ip_status; @@ -1407,13 +1498,12 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, rx_desc->gbec_status = DSC_INIT16; buffer_info = &rx_ring->buffer_info[i]; skb = buffer_info->skb; + buffer_info->skb = NULL; /* unmap dma */ dma_unmap_single(&pdev->dev, buffer_info->dma, buffer_info->length, DMA_FROM_DEVICE); buffer_info->mapped = false; - /* Prefetch the packet */ - prefetch(skb->data); pr_debug("RxDecNo = 0x%04x Status[DMA:0x%02x GBE:0x%04x " "TCP:0x%08x] BufInf = 0x%p\n", @@ -1433,70 +1523,16 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, pr_err("Receive CRC Error\n"); } else { /* get receive length */ - /* length convert[-3] */ - length = (rx_desc->rx_words_eob) - 3; + /* length convert[-3], length includes FCS length */ + length = (rx_desc->rx_words_eob) - 3 - ETH_FCS_LEN; + if (rx_desc->rx_words_eob & 0x02) + length = length - 4; + /* + * buffer_info->rx_buffer: [Header:14][payload] + * skb->data: [Reserve:2][Header:14][payload] + */ + memcpy(skb->data, buffer_info->rx_buffer, length); - /* Decide the data conversion method */ - if (!(netdev->features & NETIF_F_RXCSUM)) { - /* [Header:14][payload] */ - if (NET_IP_ALIGN) { - /* Because alignment differs, - * the new_skb is newly allocated, - * and data is copied to new_skb.*/ - new_skb = netdev_alloc_skb(netdev, - length + NET_IP_ALIGN); - if (!new_skb) { - /* dorrop error */ - pr_err("New skb allocation " - "Error\n"); - goto dorrop; - } - skb_reserve(new_skb, NET_IP_ALIGN); - memcpy(new_skb->data, skb->data, - length); - skb = new_skb; - } else { - /* DMA buffer is used as SKB as it is.*/ - buffer_info->skb = NULL; - } - } else { - /* [Header:14][padding:2][payload] */ - /* The length includes padding length */ - length = length - PCH_GBE_DMA_PADDING; - if ((length < copybreak) || - (NET_IP_ALIGN != PCH_GBE_DMA_PADDING)) { - /* Because alignment differs, - * the new_skb is newly allocated, - * and data is copied to new_skb. - * Padding data is deleted - * at the time of a copy.*/ - new_skb = netdev_alloc_skb(netdev, - length + NET_IP_ALIGN); - if (!new_skb) { - /* dorrop error */ - pr_err("New skb allocation " - "Error\n"); - goto dorrop; - } - skb_reserve(new_skb, NET_IP_ALIGN); - memcpy(new_skb->data, skb->data, - ETH_HLEN); - memcpy(&new_skb->data[ETH_HLEN], - &skb->data[ETH_HLEN + - PCH_GBE_DMA_PADDING], - length - ETH_HLEN); - skb = new_skb; - } else { - /* Padding data is deleted - * by moving header data.*/ - memmove(&skb->data[PCH_GBE_DMA_PADDING], - &skb->data[0], ETH_HLEN); - skb_reserve(skb, NET_IP_ALIGN); - buffer_info->skb = NULL; - } - } - /* The length includes FCS length */ - length = length - ETH_FCS_LEN; /* update status of driver */ adapter->stats.rx_bytes += length; adapter->stats.rx_packets++; @@ -1515,7 +1551,6 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, pr_debug("Receive skb->ip_summed: %d length: %d\n", skb->ip_summed, length); } -dorrop: /* return some buffers to hardware, one at a time is too slow */ if (unlikely(cleaned_count >= PCH_GBE_RX_BUFFER_WRITE)) { pch_gbe_alloc_rx_buffers(adapter, rx_ring, @@ -1720,6 +1755,11 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter) pr_err("Error: can't bring device up\n"); return err; } + err = pch_gbe_alloc_rx_buffers_pool(adapter, rx_ring, rx_ring->count); + if (err) { + pr_err("Error: can't bring device up\n"); + return err; + } pch_gbe_alloc_tx_buffers(adapter, tx_ring); pch_gbe_alloc_rx_buffers(adapter, rx_ring, rx_ring->count); adapter->tx_queue_len = netdev->tx_queue_len; @@ -1741,6 +1781,7 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter) void pch_gbe_down(struct pch_gbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring; /* signal that we're down so the interrupt handler does not * reschedule our watchdog timer */ @@ -1759,6 +1800,12 @@ void pch_gbe_down(struct pch_gbe_adapter *adapter) pch_gbe_reset(adapter); pch_gbe_clean_tx_ring(adapter, adapter->tx_ring); pch_gbe_clean_rx_ring(adapter, adapter->rx_ring); + + pci_free_consistent(adapter->pdev, rx_ring->rx_buff_pool_size, + rx_ring->rx_buff_pool, rx_ring->rx_buff_pool_logic); + rx_ring->rx_buff_pool_logic = 0; + rx_ring->rx_buff_pool_size = 0; + rx_ring->rx_buff_pool = NULL; } /** @@ -2011,6 +2058,8 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); int max_frame; + unsigned long old_rx_buffer_len = adapter->rx_buffer_len; + int err; max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) || @@ -2025,14 +2074,24 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu) else if (max_frame <= PCH_GBE_FRAME_SIZE_8192) adapter->rx_buffer_len = PCH_GBE_FRAME_SIZE_8192; else - adapter->rx_buffer_len = PCH_GBE_MAX_JUMBO_FRAME_SIZE; - netdev->mtu = new_mtu; - adapter->hw.mac.max_frame_size = max_frame; + adapter->rx_buffer_len = PCH_GBE_MAX_RX_BUFFER_SIZE; - if (netif_running(netdev)) - pch_gbe_reinit_locked(adapter); - else + if (netif_running(netdev)) { + pch_gbe_down(adapter); + err = pch_gbe_up(adapter); + if (err) { + adapter->rx_buffer_len = old_rx_buffer_len; + pch_gbe_up(adapter); + return -ENOMEM; + } else { + netdev->mtu = new_mtu; + adapter->hw.mac.max_frame_size = max_frame; + } + } else { pch_gbe_reset(adapter); + netdev->mtu = new_mtu; + adapter->hw.mac.max_frame_size = max_frame; + } pr_debug("max_frame : %d rx_buffer_len : %d mtu : %d max_frame_size : %d\n", max_frame, (u32) adapter->rx_buffer_len, netdev->mtu, @@ -2110,6 +2169,7 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget) int work_done = 0; bool poll_end_flag = false; bool cleaned = false; + u32 int_en; pr_debug("budget : %d\n", budget); @@ -2117,8 +2177,15 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget) if (!netif_carrier_ok(netdev)) { poll_end_flag = true; } else { - cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring); pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget); + if (adapter->rx_stop_flag) { + adapter->rx_stop_flag = false; + pch_gbe_start_receive(&adapter->hw); + int_en = ioread32(&adapter->hw.reg->INT_EN); + iowrite32((int_en | PCH_GBE_INT_RX_FIFO_ERR), + &adapter->hw.reg->INT_EN); + } + cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring); if (cleaned) work_done = budget; From 7756332f5b64c9c1535712b9679792e8bd4f0019 Mon Sep 17 00:00:00 2001 From: Toshiharu Okada Date: Thu, 1 Sep 2011 14:20:09 +0000 Subject: [PATCH 50/62] pch_gbe: support ML7831 IOH Support new device OKI SEMICONDUCTOR ML7831 IOH(Input/Output Hub) ML7831 is for general purpose use. ML7831 is companion chip for Intel Atom E6xx series. ML7831 is completely compatible for Intel EG20T PCH. Signed-off-by: Toshiharu Okada Signed-off-by: David S. Miller --- drivers/net/Kconfig | 11 ++++++----- drivers/net/pch_gbe/pch_gbe_main.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8d0314dbd946..a44874e24f2a 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2535,7 +2535,7 @@ config S6GMAC source "drivers/net/stmmac/Kconfig" config PCH_GBE - tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GbE" + tristate "Intel EG20T PCH/OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE" depends on PCI select MII ---help--- @@ -2548,10 +2548,11 @@ config PCH_GBE This driver enables Gigabit Ethernet function. This driver also can be used for OKI SEMICONDUCTOR IOH(Input/ - Output Hub), ML7223. - ML7223 IOH is for MP(Media Phone) use. - ML7223 is companion chip for Intel Atom E6xx series. - ML7223 is completely compatible for Intel EG20T PCH. + Output Hub), ML7223/ML7831. + ML7223 IOH is for MP(Media Phone) use. ML7831 IOH is for general + purpose use. + ML7223/ML7831 is companion chip for Intel Atom E6xx series. + ML7223/ML7831 is completely compatible for Intel EG20T PCH. config FTGMAC100 tristate "Faraday FTGMAC100 Gigabit Ethernet support" diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 39ce0ee44ad7..567ff10889be 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -39,6 +39,9 @@ const char pch_driver_version[] = DRV_VERSION; #define PCI_VENDOR_ID_ROHM 0x10db #define PCI_DEVICE_ID_ROHM_ML7223_GBE 0x8013 +/* Macros for ML7831 */ +#define PCI_DEVICE_ID_ROHM_ML7831_GBE 0x8802 + #define PCH_GBE_TX_WEIGHT 64 #define PCH_GBE_RX_WEIGHT 64 #define PCH_GBE_RX_BUFFER_WRITE 16 @@ -2526,6 +2529,13 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gbe_pcidev_id) = { .class = (PCI_CLASS_NETWORK_ETHERNET << 8), .class_mask = (0xFFFF00) }, + {.vendor = PCI_VENDOR_ID_ROHM, + .device = PCI_DEVICE_ID_ROHM_ML7831_GBE, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = (PCI_CLASS_NETWORK_ETHERNET << 8), + .class_mask = (0xFFFF00) + }, /* required last entry */ {0} }; From 33a48ab105a75d37021e422a0a3283241099b142 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 7 Sep 2011 14:41:03 +0000 Subject: [PATCH 51/62] ibmveth: Fix DMA unmap error Commit 6e8ab30ec677 (ibmveth: Add scatter-gather support) introduced a DMA mapping API inconsistency resulting in dma_unmap_page getting called on memory mapped via dma_map_single. This was seen when CONFIG_DMA_API_DEBUG was enabled. Fix up this API usage inconsistency. Signed-off-by: Brian King Acked-by: Anton Blanchard Cc: # v2.6.37+ Signed-off-by: David S. Miller --- drivers/net/ibmveth.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 3e6679269400..dcf65d8f10d2 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1026,7 +1026,12 @@ retry_bounce: netdev->stats.tx_bytes += skb->len; } - for (i = 0; i < skb_shinfo(skb)->nr_frags + 1; i++) + dma_unmap_single(&adapter->vdev->dev, + descs[0].fields.address, + descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); + + for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); From b93da27f5234198433345e40b39ff59797bc6f6e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 7 Sep 2011 14:41:04 +0000 Subject: [PATCH 52/62] ibmveth: Fix issue with DMA mapping failure descs[].fields.address is 32bit which truncates any dma mapping errors so dma_mapping_error() fails to catch it. Use a dma_addr_t to do the comparison. With this patch I was able to transfer many gigabytes of data with IOMMU fault injection set at 10% probability. Signed-off-by: Anton Blanchard Cc: # v2.6.37+ Signed-off-by: David S. Miller --- drivers/net/ibmveth.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index dcf65d8f10d2..5b8b411d5a80 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -930,6 +930,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, union ibmveth_buf_desc descs[6]; int last, i; int force_bounce = 0; + dma_addr_t dma_addr; /* * veth handles a maximum of 6 segments including the header, so @@ -994,17 +995,16 @@ retry_bounce: } /* Map the header */ - descs[0].fields.address = dma_map_single(&adapter->vdev->dev, skb->data, - skb_headlen(skb), - DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->vdev->dev, descs[0].fields.address)) + dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) goto map_failed; descs[0].fields.flags_len = desc_flags | skb_headlen(skb); + descs[0].fields.address = dma_addr; /* Map the frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - unsigned long dma_addr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr = dma_map_page(&adapter->vdev->dev, frag->page, From 91aae1e5c407d4fc79f6983e6c6ba04756c004cb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 7 Sep 2011 14:41:05 +0000 Subject: [PATCH 53/62] ibmveth: Checksum offload is always disabled Commit b9367bf3ee6d (net: ibmveth: convert to hw_features) reversed a check in ibmveth_set_csum_offload that results in checksum offload never being enabled. Signed-off-by: Anton Blanchard Cc: # 3.0+ Signed-off-by: David S. Miller --- drivers/net/ibmveth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 5b8b411d5a80..07830f918aed 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -812,7 +812,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) } else adapter->fw_ipv6_csum_support = data; - if (ret != H_SUCCESS || ret6 != H_SUCCESS) + if (ret == H_SUCCESS || ret6 == H_SUCCESS) adapter->rx_csum = data; else rc1 = -EIO; From fb82fd204b6e6c67661bbd37df032edafb2da56e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 7 Sep 2011 14:41:06 +0000 Subject: [PATCH 54/62] ibmveth: Fix checksum offload failure handling Fix a number of issues in ibmveth_set_csum_offload: - set_attr6 and clr_attr6 may be used uninitialised - We store the result of the IPV4 checksum change in ret but overwrite it in a couple of places before checking it again later. Add ret4 to make it obvious what we are doing. - We weren't clearing the NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM flags if the enable of that hypervisor feature failed. Signed-off-by: Anton Blanchard Signed-off-by: David S. Miller --- drivers/net/ibmveth.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 07830f918aed..8dd5fccef725 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -757,7 +757,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) struct ibmveth_adapter *adapter = netdev_priv(dev); unsigned long set_attr, clr_attr, ret_attr; unsigned long set_attr6, clr_attr6; - long ret, ret6; + long ret, ret4, ret6; int rc1 = 0, rc2 = 0; int restart = 0; @@ -770,6 +770,8 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) set_attr = 0; clr_attr = 0; + set_attr6 = 0; + clr_attr6 = 0; if (data) { set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM; @@ -784,16 +786,20 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) && !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) && (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) { - ret = h_illan_attributes(adapter->vdev->unit_address, clr_attr, + ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr, set_attr, &ret_attr); - if (ret != H_SUCCESS) { + if (ret4 != H_SUCCESS) { netdev_err(dev, "unable to change IPv4 checksum " "offload settings. %d rc=%ld\n", - data, ret); + data, ret4); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr, clr_attr, &ret_attr); + + if (data == 1) + dev->features &= ~NETIF_F_IP_CSUM; - ret = h_illan_attributes(adapter->vdev->unit_address, - set_attr, clr_attr, &ret_attr); } else { adapter->fw_ipv4_csum_support = data; } @@ -804,15 +810,18 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) if (ret6 != H_SUCCESS) { netdev_err(dev, "unable to change IPv6 checksum " "offload settings. %d rc=%ld\n", - data, ret); + data, ret6); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr6, clr_attr6, &ret_attr); + + if (data == 1) + dev->features &= ~NETIF_F_IPV6_CSUM; - ret = h_illan_attributes(adapter->vdev->unit_address, - set_attr6, clr_attr6, - &ret_attr); } else adapter->fw_ipv6_csum_support = data; - if (ret == H_SUCCESS || ret6 == H_SUCCESS) + if (ret4 == H_SUCCESS || ret6 == H_SUCCESS) adapter->rx_csum = data; else rc1 = -EIO; From d5ccd496601b8776a516d167a6485754575dc38f Mon Sep 17 00:00:00 2001 From: Max Matveev Date: Mon, 29 Aug 2011 21:02:24 +0000 Subject: [PATCH 55/62] sctp: deal with multiple COOKIE_ECHO chunks Attempt to reduce the number of IP packets emitted in response to single SCTP packet (2e3216cd) introduced a complication - if a packet contains two COOKIE_ECHO chunks and nothing else then SCTP state machine corks the socket while processing first COOKIE_ECHO and then loses the association and forgets to uncork the socket. To deal with the issue add new SCTP command which can be used to set association explictly. Use this new command when processing second COOKIE_ECHO chunk to restore the context for SCTP state machine. Signed-off-by: Max Matveev Signed-off-by: David S. Miller --- include/net/sctp/command.h | 1 + net/sctp/sm_sideeffect.c | 5 +++++ net/sctp/sm_statefuns.c | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6506458ccd33..712b3bebeda7 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -109,6 +109,7 @@ typedef enum { SCTP_CMD_SEND_MSG, /* Send the whole use message */ SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ + SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST } sctp_verb_t; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 167c880cf8da..76388b083f28 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1689,6 +1689,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; + + case SCTP_CMD_SET_ASOC: + asoc = cmd->obj.asoc; + break; + default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 49b847b00f99..a0f31e6c1c63 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2047,6 +2047,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); + /* Restore association pointer to provide SCTP command interpeter + * with a valid context in case it needs to manipulate + * the queues */ + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, + SCTP_ASOC((struct sctp_association *)asoc)); + return retval; nomem: From 4fb66b8210c7d7147b164e19b1b44da916a75691 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Sep 2011 02:19:23 +0000 Subject: [PATCH 56/62] caif: fix a potential NULL dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bd30ce4bc0b7 (caif: Use RCU instead of spin-lock in caif_dev.c) added a potential NULL dereference in case alloc_percpu() fails. caif_device_alloc() can also use GFP_KERNEL instead of GFP_ATOMIC. Signed-off-by: Eric Dumazet CC: Sjur Brændeland Acked-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/caif_dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 7c2fa0a08148..7f9ac0742d19 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -93,10 +93,14 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev) caifdevs = caif_device_list(dev_net(dev)); BUG_ON(!caifdevs); - caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); + caifd = kzalloc(sizeof(*caifd), GFP_KERNEL); if (!caifd) return NULL; caifd->pcpu_refcnt = alloc_percpu(int); + if (!caifd->pcpu_refcnt) { + kfree(caifd); + return NULL; + } caifd->netdev = dev; dev_hold(dev); return caifd; From 19c1ea14c930db5e9c0cd7c3c6f4d01457dfcd69 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 4 Sep 2011 20:24:20 +0000 Subject: [PATCH 57/62] ipv4: Fix fib_info->fib_metrics leak Commit 4670994d(net,rcu: convert call_rcu(fc_rport_free_rcu) to kfree_rcu()) introduced a memory leak. This patch reverts it. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 33e2c35b74b7..80106d89d548 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }; /* Release a nexthop info record */ +static void free_fib_info_rcu(struct rcu_head *head) +{ + struct fib_info *fi = container_of(head, struct fib_info, rcu); + + if (fi->fib_metrics != (u32 *) dst_default_metrics) + kfree(fi->fib_metrics); + kfree(fi); +} void free_fib_info(struct fib_info *fi) { @@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi) } endfor_nexthops(fi); fib_info_cnt--; release_net(fi->fib_net); - kfree_rcu(fi, rcu); + call_rcu(&fi->rcu, free_fib_info_rcu); } void fib_release_info(struct fib_info *fi) From 728871bc05afc8ff310b17dba3e57a2472792b13 Mon Sep 17 00:00:00 2001 From: David Ward Date: Mon, 5 Sep 2011 16:47:23 +0000 Subject: [PATCH 58/62] net: Align AF-specific flowi structs to long AF-specific flowi structs are now passed to flow_key_compare, which must also be aligned to a long. Signed-off-by: David Ward Signed-off-by: David S. Miller --- include/net/flow.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index 78113daadd63..2ec377d9ab9f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -68,7 +68,7 @@ struct flowi4 { #define fl4_ipsec_spi uli.spi #define fl4_mh_type uli.mht.type #define fl4_gre_key uli.gre_key -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, @@ -112,7 +112,7 @@ struct flowi6 { #define fl6_ipsec_spi uli.spi #define fl6_mh_type uli.mht.type #define fl6_gre_key uli.gre_key -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowidn { struct flowi_common __fl_common; @@ -127,7 +127,7 @@ struct flowidn { union flowi_uli uli; #define fld_sport uli.ports.sport #define fld_dport uli.ports.dport -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowi { union { From aa1c366e4febc7f5c2b84958a2dd7cd70e28f9d0 Mon Sep 17 00:00:00 2001 From: dpward Date: Mon, 5 Sep 2011 16:47:24 +0000 Subject: [PATCH 59/62] net: Handle different key sizes between address families in flow cache With the conversion of struct flowi to a union of AF-specific structs, some operations on the flow cache need to account for the exact size of the key. Signed-off-by: David Ward Signed-off-by: David S. Miller --- include/net/flow.h | 19 +++++++++++++++++++ net/core/flow.c | 31 +++++++++++++++++-------------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index 2ec377d9ab9f..a09447749e2d 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -7,6 +7,7 @@ #ifndef _NET_FLOW_H #define _NET_FLOW_H +#include #include #include @@ -161,6 +162,24 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) return container_of(fldn, struct flowi, u.dn); } +typedef unsigned long flow_compare_t; + +static inline size_t flow_key_size(u16 family) +{ + switch (family) { + case AF_INET: + BUILD_BUG_ON(sizeof(struct flowi4) % sizeof(flow_compare_t)); + return sizeof(struct flowi4) / sizeof(flow_compare_t); + case AF_INET6: + BUILD_BUG_ON(sizeof(struct flowi6) % sizeof(flow_compare_t)); + return sizeof(struct flowi6) / sizeof(flow_compare_t); + case AF_DECnet: + BUILD_BUG_ON(sizeof(struct flowidn) % sizeof(flow_compare_t)); + return sizeof(struct flowidn) / sizeof(flow_compare_t); + } + return 0; +} + #define FLOW_DIR_IN 0 #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 diff --git a/net/core/flow.c b/net/core/flow.c index 47b6d26c2afb..555a456efb07 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -173,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - const struct flowi *key) + const struct flowi *key, + size_t keysize) { const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { const flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (const flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1_lim = k1 + keysize; k2 = (const flow_compare_t *) key2; @@ -216,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_entry *fle, *tfle; struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -223,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ if (!fcp->hash_table) @@ -231,12 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); + hash = flow_hash_code(fc, fcp, key, keysize); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { if (tfle->net == net && tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -251,7 +254,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle->net = net; fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; From 9566042ef84fd2a282d00d3163074ec9b3f93a70 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 16 Sep 2011 09:09:50 +0000 Subject: [PATCH 60/62] IRDA: Fix global type conflicts in net/irda/irsysctl.c v2 The externs here didn't agree with the declarations in qos.c. Better would be probably to move this into a header, but since it's common practice to have naked externs with sysctls I left it for now. Cc: samuel@sortiz.org Signed-off-by: Andi Kleen Signed-off-by: David S. Miller --- net/irda/irsysctl.c | 6 +++--- net/irda/qos.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index d0b70dadf73b..2615ffc8e785 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -40,9 +40,9 @@ extern int sysctl_slot_timeout; extern int sysctl_fast_poll_increase; extern char sysctl_devname[]; extern int sysctl_max_baud_rate; -extern int sysctl_min_tx_turn_time; -extern int sysctl_max_tx_data_size; -extern int sysctl_max_tx_window; +extern unsigned int sysctl_min_tx_turn_time; +extern unsigned int sysctl_max_tx_data_size; +extern unsigned int sysctl_max_tx_window; extern int sysctl_max_noreply_time; extern int sysctl_warn_noreply_time; extern int sysctl_lap_keepalive_time; diff --git a/net/irda/qos.c b/net/irda/qos.c index 1b51bcf42394..4369f7f41bcb 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -60,7 +60,7 @@ int sysctl_max_noreply_time = 12; * Default is 10us which means using the unmodified value given by the * peer except if it's 0 (0 is likely a bug in the other stack). */ -unsigned sysctl_min_tx_turn_time = 10; +unsigned int sysctl_min_tx_turn_time = 10; /* * Maximum data size to be used in transmission in payload of LAP frame. * There is a bit of confusion in the IrDA spec : @@ -75,13 +75,13 @@ unsigned sysctl_min_tx_turn_time = 10; * bytes frames or all negotiated frame sizes, but you can use the sysctl * to play with this value anyway. * Jean II */ -unsigned sysctl_max_tx_data_size = 2042; +unsigned int sysctl_max_tx_data_size = 2042; /* * Maximum transmit window, i.e. number of LAP frames between turn-around. * This allow to override what the peer told us. Some peers are buggy and * don't always support what they tell us. * Jean II */ -unsigned sysctl_max_tx_window = 7; +unsigned int sysctl_max_tx_window = 7; static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get); static int irlap_param_link_disconnect(void *instance, irda_param_t *parm, From 34b8686d278f00fb16234e74be44c253d6d6b676 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 16 Sep 2011 07:57:43 +0000 Subject: [PATCH 61/62] can: ti_hecc: include linux/io.h This fixes a build breakage for OMAP3 boards. Signed-off-by: Daniel Mack Cc: Wolfgang Grandegger Cc: netdev@vger.kernel.org Acked-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- drivers/net/can/ti_hecc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index a81249246ece..2adc294f512a 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include From 8e2ec639173f325977818c45011ee176ef2b11f6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 5 Sep 2011 21:34:30 +0000 Subject: [PATCH 62/62] ipv6: don't use inetpeer to store metrics for routes. Current IPv6 implementation uses inetpeer to store metrics for routes. The problem of inetpeer is that it doesn't take subnet prefix length in to consideration. If two routes have the same address but different prefix length, they share same inetpeer. So changing metrics of one route also affects the other. The fix is to allocate separate metrics storage for each route. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv6/route.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9e69eb0ec6dd..1250f9020670 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -104,6 +104,9 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) struct inet_peer *peer; u32 *p = NULL; + if (!(rt->dst.flags & DST_HOST)) + return NULL; + if (!rt->rt6i_peer) rt6_bind_peer(rt, 1); @@ -252,6 +255,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev = rt->rt6i_idev; struct inet_peer *peer = rt->rt6i_peer; + if (!(rt->dst.flags & DST_HOST)) + dst_destroy_metrics_generic(dst); + if (idev != NULL) { rt->rt6i_idev = NULL; in6_dev_put(idev); @@ -723,9 +729,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, ipv6_addr_copy(&rt->rt6i_gateway, daddr); } - rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -775,9 +779,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct rt6_info *rt = ip6_rt_copy(ort, daddr); if (rt) { - rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; @@ -1078,12 +1080,15 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, neigh = NULL; } - rt->rt6i_idev = idev; + rt->dst.flags |= DST_HOST; + rt->dst.output = ip6_output; dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); - rt->dst.output = ip6_output; + + ipv6_addr_copy(&rt->rt6i_dst.addr, addr); + rt->rt6i_dst.plen = 128; + rt->rt6i_idev = idev; spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; @@ -1261,6 +1266,14 @@ int ip6_route_add(struct fib6_config *cfg) if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; + if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { + u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!metrics) { + err = -ENOMEM; + goto out; + } + dst_init_metrics(&rt->dst, metrics, 0); + } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->rt6i_src.plen = cfg->fc_src_len; @@ -1607,9 +1620,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - nrt->rt6i_dst.plen = 128; - nrt->dst.flags |= DST_HOST; - ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); @@ -1754,9 +1764,10 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, if (rt) { rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; + rt->dst.flags |= DST_HOST; ipv6_addr_copy(&rt->rt6i_dst.addr, dest); - rt->rt6i_dst.plen = ort->rt6i_dst.plen; + rt->rt6i_dst.plen = 128; dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev;