From 1dace8c801ac531022bd31a7316a6b4351837617 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 4 Mar 2010 16:10:14 -0500 Subject: [PATCH 01/38] vhost: fix error path in vhost_net_set_backend An error could cause vhost_net_set_backend to exit without unlocking vq->mutex. Fix this. Signed-off-by: Jeff Dike Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ad37da2b6cb5..fcafb6b170fb 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -508,12 +508,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) /* Verify that ring has been setup correctly. */ if (!vhost_vq_access_ok(vq)) { r = -EFAULT; - goto err; + goto err_vq; } sock = get_socket(fd); if (IS_ERR(sock)) { r = PTR_ERR(sock); - goto err; + goto err_vq; } /* start polling new socket */ @@ -524,12 +524,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); rcu_assign_pointer(vq->private_data, sock); vhost_net_enable_vq(n, vq); - mutex_unlock(&vq->mutex); done: if (oldsock) { vhost_net_flush_vq(n, index); fput(oldsock->file); } + +err_vq: + mutex_unlock(&vq->mutex); err: mutex_unlock(&n->dev.mutex); return r; From 3f60ebc9d6291863652d564bacc430629271e6a9 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Thu, 11 Mar 2010 17:45:26 +0200 Subject: [PATCH 02/38] wl1251: fix potential crash In case debugfs does not init for some reason (or is disabled on older kernels) driver does not allocate stats.fw_stats structure, but tries to clear it later and trips on a NULL pointer: Unable to handle kernel NULL pointer dereference at virtual address 00000000 PC is at __memzero+0x24/0x80 Backtrace: [] (wl1251_debugfs_reset+0x0/0x30 [wl1251]) [] (wl1251_op_stop+0x0/0x12c [wl1251]) [] (ieee80211_stop_device+0x0/0x74 [mac80211]) [] (ieee80211_stop+0x0/0x4ac [mac80211]) [] (dev_close+0x0/0xb4) [] (dev_change_flags+0x0/0x184) [] (devinet_ioctl+0x0/0x704) [] (inet_ioctl+0x0/0x100) Add a NULL pointer check to fix this. Signed-off-by: Grazvydas Ignotas Acked-by: Kalle Valo Cc: stable@kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/wl1251_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/wl12xx/wl1251_debugfs.c b/drivers/net/wireless/wl12xx/wl1251_debugfs.c index 0ccba57fb9fb..05e4d68eb4cc 100644 --- a/drivers/net/wireless/wl12xx/wl1251_debugfs.c +++ b/drivers/net/wireless/wl12xx/wl1251_debugfs.c @@ -466,7 +466,8 @@ out: void wl1251_debugfs_reset(struct wl1251 *wl) { - memset(wl->stats.fw_stats, 0, sizeof(*wl->stats.fw_stats)); + if (wl->stats.fw_stats != NULL) + memset(wl->stats.fw_stats, 0, sizeof(*wl->stats.fw_stats)); wl->stats.retry_count = 0; wl->stats.excessive_retries = 0; } From 4fdec031b9169b3c17938b9c4168f099f457169c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 12 Mar 2010 04:02:43 +0100 Subject: [PATCH 03/38] ath9k: fix BUG_ON triggered by PAE frames When I initially stumbled upon sequence number problems with PAE frames in ath9k, I submitted a patch to remove all special cases for PAE frames and let them go through the normal transmit path. Out of concern about crypto incompatibility issues, this change was merged instead: commit 6c8afef551fef87a3bf24f8a74c69a7f2f72fc82 Author: Sujith Date: Tue Feb 9 10:07:00 2010 +0530 ath9k: Fix sequence numbers for PAE frames After a lot of testing, I'm able to reliably trigger a driver crash on rekeying with current versions with this change in place. It seems that the driver does not support sending out regular MPDUs with the same TID while an A-MPDU session is active. This leads to duplicate entries in the TID Tx buffer, which hits the following BUG_ON in ath_tx_addto_baw(): index = ATH_BA_INDEX(tid->seq_start, bf->bf_seqno); cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1); BUG_ON(tid->tx_buf[cindex] != NULL); I believe until we actually have a reproducible case of an incompatibility with another AP using no PAE special cases, we should simply get rid of this mess. This patch completely fixes my crash issues in STA mode and makes it stay connected without throughput drops or connectivity issues even when the AP is configured to a very short group rekey interval. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/xmit.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index b2c8207f7bc1..294b486bc3ed 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1353,25 +1353,6 @@ static enum ath9k_pkt_type get_hw_packet_type(struct sk_buff *skb) return htype; } -static bool is_pae(struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr; - __le16 fc; - - hdr = (struct ieee80211_hdr *)skb->data; - fc = hdr->frame_control; - - if (ieee80211_is_data(fc)) { - if (ieee80211_is_nullfunc(fc) || - /* Port Access Entity (IEEE 802.1X) */ - (skb->protocol == cpu_to_be16(ETH_P_PAE))) { - return true; - } - } - - return false; -} - static int get_hw_crypto_keytype(struct sk_buff *skb) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); @@ -1696,7 +1677,7 @@ static void ath_tx_start_dma(struct ath_softc *sc, struct ath_buf *bf, goto tx_done; } - if ((tx_info->flags & IEEE80211_TX_CTL_AMPDU) && !is_pae(skb)) { + if (tx_info->flags & IEEE80211_TX_CTL_AMPDU) { /* * Try aggregation if it's a unicast data frame * and the destination is HT capable. From c8406ea8fa1adde8dc5400127281d497bbcdb84a Mon Sep 17 00:00:00 2001 From: Adel Gadllah Date: Sun, 14 Mar 2010 19:16:25 +0100 Subject: [PATCH 04/38] iwlwifi: Silence tfds_in_queue message Commit a239a8b47cc0e5e6d7416a89f340beac06d5edaa introduced a noisy message, that fills up the log very fast. The error seems not to be fatal (the connection is stable and performance is ok), so make it IWL_DEBUG_TX rather than IWL_ERR. Signed-off-by: Adel Gadllah Cc: stable@kernel.org Acked-by: Reinette Chatre Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 1ed5206721ec..8c12311dbb0a 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -124,7 +124,7 @@ void iwl_free_tfds_in_queue(struct iwl_priv *priv, if (priv->stations[sta_id].tid[tid].tfds_in_queue >= freed) priv->stations[sta_id].tid[tid].tfds_in_queue -= freed; else { - IWL_ERR(priv, "free more than tfds_in_queue (%u:%d)\n", + IWL_DEBUG_TX(priv, "free more than tfds_in_queue (%u:%d)\n", priv->stations[sta_id].tid[tid].tfds_in_queue, freed); priv->stations[sta_id].tid[tid].tfds_in_queue = 0; From 0e255572121180c900e24e33b87047abd8153cce Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 8 Mar 2010 23:24:22 +0200 Subject: [PATCH 05/38] vhost: fix interrupt mitigation with raw sockets A thinko in code means we never trigger interrupt mitigation. Fix this. Reported-by: Juan Quintela Reported-by: Unai Uribarri Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index fcafb6b170fb..a6a88dfd5029 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -125,7 +125,7 @@ static void handle_tx(struct vhost_net *net) mutex_lock(&vq->mutex); vhost_disable_notify(vq); - if (wmem < sock->sk->sk_sndbuf * 2) + if (wmem < sock->sk->sk_sndbuf / 2) tx_poll_stop(net); hdr_size = vq->hdr_size; From 535297a6ae4c3b7a0562e71fac15c213eeec68e7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 17 Mar 2010 16:06:11 +0200 Subject: [PATCH 06/38] vhost: fix error handling in vring ioctls Stanse found a locking problem in vhost_set_vring: several returns from VHOST_SET_VRING_KICK, VHOST_SET_VRING_CALL, VHOST_SET_VRING_ERR with the vq->mutex held. Fix these up. Reported-by: Jiri Slaby Acked-by: Laurent Chavey Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7cd55e078794..7bd7a1e4409d 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -476,8 +476,10 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) if (r < 0) break; eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); - if (IS_ERR(eventfp)) - return PTR_ERR(eventfp); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } if (eventfp != vq->kick) { pollstop = filep = vq->kick; pollstart = vq->kick = eventfp; @@ -489,8 +491,10 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) if (r < 0) break; eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); - if (IS_ERR(eventfp)) - return PTR_ERR(eventfp); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } if (eventfp != vq->call) { filep = vq->call; ctx = vq->call_ctx; @@ -505,8 +509,10 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) if (r < 0) break; eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); - if (IS_ERR(eventfp)) - return PTR_ERR(eventfp); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } if (eventfp != vq->error) { filep = vq->error; vq->error = eventfp; From b634f87522dff87712df8bda2a6c9061954d552a Mon Sep 17 00:00:00 2001 From: Alexandra Kossovsky Date: Thu, 18 Mar 2010 20:29:24 -0700 Subject: [PATCH 07/38] tcp: Fix OOB POLLIN avoidance. From: Alexandra.Kossovsky@oktetlabs.ru Fixes kernel bugzilla #15541 Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5901010fad55..ae16f809e716 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -429,7 +429,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_seq == tp->copied_seq && !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) - target--; + target++; /* Potential race condition. If read of tp below will * escape above sk->sk_state, we can be illegally awaken From 658cc524305c9759019c4430ded231f631472482 Mon Sep 17 00:00:00 2001 From: Abraham Arce Date: Tue, 16 Mar 2010 12:24:54 +0000 Subject: [PATCH 08/38] KS8851: Avoid NULL pointer in set rx mode Kernel NULL pointer dereference when setting mode for IFF_MULTICAST. Tested on SDP OMAP4430 board. ks8851 spi1.0: message enable is 0 ks8851 spi1.0: revision 0, MAC f2:f4:2f:56:37:de, IRQ 194 Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP last sysfs file: Modules linked in: CPU: 0 Not tainted (2.6.34-rc1-01039-g38d7ed1-dirty #3) PC is at ks8851_set_rx_mode+0x88/0x124 LR is at bitrev32+0x24/0x2c Backtrace: [] ? (ks8851_set_rx_mode+0x0/0x124) [] (__dev_set_rx_mode+0x0/0x90) [] (dev_mc_add+0x0/0x78) [] (igmp_group_added+0x0/0x64) [] (ip_mc_inc_group+0x0/0x150) [] (ip_mc_up+0x0/0x64) [] (inetdev_event+0x0/0x3d4) [] (notifier_call_chain+0x0/0x78) [] (__raw_notifier_call_chain+0x0/0x24) [] (raw_notifier_call_chain+0x0/0x28) [] (call_netdevice_notifiers+0x0/0x24) [] (__dev_notify_flags+0x0/0x68) [] (dev_change_flags+0x0/0x4c) [] (ip_auto_config+0x0/0xf1c) [] (do_one_initcall+0x0/0x1bc) [] (kernel_init+0x0/0x234) Code: e15130bc e1833012 e14130bc e5943000 (e5934000) ---[ end trace ed0fb00a94142792 ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Abraham Arce Signed-off-by: David S. Miller --- drivers/net/ks8851.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c index 0573e0bb4444..13cc1ca261d9 100644 --- a/drivers/net/ks8851.c +++ b/drivers/net/ks8851.c @@ -976,7 +976,6 @@ static void ks8851_set_rx_mode(struct net_device *dev) crc >>= (32 - 6); /* get top six bits */ rxctrl.mchash[crc >> 4] |= (1 << (crc & 0xf)); - mcptr = mcptr->next; } rxctrl.rxcr1 = RXCR1_RXME | RXCR1_RXPAFMA; From 17da69b8bfbe441a33a873ad5dd7d3d85800bf2b Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Wed, 17 Mar 2010 00:09:29 +0000 Subject: [PATCH 09/38] jme: Fix VLAN memory leak Fix memory leak while receiving 8021q tagged packet which is not registered by user. Signed-off-by: Guo-Fu Tseng Cc: stable@kernel.org Signed-off-by: David S. Miller --- drivers/net/jme.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/jme.c b/drivers/net/jme.c index 0f31497833df..cfc7b9824d6f 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -946,6 +946,8 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx) jme->jme_vlan_rx(skb, jme->vlgrp, le16_to_cpu(rxdesc->descwb.vlan)); NET_STAT(jme).rx_bytes += 4; + } else { + dev_kfree_skb(skb); } } else { jme->jme_rx(skb); From bf5e5360fd1df1ae429ebbd81838d7d0879797d1 Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Wed, 17 Mar 2010 00:09:30 +0000 Subject: [PATCH 10/38] jme: Protect vlgrp structure by pause RX actions. Temporary stop the RX IRQ, and disable (sync) tasklet or napi. And restore it after finished the vlgrp pointer assignment. Signed-off-by: Guo-Fu Tseng Cc: stable@kernel.org Signed-off-by: David S. Miller --- drivers/net/jme.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/net/jme.c b/drivers/net/jme.c index cfc7b9824d6f..c0b59a555384 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -2083,12 +2083,45 @@ jme_tx_timeout(struct net_device *netdev) jme_reset_link(jme); } +static inline void jme_pause_rx(struct jme_adapter *jme) +{ + atomic_dec(&jme->link_changing); + + jme_set_rx_pcc(jme, PCC_OFF); + if (test_bit(JME_FLAG_POLL, &jme->flags)) { + JME_NAPI_DISABLE(jme); + } else { + tasklet_disable(&jme->rxclean_task); + tasklet_disable(&jme->rxempty_task); + } +} + +static inline void jme_resume_rx(struct jme_adapter *jme) +{ + struct dynpcc_info *dpi = &(jme->dpi); + + if (test_bit(JME_FLAG_POLL, &jme->flags)) { + JME_NAPI_ENABLE(jme); + } else { + tasklet_hi_enable(&jme->rxclean_task); + tasklet_hi_enable(&jme->rxempty_task); + } + dpi->cur = PCC_P1; + dpi->attempt = PCC_P1; + dpi->cnt = 0; + jme_set_rx_pcc(jme, PCC_P1); + + atomic_inc(&jme->link_changing); +} + static void jme_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) { struct jme_adapter *jme = netdev_priv(netdev); + jme_pause_rx(jme); jme->vlgrp = grp; + jme_resume_rx(jme); } static void From 54d259d474e1fee6f6bb8f0f1360d85195199ac5 Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Wed, 17 Mar 2010 00:09:31 +0000 Subject: [PATCH 11/38] jme: Advance driver version number Advance driver version number after some bug fix. Signed-off-by: Guo-Fu Tseng Signed-off-by: David S. Miller --- drivers/net/jme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/jme.h b/drivers/net/jme.h index c19db9146a2f..07ad3a457185 100644 --- a/drivers/net/jme.h +++ b/drivers/net/jme.h @@ -25,7 +25,7 @@ #define __JME_H_INCLUDED__ #define DRV_NAME "jme" -#define DRV_VERSION "1.0.5" +#define DRV_VERSION "1.0.6" #define PFX DRV_NAME ": " #define PCI_DEVICE_ID_JMICRON_JMC250 0x0250 From 0641e4fbf2f824faee00ea74c459a088d94905fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Mar 2010 21:16:45 -0700 Subject: [PATCH 12/38] net: Potential null skb->dev dereference When doing "ifenslave -d bond0 eth0", there is chance to get NULL dereference in netif_receive_skb(), because dev->master suddenly becomes NULL after we tested it. We should use ACCESS_ONCE() to avoid this (or rcu_dereference()) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- net/8021q/vlan_core.c | 4 ++-- net/core/dev.c | 8 +++++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c79a88be7c33..fa8b47637997 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2059,12 +2059,12 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and * ARP on active-backup slaves with arp_validate enabled. */ -static inline int skb_bond_should_drop(struct sk_buff *skb) +static inline int skb_bond_should_drop(struct sk_buff *skb, + struct net_device *master) { - struct net_device *dev = skb->dev; - struct net_device *master = dev->master; - if (master) { + struct net_device *dev = skb->dev; + if (master->priv_flags & IFF_MASTER_ARPMON) dev->last_rx = jiffies; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index c0316e0ca6e8..c584a0af77d3 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -11,7 +11,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, if (netpoll_rx(skb)) return NET_RX_DROP; - if (skb_bond_should_drop(skb)) + if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) goto drop; skb->skb_iif = skb->dev->ifindex; @@ -83,7 +83,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, { struct sk_buff *p; - if (skb_bond_should_drop(skb)) + if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) goto drop; skb->skb_iif = skb->dev->ifindex; diff --git a/net/core/dev.c b/net/core/dev.c index bcc490cc9452..59d4394d2ce8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2483,6 +2483,7 @@ int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; + struct net_device *master; struct net_device *null_or_orig; struct net_device *null_or_bond; int ret = NET_RX_DROP; @@ -2503,11 +2504,12 @@ int netif_receive_skb(struct sk_buff *skb) null_or_orig = NULL; orig_dev = skb->dev; - if (orig_dev->master) { - if (skb_bond_should_drop(skb)) + master = ACCESS_ONCE(orig_dev->master); + if (master) { + if (skb_bond_should_drop(skb, master)) null_or_orig = orig_dev; /* deliver only exact match */ else - skb->dev = orig_dev->master; + skb->dev = master; } __get_cpu_var(netdev_rx_stat).total++; From 1097cd17700c4e9903b7bbfcec1432f61784cb53 Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Thu, 18 Mar 2010 14:34:52 +0000 Subject: [PATCH 13/38] ixgbe: Fix 82599 multispeed fiber link issues due to Tx laser flapping Fix 82599 link issues during driver load and unload test using multi-speed 10G & 1G fiber modules. When connected back to back sometime 82599 multispeed fiber modules would link at 1G speed instead of 10G highest speed, due to a race condition in autotry process involving Tx laser flapping. Move autotry autoneg-37 tx laser flapping process from multispeed module init setup to driver unload. This will alert the link partner to restart its autotry process when it tries to establish the link with the link partner Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_82599.c | 78 ++++++++++++++++++--------------- drivers/net/ixgbe/ixgbe_main.c | 11 +++++ drivers/net/ixgbe/ixgbe_type.h | 1 + 3 files changed, 54 insertions(+), 36 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c index 1f30e163bd9c..b405a00817c6 100644 --- a/drivers/net/ixgbe/ixgbe_82599.c +++ b/drivers/net/ixgbe/ixgbe_82599.c @@ -39,6 +39,7 @@ #define IXGBE_82599_MC_TBL_SIZE 128 #define IXGBE_82599_VFT_TBL_SIZE 128 +void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, ixgbe_link_speed speed, bool autoneg, @@ -68,7 +69,9 @@ static void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) if (hw->phy.multispeed_fiber) { /* Set up dual speed SFP+ support */ mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber; + mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber; } else { + mac->ops.flap_tx_laser = NULL; if ((mac->ops.get_media_type(hw) == ixgbe_media_type_backplane) && (hw->phy.smart_speed == ixgbe_smart_speed_auto || @@ -412,6 +415,41 @@ s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, return status; } +/** + * ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser + * @hw: pointer to hardware structure + * + * When the driver changes the link speeds that it can support, + * it sets autotry_restart to true to indicate that we need to + * initiate a new autotry session with the link partner. To do + * so, we set the speed then disable and re-enable the tx laser, to + * alert the link partner that it also needs to restart autotry on its + * end. This is consistent with true clause 37 autoneg, which also + * involves a loss of signal. + **/ +void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) +{ + u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + + hw_dbg(hw, "ixgbe_flap_tx_laser_multispeed_fiber\n"); + + if (hw->mac.autotry_restart) { + /* Disable tx laser; allow 100us to go dark per spec */ + esdp_reg |= IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + udelay(100); + + /* Enable tx laser; allow 100ms to light up */ + esdp_reg &= ~IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + msleep(100); + + hw->mac.autotry_restart = false; + } +} + /** * ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed * @hw: pointer to hardware structure @@ -439,16 +477,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, hw->mac.ops.get_link_capabilities(hw, &phy_link_speed, &negotiation); speed &= phy_link_speed; - /* - * When the driver changes the link speeds that it can support, - * it sets autotry_restart to true to indicate that we need to - * initiate a new autotry session with the link partner. To do - * so, we set the speed then disable and re-enable the tx laser, to - * alert the link partner that it also needs to restart autotry on its - * end. This is consistent with true clause 37 autoneg, which also - * involves a loss of signal. - */ - /* * Try each speed one by one, highest priority first. We do this in * software because 10gb fiber doesn't support speed autonegotiation. @@ -466,6 +494,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, /* Set the module link speed */ esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5); IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); /* Allow module to change analog characteristics (1G->10G) */ msleep(40); @@ -478,19 +507,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, return status; /* Flap the tx laser if it has not already been done */ - if (hw->mac.autotry_restart) { - /* Disable tx laser; allow 100us to go dark per spec */ - esdp_reg |= IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - udelay(100); - - /* Enable tx laser; allow 2ms to light up per spec */ - esdp_reg &= ~IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - msleep(2); - - hw->mac.autotry_restart = false; - } + hw->mac.ops.flap_tx_laser(hw); /* * Wait for the controller to acquire link. Per IEEE 802.3ap, @@ -525,6 +542,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, esdp_reg &= ~IXGBE_ESDP_SDP5; esdp_reg |= IXGBE_ESDP_SDP5_DIR; IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); /* Allow module to change analog characteristics (10G->1G) */ msleep(40); @@ -537,19 +555,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, return status; /* Flap the tx laser if it has not already been done */ - if (hw->mac.autotry_restart) { - /* Disable tx laser; allow 100us to go dark per spec */ - esdp_reg |= IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - udelay(100); - - /* Enable tx laser; allow 2ms to light up per spec */ - esdp_reg &= ~IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - msleep(2); - - hw->mac.autotry_restart = false; - } + hw->mac.ops.flap_tx_laser(hw); /* Wait for the link partner to also set speed */ msleep(100); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 684af371462d..b858a1a79d0e 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -5018,6 +5018,7 @@ static void ixgbe_multispeed_fiber_task(struct work_struct *work) autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiation); + hw->mac.autotry_restart = false; if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, autoneg, negotiation, true); adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; @@ -6380,6 +6381,16 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) del_timer_sync(&adapter->sfp_timer); cancel_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->sfp_task); + if (adapter->hw.phy.multispeed_fiber) { + struct ixgbe_hw *hw = &adapter->hw; + /* + * Restart clause 37 autoneg, disable and re-enable + * the tx laser, to clear & alert the link partner + * that it needs to restart autotry + */ + hw->mac.autotry_restart = true; + hw->mac.ops.flap_tx_laser(hw); + } cancel_work_sync(&adapter->multispeed_fiber_task); cancel_work_sync(&adapter->sfp_config_module_task); if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE || diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index 2be907466593..0ed5ab37cc53 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -2397,6 +2397,7 @@ struct ixgbe_mac_operations { s32 (*enable_rx_dma)(struct ixgbe_hw *, u32); /* Link */ + void (*flap_tx_laser)(struct ixgbe_hw *); s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool); s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *, From 0ecad5a262923967147e2d1725e277a2a5fbcdd4 Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Thu, 18 Mar 2010 15:16:56 +0000 Subject: [PATCH 14/38] ixgbe: Fix 82599 KX4 Wake on LAN issue after an improper system shutdown Advanced Power Management is disabled for 82599 KX4 connections by clearing GRC.APME bit, causing it to not wake the system from an improper system shutdown. By default GRC.APME is enabled and software is not supposed to clear these settings during adapter probe. Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index b858a1a79d0e..18b5b217f310 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -6246,9 +6246,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, case IXGBE_DEV_ID_82599_KX4: adapter->wol = (IXGBE_WUFC_MAG | IXGBE_WUFC_EX | IXGBE_WUFC_MC | IXGBE_WUFC_BC); - /* Enable ACPI wakeup in GRC */ - IXGBE_WRITE_REG(hw, IXGBE_GRC, - (IXGBE_READ_REG(hw, IXGBE_GRC) & ~IXGBE_GRC_APME)); break; default: adapter->wol = 0; From 11bc3088373e913f165a8652601c6f8b8dc4aea2 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Thu, 18 Mar 2010 22:18:41 -0700 Subject: [PATCH 15/38] smsc95xx: Fix tx checksum offload for small packets TX checksum offload does not work properly when transmitting UDP packets with 0, 1 or 2 bytes of data. This patch works around the problem by calculating checksums for these packets in the driver. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index d222d7e25273..73f9a31cf94d 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1189,9 +1189,21 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev, } if (csum) { - u32 csum_preamble = smsc95xx_calc_csum_preamble(skb); - skb_push(skb, 4); - memcpy(skb->data, &csum_preamble, 4); + if (skb->len <= 45) { + /* workaround - hardware tx checksum does not work + * properly with extremely small packets */ + long csstart = skb->csum_start - skb_headroom(skb); + __wsum calc = csum_partial(skb->data + csstart, + skb->len - csstart, 0); + *((__sum16 *)(skb->data + csstart + + skb->csum_offset)) = csum_fold(calc); + + csum = false; + } else { + u32 csum_preamble = smsc95xx_calc_csum_preamble(skb); + skb_push(skb, 4); + memcpy(skb->data, &csum_preamble, 4); + } } skb_push(skb, 4); From d11a4dc18bf41719c9f0d7ed494d295dd2973b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Thu, 18 Mar 2010 23:20:20 +0000 Subject: [PATCH 16/38] ipv4: check rt_genid in dst_check Xfrm_dst keeps a reference to ipv4 rtable entries on each cached bundle. The only way to renew xfrm_dst when the underlying route has changed, is to implement dst_check for this. This is what ipv6 side does too. The problems started after 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 ("ipsec: Fix bogus bundle flowi") which fixed a bug causing xfrm_dst to not get reused, until that all lookups always generated new xfrm_dst with new route reference and path mtu worked. But after the fix, the old routes started to get reused even after they were expired causing pmtu to break (well it would occationally work if the rtable gc had run recently and marked the route obsolete causing dst_check to get called). Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/route.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a770df2493d2..32d396196df8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1441,7 +1441,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, dev_hold(rt->u.dst.dev); if (rt->idev) in_dev_hold(rt->idev); - rt->u.dst.obsolete = 0; + rt->u.dst.obsolete = -1; rt->u.dst.lastuse = jiffies; rt->u.dst.path = &rt->u.dst; rt->u.dst.neighbour = NULL; @@ -1506,7 +1506,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) struct dst_entry *ret = dst; if (rt) { - if (dst->obsolete) { + if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || @@ -1726,7 +1726,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - return NULL; + if (rt_is_expired((struct rtable *)dst)) + return NULL; + return dst; } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1888,7 +1890,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!rth) goto e_nobufs; - rth->u.dst.output= ip_rt_bug; + rth->u.dst.output = ip_rt_bug; + rth->u.dst.obsolete = -1; atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2054,6 +2057,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; + rth->u.dst.obsolete = -1; rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); @@ -2218,6 +2222,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; + rth->u.dst.obsolete = -1; rth->rt_genid = rt_genid(net); atomic_set(&rth->u.dst.__refcnt, 1); @@ -2444,6 +2449,7 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; + rth->u.dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); From 10414444cb8a8ee8893e00390b7cf40502e28352 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 18 Mar 2010 23:00:22 +0000 Subject: [PATCH 17/38] ipv6: Remove redundant dst NULL check in ip6_dst_check As the only path leading to ip6_dst_check makes an indirect call through dst->ops, dst cannot be NULL in ip6_dst_check. This patch removes this check in case it misleads people who come across this code. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 52cd3eff31dc..7fcb0e5d1213 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -879,7 +879,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; - if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) return dst; return NULL; From 97e3ecd112ba45eb217cddab59f48659bc15d9d0 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 18 Mar 2010 11:27:32 +0000 Subject: [PATCH 18/38] TCP: check min TTL on received ICMP packets This adds RFC5082 checks for TTL on received ICMP packets. It adds some security against spoofed ICMP packets disrupting GTSM protected sessions. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 70df40980a87..f4df5f931f36 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -370,6 +370,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } + icsk = inet_csk(sk); tp = tcp_sk(sk); seq = ntohl(th->seq); From 936332b8e00103fc20eb7e915c9a3bcb2835a11a Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Fri, 19 Mar 2010 04:33:10 +0000 Subject: [PATCH 19/38] ixgbe: fix for real_num_tx_queues update issue Currently netdev_features_change is called before fcoe tx queues setup is done, so this patch moves calling of netdev_features_change after tx queues setup is done in ixgbe_init_interrupt_scheme, so that real_num_tx_queues is updated correctly on each fcoe enable or disable. This allows additional fcoe queues updated correctly in vlan driver for their correct queue selection. Signed-off-by: Vasu Dev Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_fcoe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c index 4123dec0dfb7..700cfc0aa1b9 100644 --- a/drivers/net/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ixgbe/ixgbe_fcoe.c @@ -614,9 +614,9 @@ int ixgbe_fcoe_enable(struct net_device *netdev) netdev->vlan_features |= NETIF_F_FSO; netdev->vlan_features |= NETIF_F_FCOE_MTU; netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1; - netdev_features_change(netdev); ixgbe_init_interrupt_scheme(adapter); + netdev_features_change(netdev); if (netif_running(netdev)) netdev->netdev_ops->ndo_open(netdev); @@ -660,11 +660,11 @@ int ixgbe_fcoe_disable(struct net_device *netdev) netdev->vlan_features &= ~NETIF_F_FSO; netdev->vlan_features &= ~NETIF_F_FCOE_MTU; netdev->fcoe_ddp_xid = 0; - netdev_features_change(netdev); ixgbe_cleanup_fcoe(adapter); - ixgbe_init_interrupt_scheme(adapter); + netdev_features_change(netdev); + if (netif_running(netdev)) netdev->netdev_ops->ndo_open(netdev); rc = 0; From fd3686a842717b890fbe3024b83a616c54d5dba0 Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Fri, 19 Mar 2010 04:41:33 +0000 Subject: [PATCH 20/38] ixgbe: Set IXGBE_RSC_CB(skb)->DMA field to zero after unmapping the address As per Simon Horman's feedback set IXGBE_RSC_CB(skb)->dma to zero after unmapping HWRSC DMA address to avoid double freeing. Signed-off-by: Mallikarjuna R Chilakala Acked-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 18b5b217f310..d75c46ff31f6 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -935,10 +935,12 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (skb->prev) skb = ixgbe_transform_rsc_queue(skb, &(rx_ring->rsc_count)); if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { - if (IXGBE_RSC_CB(skb)->dma) + if (IXGBE_RSC_CB(skb)->dma) { pci_unmap_single(pdev, IXGBE_RSC_CB(skb)->dma, rx_ring->rx_buf_len, PCI_DMA_FROMDEVICE); + IXGBE_RSC_CB(skb)->dma = 0; + } if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) rx_ring->rsc_count += skb_shinfo(skb)->nr_frags; else @@ -3126,10 +3128,12 @@ static void ixgbe_clean_rx_ring(struct ixgbe_adapter *adapter, rx_buffer_info->skb = NULL; do { struct sk_buff *this = skb; - if (IXGBE_RSC_CB(this)->dma) + if (IXGBE_RSC_CB(this)->dma) { pci_unmap_single(pdev, IXGBE_RSC_CB(this)->dma, rx_ring->rx_buf_len, PCI_DMA_FROMDEVICE); + IXGBE_RSC_CB(this)->dma = 0; + } skb = skb->prev; dev_kfree_skb(this); } while (skb); From 33bd9f601ea21c4389870e425ae4eaf210d49b95 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 19 Mar 2010 02:59:52 +0000 Subject: [PATCH 21/38] ixgbevf: Fix VF Stats accounting after reset The counters in the 82599 Virtual Function are not clear on read. They accumulate to the maximum value and then roll over. They are also not cleared when the VF executes a soft reset, so it is possible they are non-zero when the driver loads and starts. This has all been accounted for in the code that keeps the stats up to date but there is one case that is not. When the PF driver is reset the counters in the VF are all reset to zero. This adds an additional accounting overhead into the VF driver when the PF is reset under its feet. This patch adds additional counters that are used by the VF driver to accumulate and save stats after a PF reset has been detected. Prior to this patch displaying the stats in the VF after the PF has reset would show bogus data. Signed-off-by: Greg Rose Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbevf/ethtool.c | 42 ++++++++++++------ drivers/net/ixgbevf/ixgbevf_main.c | 68 +++++++++++++++++++----------- drivers/net/ixgbevf/vf.h | 6 +++ 3 files changed, 78 insertions(+), 38 deletions(-) diff --git a/drivers/net/ixgbevf/ethtool.c b/drivers/net/ixgbevf/ethtool.c index 399be0c34c36..6fdd651abcd1 100644 --- a/drivers/net/ixgbevf/ethtool.c +++ b/drivers/net/ixgbevf/ethtool.c @@ -46,22 +46,32 @@ struct ixgbe_stats { int sizeof_stat; int stat_offset; int base_stat_offset; + int saved_reset_offset; }; -#define IXGBEVF_STAT(m, b) sizeof(((struct ixgbevf_adapter *)0)->m), \ - offsetof(struct ixgbevf_adapter, m), \ - offsetof(struct ixgbevf_adapter, b) +#define IXGBEVF_STAT(m, b, r) sizeof(((struct ixgbevf_adapter *)0)->m), \ + offsetof(struct ixgbevf_adapter, m), \ + offsetof(struct ixgbevf_adapter, b), \ + offsetof(struct ixgbevf_adapter, r) static struct ixgbe_stats ixgbe_gstrings_stats[] = { - {"rx_packets", IXGBEVF_STAT(stats.vfgprc, stats.base_vfgprc)}, - {"tx_packets", IXGBEVF_STAT(stats.vfgptc, stats.base_vfgptc)}, - {"rx_bytes", IXGBEVF_STAT(stats.vfgorc, stats.base_vfgorc)}, - {"tx_bytes", IXGBEVF_STAT(stats.vfgotc, stats.base_vfgotc)}, - {"tx_busy", IXGBEVF_STAT(tx_busy, zero_base)}, - {"multicast", IXGBEVF_STAT(stats.vfmprc, stats.base_vfmprc)}, - {"rx_csum_offload_good", IXGBEVF_STAT(hw_csum_rx_good, zero_base)}, - {"rx_csum_offload_errors", IXGBEVF_STAT(hw_csum_rx_error, zero_base)}, - {"tx_csum_offload_ctxt", IXGBEVF_STAT(hw_csum_tx_good, zero_base)}, - {"rx_header_split", IXGBEVF_STAT(rx_hdr_split, zero_base)}, + {"rx_packets", IXGBEVF_STAT(stats.vfgprc, stats.base_vfgprc, + stats.saved_reset_vfgprc)}, + {"tx_packets", IXGBEVF_STAT(stats.vfgptc, stats.base_vfgptc, + stats.saved_reset_vfgptc)}, + {"rx_bytes", IXGBEVF_STAT(stats.vfgorc, stats.base_vfgorc, + stats.saved_reset_vfgorc)}, + {"tx_bytes", IXGBEVF_STAT(stats.vfgotc, stats.base_vfgotc, + stats.saved_reset_vfgotc)}, + {"tx_busy", IXGBEVF_STAT(tx_busy, zero_base, zero_base)}, + {"multicast", IXGBEVF_STAT(stats.vfmprc, stats.base_vfmprc, + stats.saved_reset_vfmprc)}, + {"rx_csum_offload_good", IXGBEVF_STAT(hw_csum_rx_good, zero_base, + zero_base)}, + {"rx_csum_offload_errors", IXGBEVF_STAT(hw_csum_rx_error, zero_base, + zero_base)}, + {"tx_csum_offload_ctxt", IXGBEVF_STAT(hw_csum_tx_good, zero_base, + zero_base)}, + {"rx_header_split", IXGBEVF_STAT(rx_hdr_split, zero_base, zero_base)}, }; #define IXGBE_QUEUE_STATS_LEN 0 @@ -455,10 +465,14 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, ixgbe_gstrings_stats[i].stat_offset; char *b = (char *)adapter + ixgbe_gstrings_stats[i].base_stat_offset; + char *r = (char *)adapter + + ixgbe_gstrings_stats[i].saved_reset_offset; data[i] = ((ixgbe_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p) - ((ixgbe_gstrings_stats[i].sizeof_stat == - sizeof(u64)) ? *(u64 *)b : *(u32 *)b); + sizeof(u64)) ? *(u64 *)b : *(u32 *)b) + + ((ixgbe_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)r : *(u32 *)r); } } diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index ca653c49b765..43927e1b3432 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -1610,6 +1610,44 @@ static inline void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, (adapter->rx_ring[rxr].count - 1)); } +static void ixgbevf_save_reset_stats(struct ixgbevf_adapter *adapter) +{ + /* Only save pre-reset stats if there are some */ + if (adapter->stats.vfgprc || adapter->stats.vfgptc) { + adapter->stats.saved_reset_vfgprc += adapter->stats.vfgprc - + adapter->stats.base_vfgprc; + adapter->stats.saved_reset_vfgptc += adapter->stats.vfgptc - + adapter->stats.base_vfgptc; + adapter->stats.saved_reset_vfgorc += adapter->stats.vfgorc - + adapter->stats.base_vfgorc; + adapter->stats.saved_reset_vfgotc += adapter->stats.vfgotc - + adapter->stats.base_vfgotc; + adapter->stats.saved_reset_vfmprc += adapter->stats.vfmprc - + adapter->stats.base_vfmprc; + } +} + +static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + adapter->stats.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); + adapter->stats.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); + adapter->stats.last_vfgorc |= + (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); + adapter->stats.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); + adapter->stats.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); + adapter->stats.last_vfgotc |= + (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); + adapter->stats.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); + + adapter->stats.base_vfgprc = adapter->stats.last_vfgprc; + adapter->stats.base_vfgorc = adapter->stats.last_vfgorc; + adapter->stats.base_vfgptc = adapter->stats.last_vfgptc; + adapter->stats.base_vfgotc = adapter->stats.last_vfgotc; + adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; +} + static int ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1656,6 +1694,9 @@ static int ixgbevf_up_complete(struct ixgbevf_adapter *adapter) /* enable transmits */ netif_tx_start_all_queues(netdev); + ixgbevf_save_reset_stats(adapter); + ixgbevf_init_last_counter_stats(adapter); + /* bring the link up in the watchdog, this could race with our first * link up interrupt but shouldn't be a problem */ adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; @@ -2228,27 +2269,6 @@ out: return err; } -static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - adapter->stats.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); - adapter->stats.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); - adapter->stats.last_vfgorc |= - (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); - adapter->stats.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); - adapter->stats.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); - adapter->stats.last_vfgotc |= - (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); - adapter->stats.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); - - adapter->stats.base_vfgprc = adapter->stats.last_vfgprc; - adapter->stats.base_vfgorc = adapter->stats.last_vfgorc; - adapter->stats.base_vfgptc = adapter->stats.last_vfgptc; - adapter->stats.base_vfgotc = adapter->stats.last_vfgotc; - adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; -} - #define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \ { \ u32 current_counter = IXGBE_READ_REG(hw, reg); \ @@ -2416,9 +2436,9 @@ static void ixgbevf_watchdog_task(struct work_struct *work) } } -pf_has_reset: ixgbevf_update_stats(adapter); +pf_has_reset: /* Force detection of hung controller every watchdog period */ adapter->detect_tx_hung = true; @@ -3390,8 +3410,6 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, /* setup the private structure */ err = ixgbevf_sw_init(adapter); - ixgbevf_init_last_counter_stats(adapter); - #ifdef MAX_SKB_FRAGS netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | @@ -3449,6 +3467,8 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, adapter->netdev_registered = true; + ixgbevf_init_last_counter_stats(adapter); + /* print the MAC address */ hw_dbg(hw, "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", netdev->dev_addr[0], diff --git a/drivers/net/ixgbevf/vf.h b/drivers/net/ixgbevf/vf.h index 799600e92700..1f31b052d4b4 100644 --- a/drivers/net/ixgbevf/vf.h +++ b/drivers/net/ixgbevf/vf.h @@ -157,6 +157,12 @@ struct ixgbevf_hw_stats { u64 vfgorc; u64 vfgotc; u64 vfmprc; + + u64 saved_reset_vfgprc; + u64 saved_reset_vfgptc; + u64 saved_reset_vfgorc; + u64 saved_reset_vfgotc; + u64 saved_reset_vfmprc; }; struct ixgbevf_info { From 4c3a822395c01d50ca2ba3aa4529e19d237a2f8c Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 19 Mar 2010 03:00:12 +0000 Subject: [PATCH 22/38] ixgbevf: Shorten up delay timer for watchdog task The recovery from PF reset works better when you shorten up the delay until the watchdog task executes. Signed-off-by: Greg Rose Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbevf/ixgbevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index 43927e1b3432..3de93ae70e5a 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -965,7 +965,7 @@ static irqreturn_t ixgbevf_msix_mbx(int irq, void *data) if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG) mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 10)); + round_jiffies(jiffies + 1)); return IRQ_HANDLED; } From 29b8dd024bd48c3d1d1e5140f5bbb683786f998e Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 19 Mar 2010 03:00:31 +0000 Subject: [PATCH 23/38] ixgbevf: Message formatting cleanups Clean up some text output formatting. Signed-off-by: Greg Rose Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbevf/ixgbevf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index 3de93ae70e5a..d6cbd943a6f0 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -2419,7 +2419,7 @@ static void ixgbevf_watchdog_task(struct work_struct *work) if (!netif_carrier_ok(netdev)) { hw_dbg(&adapter->hw, "NIC Link is Up %s, ", ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) ? - "10 Gbps" : "1 Gbps")); + "10 Gbps\n" : "1 Gbps\n")); netif_carrier_on(netdev); netif_tx_wake_all_queues(netdev); } else { @@ -2695,7 +2695,7 @@ static int ixgbevf_open(struct net_device *netdev) if (hw->adapter_stopped) { err = IXGBE_ERR_MBX; printk(KERN_ERR "Unable to start - perhaps the PF" - "Driver isn't up yet\n"); + " Driver isn't up yet\n"); goto err_setup_reset; } } From b894fa2627e28c078740dc7041cd08c7e2c353ab Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Fri, 19 Mar 2010 06:07:48 +0000 Subject: [PATCH 24/38] igb: Add support for 82576 ET2 Quad Port Server Adapter Signed-off-by: Carolyn Wyborny Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/igb/e1000_82575.c | 1 + drivers/net/igb/e1000_hw.h | 1 + drivers/net/igb/igb_main.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/net/igb/e1000_82575.c b/drivers/net/igb/e1000_82575.c index 9d7fa2fb85ea..0bc990ec4a8e 100644 --- a/drivers/net/igb/e1000_82575.c +++ b/drivers/net/igb/e1000_82575.c @@ -94,6 +94,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: case E1000_DEV_ID_82576_SERDES_QUAD: mac->type = e1000_82576; break; diff --git a/drivers/net/igb/e1000_hw.h b/drivers/net/igb/e1000_hw.h index 448005276b26..82a533f5192a 100644 --- a/drivers/net/igb/e1000_hw.h +++ b/drivers/net/igb/e1000_hw.h @@ -41,6 +41,7 @@ struct e1000_hw; #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 #define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 #define E1000_DEV_ID_82576_NS 0x150A #define E1000_DEV_ID_82576_NS_SERDES 0x1518 #define E1000_DEV_ID_82576_SERDES_QUAD 0x150D diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 0ed25f059a00..45a0e4fd5871 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -72,6 +72,7 @@ static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, From ea93fd9456ad32cd85b2d7914b58c6313cc40c9e Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Fri, 19 Mar 2010 22:43:29 -0700 Subject: [PATCH 25/38] KS8695: update ksp->next_rx_desc_read at the end of rx loop There is no need to adjust the next rx descriptor after each packet, so do it only once at the end of the routine. Signed-off-by: Eric Dumazet Signed-off-by: Yegor Yefremov --- drivers/net/arm/ks8695net.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/net/arm/ks8695net.c b/drivers/net/arm/ks8695net.c index a1d4188c430b..e7810b74f396 100644 --- a/drivers/net/arm/ks8695net.c +++ b/drivers/net/arm/ks8695net.c @@ -449,11 +449,10 @@ ks8695_rx_irq(int irq, void *dev_id) } /** - * ks8695_rx - Receive packets called by NAPI poll method + * ks8695_rx - Receive packets called by NAPI poll method * @ksp: Private data for the KS8695 Ethernet - * @budget: The max packets would be receive + * @budget: Number of packets allowed to process */ - static int ks8695_rx(struct ks8695_priv *ksp, int budget) { struct net_device *ndev = ksp->ndev; @@ -461,7 +460,6 @@ static int ks8695_rx(struct ks8695_priv *ksp, int budget) int buff_n; u32 flags; int pktlen; - int last_rx_processed = -1; int received = 0; buff_n = ksp->next_rx_desc_read; @@ -471,6 +469,7 @@ static int ks8695_rx(struct ks8695_priv *ksp, int budget) cpu_to_le32(RDES_OWN)))) { rmb(); flags = le32_to_cpu(ksp->rx_ring[buff_n].status); + /* Found an SKB which we own, this means we * received a packet */ @@ -533,23 +532,18 @@ rx_failure: ksp->rx_ring[buff_n].status = cpu_to_le32(RDES_OWN); rx_finished: received++; - /* And note this as processed so we can start - * from here next time - */ - last_rx_processed = buff_n; buff_n = (buff_n + 1) & MAX_RX_DESC_MASK; - /*And note which RX descriptor we last did */ - if (likely(last_rx_processed != -1)) - ksp->next_rx_desc_read = - (last_rx_processed + 1) & - MAX_RX_DESC_MASK; } + + /* And note which RX descriptor we last did */ + ksp->next_rx_desc_read = buff_n; + /* And refill the buffers */ ks8695_refill_rxbuffers(ksp); - /* Kick the RX DMA engine, in case it became - * suspended */ + /* Kick the RX DMA engine, in case it became suspended */ ks8695_writereg(ksp, KS8695_DRSC, 0); + return received; } From a50436f2cd6e85794f7e1aad795ca8302177b896 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Mar 2010 06:04:14 +0000 Subject: [PATCH 26/38] net: ipmr/ip6mr: fix potential out-of-bounds vif_table access mfc_parent of cache entries is used to index into the vif_table and is initialised from mfcctl->mfcc_parent. This can take values of to 2^16-1, while the vif_table has only MAXVIFS (32) entries. The same problem affects ip6mr. Refuse invalid values to fix a potential out-of-bounds access. Unlike the other validity checks, this is checked in ipmr_mfc_add() instead of the setsockopt handler since its unused in the delete path and might be uninitialized. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 3 +++ net/ipv6/ip6mr.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8582e12e4a62..0b9d03c54dc3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -802,6 +802,9 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) int line; struct mfc_cache *uc, *c, **cp; + if (mfc->mfcc_parent >= MAXVIFS) + return -ENFILE; + line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); for (cp = &net->ipv4.mfc_cache_array[line]; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 52e0f74fdfe0..23e4ac0cc30e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1113,6 +1113,9 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) unsigned char ttls[MAXMIFS]; int i; + if (mfc->mf6cc_parent >= MAXMIFS) + return -ENFILE; + memset(ttls, 255, MAXMIFS); for (i = 0; i < MAXMIFS; i++) { if (IF_ISSET(i, &mfc->mf6cc_ifset)) From 6830c25b7d08fbbd922959425193791bc42079f2 Mon Sep 17 00:00:00 2001 From: Lennart Schulte Date: Wed, 17 Mar 2010 02:16:29 +0000 Subject: [PATCH 27/38] tcp: Fix tcp_mark_head_lost() with packets == 0 A packet is marked as lost in case packets == 0, although nothing should be done. This results in a too early retransmitted packet during recovery in some cases. This small patch fixes this issue by returning immediately. Signed-off-by: Lennart Schulte Signed-off-by: Arnd Hannemann Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 788851ca8c5d..c096a4218b8f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2511,6 +2511,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) int err; unsigned int mss; + if (packets == 0) + return; + WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { skb = tp->lost_skb_hint; From f5d410f2ea7ba340f11815a56e05b9fa9421c421 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Mar 2010 13:30:44 +0000 Subject: [PATCH 28/38] netlink: fix unaligned access in nla_get_be64() This patch fixes a unaligned access in nla_get_be64() that was introduced by myself in a17c859849402315613a0015ac8fbf101acf0cc1. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/netlink.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index f82e463c875a..4fc05b58503e 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -945,7 +945,11 @@ static inline u64 nla_get_u64(const struct nlattr *nla) */ static inline __be64 nla_get_be64(const struct nlattr *nla) { - return *(__be64 *) nla_data(nla); + __be64 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + + return tmp; } /** From 73852e8151b7d7a529fbe019ab6d2d0c02d8f3f2 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Tue, 16 Mar 2010 05:22:44 +0000 Subject: [PATCH 29/38] NET_DMA: free skbs periodically Under NET_DMA, data transfer can grind to a halt when userland issues a large read on a socket with a high RCVLOWAT (i.e., 512 KB for both). This appears to be because the NET_DMA design queues up lots of memcpy operations, but doesn't issue or wait for them (and thus free the associated skbs) until it is time for tcp_recvmesg() to return. The socket hangs when its TCP window goes to zero before enough data is available to satisfy the read. Periodically issue asynchronous memcpy operations, and free skbs for ones that have completed, to prevent sockets from going into zero-window mode. Signed-off-by: Steven J. Magnani Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 63 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ae16f809e716..6afb6d8662b2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1254,6 +1254,39 @@ static void tcp_prequeue_process(struct sock *sk) tp->ucopy.memory = 0; } +#ifdef CONFIG_NET_DMA +static void tcp_service_net_dma(struct sock *sk, bool wait) +{ + dma_cookie_t done, used; + dma_cookie_t last_issued; + struct tcp_sock *tp = tcp_sk(sk); + + if (!tp->ucopy.dma_chan) + return; + + last_issued = tp->ucopy.dma_cookie; + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + + do { + if (dma_async_memcpy_complete(tp->ucopy.dma_chan, + last_issued, &done, + &used) == DMA_SUCCESS) { + /* Safe to free early-copied skbs now */ + __skb_queue_purge(&sk->sk_async_wait_queue); + break; + } else { + struct sk_buff *skb; + while ((skb = skb_peek(&sk->sk_async_wait_queue)) && + (dma_async_is_complete(skb->dma_cookie, done, + used) == DMA_SUCCESS)) { + __skb_dequeue(&sk->sk_async_wait_queue); + kfree_skb(skb); + } + } + } while (wait); +} +#endif + static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1546,6 +1579,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* __ Set realtime policy in scheduler __ */ } +#ifdef CONFIG_NET_DMA + if (tp->ucopy.dma_chan) + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); +#endif if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); @@ -1554,6 +1591,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk_wait_data(sk, &timeo); #ifdef CONFIG_NET_DMA + tcp_service_net_dma(sk, false); /* Don't block */ tp->ucopy.wakeup = 0; #endif @@ -1633,6 +1671,9 @@ do_prequeue: copied = -EFAULT; break; } + + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if ((offset + used) == skb->len) copied_early = 1; @@ -1702,27 +1743,9 @@ skip_copy: } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) { - dma_cookie_t done, used; + tcp_service_net_dma(sk, true); /* Wait for queue to drain */ + tp->ucopy.dma_chan = NULL; - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); - - while (dma_async_memcpy_complete(tp->ucopy.dma_chan, - tp->ucopy.dma_cookie, &done, - &used) == DMA_IN_PROGRESS) { - /* do partial cleanup of sk_async_wait_queue */ - while ((skb = skb_peek(&sk->sk_async_wait_queue)) && - (dma_async_is_complete(skb->dma_cookie, done, - used) == DMA_SUCCESS)) { - __skb_dequeue(&sk->sk_async_wait_queue); - kfree_skb(skb); - } - } - - /* Safe to free early-copied skbs now */ - __skb_queue_purge(&sk->sk_async_wait_queue); - tp->ucopy.dma_chan = NULL; - } if (tp->ucopy.pinned_list) { dma_unpin_iovec_pages(tp->ucopy.pinned_list); tp->ucopy.pinned_list = NULL; From 1a50307ba1826e4da0024e64b245ce4eadf7688a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Mar 2010 14:24:42 +0000 Subject: [PATCH 30/38] netlink: fix NETLINK_RECV_NO_ENOBUFS in netlink_set_err() Currently, ENOBUFS errors are reported to the socket via netlink_set_err() even if NETLINK_RECV_NO_ENOBUFS is set. However, that should not happen. This fixes this problem and it changes the prototype of netlink_set_err() to return the number of sockets that have set the NETLINK_RECV_NO_ENOBUFS socket option. This return value is used in the next patch in these bugfix series. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- net/netlink/af_netlink.c | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index fde27c017326..6eaca5e1e8ca 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -188,7 +188,7 @@ extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, __u32 group, gfp_t allocation); -extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); +extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 320d0423a240..acbbae1e89b5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1093,6 +1093,7 @@ static inline int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) { struct netlink_sock *nlk = nlk_sk(sk); + int ret = 0; if (sk == p->exclude_sk) goto out; @@ -1104,10 +1105,15 @@ static inline int do_one_set_err(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; + if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { + ret = 1; + goto out; + } + sk->sk_err = p->code; sk->sk_error_report(sk); out: - return 0; + return ret; } /** @@ -1116,12 +1122,16 @@ out: * @pid: the PID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) + * + * This function returns the number of broadcast listeners that have set the + * NETLINK_RECV_NO_ENOBUFS socket option. */ -void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; struct sock *sk; + int ret = 0; info.exclude_sk = ssk; info.pid = pid; @@ -1132,9 +1142,10 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_lock(&nl_table_lock); sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) - do_one_set_err(sk, &info); + ret += do_one_set_err(sk, &info); read_unlock(&nl_table_lock); + return ret; } EXPORT_SYMBOL(netlink_set_err); From 37b7ef7203240b3aba577bb1ff6765fe15225976 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Mar 2010 13:30:21 +0000 Subject: [PATCH 31/38] netfilter: ctnetlink: fix reliable event delivery if message building fails This patch fixes a bug that allows to lose events when reliable event delivery mode is used, ie. if NETLINK_BROADCAST_SEND_ERROR and NETLINK_RECV_NO_ENOBUFS socket options are set. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 2 +- net/netfilter/nf_conntrack_netlink.c | 4 +++- net/netfilter/nfnetlink.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 53923868c9bd..361d6b5630ee 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -76,7 +76,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(struct net *net, unsigned int group); extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags); -extern void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); +extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); extern void nfnl_lock(void); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2b2af631d2b8..569410a85953 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -582,7 +582,9 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(net, 0, group, -ENOBUFS); + if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0) + return -ENOBUFS; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8eb0cc23ada3..6afa3d52ea5f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -113,9 +113,9 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(net->nfnl, pid, group, error); + return netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); From 101545f6fef4a0a3ea8daf0b5b880df2c6a92a69 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 15 Mar 2010 14:12:58 -0700 Subject: [PATCH 32/38] Bluetooth: Fix potential bad memory access with sysfs files When creating a high number of Bluetooth sockets (L2CAP, SCO and RFCOMM) it is possible to scribble repeatedly on arbitrary pages of memory. Ensure that the content of these sysfs files is always less than one page. Even if this means truncating. The files in question are scheduled to be moved over to debugfs in the future anyway. Based on initial patches from Neil Brown and Linus Torvalds Reported-by: Neil Brown Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 10 +++++++++- net/bluetooth/rfcomm/core.c | 13 ++++++++++++- net/bluetooth/rfcomm/sock.c | 11 ++++++++++- net/bluetooth/sco.c | 11 ++++++++++- 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 4db7ae2fe07d..27551820741e 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3944,16 +3944,24 @@ static ssize_t l2cap_sysfs_show(struct class *dev, struct sock *sk; struct hlist_node *node; char *str = buf; + int size = PAGE_SIZE; read_lock_bh(&l2cap_sk_list.lock); sk_for_each(sk, node, &l2cap_sk_list.head) { struct l2cap_pinfo *pi = l2cap_pi(sk); + int len; - str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", + len = snprintf(str, size, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state, __le16_to_cpu(pi->psm), pi->scid, pi->dcid, pi->imtu, pi->omtu, pi->sec_level); + + size -= len; + if (size <= 0) + break; + + str += len; } read_unlock_bh(&l2cap_sk_list.lock); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index db8a68e1a5ba..cf164073269d 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2105,6 +2105,7 @@ static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, struct rfcomm_session *s; struct list_head *pp, *p; char *str = buf; + int size = PAGE_SIZE; rfcomm_lock(); @@ -2113,11 +2114,21 @@ static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, list_for_each(pp, &s->dlcs) { struct sock *sk = s->sock->sk; struct rfcomm_dlc *d = list_entry(pp, struct rfcomm_dlc, list); + int len; - str += sprintf(str, "%s %s %ld %d %d %d %d\n", + len = snprintf(str, size, "%s %s %ld %d %d %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), d->state, d->dlci, d->mtu, d->rx_credits, d->tx_credits); + + size -= len; + if (size <= 0) + break; + + str += len; } + + if (size <= 0) + break; } rfcomm_unlock(); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ca87d6ac6a20..8d0ee0b8a6b6 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1068,13 +1068,22 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, struct sock *sk; struct hlist_node *node; char *str = buf; + int size = PAGE_SIZE; read_lock_bh(&rfcomm_sk_list.lock); sk_for_each(sk, node, &rfcomm_sk_list.head) { - str += sprintf(str, "%s %s %d %d\n", + int len; + + len = snprintf(str, size, "%s %s %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state, rfcomm_pi(sk)->channel); + + size -= len; + if (size <= 0) + break; + + str += len; } read_unlock_bh(&rfcomm_sk_list.lock); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f93b939539bc..967a75175c66 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -960,13 +960,22 @@ static ssize_t sco_sysfs_show(struct class *dev, struct sock *sk; struct hlist_node *node; char *str = buf; + int size = PAGE_SIZE; read_lock_bh(&sco_sk_list.lock); sk_for_each(sk, node, &sco_sk_list.head) { - str += sprintf(str, "%s %s %d\n", + int len; + + len = snprintf(str, size, "%s %s %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state); + + size -= len; + if (size <= 0) + break; + + str += len; } read_unlock_bh(&sco_sk_list.lock); From aef7d97cc604309b66f6f45cce02cd734934cd4e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 21 Mar 2010 05:27:45 +0100 Subject: [PATCH 33/38] Bluetooth: Convert debug files to actually use debugfs instead of sysfs Some of the debug files ended up wrongly in sysfs, because at that point of time, debugfs didn't exist. Convert these files to use debugfs and also seq_file. This patch converts all of these files at once and then removes the exported symbol for the Bluetooth sysfs class. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/hci_sysfs.c | 3 +- net/bluetooth/l2cap.c | 51 +++++++++++++++++------------- net/bluetooth/rfcomm/core.c | 52 +++++++++++++++++-------------- net/bluetooth/rfcomm/sock.c | 47 ++++++++++++++++------------ net/bluetooth/sco.c | 47 +++++++++++++++------------- 6 files changed, 114 insertions(+), 88 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 04a6908e38d2..ff77e8f882f1 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -176,6 +176,6 @@ extern void hci_sock_cleanup(void); extern int bt_sysfs_init(void); extern void bt_sysfs_cleanup(void); -extern struct class *bt_class; +extern struct dentry *bt_debugfs; #endif /* __BLUETOOTH_H */ diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index cafb55b0cea5..05fd125f74fe 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -8,8 +8,7 @@ #include #include -struct class *bt_class = NULL; -EXPORT_SYMBOL_GPL(bt_class); +static struct class *bt_class; struct dentry *bt_debugfs = NULL; EXPORT_SYMBOL_GPL(bt_debugfs); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 27551820741e..43e17f7d7ecd 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -3937,39 +3939,42 @@ drop: return 0; } -static ssize_t l2cap_sysfs_show(struct class *dev, - struct class_attribute *attr, - char *buf) +static int l2cap_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; struct hlist_node *node; - char *str = buf; - int size = PAGE_SIZE; read_lock_bh(&l2cap_sk_list.lock); sk_for_each(sk, node, &l2cap_sk_list.head) { struct l2cap_pinfo *pi = l2cap_pi(sk); - int len; - len = snprintf(str, size, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", - batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), - sk->sk_state, __le16_to_cpu(pi->psm), pi->scid, - pi->dcid, pi->imtu, pi->omtu, pi->sec_level); - - size -= len; - if (size <= 0) - break; - - str += len; + seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", + batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), + sk->sk_state, __le16_to_cpu(pi->psm), + pi->scid, pi->dcid, + pi->imtu, pi->omtu, pi->sec_level); } read_unlock_bh(&l2cap_sk_list.lock); - return str - buf; + return 0; } -static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); +static int l2cap_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, l2cap_debugfs_show, inode->i_private); +} + +static const struct file_operations l2cap_debugfs_fops = { + .open = l2cap_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *l2cap_debugfs; static const struct proto_ops l2cap_sock_ops = { .family = PF_BLUETOOTH, @@ -4029,8 +4034,12 @@ static int __init l2cap_init(void) goto error; } - if (class_create_file(bt_class, &class_attr_l2cap) < 0) - BT_ERR("Failed to create L2CAP info file"); + if (bt_debugfs) { + l2cap_debugfs = debugfs_create_file("l2cap", 0444, + bt_debugfs, NULL, &l2cap_debugfs_fops); + if (!l2cap_debugfs) + BT_ERR("Failed to create L2CAP debug file"); + } BT_INFO("L2CAP ver %s", VERSION); BT_INFO("L2CAP socket layer initialized"); @@ -4044,7 +4053,7 @@ error: static void __exit l2cap_exit(void) { - class_remove_file(bt_class, &class_attr_l2cap); + debugfs_remove(l2cap_debugfs); if (bt_sock_unregister(BTPROTO_L2CAP) < 0) BT_ERR("L2CAP socket unregistration failed"); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index cf164073269d..13f114e8b0f9 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -2098,14 +2100,10 @@ static struct hci_cb rfcomm_cb = { .security_cfm = rfcomm_security_cfm }; -static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, - struct class_attribute *attr, - char *buf) +static int rfcomm_dlc_debugfs_show(struct seq_file *f, void *x) { struct rfcomm_session *s; struct list_head *pp, *p; - char *str = buf; - int size = PAGE_SIZE; rfcomm_lock(); @@ -2114,29 +2112,33 @@ static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, list_for_each(pp, &s->dlcs) { struct sock *sk = s->sock->sk; struct rfcomm_dlc *d = list_entry(pp, struct rfcomm_dlc, list); - int len; - len = snprintf(str, size, "%s %s %ld %d %d %d %d\n", - batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), - d->state, d->dlci, d->mtu, d->rx_credits, d->tx_credits); - - size -= len; - if (size <= 0) - break; - - str += len; + seq_printf(f, "%s %s %ld %d %d %d %d\n", + batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), + d->state, d->dlci, d->mtu, + d->rx_credits, d->tx_credits); } - - if (size <= 0) - break; } rfcomm_unlock(); - return (str - buf); + return 0; } -static CLASS_ATTR(rfcomm_dlc, S_IRUGO, rfcomm_dlc_sysfs_show, NULL); +static int rfcomm_dlc_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, rfcomm_dlc_debugfs_show, inode->i_private); +} + +static const struct file_operations rfcomm_dlc_debugfs_fops = { + .open = rfcomm_dlc_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rfcomm_dlc_debugfs; /* ---- Initialization ---- */ static int __init rfcomm_init(void) @@ -2153,8 +2155,12 @@ static int __init rfcomm_init(void) goto unregister; } - if (class_create_file(bt_class, &class_attr_rfcomm_dlc) < 0) - BT_ERR("Failed to create RFCOMM info file"); + if (bt_debugfs) { + rfcomm_dlc_debugfs = debugfs_create_file("rfcomm_dlc", 0444, + bt_debugfs, NULL, &rfcomm_dlc_debugfs_fops); + if (!rfcomm_dlc_debugfs) + BT_ERR("Failed to create RFCOMM debug file"); + } err = rfcomm_init_ttys(); if (err < 0) @@ -2182,7 +2188,7 @@ unregister: static void __exit rfcomm_exit(void) { - class_remove_file(bt_class, &class_attr_rfcomm_dlc); + debugfs_remove(rfcomm_dlc_debugfs); hci_unregister_cb(&rfcomm_cb); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8d0ee0b8a6b6..7f439765403d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include @@ -1061,37 +1063,38 @@ done: return result; } -static ssize_t rfcomm_sock_sysfs_show(struct class *dev, - struct class_attribute *attr, - char *buf) +static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; struct hlist_node *node; - char *str = buf; - int size = PAGE_SIZE; read_lock_bh(&rfcomm_sk_list.lock); sk_for_each(sk, node, &rfcomm_sk_list.head) { - int len; - - len = snprintf(str, size, "%s %s %d %d\n", - batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), + seq_printf(f, "%s %s %d %d\n", + batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), sk->sk_state, rfcomm_pi(sk)->channel); - - size -= len; - if (size <= 0) - break; - - str += len; } read_unlock_bh(&rfcomm_sk_list.lock); - return (str - buf); + return 0; } -static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL); +static int rfcomm_sock_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, rfcomm_sock_debugfs_show, inode->i_private); +} + +static const struct file_operations rfcomm_sock_debugfs_fops = { + .open = rfcomm_sock_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rfcomm_sock_debugfs; static const struct proto_ops rfcomm_sock_ops = { .family = PF_BLUETOOTH, @@ -1131,8 +1134,12 @@ int __init rfcomm_init_sockets(void) if (err < 0) goto error; - if (class_create_file(bt_class, &class_attr_rfcomm) < 0) - BT_ERR("Failed to create RFCOMM info file"); + if (bt_debugfs) { + rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444, + bt_debugfs, NULL, &rfcomm_sock_debugfs_fops); + if (!rfcomm_sock_debugfs) + BT_ERR("Failed to create RFCOMM debug file"); + } BT_INFO("RFCOMM socket layer initialized"); @@ -1146,7 +1153,7 @@ error: void rfcomm_cleanup_sockets(void) { - class_remove_file(bt_class, &class_attr_rfcomm); + debugfs_remove(rfcomm_sock_debugfs); if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) BT_ERR("RFCOMM socket layer unregistration failed"); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 967a75175c66..e5b16b76b22e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include #include @@ -953,37 +955,36 @@ drop: return 0; } -static ssize_t sco_sysfs_show(struct class *dev, - struct class_attribute *attr, - char *buf) +static int sco_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; struct hlist_node *node; - char *str = buf; - int size = PAGE_SIZE; read_lock_bh(&sco_sk_list.lock); sk_for_each(sk, node, &sco_sk_list.head) { - int len; - - len = snprintf(str, size, "%s %s %d\n", - batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), - sk->sk_state); - - size -= len; - if (size <= 0) - break; - - str += len; + seq_printf(f, "%s %s %d\n", batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), sk->sk_state); } read_unlock_bh(&sco_sk_list.lock); - return (str - buf); + return 0; } -static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); +static int sco_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, sco_debugfs_show, inode->i_private); +} + +static const struct file_operations sco_debugfs_fops = { + .open = sco_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *sco_debugfs; static const struct proto_ops sco_sock_ops = { .family = PF_BLUETOOTH, @@ -1041,8 +1042,12 @@ static int __init sco_init(void) goto error; } - if (class_create_file(bt_class, &class_attr_sco) < 0) - BT_ERR("Failed to create SCO info file"); + if (bt_debugfs) { + sco_debugfs = debugfs_create_file("sco", 0444, + bt_debugfs, NULL, &sco_debugfs_fops); + if (!sco_debugfs) + BT_ERR("Failed to create SCO debug file"); + } BT_INFO("SCO (Voice Link) ver %s", VERSION); BT_INFO("SCO socket layer initialized"); @@ -1056,7 +1061,7 @@ error: static void __exit sco_exit(void) { - class_remove_file(bt_class, &class_attr_sco); + debugfs_remove(sco_debugfs); if (bt_sock_unregister(BTPROTO_SCO) < 0) BT_ERR("SCO socket unregistration failed"); From c2c77ec83bdad17fb688557b5b3fdc36661dd1c6 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 19 Mar 2010 10:26:28 +0200 Subject: [PATCH 34/38] Bluetooth: Fix kernel crash on L2CAP stress tests Added very simple check that req buffer has enough space to fit configuration parameters. Shall be enough to reject packets with configuration size more than req buffer. Crash trace below [ 6069.659393] Unable to handle kernel paging request at virtual address 02000205 [ 6069.673034] Internal error: Oops: 805 [#1] PREEMPT ... [ 6069.727172] PC is at l2cap_add_conf_opt+0x70/0xf0 [l2cap] [ 6069.732604] LR is at l2cap_recv_frame+0x1350/0x2e78 [l2cap] ... [ 6070.030303] Backtrace: [ 6070.032806] [] (l2cap_add_conf_opt+0x0/0xf0 [l2cap]) from [] (l2cap_recv_frame+0x1350/0x2e78 [l2cap]) [ 6070.043823] r8:dc5d3100 r7:df2a91d6 r6:00000001 r5:df2a8000 r4:00000200 [ 6070.050659] [] (l2cap_recv_frame+0x0/0x2e78 [l2cap]) from [] (l2cap_recv_acldata+0x2bc/0x350 [l2cap]) [ 6070.061798] [] (l2cap_recv_acldata+0x0/0x350 [l2cap]) from [] (hci_rx_task+0x244/0x478 [bluetooth]) [ 6070.072631] r6:dc647700 r5:00000001 r4:df2ab740 [ 6070.077362] [] (hci_rx_task+0x0/0x478 [bluetooth]) from [] (tasklet_action+0x78/0xd8) [ 6070.087005] [] (tasklet_action+0x0/0xd8) from [] Signed-off-by: Andrei Emeltchenko Acked-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 43e17f7d7ecd..7794a2e2adce 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2832,6 +2832,11 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr int len = cmd->len - sizeof(*rsp); char req[64]; + if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { + l2cap_send_disconn_req(conn, sk); + goto done; + } + /* throw out any old stored conf requests */ result = L2CAP_CONF_SUCCESS; len = l2cap_parse_conf_rsp(sk, rsp->data, From 634a4b2038a6eba4c211fb906fa2f6ec9a4bbfc7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 21 Mar 2010 18:01:05 -0700 Subject: [PATCH 35/38] net: suppress lockdep-RCU false positive in FIB trie. Allow fib_find_node() to be called either under rcu_read_lock() protection or with RTNL held. Signed-off-by: Paul E. McKenney Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index af5d89792860..01ef8ba9025c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -961,7 +961,9 @@ fib_find_node(struct trie *t, u32 key) struct node *n; pos = 0; - n = rcu_dereference(t->trie); + n = rcu_dereference_check(t->trie, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); while (n != NULL && NODE_TYPE(n) == T_TNODE) { tn = (struct tnode *) n; From 5e016cbf6cffd4a53b7922e0c91b775399d7fe47 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 21 Mar 2010 20:55:13 -0700 Subject: [PATCH 36/38] ipv4: Don't drop redirected route cache entry unless PTMU actually expired TCP sessions over IPv4 can get stuck if routers between endpoints do not fragment packets but implement PMTU instead, and we are using those routers because of an ICMP redirect. Setup is as follows MTU1 MTU2 MTU1 A--------B------C------D with MTU1 > MTU2. A and D are endpoints, B and C are routers. B and C implement PMTU and drop packets larger than MTU2 (for example because DF is set on all packets). TCP sessions are initiated between A and D. There is packet loss between A and D, causing frequent TCP retransmits. After the number of retransmits on a TCP session reaches tcp_retries1, tcp calls dst_negative_advice() prior to each retransmit. This results in route cache entries for the peer to be deleted in ipv4_negative_advice() if the Path MTU is set. If the outstanding data on an affected TCP session is larger than MTU2, packets sent from the endpoints will be dropped by B or C, and ICMP NEEDFRAG will be returned. A and D receive NEEDFRAG messages and update PMTU. Before the next retransmit, tcp will again call dst_negative_advice(), causing the route cache entry (with correct PMTU) to be deleted. The retransmitted packet will be larger than MTU2, causing it to be dropped again. This sequence repeats until the TCP session aborts or is terminated. Problem is fixed by removing redirected route cache entries in ipv4_negative_advice() only if the PMTU is expired. Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 32d396196df8..54fd68c14c87 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1510,7 +1510,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || - rt->u.dst.expires) { + (rt->u.dst.expires && + time_after_eq(jiffies, rt->u.dst.expires))) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, rt->fl.oif, rt_genid(dev_net(dst->dev))); From 9bf35c8dddd56f7f247a27346f74f5adc18071f4 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Sun, 21 Mar 2010 21:19:02 -0700 Subject: [PATCH 37/38] if_tunnel.h: add missing ams/byteorder.h include When compiling userspace application which includes if_tunnel.h and uses GRE_* defines you will get undefined reference to __cpu_to_be16. Fix this by adding missing #include Cc: stable@kernel.org Signed-off-by: Paulius Zaleckas Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 1822d635be6b..16b92d008bed 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -2,6 +2,7 @@ #define _IF_TUNNEL_H_ #include +#include #ifdef __KERNEL__ #include From 243aad830e8a4cdda261626fbaeddde16b08d04a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Sat, 20 Mar 2010 02:27:58 +0000 Subject: [PATCH 38/38] ip_gre: include route header_len in max_headroom calculation Taking route's header_len into account, and updating gre device needed_headroom will give better hints on upper bound of required headroom. This is useful if the gre traffic is xfrm'ed. Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f47c9f76754b..f78402d097b3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -810,11 +810,13 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tunnel->err_count = 0; } - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; if (!new_skb) { ip_rt_put(rt); txq->tx_dropped++;