From 1c37a72c1bd0b83be8b95cff7f1bc9b1f32bd433 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 3 Mar 2014 13:37:14 +0200 Subject: [PATCH 01/70] mac80211: consider virtual mon when calculating min_def When calculating the current max bw required for a channel context, we didn't consider the virtual monitor interface, resulting in its channel context being narrower than configured. This broke monitor mode with iwlmvm, which uses the minimal width. Reported-by: Ido Yariv Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f43613a97dd6..0c1ecfdf9a12 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -100,6 +100,12 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, } max_bw = max(max_bw, width); } + + /* use the configured bandwidth in case of monitor interface */ + sdata = rcu_dereference(local->monitor_sdata); + if (sdata && rcu_access_pointer(sdata->vif.chanctx_conf) == conf) + max_bw = max(max_bw, conf->def.width); + rcu_read_unlock(); return max_bw; From bc00a91d627026f08abf69bf5d0015499dd30c2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Mar 2014 13:55:54 +0100 Subject: [PATCH 02/70] cfg80211: remove racy beacon_interval assignment In case of AP mode, the beacon interval is already reset to zero inside cfg80211_stop_ap(), and in the other modes it isn't relevant. Remove the assignment to remove a potential race since the assignment isn't properly locked. Reported-by: Michal Kazior Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 010892b81a06..a3bf18d11609 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -788,8 +788,6 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, default: break; } - - wdev->beacon_interval = 0; } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, From 1e9291996c4eedf79883f47ec635235e39d3d6cd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 4 Mar 2014 10:28:23 +0200 Subject: [PATCH 03/70] iwlwifi: mvm: don't WARN when statistics are handled late Since the statistics handler is asynchrous, it can very well be that we will handle the statistics (hence the RSSI fluctuation) when we already disassociated. Don't WARN on this case. This solves: https://bugzilla.redhat.com/show_bug.cgi?id=1071998 Cc: [3.10+] Fixes: 2b76ef13086f ("iwlwifi: mvm: implement reduced Tx power") Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/bt-coex.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/bt-coex.c b/drivers/net/wireless/iwlwifi/mvm/bt-coex.c index 76cde6ce6551..18a895a949d4 100644 --- a/drivers/net/wireless/iwlwifi/mvm/bt-coex.c +++ b/drivers/net/wireless/iwlwifi/mvm/bt-coex.c @@ -872,8 +872,11 @@ void iwl_mvm_bt_rssi_event(struct iwl_mvm *mvm, struct ieee80211_vif *vif, lockdep_assert_held(&mvm->mutex); - /* Rssi update while not associated ?! */ - if (WARN_ON_ONCE(mvmvif->ap_sta_id == IWL_MVM_STATION_COUNT)) + /* + * Rssi update while not associated - can happen since the statistics + * are handled asynchronously + */ + if (mvmvif->ap_sta_id == IWL_MVM_STATION_COUNT) return; /* No BT - reports should be disabled */ From acfcd9ed588465e48efe5ceb67177096d51c95a9 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Tue, 4 Mar 2014 14:47:39 +0200 Subject: [PATCH 04/70] iwlwifi: fix and add 7265 series HW IDs Update of the HW IDs for the 7265 series. Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/drv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index f47bcbe2945a..3872ead75488 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -359,13 +359,12 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { /* 7265 Series */ {IWL_PCI_DEVICE(0x095A, 0x5010, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5110, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x5112, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5100, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x510A, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5310, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_n_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5210, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5012, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5412, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5410, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5400, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x1010, iwl7265_2ac_cfg)}, From b6251243ac0a3cb63afe748133f6600f77526078 Mon Sep 17 00:00:00 2001 From: Ivaylo Dimitrov Date: Sat, 1 Mar 2014 14:52:06 +0200 Subject: [PATCH 05/70] wl1251: use skb_trim to make skb shorter the current code is directly setting skb->len, which is not correct and brings problems with HAVE_EFFICIENT_UNALIGNED_ACCESS enabled in config Signed-off-by: Ivaylo Dimitrov Signed-off-by: John W. Linville --- drivers/net/wireless/ti/wl1251/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wl1251/rx.c b/drivers/net/wireless/ti/wl1251/rx.c index 123c4bb50e0a..cde0eaf99714 100644 --- a/drivers/net/wireless/ti/wl1251/rx.c +++ b/drivers/net/wireless/ti/wl1251/rx.c @@ -180,7 +180,7 @@ static void wl1251_rx_body(struct wl1251 *wl, wl1251_mem_read(wl, rx_packet_ring_addr, rx_buffer, length); /* The actual length doesn't include the target's alignment */ - skb->len = desc->length - PLCP_HEADER_LENGTH; + skb_trim(skb, desc->length - PLCP_HEADER_LENGTH); fc = (u16 *)skb->data; From 864a6040f395464003af8dd0d8ca86fed19866d4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Mar 2014 13:46:53 +0100 Subject: [PATCH 06/70] mac80211: clear sequence/fragment number in QoS-null frames Avoid leaking data by sending uninitialized memory and setting an invalid (non-zero) fragment number (the sequence number is ignored anyway) by setting the seq_ctrl field to zero. Cc: stable@vger.kernel.org Fixes: 3f52b7e328c5 ("mac80211: mesh power save basics") Fixes: ce662b44ce22 ("mac80211: send (QoS) Null if no buffered frames") Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/mesh_ps.c | 1 + net/mac80211/sta_info.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 2802f9d9279d..ad8b377b4b9f 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -36,6 +36,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta) sdata->vif.addr); nullfunc->frame_control = fc; nullfunc->duration_id = 0; + nullfunc->seq_ctrl = 0; /* no address resolution for this frame -> set addr 1 immediately */ memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a023b432143b..137a192e64bc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1206,6 +1206,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); From 24b9bf43e93e0edd89072da51cf1fab95fc69dec Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 3 Mar 2014 23:19:18 +0100 Subject: [PATCH 07/70] net: fix for a race condition in the inet frag code I stumbled upon this very serious bug while hunting for another one, it's a very subtle race condition between inet_frag_evictor, inet_frag_intern and the IPv4/6 frag_queue and expire functions (basically the users of inet_frag_kill/inet_frag_put). What happens is that after a fragment has been added to the hash chain but before it's been added to the lru_list (inet_frag_lru_add) in inet_frag_intern, it may get deleted (either by an expired timer if the system load is high or the timer sufficiently low, or by the fraq_queue function for different reasons) before it's added to the lru_list, then after it gets added it's a matter of time for the evictor to get to a piece of memory which has been freed leading to a number of different bugs depending on what's left there. I've been able to trigger this on both IPv4 and IPv6 (which is normal as the frag code is the same), but it's been much more difficult to trigger on IPv4 due to the protocol differences about how fragments are treated. The setup I used to reproduce this is: 2 machines with 4 x 10G bonded in a RR bond, so the same flow can be seen on multiple cards at the same time. Then I used multiple instances of ping/ping6 to generate fragmented packets and flood the machines with them while running other processes to load the attacked machine. *It is very important to have the _same flow_ coming in on multiple CPUs concurrently. Usually the attacked machine would die in less than 30 minutes, if configured properly to have many evictor calls and timeouts it could happen in 10 minutes or so. An important point to make is that any caller (frag_queue or timer) of inet_frag_kill will remove both the timer refcount and the original/guarding refcount thus removing everything that's keeping the frag from being freed at the next inet_frag_put. All of this could happen before the frag was ever added to the LRU list, then it gets added and the evictor uses a freed fragment. An example for IPv6 would be if a fragment is being added and is at the stage of being inserted in the hash after the hash lock is released, but before inet_frag_lru_add executes (or is able to obtain the lru lock) another overlapping fragment for the same flow arrives at a different CPU which finds it in the hash, but since it's overlapping it drops it invoking inet_frag_kill and thus removing all guarding refcounts, and afterwards freeing it by invoking inet_frag_put which removes the last refcount added previously by inet_frag_find, then inet_frag_lru_add gets executed by inet_frag_intern and we have a freed fragment in the lru_list. The fix is simple, just move the lru_add under the hash chain locked region so when a removing function is called it'll have to wait for the fragment to be added to the lru_list, and then it'll remove it (it works because the hash chain removal is done before the lru_list one and there's no window between the two list adds when the frag can get dropped). With this fix applied I couldn't kill the same machine in 24 hours with the same setup. Fixes: 3ef0eb0db4bf ("net: frag, move LRU list maintenance outside of rwlock") CC: Florian Westphal CC: Jesper Dangaard Brouer CC: David S. Miller Signed-off-by: Nikolay Aleksandrov Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index bb075fc9a14f..322dcebfc588 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -278,9 +278,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); + inet_frag_lru_add(nf, qp); spin_unlock(&hb->chain_lock); read_unlock(&f->lock); - inet_frag_lru_add(nf, qp); + return qp; } From 6565b9eeef194afbb3beec80d6dd2447f4091f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 4 Mar 2014 03:57:35 +0100 Subject: [PATCH 08/70] bridge: multicast: add sanity check for query source addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MLD queries are supposed to have an IPv6 link-local source address according to RFC2710, section 4 and RFC3810, section 5.1.14. This patch adds a sanity check to ignore such broken MLD queries. Without this check, such malformed MLD queries can result in a denial of service: The queries are ignored by any MLD listener therefore they will not respond with an MLD report. However, without this patch these malformed MLD queries would enable the snooping part in the bridge code, potentially shutting down the according ports towards these hosts for multicast traffic as the bridge did not learn about these listeners. Reported-by: Jan Stancek Signed-off-by: Linus Lüssing Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index ef66365b7354..fb0e36fac668 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1235,6 +1235,12 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { + err = -EINVAL; + goto out; + } + if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; From 367d56f7b4d5ce61e883c64f81786c7a3ae88eea Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Mar 2014 15:35:20 +0800 Subject: [PATCH 09/70] net/mlx4: Support shutdown() interface In kexec scenario, we failed to load the mlx4 driver in the second kernel because the ownership bit was hold by the first kernel without release correctly. The patch adds shutdown() interface so that the ownership can be released correctly in the first kernel. It also helps avoiding EEH error happened during boot stage of the second kernel because of undesired traffic, which can't be handled by hardware during that stage on Power platform. Signed-off-by: Gavin Shan Tested-by: Wei Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d711158b0d4b..5a6105f1ba6d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2684,6 +2684,7 @@ static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, + .shutdown = mlx4_remove_one, .remove = mlx4_remove_one, .err_handler = &mlx4_err_handler, }; From 10c3271712f58215f4d336a1e30aa25be09cd5d1 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 4 Mar 2014 20:47:48 +0800 Subject: [PATCH 10/70] r8152: disable the ECM mode There are known issues for switching the drivers between ECM mode and vendor mode. The interrup transfer may become abnormal. The hardware may have the opportunity to die if you change the configuration without unloading the current driver first, because all the control transfers of the current driver would fail after the command of switching the configuration. Although to use the ecm driver and vendor driver independently is fine, it may have problems to change the driver from one to the other by switching the configuration. Additionally, now the vendor mode driver is more powerful than the ECM driver. Thus, disable the ECM mode driver, and let r8152 to set the configuration to vendor mode and reset the device automatically. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/Makefile | 2 +- drivers/net/usb/cdc_ether.c | 7 + drivers/net/usb/r8152.c | 15 ++- drivers/net/usb/r815x.c | 248 ------------------------------------ 4 files changed, 17 insertions(+), 255 deletions(-) delete mode 100644 drivers/net/usb/r815x.c diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index 433f0a00c683..e2797f1e1b31 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_USB_HSO) += hso.o obj-$(CONFIG_USB_NET_AX8817X) += asix.o asix-y := asix_devices.o asix_common.o ax88172a.o obj-$(CONFIG_USB_NET_AX88179_178A) += ax88179_178a.o -obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o r815x.o +obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o obj-$(CONFIG_USB_NET_CDC_EEM) += cdc_eem.o obj-$(CONFIG_USB_NET_DM9601) += dm9601.o obj-$(CONFIG_USB_NET_SR9700) += sr9700.o diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 42e176912c8e..bd363b27e854 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -652,6 +652,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Samsung USB Ethernet Adapters */ +{ + USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, 0xa101, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d89dbe395ad2..adb12f349a61 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -449,9 +449,6 @@ enum rtl8152_flags { #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 -#define REALTEK_USB_DEVICE(vend, prod) \ - USB_DEVICE_INTERFACE_CLASS(vend, prod, USB_CLASS_VENDOR_SPEC) - struct rx_desc { __le32 opts1; #define RX_LEN_MASK 0x7fff @@ -2739,6 +2736,12 @@ static int rtl8152_probe(struct usb_interface *intf, struct net_device *netdev; int ret; + if (udev->actconfig->desc.bConfigurationValue != 1) { + usb_driver_set_configuration(udev, 1); + return -ENODEV; + } + + usb_reset_device(udev); netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { dev_err(&intf->dev, "Out of memory\n"); @@ -2819,9 +2822,9 @@ static void rtl8152_disconnect(struct usb_interface *intf) /* table of devices that work with this driver */ static struct usb_device_id rtl8152_table[] = { - {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8152)}, - {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8153)}, - {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, PRODUCT_ID_SAMSUNG)}, + {USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8152)}, + {USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8153)}, + {USB_DEVICE(VENDOR_ID_SAMSUNG, PRODUCT_ID_SAMSUNG)}, {} }; diff --git a/drivers/net/usb/r815x.c b/drivers/net/usb/r815x.c deleted file mode 100644 index f0a8791b7636..000000000000 --- a/drivers/net/usb/r815x.c +++ /dev/null @@ -1,248 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define RTL815x_REQT_READ 0xc0 -#define RTL815x_REQT_WRITE 0x40 -#define RTL815x_REQ_GET_REGS 0x05 -#define RTL815x_REQ_SET_REGS 0x05 - -#define MCU_TYPE_PLA 0x0100 -#define OCP_BASE 0xe86c -#define BASE_MII 0xa400 - -#define BYTE_EN_DWORD 0xff -#define BYTE_EN_WORD 0x33 -#define BYTE_EN_BYTE 0x11 - -#define R815x_PHY_ID 32 -#define REALTEK_VENDOR_ID 0x0bda - - -static int pla_read_word(struct usb_device *udev, u16 index) -{ - int ret; - u8 shift = index & 2; - __le32 *tmp; - - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - index &= ~3; - - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - RTL815x_REQ_GET_REGS, RTL815x_REQT_READ, - index, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500); - if (ret < 0) - goto out2; - - ret = __le32_to_cpu(*tmp); - ret >>= (shift * 8); - ret &= 0xffff; - -out2: - kfree(tmp); - return ret; -} - -static int pla_write_word(struct usb_device *udev, u16 index, u32 data) -{ - __le32 *tmp; - u32 mask = 0xffff; - u16 byen = BYTE_EN_WORD; - u8 shift = index & 2; - int ret; - - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - data &= mask; - - if (shift) { - byen <<= shift; - mask <<= (shift * 8); - data <<= (shift * 8); - index &= ~3; - } - - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - RTL815x_REQ_GET_REGS, RTL815x_REQT_READ, - index, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500); - if (ret < 0) - goto out3; - - data |= __le32_to_cpu(*tmp) & ~mask; - *tmp = __cpu_to_le32(data); - - ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), - RTL815x_REQ_SET_REGS, RTL815x_REQT_WRITE, - index, MCU_TYPE_PLA | byen, tmp, sizeof(*tmp), - 500); - -out3: - kfree(tmp); - return ret; -} - -static int ocp_reg_read(struct usbnet *dev, u16 addr) -{ - u16 ocp_base, ocp_index; - int ret; - - ocp_base = addr & 0xf000; - ret = pla_write_word(dev->udev, OCP_BASE, ocp_base); - if (ret < 0) - goto out; - - ocp_index = (addr & 0x0fff) | 0xb000; - ret = pla_read_word(dev->udev, ocp_index); - -out: - return ret; -} - -static int ocp_reg_write(struct usbnet *dev, u16 addr, u16 data) -{ - u16 ocp_base, ocp_index; - int ret; - - ocp_base = addr & 0xf000; - ret = pla_write_word(dev->udev, OCP_BASE, ocp_base); - if (ret < 0) - goto out1; - - ocp_index = (addr & 0x0fff) | 0xb000; - ret = pla_write_word(dev->udev, ocp_index, data); - -out1: - return ret; -} - -static int r815x_mdio_read(struct net_device *netdev, int phy_id, int reg) -{ - struct usbnet *dev = netdev_priv(netdev); - int ret; - - if (phy_id != R815x_PHY_ID) - return -EINVAL; - - if (usb_autopm_get_interface(dev->intf) < 0) - return -ENODEV; - - ret = ocp_reg_read(dev, BASE_MII + reg * 2); - - usb_autopm_put_interface(dev->intf); - return ret; -} - -static -void r815x_mdio_write(struct net_device *netdev, int phy_id, int reg, int val) -{ - struct usbnet *dev = netdev_priv(netdev); - - if (phy_id != R815x_PHY_ID) - return; - - if (usb_autopm_get_interface(dev->intf) < 0) - return; - - ocp_reg_write(dev, BASE_MII + reg * 2, val); - - usb_autopm_put_interface(dev->intf); -} - -static int r8153_bind(struct usbnet *dev, struct usb_interface *intf) -{ - int status; - - status = usbnet_cdc_bind(dev, intf); - if (status < 0) - return status; - - dev->mii.dev = dev->net; - dev->mii.mdio_read = r815x_mdio_read; - dev->mii.mdio_write = r815x_mdio_write; - dev->mii.phy_id_mask = 0x3f; - dev->mii.reg_num_mask = 0x1f; - dev->mii.phy_id = R815x_PHY_ID; - dev->mii.supports_gmii = 1; - - return status; -} - -static int r8152_bind(struct usbnet *dev, struct usb_interface *intf) -{ - int status; - - status = usbnet_cdc_bind(dev, intf); - if (status < 0) - return status; - - dev->mii.dev = dev->net; - dev->mii.mdio_read = r815x_mdio_read; - dev->mii.mdio_write = r815x_mdio_write; - dev->mii.phy_id_mask = 0x3f; - dev->mii.reg_num_mask = 0x1f; - dev->mii.phy_id = R815x_PHY_ID; - dev->mii.supports_gmii = 0; - - return status; -} - -static const struct driver_info r8152_info = { - .description = "RTL8152 ECM Device", - .flags = FLAG_ETHER | FLAG_POINTTOPOINT, - .bind = r8152_bind, - .unbind = usbnet_cdc_unbind, - .status = usbnet_cdc_status, - .manage_power = usbnet_manage_power, -}; - -static const struct driver_info r8153_info = { - .description = "RTL8153 ECM Device", - .flags = FLAG_ETHER | FLAG_POINTTOPOINT, - .bind = r8153_bind, - .unbind = usbnet_cdc_unbind, - .status = usbnet_cdc_status, - .manage_power = usbnet_manage_power, -}; - -static const struct usb_device_id products[] = { -{ - USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8152, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &r8152_info, -}, - -{ - USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8153, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &r8153_info, -}, - - { }, /* END */ -}; -MODULE_DEVICE_TABLE(usb, products); - -static struct usb_driver r815x_driver = { - .name = "r815x", - .id_table = products, - .probe = usbnet_probe, - .disconnect = usbnet_disconnect, - .suspend = usbnet_suspend, - .resume = usbnet_resume, - .reset_resume = usbnet_resume, - .supports_autosuspend = 1, - .disable_hub_initiated_lpm = 1, -}; - -module_usb_driver(r815x_driver); - -MODULE_AUTHOR("Hayes Wang"); -MODULE_DESCRIPTION("Realtek USB ECM device"); -MODULE_LICENSE("GPL"); From c485658bae87faccd7aed540fd2ca3ab37992310 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 4 Mar 2014 16:35:51 +0100 Subject: [PATCH 11/70] net: sctp: fix skb leakage in COOKIE ECHO path of chunk->auth_chunk While working on ec0223ec48a9 ("net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable"), we noticed that there's a skb memory leakage in the error path. Running the same reproducer as in ec0223ec48a9 and by unconditionally jumping to the error label (to simulate an error condition) in sctp_sf_do_5_1D_ce() receive path lets kmemleak detector bark about the unfreed chunk->auth_chunk skb clone: Unreferenced object 0xffff8800b8f3a000 (size 256): comm "softirq", pid 0, jiffies 4294769856 (age 110.757s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 89 ab 75 5e d4 01 58 13 00 00 00 00 00 00 00 00 ..u^..X......... backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0xc8/0x210 [] skb_clone+0x49/0xb0 [] sctp_endpoint_bh_rcv+0x1d9/0x230 [sctp] [] sctp_inq_push+0x4c/0x70 [sctp] [] sctp_rcv+0x82e/0x9a0 [sctp] [] ip_local_deliver_finish+0xa8/0x210 [] nf_reinject+0xbf/0x180 [] nfqnl_recv_verdict+0x1d2/0x2b0 [nfnetlink_queue] [] nfnetlink_rcv_msg+0x14b/0x250 [nfnetlink] [] netlink_rcv_skb+0xa9/0xc0 [] nfnetlink_rcv+0x23f/0x408 [nfnetlink] [] netlink_unicast+0x168/0x250 [] netlink_sendmsg+0x2e1/0x3f0 [] sock_sendmsg+0x8b/0xc0 [] ___sys_sendmsg+0x369/0x380 What happens is that commit bbd0d59809f9 clones the skb containing the AUTH chunk in sctp_endpoint_bh_rcv() when having the edge case that an endpoint requires COOKIE-ECHO chunks to be authenticated: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- ------------------ AUTH; COOKIE-ECHO ----------------> <-------------------- COOKIE-ACK --------------------- When we enter sctp_sf_do_5_1D_ce() and before we actually get to the point where we process (and subsequently free) a non-NULL chunk->auth_chunk, we could hit the "goto nomem_init" path from an error condition and thus leave the cloned skb around w/o freeing it. The fix is to centrally free such clones in sctp_chunk_destroy() handler that is invoked from sctp_chunk_free() after all refs have dropped; and also move both kfree_skb(chunk->auth_chunk) there, so that chunk->auth_chunk is either NULL (since sctp_chunkify() allocs new chunks through kmem_cache_zalloc()) or non-NULL with a valid skb pointer. chunk->skb and chunk->auth_chunk are the only skbs in the sctp_chunk structure that need to be handeled. While at it, we should use consume_skb() for both. It is the same as dev_kfree_skb() but more appropriately named as we are not a device but a protocol. Also, this effectively replaces the kfree_skb() from both invocations into consume_skb(). Functions are the same only that kfree_skb() assumes that the frame was being dropped after a failure (e.g. for tools like drop monitor), usage of consume_skb() seems more appropriate in function sctp_chunk_destroy() though. Fixes: bbd0d59809f9 ("[SCTP]: Implement the receive and verification of AUTH chunk") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Cc: Neil Horman Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 4 ++-- net/sctp/sm_statefuns.c | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 632090b961c3..3a1767ef3201 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1421,8 +1421,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk) BUG_ON(!list_empty(&chunk->list)); list_del_init(&chunk->transmitted_list); - /* Free the chunk skb data and the SCTP_chunk stub itself. */ - dev_kfree_skb(chunk->skb); + consume_skb(chunk->skb); + consume_skb(chunk->auth_chunk); SCTP_DBG_OBJCNT_DEC(chunk); kmem_cache_free(sctp_chunk_cachep, chunk); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ae65b6b5973a..01e002430c85 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -760,7 +760,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, /* Make sure that we and the peer are AUTH capable */ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { - kfree_skb(chunk->auth_chunk); sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } @@ -775,10 +774,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, auth.transport = chunk->transport; ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); - - /* We can now safely free the auth_chunk clone */ - kfree_skb(chunk->auth_chunk); - if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); From 920309086652651c7d59791e5b83e2f10112b293 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Tue, 4 Mar 2014 08:46:39 -0800 Subject: [PATCH 12/70] net: macb: Check DMA mappings for error With CONFIG_DMA_API_DEBUG enabled the following warning is printed: WARNING: CPU: 0 PID: 619 at lib/dma-debug.c:1101 check_unmap+0x758/0x894() macb e000b000.ethernet: DMA-API: device driver failed to check map error[device address=0x000000002d171c02] [size=322 bytes] [mapped as single] Modules linked in: CPU: 0 PID: 619 Comm: udhcpc Not tainted 3.14.0-rc3-xilinx-00219-gd158fc7f36a2 #63 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0xc8) [] (dump_stack) from [] (warn_slowpath_common+0x60/0x84) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x2c/0x3c) [] (warn_slowpath_fmt) from [] (check_unmap+0x758/0x894) [] (check_unmap) from [] (debug_dma_unmap_page+0x64/0x70) [] (debug_dma_unmap_page) from [] (macb_interrupt+0x1f8/0x2dc) [] (macb_interrupt) from [] (handle_irq_event_percpu+0x2c/0x178) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x3c/0x5c) [] (handle_irq_event) from [] (handle_fasteoi_irq+0xb8/0x100) [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x20/0x30) [] (generic_handle_irq) from [] (handle_IRQ+0x64/0x8c) [] (handle_IRQ) from [] (gic_handle_irq+0x3c/0x60) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x78) Exception stack(0xed197f60 to 0xed197fa8) 7f60: 00000134 60000013 bd94362e bd94362e be96b37c 00000014 fffffd72 00000122 7f80: c000ebe4 ed196000 00000000 00000011 c032c0d8 ed197fa8 c0064008 c000ea20 7fa0: 60000013 ffffffff [] (__irq_svc) from [] (ret_fast_syscall+0x0/0x48) ---[ end trace 478f921d0d542d1e ]--- Mapped at: [] debug_dma_map_page+0x48/0x11c [] macb_start_xmit+0x184/0x2a8 [] dev_hard_start_xmit+0x334/0x470 [] sch_direct_xmit+0x78/0x2f8 [] __dev_queue_xmit+0x318/0x708 due to missing checks of the dma mapping. Add the appropriate checks to fix this. Signed-off-by: Soren Brinkmann Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 3190d38e16fb..1f03e2041e86 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -632,11 +632,16 @@ static void gem_rx_refill(struct macb *bp) "Unable to allocate sk_buff\n"); break; } - bp->rx_skbuff[entry] = skb; /* now fill corresponding descriptor entry */ paddr = dma_map_single(&bp->pdev->dev, skb->data, bp->rx_buffer_size, DMA_FROM_DEVICE); + if (dma_mapping_error(&bp->pdev->dev, paddr)) { + dev_kfree_skb(skb); + break; + } + + bp->rx_skbuff[entry] = skb; if (entry == RX_RING_SIZE - 1) paddr |= MACB_BIT(RX_WRAP); @@ -1036,11 +1041,15 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) } entry = macb_tx_ring_wrap(bp->tx_head); - bp->tx_head++; netdev_vdbg(bp->dev, "Allocated ring entry %u\n", entry); mapping = dma_map_single(&bp->pdev->dev, skb->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&bp->pdev->dev, mapping)) { + kfree_skb(skb); + goto unlock; + } + bp->tx_head++; tx_skb = &bp->tx_skb[entry]; tx_skb->skb = skb; tx_skb->mapping = mapping; @@ -1066,6 +1075,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < 1) netif_stop_queue(dev); +unlock: spin_unlock_irqrestore(&bp->lock, flags); return NETDEV_TX_OK; From 48330e08fa168395b9fd9f369f06cca1df204361 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Tue, 4 Mar 2014 08:46:40 -0800 Subject: [PATCH 13/70] net: macb: DMA-unmap full rx-buffer When allocating RX buffers a fixed size is used, while freeing is based on actually received bytes, resulting in the following kernel warning when CONFIG_DMA_API_DEBUG is enabled: WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1051 check_unmap+0x258/0x894() macb e000b000.ethernet: DMA-API: device driver frees DMA memory with different size [device address=0x000000002d170040] [map size=1536 bytes] [unmap size=60 bytes] Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.14.0-rc3-xilinx-00220-g49f84081ce4f #65 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0xc8) [] (dump_stack) from [] (warn_slowpath_common+0x60/0x84) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x2c/0x3c) [] (warn_slowpath_fmt) from [] (check_unmap+0x258/0x894) [] (check_unmap) from [] (debug_dma_unmap_page+0x64/0x70) [] (debug_dma_unmap_page) from [] (gem_rx+0x118/0x170) [] (gem_rx) from [] (macb_poll+0x24/0x94) [] (macb_poll) from [] (net_rx_action+0x6c/0x188) [] (net_rx_action) from [] (__do_softirq+0x108/0x280) [] (__do_softirq) from [] (irq_exit+0x84/0xf8) [] (irq_exit) from [] (handle_IRQ+0x68/0x8c) [] (handle_IRQ) from [] (gic_handle_irq+0x3c/0x60) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x78) Exception stack(0xc056df20 to 0xc056df68) df20: 00000001 c0577430 00000000 c0577430 04ce8e0d 00000002 edfce238 00000000 df40: 04e20f78 00000002 c05981f4 00000000 00000008 c056df68 c0064008 c02d7658 df60: 20000013 ffffffff [] (__irq_svc) from [] (cpuidle_enter_state+0x54/0xf8) [] (cpuidle_enter_state) from [] (cpuidle_idle_call+0xe0/0x138) [] (cpuidle_idle_call) from [] (arch_cpu_idle+0x8/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0xbc/0x124) [] (cpu_startup_entry) from [] (start_kernel+0x350/0x3b0) ---[ end trace d5fdc38641bd3a11 ]--- Mapped at: [] debug_dma_map_page+0x48/0x11c [] gem_rx_refill+0x154/0x1f8 [] macb_open+0x270/0x3e0 [] __dev_open+0x7c/0xfc [] __dev_change_flags+0x8c/0x140 Fixing this by passing the same size which is passed during mapping the memory to the unmap function as well. Signed-off-by: Soren Brinkmann Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 1f03e2041e86..d0c38e01e99f 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -730,7 +730,7 @@ static int gem_rx(struct macb *bp, int budget) skb_put(skb, len); addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr)); dma_unmap_single(&bp->pdev->dev, addr, - len, DMA_FROM_DEVICE); + bp->rx_buffer_size, DMA_FROM_DEVICE); skb->protocol = eth_type_trans(skb, bp->dev); skb_checksum_none_assert(skb); From 1b07da516ee25250f458c76c012ebe4cd677a84f Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 4 Mar 2014 14:11:06 -0800 Subject: [PATCH 14/70] hyperv: Move state setting for link query It moves the state setting for query into rndis_filter_receive_response(). All callbacks including query-complete and status-callback are synchronized by channel->inbound_lock. This prevents pentential race between them. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 ++++ drivers/net/hyperv/rndis_filter.c | 21 ++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 7141a1937360..d6fce9750b95 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -442,6 +442,8 @@ static int netvsc_probe(struct hv_device *dev, if (!net) return -ENOMEM; + netif_carrier_off(net); + net_device_ctx = netdev_priv(net); net_device_ctx->device_ctx = dev; hv_set_drvdata(dev, net); @@ -473,6 +475,8 @@ static int netvsc_probe(struct hv_device *dev, pr_err("Unable to register netdev.\n"); rndis_filter_device_remove(dev); free_netdev(net); + } else { + schedule_delayed_work(&net_device_ctx->dwork, 0); } return ret; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 1084e5de3ceb..b54fd257652b 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -243,6 +243,22 @@ static int rndis_filter_send_request(struct rndis_device *dev, return ret; } +static void rndis_set_link_state(struct rndis_device *rdev, + struct rndis_request *request) +{ + u32 link_status; + struct rndis_query_complete *query_complete; + + query_complete = &request->response_msg.msg.query_complete; + + if (query_complete->status == RNDIS_STATUS_SUCCESS && + query_complete->info_buflen == sizeof(u32)) { + memcpy(&link_status, (void *)((unsigned long)query_complete + + query_complete->info_buf_offset), sizeof(u32)); + rdev->link_state = link_status != 0; + } +} + static void rndis_filter_receive_response(struct rndis_device *dev, struct rndis_message *resp) { @@ -272,6 +288,10 @@ static void rndis_filter_receive_response(struct rndis_device *dev, sizeof(struct rndis_message) + RNDIS_EXT_LEN) { memcpy(&request->response_msg, resp, resp->msg_len); + if (request->request_msg.ndis_msg_type == + RNDIS_MSG_QUERY && request->request_msg.msg. + query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS) + rndis_set_link_state(dev, request); } else { netdev_err(ndev, "rndis response buffer overflow " @@ -620,7 +640,6 @@ static int rndis_filter_query_device_link_status(struct rndis_device *dev) ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &link_status, &size); - dev->link_state = (link_status != 0) ? true : false; return ret; } From c99b1861c232e1f641f13b8645e0febb3712cc71 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 4 Mar 2014 18:43:13 -0800 Subject: [PATCH 15/70] mwifiex: copy AP's HT capability info correctly While preparing association request, intersection of device's HT capability information and corresponding fields advertised by AP is used. This patch fixes an error while copying this field from AP's beacon. Cc: Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/11n.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mwifiex/11n.c b/drivers/net/wireless/mwifiex/11n.c index 6261f8c53d44..7db1a89fdd95 100644 --- a/drivers/net/wireless/mwifiex/11n.c +++ b/drivers/net/wireless/mwifiex/11n.c @@ -308,8 +308,7 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, ht_cap->header.len = cpu_to_le16(sizeof(struct ieee80211_ht_cap)); memcpy((u8 *) ht_cap + sizeof(struct mwifiex_ie_types_header), - (u8 *) bss_desc->bcn_ht_cap + - sizeof(struct ieee_types_header), + (u8 *)bss_desc->bcn_ht_cap, le16_to_cpu(ht_cap->header.len)); mwifiex_fill_cap_info(priv, radio_type, ht_cap); From d51246481c7f28bbfa1f814ded2da65e531cd4b2 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 4 Mar 2014 18:43:14 -0800 Subject: [PATCH 16/70] mwifiex: save and copy AP's VHT capability info correctly While preparing association request, intersection of device's VHT capability information and corresponding field advertised by AP is used. This patch fixes a couple errors while saving and copying vht_cap and vht_oper fields from AP's beacon. Cc: # 3.9+ Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/11ac.c | 3 +-- drivers/net/wireless/mwifiex/scan.c | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mwifiex/11ac.c b/drivers/net/wireless/mwifiex/11ac.c index 5e0eec4d71c7..5d9a8084665d 100644 --- a/drivers/net/wireless/mwifiex/11ac.c +++ b/drivers/net/wireless/mwifiex/11ac.c @@ -189,8 +189,7 @@ int mwifiex_cmd_append_11ac_tlv(struct mwifiex_private *priv, vht_cap->header.len = cpu_to_le16(sizeof(struct ieee80211_vht_cap)); memcpy((u8 *)vht_cap + sizeof(struct mwifiex_ie_types_header), - (u8 *)bss_desc->bcn_vht_cap + - sizeof(struct ieee_types_header), + (u8 *)bss_desc->bcn_vht_cap, le16_to_cpu(vht_cap->header.len)); mwifiex_fill_vht_cap_tlv(priv, vht_cap, bss_desc->bss_band); diff --git a/drivers/net/wireless/mwifiex/scan.c b/drivers/net/wireless/mwifiex/scan.c index 0a8a26e10f01..668547c2de84 100644 --- a/drivers/net/wireless/mwifiex/scan.c +++ b/drivers/net/wireless/mwifiex/scan.c @@ -2101,12 +2101,12 @@ mwifiex_save_curr_bcn(struct mwifiex_private *priv) curr_bss->ht_info_offset); if (curr_bss->bcn_vht_cap) - curr_bss->bcn_ht_cap = (void *)(curr_bss->beacon_buf + - curr_bss->vht_cap_offset); + curr_bss->bcn_vht_cap = (void *)(curr_bss->beacon_buf + + curr_bss->vht_cap_offset); if (curr_bss->bcn_vht_oper) - curr_bss->bcn_ht_oper = (void *)(curr_bss->beacon_buf + - curr_bss->vht_info_offset); + curr_bss->bcn_vht_oper = (void *)(curr_bss->beacon_buf + + curr_bss->vht_info_offset); if (curr_bss->bcn_bss_co_2040) curr_bss->bcn_bss_co_2040 = From bb5016eac1656506df1a9d6057ce5bec342afbef Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 6 Mar 2014 11:14:30 +0100 Subject: [PATCH 17/70] l2tp: fix manual sequencing (de)activation in L2TPv2 Commit e0d4435f "l2tp: Update PPP-over-L2TP driver to work over L2TPv3" broke the PPPOL2TP_SO_SENDSEQ setsockopt. The L2TP header length was previously computed by pppol2tp_l2t_header_len() before each call to l2tp_xmit_skb(). Now that header length is retrieved from the hdr_len session field, this field must be updated every time the L2TP header format is modified, or l2tp_xmit_skb() won't push the right amount of data for the L2TP header. This patch uses l2tp_session_set_header_len() to adjust hdr_len every time sequencing is (de)activated from userspace (either by the PPPOL2TP_SO_SENDSEQ setsockopt or the L2TP_ATTR_SEND_SEQ netlink attribute). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- net/l2tp/l2tp_core.h | 1 + net/l2tp/l2tp_netlink.c | 4 +++- net/l2tp/l2tp_ppp.c | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 735d0f60c83a..85d9d94c0a3c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -112,7 +112,6 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; -static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) @@ -1863,7 +1862,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ -static void l2tp_session_set_header_len(struct l2tp_session *session, int version) +void l2tp_session_set_header_len(struct l2tp_session *session, int version) { if (version == L2TP_HDR_VER_2) { session->hdr_len = 6; @@ -1876,6 +1875,7 @@ static void l2tp_session_set_header_len(struct l2tp_session *session, int versio } } +EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 1f01ba3435bc..3f93ccd6ba97 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -263,6 +263,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int length, int (*payload_hook)(struct sk_buff *skb)); int l2tp_session_queue_purge(struct l2tp_session *session); int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); +void l2tp_session_set_header_len(struct l2tp_session *session, int version); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 4cfd722e9153..bd7387adea9e 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -578,8 +578,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_RECV_SEQ]) session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); - if (info->attrs[L2TP_ATTR_SEND_SEQ]) + if (info->attrs[L2TP_ATTR_SEND_SEQ]) { session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); + l2tp_session_set_header_len(session, session->tunnel->version); + } if (info->attrs[L2TP_ATTR_LNS_MODE]) session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index be5fadf34739..6bfeaa777135 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1312,6 +1312,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } + l2tp_session_set_header_len(session, session->tunnel->version); l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set send_seq=%d\n", session->name, session->send_seq); From 9e9cb6221aa7cb04765484fe87cc2d1b92edce64 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 6 Mar 2014 11:15:10 +0100 Subject: [PATCH 18/70] l2tp: fix userspace reception on plain L2TP sockets As pppol2tp_recv() never queues up packets to plain L2TP sockets, pppol2tp_recvmsg() never returns data to userspace, thus making the recv*() system calls unusable. Instead of dropping packets when the L2TP socket isn't bound to a PPP channel, this patch adds them to its reception queue. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6bfeaa777135..5990919356a5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -254,12 +254,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int po = pppox_sk(sk); ppp_input(&po->chan, skb); } else { - l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: socket not bound\n", - session->name); + l2tp_dbg(session, PPPOL2TP_MSG_DATA, + "%s: recv %d byte data frame, passing to L2TP socket\n", + session->name, data_len); - /* Not bound. Nothing we can do, so discard. */ - atomic_long_inc(&session->stats.rx_errors); - kfree_skb(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + } } return; From 6d4ebeb4df0176b1973875840a9f7e91394c0685 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:16 +0100 Subject: [PATCH 19/70] tipc: allow connection shutdown callback to be invoked in advance Currently connection shutdown callback function is called when connection instance is released in tipc_conn_kref_release(), and receiving packets and sending packets are running in different threads. Even if connection is closed by the thread of receiving packets, its shutdown callback may not be called immediately as the connection reference count is non-zero at that moment. So, although the connection is shut down by the thread of receiving packets, the thread of sending packets doesn't know it. Before its shutdown callback is invoked to tell the sending thread its connection has been closed, the sending thread may deliver messages by tipc_conn_sendmsg(), this is why the following error information appears: "Sending subscription event failed, no memory" To eliminate it, allow connection shutdown callback function to be called before connection id is removed in tipc_close_conn(), which makes the sending thread know the truth in time that its socket is closed so that it doesn't send message to it. We also remove the "Sending XXX failed..." error reporting for topology and config services. Signed-off-by: Ying Xue Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/config.c | 9 ++------- net/tipc/server.c | 8 +++----- net/tipc/subscr.c | 8 ++------ 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/net/tipc/config.c b/net/tipc/config.c index e74eef2e7490..e6d721692ae0 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -376,7 +376,6 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, struct tipc_cfg_msg_hdr *req_hdr; struct tipc_cfg_msg_hdr *rep_hdr; struct sk_buff *rep_buf; - int ret; /* Validate configuration message header (ignore invalid message) */ req_hdr = (struct tipc_cfg_msg_hdr *)buf; @@ -398,12 +397,8 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); rep_hdr->tcm_len = htonl(rep_buf->len); rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - - ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, - rep_buf->len); - if (ret < 0) - pr_err("Sending cfg reply message failed, no memory\n"); - + tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, + rep_buf->len); kfree_skb(rep_buf); } } diff --git a/net/tipc/server.c b/net/tipc/server.c index 373979789a73..bbaa8f56024a 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -87,7 +87,6 @@ static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct tipc_server *s = con->server; if (con->sock) { tipc_sock_release_local(con->sock); @@ -95,10 +94,6 @@ static void tipc_conn_kref_release(struct kref *kref) } tipc_clean_outqueues(con); - - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); - kfree(con); } @@ -181,6 +176,9 @@ static void tipc_close_conn(struct tipc_conn *con) struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + if (con->conid) + s->tipc_conn_shutdown(con->conid, con->usr_data); + spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 7cb0bd5b1176..a6ce3bbf3eaf 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -96,20 +96,16 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, { struct tipc_subscriber *subscriber = sub->subscriber; struct kvec msg_sect; - int ret; msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); sub->evt.found_lower = htohl(found_lower, sub->swap); sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, - msg_sect.iov_base, msg_sect.iov_len); - if (ret < 0) - pr_err("Sending subscription event failed, no memory\n"); + tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base, + msg_sect.iov_len); } /** From 4652edb70e8a7eebbe47fa931940f65522c36e8f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:17 +0100 Subject: [PATCH 20/70] tipc: fix connection refcount leak When tipc_conn_sendmsg() calls tipc_conn_lookup() to query a connection instance, its reference count value is increased if it's found. But subsequently if it's found that the connection is closed, the work of sending message is not queued into its server send workqueue, and the connection reference count is not decreased. This will cause a reference count leak. To reproduce this problem, an application would need to open and closes topology server connections with high intensity. We fix this by immediately decrementing the connection reference count if a send fails due to the connection being closed. Signed-off-by: Ying Xue Acked-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/server.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index bbaa8f56024a..646a930eefbf 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -427,10 +427,12 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(&e->list, &con->outqueue); spin_unlock_bh(&con->outqueue_lock); - if (test_bit(CF_CONNECTED, &con->flags)) + if (test_bit(CF_CONNECTED, &con->flags)) { if (!queue_work(s->send_wq, &con->swork)) conn_put(con); - + } else { + conn_put(con); + } return 0; } From fe8e4649397915cf3b2ab0b695929a27e543967e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:18 +0100 Subject: [PATCH 21/70] tipc: avoid to unnecessary process switch under non-block mode When messages are received via tipc socket under non-block mode, schedule_timeout() is called in tipc_wait_for_rcvmsg(), that is, the process of receiving messages will be scheduled once although timeout value passed to schedule_timeout() is 0. The same issue exists in accept()/wait_for_accept(). To avoid this unnecessary process switch, we only call schedule_timeout() if the timeout value is non-zero. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a4cf274455aa..0ed0eaa62f29 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -997,7 +997,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { if (sock->state == SS_DISCONNECTING) { err = -ENOTCONN; break; @@ -1623,7 +1623,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) for (;;) { prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); From edcc0511b5ee7235282a688cd604e3ae7f9e1fc9 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:19 +0100 Subject: [PATCH 22/70] tipc: drop subscriber connection id invalidation When a topology server subscriber is disconnected, the associated connection id is set to zero. A check vs zero is then done in the subscription timeout function to see if the subscriber have been shut down. This is unnecessary, because all subscription timers will be cancelled when a subscriber terminates. Setting the connection id to zero is actually harmful because id zero is the identity of the topology server listening socket, and can cause a race that leads to this socket being closed instead. Signed-off-by: Erik Hugne Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/subscr.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index a6ce3bbf3eaf..11c9ae00837d 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -149,14 +149,6 @@ static void subscr_timeout(struct tipc_subscription *sub) /* The spin lock per subscriber is used to protect its members */ spin_lock_bh(&subscriber->lock); - /* Validate if the connection related to the subscriber is - * closed (in case subscriber is terminating) - */ - if (subscriber->conid == 0) { - spin_unlock_bh(&subscriber->lock); - return; - } - /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { spin_unlock_bh(&subscriber->lock); @@ -211,9 +203,6 @@ static void subscr_release(struct tipc_subscriber *subscriber) spin_lock_bh(&subscriber->lock); - /* Invalidate subscriber reference */ - subscriber->conid = 0; - /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { From 1bb8dce57f4d15233688c68990852a10eb1cd79f Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:20 +0100 Subject: [PATCH 23/70] tipc: fix memory leak during module removal When the TIPC module is removed, the tasklet handler is disabled before all other subsystems. This will cause lingering publications in the name table because the node_down tasklets responsible to clean up publications from an unreachable node will never run. When the name table is shut down, these publications are detected and an error message is logged: tipc: nametbl_stop(): orphaned hash chain detected This is actually a memory leak, introduced with commit 993b858e37b3120ee76d9957a901cca22312ffaa ("tipc: correct the order of stopping services at rmmod") Instead of just logging an error and leaking memory, we free the orphaned entries during nametable shutdown. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/name_table.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 48302be175ce..042e8e3cabc0 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -941,17 +941,48 @@ int tipc_nametbl_init(void) return 0; } +/** + * tipc_purge_publications - remove all publications for a given type + * + * tipc_nametbl_lock must be held when calling this function + */ +static void tipc_purge_publications(struct name_seq *seq) +{ + struct publication *publ, *safe; + struct sub_seq *sseq; + struct name_info *info; + + if (!seq->sseqs) { + nameseq_delete_empty(seq); + return; + } + sseq = seq->sseqs; + info = sseq->info; + list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { + tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, + publ->ref, publ->key); + } +} + void tipc_nametbl_stop(void) { u32 i; + struct name_seq *seq; + struct hlist_head *seq_head; + struct hlist_node *safe; - /* Verify name table is empty, then release it */ + /* Verify name table is empty and purge any lingering + * publications, then release the name table + */ write_lock_bh(&tipc_nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&table.types[i])) continue; - pr_err("nametbl_stop(): orphaned hash chain detected\n"); - break; + seq_head = &table.types[i]; + hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { + tipc_purge_publications(seq); + } + continue; } kfree(table.types); table.types = NULL; From 2892505ea170094f982516bb38105eac45f274b1 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:21 +0100 Subject: [PATCH 24/70] tipc: don't log disabled tasklet handler errors Failure to schedule a TIPC tasklet with tipc_k_signal because the tasklet handler is disabled is not an error. It means TIPC is currently in the process of shutting down. We remove the error logging in this case. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/handler.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/tipc/handler.c b/net/tipc/handler.c index e4bc8a296744..1fabf160501f 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -58,7 +58,6 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument) spin_lock_bh(&qitem_lock); if (!handler_enabled) { - pr_err("Signal request ignored by handler\n"); spin_unlock_bh(&qitem_lock); return -ENOPROTOOPT; } From e588e2f286ed7da011ed357c24c5b9a554e26595 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Mar 2014 18:06:41 +0100 Subject: [PATCH 25/70] inet: frag: make sure forced eviction removes all frags Quoting Alexander Aring: While fragmentation and unloading of 6lowpan module I got this kernel Oops after few seconds: BUG: unable to handle kernel paging request at f88bbc30 [..] Modules linked in: ipv6 [last unloaded: 6lowpan] Call Trace: [] ? call_timer_fn+0x54/0xb3 [] ? process_timeout+0xa/0xa [] run_timer_softirq+0x140/0x15f Problem is that incomplete frags are still around after unload; when their frag expire timer fires, we get crash. When a netns is removed (also done when unloading module), inet_frag calls the evictor with 'force' argument to purge remaining frags. The evictor loop terminates when accounted memory ('work') drops to 0 or the lru-list becomes empty. However, the mem accounting is done via percpu counters and may not be accurate, i.e. loop may terminate prematurely. Alter evictor to only stop once the lru list is empty when force is requested. Reported-by: Phoebe Buckheister Reported-by: Alexander Aring Tested-by: Alexander Aring Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 322dcebfc588..3b01959bf4bb 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -208,7 +208,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) } work = frag_mem_limit(nf) - nf->low_thresh; - while (work > 0) { + while (work > 0 || force) { spin_lock(&nf->lru_lock); if (list_empty(&nf->lru_list)) { From 4ae6e50c76def306d726a5d2678e88998ad5258e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 4 Mar 2014 17:35:44 -0700 Subject: [PATCH 26/70] phy: fix compiler array bounds warning on settings[] With -Werror=array-bounds, gcc v4.7.x warns that in phy_find_valid(), the settings[] "array subscript is above array bounds", I think because idx is a signed integer and if the caller supplied idx < 0, we pass the guard but still reference out of bounds. Fix this by making idx unsigned here and elsewhere. Signed-off-by: Bjorn Helgaas Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 19c9eca0ef26..76d96b9ebcdb 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -164,9 +164,9 @@ static const struct phy_setting settings[] = { * of that setting. Returns the index of the last setting if * none of the others match. */ -static inline int phy_find_setting(int speed, int duplex) +static inline unsigned int phy_find_setting(int speed, int duplex) { - int idx = 0; + unsigned int idx = 0; while (idx < ARRAY_SIZE(settings) && (settings[idx].speed != speed || settings[idx].duplex != duplex)) @@ -185,7 +185,7 @@ static inline int phy_find_setting(int speed, int duplex) * the mask in features. Returns the index of the last setting * if nothing else matches. */ -static inline int phy_find_valid(int idx, u32 features) +static inline unsigned int phy_find_valid(unsigned int idx, u32 features) { while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) idx++; @@ -204,7 +204,7 @@ static inline int phy_find_valid(int idx, u32 features) static void phy_sanitize_settings(struct phy_device *phydev) { u32 features = phydev->supported; - int idx; + unsigned int idx; /* Sanitize settings based on PHY capabilities */ if ((features & SUPPORTED_Autoneg) == 0) @@ -954,7 +954,8 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) (phydev->interface == PHY_INTERFACE_MODE_RGMII))) { int eee_lp, eee_cap, eee_adv; u32 lp, cap, adv; - int idx, status; + int status; + unsigned int idx; /* Read phy status to properly get the right settings */ status = phy_read_status(phydev); From adca4767821e54c72d4a2f467af77923f2c87e07 Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Tue, 4 Mar 2014 17:24:10 -0800 Subject: [PATCH 27/70] net: Improve SO_TIMESTAMPING documentation and fix a minor code bug The original documentation was very unclear. The code fix is presumably related to the formerly unclear documentation: SOCK_TIMESTAMPING_RX_SOFTWARE has no effect on __sock_recv_timestamp's behavior, so calling __sock_recv_ts_and_drops from sock_recv_ts_and_drops if only SOCK_TIMESTAMPING_RX_SOFTWARE is set is pointless. This should have no user-observable effect. Signed-off-by: Andy Lutomirski Acked-by: Richard Cochran Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 48 ++++++++++++++--------- include/net/sock.h | 1 - 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 661d3c316a17..048c92b487f6 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -21,26 +21,38 @@ has such a feature). SO_TIMESTAMPING: -Instructs the socket layer which kind of information is wanted. The -parameter is an integer with some of the following bits set. Setting -other bits is an error and doesn't change the current state. +Instructs the socket layer which kind of information should be collected +and/or reported. The parameter is an integer with some of the following +bits set. Setting other bits is an error and doesn't change the current +state. -SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamp in hardware -SOF_TIMESTAMPING_TX_SOFTWARE: if SOF_TIMESTAMPING_TX_HARDWARE is off or - fails, then do it in software -SOF_TIMESTAMPING_RX_HARDWARE: return the original, unmodified time stamp - as generated by the hardware -SOF_TIMESTAMPING_RX_SOFTWARE: if SOF_TIMESTAMPING_RX_HARDWARE is off or - fails, then do it in software -SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp -SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to - the system time base -SOF_TIMESTAMPING_SOFTWARE: return system time stamp generated in - software +Four of the bits are requests to the stack to try to generate +timestamps. Any combination of them is valid. -SOF_TIMESTAMPING_TX/RX determine how time stamps are generated. -SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the -following control message: +SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamps in hardware +SOF_TIMESTAMPING_TX_SOFTWARE: try to obtain send time stamps in software +SOF_TIMESTAMPING_RX_HARDWARE: try to obtain receive time stamps in hardware +SOF_TIMESTAMPING_RX_SOFTWARE: try to obtain receive time stamps in software + +The other three bits control which timestamps will be reported in a +generated control message. If none of these bits are set or if none of +the set bits correspond to data that is available, then the control +message will not be generated: + +SOF_TIMESTAMPING_SOFTWARE: report systime if available +SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available +SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available + +It is worth noting that timestamps may be collected for reasons other +than being requested by a particular socket with +SOF_TIMESTAMPING_[TR]X_(HARD|SOFT)WARE. For example, most drivers that +can generate hardware receive timestamps ignore +SOF_TIMESTAMPING_RX_HARDWARE. It is still a good idea to set that flag +in case future drivers pay attention. + +If timestamps are reported, they will appear in a control message with +cmsg_level==SOL_SOCKET, cmsg_type==SO_TIMESTAMPING, and a payload like +this: struct scm_timestamping { struct timespec systime; diff --git a/include/net/sock.h b/include/net/sock.h index 5c3f7c3624aa..7c4167bc8266 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2186,7 +2186,6 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, { #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ - (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \ (1UL << SOCK_TIMESTAMPING_SOFTWARE) | \ (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \ (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE)) From 0a13404dd3bf4ea870e3d96270b5a382edca85c0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 5 Mar 2014 14:29:58 +1100 Subject: [PATCH 28/70] net: unix socket code abuses csum_partial The unix socket code is using the result of csum_partial to hash into a lookup table: unix_hash_fold(csum_partial(sunaddr, len, 0)); csum_partial is only guaranteed to produce something that can be folded into a checksum, as its prototype explains: * returns a 32-bit number suitable for feeding into itself * or csum_tcpudp_magic The 32bit value should not be used directly. Depending on the alignment, the ppc64 csum_partial will return different 32bit partial checksums that will fold into the same 16bit checksum. This difference causes the following testcase (courtesy of Gustavo) to sometimes fail: #include #include int main() { int fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); int i = 1; setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &i, 4); struct sockaddr addr; addr.sa_family = AF_LOCAL; bind(fd, &addr, 2); listen(fd, 128); struct sockaddr_storage ss; socklen_t sslen = (socklen_t)sizeof(ss); getsockname(fd, (struct sockaddr*)&ss, &sslen); fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); if (connect(fd, (struct sockaddr*)&ss, sslen) == -1){ perror(NULL); return 1; } printf("OK\n"); return 0; } As suggested by davem, fix this by using csum_fold to fold the partial 32bit checksum into a 16bit checksum before using it. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/unix/af_unix.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 29fc8bee9702..ce6ec6c2f4de 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -163,9 +163,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline unsigned int unix_hash_fold(__wsum n) { - unsigned int hash = (__force unsigned int)n; + unsigned int hash = (__force unsigned int)csum_fold(n); - hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); } From d746ca9561440685edb62614d1bcbbc27ff50e66 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 5 Mar 2014 14:51:37 +1100 Subject: [PATCH 29/70] ibmveth: Fix endian issues with MAC addresses The code to load a MAC address into a u64 for passing to the hypervisor via a register is broken on little endian. Create a helper function called ibmveth_encode_mac_addr which does the right thing in both big and little endian. We were storing the MAC address in a long in struct ibmveth_adapter. It's never used so remove it - we don't need another place in the driver where we create endian issues with MAC addresses. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmveth.c | 25 ++++++++++++++++--------- drivers/net/ethernet/ibm/ibmveth.h | 1 - 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 4be971590461..1fc8334fc181 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -522,10 +522,21 @@ retry: return rc; } +static u64 ibmveth_encode_mac_addr(u8 *mac) +{ + int i; + u64 encoded = 0; + + for (i = 0; i < ETH_ALEN; i++) + encoded = (encoded << 8) | mac[i]; + + return encoded; +} + static int ibmveth_open(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); - u64 mac_address = 0; + u64 mac_address; int rxq_entries = 1; unsigned long lpar_rc; int rc; @@ -579,8 +590,7 @@ static int ibmveth_open(struct net_device *netdev) adapter->rx_queue.num_slots = rxq_entries; adapter->rx_queue.toggle = 1; - memcpy(&mac_address, netdev->dev_addr, netdev->addr_len); - mac_address = mac_address >> 16; + mac_address = ibmveth_encode_mac_addr(netdev->dev_addr); rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | adapter->rx_queue.queue_len; @@ -1183,8 +1193,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) /* add the addresses to the filter table */ netdev_for_each_mc_addr(ha, netdev) { /* add the multicast address to the filter table */ - unsigned long mcast_addr = 0; - memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN); + u64 mcast_addr; + mcast_addr = ibmveth_encode_mac_addr(ha->addr); lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, IbmVethMcastAddFilter, mcast_addr); @@ -1372,9 +1382,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); - adapter->mac_addr = 0; - memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN); - netdev->irq = dev->irq; netdev->netdev_ops = &ibmveth_netdev_ops; netdev->ethtool_ops = &netdev_ethtool_ops; @@ -1383,7 +1390,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; netdev->features |= netdev->hw_features; - memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len); + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { struct kobject *kobj = &adapter->rx_buff_pool[i].kobj; diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 451ba7949e15..1f37499d4398 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -138,7 +138,6 @@ struct ibmveth_adapter { struct napi_struct napi; struct net_device_stats stats; unsigned int mcastFilterSize; - unsigned long mac_addr; void * buffer_list_addr; void * filter_list_addr; dma_addr_t buffer_list_dma; From d2d273ffabd315eecefce21a4391d44b6e156b73 Mon Sep 17 00:00:00 2001 From: Anton Nayshtut Date: Wed, 5 Mar 2014 08:30:08 +0200 Subject: [PATCH 30/70] ipv6: Fix exthdrs offload registration. Without this fix, ipv6_exthdrs_offload_init doesn't register IPPROTO_DSTOPTS offload, but returns 0 (as the IPPROTO_ROUTING registration actually succeeds). This then causes the ipv6_gso_segment to drop IPv6 packets with IPPROTO_DSTOPTS header. The issue detected and the fix verified by running MS HCK Offload LSO test on top of QEMU Windows guests, as this test sends IPv6 packets with IPPROTO_DSTOPTS. Signed-off-by: Anton Nayshtut Signed-off-by: David S. Miller --- net/ipv6/exthdrs_offload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index cf77f3abfd06..447a7fbd1bb6 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void) int ret; ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); - if (!ret) + if (ret) goto out; ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); - if (!ret) + if (ret) goto out_rt; out: From 5bd4e4c158ceb4e76ce9ed005c876d59caad8af2 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 6 Mar 2014 16:53:11 -0500 Subject: [PATCH 31/70] bonding: correctly handle out of range parameters for lp_interval We didn't correctly check cases where the value for lp_interval is not within the legal range due to a missing table terminator. This would let userspace trigger a kernel panic by specifying a value out of range: echo -1 > /sys/devices/virtual/net/bond0/bonding/lp_interval Introduced by commit 4325b374f84 ("bonding: convert lp_interval to use the new option API"). Acked-by: Nikolay Aleksandrov Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index c37878432717..298c26509095 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -121,6 +121,7 @@ static struct bond_opt_value bond_resend_igmp_tbl[] = { static struct bond_opt_value bond_lp_interval_tbl[] = { { "minval", 1, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", INT_MAX, BOND_VALFLAG_MAX}, + { NULL, -1, 0}, }; static struct bond_option bond_opts[] = { From 57352ef4f5f19969a50d42e84b274287993b576f Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 6 Mar 2014 18:28:16 +0200 Subject: [PATCH 32/70] net/mlx4_core: Fix memory access error in mlx4_QUERY_DEV_CAP_wrapper() Fix a regression introduced by [1]. outbox was accessed instead of outbox->buf. Typo was copy-pasted to [2] and [3]. [1] - cc1ade9 mlx4_core: Disable memory windows for virtual functions [2] - 4de6580 mlx4_core: Add support for steerable IB UD QPs [3] - 7ffdf72 net/mlx4_core: Add basic support for TCP/IP offloads under tunneling Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 91b69ff4b4a2..8726e34cee22 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -859,7 +859,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); /* For guests, disable vxlan tunneling */ - MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN); + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN); field &= 0xf7; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN); @@ -869,7 +869,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); /* For guests, disable mw type 2 */ - MLX4_GET(bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); @@ -883,7 +883,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, } /* turn off ipoib managed steering for guests */ - MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); field &= ~0x80; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); From 97989356af0ec8b1b1658d804892abb354127330 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 6 Mar 2014 18:28:17 +0200 Subject: [PATCH 33/70] net/mlx4_core: mlx4_init_slave() shouldn't access comm channel before PF is ready Currently, the PF call to pci_enable_sriov from the PF probe function stalls for 10 seconds times the number of VFs probed on the host. This happens because the way for such VFs to determine of the PF initialization finished, is by attempting to issue reset on the comm-channel and get timeout (after 10s). The PF probe function is called from a kenernel workqueue, and therefore during that time, rcu lock is being held and kernel's workqueue is stalled. This blocks other processes that try to use the workqueue or rcu lock. For example, interface renaming which is calling rcu_synchronize is blocked, and timedout by systemd. Changed mlx4_init_slave() to allow VF probed on the host to immediatly detect that the PF is not ready, and return EPROBE_DEFER instantly. Only when the PF finishes the initialization, allow such VFs to access the comm channel. This issue and fix are relevant only for probed VFs on the hypervisor, there is no way to pass this information to a VM until comm channel is ready, so in a VM, if PF is not ready, the first command will be timedout after 10 seconds and return EPROBE_DEFER. Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5a6105f1ba6d..30a08a60f059 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -150,6 +150,8 @@ struct mlx4_port_config { struct pci_dev *pdev; }; +static atomic_t pf_loading = ATOMIC_INIT(0); + int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { @@ -1407,6 +1409,11 @@ static int mlx4_init_slave(struct mlx4_dev *dev) u32 slave_read; u32 cmd_channel_ver; + if (atomic_read(&pf_loading)) { + mlx4_warn(dev, "PF is not ready. Deferring probe\n"); + return -EPROBE_DEFER; + } + mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); @@ -2319,7 +2326,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) if (num_vfs) { mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); + + atomic_inc(&pf_loading); err = pci_enable_sriov(pdev, num_vfs); + atomic_dec(&pf_loading); + if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); From c88507fbad8055297c1d1e21e599f46960cbee39 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 6 Mar 2014 17:51:57 +0100 Subject: [PATCH 34/70] ipv6: don't set DST_NOCOUNT for remotely added routes DST_NOCOUNT should only be used if an authorized user adds routes locally. In case of routes which are added on behalf of router advertisments this flag must not get used as it allows an unlimited number of routes getting added remotely. Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11dac21e6586..fba54a407bb2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1513,7 +1513,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; From a8d9bc2e9f5d1c5a25e33cec096d2a1652d3fd52 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 9 Mar 2014 15:45:32 -0800 Subject: [PATCH 35/70] bnx2: Fix shutdown sequence The pci shutdown handler added in: bnx2: Add pci shutdown handler commit 25bfb1dd4ba3b2d9a49ce9d9b0cd7be1840e15ed created a shutdown down sequence without chip reset if the device was never brought up. This can cause the firmware to shutdown the PHY prematurely and cause MMIO read cycles to be unresponsive. On some systems, it may generate NMI in the bnx2's pci shutdown handler. The fix is to tell the firmware not to shutdown the PHY if there was no prior chip reset. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 37 +++++++++++++++++++++++++--- drivers/net/ethernet/broadcom/bnx2.h | 5 ++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index cda25ac45b47..6c9e1c9bdeb8 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2507,6 +2507,7 @@ bnx2_fw_sync(struct bnx2 *bp, u32 msg_data, int ack, int silent) bp->fw_wr_seq++; msg_data |= bp->fw_wr_seq; + bp->fw_last_msg = msg_data; bnx2_shmem_wr(bp, BNX2_DRV_MB, msg_data); @@ -4000,8 +4001,23 @@ bnx2_setup_wol(struct bnx2 *bp) wol_msg = BNX2_DRV_MSG_CODE_SUSPEND_NO_WOL; } - if (!(bp->flags & BNX2_FLAG_NO_WOL)) - bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT3 | wol_msg, 1, 0); + if (!(bp->flags & BNX2_FLAG_NO_WOL)) { + u32 val; + + wol_msg |= BNX2_DRV_MSG_DATA_WAIT3; + if (bp->fw_last_msg || BNX2_CHIP(bp) != BNX2_CHIP_5709) { + bnx2_fw_sync(bp, wol_msg, 1, 0); + return; + } + /* Tell firmware not to power down the PHY yet, otherwise + * the chip will take a long time to respond to MMIO reads. + */ + val = bnx2_shmem_rd(bp, BNX2_PORT_FEATURE); + bnx2_shmem_wr(bp, BNX2_PORT_FEATURE, + val | BNX2_PORT_FEATURE_ASF_ENABLED); + bnx2_fw_sync(bp, wol_msg, 1, 0); + bnx2_shmem_wr(bp, BNX2_PORT_FEATURE, val); + } } @@ -4033,9 +4049,22 @@ bnx2_set_power_state(struct bnx2 *bp, pci_power_t state) if (bp->wol) pci_set_power_state(bp->pdev, PCI_D3hot); - } else { - pci_set_power_state(bp->pdev, PCI_D3hot); + break; + } + if (!bp->fw_last_msg && BNX2_CHIP(bp) == BNX2_CHIP_5709) { + u32 val; + + /* Tell firmware not to power down the PHY yet, + * otherwise the other port may not respond to + * MMIO reads. + */ + val = bnx2_shmem_rd(bp, BNX2_BC_STATE_CONDITION); + val &= ~BNX2_CONDITION_PM_STATE_MASK; + val |= BNX2_CONDITION_PM_STATE_UNPREP; + bnx2_shmem_wr(bp, BNX2_BC_STATE_CONDITION, val); + } + pci_set_power_state(bp->pdev, PCI_D3hot); /* No more memory access after this point until * device is brought back to D0. diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h index f1cf2c44e7ed..e341bc366fa5 100644 --- a/drivers/net/ethernet/broadcom/bnx2.h +++ b/drivers/net/ethernet/broadcom/bnx2.h @@ -6900,6 +6900,7 @@ struct bnx2 { u16 fw_wr_seq; u16 fw_drv_pulse_wr_seq; + u32 fw_last_msg; int rx_max_ring; int rx_ring_size; @@ -7406,6 +7407,10 @@ struct bnx2_rv2p_fw_file { #define BNX2_CONDITION_MFW_RUN_NCSI 0x00006000 #define BNX2_CONDITION_MFW_RUN_NONE 0x0000e000 #define BNX2_CONDITION_MFW_RUN_MASK 0x0000e000 +#define BNX2_CONDITION_PM_STATE_MASK 0x00030000 +#define BNX2_CONDITION_PM_STATE_FULL 0x00030000 +#define BNX2_CONDITION_PM_STATE_PREP 0x00020000 +#define BNX2_CONDITION_PM_STATE_UNPREP 0x00010000 #define BNX2_BC_STATE_DEBUG_CMD 0x1dc #define BNX2_BC_STATE_BC_DBG_CMD_SIGNATURE 0x42440000 From 37314363cd65d19c71bea5f222e5108c93dc3c78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 8 Mar 2014 08:01:19 -0800 Subject: [PATCH 36/70] pkt_sched: move the sanity test in qdisc_list_add() The WARN_ON(root == &noop_qdisc)) added in qdisc_list_add() can trigger in normal conditions when devices are not up. It should be done only right before the list_add_tail() call. Fixes: e57a784d8cae4 ("pkt_sched: set root qdisc before change() in attach_default_qdiscs()") Reported-by: Valdis Kletnieks Tested-by: Mirco Tischler Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_api.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1313145e3b86..a07d55e75698 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -273,11 +273,12 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_list_add(struct Qdisc *q) { - struct Qdisc *root = qdisc_dev(q)->qdisc; + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + struct Qdisc *root = qdisc_dev(q)->qdisc; - WARN_ON_ONCE(root == &noop_qdisc); - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) + WARN_ON_ONCE(root == &noop_qdisc); list_add_tail(&q->list, &root->list); + } } EXPORT_SYMBOL(qdisc_list_add); From bc48bc806442114ea44f61d6b18e02c2bd6236fd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 9 Mar 2014 04:03:22 +0000 Subject: [PATCH 37/70] bna: Replace large udelay() with mdelay() udelay() does not work on some architectures for values above 2000, in particular on ARM: ERROR: "__bad_udelay" [drivers/net/ethernet/brocade/bna/bna.ko] undefined! Reported-by: Vagrant Cascadian Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 1803c3959044..354ae9792bad 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1704,7 +1704,7 @@ bfa_flash_sem_get(void __iomem *bar) while (!bfa_raw_sem_get(bar)) { if (--n <= 0) return BFA_STATUS_BADFLASH; - udelay(10000); + mdelay(10); } return BFA_STATUS_OK; } From 2818fa0fa068bcbc87d6bd9064e3c1f72d6fcc2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Mar 2014 22:57:52 -0800 Subject: [PATCH 38/70] pkt_sched: fq: do not hold qdisc lock while allocating memory Resizing fq hash table allocates memory while holding qdisc spinlock, with BH disabled. This is definitely not good, as allocation might sleep. We can drop the lock and get it when needed, we hold RTNL so no other changes can happen at the same time. Signed-off-by: Eric Dumazet Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 08ef7a42c0e4..21e251766eb1 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -601,6 +601,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *array; + void *old_fq_root; u32 idx; if (q->fq_root && log == q->fq_trees_log) @@ -615,13 +616,19 @@ static int fq_resize(struct Qdisc *sch, u32 log) for (idx = 0; idx < (1U << log); idx++) array[idx] = RB_ROOT; - if (q->fq_root) { - fq_rehash(q, q->fq_root, q->fq_trees_log, array, log); - fq_free(q->fq_root); - } + sch_tree_lock(sch); + + old_fq_root = q->fq_root; + if (old_fq_root) + fq_rehash(q, old_fq_root, q->fq_trees_log, array, log); + q->fq_root = array; q->fq_trees_log = log; + sch_tree_unlock(sch); + + fq_free(old_fq_root); + return 0; } @@ -697,9 +704,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } - if (!err) + if (!err) { + sch_tree_unlock(sch); err = fq_resize(sch, fq_log); - + sch_tree_lock(sch); + } while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); From 5bd076708664313f2bdbbc1cf71093313b7774a1 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 10 Mar 2014 22:58:34 +0800 Subject: [PATCH 39/70] Xen-netback: Fix issue caused by using gso_type wrongly Current netback uses gso_type to check whether the skb contains gso offload, and this is wrong. Gso_size is the right one to check gso existence, and gso_type is only used to check gso type. Some skbs contains nonzero gso_type and zero gso_size, current netback would treat these skbs as gso and create wrong response for this. This also causes ssh failure to domu from other server. V2: use skb_is_gso function as Paul Durrant suggested Signed-off-by: Annie Li Acked-by: Wei Liu Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 39 ++++++++++++++----------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index e5284bca2d90..438d0c09b7e6 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -240,7 +240,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, struct gnttab_copy *copy_gop; struct xenvif_rx_meta *meta; unsigned long bytes; - int gso_type; + int gso_type = XEN_NETIF_GSO_TYPE_NONE; /* Data must not cross a page boundary. */ BUG_ON(size + offset > PAGE_SIZE<gso_type & SKB_GSO_TCPV4) - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - else - gso_type = XEN_NETIF_GSO_TYPE_NONE; + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + } if (*head && ((1 << gso_type) & vif->gso_mask)) vif->rx.req_cons++; @@ -338,19 +338,15 @@ static int xenvif_gop_skb(struct sk_buff *skb, int head = 1; int old_meta_prod; int gso_type; - int gso_size; old_meta_prod = npo->meta_prod; - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - gso_size = skb_shinfo(skb)->gso_size; - } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - gso_size = skb_shinfo(skb)->gso_size; - } else { - gso_type = XEN_NETIF_GSO_TYPE_NONE; - gso_size = 0; + gso_type = XEN_NETIF_GSO_TYPE_NONE; + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; } /* Set up a GSO prefix descriptor, if necessary */ @@ -358,7 +354,7 @@ static int xenvif_gop_skb(struct sk_buff *skb, req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; meta->gso_type = gso_type; - meta->gso_size = gso_size; + meta->gso_size = skb_shinfo(skb)->gso_size; meta->size = 0; meta->id = req->id; } @@ -368,7 +364,7 @@ static int xenvif_gop_skb(struct sk_buff *skb, if ((1 << gso_type) & vif->gso_mask) { meta->gso_type = gso_type; - meta->gso_size = gso_size; + meta->gso_size = skb_shinfo(skb)->gso_size; } else { meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; @@ -500,8 +496,9 @@ static void xenvif_rx_action(struct xenvif *vif) size = skb_frag_size(&skb_shinfo(skb)->frags[i]); max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE); } - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + if (skb_is_gso(skb) && + (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || + skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) max_slots_needed++; /* If the skb may not fit then bail out now */ From dd38743b4cc2f86be250eaf156cf113ba3dd531a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Bostr=C3=B6m?= Date: Mon, 10 Mar 2014 16:17:15 +0100 Subject: [PATCH 40/70] vlan: Set correct source MAC address with TX VLAN offload enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With TX VLAN offload enabled the source MAC address for frames sent using the VLAN interface is currently set to the address of the real interface. This is wrong since the VLAN interface may be configured with a different address. The bug was introduced in commit 2205369a314e12fcec4781cc73ac9c08fc2b47de ("vlan: Fix header ops passthru when doing TX VLAN offload."). This patch sets the source address before calling the create function of the real interface. Signed-off-by: Peter Boström Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index de51c48c4393..4b65aa492fb6 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -538,6 +538,9 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; + if (saddr == NULL) + saddr = dev->dev_addr; + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); } From d25f06ea466ea521b563b76661180b4e44714ae6 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 10 Mar 2014 06:55:55 -0400 Subject: [PATCH 41/70] vmxnet3: fix netpoll race condition vmxnet3's netpoll driver is incorrectly coded. It directly calls vmxnet3_do_poll, which is the driver internal napi poll routine. As the netpoll controller method doesn't block real napi polls in any way, there is a potential for race conditions in which the netpoll controller method and the napi poll method run concurrently. The result is data corruption causing panics such as this one recently observed: PID: 1371 TASK: ffff88023762caa0 CPU: 1 COMMAND: "rs:main Q:Reg" #0 [ffff88023abd5780] machine_kexec at ffffffff81038f3b #1 [ffff88023abd57e0] crash_kexec at ffffffff810c5d92 #2 [ffff88023abd58b0] oops_end at ffffffff8152b570 #3 [ffff88023abd58e0] die at ffffffff81010e0b #4 [ffff88023abd5910] do_trap at ffffffff8152add4 #5 [ffff88023abd5970] do_invalid_op at ffffffff8100cf95 #6 [ffff88023abd5a10] invalid_op at ffffffff8100bf9b [exception RIP: vmxnet3_rq_rx_complete+1968] RIP: ffffffffa00f1e80 RSP: ffff88023abd5ac8 RFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff88023b5dcee0 RCX: 00000000000000c0 RDX: 0000000000000000 RSI: 00000000000005f2 RDI: ffff88023b5dcee0 RBP: ffff88023abd5b48 R8: 0000000000000000 R9: ffff88023a3b6048 R10: 0000000000000000 R11: 0000000000000002 R12: ffff8802398d4cd8 R13: ffff88023af35140 R14: ffff88023b60c890 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff88023abd5b50] vmxnet3_do_poll at ffffffffa00f204a [vmxnet3] #8 [ffff88023abd5b80] vmxnet3_netpoll at ffffffffa00f209c [vmxnet3] #9 [ffff88023abd5ba0] netpoll_poll_dev at ffffffff81472bb7 The fix is to do as other drivers do, and have the poll controller call the top half interrupt handler, which schedules a napi poll properly to recieve frames Tested by myself, successfully. Signed-off-by: Neil Horman CC: Shreyas Bhatewara CC: "VMware, Inc." CC: "David S. Miller" CC: stable@vger.kernel.org Reviewed-by: Shreyas N Bhatewara Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3be786faaaec..b7daa02ff026 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1761,12 +1761,18 @@ static void vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); + int i; - if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE) - vmxnet3_disable_all_intrs(adapter); - - vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size); - vmxnet3_enable_all_intrs(adapter); + switch (adapter->intr.type) { + case VMXNET3_IT_MSIX: + for (i = 0; i < adapter->num_rx_queues; i++) + vmxnet3_msix_rx(0, &adapter->rx_queue[i]); + break; + case VMXNET3_IT_MSI: + default: + vmxnet3_intr(0, adapter->netdev); + break; + } } #endif /* CONFIG_NET_POLL_CONTROLLER */ From 83bf79b6bb64e686ccf0b66b6828b7bfe9f70ae9 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:31 +0100 Subject: [PATCH 42/70] stmmac: disable at run-time the EEE if not supported This patch is to disable the EEE (so HW and timers) for example when the phy communicates that the EEE can be supported anymore. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 078ad0ec8593..b418f9437bf0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -286,10 +286,25 @@ bool stmmac_eee_init(struct stmmac_priv *priv) /* MAC core supports the EEE feature. */ if (priv->dma_cap.eee) { - /* Check if the PHY supports EEE */ - if (phy_init_eee(priv->phydev, 1)) - goto out; + int tx_lpi_timer = priv->tx_lpi_timer; + /* Check if the PHY supports EEE */ + if (phy_init_eee(priv->phydev, 1)) { + /* To manage at run-time if the EEE cannot be supported + * anymore (for example because the lp caps have been + * changed). + * In that case the driver disable own timers. + */ + if (priv->eee_active) { + pr_debug("stmmac: disable EEE\n"); + del_timer_sync(&priv->eee_ctrl_timer); + priv->hw->mac->set_eee_timer(priv->ioaddr, 0, + tx_lpi_timer); + } + priv->eee_active = 0; + goto out; + } + /* Activate the EEE and start timers */ if (!priv->eee_active) { priv->eee_active = 1; init_timer(&priv->eee_ctrl_timer); @@ -300,13 +315,13 @@ bool stmmac_eee_init(struct stmmac_priv *priv) priv->hw->mac->set_eee_timer(priv->ioaddr, STMMAC_DEFAULT_LIT_LS, - priv->tx_lpi_timer); + tx_lpi_timer); } else /* Set HW EEE according to the speed */ priv->hw->mac->set_eee_pls(priv->ioaddr, priv->phydev->link); - pr_info("stmmac: Energy-Efficient Ethernet initialized\n"); + pr_debug("stmmac: Energy-Efficient Ethernet initialized\n"); ret = true; } From d916701c670701e1ab4d0ed3d4e64d6023bccec9 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:32 +0100 Subject: [PATCH 43/70] stmmac: fix and better tune the default buffer sizes This patch is to fix and tune the default buffer sizes. It reduces the default bufsize used by the driver from 4KiB to 1536 bytes. Patch has been tested on both ARM and SH4 platform based. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b418f9437bf0..536583655643 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -92,8 +92,8 @@ static int tc = TC_DEFAULT; module_param(tc, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(tc, "DMA threshold control value"); -#define DMA_BUFFER_SIZE BUF_SIZE_4KiB -static int buf_sz = DMA_BUFFER_SIZE; +#define DEFAULT_BUFSIZE 1536 +static int buf_sz = DEFAULT_BUFSIZE; module_param(buf_sz, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(buf_sz, "DMA buffer size"); @@ -136,8 +136,8 @@ static void stmmac_verify_args(void) dma_rxsize = DMA_RX_SIZE; if (unlikely(dma_txsize < 0)) dma_txsize = DMA_TX_SIZE; - if (unlikely((buf_sz < DMA_BUFFER_SIZE) || (buf_sz > BUF_SIZE_16KiB))) - buf_sz = DMA_BUFFER_SIZE; + if (unlikely((buf_sz < DEFAULT_BUFSIZE) || (buf_sz > BUF_SIZE_16KiB))) + buf_sz = DEFAULT_BUFSIZE; if (unlikely(flow_ctrl > 1)) flow_ctrl = FLOW_AUTO; else if (likely(flow_ctrl < 0)) @@ -901,10 +901,10 @@ static int stmmac_set_bfsize(int mtu, int bufsize) ret = BUF_SIZE_8KiB; else if (mtu >= BUF_SIZE_2KiB) ret = BUF_SIZE_4KiB; - else if (mtu >= DMA_BUFFER_SIZE) + else if (mtu > DEFAULT_BUFSIZE) ret = BUF_SIZE_2KiB; else - ret = DMA_BUFFER_SIZE; + ret = DEFAULT_BUFSIZE; return ret; } From 29896a674c8ef3d75134dacb7b9cbb3f5b894b6d Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:33 +0100 Subject: [PATCH 44/70] stmmac: fix chained mode This patch is to fix the chain mode that was broken and generated a panic. This patch reviews the chain/ring modes now shaing the same structure and taking care about the pointers and callbacks. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/chain_mode.c | 2 +- drivers/net/ethernet/stmicro/stmmac/common.h | 20 ++----- .../net/ethernet/stmicro/stmmac/ring_mode.c | 9 ++- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 56 ++++++++----------- 4 files changed, 34 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c index 72d282bf33a5..c553f6b5a913 100644 --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c @@ -151,7 +151,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) sizeof(struct dma_desc))); } -const struct stmmac_chain_mode_ops chain_mode_ops = { +const struct stmmac_mode_ops chain_mode_ops = { .init = stmmac_init_dma_chain, .is_jumbo_frm = stmmac_is_jumbo_frm, .jumbo_frm = stmmac_jumbo_frm, diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 7834a3993946..74610f3aca9e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -419,20 +419,13 @@ struct mii_regs { unsigned int data; /* MII Data */ }; -struct stmmac_ring_mode_ops { - unsigned int (*is_jumbo_frm) (int len, int ehn_desc); - unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum); - void (*refill_desc3) (void *priv, struct dma_desc *p); - void (*init_desc3) (struct dma_desc *p); - void (*clean_desc3) (void *priv, struct dma_desc *p); - int (*set_16kib_bfsize) (int mtu); -}; - -struct stmmac_chain_mode_ops { +struct stmmac_mode_ops { void (*init) (void *des, dma_addr_t phy_addr, unsigned int size, unsigned int extend_desc); unsigned int (*is_jumbo_frm) (int len, int ehn_desc); unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum); + int (*set_16kib_bfsize)(int mtu); + void (*init_desc3)(struct dma_desc *p); void (*refill_desc3) (void *priv, struct dma_desc *p); void (*clean_desc3) (void *priv, struct dma_desc *p); }; @@ -441,8 +434,7 @@ struct mac_device_info { const struct stmmac_ops *mac; const struct stmmac_desc_ops *desc; const struct stmmac_dma_ops *dma; - const struct stmmac_ring_mode_ops *ring; - const struct stmmac_chain_mode_ops *chain; + const struct stmmac_mode_ops *mode; const struct stmmac_hwtimestamp *ptp; struct mii_regs mii; /* MII register Addresses */ struct mac_link link; @@ -460,7 +452,7 @@ void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, void stmmac_set_mac(void __iomem *ioaddr, bool enable); void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr); -extern const struct stmmac_ring_mode_ops ring_mode_ops; -extern const struct stmmac_chain_mode_ops chain_mode_ops; +extern const struct stmmac_mode_ops ring_mode_ops; +extern const struct stmmac_mode_ops chain_mode_ops; #endif /* __COMMON_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index a96c7c2f5f3f..650a4be6bce5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -100,10 +100,9 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) { struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; - if (unlikely(priv->plat->has_gmac)) - /* Fill DES3 in case of RING mode */ - if (priv->dma_buf_sz >= BUF_SIZE_8KiB) - p->des3 = p->des2 + BUF_SIZE_8KiB; + /* Fill DES3 in case of RING mode */ + if (priv->dma_buf_sz >= BUF_SIZE_8KiB) + p->des3 = p->des2 + BUF_SIZE_8KiB; } /* In ring mode we need to fill the desc3 because it is used as buffer */ @@ -126,7 +125,7 @@ static int stmmac_set_16kib_bfsize(int mtu) return ret; } -const struct stmmac_ring_mode_ops ring_mode_ops = { +const struct stmmac_mode_ops ring_mode_ops = { .is_jumbo_frm = stmmac_is_jumbo_frm, .jumbo_frm = stmmac_jumbo_frm, .refill_desc3 = stmmac_refill_desc3, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 536583655643..8543e1cfd55e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -966,9 +966,9 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, p->des2 = priv->rx_skbuff_dma[i]; - if ((priv->mode == STMMAC_RING_MODE) && + if ((priv->hw->mode->init_desc3) && (priv->dma_buf_sz == BUF_SIZE_16KiB)) - priv->hw->ring->init_desc3(p); + priv->hw->mode->init_desc3(p); return 0; } @@ -999,11 +999,8 @@ static int init_dma_desc_rings(struct net_device *dev) unsigned int bfsize = 0; int ret = -ENOMEM; - /* Set the max buffer size according to the DESC mode - * and the MTU. Note that RING mode allows 16KiB bsize. - */ - if (priv->mode == STMMAC_RING_MODE) - bfsize = priv->hw->ring->set_16kib_bfsize(dev->mtu); + if (priv->hw->mode->set_16kib_bfsize) + bfsize = priv->hw->mode->set_16kib_bfsize(dev->mtu); if (bfsize < BUF_SIZE_16KiB) bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz); @@ -1044,15 +1041,15 @@ static int init_dma_desc_rings(struct net_device *dev) /* Setup the chained descriptor addresses */ if (priv->mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) { - priv->hw->chain->init(priv->dma_erx, priv->dma_rx_phy, - rxsize, 1); - priv->hw->chain->init(priv->dma_etx, priv->dma_tx_phy, - txsize, 1); + priv->hw->mode->init(priv->dma_erx, priv->dma_rx_phy, + rxsize, 1); + priv->hw->mode->init(priv->dma_etx, priv->dma_tx_phy, + txsize, 1); } else { - priv->hw->chain->init(priv->dma_rx, priv->dma_rx_phy, - rxsize, 0); - priv->hw->chain->init(priv->dma_tx, priv->dma_tx_phy, - txsize, 0); + priv->hw->mode->init(priv->dma_rx, priv->dma_rx_phy, + rxsize, 0); + priv->hw->mode->init(priv->dma_tx, priv->dma_tx_phy, + txsize, 0); } } @@ -1303,7 +1300,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) DMA_TO_DEVICE); priv->tx_skbuff_dma[entry] = 0; } - priv->hw->ring->clean_desc3(priv, p); + priv->hw->mode->clean_desc3(priv, p); if (likely(skb != NULL)) { dev_kfree_skb(skb); @@ -1859,6 +1856,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int nfrags = skb_shinfo(skb)->nr_frags; struct dma_desc *desc, *first; unsigned int nopaged_len = skb_headlen(skb); + unsigned int enh_desc = priv->plat->enh_desc; if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) { if (!netif_queue_stopped(dev)) { @@ -1886,27 +1884,19 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) first = desc; /* To program the descriptors according to the size of the frame */ - if (priv->mode == STMMAC_RING_MODE) { - is_jumbo = priv->hw->ring->is_jumbo_frm(skb->len, - priv->plat->enh_desc); - if (unlikely(is_jumbo)) - entry = priv->hw->ring->jumbo_frm(priv, skb, - csum_insertion); - } else { - is_jumbo = priv->hw->chain->is_jumbo_frm(skb->len, - priv->plat->enh_desc); - if (unlikely(is_jumbo)) - entry = priv->hw->chain->jumbo_frm(priv, skb, - csum_insertion); - } + if (enh_desc) + is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc); + if (likely(!is_jumbo)) { desc->des2 = dma_map_single(priv->device, skb->data, nopaged_len, DMA_TO_DEVICE); priv->tx_skbuff_dma[entry] = desc->des2; priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum_insertion, priv->mode); - } else + } else { desc = first; + entry = priv->hw->mode->jumbo_frm(priv, skb, csum_insertion); + } for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -2044,7 +2034,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) p->des2 = priv->rx_skbuff_dma[entry]; - priv->hw->ring->refill_desc3(priv, p); + priv->hw->mode->refill_desc3(priv, p); if (netif_msg_rx_status(priv)) pr_debug("\trefill entry #%d\n", entry); @@ -2648,11 +2638,11 @@ static int stmmac_hw_init(struct stmmac_priv *priv) /* To use the chained or ring mode */ if (chain_mode) { - priv->hw->chain = &chain_mode_ops; + priv->hw->mode = &chain_mode_ops; pr_info(" Chain mode enabled\n"); priv->mode = STMMAC_CHAIN_MODE; } else { - priv->hw->ring = &ring_mode_ops; + priv->hw->mode = &ring_mode_ops; pr_info(" Ring mode enabled\n"); priv->mode = STMMAC_RING_MODE; } From c5e9103dc397114523c65bd24ff0548bcdca54b4 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:34 +0100 Subject: [PATCH 45/70] stmmac: dwmac-sti: fix broken STiD127 compatibility This is to fix the compatibility to the STiD127 SoC. Signed-off-by: Giuseppe Cavallaro Cc: Srinivas Kandagatla Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index c61bc72b8e90..8fb32a80f1c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -36,7 +36,7 @@ static const struct of_device_id stmmac_dt_ids[] = { #ifdef CONFIG_DWMAC_STI { .compatible = "st,stih415-dwmac", .data = &sti_gmac_data}, { .compatible = "st,stih416-dwmac", .data = &sti_gmac_data}, - { .compatible = "st,stih127-dwmac", .data = &sti_gmac_data}, + { .compatible = "st,stid127-dwmac", .data = &sti_gmac_data}, #endif /* SoC specific glue layers should come before generic bindings */ { .compatible = "st,spear600-gmac"}, From 8cb19905e9287a93ce7c2cbbdf742a060b00e219 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:04 +0200 Subject: [PATCH 46/70] skbuff: skb_segment: s/frag/nskb_frag/ frag points at nskb, so name it appropriately Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/skbuff.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5d6236d9fdce..60e8cd717cb3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2876,7 +2876,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) do { struct sk_buff *nskb; - skb_frag_t *frag; + skb_frag_t *nskb_frag; int hsize; int size; @@ -2969,7 +2969,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) continue; } - frag = skb_shinfo(nskb)->frags; + nskb_frag = skb_shinfo(nskb)->frags; skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); @@ -2997,13 +2997,13 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) goto err; } - *frag = *skb_frag; - __skb_frag_ref(frag); - size = skb_frag_size(frag); + *nskb_frag = *skb_frag; + __skb_frag_ref(nskb_frag); + size = skb_frag_size(nskb_frag); if (pos < offset) { - frag->page_offset += offset - pos; - skb_frag_size_sub(frag, offset - pos); + nskb_frag->page_offset += offset - pos; + skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; @@ -3013,11 +3013,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_frag++; pos += size; } else { - skb_frag_size_sub(frag, pos + size - (offset + len)); + skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } - frag++; + nskb_frag++; } skip_fraglist: From 4e1beba12d094c6c761ba5c49032b9b9e46380e8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:14 +0200 Subject: [PATCH 47/70] skbuff: skb_segment: s/skb_frag/frag/ skb_frag can in fact point at either skb or fskb so rename it generally "frag". Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/skbuff.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 60e8cd717cb3..d788a9845762 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2850,7 +2850,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - skb_frag_t *skb_frag = skb_shinfo(skb)->frags; + skb_frag_t *frag = skb_shinfo(skb)->frags; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2896,19 +2896,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) i = 0; nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; + frag = skb_shinfo(fskb)->frags; pos += skb_headlen(fskb); while (pos < offset + len) { BUG_ON(i >= nfrags); - size = skb_frag_size(skb_frag); + size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; - skb_frag++; + frag++; } nskb = skb_clone(fskb, GFP_ATOMIC); @@ -2982,7 +2982,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) i = 0; nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; + frag = skb_shinfo(fskb)->frags; BUG_ON(!nfrags); @@ -2997,7 +2997,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) goto err; } - *nskb_frag = *skb_frag; + *nskb_frag = *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); @@ -3010,7 +3010,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (pos + size <= offset + len) { i++; - skb_frag++; + frag++; pos += size; } else { skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); From df5771ffefb13f8af5392bd54fd7e2b596a3a357 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:19 +0200 Subject: [PATCH 48/70] skbuff: skb_segment: s/skb/head_skb/ rename local variable to make it easier to tell at a glance that we are dealing with a head skb. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/skbuff.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d788a9845762..fdc065dc869a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2838,41 +2838,42 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); /** * skb_segment - Perform protocol segmentation on skb. - * @skb: buffer to segment + * @head_skb: buffer to segment * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) +struct sk_buff *skb_segment(struct sk_buff *head_skb, + netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - skb_frag_t *frag = skb_shinfo(skb)->frags; - unsigned int mss = skb_shinfo(skb)->gso_size; - unsigned int doffset = skb->data - skb_mac_header(skb); + struct sk_buff *fskb = skb_shinfo(head_skb)->frag_list; + skb_frag_t *frag = skb_shinfo(head_skb)->frags; + unsigned int mss = skb_shinfo(head_skb)->gso_size; + unsigned int doffset = head_skb->data - skb_mac_header(head_skb); unsigned int offset = doffset; - unsigned int tnl_hlen = skb_tnl_header_len(skb); + unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int headroom; unsigned int len; __be16 proto; bool csum; int sg = !!(features & NETIF_F_SG); - int nfrags = skb_shinfo(skb)->nr_frags; + int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; - proto = skb_network_protocol(skb); + proto = skb_network_protocol(head_skb); if (unlikely(!proto)) return ERR_PTR(-EINVAL); csum = !!can_checksum_protocol(features, proto); - __skb_push(skb, doffset); - headroom = skb_headroom(skb); - pos = skb_headlen(skb); + __skb_push(head_skb, doffset); + headroom = skb_headroom(head_skb); + pos = skb_headlen(head_skb); do { struct sk_buff *nskb; @@ -2880,11 +2881,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) int hsize; int size; - len = skb->len - offset; + len = head_skb->len - offset; if (len > mss) len = mss; - hsize = skb_headlen(skb) - offset; + hsize = skb_headlen(head_skb) - offset; if (hsize < 0) hsize = 0; if (hsize > len || !sg) @@ -2933,7 +2934,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) __skb_push(nskb, doffset); } else { nskb = __alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC, skb_alloc_rx_flag(skb), + GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) @@ -2949,12 +2950,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) segs = nskb; tail = nskb; - __copy_skb_header(nskb, skb); - nskb->mac_len = skb->mac_len; + __copy_skb_header(nskb, head_skb); + nskb->mac_len = head_skb->mac_len; skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); - skb_copy_from_linear_data_offset(skb, -tnl_hlen, + skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); @@ -2963,7 +2964,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (!sg) { nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(skb, offset, + nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); continue; @@ -2971,10 +2972,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) nskb_frag = skb_shinfo(nskb)->frags; - skb_copy_from_linear_data_offset(skb, offset, + skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { @@ -3031,7 +3033,7 @@ perform_csum_check: nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; } - } while ((offset += len) < skb->len); + } while ((offset += len) < head_skb->len); return segs; From 1a4cedaf65491e66e1e55b8428c89209da729209 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 19:27:59 +0200 Subject: [PATCH 49/70] skbuff: skb_segment: s/fskb/list_skb/ fskb is unrelated to frag: it's coming from frag_list. Rename it list_skb to avoid confusion. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/skbuff.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fdc065dc869a..dc4f7683ff52 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2850,7 +2850,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(head_skb)->frag_list; + struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); @@ -2891,14 +2891,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags && skb_headlen(fskb) && - (skb_headlen(fskb) == len || sg)) { - BUG_ON(skb_headlen(fskb) > len); + if (!hsize && i >= nfrags && skb_headlen(list_skb) && + (skb_headlen(list_skb) == len || sg)) { + BUG_ON(skb_headlen(list_skb) > len); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - frag = skb_shinfo(fskb)->frags; - pos += skb_headlen(fskb); + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; + pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); @@ -2912,8 +2912,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, frag++; } - nskb = skb_clone(fskb, GFP_ATOMIC); - fskb = fskb->next; + nskb = skb_clone(list_skb, GFP_ATOMIC); + list_skb = list_skb->next; if (unlikely(!nskb)) goto err; @@ -2980,15 +2980,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, while (pos < offset + len) { if (i >= nfrags) { - BUG_ON(skb_headlen(fskb)); + BUG_ON(skb_headlen(list_skb)); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - frag = skb_shinfo(fskb)->frags; + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; BUG_ON(!nfrags); - fskb = fskb->next; + list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= From 1fd819ecb90cc9b822cd84d3056ddba315d3340f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 19:28:08 +0200 Subject: [PATCH 50/70] skbuff: skb_segment: orphan frags before copying skb_segment copies frags around, so we need to copy them carefully to avoid accessing user memory after reporting completion to userspace through a callback. skb_segment doesn't normally happen on datapath: TSO needs to be disabled - so disabling zero copy in this case does not look like a big deal. Signed-off-by: Michael S. Tsirkin Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dc4f7683ff52..869c7afe3b07 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2854,6 +2854,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); + struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int headroom; @@ -2898,6 +2899,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; pos += skb_headlen(list_skb); while (pos < offset + len) { @@ -2985,6 +2987,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; BUG_ON(!nfrags); @@ -2999,6 +3002,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, goto err; } + if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) + goto err; + *nskb_frag = *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); From c3f9b01849ef3bc69024990092b9f42e20df7797 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Mar 2014 09:50:11 -0700 Subject: [PATCH 51/70] tcp: tcp_release_cb() should release socket ownership Lars Persson reported following deadlock : -000 |M:0x0:0x802B6AF8(asm) <-- arch_spin_lock -001 |tcp_v4_rcv(skb = 0x8BD527A0) <-- sk = 0x8BE6B2A0 -002 |ip_local_deliver_finish(skb = 0x8BD527A0) -003 |__netif_receive_skb_core(skb = 0x8BD527A0, ?) -004 |netif_receive_skb(skb = 0x8BD527A0) -005 |elk_poll(napi = 0x8C770500, budget = 64) -006 |net_rx_action(?) -007 |__do_softirq() -008 |do_softirq() -009 |local_bh_enable() -010 |tcp_rcv_established(sk = 0x8BE6B2A0, skb = 0x87D3A9E0, th = 0x814EBE14, ?) -011 |tcp_v4_do_rcv(sk = 0x8BE6B2A0, skb = 0x87D3A9E0) -012 |tcp_delack_timer_handler(sk = 0x8BE6B2A0) -013 |tcp_release_cb(sk = 0x8BE6B2A0) -014 |release_sock(sk = 0x8BE6B2A0) -015 |tcp_sendmsg(?, sk = 0x8BE6B2A0, ?, ?) -016 |sock_sendmsg(sock = 0x8518C4C0, msg = 0x87D8DAA8, size = 4096) -017 |kernel_sendmsg(?, ?, ?, ?, size = 4096) -018 |smb_send_kvec() -019 |smb_send_rqst(server = 0x87C4D400, rqst = 0x87D8DBA0) -020 |cifs_call_async() -021 |cifs_async_writev(wdata = 0x87FD6580) -022 |cifs_writepages(mapping = 0x852096E4, wbc = 0x87D8DC88) -023 |__writeback_single_inode(inode = 0x852095D0, wbc = 0x87D8DC88) -024 |writeback_sb_inodes(sb = 0x87D6D800, wb = 0x87E4A9C0, work = 0x87D8DD88) -025 |__writeback_inodes_wb(wb = 0x87E4A9C0, work = 0x87D8DD88) -026 |wb_writeback(wb = 0x87E4A9C0, work = 0x87D8DD88) -027 |wb_do_writeback(wb = 0x87E4A9C0, force_wait = 0) -028 |bdi_writeback_workfn(work = 0x87E4A9CC) -029 |process_one_work(worker = 0x8B045880, work = 0x87E4A9CC) -030 |worker_thread(__worker = 0x8B045880) -031 |kthread(_create = 0x87CADD90) -032 |ret_from_kernel_thread(asm) Bug occurs because __tcp_checksum_complete_user() enables BH, assuming it is running from softirq context. Lars trace involved a NIC without RX checksum support but other points are problematic as well, like the prequeue stuff. Problem is triggered by a timer, that found socket being owned by user. tcp_release_cb() should call tcp_write_timer_handler() or tcp_delack_timer_handler() in the appropriate context : BH disabled and socket lock held, but 'owned' field cleared, as if they were running from timer handlers. Fixes: 6f458dfb4092 ("tcp: improve latencies of timer triggered events") Reported-by: Lars Persson Tested-by: Lars Persson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++++ net/core/sock.c | 5 ++++- net/ipv4/tcp_output.c | 11 +++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 7c4167bc8266..b9586a137cad 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1488,6 +1488,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) */ #define sock_owned_by_user(sk) ((sk)->sk_lock.owned) +static inline void sock_release_ownership(struct sock *sk) +{ + sk->sk_lock.owned = 0; +} + /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. diff --git a/net/core/sock.c b/net/core/sock.c index 5b6a9431b017..c0fc6bdad1e3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2357,10 +2357,13 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); + /* Warning : release_cb() might need to release sk ownership, + * ie call sock_release_ownership(sk) before us. + */ if (sk->sk_prot->release_cb) sk->sk_prot->release_cb(sk); - sk->sk_lock.owned = 0; + sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f0eb4e337ec8..17a11e65e57f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -767,6 +767,17 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); + /* Here begins the tricky part : + * We are called from release_sock() with : + * 1) BH disabled + * 2) sk_lock.slock spinlock held + * 3) socket owned by us (sk->sk_lock.owned == 1) + * + * But following code is meant to be called from BH handlers, + * so we should keep BH disabled, but early release socket ownership + */ + sock_release_ownership(sk); + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); __sock_put(sk); From 9ed973cc40c588abeaa58aea0683ea665132d11d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 10 Mar 2014 22:25:24 +0100 Subject: [PATCH 52/70] bridge: multicast: add sanity check for general query destination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit General IGMP and MLD queries are supposed to have the multicast link-local all-nodes address as their destination according to RFC2236 section 9, RFC3376 section 4.1.12/9.1, RFC2710 section 8 and RFC3810 section 5.1.15. Without this check, such malformed IGMP/MLD queries can result in a denial of service: The queries are ignored by most IGMP/MLD listeners therefore they will not respond with an IGMP/MLD report. However, without this patch these malformed MLD queries would enable the snooping part in the bridge code, potentially shutting down the according ports towards these hosts for multicast traffic as the bridge did not learn about these listeners. Reported-by: Jan Stancek Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index fb0e36fac668..e56bae4f59ce 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1181,6 +1181,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { + err = -EINVAL; + goto out; + } + br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, max_delay); @@ -1228,6 +1236,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long max_delay; unsigned long now = jiffies; const struct in6_addr *group = NULL; + bool is_general_query; int err = 0; spin_lock(&br->multicast_lock); @@ -1262,6 +1271,16 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL); } + is_general_query = group && ipv6_addr_any(group); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { + err = -EINVAL; + goto out; + } + br_multicast_query_received(br, port, &br->ip6_querier, !ipv6_addr_any(&ip6h->saddr), max_delay); From 20a599bec95a52fa72432b2376a2ce47c5bb68fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 10 Mar 2014 22:25:25 +0100 Subject: [PATCH 53/70] bridge: multicast: enable snooping on general queries only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this check someone could easily create a denial of service by injecting multicast-specific queries to enable the bridge snooping part if no real querier issuing periodic general queries is present on the link which would result in the bridge wrongly shutting down ports for multicast traffic as the bridge did not learn about these listeners. With this patch the snooping code is enabled upon receiving valid, general queries only. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index e56bae4f59ce..93067ecdb9a2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1127,9 +1127,10 @@ static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, struct bridge_mcast_querier *querier, int saddr, + bool is_general_query, unsigned long max_delay) { - if (saddr) + if (saddr && is_general_query) br_multicast_update_querier_timer(br, querier, max_delay); else if (timer_pending(&querier->timer)) return; @@ -1190,7 +1191,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, } br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, - max_delay); + !group, max_delay); if (!group) goto out; @@ -1282,7 +1283,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, } br_multicast_query_received(br, port, &br->ip6_querier, - !ipv6_addr_any(&ip6h->saddr), max_delay); + !ipv6_addr_any(&ip6h->saddr), + is_general_query, max_delay); if (!group) goto out; From fdfaf64e75397567257e1051931f9a3377360665 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 10 Mar 2014 15:56:51 -0700 Subject: [PATCH 54/70] x86: bpf_jit: support negative offsets Commit a998d4342337 claimed to introduce negative offset support to x86 jit, but it couldn't be working, since at the time of the execution of LD+ABS or LD+IND instructions via call into bpf_internal_load_pointer_neg_helper() the %edx (3rd argument of this func) had junk value instead of access size in bytes (1 or 2 or 4). Store size into %edx instead of %ecx (what original commit intended to do) Fixes: a998d4342337 ("bpf jit: Let the x86 jit handle negative offsets") Signed-off-by: Alexei Starovoitov Cc: Jan Seiffert Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 877b9a1b2152..01495755701b 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -140,7 +140,7 @@ bpf_slow_path_byte_msh: push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ - mov $SIZE,%ecx; /* size */ \ + mov $SIZE,%edx; /* size */ \ call bpf_internal_load_pointer_neg_helper; \ test %rax,%rax; \ pop SKBDATA; \ From 406764622fb71e5f4efb39ff111ef87713815f6d Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Tue, 11 Mar 2014 08:49:39 +0000 Subject: [PATCH 55/70] tools/net/Makefile: Define PACKAGE to fix build problems Fixes the following build problem with binutils-2.24 gcc -Wall -O2 -c -o bpf_jit_disasm.o bpf_jit_disasm.c In file included from bpf_jit_disasm.c:25:0: /usr/include/bfd.h:35:2: error: #error config.h must be included before this header #error config.h must be included before this header This is similar to commit 3ce711a6abc27abce1554e1d671a8762b7187690 "perf tools: bfd.h/libbfd detection fails with recent binutils" See: https://sourceware.org/bugzilla/show_bug.cgi?id=14243 CC: David S. Miller CC: Daniel Borkmann CC: netdev@vger.kernel.org Acked-by: Daniel Borkmann Signed-off-by: Markos Chandras Signed-off-by: David S. Miller --- tools/net/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/Makefile b/tools/net/Makefile index 004cd74734b6..ee577ea03ba5 100644 --- a/tools/net/Makefile +++ b/tools/net/Makefile @@ -12,7 +12,7 @@ YACC = bison all : bpf_jit_disasm bpf_dbg bpf_asm -bpf_jit_disasm : CFLAGS = -Wall -O2 +bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm' bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl bpf_jit_disasm : bpf_jit_disasm.o From f75761b6b5bf6277296505941d2dd8e11f9b5c35 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 11 Mar 2014 15:11:59 +0800 Subject: [PATCH 56/70] r8169: fix the incorrect tx descriptor version The tx descriptor version of RTL8111B belong to RTL_TD_0. Signed-off-by: Hayes Wang Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e9779653cd4c..3ff7bc3e7a23 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -209,7 +209,7 @@ static const struct { [RTL_GIGA_MAC_VER_16] = _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), [RTL_GIGA_MAC_VER_17] = - _R("RTL8168b/8111b", RTL_TD_1, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), [RTL_GIGA_MAC_VER_18] = _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), [RTL_GIGA_MAC_VER_19] = From 836fbaf459f9c041826864021688e9bd131e722c Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 11 Mar 2014 12:45:32 +0000 Subject: [PATCH 57/70] xen-netback: use skb_is_gso in xenvif_start_xmit In 5bd076708 ("Xen-netback: Fix issue caused by using gso_type wrongly") we use skb_is_gso to determine if we need an extra slot to accommodate the SKB. There's similar error in interface.c. Change that to use skb_is_gso as well. Signed-off-by: Wei Liu Cc: Annie Li Cc: Ian Campbell Cc: Paul Durrant Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 7669d49a67e2..301cc037fda8 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -132,8 +132,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) /* If the skb is GSO then we'll also need an extra slot for the * metadata. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + if (skb_is_gso(skb)) min_slots_needed++; /* If the skb can't possibly fit in the remaining slots From 56cb456746a15c1025a178466492ca4c373b1a63 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:30 +0200 Subject: [PATCH 58/70] net/mlx4_core: Fix wrong dump of the vxlan offloads device capability Fix the value used to dump the vxlan offloads device capability to align with the MLX4_DEV_CAP_FLAG2_yyy definition. While on that, add dump to the IPoIB flow-steering device capability and fix small typo. The vxlan cap value wasn't fully handled when a conflict was resolved between MLX4_DEV_CAP_FLAG2_DMFS_IPOIB coming from the IB tree to MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS coming from net-next. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 8726e34cee22..7e2995ecea6f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -129,13 +129,14 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", - [3] = "Device manage flow steering support", + [3] = "Device managed flow steering support", [4] = "Automatic MAC reassignment support", [5] = "Time stamping support", [6] = "VST (control vlan insertion/stripping) support", [7] = "FSM (MAC anti-spoofing) support", [8] = "Dynamic QP updates support", - [9] = "TCP/IP offloads/flow-steering for VXLAN support" + [9] = "Device managed flow steering IPoIB support", + [10] = "TCP/IP offloads/flow-steering for VXLAN support" }; int i; From 2a2083f7f3568c0192daa6ac0e6fa35d953f47bd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:31 +0200 Subject: [PATCH 59/70] net/mlx4_en: Handle vxlan steering rules for mac address changes When the device mac address is changed, we must deregister the vxlan steering rule associated with the previous mac, and register a new steering rule using the new mac. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fad45316200a..ba2126111573 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -742,6 +742,14 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, err = mlx4_en_uc_steer_add(priv, new_mac, &qpn, &entry->reg_id); + if (err) + return err; + if (priv->tunnel_reg_id) { + mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); + priv->tunnel_reg_id = 0; + } + err = mlx4_en_tunnel_steer_add(priv, new_mac, qpn, + &priv->tunnel_reg_id); return err; } } From 7855bff42ea9938a0853321256f4c8ce3628aa73 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:32 +0200 Subject: [PATCH 60/70] net/mlx4_core: Load the IB driver when the device supports IBoE When checking what protocol drivers to load, the IB driver should be requested also over Ethernet ports, if the device supports IBoE (RoCE). Fixes: b046ffe 'net/mlx4_core: Load higher level modules according to ports type' Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 30a08a60f059..936c15364739 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -751,7 +751,7 @@ static void mlx4_request_modules(struct mlx4_dev *dev) has_eth_port = true; } - if (has_ib_port) + if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) request_module_nowait(IB_DRV_NAME); if (has_eth_port) request_module_nowait(EN_DRV_NAME); From a93c12560498066765c58b032530798a26bd8f70 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Mar 2014 11:23:42 +0100 Subject: [PATCH 61/70] packet: doc: Spelling s/than/that/ Signed-off-by: Geert Uytterhoeven Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 1404674c0a02..6fea79efb4cb 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -453,7 +453,7 @@ TP_STATUS_COPY : This flag indicates that the frame (and associated enabled previously with setsockopt() and the PACKET_COPY_THRESH option. - The number of frames than can be buffered to + The number of frames that can be buffered to be read with recvfrom is limited like a normal socket. See the SO_RCVBUF option in the socket (7) man page. From 7e814a6c50a1669118ffa4961568fea3aa955faf Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 11 Mar 2014 12:06:24 +0100 Subject: [PATCH 62/70] MAINTAINERS: Add tools/net to NETWORKING [GENERAL] Make sure patches for these tools go to the netdev list as well. References: https://marc.info/?l=linux-kernel&m=139450284501328&w=2 Cc: David S. Miller Cc: Daniel Borkmann Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e1297ff255e1..e1be77c78336 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6002,6 +6002,7 @@ F: include/linux/netdevice.h F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h +F: tools/net/ NETWORKING [IPv4/IPv6] M: "David S. Miller" From dbb490b96584d4e958533fb637f08b557f505657 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Tue, 11 Mar 2014 11:58:27 +0000 Subject: [PATCH 63/70] net: socket: error on a negative msg_namelen When copying in a struct msghdr from the user, if the user has set the msg_namelen parameter to a negative value it gets clamped to a valid size due to a comparison between signed and unsigned values. Ensure the syscall errors when the user passes in a negative value. Signed-off-by: Matthew Leach Signed-off-by: David S. Miller --- net/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/socket.c b/net/socket.c index 879933aaed4c..32df5847efa3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1985,6 +1985,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, { if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; From f4e53f9a4f7d01cef9f096b1eb3705329a7ce9c2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Mar 2014 10:18:53 +0100 Subject: [PATCH 64/70] MAINTAINERS: add networking selftests to NETWORKING Add it to NETWORKING [GENERAL] to make sure patches for selftests go to the netdev list as well. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e1be77c78336..b359eb056e5c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6003,6 +6003,7 @@ F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h F: tools/net/ +F: tools/testing/selftests/net/ NETWORKING [IPv4/IPv6] M: "David S. Miller" From 0a8d8c446b5429d15ff2d48f46e00d8a08552303 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 13 Mar 2014 10:44:34 +0100 Subject: [PATCH 65/70] vmxnet3: fix building without CONFIG_PCI_MSI Since commit d25f06ea466e "vmxnet3: fix netpoll race condition", the vmxnet3 driver fails to build when CONFIG_PCI_MSI is disabled, because it unconditionally references the vmxnet3_msix_rx() function. To fix this, use the same #ifdef in the caller that exists around the function definition. Signed-off-by: Arnd Bergmann Cc: Neil Horman Cc: Shreyas Bhatewara Cc: "VMware, Inc." Cc: "David S. Miller" Cc: stable@vger.kernel.org Acked-by: Neil Horman Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b7daa02ff026..0fa3b44f7342 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1761,13 +1761,16 @@ static void vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); - int i; switch (adapter->intr.type) { - case VMXNET3_IT_MSIX: +#ifdef CONFIG_PCI_MSI + case VMXNET3_IT_MSIX: { + int i; for (i = 0; i < adapter->num_rx_queues; i++) vmxnet3_msix_rx(0, &adapter->rx_queue[i]); break; + } +#endif case VMXNET3_IT_MSI: default: vmxnet3_intr(0, adapter->netdev); From de123268300fd33b7f7668fda3264059daffa6ef Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 13 Mar 2014 14:52:15 +0200 Subject: [PATCH 66/70] net/mlx4_en: Deregister multicast vxlan steering rules when going down When mlx4_en_stop_port() is called, we need to deregister also the tunnel steering rules that relate to multicast. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index ba2126111573..84a96f70dfb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1800,6 +1800,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); + if (mclist->tunnel_reg_id) + mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id); } mlx4_en_clear_list(dev); list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { From 6e07a1e0b53d77dbcb08f29d1cca07c6d33a926f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 12 Mar 2014 08:21:24 +0100 Subject: [PATCH 67/70] at86rf230: fix lockdep splats This patch fix a lockdep in the at86rf230 driver, otherwise we get: [ 30.206517] ================================= [ 30.211078] [ INFO: inconsistent lock state ] [ 30.215647] 3.14.0-20140108-1-00994-g32e9426 #163 Not tainted [ 30.221660] --------------------------------- [ 30.226222] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 30.232514] systemd-udevd/157 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 30.238439] (&(&lp->lock)->rlock){?.+...}, at: [] at86rf230_isr+0x18/0x44 [ 30.246621] {HARDIRQ-ON-W} state was registered at: [ 30.251728] [] __lock_acquire+0x7a4/0x18d8 [ 30.257135] [] lock_acquire+0x68/0x7c [ 30.262071] [] _raw_spin_lock+0x28/0x38 [ 30.267203] [] at86rf230_xmit+0x1c/0x144 [ 30.272412] [] mac802154_xmit_worker+0x88/0x148 [ 30.278271] [] process_one_work+0x274/0x404 [ 30.283761] [] worker_thread+0x228/0x374 [ 30.288971] [] kthread+0xd0/0xe4 [ 30.293455] [] ret_from_fork+0x14/0x2c [ 30.298493] irq event stamp: 8948 [ 30.301963] hardirqs last enabled at (8947): [] __kmalloc+0xb4/0x110 [ 30.309636] hardirqs last disabled at (8948): [] __irq_svc+0x34/0x5c [ 30.317215] softirqs last enabled at (8452): [] __do_softirq+0x1dc/0x264 [ 30.325243] softirqs last disabled at (8439): [] irq_exit+0x80/0xf4 We use the lp->lock inside the isr of at86rf230, that's why we need the irqsave spinlock calls. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- drivers/net/ieee802154/at86rf230.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index ab31544bc254..a30258aad139 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -546,12 +546,12 @@ at86rf230_xmit(struct ieee802154_dev *dev, struct sk_buff *skb) int rc; unsigned long flags; - spin_lock(&lp->lock); + spin_lock_irqsave(&lp->lock, flags); if (lp->irq_busy) { - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); return -EBUSY; } - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); might_sleep(); @@ -725,10 +725,11 @@ static void at86rf230_irqwork_level(struct work_struct *work) static irqreturn_t at86rf230_isr(int irq, void *data) { struct at86rf230_local *lp = data; + unsigned long flags; - spin_lock(&lp->lock); + spin_lock_irqsave(&lp->lock, flags); lp->irq_busy = 1; - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); schedule_work(&lp->irqwork); From fb00bc2e6cd2046282ba4b03f4fe682aee70b2f8 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 12 Mar 2014 17:31:59 +0800 Subject: [PATCH 68/70] bonding: set correct vlan id for alb xmit path The commit d3ab3ffd1d728d7ee77340e7e7e2c7cfe6a4013e (bonding: use rlb_client_info->vlan_id instead of ->tag) remove the rlb_client_info->tag, but occur some issues, The vlan_get_tag() will return 0 for success and -EINVAL for error, so the client_info->vlan_id always be set to 0 if the vlan_get_tag return 0 for success, so the client_info would never get a correct vlan id. We should only set the vlan id to 0 when the vlan_get_tag return error. Fixes: d3ab3ffd1d7 (bonding: use rlb_client_info->vlan_id instead of ->tag) CC: Ding Tianhong CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Ding Tianhong Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index a2c47476804d..e8f133e926aa 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -730,7 +730,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon client_info->ntt = 0; } - if (!vlan_get_tag(skb, &client_info->vlan_id)) + if (vlan_get_tag(skb, &client_info->vlan_id)) client_info->vlan_id = 0; if (!client_info->assigned) { From 84fe61821e4ebab6322eeae3f3c27f77f0031978 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 12 Mar 2014 11:28:19 +0100 Subject: [PATCH 69/70] eth: fec: Fix lost promiscuous mode after reconnecting cable If the Freescale fec is in promiscuous mode and network cable is reconnected then the promiscuous mode get lost. The problem is caused by a too soon call of set_multicast_list to re-enable promisc mode. The FEC_R_CNTRL register changes are overwritten by fec_restart. This patch fixes this by moving the call behind the init of FEC_R_CNTRL register in fec_restart. Successful tested on a i.MX28 board. Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 479a7cba45c0..03a351300013 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -528,13 +528,6 @@ fec_restart(struct net_device *ndev, int duplex) /* Clear any outstanding interrupt. */ writel(0xffc00000, fep->hwp + FEC_IEVENT); - /* Setup multicast filter. */ - set_multicast_list(ndev); -#ifndef CONFIG_M5272 - writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); - writel(0, fep->hwp + FEC_HASH_TABLE_LOW); -#endif - /* Set maximum receive buffer size. */ writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE); @@ -655,6 +648,13 @@ fec_restart(struct net_device *ndev, int duplex) writel(rcntl, fep->hwp + FEC_R_CNTRL); + /* Setup multicast filter. */ + set_multicast_list(ndev); +#ifndef CONFIG_M5272 + writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); + writel(0, fep->hwp + FEC_HASH_TABLE_LOW); +#endif + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { /* enable ENET endian swap */ ecntl |= (1 << 8); From ecab67015ef6e3f3635551dcc9971cf363cc1cd5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 12 Mar 2014 22:13:19 +0100 Subject: [PATCH 70/70] ipv6: Avoid unnecessary temporary addresses being generated tmp_prefered_lft is an offset to ifp->tstamp, not now. Therefore age needs to be added to the condition. Age calculation in ipv6_create_tempaddr is different from the one in addrconf_verify and doesn't consider ADDRCONF_TIMER_FUZZ_MINUS. This can cause age in ipv6_create_tempaddr to be less than the one in addrconf_verify and therefore unnecessary temporary address to be generated. Use age calculation as in addrconf_modify to avoid this. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fdbfeca36d63..344e972426df 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1103,8 +1103,11 @@ retry: * Lifetime is greater than REGEN_ADVANCE time units. In particular, * an implementation must not create a temporary address with a zero * Preferred Lifetime. + * Use age calculation as in addrconf_verify to avoid unnecessary + * temporary addresses being generated. */ - if (tmp_prefered_lft <= regen_advance) { + age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + if (tmp_prefered_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1;