From 1f6fc43e621167492ed4b7f3b4269c584c3d6ccc Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 2 Aug 2012 18:41:48 +0100 Subject: [PATCH 01/33] cfg80211: process pending events when unregistering net device libertas currently calls cfg80211_disconnected() when it is being brought down. This causes an event to be allocated, but since the wdev is already removed from the rdev by the time that the event processing work executes, the event is never processed or freed. http://article.gmane.org/gmane.linux.kernel.wireless.general/95666 Fix this leak, and other possible situations, by processing the event queue when a device is being unregistered. Thanks to Johannes Berg for the suggestion. Signed-off-by: Daniel Drake Cc: stable@vger.kernel.org Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 5 +++++ net/wireless/core.h | 1 + net/wireless/util.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 31b40cc4a9c3..dcd64d5b07aa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -952,6 +952,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, */ synchronize_rcu(); INIT_LIST_HEAD(&wdev->list); + /* + * Ensure that all events have been processed and + * freed. + */ + cfg80211_process_wdev_events(wdev); break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) diff --git a/net/wireless/core.h b/net/wireless/core.h index 5206c6844fd7..bc7430b54771 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, u32 *flags, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); +void cfg80211_process_wdev_events(struct wireless_dev *wdev); int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, diff --git a/net/wireless/util.c b/net/wireless/util.c index 26f8cd30f712..994e2f0cc7a8 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -735,7 +735,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) wdev->connect_keys = NULL; } -static void cfg80211_process_wdev_events(struct wireless_dev *wdev) +void cfg80211_process_wdev_events(struct wireless_dev *wdev) { struct cfg80211_event *ev; unsigned long flags; From deee0214def5d8a32b8112f11d9c2b1696e9c0cb Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 3 Aug 2012 12:49:14 +0200 Subject: [PATCH 02/33] rt61pci: fix NULL pointer dereference in config_lna_gain We can not pass NULL libconf->conf->channel to rt61pci_config() as it is dereferenced unconditionally in rt61pci_config_lna_gain() subroutine. Resolves: https://bugzilla.kernel.org/show_bug.cgi?id=44361 Cc: stable@vger.kernel.org Reported-and-tested-by: Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt61pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index f32259686b45..3f7bc5cadf9a 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -2243,8 +2243,7 @@ static void rt61pci_txdone(struct rt2x00_dev *rt2x00dev) static void rt61pci_wakeup(struct rt2x00_dev *rt2x00dev) { - struct ieee80211_conf conf = { .flags = 0 }; - struct rt2x00lib_conf libconf = { .conf = &conf }; + struct rt2x00lib_conf libconf = { .conf = &rt2x00dev->hw->conf }; rt61pci_config(rt2x00dev, &libconf, IEEE80211_CONF_CHANGE_PS); } From 50e2a30cf6fcaeb2d27360ba614dd169a10041c5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 5 Aug 2012 18:31:46 +0200 Subject: [PATCH 03/33] iwlwifi: disable greenfield transmissions as a workaround There's a bug that causes the rate scaling to get stuck when it has to use single-stream rates with a peer that can do GF and SGI; the two are incompatible so we can't use them together, but that causes the algorithm to not work at all, it always rejects updates. Disable greenfield for now to prevent that problem. Cc: stable@vger.kernel.org Reviewed-by: Emmanuel Grumbach Tested-by: Cesar Eduardo Barros Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/dvm/rs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/rs.c b/drivers/net/wireless/iwlwifi/dvm/rs.c index 6fddd2785e6e..a82f46c10f5e 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rs.c +++ b/drivers/net/wireless/iwlwifi/dvm/rs.c @@ -707,11 +707,14 @@ static int rs_toggle_antenna(u32 valid_ant, u32 *rate_n_flags, */ static bool rs_use_green(struct ieee80211_sta *sta) { - struct iwl_station_priv *sta_priv = (void *)sta->drv_priv; - struct iwl_rxon_context *ctx = sta_priv->ctx; - - return (sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD) && - !(ctx->ht.non_gf_sta_present); + /* + * There's a bug somewhere in this code that causes the + * scaling to get stuck because GF+SGI can't be combined + * in SISO rates. Until we find that bug, disable GF, it + * has only limited benefit and we still interoperate with + * GF APs since we can always receive GF transmissions. + */ + return false; } /** From b7ec70be01a87f2c85df3ae11046e74f9b67e323 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Tue, 31 Jul 2012 02:02:43 +0000 Subject: [PATCH 04/33] e1000e: NIC goes up and immediately goes down Found that commit d478eb44 was a bad commit. If the link partner is transmitting codeword (even if NULL codeword), then the RXCW.C bit will be set so check for RXCW.CW is unnecessary. Ref: RH BZ 840642 Reported-by: Fabio Futigami Signed-off-by: Tushar Dave CC: Marcelo Ricardo Leitner CC: stable [2.6.38+] Tested-by: Aaron Brown Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/e1000e/82571.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 0b3bade957fd..2a4ded2fd6e5 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -1601,10 +1601,8 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) * auto-negotiation in the TXCW register and disable * forced link in the Device Control register in an * attempt to auto-negotiate with our link partner. - * If the partner code word is null, stop forcing - * and restart auto negotiation. */ - if ((rxcw & E1000_RXCW_C) || !(rxcw & E1000_RXCW_CW)) { + if (rxcw & E1000_RXCW_C) { /* Enable autoneg, and unforce link up */ ew32(TXCW, mac->txcw); ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); From eca90f550494171f54f8a700caee65ec16455a5b Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Wed, 1 Aug 2012 02:11:15 +0000 Subject: [PATCH 05/33] e1000e: 82571 Tx Data Corruption during Tx hang recovery A bus trace shows that while executing e1000e_down, TCTL is cleared except for the PSP bit. This occurs while in the middle of fetching a TSO packet since the Tx packet buffer is full at that point. Before the device is reset, the e1000_watchdog_task starts to run from the middle (it was apparently pre-empted earlier, although that is not in the trace) and sets TCTL.EN. At that point, 82571 transmits the corrupted packet, apparently because TCTL.MULR was cleared in the middle of fetching a packet, which is forbidden. Driver should just clear TCTL.EN in e1000_reset_hw_82571 instead of clearing the entire register, so as not to change any settings in the middle of fetching a packet. Signed-off-by: Tushar Dave Tested-by: Aaron Brown Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/e1000e/82571.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 2a4ded2fd6e5..080c89093feb 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -999,7 +999,7 @@ static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) **/ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) { - u32 ctrl, ctrl_ext, eecd; + u32 ctrl, ctrl_ext, eecd, tctl; s32 ret_val; /* @@ -1014,7 +1014,9 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ew32(IMC, 0xffffffff); ew32(RCTL, 0); - ew32(TCTL, E1000_TCTL_PSP); + tctl = er32(TCTL); + tctl &= ~E1000_TCTL_EN; + ew32(TCTL, tctl); e1e_flush(); usleep_range(10000, 20000); From 119b0e0351bcdb7833f368781e6241ff283b49e5 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Tue, 7 Aug 2012 00:45:57 -0700 Subject: [PATCH 06/33] igb: add delay to allow igb loopback test to succeed on 8086:10c9 Some 8086:10c9 NICs have a problem completing the ethtool loopback test. The result looks like this: ethtool -t eth1 The test result is FAIL The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 13 Link test (on/offline) 0 A bisect clearly points to commit a95a07445ee97a2fef65befafbadcc30ca1bd145. However that seems to only trigger the bug. While adding some printk the problem disappeared, so this might be a timing issue. After some trial and error I discovered that adding a small delay just before igb_write_phy_reg() in igb_integrated_phy_loopback() allows the loopback test to succeed. I was unable to figure out the root cause so far but I expect it to be somewhere in the following executing path igb_integrated_phy_loopback ->igb_write_phy_reg_igp ->igb_write_phy_reg_mdic ->igb_acquire_phy_82575 ->igb_acquire_swfw_sync_82575 The problem could only be observed on 8086:10c9 NICs so far and not all of them show the behaviour. I did not restrict the workaround to this type of NIC as it should do no harm to other igb NICs. With the patch below the loopback test succeeded 500 times in a row using a NIC that would otherwise fail. Signed-off-by: Stefan Assmann Tested-by: Aaron Brown Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 90550f5e3dd9..70591117051b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1498,6 +1498,9 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) break; } + /* add small delay to avoid loopback test failure */ + msleep(50); + /* force 1000, set loopback */ igb_write_phy_reg(hw, PHY_CONTROL, 0x4140); From 035534ed3377d9def2c17717899fd64a111a785b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 6 Aug 2012 18:02:29 +0200 Subject: [PATCH 07/33] canfd: remove redundant CAN FD flag The first idea of the CAN FD implementation started with a new struct canfd_frame to be used for both CAN FD frames and legacy CAN frames. The now mainlined implementation supports both CAN frame types simultaneously and distinguishes them only by their required sizes: CAN_MTU and CANFD_MTU. Only the struct canfd_frame contains a flags element which is needed for the additional CAN FD information. As CAN FD implicitly means that the 'Extened Data Length' mode is enabled the formerly defined CANFD_EDL bit became redundant and also confusing as an unset bit would be an error and would always need to be tested. This patch removes the obsolete CANFD_EDL bit and clarifies the documentation for the use of struct canfd_frame and the CAN FD relevant flags. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/include/linux/can.h b/include/linux/can.h index 018055efc034..e52958d7c2d1 100644 --- a/include/linux/can.h +++ b/include/linux/can.h @@ -74,20 +74,21 @@ struct can_frame { /* * defined bits for canfd_frame.flags * - * As the default for CAN FD should be to support the high data rate in the - * payload section of the frame (HDR) and to support up to 64 byte in the - * data section (EDL) the bits are only set in the non-default case. - * Btw. as long as there's no real implementation for CAN FD network driver - * these bits are only preliminary. + * The use of struct canfd_frame implies the Extended Data Length (EDL) bit to + * be set in the CAN frame bitstream on the wire. The EDL bit switch turns + * the CAN controllers bitstream processor into the CAN FD mode which creates + * two new options within the CAN FD frame specification: * - * RX: NOHDR/NOEDL - info about received CAN FD frame - * ESI - bit from originating CAN controller - * TX: NOHDR/NOEDL - control per-frame settings if supported by CAN controller - * ESI - bit is set by local CAN controller + * Bit Rate Switch - to indicate a second bitrate is/was used for the payload + * Error State Indicator - represents the error state of the transmitting node + * + * As the CANFD_ESI bit is internally generated by the transmitting CAN + * controller only the CANFD_BRS bit is relevant for real CAN controllers when + * building a CAN FD frame for transmission. Setting the CANFD_ESI bit can make + * sense for virtual CAN interfaces to test applications with echoed frames. */ -#define CANFD_NOHDR 0x01 /* frame without high data rate */ -#define CANFD_NOEDL 0x02 /* frame without extended data length */ -#define CANFD_ESI 0x04 /* error state indicator */ +#define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */ +#define CANFD_ESI 0x02 /* error state indicator of the transmitting node */ /** * struct canfd_frame - CAN flexible data rate frame structure From 920d087e44c228be6270e07fdb59043380a4bb00 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Mon, 6 Aug 2012 16:08:36 +0000 Subject: [PATCH 08/33] drivers: net: irda: bfin_sir: fix compile error Bit IREN is replaced by UMOD_IRDA and UMOD_MASK since blackfin 60x added, but this driver didn't update which will cause bfin_sir build error: drivers/net/irda/bfin_sir.c:161:9: error: 'IREN' undeclared (first use in this function) drivers/net/irda/bfin_sir.c:435:18: error: 'IREN' undeclared (first use in this function) drivers/net/irda/bfin_sir.c:521:11: error: 'IREN' undeclared (first use in this function) This patch fix it. Signed-off-by: Sonic Zhang Signed-off-by: Bob Liu Acked-by: Samuel Ortiz Signed-off-by: David S. Miller --- drivers/net/irda/bfin_sir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/irda/bfin_sir.c b/drivers/net/irda/bfin_sir.c index a561ae44a9ac..c6a0299aa9f9 100644 --- a/drivers/net/irda/bfin_sir.c +++ b/drivers/net/irda/bfin_sir.c @@ -158,7 +158,7 @@ static int bfin_sir_set_speed(struct bfin_sir_port *port, int speed) /* If not add the 'RPOLC', we can't catch the receive interrupt. * It's related with the HW layout and the IR transiver. */ - val |= IREN | RPOLC; + val |= UMOD_IRDA | RPOLC; UART_PUT_GCTL(port, val); return ret; } @@ -432,7 +432,7 @@ static void bfin_sir_shutdown(struct bfin_sir_port *port, struct net_device *dev bfin_sir_stop_rx(port); val = UART_GET_GCTL(port); - val &= ~(UCEN | IREN | RPOLC); + val &= ~(UCEN | UMOD_MASK | RPOLC); UART_PUT_GCTL(port, val); #ifdef CONFIG_SIR_BFIN_DMA @@ -518,10 +518,10 @@ static void bfin_sir_send_work(struct work_struct *work) * reset all the UART. */ val = UART_GET_GCTL(port); - val &= ~(IREN | RPOLC); + val &= ~(UMOD_MASK | RPOLC); UART_PUT_GCTL(port, val); SSYNC(); - val |= IREN | RPOLC; + val |= UMOD_IRDA | RPOLC; UART_PUT_GCTL(port, val); SSYNC(); /* bfin_sir_set_speed(port, self->speed); */ From 99aa3473e672ca610905838997fa018b95cd643f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 6 Aug 2012 16:27:10 +0000 Subject: [PATCH 09/33] af_packet: Quiet sparse noise about using plain integer as NULL pointer Quiets the sparse warning: warning: Using plain integer as NULL pointer Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ceaca7c134a0..f016f6634a79 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1079,7 +1079,7 @@ static void *packet_current_rx_frame(struct packet_sock *po, default: WARN(1, "TPACKET version not supported\n"); BUG(); - return 0; + return NULL; } } From 08252b32311c3fa84219ad794d640af7399b5485 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Tue, 7 Aug 2012 00:23:11 +0000 Subject: [PATCH 10/33] pptp: lookup route with the proper net namespace pptp always use init_net as the net namespace to lookup route, this will cause route lookup failed in container. because we already set the correct net namespace to struct sock in pptp_create,so fix this by using sock_net(sk) to replace &init_net. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 1c98321b56cc..162464fe86bf 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -189,7 +189,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (sk_pppox(po)->sk_state & PPPOX_DEAD) goto tx_error; - rt = ip_route_output_ports(&init_net, &fl4, NULL, + rt = ip_route_output_ports(sock_net(sk), &fl4, NULL, opt->dst_addr.sin_addr.s_addr, opt->src_addr.sin_addr.s_addr, 0, 0, IPPROTO_GRE, @@ -468,7 +468,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.private = sk; po->chan.ops = &pptp_chan_ops; - rt = ip_route_output_ports(&init_net, &fl4, sk, + rt = ip_route_output_ports(sock_net(sk), &fl4, sk, opt->dst_addr.sin_addr.s_addr, opt->src_addr.sin_addr.s_addr, 0, 0, From 0c03eca3d995e73d691edea8c787e25929ec156d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Aug 2012 00:47:11 +0000 Subject: [PATCH 11/33] net: fib: fix incorrect call_rcu_bh() After IP route cache removal, I believe rcu_bh() has very little use and we should remove this RCU variant, since it adds some cycles in fast path. Anyway, the call_rcu_bh() use in fib_true is obviously wrong, since some users only assert rcu_read_lock(). Signed-off-by: Eric Dumazet Cc: "Paul E. McKenney" Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f0cdb30921c0..57bd978483e1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -367,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head) static inline void free_leaf(struct leaf *l) { - call_rcu_bh(&l->rcu, __leaf_free_rcu); + call_rcu(&l->rcu, __leaf_free_rcu); } static inline void free_leaf_info(struct leaf_info *leaf) From a37e6e344910a43b9ebc2bbf29a029f5ea942598 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Aug 2012 10:55:45 +0000 Subject: [PATCH 12/33] net: force dst_default_metrics to const section While investigating on network performance problems, I found this little gem : $ nm -v vmlinux | grep -1 dst_default_metrics ffffffff82736540 b busy.46605 ffffffff82736560 B dst_default_metrics ffffffff82736598 b dst_busy_list Apparently, declaring a const array without initializer put it in (writeable) bss section, in middle of possibly often dirtied cache lines. Since we really want dst_default_metrics be const to avoid any possible false sharing and catch any buggy writes, I force a null initializer. ffffffff818a4c20 R dst_default_metrics Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- net/core/dst.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index baf597890064..621e3513ef5e 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -110,7 +110,7 @@ struct dst_entry { }; extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[RTAX_MAX]; +extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL #define __DST_METRICS_PTR(Y) \ diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..56d63612e1e4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -const u32 dst_default_metrics[RTAX_MAX]; +const u32 dst_default_metrics[RTAX_MAX + 1] = { + /* This initializer is needed to force linker to place this variable + * into const section. Otherwise it might end into bss section. + * We really want to avoid false sharing on this variable, and catch + * any writes on it. + */ + [RTAX_MAX] = 0xdeadbeef, +}; + void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags) From be72f63b4cd5d6778cf7eb0d0a86f18f96b51c0c Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 7 Aug 2012 07:27:25 +0000 Subject: [PATCH 13/33] sched: add missing group change to qfq_change_class [Resending again, as the text was corrupted by the email client] To speed up operations, QFQ internally divides classes into groups. Which group a class belongs to depends on the ratio between the maximum packet length and the weight of the class. Unfortunately the function qfq_change_class lacks the steps for changing the group of a class when the ratio max_pkt_len/weight of the class changes. For example, when the last of the following three commands is executed, the group of class 1:1 is not correctly changed: tc disc add dev XXX root handle 1: qfq tc class add dev XXX parent 1: qfq classid 1:1 weight 1 tc class change dev XXX parent 1: classid 1:1 qfq weight 4 Not changing the group of a class does not affect the long-term bandwidth guaranteed to the class, as the latter is independent of the maximum packet length, and correctly changes (only) if the weight of the class changes. In contrast, if the group of the class is not updated, the class is still guaranteed the short-term bandwidth and packet delay related to its old group, instead of the guarantees that it should receive according to its new weight and/or maximum packet length. This may also break service guarantees for other classes. This patch adds the missing operations. Signed-off-by: Paolo Valente Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 95 ++++++++++++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 26 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 9af01f3df18c..e4723d31fdd5 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -203,6 +203,34 @@ out: return index; } +/* Length of the next packet (0 if the queue is empty). */ +static unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + + skb = sch->ops->peek(sch); + return skb ? qdisc_pkt_len(skb) : 0; +} + +static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *); +static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, + unsigned int len); + +static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl, + u32 lmax, u32 inv_w, int delta_w) +{ + int i; + + /* update qfq-specific data */ + cl->lmax = lmax; + cl->inv_w = inv_w; + i = qfq_calc_index(cl->inv_w, cl->lmax); + + cl->grp = &q->groups[i]; + + q->wsum += delta_w; +} + static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, unsigned long *arg) { @@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, lmax = 1UL << QFQ_MTU_SHIFT; if (cl != NULL) { + bool need_reactivation = false; + if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, &cl->rate_est, qdisc_root_sleeping_lock(sch), @@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } - if (inv_w != cl->inv_w) { - sch_tree_lock(sch); - q->wsum += delta_w; - cl->inv_w = inv_w; - sch_tree_unlock(sch); + if (lmax == cl->lmax && inv_w == cl->inv_w) + return 0; /* nothing to update */ + + i = qfq_calc_index(inv_w, lmax); + sch_tree_lock(sch); + if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) { + /* + * shift cl->F back, to not charge the + * class for the not-yet-served head + * packet + */ + cl->F = cl->S; + /* remove class from its slot in the old group */ + qfq_deactivate_class(q, cl); + need_reactivation = true; } + + qfq_update_class_params(q, cl, lmax, inv_w, delta_w); + + if (need_reactivation) /* activate in new group */ + qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc)); + sch_tree_unlock(sch); + return 0; } @@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->refcnt = 1; cl->common.classid = classid; - cl->lmax = lmax; - cl->inv_w = inv_w; - i = qfq_calc_index(cl->inv_w, cl->lmax); - cl->grp = &q->groups[i]; + qfq_update_class_params(q, cl, lmax, inv_w, delta_w); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); @@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } } - q->wsum += weight; sch_tree_lock(sch); qdisc_class_hash_insert(&q->clhash, &cl->common); @@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) } } -/* What is length of next packet in queue (0 if queue is empty) */ -static unsigned int qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - - skb = sch->ops->peek(sch); - return skb ? qdisc_pkt_len(skb) : 0; -} - /* * Updates the class, returns true if also the group needs to be updated. */ @@ -843,11 +877,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_group *grp; struct qfq_class *cl; int err; - u64 roundedS; - int s; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -876,11 +907,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; /* If reach this point, queue q was idle */ - grp = cl->grp; + qfq_activate_class(q, cl, qdisc_pkt_len(skb)); + + return err; +} + +/* + * Handle class switch from idle to backlogged. + */ +static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, + unsigned int pkt_len) +{ + struct qfq_group *grp = cl->grp; + u64 roundedS; + int s; + qfq_update_start(q, cl); /* compute new finish time and rounded start. */ - cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; + cl->F = cl->S + (u64)pkt_len * cl->inv_w; roundedS = qfq_round_down(cl->S, grp->slot_shift); /* @@ -917,8 +962,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) skip_update: qfq_slot_insert(grp, cl, roundedS); - - return err; } From 155e4e12b9f49c2dc817bb4c44e9416c46833c3d Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 7 Aug 2012 08:32:34 +0000 Subject: [PATCH 14/33] batman-adv: Fix mem leak in the batadv_tt_local_event() function Memory is allocated for 'tt_change_node' with kmalloc(). 'tt_change_node' may go out of scope really being used for anything (except have a few members initialized) if we hit the 'del:' label. This patch makes sure we free the memory in that case. Signed-off-by: Jesper Juhl Acked-by: Antonio Quartulli Signed-off-by: David S. Miller --- net/batman-adv/translation-table.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a438f4b582fc..99dd8f75b3ff 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -197,6 +197,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, del: list_del(&entry->list); kfree(entry); + kfree(tt_change_node); event_removed = true; goto unlock; } From 47dffc7547ed3b105dabd570ed864347687beac2 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 7 Aug 2012 11:56:26 +0000 Subject: [PATCH 15/33] cdc-phonet: Don't leak in usbpn_open MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We allocate memory for 'req' with usb_alloc_urb() and then test 'if (!req || rx_submit(pnd, req, GFP_KERNEL | __GFP_COLD))'. If we enter that branch due to '!req' then there is no problem. But if we enter the branch due to 'req' being != 0 and the 'rx_submit()' call being false, then we'll leak the memory we allocated. Deal with the leak by always calling 'usb_free_urb(req)' when entering the branch. If 'req' happens to be 0 then the call is harmless, if it is not 0 then we free the memory we allocated but don't need. Signed-off-by: Jesper Juhl Acked-by: RĂ©mi Denis-Courmont Signed-off-by: David S. Miller --- drivers/net/usb/cdc-phonet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index 64610048ce87..7d78669000d7 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -232,6 +232,7 @@ static int usbpn_open(struct net_device *dev) struct urb *req = usb_alloc_urb(0, GFP_KERNEL); if (!req || rx_submit(pnd, req, GFP_KERNEL | __GFP_COLD)) { + usb_free_urb(req); usbpn_close(dev); return -ENOMEM; } From 7364e445f62825758fa61195d237a5b8ecdd06ec Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 8 Aug 2012 00:33:25 +0000 Subject: [PATCH 16/33] net/core: Fix potential memory leak in dev_set_alias() Do not leak memory by updating pointer with potentially NULL realloc return value. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index f91abf800161..a39354ee1432 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1055,6 +1055,8 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { + char *new_ifalias; + ASSERT_RTNL(); if (len >= IFALIASZ) @@ -1068,9 +1070,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return 0; } - dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!dev->ifalias) + new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); + if (!new_ifalias) return -ENOMEM; + dev->ifalias = new_ifalias; strlcpy(dev->ifalias, alias, len+1); return len; From 3a32aea6e01b10a462763e0b0e00805f387ca010 Mon Sep 17 00:00:00 2001 From: "stigge@antcom.de" Date: Wed, 8 Aug 2012 03:18:54 +0000 Subject: [PATCH 17/33] lpc_eth: remove obsolete ifdefs The #ifdefs regarding CONFIG_ARCH_LPC32XX_MII_SUPPORT and CONFIG_ARCH_LPC32XX_IRAM_FOR_NET are obsolete since the symbols have been removed from Kconfig and replaced by devicetree based configuration. Signed-off-by: Roland Stigge Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 4069edab229e..53743f7a2ca9 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -346,28 +346,15 @@ static phy_interface_t lpc_phy_interface_mode(struct device *dev) "phy-mode", NULL); if (mode && !strcmp(mode, "mii")) return PHY_INTERFACE_MODE_MII; - return PHY_INTERFACE_MODE_RMII; } - - /* non-DT */ -#ifdef CONFIG_ARCH_LPC32XX_MII_SUPPORT - return PHY_INTERFACE_MODE_MII; -#else return PHY_INTERFACE_MODE_RMII; -#endif } static bool use_iram_for_net(struct device *dev) { if (dev && dev->of_node) return of_property_read_bool(dev->of_node, "use-iram"); - - /* non-DT */ -#ifdef CONFIG_ARCH_LPC32XX_IRAM_FOR_NET - return true; -#else return false; -#endif } /* Receive Status information word */ From f5addb91d14e63beb6224a62fb81b6e610cee3bc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Aug 2012 04:47:24 +0000 Subject: [PATCH 18/33] net/stmmac: mark probe function as __devinit Driver probe functions are generally __devinit so they will be discarded after initialization for non-hotplug kernels. This was found by a new warning after patch 6a228452d "stmmac: Add device-tree support" adds a new __devinit function that is called from stmmac_pltfr_probe. Without this patch, building socfpga_defconfig results in: WARNING: drivers/net/ethernet/stmicro/stmmac/stmmac.o(.text+0x5d4c): Section mismatch in reference from the function stmmac_pltfr_probe() to the function .devinit.text:stmmac_probe_config_dt() The function stmmac_pltfr_probe() references the function __devinit stmmac_probe_config_dt(). This is often because stmmac_pltfr_probe lacks a __devinit annotation or the annotation of stmmac_probe_config_dt is wrong. Signed-off-by: Arnd Bergmann Cc: Stefan Roese Cc: Giuseppe Cavallaro Cc: David S. Miller Cc: netdev@vger.kernel.org Acked-by: Stefan Roese Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index cd01ee7ecef1..b93245c11995 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -74,7 +74,7 @@ static int __devinit stmmac_probe_config_dt(struct platform_device *pdev, * the necessary resources and invokes the main to init * the net device, register the mdio bus etc. */ -static int stmmac_pltfr_probe(struct platform_device *pdev) +static int __devinit stmmac_pltfr_probe(struct platform_device *pdev) { int ret = 0; struct resource *res; From 36471012e2ae28ca3178f84d4687a2d88a36593e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Aug 2012 11:19:13 +0200 Subject: [PATCH 19/33] tcp: must free metrics at net dismantle We currently leak all tcp metrics at struct net dismantle time. tcp_net_metrics_exit() frees the hash table, we must first iterate it to free all metrics. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 2288a6399e1e..0abe67bb4d3a 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -731,6 +731,18 @@ static int __net_init tcp_net_metrics_init(struct net *net) static void __net_exit tcp_net_metrics_exit(struct net *net) { + unsigned int i; + + for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) { + struct tcp_metrics_block *tm, *next; + + tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1); + while (tm) { + next = rcu_dereference_protected(tm->tcpm_next, 1); + kfree(tm); + tm = next; + } + } kfree(net->ipv4.tcp_metrics_hash); } From b669588abaceb5c6d70699b6c009e5cedc42449b Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Sat, 28 Jul 2012 05:07:48 +0000 Subject: [PATCH 20/33] igb: fix panic while dumping packets on Tx hang with IOMMU This patch resolves a "BUG: unable to handle kernel paging request at ..." oops while dumping packet data. The issue occurs with IOMMU enabled due to the address provided by phys_to_virt(). This patch avoids phys_to_virt() by making using skb->data and the address of the pages allocated for Rx. Signed-off-by: Emil Tantilov Tested-by: Jeff Pieper Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/igb/igb_main.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b7c2d5050572..48cc4fb1a307 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -462,10 +462,10 @@ static void igb_dump(struct igb_adapter *adapter) (u64)buffer_info->time_stamp, buffer_info->skb, next_desc); - if (netif_msg_pktdata(adapter) && buffer_info->dma != 0) + if (netif_msg_pktdata(adapter) && buffer_info->skb) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, - 16, 1, phys_to_virt(buffer_info->dma), + 16, 1, buffer_info->skb->data, buffer_info->length, true); } } @@ -547,18 +547,17 @@ rx_ring_summary: (u64)buffer_info->dma, buffer_info->skb, next_desc); - if (netif_msg_pktdata(adapter)) { + if (netif_msg_pktdata(adapter) && + buffer_info->dma && buffer_info->skb) { print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, - 16, 1, - phys_to_virt(buffer_info->dma), - IGB_RX_HDR_LEN, true); + DUMP_PREFIX_ADDRESS, + 16, 1, buffer_info->skb->data, + IGB_RX_HDR_LEN, true); print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt( - buffer_info->page_dma + - buffer_info->page_offset), + page_address(buffer_info->page) + + buffer_info->page_offset, PAGE_SIZE/2, true); } } From f0c5dadff3fbda77a65b8577fee437c3d771233d Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 1 Aug 2012 08:12:21 +0000 Subject: [PATCH 21/33] e1000e: fix panic while dumping packets on Tx hang with IOMMU This patch resolves a "BUG: unable to handle kernel paging request at ..." oops while dumping packet data. The issue occurs with IOMMU enabled due to the address provided by phys_to_virt(). This patch avoids phys_to_virt() by using skb->data and the address of the pages allocated for Rx. Signed-off-by: Emil Tantilov Tested-by: Jeff Pieper Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/e1000e/netdev.c | 36 ++++++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 95b245310f17..46c3b1f9ff89 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -178,6 +178,24 @@ static void e1000_regdump(struct e1000_hw *hw, struct e1000_reg_info *reginfo) pr_info("%-15s %08x %08x\n", rname, regs[0], regs[1]); } +static void e1000e_dump_ps_pages(struct e1000_adapter *adapter, + struct e1000_buffer *bi) +{ + int i; + struct e1000_ps_page *ps_page; + + for (i = 0; i < adapter->rx_ps_pages; i++) { + ps_page = &bi->ps_pages[i]; + + if (ps_page->page) { + pr_info("packet dump for ps_page %d:\n", i); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, + 16, 1, page_address(ps_page->page), + PAGE_SIZE, true); + } + } +} + /* * e1000e_dump - Print registers, Tx-ring and Rx-ring */ @@ -299,10 +317,10 @@ static void e1000e_dump(struct e1000_adapter *adapter) (unsigned long long)buffer_info->time_stamp, buffer_info->skb, next_desc); - if (netif_msg_pktdata(adapter) && buffer_info->dma != 0) + if (netif_msg_pktdata(adapter) && buffer_info->skb) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, - 16, 1, phys_to_virt(buffer_info->dma), - buffer_info->length, true); + 16, 1, buffer_info->skb->data, + buffer_info->skb->len, true); } /* Print Rx Ring Summary */ @@ -381,10 +399,8 @@ rx_ring_summary: buffer_info->skb, next_desc); if (netif_msg_pktdata(adapter)) - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt(buffer_info->dma), - adapter->rx_ps_bsize0, true); + e1000e_dump_ps_pages(adapter, + buffer_info); } } break; @@ -444,12 +460,12 @@ rx_ring_summary: (unsigned long long)buffer_info->dma, buffer_info->skb, next_desc); - if (netif_msg_pktdata(adapter)) + if (netif_msg_pktdata(adapter) && + buffer_info->skb) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt - (buffer_info->dma), + buffer_info->skb->data, adapter->rx_buffer_len, true); } From a2d6a1d5a435b051d3660961d5dee5b33b15f754 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Aug 2012 05:18:04 +0000 Subject: [PATCH 22/33] igb: Fix register defines for all non-82575 hardware It looks like the register defines for DCA were never updated after going from 82575 to 82576. This change addresses that by updating the defines. Signed-off-by: Alexander Duyck Tested-by: Jeff Pieper Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/igb/e1000_regs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 10efcd88dca0..28394bea5253 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -156,8 +156,12 @@ : (0x0E018 + ((_n) * 0x40))) #define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) \ : (0x0E028 + ((_n) * 0x40))) -#define E1000_DCA_TXCTRL(_n) (0x03814 + (_n << 8)) -#define E1000_DCA_RXCTRL(_n) (0x02814 + (_n << 8)) +#define E1000_RXCTL(_n) ((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \ + (0x0C014 + ((_n) * 0x40))) +#define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n) +#define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \ + (0x0E014 + ((_n) * 0x40))) +#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n) #define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) \ : (0x0E038 + ((_n) * 0x40))) #define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) \ From 66d1b9263a371abd15806c53f486f0645ef31a8f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 9 Aug 2012 02:50:40 +0000 Subject: [PATCH 23/33] tun: don't zeroize sock->file on detach This is a fix for bug, introduced in 3.4 kernel by commit 1ab5ecb90cb6a3df1476e052f76a6e8f6511cb3d ("tun: don't hold network namespace by tun sockets"), which, among other things, replaced simple sock_put() by sk_release_kernel(). Below is sequence, which leads to oops for non-persistent devices: tun_chr_close() tun_detach() <== tun->socket.file = NULL tun_free_netdev() sk_release_sock() sock_release(sock->file == NULL) iput(SOCK_INODE(sock)) <== dereference on NULL pointer This patch just removes zeroing of socket's file from __tun_detach(). sock_release() will do this. Cc: stable@vger.kernel.org Reported-by: Ruan Zhijie Tested-by: Ruan Zhijie Acked-by: Al Viro Acked-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: Stanislav Kinsbursky Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 926d4db5cb38..3a16d4fdaa05 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -187,7 +187,6 @@ static void __tun_detach(struct tun_struct *tun) netif_tx_lock_bh(tun->dev); netif_carrier_off(tun->dev); tun->tfile = NULL; - tun->socket.file = NULL; netif_tx_unlock_bh(tun->dev); /* Drop read queue */ From 8eee694c3e667e6f0856d9c8525208058f9d42bf Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 9 Aug 2012 04:37:25 +0000 Subject: [PATCH 24/33] bnx2x: fix unload previous driver flow when flr-capable The existing previous driver unload flow is flawed, causing the probe of functions reaching the 'uncommon fork' in flr-capable devices to fail. This patch resolves this, as well as fixing the flow for hypervisors which disable flr capabilities from functions as they pass them as PDA to VMs, as we cannot base the flow on the pci configuration space. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 55 ++++++++++--------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index dd451c3dd83d..5384a6dd155f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9384,32 +9384,24 @@ static int __devinit bnx2x_prev_mark_path(struct bnx2x *bp) return rc; } -static bool __devinit bnx2x_can_flr(struct bnx2x *bp) -{ - int pos; - u32 cap; - struct pci_dev *dev = bp->pdev; - - pos = pci_pcie_cap(dev); - if (!pos) - return false; - - pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP, &cap); - if (!(cap & PCI_EXP_DEVCAP_FLR)) - return false; - - return true; -} - static int __devinit bnx2x_do_flr(struct bnx2x *bp) { int i, pos; u16 status; struct pci_dev *dev = bp->pdev; - /* probe the capability first */ - if (bnx2x_can_flr(bp)) - return -ENOTTY; + + if (CHIP_IS_E1x(bp)) { + BNX2X_DEV_INFO("FLR not supported in E1/E1H\n"); + return -EINVAL; + } + + /* only bootcode REQ_BC_VER_4_INITIATE_FLR and onwards support flr */ + if (bp->common.bc_ver < REQ_BC_VER_4_INITIATE_FLR) { + BNX2X_ERR("FLR not supported by BC_VER: 0x%x\n", + bp->common.bc_ver); + return -EINVAL; + } pos = pci_pcie_cap(dev); if (!pos) @@ -9429,12 +9421,8 @@ static int __devinit bnx2x_do_flr(struct bnx2x *bp) "transaction is not cleared; proceeding with reset anyway\n"); clear: - if (bp->common.bc_ver < REQ_BC_VER_4_INITIATE_FLR) { - BNX2X_ERR("FLR not supported by BC_VER: 0x%x\n", - bp->common.bc_ver); - return -EINVAL; - } + BNX2X_DEV_INFO("Initiating FLR\n"); bnx2x_fw_command(bp, DRV_MSG_CODE_INITIATE_FLR, 0); return 0; @@ -9454,8 +9442,21 @@ static int __devinit bnx2x_prev_unload_uncommon(struct bnx2x *bp) * the one required, then FLR will be sufficient to clean any residue * left by previous driver */ - if (bnx2x_test_firmware_version(bp, false) && bnx2x_can_flr(bp)) - return bnx2x_do_flr(bp); + rc = bnx2x_test_firmware_version(bp, false); + + if (!rc) { + /* fw version is good */ + BNX2X_DEV_INFO("FW version matches our own. Attempting FLR\n"); + rc = bnx2x_do_flr(bp); + } + + if (!rc) { + /* FLR was performed */ + BNX2X_DEV_INFO("FLR successful\n"); + return 0; + } + + BNX2X_DEV_INFO("Could not FLR\n"); /* Close the MCP request, return failure*/ rc = bnx2x_prev_mcp_done(bp); From 2070ffa2c8d4dc17cd8fc4b30d2710bcde27b051 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 9 Aug 2012 04:37:26 +0000 Subject: [PATCH 25/33] bnx2x: Fix recovery flow cleanup during probe During probe, every function probed clears the recovery registers from all functions on its path - thus signaling that given a future recovery event, there will be no need to wait for those functions. This is a flawed behaviour - each function should only be responsible for its own bit. Since this registers are handled during the load/unload routines, this cleanup is removed altogether. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 5384a6dd155f..02b5a343b195 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -4041,20 +4041,6 @@ static bool bnx2x_get_load_status(struct bnx2x *bp, int engine) return val != 0; } -/* - * Reset the load status for the current engine. - */ -static void bnx2x_clear_load_status(struct bnx2x *bp) -{ - u32 val; - u32 mask = (BP_PATH(bp) ? BNX2X_PATH1_LOAD_CNT_MASK : - BNX2X_PATH0_LOAD_CNT_MASK); - bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RECOVERY_REG); - val = REG_RD(bp, BNX2X_RECOVERY_GLOB_REG); - REG_WR(bp, BNX2X_RECOVERY_GLOB_REG, val & (~mask)); - bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RECOVERY_REG); -} - static void _print_next_block(int idx, const char *blk) { pr_cont("%s%s", idx ? ", " : "", blk); @@ -11428,9 +11414,6 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev, if (!chip_is_e1x) REG_WR(bp, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); - /* Reset the load counter */ - bnx2x_clear_load_status(bp); - dev->watchdog_timeo = TX_TIMEOUT; dev->netdev_ops = &bnx2x_netdev_ops; From 3a7c384ffd57ef5fbd95f48edaa2ca4eb3d9f2ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Aug 2012 13:56:06 +0000 Subject: [PATCH 26/33] ipv4: tcp: unicast_sock should not land outside of TCP stack commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a selinux regression, reported and bisected by John Stultz selinux_ip_postroute_compat() expect to find a valid sk->sk_security pointer, but this field is NULL for unicast_sock It turns out that unicast_sock are really temporary stuff to be able to reuse part of IP stack (ip_append_data()/ip_push_pending_frames()) Fact is that frames sent by ip_send_unicast_reply() should be orphaned to not fool LSM. Note IPv6 never had this problem, as tcp_v6_send_response() doesnt use a fake socket at all. I'll probably implement tcp_v4_send_response() to remove these unicast_sock in linux-3.7 Reported-by: John Stultz Bisected-by: John Stultz Signed-off-by: Eric Dumazet Cc: Paul Moore Cc: Eric Paris Cc: "Serge E. Hallyn" Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 76dde25fb9a0..ec410e08b4b9 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1536,6 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; + skb_orphan(nskb); skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } From 63d02d157ec4124990258d66517b6c11fd6df0cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Aug 2012 14:11:00 +0000 Subject: [PATCH 27/33] net: tcp: ipv6_mapped needs sk_rx_dst_set method commit 5d299f3d3c8a2fb (net: ipv6: fix TCP early demux) added a regression for ipv6_mapped case. [ 67.422369] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 67.449678] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 92.631060] BUG: unable to handle kernel NULL pointer dereference at (null) [ 92.631435] IP: [< (null)>] (null) [ 92.631645] PGD 0 [ 92.631846] Oops: 0010 [#1] SMP [ 92.632095] Modules linked in: autofs4 sunrpc ipv6 dm_mirror dm_region_hash dm_log dm_multipath dm_mod video sbs sbshc battery ac lp parport sg snd_hda_intel snd_hda_codec snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device pcspkr snd_pcm_oss snd_mixer_oss snd_pcm snd_timer serio_raw button floppy snd i2c_i801 i2c_core soundcore snd_page_alloc shpchp ide_cd_mod cdrom microcode ehci_hcd ohci_hcd uhci_hcd [ 92.634294] CPU 0 [ 92.634294] Pid: 4469, comm: sendmail Not tainted 3.6.0-rc1 #3 [ 92.634294] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 92.634294] RSP: 0018:ffff880245fc7cb0 EFLAGS: 00010282 [ 92.634294] RAX: ffffffffa01985f0 RBX: ffff88024827ad00 RCX: 0000000000000000 [ 92.634294] RDX: 0000000000000218 RSI: ffff880254735380 RDI: ffff88024827ad00 [ 92.634294] RBP: ffff880245fc7cc8 R08: 0000000000000001 R09: 0000000000000000 [ 92.634294] R10: 0000000000000000 R11: ffff880245fc7bf8 R12: ffff880254735380 [ 92.634294] R13: ffff880254735380 R14: 0000000000000000 R15: 7fffffffffff0218 [ 92.634294] FS: 00007f4516ccd6f0(0000) GS:ffff880256600000(0000) knlGS:0000000000000000 [ 92.634294] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 92.634294] CR2: 0000000000000000 CR3: 0000000245ed1000 CR4: 00000000000007f0 [ 92.634294] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 92.634294] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 92.634294] Process sendmail (pid: 4469, threadinfo ffff880245fc6000, task ffff880254b8cac0) [ 92.634294] Stack: [ 92.634294] ffffffff813837a7 ffff88024827ad00 ffff880254b6b0e8 ffff880245fc7d68 [ 92.634294] ffffffff81385083 00000000001d2680 ffff8802547353a8 ffff880245fc7d18 [ 92.634294] ffffffff8105903a ffff88024827ad60 0000000000000002 00000000000000ff [ 92.634294] Call Trace: [ 92.634294] [] ? tcp_finish_connect+0x2c/0xfa [ 92.634294] [] tcp_rcv_state_process+0x2b6/0x9c6 [ 92.634294] [] ? sched_clock_cpu+0xc3/0xd1 [ 92.634294] [] ? local_clock+0x2b/0x3c [ 92.634294] [] tcp_v4_do_rcv+0x63a/0x670 [ 92.634294] [] release_sock+0x128/0x1bd [ 92.634294] [] __inet_stream_connect+0x1b1/0x352 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? wake_up_bit+0x25/0x25 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? inet_stream_connect+0x22/0x4b [ 92.634294] [] inet_stream_connect+0x33/0x4b [ 92.634294] [] sys_connect+0x78/0x9e [ 92.634294] [] ? sysret_check+0x1b/0x56 [ 92.634294] [] ? __audit_syscall_entry+0x195/0x1c8 [ 92.634294] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 92.634294] [] system_call_fastpath+0x16/0x1b [ 92.634294] Code: Bad RIP value. [ 92.634294] RIP [< (null)>] (null) [ 92.634294] RSP [ 92.634294] CR2: 0000000000000000 [ 92.648982] ---[ end trace 24e2bed94314c8d9 ]--- [ 92.649146] Kernel panic - not syncing: Fatal exception in interrupt Fix this using inet_sk_rx_dst_set(), and export this function in case IPv6 is modular. Reported-by: Andrew Morton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index e19124b84cd2..1f000ffe7075 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -464,6 +464,7 @@ extern int tcp_disconnect(struct sock *sk, int flags); void tcp_connect_init(struct sock *sk); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 272241f16fcb..767823764016 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1869,7 +1869,7 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -1877,6 +1877,7 @@ static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } +EXPORT_SYMBOL(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a439e9a4c01..bb9ce2b2f377 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1777,6 +1777,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), From b5ec8eeac46a99004c26791f70b15d001e970acf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Aug 2012 02:22:47 +0000 Subject: [PATCH 28/33] ipv4: fix ip_send_skb() ip_send_skb() can send orphaned skb, so we must pass the net pointer to avoid possible NULL dereference in error path. Bug added by commit 3a7c384ffd57 (ipv4: tcp: unicast_sock should not land outside of TCP stack) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_output.c | 5 ++--- net/ipv4/udp.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index bd5e444a19ce..5a5d84d3d2c6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -120,7 +120,7 @@ extern struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); -extern int ip_send_skb(struct sk_buff *skb); +extern int ip_send_skb(struct net *net, struct sk_buff *skb); extern int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4); extern void ip_flush_pending_frames(struct sock *sk); extern struct sk_buff *ip_make_skb(struct sock *sk, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ec410e08b4b9..147ccc3e93db 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1366,9 +1366,8 @@ out: return skb; } -int ip_send_skb(struct sk_buff *skb) +int ip_send_skb(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); int err; err = ip_local_out(skb); @@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) return 0; /* Netfilter gets whole the not fragmented skb. */ - return ip_send_skb(skb); + return ip_send_skb(sock_net(sk), skb); } /* diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991f..6f6d1aca3c3d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->check = CSUM_MANGLED_0; send: - err = ip_send_skb(skb); + err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), From 55461ddbcb0b36d1575e01fb4f130c609ca1cfee Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 10 Aug 2012 07:35:14 +0000 Subject: [PATCH 29/33] ixgbe: add missing braces This patch adds missing braces around the 10gig link check to include the check for KR support. Signed-off-by: Emil Tantilov Reported-by: Sascha Wildner Tested-by: Phil Schmitt Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 50fc137501da..18bf08c9d7a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -804,12 +804,13 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { /* Set KX4/KX/KR support according to speed requested */ autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP); - if (speed & IXGBE_LINK_SPEED_10GB_FULL) + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { if (orig_autoc & IXGBE_AUTOC_KX4_SUPP) autoc |= IXGBE_AUTOC_KX4_SUPP; if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) && (hw->phy.smart_speed_active == false)) autoc |= IXGBE_AUTOC_KR_SUPP; + } if (speed & IXGBE_LINK_SPEED_1GB_FULL) autoc |= IXGBE_AUTOC_KX_SUPP; } else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) && From 2359a47671fc4fb0fe5e9945f76c2cb10792c0f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Jul 2012 20:52:21 +0000 Subject: [PATCH 30/33] codel: refine one condition to avoid a nul rec_inv_sqrt One condition before codel_Newton_step() was not good if we never left the dropping state for a flow. As a result rec_inv_sqrt was 0, instead of the ~0 initial value. codel control law was then set to a very aggressive mode, dropping many packets before reaching 'target' and recovering from this problem. To keep codel_vars_init() as efficient as possible, refine the condition to make sure rec_inv_sqrt initial value is correct Many thanks to Anton Mich for discovering the issue and suggesting a fix. Reported-by: Anton Mich Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/codel.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/codel.h b/include/net/codel.h index 550debfc2403..389cf621161d 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -305,6 +305,8 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, } } } else if (drop) { + u32 delta; + if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; } else { @@ -320,9 +322,11 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ - if (codel_time_before(now - vars->drop_next, + delta = vars->count - vars->lastcount; + if (delta > 1 && + codel_time_before(now - vars->drop_next, 16 * params->interval)) { - vars->count = (vars->count - vars->lastcount) | 1; + vars->count = delta; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. From 3a7f8c34fefb109903af9a0fac6d0d05f93335c5 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Sat, 11 Aug 2012 20:26:31 +0000 Subject: [PATCH 31/33] macvtap: rcu_dereference outside read-lock section rcu_dereference occurs in update section. Replacement by rcu_dereference_protected in order to prevent lockdep complaint. Found by Linux Driver Verification project (linuxtesting.org) Signed-off-by: Denis Efremov Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0737bd4d1669..0f0f9ce3a776 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -94,7 +94,8 @@ static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q) int i; for (i = 0; i < MAX_MACVTAP_QUEUES; i++) { - if (rcu_dereference(vlan->taps[i]) == q) + if (rcu_dereference_protected(vlan->taps[i], + lockdep_is_held(&macvtap_lock)) == q) return i; } From 7f5c3e3a80e6654cf48dfba7cf94f88c6b505467 Mon Sep 17 00:00:00 2001 From: "danborkmann@iogearbox.net" Date: Fri, 10 Aug 2012 22:48:54 +0000 Subject: [PATCH 32/33] af_packet: remove BUG statement in tpacket_destruct_skb Here's a quote of the comment about the BUG macro from asm-generic/bug.h: Don't use BUG() or BUG_ON() unless there's really no way out; one example might be detecting data structure corruption in the middle of an operation that can't be backed out of. If the (sub)system can somehow continue operating, perhaps with reduced functionality, it's probably not BUG-worthy. If you're tempted to BUG(), think again: is completely giving up really the *only* solution? There are usually better options, where users don't need to reboot ASAP and can mostly shut down cleanly. In our case, the status flag of a ring buffer slot is managed from both sides, the kernel space and the user space. This means that even though the kernel side might work as expected, the user space screws up and changes this flag right between the send(2) is triggered when the flag is changed to TP_STATUS_SENDING and a given skb is destructed after some time. Then, this will hit the BUG macro. As David suggested, the best solution is to simply remove this statement since it cannot be used for kernel side internal consistency checks. I've tested it and the system still behaves /stable/ in this case, so in accordance with the above comment, we should rather remove it. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f016f6634a79..8ac890a1a4c0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1936,7 +1936,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) if (likely(po->tx_ring.pg_vec)) { ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); __packet_set_status(po, ph, TP_STATUS_AVAILABLE); From f57b07c0c7ca9e4dde36acfabdf474ee3c478e6d Mon Sep 17 00:00:00 2001 From: Joren Van Onder Date: Sat, 11 Aug 2012 17:10:35 +0000 Subject: [PATCH 33/33] bnx2x: Fix compiler warnings Fix the following compiler warnings: - drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c:2908:3: warning: comparison of distinct pointer types lacks a cast [enabled by default] - drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c:1709:7: warning: comparison of distinct pointer types lacks a cast [enabled by default] Signed-off-by: Joren Van Onder Acked-By: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 77bcd4cb4ffb..463b9ec57d80 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1278,7 +1278,7 @@ struct bnx2x { #define BNX2X_FW_RX_ALIGN_START (1UL << BNX2X_RX_ALIGN_SHIFT) #define BNX2X_FW_RX_ALIGN_END \ - max(1UL << BNX2X_RX_ALIGN_SHIFT, \ + max_t(u64, 1UL << BNX2X_RX_ALIGN_SHIFT, \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define BNX2X_PXP_DRAM_ALIGN (BNX2X_RX_ALIGN_SHIFT - 5)