From ae5c682113f9f94cc5e76f92cf041ee624c173ee Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 19 Mar 2017 22:35:59 +0800 Subject: [PATCH 01/82] netfilter: nfnl_cthelper: fix incorrect helper->expect_class_max The helper->expect_class_max must be set to the total number of expect_policy minus 1, since we will use the statement "if (class > helper->expect_class_max)" to validate the CTA_EXPECT_CLASS attr in ctnetlink_alloc_expect. So for compatibility, set the helper->expect_class_max to the NFCTH_POLICY_SET_NUM attr's value minus 1. Also: it's invalid when the NFCTH_POLICY_SET_NUM attr's value is zero. 1. this will result "expect_policy = kzalloc(0, GFP_KERNEL);"; 2. we cannot set the helper->expect_class_max to a proper value. So if nla_get_be32(tb[NFCTH_POLICY_SET_NUM]) is zero, report -EINVAL to the userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de8782345c86..3cd41d105407 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -161,6 +161,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + unsigned int class_max; ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set); @@ -170,19 +171,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; - helper->expect_class_max = - ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); - - if (helper->expect_class_max != 0 && - helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (class_max == 0) + return -EINVAL; + if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * - helper->expect_class_max, GFP_KERNEL); + class_max, GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; - for (i=0; iexpect_class_max; i++) { + for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; @@ -191,6 +191,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (ret < 0) goto err; } + + helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: @@ -377,10 +379,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, - htonl(helper->expect_class_max))) + htonl(helper->expect_class_max + 1))) goto nla_put_failure; - for (i=0; iexpect_class_max; i++) { + for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET+i) | NLA_F_NESTED); if (nest_parms2 == NULL) From 2c422257550f123049552b39f7af6e3428a60f43 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Mar 2017 13:32:37 +0100 Subject: [PATCH 02/82] netfilter: nfnl_cthelper: fix runtime expectation policy updates We only allow runtime updates of expectation policies for timeout and maximum number of expectations, otherwise reject the update. Signed-off-by: Pablo Neira Ayuso Acked-by: Liping Zhang --- net/netfilter/nfnetlink_cthelper.c | 86 +++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 3cd41d105407..90f291e27eb1 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -255,6 +255,89 @@ err: return ret; } +static int +nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, + struct nf_conntrack_expect_policy *new_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) + return -EBUSY; + + new_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + new_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], + struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1]; + struct nf_conntrack_expect_policy *policy; + int i, err; + + /* Check first that all policy attributes are well-formed, so we don't + * leave things in inconsistent state on errors. + */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + + if (!tb[NFCTH_POLICY_SET + i]) + return -EINVAL; + + err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], + &new_policy[i], + tb[NFCTH_POLICY_SET + i]); + if (err < 0) + return err; + } + /* Now we can safely update them. */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + policy = (struct nf_conntrack_expect_policy *) + &helper->expect_policy[i]; + policy->max_expected = new_policy->max_expected; + policy->timeout = new_policy->timeout; + } + + return 0; +} + +static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; + unsigned int class_max; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (helper->expect_class_max + 1 != class_max) + return -EBUSY; + + return nfnl_cthelper_update_policy_all(tb, helper); +} + static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) @@ -265,8 +348,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return -EBUSY; if (tb[NFCTH_POLICY]) { - ret = nfnl_cthelper_parse_expect_policy(helper, - tb[NFCTH_POLICY]); + ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } From f83bf8da1135ca635aac8f062cad3f001fcf3a26 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Tue, 21 Mar 2017 15:07:10 +0800 Subject: [PATCH 03/82] netfilter: nfnl_cthelper: Fix memory leak We have memory leaks of nf_conntrack_helper & expect_policy. Signed-off-by: Jeffy Chen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 90f291e27eb1..2b987d2a77bc 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -216,7 +216,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) - goto err; + goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); @@ -247,10 +247,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nf_conntrack_helper_register(helper); if (ret < 0) - goto err; + goto err2; return 0; -err: +err2: + kfree(helper->expect_policy); +err1: kfree(helper); return ret; } @@ -696,6 +698,8 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, found = true; nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(cur); } } /* Make sure we return success if we flush and there is no helpers */ @@ -759,6 +763,8 @@ static void __exit nfnl_cthelper_exit(void) continue; nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(cur); } } } From f35ec35e5e726e7137deaee0dec24f8cbf07a48f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 Mar 2017 22:40:30 -0700 Subject: [PATCH 04/82] net: phy: Export mdiobus_register_board_info() We can build modular code that uses mdiobus_register_board_info() which would lead to linking failure since this symbol is not expoerted. Fixes: 648ea0134069 ("net: phy: Allow pre-declaration of MDIO devices") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mdio-boardinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c index 6b988f77da08..61941e29daae 100644 --- a/drivers/net/phy/mdio-boardinfo.c +++ b/drivers/net/phy/mdio-boardinfo.c @@ -84,3 +84,4 @@ int mdiobus_register_board_info(const struct mdio_board_info *info, return 0; } +EXPORT_SYMBOL(mdiobus_register_board_info); From 48481c8fa16410ffa45939b13b6c53c2ca609e5f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Mar 2017 12:39:21 -0700 Subject: [PATCH 05/82] net: neigh: guard against NULL solicit() method Dmitry posted a nice reproducer of a bug triggering in neigh_probe() when dereferencing a NULL neigh->ops->solicit method. This can happen for arp_direct_ops/ndisc_direct_ops and similar, which can be used for NUD_NOARP neighbours (created when dev->header_ops is NULL). Admin can then force changing nud_state to some other state that would fire neigh timer. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/core/neighbour.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e7c12caa20c8..4526cbd7e28a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -860,7 +860,8 @@ static void neigh_probe(struct neighbour *neigh) if (skb) skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); + if (neigh->ops->solicit) + neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); kfree_skb(skb); } From a95600294157ca7527ee7c70249fb53e09d8c566 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 26 Jan 2017 14:43:32 +0200 Subject: [PATCH 06/82] iwlwifi: mvm: fix accessing fw_id_to_mac_id Access should be by rcu_dereference. Issue was found by sparse. Fixes: 65e254821cee ("iwlwifi: mvm: use firmware station PM notification for AP_LINK_PS") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 6927caecd48e..486dcceed17a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2401,7 +2401,7 @@ void iwl_mvm_sta_pm_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) return; rcu_read_lock(); - sta = mvm->fw_id_to_mac_id[notif->sta_id]; + sta = rcu_dereference(mvm->fw_id_to_mac_id[notif->sta_id]); if (WARN_ON(IS_ERR_OR_NULL(sta))) { rcu_read_unlock(); return; From 251fe09f13bfb54c1ede66ee8bf8ddd0061c4f7c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Mar 2017 13:40:00 +0300 Subject: [PATCH 07/82] iwlwifi: mvm: writing zero bytes to debugfs causes a crash This is a static analysis fix. The warning is: drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c:912 iwl_mvm_fw_dbg_collect() warn: integer overflows 'sizeof(*desc) + len' I guess this code is supposed to take a NUL character, but if we write zero bytes then it tries to write -1 characters and crashes. Fixes: c91b865cb14d ("iwlwifi: mvm: support description for user triggered fw dbg collection") Signed-off-by: Dan Carpenter Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index a260cd503200..077bfd8f4c0c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1056,6 +1056,8 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm, if (ret) return ret; + if (count == 0) + return 0; iwl_mvm_fw_dbg_collect(mvm, FW_DBG_TRIGGER_USER, buf, (count - 1), NULL); From 4d339989acd730f17bc814b5ddb9c54e405766b6 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 21 Mar 2017 17:13:16 +0200 Subject: [PATCH 08/82] iwlwifi: mvm: support ibss in dqa mode Allow working IBSS also when working in DQA mode. This is done by setting it to treat the queues the same as a BSS AP treats the queues. Fixes: 7948b87308a4 ("iwlwifi: mvm: enable dynamic queue allocation mode") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 9 ++++++--- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 7 +++++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 99132ea16ede..c5734e1a02d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -216,7 +216,8 @@ u32 iwl_mvm_mac_get_queues_mask(struct ieee80211_vif *vif) qmask |= BIT(vif->hw_queue[ac]); } - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) qmask |= BIT(vif->cab_queue); return qmask; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index b51a2853cc80..9d28db7f56aa 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1806,7 +1806,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_get_wd_timeout(mvm, vif, false, false); int queue; - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; else if (vif->type == NL80211_IFTYPE_P2P_DEVICE) queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE; @@ -1837,7 +1838,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * enabled-cab_queue to the mask) */ if (iwl_mvm_is_dqa_supported(mvm) && - vif->type == NL80211_IFTYPE_AP) { + (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC)) { struct iwl_trans_txq_scd_cfg cfg = { .fifo = IWL_MVM_TX_FIFO_MCAST, .sta_id = mvmvif->bcast_sta.sta_id, @@ -1862,7 +1864,8 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) iwl_mvm_disable_txq(mvm, vif->cab_queue, vif->cab_queue, IWL_MAX_TID_COUNT, 0); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 3f37075f4cde..1ba0a6f55503 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -506,6 +506,7 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, switch (info->control.vif->type) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: /* * Handle legacy hostapd as well, where station may be added * only after assoc. Take care of the case where we send a @@ -517,7 +518,8 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, if (info->hw_queue == info->control.vif->cab_queue) return info->hw_queue; - WARN_ONCE(1, "fc=0x%02x", le16_to_cpu(fc)); + WARN_ONCE(info->control.vif->type != NL80211_IFTYPE_ADHOC, + "fc=0x%02x", le16_to_cpu(fc)); return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; case NL80211_IFTYPE_P2P_DEVICE: if (ieee80211_is_mgmt(fc)) @@ -584,7 +586,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) iwl_mvm_vif_from_mac80211(info.control.vif); if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE || - info.control.vif->type == NL80211_IFTYPE_AP) { + info.control.vif->type == NL80211_IFTYPE_AP || + info.control.vif->type == NL80211_IFTYPE_ADHOC) { sta_id = mvmvif->bcast_sta.sta_id; queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr->frame_control); From 49d52e8108a21749dc2114b924c907db43358984 Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Wed, 22 Mar 2017 15:27:01 -0500 Subject: [PATCH 09/82] net: phy: handle state correctly in phy_stop_machine If the PHY is halted on stop, then do not set the state to PHY_UP. This ensures the phy will be restarted later in phy_start when the machine is started again. Fixes: 00db8189d984 ("This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details.") Signed-off-by: Nathan Sullivan Signed-off-by: Brad Mouring Acked-by: Xander Huff Acked-by: Kyle Roeschley Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1be69d8bc909..a2bfc82e95d7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -681,7 +681,7 @@ void phy_stop_machine(struct phy_device *phydev) cancel_delayed_work_sync(&phydev->state_queue); mutex_lock(&phydev->lock); - if (phydev->state > PHY_UP) + if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); } From 2f25abe6bac573928a990ccbdac75873add8127e Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 23 Mar 2017 19:14:19 +0800 Subject: [PATCH 10/82] r8152: prevent the driver from transmitting packets with carrier off The linking status may be changed when autosuspend. And, after autoresume, the driver may try to transmit packets when the device is carrier off, because the interrupt transfer doesn't update the linking status, yet. And, if the device is in ALDPS mode, the device would stop working. The another similar case is 1. unplug the cable. 2. interrupt transfer queue a work_queue for linking change. 3. device enters the ALDPS mode. 4. a tx occurs before the work_queue is called. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0b1b9188625d..c34df33c6d72 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1294,6 +1294,7 @@ static void intr_callback(struct urb *urb) } } else { if (netif_carrier_ok(tp->netdev)) { + netif_stop_queue(tp->netdev); set_bit(RTL8152_LINK_CHG, &tp->flags); schedule_delayed_work(&tp->schedule, 0); } @@ -3169,6 +3170,9 @@ static void set_carrier(struct r8152 *tp) napi_enable(&tp->napi); netif_wake_queue(netdev); netif_info(tp, link, netdev, "carrier on\n"); + } else if (netif_queue_stopped(netdev) && + skb_queue_len(&tp->tx_queue) < tp->tx_qlen) { + netif_wake_queue(netdev); } } else { if (netif_carrier_ok(netdev)) { @@ -3702,8 +3706,18 @@ static int rtl8152_resume(struct usb_interface *intf) tp->rtl_ops.autosuspend_en(tp, false); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); - if (netif_carrier_ok(tp->netdev)) - rtl_start_rx(tp); + + if (netif_carrier_ok(tp->netdev)) { + if (rtl8152_get_speed(tp) & LINK_STATUS) { + rtl_start_rx(tp); + } else { + netif_carrier_off(tp->netdev); + tp->rtl_ops.disable(tp); + netif_info(tp, link, tp->netdev, + "linking down\n"); + } + } + napi_enable(&tp->napi); clear_bit(SELECTIVE_SUSPEND, &tp->flags); smp_mb__after_atomic(); From 1adbddef118ae8bf6409c13208645d28c29731c1 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:41 +0300 Subject: [PATCH 11/82] net:ethernet:aquantia: Remove adapter re-opening when MTU changed. Closing/opening the adapter is not needed at all. The new MTU settings take effect immediately. Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index d05fbfdce5e5..5d6c40d86775 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -100,11 +100,6 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu) goto err_exit; ndev->mtu = new_mtu; - if (netif_running(ndev)) { - aq_ndev_close(ndev); - aq_ndev_open(ndev); - } - err_exit: return err; } From ea0504f554c8f989eab58549300d15582d36f039 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:42 +0300 Subject: [PATCH 12/82] net:ethernet:aquantia: Fix packet type detection (TCP/UDP) for IPv6. In order for the checksum offloads to work correctly we need to set the packet type bit (TCP/UDP) in the TX context buffer. Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code") Signed-off-by: Pavel Belous Tested-by: David Arcari Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_nic.c | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index ee78444bfb88..db2b51da2988 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -510,10 +510,22 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, if (skb->ip_summed == CHECKSUM_PARTIAL) { dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol) ? 1U : 0U; - dx_buff->is_tcp_cso = - (ip_hdr(skb)->protocol == IPPROTO_TCP) ? 1U : 0U; - dx_buff->is_udp_cso = - (ip_hdr(skb)->protocol == IPPROTO_UDP) ? 1U : 0U; + + if (ip_hdr(skb)->version == 4) { + dx_buff->is_tcp_cso = + (ip_hdr(skb)->protocol == IPPROTO_TCP) ? + 1U : 0U; + dx_buff->is_udp_cso = + (ip_hdr(skb)->protocol == IPPROTO_UDP) ? + 1U : 0U; + } else if (ip_hdr(skb)->version == 6) { + dx_buff->is_tcp_cso = + (ipv6_hdr(skb)->nexthdr == NEXTHDR_TCP) ? + 1U : 0U; + dx_buff->is_udp_cso = + (ipv6_hdr(skb)->nexthdr == NEXTHDR_UDP) ? + 1U : 0U; + } } for (; nr_frags--; ++frag_count) { From 9f0dd8c322e89f137c44b437d6fbecc9dea12204 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:43 +0300 Subject: [PATCH 13/82] net:ethernet:aquantia: Missing spinlock initialization. Fix for missing initialization aq_ring header.lock spinlock. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 0358e6072d45..3a8a4aa13687 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -101,6 +101,7 @@ int aq_ring_init(struct aq_ring_s *self) self->hw_head = 0; self->sw_head = 0; self->sw_tail = 0; + spin_lock_init(&self->header.lock); return 0; } From 386aff88e32ec3f82e3f032217bad0c8c8846349 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:44 +0300 Subject: [PATCH 14/82] net:ethernet:aquantia: Fix for LSO with IPv6. Fix Context Command bit: L3 type = "0" for IPv4, "1" for IPv6. Fixes: bab6de8fd180 ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 3 +++ drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 3 ++- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 3 +++ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index db2b51da2988..cdb02991f249 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -487,6 +487,9 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, dx_buff->mss = skb_shinfo(skb)->gso_size; dx_buff->is_txc = 1U; + dx_buff->is_ipv6 = + (ip_hdr(skb)->version == 6) ? 1U : 0U; + dx = aq_ring_next_dx(ring, dx); dx_buff = &ring->buff_ring[dx]; ++ret; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 257254645068..eecd6d1c4d73 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -58,7 +58,8 @@ struct __packed aq_ring_buff_s { u8 len_l2; u8 len_l3; u8 len_l4; - u8 rsvd2; + u8 is_ipv6:1; + u8 rsvd2:7; u32 len_pkt; }; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index a2b746a2dd50..a536875a7d0d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -433,6 +433,9 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self, buff->len_l3 + buff->len_l2); is_gso = true; + + if (buff->is_ipv6) + txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_IPV6; } else { buff_pa_len = buff->len; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index cab2931dab9a..69488c9b03e2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -471,6 +471,9 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, buff->len_l3 + buff->len_l2); is_gso = true; + + if (buff->is_ipv6) + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6; } else { buff_pa_len = buff->len; From 5d73bb863c2ef3aa1a28b5885c85ede7307df8ea Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:45 +0300 Subject: [PATCH 15/82] net:ethernet:aquantia: Reset is_gso flag when EOP reached. We need to reset is_gso flag when EOP reached (entire LSO packet processed). Fixes: bab6de8fd180 ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 1 + drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index a536875a7d0d..4ee15ff06a44 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -461,6 +461,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self, if (unlikely(buff->is_eop)) { txd->ctl |= HW_ATL_A0_TXD_CTL_EOP; txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_WB; + is_gso = false; } } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 69488c9b03e2..42150708191d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -499,6 +499,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, if (unlikely(buff->is_eop)) { txd->ctl |= HW_ATL_B0_TXD_CTL_EOP; txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB; + is_gso = false; } } From 7d969d2e8890f546c8cec634b3aa5f57d4eef883 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 23 Mar 2017 14:55:08 +0100 Subject: [PATCH 16/82] s390/qeth: size calculation outbound buffers Depending on the device type, hard_start_xmit() builds different output buffer formats. For instance with HiperSockets, on both L2 and L3 we strip the ETH header from the skb - L3 doesn't need it, and L2 carries it in the buffer's header element. For this, we pass data_offset = ETH_HLEN all the way down to __qeth_fill_buffer(), where skb->data is then adjusted accordingly. But the initial size calculation still considers the *full* skb length (including the ETH header). So qeth_get_elements_no() can erroneously reject a skb as too big, even though it would actually fit into an output buffer once the ETH header has been trimmed off later. Fix this by passing an additional offset to qeth_get_elements_no(), that indicates where in the skb the on-wire data actually begins. Since the current code uses data_offset=-1 for some special handling on OSA, we need to clamp data_offset to 0... On HiperSockets this helps when sending ~MTU-size skbs with weird page alignment. No change for OSA or AF_IUCV. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 ++- drivers/s390/net/qeth_core_main.c | 5 +++-- drivers/s390/net/qeth_l2_main.c | 5 +++-- drivers/s390/net/qeth_l3_main.c | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index e7addea8741b..d9561e39c3b2 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -961,7 +961,8 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role); int qeth_bridgeport_an_set(struct qeth_card *card, int enable); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); -int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int); +int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, + int extra_elems, int data_offset); int qeth_get_elements_for_frags(struct sk_buff *); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, int, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 315d8a2db7c0..9a5f99ccb122 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3837,6 +3837,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * @card: qeth card structure, to check max. elems. * @skb: SKB address * @extra_elems: extra elems needed, to check against max. + * @data_offset: range starts at skb->data + data_offset * * Returns the number of pages, and thus QDIO buffer elements, needed to cover * skb data, including linear part and fragments. Checks if the result plus @@ -3844,10 +3845,10 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * Note: extra_elems is not included in the returned result. */ int qeth_get_elements_no(struct qeth_card *card, - struct sk_buff *skb, int extra_elems) + struct sk_buff *skb, int extra_elems, int data_offset) { int elements = qeth_get_elements_for_range( - (addr_t)skb->data, + (addr_t)skb->data + data_offset, (addr_t)skb->data + skb_headlen(skb)) + qeth_get_elements_for_frags(skb); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index bea483307618..af4e6a639fec 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -849,7 +849,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * chaining we can not send long frag lists */ if ((card->info.type != QETH_CARD_TYPE_IQD) && - !qeth_get_elements_no(card, new_skb, 0)) { + !qeth_get_elements_no(card, new_skb, 0, 0)) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -894,7 +894,8 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - elements = qeth_get_elements_no(card, new_skb, elements_needed); + elements = qeth_get_elements_no(card, new_skb, elements_needed, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 06d0addcc058..72aa9537a725 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2867,7 +2867,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if ((card->info.type != QETH_CARD_TYPE_IQD) && ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) || - (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) { + (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -2909,7 +2909,8 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) elements = use_tso ? qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) : - qeth_get_elements_no(card, new_skb, hdr_elements); + qeth_get_elements_no(card, new_skb, hdr_elements, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); From acd9776b5c45ef02d1a210969a6fcc058afb76e3 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 23 Mar 2017 14:55:09 +0100 Subject: [PATCH 17/82] s390/qeth: no ETH header for outbound AF_IUCV With AF_IUCV traffic, the skb passed to hard_start_xmit() has a 14 byte slot at skb->data, intended for an ETH header. qeth_l3_fill_af_iucv_hdr() fills this ETH header... and then immediately moves it to the skb's headroom, where it disappears and is never seen again. But it's still possible for us to return NETDEV_TX_BUSY after the skb has been modified. Since we didn't get a private copy of the skb, the next time the skb is delivered to hard_start_xmit() it no longer has the expected layout (we moved the ETH header to the headroom, so skb->data now starts at the IUCV_TRANS header). So when qeth_l3_fill_af_iucv_hdr() does another round of rebuilding, the resulting qeth header ends up all wrong. On transmission, the buffer is then rejected by the HiperSockets device with SBALF15 = x'04'. When this error is passed back to af_iucv as TX_NOTIFY_UNREACHABLE, it tears down the offending socket. As the ETH header for AF_IUCV serves no purpose, just align the code to what we do for IP traffic on L3 HiperSockets: keep the ETH header at skb->data, and pass down data_offset = ETH_HLEN to qeth_fill_buffer(). When mapping the payload into the SBAL elements, the ETH header is then stripped off. This avoids the skb manipulations in qeth_l3_fill_af_iucv_hdr(), and any buffer re-entering hard_start_xmit() after NETDEV_TX_BUSY is now processed properly. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 72aa9537a725..653f0fb76573 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2609,17 +2609,13 @@ static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card, char daddr[16]; struct af_iucv_trans_hdr *iucv_hdr; - skb_pull(skb, 14); - card->dev->header_ops->create(skb, card->dev, 0, - card->dev->dev_addr, card->dev->dev_addr, - card->dev->addr_len); - skb_pull(skb, 14); - iucv_hdr = (struct af_iucv_trans_hdr *)skb->data; memset(hdr, 0, sizeof(struct qeth_hdr)); hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; hdr->hdr.l3.ext_flags = 0; - hdr->hdr.l3.length = skb->len; + hdr->hdr.l3.length = skb->len - ETH_HLEN; hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST; + + iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN); memset(daddr, 0, sizeof(daddr)); daddr[0] = 0xfe; daddr[1] = 0x80; @@ -2823,10 +2819,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((card->info.type == QETH_CARD_TYPE_IQD) && !skb_is_nonlinear(skb)) { new_skb = skb; - if (new_skb->protocol == ETH_P_AF_IUCV) - data_offset = 0; - else - data_offset = ETH_HLEN; + data_offset = ETH_HLEN; hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); if (!hdr) goto tx_drop; From 90b14dc731b1b4aac8ba01850b530bf7ba26462f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 23 Mar 2017 14:55:10 +0100 Subject: [PATCH 18/82] MAINTAINERS: add Julian Wiedmann Add Julian Wiedmann as additional maintainer for drivers/s390/net and net/iucv. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c45c02bc6082..e48678d15401 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10808,6 +10808,7 @@ F: drivers/s390/block/dasd* F: block/partitions/ibm.c S390 NETWORK DRIVERS +M: Julian Wiedmann M: Ursula Braun L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ @@ -10838,6 +10839,7 @@ S: Supported F: drivers/s390/scsi/zfcp_* S390 IUCV NETWORK LAYER +M: Julian Wiedmann M: Ursula Braun L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ From a5af83925363eb85d467933e3d6ec5a87001eb7c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 17:07:26 +0100 Subject: [PATCH 19/82] bna: avoid writing uninitialized data into hw registers The latest gcc-7 snapshot warns about bfa_ioc_send_enable/bfa_ioc_send_disable writing undefined values into the hardware registers: drivers/net/ethernet/brocade/bna/bfa_ioc.c: In function 'bfa_iocpf_sm_disabling_entry': arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+4)' is used uninitialized in this function [-Werror=uninitialized] arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+8)' is used uninitialized in this function [-Werror=uninitialized] The two functions look like they should do the same thing, but only one of them initializes the time stamp and clscode field. The fact that we only get a warning for one of the two functions seems to be arbitrary, based on the inlining decisions in the compiler. To address this, I'm making both functions do the same thing: - set the clscode from the ioc structure in both - set the time stamp from ktime_get_real_seconds (which also avoids the signed-integer overflow in 2038 and extends the well-defined behavior until 2106). - zero-fill the reserved field Fixes: 8b230ed8ec96 ("bna: Brocade 10Gb Ethernet device driver") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 9e59663a6ead..0f6811860ad5 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1930,13 +1930,13 @@ static void bfa_ioc_send_enable(struct bfa_ioc *ioc) { struct bfi_ioc_ctrl_req enable_req; - struct timeval tv; bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ, bfa_ioc_portid(ioc)); enable_req.clscode = htons(ioc->clscode); - do_gettimeofday(&tv); - enable_req.tv_sec = ntohl(tv.tv_sec); + enable_req.rsvd = htons(0); + /* overflow in 2106 */ + enable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req)); } @@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc) bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ, bfa_ioc_portid(ioc)); + disable_req.clscode = htons(ioc->clscode); + disable_req.rsvd = htons(0); + /* overflow in 2106 */ + disable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req)); } From a80db69e47d764bbcaf2fec54b1f308925e7c490 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 23 Mar 2017 11:03:31 -0700 Subject: [PATCH 20/82] kcm: return immediately after copy_from_user() failure There is no reason to continue after a copy_from_user() failure. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 309062f3debe..31762f76cdb5 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_attach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_attach_ioctl(sock, &info); @@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_unattach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_unattach_ioctl(sock, &info); @@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct socket *newsock = NULL; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_clone(sock, &info, &newsock); From 9f47a48e6eb97d793db85373e3ef4c55d876d334 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 23 Mar 2017 20:47:15 -0700 Subject: [PATCH 21/82] Revert "e1000e: driver trying to free already-free irq" This reverts commit 7e54d9d063fa239c95c21548c5267f0ef419ff56. After additional regression testing, several users are experiencing kernel panics during shutdown on e1000e devices. Reverting this change resolves the issue. Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 2175cced402f..e9af89ad039c 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6274,8 +6274,8 @@ static int e1000e_pm_freeze(struct device *dev) /* Quiesce the device without resetting the hardware */ e1000e_down(adapter, false); e1000_free_irq(adapter); - e1000e_reset_interrupt_capability(adapter); } + e1000e_reset_interrupt_capability(adapter); /* Allow time for pending master requests to run */ e1000e_disable_pcie_master(&adapter->hw); From 95f255211396958c718aef8c45e3923b5211ea7b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 09:38:03 -0700 Subject: [PATCH 22/82] net: Do not allow negative values for busy_read and busy_poll sysctl interfaces This change basically codifies what I think was already the limitations on the busy_poll and busy_read sysctl interfaces. We weren't checking the lower bounds and as such could input negative values. The behavior when that was used was dependent on the architecture. In order to prevent any issues with that I am just disabling support for values less than 0 since this way we don't have to worry about any odd behaviors. By limiting the sysctl values this way it also makes it consistent with how we handle the SO_BUSY_POLL socket option since the value appears to be reported as a signed integer value and negative values are rejected. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 4ead336e14ea..7f9cc400eca0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "busy_read", .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, #endif #ifdef CONFIG_NET_SCHED From 28ee1b746f493b7c62347d714f58fbf4f70df4f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Mar 2017 19:42:37 +0100 Subject: [PATCH 23/82] secure_seq: downgrade to per-host timestamp offsets Unfortunately too many devices (not under our control) use tcp_tw_recycle=1, which depends on timestamps being identical of the same saddr. Although tcp_tw_recycle got removed in net-next we can't make such end hosts disappear so downgrade to per-host timestamp offsets. Cc: Soheil Hassas Yeganeh Cc: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Reported-by: Yvan Vanrossomme Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/secure_seq.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 758f140b6bed..d28da7d363f1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -20,9 +20,11 @@ #include static siphash_key_t net_secret __read_mostly; +static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { + net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } #endif @@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) +static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +{ + const struct { + struct in6_addr saddr; + struct in6_addr daddr; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .daddr = *(struct in6_addr *)daddr, + }; + + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash(&combined, offsetofend(typeof(combined), daddr), + &ts_secret); +} + u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport, u32 *tsoff) { @@ -63,7 +82,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } EXPORT_SYMBOL(secure_tcpv6_sequence_number); @@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET +static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +{ + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash_2u32((__force u32)saddr, (__force u32)daddr, + &ts_secret); +} /* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, @@ -103,7 +130,7 @@ u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } From 13a8cd191a2b470cfd435b3b57dbd21aa65ff78c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 15:01:42 -0700 Subject: [PATCH 24/82] i40e: Do not enable NAPI on q_vectors that have no rings When testing the epoll w/ busy poll code I found that I could get into a state where the i40e driver had q_vectors w/ active NAPI that had no rings. This was resulting in a divide by zero error. To correct it I am updating the driver code so that we only support NAPI on q_vectors that have 1 or more rings allocated to them. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e8a8351c8ea9..82a95cc2c8ee 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4438,8 +4438,12 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_enable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_enable(&q_vector->napi); + } } /** @@ -4453,8 +4457,12 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_disable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_disable(&q_vector->napi); + } } /** From 43a6684519ab0a6c52024b5e25322476cabad893 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Mar 2017 19:36:13 -0700 Subject: [PATCH 25/82] ping: implement proper locking We got a report of yet another bug in ping http://www.openwall.com/lists/oss-security/2017/03/24/6 ->disconnect() is not called with socket lock held. Fix this by acquiring ping rwlock earlier. Thanks to Daniel, Alexander and Andrey for letting us know this problem. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Reported-by: Daniel Jiang Reported-by: Solar Designer Reported-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/ipv4/ping.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2af6244b83e2..ccfbce13a633 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -156,17 +156,18 @@ int ping_hash(struct sock *sk) void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + write_lock_bh(&ping_table.lock); if (sk_hashed(sk)) { - write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&ping_table.lock); } + write_unlock_bh(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); From b1977682a3858b5584ffea7cfb7bd863f68db18d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 24 Mar 2017 15:57:33 -0700 Subject: [PATCH 26/82] bpf: improve verifier packet range checks llvm can optimize the 'if (ptr > data_end)' checks to be in the order slightly different than the original C code which will confuse verifier. Like: if (ptr + 16 > data_end) return TC_ACT_SHOT; // may be followed by if (ptr + 14 > data_end) return TC_ACT_SHOT; while llvm can see that 'ptr' is valid for all 16 bytes, the verifier could not. Fix verifier logic to account for such case and add a test. Reported-by: Huapeng Zhou Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 +++-- tools/testing/selftests/bpf/test_verifier.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 796b68d00119..5e6202e62265 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1973,14 +1973,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) - regs[i].range = dst_reg->off; + /* keep the maximum range already checked */ + regs[i].range = max(regs[i].range, dst_reg->off); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) - reg->range = dst_reg->off; + reg->range = max(reg->range, dst_reg->off); } } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index d1555e4240c0..7d761d4cc759 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3417,6 +3417,26 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_LWT_XMIT, }, + { + "overlapping checks for direct packet access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, + }, { "invalid access of tc_classid for LWT_IN", .insns = { From a17f1861b5ea4327f9f35e9edb3c5fadceaa7c64 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 24 Mar 2017 23:02:49 +0100 Subject: [PATCH 27/82] net: hns: fix uninitialized data use When dev_dbg() is enabled, we print uninitialized data, as gcc-7.0.1 now points out: ethernet/hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_promisc_tcam': ethernet/hisilicon/hns/hns_dsaf_main.c:2947:75: error: 'tbl_tcam_data.low.val' may be used uninitialized in this function [-Werror=maybe-uninitialized] ethernet/hisilicon/hns/hns_dsaf_main.c:2947:75: error: 'tbl_tcam_data.high.val' may be used uninitialized in this function [-Werror=maybe-uninitialized] We also pass the data into hns_dsaf_tcam_mc_cfg(), which might later use it (not sure about that), so it seems safer to just always initialize the tbl_tcam_data structure. Fixes: 1f5fa2dd1cfa ("net: hns: fix for promisc mode in HNS driver") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 90dbda792614..cd93657abe87 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2924,10 +2924,11 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, /* find the tcam entry index for promisc */ entry_index = dsaf_promisc_tcam_entry(port); + memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); + memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); + /* config key mask */ if (enable) { - memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); - memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); dsaf_set_field(tbl_tcam_data.low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, DSAF_TBL_TCAM_KEY_PORT_S, port); From 834a61d455ff8069552f44140b2c1de85e6bc84d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 24 Mar 2017 23:02:50 +0100 Subject: [PATCH 28/82] net: hns: avoid gcc-7.0.1 warning for uninitialized data hns_dsaf_set_mac_key() calls dsaf_set_field() on an uninitialized field, which will then change only a few of its bits, causing a warning with the latest gcc: hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_mac_uc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] (origin) &= (~(mask)); \ ^~ hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_mac_mc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_add_mac_mc_port': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_del_mac_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_rm_mac_addr': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_del_mac_mc_port': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_mac_uc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_mac_mc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] The code is actually correct since we always set all 16 bits of the port_vlan field, but gcc correctly points out that the first access does contain uninitialized data. This initializes the field to zero first before setting the individual bits. Fixes: 5483bfcb169c ("net: hns: modify tcam table and set mac key") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index cd93657abe87..403ea9db6dbd 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -1519,6 +1519,7 @@ static void hns_dsaf_set_mac_key( mac_key->high.bits.mac_3 = addr[3]; mac_key->low.bits.mac_4 = addr[4]; mac_key->low.bits.mac_5 = addr[5]; + mac_key->low.bits.port_vlan = 0; dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M, DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id); dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, From 6ac3b77a6ffff7513ff86b684aa256ea01c0e5b5 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 25 Mar 2017 01:48:08 +0300 Subject: [PATCH 29/82] irda: vlsi_ir: fix check for DMA mapping errors vlsi_alloc_ring() checks for DMA mapping errors by comparing returned address with zero, while pci_dma_mapping_error() should be used. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/irda/vlsi_ir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index ffedad2a360a..15b920086251 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr memset(rd, 0, sizeof(*rd)); rd->hw = hwmap + i; rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA); - if (rd->buf == NULL || - !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) { + if (rd->buf) + busaddr = pci_map_single(pdev, rd->buf, len, dir); + if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) { if (rd->buf) { net_err_ratelimited("%s: failed to create PCI-MAP for %p\n", __func__, rd->buf); @@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr rd = r->rd + j; busaddr = rd_get_addr(rd); rd_set_addr_status(rd, 0, 0); - if (busaddr) - pci_unmap_single(pdev, busaddr, len, dir); + pci_unmap_single(pdev, busaddr, len, dir); kfree(rd->buf); rd->buf = NULL; } From 3b7dabf029478bb80507a6c4500ca94132a2bc0b Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 08:53:12 +0800 Subject: [PATCH 30/82] netfilter: invoke synchronize_rcu after set the _hook_ to NULL Otherwise, another CPU may access the invalid pointer. For example: CPU0 CPU1 - rcu_read_lock(); - pfunc = _hook_; _hook_ = NULL; - mod unload - - pfunc(); // invalid, panic - rcu_read_unlock(); So we must call synchronize_rcu() to wait the rcu reader to finish. Also note, in nf_nat_snmp_basic_fini, synchronize_rcu() will be invoked by later nf_conntrack_helper_unregister, but I'm inclined to add a explicit synchronize_rcu after set the nf_nat_snmp_hook to NULL. Depend on such obscure assumptions is not a good idea. Last, in nfnetlink_cttimeout, we use kfree_rcu to free the time object, so in cttimeout_exit, invoking rcu_barrier() is not necessary at all, remove it too. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 + net/netfilter/nf_conntrack_ecache.c | 2 ++ net/netfilter/nf_conntrack_netlink.c | 1 + net/netfilter/nf_nat_core.c | 2 ++ net/netfilter/nfnetlink_cttimeout.c | 2 +- 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index c9b52c361da2..5a8f7c360887 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1304,6 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index da9df2d56e66..22fc32143e9c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6806b5e73567..908d858034e4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3442,6 +3442,7 @@ static void __exit ctnetlink_exit(void) #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif + synchronize_rcu(); } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 94b14c5a8b17..82802e4a6640 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void) #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif + synchronize_rcu(); + for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 139e0867e56e..47d6656c9119 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + synchronize_rcu(); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - rcu_barrier(); } module_init(cttimeout_init); From 83d90219a5df8d950855ce73229a97b63605c317 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 12:09:15 +0800 Subject: [PATCH 31/82] netfilter: nfnl_cthelper: fix a race when walk the nf_ct_helper_hash table The nf_ct_helper_hash table is protected by nf_ct_helper_mutex, while nfct_helper operation is protected by nfnl_lock(NFNL_SUBSYS_CTHELPER). So it's possible that one CPU is walking the nf_ct_helper_hash for cthelper add/get/del, another cpu is doing nf_conntrack_helpers_unregister at the same time. This is dangrous, and may cause use after free error. Note, delete operation will flush all cthelpers added via nfnetlink, so using rcu to do protect is not easy. Now introduce a dummy list to record all the cthelpers added via nfnetlink, then we can walk the dummy list instead of walking the nf_ct_helper_hash. Also, keep nfnl_cthelper_dump_table unchanged, it may be invoked without nfnl_lock(NFNL_SUBSYS_CTHELPER) held. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 181 +++++++++++++---------------- 1 file changed, 83 insertions(+), 98 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 2b987d2a77bc..d45558178da5 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -32,6 +32,13 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); +struct nfnl_cthelper { + struct list_head list; + struct nf_conntrack_helper helper; +}; + +static LIST_HEAD(nfnl_cthelper_list); + static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -205,14 +212,16 @@ nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; + struct nfnl_cthelper *nfcth; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; - helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); - if (helper == NULL) + nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); + if (nfcth == NULL) return -ENOMEM; + helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) @@ -249,11 +258,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], if (ret < 0) goto err2; + list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; err2: kfree(helper->expect_policy); err1: - kfree(helper); + kfree(nfcth); return ret; } @@ -379,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - int ret = 0, i; + struct nfnl_cthelper *nlcth; + int ret = 0; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -390,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, if (ret < 0) return ret; - rcu_read_lock(); - for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) - continue; + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if ((tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_EXCL) { - ret = -EEXIST; - goto err; - } - helper = cur; - break; - } + helper = cur; + break; } - rcu_read_unlock(); if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); @@ -422,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, ret = nfnl_cthelper_update(tb, helper); return ret; -err: - rcu_read_unlock(); - return ret; } static int @@ -588,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { - int ret = -ENOENT, i; + int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; + struct nfnl_cthelper *nlcth; bool tuple_set = false; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -613,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; - - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { - ret = -ENOMEM; - break; - } - - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), - NFNL_MSG_CTHELPER_NEW, cur); - if (ret <= 0) { - kfree_skb(skb2); - break; - } - - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, - MSG_DONTWAIT); - if (ret > 0) - ret = 0; - - /* this avoids a loop in nfnetlink. */ - return ret == -EAGAIN ? -ENOBUFS : ret; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; } + + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } + + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; } return ret; } @@ -662,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; - int i, j = 0, ret; + struct nfnl_cthelper *nlcth, *n; + int j = 0, ret; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); @@ -678,30 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + j++; - j++; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); - kfree(cur); - } + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + + list_del(&nlcth->list); + kfree(nlcth); } + /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 0 : -ENOENT; } @@ -750,22 +741,16 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *tmp; - int i; + struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); - for (i=0; iflags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); - kfree(cur); - } + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(nlcth); } } From 9c3f3794926a997b1cab6c42480ff300efa2d162 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 16:35:29 +0800 Subject: [PATCH 32/82] netfilter: nf_ct_ext: fix possible panic after nf_ct_extend_unregister If one cpu is doing nf_ct_extend_unregister while another cpu is doing __nf_ct_ext_add_length, then we may hit BUG_ON(t == NULL). Moreover, there's no synchronize_rcu invocation after set nf_ct_ext_types[id] to NULL, so it's possible that we may access invalid pointer. But actually, most of the ct extends are built-in, so the problem listed above will not happen. However, there are two exceptions: NF_CT_EXT_NAT and NF_CT_EXT_SYNPROXY. For _EXT_NAT, the panic will not happen, since adding the nat extend and unregistering the nat extend are located in the same file(nf_nat_core.c), this means that after the nat module is removed, we cannot add the nat extend too. For _EXT_SYNPROXY, synproxy extend may be added by init_conntrack, while synproxy extend unregister will be done by synproxy_core_exit. So after nf_synproxy_core.ko is removed, we may still try to add the synproxy extend, then kernel panic may happen. I know it's very hard to reproduce this issue, but I can play a tricky game to make it happen very easily :) Step 1. Enable SYNPROXY for tcp dport 1234 at FORWARD hook: # iptables -I FORWARD -p tcp --dport 1234 -j SYNPROXY Step 2. Queue the syn packet to the userspace at raw table OUTPUT hook. Also note, in the userspace we only add a 20s' delay, then reinject the syn packet to the kernel: # iptables -t raw -I OUTPUT -p tcp --syn -j NFQUEUE --queue-num 1 Step 3. Using "nc 2.2.2.2 1234" to connect the server. Step 4. Now remove the nf_synproxy_core.ko quickly: # iptables -F FORWARD # rmmod ipt_SYNPROXY # rmmod nf_synproxy_core Step 5. After 20s' delay, the syn packet is reinjected to the kernel. Now you will see the panic like this: kernel BUG at net/netfilter/nf_conntrack_extend.c:91! Call Trace: ? __nf_ct_ext_add_length+0x53/0x3c0 [nf_conntrack] init_conntrack+0x12b/0x600 [nf_conntrack] nf_conntrack_in+0x4cc/0x580 [nf_conntrack] ipv4_conntrack_local+0x48/0x50 [nf_conntrack_ipv4] nf_reinject+0x104/0x270 nfqnl_recv_verdict+0x3e1/0x5f9 [nfnetlink_queue] ? nfqnl_recv_verdict+0x5/0x5f9 [nfnetlink_queue] ? nla_parse+0xa0/0x100 nfnetlink_rcv_msg+0x175/0x6a9 [nfnetlink] [...] One possible solution is to make NF_CT_EXT_SYNPROXY extend built-in, i.e. introduce nf_conntrack_synproxy.c and only do ct extend register and unregister in it, similar to nf_conntrack_timeout.c. But having such a obscure restriction of nf_ct_extend_unregister is not a good idea, so we should invoke synchronize_rcu after set nf_ct_ext_types to NULL, and check the NULL pointer when do __nf_ct_ext_add_length. Then it will be easier if we add new ct extend in the future. Last, we use kfree_rcu to free nf_ct_ext, so rcu_barrier() is unnecessary anymore, remove it too. Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_extend.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 02bcf00c2492..008299b7f78f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } + off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len + var_alloc_len; alloc_size = t->alloc_size + var_alloc_len; @@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } newoff = ALIGN(old->len, t->align); newlen = newoff + t->len + var_alloc_len; @@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - rcu_barrier(); /* Wait for completion of call_rcu()'s */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); From 75c689dca98851d65ef5a27e5ce26b625b68751c Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 25 Mar 2017 18:24:36 +0800 Subject: [PATCH 33/82] netfilter: nf_nat_snmp: Fix panic when snmp_trap_helper fails to register In the commit 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper"), the snmp_helper is replaced by nf_nat_snmp_hook. So the snmp_helper is never registered. But it still tries to unregister the snmp_helper, it could cause the panic. Now remove the useless snmp_helper and the unregister call in the error handler. Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper") Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5a8f7c360887..53e49f5011d3 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .me = THIS_MODULE, .help = help, @@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { static int __init nf_nat_snmp_basic_init(void) { - int ret = 0; - BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - ret = nf_conntrack_helper_register(&snmp_trap_helper); - if (ret < 0) { - nf_conntrack_helper_unregister(&snmp_helper); - return ret; - } - return ret; + return nf_conntrack_helper_register(&snmp_trap_helper); } static void __exit nf_nat_snmp_basic_fini(void) From 248ccd5ee644fcf68984d945cffcdc364992ddbf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 27 Mar 2017 10:48:11 -0700 Subject: [PATCH 34/82] MAINTAINERS: Add Andrew Lunn as co-maintainer of PHYLIB Andrew has been contributing a lot to PHYLIB over the past months and his feedback on patches is more than welcome. Signed-off-by: Florian Fainelli Acked-by: Andrew Lunn Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e48678d15401..9975dcefc372 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4922,6 +4922,7 @@ F: include/linux/netfilter_bridge/ F: net/bridge/ ETHERNET PHY LIBRARY +M: Andrew Lunn M: Florian Fainelli L: netdev@vger.kernel.org S: Maintained From ffefb6f4d6ad699a2b5484241bc46745a53235d0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Mar 2017 18:00:14 +0100 Subject: [PATCH 35/82] net: ipconfig: fix ic_close_devs() use-after-free Our chosen ic_dev may be anywhere in our list of ic_devs, and we may free it before attempting to close others. When we compare d->dev and ic_dev->dev, we're potentially dereferencing memory returned to the allocator. This causes KASAN to scream for each subsequent ic_dev we check. As there's a 1-1 mapping between ic_devs and netdevs, we can instead compare d and ic_dev directly, which implicitly handles the !ic_dev case, and avoids the use-after-free. The ic_dev pointer may be stale, but we will not dereference it. Original splat: [ 6.487446] ================================================================== [ 6.494693] BUG: KASAN: use-after-free in ic_close_devs+0xc4/0x154 at addr ffff800367efa708 [ 6.503013] Read of size 8 by task swapper/0/1 [ 6.507452] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc3-00002-gda42158 #8 [ 6.514993] Hardware name: AppliedMicro Mustang/Mustang, BIOS 3.05.05-beta_rc Jan 27 2016 [ 6.523138] Call trace: [ 6.525590] [] dump_backtrace+0x0/0x570 [ 6.530976] [] show_stack+0x20/0x30 [ 6.536017] [] dump_stack+0x120/0x188 [ 6.541231] [] kasan_object_err+0x24/0xa0 [ 6.546790] [] kasan_report_error+0x244/0x738 [ 6.552695] [] __asan_report_load8_noabort+0x54/0x80 [ 6.559204] [] ic_close_devs+0xc4/0x154 [ 6.564590] [] ip_auto_config+0x2ed4/0x2f1c [ 6.570321] [] do_one_initcall+0xcc/0x370 [ 6.575882] [] kernel_init_freeable+0x5f8/0x6c4 [ 6.581959] [] kernel_init+0x18/0x190 [ 6.587171] [] ret_from_fork+0x10/0x40 [ 6.592468] Object at ffff800367efa700, in cache kmalloc-128 size: 128 [ 6.598969] Allocated: [ 6.601324] PID = 1 [ 6.603427] save_stack_trace_tsk+0x0/0x418 [ 6.607603] save_stack_trace+0x20/0x30 [ 6.611430] kasan_kmalloc+0xd8/0x188 [ 6.615087] ip_auto_config+0x8c4/0x2f1c [ 6.619002] do_one_initcall+0xcc/0x370 [ 6.622832] kernel_init_freeable+0x5f8/0x6c4 [ 6.627178] kernel_init+0x18/0x190 [ 6.630660] ret_from_fork+0x10/0x40 [ 6.634223] Freed: [ 6.636233] PID = 1 [ 6.638334] save_stack_trace_tsk+0x0/0x418 [ 6.642510] save_stack_trace+0x20/0x30 [ 6.646337] kasan_slab_free+0x88/0x178 [ 6.650167] kfree+0xb8/0x478 [ 6.653131] ic_close_devs+0x130/0x154 [ 6.656875] ip_auto_config+0x2ed4/0x2f1c [ 6.660875] do_one_initcall+0xcc/0x370 [ 6.664705] kernel_init_freeable+0x5f8/0x6c4 [ 6.669051] kernel_init+0x18/0x190 [ 6.672534] ret_from_fork+0x10/0x40 [ 6.676098] Memory state around the buggy address: [ 6.680880] ffff800367efa600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.688078] ffff800367efa680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 6.695276] >ffff800367efa700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 6.702469] ^ [ 6.705952] ffff800367efa780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 6.713149] ffff800367efa800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 6.720343] ================================================================== [ 6.727536] Disabling lock debugging due to kernel taint Signed-off-by: Mark Rutland Cc: Alexey Kuznetsov Cc: David S. Miller Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Patrick McHardy Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index fd9f34bbd740..dfb2ab2dd3c8 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { + if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } From f9ba3501d50317697811ff3c48f623f08d616fc8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 27 Mar 2017 00:21:15 +0800 Subject: [PATCH 36/82] sctp: change to save MSG_MORE flag into assoc David Laight noticed the support for MSG_MORE with datamsg->force_delay didn't really work as we expected, as the first msg with MSG_MORE set would always block the following chunks' dequeuing. This Patch is to rewrite it by saving the MSG_MORE flag into assoc as David Laight suggested. asoc->force_delay is used to save MSG_MORE flag before a msg is sent. All chunks in queue would not be sent out if asoc->force_delay is set by the msg with MSG_MORE flag, until a new msg without MSG_MORE flag clears asoc->force_delay. Note that this change would not affect the flush is generated by other triggers, like asoc->state != ESTABLISHED, queue size > pmtu etc. v1->v2: Not clear asoc->force_delay after sending the msg with MSG_MORE flag. Fixes: 4ea0c32f5f42 ("sctp: add support for MSG_MORE") Signed-off-by: Xin Long Acked-by: David Laight Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/output.c | 2 +- net/sctp/socket.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 592decebac75..8caa5ee9e290 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -499,7 +499,6 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - force_delay:1, can_delay; /* should this message be Nagle delayed */ }; @@ -1878,6 +1877,7 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? */ + force_delay:1, prsctp_enable:1, reconf_enable:1; diff --git a/net/sctp/output.c b/net/sctp/output.c index 1224421036b3..73fd178007a3 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -704,7 +704,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, */ if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && - !chunk->msg->force_delay) + !asoc->force_delay) /* Nothing unacked */ return SCTP_XMIT_OK; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0f378ea2ae38..baa269a0d52e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) err = PTR_ERR(datamsg); goto out_free; } - datamsg->force_delay = !!(msg->msg_flags & MSG_MORE); + asoc->force_delay = !!(msg->msg_flags & MSG_MORE); /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { From af109a2cf6a9a6271fa420ae2d64d72d86c92b7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 12:11:07 +0200 Subject: [PATCH 37/82] isdn: kcapi: avoid uninitialized data gcc-7 points out that the AVMB1_ADDCARD ioctl results in an unintialized value ending up in the cardnr parameter: drivers/isdn/capi/kcapi.c: In function 'old_capi_manufacturer': drivers/isdn/capi/kcapi.c:1042:24: error: 'cdef.cardnr' may be used uninitialized in this function [-Werror=maybe-uninitialized] cparams.cardnr = cdef.cardnr; This has been broken since before the start of the git history, so either the value is not used for anything important, or the ioctl command doesn't get called in practice. Setting the cardnr to zero avoids the warning and makes sure we have consistent behavior. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/isdn/capi/kcapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 1dfd1085a04f..9ca691d6c13b 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) sizeof(avmb1_carddef)))) return -EFAULT; cdef.cardtype = AVM_CARDTYPE_B1; + cdef.cardnr = 0; } else { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_extcarddef)))) From c2b341a620018d4eaeb0e85c16274ac4e5f153d4 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 28 Mar 2017 12:12:38 +0200 Subject: [PATCH 38/82] net: moxa: fix TX overrun memory leak moxart_mac_start_xmit() doesn't care where tx_tail is, tx_head can catch and pass tx_tail, which is bad because moxart_tx_finished() isn't guaranteed to catch up on freeing resources from tx_tail. Add a check in moxart_mac_start_xmit() stopping the queue at the end of the circular buffer. Also add a check in moxart_tx_finished() waking the queue if the buffer has TX_WAKE_THRESHOLD or more free descriptors. While we're at it, move spin_lock_irq() to happen before our descriptor pointer is assigned in moxart_mac_start_xmit(). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=99451 Signed-off-by: Jonas Jensen Signed-off-by: David S. Miller --- drivers/net/ethernet/moxa/moxart_ether.c | 20 ++++++++++++++++++-- drivers/net/ethernet/moxa/moxart_ether.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 06c9f4100cb9..6ad44be08b33 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "moxart_ether.h" @@ -278,6 +279,13 @@ rx_next: return rx; } +static int moxart_tx_queue_space(struct net_device *ndev) +{ + struct moxart_mac_priv_t *priv = netdev_priv(ndev); + + return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM); +} + static void moxart_tx_finished(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); @@ -297,6 +305,9 @@ static void moxart_tx_finished(struct net_device *ndev) tx_tail = TX_NEXT(tx_tail); } priv->tx_tail = tx_tail; + if (netif_queue_stopped(ndev) && + moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD) + netif_wake_queue(ndev); } static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id) @@ -324,13 +335,18 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct moxart_mac_priv_t *priv = netdev_priv(ndev); void *desc; unsigned int len; - unsigned int tx_head = priv->tx_head; + unsigned int tx_head; u32 txdes1; int ret = NETDEV_TX_BUSY; + spin_lock_irq(&priv->txlock); + + tx_head = priv->tx_head; desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head); - spin_lock_irq(&priv->txlock); + if (moxart_tx_queue_space(ndev) == 1) + netif_stop_queue(ndev); + if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) { net_dbg_ratelimited("no TX space for packet\n"); priv->stats.tx_dropped++; diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h index 93a9563ac7c6..afc32ec998c0 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.h +++ b/drivers/net/ethernet/moxa/moxart_ether.h @@ -59,6 +59,7 @@ #define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM_MASK)) #define TX_BUF_SIZE 1600 #define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK+1) +#define TX_WAKE_THRESHOLD 16 #define RX_DESC_NUM 64 #define RX_DESC_NUM_MASK (RX_DESC_NUM-1) From e497ec680c4cd51e76bfcdd49363d9ab8d32a757 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Tue, 28 Mar 2017 16:13:41 +0300 Subject: [PATCH 39/82] net/mlx5: Avoid dereferencing uninitialized pointer In NETDEV_CHANGEUPPER event the upper_info field is valid only when linking is true. Otherwise it should be ignored. Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature) Signed-off-by: Talat Batheesh Reviewed-by: Aviv Heller Reviewed-by: Moni Shoua Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 55957246c0e8..b5d5519542e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -294,7 +294,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, struct netdev_notifier_changeupper_info *info) { struct net_device *upper = info->upper_dev, *ndev_tmp; - struct netdev_lag_upper_info *lag_upper_info; + struct netdev_lag_upper_info *lag_upper_info = NULL; bool is_bonded; int bond_status = 0; int num_slaves = 0; @@ -303,7 +303,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, if (!netif_is_lag_master(upper)) return 0; - lag_upper_info = info->upper_info; + if (info->linking) + lag_upper_info = info->upper_info; /* The event may still be of interest if the slave does not belong to * us, but is enslaved to a master which has one or more of our netdevs From 16b8b6de32572207abeb4dfb74ab7bd8409a5690 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 16:11:18 +0200 Subject: [PATCH 40/82] rocker: fix Wmaybe-uninitialized false-positive gcc-7 reports a warning that earlier versions did not have: drivers/net/ethernet/rocker/rocker_ofdpa.c: In function 'ofdpa_port_stp_update': arch/x86/include/asm/string_32.h:79:22: error: '*((void *)&prev_ctrls+4)' may be used uninitialized in this function [-Werror=maybe-uninitialized] *((short *)to + 2) = *((short *)from + 2); ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/rocker/rocker_ofdpa.c:2218:7: note: '*((void *)&prev_ctrls+4)' was declared here This is clearly a variation of the warning about 'prev_state' that was shut up using uninitialized_var(). We can slightly simplify the code and get rid of the warning by unconditionally saving the prev_state and prev_ctrls variables. The inlined memcpy is not particularly expensive here, as it just has to read five bytes from one or two cache lines. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 7cd76b6b5cb9..2ae852454780 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2216,18 +2216,15 @@ static int ofdpa_port_stp_update(struct ofdpa_port *ofdpa_port, { bool want[OFDPA_CTRL_MAX] = { 0, }; bool prev_ctrls[OFDPA_CTRL_MAX]; - u8 uninitialized_var(prev_state); + u8 prev_state; int err; int i; - if (switchdev_trans_ph_prepare(trans)) { - memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls)); - prev_state = ofdpa_port->stp_state; - } - - if (ofdpa_port->stp_state == state) + prev_state = ofdpa_port->stp_state; + if (prev_state == state) return 0; + memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls)); ofdpa_port->stp_state = state; switch (state) { From b768b16de58d5e0b1d7c3f936825b25327ced20c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 28 Mar 2017 11:25:26 -0700 Subject: [PATCH 41/82] openvswitch: Fix refcount leak on force commit. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference count held for skb needs to be released when the skb's nfct pointer is cleared regardless of if nf_ct_delete() is called or not. Failing to release the skb's reference cound led to deferred conntrack cleanup spinning forever within nf_conntrack_cleanup_net_list() when cleaning up a network namespace:    kworker/u16:0-19025 [004] 45981067.173642: sched_switch: kworker/u16:0:19025 [120] R ==> rcu_preempt:7 [120]    kworker/u16:0-19025 [004] 45981067.173651: kernel_stack: => ___preempt_schedule (ffffffffa001ed36) => _raw_spin_unlock_bh (ffffffffa0713290) => nf_ct_iterate_cleanup (ffffffffc00a4454) => nf_conntrack_cleanup_net_list (ffffffffc00a5e1e) => nf_conntrack_pernet_exit (ffffffffc00a63dd) => ops_exit_list.isra.1 (ffffffffa06075f3) => cleanup_net (ffffffffa0607df0) => process_one_work (ffffffffa0084c31) => worker_thread (ffffffffa008592b) => kthread (ffffffffa008bee2) => ret_from_fork (ffffffffa071b67c) Fixes: dd41d33f0b03 ("openvswitch: Add force commit.") Reported-by: Yang Song Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e0a87776a010..7b2c2fce408a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net, */ if (nf_ct_is_confirmed(ct)) nf_ct_delete(ct, 0, 0); - else - nf_conntrack_put(&ct->ct_general); + + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, 0); return false; } From b3ef5520c1eabb56064474043c7c55a1a65b8708 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 28 Mar 2017 09:11:31 +0100 Subject: [PATCH 42/82] cfg80211: check rdev resume callback only for registered wiphy We got the following use-after-free KASAN report: BUG: KASAN: use-after-free in wiphy_resume+0x591/0x5a0 [cfg80211] at addr ffff8803fc244090 Read of size 8 by task kworker/u16:24/2587 CPU: 6 PID: 2587 Comm: kworker/u16:24 Tainted: G B 4.9.13-debug+ Hardware name: Dell Inc. XPS 15 9550/0N7TVV, BIOS 1.2.19 12/22/2016 Workqueue: events_unbound async_run_entry_fn ffff880425d4f9d8 ffffffffaeedb541 ffff88042b80ef00 ffff8803fc244088 ffff880425d4fa00 ffffffffae84d7a1 ffff880425d4fa98 ffff8803fc244080 ffff88042b80ef00 ffff880425d4fa88 ffffffffae84da3a ffffffffc141f7d9 Call Trace: [] dump_stack+0x85/0xc4 [] kasan_object_err+0x21/0x70 [] kasan_report_error+0x1fa/0x500 [] ? cfg80211_bss_age+0x39/0xc0 [cfg80211] [] ? cfg80211_bss_age+0x9a/0xc0 [cfg80211] [] ? trace_hardirqs_on+0xd/0x10 [] ? wiphy_suspend+0xc70/0xc70 [cfg80211] [] __asan_report_load8_noabort+0x61/0x70 [] ? wiphy_suspend+0xbb0/0xc70 [cfg80211] [] ? wiphy_resume+0x591/0x5a0 [cfg80211] [] wiphy_resume+0x591/0x5a0 [cfg80211] [] ? wiphy_suspend+0xc70/0xc70 [cfg80211] [] dpm_run_callback+0x6e/0x4f0 [] device_resume+0x1c2/0x670 [] async_resume+0x1d/0x50 [] async_run_entry_fn+0xfe/0x610 [] process_one_work+0x716/0x1a50 [] ? process_one_work+0x679/0x1a50 [] ? _raw_spin_unlock_irq+0x3d/0x60 [] ? pwq_dec_nr_in_flight+0x2b0/0x2b0 [] worker_thread+0xe0/0x1460 [] ? process_one_work+0x1a50/0x1a50 [] kthread+0x222/0x2e0 [] ? kthread_park+0x80/0x80 [] ? kthread_park+0x80/0x80 [] ? kthread_park+0x80/0x80 [] ret_from_fork+0x2a/0x40 Object at ffff8803fc244088, in cache kmalloc-1024 size: 1024 Allocated: PID = 71 save_stack_trace+0x1b/0x20 save_stack+0x46/0xd0 kasan_kmalloc+0xad/0xe0 kasan_slab_alloc+0x12/0x20 __kmalloc_track_caller+0x134/0x360 kmemdup+0x20/0x50 brcmf_cfg80211_attach+0x10b/0x3a90 [brcmfmac] brcmf_bus_start+0x19a/0x9a0 [brcmfmac] brcmf_pcie_setup+0x1f1a/0x3680 [brcmfmac] brcmf_fw_request_nvram_done+0x44c/0x11b0 [brcmfmac] request_firmware_work_func+0x135/0x280 process_one_work+0x716/0x1a50 worker_thread+0xe0/0x1460 kthread+0x222/0x2e0 ret_from_fork+0x2a/0x40 Freed: PID = 2568 save_stack_trace+0x1b/0x20 save_stack+0x46/0xd0 kasan_slab_free+0x71/0xb0 kfree+0xe8/0x2e0 brcmf_cfg80211_detach+0x62/0xf0 [brcmfmac] brcmf_detach+0x14a/0x2b0 [brcmfmac] brcmf_pcie_remove+0x140/0x5d0 [brcmfmac] brcmf_pcie_pm_leave_D3+0x198/0x2e0 [brcmfmac] pci_pm_resume+0x186/0x220 dpm_run_callback+0x6e/0x4f0 device_resume+0x1c2/0x670 async_resume+0x1d/0x50 async_run_entry_fn+0xfe/0x610 process_one_work+0x716/0x1a50 worker_thread+0xe0/0x1460 kthread+0x222/0x2e0 ret_from_fork+0x2a/0x40 Memory state around the buggy address: ffff8803fc243f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8803fc244000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8803fc244080: fc fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8803fc244100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8803fc244180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb What is happening is that brcmf_pcie_resume() detects a device that is no longer responsive and it decides to unbind resulting in a wiphy_unregister() and wiphy_free() call. Now the wiphy instance remains allocated, because PM needs to call wiphy_resume() for it. However, brcmfmac already does a kfree() for the struct cfg80211_registered_device::ops field. Change the checks in wiphy_resume() to only access the struct cfg80211_registered_device::ops if the wiphy instance is still registered at this time. Cc: stable@vger.kernel.org # 4.10.x, 4.9.x Reported-by: Daniel J Blueman Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 16b6b5988be9..570a2b67ca10 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev) /* Age scan results with time spent in suspend */ cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - if (rdev->ops->resume) { - rtnl_lock(); - if (rdev->wiphy.registered) - ret = rdev_resume(rdev); - rtnl_unlock(); - } + rtnl_lock(); + if (rdev->wiphy.registered && rdev->ops->resume) + ret = rdev_resume(rdev); + rtnl_unlock(); return ret; } From 77c1c03c5b8ef28e55bb0aff29b1e006037ca645 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 28 Mar 2017 22:59:25 +0800 Subject: [PATCH 43/82] netfilter: nfnetlink_queue: fix secctx memory leak We must call security_release_secctx to free the memory returned by security_secid_to_secctx, otherwise memory may be leaked forever. Fixes: ef493bd930ae ("netfilter: nfnetlink_queue: add security context information") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3ee0b8a000a4..933509ebf3d3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); - return NULL; + goto nlmsg_failure; } nlh = nlmsg_put(skb, 0, 0, @@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); - return NULL; + goto nlmsg_failure; } nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = entry->state.pf; @@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; + if (seclen) + security_release_secctx(secdata, seclen); return skb; nla_put_failure: skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); +nlmsg_failure: + if (seclen) + security_release_secctx(secdata, seclen); return NULL; } From 7d65f82954dadbbe7b6e1aec7e07ad17bc6d958b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Mar 2017 14:15:24 +0200 Subject: [PATCH 44/82] mac80211: unconditionally start new netdev queues with iTXQ support When internal mac80211 TXQs aren't supported, netdev queues must always started out started even when driver queues are stopped while the interface is added. This is necessary because with the internal TXQ support netdev queues are never stopped and packet scheduling/dropping is done in mac80211. Cc: stable@vger.kernel.org # 4.9+ Fixes: 80a83cfc434b1 ("mac80211: skip netdev queue control with software queuing") Reported-and-tested-by: Sven Eckelmann Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 40813dd3301c..5bb0c5012819 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local); if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + local->ops->wake_tx_queue) { /* XXX: for AP_VLAN, actually track AP queues */ netif_tx_start_all_queues(dev); } else if (dev) { From 94d7ee0baa8b764cf64ad91ed69464c1a6a0066b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 29 Mar 2017 08:44:59 +0200 Subject: [PATCH 45/82] l2tp: hold tunnel socket when handling control frames in l2tp_ip and l2tp_ip6 The code following l2tp_tunnel_find() expects that a new reference is held on sk. Either sk_receive_skb() or the discard_put error path will drop a reference from the tunnel's socket. This issue exists in both l2tp_ip and l2tp_ip6. Fixes: a3c18422a4b4 ("l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv()") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 5 +++-- net/l2tp/l2tp_ip6.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index d25038cfd64e..7208fbe5856b 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -178,9 +178,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a4abcbc4c09a..516d7ce24ba7 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -191,9 +191,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); From e91793bb615cf6cdd59c0b6749fe173687bb0947 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 29 Mar 2017 08:45:29 +0200 Subject: [PATCH 46/82] l2tp: purge socket queues in the .destruct() callback The Rx path may grab the socket right before pppol2tp_release(), but nothing guarantees that it will enqueue packets before skb_queue_purge(). Therefore, the socket can be destroyed without its queues fully purged. Fix this by purging queues in pppol2tp_session_destruct() where we're guaranteed nothing is still referencing the socket. Fixes: 9e9cb6221aa7 ("l2tp: fix userspace reception on plain L2TP sockets") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 36cc56fd0418..123b6a2411a0 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session) static void pppol2tp_session_destruct(struct sock *sk) { struct l2tp_session *session = sk->sk_user_data; + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); @@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock) l2tp_session_queue_purge(session); sock_put(sk); } - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - release_sock(sk); /* This will delete the session context via From 2247925f0942dc4e7c09b1cde45ca18461d94c5f Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Tue, 28 Mar 2017 19:47:29 -0400 Subject: [PATCH 47/82] bnxt_en: Fix NULL pointer dereference in reopen failure path Net device reset can fail when the h/w or f/w is in a bad state. Subsequent netdevice open fails in bnxt_hwrm_stat_ctx_alloc(). The cleanup invokes bnxt_hwrm_resource_free() which inturn calls bnxt_disable_int(). In this routine, the code segment if (ring->fw_ring_id != INVALID_HW_RING_ID) BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); results in NULL pointer dereference as cpr->cp_doorbell is not yet initialized, and fw_ring_id is zero. The fix is to initialize cpr fw_ring_id to INVALID_HW_RING_ID before bnxt_init_chip() is invoked. Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 32de4589d16a..7ee772483f26 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2455,6 +2455,18 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) return 0; } +static void bnxt_init_cp_rings(struct bnxt *bp) +{ + int i; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + + ring->fw_ring_id = INVALID_HW_RING_ID; + } +} + static int bnxt_init_rx_rings(struct bnxt *bp) { int i, rc = 0; @@ -5006,6 +5018,7 @@ static int bnxt_shutdown_nic(struct bnxt *bp, bool irq_re_init) static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init) { + bnxt_init_cp_rings(bp); bnxt_init_rx_rings(bp); bnxt_init_tx_rings(bp); bnxt_init_ring_grps(bp, irq_re_init); From 23e12c893489ed12ecfccbf866fc62af1bead4b0 Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Tue, 28 Mar 2017 19:47:30 -0400 Subject: [PATCH 48/82] bnxt_en: Correct the order of arguments to netdev_err() in bnxt_set_tpa() Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7ee772483f26..6c8568780a6a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4744,7 +4744,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); if (rc) { netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n", - rc, i); + i, rc); return rc; } } From 3ed3a83e3f3871c57b18cef09b148e96921236ed Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 28 Mar 2017 19:47:31 -0400 Subject: [PATCH 49/82] bnxt_en: Fix DMA unmapping of the RX buffers in XDP mode during shutdown. In bnxt_free_rx_skbs(), which is called to free up all RX buffers during shutdown, we need to unmap the page if we are running in XDP mode. Fixes: c61fb99cae51 ("bnxt_en: Add RX page mode support.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6c8568780a6a..1f1e54ba0ecb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1983,20 +1983,25 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) for (j = 0; j < max_idx; j++) { struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j]; + dma_addr_t mapping = rx_buf->mapping; void *data = rx_buf->data; if (!data) continue; - dma_unmap_single(&pdev->dev, rx_buf->mapping, - bp->rx_buf_use_size, bp->rx_dir); - rx_buf->data = NULL; - if (BNXT_RX_PAGE_MODE(bp)) + if (BNXT_RX_PAGE_MODE(bp)) { + mapping -= bp->rx_dma_offset; + dma_unmap_page(&pdev->dev, mapping, + PAGE_SIZE, bp->rx_dir); __free_page(data); - else + } else { + dma_unmap_single(&pdev->dev, mapping, + bp->rx_buf_use_size, + bp->rx_dir); kfree(data); + } } for (j = 0; j < max_agg_idx; j++) { From 358e78b5f445c35f5b7f9241425fb499ee9fe3e2 Mon Sep 17 00:00:00 2001 From: Zakharov Vlad Date: Wed, 29 Mar 2017 13:41:46 +0300 Subject: [PATCH 50/82] ezchip: nps_enet: check if napi has been completed After a new NAPI_STATE_MISSED state was added to NAPI we can get into this state and in such case we have to reschedule NAPI as some work is still pending and we have to process it. napi_complete_done() function returns false if we have to reschedule something (e.g. in case we were in MISSED state) as current polling have not been completed yet. nps_enet driver hasn't been verifying the return value of napi_complete_done() and has been forcibly enabling interrupts. That is not correct as we should not enable interrupts before we have processed all scheduled work. As a result we were getting trapped in interrupt hanlder chain as we had never been able to disabale ethernet interrupts again. So this patch makes nps_enet_poll() func verify return value of napi_complete_done() and enable interrupts only in case all scheduled work has been completed. Signed-off-by: Vlad Zakharov Signed-off-by: David S. Miller --- drivers/net/ethernet/ezchip/nps_enet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 992ebe973d25..f819843e2bae 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -189,11 +189,9 @@ static int nps_enet_poll(struct napi_struct *napi, int budget) nps_enet_tx_handler(ndev); work_done = nps_enet_rx_handler(ndev); - if (work_done < budget) { + if ((work_done < budget) && napi_complete_done(napi, work_done)) { u32 buf_int_enable_value = 0; - napi_complete_done(napi, work_done); - /* set tx_done and rx_rdy bits */ buf_int_enable_value |= NPS_ENET_ENABLE << RX_RDY_SHIFT; buf_int_enable_value |= NPS_ENET_ENABLE << TX_DONE_SHIFT; From 893dc68f1b18451e6d550b1884fc6be76e1bb90c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 21 Mar 2017 09:24:11 -0500 Subject: [PATCH 51/82] rtlwifi: Fix scheduling while atomic splat Following commit cceb0a597320 ("rtlwifi: Add work queue for c2h cmd."), the following BUG is reported when rtl8723be is used: BUG: sleeping function called from invalid context at mm/slab.h:432 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/0 CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W O 4.11.0-rc3-wl+ #276 Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.50 09/29/2014 Call Trace: dump_stack+0x63/0x89 ___might_sleep+0xe9/0x130 __might_sleep+0x4a/0x90 kmem_cache_alloc_trace+0x19f/0x200 ? rtl_c2hcmd_enqueue+0x3e/0x110 [rtlwifi] rtl_c2hcmd_enqueue+0x3e/0x110 [rtlwifi] rtl8723be_c2h_packet_handler+0xac/0xc0 [rtl8723be] rtl8723be_rx_command_packet+0x37/0x5c [rtl8723be] _rtl_pci_rx_interrupt+0x200/0x6b0 [rtl_pci] _rtl_pci_interrupt+0x20c/0x5d0 [rtl_pci] __handle_irq_event_percpu+0x3f/0x1d0 handle_irq_event_percpu+0x23/0x60 handle_irq_event+0x3c/0x60 handle_fasteoi_irq+0xa2/0x170 handle_irq+0x20/0x30 do_IRQ+0x48/0xd0 common_interrupt+0x89/0x89 ... Although commit cceb0a597320 converted most c2h commands to use a work queue, the Bluetooth coexistence routines can be in atomic mode when they execute such a call. Fixes: cceb0a597320 ("rtlwifi: Add work queue for c2h cmd.") Signed-off-by: Larry Finger Cc: Ping-Ke Shih Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index caea350f05aa..bdc379178e87 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1742,12 +1742,14 @@ void rtl_c2hcmd_enqueue(struct ieee80211_hw *hw, u8 tag, u8 len, u8 *val) unsigned long flags; struct rtl_c2hcmd *c2hcmd; - c2hcmd = kmalloc(sizeof(*c2hcmd), GFP_KERNEL); + c2hcmd = kmalloc(sizeof(*c2hcmd), + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); if (!c2hcmd) goto label_err; - c2hcmd->val = kmalloc(len, GFP_KERNEL); + c2hcmd->val = kmalloc(len, + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); if (!c2hcmd->val) goto label_err2; From d77facb88448cdeaaa3adba5b9704a48ac2ac8d6 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 28 Mar 2017 09:11:30 +0100 Subject: [PATCH 52/82] brcmfmac: use local iftype avoiding use-after-free of virtual interface A use-after-free was found using KASAN. In brcmf_p2p_del_if() the virtual interface is removed using call to brcmf_remove_interface(). After that the virtual interface instance has been freed and should not be referenced. Solve this by storing the nl80211 iftype in local variable, which is used in a couple of places anyway. Cc: stable@vger.kernel.org # 4.10.x, 4.9.x Reported-by: Daniel J Blueman Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index de19c7c92bc6..85d949e03f79 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -2238,14 +2238,16 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy); struct brcmf_p2p_info *p2p = &cfg->p2p; struct brcmf_cfg80211_vif *vif; + enum nl80211_iftype iftype; bool wait_for_disable = false; int err; brcmf_dbg(TRACE, "delete P2P vif\n"); vif = container_of(wdev, struct brcmf_cfg80211_vif, wdev); + iftype = vif->wdev.iftype; brcmf_cfg80211_arm_vif_event(cfg, vif); - switch (vif->wdev.iftype) { + switch (iftype) { case NL80211_IFTYPE_P2P_CLIENT: if (test_bit(BRCMF_VIF_STATUS_DISCONNECTING, &vif->sme_state)) wait_for_disable = true; @@ -2275,7 +2277,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) BRCMF_P2P_DISABLE_TIMEOUT); err = 0; - if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE) { + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { brcmf_vif_clear_mgmt_ies(vif); err = brcmf_p2p_release_p2p_if(vif); } @@ -2291,7 +2293,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) brcmf_remove_interface(vif->ifp, true); brcmf_cfg80211_arm_vif_event(cfg, NULL); - if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE) + if (iftype != NL80211_IFTYPE_P2P_DEVICE) p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL; return err; From 2b6867c2ce76c596676bec7d2d525af525fdc6e2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:20 +0200 Subject: [PATCH 53/82] net/packet: fix overflow in check for priv area size Subtracting tp_sizeof_priv from tp_block_size and casting to int to check whether one is less then the other doesn't always work (both of them are unsigned ints). Compare them as is instead. Also cast tp_sizeof_priv to u64 before using BLK_PLUS_PRIV, as it can overflow inside BLK_PLUS_PRIV otherwise. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a0dbe7ca8f72..2323ee35dc09 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4193,8 +4193,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) From 8f8d28e4d6d815a391285e121c3a53a0b6cb9e7b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:21 +0200 Subject: [PATCH 54/82] net/packet: fix overflow in check for tp_frame_nr When calculating rb->frames_per_block * req->tp_block_nr the result can overflow. Add a check that tp_block_size * tp_block_nr <= UINT_MAX. Since frames_per_block <= tp_block_size, the expression would never overflow. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2323ee35dc09..3ac286ebb2f4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4205,6 +4205,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; + if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; From bcc5364bdcfe131e6379363f089e7b4108d35b70 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:22 +0200 Subject: [PATCH 55/82] net/packet: fix overflow in check for tp_reserve When calculating po->tp_hdrlen + po->tp_reserve the result can overflow. Fix by checking that tp_reserve <= INT_MAX on assign. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3ac286ebb2f4..8489beff5c25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + if (val > INT_MAX) + return -EINVAL; po->tp_reserve = val; return 0; } From 3dbcc105d5561e18ccd0842c7baab1c835562a37 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 30 Mar 2017 01:00:53 +0800 Subject: [PATCH 56/82] sctp: alloc stream info when initializing asoc When sending a msg without asoc established, sctp will send INIT packet first and then enqueue chunks. Before receiving INIT_ACK, stream info is not yet alloced. But enqueuing chunks needs to access stream info, like out stream state and out stream cnt. This patch is to fix it by allocing out stream info when initializing an asoc, allocing in stream and re-allocing out stream when processing init. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 ++- net/sctp/associola.c | 7 ++++++- net/sctp/sm_make_chunk.c | 9 ++------ net/sctp/stream.c | 43 +++++++++++++++++++++++++++++++------- 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8caa5ee9e290..a127b7c2c3c9 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -377,7 +377,8 @@ typedef struct sctp_sender_hb_info { __u64 hb_nonce; } sctp_sender_hb_info_t; -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp); +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp); +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0439a1a68367..0b26df5f6188 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -246,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; + if (sctp_stream_new(asoc, gfp)) + goto fail_init; + /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -264,7 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp)) - goto fail_init; + goto stream_free; asoc->active_key_id = ep->active_key_id; asoc->prsctp_enable = ep->prsctp_enable; @@ -287,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; +stream_free: + sctp_stream_free(asoc->stream); fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 969a30c7bb54..118faff6a332 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * association. */ if (!asoc->temp) { - int error; - - asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams, - asoc->c.sinit_num_ostreams, gfp); - if (!asoc->stream) + if (sctp_stream_init(asoc, gfp)) goto clean_up; - error = sctp_assoc_set_id(asoc, gfp); - if (error) + if (sctp_assoc_set_id(asoc, gfp)) goto clean_up; } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 1c6cc04fa3a4..bbed997e1c5f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,33 +35,60 @@ #include #include -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp) +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp) { struct sctp_stream *stream; int i; stream = kzalloc(sizeof(*stream), gfp); if (!stream) - return NULL; + return -ENOMEM; - stream->outcnt = outcnt; + stream->outcnt = asoc->c.sinit_num_ostreams; stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); if (!stream->out) { kfree(stream); - return NULL; + return -ENOMEM; } for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; - stream->incnt = incnt; + asoc->stream = stream; + + return 0; +} + +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp) +{ + struct sctp_stream *stream = asoc->stream; + int i; + + /* Initial stream->out size may be very big, so free it and alloc + * a new one with new outcnt to save memory. + */ + kfree(stream->out); + stream->outcnt = asoc->c.sinit_num_ostreams; + stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); + if (!stream->out) + goto nomem; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + stream->incnt = asoc->c.sinit_max_instreams; stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp); if (!stream->in) { kfree(stream->out); - kfree(stream); - return NULL; + goto nomem; } - return stream; + return 0; + +nomem: + asoc->stream = NULL; + kfree(stream); + + return -ENOMEM; } void sctp_stream_free(struct sctp_stream *stream) From 0b98ca2a45e35dfe02f4512fa30b9f5a9900cb29 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Thu, 30 Mar 2017 00:58:32 -0400 Subject: [PATCH 57/82] be2net: Fix endian issue in logical link config command Use cpu_to_le32() for link_config variable in set_logical_link_config command as this variable is of type u32. Signed-off-by: Suresh Reddy Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 30e855004c57..02dd5246dfae 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4939,8 +4939,9 @@ static int __be_cmd_set_logical_link_config(struct be_adapter *adapter, int link_state, int version, u8 domain) { - struct be_mcc_wrb *wrb; struct be_cmd_req_set_ll_link *req; + struct be_mcc_wrb *wrb; + u32 link_config = 0; int status; mutex_lock(&adapter->mcc_lock); @@ -4962,10 +4963,12 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, if (link_state == IFLA_VF_LINK_STATE_ENABLE || link_state == IFLA_VF_LINK_STATE_AUTO) - req->link_config |= PLINK_ENABLE; + link_config |= PLINK_ENABLE; if (link_state == IFLA_VF_LINK_STATE_AUTO) - req->link_config |= PLINK_TRACK; + link_config |= PLINK_TRACK; + + req->link_config = cpu_to_le32(link_config); status = be_mcc_notify_wait(adapter); err: From b917078c1c107ce34264af893e436e6115eeb9f6 Mon Sep 17 00:00:00 2001 From: Daode Huang Date: Thu, 30 Mar 2017 16:37:41 +0100 Subject: [PATCH 58/82] net: hns: Add ACPI support to check SFP present The current code only supports DT to check SFP present. This patch adds ACPI support as well. Signed-off-by: Daode Huang Reviewed-by: Yisen Zhuang Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 11 +++++--- .../ethernet/hisilicon/hns/hns_dsaf_misc.c | 28 ++++++++++++++++++- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 3239d27143b9..bdd8cdd732fb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -82,9 +82,12 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) else *link_status = 0; - ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, &sfp_prsnt); - if (!ret) - *link_status = *link_status && sfp_prsnt; + if (mac_cb->media_type == HNAE_MEDIA_TYPE_FIBER) { + ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, + &sfp_prsnt); + if (!ret) + *link_status = *link_status && sfp_prsnt; + } mac_cb->link = *link_status; } @@ -855,7 +858,7 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) of_node_put(np); np = of_parse_phandle(to_of_node(mac_cb->fw_port), - "serdes-syscon", 0); + "serdes-syscon", 0); syscon = syscon_node_to_regmap(np); of_node_put(np); if (IS_ERR_OR_NULL(syscon)) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index a2c22d084ce9..e13aa064a8e9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -461,6 +461,32 @@ int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt) return 0; } +int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt) +{ + union acpi_object *obj; + union acpi_object obj_args, argv4; + + obj_args.integer.type = ACPI_TYPE_INTEGER; + obj_args.integer.value = mac_cb->mac_id; + + argv4.type = ACPI_TYPE_PACKAGE, + argv4.package.count = 1, + argv4.package.elements = &obj_args, + + obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev), + hns_dsaf_acpi_dsm_uuid, 0, + HNS_OP_GET_SFP_STAT_FUNC, &argv4); + + if (!obj || obj->type != ACPI_TYPE_INTEGER) + return -ENODEV; + + *sfp_prsnt = obj->integer.value; + + ACPI_FREE(obj); + + return 0; +} + /** * hns_mac_config_sds_loopback - set loop back for serdes * @mac_cb: mac control block @@ -592,7 +618,7 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst_acpi; misc_op->get_phy_if = hns_mac_get_phy_if_acpi; - misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt; + misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt_acpi; misc_op->cfg_serdes_loopback = hns_mac_config_sds_loopback_acpi; } else { From 3af887c38f1ae0db66c00c4ee2e8a0b1e99ffc29 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Mar 2017 17:00:12 +0100 Subject: [PATCH 59/82] net/faraday: Explicitly include linux/of.h and linux/property.h This driver uses interfaces from linux/of.h and linux/property.h but relies on implict inclusion of those headers which means that changes in other headers could break the build, as happened in -next for arm today. Add a explicit includes. Signed-off-by: Mark Brown Acked-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 928b0df2b8e0..ade6b3e4ed13 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -28,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include From 6f56f6186c18e3fd54122b73da68e870687b8c59 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Thu, 30 Mar 2017 12:36:03 -0700 Subject: [PATCH 60/82] openvswitch: Fix ovs_flow_key_update() ovs_flow_key_update() is called when the flow key is invalid, and it is used to update and revalidate the flow key. Commit 329f45bc4f19 ("openvswitch: add mac_proto field to the flow key") introduces mac_proto field to flow key and use it to determine whether the flow key is valid. However, the commit does not update the code path in ovs_flow_key_update() to revalidate the flow key which may cause BUG_ON() on execute_recirc(). This patch addresses the aforementioned issue. Fixes: 329f45bc4f19 ("openvswitch: add mac_proto field to the flow key") Signed-off-by: Yi-Hung Wei Acked-by: Jiri Benc Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d4bb8eb63f2..3f76cb765e5b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) /* Link layer. */ clear_vlan(key); - if (key->mac_proto == MAC_PROTO_NONE) { + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { if (unlikely(eth_type_vlan(skb->protocol))) return -EINVAL; @@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { - return key_extract(skb, key); + int res; + + res = key_extract(skb, key); + if (!res) + key->mac_proto &= ~SW_FLOW_KEY_INVALID; + + return res; } static int key_extract_mac_proto(struct sk_buff *skb) From d5b07ccc1bf5fd2ccc6bf9da5677fc448a972e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Rebe?= Date: Tue, 28 Mar 2017 07:56:51 +0200 Subject: [PATCH 61/82] r8152: The Microsoft Surface docks also use R8152 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this the generic cdc_ether grabs the device, and does not really work. Signed-off-by: René Rebe Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 15 +++++++++++++++ drivers/net/usb/r8152.c | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index f5552aaaa77a..f3ae88fdf332 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -532,6 +532,7 @@ static const struct driver_info wwan_info = { #define LENOVO_VENDOR_ID 0x17ef #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 +#define MICROSOFT_VENDOR_ID 0x045e static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -761,6 +762,20 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Microsoft Surface 2 dock (based on Realtek RTL8152) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07ab, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* Microsoft Surface 3 dock (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07c6, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c34df33c6d72..07f788c49d57 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -517,6 +517,7 @@ enum rtl8152_flags { /* Define these values to match your device */ #define VENDOR_ID_REALTEK 0x0bda +#define VENDOR_ID_MICROSOFT 0x045e #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef #define VENDOR_ID_NVIDIA 0x0955 @@ -4521,6 +4522,8 @@ static void rtl8152_disconnect(struct usb_interface *intf) static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)}, {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)}, {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062)}, From fce366a9dd0ddc47e7ce05611c266e8574a45116 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:02 +0200 Subject: [PATCH 62/82] bpf, verifier: fix alu ops against map_value{, _adj} register types While looking into map_value_adj, I noticed that alu operations directly on the map_value() resp. map_value_adj() register (any alu operation on a map_value() register will turn it into a map_value_adj() typed register) are not sufficiently protected against some of the operations. Two non-exhaustive examples are provided that the verifier needs to reject: i) BPF_AND on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0xbf842a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+2 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (57) r0 &= 8 8: (7a) *(u64 *)(r0 +0) = 22 R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=8 R10=fp 9: (95) exit from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp 9: (95) exit processed 10 insns ii) BPF_ADD in 32 bit mode on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0xc24eee00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+2 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (04) (u32) r0 += (u32) 0 8: (7a) *(u64 *)(r0 +0) = 22 R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 9: (95) exit from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp 9: (95) exit processed 10 insns Issue is, while min_value / max_value boundaries for the access are adjusted appropriately, we change the pointer value in a way that cannot be sufficiently tracked anymore from its origin. Operations like BPF_{AND,OR,DIV,MUL,etc} on a destination register that is PTR_TO_MAP_VALUE{,_ADJ} was probably unintended, in fact, all the test cases coming with 484611357c19 ("bpf: allow access into map value arrays") perform BPF_ADD only on the destination register that is PTR_TO_MAP_VALUE_ADJ. Only for UNKNOWN_VALUE register types such operations make sense, f.e. with unknown memory content fetched initially from a constant offset from the map value memory into a register. That register is then later tested against lower / upper bounds, so that the verifier can then do the tracking of min_value / max_value, and properly check once that UNKNOWN_VALUE register is added to the destination register with type PTR_TO_MAP_VALUE{,_ADJ}. This is also what the original use-case is solving. Note, tracking on what is being added is done through adjust_reg_min_max_vals() and later access to the map value enforced with these boundaries and the given offset from the insn through check_map_access_adj(). Tests will fail for non-root environment due to prohibited pointer arithmetic, in particular in check_alu_op(), we bail out on the is_pointer_value() check on the dst_reg (which is false in root case as we allow for pointer arithmetic via env->allow_ptr_leaks). Similarly to PTR_TO_PACKET, one way to fix it is to restrict the allowed operations on PTR_TO_MAP_VALUE{,_ADJ} registers to 64 bit mode BPF_ADD. The test_verifier suite runs fine after the patch and it also rejects mentioned test cases. Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5e6202e62265..86deddecff25 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1925,6 +1925,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * register as unknown. */ if (env->allow_ptr_leaks && + BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD && (dst_reg->type == PTR_TO_MAP_VALUE || dst_reg->type == PTR_TO_MAP_VALUE_ADJ)) dst_reg->type = PTR_TO_MAP_VALUE_ADJ; From 79adffcd6489ef43bda2dfded3d637d7fb4fac80 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:03 +0200 Subject: [PATCH 63/82] bpf, verifier: fix rejection of unaligned access checks for map_value_adj Currently, the verifier doesn't reject unaligned access for map_value_adj register types. Commit 484611357c19 ("bpf: allow access into map value arrays") added logic to check_ptr_alignment() extending it from PTR_TO_PACKET to also PTR_TO_MAP_VALUE_ADJ, but for PTR_TO_MAP_VALUE_ADJ no enforcement is in place, because reg->id for PTR_TO_MAP_VALUE_ADJ reg types is never non-zero, meaning, we can cause BPF_H/_W/_DW-based unaligned access for architectures not supporting efficient unaligned access, and thus worst case could raise exceptions on some archs that are unable to correct the unaligned access or perform a different memory access to the actual requested one and such. i) Unaligned load with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0x42533a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+11 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (61) r1 = *(u32 *)(r0 +0) 8: (35) if r1 >= 0xb goto pc+9 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=10 R10=fp 9: (07) r0 += 3 10: (79) r7 = *(u64 *)(r0 +0) R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R1=inv,min_value=0,max_value=10 R10=fp 11: (79) r7 = *(u64 *)(r0 +2) R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R1=inv,min_value=0,max_value=10 R7=inv R10=fp [...] ii) Unaligned store with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0x4df16a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+19 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (07) r0 += 3 8: (7a) *(u64 *)(r0 +0) = 42 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp 9: (7a) *(u64 *)(r0 +2) = 43 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp 10: (7a) *(u64 *)(r0 -2) = 44 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp [...] For the PTR_TO_PACKET type, reg->id is initially zero when skb->data was fetched, it later receives a reg->id from env->id_gen generator once another register with UNKNOWN_VALUE type was added to it via check_packet_ptr_add(). The purpose of this reg->id is twofold: i) it is used in find_good_pkt_pointers() for setting the allowed access range for regs with PTR_TO_PACKET of same id once verifier matched on data/data_end tests, and ii) for check_ptr_alignment() to determine that when not having efficient unaligned access and register with UNKNOWN_VALUE was added to PTR_TO_PACKET, that we're only allowed to access the content bytewise due to unknown unalignment. reg->id was never intended for PTR_TO_MAP_VALUE{,_ADJ} types and thus is always zero, the only marking is in PTR_TO_MAP_VALUE_OR_NULL that was added after 484611357c19 via 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers"). Above tests will fail for non-root environment due to prohibited pointer arithmetic. The fix splits register-type specific checks into their own helper instead of keeping them combined, so we don't run into a similar issue in future once we extend check_ptr_alignment() further and forget to add reg->type checks for some of the checks. Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 58 ++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 86deddecff25..a834068a400e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -765,38 +765,56 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } } -static int check_ptr_alignment(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, int off, int size) +static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, + int off, int size) { - if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) { - if (off % size != 0) { - verbose("misaligned access off %d size %d\n", - off, size); - return -EACCES; - } else { - return 0; - } - } - - if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - /* misaligned access to packet is ok on x86,arm,arm64 */ - return 0; - if (reg->id && size != 1) { - verbose("Unknown packet alignment. Only byte-sized access allowed\n"); + verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n"); return -EACCES; } /* skb->data is NET_IP_ALIGN-ed */ - if (reg->type == PTR_TO_PACKET && - (NET_IP_ALIGN + reg->off + off) % size != 0) { + if ((NET_IP_ALIGN + reg->off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", NET_IP_ALIGN, reg->off, off, size); return -EACCES; } + return 0; } +static int check_val_ptr_alignment(const struct bpf_reg_state *reg, + int size) +{ + if (size != 1) { + verbose("Unknown alignment. Only byte-sized access allowed in value access.\n"); + return -EACCES; + } + + return 0; +} + +static int check_ptr_alignment(const struct bpf_reg_state *reg, + int off, int size) +{ + switch (reg->type) { + case PTR_TO_PACKET: + return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : + check_pkt_ptr_alignment(reg, off, size); + case PTR_TO_MAP_VALUE_ADJ: + return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : + check_val_ptr_alignment(reg, size); + default: + if (off % size != 0) { + verbose("misaligned access off %d size %d\n", + off, size); + return -EACCES; + } + + return 0; + } +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -818,7 +836,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, if (size < 0) return size; - err = check_ptr_alignment(env, reg, off, size); + err = check_ptr_alignment(reg, off, size); if (err) return err; From 02ea80b1850e48abbce77878896229d7cc5cb230 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:04 +0200 Subject: [PATCH 64/82] bpf: add various verifier test cases for self-tests Add a couple of test cases, for example, probing for xadd on a spilled pointer to packet and map_value_adj register, various other map_value_adj tests including the unaligned load/store, and trying out pointer arithmetic on map_value_adj register itself. For the unaligned load/store, we need to figure out whether the architecture has efficient unaligned access and need to mark affected tests accordingly. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/include/linux/filter.h | 10 + tools/testing/selftests/bpf/Makefile | 9 +- tools/testing/selftests/bpf/test_verifier.c | 270 +++++++++++++++++++- 3 files changed, 283 insertions(+), 6 deletions(-) diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 122153b16ea4..390d7c9685fd 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -168,6 +168,16 @@ .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6a1ad58cb66f..9af09e8099c0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,7 +1,14 @@ LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf +APIDIR := ../../../include/uapi +GENDIR := ../../../../include/generated +GENHDR := $(GENDIR)/autoconf.h -CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) +ifneq ($(wildcard $(GENHDR)),) + GENFLAGS := -DHAVE_GENHDR +endif + +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) LDLIBS += -lcap TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 7d761d4cc759..c848e90b6421 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -30,6 +30,14 @@ #include +#ifdef HAVE_GENHDR +# include "autoconf.h" +#else +# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__) +# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 +# endif +#endif + #include "../../../include/linux/filter.h" #ifndef ARRAY_SIZE @@ -39,6 +47,8 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 +#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) + struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -53,6 +63,7 @@ struct bpf_test { REJECT } result, result_unpriv; enum bpf_prog_type prog_type; + uint8_t flags; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -2431,6 +2442,30 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test15 (spill with xadd)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_5, 4096), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R2 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -2934,6 +2969,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "valid map access into an array with a variable", @@ -2957,6 +2993,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "valid map access into an array with a signed variable", @@ -2984,6 +3021,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a constant", @@ -3025,6 +3063,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is outside of the array range", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a variable", @@ -3048,6 +3087,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with no floor check", @@ -3074,6 +3114,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a invalid max check", @@ -3100,6 +3141,7 @@ static struct bpf_test tests[] = { .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a invalid max check", @@ -3129,6 +3171,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "multiple registers share map_lookup_elem result", @@ -3252,6 +3295,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "constant register |= constant should keep constant type", @@ -3981,7 +4025,208 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, }, { - "map element value (adjusted) is preserved across register spilling", + "map element value or null is marked on register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + }, + { + "map element value store of cleared call register", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R1 !read_ok", + .errstr = "R1 !read_ok", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value with unaligned store", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43), + BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map element value with unaligned load", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map element value illegal alu op, 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 3", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 4", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_3, 4096), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .errstr = "R0 invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value is preserved across register spilling", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), @@ -4003,6 +4248,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result = ACCEPT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: stack, bitwise AND + JMP, correct bounds", @@ -4441,6 +4687,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid range check", @@ -4472,6 +4719,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, } }; @@ -4550,11 +4798,11 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { + int fd_prog, expected_ret, reject_from_alignment; struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); int prog_type = test->prog_type; int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1; - int fd_prog, expected_ret; const char *expected_err; do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3); @@ -4567,8 +4815,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; + + reject_from_alignment = fd_prog < 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && + strstr(bpf_vlog, "Unknown alignment."); +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (reject_from_alignment) { + printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", + strerror(errno)); + goto fail_log; + } +#endif if (expected_ret == ACCEPT) { - if (fd_prog < 0) { + if (fd_prog < 0 && !reject_from_alignment) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; @@ -4578,14 +4837,15 @@ static void do_test_single(struct bpf_test *test, bool unpriv, printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err)) { + if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { printf("FAIL\nUnexpected error message!\n"); goto fail_log; } } (*passes)++; - printf("OK\n"); + printf("OK%s\n", reject_from_alignment ? + " (NOTE: reject due to unknown alignment)" : ""); close_fds: close(fd_prog); close(fd_f1); From afe89962ee0799955b606cc7637ac86a296923a6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 31 Mar 2017 17:57:28 +0800 Subject: [PATCH 65/82] sctp: use right in and out stream cnt Since sctp reconf was added in sctp, the real cnt of in/out stream have not been c.sinit_max_instreams and c.sinit_num_ostreams any more. This patch is to replace them with stream->in/outcnt. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 +-- net/sctp/proc.c | 4 ++-- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/socket.c | 10 +++++----- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 025ccff67072..8081476ed313 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1026,8 +1026,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ - if (chunk->sinfo.sinfo_stream >= - asoc->c.sinit_num_ostreams) { + if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) { /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 206377fe91ec..a0b29d43627f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", - assoc->hbinterval, assoc->c.sinit_max_instreams, - assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->hbinterval, assoc->stream->incnt, + assoc->stream->outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, atomic_read(&sk->sk_wmem_alloc), diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e03bb1aab4d0..24c6ccce7539 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3946,7 +3946,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto discard_noforce; } @@ -4017,7 +4017,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto gen_shutdown; } @@ -6353,7 +6353,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ sid = ntohs(data_hdr->stream); - if (sid >= asoc->c.sinit_max_instreams) { + if (sid >= asoc->stream->incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index baa269a0d52e..12fbae2c1002 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } /* Check for invalid stream. */ - if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { + if (sinfo->sinfo_stream >= asoc->stream->outcnt) { err = -EINVAL; goto out_free; } @@ -4461,8 +4461,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - info->sctpi_instrms = asoc->c.sinit_max_instreams; - info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + info->sctpi_instrms = asoc->stream->incnt; + info->sctpi_outstrms = asoc->stream->outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) @@ -4691,8 +4691,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - status.sstat_instrms = asoc->c.sinit_max_instreams; - status.sstat_outstrms = asoc->c.sinit_num_ostreams; + status.sstat_instrms = asoc->stream->incnt; + status.sstat_outstrms = asoc->stream->outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, From 61b9a047729bb230978178bca6729689d0c50ca2 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:25 +0200 Subject: [PATCH 66/82] l2tp: fix race in l2tp_recv_common() Taking a reference on sessions in l2tp_recv_common() is racy; this has to be done by the callers. To this end, a new function is required (l2tp_session_get()) to atomically lookup a session and take a reference on it. Callers then have to manually drop this reference. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 73 ++++++++++++++++++++++++++++++++++++-------- net/l2tp/l2tp_core.h | 3 ++ net/l2tp/l2tp_ip.c | 17 ++++++++--- net/l2tp/l2tp_ip6.c | 18 ++++++++--- 4 files changed, 88 insertions(+), 23 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8adab6335ced..8a067536d15c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -278,6 +278,55 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn } EXPORT_SYMBOL_GPL(l2tp_session_find); +/* Like l2tp_session_find() but takes a reference on the returned session. + * Optionally calls session->ref() too if do_ref is true. + */ +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref) +{ + struct hlist_head *session_list; + struct l2tp_session *session; + + if (!tunnel) { + struct l2tp_net *pn = l2tp_pernet(net); + + session_list = l2tp_session_id_hash_2(pn, session_id); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(session, session_list, global_hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + rcu_read_unlock_bh(); + + return session; + } + } + rcu_read_unlock_bh(); + + return NULL; + } + + session_list = l2tp_session_id_hash(tunnel, session_id); + read_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session, session_list, hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + read_unlock_bh(&tunnel->hlist_lock); + + return session; + } + } + read_unlock_bh(&tunnel->hlist_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_session_get); + struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; @@ -633,6 +682,9 @@ discard: * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. + * + * session->ref() must have been called prior to l2tp_recv_common(). + * session->deref() will be called automatically after skb is processed. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, @@ -642,14 +694,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int offset; u32 ns, nr; - /* The ref count is increased since we now hold a pointer to - * the session. Take care to decrement the refcnt when exiting - * this function from now on... - */ - l2tp_session_inc_refcount(session); - if (session->ref) - (*session->ref)(session); - /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { @@ -802,8 +846,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Try to dequeue as many skbs from reorder_q as we can. */ l2tp_recv_dequeue(session); - l2tp_session_dec_refcount(session); - return; discard: @@ -812,8 +854,6 @@ discard: if (session->deref) (*session->deref)(session); - - l2tp_session_dec_refcount(session); } EXPORT_SYMBOL(l2tp_recv_common); @@ -920,8 +960,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } /* Find the session context */ - session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); + session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); if (!session || !session->recv_skb) { + if (session) { + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } + /* Not found? Pass to userspace to deal with */ l2tp_info(tunnel, L2TP_MSG_DATA, "%s: no session found (%u/%u). Passing up.\n", @@ -930,6 +976,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); + l2tp_session_dec_refcount(session); return 0; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index aebf281d09ee..4544e81a3d27 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -230,6 +230,9 @@ out: return tunnel; } +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref); struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 7208fbe5856b..4d322c1b7233 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -203,6 +204,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 516d7ce24ba7..88b397c30d86 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); + return 0; pass_up: @@ -216,6 +218,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); From 57377d63547861919ee634b845c7caa38de4a452 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:26 +0200 Subject: [PATCH 67/82] l2tp: ensure session can't get removed during pppol2tp_session_ioctl() Holding a reference on session is required before calling pppol2tp_session_ioctl(). The session could get freed while processing the ioctl otherwise. Since pppol2tp_session_ioctl() uses the session's socket, we also need to take a reference on it in l2tp_session_get(). Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 123b6a2411a0..827e55c41ba2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1141,11 +1141,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, if (stats.session_id != 0) { /* resend to session ioctl handler */ struct l2tp_session *session = - l2tp_session_find(sock_net(sk), tunnel, stats.session_id); - if (session != NULL) - err = pppol2tp_session_ioctl(session, cmd, arg); - else + l2tp_session_get(sock_net(sk), tunnel, + stats.session_id, true); + + if (session) { + err = pppol2tp_session_ioctl(session, cmd, + arg); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } else { err = -EBADR; + } break; } #ifdef CONFIG_XFRM From dbdbc73b44782e22b3b4b6e8b51e7a3d245f3086 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:27 +0200 Subject: [PATCH 68/82] l2tp: fix duplicate session creation l2tp_session_create() relies on its caller for checking for duplicate sessions. This is racy since a session can be concurrently inserted after the caller's verification. Fix this by letting l2tp_session_create() verify sessions uniqueness upon insertion. Callers need to be adapted to check for l2tp_session_create()'s return code instead of calling l2tp_session_find(). pppol2tp_connect() is a bit special because it has to work on existing sessions (if they're not connected) or to create a new session if none is found. When acting on a preexisting session, a reference must be held or it could go away on us. So we have to use l2tp_session_get() instead of l2tp_session_find() and drop the reference before exiting. Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support") Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 70 +++++++++++++++++++++++++++++++++----------- net/l2tp/l2tp_eth.c | 10 ++----- net/l2tp/l2tp_ppp.c | 62 +++++++++++++++++++-------------------- 3 files changed, 85 insertions(+), 57 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8a067536d15c..46b450a1bc21 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -374,6 +374,48 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) } EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, + struct l2tp_session *session) +{ + struct l2tp_session *session_walk; + struct hlist_head *g_head; + struct hlist_head *head; + struct l2tp_net *pn; + + head = l2tp_session_id_hash(tunnel, session->session_id); + + write_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session_walk, head, hlist) + if (session_walk->session_id == session->session_id) + goto exist; + + if (tunnel->version == L2TP_HDR_VER_3) { + pn = l2tp_pernet(tunnel->l2tp_net); + g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net), + session->session_id); + + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_for_each_entry(session_walk, g_head, global_hlist) + if (session_walk->session_id == session->session_id) + goto exist_glob; + + hlist_add_head_rcu(&session->global_hlist, g_head); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + } + + hlist_add_head(&session->hlist, head); + write_unlock_bh(&tunnel->hlist_lock); + + return 0; + +exist_glob: + spin_unlock_bh(&pn->l2tp_session_hlist_lock); +exist: + write_unlock_bh(&tunnel->hlist_lock); + + return -EEXIST; +} + /* Lookup a tunnel by id */ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) @@ -1785,6 +1827,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; + int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1840,6 +1883,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_set_header_len(session, tunnel->version); + err = l2tp_session_add_to_tunnel(tunnel, session); + if (err) { + kfree(session); + + return ERR_PTR(err); + } + /* Bump the reference count. The session context is deleted * only when this drops to zero. */ @@ -1849,28 +1899,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn /* Ensure tunnel socket isn't deleted */ sock_hold(tunnel->sock); - /* Add session to the tunnel's hash list */ - write_lock_bh(&tunnel->hlist_lock); - hlist_add_head(&session->hlist, - l2tp_session_id_hash(tunnel, session_id)); - write_unlock_bh(&tunnel->hlist_lock); - - /* And to the global session list if L2TPv3 */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_add_head_rcu(&session->global_hlist, - l2tp_session_id_hash_2(pn, session_id)); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - } - /* Ignore management session in session count value */ if (session->session_id != 0) atomic_inc(&l2tp_session_count); + + return session; } - return session; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(l2tp_session_create); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8bf18a5f66e0..6fd41d7afe1e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -221,12 +221,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - session = l2tp_session_find(net, tunnel, session_id); - if (session) { - rc = -EEXIST; - goto out; - } - if (cfg->ifname) { dev = dev_get_by_name(net, cfg->ifname); if (dev) { @@ -240,8 +234,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p session = l2tp_session_create(sizeof(*spriv), tunnel, session_id, peer_session_id, cfg); - if (!session) { - rc = -ENOMEM; + if (IS_ERR(session)) { + rc = PTR_ERR(session); goto out; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 827e55c41ba2..26501902d1a7 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -583,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, int error = 0; u32 tunnel_id, peer_tunnel_id; u32 session_id, peer_session_id; + bool drop_refcnt = false; int ver = 2; int fd; @@ -684,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = peer_tunnel_id; - /* Create session if it doesn't already exist. We handle the - * case where a session was previously created by the netlink - * interface by checking that the session doesn't already have - * a socket and its tunnel socket are what we expect. If any - * of those checks fail, return EEXIST to the caller. - */ - session = l2tp_session_find(sock_net(sk), tunnel, session_id); - if (session == NULL) { - /* Default MTU must allow space for UDP/L2TP/PPP - * headers. - */ - cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); + if (session) { + drop_refcnt = true; + ps = l2tp_session_priv(session); - /* Allocate and initialize a new session context. */ - session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, session_id, - peer_session_id, &cfg); - if (session == NULL) { - error = -ENOMEM; + /* Using a pre-existing session is fine as long as it hasn't + * been connected yet. + */ + if (ps->sock) { + error = -EEXIST; + goto end; + } + + /* consistency checks */ + if (ps->tunnel_sock != tunnel->sock) { + error = -EEXIST; goto end; } } else { - ps = l2tp_session_priv(session); - error = -EEXIST; - if (ps->sock != NULL) - goto end; + /* Default MTU must allow space for UDP/L2TP/PPP headers */ + cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; + cfg.mru = cfg.mtu; - /* consistency checks */ - if (ps->tunnel_sock != tunnel->sock) + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, session_id, + peer_session_id, &cfg); + if (IS_ERR(session)) { + error = PTR_ERR(session); goto end; + } } /* Associate session with its PPPoL2TP socket */ @@ -778,6 +779,8 @@ out_no_ppp: session->name); end: + if (drop_refcnt) + l2tp_session_dec_refcount(session); release_sock(sk); return error; @@ -805,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i if (tunnel->sock == NULL) goto out; - /* Check that this session doesn't already exist */ - error = -EEXIST; - session = l2tp_session_find(net, tunnel, session_id); - if (session != NULL) - goto out; - /* Default MTU values. */ if (cfg->mtu == 0) cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; @@ -818,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i cfg->mru = cfg->mtu; /* Allocate and initialize a new session context. */ - error = -ENOMEM; session = l2tp_session_create(sizeof(struct pppol2tp_session), tunnel, session_id, peer_session_id, cfg); - if (session == NULL) + if (IS_ERR(session)) { + error = PTR_ERR(session); goto out; + } ps = l2tp_session_priv(session); ps->tunnel_sock = tunnel->sock; From 5e6a9e5a3554a5b3db09cdc22253af1849c65dff Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:29 +0200 Subject: [PATCH 69/82] l2tp: hold session while sending creation notifications l2tp_session_find() doesn't take any reference on the returned session. Therefore, the session may disappear while sending the notification. Use l2tp_session_get() instead and decrement session's refcount once the notification is sent. Fixes: 33f72e6f0c67 ("l2tp : multicast notification to the registered listeners") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 3620fba31786..f1b68effb077 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -642,10 +642,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf session_id, peer_session_id, &cfg); if (ret >= 0) { - session = l2tp_session_find(net, tunnel, session_id); - if (session) + session = l2tp_session_get(net, tunnel, session_id, false); + if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); + l2tp_session_dec_refcount(session); + } } out: From 2777e2ab5a9cf2b4524486c6db1517a6ded25261 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:30 +0200 Subject: [PATCH 70/82] l2tp: take a reference on sessions used in genetlink handlers Callers of l2tp_nl_session_find() need to hold a reference on the returned session since there's no guarantee that it isn't going to disappear from under them. Relying on the fact that no l2tp netlink message may be processed concurrently isn't enough: sessions can be deleted by other means (e.g. by closing the PPPOL2TP socket of a ppp pseudowire). l2tp_nl_cmd_session_delete() is a bit special: it runs a callback function that may require a previous call to session->ref(). In particular, for ppp pseudowires, the callback is l2tp_session_delete(), which then calls pppol2tp_session_close() and dereferences the PPPOL2TP socket. The socket might already be gone at the moment l2tp_session_delete() calls session->ref(), so we need to take a reference during the session lookup. So we need to pass the do_ref variable down to l2tp_session_get() and l2tp_session_get_by_ifname(). Since all callers have to be updated, l2tp_session_find_by_ifname() and l2tp_nl_session_find() are renamed to reflect their new behaviour. Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 9 +++++++-- net/l2tp/l2tp_core.h | 3 ++- net/l2tp/l2tp_netlink.c | 39 ++++++++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 46b450a1bc21..e927422d8c58 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -352,7 +352,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_find_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref) { struct l2tp_net *pn = l2tp_pernet(net); int hash; @@ -362,7 +363,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); rcu_read_unlock_bh(); + return session; } } @@ -372,7 +377,7 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, struct l2tp_session *session) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 4544e81a3d27..3b9b704a84e4 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -237,7 +237,8 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref); struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f1b68effb077..93e317377c66 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; -static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) +static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, + bool do_ref) { u32 tunnel_id; u32 session_id; @@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); - session = l2tp_session_find_by_ifname(net, ifname); + session = l2tp_session_get_by_ifname(net, ifname, do_ref); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel) - session = l2tp_session_find(net, tunnel, session_id); + session = l2tp_session_get(net, tunnel, session_id, + do_ref); } return session; @@ -660,7 +662,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; u16 pw_type; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, true); if (session == NULL) { ret = -ENODEV; goto out; @@ -674,6 +676,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -683,7 +689,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf int ret = 0; struct l2tp_session *session; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; goto out; @@ -718,6 +724,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_MODIFY); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -813,29 +821,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int ret; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; - goto out; + goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto out; + goto err_ref; } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) - goto err_out; + goto err_ref_msg; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); + ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); -err_out: + l2tp_session_dec_refcount(session); + + return ret; + +err_ref_msg: nlmsg_free(msg); - -out: +err_ref: + l2tp_session_dec_refcount(session); +err: return ret; } From 75514b6654859e0130b512396dc964d2a9e84967 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 31 Mar 2017 18:41:23 -0500 Subject: [PATCH 71/82] net: ethernet: ti: cpsw: wake tx queues on ndo_tx_timeout In case, if TX watchdog is fired some or all netdev TX queues will be stopped and as part of recovery it is required not only to drain and reinitailize CPSW TX channeles, but also wake up stoppted TX queues what doesn't happen now and netdevice will stop transmiting data until reopenned. Hence, add netif_tx_wake_all_queues() call in .ndo_tx_timeout() to complete recovery and restore TX path. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 9f3d9c67e3fe..58cdc066ef2c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1817,6 +1817,8 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev) } cpsw_intr_enable(cpsw); + netif_trans_update(ndev); + netif_tx_wake_all_queues(ndev); } static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) From ac6a3722fed67c658a435187d0254ae119d845d3 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 3 Apr 2017 15:42:58 -0400 Subject: [PATCH 72/82] flow dissector: correct size of storage for ARP The last argument to __skb_header_pointer() should be a buffer large enough to store struct arphdr. This can be a pointer to a struct arphdr structure. The code was previously using a pointer to a pointer to struct arphdr. By my counting the storage available both before and after is 8 bytes on x86_64. Fixes: 55733350e5e8 ("flow disector: ARP support") Reported-by: Nicolas Iooss Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c35aae13c8d2..d98d4998213d 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -390,7 +390,7 @@ mpls: unsigned char ar_tip[4]; } *arp_eth, _arp_eth; const struct arphdr *arp; - struct arphdr *_arp; + struct arphdr _arp; arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, hlen, &_arp); From df2729c3238ed89fb8ccf850d38c732858a5bade Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 1 Apr 2017 17:15:59 +0800 Subject: [PATCH 73/82] sctp: check for dst and pathmtu update in sctp_packet_config This patch is to move sctp_transport_dst_check into sctp_packet_config from sctp_packet_transmit and add pathmtu check in sctp_packet_config. With this fix, sctp can update dst or pathmtu before appending chunks, which can void dropping packets in sctp_packet_transmit when dst is obsolete or dst's mtu is changed. This patch is also to improve some other codes in sctp_packet_config. It updates packet max_size with gso_max_size, checks for dst and pathmtu, and appends ecne chunk only when packet is empty and asoc is not NULL. It makes sctp flush work better, as we only need to set up them once for one flush schedule. It's also safe, since asoc is NULL only when the packet is created by sctp_ootb_pkt_new in which it just gets the new dst, no need to do more things for it other than set packet with transport's pathmtu. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 17 +++++++++-- net/sctp/output.c | 67 ++++++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1f71ee5ab518..d75caa7a629b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -596,12 +596,23 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr) */ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) { - if (t->dst && (!dst_check(t->dst, t->dst_cookie) || - t->pathmtu != max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), - SCTP_DEFAULT_MINSEGMENT))) + if (t->dst && !dst_check(t->dst, t->dst_cookie)) sctp_transport_dst_release(t); return t->dst; } +static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) +{ + __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), + SCTP_DEFAULT_MINSEGMENT); + + if (t->pathmtu == pmtu) + return true; + + t->pathmtu = pmtu; + + return false; +} + #endif /* __net_sctp_h__ */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 73fd178007a3..ec4d50a38713 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; + struct sock *sk; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - packet->vtag = vtag; - if (asoc && tp->dst) { - struct sock *sk = asoc->base.sk; + /* do the following jobs only once for a flush schedule */ + if (!sctp_packet_empty(packet)) + return; - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } + /* set packet max_size with pathmtu */ + packet->max_size = tp->pathmtu; + if (!asoc) + return; - if (sk_can_gso(sk)) { - struct net_device *dev = tp->dst->dev; - - packet->max_size = dev->gso_max_size; - } else { - packet->max_size = asoc->pathmtu; - } - rcu_read_unlock(); - - } else { - packet->max_size = tp->pathmtu; + /* update dst or transport pathmtu if in need */ + sk = asoc->base.sk; + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(sk, asoc); + } else if (!sctp_transport_pmtu_check(tp)) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(sk, asoc); } - if (ecn_capable && sctp_packet_empty(packet)) { - struct sctp_chunk *chunk; + /* If there a is a prepend chunk stick it on the list before + * any other chunks get appended. + */ + if (ecn_capable) { + struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc); - /* If there a is a prepend chunk stick it on the list before - * any other chunks get appended. - */ - chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } + + if (!tp->dst) + return; + + /* set packet max_size with gso_max_size if gso is enabled*/ + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size + : asoc->pathmtu; + rcu_read_unlock(); } /* Initialize the packet structure. */ @@ -582,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /* update dst if in need */ - if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); - } + /* drop packet if no dst */ dst = dst_clone(tp->dst); if (!dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); From 0b9aefea860063bb39e36bd7fe6c7087fed0ba87 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 1 Apr 2017 11:00:21 -0300 Subject: [PATCH 74/82] tcp: minimize false-positives on TCP/GRO check Markus Trippelsdorf reported that after commit dcb17d22e1c2 ("tcp: warn on bogus MSS and try to amend it") the kernel started logging the warning for a NIC driver that doesn't even support GRO. It was diagnosed that it was possibly caused on connections that were using TCP Timestamps but some packets lacked the Timestamps option. As we reduce rcv_mss when timestamps are used, the lack of them would cause the packets to be bigger than expected, although this is a valid case. As this warning is more as a hint, getting a clean-cut on the threshold is probably not worth the execution time spent on it. This patch thus alleviates the false-positives with 2 quick checks: by accounting for the entire TCP option space and also checking against the interface MTU if it's available. These changes, specially the MTU one, might mask some real positives, though if they are really happening, it's possible that sooner or later it will be triggered anyway. Reported-by: Markus Trippelsdorf Cc: Eric Dumazet Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c43119726a62..97ac6776e47d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ -static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) +static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb, + unsigned int len) { static bool __once __read_mostly; @@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); - pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", - dev ? dev->name : "Unknown driver"); + if (!dev || len >= dev->mtu) + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? dev->name : "Unknown driver"); rcu_read_unlock(); } } @@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); - if (unlikely(icsk->icsk_ack.rcv_mss != len)) - tcp_gro_dev_warn(sk, skb); + /* Account for possibly-removed options */ + if (unlikely(len > icsk->icsk_ack.rcv_mss + + MAX_TCP_OPTION_SPACE)) + tcp_gro_dev_warn(sk, skb, len); } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. From e08293a4ccbcc993ded0fdc46f1e57926b833d63 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Apr 2017 12:03:13 +0200 Subject: [PATCH 75/82] l2tp: take reference on sessions being dumped Take a reference on the sessions returned by l2tp_session_find_nth() (and rename it l2tp_session_get_nth() to reflect this change), so that caller is assured that the session isn't going to disappear while processing it. For procfs and debugfs handlers, the session is held in the .start() callback and dropped in .show(). Given that pppol2tp_seq_session_show() dereferences the associated PPPoL2TP socket and that l2tp_dfs_seq_session_show() might call pppol2tp_show(), we also need to call the session's .ref() callback to prevent the socket from going away from under us. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Fixes: 0ad6614048cf ("l2tp: Add debugfs files for dumping l2tp debug info") Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 8 ++++++-- net/l2tp/l2tp_core.h | 3 ++- net/l2tp/l2tp_debugfs.c | 10 +++++++--- net/l2tp/l2tp_netlink.c | 7 +++++-- net/l2tp/l2tp_ppp.c | 10 +++++++--- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e927422d8c58..e37d9554da7b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -327,7 +327,8 @@ struct l2tp_session *l2tp_session_get(struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref) { int hash; struct l2tp_session *session; @@ -337,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -347,7 +351,7 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_nth); +EXPORT_SYMBOL_GPL(l2tp_session_get_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 3b9b704a84e4..8ce7818c7a9d 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -236,7 +236,8 @@ struct l2tp_session *l2tp_session_get(struct net *net, struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref); struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, bool do_ref); struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 2d6760a2ae34..d100aed3d06f 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) } /* Show the tunnel or session context */ - if (pd->session == NULL) + if (!pd->session) { l2tp_dfs_seq_tunnel_show(m, pd->tunnel); - else + } else { l2tp_dfs_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 93e317377c66..7e3e669baac4 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -867,7 +867,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_find_nth(tunnel, si); + session = l2tp_session_get_nth(tunnel, si, false); if (session == NULL) { ti++; tunnel = NULL; @@ -877,8 +877,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session, L2TP_CMD_SESSION_GET) < 0) + session, L2TP_CMD_SESSION_GET) < 0) { + l2tp_session_dec_refcount(session); break; + } + l2tp_session_dec_refcount(session); si++; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 26501902d1a7..7bf73091baa2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1560,7 +1560,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -1687,10 +1687,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) /* Show the tunnel or session context. */ - if (pd->session == NULL) + if (!pd->session) { pppol2tp_seq_tunnel_show(m, pd->tunnel); - else + } else { pppol2tp_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; From a8919661d78b90d747b3514197e49ecf8727d08c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 3 Apr 2017 11:19:10 +0100 Subject: [PATCH 76/82] bnx2x: fix spelling mistake in macros HW_INTERRUT_ASSERT_SET_* Trival fix, rename HW_INTERRUT_ASSERT_SET_* to HW_INTERRUPT_ASSERT_SET_* Signed-off-by: Colin Ian King Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 6 +++--- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 0a23034bbe3f..352beff796ae 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2277,7 +2277,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCP) | \ GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RSVD_GRC)) -#define HW_INTERRUT_ASSERT_SET_0 \ +#define HW_INTERRUPT_ASSERT_SET_0 \ (AEU_INPUTS_ATTN_BITS_TSDM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TCM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TSEMI_HW_INTERRUPT | \ @@ -2290,7 +2290,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR) -#define HW_INTERRUT_ASSERT_SET_1 \ +#define HW_INTERRUPT_ASSERT_SET_1 \ (AEU_INPUTS_ATTN_BITS_QM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TIMERS_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_XSDM_HW_INTERRUPT | \ @@ -2318,7 +2318,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR | \ AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR) -#define HW_INTERRUT_ASSERT_SET_2 \ +#define HW_INTERRUPT_ASSERT_SET_2 \ (AEU_INPUTS_ATTN_BITS_CSEMI_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_CDU_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_DMAE_HW_INTERRUPT | \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index ac76fc251d26..a851f95c307a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -4166,14 +4166,14 @@ static void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn) bnx2x_release_phy_lock(bp); } - if (attn & HW_INTERRUT_ASSERT_SET_0) { + if (attn & HW_INTERRUPT_ASSERT_SET_0) { val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_0); + val &= ~(attn & HW_INTERRUPT_ASSERT_SET_0); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set0 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_0)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_0)); bnx2x_panic(); } } @@ -4191,7 +4191,7 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn) BNX2X_ERR("FATAL error from DORQ\n"); } - if (attn & HW_INTERRUT_ASSERT_SET_1) { + if (attn & HW_INTERRUPT_ASSERT_SET_1) { int port = BP_PORT(bp); int reg_offset; @@ -4200,11 +4200,11 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn) MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1); val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_1); + val &= ~(attn & HW_INTERRUPT_ASSERT_SET_1); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set1 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_1)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_1)); bnx2x_panic(); } } @@ -4235,7 +4235,7 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn) } } - if (attn & HW_INTERRUT_ASSERT_SET_2) { + if (attn & HW_INTERRUPT_ASSERT_SET_2) { int port = BP_PORT(bp); int reg_offset; @@ -4244,11 +4244,11 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn) MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2); val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_2); + val &= ~(attn & HW_INTERRUPT_ASSERT_SET_2); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set2 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_2)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_2)); bnx2x_panic(); } } From 249ee819e24c180909f43c1173c8ef6724d21faf Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Apr 2017 13:23:15 +0200 Subject: [PATCH 77/82] l2tp: fix PPP pseudo-wire auto-loading PPP pseudo-wire type is 7 (11 is L2TP_PWTYPE_IP). Fixes: f1f39f911027 ("l2tp: auto load type modules") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7bf73091baa2..861b255a2d51 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1853,4 +1853,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); -MODULE_ALIAS_L2TP_PWTYPE(11); +MODULE_ALIAS_L2TP_PWTYPE(7); From 30c57f073449e09ae42f908bfff56c08c8751a6f Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Mon, 3 Apr 2017 17:34:28 +0530 Subject: [PATCH 78/82] net: ethernet: ti: cpsw: fix race condition during open() TI's cpsw driver handles both OF and non-OF case for phy connect. Unfortunately of_phy_connect() returns NULL on error while phy_connect() returns ERR_PTR(). To handle this, cpsw_slave_open() overrides the return value from phy_connect() to make it NULL or error. This leaves a small window, where cpsw_adjust_link() may be invoked for a slave while slave->phy pointer is temporarily set to -ENODEV (or some other error) before it is finally set to NULL. _cpsw_adjust_link() only handles the NULL case, and an oops results when ERR_PTR() is seen by it. Note that cpsw_adjust_link() checks PHY status for each slave whenever it is invoked. It can so happen that even though phy_connect() for a given slave returns error, _cpsw_adjust_link() is still called for that slave because the link status of another slave changed. Fix this by using a temporary pointer to store return value of {of_}phy_connect() and do a one-time write to slave->phy. Reviewed-by: Grygorii Strashko Reported-by: Yan Liu Signed-off-by: Sekhar Nori Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 58cdc066ef2c..fa674a8bda0c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1267,6 +1267,7 @@ static void soft_reset_slave(struct cpsw_slave *slave) static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) { u32 slave_port; + struct phy_device *phy; struct cpsw_common *cpsw = priv->cpsw; soft_reset_slave(slave); @@ -1300,27 +1301,28 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); if (slave->data->phy_node) { - slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node, + phy = of_phy_connect(priv->ndev, slave->data->phy_node, &cpsw_adjust_link, 0, slave->data->phy_if); - if (!slave->phy) { + if (!phy) { dev_err(priv->dev, "phy \"%s\" not found on slave %d\n", slave->data->phy_node->full_name, slave->slave_num); return; } } else { - slave->phy = phy_connect(priv->ndev, slave->data->phy_id, + phy = phy_connect(priv->ndev, slave->data->phy_id, &cpsw_adjust_link, slave->data->phy_if); - if (IS_ERR(slave->phy)) { + if (IS_ERR(phy)) { dev_err(priv->dev, "phy \"%s\" not found on slave %d, err %ld\n", slave->data->phy_id, slave->slave_num, - PTR_ERR(slave->phy)); - slave->phy = NULL; + PTR_ERR(phy)); return; } } + slave->phy = phy; + phy_attached_info(slave->phy); phy_start(slave->phy); From 3ebfdf082184d04f6e73b30cd9446613dc7f8c02 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 4 Apr 2017 13:39:55 +0800 Subject: [PATCH 79/82] sctp: get sock from transport in sctp_transport_update_pmtu This patch is almost to revert commit 02f3d4ce9e81 ("sctp: Adjust PMTU updates to accomodate route invalidation."). As t->asoc can't be NULL in sctp_transport_update_pmtu, it could get sk from asoc, and no need to pass sk into that function. It is also to remove some duplicated codes from that function. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 5 ++--- include/net/sctp/structs.h | 6 +++--- net/sctp/associola.c | 6 +++--- net/sctp/input.c | 4 ++-- net/sctp/output.c | 4 ++-- net/sctp/socket.c | 6 +++--- net/sctp/transport.c | 19 +++++++------------ 7 files changed, 22 insertions(+), 28 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d75caa7a629b..069582ee5d7f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -448,10 +448,9 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) return frag; } -static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc) +static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) { - - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); asoc->pmtu_pending = 0; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a127b7c2c3c9..138f8615acf0 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -952,8 +952,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); -void sctp_transport_reset(struct sctp_transport *); -void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); +void sctp_transport_reset(struct sctp_transport *t); +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); @@ -1954,7 +1954,7 @@ void sctp_assoc_update(struct sctp_association *old, __u32 sctp_association_get_next_tsn(struct sctp_association *); -void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); +void sctp_assoc_sync_pmtu(struct sctp_association *asoc); void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); void sctp_assoc_set_primary(struct sctp_association *, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0b26df5f6188..a9708da28eb5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1412,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc, /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ -void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) +void sctp_assoc_sync_pmtu(struct sctp_association *asoc) { struct sctp_transport *t; __u32 pmtu = 0; @@ -1424,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, - SCTP_TRUNC4(dst_mtu(t->dst))); + sctp_transport_update_pmtu( + t, SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a28ab20487f..0e06a278d2a9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ - sctp_transport_update_pmtu(sk, t, pmtu); + sctp_transport_update_pmtu(t, pmtu); /* Update association pmtu. */ - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* Retransmit with the new pmtu setting. diff --git a/net/sctp/output.c b/net/sctp/output.c index ec4d50a38713..1409a875ad8e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -105,10 +105,10 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); if (asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } else if (!sctp_transport_pmtu_check(tp)) { if (asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* If there a is a prepend chunk stick it on the list before diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 12fbae2c1002..c1401f43d40f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(sk, asoc); + sctp_assoc_pending_pmtu(asoc); /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D @@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; } else { @@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } } else if (asoc) { asoc->param_flags = diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3379668af368..721eeebfcd8a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu) +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { - struct dst_entry *dst; + struct dst_entry *dst = sctp_transport_dst_check(t); if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ @@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->pathmtu = pmtu; } - dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); - if (dst) { - dst->ops->update_pmtu(dst, sk, NULL, pmtu); - + dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); } + + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); } /* Caches the dst entry and source address for a transport's destination From ecde8f36f8a05a023b9d026e9094571aab421d36 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 4 Apr 2017 14:15:39 -0700 Subject: [PATCH 80/82] tcp: fix lost retransmit SNMP under-counting The lost retransmit SNMP stat is under-counting retransmission that uses segment offloading. This patch fixes that so all retransmission related SNMP counters are consistent. Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 4ecb38ae8504..d8acbd9f477a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) /* Account for retransmits that are lost again */ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, + tcp_skb_pcount(skb)); } } From 2d2517ee314ef1de0517f74d06c2825fbf597ba3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 4 Apr 2017 14:15:40 -0700 Subject: [PATCH 81/82] tcp: fix reordering SNMP under-counting Currently the reordering SNMP counters only increase if a connection sees a higher degree then it has previously seen. It ignores if the reordering degree is not greater than the default system threshold. This significantly under-counts the number of reordering events and falsely convey that reordering is rare on the network. This patch properly and faithfully records the number of reordering events detected by the TCP stack, just like the comment says "this exciting event is worth to be remembered". Note that even so TCP still under-estimate the actual reordering events because TCP requires TS options or certain packet sequences to detect reordering (i.e. ACKing never-retransmitted sequence in recovery or disordered state). Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 97ac6776e47d..2c1f59386a7b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -878,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric, const int ts) { struct tcp_sock *tp = tcp_sk(sk); - if (metric > tp->reordering) { - int mib_idx; + int mib_idx; + if (metric > tp->reordering) { tp->reordering = min(sysctl_tcp_max_reordering, metric); - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -906,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } tp->rack.reord = 1; + + /* This exciting event is worth to be remembered. 8) */ + if (ts) + mib_idx = LINUX_MIB_TCPTSREORDER; + else if (tcp_is_reno(tp)) + mib_idx = LINUX_MIB_TCPRENOREORDER; + else if (tcp_is_fack(tp)) + mib_idx = LINUX_MIB_TCPFACKREORDER; + else + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS(sock_net(sk), mib_idx); } /* This must be called before lost_out is incremented */ From c383bdd14f91562babd269aa7c36b46fee7b6c75 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 4 Apr 2017 15:56:55 -0700 Subject: [PATCH 82/82] nfp: fix potential use after free on xdp prog We should unregister the net_device first, before we give back our reference on xdp_prog. Otherwise xdp_prog may be freed before .ndo_stop() disabled the datapath. Found by code inspection. Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 9179a99563af..a41377e26c07 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3275,9 +3275,10 @@ void nfp_net_netdev_clean(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); + unregister_netdev(nn->netdev); + if (nn->xdp_prog) bpf_prog_put(nn->xdp_prog); if (nn->bpf_offload_xdp) nfp_net_xdp_offload(nn, NULL); - unregister_netdev(nn->netdev); }