From c4e45c84270798b1d51ede9a8c6810378ccfe162 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Mon, 30 Oct 2017 17:38:43 +0200 Subject: [PATCH 01/67] iwlwifi: mvm: set correct chains in Rx status ieee80211_rx_status::chains was always set to zero. That caused rate scaling to always start with the lowest rate possible (rs_get_initial_rate). Set it correctly according to the MPDU response. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 76dc58381e1c..20fe23fbf040 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -213,6 +213,7 @@ static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm, struct ieee80211_rx_status *rx_status) { int energy_a, energy_b, max_energy; + u32 rate_flags = le32_to_cpu(desc->rate_n_flags); energy_a = desc->energy_a; energy_a = energy_a ? -energy_a : S8_MIN; @@ -224,7 +225,8 @@ static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm, energy_a, energy_b, max_energy); rx_status->signal = max_energy; - rx_status->chains = 0; /* TODO: phy info */ + rx_status->chains = + (rate_flags & RATE_MCS_ANT_AB_MSK) >> RATE_MCS_ANT_POS; rx_status->chain_signal[0] = energy_a; rx_status->chain_signal[1] = energy_b; rx_status->chain_signal[2] = S8_MIN; From 0b9832b712d6767d6c7b01965fd788d1ca84fc92 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 13 Nov 2017 09:50:47 +0200 Subject: [PATCH 02/67] iwlwifi: mvm: don't use transmit queue hang detection when it is not possible When we act as an AP, new firmware versions handle internally the power saving clients and the driver doesn't know that the peers went to sleep. It is, hence, possible that a peer goes to sleep for a long time and stop pulling frames. This will cause its transmit queue to hang which is a condition that triggers the recovery flow in the driver. While this client is certainly buggy (it should have pulled the frame based on the TIM IE in the beacon), we can't blow up because of a buggy client. Change the current implementation to not enable the transmit queue hang detection on queues that serve peers when we act as an AP / GO. We can still enable this mechanism using the debug configuration which can come in handy when we want to debug why the client doesn't wake up. Cc: stable@vger.kernel.org # v4.13 Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index d46115e2d69e..19c1d1f76e15 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1134,9 +1134,18 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm, unsigned int default_timeout = cmd_q ? IWL_DEF_WD_TIMEOUT : mvm->cfg->base_params->wd_timeout; - if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_TXQ_TIMERS)) + if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_TXQ_TIMERS)) { + /* + * We can't know when the station is asleep or awake, so we + * must disable the queue hang detection. + */ + if (fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_STA_PM_NOTIF) && + vif && vif->type == NL80211_IFTYPE_AP) + return IWL_WATCHDOG_DISABLED; return iwlmvm_mod_params.tfd_q_hang_detect ? default_timeout : IWL_WATCHDOG_DISABLED; + } trigger = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_TXQ_TIMERS); txq_timer = (void *)trigger->data; From d1b275ffec459c5ae12b5c7086c84175696e5a9f Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 15 Nov 2017 14:12:30 +0200 Subject: [PATCH 03/67] iwlwifi: mvm: fix the TX queue hang timeout for MONITOR vif type The MONITOR type is missing in the interface type switch. Add it. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 19c1d1f76e15..03ffd84786ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1172,6 +1172,8 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm, return le32_to_cpu(txq_timer->p2p_go); case NL80211_IFTYPE_P2P_DEVICE: return le32_to_cpu(txq_timer->p2p_device); + case NL80211_IFTYPE_MONITOR: + return default_timeout; default: WARN_ON(1); return mvm->cfg->base_params->wd_timeout; From b13f43a48571f0cd0fda271b5046b65f1f268db5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 19 Nov 2017 10:35:14 +0200 Subject: [PATCH 04/67] iwlwifi: mvm: fix packet injection We need to have a station and a queue for the monitor interface to be able to inject traffic. We used to have this traffic routed to the auxiliary queue, but this queue isn't scheduled for the station we had linked to the monitor vif. Allocate a new queue, link it to the monitor vif's station and make that queue use the BE fifo. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=196715 Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/fw/api/txq.h | 4 ++ .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 53 ++++++++++++++----- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 3 +- 6 files changed, 49 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h index 87b4434224a1..dfa111bb411e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h @@ -68,6 +68,9 @@ * @IWL_MVM_DQA_CMD_QUEUE: a queue reserved for sending HCMDs to the FW * @IWL_MVM_DQA_AUX_QUEUE: a queue reserved for aux frames * @IWL_MVM_DQA_P2P_DEVICE_QUEUE: a queue reserved for P2P device frames + * @IWL_MVM_DQA_INJECT_MONITOR_QUEUE: a queue reserved for injection using + * monitor mode. Note this queue is the same as the queue for P2P device + * but we can't have active monitor mode along with P2P device anyway. * @IWL_MVM_DQA_GCAST_QUEUE: a queue reserved for P2P GO/SoftAP GCAST frames * @IWL_MVM_DQA_BSS_CLIENT_QUEUE: a queue reserved for BSS activity, to ensure * that we are never left without the possibility to connect to an AP. @@ -87,6 +90,7 @@ enum iwl_mvm_dqa_txq { IWL_MVM_DQA_CMD_QUEUE = 0, IWL_MVM_DQA_AUX_QUEUE = 1, IWL_MVM_DQA_P2P_DEVICE_QUEUE = 2, + IWL_MVM_DQA_INJECT_MONITOR_QUEUE = 2, IWL_MVM_DQA_GCAST_QUEUE = 3, IWL_MVM_DQA_BSS_CLIENT_QUEUE = 4, IWL_MVM_DQA_MIN_MGMT_QUEUE = 5, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index a2bf530eeae4..2f22e14e00fe 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -787,7 +787,7 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm, u32 action) { struct iwl_mac_ctx_cmd cmd = {}; - u32 tfd_queue_msk = 0; + u32 tfd_queue_msk = BIT(mvm->snif_queue); int ret; WARN_ON(vif->type != NL80211_IFTYPE_MONITOR); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 4575595ab022..6a9a25beab3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -972,6 +972,7 @@ struct iwl_mvm { /* Tx queues */ u16 aux_queue; + u16 snif_queue; u16 probe_queue; u16 p2p_dev_queue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 7078b7e458be..45470b6b351a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -624,6 +624,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, mvm->fw_restart = iwlwifi_mod_params.fw_restart ? -1 : 0; mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE; + mvm->snif_queue = IWL_MVM_DQA_INJECT_MONITOR_QUEUE; mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index c19f98489d4e..1add5615fc3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1709,29 +1709,29 @@ void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta) sta->sta_id = IWL_MVM_INVALID_STA; } -static void iwl_mvm_enable_aux_queue(struct iwl_mvm *mvm) +static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 *queue, + u8 sta_id, u8 fifo) { unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ? mvm->cfg->base_params->wd_timeout : IWL_WATCHDOG_DISABLED; if (iwl_mvm_has_new_tx_api(mvm)) { - int queue = iwl_mvm_tvqm_enable_txq(mvm, mvm->aux_queue, - mvm->aux_sta.sta_id, - IWL_MAX_TID_COUNT, - wdg_timeout); - mvm->aux_queue = queue; + int tvqm_queue = + iwl_mvm_tvqm_enable_txq(mvm, *queue, sta_id, + IWL_MAX_TID_COUNT, + wdg_timeout); + *queue = tvqm_queue; } else { struct iwl_trans_txq_scd_cfg cfg = { - .fifo = IWL_MVM_TX_FIFO_MCAST, - .sta_id = mvm->aux_sta.sta_id, + .fifo = fifo, + .sta_id = sta_id, .tid = IWL_MAX_TID_COUNT, .aggregate = false, .frame_limit = IWL_FRAME_LIMIT, }; - iwl_mvm_enable_txq(mvm, mvm->aux_queue, mvm->aux_queue, 0, &cfg, - wdg_timeout); + iwl_mvm_enable_txq(mvm, *queue, *queue, 0, &cfg, wdg_timeout); } } @@ -1750,7 +1750,9 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm) /* Map Aux queue to fifo - needs to happen before adding Aux station */ if (!iwl_mvm_has_new_tx_api(mvm)) - iwl_mvm_enable_aux_queue(mvm); + iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue, + mvm->aux_sta.sta_id, + IWL_MVM_TX_FIFO_MCAST); ret = iwl_mvm_add_int_sta_common(mvm, &mvm->aux_sta, NULL, MAC_INDEX_AUX, 0); @@ -1764,7 +1766,9 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm) * to firmware so enable queue here - after the station was added */ if (iwl_mvm_has_new_tx_api(mvm)) - iwl_mvm_enable_aux_queue(mvm); + iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue, + mvm->aux_sta.sta_id, + IWL_MVM_TX_FIFO_MCAST); return 0; } @@ -1772,10 +1776,31 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm) int iwl_mvm_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int ret; lockdep_assert_held(&mvm->mutex); - return iwl_mvm_add_int_sta_common(mvm, &mvm->snif_sta, vif->addr, + + /* Map snif queue to fifo - must happen before adding snif station */ + if (!iwl_mvm_has_new_tx_api(mvm)) + iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue, + mvm->snif_sta.sta_id, + IWL_MVM_TX_FIFO_BE); + + ret = iwl_mvm_add_int_sta_common(mvm, &mvm->snif_sta, vif->addr, mvmvif->id, 0); + if (ret) + return ret; + + /* + * For 22000 firmware and on we cannot add queue to a station unknown + * to firmware so enable queue here - after the station was added + */ + if (iwl_mvm_has_new_tx_api(mvm)) + iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue, + mvm->snif_sta.sta_id, + IWL_MVM_TX_FIFO_BE); + + return 0; } int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) @@ -1784,6 +1809,8 @@ int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) lockdep_assert_held(&mvm->mutex); + iwl_mvm_disable_txq(mvm, mvm->snif_queue, mvm->snif_queue, + IWL_MAX_TID_COUNT, 0); ret = iwl_mvm_rm_sta_common(mvm, mvm->snif_sta.sta_id); if (ret) IWL_WARN(mvm, "Failed sending remove station\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 593b7f97b29c..333bcb75b8af 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -657,7 +657,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) if (ap_sta_id != IWL_MVM_INVALID_STA) sta_id = ap_sta_id; } else if (info.control.vif->type == NL80211_IFTYPE_MONITOR) { - queue = mvm->aux_queue; + queue = mvm->snif_queue; + sta_id = mvm->snif_sta.sta_id; } } From f3402d6d8eb084f6c2563dee1a09385b7c0ea2d5 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 20 Nov 2017 11:37:18 +0200 Subject: [PATCH 05/67] iwlwifi: pcie: fix erroneous "Read failed message" Current pci dumping code code is always falling to the error path, resulting with a constant "Read failed" message, also for the successful reads. Fixes: a5c932e41fdd ("iwlwifi: pcie: dump registers when HW becomes inaccessible") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index b7a51603465b..3dee95e6a475 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -166,6 +166,7 @@ static void iwl_trans_pcie_dump_regs(struct iwl_trans *trans) print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0); } + goto out; err_read: print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0); From 0232d2cd7aa8e1b810fe84fb4059a0bd1eabe2ba Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 20 Nov 2017 13:25:05 +0200 Subject: [PATCH 06/67] iwlwifi: fix access to prph when transport is stopped When getting HW rfkill we get stop_device being called from two paths. One path is the IRQ calling stop device, and updating op mode and stack. As a result, cfg80211 is running rfkill sync work that shuts down all devices (second path). In the second path, we eventually get to iwl_mvm_stop_device which calls iwl_fw_dump_conf_clear->iwl_fw_dbg_stop_recording, that access periphery registers. The device may be stopped at this point from the first path, which will result with a failure to access those registers. Simply checking for the trans status is insufficient, since the race will still exist, only minimized. Instead, move the stop from iwl_fw_dump_conf_clear (which is getting called only from stop path) to the transport stop device function, where the access is always safe. This has the added value, of actually stopping dbgc before stopping device even when the stop is initiated from the transport. Fixes: 1efc3843a4ee ("iwlwifi: stop dbgc recording before stopping DMA") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 2 -- drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 6 ++++++ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 9 +++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 9c889a32fe24..223fb77a3aa9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -209,8 +209,6 @@ static inline void iwl_fw_dbg_stop_recording(struct iwl_fw_runtime *fwrt) static inline void iwl_fw_dump_conf_clear(struct iwl_fw_runtime *fwrt) { - iwl_fw_dbg_stop_recording(fwrt); - fwrt->dump.conf = FW_DBG_INVALID; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index c59f4581e972..ac05fd1e74c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -49,6 +49,7 @@ * *****************************************************************************/ #include "iwl-trans.h" +#include "iwl-prph.h" #include "iwl-context-info.h" #include "internal.h" @@ -156,6 +157,11 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans, bool low_power) trans_pcie->is_down = true; + /* Stop dbgc before stopping device */ + iwl_write_prph(trans, DBGC_IN_SAMPLE, 0); + udelay(100); + iwl_write_prph(trans, DBGC_OUT_CTRL, 0); + /* tell the device to stop sending interrupts */ iwl_disable_interrupts(trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 3dee95e6a475..4541c86881d6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1227,6 +1227,15 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) trans_pcie->is_down = true; + /* Stop dbgc before stopping device */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { + iwl_set_bits_prph(trans, MON_BUFF_SAMPLE_CTL, 0x100); + } else { + iwl_write_prph(trans, DBGC_IN_SAMPLE, 0); + udelay(100); + iwl_write_prph(trans, DBGC_OUT_CTRL, 0); + } + /* tell the device to stop sending interrupts */ iwl_disable_interrupts(trans); From 5c3de777bdaf48bd0cfb43097c0d0fb85056cab7 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Sat, 25 Nov 2017 21:39:25 +0100 Subject: [PATCH 07/67] brcmfmac: change driver unbind order of the sdio function devices In the function brcmf_sdio_firmware_callback() the driver is unbound from the sdio function devices in the error path. However, the order in which it is done resulted in a use-after-free issue (see brcmf_ops_sdio_remove() in bcmsdh.c). Hence change the order and first unbind sdio function #2 device and then unbind sdio function #1 device. Cc: stable@vger.kernel.org # v4.12.x Fixes: 7a51461fc2da ("brcmfmac: unbind all devices upon failure in firmware callback") Reported-by: Stefan Wahren Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 310c4e2746aa..adf180f338ca 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4121,8 +4121,8 @@ release: sdio_release_host(sdiodev->func[1]); fail: brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err); - device_release_driver(dev); device_release_driver(&sdiodev->func[2]->dev); + device_release_driver(dev); } struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) From c895f6f703ad7dd2f99e751d9884b0aa5d0eea25 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:44 +0100 Subject: [PATCH 08/67] bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type Commit 0515e5999a466dfe ("bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type") introduced the bpf_perf_event_data structure which exports the pt_regs structure. This is OK for multiple architectures but fail for s390 and arm64 which do not export pt_regs. Programs using them, for example, the bpf selftest fail to compile on these architectures. For s390, exporting the pt_regs is not an option because s390 wants to allow changes to it. For arm64, there is a user_pt_regs structure that covers parts of the pt_regs structure for use by user space. To solve the broken uapi for s390 and arm64, introduce an abstract type for pt_regs and add an asm/bpf_perf_event.h file that concretes the type. An asm-generic header file covers the architectures that export pt_regs today. The arch-specific enablement for s390 and arm64 follows in separate commits. Reported-by: Thomas Richter Fixes: 0515e5999a466dfe ("bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type") Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Acked-by: Alexei Starovoitov Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnd Bergmann Cc: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/alpha/include/uapi/asm/Kbuild | 2 ++ arch/arc/include/uapi/asm/Kbuild | 1 + arch/arm/include/uapi/asm/Kbuild | 1 + arch/blackfin/include/uapi/asm/Kbuild | 1 + arch/c6x/include/uapi/asm/Kbuild | 1 + arch/cris/include/uapi/asm/Kbuild | 1 + arch/frv/include/uapi/asm/Kbuild | 2 ++ arch/h8300/include/uapi/asm/Kbuild | 1 + arch/hexagon/include/uapi/asm/Kbuild | 1 + arch/ia64/include/uapi/asm/Kbuild | 1 + arch/m32r/include/uapi/asm/Kbuild | 1 + arch/m68k/include/uapi/asm/Kbuild | 1 + arch/metag/include/uapi/asm/Kbuild | 1 + arch/microblaze/include/uapi/asm/Kbuild | 1 + arch/mips/include/uapi/asm/Kbuild | 1 + arch/mn10300/include/uapi/asm/Kbuild | 1 + arch/nios2/include/uapi/asm/Kbuild | 1 + arch/openrisc/include/uapi/asm/Kbuild | 1 + arch/parisc/include/uapi/asm/Kbuild | 1 + arch/powerpc/include/uapi/asm/Kbuild | 1 + arch/riscv/include/uapi/asm/Kbuild | 1 + arch/score/include/uapi/asm/Kbuild | 1 + arch/sh/include/uapi/asm/Kbuild | 1 + arch/sparc/include/uapi/asm/Kbuild | 1 + arch/tile/include/uapi/asm/Kbuild | 1 + arch/unicore32/include/uapi/asm/Kbuild | 1 + arch/x86/include/uapi/asm/Kbuild | 1 + arch/xtensa/include/uapi/asm/Kbuild | 1 + include/linux/perf_event.h | 6 +++++- include/uapi/asm-generic/bpf_perf_event.h | 9 +++++++++ include/uapi/linux/bpf_perf_event.h | 5 ++--- kernel/events/core.c | 2 +- 32 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 include/uapi/asm-generic/bpf_perf_event.h diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild index b15bf6bc0e94..14a2e9af97e9 100644 --- a/arch/alpha/include/uapi/asm/Kbuild +++ b/arch/alpha/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += bpf_perf_event.h diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild index fa6d0ff4ff89..170b5db64afe 100644 --- a/arch/arc/include/uapi/asm/Kbuild +++ b/arch/arc/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 4d53de308ee0..4d1cc1847edf 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -7,6 +7,7 @@ generated-y += unistd-oabi.h generated-y += unistd-eabi.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild index aa624b4ab655..2240b38c2915 100644 --- a/arch/blackfin/include/uapi/asm/Kbuild +++ b/arch/blackfin/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild index 67ee896a76a7..26644e15d854 100644 --- a/arch/c6x/include/uapi/asm/Kbuild +++ b/arch/c6x/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index 3687b54bb18e..3470c6e9c7b9 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild index b15bf6bc0e94..14a2e9af97e9 100644 --- a/arch/frv/include/uapi/asm/Kbuild +++ b/arch/frv/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += bpf_perf_event.h diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild index 187aed820e71..2f65f78792cb 100644 --- a/arch/h8300/include/uapi/asm/Kbuild +++ b/arch/h8300/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild index cb5df3aad3a8..41a176dbb53e 100644 --- a/arch/hexagon/include/uapi/asm/Kbuild +++ b/arch/hexagon/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild index 13a97aa2285f..f5c6967a93bb 100644 --- a/arch/ia64/include/uapi/asm/Kbuild +++ b/arch/ia64/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += kvm_para.h diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild index 1c44d3b3eba0..451bf6071c6e 100644 --- a/arch/m32r/include/uapi/asm/Kbuild +++ b/arch/m32r/include/uapi/asm/Kbuild @@ -1,5 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += kvm_para.h generic-y += siginfo.h diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild index 3717b64a620d..c2e26a44c482 100644 --- a/arch/m68k/include/uapi/asm/Kbuild +++ b/arch/m68k/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild index 6ac763d9a3e3..f9eaf07d29f8 100644 --- a/arch/metag/include/uapi/asm/Kbuild +++ b/arch/metag/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild index 06609ca36115..2c6a6bffea32 100644 --- a/arch/microblaze/include/uapi/asm/Kbuild +++ b/arch/microblaze/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild index a0266feba9e6..7a4becd8963a 100644 --- a/arch/mips/include/uapi/asm/Kbuild +++ b/arch/mips/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += ipcbuf.h diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild index c94ee54210bc..81271d3af47c 100644 --- a/arch/mn10300/include/uapi/asm/Kbuild +++ b/arch/mn10300/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += siginfo.h diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild index ffca24da7647..13a3d77b4d7b 100644 --- a/arch/nios2/include/uapi/asm/Kbuild +++ b/arch/nios2/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild index 62286dbeb904..130c16ccba0a 100644 --- a/arch/openrisc/include/uapi/asm/Kbuild +++ b/arch/openrisc/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild index 196d2a4efb31..286ef5a5904b 100644 --- a/arch/parisc/include/uapi/asm/Kbuild +++ b/arch/parisc/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += kvm_para.h generic-y += param.h generic-y += poll.h diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index 0d960ef78a9a..1a6ed5919ffd 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += param.h generic-y += poll.h generic-y += resource.h diff --git a/arch/riscv/include/uapi/asm/Kbuild b/arch/riscv/include/uapi/asm/Kbuild index 5ded96b06352..7e91f4850475 100644 --- a/arch/riscv/include/uapi/asm/Kbuild +++ b/arch/riscv/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += setup.h generic-y += unistd.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild index c94ee54210bc..81271d3af47c 100644 --- a/arch/score/include/uapi/asm/Kbuild +++ b/arch/score/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += siginfo.h diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild index e28531333efa..ba4d39cb321d 100644 --- a/arch/sh/include/uapi/asm/Kbuild +++ b/arch/sh/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild index 2178c78c7c1a..4680ba246b55 100644 --- a/arch/sparc/include/uapi/asm/Kbuild +++ b/arch/sparc/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += types.h diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild index 5711de0a1b5e..cc439612bcd5 100644 --- a/arch/tile/include/uapi/asm/Kbuild +++ b/arch/tile/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild index 759a71411169..8611ef980554 100644 --- a/arch/unicore32/include/uapi/asm/Kbuild +++ b/arch/unicore32/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index da1489cb64dc..1e901e421f2d 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generated-y += unistd_32.h generated-y += unistd_64.h generated-y += unistd_x32.h diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild index a5bcdfb890f1..837d4dd76785 100644 --- a/arch/xtensa/include/uapi/asm/Kbuild +++ b/arch/xtensa/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2c9c87d8a0c1..7546822a1d74 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -15,6 +15,7 @@ #define _LINUX_PERF_EVENT_H #include +#include /* * Kernel-internal data types and definitions: @@ -787,7 +788,7 @@ struct perf_output_handle { }; struct bpf_perf_event_data_kern { - struct pt_regs *regs; + bpf_user_pt_regs_t *regs; struct perf_sample_data *data; struct perf_event *event; }; @@ -1177,6 +1178,9 @@ extern void perf_bp_event(struct perf_event *event, void *data); (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) # define perf_instruction_pointer(regs) instruction_pointer(regs) #endif +#ifndef perf_arch_bpf_user_pt_regs +# define perf_arch_bpf_user_pt_regs(regs) regs +#endif static inline bool has_branch_stack(struct perf_event *event) { diff --git a/include/uapi/asm-generic/bpf_perf_event.h b/include/uapi/asm-generic/bpf_perf_event.h new file mode 100644 index 000000000000..53815d2cd047 --- /dev/null +++ b/include/uapi/asm-generic/bpf_perf_event.h @@ -0,0 +1,9 @@ +#ifndef _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ + +#include + +/* Export kernel pt_regs structure */ +typedef struct pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ */ diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h index af549d4ecf1b..8f95303f9d80 100644 --- a/include/uapi/linux/bpf_perf_event.h +++ b/include/uapi/linux/bpf_perf_event.h @@ -8,11 +8,10 @@ #ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ #define _UAPI__LINUX_BPF_PERF_EVENT_H__ -#include -#include +#include struct bpf_perf_event_data { - struct pt_regs regs; + bpf_user_pt_regs_t regs; __u64 sample_period; }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 16beab4767e1..ba957b9812b3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7987,11 +7987,11 @@ static void bpf_overflow_handler(struct perf_event *event, { struct bpf_perf_event_data_kern ctx = { .data = data, - .regs = regs, .event = event, }; int ret = 0; + ctx.regs = perf_arch_bpf_user_pt_regs(regs); preempt_disable(); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) goto out; From 466698e654e8931945301ea999feb6bd4bfaf849 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:45 +0100 Subject: [PATCH 09/67] s390/bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type To mitigate and correct the broken uapi for the BPF_PROG_TYPE_PERF_EVENT program type, introduce a user_pt_regs structure (similar to arm64) that exports parts from the beginnig of the pt_regs structure. The export must start with the beginning of the pt_regs structure because to correctly calculate BPF prologues for perf (regs_query_register_offset()). For BPF_PROG_TYPE_PERF_EVENT program types, the BPF program is then passed a user_pt_regs structure. Note: Depending on future changes to the s390 pt_regs structure, consider the user_pt_regs structure to be stable for a particular kernel version only. (Of course, s390 tries to ensure keep it stable as much as possible.) Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Acked-by: Alexei Starovoitov Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/s390/include/asm/perf_event.h | 1 + arch/s390/include/asm/ptrace.h | 11 ++++++++--- arch/s390/include/uapi/asm/bpf_perf_event.h | 9 +++++++++ arch/s390/include/uapi/asm/ptrace.h | 11 +++++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 arch/s390/include/uapi/asm/bpf_perf_event.h diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index d6c9d1e0dc2d..b9c0e361748b 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -40,6 +40,7 @@ struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs /* Perf pt_regs extension for sample-data-entry indicators */ struct perf_sf_sde_regs { diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index a3788dafc0e1..6f70d81c40f2 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -74,9 +74,14 @@ enum { */ struct pt_regs { - unsigned long args[1]; - psw_t psw; - unsigned long gprs[NUM_GPRS]; + union { + user_pt_regs user_regs; + struct { + unsigned long args[1]; + psw_t psw; + unsigned long gprs[NUM_GPRS]; + }; + }; unsigned long orig_gpr2; unsigned int int_code; unsigned int int_parm; diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..cefe7c7cd4f6 --- /dev/null +++ b/arch/s390/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 0d23c8ff2900..70f7cb2e5e50 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -290,6 +290,17 @@ typedef struct unsigned long orig_gpr2; } s390_regs; +/* + * The user_pt_regs structure exports the beginning of + * the in-kernel pt_regs structure to user space. + */ +typedef struct +{ + unsigned long args[1]; + psw_t psw; + unsigned long gprs[NUM_GPRS]; +} user_pt_regs; + /* * Now for the user space program event recording (trace) definitions. * The following structures are used only for the ptrace interface, don't From a39cada70268aadff7153e4f782bcd90a5c69d07 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:46 +0100 Subject: [PATCH 10/67] arm64/bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type Correct the broken uapi for the BPF_PROG_TYPE_PERF_EVENT program type by exporting the user_pt_regs structure instead of the pt_regs structure that is in-kernel only. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Acked-by: Alexei Starovoitov Cc: Will Deacon Cc: Mark Rutland Cc: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann --- arch/arm64/include/asm/perf_event.h | 2 ++ arch/arm64/include/uapi/asm/bpf_perf_event.h | 9 +++++++++ 2 files changed, 11 insertions(+) create mode 100644 arch/arm64/include/uapi/asm/bpf_perf_event.h diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 8d5cbec17d80..f9ccc36d3dc3 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -18,6 +18,7 @@ #define __ASM_PERF_EVENT_H #include +#include #define ARMV8_PMU_MAX_COUNTERS 32 #define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) @@ -79,6 +80,7 @@ struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs #endif #define perf_arch_fetch_caller_regs(regs, __ip) { \ diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..b551b741653d --- /dev/null +++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ From 62e1dfa3e1e32fbc351548e7fae2d97c46d25a33 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:47 +0100 Subject: [PATCH 11/67] s390/uapi: correct whitespace & coding style in asm/ptrace.h Correct whitespace and coding style issues in the s390 asm/ptrace.h uapi header file. This is preparatory work to copy it to the tools/ directory for inclusion by selftests and perf. Signed-off-by: Hendrik Brueckner Signed-off-by: Daniel Borkmann --- arch/s390/include/uapi/asm/ptrace.h | 118 ++++++++++++---------------- 1 file changed, 52 insertions(+), 66 deletions(-) diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 70f7cb2e5e50..543dd70e12c8 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -162,7 +162,7 @@ #define GPR_SIZE 8 #define CR_SIZE 8 -#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ +#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ #endif /* __s390x__ */ @@ -179,17 +179,16 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_OLDSETOPTIONS 21 #ifndef __ASSEMBLY__ #include #include -typedef union -{ - float f; - double d; - __u64 ui; +typedef union { + float f; + double d; + __u64 ui; struct { __u32 hi; @@ -197,23 +196,21 @@ typedef union } fp; } freg_t; -typedef struct -{ - __u32 fpc; +typedef struct { + __u32 fpc; __u32 pad; - freg_t fprs[NUM_FPRS]; + freg_t fprs[NUM_FPRS]; } s390_fp_regs; -#define FPC_EXCEPTION_MASK 0xF8000000 -#define FPC_FLAGS_MASK 0x00F80000 -#define FPC_DXC_MASK 0x0000FF00 -#define FPC_RM_MASK 0x00000003 +#define FPC_EXCEPTION_MASK 0xF8000000 +#define FPC_FLAGS_MASK 0x00F80000 +#define FPC_DXC_MASK 0x0000FF00 +#define FPC_RM_MASK 0x00000003 /* this typedef defines how a Program Status Word looks like */ -typedef struct -{ - unsigned long mask; - unsigned long addr; +typedef struct { + unsigned long mask; + unsigned long addr; } __attribute__ ((aligned(8))) psw_t; #ifndef __s390x__ @@ -282,8 +279,7 @@ typedef struct /* * The s390_regs structure is used to define the elf_gregset_t. */ -typedef struct -{ +typedef struct { psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned int acrs[NUM_ACRS]; @@ -294,8 +290,7 @@ typedef struct * The user_pt_regs structure exports the beginning of * the in-kernel pt_regs structure to user space. */ -typedef struct -{ +typedef struct { unsigned long args[1]; psw_t psw; unsigned long gprs[NUM_GPRS]; @@ -307,19 +302,17 @@ typedef struct * touch or even look at it if you don't want to modify the user-space * ptrace interface. In particular stay away from it for in-kernel PER. */ -typedef struct -{ +typedef struct { unsigned long cr[NUM_CR_WORDS]; } per_cr_words; #define PER_EM_MASK 0xE8000000UL -typedef struct -{ +typedef struct { #ifdef __s390x__ - unsigned : 32; + unsigned : 32; #endif /* __s390x__ */ - unsigned em_branching : 1; + unsigned em_branching : 1; unsigned em_instruction_fetch : 1; /* * Switching on storage alteration automatically fixes @@ -328,44 +321,41 @@ typedef struct unsigned em_storage_alteration : 1; unsigned em_gpr_alt_unused : 1; unsigned em_store_real_address : 1; - unsigned : 3; + unsigned : 3; unsigned branch_addr_ctl : 1; - unsigned : 1; + unsigned : 1; unsigned storage_alt_space_ctl : 1; - unsigned : 21; + unsigned : 21; unsigned long starting_addr; unsigned long ending_addr; } per_cr_bits; -typedef struct -{ +typedef struct { unsigned short perc_atmid; unsigned long address; unsigned char access_id; } per_lowcore_words; -typedef struct -{ - unsigned perc_branching : 1; +typedef struct { + unsigned perc_branching : 1; unsigned perc_instruction_fetch : 1; unsigned perc_storage_alteration : 1; - unsigned perc_gpr_alt_unused : 1; + unsigned perc_gpr_alt_unused : 1; unsigned perc_store_real_address : 1; - unsigned : 3; - unsigned atmid_psw_bit_31 : 1; - unsigned atmid_validity_bit : 1; - unsigned atmid_psw_bit_32 : 1; - unsigned atmid_psw_bit_5 : 1; - unsigned atmid_psw_bit_16 : 1; - unsigned atmid_psw_bit_17 : 1; - unsigned si : 2; + unsigned : 3; + unsigned atmid_psw_bit_31 : 1; + unsigned atmid_validity_bit : 1; + unsigned atmid_psw_bit_32 : 1; + unsigned atmid_psw_bit_5 : 1; + unsigned atmid_psw_bit_16 : 1; + unsigned atmid_psw_bit_17 : 1; + unsigned si : 2; unsigned long address; - unsigned : 4; - unsigned access_id : 4; + unsigned : 4; + unsigned access_id : 4; } per_lowcore_bits; -typedef struct -{ +typedef struct { union { per_cr_words words; per_cr_bits bits; @@ -375,9 +365,9 @@ typedef struct * the kernel always sets them to zero. To enable single * stepping use ptrace(PTRACE_SINGLESTEP) instead. */ - unsigned single_step : 1; + unsigned single_step : 1; unsigned instruction_fetch : 1; - unsigned : 30; + unsigned : 30; /* * These addresses are copied into cr10 & cr11 if single * stepping is switched off @@ -387,11 +377,10 @@ typedef struct union { per_lowcore_words words; per_lowcore_bits bits; - } lowcore; + } lowcore; } per_struct; -typedef struct -{ +typedef struct { unsigned int len; unsigned long kernel_addr; unsigned long process_addr; @@ -401,12 +390,12 @@ typedef struct * S/390 specific non posix ptrace requests. I chose unusual values so * they are unlikely to clash with future ptrace definitions. */ -#define PTRACE_PEEKUSR_AREA 0x5000 -#define PTRACE_POKEUSR_AREA 0x5001 +#define PTRACE_PEEKUSR_AREA 0x5000 +#define PTRACE_POKEUSR_AREA 0x5001 #define PTRACE_PEEKTEXT_AREA 0x5002 #define PTRACE_PEEKDATA_AREA 0x5003 #define PTRACE_POKETEXT_AREA 0x5004 -#define PTRACE_POKEDATA_AREA 0x5005 +#define PTRACE_POKEDATA_AREA 0x5005 #define PTRACE_GET_LAST_BREAK 0x5006 #define PTRACE_PEEK_SYSTEM_CALL 0x5007 #define PTRACE_POKE_SYSTEM_CALL 0x5008 @@ -424,21 +413,19 @@ typedef struct * PT_PROT definition is loosely based on hppa bsd definition in * gdb/hppab-nat.c */ -#define PTRACE_PROT 21 +#define PTRACE_PROT 21 -typedef enum -{ +typedef enum { ptprot_set_access_watchpoint, ptprot_set_write_watchpoint, ptprot_disable_watchpoint } ptprot_flags; -typedef struct -{ +typedef struct { unsigned long lowaddr; unsigned long hiaddr; ptprot_flags prot; -} ptprot_area; +} ptprot_area; /* Sequence of bytes for breakpoint illegal instruction. */ #define S390_BREAKPOINT {0x0,0x1} @@ -450,8 +437,7 @@ typedef struct * The user_regs_struct defines the way the user registers are * store on the stack for signal handling. */ -struct user_regs_struct -{ +struct user_regs_struct { psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned int acrs[NUM_ACRS]; From 618e165b2a8e10765dd2a4f9866d118a474f0faf Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:48 +0100 Subject: [PATCH 12/67] selftests/bpf: sync kernel headers and introduce arch support in Makefile Synchronize the uapi kernel header files which solves the broken uapi export of pt_regs. Because of arch-specific uapi headers, extended the include path in the Makefile. With this change, the test_verifier program compiles and runs successfully on s390. Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Acked-by: Alexei Starovoitov Cc: Daniel Borkmann Cc: Shuah Khan Signed-off-by: Daniel Borkmann --- .../arm64/include/uapi/asm/bpf_perf_event.h | 9 + .../s390/include/uapi/asm/bpf_perf_event.h | 9 + tools/arch/s390/include/uapi/asm/ptrace.h | 457 ++++++++++++++++++ .../include/uapi/asm-generic/bpf_perf_event.h | 9 + tools/include/uapi/linux/bpf_perf_event.h | 6 +- tools/testing/selftests/bpf/Makefile | 14 +- 6 files changed, 500 insertions(+), 4 deletions(-) create mode 100644 tools/arch/arm64/include/uapi/asm/bpf_perf_event.h create mode 100644 tools/arch/s390/include/uapi/asm/bpf_perf_event.h create mode 100644 tools/arch/s390/include/uapi/asm/ptrace.h create mode 100644 tools/include/uapi/asm-generic/bpf_perf_event.h diff --git a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..b551b741653d --- /dev/null +++ b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..cefe7c7cd4f6 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..543dd70e12c8 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/ptrace.h @@ -0,0 +1,457 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + */ + +#ifndef _UAPI_S390_PTRACE_H +#define _UAPI_S390_PTRACE_H + +/* + * Offsets in the user_regs_struct. They are used for the ptrace + * system call and in entry.S + */ +#ifndef __s390x__ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x04 +#define PT_GPR0 0x08 +#define PT_GPR1 0x0C +#define PT_GPR2 0x10 +#define PT_GPR3 0x14 +#define PT_GPR4 0x18 +#define PT_GPR5 0x1C +#define PT_GPR6 0x20 +#define PT_GPR7 0x24 +#define PT_GPR8 0x28 +#define PT_GPR9 0x2C +#define PT_GPR10 0x30 +#define PT_GPR11 0x34 +#define PT_GPR12 0x38 +#define PT_GPR13 0x3C +#define PT_GPR14 0x40 +#define PT_GPR15 0x44 +#define PT_ACR0 0x48 +#define PT_ACR1 0x4C +#define PT_ACR2 0x50 +#define PT_ACR3 0x54 +#define PT_ACR4 0x58 +#define PT_ACR5 0x5C +#define PT_ACR6 0x60 +#define PT_ACR7 0x64 +#define PT_ACR8 0x68 +#define PT_ACR9 0x6C +#define PT_ACR10 0x70 +#define PT_ACR11 0x74 +#define PT_ACR12 0x78 +#define PT_ACR13 0x7C +#define PT_ACR14 0x80 +#define PT_ACR15 0x84 +#define PT_ORIGGPR2 0x88 +#define PT_FPC 0x90 +/* + * A nasty fact of life that the ptrace api + * only supports passing of longs. + */ +#define PT_FPR0_HI 0x98 +#define PT_FPR0_LO 0x9C +#define PT_FPR1_HI 0xA0 +#define PT_FPR1_LO 0xA4 +#define PT_FPR2_HI 0xA8 +#define PT_FPR2_LO 0xAC +#define PT_FPR3_HI 0xB0 +#define PT_FPR3_LO 0xB4 +#define PT_FPR4_HI 0xB8 +#define PT_FPR4_LO 0xBC +#define PT_FPR5_HI 0xC0 +#define PT_FPR5_LO 0xC4 +#define PT_FPR6_HI 0xC8 +#define PT_FPR6_LO 0xCC +#define PT_FPR7_HI 0xD0 +#define PT_FPR7_LO 0xD4 +#define PT_FPR8_HI 0xD8 +#define PT_FPR8_LO 0XDC +#define PT_FPR9_HI 0xE0 +#define PT_FPR9_LO 0xE4 +#define PT_FPR10_HI 0xE8 +#define PT_FPR10_LO 0xEC +#define PT_FPR11_HI 0xF0 +#define PT_FPR11_LO 0xF4 +#define PT_FPR12_HI 0xF8 +#define PT_FPR12_LO 0xFC +#define PT_FPR13_HI 0x100 +#define PT_FPR13_LO 0x104 +#define PT_FPR14_HI 0x108 +#define PT_FPR14_LO 0x10C +#define PT_FPR15_HI 0x110 +#define PT_FPR15_LO 0x114 +#define PT_CR_9 0x118 +#define PT_CR_10 0x11C +#define PT_CR_11 0x120 +#define PT_IEEE_IP 0x13C +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x140-1 + +#define GPR_SIZE 4 +#define CR_SIZE 4 + +#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ + +#else /* __s390x__ */ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x08 +#define PT_GPR0 0x10 +#define PT_GPR1 0x18 +#define PT_GPR2 0x20 +#define PT_GPR3 0x28 +#define PT_GPR4 0x30 +#define PT_GPR5 0x38 +#define PT_GPR6 0x40 +#define PT_GPR7 0x48 +#define PT_GPR8 0x50 +#define PT_GPR9 0x58 +#define PT_GPR10 0x60 +#define PT_GPR11 0x68 +#define PT_GPR12 0x70 +#define PT_GPR13 0x78 +#define PT_GPR14 0x80 +#define PT_GPR15 0x88 +#define PT_ACR0 0x90 +#define PT_ACR1 0x94 +#define PT_ACR2 0x98 +#define PT_ACR3 0x9C +#define PT_ACR4 0xA0 +#define PT_ACR5 0xA4 +#define PT_ACR6 0xA8 +#define PT_ACR7 0xAC +#define PT_ACR8 0xB0 +#define PT_ACR9 0xB4 +#define PT_ACR10 0xB8 +#define PT_ACR11 0xBC +#define PT_ACR12 0xC0 +#define PT_ACR13 0xC4 +#define PT_ACR14 0xC8 +#define PT_ACR15 0xCC +#define PT_ORIGGPR2 0xD0 +#define PT_FPC 0xD8 +#define PT_FPR0 0xE0 +#define PT_FPR1 0xE8 +#define PT_FPR2 0xF0 +#define PT_FPR3 0xF8 +#define PT_FPR4 0x100 +#define PT_FPR5 0x108 +#define PT_FPR6 0x110 +#define PT_FPR7 0x118 +#define PT_FPR8 0x120 +#define PT_FPR9 0x128 +#define PT_FPR10 0x130 +#define PT_FPR11 0x138 +#define PT_FPR12 0x140 +#define PT_FPR13 0x148 +#define PT_FPR14 0x150 +#define PT_FPR15 0x158 +#define PT_CR_9 0x160 +#define PT_CR_10 0x168 +#define PT_CR_11 0x170 +#define PT_IEEE_IP 0x1A8 +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x1B0-1 + +#define GPR_SIZE 8 +#define CR_SIZE 8 + +#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ + +#endif /* __s390x__ */ + +#define NUM_GPRS 16 +#define NUM_FPRS 16 +#define NUM_CRS 16 +#define NUM_ACRS 16 + +#define NUM_CR_WORDS 3 + +#define FPR_SIZE 8 +#define FPC_SIZE 4 +#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ +#define ACR_SIZE 4 + + +#define PTRACE_OLDSETOPTIONS 21 + +#ifndef __ASSEMBLY__ +#include +#include + +typedef union { + float f; + double d; + __u64 ui; + struct + { + __u32 hi; + __u32 lo; + } fp; +} freg_t; + +typedef struct { + __u32 fpc; + __u32 pad; + freg_t fprs[NUM_FPRS]; +} s390_fp_regs; + +#define FPC_EXCEPTION_MASK 0xF8000000 +#define FPC_FLAGS_MASK 0x00F80000 +#define FPC_DXC_MASK 0x0000FF00 +#define FPC_RM_MASK 0x00000003 + +/* this typedef defines how a Program Status Word looks like */ +typedef struct { + unsigned long mask; + unsigned long addr; +} __attribute__ ((aligned(8))) psw_t; + +#ifndef __s390x__ + +#define PSW_MASK_PER 0x40000000UL +#define PSW_MASK_DAT 0x04000000UL +#define PSW_MASK_IO 0x02000000UL +#define PSW_MASK_EXT 0x01000000UL +#define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ +#define PSW_MASK_MCHECK 0x00040000UL +#define PSW_MASK_WAIT 0x00020000UL +#define PSW_MASK_PSTATE 0x00010000UL +#define PSW_MASK_ASC 0x0000C000UL +#define PSW_MASK_CC 0x00003000UL +#define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_RI 0x00000000UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL + +#define PSW_MASK_USER 0x0000FF00UL + +#define PSW_ADDR_AMODE 0x80000000UL +#define PSW_ADDR_INSN 0x7FFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) + +#define PSW_ASC_PRIMARY 0x00000000UL +#define PSW_ASC_ACCREG 0x00004000UL +#define PSW_ASC_SECONDARY 0x00008000UL +#define PSW_ASC_HOME 0x0000C000UL + +#else /* __s390x__ */ + +#define PSW_MASK_PER 0x4000000000000000UL +#define PSW_MASK_DAT 0x0400000000000000UL +#define PSW_MASK_IO 0x0200000000000000UL +#define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL +#define PSW_MASK_KEY 0x00F0000000000000UL +#define PSW_MASK_MCHECK 0x0004000000000000UL +#define PSW_MASK_WAIT 0x0002000000000000UL +#define PSW_MASK_PSTATE 0x0001000000000000UL +#define PSW_MASK_ASC 0x0000C00000000000UL +#define PSW_MASK_CC 0x0000300000000000UL +#define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_RI 0x0000008000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL + +#define PSW_MASK_USER 0x0000FF0180000000UL + +#define PSW_ADDR_AMODE 0x0000000000000000UL +#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) + +#define PSW_ASC_PRIMARY 0x0000000000000000UL +#define PSW_ASC_ACCREG 0x0000400000000000UL +#define PSW_ASC_SECONDARY 0x0000800000000000UL +#define PSW_ASC_HOME 0x0000C00000000000UL + +#endif /* __s390x__ */ + + +/* + * The s390_regs structure is used to define the elf_gregset_t. + */ +typedef struct { + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; +} s390_regs; + +/* + * The user_pt_regs structure exports the beginning of + * the in-kernel pt_regs structure to user space. + */ +typedef struct { + unsigned long args[1]; + psw_t psw; + unsigned long gprs[NUM_GPRS]; +} user_pt_regs; + +/* + * Now for the user space program event recording (trace) definitions. + * The following structures are used only for the ptrace interface, don't + * touch or even look at it if you don't want to modify the user-space + * ptrace interface. In particular stay away from it for in-kernel PER. + */ +typedef struct { + unsigned long cr[NUM_CR_WORDS]; +} per_cr_words; + +#define PER_EM_MASK 0xE8000000UL + +typedef struct { +#ifdef __s390x__ + unsigned : 32; +#endif /* __s390x__ */ + unsigned em_branching : 1; + unsigned em_instruction_fetch : 1; + /* + * Switching on storage alteration automatically fixes + * the storage alteration event bit in the users std. + */ + unsigned em_storage_alteration : 1; + unsigned em_gpr_alt_unused : 1; + unsigned em_store_real_address : 1; + unsigned : 3; + unsigned branch_addr_ctl : 1; + unsigned : 1; + unsigned storage_alt_space_ctl : 1; + unsigned : 21; + unsigned long starting_addr; + unsigned long ending_addr; +} per_cr_bits; + +typedef struct { + unsigned short perc_atmid; + unsigned long address; + unsigned char access_id; +} per_lowcore_words; + +typedef struct { + unsigned perc_branching : 1; + unsigned perc_instruction_fetch : 1; + unsigned perc_storage_alteration : 1; + unsigned perc_gpr_alt_unused : 1; + unsigned perc_store_real_address : 1; + unsigned : 3; + unsigned atmid_psw_bit_31 : 1; + unsigned atmid_validity_bit : 1; + unsigned atmid_psw_bit_32 : 1; + unsigned atmid_psw_bit_5 : 1; + unsigned atmid_psw_bit_16 : 1; + unsigned atmid_psw_bit_17 : 1; + unsigned si : 2; + unsigned long address; + unsigned : 4; + unsigned access_id : 4; +} per_lowcore_bits; + +typedef struct { + union { + per_cr_words words; + per_cr_bits bits; + } control_regs; + /* + * The single_step and instruction_fetch bits are obsolete, + * the kernel always sets them to zero. To enable single + * stepping use ptrace(PTRACE_SINGLESTEP) instead. + */ + unsigned single_step : 1; + unsigned instruction_fetch : 1; + unsigned : 30; + /* + * These addresses are copied into cr10 & cr11 if single + * stepping is switched off + */ + unsigned long starting_addr; + unsigned long ending_addr; + union { + per_lowcore_words words; + per_lowcore_bits bits; + } lowcore; +} per_struct; + +typedef struct { + unsigned int len; + unsigned long kernel_addr; + unsigned long process_addr; +} ptrace_area; + +/* + * S/390 specific non posix ptrace requests. I chose unusual values so + * they are unlikely to clash with future ptrace definitions. + */ +#define PTRACE_PEEKUSR_AREA 0x5000 +#define PTRACE_POKEUSR_AREA 0x5001 +#define PTRACE_PEEKTEXT_AREA 0x5002 +#define PTRACE_PEEKDATA_AREA 0x5003 +#define PTRACE_POKETEXT_AREA 0x5004 +#define PTRACE_POKEDATA_AREA 0x5005 +#define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 +#define PTRACE_ENABLE_TE 0x5009 +#define PTRACE_DISABLE_TE 0x5010 +#define PTRACE_TE_ABORT_RAND 0x5011 + +/* + * The numbers chosen here are somewhat arbitrary but absolutely MUST + * not overlap with any of the number assigned in . + */ +#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ + +/* + * PT_PROT definition is loosely based on hppa bsd definition in + * gdb/hppab-nat.c + */ +#define PTRACE_PROT 21 + +typedef enum { + ptprot_set_access_watchpoint, + ptprot_set_write_watchpoint, + ptprot_disable_watchpoint +} ptprot_flags; + +typedef struct { + unsigned long lowaddr; + unsigned long hiaddr; + ptprot_flags prot; +} ptprot_area; + +/* Sequence of bytes for breakpoint illegal instruction. */ +#define S390_BREAKPOINT {0x0,0x1} +#define S390_BREAKPOINT_U16 ((__u16)0x0001) +#define S390_SYSCALL_OPCODE ((__u16)0x0a00) +#define S390_SYSCALL_SIZE 2 + +/* + * The user_regs_struct defines the way the user registers are + * store on the stack for signal handling. + */ +struct user_regs_struct { + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; + s390_fp_regs fp_regs; + /* + * These per registers are in here so that gdb can modify them + * itself as there is no "official" ptrace interface for hardware + * watchpoints. This is the way intel does it. + */ + per_struct per_info; + unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_S390_PTRACE_H */ diff --git a/tools/include/uapi/asm-generic/bpf_perf_event.h b/tools/include/uapi/asm-generic/bpf_perf_event.h new file mode 100644 index 000000000000..53815d2cd047 --- /dev/null +++ b/tools/include/uapi/asm-generic/bpf_perf_event.h @@ -0,0 +1,9 @@ +#ifndef _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ + +#include + +/* Export kernel pt_regs structure */ +typedef struct pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ */ diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h index 067427259820..8f95303f9d80 100644 --- a/tools/include/uapi/linux/bpf_perf_event.h +++ b/tools/include/uapi/linux/bpf_perf_event.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or @@ -7,11 +8,10 @@ #ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ #define _UAPI__LINUX_BPF_PERF_EVENT_H__ -#include -#include +#include struct bpf_perf_event_data { - struct pt_regs regs; + bpf_user_pt_regs_t regs; __u64 sample_period; }; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 333a48655ee0..21a2d76b67dc 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,7 +1,19 @@ # SPDX-License-Identifier: GPL-2.0 + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif +include $(srctree)/tools/scripts/Makefile.arch + +$(call detected_var,SRCARCH) + LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf APIDIR := ../../../include/uapi +ASMDIR:= ../../../arch/$(ARCH)/include/uapi GENDIR := ../../../../include/generated GENHDR := $(GENDIR)/autoconf.h @@ -9,7 +21,7 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(ASMDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ From a81c42136604fb660b366d1ff6d9e0969f166413 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:49 +0100 Subject: [PATCH 13/67] perf s390: add regs_query_register_offset() The regs_query_register_offset() helper function converts register name like "%r0" to an offset of a register in user_pt_regs It is required by the BPF prologue generator. The user_pt_regs structure was recently added to "asm/ptrace.h". Hence, update tools/perf/check-headers.sh to keep the header file in sync with kernel changes. Suggested-by: Thomas Richter Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Acked-by: Alexei Starovoitov Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Heiko Carstens Signed-off-by: Daniel Borkmann --- tools/perf/arch/s390/Makefile | 1 + tools/perf/arch/s390/util/dwarf-regs.c | 32 +++++++++++++++++++++++--- tools/perf/check-headers.sh | 1 + 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 21322e0385b8..09ba923debe8 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -2,3 +2,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c index f47576ce13ea..a8ace5cc6301 100644 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ b/tools/perf/arch/s390/util/dwarf-regs.c @@ -2,17 +2,43 @@ /* * Mapping of DWARF debug register numbers into register names. * - * Copyright IBM Corp. 2010 - * Author(s): Heiko Carstens , + * Copyright IBM Corp. 2010, 2017 + * Author(s): Heiko Carstens , + * Hendrik Brueckner * */ +#include #include -#include +#include #include +#include +#include +#include #include "dwarf-regs-table.h" const char *get_arch_regstr(unsigned int n) { return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n]; } + +/* + * Convert the register name into an offset to struct pt_regs (kernel). + * This is required by the BPF prologue generator. The BPF + * program is called in the BPF overflow handler in the perf + * core. + */ +int regs_query_register_offset(const char *name) +{ + unsigned long gpr; + + if (!name || strncmp(name, "%r", 2)) + return -EINVAL; + + errno = 0; + gpr = strtoul(name + 2, NULL, 10); + if (errno || gpr >= 16) + return -EINVAL; + + return offsetof(user_pt_regs, gprs) + 8 * gpr; +} diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 77406d25e521..6db9d809fe97 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -30,6 +30,7 @@ arch/x86/include/uapi/asm/vmx.h arch/powerpc/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm_perf.h +arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/sie.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h From 567deca8e72df3ceb6c07c63f8541a4928f64d3b Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Thu, 16 Nov 2017 09:29:19 +0200 Subject: [PATCH 14/67] iwlwifi: add new cards for 9260 and 22000 series add 1 PCI ID for 9260 series and 1 for 22000 series. Cc: stable@vger.kernel.org Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index f21fe59faccf..ccd7c33c4c28 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -553,6 +553,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0214, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271C, 0x0214, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_cfg)}, @@ -664,6 +665,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x0310, iwla000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x40C0, 0x0000, iwla000_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x40C0, 0x0A10, iwla000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0000, iwla000_2ax_cfg_hr)}, #endif /* CONFIG_IWLMVM */ From 6c2d49fdc5d947c5fe89935bd52e69f10000f4cb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Nov 2017 17:26:09 +0100 Subject: [PATCH 15/67] iwlwifi: mvm: flush queue before deleting ROC Before deleting a time event (remain-on-channel instance), flush the queue so that frames cannot get stuck on it. We already flush the AUX STA queues, but a separate station is used for the P2P Device queue. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 ++ .../wireless/intel/iwlwifi/mvm/time-event.c | 24 +++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 6a9a25beab3f..55ab5349dd40 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1061,6 +1061,7 @@ struct iwl_mvm { * @IWL_MVM_STATUS_ROC_AUX_RUNNING: AUX remain-on-channel is running * @IWL_MVM_STATUS_D3_RECONFIG: D3 reconfiguration is being done * @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running + * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA */ enum iwl_mvm_status { IWL_MVM_STATUS_HW_RFKILL, @@ -1072,6 +1073,7 @@ enum iwl_mvm_status { IWL_MVM_STATUS_ROC_AUX_RUNNING, IWL_MVM_STATUS_D3_RECONFIG, IWL_MVM_STATUS_FIRMWARE_RUNNING, + IWL_MVM_STATUS_NEED_FLUSH_P2P, }; /* Keep track of completed init configuration */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 4d0314912e94..e25cda9fbf6c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -132,6 +132,24 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk) * executed, and a new time event means a new command. */ iwl_mvm_flush_sta(mvm, &mvm->aux_sta, true, CMD_ASYNC); + + /* Do the same for the P2P device queue (STA) */ + if (test_and_clear_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status)) { + struct iwl_mvm_vif *mvmvif; + + /* + * NB: access to this pointer would be racy, but the flush bit + * can only be set when we had a P2P-Device VIF, and we have a + * flush of this work in iwl_mvm_prepare_mac_removal() so it's + * not really racy. + */ + + if (!WARN_ON(!mvm->p2p_device_vif)) { + mvmvif = iwl_mvm_vif_from_mac80211(mvm->p2p_device_vif); + iwl_mvm_flush_sta(mvm, &mvmvif->bcast_sta, true, + CMD_ASYNC); + } + } } static void iwl_mvm_roc_finished(struct iwl_mvm *mvm) @@ -855,10 +873,12 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm) mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif); - if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) + if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) { iwl_mvm_remove_time_event(mvm, mvmvif, te_data); - else + set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status); + } else { iwl_mvm_remove_aux_roc_te(mvm, mvmvif, te_data); + } iwl_mvm_roc_finished(mvm); } From 9d0fc5a50a0548f8e5d61243e5e5f26d5c405aef Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Mon, 21 Nov 2016 17:01:25 +0200 Subject: [PATCH 16/67] iwlwifi: mvm: enable RX offloading with TKIP and WEP Set the flag that indicates that ICV was stripped on if this option was enabled in the HW. Cc: stable@vger.kernel.org # 4.13+ [this is needed for the 9000-series HW to work properly] Signed-off-by: David Spinadel Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index ca0b5536a8a6..921cab9e2d73 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -117,6 +117,7 @@ #define FH_RSCSR_FRAME_INVALID 0x55550000 #define FH_RSCSR_FRAME_ALIGN 0x40 #define FH_RSCSR_RPA_EN BIT(25) +#define FH_RSCSR_RADA_EN BIT(26) #define FH_RSCSR_RXQ_POS 16 #define FH_RSCSR_RXQ_MASK 0x3F0000 @@ -128,7 +129,8 @@ struct iwl_rx_packet { * 31: flag flush RB request * 30: flag ignore TC (terminal counter) request * 29: flag fast IRQ request - * 28-26: Reserved + * 28-27: Reserved + * 26: RADA enabled * 25: Offload enabled * 24: RPF enabled * 23: RSS enabled diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 20fe23fbf040..bf8409d8a132 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -234,8 +234,8 @@ static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm, static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr, struct ieee80211_rx_status *stats, - struct iwl_rx_mpdu_desc *desc, int queue, - u8 *crypt_len) + struct iwl_rx_mpdu_desc *desc, u32 pkt_flags, + int queue, u8 *crypt_len) { u16 status = le16_to_cpu(desc->status); @@ -272,6 +272,10 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr, if ((status & IWL_RX_MPDU_STATUS_SEC_MASK) == IWL_RX_MPDU_STATUS_SEC_WEP) *crypt_len = IEEE80211_WEP_IV_LEN; + + if (pkt_flags & FH_RSCSR_RADA_EN) + stats->flag |= RX_FLAG_ICV_STRIPPED; + return 0; case IWL_RX_MPDU_STATUS_SEC_EXT_ENC: if (!(status & IWL_RX_MPDU_STATUS_MIC_OK)) @@ -850,7 +854,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status = IEEE80211_SKB_RXCB(skb); - if (iwl_mvm_rx_crypto(mvm, hdr, rx_status, desc, queue, &crypt_len)) { + if (iwl_mvm_rx_crypto(mvm, hdr, rx_status, desc, + le32_to_cpu(pkt->len_n_flags), queue, + &crypt_len)) { kfree_skb(skb); return; } From bf19037074e770aad74b3b90f37b8b98db3f3748 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 8 Feb 2016 23:30:47 +0200 Subject: [PATCH 17/67] iwlwifi: mvm: mark MIC stripped MPDUs When RADA is active, the hardware decrypts the packets and strips off the MIC as it is useless after decryption. Indicate that to mac80211. Cc: stable@vger.kernel.org # 4.13+ [this is needed for the 9000-series HW to work properly] Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index bf8409d8a132..3b8d44361380 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -255,6 +255,8 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr, return -1; stats->flag |= RX_FLAG_DECRYPTED; + if (pkt_flags & FH_RSCSR_RADA_EN) + stats->flag |= RX_FLAG_MIC_STRIPPED; *crypt_len = IEEE80211_CCMP_HDR_LEN; return 0; case IWL_RX_MPDU_STATUS_SEC_TKIP: From e599ea1410c3a2f55716f9c309587235cca32025 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Dec 2017 15:28:44 -0800 Subject: [PATCH 18/67] Revert "tcp: must block bh in __inet_twsk_hashdance()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We had to disable BH _before_ calling __inet_twsk_hashdance() in commit cfac7f836a71 ("tcp/dccp: block bh before arming time_wait timer"). This means we can revert 614bdd4d6e61 ("tcp: must block bh in __inet_twsk_hashdance()"). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_timewait_sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c690cd0d9b3f..b563e0c46bac 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -93,7 +93,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, } /* - * Enter the time wait state. + * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. */ @@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; - spin_lock_bh(&bhead->lock); + spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); @@ -137,7 +137,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock_bh(lock); + spin_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); From 42d779ffc186f6dd26271fc60a7417cb51aca93e Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 1 Dec 2017 21:37:23 -0800 Subject: [PATCH 19/67] nfp: fix port stats for mac representors Previously we swapped the tx_packets, tx_bytes and tx_dropped counters with rx_packets, rx_bytes and rx_dropped counters, respectively. This behaviour is correct and expected for VF representors but it should not be swapped for physical port mac representors. Fixes: eadfa4c3be99 ("nfp: add stats and xmit helpers for representors") Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 924a05e05da0..78b36c67c232 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -84,16 +84,13 @@ nfp_repr_phy_port_get_stats64(struct nfp_port *port, { u8 __iomem *mem = port->eth_stats; - /* TX and RX stats are flipped as we are returning the stats as seen - * at the switch port corresponding to the phys port. - */ - stats->tx_packets = readq(mem + NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK); - stats->tx_bytes = readq(mem + NFP_MAC_STATS_RX_IN_OCTETS); - stats->tx_dropped = readq(mem + NFP_MAC_STATS_RX_IN_ERRORS); + stats->tx_packets = readq(mem + NFP_MAC_STATS_TX_FRAMES_TRANSMITTED_OK); + stats->tx_bytes = readq(mem + NFP_MAC_STATS_TX_OUT_OCTETS); + stats->tx_dropped = readq(mem + NFP_MAC_STATS_TX_OUT_ERRORS); - stats->rx_packets = readq(mem + NFP_MAC_STATS_TX_FRAMES_TRANSMITTED_OK); - stats->rx_bytes = readq(mem + NFP_MAC_STATS_TX_OUT_OCTETS); - stats->rx_dropped = readq(mem + NFP_MAC_STATS_TX_OUT_ERRORS); + stats->rx_packets = readq(mem + NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK); + stats->rx_bytes = readq(mem + NFP_MAC_STATS_RX_IN_OCTETS); + stats->rx_dropped = readq(mem + NFP_MAC_STATS_RX_IN_ERRORS); } static void From 029b6d1405504984b9d2661110ff1a17467d3426 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 2 Dec 2017 08:41:55 +0100 Subject: [PATCH 20/67] Revert "net: core: maybe return -EEXIST in __dev_alloc_name" This reverts commit d6f295e9def0; some userspace (in the case we noticed it's wpa_supplicant), is relying on the current error code to determine that a fixed name interface already exists. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 07ed21d64f92..f47e96b62308 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) * when the name is long and there isn't enough space left * for the digits, or if all bits are used. */ - return p ? -ENFILE : -EEXIST; + return -ENFILE; } static int dev_alloc_name_ns(struct net *net, From 58117672943734715bbe7565ac9f062effa524f0 Mon Sep 17 00:00:00 2001 From: Zumeng Chen Date: Mon, 4 Dec 2017 11:22:02 +0800 Subject: [PATCH 21/67] gianfar: fix a flooded alignment reports because of padding issue. According to LS1021A RM, the value of PAL can be set so that the start of the IP header in the receive data buffer is aligned to a 32-bit boundary. Normally, setting PAL = 2 provides minimal padding to ensure such alignment of the IP header. However every incoming packet's 8-byte time stamp will be inserted into the packet data buffer as padding alignment bytes when hardware time stamping is enabled. So we set the padding 8+2 here to avoid the flooded alignment faults: root@128:~# cat /proc/cpu/alignment User: 0 System: 17539 (inet_gro_receive+0x114/0x2c0) Skipped: 0 Half: 0 Word: 0 DWord: 0 Multi: 17539 User faults: 2 (fixup) Also shown when exception report enablement CPU: 0 PID: 161 Comm: irq/66-eth1_g0_ Not tainted 4.1.21-rt13-WR8.0.0.0_preempt-rt #16 Hardware name: Freescale LS1021A [<8001b420>] (unwind_backtrace) from [<8001476c>] (show_stack+0x20/0x24) [<8001476c>] (show_stack) from [<807cfb48>] (dump_stack+0x94/0xac) [<807cfb48>] (dump_stack) from [<80025d70>] (do_alignment+0x720/0x958) [<80025d70>] (do_alignment) from [<80009224>] (do_DataAbort+0x40/0xbc) [<80009224>] (do_DataAbort) from [<80015398>] (__dabt_svc+0x38/0x60) Exception stack(0x86ad1cc0 to 0x86ad1d08) 1cc0: f9b3e080 86b3d072 2d78d287 00000000 866816c0 86b3d05e 86e785d0 00000000 1ce0: 00000011 0000000e 80840ab0 86ad1d3c 86ad1d08 86ad1d08 806d7fc0 806d806c 1d00: 40070013 ffffffff [<80015398>] (__dabt_svc) from [<806d806c>] (inet_gro_receive+0x114/0x2c0) [<806d806c>] (inet_gro_receive) from [<80660eec>] (dev_gro_receive+0x21c/0x3c0) [<80660eec>] (dev_gro_receive) from [<8066133c>] (napi_gro_receive+0x44/0x17c) [<8066133c>] (napi_gro_receive) from [<804f0538>] (gfar_clean_rx_ring+0x39c/0x7d4) [<804f0538>] (gfar_clean_rx_ring) from [<804f0bf4>] (gfar_poll_rx_sq+0x58/0xe0) [<804f0bf4>] (gfar_poll_rx_sq) from [<80660b10>] (net_rx_action+0x27c/0x43c) [<80660b10>] (net_rx_action) from [<80033638>] (do_current_softirqs+0x1e0/0x3dc) [<80033638>] (do_current_softirqs) from [<800338c4>] (__local_bh_enable+0x90/0xa8) [<800338c4>] (__local_bh_enable) from [<8008025c>] (irq_forced_thread_fn+0x70/0x84) [<8008025c>] (irq_forced_thread_fn) from [<800805e8>] (irq_thread+0x16c/0x244) [<800805e8>] (irq_thread) from [<8004e490>] (kthread+0xe8/0x104) [<8004e490>] (kthread) from [<8000fda8>] (ret_from_fork+0x14/0x2c) Signed-off-by: Zumeng Chen Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 5be52d89b182..81a73af0df31 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1378,9 +1378,11 @@ static int gfar_probe(struct platform_device *ofdev) gfar_init_addr_hash_table(priv); - /* Insert receive time stamps into padding alignment bytes */ + /* Insert receive time stamps into padding alignment bytes, and + * plus 2 bytes padding to ensure the cpu alignment. + */ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) - priv->padding = 8; + priv->padding = 8 + DEFAULT_PADDING; if (dev->features & NETIF_F_IP_CSUM || priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) From 5c472203421ab4f928aa1ae9e1dbcfdd80324148 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 4 Dec 2017 13:31:10 +0200 Subject: [PATCH 22/67] net_sched: red: Avoid devision by zero Do not allow delta value to be zero since it is used as a divisor. Fixes: 8af2a218de38 ("sch_red: Adaptative RED AQM") Signed-off-by: Nogah Frankel Signed-off-by: David S. Miller --- include/net/red.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/red.h b/include/net/red.h index 9a9347710701..5918f78d36a0 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -179,7 +179,7 @@ static inline void red_set_parms(struct red_parms *p, p->qth_max = qth_max << Wlog; p->Wlog = Wlog; p->Plog = Plog; - if (delta < 0) + if (delta <= 0) delta = 1; p->qth_delta = delta; if (!max_P) { From 8afa10cbe281b10371fee5a87ab266e48d71a7f9 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 4 Dec 2017 13:31:11 +0200 Subject: [PATCH 23/67] net_sched: red: Avoid illegal values Check the qmin & qmax values doesn't overflow for the given Wlog value. Check that qmin <= qmax. Fixes: a783474591f2 ("[PKT_SCHED]: Generic RED layer") Signed-off-by: Nogah Frankel Signed-off-by: David S. Miller --- include/net/red.h | 11 +++++++++++ net/sched/sch_choke.c | 3 +++ net/sched/sch_gred.c | 3 +++ net/sched/sch_red.c | 2 ++ net/sched/sch_sfq.c | 3 +++ 5 files changed, 22 insertions(+) diff --git a/include/net/red.h b/include/net/red.h index 5918f78d36a0..9665582c4687 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -168,6 +168,17 @@ static inline void red_set_vars(struct red_vars *v) v->qcount = -1; } +static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) +{ + if (fls(qth_min) + Wlog > 32) + return false; + if (fls(qth_max) + Wlog > 32) + return false; + if (qth_max < qth_min) + return false; + return true; +} + static inline void red_set_parms(struct red_parms *p, u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog, u8 Scell_log, u8 *stab, u32 max_P) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index b30a2c70bd48..531250fceb9e 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -369,6 +369,9 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) ctl = nla_data(tb[TCA_CHOKE_PARMS]); + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + return -EINVAL; + if (ctl->limit > CHOKE_MAX_QUEUE) return -EINVAL; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 17c7130454bd..bc30f9186ac6 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -356,6 +356,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + return -EINVAL; + if (!q) { table->tab[dp] = q = *prealloc; *prealloc = NULL; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 7f8ea9e297c3..9d874e60e032 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -212,6 +212,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; ctl = nla_data(tb[TCA_RED_PARMS]); + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + return -EINVAL; if (ctl->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 09c1203c1711..930e5bd26d3d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -639,6 +639,9 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (ctl->divisor && (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) return -EINVAL; + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog)) + return -EINVAL; if (ctl_v1 && ctl_v1->qth_min) { p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) From 22c1aed4093a605b120d6e566620364843a318ed Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 4 Dec 2017 14:33:26 +0100 Subject: [PATCH 24/67] net: sh_eth: use correct "struct device" when calling DMA mapping functions There are two types of "struct device": the one representing the physical device on its physical bus (platform, SPI, PCI, etc.), and the one representing the logical device in its device class (net, etc.). The DMA mapping API expects to receive as argument a "struct device" representing the physical device, as the "struct device" contains information about the bus that the DMA API needs. However, the sh_eth driver mistakenly uses the "struct device" representing the logical device (embedded in "struct net_device") rather than the "struct device" representing the physical device on its bus. This commit fixes that by adjusting all calls to the DMA mapping API. Signed-off-by: Thomas Petazzoni Acked-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 7e060aa9fbed..91e918e654fe 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1149,7 +1149,8 @@ static int sh_eth_tx_free(struct net_device *ndev, bool sent_only) entry, le32_to_cpu(txdesc->status)); /* Free the original skb. */ if (mdp->tx_skbuff[entry]) { - dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr), + dma_unmap_single(&mdp->pdev->dev, + le32_to_cpu(txdesc->addr), le32_to_cpu(txdesc->len) >> 16, DMA_TO_DEVICE); dev_kfree_skb_irq(mdp->tx_skbuff[entry]); @@ -1179,7 +1180,7 @@ static void sh_eth_ring_free(struct net_device *ndev) if (mdp->rx_skbuff[i]) { struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i]; - dma_unmap_single(&ndev->dev, + dma_unmap_single(&mdp->pdev->dev, le32_to_cpu(rxdesc->addr), ALIGN(mdp->rx_buf_sz, 32), DMA_FROM_DEVICE); @@ -1245,9 +1246,9 @@ static void sh_eth_ring_format(struct net_device *ndev) /* The size of the buffer is a multiple of 32 bytes. */ buf_len = ALIGN(mdp->rx_buf_sz, 32); - dma_addr = dma_map_single(&ndev->dev, skb->data, buf_len, + dma_addr = dma_map_single(&mdp->pdev->dev, skb->data, buf_len, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, dma_addr)) { + if (dma_mapping_error(&mdp->pdev->dev, dma_addr)) { kfree_skb(skb); break; } @@ -1527,7 +1528,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) mdp->rx_skbuff[entry] = NULL; if (mdp->cd->rpadir) skb_reserve(skb, NET_IP_ALIGN); - dma_unmap_single(&ndev->dev, dma_addr, + dma_unmap_single(&mdp->pdev->dev, dma_addr, ALIGN(mdp->rx_buf_sz, 32), DMA_FROM_DEVICE); skb_put(skb, pkt_len); @@ -1555,9 +1556,9 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) if (skb == NULL) break; /* Better luck next round. */ sh_eth_set_receive_align(skb); - dma_addr = dma_map_single(&ndev->dev, skb->data, + dma_addr = dma_map_single(&mdp->pdev->dev, skb->data, buf_len, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, dma_addr)) { + if (dma_mapping_error(&mdp->pdev->dev, dma_addr)) { kfree_skb(skb); break; } @@ -2441,9 +2442,9 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* soft swap. */ if (!mdp->cd->hw_swap) sh_eth_soft_swap(PTR_ALIGN(skb->data, 4), skb->len + 2); - dma_addr = dma_map_single(&ndev->dev, skb->data, skb->len, + dma_addr = dma_map_single(&mdp->pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(&ndev->dev, dma_addr)) { + if (dma_mapping_error(&mdp->pdev->dev, dma_addr)) { kfree_skb(skb); return NETDEV_TX_OK; } From 573500dbf0f2756947517c1d4f942767dbf16dcc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 4 Dec 2017 14:33:27 +0100 Subject: [PATCH 25/67] net: sh_eth: don't use NULL as "struct device" for the DMA mapping API Using NULL as argument for the DMA mapping API is bogus, as the DMA mapping API may use information from the "struct device" to perform the DMA mapping operation. Therefore, pass the appropriate "struct device". Signed-off-by: Thomas Petazzoni Acked-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 91e918e654fe..db72d13cebb9 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1187,7 +1187,7 @@ static void sh_eth_ring_free(struct net_device *ndev) } } ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; - dma_free_coherent(NULL, ringsize, mdp->rx_ring, + dma_free_coherent(&mdp->pdev->dev, ringsize, mdp->rx_ring, mdp->rx_desc_dma); mdp->rx_ring = NULL; } @@ -1204,7 +1204,7 @@ static void sh_eth_ring_free(struct net_device *ndev) sh_eth_tx_free(ndev, false); ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring; - dma_free_coherent(NULL, ringsize, mdp->tx_ring, + dma_free_coherent(&mdp->pdev->dev, ringsize, mdp->tx_ring, mdp->tx_desc_dma); mdp->tx_ring = NULL; } @@ -1324,8 +1324,8 @@ static int sh_eth_ring_init(struct net_device *ndev) /* Allocate all Rx descriptors. */ rx_ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; - mdp->rx_ring = dma_alloc_coherent(NULL, rx_ringsize, &mdp->rx_desc_dma, - GFP_KERNEL); + mdp->rx_ring = dma_alloc_coherent(&mdp->pdev->dev, rx_ringsize, + &mdp->rx_desc_dma, GFP_KERNEL); if (!mdp->rx_ring) goto ring_free; @@ -1333,8 +1333,8 @@ static int sh_eth_ring_init(struct net_device *ndev) /* Allocate all Tx descriptors. */ tx_ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring; - mdp->tx_ring = dma_alloc_coherent(NULL, tx_ringsize, &mdp->tx_desc_dma, - GFP_KERNEL); + mdp->tx_ring = dma_alloc_coherent(&mdp->pdev->dev, tx_ringsize, + &mdp->tx_desc_dma, GFP_KERNEL); if (!mdp->tx_ring) goto ring_free; return 0; From 672ecbe1c977616aa720c9397589665b33e72610 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 4 Dec 2017 10:31:43 -0800 Subject: [PATCH 26/67] tipc: fix a null pointer deref on error path In tipc_topsrv_kern_subscr() when s->tipc_conn_new() fails we call tipc_close_conn() to clean up, but in this case calling conn_put() is just enough. This fixes the folllowing crash: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 3085 Comm: syzkaller064164 Not tainted 4.15.0-rc1+ #137 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: 00000000c24413a5 task.stack: 000000005e8160b5 RIP: 0010:__lock_acquire+0xd55/0x47f0 kernel/locking/lockdep.c:3378 RSP: 0018:ffff8801cb5474a8 EFLAGS: 00010002 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffffffff85ecb400 RBP: ffff8801cb547830 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: ffffffff87489d60 R12: ffff8801cd2980c0 R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000020 FS: 00000000014ee880(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffee2426e40 CR3: 00000001cb85a000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:4004 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:175 spin_lock_bh include/linux/spinlock.h:320 [inline] tipc_subscrb_subscrp_delete+0x8f/0x470 net/tipc/subscr.c:201 tipc_subscrb_delete net/tipc/subscr.c:238 [inline] tipc_subscrb_release_cb+0x17/0x30 net/tipc/subscr.c:316 tipc_close_conn+0x171/0x270 net/tipc/server.c:204 tipc_topsrv_kern_subscr+0x724/0x810 net/tipc/server.c:514 tipc_group_create+0x702/0x9c0 net/tipc/group.c:184 tipc_sk_join net/tipc/socket.c:2747 [inline] tipc_setsockopt+0x249/0xc10 net/tipc/socket.c:2861 SYSC_setsockopt net/socket.c:1851 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1830 entry_SYSCALL_64_fastpath+0x1f/0x96 Fixes: 14c04493cb77 ("tipc: add ability to order and receive topology events in driver") Reported-by: syzbot Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/tipc/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index acaef80fb88c..2710101ba4c1 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -511,7 +511,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, s = con->server; scbr = s->tipc_conn_new(*conid); if (!scbr) { - tipc_close_conn(con); + conn_put(con); return false; } From a7d5f107b4978e08eeab599ee7449af34d034053 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 4 Dec 2017 22:00:20 +0100 Subject: [PATCH 27/67] tipc: fix memory leak in tipc_accept_from_sock() When the function tipc_accept_from_sock() fails to create an instance of struct tipc_subscriber it omits to free the already created instance of struct tipc_conn instance before it returns. We fix that with this commit. Reported-by: David S. Miller Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/server.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/server.c b/net/tipc/server.c index 2710101ba4c1..d60c30342327 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -314,6 +314,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) newcon->usr_data = s->tipc_conn_new(newcon->conid); if (!newcon->usr_data) { sock_release(newsock); + conn_put(newcon); return -ENOMEM; } From c9d3fe9da094a9a7a3d3cd365b334b822e05f5e8 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 5 Dec 2017 11:31:14 +0000 Subject: [PATCH 28/67] VSOCK: fix outdated sk_state value in hvs_release() Since commit 3b4477d2dcf2709d0be89e2a8dced3d0f4a017f2 ("VSOCK: use TCP state constants for sk_state") VSOCK has used TCP_* constants for sk_state. Commit b4562ca7925a3bedada87a3dd072dd5bad043288 ("hv_sock: add locking in the open/close/release code paths") reintroduced the SS_DISCONNECTING constant. This patch replaces the old SS_DISCONNECTING with the new TCP_CLOSING constant. CC: Dexuan Cui CC: Cathy Avery Signed-off-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/hyperv_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 5583df708b8c..a827547aa102 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -487,7 +487,7 @@ static void hvs_release(struct vsock_sock *vsk) lock_sock(sk); - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; vsock_remove_sock(vsk); release_sock(sk); From c20a548792f15f8d8e38cd74356301c6db0d241f Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 5 Dec 2017 13:41:17 -0700 Subject: [PATCH 29/67] net: qualcomm: rmnet: Fix leak on transmit failure If a skb in transmit path does not have sufficient headroom to add the map header, the skb is not sent out and is never freed. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 29842ccc91a9..08e4afc0ab39 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -126,12 +126,12 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, if (skb_headroom(skb) < required_headroom) { if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL)) - return RMNET_MAP_CONSUMED; + goto fail; } map_header = rmnet_map_add_map_header(skb, additional_header_len, 0); if (!map_header) - return RMNET_MAP_CONSUMED; + goto fail; if (port->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) { if (mux_id == 0xff) @@ -143,6 +143,10 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, skb->protocol = htons(ETH_P_MAP); return RMNET_MAP_SUCCESS; + +fail: + kfree_skb(skb); + return RMNET_MAP_CONSUMED; } static void From 6296928fa3b9f656d7760fbf346c0cf834788a78 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 5 Dec 2017 13:41:18 -0700 Subject: [PATCH 30/67] net: qualcomm: rmnet: Fix leak in device creation failure If the rmnet device creation fails in the newlink either while registering with the physical device or after subsequent operations, the rmnet endpoint information is never freed. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 71bee1af71ef..df21e900f874 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -195,6 +195,7 @@ err2: err1: rmnet_unregister_real_device(real_dev, port); err0: + kfree(ep); return err; } From d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Dec 2017 12:45:56 -0800 Subject: [PATCH 31/67] net: remove hlist_nulls_add_tail_rcu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alexander Potapenko reported use of uninitialized memory [1] This happens when inserting a request socket into TCP ehash, in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized. Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") missed the opportunity to get rid of hlist_nulls_add_tail_rcu() : Both UDP sockets and TCP/DCCP listeners no longer use __sk_nulls_add_node_rcu() for their hash insertion. Since all other sockets have unique 4-tuple, the reuseport status has no special meaning, so we can always use hlist_nulls_add_head_rcu() for them and save few cycles/instructions. [1] ================================================================== BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace:    __dump_stack lib/dump_stack.c:16  dump_stack+0x185/0x1d0 lib/dump_stack.c:52  kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016  __msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766  __sk_nulls_add_node_rcu ./include/net/sock.h:684  inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413  reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754  inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765  tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414  tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314  tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917  tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483  tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763  ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216  NF_HOOK ./include/linux/netfilter.h:248  ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257  dst_input ./include/net/dst.h:477  ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397  NF_HOOK ./include/linux/netfilter.h:248  ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488  __netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298  __netif_receive_skb net/core/dev.c:4336  netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497  napi_skb_finish net/core/dev.c:4858  napi_gro_receive+0x629/0xa50 net/core/dev.c:4889  e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018  e1000_clean_rx_irq+0x1492/0x1d30 drivers/net/ethernet/intel/e1000/e1000_main.c:4474  e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819  napi_poll net/core/dev.c:5500  net_rx_action+0x73c/0x1820 net/core/dev.c:5566  __do_softirq+0x4b4/0x8dd kernel/softirq.c:284  invoke_softirq kernel/softirq.c:364  irq_exit+0x203/0x240 kernel/softirq.c:405  exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638  do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263  common_interrupt+0x86/0x86 Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") Signed-off-by: Eric Dumazet Reported-by: Alexander Potapenko Acked-by: Craig Gallek Signed-off-by: David S. Miller --- include/linux/rculist_nulls.h | 38 ----------------------------------- include/net/sock.h | 6 +----- 2 files changed, 1 insertion(+), 43 deletions(-) diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index a328e8181e49..e4b257ff881b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -100,44 +100,6 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, first->pprev = &n->next; } -/** - * hlist_nulls_add_tail_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the end of the specified hlist_nulls, - * while permitting racing traversals. NOTE: tail insertion requires - * list traversal. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() - * or hlist_nulls_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, - struct hlist_nulls_head *h) -{ - struct hlist_nulls_node *i, *last = NULL; - - for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); - i = hlist_nulls_next_rcu(i)) - last = i; - - if (last) { - n->next = last->next; - n->pprev = &last->next; - rcu_assign_pointer(hlist_nulls_next_rcu(last), n); - } else { - hlist_nulls_add_head_rcu(n, h); - } -} - /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/net/sock.h b/include/net/sock.h index 79e1a2c7912c..9155da422692 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -685,11 +685,7 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && - sk->sk_family == AF_INET6) - hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); - else - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) From 69c64866ce072dea1d1e59a0d61e0f66c0dffb76 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Tue, 5 Dec 2017 20:58:35 +0000 Subject: [PATCH 32/67] dccp: CVE-2017-8824: use-after-free in DCCP code Whenever the sock object is in DCCP_CLOSED state, dccp_disconnect() must free dccps_hc_tx_ccid and dccps_hc_rx_ccid and set to NULL. Signed-off-by: Mohamed Ghannam Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/proto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b68168fcc06a..9d43c1f40274 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); int err = 0; const int old_state = sk->sk_state; @@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = NULL; + dp->dccps_hc_tx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); From a5739435b5a3b8c449f8844ecd71a3b1e89f0a33 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Dec 2017 23:27:57 +0000 Subject: [PATCH 33/67] fix kcm_clone() 1) it's fput() or sock_release(), not both 2) don't do fd_install() until the last failure exit. 3) not a bug per se, but... don't attach socket to struct file until it's set up. Take reserving descriptor into the caller, move fd_install() to the caller, sanitize failure exits and calling conventions. Cc: stable@vger.kernel.org # v4.6+ Acked-by: Tom Herbert Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 69 ++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 0b750a22c4b9..c5fa634e63ca 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1625,60 +1625,35 @@ static struct proto kcm_proto = { }; /* Clone a kcm socket. */ -static int kcm_clone(struct socket *osock, struct kcm_clone *info, - struct socket **newsockp) +static struct file *kcm_clone(struct socket *osock) { struct socket *newsock; struct sock *newsk; - struct file *newfile; - int err, newfd; + struct file *file; - err = -ENFILE; newsock = sock_alloc(); if (!newsock) - goto out; + return ERR_PTR(-ENFILE); newsock->type = osock->type; newsock->ops = osock->ops; __module_get(newsock->ops->owner); - newfd = get_unused_fd_flags(0); - if (unlikely(newfd < 0)) { - err = newfd; - goto out_fd_fail; - } - - newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); - if (IS_ERR(newfile)) { - err = PTR_ERR(newfile); - goto out_sock_alloc_fail; - } - newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, &kcm_proto, true); if (!newsk) { - err = -ENOMEM; - goto out_sk_alloc_fail; + sock_release(newsock); + return ERR_PTR(-ENOMEM); } - sock_init_data(newsock, newsk); init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); - fd_install(newfd, newfile); - *newsockp = newsock; - info->fd = newfd; + file = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); + if (IS_ERR(file)) + sock_release(newsock); - return 0; - -out_sk_alloc_fail: - fput(newfile); -out_sock_alloc_fail: - put_unused_fd(newfd); -out_fd_fail: - sock_release(newsock); -out: - return err; + return file; } static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) @@ -1708,17 +1683,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } case SIOCKCMCLONE: { struct kcm_clone info; - struct socket *newsock = NULL; + struct file *file; - err = kcm_clone(sock, &info, &newsock); - if (!err) { - if (copy_to_user((void __user *)arg, &info, - sizeof(info))) { - err = -EFAULT; - sys_close(info.fd); - } + info.fd = get_unused_fd_flags(0); + if (unlikely(info.fd < 0)) + return info.fd; + + file = kcm_clone(sock); + if (IS_ERR(file)) { + put_unused_fd(info.fd); + return PTR_ERR(file); } - + if (copy_to_user((void __user *)arg, &info, + sizeof(info))) { + put_unused_fd(info.fd); + fput(file); + return -EFAULT; + } + fd_install(info.fd, file); + err = 0; break; } default: From 016a266bdfeda268afb2228b6217fd4771334635 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Dec 2017 23:28:38 +0000 Subject: [PATCH 34/67] socketpair(): allocate descriptors first simplifies failure exits considerably... Reviewed-by: Eric Dumazet Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/socket.c | 91 ++++++++++++++++++++++------------------------------ 1 file changed, 39 insertions(+), 52 deletions(-) diff --git a/net/socket.c b/net/socket.c index 42d8e9c9ccd5..2df83c0bfde9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1365,88 +1365,75 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + /* + * reserve descriptors and make sure we won't fail + * to return them to userland. + */ + fd1 = get_unused_fd_flags(flags); + if (unlikely(fd1 < 0)) + return fd1; + + fd2 = get_unused_fd_flags(flags); + if (unlikely(fd2 < 0)) { + put_unused_fd(fd1); + return fd2; + } + + err = put_user(fd1, &usockvec[0]); + if (err) + goto out; + + err = put_user(fd2, &usockvec[1]); + if (err) + goto out; + /* * Obtain the first socket and check if the underlying protocol * supports the socketpair call. */ err = sock_create(family, type, protocol, &sock1); - if (err < 0) + if (unlikely(err < 0)) goto out; err = sock_create(family, type, protocol, &sock2); - if (err < 0) - goto out_release_1; - - err = sock1->ops->socketpair(sock1, sock2); - if (err < 0) - goto out_release_both; - - fd1 = get_unused_fd_flags(flags); - if (unlikely(fd1 < 0)) { - err = fd1; - goto out_release_both; + if (unlikely(err < 0)) { + sock_release(sock1); + goto out; } - fd2 = get_unused_fd_flags(flags); - if (unlikely(fd2 < 0)) { - err = fd2; - goto out_put_unused_1; + err = sock1->ops->socketpair(sock1, sock2); + if (unlikely(err < 0)) { + sock_release(sock2); + sock_release(sock1); + goto out; } newfile1 = sock_alloc_file(sock1, flags, NULL); if (IS_ERR(newfile1)) { err = PTR_ERR(newfile1); - goto out_put_unused_both; + sock_release(sock1); + sock_release(sock2); + goto out; } newfile2 = sock_alloc_file(sock2, flags, NULL); if (IS_ERR(newfile2)) { err = PTR_ERR(newfile2); - goto out_fput_1; + sock_release(sock2); + fput(newfile1); + goto out; } - err = put_user(fd1, &usockvec[0]); - if (err) - goto out_fput_both; - - err = put_user(fd2, &usockvec[1]); - if (err) - goto out_fput_both; - audit_fd_pair(fd1, fd2); fd_install(fd1, newfile1); fd_install(fd2, newfile2); - /* fd1 and fd2 may be already another descriptors. - * Not kernel problem. - */ - return 0; -out_fput_both: - fput(newfile2); - fput(newfile1); - put_unused_fd(fd2); - put_unused_fd(fd1); - goto out; - -out_fput_1: - fput(newfile1); - put_unused_fd(fd2); - put_unused_fd(fd1); - sock_release(sock2); - goto out; - -out_put_unused_both: - put_unused_fd(fd2); -out_put_unused_1: - put_unused_fd(fd1); -out_release_both: - sock_release(sock2); -out_release_1: - sock_release(sock1); out: + put_unused_fd(fd2); + put_unused_fd(fd1); return err; } From 8e1611e2357927b22892ecc062d65c99d0d89066 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Dec 2017 23:29:09 +0000 Subject: [PATCH 35/67] make sock_alloc_file() do sock_release() on failures This changes calling conventions (and simplifies the hell out the callers). New rules: once struct socket had been passed to sock_alloc_file(), it's been consumed either by struct file or by sock_release() done by sock_alloc_file(). Either way the caller should not do sock_release() after that point. Reviewed-by: Eric Dumazet Signed-off-by: Al Viro Signed-off-by: David S. Miller --- drivers/staging/lustre/lnet/lnet/lib-socket.c | 8 ++---- net/9p/trans_fd.c | 1 - net/kcm/kcmsock.c | 7 +----- net/sctp/socket.c | 1 - net/socket.c | 25 ++++++------------- 5 files changed, 11 insertions(+), 31 deletions(-) diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index 539a26444f31..7d49d4865298 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -71,16 +71,12 @@ lnet_sock_ioctl(int cmd, unsigned long arg) } sock_filp = sock_alloc_file(sock, 0, NULL); - if (IS_ERR(sock_filp)) { - sock_release(sock); - rc = PTR_ERR(sock_filp); - goto out; - } + if (IS_ERR(sock_filp)) + return PTR_ERR(sock_filp); rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg); fput(sock_filp); -out: return rc; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 985046ae4231..80f5c79053a4 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -839,7 +839,6 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); - sock_release(csocket); kfree(p); return PTR_ERR(file); } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index c5fa634e63ca..d4e98f20fc2a 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1629,7 +1629,6 @@ static struct file *kcm_clone(struct socket *osock) { struct socket *newsock; struct sock *newsk; - struct file *file; newsock = sock_alloc(); if (!newsock) @@ -1649,11 +1648,7 @@ static struct file *kcm_clone(struct socket *osock) sock_init_data(newsock, newsk); init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); - file = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); - if (IS_ERR(file)) - sock_release(newsock); - - return file; + return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); } static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 014847e25648..eb17a911aa29 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5080,7 +5080,6 @@ static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *p *newfile = sock_alloc_file(newsock, 0, NULL); if (IS_ERR(*newfile)) { put_unused_fd(retval); - sock_release(newsock); retval = PTR_ERR(*newfile); *newfile = NULL; return retval; diff --git a/net/socket.c b/net/socket.c index 2df83c0bfde9..05f361faec45 100644 --- a/net/socket.c +++ b/net/socket.c @@ -406,8 +406,10 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) name.len = strlen(name.name); } path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); - if (unlikely(!path.dentry)) + if (unlikely(!path.dentry)) { + sock_release(sock); return ERR_PTR(-ENOMEM); + } path.mnt = mntget(sock_mnt); d_instantiate(path.dentry, SOCK_INODE(sock)); @@ -415,9 +417,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); if (IS_ERR(file)) { - /* drop dentry, keep inode */ + /* drop dentry, keep inode for a bit */ ihold(d_inode(path.dentry)); path_put(&path); + /* ... and now kill it properly */ + sock_release(sock); return file; } @@ -1330,19 +1334,9 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) retval = sock_create(family, type, protocol, &sock); if (retval < 0) - goto out; + return retval; - retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); - if (retval < 0) - goto out_release; - -out: - /* It may be already another descriptor 8) Not kernel problem. */ - return retval; - -out_release: - sock_release(sock); - return retval; + return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); } /* @@ -1412,7 +1406,6 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, newfile1 = sock_alloc_file(sock1, flags, NULL); if (IS_ERR(newfile1)) { err = PTR_ERR(newfile1); - sock_release(sock1); sock_release(sock2); goto out; } @@ -1420,7 +1413,6 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, newfile2 = sock_alloc_file(sock2, flags, NULL); if (IS_ERR(newfile2)) { err = PTR_ERR(newfile2); - sock_release(sock2); fput(newfile1); goto out; } @@ -1549,7 +1541,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, if (IS_ERR(newfile)) { err = PTR_ERR(newfile); put_unused_fd(newfd); - sock_release(newsock); goto out_put; } From 134059fd2775be79e26c2dff87d25cc2f6ea5626 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Dec 2017 01:04:50 +0100 Subject: [PATCH 36/67] net: thunderx: Fix TCP/UDP checksum offload for IPv4 pkts Offload IP header checksum to NIC. This fixes a previous patch which disabled checksum offloading for both IPv4 and IPv6 packets. So L3 checksum offload was getting disabled for IPv4 pkts. And HW is dropping these pkts for some reason. Without this patch, IPv4 TSO appears to be broken: WIthout this patch I get ~16kbyte/s, with patch close to 2mbyte/s when copying files via scp from test box to my home workstation. Looking at tcpdump on sender it looks like hardware drops IPv4 TSO skbs. This patch restores performance for me, ipv6 looks good too. Fixes: fa6d7cb5d76c ("net: thunderx: Fix TCP/UDP checksum offload for IPv6 pkts") Cc: Sunil Goutham Cc: Aleksey Makarov Cc: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 8b2c31e2a2b0..a3d12dbde95b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1355,6 +1355,8 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, /* Offload checksum calculation to HW */ if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (ip.v4->version == 4) + hdr->csum_l3 = 1; /* Enable IP csum calculation */ hdr->l3_offset = skb_network_offset(skb); hdr->l4_offset = skb_transport_offset(skb); From cc10f8712bd688543602f3f3d052b13be4095695 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 Dec 2017 22:40:25 -0800 Subject: [PATCH 37/67] xen-netback: Fix logging message with spurious period after newline Using a period after a newline causes bad output. Signed-off-by: Joe Perches Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index d6dff347f896..78ebe494fef0 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -186,7 +186,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Obtain the queue to be used to transmit this packet */ index = skb_get_queue_mapping(skb); if (index >= num_queues) { - pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n.", + pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n", index, vif->dev->name); index %= num_queues; } From 32d3e51a82d453762ef148b2c4fbc8a7ec374a88 Mon Sep 17 00:00:00 2001 From: Chris Dion Date: Wed, 6 Dec 2017 10:50:28 -0500 Subject: [PATCH 38/67] net_sched: use macvlan real dev trans_start in dev_trans_start() Macvlan devices are similar to vlans and do not update their own trans_start. In order for arp monitoring to work for a bond device when the slaves are macvlans, obtain its real device. Signed-off-by: Chris Dion Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3839cbbdc32b..cd1b200acae7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -277,6 +278,8 @@ unsigned long dev_trans_start(struct net_device *dev) if (is_vlan_dev(dev)) dev = vlan_dev_real_dev(dev); + else if (netif_is_macvlan(dev)) + dev = macvlan_dev_real_dev(dev); res = netdev_get_tx_queue(dev, 0)->trans_start; for (i = 1; i < dev->num_tx_queues; i++) { val = netdev_get_tx_queue(dev, i)->trans_start; From f3069c6d33f6ae63a1668737bc78aaaa51bff7ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Wed, 6 Dec 2017 17:18:28 +0100 Subject: [PATCH 39/67] rds: Fix NULL pointer dereference in __rds_rdma_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a fix for syzkaller719569, where memory registration was attempted without any underlying transport being loaded. Analysis of the case reveals that it is the setsockopt() RDS_GET_MR (2) and RDS_GET_MR_FOR_DEST (7) that are vulnerable. Here is an example stack trace when the bug is hit: BUG: unable to handle kernel NULL pointer dereference at 00000000000000c0 IP: __rds_rdma_map+0x36/0x440 [rds] PGD 2f93d03067 P4D 2f93d03067 PUD 2f93d02067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: bridge stp llc tun rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rds binfmt_misc sb_edac intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul c rc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd iTCO_wdt mei_me sg iTCO_vendor_support ipmi_si mei ipmi_devintf nfsd shpchp pcspkr i2c_i801 ioatd ma ipmi_msghandler wmi lpc_ich mfd_core auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2 mgag200 i2c_algo_bit drm_kms_helper ixgbe syscopyarea ahci sysfillrect sysimgblt libahci mdio fb_sys_fops ttm ptp libata sd_mod mlx4_core drm crc32c_intel pps_core megaraid_sas i2c_core dca dm_mirror dm_region_hash dm_log dm_mod CPU: 48 PID: 45787 Comm: repro_set2 Not tainted 4.14.2-3.el7uek.x86_64 #2 Hardware name: Oracle Corporation ORACLE SERVER X5-2L/ASM,MOBO TRAY,2U, BIOS 31110000 03/03/2017 task: ffff882f9190db00 task.stack: ffffc9002b994000 RIP: 0010:__rds_rdma_map+0x36/0x440 [rds] RSP: 0018:ffffc9002b997df0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff882fa2182580 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffc9002b997e40 RDI: ffff882fa2182580 RBP: ffffc9002b997e30 R08: 0000000000000000 R09: 0000000000000002 R10: ffff885fb29e3838 R11: 0000000000000000 R12: ffff882fa2182580 R13: ffff882fa2182580 R14: 0000000000000002 R15: 0000000020000ffc FS: 00007fbffa20b700(0000) GS:ffff882fbfb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c0 CR3: 0000002f98a66006 CR4: 00000000001606e0 Call Trace: rds_get_mr+0x56/0x80 [rds] rds_setsockopt+0x172/0x340 [rds] ? __fget_light+0x25/0x60 ? __fdget+0x13/0x20 SyS_setsockopt+0x80/0xe0 do_syscall_64+0x67/0x1b0 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7fbff9b117f9 RSP: 002b:00007fbffa20aed8 EFLAGS: 00000293 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00000000000c84a4 RCX: 00007fbff9b117f9 RDX: 0000000000000002 RSI: 0000400000000114 RDI: 000000000000109b RBP: 00007fbffa20af10 R08: 0000000000000020 R09: 00007fbff9dd7860 R10: 0000000020000ffc R11: 0000000000000293 R12: 0000000000000000 R13: 00007fbffa20b9c0 R14: 00007fbffa20b700 R15: 0000000000000021 Code: 41 56 41 55 49 89 fd 41 54 53 48 83 ec 18 8b 87 f0 02 00 00 48 89 55 d0 48 89 4d c8 85 c0 0f 84 2d 03 00 00 48 8b 87 00 03 00 00 <48> 83 b8 c0 00 00 00 00 0f 84 25 03 00 0 0 48 8b 06 48 8b 56 08 The fix is to check the existence of an underlying transport in __rds_rdma_map(). Signed-off-by: Håkon Bugge Reported-by: syzbot Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 8886f15abe90..bc2f1e0977d6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, long i; int ret; - if (rs->rs_bound_addr == 0) { + if (rs->rs_bound_addr == 0 || !rs->rs_transport) { ret = -ENOTCONN; /* XXX not a great errno */ goto out; } From 30f1e59550363f6be28213393407ef225150e7fe Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 22:28:40 +0530 Subject: [PATCH 40/67] drivers: net: dsa: remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. Signed-off-by: Pravin Shedge Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index ea01f24f15e7..b62d47210db8 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include From 51ef7925e10688c57186d438e784532e063492e4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Nov 2017 17:57:04 +0200 Subject: [PATCH 41/67] brcmfmac: Avoid build error with make W=1 When I run make W=1 on gcc (Debian 7.2.0-16) 7.2.0 I got an error for the first run, all next ones are okay. CC [M] drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.o drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:2078: error: Cannot parse struct or union! scripts/Makefile.build:310: recipe for target 'drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.o' failed Seems like something happened with W=1 and wrong kernel doc format. As a quick fix remove dubious /** in the code. Signed-off-by: Andy Shevchenko Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index adf180f338ca..cdf9e4161592 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -2070,7 +2070,7 @@ static int brcmf_sdio_txpkt_hdalign(struct brcmf_sdio *bus, struct sk_buff *pkt) return head_pad; } -/** +/* * struct brcmf_skbuff_cb reserves first two bytes in sk_buff::cb for * bus layer usage. */ From 589bf32f09852041fbd3b7ce1a9e703f95c230ba Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Wed, 6 Dec 2017 15:23:23 +0100 Subject: [PATCH 42/67] net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case add appropriate calls to clk_disable_unprepare() by jumping to out_mdio in case orion_mdio_probe() returns -EPROBE_DEFER. Found by Linux Driver Verification project (linuxtesting.org). Fixes: 3d604da1e954 ("net: mvmdio: get and enable optional clock") Signed-off-by: Tobias Jordan Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvmdio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index c9798210fa0f..0495487f7b42 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -344,7 +344,8 @@ static int orion_mdio_probe(struct platform_device *pdev) dev->regs + MVMDIO_ERR_INT_MASK); } else if (dev->err_interrupt == -EPROBE_DEFER) { - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out_mdio; } if (pdev->dev.of_node) From e46772a6946a7d1f3fbbc1415871851d6651f1d4 Mon Sep 17 00:00:00 2001 From: Branislav Radocaj Date: Thu, 7 Dec 2017 00:07:38 +0100 Subject: [PATCH 43/67] net: ethernet: arc: fix error handling in emac_rockchip_probe If clk_set_rate() fails, we should disable clk before return. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Branislav Radocaj Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_rockchip.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index e278e3d96ee0..c6163874e4e7 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -220,9 +220,11 @@ static int emac_rockchip_probe(struct platform_device *pdev) /* RMII TX/RX needs always a rate of 25MHz */ err = clk_set_rate(priv->macclk, 25000000); - if (err) + if (err) { dev_err(dev, "failed to change mac clock rate (%d)\n", err); + goto out_clk_disable_macclk; + } } err = arc_emac_probe(ndev, interface); @@ -232,7 +234,8 @@ static int emac_rockchip_probe(struct platform_device *pdev) } return 0; - +out_clk_disable_macclk: + clk_disable_unprepare(priv->macclk); out_regulator_disable: if (priv->regulator) regulator_disable(priv->regulator); From 3d5fdba1842bdd2eef29364c660558cb4cbb3fe0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 7 Dec 2017 01:05:56 +0100 Subject: [PATCH 44/67] net: dsa: mv88e6xxx: Fix interrupt masking on removal When removing the interrupt handling code, we should mask the generation of interrupts. The code however unmasked all interrupts. This can then cause a new interrupt. We then get into a deadlock where the interrupt thread is waiting to run, and the code continues, trying to remove the interrupt handler, which means waiting for the thread to complete. On a UP machine this deadlocks. Fix so we really mask interrupts in the hardware. The same error is made in the error path when install the interrupt handling code. Fixes: 3460a5770ce9 ("net: dsa: mv88e6xxx: Mask g1 interrupts and free interrupt") Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 8171055fde7a..70004264f60d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -339,7 +339,7 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip) u16 mask; mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &mask); - mask |= GENMASK(chip->g1_irq.nirqs, 0); + mask &= ~GENMASK(chip->g1_irq.nirqs, 0); mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask); free_irq(chip->irq, chip); @@ -395,7 +395,7 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip) return 0; out_disable: - mask |= GENMASK(chip->g1_irq.nirqs, 0); + mask &= ~GENMASK(chip->g1_irq.nirqs, 0); mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask); out_mapping: From 3126aeec5313565bfa19e2dd8fd7e3c3390514cb Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 7 Dec 2017 01:05:57 +0100 Subject: [PATCH 45/67] net: dsa: mv88e6xxx: Unregister MDIO bus on error path The MDIO busses need to be unregistered before they are freed, otherwise BUG() is called. Add a call to the unregister code if the registration fails, since we can have multiple busses, of which some may correctly register before one fails. This requires moving the code around a little. Fixes: a3c53be55c95 ("net: dsa: mv88e6xxx: Support multiple MDIO busses") Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 70004264f60d..66d33e97cbc5 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2177,6 +2177,19 @@ static const struct of_device_id mv88e6xxx_mdio_external_match[] = { { }, }; +static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip) + +{ + struct mv88e6xxx_mdio_bus *mdio_bus; + struct mii_bus *bus; + + list_for_each_entry(mdio_bus, &chip->mdios, list) { + bus = mdio_bus->bus; + + mdiobus_unregister(bus); + } +} + static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip, struct device_node *np) { @@ -2201,27 +2214,16 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip, match = of_match_node(mv88e6xxx_mdio_external_match, child); if (match) { err = mv88e6xxx_mdio_register(chip, child, true); - if (err) + if (err) { + mv88e6xxx_mdios_unregister(chip); return err; + } } } return 0; } -static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip) - -{ - struct mv88e6xxx_mdio_bus *mdio_bus; - struct mii_bus *bus; - - list_for_each_entry(mdio_bus, &chip->mdios, list) { - bus = mdio_bus->bus; - - mdiobus_unregister(bus); - } -} - static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; From 74c4b656c3d92ec4c824ea1a4afd726b7b6568c8 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Wed, 6 Dec 2017 17:15:43 -0800 Subject: [PATCH 46/67] adding missing rcu_read_unlock in ipxip6_rcv commit 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") introduced new exit point in ipxip6_rcv. however rcu_read_unlock is missing there. this diff is fixing this v1->v2: instead of doing rcu_read_unlock in place, we are going to "drop" section (to prevent skb leakage) Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Signed-off-by: Nikita V. Shirokov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3d3092adf1d2..db84f523656d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -904,7 +904,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, if (t->parms.collect_md) { tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); if (!tun_dst) - return 0; + goto drop; } ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_error); From 6e237d099fac1f73a7b6d7287bb9191f29585a4e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 6 Dec 2017 20:09:12 -0800 Subject: [PATCH 47/67] netlink: Relax attr validation for fixed length types Commit 28033ae4e0f5 ("net: netlink: Update attr validation to require exact length for some types") requires attributes using types NLA_U* and NLA_S* to have an exact length. This change is exposing bugs in various userspace commands that are sending attributes with an invalid length (e.g., attribute has type NLA_U8 and userspace sends NLA_U32). While the commands are clearly broken and need to be fixed, users are arguing that the sudden change in enforcement is breaking older commands on newer kernels for use cases that otherwise "worked". Relax the validation to print a warning mesage similar to what is done for messages containing extra bytes after parsing. Fixes: 28033ae4e0f5 ("net: netlink: Update attr validation to require exact length for some types") Signed-off-by: David Ahern Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- lib/nlattr.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/nlattr.c b/lib/nlattr.c index 8bf78b4b78f0..dfa55c873c13 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -15,7 +15,11 @@ #include #include -/* for these data types attribute length must be exactly given size */ +/* For these data types, attribute length should be exactly the given + * size. However, to maintain compatibility with broken commands, if the + * attribute length does not match the expected size a warning is emitted + * to the user that the command is sending invalid data and needs to be fixed. + */ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), @@ -28,8 +32,16 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_U8] = sizeof(u8), + [NLA_U16] = sizeof(u16), + [NLA_U32] = sizeof(u32), + [NLA_U64] = sizeof(u64), [NLA_MSECS] = sizeof(u64), [NLA_NESTED] = NLA_HDRLEN, + [NLA_S8] = sizeof(s8), + [NLA_S16] = sizeof(s16), + [NLA_S32] = sizeof(s32), + [NLA_S64] = sizeof(s64), }; static int validate_nla_bitfield32(const struct nlattr *nla, @@ -69,11 +81,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); - /* for data types NLA_U* and NLA_S* require exact length */ - if (nla_attr_len[pt->type]) { - if (attrlen != nla_attr_len[pt->type]) - return -ERANGE; - return 0; + if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) { + pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", + current->comm, type); } switch (pt->type) { From 8632385022f2b05a6ca0b9e0f95575865de0e2ce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Dec 2017 11:08:19 -0800 Subject: [PATCH 48/67] tcp: use current time in tcp_rcv_space_adjust() When I switched rcv_rtt_est to high resolution timestamps, I forgot that tp->tcp_mstamp needed to be refreshed in tcp_rcv_space_adjust() Using an old timestamp leads to autotuning lags. Fixes: 645f4c6f2ebd ("tcp: switch rcv_rtt_est and rcvq_space to high resolution timestamps") Signed-off-by: Eric Dumazet Cc: Wei Wang Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 734cfc8ff76e..514c00732988 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -579,6 +579,7 @@ void tcp_rcv_space_adjust(struct sock *sk) int time; int copied; + tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) return; From a4abd7a80addb4a9547f7dfc7812566b60ec505c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 6 Dec 2017 20:21:24 +0100 Subject: [PATCH 49/67] usbnet: fix alignment for frames with no ethernet header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The qmi_wwan minidriver support a 'raw-ip' mode where frames are received without any ethernet header. This causes alignment issues because the skbs allocated by usbnet are "IP aligned". Fix by allowing minidrivers to disable the additional alignment offset. This is implemented using a per-device flag, since the same minidriver also supports 'ethernet' mode. Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode") Reported-and-tested-by: Jay Foster Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 ++ drivers/net/usb/usbnet.c | 5 ++++- include/linux/usb/usbnet.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c750cf7c042b..304ec6555cd8 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -261,9 +261,11 @@ static void qmi_wwan_netdev_setup(struct net_device *net) net->hard_header_len = 0; net->addr_len = 0; net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + set_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 80348b6a8646..d56fe32bf48d 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -484,7 +484,10 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) return -ENOLINK; } - skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); + if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags)) + skb = __netdev_alloc_skb(dev->net, size, flags); + else + skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); if (!skb) { netif_dbg(dev, rx_err, dev->net, "no rx skb\n"); usbnet_defer_kevent (dev, EVENT_RX_MEMORY); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index a69877734c4e..e2ec3582e549 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -82,6 +82,7 @@ struct usbnet { # define EVENT_RX_KILL 10 # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 +# define EVENT_NO_IP_ALIGN 13 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) From c7f33023308f3142433b7379718af5f0c2c322a6 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 10:34:03 -0800 Subject: [PATCH 50/67] can: mcba_usb: cancel urb on -EPROTO When we unplug the device, we can see both -EPIPE and -EPROTO depending on exact timing and what system we run on. If we continue to resubmit URBs, they will immediately fail, and they can cause stalls, especially on slower CPUs. Fix this by not resubmitting on -EPROTO, as we already do on -EPIPE. Signed-off-by: Martin Kelly Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index ef417dcddbf7..8d8c2086424d 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -593,6 +593,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb) case -ENOENT: case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; From bd352e1adfe0d02d3ea7c8e3fb19183dc317e679 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 11:15:47 -0800 Subject: [PATCH 51/67] can: ems_usb: cancel urb on -EPIPE and -EPROTO In mcba_usb, we have observed that when you unplug the device, the driver will endlessly resubmit failing URBs, which can cause CPU stalls. This issue is fixed in mcba_usb by catching the codes seen on device disconnect (-EPIPE and -EPROTO). This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it in the same way. Signed-off-by: Martin Kelly Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b3d02759c226..b00358297424 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -288,6 +288,8 @@ static void ems_usb_read_interrupt_callback(struct urb *urb) case -ECONNRESET: /* unlink */ case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; From 7a31ced3de06e9878e4f9c3abe8f87d9344d8144 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 11:15:48 -0800 Subject: [PATCH 52/67] can: esd_usb2: cancel urb on -EPIPE and -EPROTO In mcba_usb, we have observed that when you unplug the device, the driver will endlessly resubmit failing URBs, which can cause CPU stalls. This issue is fixed in mcba_usb by catching the codes seen on device disconnect (-EPIPE and -EPROTO). This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it in the same way. Signed-off-by: Martin Kelly Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 9fdb0f0bfa06..c6dcf93675c0 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -393,6 +393,8 @@ static void esd_usb2_read_bulk_callback(struct urb *urb) break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; From 6aa8d5945502baf4687d80de59b7ac865e9e666b Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 11:15:49 -0800 Subject: [PATCH 53/67] can: kvaser_usb: cancel urb on -EPIPE and -EPROTO In mcba_usb, we have observed that when you unplug the device, the driver will endlessly resubmit failing URBs, which can cause CPU stalls. This issue is fixed in mcba_usb by catching the codes seen on device disconnect (-EPIPE and -EPROTO). This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it in the same way. Signed-off-by: Martin Kelly Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index f95945915d20..63587b8e6825 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1326,6 +1326,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) case 0: break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; default: From 12147edc434c9e4c7c2f5fee2e5519b2e5ac34ce Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Tue, 5 Dec 2017 11:15:50 -0800 Subject: [PATCH 54/67] can: usb_8dev: cancel urb on -EPIPE and -EPROTO In mcba_usb, we have observed that when you unplug the device, the driver will endlessly resubmit failing URBs, which can cause CPU stalls. This issue is fixed in mcba_usb by catching the codes seen on device disconnect (-EPIPE and -EPROTO). This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it in the same way. Signed-off-by: Martin Kelly Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index d000cb62d6ae..27861c417c94 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -524,6 +524,8 @@ static void usb_8dev_read_bulk_callback(struct urb *urb) break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; From 91785de6f94b58c3fb6664609e3682f011bd28d2 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 7 Dec 2017 16:13:43 +0100 Subject: [PATCH 55/67] can: peak/pcie_fd: fix potential bug in restarting tx queue Don't rely on can_get_echo_skb() return value to wake the network tx queue up: can_get_echo_skb() returns 0 if the echo array slot was not occupied, but also when the DLC of the released echo frame was 0. Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/peak_canfd/peak_canfd.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 85268be0c913..55513411a82e 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -258,21 +258,18 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, /* if this frame is an echo, */ if ((rx_msg_flags & PUCAN_MSG_LOOPED_BACK) && !(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE)) { - int n; unsigned long flags; spin_lock_irqsave(&priv->echo_lock, flags); - n = can_get_echo_skb(priv->ndev, msg->client); + can_get_echo_skb(priv->ndev, msg->client); spin_unlock_irqrestore(&priv->echo_lock, flags); /* count bytes of the echo instead of skb */ stats->tx_bytes += cf_len; stats->tx_packets++; - if (n) { - /* restart tx queue only if a slot is free */ - netif_wake_queue(priv->ndev); - } + /* restart tx queue (a slot is free) */ + netif_wake_queue(priv->ndev); return 0; } From d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 7 Dec 2017 13:41:34 -0800 Subject: [PATCH 56/67] tcp: invalidate rate samples during SACK reneging Mark tcp_sock during a SACK reneging event and invalidate rate samples while marked. Such rate samples may overestimate bw by including packets that were SACKed before reneging. < ack 6001 win 10000 sack 7001:38001 < ack 7001 win 0 sack 8001:38001 // Reneg detected > seq 7001:8001 // RTO, SACK cleared. < ack 38001 win 10000 In above example the rate sample taken after the last ack will count 7001-38001 as delivered while the actual delivery rate likely could be much lower i.e. 7001-8001. This patch adds a new field tcp_sock.sack_reneg and marks it when we declare SACK reneging and entering TCP_CA_Loss, and unmarks it after the last rate sample was taken before moving back to TCP_CA_Open. This patch also invalidates rate samples taken while tcp_sock.is_sack_reneg is set. Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection") Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Eric Dumazet Acked-by: Priyaranjan Jha Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 10 ++++++++-- net/ipv4/tcp_rate.c | 10 +++++++--- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index df5d97a85e1a..ca4a6361389b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,7 +224,8 @@ struct tcp_sock { rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ - unused:3; + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + unused:2; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, diff --git a/include/net/tcp.h b/include/net/tcp.h index 6998707e81f3..6da880d2f022 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1055,7 +1055,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct rate_sample *rs); + bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bf97317e6c97..f08eebe60446 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2412,6 +2412,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; tcp_clear_retrans(tp); inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 514c00732988..075c559570e6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1942,6 +1942,8 @@ void tcp_enter_loss(struct sock *sk) if (is_reneg) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; + /* Mark SACK reneging until we recover from this loss event. */ + tp->is_sack_reneg = 1; } tcp_clear_all_retrans_hints(tp); @@ -2365,6 +2367,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) return true; } tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; return false; } @@ -2398,8 +2401,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - if (frto_undo || tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + } return true; } return false; @@ -3496,6 +3501,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct tcp_sacktag_state sack_state; struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; + bool is_sack_reneg = tp->is_sack_reneg; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; @@ -3612,7 +3618,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, sack_state.rate); + tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); return 1; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 3330a370d306..c61240e43923 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct rate_sample *rs) + bool is_sack_reneg, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; @@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ - /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp) { + /* Return an invalid sample if no timing information is available or + * in recovery from loss with SACK reneging. Rate samples taken during + * a SACK reneging event may overestimate bw by including packets that + * were SACKed before the reneg. + */ + if (!rs->prior_mstamp || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return; From b6b5e8a691185606dfffff3198c89e3b4fd9d4f6 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Thu, 7 Dec 2017 18:44:23 +0200 Subject: [PATCH 57/67] gianfar: Disable EEE autoneg by default This controller does not support EEE, but it may connect to a PHY which supports EEE and advertises EEE by default, while its link partner also advertises EEE. If this happens, the PHY enters low power mode when the traffic rate is low and causes packet loss. This patch disables EEE advertisement by default for any PHY that gianfar connects to, to prevent the above unwanted outcome. Signed-off-by: Shaohui Xie Tested-by: Yangbo Lu Signed-off-by: Claudiu Manoil Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 81a73af0df31..7f837006bb6a 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1792,6 +1792,7 @@ static int init_phy(struct net_device *dev) GFAR_SUPPORTED_GBIT : 0; phy_interface_t interface; struct phy_device *phydev; + struct ethtool_eee edata; priv->oldlink = 0; priv->oldspeed = 0; @@ -1816,6 +1817,10 @@ static int init_phy(struct net_device *dev) /* Add support for flow control, but don't advertise it by default */ phydev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause); + /* disable EEE autoneg, EEE not supported by eTSEC */ + memset(&edata, 0, sizeof(struct ethtool_eee)); + phy_ethtool_set_eee(phydev, &edata); + return 0; } From d4a7a8893d4cdbc89d79ac4aa704bf8d4b67b368 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 7 Dec 2017 17:18:58 +0000 Subject: [PATCH 58/67] sfc: pass valid pointers from efx_enqueue_unwind The bytes_compl and pkts_compl pointers passed to efx_dequeue_buffers cannot be NULL. Add a paranoid warning to check this condition and fix the one case where they were NULL. efx_enqueue_unwind() is called very rarely, during error handling. Without this fix it would fail with a NULL pointer dereference in efx_dequeue_buffer, with efx_enqueue_skb in the call stack. Fixes: e9117e5099ea ("sfc: Firmware-Assisted TSO version 2") Reported-by: Jarod Wilson Signed-off-by: Bert Kenward Tested-by: Jarod Wilson Acked-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/tx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 0ea7e16f2e6e..9937a2450e57 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -77,6 +77,7 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, } if (buffer->flags & EFX_TX_BUF_SKB) { + EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); (*pkts_compl)++; (*bytes_compl) += buffer->skb->len; dev_consume_skb_any((struct sk_buff *)buffer->skb); @@ -426,12 +427,14 @@ static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) { struct efx_tx_buffer *buffer; + unsigned int bytes_compl = 0; + unsigned int pkts_compl = 0; /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { --tx_queue->insert_count; buffer = __efx_tx_queue_get_insert_buffer(tx_queue); - efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); } } From c589e69b508d29ed8e644dfecda453f71c02ec27 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 7 Dec 2017 12:43:30 -0500 Subject: [PATCH 59/67] tcp_bbr: record "full bw reached" decision in new full_bw_reached bit This commit records the "full bw reached" decision in a new full_bw_reached bit. This is a pure refactor that does not change the current behavior, but enables subsequent fixes and improvements. In particular, this enables simple and clean fixes because the full_bw and full_bw_cnt can be unconditionally zeroed without worrying about forgetting that we estimated we filled the pipe in Startup. And it enables future improvements because multiple code paths can be used for estimating that we filled the pipe in Startup; any new code paths only need to set this bit when they think the pipe is full. Note that this fix intentionally reduces the width of the full_bw_cnt counter, since we have never used the most significant bit. Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 69ee877574d0..3089c956b9f9 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -110,7 +110,8 @@ struct bbr { u32 lt_last_lost; /* LT intvl start: tp->lost */ u32 pacing_gain:10, /* current gain for setting pacing rate */ cwnd_gain:10, /* current gain for setting cwnd */ - full_bw_cnt:3, /* number of rounds without large bw gains */ + full_bw_reached:1, /* reached full bw in Startup? */ + full_bw_cnt:2, /* number of rounds without large bw gains */ cycle_idx:3, /* current index in pacing_gain cycle array */ has_seen_rtt:1, /* have we seen an RTT sample yet? */ unused_b:5; @@ -180,7 +181,7 @@ static bool bbr_full_bw_reached(const struct sock *sk) { const struct bbr *bbr = inet_csk_ca(sk); - return bbr->full_bw_cnt >= bbr_full_bw_cnt; + return bbr->full_bw_reached; } /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ @@ -717,6 +718,7 @@ static void bbr_check_full_bw_reached(struct sock *sk, return; } ++bbr->full_bw_cnt; + bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; } /* If pipe is probably full, drain the queue and then enter steady-state. */ @@ -850,6 +852,7 @@ static void bbr_init(struct sock *sk) bbr->restore_cwnd = 0; bbr->round_start = 0; bbr->idle_restart = 0; + bbr->full_bw_reached = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; bbr->cycle_mstamp = 0; From 2f6c498e4f15d27852c04ed46d804a39137ba364 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 7 Dec 2017 12:43:31 -0500 Subject: [PATCH 60/67] tcp_bbr: reset full pipe detection on loss recovery undo Fix BBR so that upon notification of a loss recovery undo BBR resets the full pipe detection (STARTUP exit) state machine. Under high reordering, reordering events can be interpreted as loss. If the reordering and spurious loss estimates are high enough, this could previously cause BBR to spuriously estimate that the pipe is full. Since spurious loss recovery means that our overall sending will have slowed down spuriously, this commit gives a flow more time to probe robustly for bandwidth and decide the pipe is really full. Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 3089c956b9f9..ab3ff14ea7f7 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -874,6 +874,10 @@ static u32 bbr_sndbuf_expand(struct sock *sk) */ static u32 bbr_undo_cwnd(struct sock *sk) { + struct bbr *bbr = inet_csk_ca(sk); + + bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ + bbr->full_bw_cnt = 0; return tcp_sk(sk)->snd_cwnd; } From 600647d467c6d04b3954b41a6ee1795b5ae00550 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 7 Dec 2017 12:43:32 -0500 Subject: [PATCH 61/67] tcp_bbr: reset long-term bandwidth sampling on loss recovery undo Fix BBR so that upon notification of a loss recovery undo BBR resets long-term bandwidth sampling. Under high reordering, reordering events can be interpreted as loss. If the reordering and spurious loss estimates are high enough, this can cause BBR to spuriously estimate that we are seeing loss rates high enough to trigger long-term bandwidth estimation. To avoid that problem, this commit resets long-term bandwidth sampling on loss recovery undo events. Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index ab3ff14ea7f7..8322f26e770e 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -878,6 +878,7 @@ static u32 bbr_undo_cwnd(struct sock *sk) bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ bbr->full_bw_cnt = 0; + bbr_reset_lt_bw_sampling(sk); return tcp_sk(sk)->snd_cwnd; } From 2edbdb3159d6f6bd3a9b6e7f789f2b879699a519 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Fri, 8 Dec 2017 09:05:26 -0800 Subject: [PATCH 62/67] bnxt_en: Fix sources of spurious netpoll warnings After applying 2270bc5da3497945 ("bnxt_en: Fix netpoll handling") and 903649e718f80da2 ("bnxt_en: Improve -ENOMEM logic in NAPI poll loop."), we still see the following WARN fire: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1875170 at net/core/netpoll.c:165 netpoll_poll_dev+0x15a/0x160 bnxt_poll+0x0/0xd0 exceeded budget in poll Call Trace: [] dump_stack+0x4d/0x70 [] __warn+0xd3/0xf0 [] warn_slowpath_fmt+0x4f/0x60 [] netpoll_poll_dev+0x15a/0x160 [] netpoll_send_skb_on_dev+0x168/0x250 [] netpoll_send_udp+0x2dc/0x440 [] write_ext_msg+0x20e/0x250 [] call_console_drivers.constprop.23+0xa5/0x110 [] console_unlock+0x339/0x5b0 [] vprintk_emit+0x2c8/0x450 [] vprintk_default+0x1f/0x30 [] printk+0x48/0x50 [] edac_raw_mc_handle_error+0x563/0x5c0 [edac_core] [] edac_mc_handle_error+0x42b/0x6e0 [edac_core] [] sbridge_mce_output_error+0x410/0x10d0 [sb_edac] [] sbridge_check_error+0xac/0x130 [sb_edac] [] edac_mc_workq_function+0x3c/0x90 [edac_core] [] process_one_work+0x19b/0x480 [] worker_thread+0x6a/0x520 [] kthread+0xe4/0x100 [] ret_from_fork+0x22/0x40 This happens because we increment rx_pkts on -ENOMEM and -EIO, resulting in rx_pkts > 0. Fix this by only bumping rx_pkts if we were actually given a non-zero budget. Signed-off-by: Calvin Owens Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 28f5e94274ee..61ca4eb7c6fa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1883,7 +1883,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) * here forever if we consistently cannot allocate * buffers. */ - else if (rc == -ENOMEM) + else if (rc == -ENOMEM && budget) rx_pkts++; else if (rc == -EBUSY) /* partial completion */ break; @@ -1969,7 +1969,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR); rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); - if (likely(rc == -EIO)) + if (likely(rc == -EIO) && budget) rx_pkts++; else if (rc == -EBUSY) /* partial completion */ break; From 0ce294d88457bccd7f9991f883fec80022a1ddbd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 7 Dec 2017 11:33:30 -0800 Subject: [PATCH 63/67] tcp: correctly test congestion state in RACK RACK does not test the loss recovery state correctly to compute the reordering window. It assumes if lost_out is zero then TCP is not in loss recovery. But it can be zero during recovery before calling tcp_rack_detect_loss(): when an ACK acknowledges all packets marked lost before receiving this ACK, but has not yet to discover new ones by tcp_rack_detect_loss(). The fix is to simply test the congestion state directly. Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index d3ea89020c69..3143664902e9 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) * to queuing or delayed ACKs. */ reo_wnd = 1000; - if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { + if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) && + min_rtt != ~0U) { reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); reo_wnd = min(reo_wnd, tp->srtt_us >> 3); } From cd1fc85b4399d47e3d6626301741ba8c38cd475a Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 7 Dec 2017 11:33:31 -0800 Subject: [PATCH 64/67] tcp: always evaluate losses in RACK upon undo When sender detects spurious retransmission, all packets marked lost are remarked to be in-flight. However some may be considered lost based on its timestamps in RACK. This patch forces RACK to re-evaluate, which may be skipped previously if the ACK does not advance RACK timestamp. Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075c559570e6..9550cc42de2d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2329,6 +2329,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) } tp->snd_cwnd_stamp = tcp_jiffies32; tp->undo_marker = 0; + tp->rack.advanced = 1; /* Force RACK to re-exam losses */ } static inline bool tcp_may_undo(const struct tcp_sock *tp) From 428aec5e69fa17d223e1495f395833c50770f7ae Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 7 Dec 2017 11:33:32 -0800 Subject: [PATCH 65/67] tcp: fix off-by-one bug in RACK RACK should mark a packet lost when remaining wait time is zero. Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 3143664902e9..0c182303e62e 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -80,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) */ remaining = tp->rack.rtt_us + reo_wnd - tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp); - if (remaining < 0) { + if (remaining <= 0) { tcp_rack_mark_skb_lost(sk, skb); list_del_init(&skb->tcp_tsorted_anchor); } else { - /* Record maximum wait time (+1 to avoid 0) */ - *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); + /* Record maximum wait time */ + *reo_timeout = max_t(u32, *reo_timeout, remaining); } } } From 6065fd0d179b96ddc488c76542349bcb148a95fd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 7 Dec 2017 11:33:33 -0800 Subject: [PATCH 66/67] tcp: evaluate packet losses upon RTT change RACK skips an ACK unless it advances the most recently delivered TX timestamp (rack.mstamp). Since RACK also uses the most recent RTT to decide if a packet is lost, RACK should still run the loss detection whenever the most recent RTT changes. For example, an ACK that does not advance the timestamp but triggers the cwnd undo due to reordering, would then use the most recent (higher) RTT measurement to detect further losses. Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 0c182303e62e..3a81720ac0c4 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -117,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, { u32 rtt_us; - if (tp->rack.mstamp && - !tcp_rack_sent_after(xmit_time, tp->rack.mstamp, - end_seq, tp->rack.end_seq)) - return; - rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); - if (sacked & TCPCB_RETRANS) { + if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { /* If the sacked packet was retransmitted, it's ambiguous * whether the retransmission or the original (or the prior * retransmission) was sacked. @@ -134,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, * so it's at least one RTT (i.e., retransmission is at least * an RTT later). */ - if (rtt_us < tcp_min_rtt(tp)) - return; + return; } - tp->rack.rtt_us = rtt_us; - tp->rack.mstamp = xmit_time; - tp->rack.end_seq = end_seq; tp->rack.advanced = 1; + tp->rack.rtt_us = rtt_us; + if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp, + end_seq, tp->rack.end_seq)) { + tp->rack.mstamp = xmit_time; + tp->rack.end_seq = end_seq; + } } /* We have waited long enough to accommodate reordering. Mark the expired From 8a7b741e76cd31b6000636f0391e67ba6793ad1c Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 8 Dec 2017 10:24:20 +0100 Subject: [PATCH 67/67] net: mvpp2: fix the RSS table entry offset The macro used to access or set an RSS table entry was using an offset of 8, while it should use an offset of 0. This lead to wrongly configure the RSS table, not accessing the right entries. Fixes: 1d7d15d79fb4 ("net: mvpp2: initialize the RSS tables") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index fed2b2f909fc..634b2f41cc9e 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -85,7 +85,7 @@ /* RSS Registers */ #define MVPP22_RSS_INDEX 0x1500 -#define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) ((idx) << 8) +#define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) (idx) #define MVPP22_RSS_INDEX_TABLE(idx) ((idx) << 8) #define MVPP22_RSS_INDEX_QUEUE(idx) ((idx) << 16) #define MVPP22_RSS_TABLE_ENTRY 0x1508