From 0608d4dbaf4ee30a33def4c31b78bc664c80ff79 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 17 Jan 2018 17:39:07 +0200 Subject: [PATCH 01/15] net/mlx5e: Unify slow PCI heuristic Get the link/pci speed query and logic into a single function. Unify the heuristics and use a single PCI threshold (16G) for all. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 31 +++++++++---------- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1d36d7569f44..46707826f27e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3902,16 +3902,20 @@ static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) return 0; } -static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) +static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) { - return (link_speed && pci_bw && - (pci_bw < 40000) && (pci_bw < link_speed)); -} + u32 link_speed = 0; + u32 pci_bw = 0; -static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw) -{ - return !(link_speed && pci_bw && - (pci_bw <= 16000) && (pci_bw < link_speed)); + mlx5e_get_max_linkspeed(mdev, &link_speed); + mlx5e_get_pci_bw(mdev, &pci_bw); + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + +#define MLX5E_SLOW_PCI_RATIO (2) + + return link_speed && pci_bw && + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -3980,17 +3984,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, u16 max_channels) { u8 cq_period_mode = 0; - u32 link_speed = 0; - u32 pci_bw = 0; params->num_channels = max_channels; params->num_tc = 1; - mlx5e_get_max_linkspeed(mdev, &link_speed); - mlx5e_get_pci_bw(mdev, &pci_bw); - mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - /* SQ */ params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@ -4000,7 +3997,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && MLX5_CAP_GEN(mdev, vport_group_manager)) - params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw); + params->rx_cqe_compress_def = slow_pci_heuristic(mdev); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); @@ -4011,7 +4008,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - params->lro_en = hw_lro_heuristic(link_speed, pci_bw); + params->lro_en = !slow_pci_heuristic(mdev); params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 4e25f2b2e0bc..7d001fe6e631 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -50,6 +50,11 @@ extern uint mlx5_core_debug_mask; __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_dbg_once(__dev, format, ...) 
\ + dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + #define mlx5_core_dbg_mask(__dev, mask, format, ...) \ do { \ if ((mask) & mlx5_core_debug_mask) \ From 291f445eab9b2b11c8dd05ae1a0943c328cb9b6b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 11 Feb 2018 11:58:30 +0200 Subject: [PATCH 02/15] net/mlx5e: Disable Striding RQ when PCI is slower than link We turn the feature off for servers whose PCI BW is bounded by a threshold (16G) and is lower than the MAX LINK BW. This improves the effectiveness of the CQE compression feature, which defaults to ON for the same case. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 46707826f27e..d4dd00089eb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -113,13 +113,16 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } +static bool slow_pci_heuristic(struct mlx5_core_dev *mdev); + static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && - !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; + !slow_pci_heuristic(mdev) && + !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; mlx5e_init_rq_type_params(mdev, params, rq_type); } From 73faf36c5ae6b300490f0a5f2835fa702adef0e1 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 21 Nov 2017 17:43:50 +0200 Subject: [PATCH 03/15] net/mlx5e: Remove unused define MLX5_MPWRQ_STRIDES_PER_PAGE Clean it up as it's not in use. Fixes: d9d9f156f380 ("net/mlx5e: Expand WQE stride when CQE compression is enabled") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 294bc9f175a5..85767f0869d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -93,8 +93,6 @@ #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) -#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ - MLX5_MPWRQ_WQE_PAGE_ORDER) #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) #define MLX5E_REQUIRED_MTTS(wqes) \ From bd658dda423781dbe775a2d87d662a3145ec05a6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 27 Nov 2017 10:35:12 +0200 Subject: [PATCH 04/15] net/mlx5e: Separate dma base address and offset in dma_sync call Pass the base dma address and offset to dma_sync_single_range_for_cpu(), instead of pre-calculating their sum.
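For reference, both forms of the call cover the same byte range; the kernel API's parameters are (dev, base, offset, size, direction), so passing the offset separately matches the intended usage. A before/after sketch of the call changed in the hunk below:

    /* before: offset folded into the base address, offset argument unused */
    dma_sync_single_range_for_cpu(rq->pdev, di->addr + wi->offset, 0,
                                  frag_size, DMA_FROM_DEVICE);
    /* after: base and offset passed separately; the synced range
     * [base + offset, base + offset + size) is unchanged, but the offset
     * stays visible to the DMA layer.
     */
    dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
                                  frag_size, DMA_FROM_DEVICE);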
Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 8cce90dc461d..ffcbe5c3818a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -870,10 +870,8 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - dma_sync_single_range_for_cpu(rq->pdev, - di->addr + wi->offset, - 0, frag_size, - DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, + frag_size, DMA_FROM_DEVICE); prefetch(data); wi->offset += frag_size; From 24fd07abfa3584b91f65002956d3d5a5db169185 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Feb 2018 18:27:06 +0200 Subject: [PATCH 05/15] net/mlx5e: Use no-offset function in skb header copy In copying skb header to skb->data, replace the call to skb_copy_to_linear_data_offset() with a zero offset with the call to the no-offset function skb_copy_to_linear_data(). Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ffcbe5c3818a..781b8f21d6d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -333,9 +333,8 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, len = ALIGN(headlen_pg, sizeof(long)); dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data_offset(skb, 0, - page_address(dma_info->page) + offset, - len); + skb_copy_to_linear_data(skb, page_address(dma_info->page) + offset, len); + if (unlikely(offset + headlen > PAGE_SIZE)) { dma_info++; headlen_pg = len; From f1e4fc9b4b02ec728b06ad09d6cdf5a9bb7ec372 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 7 Feb 2018 13:21:30 +0200 Subject: [PATCH 06/15] net/mlx5e: Remove RQ MPWQE fields from params Introduce functions to calculate them when needed. They can be derived from other params. This will simplify transition between RQ configurations. In general, any parameter that is not explicitly set or controlled, but derived from other parameters, should not have a control-path field itself, but a getter function. 
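As a sketch of the pattern (the numeric values are illustrative assumptions, not taken from this patch): with a log WQE size of 18 and a log stride size of 6, the stride count is derived on demand rather than cached as a field in mlx5e_params:

    /* Sketch: derive the value from its sources when needed instead of
     * storing it; 18 and 6 are assumed example log sizes.
     */
    u8 example_log_num_strides(u8 log_wqe_sz, u8 log_stride_sz)
    {
            return log_wqe_sz - log_stride_sz; /* e.g. 18 - 6 = 12 -> 4096 strides */
    }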
Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 +++- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 13 ++----- .../net/ethernet/mellanox/mlx5/core/en_main.c | 38 +++++++++++++------ 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 85767f0869d8..ba7f1ceb6dcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -231,8 +231,6 @@ struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; u16 rq_headroom; - u8 mpwqe_log_stride_sz; - u8 mpwqe_log_num_strides; u8 log_rq_size; u16 num_channels; u8 num_tc; @@ -840,6 +838,11 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); +u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); + void mlx5e_update_stats(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d415e67b557b..234b5b2ebf0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -231,8 +231,8 @@ static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return num_wqe; - stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; - num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; + stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params); + num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params); wqe_size = stride_size * num_strides; packets_per_wqe = wqe_size / @@ -252,8 +252,8 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return num_packets; - stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; - num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; + stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params); + num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params); wqe_size = stride_size * num_strides; num_packets = (1 << order_base_2(num_packets)); @@ -1561,11 +1561,6 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); - new_channels.params.mpwqe_log_stride_sz = - MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val); - new_channels.params.mpwqe_log_num_strides = - MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d4dd00089eb1..65e6955713e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -78,6 +78,20 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) 
MLX5_CAP_ETH(mdev, reg_umr_sq); } +u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return MLX5E_MPWQE_STRIDE_SZ(mdev, + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); +} + +u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return MLX5_MPWRQ_LOG_WQE_SZ - + mlx5e_mpwqe_get_log_stride_size(mdev, params); +} + void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type) { @@ -88,10 +102,6 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev, - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); - params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - params->mpwqe_log_stride_sz; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ params->log_rq_size = is_kdump_kernel() ? @@ -109,7 +119,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, BIT(params->log_rq_size), - BIT(params->mpwqe_log_stride_sz), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)), MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } @@ -453,8 +463,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz; - rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides); + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params); + rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params)); byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; @@ -1745,13 +1755,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_rq_param *param) { + struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, params->mpwqe_log_num_strides - 9); - MLX5_SET(wq, wq, log_wqe_stride_size, params->mpwqe_log_stride_sz - 6); + MLX5_SET(wq, wq, log_wqe_num_of_strides, + mlx5e_mpwqe_get_log_num_strides(mdev, params) - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, + mlx5e_mpwqe_get_log_stride_size(mdev, params) - 6); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ @@ -1761,12 +1774,12 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, params->log_rq_size); - MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); param->wq.linear = 1; } @@ -1825,7 +1838,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = params->log_rq_size + params->mpwqe_log_num_strides; + log_cq_size = params->log_rq_size + + 
mlx5e_mpwqe_get_log_num_strides(priv->mdev, params); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ log_cq_size = params->log_rq_size; From b0cedc844c00991d323c13b82d651d372dcbbb12 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 7 Feb 2018 13:28:35 +0200 Subject: [PATCH 07/15] net/mlx5e: Remove rq_headroom field from params It can be derived from other params, calculate it via the dedicated function when needed. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - .../net/ethernet/mellanox/mlx5/core/en_main.c | 20 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ba7f1ceb6dcd..ff9aeda186a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -230,7 +230,6 @@ enum mlx5e_priv_flag { struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; - u16 rq_headroom; u8 log_rq_size; u16 num_channels; u8 num_tc; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65e6955713e7..4907b7bb08e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -92,6 +92,19 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, mlx5e_mpwqe_get_log_stride_size(mdev, params); } +static u16 mlx5e_get_rq_headroom(struct mlx5e_params *params) +{ + u16 linear_rq_headroom = params->xdp_prog ? + XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; + + linear_rq_headroom += NET_IP_ALIGN; + + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST) + return linear_rq_headroom; + + return 0; +} + void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type) { @@ -107,12 +120,9 @@ params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - params->rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - params->rq_headroom += NET_IP_ALIGN; /* Extra room needed for build_skb */ - params->lro_wqe_sz -= params->rq_headroom + + params->lro_wqe_sz -= mlx5e_get_rq_headroom(params) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } @@ -441,7 +451,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->buff.headroom = params->rq_headroom; + rq->buff.headroom = mlx5e_get_rq_headroom(params); switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: From 2a0f561bf81c13d36910c7312ac8c67f83320a07 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 18 Feb 2018 11:37:06 +0200 Subject: [PATCH 08/15] net/mlx5e: Do not reset Receive Queue params on every type change Do not implicitly call mlx5e_init_rq_type_params() upon every change in RQ type. It should be called only at channel creation.
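The resulting division of labor, condensed from the hunks below (sketch):

    /* at params build time: pick the RQ type, then derive sizes once */
    mlx5e_set_rq_type(mdev, params);
    mlx5e_init_rq_type_params(mdev, params);

    /* on XDP program (de)attach: only re-evaluate the type */
    mlx5e_set_rq_type(priv->mdev, &priv->channels.params);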
Fixes: 2fc4bfb7250d ("net/mlx5e: Dynamic RQ type infrastructure") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++-------- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 ++- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ff9aeda186a1..45d0c64e77e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -918,8 +918,7 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - u8 rq_type); + struct mlx5e_params *params); static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4907b7bb08e0..ffe3b2469032 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -106,9 +106,8 @@ static u16 mlx5e_get_rq_headroom(struct mlx5e_params *params) } void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u8 rq_type) + struct mlx5e_params *params) { - params->rq_wq_type = rq_type; params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -135,15 +134,14 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, static bool slow_pci_heuristic(struct mlx5_core_dev *mdev); -static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) +static void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { - u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && + params->rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && !slow_pci_heuristic(mdev) && !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? 
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; - mlx5e_init_rq_type_params(mdev, params, rq_type); } static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -3736,7 +3734,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) bpf_prog_put(old_prog); if (reset) /* change RQ type according to priv->xdp_prog */ - mlx5e_set_rq_params(priv->mdev, &priv->channels.params); + mlx5e_set_rq_type(priv->mdev, &priv->channels.params); if (was_opened && reset) mlx5e_open_locked(netdev); @@ -4029,7 +4027,8 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); /* RQ */ - mlx5e_set_rq_params(mdev, params); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); /* HW LRO */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index f953378bd13d..870584a07c48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -56,7 +56,8 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ - mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); + params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + mlx5e_init_rq_type_params(mdev, params); /* RQ size in ipoib by default is 512 */ params->log_rq_size = is_kdump_kernel() ? From 2ccb0a79018c9fafa913654163adc9dbac1280c5 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 7 Feb 2018 14:51:45 +0200 Subject: [PATCH 09/15] net/mlx5e: Add ethtool priv-flag for Striding RQ Add a control private flag in ethtool to enable/disable Striding RQ feature. 
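Once merged, the flag is toggled and inspected through the standard ethtool private-flags interface, for example (interface name assumed):

    ethtool --show-priv-flags eth0
    ethtool --set-priv-flags eth0 rx_striding_rq off

Turning the flag on is rejected with EOPNOTSUPP when the device lacks the capability, and with EINVAL when the current configuration (XDP, IPSec) precludes striding RQ.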
Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 ++++ .../ethernet/mellanox/mlx5/core/en_ethtool.c | 38 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 22 +++++++---- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 +- 4 files changed, 61 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 45d0c64e77e5..13dd7a97ae04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -205,12 +205,14 @@ static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { "rx_cqe_moder", "tx_cqe_moder", "rx_cqe_compress", + "rx_striding_rq", }; enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), + MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3), }; #define MLX5E_SET_PFLAG(params, pflag, enable) \ @@ -827,6 +829,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); + void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); @@ -917,6 +923,7 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 234b5b2ebf0f..7bfe17b7c279 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1598,6 +1598,38 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, return 0; } +static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err; + + if (enable) { + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return -EOPNOTSUPP; + if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params)) + return -EINVAL; + } + + new_channels.params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); + mlx5e_set_rq_type(mdev, &new_channels.params); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; +} + static int mlx5e_handle_pflag(struct net_device *netdev, u32 wanted_flags, enum mlx5e_priv_flag flag, @@ -1643,6 +1675,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_RX_CQE_COMPRESS, set_pflag_rx_cqe_compress); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_STRIDING_RQ, + set_pflag_rx_striding_rq); out: mutex_unlock(&priv->state_lock); diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ffe3b2469032..7610a7916e96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -71,7 +71,7 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; -static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { return MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && @@ -132,14 +132,17 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -static bool slow_pci_heuristic(struct mlx5_core_dev *mdev); - -static void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { - params->rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && - !slow_pci_heuristic(mdev) && - !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? + return mlx5e_check_fragmented_striding_rq_cap(mdev) && + !params->xdp_prog && !MLX5_IPSEC_DEV(mdev); +} + +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; } @@ -4027,6 +4030,9 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); /* RQ */ + if (mlx5e_striding_rq_possible(mdev, params)) + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, + !slow_pci_heuristic(mdev)); mlx5e_set_rq_type(mdev, params); mlx5e_init_rq_type_params(mdev, params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 870584a07c48..a35608faf8d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -56,7 +56,8 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ - params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false); + mlx5e_set_rq_type(mdev, params); mlx5e_init_rq_type_params(mdev, params); /* RQ size in ipoib by default is 512 */ From c4554fbccaa3306f65954ed0f1dab7abce7889f8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 21 Jan 2018 10:52:17 +0200 Subject: [PATCH 10/15] net/mlx5e: Remove unused max inline related code Commit 58d522912ac7 ("net/mlx5e: Support TX packet copy into WQE") introduced the max inline WQE as an ethtool tunable. One commit later, that functionality was made dependent on BlueFlame. Commit 6982ab609768 ("net/mlx5e: Xmit, no write combining") removed BlueFlame support, and with it the max inline WQE. This patch cleans up the leftovers from the removed feature. 
Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 -- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 32 ++----------------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 11 ------- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 1 - 4 files changed, 2 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 13dd7a97ae04..6898f5e26006 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -240,7 +240,6 @@ struct mlx5e_params { struct net_dim_cq_moder tx_cq_moderation; bool lro_en; u32 lro_wqe_sz; - u16 tx_max_inline; u8 tx_min_inline_mode; u8 rss_hfunc; u8 toeplitz_hash_key[40]; @@ -366,7 +365,6 @@ struct mlx5e_txqsq { void __iomem *uar_map; struct netdev_queue *txq; u32 sqn; - u16 max_inline; u8 min_inline_mode; u16 edge; struct device *pdev; @@ -1017,7 +1015,6 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); #endif -u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in, int inlen); void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7bfe17b7c279..c57c929d7973 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1118,13 +1118,9 @@ static int mlx5e_get_tunable(struct net_device *dev, const struct ethtool_tunable *tuna, void *data) { - const struct mlx5e_priv *priv = netdev_priv(dev); - int err = 0; + int err; switch (tuna->id) { - case ETHTOOL_TX_COPYBREAK: - *(u32 *)data = priv->channels.params.tx_max_inline; - break; case ETHTOOL_PFC_PREVENTION_TOUT: err = mlx5e_get_pfc_prevention_tout(dev, data); break; @@ -1141,35 +1137,11 @@ static int mlx5e_set_tunable(struct net_device *dev, const void *data) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; - int err = 0; - u32 val; + int err; mutex_lock(&priv->state_lock); switch (tuna->id) { - case ETHTOOL_TX_COPYBREAK: - val = *(u32 *)data; - if (val > mlx5e_get_max_inline_cap(mdev)) { - err = -EINVAL; - break; - } - - new_channels.params = priv->channels.params; - new_channels.params.tx_max_inline = val; - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - break; - } - - err = mlx5e_open_channels(priv, &new_channels); - if (err) - break; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - - break; case ETHTOOL_PFC_PREVENTION_TOUT: err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7610a7916e96..5d8eb0a9c0f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -993,7 +993,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->channel = c; sq->txq_ix = txq_ix; sq->uar_map = mdev->mlx5e_res.bfreg.map; - sq->max_inline = params->tx_max_inline; sq->min_inline_mode = params->tx_min_inline_mode; if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); @@ -3882,15 +3881,6 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) return 0; } 
-u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) -{ - int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; - - return bf_buf_size - - sizeof(struct mlx5e_tx_wqe) + - 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; -} - void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels) { @@ -4052,7 +4042,6 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_set_tx_cq_mode_params(params, cq_period_mode); /* TX inline */ - params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); /* RSS */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index ea4b255380a2..dd32f3e390ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -884,7 +884,6 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); params->num_tc = 1; params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; From bfc647d52e67dc756c605e9a50d45b71054c2533 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 16 Jan 2018 17:25:06 +0200 Subject: [PATCH 11/15] net/mlx5e: Move all TX timeout logic to be under state lock The driver callback for handling a TX timeout must access some internal resources (SQ, CQ) in order to decide whether the tx timeout work should be scheduled. These resources might be unavailable if channels are closed in parallel (ifdown, for example). The state lock is the mechanism that protects against such races. Move all TX timeout logic into the work, under the state lock. In addition, move the work from the global WQ to the mlx5e WQ to make sure it is flushed when the device is detached. Also, move the mlx5e_tx_timeout_work code to be next to the TX timeout NDO for better code locality.
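The resulting shape, condensed (sketch): the NDO only reports and schedules, while all SQ/CQ inspection and the close/open recovery run in the work item, where rtnl and the state lock are held and the MLX5E_STATE_OPENED bit is re-checked:

    static void mlx5e_tx_timeout(struct net_device *dev)
    {
            struct mlx5e_priv *priv = netdev_priv(dev);

            netdev_err(dev, "TX timeout detected\n");
            queue_work(priv->wq, &priv->tx_timeout_work); /* mlx5e WQ: flushed on detach */
    }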
Fixes: 3947ca185999 ("net/mlx5e: Implement ndo_tx_timeout callback") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 61 +++++++++++-------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5d8eb0a9c0f0..e0b75f52d556 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -177,26 +177,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -static void mlx5e_tx_timeout_work(struct work_struct *work) -{ - struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, - tx_timeout_work); - int err; - - rtnl_lock(); - mutex_lock(&priv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); - if (err) - netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", - err); -unlock: - mutex_unlock(&priv->state_lock); - rtnl_unlock(); -} - void mlx5e_update_stats(struct mlx5e_priv *priv) { int i; @@ -3658,13 +3638,19 @@ static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, return true; } -static void mlx5e_tx_timeout(struct net_device *dev) +static void mlx5e_tx_timeout_work(struct work_struct *work) { - struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + struct net_device *dev = priv->netdev; bool reopen_channels = false; - int i; + int i, err; - netdev_err(dev, "TX timeout detected\n"); + rtnl_lock(); + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); @@ -3672,7 +3658,9 @@ static void mlx5e_tx_timeout(struct net_device *dev) if (!netif_xmit_stopped(dev_queue)) continue; - netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + + netdev_err(dev, + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, jiffies_to_usecs(jiffies - dev_queue->trans_start)); @@ -3685,8 +3673,27 @@ static void mlx5e_tx_timeout(struct net_device *dev) } } - if (reopen_channels && test_bit(MLX5E_STATE_OPENED, &priv->state)) - schedule_work(&priv->tx_timeout_work); + if (!reopen_channels) + goto unlock; + + mlx5e_close_locked(dev); + err = mlx5e_open_locked(dev); + if (err) + netdev_err(priv->netdev, + "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + err); + +unlock: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); +} + +static void mlx5e_tx_timeout(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + netdev_err(dev, "TX timeout detected\n"); + queue_work(priv->wq, &priv->tx_timeout_work); } static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) From 2816077127230ef52cc7497903e71def45747611 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 26 Dec 2017 15:17:05 +0200 Subject: [PATCH 12/15] mlx5_{ib,core}: Add query SQ state helper function Move query SQ state function from mlx5_ib to mlx5_core in order to have it in shared code. It will be used in a downstream patch from mlx5e. 
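Typical usage of the new helper from driver code (sketch; the actual caller arrives later in this series, and mlx5e_handle_sq_error() here is a hypothetical placeholder):

    u8 state;
    int err;

    err = mlx5_core_query_sq_state(mdev, sqn, &state);
    if (!err && state == MLX5_SQC_STATE_ERR)
            mlx5e_handle_sq_error(sqn); /* hypothetical follow-up action */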
Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/qp.c | 14 +---------- .../ethernet/mellanox/mlx5/core/transobj.c | 25 +++++++++++++++++++ include/linux/mlx5/transobj.h | 1 + 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 85c612ac547a..0d0b0b8dad98 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4739,26 +4739,14 @@ static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u8 *sq_state) { - void *out; - void *sqc; - int inlen; int err; - inlen = MLX5_ST_SZ_BYTES(query_sq_out); - out = kvzalloc(inlen, GFP_KERNEL); - if (!out) - return -ENOMEM; - - err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out); + err = mlx5_core_query_sq_state(dev->mdev, sq->base.mqp.qpn, sq_state); if (err) goto out; - - sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); - *sq_state = MLX5_GET(sqc, sqc, state); sq->state = *sq_state; out: - kvfree(out); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 9e38343a951f..c64957b5ef47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -157,6 +157,31 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) } EXPORT_SYMBOL(mlx5_core_query_sq); +int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state) +{ + void *out; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_sq_out); + out = kvzalloc(inlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_sq(dev, sqn, out); + if (err) + goto out; + + sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); + *state = MLX5_GET(sqc, sqc, state); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state); + int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn) { diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 7e8f281f8c00..80d7aa8b2831 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -47,6 +47,7 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out); +int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, From 1acae6b030164217b9c6a52245eade730057152b Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 31 Dec 2017 12:55:26 +0200 Subject: [PATCH 13/15] mlx5: Move dump error CQE function out of mlx5_ib for code sharing Move mlx5_ib dump error CQE implementation to mlx5 CQ header file in order to use it in a downstream patch from mlx5e. In addition, use print_hex_dump instead of manual dumping of the buffer. 
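The print_hex_dump() arguments chosen for the helper below mean: KERN_WARNING log level, empty prefix string, offset-prefixed rows, 16 bytes per row in 1-byte groups, and no ASCII side column; a 64-byte error CQE therefore prints as four hex rows, matching the old hand-rolled loop:

    /* same call as in the new helper, annotated */
    print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
                   16, 1,                       /* rowsize, groupsize */
                   err_cqe, sizeof(*err_cqe),   /* 64-byte CQE */
                   false);                      /* no ASCII column */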
Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/cq.c | 8 +------- include/linux/mlx5/cq.h | 6 ++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 94a27d89a303..77d257ec899b 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -267,14 +267,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) { - __be32 *p = (__be32 *)cqe; - int i; - mlx5_ib_warn(dev, "dump error cqe\n"); - for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4) - pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]), - be32_to_cpu(p[1]), be32_to_cpu(p[2]), - be32_to_cpu(p[3])); + mlx5_dump_err_cqe(dev->mdev, cqe); } static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 445ad194e0fe..0ef6138eca49 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -193,6 +193,12 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count); +static inline void mlx5_dump_err_cqe(struct mlx5_core_dev *dev, + struct mlx5_err_cqe *err_cqe) +{ + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, + sizeof(*err_cqe), false); +} int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); From 16cc14d817338fc297970d2d9d146c88ec87474d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 9 Jan 2018 16:21:16 +0200 Subject: [PATCH 14/15] net/mlx5e: Dump xmit error completions Monitor and dump xmit error completions. In addition, add an err_cqe counter to track the number of error completions per send queue.
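The new counters surface through the standard statistics interface, for example (interface name assumed, output illustrative):

    $ ethtool -S eth0 | grep cqe_err
         tx_cqe_err: 0
         tx0_cqe_err: 0

A non-zero value here is the first sign that an SQ was moved to the error state; the final patch of this series acts on it.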
Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_stats.c | 3 +++ .../ethernet/mellanox/mlx5/core/en_stats.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/en_tx.c | 19 +++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index c0dab9a8969e..ad91d9de0240 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -60,6 +60,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, @@ -153,6 +154,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; + s->tx_cqe_err += sq_stats->cqe_err; s->tx_xmit_more += sq_stats->xmit_more; s->tx_csum_partial_inner += sq_stats->csum_partial_inner; s->tx_csum_none += sq_stats->csum_none; @@ -1103,6 +1105,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, }; static const struct counter_desc ch_stats_desc[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 43a72efa28c0..43dc808684c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -78,6 +78,7 @@ struct mlx5e_sw_stats { u64 tx_queue_wake; u64 tx_queue_dropped; u64 tx_xmit_more; + u64 tx_cqe_err; u64 rx_wqe_err; u64 rx_mpwqe_filler; u64 rx_buff_alloc_err; @@ -197,6 +198,7 @@ struct mlx5e_sq_stats { u64 stopped; u64 wake; u64 dropped; + u64 cqe_err; }; struct mlx5e_ch_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 11b4f1089d1c..88b5b7bfc9a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -417,6 +417,18 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return mlx5e_sq_xmit(sq, skb, wqe, pi); } +static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, + struct mlx5_err_cqe *err_cqe) +{ + u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq); + + netdev_err(sq->channel->netdev, + "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome, + err_cqe->vendor_err_synd); + mlx5_dump_err_cqe(sq->cq.mdev, err_cqe); +} + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_txqsq *sq; @@ -456,6 +468,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); + if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) { + if (!sq->stats.cqe_err) + mlx5e_dump_error_cqe(sq, + (struct mlx5_err_cqe *)cqe); + sq->stats.cqe_err++; + } + do { struct mlx5e_tx_wqe_info 
*wi; struct sk_buff *skb; From db75373c91b0cfb6a68ad6ae88721e4e21ae6261 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 26 Dec 2017 16:02:24 +0200 Subject: [PATCH 15/15] net/mlx5e: Recover Send Queue (SQ) from error state An error TX completion (CQE) arriving on a specific SQ indicates that the hardware moved this SQ to the error state, which means all pending and incoming TX requests are dropped or will be dropped and no further "good" CQEs will be generated for that SQ. Before this patch, such error completions were not monitored and were handled as regular CQEs. This caused the SQ to stay in an error state, making it useless for transmitting new packets. Mitigation plan: in case of an error completion, schedule a recovery work item which does the following:
- Mark the TXQ as DRV_XOFF so that no new packets arrive from the stack.
- Let NAPI flush all pending SQ WQEs (via the flush_in_error_en bit) to release SW and HW resources (SKB, DMA, etc.) and to sync the SQ and CQ consumer/producer indices.
- Modify the SQ state ERR -> RST -> RDY (restart the SQ).
- Reactivate the SQ and reset the SQ cc and pc.
If we identify two consecutive recovery requests for the same SQ less than 500 msecs apart, drop the request to avoid CPU overload, as this scenario most likely results from a severe, repeating bug. In addition, add an SQ recover SW counter to monitor successful recoveries. Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 115 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.c | 3 + .../ethernet/mellanox/mlx5/core/en_stats.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_tx.c | 10 +- 5 files changed, 134 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 6898f5e26006..353ac6daa3dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -122,6 +122,7 @@ #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ +#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ #define MLX5E_ICOSQ_MAX_WQEBBS \ (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) @@ -332,6 +333,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, }; @@ -378,6 +380,10 @@ struct mlx5e_txqsq { struct mlx5e_channel *channel; int txq_ix; u32 rate_limit; + struct mlx5e_txqsq_recover { + struct work_struct recover_work; + u64 last_recover; + } recover; } ____cacheline_aligned_in_smp; struct mlx5e_xdpsq { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e0b75f52d556..1b48dec67abf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -956,6 +956,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } +static void mlx5e_sq_recover(struct work_struct *work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, struct mlx5e_params *params, @@ -974,6 +975,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->txq_ix = txq_ix; sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; + INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); @@ -1040,6 +1042,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); @@ -1158,9 +1161,20 @@ err_free_txqsq: return err; } +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); @@ -1205,6 +1219,107 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) mlx5e_free_txqsq(sq); } +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static void mlx5e_sq_recover(struct work_struct *work) +{ + struct mlx5e_txqsq_recover *recover = + container_of(work, struct mlx5e_txqsq_recover, + recover_work); + struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, + recover); + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + return; + } + + if (state != MLX5_RQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return; + } + + netif_tx_disable_queue(sq->txq); + + if (mlx5e_wait_for_sq_flush(sq)) + return; + + /* If the interval between two consecutive recovers per SQ is too + * short, don't recover to avoid infinite loop of ERR_CQE -> recover. + * If we reached this state, there is probably a bug that needs to be + * fixed. let's keep the queue close and let tx timeout cleanup. + */ + if (jiffies_to_msecs(jiffies - recover->last_recover) < + MLX5E_SQ_RECOVER_MIN_INTERVAL) { + netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", + sq->sqn); + return; + } + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. 
+ */ + if (mlx5e_sq_to_ready(sq, state)) + return; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats.recover++; + recover->last_recover = jiffies; + mlx5e_activate_txqsq(sq); +} + static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index ad91d9de0240..b08c94422907 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -61,6 +61,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, @@ -155,6 +156,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; s->tx_cqe_err += sq_stats->cqe_err; + s->tx_recover += sq_stats->recover; s->tx_xmit_more += sq_stats->xmit_more; s->tx_csum_partial_inner += sq_stats->csum_partial_inner; s->tx_csum_none += sq_stats->csum_none; @@ -1106,6 +1108,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) }, }; static const struct counter_desc ch_stats_desc[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 43dc808684c9..53111a2df587 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -79,6 +79,7 @@ struct mlx5e_sw_stats { u64 tx_queue_dropped; u64 tx_xmit_more; u64 tx_cqe_err; + u64 tx_recover; u64 rx_wqe_err; u64 rx_mpwqe_filler; u64 rx_buff_alloc_err; @@ -199,6 +200,7 @@ struct mlx5e_sq_stats { u64 wake; u64 dropped; u64 cqe_err; + u64 recover; }; struct mlx5e_ch_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 88b5b7bfc9a9..20297108528a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -469,9 +469,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) { - if (!sq->stats.cqe_err) + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, + &sq->state)) { mlx5e_dump_error_cqe(sq, (struct mlx5_err_cqe *)cqe); + queue_work(cq->channel->priv->wq, + &sq->recover.recover_work); + } sq->stats.cqe_err++; } @@ -528,7 +532,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM)) { + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5E_SQ_STOP_ROOM) && + !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); sq->stats.wake++; }
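Condensed view of the recovery sequence implemented in the patch above (sketch; all calls appear in the diff, with the 500 msec rate-limit check sitting between the flush and the state transition):

    /* mlx5e_sq_recover(), in short: */
    netif_tx_disable_queue(sq->txq);    /* DRV_XOFF: stop the stack      */
    mlx5e_wait_for_sq_flush(sq);        /* NAPI drains until cc == pc    */
    mlx5e_sq_to_ready(sq, state);       /* ERR -> RST -> RDY             */
    mlx5e_reset_txqsq_cc_pc(sq);        /* cc = dma_fifo_cc = pc = 0     */
    mlx5e_activate_txqsq(sq);           /* clear RECOVERING, restart TXQ */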