From d8822868535eacd7ff52afb0c542361f54280108 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 20 Dec 2017 17:53:22 +0200 Subject: [PATCH 01/15] net/mlx5e: Use vhca id as the hairpin peer identifier The peer vhca id spans less bits vs the ifindex and can well serve for the hairpin hash key, move to use that. This is a pre-step to put more info into the hairpin hash key in downstream patch while keeping it at 32 bits. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cf528da51243..fac8c91c261b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -111,7 +111,7 @@ struct mlx5e_hairpin_entry { /* flows sharing the same hairpin */ struct list_head flows; - int peer_ifindex; + u16 peer_vhca_id; struct mlx5e_hairpin *hp; }; @@ -334,13 +334,13 @@ static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) } static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, - int peer_ifindex) + u16 peer_vhca_id) { struct mlx5e_hairpin_entry *hpe; hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, - hairpin_hlist, peer_ifindex) { - if (hpe->peer_ifindex == peer_ifindex) + hairpin_hlist, peer_vhca_id) { + if (hpe->peer_vhca_id == peer_vhca_id) return hpe; } @@ -353,16 +353,20 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, { int peer_ifindex = parse_attr->mirred_ifindex; struct mlx5_hairpin_params params; + struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; struct mlx5e_hairpin *hp; + u16 peer_id; int err; - if (!MLX5_CAP_GEN(priv->mdev, hairpin)) { + peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { netdev_warn(priv->netdev, "hairpin is not supported\n"); return -EOPNOTSUPP; } - hpe = mlx5e_hairpin_get(priv, peer_ifindex); + peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + hpe = mlx5e_hairpin_get(priv, peer_id); if (hpe) goto attach_flow; @@ -371,7 +375,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, return -ENOMEM; INIT_LIST_HEAD(&hpe->flows); - hpe->peer_ifindex = peer_ifindex; + hpe->peer_vhca_id = peer_id; params.log_data_size = 15; params.log_data_size = min_t(u8, params.log_data_size, @@ -391,7 +395,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, hp->pair->sqn, params.log_data_size); hpe->hp = hp; - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, peer_ifindex); + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, peer_id); attach_flow: flow->nic_attr->hairpin_tirn = hpe->hp->tirn; From 106be53b6b0a107054e5c2a009aa0d079b9c70c1 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 20 Dec 2017 18:18:10 +0200 Subject: [PATCH 02/15] net/mlx5e: Set per priority hairpin pairs As part of the QoS model, on xmit, the HW mandates that all packets going through a given SQ have the same priority. To align hairpin SQs with that, we use the priority given as part of the matching for the hairpin hash key. This ensures that flows/packets mapped to different HW priorities will go through different hairpin instances. If no priority is given for matching, we treat that as an 8th priority, this is in order not to harm cases where priority is specified. Only the PCP priority trust model is supported. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 64 +++++++++++++++++-- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fac8c91c261b..cd54cd2b2328 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -112,6 +112,7 @@ struct mlx5e_hairpin_entry { struct list_head flows; u16 peer_vhca_id; + u8 prio; struct mlx5e_hairpin *hp; }; @@ -333,20 +334,63 @@ static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) kvfree(hp); } +static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio) +{ + return (peer_vhca_id << 16 | prio); +} + static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, - u16 peer_vhca_id) + u16 peer_vhca_id, u8 prio) { struct mlx5e_hairpin_entry *hpe; + u32 hash_key = hash_hairpin_info(peer_vhca_id, prio); hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, - hairpin_hlist, peer_vhca_id) { - if (hpe->peer_vhca_id == peer_vhca_id) + hairpin_hlist, hash_key) { + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) return hpe; } return NULL; } +#define UNKNOWN_MATCH_PRIO 8 + +static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, u8 *match_prio) +{ + void *headers_c, *headers_v; + u8 prio_val, prio_mask = 0; + bool vlan_present; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) { + netdev_warn(priv->netdev, + "only PCP trust state supported for hairpin\n"); + return -EOPNOTSUPP; + } +#endif + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + + vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag); + if (vlan_present) { + prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + } + + if (!vlan_present || !prio_mask) { + prio_val = UNKNOWN_MATCH_PRIO; + } else if (prio_mask != 0x7) { + netdev_warn(priv->netdev, + "masked priority match not supported for hairpin\n"); + return -EOPNOTSUPP; + } + + *match_prio = prio_val; + return 0; +} + static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr) @@ -356,6 +400,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; struct mlx5e_hairpin *hp; + u8 match_prio; u16 peer_id; int err; @@ -366,7 +411,10 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, } peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); - hpe = mlx5e_hairpin_get(priv, peer_id); + err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio); + if (err) + return err; + hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); if (hpe) goto attach_flow; @@ -376,6 +424,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, INIT_LIST_HEAD(&hpe->flows); hpe->peer_vhca_id = peer_id; + hpe->prio = match_prio; params.log_data_size = 15; params.log_data_size = min_t(u8, params.log_data_size, @@ -390,12 +439,13 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, goto create_hairpin_err; } - netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x log data size %d\n", + netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d log data size %d\n", hp->tirn, hp->pair->rqn, hp->pair->peer_mdev->priv.name, - hp->pair->sqn, params.log_data_size); + hp->pair->sqn, match_prio, params.log_data_size); hpe->hp = hp; - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, peer_id); + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); attach_flow: flow->nic_attr->hairpin_tirn = hpe->hp->tirn; From 1ae1df3a119395048a7b2614315d7e88d88a78f3 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 6 Dec 2017 21:05:01 +0200 Subject: [PATCH 03/15] net/mlx5e: Refactor RSS related objects and code In order to use RSS for hairpin, we refactor the code that deals with setup of the TTC steering tables. This is done using an interim ttc params object that has the flow table attributes, TIR numbers, etc. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 24 +++- .../net/ethernet/mellanox/mlx5/core/en_fs.c | 110 +++++++++++------- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 22 +++- 3 files changed, 103 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7b988595ac5f..10620c3e0c61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -1024,11 +1025,26 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); -int mlx5e_create_ttc_table(struct mlx5e_priv *priv); -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); +struct ttc_params { + struct mlx5_flow_table_attr ft_attr; + u32 any_tt_tirn; + u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_ttc_table *inner_ttc; +}; -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv); -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv); +void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params); +void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params); +void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params); + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc); + +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc); int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, u32 underlay_qpn, u32 *tisn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index def513484845..f64dda2bed31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -806,25 +806,25 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, return err ? ERR_PTR(err) : rule; } -static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) +static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv, + struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { struct mlx5_flow_destination dest = {}; - struct mlx5e_ttc_table *ttc; struct mlx5_flow_handle **rules; struct mlx5_flow_table *ft; int tt; int err; - ttc = &priv->fs.ttc; ft = ttc->ft.t; rules = ttc->rules; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (tt = 0; tt < MLX5E_NUM_TT; tt++) { if (tt == MLX5E_TT_ANY) - dest.tir_num = priv->direct_tir[0].tirn; + dest.tir_num = params->any_tt_tirn; else - dest.tir_num = priv->indir_tir[tt].tirn; + dest.tir_num = params->indir_tirn[tt]; rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_rules[tt].etype, ttc_rules[tt].proto); @@ -832,12 +832,12 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) goto del_rules; } - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) return 0; rules = ttc->tunnel_rules; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.inner_ttc.ft.t; + dest.ft = params->inner_ttc->ft.t; for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_tunnel_rules[tt].etype, @@ -977,25 +977,25 @@ mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, return err ? ERR_PTR(err) : rule; } -static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) +static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv, + struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rules; - struct mlx5e_ttc_table *ttc; struct mlx5_flow_table *ft; int err; int tt; - ttc = &priv->fs.inner_ttc; ft = ttc->ft.t; rules = ttc->rules; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (tt = 0; tt < MLX5E_NUM_TT; tt++) { if (tt == MLX5E_TT_ANY) - dest.tir_num = priv->direct_tir[0].tirn; + dest.tir_num = params->any_tt_tirn; else - dest.tir_num = priv->inner_indir_tir[tt].tirn; + dest.tir_num = params->indir_tirn[tt]; rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, ttc_rules[tt].etype, @@ -1075,21 +1075,42 @@ err: return err; } -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params) +{ + ttc_params->any_tt_tirn = priv->direct_tir[0].tirn; + ttc_params->inner_ttc = &priv->fs.inner_ttc; +} + +void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + + ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; +} + +void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params) + +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + + ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; + ft_attr->level = MLX5E_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; +} + +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { - struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; - struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; int err; if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) return 0; - ft_attr.max_fte = MLX5E_INNER_TTC_TABLE_SIZE; - ft_attr.level = MLX5E_INNER_TTC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; - - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -1100,7 +1121,7 @@ int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) if (err) goto err; - err = mlx5e_generate_inner_ttc_table_rules(priv); + err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc); if (err) goto err; @@ -1111,10 +1132,9 @@ err: return err; } -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc) { - struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) return; @@ -1122,27 +1142,21 @@ void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) mlx5e_destroy_flow_table(&ttc->ft); } -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc) { - struct mlx5e_ttc_table *ttc = &priv->fs.ttc; - mlx5e_cleanup_ttc_rules(ttc); mlx5e_destroy_flow_table(&ttc->ft); } -int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); - struct mlx5e_ttc_table *ttc = &priv->fs.ttc; - struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; int err; - ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE; - ft_attr.level = MLX5E_TTC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; - - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -1153,7 +1167,7 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv) if (err) goto err; - err = mlx5e_generate_ttc_table_rules(priv); + err = mlx5e_generate_ttc_table_rules(priv, params, ttc); if (err) goto err; @@ -1474,7 +1488,8 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) int mlx5e_create_flow_steering(struct mlx5e_priv *priv) { - int err; + struct ttc_params ttc_params = {}; + int tt, err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -1489,14 +1504,23 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_inner_ttc_table(priv); + mlx5e_set_ttc_basic_params(priv, &ttc_params); + mlx5e_set_inner_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; + + err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); if (err) { netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", err); goto err_destroy_arfs_tables; } - err = mlx5e_create_ttc_table(priv); + mlx5e_set_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; + + err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -1524,9 +1548,9 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1537,8 +1561,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); - mlx5e_destroy_ttc_table(priv); - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 2743525a40a0..264504a990ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -241,7 +241,8 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) { - int err; + struct ttc_params ttc_params = {}; + int tt, err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -256,14 +257,23 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_inner_ttc_table(priv); + mlx5e_set_ttc_basic_params(priv, &ttc_params); + mlx5e_set_inner_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; + + err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); if (err) { netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", err); goto err_destroy_arfs_tables; } - err = mlx5e_create_ttc_table(priv); + mlx5e_set_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; + + err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -273,7 +283,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) return 0; err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -282,8 +292,8 @@ err_destroy_arfs_tables: static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_destroy_ttc_table(priv); - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); mlx5e_arfs_destroy_tables(priv); } From 479f074c5bbe0ba126b252b4449b6ea8b32bd59f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 1 Nov 2017 15:22:08 +0200 Subject: [PATCH 04/15] net/mlx5e: Enlarge the NIC TC offload steering prio to support two levels This will allow to be able and set TC rule whose steering dest is RSS TTC steering table. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index cc4f6ab9374a..c025c98700e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -89,6 +89,9 @@ /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) +#define KERNEL_NIC_TC_NUM_PRIOS 1 +#define KERNEL_NIC_TC_NUM_LEVELS 2 + #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) @@ -134,7 +137,7 @@ static struct init_tree_node { ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, ETHTOOL_PRIO_NUM_LEVELS))), ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(1, 1), + ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, From ddae74ac103bd35616c2bde5dc4dd66e2519db7a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 23 Nov 2017 18:19:23 +0200 Subject: [PATCH 05/15] net/mlx5: Vectorize the low level core hairpin object Enhance the hairpin setup code at the core to support a set of N (RQ,SQ) pairs. This will be later used by the caller to set RSS spreading among the different RQs. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +- .../ethernet/mellanox/mlx5/core/transobj.c | 94 ++++++++++++------- include/linux/mlx5/transobj.h | 7 +- 3 files changed, 69 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cd54cd2b2328..46752a6dff92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -269,7 +269,7 @@ static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); - MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn); + MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]); MLX5_SET(tirc, tirc, transport_domain, hp->tdn); err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn); @@ -440,8 +440,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, } netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d log data size %d\n", - hp->tirn, hp->pair->rqn, hp->pair->peer_mdev->priv.name, - hp->pair->sqn, match_prio, params.log_data_size); + hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name, + hp->pair->sqn[0], match_prio, params.log_data_size); hpe->hp = hp; hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index a09ebbaf3b68..6f90d1fce5c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -437,28 +437,40 @@ static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev, static int mlx5_hairpin_create_queues(struct mlx5_hairpin *hp, struct mlx5_hairpin_params *params) { - int err; + int i, j, err; - err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn); - if (err) - goto out_err_rq; + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn[i]); + if (err) + goto out_err_rq; + } - err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn); - if (err) - goto out_err_sq; + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn[i]); + if (err) + goto out_err_sq; + } return 0; out_err_sq: - mlx5_core_destroy_rq(hp->func_mdev, hp->rqn); + for (j = 0; j < i; j++) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[j]); + i = hp->num_channels; out_err_rq: + for (j = 0; j < i; j++) + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[j]); return err; } static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp) { - mlx5_core_destroy_rq(hp->func_mdev, hp->rqn); - mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn); + int i; + + for (i = 0; i < hp->num_channels; i++) { + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]); + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + } } static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn, @@ -505,41 +517,53 @@ static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn, static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp) { - int err; + int i, j, err; - /* set peer SQ */ - err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn, - MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, - MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn); - if (err) - goto err_modify_sq; + /* set peer SQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], + MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn[i]); + if (err) + goto err_modify_sq; + } - /* set func RQ */ - err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn, - MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY, - MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn); - - if (err) - goto err_modify_rq; + /* set func RQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], + MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY, + MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn[i]); + if (err) + goto err_modify_rq; + } return 0; err_modify_rq: - mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_RST, 0, 0); + for (j = 0; j < i; j++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[j], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + i = hp->num_channels; err_modify_sq: + for (j = 0; j < i; j++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[j], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); return err; } static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) { - /* unset func RQ */ - mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn, MLX5_RQC_STATE_RDY, - MLX5_RQC_STATE_RST, 0, 0); + int i; - /* unset peer SQ */ - mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_RST, 0, 0); + /* unset func RQs */ + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + + /* unset peer SQs */ + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); } struct mlx5_hairpin * @@ -550,13 +574,17 @@ mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev, struct mlx5_hairpin *hp; int size, err; - size = sizeof(*hp); + size = sizeof(*hp) + params->num_channels * 2 * sizeof(u32); hp = kzalloc(size, GFP_KERNEL); if (!hp) return ERR_PTR(-ENOMEM); hp->func_mdev = func_mdev; hp->peer_mdev = peer_mdev; + hp->num_channels = params->num_channels; + + hp->rqn = (void *)hp + sizeof(*hp); + hp->sqn = hp->rqn + params->num_channels; /* alloc and pair func --> peer hairpin */ err = mlx5_hairpin_create_queues(hp, params); diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index a228310c1968..1bcd8d5562f0 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -78,14 +78,17 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); struct mlx5_hairpin_params { u8 log_data_size; u16 q_counter; + int num_channels; }; struct mlx5_hairpin { struct mlx5_core_dev *func_mdev; struct mlx5_core_dev *peer_mdev; - u32 rqn; - u32 sqn; + int num_channels; + + u32 *rqn; + u32 *sqn; }; struct mlx5_hairpin * From 3f6d08d196b2d05e36c86d7ccbbaa3431a431bed Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 26 Nov 2017 20:39:12 +0200 Subject: [PATCH 06/15] net/mlx5e: Add RSS support for hairpin Support RSS for hairpin traffic. We create multiple hairpin RQ/SQ pairs and RSS TTC table per hairpin instance and steer the related flows through that table so they are spread between the pairs. We open one pair per 50Gbs link speed, for all speeds <= 50Gbs, there is one pair and no RSS while for 100Gbs ports two RSSed pairs. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 209 +++++++++++++++++- 3 files changed, 205 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 10620c3e0c61..a3536e051052 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -697,6 +697,11 @@ enum { MLX5E_ARFS_FT_LEVEL }; +enum { + MLX5E_TC_FT_LEVEL = 0, + MLX5E_TC_TTC_FT_LEVEL, +}; + struct mlx5e_ethtool_table { struct mlx5_flow_table *ft; int num_rules; @@ -1057,6 +1062,8 @@ int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); +int mlx5e_bits_invert(unsigned long a, int size); + /* ethtool helpers */ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 466a4e1244d7..8e380a89ec63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2219,7 +2219,7 @@ static int mlx5e_rx_hash_fn(int hfunc) MLX5_RX_HASH_FN_INVERTED_XOR8; } -static int mlx5e_bits_invert(unsigned long a, int size) +int mlx5e_bits_invert(unsigned long a, int size) { int inv = 0; int i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 46752a6dff92..7a4f577d1c5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -51,12 +51,14 @@ #include "en_tc.h" #include "eswitch.h" #include "vxlan.h" +#include "fs_core.h" struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; u32 mod_hdr_id; u32 hairpin_tirn; + struct mlx5_flow_table *hairpin_ft; }; enum { @@ -64,6 +66,7 @@ enum { MLX5E_TC_FLOW_NIC = BIT(1), MLX5E_TC_FLOW_OFFLOADED = BIT(2), MLX5E_TC_FLOW_HAIRPIN = BIT(3), + MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(4), }; struct mlx5e_tc_flow { @@ -100,8 +103,14 @@ struct mlx5e_hairpin { struct mlx5_hairpin *pair; struct mlx5_core_dev *func_mdev; + struct mlx5e_priv *func_priv; u32 tdn; u32 tirn; + + int num_channels; + struct mlx5e_rqt indir_rqt; + u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_ttc_table ttc; }; struct mlx5e_hairpin_entry { @@ -290,6 +299,151 @@ static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); } +static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) +{ + u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn; + struct mlx5e_priv *priv = hp->func_priv; + int i, ix, sz = MLX5E_INDIR_RQT_SIZE; + + mlx5e_build_default_indir_rqt(indirection_rqt, sz, + hp->num_channels); + + for (i = 0; i < sz; i++) { + ix = i; + if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, ilog2(sz)); + ix = indirection_rqt[ix]; + rqn = hp->pair->rqn[ix]; + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); + } +} + +static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) +{ + int inlen, err, sz = MLX5E_INDIR_RQT_SIZE; + struct mlx5e_priv *priv = hp->func_priv; + struct mlx5_core_dev *mdev = priv->mdev; + void *rqtc; + u32 *in; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + + err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); + if (!err) + hp->indir_rqt.enabled = true; + + kvfree(in); + return err; +} + +static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + u32 in[MLX5_ST_SZ_DW(create_tir_in)]; + int tt, i, err; + void *tirc; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + + MLX5_SET(tirc, tirc, transport_domain, hp->tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + + err = mlx5_core_create_tir(hp->func_mdev, in, + MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]); + if (err) { + mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_tirs; + } + } + return 0; + +err_destroy_tirs: + for (i = 0; i < tt; i++) + mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]); + return err; +} + +static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) +{ + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]); +} + +static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, + struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + + ttc_params->any_tt_tirn = hp->tirn; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params->indir_tirn[tt] = hp->indir_tirn[tt]; + + ft_attr->max_fte = MLX5E_NUM_TT; + ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_TC_PRIO; +} + +static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct ttc_params ttc_params; + int err; + + err = mlx5e_hairpin_create_indirect_rqt(hp); + if (err) + return err; + + err = mlx5e_hairpin_create_indirect_tirs(hp); + if (err) + goto err_create_indirect_tirs; + + mlx5e_hairpin_set_ttc_params(hp, &ttc_params); + err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc); + if (err) + goto err_create_ttc_table; + + netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", + hp->num_channels, hp->ttc.ft.t->id); + + return 0; + +err_create_ttc_table: + mlx5e_hairpin_destroy_indirect_tirs(hp); +err_create_indirect_tirs: + mlx5e_destroy_rqt(priv, &hp->indir_rqt); + + return err; +} + +static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + + mlx5e_destroy_ttc_table(priv, &hp->ttc); + mlx5e_hairpin_destroy_indirect_tirs(hp); + mlx5e_destroy_rqt(priv, &hp->indir_rqt); +} + static struct mlx5e_hairpin * mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params, int peer_ifindex) @@ -313,13 +467,23 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params } hp->pair = pair; hp->func_mdev = func_mdev; + hp->func_priv = priv; + hp->num_channels = params->num_channels; err = mlx5e_hairpin_create_transport(hp); if (err) goto create_transport_err; + if (hp->num_channels > 1) { + err = mlx5e_hairpin_rss_init(hp); + if (err) + goto rss_init_err; + } + return hp; +rss_init_err: + mlx5e_hairpin_destroy_transport(hp); create_transport_err: mlx5_core_hairpin_destroy(hp->pair); create_pair_err: @@ -329,6 +493,8 @@ create_pair_err: static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) { + if (hp->num_channels > 1) + mlx5e_hairpin_rss_cleanup(hp); mlx5e_hairpin_destroy_transport(hp); mlx5_core_hairpin_destroy(hp->pair); kvfree(hp); @@ -400,6 +566,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; struct mlx5e_hairpin *hp; + u64 link_speed64; + u32 link_speed; u8 match_prio; u16 peer_id; int err; @@ -433,6 +601,13 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz)); params.q_counter = priv->q_counter; + /* set hairpin pair per each 50Gbs share of the link */ + mlx5e_get_max_linkspeed(priv->mdev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + params.num_channels = link_speed64; + hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); if (IS_ERR(hp)) { err = PTR_ERR(hp); @@ -448,8 +623,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, hash_hairpin_info(peer_id, match_prio)); attach_flow: - flow->nic_attr->hairpin_tirn = hpe->hp->tirn; + if (hpe->hp->num_channels > 1) { + flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS; + flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + } else { + flow->nic_attr->hairpin_tirn = hpe->hp->tirn; + } list_add(&flow->hairpin, &hpe->flows); + return 0; create_hairpin_err: @@ -497,20 +678,24 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, bool table_created = false; int err, dest_ix = 0; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { - err = mlx5e_hairpin_flow_add(priv, flow, parse_attr); - if (err) { - rule = ERR_PTR(err); - goto err_add_hairpin_flow; - } + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr); + if (err) { + rule = ERR_PTR(err); + goto err_add_hairpin_flow; + } + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = attr->hairpin_ft; + } else { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest[dest_ix].tir_num = attr->hairpin_tirn; - } else { - dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[dest_ix].ft = priv->fs.vlan.ft.t; } dest_ix++; + } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = priv->fs.vlan.ft.t; + dest_ix++; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { @@ -551,7 +736,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, MLX5E_TC_PRIO, tc_tbl_size, MLX5E_TC_TABLE_NUM_GROUPS, - 0, 0); + MLX5E_TC_FT_LEVEL, 0); if (IS_ERR(priv->fs.tc.t)) { netdev_err(priv->netdev, "Failed to create tc offload table\n"); From 4d533e0f86952eb97f66f2c9548313f6e51066c0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 4 Jan 2018 12:26:21 +0200 Subject: [PATCH 07/15] net/mlx5: Enable setting hairpin queue size Allow to specify the size of the hairpin queues along with the packet buffer data size from the core setup code. If the driver doesn't provide this, the FW applies proper value that matches the provided data size and a FW chosen RQ stride size. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 2 ++ include/linux/mlx5/mlx5_ifc.h | 8 ++++++-- include/linux/mlx5/transobj.h | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 6f90d1fce5c5..9e38343a951f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -413,6 +413,7 @@ static int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev, MLX5_SET(rqc, rqc, counter_set_id, params->q_counter); MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); return mlx5_core_create_rq(mdev, in, MLX5_ST_SZ_BYTES(create_rq_in), rqn); } @@ -430,6 +431,7 @@ static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev, MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); return mlx5_core_create_sq(mdev, in, MLX5_ST_SZ_BYTES(create_sq_in), sqn); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index acd829d8613b..199bfcd2f2ce 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1031,7 +1031,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_hairpin_queues[0x5]; u8 reserved_at_3c8[0x3]; u8 log_max_hairpin_wq_data_sz[0x5]; - u8 reserved_at_3d0[0xb]; + u8 reserved_at_3d0[0x3]; + u8 log_max_hairpin_num_packets[0x5]; + u8 reserved_at_3d8[0x3]; u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; @@ -1172,7 +1174,9 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0xb]; + u8 reserved_at_120[0x3]; + u8 log_hairpin_num_packets[0x5]; + u8 reserved_at_128[0x3]; u8 log_hairpin_data_sz[0x5]; u8 reserved_at_130[0x5]; diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 1bcd8d5562f0..7e8f281f8c00 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -77,6 +77,7 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); struct mlx5_hairpin_params { u8 log_data_size; + u8 log_num_packets; u16 q_counter; int num_channels; }; From eb9180f792afe6e1f5a723381632791ad134ec39 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 4 Jan 2018 12:47:19 +0200 Subject: [PATCH 08/15] net/mlx5e: Set hairpin queue size For a given hairpin packet buffer size, different queue sizes (values of log_hairpin_num_packets) determine how the data is broken to strides on the RQ. Currently the chosen value is set to 64B strides. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7a4f577d1c5b..fd98b0dc610f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -599,8 +599,13 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); params.log_data_size = max_t(u8, params.log_data_size, MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz)); - params.q_counter = priv->q_counter; + params.log_num_packets = params.log_data_size - + MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev); + params.log_num_packets = min_t(u8, params.log_num_packets, + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets)); + + params.q_counter = priv->q_counter; /* set hairpin pair per each 50Gbs share of the link */ mlx5e_get_max_linkspeed(priv->mdev, &link_speed); link_speed = max_t(u32, link_speed, 50000); @@ -614,9 +619,9 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, goto create_hairpin_err; } - netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d log data size %d\n", + netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name, - hp->pair->sqn[0], match_prio, params.log_data_size); + hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); hpe->hp = hp; hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, From 84990945261926a557fc85a835c665996e282a29 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 20 Dec 2017 11:31:28 +0200 Subject: [PATCH 09/15] net/mlx5e: Print delta since last transmit per SQ upon TX timeout When driver callback for TX timeout is being called, it handles all stopped xmit queues (not only the ones which their timeout expired). Add usecs since last transmit to TX timeout logs per send queue in order to monitor if the queue timeout expired. Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8e380a89ec63..12196d4d958e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3766,14 +3766,16 @@ static void mlx5e_tx_timeout(struct net_device *dev) netdev_err(dev, "TX timeout detected\n"); for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; - if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) + if (!netif_xmit_stopped(dev_queue)) continue; sched_work = true; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", - i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); + netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - dev_queue->trans_start)); } if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state)) From 3a32b26a4eccb37435d81bf75e41b2bd4464f8d6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 13 Dec 2017 13:04:01 +0200 Subject: [PATCH 10/15] net/mlx5e: Add Event Queue meta data info for TX timeout logs When TX timeout occurs, EQ consumer index and irqn can help in debug for understanding the SW state of EQ. Add them to the logger prints for the relevant EQ only. Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 12196d4d958e..71b8b171c565 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3768,6 +3768,9 @@ static void mlx5e_tx_timeout(struct net_device *dev) for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; + struct mlx5_core_dev *mdev = priv->mdev; + int irqn_not_used, eqn; + struct mlx5_eq *eq; if (!netif_xmit_stopped(dev_queue)) continue; @@ -3776,6 +3779,15 @@ static void mlx5e_tx_timeout(struct net_device *dev) netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, jiffies_to_usecs(jiffies - dev_queue->trans_start)); + + if (mlx5_vector2eqn(mdev, sq->cq.mcq.vector, &eqn, + &irqn_not_used)) + continue; + + eq = mlx5_eqn2eq(mdev, eqn); + if (!IS_ERR(eq)) + netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eqn, eq->cons_index, eq->irqn); } if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state)) From 7ca560b5af70b5f578c9bf32c8fbfbd68d22252f Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 19 Dec 2017 14:52:29 +0200 Subject: [PATCH 11/15] net/mlx5e: Poll event queue upon TX timeout before performing full channels recovery Up until this patch, on every TX timeout we would try to do channels recovery. However, in case of a lost interrupt for an EQ, the channel associated to it cannot be recovered if reopened as it would never get another interrupt on sent/received traffic, and eventually ends up with another TX timeout (Restarting the EQ is not part of channel recovery). This patch adds a mechanism for explicitly polling EQ in case of a TX timeout in order to recover from a lost interrupt. If this is not the case (no pending EQEs), perform a channels full recovery as usual. Once a lost EQE is recovered, it triggers the NAPI to run and handle all pending completions. This will free some budget in the bql (via calling netdev_tx_completed_queue) or by clearing pending TXWQEs and waking up the queue. One of the above actions will move the queue to be ready for transmit again. Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 51 +++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 18 +++++++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 71b8b171c565..12ea5e319e38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3757,10 +3757,37 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } +static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, + struct mlx5e_txqsq *sq) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int irqn_not_used, eqn; + struct mlx5_eq *eq; + u32 eqe_count; + + if (mlx5_vector2eqn(mdev, sq->cq.mcq.vector, &eqn, &irqn_not_used)) + return false; + + eq = mlx5_eqn2eq(mdev, eqn); + if (IS_ERR(eq)) + return false; + + netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eqn, eq->cons_index, eq->irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + if (!eqe_count) + return false; + + netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn); + return true; +} + static void mlx5e_tx_timeout(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - bool sched_work = false; + bool reopen_channels = false; int i; netdev_err(dev, "TX timeout detected\n"); @@ -3768,29 +3795,23 @@ static void mlx5e_tx_timeout(struct net_device *dev) for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; - struct mlx5_core_dev *mdev = priv->mdev; - int irqn_not_used, eqn; - struct mlx5_eq *eq; if (!netif_xmit_stopped(dev_queue)) continue; - sched_work = true; - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, jiffies_to_usecs(jiffies - dev_queue->trans_start)); - if (mlx5_vector2eqn(mdev, sq->cq.mcq.vector, &eqn, - &irqn_not_used)) - continue; - - eq = mlx5_eqn2eq(mdev, eqn); - if (!IS_ERR(eq)) - netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eqn, eq->cons_index, eq->irqn); + /* If we recover a lost interrupt, most likely TX timeout will + * be resolved, skip reopening channels + */ + if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + reopen_channels = true; + } } - if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (reopen_channels && test_bit(MLX5E_STATE_OPENED, &priv->state)) schedule_work(&priv->tx_timeout_work); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index e7e7cef2bde4..4d98ce0901af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -530,6 +530,24 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) return IRQ_HANDLED; } +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. + */ +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq) +{ + u32 count_eqe; + + disable_irq(eq->irqn); + count_eqe = eq->cons_index; + mlx5_eq_int(eq->irqn, eq); + count_eqe = eq->cons_index - count_eqe; + enable_irq(eq->irqn); + + return count_eqe; +} + static void init_eq_buf(struct mlx5_eq *eq) { struct mlx5_eqe *eqe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index ff4a0b889a6f..b5a46c128b28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -116,6 +116,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); void mlx5_cq_tasklet_cb(unsigned long data); int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, From 57d689a8ca7b360f902328da95e8b441c24823ca Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 19 Dec 2017 14:53:34 +0200 Subject: [PATCH 12/15] net/mlx5e: Add per-channel counters infrastructure, use it upon TX timeout Add per-channel counter ch#_eq_rearm to monitor how many lost interrupt recovery actions happened upon TX timeouts. Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++ .../net/ethernet/mellanox/mlx5/core/en_stats.c | 18 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_stats.h | 6 ++++++ 4 files changed, 29 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a3536e051052..a70ae3abcbbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -561,6 +561,7 @@ struct mlx5e_channel { /* data path - accessed per napi poll */ struct irq_desc *irq_desc; + struct mlx5e_ch_stats stats; /* control */ struct mlx5e_priv *priv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 12ea5e319e38..f61f4f475497 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -178,6 +178,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) struct mlx5e_sw_stats temp, *s = &temp; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; + struct mlx5e_ch_stats *ch_stats; int i, j; memset(s, 0, sizeof(*s)); @@ -185,6 +186,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) struct mlx5e_channel *c = priv->channels.c[i]; rq_stats = &c->rq.stats; + ch_stats = &c->stats; s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; @@ -209,6 +211,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_cache_empty += rq_stats->cache_empty; s->rx_cache_busy += rq_stats->cache_busy; s->rx_cache_waive += rq_stats->cache_waive; + s->ch_eq_rearm += ch_stats->eq_rearm; for (j = 0; j < priv->channels.params.num_tc; j++) { sq_stats = &c->sq[j].stats; @@ -3781,6 +3784,7 @@ static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, return false; netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn); + sq->channel->stats.eq_rearm++; return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index b74ddc7984bc..a4d787208d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -71,6 +71,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -767,12 +768,18 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; +static const struct counter_desc ch_stats_desc[] = { + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, +}; + #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) +#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) { return (NUM_RQ_STATS * priv->channels.num) + + (NUM_CH_STATS * priv->channels.num) + (NUM_SQ_STATS * priv->channels.num * priv->channels.params.num_tc); } @@ -784,6 +791,11 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return idx; + for (i = 0; i < priv->channels.num; i++) + for (j = 0; j < NUM_CH_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ch_stats_desc[j].format, i); + for (i = 0; i < priv->channels.num; i++) for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); @@ -807,6 +819,12 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return idx; + for (i = 0; i < channels->num; i++) + for (j = 0; j < NUM_CH_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&channels->c[i]->stats, + ch_stats_desc, j); + for (i = 0; i < channels->num; i++) for (j = 0; j < NUM_RQ_STATS; j++) data[idx++] = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index d679e21f686e..4316765d9d66 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -44,6 +44,7 @@ #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) struct counter_desc { char format[ETH_GSTRING_LEN]; @@ -88,6 +89,7 @@ struct mlx5e_sw_stats { u64 rx_cache_empty; u64 rx_cache_busy; u64 rx_cache_waive; + u64 ch_eq_rearm; /* Special handling counters */ u64 link_down_events_phy; @@ -192,6 +194,10 @@ struct mlx5e_sq_stats { u64 dropped; }; +struct mlx5e_ch_stats { + u64 eq_rearm; +}; + struct mlx5e_stats { struct mlx5e_sw_stats sw; struct mlx5e_qcounter_stats qcnt; From a89842811ea98a0452893d83aac07d0b46854636 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 28 Nov 2017 23:18:13 +0200 Subject: [PATCH 13/15] net/mlx5e: Merge per priority stats groups Merge the per priority traffic and pfc groups into one group, because both groups share the same update_stats() callback which will be introduced in the upcoming patch. Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_stats.c | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index a4d787208d60..545150d4de9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -657,6 +657,28 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, return idx; } +static int mlx5e_grp_per_prio_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_grp_per_prio_traffic_get_num_stats(priv) + + mlx5e_grp_per_prio_pfc_get_num_stats(priv); +} + +static int mlx5e_grp_per_prio_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx); + return idx; +} + +static int mlx5e_grp_per_prio_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx); + return idx; +} + static const struct counter_desc mlx5e_pme_status_desc[] = { { "module_unplug", 8 }, }; @@ -888,14 +910,9 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_stats = mlx5e_grp_pcie_fill_stats, }, { - .get_num_stats = mlx5e_grp_per_prio_traffic_get_num_stats, - .fill_strings = mlx5e_grp_per_prio_traffic_fill_strings, - .fill_stats = mlx5e_grp_per_prio_traffic_fill_stats, - }, - { - .get_num_stats = mlx5e_grp_per_prio_pfc_get_num_stats, - .fill_strings = mlx5e_grp_per_prio_pfc_fill_strings, - .fill_stats = mlx5e_grp_per_prio_pfc_fill_stats, + .get_num_stats = mlx5e_grp_per_prio_get_num_stats, + .fill_strings = mlx5e_grp_per_prio_fill_strings, + .fill_stats = mlx5e_grp_per_prio_fill_stats, }, { .get_num_stats = mlx5e_grp_pme_get_num_stats, From 193861773534a5711aa439d97eba3515310bb586 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 28 Nov 2017 23:52:13 +0200 Subject: [PATCH 14/15] net/mlx5e: Extend the stats group API to have update_stats() Extend the stats group API to have an update_stats() callback which will be used to fetch the hardware or software counters data. Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 184 +------------- .../ethernet/mellanox/mlx5/core/en_stats.c | 231 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 6 + 5 files changed, 250 insertions(+), 175 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a70ae3abcbbe..4c9360b25532 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -841,7 +841,7 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); -void mlx5e_update_stats(struct mlx5e_priv *priv, bool full); +void mlx5e_update_stats(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2d1395015ab5..cc8048f68f11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -207,7 +207,7 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, return; mutex_lock(&priv->state_lock); - mlx5e_update_stats(priv, true); + mlx5e_update_stats(priv); mutex_unlock(&priv->state_lock); for (i = 0; i < mlx5e_num_stats_grps; i++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f61f4f475497..8530c770c873 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -173,185 +173,23 @@ unlock: rtnl_unlock(); } -static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) +void mlx5e_update_stats(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats temp, *s = &temp; - struct mlx5e_rq_stats *rq_stats; - struct mlx5e_sq_stats *sq_stats; - struct mlx5e_ch_stats *ch_stats; - int i, j; + int i; - memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_channel *c = priv->channels.c[i]; - - rq_stats = &c->rq.stats; - ch_stats = &c->stats; - - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; - s->rx_lro_packets += rq_stats->lro_packets; - s->rx_lro_bytes += rq_stats->lro_bytes; - s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; - s->rx_csum_none += rq_stats->csum_none; - s->rx_csum_complete += rq_stats->csum_complete; - s->rx_csum_unnecessary += rq_stats->csum_unnecessary; - s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; - s->rx_xdp_drop += rq_stats->xdp_drop; - s->rx_xdp_tx += rq_stats->xdp_tx; - s->rx_xdp_tx_full += rq_stats->xdp_tx_full; - s->rx_wqe_err += rq_stats->wqe_err; - s->rx_mpwqe_filler += rq_stats->mpwqe_filler; - s->rx_buff_alloc_err += rq_stats->buff_alloc_err; - s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; - s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; - s->rx_page_reuse += rq_stats->page_reuse; - s->rx_cache_reuse += rq_stats->cache_reuse; - s->rx_cache_full += rq_stats->cache_full; - s->rx_cache_empty += rq_stats->cache_empty; - s->rx_cache_busy += rq_stats->cache_busy; - s->rx_cache_waive += rq_stats->cache_waive; - s->ch_eq_rearm += ch_stats->eq_rearm; - - for (j = 0; j < priv->channels.params.num_tc; j++) { - sq_stats = &c->sq[j].stats; - - s->tx_packets += sq_stats->packets; - s->tx_bytes += sq_stats->bytes; - s->tx_tso_packets += sq_stats->tso_packets; - s->tx_tso_bytes += sq_stats->tso_bytes; - s->tx_tso_inner_packets += sq_stats->tso_inner_packets; - s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; - s->tx_added_vlan_packets += sq_stats->added_vlan_packets; - s->tx_queue_stopped += sq_stats->stopped; - s->tx_queue_wake += sq_stats->wake; - s->tx_queue_dropped += sq_stats->dropped; - s->tx_xmit_more += sq_stats->xmit_more; - s->tx_csum_partial_inner += sq_stats->csum_partial_inner; - s->tx_csum_none += sq_stats->csum_none; - s->tx_csum_partial += sq_stats->csum_partial; - } - } - - s->link_down_events_phy = MLX5_GET(ppcnt_reg, - priv->stats.pport.phy_counters, - counter_set.phys_layer_cntrs.link_down_events); - memcpy(&priv->stats.sw, s, sizeof(*s)); -} - -static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) -{ - int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); - u32 *out = (u32 *)priv->stats.vport.query_vport_out; - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; - struct mlx5_core_dev *mdev = priv->mdev; - - MLX5_SET(query_vport_counter_in, in, opcode, - MLX5_CMD_OP_QUERY_VPORT_COUNTER); - MLX5_SET(query_vport_counter_in, in, op_mod, 0); - MLX5_SET(query_vport_counter_in, in, other_vport, 0); - - mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); -} - -static void mlx5e_update_pport_counters(struct mlx5e_priv *priv, bool full) -{ - struct mlx5e_pport_stats *pstats = &priv->stats.pport; - struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - int prio; - void *out; - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - out = pstats->IEEE_802_3_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - - if (!full) - return; - - out = pstats->RFC_2863_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - - out = pstats->RFC_2819_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - - out = pstats->phy_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - - if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) { - out = pstats->phy_statistical_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - } - - if (MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) { - out = pstats->eth_ext_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - } - - MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { - out = pstats->per_prio_counters[prio]; - MLX5_SET(ppcnt_reg, in, prio_tc, prio); - mlx5_core_access_reg(mdev, in, sz, out, sz, - MLX5_REG_PPCNT, 0, 0); - } -} - -static void mlx5e_update_q_counter(struct mlx5e_priv *priv) -{ - struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; - u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; - int err; - - if (!priv->q_counter) - return; - - err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out)); - if (err) - return; - - qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer); -} - -static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; - struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; - int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); - void *out; - - if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) - return; - - out = pcie_stats->pcie_perf_counters; - MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); -} - -void mlx5e_update_stats(struct mlx5e_priv *priv, bool full) -{ - if (full) { - mlx5e_update_pcie_counters(priv); - mlx5e_ipsec_update_stats(priv); - } - mlx5e_update_pport_counters(priv, full); - mlx5e_update_vport_counters(priv); - mlx5e_update_q_counter(priv); - mlx5e_update_sw_counters(priv); + for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) + if (mlx5e_stats_grps[i].update_stats) + mlx5e_stats_grps[i].update_stats(priv); } static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) { - mlx5e_update_stats(priv, false); + int i; + + for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) + if (mlx5e_stats_grps[i].update_stats_mask & + MLX5E_NDO_UPDATE_STATS) + mlx5e_stats_grps[i].update_stats(priv); } void mlx5e_update_stats_work(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 545150d4de9f..5f0f3493d747 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -100,6 +100,72 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats temp, *s = &temp; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + struct mlx5e_ch_stats *ch_stats; + int i, j; + + memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = &c->rq.stats; + ch_stats = &c->stats; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->rx_lro_packets += rq_stats->lro_packets; + s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; + s->rx_csum_none += rq_stats->csum_none; + s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_tx += rq_stats->xdp_tx; + s->rx_xdp_tx_full += rq_stats->xdp_tx_full; + s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler += rq_stats->mpwqe_filler; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; + s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; + s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_page_reuse += rq_stats->page_reuse; + s->rx_cache_reuse += rq_stats->cache_reuse; + s->rx_cache_full += rq_stats->cache_full; + s->rx_cache_empty += rq_stats->cache_empty; + s->rx_cache_busy += rq_stats->cache_busy; + s->rx_cache_waive += rq_stats->cache_waive; + s->ch_eq_rearm += ch_stats->eq_rearm; + + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = &c->sq[j].stats; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_tso_packets += sq_stats->tso_packets; + s->tx_tso_bytes += sq_stats->tso_bytes; + s->tx_tso_inner_packets += sq_stats->tso_inner_packets; + s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_added_vlan_packets += sq_stats->added_vlan_packets; + s->tx_queue_stopped += sq_stats->stopped; + s->tx_queue_wake += sq_stats->wake; + s->tx_queue_dropped += sq_stats->dropped; + s->tx_xmit_more += sq_stats->xmit_more; + s->tx_csum_partial_inner += sq_stats->csum_partial_inner; + s->tx_csum_none += sq_stats->csum_none; + s->tx_csum_partial += sq_stats->csum_partial; + } + } + + s->link_down_events_phy = MLX5_GET(ppcnt_reg, + priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); + memcpy(&priv->stats.sw, s, sizeof(*s)); +} + static const struct counter_desc q_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, }; @@ -129,6 +195,22 @@ static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; + int err; + + if (!priv->q_counter) + return; + + err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out)); + if (err) + return; + + qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer); +} + #define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) static const struct counter_desc vport_stats_desc[] = { { "rx_vport_unicast_packets", @@ -201,6 +283,19 @@ static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_vport_update_stats(struct mlx5e_priv *priv) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; + struct mlx5_core_dev *mdev = priv->mdev; + + MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, other_vport, 0); + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + #define PPORT_802_3_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) @@ -253,6 +348,20 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PPORT_2863_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_2863_cntrs_grp_data_layout.c##_high) @@ -290,6 +399,20 @@ static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_2863_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PPORT_2819_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_2819_cntrs_grp_data_layout.c##_high) @@ -337,6 +460,20 @@ static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PPORT_PHY_STATISTICAL_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.phys_layer_statistical_cntrs.c##_high) @@ -377,6 +514,27 @@ static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +static void mlx5e_grp_phy_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return; + + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PPORT_ETH_EXT_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) @@ -419,6 +577,23 @@ static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_eth_ext_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->eth_ext_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PCIE_PERF_OFF(c) \ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) static const struct counter_desc pcie_perf_stats_desc[] = { @@ -506,6 +681,22 @@ static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); + void *out; + + if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) + return; + + out = pcie_stats->pcie_perf_counters; + MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); +} + #define PPORT_PER_PRIO_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_per_prio_grp_data_layout.c##_high) @@ -679,6 +870,25 @@ static int mlx5e_grp_per_prio_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int prio; + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, + MLX5_REG_PPCNT, 0, 0); + } +} + static const struct counter_desc mlx5e_pme_status_desc[] = { { "module_unplug", 8 }, }; @@ -746,6 +956,11 @@ static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx + mlx5e_ipsec_get_stats(priv, data + idx); } +static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv) +{ + mlx5e_ipsec_update_stats(priv); +} + static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, @@ -863,56 +1078,71 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +/* The stats groups order is opposite to the update_stats() order calls */ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, .fill_strings = mlx5e_grp_sw_fill_strings, .fill_stats = mlx5e_grp_sw_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_sw_update_stats, }, { .get_num_stats = mlx5e_grp_q_get_num_stats, .fill_strings = mlx5e_grp_q_fill_strings, .fill_stats = mlx5e_grp_q_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_q_update_stats, }, { .get_num_stats = mlx5e_grp_vport_get_num_stats, .fill_strings = mlx5e_grp_vport_fill_strings, .fill_stats = mlx5e_grp_vport_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_vport_update_stats, }, { .get_num_stats = mlx5e_grp_802_3_get_num_stats, .fill_strings = mlx5e_grp_802_3_fill_strings, .fill_stats = mlx5e_grp_802_3_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_802_3_update_stats, }, { .get_num_stats = mlx5e_grp_2863_get_num_stats, .fill_strings = mlx5e_grp_2863_fill_strings, .fill_stats = mlx5e_grp_2863_fill_stats, + .update_stats = mlx5e_grp_2863_update_stats, }, { .get_num_stats = mlx5e_grp_2819_get_num_stats, .fill_strings = mlx5e_grp_2819_fill_strings, .fill_stats = mlx5e_grp_2819_fill_stats, + .update_stats = mlx5e_grp_2819_update_stats, }, { .get_num_stats = mlx5e_grp_phy_get_num_stats, .fill_strings = mlx5e_grp_phy_fill_strings, .fill_stats = mlx5e_grp_phy_fill_stats, + .update_stats = mlx5e_grp_phy_update_stats, }, { .get_num_stats = mlx5e_grp_eth_ext_get_num_stats, .fill_strings = mlx5e_grp_eth_ext_fill_strings, .fill_stats = mlx5e_grp_eth_ext_fill_stats, + .update_stats = mlx5e_grp_eth_ext_update_stats, }, { .get_num_stats = mlx5e_grp_pcie_get_num_stats, .fill_strings = mlx5e_grp_pcie_fill_strings, .fill_stats = mlx5e_grp_pcie_fill_stats, + .update_stats = mlx5e_grp_pcie_update_stats, }, { .get_num_stats = mlx5e_grp_per_prio_get_num_stats, .fill_strings = mlx5e_grp_per_prio_fill_strings, .fill_stats = mlx5e_grp_per_prio_fill_stats, + .update_stats = mlx5e_grp_per_prio_update_stats, }, { .get_num_stats = mlx5e_grp_pme_get_num_stats, @@ -923,6 +1153,7 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .get_num_stats = mlx5e_grp_ipsec_get_num_stats, .fill_strings = mlx5e_grp_ipsec_fill_strings, .fill_stats = mlx5e_grp_ipsec_fill_stats, + .update_stats = mlx5e_grp_ipsec_update_stats, }, { .get_num_stats = mlx5e_grp_channels_get_num_stats, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 4316765d9d66..0b3320a2b072 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -207,11 +207,17 @@ struct mlx5e_stats { struct mlx5e_pcie_stats pcie; }; +enum { + MLX5E_NDO_UPDATE_STATS = BIT(0x1), +}; + struct mlx5e_priv; struct mlx5e_stats_grp { + u16 update_stats_mask; int (*get_num_stats)(struct mlx5e_priv *priv); int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); + void (*update_stats)(struct mlx5e_priv *priv); }; extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; From 63a612f984a1fae040ab6f1c6a0f1fdcdf1954b8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 9 Jan 2018 14:40:21 +0200 Subject: [PATCH 15/15] net/mlx5e: Add likely to the common RX checksum flow Most of the packets return true for is_last_ethertype_ip, surround it with likely compiler hint. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ff234dfefc27..0d4bb0688faa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -631,7 +631,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (is_last_ethertype_ip(skb, &network_depth)) { + if (likely(is_last_ethertype_ip(skb, &network_depth))) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); if (network_depth > ETH_HLEN)