diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 632a04b0ecaf..0099a3e397bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -991,20 +991,6 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb); -struct mlx5_eswitch_rep; -int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); -void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); -void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); -void mlx5e_update_hw_rep_counters(struct mlx5e_priv *priv); - /* common netdev helpers */ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); @@ -1031,12 +1017,6 @@ int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); -int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp); -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); - -bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); - /* mlx5e generic netdev management API */ struct net_device* mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e43411d232ee..a61b71b6fff3 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -35,9 +35,10 @@ #include #include #include +#include "eswitch.h" #include "en.h" #include "en_tc.h" -#include "eswitch.h" +#include "en_rep.h" #include "vxlan.h" struct mlx5e_rq_param { @@ -3784,6 +3785,12 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) (pci_bw < 40000) && (pci_bw < link_speed)); } +static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw) +{ + return !(link_speed && pci_bw && + (pci_bw <= 16000) && (pci_bw < link_speed)); +} + void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) { params->rx_cq_period_mode = cq_period_mode; @@ -3828,6 +3835,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->num_channels = max_channels; params->num_tc = 1; + mlx5e_get_max_linkspeed(mdev, &link_speed); + mlx5e_get_pci_bw(mdev, &pci_bw); + mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + /* SQ */ params->log_sq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@ -3836,13 +3848,9 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* set CQE compression */ params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && - MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5e_get_max_linkspeed(mdev, &link_speed); - mlx5e_get_pci_bw(mdev, &pci_bw); - mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); + MLX5_CAP_GEN(mdev, vport_group_manager)) params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw); - } + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); /* RQ */ @@ -3851,7 +3859,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* HW LRO */ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - params->lro_en = true; + params->lro_en = hw_lro_heuristic(link_speed, pci_bw); params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ @@ -4123,48 +4131,10 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) return 0; } -static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - u8 mac[ETH_ALEN]; - - if (!MLX5_CAP_GEN(mdev, vport_group_manager)) - return; - - mlx5_query_nic_vport_mac_address(mdev, 0, mac); - - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep rep; - - rep.load = mlx5e_vport_rep_load; - rep.unload = mlx5e_vport_rep_unload; - rep.vport = vport; - ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, vport, &rep); - } -} - -static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - - if (!MLX5_CAP_GEN(mdev, vport_group_manager)) - return; - - for (vport = 1; vport < total_vfs; vport++) - 
mlx5_eswitch_unregister_vport_rep(esw, vport); -} - static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep rep; u16 max_mtu; mlx5e_init_l2_addr(priv); @@ -4179,16 +4149,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_enable_async_events(priv); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); - rep.load = mlx5e_nic_rep_load; - rep.unload = mlx5e_nic_rep_unload; - rep.vport = FDB_UPLINK_VPORT; - rep.netdev = netdev; - mlx5_eswitch_register_vport_rep(esw, 0, &rep); - } - - mlx5e_register_vport_rep(mdev); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -4212,7 +4174,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) static void mlx5e_nic_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; rtnl_lock(); if (netif_running(priv->netdev)) @@ -4221,9 +4182,10 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) rtnl_unlock(); queue_work(priv->wq, &priv->set_rx_mode_work); - mlx5e_unregister_vport_rep(mdev); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5_eswitch_unregister_vport_rep(esw, 0); + mlx5e_unregister_vport_reps(priv); + mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); } @@ -4394,7 +4356,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; int total_vfs = MLX5_TOTAL_VPORTS(mdev); - void *ppriv = NULL; + struct mlx5e_rep_priv *rpriv = NULL; void *priv; int vport; int err; @@ -4404,10 +4366,17 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (err) return NULL; - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - ppriv = &esw->offloads.vport_reps[0]; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + 
rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) { + mlx5_core_warn(mdev, + "Not creating net device, Failed to alloc rep priv data\n"); + return NULL; + } + rpriv->rep = &esw->offloads.vport_reps[0]; + } - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); goto err_unregister_reps; @@ -4439,16 +4408,19 @@ err_unregister_reps: for (vport = 1; vport < total_vfs; vport++) mlx5_eswitch_unregister_vport_rep(esw, vport); + kfree(rpriv); return NULL; } static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { struct mlx5e_priv *priv = vpriv; + void *ppriv = priv->ppriv; unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); + kfree(ppriv); } static void *mlx5e_get_netdev(void *vpriv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 16b683e8226d..79462c0368a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -34,10 +34,14 @@ #include #include #include +#include +#include #include "eswitch.h" #include "en.h" +#include "en_rep.h" #include "en_tc.h" +#include "fs_core.h" static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -75,7 +79,8 @@ static void mlx5e_rep_get_strings(struct net_device *dev, static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct rtnl_link_stats64 *vport_stats; struct ifla_vf_stats vf_stats; int err; @@ -165,7 +170,8 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); - struct 
mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw->mode == SRIOV_NONE) @@ -184,10 +190,10 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) } int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) - { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5e_channel *c; int n, tc, num_sqs = 0; int err = -ENOMEM; @@ -212,42 +218,398 @@ out: return err; } -int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) -{ - struct net_device *netdev = rep->netdev; - struct mlx5e_priv *priv = netdev_priv(netdev); - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - return mlx5e_add_sqs_fwd_rules(priv); - return 0; -} - void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; mlx5_eswitch_sqs2vport_stop(esw, rep); } -void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) { - struct net_device *netdev = rep->netdev; +#if IS_ENABLED(CONFIG_IPV6) + unsigned long ipv6_interval = NEIGH_VAR(&ipv6_stub->nd_tbl->parms, + DELAY_PROBE_TIME); +#else + unsigned long ipv6_interval = ~0UL; +#endif + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, + DELAY_PROBE_TIME); + struct net_device *netdev = rpriv->rep->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_remove_sqs_fwd_rules(priv); + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + 
mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); +} - /* clean (and re-init) existing uplink offloaded TC rules */ - mlx5e_tc_cleanup(priv); - mlx5e_tc_init(priv); +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); +} + +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) +{ + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe; + + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + refcount_inc(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) + kfree(nhe); +} + +static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]) +{ + struct ethhdr *eth = (struct ethhdr *)e->encap_header; + + ASSERT_RTNL(); + + if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_tc_encap_flows_del(priv, e); + + if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + ether_addr_copy(e->h_dest, ha); + ether_addr_copy(eth->h_dest, ha); + + mlx5e_tc_encap_flows_add(priv, e); + } +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct mlx5e_neigh_hash_entry *nhe = + container_of(work, 
struct mlx5e_neigh_hash_entry, neigh_update_work); + struct neighbour *n = nhe->n; + struct mlx5e_encap_entry *e; + unsigned char ha[ETH_ALEN]; + struct mlx5e_priv *priv; + bool neigh_connected; + bool encap_connected; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and its hw address. + */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + priv = netdev_priv(e->out_dev); + + if (encap_connected != neigh_connected || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + } + mlx5e_rep_neigh_entry_release(nhe); + rtnl_unlock(); + neigh_release(n); +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct mlx5e_neigh m_neigh = {}; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, 
n->primary_key, n->tbl->key_len); + + /* We are in atomic context and can't take RTNL mutex, so use + * spin_lock_bh to lookup the neigh table. bh is used since + * netevent can be called from a softirq context. + */ + spin_lock_bh(&neigh_update->encap_lock); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + if (!nhe) { + spin_unlock_bh(&neigh_update->encap_lock); + return NOTIFY_DONE; + } + + /* This assignment is valid as long as the neigh reference + * is taken + */ + nhe->n = n; + + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + mlx5e_rep_neigh_entry_hold(nhe); + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } + spin_unlock_bh(&neigh_update->encap_lock); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay probe time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use spin_lock_bh to walk the neigh list and look for + * the relevant device. bh is used since netevent can be + * called from a softirq context. 
+ */ + spin_lock_bh(&neigh_update->encap_lock); + list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) { + if (p->dev == nhe->m_neigh.dev) { + found = true; + break; + } + } + spin_unlock_bh(&neigh_update->encap_lock); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + return err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + spin_lock_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); + if (err) + goto out_err; + return 0; + +out_err: + rhashtable_destroy(&neigh_update->neigh_ht); + return err; +} + +static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->rep->netdev); + + unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int 
mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + spin_lock_bh(&rpriv->neigh_update.encap_lock); + + list_del(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + spin_unlock_bh(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under RTNL lock or under the + * representor's encap_lock in case RTNL mutex can't be held. + */ +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); +} + +static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} + +static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + /* The neigh hash entry must be removed from the hash table 
regardless + * of the reference count value, so it won't be found by the next + * neigh notification call. The neigh hash entry reference count is + * incremented only during creation and neigh notification calls and + * protects from freeing the nhe struct. + */ + mlx5e_rep_neigh_entry_remove(priv, nhe); + mlx5e_rep_neigh_entry_release(nhe); +} + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + int err; + + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + if (!nhe) { + err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); + if (err) + return err; + } + list_add(&e->encap_list, &nhe->encap_list); + return 0; +} + +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + + list_del(&e->encap_list); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + + if (list_empty(&nhe->encap_list)) + mlx5e_rep_neigh_entry_destroy(priv, nhe); } static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -265,7 +627,8 @@ static int mlx5e_rep_open(struct net_device *dev) static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; (void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN); @@ -277,7 +640,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + 
struct mlx5_eswitch_rep *rep = rpriv->rep; int ret; ret = snprintf(buf, len, "%d", rep->vport - 1); @@ -320,10 +684,16 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; - if (rep && rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS) + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return false; + + rep = rpriv->rep; + if (esw->mode == SRIOV_OFFLOADS && + rep && rep->vport == FDB_UPLINK_VPORT) return true; return false; @@ -331,7 +701,8 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; if (rep && rep->vport != FDB_UPLINK_VPORT) return true; @@ -464,7 +835,8 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_flow_handle *flow_rule; int err; @@ -504,7 +876,8 @@ err_destroy_direct_rqts: static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; mlx5e_tc_cleanup(priv); mlx5_del_flow_rules(rep->vport_rx_rule); @@ -543,20 +916,70 @@ static struct mlx5e_profile mlx5e_rep_profile = { .max_tc = 1, }; -int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +/* e-Switch vport representors */ + +static int +mlx5e_nic_rep_load(struct 
mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { + struct mlx5e_priv *priv = netdev_priv(rep->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + int err; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + return err; + } + + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_remove_sqs; + + return 0; + +err_remove_sqs: + mlx5e_remove_sqs_fwd_rules(priv); + return err; +} + +static void +mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = netdev_priv(rep->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* clean (and re-init) existing uplink offloaded TC rules */ + mlx5e_tc_cleanup(priv); + mlx5e_tc_init(priv); + + mlx5e_rep_neigh_cleanup(rpriv); +} + +static int +mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; struct net_device *netdev; int err; - netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); + kfree(rpriv); return -EINVAL; } rep->netdev = netdev; + rpriv->rep = rep; err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { @@ -565,31 +988,104 @@ int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, goto err_destroy_netdev; } - err = register_netdev(netdev); + err = mlx5e_rep_neigh_init(rpriv); if (err) { - pr_warn("Failed to register representor netdev for vport %d\n", + pr_warn("Failed to initialized neighbours handling for vport %d\n", rep->vport); goto err_detach_netdev; } + err = register_netdev(netdev); + if (err) { + pr_warn("Failed to register representor netdev for vport %d\n", + rep->vport); + goto err_neigh_cleanup; + 
} + return 0; +err_neigh_cleanup: + mlx5e_rep_neigh_cleanup(rpriv); + err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); - + kfree(rpriv); return err; } -void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +static void +mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct net_device *netdev = rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + void *ppriv = priv->ppriv; - unregister_netdev(netdev); - mlx5e_detach_netdev(netdev_priv(netdev)); - mlx5e_destroy_netdev(netdev_priv(netdev)); + unregister_netdev(rep->netdev); + + mlx5e_rep_neigh_cleanup(rpriv); + mlx5e_detach_netdev(priv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); /* mlx5e_rep_priv */ +} + +static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(mdev, 0, mac); + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep rep; + + rep.load = mlx5e_vport_rep_load; + rep.unload = mlx5e_vport_rep_unload; + rep.vport = vport; + ether_addr_copy(rep.hw_id, mac); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); + } +} + +static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); +} + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; + + mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); + 
rep.load = mlx5e_nic_rep_load; + rep.unload = mlx5e_nic_rep_unload; + rep.vport = FDB_UPLINK_VPORT; + rep.netdev = priv->netdev; + mlx5_eswitch_register_vport_rep(esw, 0, &rep); /* UPLINK PF vport*/ + + mlx5e_rep_register_vf_vports(priv); /* VFs vports */ +} + +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ + mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h new file mode 100644 index 000000000000..a0a1a7a1d6c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_REP_H__ +#define __MLX5E_REP_H__ + +#include +#include +#include "eswitch.h" +#include "en.h" + +struct mlx5e_neigh_update_table { + struct rhashtable neigh_ht; + /* Save the neigh hash entries in a list in addition to the hash table + * (neigh_ht). In order to iterate easily over the neigh entries. + * Used for stats query. + */ + struct list_head neigh_list; + /* protect lookup/remove operations */ + spinlock_t encap_lock; + struct notifier_block netevent_nb; + struct delayed_work neigh_stats_work; + unsigned long min_interval; /* jiffies */ +}; + +struct mlx5e_rep_priv { + struct mlx5_eswitch_rep *rep; + struct mlx5e_neigh_update_table neigh_update; +}; + +struct mlx5e_neigh { + struct net_device *dev; + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + int family; +}; + +struct mlx5e_neigh_hash_entry { + struct rhash_head rhash_node; + struct mlx5e_neigh m_neigh; + + /* Save the neigh hash entry in a list on the representor in + * addition to the hash table. In order to iterate easily over the + * neighbour entries. Used for stats query. + */ + struct list_head neigh_list; + + /* encap list sharing the same neigh */ + struct list_head encap_list; + + /* valid only when the neigh reference is taken during + * neigh_update_work workqueue callback. + */ + struct neighbour *n; + struct work_struct neigh_update_work; + + /* neigh hash entry can be deleted only when the refcount is zero. + * refcount is needed to avoid neigh hash entry removal by TC, while + * it's used by the neigh notification call. + */ + refcount_t refcnt; + + /* Save the last reported time offloaded traffic passed over one of the + * neigh hash entry flows. 
Use it to periodically update the neigh + * 'used' value and avoid neigh deletion by the kernel. + */ + unsigned long reported_lastuse; +}; + +enum { + /* set when the encap entry is successfully offloaded into HW */ + MLX5_ENCAP_ENTRY_VALID = BIT(0), +}; + +struct mlx5e_encap_entry { + /* neigh hash entry list of encaps sharing the same neigh */ + struct list_head encap_list; + struct mlx5e_neigh m_neigh; + /* a node of the eswitch encap hash table which keeps all the encap + * entries + */ + struct hlist_node encap_hlist; + struct list_head flows; + u32 encap_id; + struct ip_tunnel_info tun_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int tunnel_type; + u8 flags; + char *encap_header; + int encap_size; +}; + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv); +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); + +int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); +bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +#endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ae66fad98244..7b1566f0ae58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -39,6 +39,7 @@ #include "en.h" #include "en_tc.h" #include "eswitch.h" +#include 
"en_rep.h" #include "ipoib.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) @@ -809,7 +810,8 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct net_device *netdev = rq->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; @@ -904,7 +906,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto mpwrq_cqe_out; } - prefetch(skb->data); + prefetchw(skb->data); cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 21b5bcaf4bc0..11c27e4fadf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -44,7 +44,9 @@ #include #include #include +#include #include "en.h" +#include "en_rep.h" #include "en_tc.h" #include "eswitch.h" #include "vxlan.h" @@ -58,6 +60,7 @@ struct mlx5_nic_flow_attr { enum { MLX5E_TC_FLOW_ESWITCH = BIT(0), MLX5E_TC_FLOW_NIC = BIT(1), + MLX5E_TC_FLOW_OFFLOADED = BIT(2), }; struct mlx5e_tc_flow { @@ -244,18 +247,128 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; - mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); + } mlx5_eswitch_del_vlan_action(esw, flow->esw_attr); - if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { mlx5e_detach_encap(priv, flow); + kvfree(flow->esw_attr->parse_attr); + } if 
(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5_modify_header_dealloc(priv->mdev, attr->mod_hdr_id); } +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_tc_flow *flow; + int err; + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + &e->encap_id); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", + err); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, &e->flows, encap) { + flow->esw_attr->encap_id = e->encap_id; + flow->rule = mlx5e_tc_add_fdb_flow(priv, + flow->esw_attr->parse_attr, + flow); + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); + } + } + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_encap_dealloc(priv->mdev, e->encap_id); + } +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + u64 bytes, packets, lastuse = 0; + struct mlx5e_tc_flow *flow; + struct mlx5e_encap_entry *e; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + + if (m_neigh->family == AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = ipv6_stub->nd_tbl; +#endif + else + 
return; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) + continue; + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + } + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev); + if (!n) { + WARN(1, "The neighbour already freed\n"); + return; + } + + neigh_event_send(n, NULL); + neigh_release(n); + } +} + static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -263,14 +376,16 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, list_del(&flow->encap); if (list_empty(next)) { - struct mlx5_encap_entry *e; + struct mlx5e_encap_entry *e; - e = list_entry(next, struct mlx5_encap_entry, flows); - if (e->n) { + e = list_entry(next, struct mlx5e_encap_entry, flows); + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) mlx5_encap_dealloc(priv->mdev, e->encap_id); - neigh_release(e->n); - } + hlist_del_rcu(&e->encap_hlist); + kfree(e->encap_header); kfree(e); } } @@ -702,16 +817,18 @@ static int parse_cls_flower(struct mlx5e_priv *priv, { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; u8 min_inline; int err; err = __parse_cls_flower(priv, spec, f, &min_inline); - if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && - rep->vport != FDB_UPLINK_VPORT) { - if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < min_inline) { + if (!err && 
(flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + rep = rpriv->rep; + if (rep->vport != FDB_UPLINK_VPORT && + (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < min_inline)) { netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", min_inline, esw->offloads.inline_mode); @@ -1208,16 +1325,17 @@ static void gen_vxlan_header_ipv6(struct net_device *out_dev, static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5_encap_entry *e, - struct net_device **out_dev) + struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; int ttl, err; + u8 nud_state; if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -1242,25 +1360,36 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, fl4.daddr = tun_key->u.ipv4.dst; fl4.saddr = tun_key->u.ipv4.src; - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) goto out; - if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); - err = -EOPNOTSUPP; + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. 
So if we'll get a notification, in case the + * neigh changes its validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) goto out; - } - e->n = n; - e->out_dev = *out_dev; - - neigh_ha_snapshot(e->h_dest, n, *out_dev); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - gen_vxlan_header_ipv4(*out_dev, encap_header, + gen_vxlan_header_ipv4(out_dev, encap_header, ipv4_encap_size, e->h_dest, ttl, fl4.daddr, fl4.saddr, tun_key->tp_dst, @@ -1268,31 +1397,49 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto destroy_neigh_entry; + } + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + neigh_release(n); + return -EAGAIN; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, ipv4_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); out: - if (err && n) - neigh_release(n); kfree(encap_header); + if (n) + neigh_release(n); return err; } static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5_encap_entry *e, - struct net_device **out_dev) - + struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; char *encap_header; int err, ttl = 0; + u8 nud_state; if (max_encap_size < 
ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -1318,25 +1465,36 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, fl6.daddr = tun_key->u.ipv6.dst; fl6.saddr = tun_key->u.ipv6.src; - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev, + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) goto out; - if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr); - err = -EOPNOTSUPP; + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes its validity state, we would find the relevant neigh + * in the hash. 
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) goto out; - } - e->n = n; - e->out_dev = *out_dev; - - neigh_ha_snapshot(e->h_dest, n, *out_dev); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - gen_vxlan_header_ipv6(*out_dev, encap_header, + gen_vxlan_header_ipv6(out_dev, encap_header, ipv6_encap_size, e->h_dest, ttl, &fl6.daddr, &fl6.saddr, tun_key->tp_dst, @@ -1344,31 +1502,51 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto destroy_neigh_entry; + } + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + neigh_release(n); + return -EAGAIN; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, ipv6_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); out: - if (err && n) - neigh_release(n); kfree(encap_header); + if (n) + neigh_release(n); return err; } static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, - struct mlx5_esw_flow_attr *attr) + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw); - struct mlx5e_priv *up_priv = netdev_priv(up_dev); unsigned short family = ip_tunnel_info_af(tun_info); + struct mlx5e_priv *up_priv = netdev_priv(up_dev); + struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct ip_tunnel_key *key = &tun_info->key; - struct mlx5_encap_entry *e; - struct net_device *out_dev; - int tunnel_type, err = 
-EOPNOTSUPP; + struct mlx5e_encap_entry *e; + int tunnel_type, err = 0; uintptr_t hash_key; bool found = false; @@ -1403,10 +1581,8 @@ vxlan_encap_offload_err: } } - if (found) { - attr->encap = e; - return 0; - } + if (found) + goto attach_flow; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) @@ -1417,16 +1593,21 @@ vxlan_encap_offload_err: INIT_LIST_HEAD(&e->flows); if (family == AF_INET) - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) - err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev); + err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); - if (err) + if (err && err != -EAGAIN) goto out_err; - attr->encap = e; hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); +attach_flow: + list_add(&flow->encap, &e->flows); + *encap_dev = e->out_dev; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + attr->encap_id = e->encap_id; + return err; out_err: @@ -1439,17 +1620,18 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow *flow) { struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; const struct tc_action *a; LIST_HEAD(actions); bool encap = false; - int err; + int err = 0; if (tc_no_actions(exts)) return -EINVAL; memset(attr, 0, sizeof(*attr)); - attr->in_rep = priv->ppriv; + attr->in_rep = rpriv->rep; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { @@ -1471,7 +1653,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); - struct net_device *out_dev; + struct net_device *out_dev, *encap_dev = NULL; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -1481,18 +1663,20 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct 
tcf_exts *exts, attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; out_priv = netdev_priv(out_dev); - attr->out_rep = out_priv->ppriv; + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; } else if (encap) { err = mlx5e_attach_encap(priv, info, - out_dev, attr); - if (err) + out_dev, &encap_dev, flow); + if (err && err != -EAGAIN) return err; - list_add(&flow->encap, &attr->encap->flows); attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = netdev_priv(attr->encap->out_dev); - attr->out_rep = out_priv->ppriv; + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; + attr->parse_attr = parse_attr; } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); @@ -1532,7 +1716,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - return 0; + return err; } int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, @@ -1570,7 +1754,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); if (err < 0) - goto err_free; + goto err_handle_encap_flow; flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); } else { err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); @@ -1584,20 +1768,33 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto err_free; } + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) goto err_del_rule; - goto out; + if (flow->flags & MLX5E_TC_FLOW_ESWITCH && + !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + kvfree(parse_attr); + return err; err_del_rule: mlx5e_tc_del_flow(priv, flow); +err_handle_encap_flow: + if (err == -EAGAIN) { + err = 
rhashtable_insert_fast(&tc->ht, &flow->node, + tc->ht_params); + if (err) + mlx5e_tc_del_flow(priv, flow); + else + return 0; + } + err_free: - kfree(flow); -out: kvfree(parse_attr); + kfree(flow); return err; } @@ -1616,7 +1813,6 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, mlx5e_tc_del_flow(priv, flow); - kfree(flow); return 0; @@ -1639,6 +1835,9 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, if (!flow) return -EINVAL; + if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) + return 0; + counter = mlx5_flow_rule_counter(flow->rule); if (!counter) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 34bf903fc886..ecbe30d808ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -46,6 +46,15 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, int mlx5e_stats_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); +struct mlx5e_encap_entry; +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +struct mlx5e_neigh_hash_entry; +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); + static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return atomic_read(&priv->fs.tc.ht.nelems); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 43729ec35dfc..5ca6714e3e02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -37,8 +37,8 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) struct mlx5_cqwq *wq = &cq->wq; u32 ci = mlx5_cqwq_get_ci(wq); struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); - int cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; - int sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + u8 cqe_ownership_bit = 
cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; if (cqe_ownership_bit != sw_ownership_val) return NULL; @@ -49,10 +49,40 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) return cqe; } +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5_cqe64 *cqe, + u16 *sqcc) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + struct mlx5e_rq *rq = &sq->channel->rq; + + prefetch(rq); + mlx5_cqwq_pop(&cq->wq); + *sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + return; + } + + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); + return; + } + + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); +} + static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); - struct mlx5_wq_cyc *wq; struct mlx5_cqe64 *cqe; u16 sqcc; @@ -63,39 +93,13 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) if (likely(!cqe)) return; - wq = &sq->wq; - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; - do { - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - - mlx5_cqwq_pop(&cq->wq); - sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - break; - } - - switch (icowi->opcode) { - case MLX5_OPCODE_NOP: - break; - case MLX5_OPCODE_UMR: - mlx5e_post_rx_mpwqe(&sq->channel->rq); - break; - default: - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); - } - - } while ((cqe = 
mlx5e_get_cqe(cq))); + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 21bed3c3334d..2e34d95ea776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -53,13 +53,6 @@ struct esw_uc_addr { u32 vport; }; -/* E-Switch MC FDB table hash node */ -struct esw_mc_addr { /* SRIOV only */ - struct l2addr_node node; - struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ - u32 refcnt; -}; - /* Vport UC/MC hash node */ struct vport_addr { struct l2addr_node node; @@ -817,7 +810,7 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, bool promisc, bool mc_promisc) { - struct esw_mc_addr *allmulti_addr = esw->mc_promisc; + struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; struct mlx5_vport *vport = &esw->vports[vport_num]; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) @@ -1688,7 +1681,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", esw->enabled_vports, esw->mode); - mc_promisc = esw->mc_promisc; + mc_promisc = &esw->mc_promisc; nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) @@ -1732,7 +1725,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); int total_vports = MLX5_TOTAL_VPORTS(dev); - struct esw_mc_addr *mc_promisc; struct mlx5_eswitch *esw; int vport_num; int err; @@ -1761,13 +1753,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->l2_table.size = l2_table_size; - mc_promisc = kzalloc(sizeof(*mc_promisc), GFP_KERNEL); - if (!mc_promisc) { - err = -ENOMEM; - goto abort; - } - esw->mc_promisc = 
mc_promisc; - esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1835,7 +1820,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); - kfree(esw->mc_promisc); kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 1e7f21be1233..b746f62c8c79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,7 +36,6 @@ #include #include #include -#include #include #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -213,6 +212,13 @@ struct mlx5_esw_offload { u8 encap; }; +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_l2_table l2_table; @@ -226,7 +232,7 @@ struct mlx5_eswitch { * and async SRIOV admin state changes */ struct mutex state_lock; - struct esw_mc_addr *mc_promisc; + struct esw_mc_addr mc_promisc; struct { bool enabled; @@ -289,18 +295,6 @@ enum { #define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x4000 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x8000 -struct mlx5_encap_entry { - struct hlist_node encap_hlist; - struct list_head flows; - u32 encap_id; - struct neighbour *n; - struct ip_tunnel_info tun_info; - unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ - - struct net_device *out_dev; - int tunnel_type; -}; - struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_eswitch_rep *out_rep; @@ -308,8 +302,9 @@ struct mlx5_esw_flow_attr { int action; u16 vlan; bool vlan_handled; - struct mlx5_encap_entry *encap; + u32 encap_id; u32 mod_hdr_id; + struct mlx5e_tc_flow_parse_attr *parse_attr; }; int 
mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d297354e8ea9..f991f669047e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -92,7 +92,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, flow_act.modify_id = attr->mod_hdr_id; if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - flow_act.encap_id = attr->encap->encap_id; + flow_act.encap_id = attr->encap_id; rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, spec, &flow_act, dest, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 577d056bf3df..81eafc7b9dd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -199,6 +199,11 @@ struct mlx5_flow_root_namespace { int mlx5_init_fc_stats(struct mlx5_core_dev *dev); void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay); +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval); int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 7431f633de31..6507d8acc54d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -165,7 +165,8 @@ static void mlx5_fc_stats_work(struct work_struct *work) list_splice_tail_init(&fc_stats->addlist, &tmplist); if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) - queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); + 
queue_delayed_work(fc_stats->wq, &fc_stats->work, + fc_stats->sampling_interval); spin_unlock(&fc_stats->addlist_lock); @@ -200,7 +201,7 @@ static void mlx5_fc_stats_work(struct work_struct *work) node = mlx5_fc_stats_query(dev, counter, last->id); } - fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; + fc_stats->next_query = now + fc_stats->sampling_interval; } struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) @@ -265,6 +266,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) if (!fc_stats->wq) return -ENOMEM; + fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); return 0; @@ -317,3 +319,21 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter, counter->lastbytes = c.bytes; counter->lastpackets = c.packets; } + +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + queue_delayed_work(fc_stats->wq, dwork, delay); +} + +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->sampling_interval = min_t(unsigned long, interval, + fc_stats->sampling_interval); +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f50864626230..3fece51dcf13 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -540,6 +540,7 @@ struct mlx5_fc_stats { struct workqueue_struct *wq; struct delayed_work work; unsigned long next_query; + unsigned long sampling_interval; /* jiffies */ }; struct mlx5_eswitch;