From 73f5a82bb3c9fce550da4a74a32b8cb064b50663 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 13 Jan 2019 15:57:04 +0200 Subject: [PATCH 01/35] RDMA/mad: Reduce MAD scope to mlx5_ib only Management Datagram Interface (MAD) is applicable only when physical port is Infiniband. It makes MAD command logic to be completely unrelated to eth/core parts of mlx5. Signed-off-by: Leon Romanovsky Acked-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 37 +++++++++ drivers/infiniband/hw/mlx5/cmd.h | 2 + drivers/infiniband/hw/mlx5/mad.c | 11 ++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 75 ------------------- include/linux/mlx5/driver.h | 2 - 7 files changed, 47 insertions(+), 85 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mad.c diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 356bccc715ee..6bcc63aaa50b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, counter_set_id); return err; } + +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port) +{ + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; + + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); + + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); + +out: + kfree(out); + kfree(in); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1e76dc67a369..923a7b93f507 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 558638468edb..6c529e6f3a01 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_ib.h" +#include "cmd.h" enum { MLX5_IB_VENDOR_CLASS1 = 0x9, @@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, return dev->mdev->port_caps[port_num - 1].has_smi; } -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad) +static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, + int ignore_bkey, u8 port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const void *in_mad, + void *response_mad) { u8 op_modifier = 0; @@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); + return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, + port); } static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06d3b1efea8..efe383c0ac86 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1038,9 +1038,6 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9de9abacf7f6..0257731e6d42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c deleted file mode 100644 index 3a3b0005fd2b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include "mlx5_core.h" - -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port) -{ - int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); - int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); - int err = -ENOMEM; - void *data; - void *resp; - u32 *out; - u32 *in; - - in = kzalloc(inlen, GFP_KERNEL); - out = kzalloc(outlen, GFP_KERNEL); - if (!in || !out) - goto out; - - MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); - MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); - - data = MLX5_ADDR_OF(mad_ifc_in, in, mad); - memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - if (err) - goto out; - - resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); - memcpy(outb, resp, - MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); - -out: - kfree(out); - kfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..4e444863054a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -897,8 +897,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port); int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); From e355477ed9e4f401e3931043df97325d38552d54 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 18 Jan 2019 16:33:10 -0800 Subject: [PATCH 02/35] net/mlx5: Make mlx5_cmd_exec_cb() a safe API APIs that have deferred callbacks should have some kind of cleanup function that callers can use to fence the callbacks. Otherwise things like module unloading can lead to dangling function pointers, or worse. The IB MR code is the only place that calls this function and had a really poor attempt at creating this fence. Provide a good version in the core code as future patches will add more places that need this fence. Signed-off-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + drivers/infiniband/hw/mlx5/mr.c | 39 +++---------- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 55 +++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 11 ++-- include/linux/mlx5/driver.h | 32 +++++++++-- 5 files changed, 91 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index efe383c0ac86..eedba0d2ec4b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -587,6 +587,7 @@ struct mlx5_ib_mr { struct mlx5_ib_mr *parent; atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; + struct mlx5_async_work cb_work; }; struct mlx5_ib_mw { @@ -944,6 +945,7 @@ struct mlx5_ib_dev { struct mlx5_memic memic; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; + struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd6ea1f75085..bf2b6ea23851 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -123,9 +123,10 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) } #endif -static void reg_mr_callback(int status, void *context) +static void reg_mr_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = context; + struct mlx5_ib_mr *mr = + container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); @@ -216,9 +217,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - in, inlen, + &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, mr); + reg_mr_callback, &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -679,6 +680,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return -ENOMEM; } + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; @@ -725,33 +727,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } -static void wait_for_async_commands(struct mlx5_ib_dev *dev) -{ - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int total = 0; - int i; - int j; - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - for (j = 0 ; j < 1000; j++) { - if (!ent->pending) - break; - msleep(50); - } - } - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - total += ent->pending; - } - - if (total) - mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); - else - mlx5_ib_warn(dev, "done with all pending requests\n"); -} - int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -763,12 +738,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); - wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3e0fa8a8077b..a25a8c6f938e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1711,12 +1711,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context, - false); + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). + */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0670165afd5f..ea744d8466ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; @@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(mkc, mkc, mkey_7_0, key); if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); @@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4e444863054a..039c9398614c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -850,11 +850,30 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +struct mlx5_async_ctx { + struct mlx5_core_dev *dev; + atomic_t num_inflight; + struct wait_queue_head wait; +}; + +struct mlx5_async_work; + +typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); + +struct mlx5_async_work { + struct mlx5_async_ctx *ctx; + mlx5_async_cbk_t user_callback; +}; + +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx); +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context); int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); @@ -885,9 +904,10 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context); + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); From ce4eee5340a966f633fee1e8603aafd223ca394a Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 18 Jan 2019 16:33:11 -0800 Subject: [PATCH 03/35] net/mlx5: Add pci AtomicOps request Calling pci_enable_atomic_ops_to_root enables AtomicOp requests to pci root port. AtomicOp requests will be enabled only if the completer and all intermediate pci bridges support PCI atomic operations. This, together with appropriate settings in the NVCONFIG should enable PCI atomic operations on the device. PCI atomic operations were first introduced in PCI Express Base Specification 2.1. The Supported operations are Swap (Unconditional Swap), CAS (Compare and Swap) and FetchAdd (Fetch and Add). Unlike other atomic operation modes PCI atomic operations gives the user the option to do atomic operations on local memory, without involving verbs api, without it compromising the operation's atomicity. Signed-off-by: Michael Guralnik Reviewed-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index be81b319b0dc..085e1133b8d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -693,6 +693,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_clr_master; } + if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); + dev->iseg_base = pci_resource_start(dev->pdev, 0); dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); if (!dev->iseg) { From dda7a817f2873a0e0b1c7fde1265758f3623daa4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:49 +0200 Subject: [PATCH 04/35] net/mlx5: Add XRC transport to ODP device capabilities layout The device capabilities for ODP structure was missing the field for XRC transport so add it here. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 35fe5217b244..5407db8ba8e1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -831,7 +831,9 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; - u8 reserved_at_e0[0x720]; + struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_calc_op { From 46861e3e88be18846971792b763eaf520a91a802 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:51 +0200 Subject: [PATCH 05/35] net/mlx5: Set ODP SRQ support in firmware To avoid compatibility issue with older kernels the firmware doesn't allow SRQ to work with ODP unless kernel asks for it. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/main.c | 53 +++++++++++++++++++ include/linux/mlx5/device.h | 3 ++ include/linux/mlx5/mlx5_ifc.h | 1 + 3 files changed, 57 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 085e1133b8d5..e38aa206ab6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -459,6 +459,53 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_odp(struct mlx5_core_dev *dev) +{ + void *set_ctx; + void *set_hca_cap; + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int err; + + if (!MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + /** + * If all bits are cleared we shouldn't try to set it + * or we might fail while trying to access a reserved bit. + */ + if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))) + return 0; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + MLX5_ST_SZ_BYTES(odp_cap)); + + /* set ODP SRQ support for RC/UD and XRC transports */ + MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))); + + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP); + + kfree(set_ctx); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -931,6 +978,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } + err = handle_hca_cap_odp(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + goto reclaim_boot_pages; + } + err = mlx5_satisfy_startup_pages(dev, 0); if (err) { dev_err(&pdev->dev, "failed to allocate init pages\n"); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8c4a820bd4c1..0845a227a7b2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1201,6 +1201,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap) +#define MLX5_CAP_ODP_MAX(mdev, cap)\ + MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap) + #define MLX5_CAP_VECTOR_CALC(mdev, cap) \ MLX5_GET(vector_calc_cap, \ mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5407db8ba8e1..c5c679390fbd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -72,6 +72,7 @@ enum { enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, }; From 1fbf1252df0e4212e17f98130cb5372b3bc019b0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 6 Feb 2019 15:00:19 -0800 Subject: [PATCH 06/35] mlx5: use RCU lock in mlx5_eq_cq_get() mlx5_eq_cq_get() is called in IRQ handler, the spinlock inside gets a lot of contentions when we test some heavy workload with 60 RX queues and 80 CPU's, and it is clearly shown in the flame graph. In fact, radix_tree_lookup() is perfectly fine with RCU read lock, we don't have to take a spinlock on this hot path. This is pretty much similar to commit 291c566a2891 ("net/mlx4_core: Fix racy CQ (Completion Queue) free"). Slow paths are still serialized with the spinlock, and with synchronize_irq() it should be safe to just move the fast path to RCU read lock. This patch itself reduces the latency by about 50% for our memcached workload on a 4.14 kernel we test. In upstream, as pointed out by Saeed, this spinlock gets some rework in commit 02d92f790364 ("net/mlx5: CQ Database per EQ"), so the difference could be smaller. Cc: Saeed Mahameed Cc: Tariq Toukan Acked-by: Saeed Mahameed Signed-off-by: Cong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ee04aab65a9f..7092457705a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -114,11 +114,11 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *cq = NULL; - spin_lock(&table->lock); + rcu_read_lock(); cq = radix_tree_lookup(&table->tree, cqn); if (likely(cq)) mlx5_cq_hold(cq); - spin_unlock(&table->lock); + rcu_read_unlock(); return cq; } @@ -371,9 +371,9 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; int err; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); err = radix_tree_insert(&table->tree, cq->cqn, cq); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); return err; } @@ -383,9 +383,9 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); tmp = radix_tree_delete(&table->tree, cq->cqn); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); if (!tmp) { mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); From 224d71ccc07c6b221e4fdff7cd3d04c6033228c5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 11 Feb 2019 13:56:07 +0200 Subject: [PATCH 07/35] net/mlx5: Align ODP capability function with netdev coding style Update newly introduced function to be aligned to netdev coding style. Fixes: 46861e3e88be ("net/mlx5: Set ODP SRQ support in firmware") Reported-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e38aa206ab6d..6d45518edbdc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -461,9 +461,9 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) static int handle_hca_cap_odp(struct mlx5_core_dev *dev) { - void *set_ctx; void *set_hca_cap; - int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_ctx; + int set_sz; int err; if (!MLX5_CAP_GEN(dev, pg)) @@ -473,15 +473,12 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev) if (err) return err; - /** - * If all bits are cleared we shouldn't try to set it - * or we might fail while trying to access a reserved bit. - */ if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) || MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) || MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))) return 0; + set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); set_ctx = kzalloc(set_sz, GFP_KERNEL); if (!set_ctx) return -ENOMEM; @@ -492,13 +489,13 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev) /* set ODP SRQ support for RC/UD and XRC transports */ MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive, - (MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive))); + MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive)); MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive, - (MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive))); + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive)); MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive, - (MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))); + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive)); err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP); From 20bbf22a622178db71fb8bea5f9000d6f346185a Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:32 -0800 Subject: [PATCH 08/35] net/mlx5: Use void pointer as the type in address_of macro Better to use void * and avoid unnecessary casts. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0845a227a7b2..46223efa1877 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -67,7 +67,7 @@ #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) -#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) +#define MLX5_ADDR_OF(typ, p, fld) ((void *)((uint8_t *)(p) + MLX5_BYTE_OFF(typ, fld))) /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ From 7e4c4330a3bc7f34f82b5bc370821e1e16d425fb Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:33 -0800 Subject: [PATCH 09/35] net/mlx5: Use consistent vport num argument type Use u16 for vport number, which matches how hardware refers to this argument throughout commands. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 32 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/vport.c | 8 ++--- include/linux/mlx5/vport.h | 8 ++--- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a44ea7b85614..d7382892e81c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -52,7 +52,7 @@ enum { struct vport_addr { struct l2addr_node node; u8 action; - u32 vport; + u16 vport; struct mlx5_flow_handle *flow_rule; bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ @@ -115,7 +115,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -152,7 +152,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, /* E-Switch FDB */ static struct mlx5_flow_handle * -__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { int match_header = (is_zero_ether_addr(mac_c) ? 0 : @@ -215,7 +215,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } static struct mlx5_flow_handle * -esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport) { u8 mac_c[ETH_ALEN]; @@ -224,7 +224,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -237,7 +237,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -377,7 +377,7 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err; /* Skip mlx5_mpfs_add_mac for PFs, @@ -409,7 +409,7 @@ fdb_add: static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err = 0; /* Skip mlx5_mpfs_del_mac for PFs, @@ -438,7 +438,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, struct esw_mc_addr *esw_mc) { u8 *mac = vaddr->node.addr; - u32 vport_idx = 0; + u16 vport_idx = 0; for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { struct mlx5_vport *vport = &esw->vports[vport_idx]; @@ -485,7 +485,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -525,7 +525,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -564,7 +564,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Apply vport UC/MC list to HW l2 table and FDB table */ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; @@ -599,7 +599,7 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, /* Sync vport UC/MC list from vport context */ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; @@ -686,7 +686,7 @@ out: /* Sync vport UC/MC list from vport context * Must be called after esw_update_vport_addr_list */ -static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport = &esw->vports[vport_num]; struct l2addr_node *node; @@ -721,7 +721,7 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) } /* Apply vport rx mode to HW FDB table */ -static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, bool promisc, bool mc_promisc) { struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; @@ -764,7 +764,7 @@ promisc: } /* Sync vport rx mode from vport context */ -static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport = &esw->vports[vport_num]; int promisc_all = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 9b150ce9d315..9a928eb48522 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -255,7 +255,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size) @@ -373,7 +373,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size) { @@ -526,7 +526,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid) + u16 vport, u64 node_guid) { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); void *nic_vport_context; @@ -827,7 +827,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all) diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 9c694808c212..1654b911cdb2 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -60,7 +60,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid); + u16 vport, u64 node_guid); int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, @@ -78,7 +78,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size); @@ -87,7 +87,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, u8 addr_list[][ETH_ALEN], int list_size); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all); @@ -96,7 +96,7 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, int promisc_mc, int promisc_all); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size); int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, From f0666f1f22b5748e7caf87481a0869689f9f513d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:34 -0800 Subject: [PATCH 10/35] IB/mlx5: Use unified register/load function for uplink and VF vports IB driver maintains different registration and load function calls for uplink and VF vports. This is not necessary as they only differ with each other on their profiles. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 73 +++++++---------------------- drivers/infiniband/hw/mlx5/ib_rep.h | 10 ++-- drivers/infiniband/hw/mlx5/main.c | 26 +++++----- 3 files changed, 37 insertions(+), 72 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 46a9ddc8ca56..3fb22967c098 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -6,7 +6,7 @@ #include "ib_rep.h" #include "srq.h" -static const struct mlx5_ib_profile rep_profile = { +static const struct mlx5_ib_profile vf_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), @@ -45,31 +45,18 @@ static const struct mlx5_ib_profile rep_profile = { NULL), }; -static int -mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) -{ - struct mlx5_ib_dev *ibdev; - - ibdev = mlx5_ib_rep_to_dev(rep); - if (!__mlx5_ib_add(ibdev, ibdev->profile)) - return -EINVAL; - return 0; -} - -static void -mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep) -{ - struct mlx5_ib_dev *ibdev; - - ibdev = mlx5_ib_rep_to_dev(rep); - __mlx5_ib_remove(ibdev, ibdev->profile, MLX5_IB_STAGE_MAX); -} - static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { +#define FDB_UPLINK_VPORT 0xffff + const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; + if (rep->vport == FDB_UPLINK_VPORT) + profile = &uplink_rep_profile; + else + profile = &vf_rep_profile; + ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); if (!ibdev) return -ENOMEM; @@ -78,7 +65,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->mdev = dev; ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), MLX5_CAP_GEN(dev, num_vhca_ports)); - if (!__mlx5_ib_add(ibdev, &rep_profile)) + if (!__mlx5_ib_add(ibdev, profile)) return -EINVAL; rep->rep_if[REP_IB].priv = ibdev; @@ -105,15 +92,14 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) return mlx5_ib_rep_to_dev(rep); } -static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev) +void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vports = MLX5_TOTAL_VPORTS(mdev); + struct mlx5_eswitch_rep_if rep_if = {}; int vport; - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; - + for (vport = 0; vport < total_vports; vport++) { rep_if.load = mlx5_ib_vport_rep_load; rep_if.unload = mlx5_ib_vport_rep_unload; rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; @@ -121,39 +107,16 @@ static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev) } } -static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev) +void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vports = MLX5_TOTAL_VPORTS(mdev); int vport; - for (vport = 1; vport < total_vfs; vport++) + for (vport = total_vports - 1; vport >= 0; vport--) mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB); } -void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5_ib_nic_rep_load; - rep_if.unload = mlx5_ib_nic_rep_unload; - rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - rep_if.priv = dev; - - mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB); - - mlx5_ib_rep_register_vf_vports(dev); -} - -void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - - mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */ - mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/ -} - u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) { return mlx5_eswitch_mode(esw); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 2ba73636a2fb..798d41e61fb4 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -10,14 +10,16 @@ #include "mlx5_ib.h" #ifdef CONFIG_MLX5_ESWITCH +extern const struct mlx5_ib_profile uplink_rep_profile; + u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, int vport_index); struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport_index); -void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev); -void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev); +void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); +void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, @@ -48,8 +50,8 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, return NULL; } -static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {} -static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {} +static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {} +static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {} static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94fe253d4956..87ce62e44898 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6386,7 +6386,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_delay_drop_cleanup), }; -static const struct mlx5_ib_profile nic_rep_profile = { +const struct mlx5_ib_profile uplink_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), @@ -6479,6 +6479,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) printk_once(KERN_INFO "%s", mlx5_version); + if (MLX5_ESWITCH_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5_ib_register_vport_reps(mdev); + return mdev; + } + port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -6493,14 +6499,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), MLX5_CAP_GEN(mdev, num_vhca_ports)); - if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { - dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); - dev->profile = &nic_rep_profile; - mlx5_ib_register_vport_reps(dev); - return dev; - } - return __mlx5_ib_add(dev, &pf_profile); } @@ -6509,6 +6507,11 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; + if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) { + mlx5_ib_unregister_vport_reps(mdev); + return; + } + if (mlx5_core_is_mp_slave(mdev)) { mpi = context; mutex_lock(&mlx5_ib_multiport_mutex); @@ -6520,10 +6523,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) } dev = context; - if (dev->profile == &nic_rep_profile) - mlx5_ib_unregister_vport_reps(dev); - else - __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); ib_dealloc_device((struct ib_device *)dev); } From 591905ba96796e3b677b14fa79f27127bfaab4ab Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:35 -0800 Subject: [PATCH 11/35] net/mlx5: Introduce Mellanox SmartNIC and modify page management logic Mellanox's SmartNIC combines embedded CPU(e.g, ARM) processing power with advanced network offloads to accelerate a multitude of security, networking and storage applications. With the introduction of the SmartNIC, there is a new PCI function called Embedded CPU Physical Function(ECPF). And it's possible for a PF to get its ICM pages from the ECPF PCI function. Driver shall identify if it is running on such a function by reading a bit in the initialization segment. When firmware asks for pages, it would issue a page request event specifying how many pages it requests and for which function. That driver responds with a manage_pages command providing the requested pages along with an indication for which function it is providing these pages. The encoding before this patch was as follows: function_id == 0: pages are requested for the function receiving the EQE. function_id != 0: pages are requested for VF identified by the function_id value A new one bit field in the EQE identifies that pages are requested for the ECPF. The notion of page_supplier can be introduced here and to support that, manage pages and query pages were modified so firmware can distinguish the following cases: 1. Function provides pages for itself 2. PF provides pages for its VF 3. ECPF provides pages to itself 4. ECPF provides pages for another function This distinction is possible through the introduction of the bit "embedded_cpu_function" in query_pages, manage_pages and page request EQE. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/ecpf.c | 9 ++++ .../net/ethernet/mellanox/mlx5/core/ecpf.h | 25 +++++++++ .../net/ethernet/mellanox/mlx5/core/main.c | 2 + .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- .../ethernet/mellanox/mlx5/core/pagealloc.c | 54 +++++++++++++------ .../net/ethernet/mellanox/mlx5/core/sriov.c | 2 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/driver.h | 9 +++- include/linux/mlx5/mlx5_ifc.h | 12 +++-- 10 files changed, 94 insertions(+), 25 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 0257731e6d42..07965350b903 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c new file mode 100644 index 000000000000..28b8c5c5c8c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "ecpf.h" + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h new file mode 100644 index 000000000000..8b684f0ab48f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_ECPF_H__ +#define __MLX5_ECPF_H__ + +#include +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5_ECPU_BIT_NUM = 23, +}; + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline bool +mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ECPF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6d45518edbdc..08a3da2a8358 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -65,6 +65,7 @@ #include "lib/vxlan.h" #include "lib/devcom.h" #include "diag/fw_tracer.h" +#include "ecpf.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -898,6 +899,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, struct pci_dev *pdev = dev->pdev; int err; + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c68dcea5985b..b127044293b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -121,7 +121,7 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 modify_bitmask); int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, struct ptp_system_timestamp *sts); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index a83b517b0714..41025387ff2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -48,6 +48,7 @@ enum { struct mlx5_pages_req { struct mlx5_core_dev *dev; u16 func_id; + u8 ec_function; s32 npages; struct work_struct work; }; @@ -143,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, MLX5_SET(query_pages_in, in, op_mod, boot ? MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev)); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -253,7 +255,8 @@ err_mapping: return err; } -static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -262,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -270,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int notify_fail) + int notify_fail, bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); @@ -305,6 +309,7 @@ retry: MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) { @@ -316,8 +321,11 @@ retry: dev->priv.fw_pages += npages; if (func_id) dev->priv.vfs_pages += npages; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages += npages; - mlx5_core_dbg(dev, "err %d\n", err); + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", + npages, ec_function, func_id, err); kvfree(in); return 0; @@ -328,7 +336,7 @@ out_4k: out_free: kvfree(in); if (notify_fail) - page_notify_fail(dev, func_id); + page_notify_fail(dev, func_id, ec_function); return err; } @@ -364,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, } static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, - int *nclaimed) + int *nclaimed, bool ec_function) { int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -385,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); @@ -410,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages -= num_claimed; out_free: kvfree(out); @@ -423,9 +434,10 @@ static void pages_work_handler(struct work_struct *work) int err = 0; if (req->npages < 0) - err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, + req->ec_function); else if (req->npages > 0) - err = give_pages(dev, req->func_id, req->npages, 1); + err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); if (err) mlx5_core_warn(dev, "%s fail %d\n", @@ -434,6 +446,10 @@ static void pages_work_handler(struct work_struct *work) kfree(req); } +enum { + EC_FUNCTION_MASK = 0x8000, +}; + static int req_pages_handler(struct notifier_block *nb, unsigned long type, void *data) { @@ -441,6 +457,7 @@ static int req_pages_handler(struct notifier_block *nb, struct mlx5_core_dev *dev; struct mlx5_priv *priv; struct mlx5_eqe *eqe; + bool ec_function; u16 func_id; s32 npages; @@ -450,6 +467,7 @@ static int req_pages_handler(struct notifier_block *nb, func_id = be16_to_cpu(eqe->data.req_pages.func_id); npages = be32_to_cpu(eqe->data.req_pages.num_pages); + ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK; mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); @@ -461,6 +479,7 @@ static int req_pages_handler(struct notifier_block *nb, req->dev = dev; req->func_id = func_id; req->npages = npages; + req->ec_function = ec_function; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); return NOTIFY_OK; @@ -479,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", npages, boot ? "boot" : "init", func_id); - return give_pages(dev, func_id, npages, 0); + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); } enum { @@ -513,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) fwp = rb_entry(p, struct fw_page, rb_node); err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), - &nclaimed); + &nclaimed, mlx5_core_is_ecpf(dev)); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", @@ -535,6 +554,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.vfs_pages, "VFs FW pages counter is %d after reclaiming all pages\n", dev->priv.vfs_pages); + WARN(dev->priv.peer_pf_pages, + "Peer PF FW pages counter is %d after reclaiming all pages\n", + dev->priv.peer_pf_pages); return 0; } @@ -567,10 +589,10 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) flush_workqueue(dev->priv.pg_wq); } -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) { unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - int prev_vfs_pages = dev->priv.vfs_pages; + int prev_pages = *pages; /* In case of internal error we will free the pages manually later */ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -578,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) return 0; } - mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages, dev->priv.name); - while (dev->priv.vfs_pages) { + while (*pages) { if (time_after(jiffies, end)) { - mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); return -ETIMEDOUT; } - if (dev->priv.vfs_pages < prev_vfs_pages) { + if (*pages < prev_pages) { end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - prev_vfs_pages = dev->priv.vfs_pages; + prev_pages = *pages; } msleep(50); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 6e178030d8fb..7b23fa8d2d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -147,7 +147,7 @@ out: if (MLX5_ESWITCH_MANAGER(dev)) mlx5_eswitch_disable_sriov(dev->priv.eswitch); - if (mlx5_wait_for_vf_pages(dev)) + if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 46223efa1877..f2070350f60a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -591,7 +591,7 @@ struct mlx5_eqe_cmd { }; struct mlx5_eqe_page_req { - u8 rsvd0[2]; + __be16 ec_function; __be16 func_id; __be32 num_pages; __be32 rsvd1[5]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 039c9398614c..cce4e8293384 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -522,6 +522,7 @@ struct mlx5_priv { atomic_t reg_pages; struct list_head free_list; int vfs_pages; + int peer_pf_pages; struct mlx5_core_health health; @@ -652,6 +653,7 @@ struct mlx5_core_dev { u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; + u8 embedded_cpu; } caps; u64 sys_image_guid; phys_addr_t iseg_base; @@ -922,7 +924,7 @@ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages); + s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1076,6 +1078,11 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); } +static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) +{ + return dev->caps.embedded_cpu; +} + #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c5c679390fbd..46799b4c8859 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4441,7 +4441,8 @@ struct mlx5_ifc_query_pages_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 num_pages[0x20]; @@ -4460,7 +4461,8 @@ struct mlx5_ifc_query_pages_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -5880,7 +5882,8 @@ struct mlx5_ifc_manage_pages_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 input_num_entries[0x20]; @@ -8749,7 +8752,8 @@ struct mlx5_ifc_initial_seg_bits { u8 initializing[0x1]; u8 reserved_at_fe1[0x4]; u8 nic_interface_supported[0x3]; - u8 reserved_at_fe8[0x18]; + u8 embedded_cpu[0x1]; + u8 reserved_at_fe9[0x17]; struct mlx5_ifc_health_buffer_bits health_buffer; From 22e939a91dcb9bd4b773f2a0c0cb4eb016679b49 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:36 -0800 Subject: [PATCH 12/35] net/mlx5: Update enable HCA dependency With the introduction of ECPF, we require that the ECPF driver will aways call enable/disable HCA for that PF in the same way a PF does this for its VFs. The PF is still responsible for calling enable and disable HCA for its VFs. To distinguish between the ECPF executing enable/disable HCA for itself or for the PF, it sets the embedded CPU function bit in the input params struct of these commands. When the bit is cleared and function ID is zero, it refers to the peer PF. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/ecpf.c | 76 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/ecpf.h | 4 + .../net/ethernet/mellanox/mlx5/core/main.c | 14 ++++ include/linux/mlx5/mlx5_ifc.h | 6 +- 4 files changed, 98 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 28b8c5c5c8c7..1bcf8b8f9713 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -7,3 +7,79 @@ bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; } + +static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_peer_pf_init(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_enable_hca(dev); + if (err) + mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n", + err); + + return err; +} + +static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_disable_hca(dev); + if (err) { + mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n", + err); + return; + } + + err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n", + err); +} + +int mlx5_ec_init(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (!mlx5_core_is_ecpf(dev)) + return 0; + + /* ECPF shall enable HCA for peer PF in the same way a PF + * does this for its VFs. + */ + err = mlx5_peer_pf_init(dev); + if (err) + return err; + + return 0; +} + +void mlx5_ec_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_peer_pf_cleanup(dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 8b684f0ab48f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -14,11 +14,15 @@ enum { }; bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); +int mlx5_ec_init(struct mlx5_core_dev *dev); +void mlx5_ec_cleanup(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } +static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 08a3da2a8358..40d591c8e76c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -612,6 +612,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } @@ -622,6 +624,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -1071,6 +1075,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_sriov; } + err = mlx5_ec_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init embedded CPU\n"); + goto err_ec; + } + if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1088,6 +1098,9 @@ out: return 0; err_reg_dev: + mlx5_ec_cleanup(dev); + +err_ec: mlx5_sriov_detach(dev); err_sriov: @@ -1162,6 +1175,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); + mlx5_ec_cleanup(dev); mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 46799b4c8859..1b6d5a563a3a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6061,7 +6061,8 @@ struct mlx5_ifc_enable_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6105,7 +6106,8 @@ struct mlx5_ifc_disable_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; From c3a4e9f10714911486d09e7729195cc8fbedecd3 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:37 -0800 Subject: [PATCH 13/35] net/mlx5: Add query host params command The QUERY_HOST_PARAMS command is used by an Embedded CPU Physical Function (ECPF) driver to identify and retrieve information about the PF on the host side. E.g, number of virtual functions and PCI BDF. The number of VFs can be changed on the fly, a function is added to query current number of VFs and will be used in downstream patches. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 + .../net/ethernet/mellanox/mlx5/core/ecpf.c | 27 ++++++++++++ .../net/ethernet/mellanox/mlx5/core/ecpf.h | 4 ++ include/linux/mlx5/mlx5_ifc.h | 41 +++++++++++++++++++ 4 files changed, 74 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a25a8c6f938e..46d70eb2d2f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,6 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_QUERY_HOST_PARAMS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -627,6 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 1bcf8b8f9713..4746f2d28fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,3 +83,30 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } + +static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; + + MLX5_SET(query_host_params_in, in, opcode, + MLX5_CMD_OP_QUERY_HOST_PARAMS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ + u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; + int err; + + err = mlx5_query_host_params_context(dev, out, sizeof(out)); + if (err) + return err; + + *num_vf = MLX5_GET(query_host_params_out, out, + host_params_context.host_num_of_vfs); + mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index d3d7a00a02ac..346372df218f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,6 +16,7 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -23,6 +24,9 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} +static inline int +mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1b6d5a563a3a..565046830559 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -142,6 +142,7 @@ enum { MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, + MLX5_CMD_OP_QUERY_HOST_PARAMS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -9522,4 +9523,44 @@ struct mlx5_ifc_mtrc_ctrl_bits { u8 reserved_at_80[0x180]; }; +struct mlx5_ifc_host_params_context_bits { + u8 host_number[0x8]; + u8 reserved_at_8[0x8]; + u8 host_num_of_vfs[0x10]; + + u8 reserved_at_20[0x10]; + u8 host_pci_bus[0x10]; + + u8 reserved_at_40[0x10]; + u8 host_pci_device[0x10]; + + u8 reserved_at_60[0x10]; + u8 host_pci_function[0x10]; + + u8 reserved_at_80[0x180]; +}; + +struct mlx5_ifc_query_host_params_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_host_params_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_host_params_context_bits host_params_context; + + u8 reserved_at_280[0x180]; +}; + #endif /* MLX5_IFC_H */ From 7f0d11c7e0d08304de55b6a571a69166f3d54160 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:38 -0800 Subject: [PATCH 14/35] net/mlx5: Add host params change event In Embedded CPU (EC) configurations, the EC driver needs to know when the number of virtual functions change on the corresponding PF at the host side. This is required so the EC driver can create or destroy representor net devices that represent the VFs ports. Whenever a change in the number of VFs occurs, firmware will generate an event towards the EC which will trigger a work to complete the rest of the handling. The specifics of the handling will be introduced in a downstream patch. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/events.c | 2 ++ include/linux/mlx5/device.h | 2 ++ include/linux/mlx5/driver.h | 5 +++++ 4 files changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7092457705a2..5c02f9291799 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -530,6 +530,9 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + if (mlx5_core_is_ecpf_esw_manager(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + return async_event_mask; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index fbc42b7252a9..4f7f776d6332 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -103,6 +103,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: + return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f2070350f60a..f93a5598b942 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -342,6 +342,8 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, + MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe, + MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cce4e8293384..151563a12fc2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1083,6 +1083,11 @@ static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu; } +static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) +{ + return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); +} + #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ From feb393693316bd5de2c88a020f6ded51e3a4120b Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:39 -0800 Subject: [PATCH 15/35] net/mlx5: Provide an alternative VF upper bound for ECPF ECPF doesn't support SR-IOV, but an ECPF E-Switch manager shall know the max VFs supported by its peer host PF in order to control those VF vports. The current driver implementation uses the total vfs quantity as provided by the pci sub-system for an upper bound of the VF vports the e-switch code needs to deal with. This obviously can't work as is on ECPF e-switch manager. For now, we use a hard coded value of 128 on such systems. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 151563a12fc2..46e0aa52a58a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1088,7 +1088,16 @@ static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); } -#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) +#define MLX5_HOST_PF_MAX_VFS (127u) +static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) +{ + if (mlx5_core_is_ecpf_esw_manager(dev)) + return MLX5_HOST_PF_MAX_VFS; + else + return pci_sriov_get_totalvfs(dev->pdev); +} + +#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ From b05af6aacdb920dc3bfd27d53ade7f680d43265c Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:40 -0800 Subject: [PATCH 16/35] net/mlx5: E-Switch, Normalize the name of uplink vport number Driver used to name uplink vport as FDB_UPLINK_VPORT, it's hard to comply with the same naming convention along with the introduction of other vports. Use MLX5_VPORT as the prefix for such vports and relocate the uplink vport definition to public header file for the benefits of both net and IB drivers. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 4 ++-- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 22 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++--- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 8 +++---- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 -- .../mellanox/mlx5/core/eswitch_offloads.c | 14 ++++++------ include/linux/mlx5/vport.h | 4 ++++ 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 3fb22967c098..99cae9a10195 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Mellanox Technologies. All rights reserved. */ +#include #include "ib_rep.h" #include "srq.h" @@ -48,11 +49,10 @@ static const struct mlx5_ib_profile vf_rep_profile = { static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { -#define FDB_UPLINK_VPORT 0xffff const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) profile = &uplink_rep_profile; else profile = &vf_rep_profile; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 96cc0c6a4014..c78d21b2501e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -152,7 +152,7 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_uplink_rep_update_hw_counters(priv); else mlx5e_vf_rep_update_hw_counters(priv); @@ -1207,7 +1207,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) return false; rep = rpriv->rep; - return (rep->vport == FDB_UPLINK_VPORT); + return (rep->vport == MLX5_VPORT_UPLINK); } static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) @@ -1343,7 +1343,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->sw_mtu = netdev->mtu; /* SQ */ - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; else params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; @@ -1370,7 +1370,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_core_dev *mdev = priv->mdev; - if (rep->vport == FDB_UPLINK_VPORT) { + if (rep->vport == MLX5_VPORT_UPLINK) { SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev); netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ @@ -1402,7 +1402,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - if (rep->vport != FDB_UPLINK_VPORT) + if (rep->vport != MLX5_VPORT_UPLINK) netdev->features |= NETIF_F_VLAN_CHALLENGED; netdev->features |= netdev->hw_features; @@ -1555,7 +1555,7 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } - if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { uplink_priv = &rpriv->uplink_priv; /* init shared tc flow table */ @@ -1591,7 +1591,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); - if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { /* clean indirect TC block notifications */ unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); mlx5e_rep_indr_clean_block_privs(rpriv); @@ -1710,7 +1710,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", @@ -1723,7 +1723,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rep->rep_if[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); - if (rep->vport == FDB_UPLINK_VPORT) { + if (rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_create_mdev_resources(dev); if (err) goto err_destroy_netdev; @@ -1759,7 +1759,7 @@ err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); err_destroy_mdev_resources: - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(dev); err_destroy_netdev: @@ -1779,7 +1779,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) unregister_netdev(netdev); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); kfree(ppriv); /* mlx5e_rep_priv */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cae6c6d48984..1a73e661056a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1834,7 +1834,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; - if (rep->vport != FDB_UPLINK_VPORT && + if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && esw->offloads.inline_mode < match_level)) { NL_SET_ERR_MSG_MOD(extack, @@ -2724,7 +2724,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { struct mlx5_esw_flow_attr *attr = flow->esw_attr; - bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT && + bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && flow->flags & MLX5E_TC_FLOW_INGRESS; bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); @@ -2849,7 +2849,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, * original flow and packets redirected from uplink use the * peer mdev. */ - if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT) + if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK) in_mdev = peer_priv->mdev; else in_mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d7382892e81c..0db56b4b7009 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,8 +40,6 @@ #include "eswitch.h" #include "fs_core.h" -#define UPLINK_VPORT 0xFFFF - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -188,7 +186,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, misc_parameters); mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK); MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); } @@ -499,7 +497,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) return -ENOMEM; esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ - esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK); /* Add this multicast mac to all the mc promiscuous vports */ update_allmulti_vports(esw, vaddr, esw_mc); @@ -736,7 +734,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, if (!allmulti_addr->uplink_rule) allmulti_addr->uplink_rule = esw_fdb_set_vport_allmulti_rule(esw, - UPLINK_VPORT); + MLX5_VPORT_UPLINK); allmulti_addr->refcnt++; } else if (vport->allmulti_rule) { mlx5_del_flow_rules(vport->allmulti_rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9c89eea9b2c3..94da74b1e6ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -49,8 +49,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define FDB_UPLINK_VPORT 0xffff - #define MLX5_MIN_BW_SHARE 1 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53065b6ae593..b0b1267eab07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -359,15 +359,15 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, in_rep = attr->in_rep; out_rep = attr->dests[0].rep; - if (push && in_rep->vport == FDB_UPLINK_VPORT) + if (push && in_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; - if (pop && out_rep->vport == FDB_UPLINK_VPORT) + if (pop && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ if (!push && !pop && fwd) - if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* protects against (1) setting rules with different vlans to push and @@ -409,7 +409,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) { + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; attr->vlan_handled = true; } @@ -469,7 +469,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; return 0; @@ -1227,7 +1227,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) ether_addr_copy(rep->hw_id, hw_id); } - offloads->vport_reps[0].vport = FDB_UPLINK_VPORT; + offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; return 0; } @@ -1811,7 +1811,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - if (vport == FDB_UPLINK_VPORT) + if (vport == MLX5_VPORT_UPLINK) vport = UPLINK_REP_INDEX; rep = &offloads->vport_reps[vport]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 1654b911cdb2..2e2928eacd97 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -42,6 +42,10 @@ enum { MLX5_CAP_INLINE_MODE_NOT_REQUIRED, }; +enum { + MLX5_VPORT_UPLINK = 0xffff +}; + u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state); From bf3e4d387daed36aad2cfd4f493b07714ac0cd5e Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:41 -0800 Subject: [PATCH 17/35] net/mlx5: Relocate vport macros to the vport header file These are two macros in the driver general header which deal with the number of total vports and if a vport is vport manager. Such macros are vport entities, better to place them at the vport header file. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + include/linux/mlx5/driver.h | 6 ------ include/linux/mlx5/vport.h | 7 +++++++ 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 5c02f9291799..bb6e5b5d9681 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 79f122b45def..b6a7bc8f667c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -32,6 +32,7 @@ #include #include +#include #include #include "mlx5_core.h" diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 46e0aa52a58a..c5454f985e1d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1097,12 +1097,6 @@ static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) return pci_sriov_get_totalvfs(dev->pdev); } -#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) -#define MLX5_VPORT_MANAGER(mdev) \ - (MLX5_CAP_GEN(mdev, vport_group_manager) && \ - (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ - mlx5_core_is_pf(mdev)) - static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 2e2928eacd97..28f47e868fbc 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,6 +36,13 @@ #include #include +#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) + +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) + enum { MLX5_CAP_INLINE_MODE_L2, MLX5_CAP_INLINE_MODE_VPORT_CONTEXT, From cd7e4186af9d968559852b4eeb1039b3419cc590 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:42 -0800 Subject: [PATCH 18/35] net/mlx5: E-Switch, Avoid magic numbers when initializing offloads mode When dealing with the offloads mode initialization, driver refers to the number of VFs and add magic number one (1) to take account of the uplink. This is not clear and will make the code less readable after adding other vports (e.g. host PF). As these are special vports compared to VF vports, add a helper macro to denote such special vports and eliminate the use of magic number. Moreover, when creating offloads flow table and groups, the driver reserves two more slots for UC and MC miss rules. Replace this magic number with a helper macro as well. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- .../mellanox/mlx5/core/eswitch_offloads.c | 25 +++++++++++-------- include/linux/mlx5/vport.h | 4 ++- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0db56b4b7009..49a9e3877d2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1638,7 +1638,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs + 1); + err = esw_offloads_init(esw, nvfs + MLX5_SPECIAL_VPORTS); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0b1267eab07..1496e82b5108 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -46,6 +46,11 @@ enum { FDB_SLOW_PATH }; +/* There are two match-all miss flows, one for unicast dst mac and + * one for multicast. + */ +#define MLX5_ESW_MISS_FLOWS (2) + #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] @@ -904,8 +909,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.offloads.fdb_left[i] = ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 + - esw->total_vports; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + + MLX5_ESW_MISS_FLOWS + esw->total_vports; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -999,7 +1004,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) dmac[0] = 0x01; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + MLX5_ESW_MISS_FLOWS); g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -1048,7 +1054,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) esw_destroy_offloads_fast_fdb_tables(esw); } -static int esw_create_offloads_table(struct mlx5_eswitch *esw) +static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; @@ -1062,7 +1068,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1082,16 +1088,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } -static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; - struct mlx5_priv *priv = &esw->dev->priv; u32 *flow_group_in; void *match_criteria, *misc; int err = 0; - int nvports = priv->sriov.num_vfs + 2; + nvports = nvports + MLX5_ESW_MISS_FLOWS; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1407,11 +1412,11 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) return err; - err = esw_create_offloads_table(esw); + err = esw_create_offloads_table(esw, nvports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw); + err = esw_create_vport_rx_group(esw, nvports); if (err) goto create_fg_err; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 28f47e868fbc..3bc05449ac39 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,7 +36,9 @@ #include #include -#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) +#define MLX5_VPORT_PF_PLACEHOLDER (1u) +#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER) +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ From bc4e12ffefdd886057eabe38135515690d0756a6 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:43 -0800 Subject: [PATCH 19/35] net/mlx5: Refactor queries to speed fields in Port Type and Speed register This patch fascicles queries to speed related fields in Port Type and Speed register (PTYS) into a single API. I addition, this patch refactors functions which serves only Ethernet driver: remove the protocol type as an input parameter, move code from 'core' directory into 'en' directory and add 'eth' prefix to the function's name. The patch also encapsulates functions that are not used outside the Ethernet driver removes redundant include files. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 6 +- .../net/ethernet/mellanox/mlx5/core/en/port.c | 75 +++++++++++-- .../net/ethernet/mellanox/mlx5/core/en/port.h | 12 ++ .../ethernet/mellanox/mlx5/core/en_ethtool.c | 23 ++-- .../net/ethernet/mellanox/mlx5/core/port.c | 106 ------------------ include/linux/mlx5/port.h | 11 -- 6 files changed, 91 insertions(+), 142 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 87ce62e44898..efd08b41126c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -393,6 +393,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; struct mlx5_core_dev *mdev; struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; @@ -416,10 +417,11 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. */ - err = mlx5_query_port_eth_proto_oper(mdev, ð_prot_oper, - mdev_port_num); + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + mdev_port_num); if (err) goto out; + eth_prot_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 4a37713023be..9a1c2b2f87d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,6 +63,67 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, + struct mlx5e_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eproto->admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eproto->oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + return 0; +} + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} + +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap, + &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} + u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; @@ -78,16 +139,14 @@ u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; - u32 eth_proto_oper; + struct mlx5e_port_eth_proto eproto; int err; - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) return err; - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - *speed = mlx5e_port_ptys2speed(eth_proto_oper); + *speed = mlx5e_port_ptys2speed(eproto.oper); if (!(*speed)) err = -EINVAL; @@ -96,17 +155,17 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { + struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; - u32 proto_cap; int err; int i; - err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) return err; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) - if (proto_cap & MLX5E_PROT_MASK(i)) + if (eproto.cap & MLX5E_PROT_MASK(i)) max_speed = max(max_speed, mlx5e_link_speed[i]); *speed = max_speed; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index cd2160b8c9bf..4bdab8be10af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -36,6 +36,18 @@ #include #include "en.h" +struct mlx5e_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, + struct mlx5e_port_eth_proto *eproto); +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin); u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c9df08133718..c29e141d72fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -882,7 +882,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; - u32 eth_proto_cap, eth_proto_admin; + struct mlx5e_port_eth_proto eproto; bool an_changes = false; u8 an_disable_admin; u8 an_disable_cap; @@ -898,14 +898,14 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : mlx5e_port_speed2linkmodes(speed); - err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) { - netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n", + netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); goto out; } - link_modes = link_modes & eth_proto_cap; + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", __func__); @@ -913,24 +913,17 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, goto out; } - err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); - if (err) { - netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n", - __func__, err); - goto out; - } - - mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, - &an_disable_cap, &an_disable_admin); + mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap, + &an_disable_admin); an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; an_changes = ((!an_disable && an_disable_admin) || (an_disable && !an_disable_admin)); - if (!an_changes && link_modes == eth_proto_admin) + if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes); mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 2b82f35f4c35..b81542820528 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -30,10 +30,7 @@ * SOFTWARE. */ -#include -#include #include -#include #include "mlx5_core.h" int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, @@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) sizeof(out), MLX5_REG_MLCR, 0, 1); } -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - else - *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); - -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - else - *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); - int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port) { @@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, - local_port); - if (err) - return err; - - *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - - return 0; -} -EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); - int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port) { @@ -245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; - u8 an_disable_admin; - u8 an_disable_cap; - u8 an_status; - - mlx5_query_port_autoneg(dev, proto_mask, &an_status, - &an_disable_cap, &an_disable_admin); - if (!an_disable_cap && an_disable) - return -EPERM; - - memset(in, 0, sizeof(in)); - - MLX5_SET(ptys_reg, in, local_port, 1); - MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); - MLX5_SET(ptys_reg, in, proto_mask, proto_mask); - if (proto_mask == MLX5_PTYS_EN) - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); - else - MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); - - return mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PTYS, 0, 1); -} -EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); - /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) { @@ -606,25 +519,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - - *an_status = 0; - *an_disable_cap = 0; - *an_disable_admin = 0; - - if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) - return; - - *an_status = MLX5_GET(ptys_reg, out, an_status); - *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); - *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); -} -EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); - int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index bf4bc01ffb0c..5be7eefa6d75 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -110,27 +110,16 @@ enum mlx5e_connector_type { int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask); -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask); int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port); int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); From a0a899895692d4227cc55b087f5f47e185af7f23 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:44 -0800 Subject: [PATCH 20/35] net/mlx5: Add new fields to Port Type and Speed register Register Port Type and Speed (PTYS) introduces three new fields extending the speed/protocols the can be reported and configured. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 565046830559..5decffe565fb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7826,21 +7826,23 @@ struct mlx5_ifc_ptys_reg_bits { u8 proto_mask[0x3]; u8 an_status[0x4]; - u8 reserved_at_24[0x3c]; + u8 reserved_at_24[0x1c]; + + u8 ext_eth_proto_capability[0x20]; u8 eth_proto_capability[0x20]; u8 ib_link_width_capability[0x10]; u8 ib_proto_capability[0x10]; - u8 reserved_at_a0[0x20]; + u8 ext_eth_proto_admin[0x20]; u8 eth_proto_admin[0x20]; u8 ib_link_width_admin[0x10]; u8 ib_proto_admin[0x10]; - u8 reserved_at_100[0x20]; + u8 ext_eth_proto_oper[0x20]; u8 eth_proto_oper[0x20]; @@ -8289,7 +8291,9 @@ struct mlx5_ifc_mpegc_reg_bits { struct mlx5_ifc_pcam_enhanced_features_bits { u8 reserved_at_0[0x6d]; u8 rx_icrc_encapsulated_counter[0x1]; - u8 reserved_at_6e[0x8]; + u8 reserved_at_6e[0x4]; + u8 ptys_extended_ethernet[0x1]; + u8 reserved_at_73[0x3]; u8 pfcc_mask[0x1]; u8 reserved_at_77[0x3]; u8 per_lane_error_counters[0x1]; From a08b4ed1373dc59e3e15029bc6f135ba0f53c9a7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:45 -0800 Subject: [PATCH 21/35] net/mlx5: Add support to ext_* fields introduced in Port Type and Speed register This patch exposes new link modes (including 50Gbps per lane), and ext_* fields which describes the new link modes in Port Type and Speed register (PTYS). Access functions, translation functions (speed <-> HW bits) and link max speed function were modified. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 3 +- .../net/ethernet/mellanox/mlx5/core/en/port.c | 85 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/en/port.h | 8 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 9 +- include/linux/mlx5/port.h | 19 +++++ 5 files changed, 94 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index efd08b41126c..3677c00fa3bb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -421,7 +421,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - eth_prot_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 9a1c2b2f87d8..122927f3a600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,7 +63,31 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, +}; + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; +} + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, struct mlx5e_port_eth_proto *eproto) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; @@ -72,13 +96,17 @@ int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, if (!eproto) return -EINVAL; + if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet)) + return -EOPNOTSUPP; + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); if (err) return err; - eproto->cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eproto->admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eproto->oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); return 0; } @@ -100,7 +128,7 @@ void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, } int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin) + u32 proto_admin, bool ext) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; u32 in[MLX5_ST_SZ_DW(ptys_reg)]; @@ -118,38 +146,46 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, MLX5_SET(ptys_reg, in, local_port, 1); MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + if (ext) + MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PTYS, 0, 1); } -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; + const u32 *table; u32 speed = 0; + u32 max_size; int i; - i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER); - if (i < MLX5E_LINK_MODES_NUMBER) - speed = mlx5e_link_speed[i]; - + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; return speed; } int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { struct mlx5e_port_eth_proto eproto; + bool ext; int err; - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) - return err; + goto out; - *speed = mlx5e_port_ptys2speed(eproto.oper); + *speed = mlx5e_port_ptys2speed(mdev, eproto.oper); if (!(*speed)) err = -EINVAL; +out: return err; } @@ -157,31 +193,38 @@ int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; + const u32 *table; + u32 max_size; + bool ext; int err; int i; - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) return err; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) if (eproto.cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, mlx5e_link_speed[i]); + max_speed = max(max_speed, table[i]); *speed = max_speed; return 0; } -u32 mlx5e_port_speed2linkmodes(u32 speed) +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed) { u32 link_modes = 0; + const u32 *table; + u32 max_size; int i; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (mlx5e_link_speed[i] == speed) + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) link_modes |= MLX5E_PROT_MASK(i); } - return link_modes; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 4bdab8be10af..70f536ec51c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -42,16 +42,16 @@ struct mlx5e_port_eth_proto { u32 oper; }; -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, struct mlx5e_port_eth_proto *eproto); void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin); -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); + u32 proto_admin, bool ext); +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -u32 mlx5e_port_speed2linkmodes(u32 speed); +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c29e141d72fb..8343cf7d292c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -695,13 +695,14 @@ static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, struct ethtool_link_ksettings *link_ksettings) { + struct mlx5e_priv *priv = netdev_priv(netdev); u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5e_port_ptys2speed(eth_proto_oper); + speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper); if (!speed) { speed = SPEED_UNKNOWN; goto out; @@ -896,9 +897,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : - mlx5e_port_speed2linkmodes(speed); + mlx5e_port_speed2linkmodes(mdev, speed); - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto); if (err) { netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); @@ -923,7 +924,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, false); mlx5_toggle_port_link(mdev); out: diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 5be7eefa6d75..814fa194663b 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -92,6 +92,22 @@ enum mlx5e_link_mode { MLX5E_LINK_MODES_NUMBER, }; +enum mlx5e_ext_link_mode { + MLX5E_SGMII_100M = 0, + MLX5E_1000BASE_X_SGMII = 1, + MLX5E_5GBASE_R = 3, + MLX5E_10GBASE_XFI_XAUI_1 = 4, + MLX5E_40GBASE_XLAUI_4_XLPPI_4 = 5, + MLX5E_25GAUI_1_25GBASE_CR_KR = 6, + MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2 = 7, + MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR = 8, + MLX5E_CAUI_4_100GBASE_CR4_KR4 = 9, + MLX5E_100GAUI_2_100GBASE_CR2_KR2 = 10, + MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, + MLX5E_400GAUI_8 = 15, + MLX5E_EXT_LINK_MODES_NUMBER, +}; + enum mlx5e_connector_type { MLX5E_PORT_UNKNOWN = 0, MLX5E_PORT_NONE = 1, @@ -106,6 +122,9 @@ enum mlx5e_connector_type { }; #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ + (ext ? MLX5_GET(reg, out, ext_##field) : \ + MLX5_GET(reg, out, field)) int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, From 08e8676f1607925adf36e399f0faa8ce3b10bb86 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:46 -0800 Subject: [PATCH 22/35] IB/mlx5: Add support for 50Gbps per lane link modes Driver now supports new link modes: 50Gbps per lane support for 50G/100G/200G. This patch reads the correct field (legacy vs. extended) based on a FW indication bit, and adds a translation function (link modes to IB width and speed) to the new link modes. Signed-off-by: Aya Levin Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 66 ++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3677c00fa3bb..581ae11e2fc9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -331,8 +331,8 @@ out: spin_unlock(&port->mp.mpi_lock); } -static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, - u8 *active_width) +static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): @@ -389,6 +389,61 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } +static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width) +{ + switch (eth_proto_oper) { + case MLX5E_PROT_MASK(MLX5E_SGMII_100M): + case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_SDR; + break; + case MLX5E_PROT_MASK(MLX5E_5GBASE_R): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_DDR; + break; + case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_QDR; + break; + case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_QDR; + break; + case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_EDR; + break; + case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2): + case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_HDR; + break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_HDR; + break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_HDR; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width, bool ext) +{ + return ext ? + translate_eth_ext_proto_oper(eth_proto_oper, active_speed, + active_width) : + translate_eth_legacy_proto_oper(eth_proto_oper, active_speed, + active_width); +} + static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { @@ -401,6 +456,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, u16 qkey_viol_cntr; u32 eth_prot_oper; u8 mdev_port_num; + bool ext; int err; mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); @@ -421,14 +477,14 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, false, - eth_proto_oper); + ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); + eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; translate_eth_proto_oper(eth_prot_oper, &props->active_speed, - &props->active_width); + &props->active_width, ext); props->port_cap_flags |= IB_PORT_CM_SUP; props->ip_gids = true; From 86b39a66b75f2f360eef56bdb5ac37d5068a9b7d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Wed, 13 Feb 2019 10:52:34 -0600 Subject: [PATCH 23/35] net/mlx5: Correctly set LAG mode for ECPF When bonding is added, driver assumes that it's RoCE LAG if no VF is enabled. This is not enough for ECPF as the VF is enabled in host PF side. LAG should only choose RoCE mode when both slave devices meet conditions below: 1. E-Switch offloads mode is NONE. 2. No VF is enabled. Signed-off-by: Bodong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 2d223385dc81..04c5aca7f8c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -343,6 +343,11 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) roce_lag = !mlx5_sriov_is_enabled(dev0) && !mlx5_sriov_is_enabled(dev1); +#ifdef CONFIG_MLX5_ESWITCH + roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE; +#endif + if (roce_lag) mlx5_lag_remove_ib_devices(ldev); From a1b3839ac4a4933c7c5167efd7b6b091130d11aa Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 8 Nov 2018 22:37:04 +0200 Subject: [PATCH 24/35] net/mlx5: E-Switch, Properly refer to the esw manager vport In SmartNIC mode, the eswitch manager is not necessarily the PF (vport 0). Use a helper function to get the correct eswitch manager vport number and cache on the eswitch instance for fast reference. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 35 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 10 ++++++ .../mellanox/mlx5/core/eswitch_offloads.c | 7 ++-- include/linux/mlx5/vport.h | 2 ++ 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 05830696abd8..9c622749dbde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -378,16 +378,16 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) u16 vport = vaddr->vport; int err; - /* Skip mlx5_mpfs_add_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_add_mac for eswitch_managers, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport) + if (esw->manager_vport == vport) goto fdb_add; err = mlx5_mpfs_add_mac(esw->dev, mac); if (err) { esw_warn(esw->dev, - "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n", mac, vport, err); return err; } @@ -410,10 +410,10 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) u16 vport = vaddr->vport; int err = 0; - /* Skip mlx5_mpfs_del_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_del_mac for eswitch managerss, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport || !vaddr->mpfs) + if (!vaddr->mpfs || esw->manager_vport == vport) goto fdb_del; err = mlx5_mpfs_del_mac(esw->dev, mac); @@ -1457,15 +1457,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, { int vport_num = vport->vport; - if (!vport_num) + if (esw->manager_vport == vport_num) return; mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, vport->info.link_state); - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + + /* Host PF has its own mac/guid. */ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, (vport->info.vlan || vport->info.qos)); @@ -1537,8 +1544,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, vport->enabled_events = enable_events; vport->enabled = true; - /* only PF is trusted by default */ - if (!vport_num) + /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well + * in smartNIC as it's a vport group manager. + */ + if (esw->manager_vport == vport_num || + (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; esw_vport_change_handle_locked(vport); @@ -1733,6 +1743,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) return -ENOMEM; esw->dev = dev; + esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 0a3eee8746c1..959a9e28d08f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "lib/mpfs.h" @@ -204,6 +205,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; int nvports; + u16 manager_vport; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); @@ -363,6 +365,14 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9128b45f3f37..af2c44d31357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -522,7 +522,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); - MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + /* source vport is the esw manager */ + MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); @@ -567,7 +568,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, source_eswitch_owner_vhca_id); dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest->vport.num = 0; + dest->vport.num = peer_dev->priv.eswitch->manager_vport; dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } @@ -666,7 +667,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_c[0] = 0x01; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = 0; + dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 3bc05449ac39..b67bcc95ab5d 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -52,6 +52,8 @@ enum { }; enum { + MLX5_VPORT_PF = 0x0, + MLX5_VPORT_ECPF = 0xfffe, MLX5_VPORT_UPLINK = 0xffff }; From cbc44e76bfcdcaccd079487367593ee3f94d006d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Fri, 1 Feb 2019 17:34:55 -0600 Subject: [PATCH 25/35] net/mlx5: E-Switch, Properly refer to host PF vport as other vport Commands referring to vports use the following scheme: 1. When referring to my own vport, put 0 in vport and 0 in other_vport. 2. When referring to another vport, put the vport number of the referred vport and put 1 in other_vport. It was assumed that driver is accessing other vport when vport number is greater than 0. With the above scheme, the case that ECPF eswitch manager is trying to access host PF vport will fall over with scheme 1 as the vport number is 0. This is apparently wrong as driver is trying to refer other vport. As such usage can only happen in the eswitch context, change relevant functions to provide other vport input properly. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 13 ++++++------- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 10 ++++------ include/linux/mlx5/vport.h | 4 ++-- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 685f1975be58..f84889bbe2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1083,7 +1083,8 @@ static int mlx5e_vf_rep_open(struct net_device *dev) if (!mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_UP)) + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); unlock: @@ -1101,7 +1102,8 @@ static int mlx5e_vf_rep_close(struct net_device *dev) mutex_lock(&priv->state_lock); mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN); + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); ret = mlx5e_close_locked(dev); mutex_unlock(&priv->state_lock); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9c622749dbde..648c743cc947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1462,7 +1462,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, vport->info.link_state); /* Host PF has its own mac/guid. */ @@ -1581,10 +1581,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport_num); - if (vport_num && esw->mode == SRIOV_LEGACY) { + if (esw->mode == SRIOV_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); @@ -1875,7 +1875,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, err = mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport, link_state); + vport, 1, link_state); if (err) { mlx5_core_warn(esw->dev, "Failed to set vport %d link state, err = %d", @@ -2137,7 +2137,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) return 0; - err = mlx5_query_vport_down_stats(dev, vport_idx, + err = mlx5_query_vport_down_stats(dev, vport_idx, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) @@ -2174,8 +2174,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, other_vport, 1); memset(out, 0, outlen); err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 9a928eb48522..ef95feca9961 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) } int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state) + u16 vport, u8 other_vport, u8 state) { u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; @@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, MLX5_CMD_OP_MODIFY_VPORT_STATE); MLX5_SET(modify_vport_state_in, in, op_mod, opmod); MLX5_SET(modify_vport_state_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, other_vport, other_vport); MLX5_SET(modify_vport_state_in, in, admin_state, state); return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); @@ -1057,7 +1056,7 @@ free: EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0}; @@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vnic_env_in, in, other_vport, 1); + MLX5_SET(query_vnic_env_in, in, other_vport, other_vport); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (err) diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index b67bcc95ab5d..b7edcb1dadd8 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -59,7 +59,7 @@ enum { u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state); + u16 vport, u8 other_vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, @@ -121,7 +121,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, int vf, u8 port_num, void *out, From eca8cc3895358b4c35a3ed439537b8c08d7dabb3 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 7 Feb 2019 10:40:58 -0600 Subject: [PATCH 26/35] net/mlx5: E-Switch, Refactor offloads flow steering init/cleanup E-switch offloads mode initialize/cleanup multiple steering related entities (flow table/group). Refactor these operations to internal helper functions for better block design. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index af2c44d31357..19969d487a01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1404,7 +1404,7 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); } -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) { int err; @@ -1422,6 +1422,34 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto create_fg_err; + return 0; + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_fdb_tables(esw); + + return err; +} + +static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) +{ + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_tables(esw); +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + int err; + + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + + err = esw_offloads_steering_init(esw, nvports); + if (err) + return err; + err = esw_offloads_load_reps(esw, nvports); if (err) goto err_reps; @@ -1430,14 +1458,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) return 0; err_reps: - esw_destroy_vport_rx_group(esw); - -create_fg_err: - esw_destroy_offloads_table(esw); - -create_ft_err: - esw_destroy_offloads_fdb_tables(esw); - + esw_offloads_steering_cleanup(esw); return err; } @@ -1464,9 +1485,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) { esw_offloads_devcom_cleanup(esw); esw_offloads_unload_reps(esw, nvports); - esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); - esw_destroy_offloads_fdb_tables(esw); + esw_offloads_steering_cleanup(esw); } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) From c9b99abcf232f69ddff158b1f313fd7d2654414b Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 31 Jan 2019 14:40:53 -0600 Subject: [PATCH 27/35] net/mlx5: E-Switch, Split VF and special vports for offloads mode When driver is entering offloads mode, there are two major tasks to do: initialize flow steering and create representors. Flow steering should make sure enough flow table/group spaces are reserved for all reps. Representors will be created in a group, all or none. With the introduction of ECPF, flow steering should still reserve the same spaces. But, the representors are not always loaded/unloaded in a single piece. Once ECPF is in offloads mode, it will get the number of VF changing event from host PF. In such scenario, only the VF reps should be loaded/unloaded, not the reps for special vports (such as the uplink vport). Thus, when entering offloads mode, driver should specify the total number of reps, and the number of VF reps separately. When leaving offloads mode, the cleanup should use the information self-contained in eswitch such as number of VFs. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 7 +-- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +- .../mellanox/mlx5/core/eswitch_offloads.c | 57 +++++++++++++------ include/linux/mlx5/vport.h | 1 + 4 files changed, 48 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 648c743cc947..be6c2931d2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1641,7 +1641,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs + MLX5_SPECIAL_VPORTS); + err = esw_offloads_init(esw, nvfs, + nvfs + MLX5_SPECIAL_VPORTS); } if (err) @@ -1683,7 +1684,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; int old_mode; - int nvports; int i; if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) @@ -1693,7 +1693,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = &esw->mc_promisc; - nvports = esw->enabled_vports; if (esw->mode == SRIOV_LEGACY) mlx5_eq_notifier_unregister(esw->dev, &esw->nb); @@ -1709,7 +1708,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) esw_destroy_legacy_fdb_table(esw); else if (esw->mode == SRIOV_OFFLOADS) - esw_offloads_cleanup(esw, nvports); + esw_offloads_cleanup(esw); old_mode = esw->mode; esw->mode = SRIOV_NONE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 959a9e28d08f..fd845e6c44d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -208,8 +208,9 @@ struct mlx5_eswitch { u16 manager_vport; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 19969d487a01..14f7ad67cfe4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -54,6 +54,8 @@ enum { #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] +#define UPLINK_REP_INDEX 0 + static struct mlx5_flow_table * esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); static void @@ -1239,19 +1241,28 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) return 0; } +static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (!rep->rep_if[rep_type].valid) + return; + + rep->rep_if[rep_type].unload(rep); +} + static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, u8 rep_type) { struct mlx5_eswitch_rep *rep; int vport; - for (vport = nvports - 1; vport >= 0; vport--) { + for (vport = nvports; vport >= MLX5_VPORT_FIRST_VF; vport--) { rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; - - rep->rep_if[rep_type].unload(rep); + __esw_offloads_unload_rep(esw, rep, rep_type); } + + rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + __esw_offloads_unload_rep(esw, rep, rep_type); } static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) @@ -1262,6 +1273,15 @@ static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) esw_offloads_unload_reps_type(esw, nvports, rep_type); } +static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (!rep->rep_if[rep_type].valid) + return 0; + + return rep->rep_if[rep_type].load(esw->dev, rep); +} + static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, u8 rep_type) { @@ -1269,12 +1289,14 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, int vport; int err; - for (vport = 0; vport < nvports; vport++) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto out; - err = rep->rep_if[rep_type].load(esw->dev, rep); + for (vport = MLX5_VPORT_FIRST_VF; vport <= nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) goto err_reps; } @@ -1283,6 +1305,7 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, err_reps: esw_offloads_unload_reps_type(esw, vport, rep_type); +out: return err; } @@ -1440,17 +1463,18 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_offloads_fdb_tables(esw); } -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports) { int err; mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - err = esw_offloads_steering_init(esw, nvports); + err = esw_offloads_steering_init(esw, total_nvports); if (err) return err; - err = esw_offloads_load_reps(esw, nvports); + err = esw_offloads_load_reps(esw, vf_nvports); if (err) goto err_reps; @@ -1481,10 +1505,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +void esw_offloads_cleanup(struct mlx5_eswitch *esw) { + u16 num_vfs = esw->dev->priv.sriov.num_vfs; + esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_reps(esw, nvports); + esw_offloads_unload_reps(esw, num_vfs); esw_offloads_steering_cleanup(esw); } @@ -1822,7 +1848,6 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { -#define UPLINK_REP_INDEX 0 struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index b7edcb1dadd8..755aeea19e1c 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -53,6 +53,7 @@ enum { enum { MLX5_VPORT_PF = 0x0, + MLX5_VPORT_FIRST_VF = 0x1, MLX5_VPORT_ECPF = 0xfffe, MLX5_VPORT_UPLINK = 0xffff }; From 879c8f84e3602961e441723aaaac372a44dfe588 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 28 Jan 2019 22:12:45 -0600 Subject: [PATCH 28/35] net/mlx5: E-Switch, Use getter and iterator to access vport/rep With only PF and VF, it is sufficient to have the vport/rep array index as the vport number. This is because PF and VF vports numbers are consecutive serial numbers. In downstream patches with introducing of ECPF and UPLINK vports, it's not consecutive any more. Use getter to get specific vport/rep, and use iterator to traversal a list of vport/rep. This hides the translation between array index and vport number, and provides flexibility of using different translation mechanism in the future. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Suggested-by: Saeed Mahameed Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 94 ++++++++++++------- .../mellanox/mlx5/core/eswitch_offloads.c | 75 ++++++++++----- 2 files changed, 113 insertions(+), 56 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index be6c2931d2a0..d1454f18c0a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -68,6 +68,26 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) +/* The vport getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_vports(esw, i, vport) \ + for ((i) = MLX5_VPORT_PF; \ + (vport) = &(esw)->vports[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (vport) = &(esw)->vports[i], \ + (i) <= (nvfs); (i)++) + +static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, + u16 vport_num) +{ + WARN_ON(vport_num > esw->total_vports - 1); + return &esw->vports[vport_num]; +} + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -436,17 +456,18 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, struct esw_mc_addr *esw_mc) { u8 *mac = vaddr->node.addr; - u16 vport_idx = 0; + struct mlx5_vport *vport; + u16 i, vport_num; - for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { - struct mlx5_vport *vport = &esw->vports[vport_idx]; + mlx5_esw_for_all_vports(esw, i, vport) { struct hlist_head *vport_hash = vport->mc_list; struct vport_addr *iter_vaddr = l2addr_hash_find(vport_hash, mac, struct vport_addr); + vport_num = vport->vport; if (IS_ERR_OR_NULL(vport->allmulti_rule) || - vaddr->vport == vport_idx) + vaddr->vport == vport_num) continue; switch (vaddr->action) { case MLX5_ACTION_ADD: @@ -458,14 +479,14 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, if (!iter_vaddr) { esw_warn(esw->dev, "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", - mac, vport_idx); + mac, vport_num); continue; } - iter_vaddr->vport = vport_idx; + iter_vaddr->vport = vport_num; iter_vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, - vport_idx); + vport_num); iter_vaddr->mc_promisc = true; break; case MLX5_ACTION_DEL: @@ -564,7 +585,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; vport_addr_action vport_addr_add; vport_addr_action vport_addr_del; @@ -599,7 +620,7 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; struct l2addr_node *node; @@ -686,7 +707,7 @@ out: */ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct l2addr_node *node; struct vport_addr *addr; struct hlist_head *hash; @@ -722,8 +743,8 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, bool promisc, bool mc_promisc) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; - struct mlx5_vport *vport = &esw->vports[vport_num]; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) goto promisc; @@ -764,7 +785,7 @@ promisc: /* Sync vport rx mode from vport context */ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int promisc_all = 0; int promisc_uc = 0; int promisc_mc = 0; @@ -1341,8 +1362,8 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, u32 initial_max_rate, u32 initial_bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; int err = 0; @@ -1381,7 +1402,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int err = 0; if (!vport->qos.enabled) @@ -1400,8 +1421,8 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, u32 max_rate, u32 bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; u32 bitmask = 0; @@ -1518,10 +1539,10 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) mlx5_fc_destroy(dev, vport->egress.drop_counter); } -static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, +static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int enable_events) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -1558,9 +1579,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, mutex_unlock(&esw->state_lock); } -static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +static void esw_disable_vport(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; if (!vport->enabled) return; @@ -1603,7 +1625,7 @@ static int eswitch_vport_event(struct notifier_block *nb, u16 vport_num; vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); - vport = &esw->vports[vport_num]; + vport = mlx5_eswitch_get_vport(esw, vport_num); if (vport->enabled) queue_work(esw->work_queue, &vport->vport_change_handler); @@ -1615,6 +1637,7 @@ static int eswitch_vport_event(struct notifier_block *nb, int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { + struct mlx5_vport *vport; int err; int i, enabled_events; @@ -1657,8 +1680,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) * 2. FDB/Eswitch is programmed by user space tools */ enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; - for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, enabled_events); + + /* Enable PF vport */ + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_enable_vport(esw, vport, enabled_events); + + /* Enable VF vports */ + mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + esw_enable_vport(esw, vport, enabled_events); if (mode == SRIOV_LEGACY) { MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); @@ -1683,6 +1712,7 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + struct mlx5_vport *vport; int old_mode; int i; @@ -1697,8 +1727,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) mlx5_eq_notifier_unregister(esw->dev, &esw->nb); - for (i = 0; i < esw->total_vports; i++) - esw_disable_vport(esw, i); + mlx5_esw_for_all_vports(esw, i, vport) + esw_disable_vport(esw, vport); if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rules(mc_promisc->uplink_rule); @@ -1725,6 +1755,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; + struct mlx5_vport *vport; int vport_num; int err; @@ -1757,6 +1788,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->total_vports = total_vports; + err = esw_offloads_init_reps(esw); if (err) goto abort; @@ -1765,9 +1798,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); - for (vport_num = 0; vport_num < total_vports; vport_num++) { - struct mlx5_vport *vport = &esw->vports[vport_num]; - + mlx5_esw_for_all_vports(esw, vport_num, vport) { vport->vport = vport_num; vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; @@ -1775,7 +1806,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw_vport_change_handler); } - esw->total_vports = total_vports; esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; @@ -2017,8 +2047,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; max_guarantee = evport->info.min_rate; @@ -2037,8 +2066,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled) continue; vport_min_rate = evport->info.min_rate; @@ -2053,7 +2081,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) if (bw_share == evport->qos.bw_share) continue; - err = esw_vport_qos_config(esw, i, vport_max_rate, + err = esw_vport_qos_config(esw, evport->vport, vport_max_rate, bw_share); if (!err) evport->qos.bw_share = bw_share; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 14f7ad67cfe4..4979c7ee0ad7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -56,6 +56,44 @@ enum { #define UPLINK_REP_INDEX 0 +/* The rep getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_reps(esw, i, rep) \ + for ((i) = MLX5_VPORT_PF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= MLX5_VPORT_FIRST_VF; (i)--) + +#define mlx5_esw_for_each_vf_vport(esw, vport, nvfs) \ + for ((vport) = MLX5_VPORT_FIRST_VF; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_vf_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) + +static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, + u16 vport_num) +{ + u16 idx = vport_num; + + if (vport_num == MLX5_VPORT_UPLINK) + idx = UPLINK_REP_INDEX; + + WARN_ON(idx > esw->total_vports - 1); + return &esw->offloads.vport_reps[idx]; +} + static struct mlx5_flow_table * esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); static void @@ -604,7 +642,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - for (i = 1; i < nvports; i++) { + mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { MLX5_SET(fte_match_set_misc, misc, source_port, i); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); @@ -622,7 +660,8 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, return 0; add_flow_err: - for (i--; i > 0; i--) + nvports = --i; + mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) mlx5_del_flow_rules(flows[i]); kvfree(flows); alloc_flows_err: @@ -637,7 +676,7 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) flows = esw->fdb_table.offloads.peer_miss_rules; - for (i = 1; i < esw->total_vports; i++) + mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[i]); kvfree(flows); @@ -1229,9 +1268,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) offloads = &esw->offloads; mlx5_query_nic_vport_mac_address(dev, 0, hw_id); - for (vport = 0; vport < total_vfs; vport++) { - rep = &offloads->vport_reps[vport]; - + mlx5_esw_for_all_reps(esw, vport, rep) { rep->vport = vport; ether_addr_copy(rep->hw_id, hw_id); } @@ -1256,12 +1293,10 @@ static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, struct mlx5_eswitch_rep *rep; int vport; - for (vport = nvports; vport >= MLX5_VPORT_FIRST_VF; vport--) { - rep = &esw->offloads.vport_reps[vport]; + mlx5_esw_for_each_vf_rep_reverse(esw, vport, rep, nvports) __esw_offloads_unload_rep(esw, rep, rep_type); - } - rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); } @@ -1289,13 +1324,12 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, int vport; int err; - rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) goto out; - for (vport = MLX5_VPORT_FIRST_VF; vport <= nvports; vport++) { - rep = &esw->offloads.vport_reps[vport]; + mlx5_esw_for_each_vf_rep(esw, vport, rep, nvports) { err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) goto err_reps; @@ -1304,7 +1338,7 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, return 0; err_reps: - esw_offloads_unload_reps_type(esw, vport, rep_type); + esw_offloads_unload_reps_type(esw, --vport, rep_type); out: return err; } @@ -1848,10 +1882,9 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); return rep->rep_if[rep_type].priv; } @@ -1859,13 +1892,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - if (vport == MLX5_VPORT_UPLINK) - vport = UPLINK_REP_INDEX; - - rep = &offloads->vport_reps[vport]; + rep = mlx5_eswitch_get_rep(esw, vport); if (rep->rep_if[rep_type].valid && rep->rep_if[rep_type].get_proto_dev) @@ -1876,13 +1905,13 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) { - return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type); + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); } EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, int vport) { - return &esw->offloads.vport_reps[vport]; + return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); From f121e0ea9586b2c937bf1ff9a0b682dc6424ce1d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 29 Jan 2019 21:48:31 -0600 Subject: [PATCH 29/35] net/mlx5: E-Switch, Add state to eswitch vport representors Currently the eswitch vport reps have a valid indicator, which is set on register and unset on unregister. However, a rep can be loaded or not loaded when doing unregister, current driver checks if the vport of that rep is enabled as a flag to imply the rep is loaded. However, for ECPF, this is not valid as the host PF will enable the vports for its VFs instead. Add three states: {unregistered, registered, loaded}, with the following state changes across different operations: create: (none) -> unregistered reg: unregistered -> registered load: registered -> loaded unload: loaded -> registered unreg: registered -> unregistered Note that the state shall only be updated inside eswitch driver rather than individual drivers such as ETH or IB. Signed-off-by: Bodong Wang Signed-off-by: Or Gerlitz Suggested-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 31 +++++++++++++------ include/linux/mlx5/eswitch.h | 8 ++++- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4979c7ee0ad7..c6c9dad69ba8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -364,7 +364,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (!rep->rep_if[REP_ETH].valid) + if (rep->rep_if[REP_ETH].state != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -1256,7 +1256,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) struct mlx5_core_dev *dev = esw->dev; struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN]; + u8 hw_id[ETH_ALEN], rep_type; int vport; esw->offloads.vport_reps = kcalloc(total_vfs, @@ -1271,6 +1271,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) mlx5_esw_for_all_reps(esw, vport, rep) { rep->vport = vport; ether_addr_copy(rep->hw_id, hw_id); + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + rep->rep_if[rep_type].state = REP_UNREGISTERED; } offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; @@ -1281,10 +1284,11 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (!rep->rep_if[rep_type].valid) + if (rep->rep_if[rep_type].state != REP_LOADED) return; rep->rep_if[rep_type].unload(rep); + rep->rep_if[rep_type].state = REP_REGISTERED; } static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, @@ -1311,10 +1315,18 @@ static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (!rep->rep_if[rep_type].valid) + int err = 0; + + if (rep->rep_if[rep_type].state != REP_REGISTERED) return 0; - return rep->rep_if[rep_type].load(esw->dev, rep); + err = rep->rep_if[rep_type].load(esw->dev, rep); + if (err) + return err; + + rep->rep_if[rep_type].state = REP_LOADED; + + return 0; } static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, @@ -1861,7 +1873,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, rep_if->get_proto_dev = __rep_if->get_proto_dev; rep_if->priv = __rep_if->priv; - rep_if->valid = true; + rep_if->state = REP_REGISTERED; } EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); @@ -1873,10 +1885,11 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, rep = &offloads->vport_reps[vport_index]; - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) + if (esw->mode == SRIOV_OFFLOADS && + rep->rep_if[rep_type].state == REP_LOADED) rep->rep_if[rep_type].unload(rep); - rep->rep_if[rep_type].valid = false; + rep->rep_if[rep_type].state = REP_UNREGISTERED; } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); @@ -1896,7 +1909,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (rep->rep_if[rep_type].valid && + if (rep->rep_if[rep_type].state == REP_LOADED && rep->rep_if[rep_type].get_proto_dev) return rep->rep_if[rep_type].get_proto_dev(rep); return NULL; diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index fab5121ffb8f..e3dbc1bc0917 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -22,6 +22,12 @@ enum { NUM_REP_TYPES, }; +enum { + REP_UNREGISTERED, + REP_REGISTERED, + REP_LOADED, +}; + struct mlx5_eswitch_rep; struct mlx5_eswitch_rep_if { int (*load)(struct mlx5_core_dev *dev, @@ -29,7 +35,7 @@ struct mlx5_eswitch_rep_if { void (*unload)(struct mlx5_eswitch_rep *rep); void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); void *priv; - bool valid; + u8 state; }; struct mlx5_eswitch_rep { From 29d9fd7d5a667b120f8228da3d8a8d1b2382538d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 29 Jan 2019 22:57:21 -0600 Subject: [PATCH 30/35] net/mlx5: E-Switch, Support load/unload reps of specific vport types Currently the driver loads and unloads all reps in an unbreakable group. However, with ECPF, the reps of special vports such as uplink and host PF should always be loaded in switchdev mode where the reps for VFs will be loaded on-demand and unloaded on no-demand. This is a pre-step for that change. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 111 ++++++++++++++---- 1 file changed, 88 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c6c9dad69ba8..7131d41796fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1291,25 +1291,47 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, rep->rep_if[rep_type].state = REP_REGISTERED; } -static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; - - mlx5_esw_for_each_vf_rep_reverse(esw, vport, rep, nvports) - __esw_offloads_unload_rep(esw, rep, rep_type); rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); } -static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) +static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int i; + + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_vf_vport(esw, nvports, rep_type); +} + +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + __unload_reps_vf_vport(esw, nvports, rep_type); + + /* Special vports must be the last to unload. */ + __unload_reps_special_vport(esw, rep_type); +} + +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = NUM_REP_TYPES; + + while (rep_type-- > 0) + __unload_reps_all_vport(esw, nvports, rep_type); } static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, @@ -1329,39 +1351,42 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, return 0; } -static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; int err; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto out; + return err; +} - mlx5_esw_for_each_vf_rep(esw, vport, rep, nvports) { +static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int err, i; + + mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) { err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) - goto err_reps; + goto err_vf; } return 0; -err_reps: - esw_offloads_unload_reps_type(esw, --vport, rep_type); -out: +err_vf: + __unload_reps_vf_vport(esw, --i, rep_type); return err; } -static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = esw_offloads_load_reps_type(esw, nvports, rep_type); + err = __load_reps_vf_vport(esw, nvports, rep_type); if (err) goto err_reps; } @@ -1370,7 +1395,47 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_vf_vport(esw, nvports, rep_type); + return err; +} + +static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + int err; + + /* Special vports must be loaded first. */ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, nvports, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); + return err; +} + +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = 0; + int err; + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + err = __load_reps_all_vport(esw, nvports, rep_type); + if (err) + goto err_reps; + } + + return err; + +err_reps: + while (rep_type-- > 0) + __unload_reps_all_vport(esw, nvports, rep_type); return err; } @@ -1520,7 +1585,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, if (err) return err; - err = esw_offloads_load_reps(esw, vf_nvports); + err = esw_offloads_load_all_reps(esw, vf_nvports); if (err) goto err_reps; @@ -1556,7 +1621,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) u16 num_vfs = esw->dev->priv.sriov.num_vfs; esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw, num_vfs); esw_offloads_steering_cleanup(esw); } From f8e8fa0262eaf544490a11746c524333158ef0b6 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 31 Jan 2019 17:42:57 -0600 Subject: [PATCH 31/35] net/mlx5: E-Switch, Centralize repersentor reg/unreg to eswitch driver Eswitch has two users: IB and ETH. They both register repersentors when mlx5 interface is added, and unregister the repersentors when mlx5 interface is removed. Ideally, each driver should only deal with the entities which are unique to itself. However, current IB and ETH drivers have to perform the following eswitch operations: 1. When registering, specify how many vports to register. This number is the same for both drivers which is the total available vport numbers. 2. When unregistering, specify the number of registered vports to do unregister. Also, unload the repersentors which are already loaded. It's unnecessary for eswitch driver to hands out the control of above operations to individual driver users, as they're not unique to each driver. Instead, such operations should be centralized to eswitch driver. This consolidates eswitch control flow, and simplified IB and ETH driver. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 18 ++---- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 19 ++----- .../mellanox/mlx5/core/eswitch_offloads.c | 55 +++++++++---------- include/linux/mlx5/eswitch.h | 11 ++-- 4 files changed, 43 insertions(+), 60 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 99cae9a10195..4700cffb5a00 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -95,26 +95,20 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vports = MLX5_TOTAL_VPORTS(mdev); struct mlx5_eswitch_rep_if rep_if = {}; - int vport; - for (vport = 0; vport < total_vports; vport++) { - rep_if.load = mlx5_ib_vport_rep_load; - rep_if.unload = mlx5_ib_vport_rep_unload; - rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB); - } + rep_if.load = mlx5_ib_vport_rep_load; + rep_if.unload = mlx5_ib_vport_rep_unload; + rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; + + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB); } void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vports = MLX5_TOTAL_VPORTS(mdev); - int vport; - for (vport = total_vports - 1; vport >= 0; vport--) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB); + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); } u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f84889bbe2a0..287d48e5b073 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1798,25 +1798,18 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; + struct mlx5_eswitch_rep_if rep_if = {}; - for (vport = 0; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; + rep_if.load = mlx5e_vport_rep_load; + rep_if.unload = mlx5e_vport_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); - } + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - for (vport = total_vfs - 1; vport >= 0; vport--) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH); + mlx5_eswitch_unregister_vport_reps(esw, REP_ETH); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7131d41796fb..b702b56c457e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1923,40 +1923,39 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) return 0; } -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *__rep_if, - u8 rep_type) +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *__rep_if, + u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep_if *rep_if; - - rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type]; - - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - rep_if->state = REP_REGISTERED; -} -EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); - -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, u8 rep_type) -{ - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; + int i; - rep = &offloads->vport_reps[vport_index]; + mlx5_esw_for_all_reps(esw, i, rep) { + rep_if = &rep->rep_if[rep_type]; + rep_if->load = __rep_if->load; + rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; + rep_if->priv = __rep_if->priv; - if (esw->mode == SRIOV_OFFLOADS && - rep->rep_if[rep_type].state == REP_LOADED) - rep->rep_if[rep_type].unload(rep); - - rep->rep_if[rep_type].state = REP_UNREGISTERED; + rep_if->state = REP_REGISTERED; + } } -EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); + +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) +{ + u16 max_vf = mlx5_core_max_vfs(esw->dev); + struct mlx5_eswitch_rep *rep; + int i; + + if (esw->mode == SRIOV_OFFLOADS) + __unload_reps_all_vport(esw, max_vf, rep_type); + + mlx5_esw_for_all_reps(esw, i, rep) + rep->rep_if[rep_type].state = REP_UNREGISTERED; +} +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index e3dbc1bc0917..96d8435421de 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -46,13 +46,10 @@ struct mlx5_eswitch_rep { u32 vlan_refcount; }; -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *rep_if, - u8 rep_type); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - u8 rep_type); +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *rep_if, + u8 rep_type); +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type); From 5ae5162066d8e59e365678a9e76fc4d8f6b78d40 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Fri, 14 Dec 2018 09:33:22 -0600 Subject: [PATCH 32/35] net/mlx5: E-Switch, Assign a different position for uplink rep and vport In offloads mode, the current implementation puts the uplink representor at index zero of the vport reps array. It is not "natural" to place it at index 0 since we want to put the representor for vport 0 at index 0 with the introduction of SmartNIC. A separate patch will handle the case whether a rep is needed for vport 0 (PF vport). So, we want to have a different placeholder for uplink vport and representor. It was placed at the end of vport and rep array. Since vport number can no longer act as an index into the vport or representors arrays, use functions to map vport numbers to indices when accessing the vports or representors arrays, and vice versa. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 11 +++++---- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 24 +++++++++++++++++++ .../mellanox/mlx5/core/eswitch_offloads.c | 11 ++------- include/linux/mlx5/vport.h | 11 ++++++--- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d1454f18c0a7..bb7f72467df9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -84,8 +84,10 @@ enum { static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + WARN_ON(vport_num > esw->total_vports - 1); - return &esw->vports[vport_num]; + return &esw->vports[idx]; } static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, @@ -1756,8 +1758,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; struct mlx5_vport *vport; - int vport_num; - int err; + int err, i; if (!MLX5_VPORT_MANAGER(dev)) return 0; @@ -1798,8 +1799,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); - mlx5_esw_for_all_vports(esw, vport_num, vport) { - vport->vport = vport_num; + mlx5_esw_for_all_vports(esw, i, vport) { + vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fd845e6c44d5..2951c1296c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -374,6 +374,30 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) +{ + /* Uplink always locate at the last element of the array.*/ + return esw->total_vports - 1; +} + +static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, + u16 vport_num) +{ + if (vport_num == MLX5_VPORT_UPLINK) + return mlx5_eswitch_uplink_idx(esw); + + return vport_num; +} + +static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, + int index) +{ + if (index == mlx5_eswitch_uplink_idx(esw)) + return MLX5_VPORT_UPLINK; + + return index; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b702b56c457e..e787e9212174 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -85,10 +85,7 @@ enum { static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) { - u16 idx = vport_num; - - if (vport_num == MLX5_VPORT_UPLINK) - idx = UPLINK_REP_INDEX; + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); WARN_ON(idx > esw->total_vports - 1); return &esw->offloads.vport_reps[idx]; @@ -1254,7 +1251,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; u8 hw_id[ETH_ALEN], rep_type; int vport; @@ -1265,19 +1261,16 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - offloads = &esw->offloads; mlx5_query_nic_vport_mac_address(dev, 0, hw_id); mlx5_esw_for_all_reps(esw, vport, rep) { - rep->vport = vport; + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) rep->rep_if[rep_type].state = REP_UNREGISTERED; } - offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; - return 0; } diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 755aeea19e1c..134248c02786 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,9 +36,14 @@ #include #include -#define MLX5_VPORT_PF_PLACEHOLDER (1u) -#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER) -#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + mlx5_core_max_vfs(mdev)) +#define MLX5_VPORT_PF_PLACEHOLDER (1u) +#define MLX5_VPORT_UPLINK_PLACEHOLDER (1u) + +#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER + \ + MLX5_VPORT_UPLINK_PLACEHOLDER) + +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + \ + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ From 81cd229c294e2e416e9161d9286d34f3aaf19348 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 10 Dec 2018 11:59:33 -0600 Subject: [PATCH 33/35] net/mlx5: E-Switch, Consider ECPF vport depends on eswitch ownership ECPF connects to the eswitch through vport 0xfffe. ECPF may or may not be the eswitch manager depending on firmware configuration. 1. If ECPF is eswitch manager: ECPF will take over the eswitch manager responsibility. A rep of the host PF shall be created at the ECPF side for the eswitch manager to control. 2. If ECPF is not eswitch manager: host PF will be the eswitch manager, ECPF acts similar as a VF to the host PF. Host PF will be aware of the ECPF vport presence and control it's rep. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 8 +- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 15 ++++ .../mellanox/mlx5/core/eswitch_offloads.c | 79 ++++++++++++++++++- include/linux/mlx5/driver.h | 5 ++ include/linux/mlx5/mlx5_ifc.h | 3 +- include/linux/mlx5/vport.h | 8 +- 6 files changed, 110 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index bb7f72467df9..d2ab1ee19b2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1667,7 +1667,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); err = esw_offloads_init(esw, nvfs, - nvfs + MLX5_SPECIAL_VPORTS); + nvfs + MLX5_SPECIAL_VPORTS(esw->dev)); } if (err) @@ -1687,6 +1687,12 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); esw_enable_vport(esw, vport, enabled_events); + /* Enable ECPF vports */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_enable_vport(esw, vport, enabled_events); + } + /* Enable VF vports */ mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) esw_enable_vport(esw, vport, enabled_events); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2951c1296c3e..2baa0d71380c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -380,9 +380,20 @@ static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) return esw->total_vports - 1; } +static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw) +{ + return esw->total_vports - 2; +} + static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, u16 vport_num) { + if (vport_num == MLX5_VPORT_ECPF) { + if (!mlx5_ecpf_vport_exists(esw->dev)) + esw_warn(esw->dev, "ECPF vport doesn't exist!\n"); + return mlx5_eswitch_ecpf_idx(esw); + } + if (vport_num == MLX5_VPORT_UPLINK) return mlx5_eswitch_uplink_idx(esw); @@ -392,6 +403,10 @@ static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, int index) { + if (index == mlx5_eswitch_ecpf_idx(esw) && + mlx5_ecpf_vport_exists(esw->dev)) + return MLX5_VPORT_ECPF; + if (index == mlx5_eswitch_uplink_idx(esw)) return MLX5_VPORT_UPLINK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e787e9212174..84a33f8e3350 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -639,14 +639,35 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_pf_flow_err; + } + flows[MLX5_VPORT_PF] = flow; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } + flows[mlx5_eswitch_ecpf_idx(esw)] = flow; + } + mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { MLX5_SET(fte_match_set_misc, misc, source_port, i); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); - esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); - goto add_flow_err; + goto add_vf_flow_err; } flows[i] = flow; } @@ -656,10 +677,18 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, kvfree(spec); return 0; -add_flow_err: +add_vf_flow_err: nvports = --i; mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) mlx5_del_flow_rules(flows[i]); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); +add_ecpf_flow_err: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); +add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); kvfree(flows); alloc_flows_err: kvfree(spec); @@ -676,6 +705,12 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[i]); + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); + kvfree(flows); } @@ -1288,6 +1323,16 @@ static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); } @@ -1351,6 +1396,34 @@ static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + return err; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_pf; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_ecpf; + } + + return 0; + +err_ecpf: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + +err_pf: + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); return err; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c5454f985e1d..c2de50f02b33 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1088,6 +1088,11 @@ static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); } +static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists); +} + #define MLX5_HOST_PF_MAX_VFS (127u) static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5decffe565fb..b7bb774b57b0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -631,7 +631,8 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x17]; + u8 reserved_at_5[0x16]; + u8 ecpf_vport_exists[0x1]; u8 counter_eswitch_affinity[0x1]; u8 merged_eswitch[0x1]; u8 nic_vport_node_guid_modify[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 134248c02786..0eef548b9946 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -38,11 +38,13 @@ #define MLX5_VPORT_PF_PLACEHOLDER (1u) #define MLX5_VPORT_UPLINK_PLACEHOLDER (1u) +#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev) (mlx5_ecpf_vport_exists(mdev)) -#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER + \ - MLX5_VPORT_UPLINK_PLACEHOLDER) +#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER + \ + MLX5_VPORT_UPLINK_PLACEHOLDER + \ + MLX5_VPORT_ECPF_PLACEHOLDER(mdev)) -#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + \ +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS(mdev) + \ mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ From a3888f33db9f55f401ad315481af251d168e57dd Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 29 Jan 2019 23:13:13 -0600 Subject: [PATCH 34/35] net/mlx5: E-Switch, Load/unload VF reps according to event from host PF When host PF changes the number of VFs, the ECPF esw driver will get a FW event. It should query the number of VFs enabled by host PF and update the VF reps accordingly. Note that host PF can't change the number of VFs dynamically, it has to reset the number of VFs to 0 before changing to a new positive number. The host event is registered when driver is moving to switchdev mode, and it's the last step to do in esw_offloads_init. It's unregistered and the work queue is flushed when driver quits from switchdev mode. In this way, the host event and devlink command are serialized. When driver is enabling switchdev mode, pay attention to the following two facts: 1. Host PF must not have VF initialized as the flow table in ECPF has ENCAP enabled as default. Such flow table can't be created with existing initialized VFs. 2. ECPF doesn't know how many VFs the host PF will enable, ECPF offloads flow steering shall create the flow table/groups based on the max number of VFs possibly supported by host PF. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 17 ++++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 11 +++ .../mellanox/mlx5/core/eswitch_offloads.c | 71 ++++++++++++++++++- 3 files changed, 96 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d2ab1ee19b2a..e18af31336e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -39,6 +39,7 @@ #include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" +#include "ecpf.h" enum { MLX5_ACTION_NONE = 0, @@ -1639,6 +1640,7 @@ static int eswitch_vport_event(struct notifier_block *nb, int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { + int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; int err; int i, enabled_events; @@ -1657,6 +1659,18 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + if (mode == SRIOV_OFFLOADS) { + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); + if (err) + return err; + total_nvports = esw->total_vports; + } else { + vf_nvports = nvfs; + total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); + } + } + esw->mode = mode; mlx5_lag_update(esw->dev); @@ -1666,8 +1680,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs, - nvfs + MLX5_SPECIAL_VPORTS(esw->dev)); + err = esw_offloads_init(esw, vf_nvports, total_nvports); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2baa0d71380c..af5581a57e56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -182,6 +182,16 @@ struct esw_mc_addr { /* SRIOV only */ u32 refcnt; }; +struct mlx5_host_work { + struct work_struct work; + struct mlx5_eswitch *esw; +}; + +struct mlx5_host_info { + struct mlx5_nb nb; + u16 num_vfs; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -206,6 +216,7 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; + struct mlx5_host_info host_info; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 84a33f8e3350..91c4095ac79e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -40,6 +40,8 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" +#include "ecpf.h" +#include "lib/eq.h" enum { FDB_FAST_PATH = 0, @@ -1640,6 +1642,57 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_offloads_fdb_tables(esw); } +static void esw_host_params_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + int err, num_vf = 0; + + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + + err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); + if (err || num_vf == esw->host_info.num_vfs) + goto out; + + /* Number of VFs can only change from "0 to x" or "x to 0". */ + if (esw->host_info.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + } else { + err = esw_offloads_load_vf_reps(esw, num_vf); + + if (err) + goto out; + } + + esw->host_info.num_vfs = num_vf; + +out: + kfree(host_work); +} + +static int esw_host_params_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_host_work *host_work; + struct mlx5_host_info *host_info; + struct mlx5_eswitch *esw; + + host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); + if (!host_work) + return NOTIFY_DONE; + + host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); + esw = container_of(host_info, struct mlx5_eswitch, host_info); + + host_work->esw = esw; + + INIT_WORK(&host_work->work, esw_host_params_event_handler); + queue_work(esw->work_queue, &host_work->work); + + return NOTIFY_OK; +} + int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, int total_nvports) { @@ -1656,6 +1709,14 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, goto err_reps; esw_offloads_devcom_init(esw); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, + HOST_PARAMS_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); + esw->host_info.num_vfs = vf_nvports; + } + return 0; err_reps: @@ -1684,7 +1745,15 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs = esw->dev->priv.sriov.num_vfs; + u16 num_vfs; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); + flush_workqueue(esw->work_queue); + num_vfs = esw->host_info.num_vfs; + } else { + num_vfs = esw->dev->priv.sriov.num_vfs; + } esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw, num_vfs); From c96692fb8f3d0a161c6892ab9cc51a5e9992ccf2 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 20 Dec 2018 17:28:06 -0600 Subject: [PATCH 35/35] net/mlx5: E-Switch, Allow transition to offloads mode for ECPF Currently, the e-switch driver requires going to legacy mode before changing to the offloads mode. This makes sense for regular case as the legacy mode is done by creating VFs. However, it's problematic when ECPF is the eswitch manager. In such case, ECPF will control the vports on peer host including the peer PF and VFs. But ECPF doesn't need and shall not create VFs as the VFs are created in the peer PF host. Grant ECPF the ability to change from none to the offloads mode. Note that currently the only way to go back to none mode is by unloading the ECPF driver. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 91c4095ac79e..f2260391be5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1250,7 +1250,8 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != SRIOV_LEGACY && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; @@ -1846,7 +1847,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE) + if (dev->priv.eswitch->mode == SRIOV_NONE && + !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; return 0;