From 8935780b9f7bbad0f2eb5dd43f61ba7b509a1575 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 14 Nov 2017 04:34:23 -0800 Subject: [PATCH 01/13] IB/hfi1: Initialize bth1 in 16B rc ack builder It is possible the bth1 variable could be used uninitialized so going ahead and giving it a default value. Otherwise we leak stack memory to the network. Fixes: 5b6cabb0db77 ("IB/hfi1: Add 16B RC/UC support") Reviewed-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index fd01a760259f..af5f7936f7e5 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -814,7 +814,7 @@ static inline void hfi1_make_rc_ack_16B(struct rvt_qp *qp, struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_16b_header *hdr = &opa_hdr->opah; struct ib_other_headers *ohdr; - u32 bth0, bth1; + u32 bth0, bth1 = 0; u16 len, pkey; u8 becn = !!is_fecn; u8 l4 = OPA_16B_L4_IB_LOCAL; From db0acbc475f06c775682ba969ab338e1efa2ae96 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 19 Nov 2017 19:58:30 +0100 Subject: [PATCH 02/13] IB: INFINIBAND should depend on HAS_DMA If NO_DMA=y: ERROR: "bad_dma_ops" [net/sunrpc/xprtrdma/rpcrdma.ko] undefined! ERROR: "bad_dma_ops" [net/smc/smc.ko] undefined! ERROR: "bad_dma_ops" [net/rds/rds_rdma.ko] undefined! ERROR: "bad_dma_ops" [net/9p/9pnet_rdma.ko] undefined! ERROR: "bad_dma_ops" [drivers/nvme/target/nvmet-rdma.ko] undefined! ERROR: "bad_dma_ops" [drivers/nvme/host/nvme-rdma.ko] undefined! ERROR: "bad_dma_ops" [drivers/infiniband/ulp/srpt/ib_srpt.ko] undefined! ERROR: "bad_dma_ops" [drivers/infiniband/ulp/srp/ib_srp.ko] undefined! ERROR: "bad_dma_ops" [drivers/infiniband/ulp/isert/ib_isert.ko] undefined! ERROR: "bad_dma_ops" [drivers/infiniband/ulp/iser/ib_iser.ko] undefined! 
ERROR: "bad_dma_ops" [drivers/infiniband/ulp/ipoib/ib_ipoib.ko] undefined! ERROR: "bad_dma_ops" [drivers/infiniband/core/ib_core.ko] undefined! Before, this was handled implicitly by the dependency on PCI. Add an explicit dependency on HAS_DMA to fix this. Fixes: 931bc0d91639f8fb ("IB: Move PCI dependency from root KConfig to HW's KConfigs") Signed-off-by: Geert Uytterhoeven Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 98ac46ed7214..cbf186522016 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -1,6 +1,6 @@ menuconfig INFINIBAND tristate "InfiniBand support" - depends on HAS_IOMEM + depends on HAS_IOMEM && HAS_DMA depends on NET depends on INET depends on m || IPV6 != m From 100d6de2ced538c539d923dc14a1b5cbae234503 Mon Sep 17 00:00:00 2001 From: Chien Tin Tung Date: Tue, 21 Nov 2017 16:18:30 -0600 Subject: [PATCH 03/13] i40iw: Allocate a sdbuf per CQP WQE Currently there is only one sdbuf per Control QP (CQP) for programming Segment Descriptor (SD). If multiple SD work requests are posted simultaneously, the sdbuf is reused by all WQEs and new WQEs can corrupt previous WQEs sdbuf leading to incorrect SD programming. Fix this by allocating one sdbuf per CQP SQ WQE. When an SD command is posted, it will use the corresponding sdbuf for the WQE. 
Fixes: 86dbcd0f12e9 ("i40iw: add file to handle cqp calls") Signed-off-by: Chien Tin Tung Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 43 +++++++++++++++++------- drivers/infiniband/hw/i40iw/i40iw_d.h | 4 ++- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index d88c6cf47cf2..da9821a10e0d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -513,7 +513,7 @@ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp, ret_code = i40iw_allocate_dma_mem(cqp->dev->hw, &cqp->sdbuf, - 128, + I40IW_UPDATE_SD_BUF_SIZE * cqp->sq_size, I40IW_SD_BUF_ALIGNMENT); if (ret_code) @@ -596,14 +596,15 @@ void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp) } /** - * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq - * @cqp: struct for cqp hw - * @wqe_idx: we index of cqp ring + * i40iw_sc_cqp_get_next_send_wqe_idx - get next WQE on CQP SQ and pass back the index + * @cqp: pointer to CQP structure + * @scratch: private data for CQP WQE + * @wqe_idx: WQE index for next WQE on CQP SQ */ -u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch) +static u64 *i40iw_sc_cqp_get_next_send_wqe_idx(struct i40iw_sc_cqp *cqp, + u64 scratch, u32 *wqe_idx) { u64 *wqe = NULL; - u32 wqe_idx; enum i40iw_status_code ret_code; if (I40IW_RING_FULL_ERR(cqp->sq_ring)) { @@ -616,20 +617,32 @@ u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch) cqp->sq_ring.size); return NULL; } - I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code); + I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, *wqe_idx, ret_code); cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++; if (ret_code) return NULL; - if (!wqe_idx) + if (!*wqe_idx) cqp->polarity = !cqp->polarity; - wqe = cqp->sq_base[wqe_idx].elem; - cqp->scratch_array[wqe_idx] = scratch; + wqe = 
cqp->sq_base[*wqe_idx].elem; + cqp->scratch_array[*wqe_idx] = scratch; I40IW_CQP_INIT_WQE(wqe); return wqe; } +/** + * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq + * @cqp: struct for cqp hw + * @scratch: private data for CQP WQE + */ +u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch) +{ + u32 wqe_idx; + + return i40iw_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx); +} + /** * i40iw_sc_cqp_destroy - destroy cqp during close * @cqp: struct for cqp hw @@ -3587,8 +3600,10 @@ static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp, u64 *wqe; int mem_entries, wqe_entries; struct i40iw_dma_mem *sdbuf = &cqp->sdbuf; + u64 offset; + u32 wqe_idx; - wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); + wqe = i40iw_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx); if (!wqe) return I40IW_ERR_RING_FULL; @@ -3601,8 +3616,10 @@ static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp, LS_64(mem_entries, I40IW_CQPSQ_UPESD_ENTRY_COUNT); if (mem_entries) { - memcpy(sdbuf->va, &info->entry[3], (mem_entries << 4)); - data = sdbuf->pa; + offset = wqe_idx * I40IW_UPDATE_SD_BUF_SIZE; + memcpy((char *)sdbuf->va + offset, &info->entry[3], + mem_entries << 4); + data = (u64)sdbuf->pa + offset; } else { data = 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 65ec39e3746b..1077b78f7754 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1526,7 +1526,7 @@ enum i40iw_alignment { I40IW_AEQ_ALIGNMENT = 0x100, I40IW_CEQ_ALIGNMENT = 0x100, I40IW_CQ0_ALIGNMENT = 0x100, - I40IW_SD_BUF_ALIGNMENT = 0x100 + I40IW_SD_BUF_ALIGNMENT = 0x80 }; #define I40IW_WQE_SIZE_64 64 @@ -1534,6 +1534,8 @@ enum i40iw_alignment { #define I40IW_QP_WQE_MIN_SIZE 32 #define I40IW_QP_WQE_MAX_SIZE 128 +#define I40IW_UPDATE_SD_BUF_SIZE 128 + #define I40IW_CQE_QTYPE_RQ 0 #define I40IW_CQE_QTYPE_SQ 1 From 10499986dbd8778e1acf9f9d2e166800dfee44b4 Mon Sep 
17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 21 Nov 2017 16:18:31 -0600 Subject: [PATCH 04/13] i40iw: Do not free sqbuf when event is I40IW_TIMER_TYPE_CLOSE When the event type is I40IW_TIMER_TYPE_CLOSE, there is no sqbuf and it should not be freed as one in i40iw_schedule_cm_timer(). Fixes: f27b4746f378 ("i40iw: add connection management code") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 493d6ef3d2d5..e1454174e3d9 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1043,7 +1043,7 @@ negotiate_done: * i40iw_schedule_cm_timer * @@cm_node: connection's node * @sqbuf: buffer to send - * @type: if it es send ot close + * @type: if it is send or close * @send_retrans: if rexmits to be done * @close_when_complete: is cm_node to be removed * @@ -1067,7 +1067,8 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) { - i40iw_free_sqbuf(vsi, (void *)sqbuf); + if (type != I40IW_TIMER_TYPE_CLOSE) + i40iw_free_sqbuf(vsi, (void *)sqbuf); return -ENOMEM; } new_send->retrycount = I40IW_DEFAULT_RETRYS; @@ -1082,7 +1083,6 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, new_send->timetosend += (HZ / 10); if (cm_node->close_entry) { kfree(new_send); - i40iw_free_sqbuf(vsi, (void *)sqbuf); i40iw_pr_err("already close entry\n"); return -EINVAL; } From a283cdc4d3670700182c820b59078387f9a01a30 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 21 Nov 2017 16:18:32 -0600 Subject: [PATCH 05/13] i40iw: Correct ARP index mask The ARP table entry indexes are aliased to 12bits instead of the intended 16bits when uploaded to the QP Context. 
This will present an issue when the number of connections exceeds 4096 as ARP entries are reused. Fix this by adjusting the mask to account for the full 16bits. Fixes: 4e9042e647ff ("i40iw: add hw and utils files") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_d.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 1077b78f7754..029083cb81d5 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1114,7 +1114,7 @@ #define I40IWQPC_VLANTAG_MASK (0xffffULL << I40IWQPC_VLANTAG_SHIFT) #define I40IWQPC_ARPIDX_SHIFT 48 -#define I40IWQPC_ARPIDX_MASK (0xfffULL << I40IWQPC_ARPIDX_SHIFT) +#define I40IWQPC_ARPIDX_MASK (0xffffULL << I40IWQPC_ARPIDX_SHIFT) #define I40IWQPC_FLOWLABEL_SHIFT 0 #define I40IWQPC_FLOWLABEL_MASK (0xfffffUL << I40IWQPC_FLOWLABEL_SHIFT) From 8bb45252bb5108485f590a08291d1f4641abc77e Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 21 Nov 2017 16:18:33 -0600 Subject: [PATCH 06/13] i40iw: Move MPA request event for loopback after connect For loopback, a MPA request event is generated when cm_node is initialized, which allows applications to act on the connect request before i40iw_connect() has completed. In some cases, the reject flow executes in parallel with the connect flow and doesn't delete an APBVT entry, because the apbvt_set variable is still not set by the connect flow. Move the MPA request event to the end of i40iw_connect() to notify application for a connect request, after connect has completed. 
Fixes: f27b4746f378 ("i40iw: add connection management code") Signed-off-by: Tatyana Nikolova Signed-off-by: Henry Orosco Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index e1454174e3d9..57cf40733800 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2947,8 +2947,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node( loopback_remotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd; cm_node->tcp_cntxt.snd_wscale = loopback_remotenode->tcp_cntxt.rcv_wscale; loopback_remotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale; - loopback_remotenode->state = I40IW_CM_STATE_MPAREQ_RCVD; - i40iw_create_event(loopback_remotenode, I40IW_CM_EVENT_MPA_REQ); } return cm_node; } @@ -3864,6 +3862,12 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto err; } + if (cm_node->loopbackpartner) { + cm_node->loopbackpartner->state = I40IW_CM_STATE_MPAREQ_RCVD; + i40iw_create_event(cm_node->loopbackpartner, + I40IW_CM_EVENT_MPA_REQ); + } + i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n", From a7c6dfe215a7f85c6195f09c1b9474a2ba7fea9c Mon Sep 17 00:00:00 2001 From: Henry Orosco Date: Tue, 21 Nov 2017 16:18:34 -0600 Subject: [PATCH 07/13] i40iw: Notify user of established connection after QP in RTS Established CM event is sent prior to modifying QP to RTS state. This can result in application closing the connection before the QP is actually in RTS state. Move sending of established CM event to after modify QP to RTS. 
Fixes: f27b4746f378 ("i40iw: add connection management code") Signed-off-by: Henry Orosco Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 57cf40733800..77870f9e1736 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3687,11 +3687,16 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_id->add_ref(cm_id); i40iw_add_ref(&iwqp->ibqp); - i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0); - attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); + + cm_node->accelerated = 1; + status = + i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0); + if (status) + i40iw_debug(dev, I40IW_DEBUG_CM, "error sending cm event - ESTABLISHED\n"); + if (cm_node->loopbackpartner) { cm_node->loopbackpartner->pdata.size = conn_param->private_data_len; @@ -3702,7 +3707,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) i40iw_create_event(cm_node->loopbackpartner, I40IW_CM_EVENT_CONNECTED); } - cm_node->accelerated = 1; if (cm_node->accept_pend) { atomic_dec(&cm_node->listener->pend_accepts_cnt); cm_node->accept_pend = 0; @@ -4048,9 +4052,6 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event) dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0); if (iwqp->page) kunmap(iwqp->page); - status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0); - if (status) - i40iw_pr_err("send cm event\n"); memset(&attr, 0, sizeof(attr)); attr.qp_state = IB_QPS_RTS; @@ -4058,6 +4059,10 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event) i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); cm_node->accelerated = 1; + status = 
i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, + 0); + if (status) + i40iw_debug(dev, I40IW_DEBUG_CM, "error sending cm event - CONNECT_REPLY\n"); return; From 23a9cd2ad90543e9da3786878d2b2729c095439d Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 26 Nov 2017 20:23:54 +0200 Subject: [PATCH 08/13] RDMA/cma: Make sure that PSN is not over max allowed This patch limits the initial value for PSN to 24 bits as spec requires. Signed-off-by: Moni Shoua Signed-off-by: Mukesh Kacker Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1fdb473b5df7..f6983357145d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -801,6 +801,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net, INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); id_priv->id.route.addr.dev_addr.net = get_net(net); + id_priv->seq_num &= 0x00ffffff; return &id_priv->id; } From a9cd1a673737dd81332fce1145801bfacfb90d90 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 27 Nov 2017 13:39:05 +0000 Subject: [PATCH 09/13] IB/core: Init subsys if compiled to vmlinuz-core Once infiniband is compiled as a core component its subsystem must be enabled before device initialization. 
Otherwise there is a NULL pointer dereference during mlx4_core init, calltrace: ->device_add if (dev->class) { deref dev->class->p =>NULLPTR #Config CONFIG_NET_DEVLINK=y CONFIG_MAY_USE_DEVLINK=y CONFIG_MLX4_EN=y Signed-off-by: Dmitry Monakhov Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 84fc32a2c8b3..5e1be4949d5f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1253,5 +1253,5 @@ static void __exit ib_core_cleanup(void) MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); -module_init(ib_core_init); +subsys_initcall(ib_core_init); module_exit(ib_core_cleanup); From db270c41900d39a388990701da3ee2971094ebaa Mon Sep 17 00:00:00 2001 From: "Wei Hu\\(Xavier\\)" Date: Tue, 28 Nov 2017 15:10:26 +0800 Subject: [PATCH 10/13] RDMA/hns: Fix the issue of IOVA not page continuous in hip08 If the smmu is enabled, the length of sg obtained from __iommu_map_sg_attrs is not 4kB. When the IOVA is set with the sg dma address, the IOVA will not be page continuous, so the current code has an MTPT configuration error that will probably cause DMA operation failures. In order to fix this issue, the IOVA should be calculated based on the sg length. 
Fixes: 3958cc5("RDMA/hns: Configure the MTPT in hip08") Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Yixian Liu Signed-off-by: Xiping Zhang (Francis) Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8f719c00467b..8e18445714a9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1126,9 +1126,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, { struct hns_roce_v2_mpt_entry *mpt_entry; struct scatterlist *sg; + u64 page_addr; u64 *pages; + int i, j; + int len; int entry; - int i; mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); @@ -1186,14 +1188,20 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, i = 0; for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { - pages[i] = ((u64)sg_dma_address(sg)) >> 6; + len = sg_dma_len(sg) >> PAGE_SHIFT; + for (j = 0; j < len; ++j) { + page_addr = sg_dma_address(sg) + + (j << mr->umem->page_shift); + pages[i] = page_addr >> 6; - /* Record the first 2 entry directly to MTPT table */ - if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) - break; - i++; + /* Record the first 2 entry directly to MTPT table */ + if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) + goto found; + i++; + } } +found: mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M, V2_MPT_BYTE_56_PA0_H_S, From b1c158350968d6717ec1889f07ea3a89432e8574 Mon Sep 17 00:00:00 2001 From: "Wei Hu\\(Xavier\\)" Date: Tue, 28 Nov 2017 15:10:27 +0800 Subject: [PATCH 11/13] RDMA/hns: Get rid of virt_to_page and vmap calls after dma_alloc_coherent In general dma_alloc_coherent() returns a CPU virtual address and a DMA address, and we have no 
guarantee that the virtual address is either in the linear map or vmalloc. It could be in some other special place. We have no guarantee that the underlying memory even has an associated struct page at all. In the current code, there is an incorrect usage as below: dma_alloc_coherent + virt_to_page + vmap. This will probably introduce coherency problems. This patch fixes it by getting rid of the virt_to_page and vmap calls, at Leon's suggestion. The related link: https://lkml.org/lkml/2017/11/7/34 Fixes: 9a44353("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Yixian Liu Signed-off-by: Xiping Zhang (Francis) Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 23 --------------------- drivers/infiniband/hw/hns/hns_roce_device.h | 4 +--- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 3e4c5253ab5c..a40ec939ece5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -162,14 +162,10 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, { int i; struct device *dev = hr_dev->dev; - u32 bits_per_long = BITS_PER_LONG; if (buf->nbufs == 1) { dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); } else { - if (bits_per_long == 64 && buf->page_shift == PAGE_SHIFT) - vunmap(buf->direct.buf); - for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) dma_free_coherent(dev, 1 << buf->page_shift, @@ -185,9 +181,7 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, { int i = 0; dma_addr_t t; - struct page **pages; struct device *dev = hr_dev->dev; - u32 bits_per_long = BITS_PER_LONG; u32 page_size = 1 << page_shift; u32 order; @@ -236,23 +230,6 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, buf->page_list[i].map = t; 
memset(buf->page_list[i].buf, 0, page_size); } - if (bits_per_long == 64 && page_shift == PAGE_SHIFT) { - pages = kmalloc_array(buf->nbufs, sizeof(*pages), - GFP_KERNEL); - if (!pages) - goto err_free; - - for (i = 0; i < buf->nbufs; ++i) - pages[i] = virt_to_page(buf->page_list[i].buf); - - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, - PAGE_KERNEL); - kfree(pages); - if (!buf->direct.buf) - goto err_free; - } else { - buf->direct.buf = NULL; - } } return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 01d3d695cbba..b154ce40cded 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -726,11 +726,9 @@ static inline struct hns_roce_qp static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) { - u32 bits_per_long_val = BITS_PER_LONG; u32 page_size = 1 << buf->page_shift; - if ((bits_per_long_val == 64 && buf->page_shift == PAGE_SHIFT) || - buf->nbufs == 1) + if (buf->nbufs == 1) return (char *)(buf->direct.buf) + offset; else return (char *)(buf->page_list[offset >> buf->page_shift].buf) + From 378efe798ecf0e7d9730a595ef3419b046e34fb4 Mon Sep 17 00:00:00 2001 From: "Wei Hu\\(Xavier\\)" Date: Tue, 28 Nov 2017 15:10:28 +0800 Subject: [PATCH 12/13] RDMA/hns: Get rid of page operation after dma_alloc_coherent In general, dma_alloc_coherent() returns a CPU virtual address and a DMA address, and we have no guarantee that the underlying memory even has an associated struct page at all. This patch gets rid of the page operation after dma_alloc_coherent, and records the VA returned from dma_alloc_coherent in the struct of hem in hns RoCE driver. 
Fixes: 9a44353("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Yixian Liu Signed-off-by: Xiping Zhang (Francis) Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 25 ++++++++++++------------ drivers/infiniband/hw/hns/hns_roce_hem.h | 1 + 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 8b733a66fae5..0eeabfbee192 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -224,6 +224,7 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN); chunk->npages = 0; chunk->nsg = 0; + memset(chunk->buf, 0, sizeof(chunk->buf)); list_add_tail(&chunk->list, &hem->chunk_list); } @@ -240,8 +241,7 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, if (!buf) goto fail; - sg_set_buf(mem, buf, PAGE_SIZE << order); - WARN_ON(mem->offset); + chunk->buf[chunk->npages] = buf; sg_dma_len(mem) = PAGE_SIZE << order; ++chunk->npages; @@ -267,8 +267,8 @@ void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem) list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) dma_free_coherent(hr_dev->dev, - chunk->mem[i].length, - lowmem_page_address(sg_page(&chunk->mem[i])), + sg_dma_len(&chunk->mem[i]), + chunk->buf[i], sg_dma_address(&chunk->mem[i])); kfree(chunk); } @@ -722,11 +722,12 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_chunk *chunk; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; - struct page *page = NULL; + void *addr = NULL; unsigned long mhop_obj = obj; unsigned long obj_per_chunk; unsigned long idx_offset; int offset, dma_offset; + int length; int i, j; u32 hem_idx = 0; @@ -763,25 +764,25 @@ void 
*hns_roce_table_find(struct hns_roce_dev *hr_dev, list_for_each_entry(chunk, &hem->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { + length = sg_dma_len(&chunk->mem[i]); if (dma_handle && dma_offset >= 0) { - if (sg_dma_len(&chunk->mem[i]) > - (u32)dma_offset) + if (length > (u32)dma_offset) *dma_handle = sg_dma_address( &chunk->mem[i]) + dma_offset; - dma_offset -= sg_dma_len(&chunk->mem[i]); + dma_offset -= length; } - if (chunk->mem[i].length > (u32)offset) { - page = sg_page(&chunk->mem[i]); + if (length > (u32)offset) { + addr = chunk->buf[i] + offset; goto out; } - offset -= chunk->mem[i].length; + offset -= length; } } out: mutex_unlock(&table->mutex); - return page ? lowmem_page_address(page) + offset : NULL; + return addr; } EXPORT_SYMBOL_GPL(hns_roce_table_find); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index db66db12075e..e8850d59e780 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -78,6 +78,7 @@ struct hns_roce_hem_chunk { int npages; int nsg; struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN]; + void *buf[HNS_ROCE_HEM_CHUNK_LEN]; }; struct hns_roce_hem { From 315d160c5a4e034a576a13aa21e7235d5c9ec609 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 29 Nov 2017 20:10:39 +0200 Subject: [PATCH 13/13] IB/core: Only enforce security for InfiniBand For now the only LSM security enforcement mechanism available is specific to InfiniBand. Bypass enforcement for non-IB link types. This fixes a regression where modify_qp fails for iWARP because querying the PKEY returns -EINVAL. 
Cc: Paul Moore Cc: Don Dutile Cc: stable@vger.kernel.org Reported-by: Potnuri Bharat Teja Fixes: d291f1a65232("IB/core: Enforce PKey security on QPs") Fixes: 47a2b338fe63("IB/core: Enforce security on management datagrams") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Tested-by: Potnuri Bharat Teja Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/security.c | 50 +++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 23278ed5be45..a337386652b0 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -417,8 +417,17 @@ void ib_close_shared_qp_security(struct ib_qp_security *sec) int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev) { + u8 i = rdma_start_port(dev); + bool is_ib = false; int ret; + while (i <= rdma_end_port(dev) && !is_ib) + is_ib = rdma_protocol_ib(dev, i++); + + /* If this isn't an IB device don't create the security context */ + if (!is_ib) + return 0; + qp->qp_sec = kzalloc(sizeof(*qp->qp_sec), GFP_KERNEL); if (!qp->qp_sec) return -ENOMEM; @@ -441,6 +450,10 @@ EXPORT_SYMBOL(ib_create_qp_security); void ib_destroy_qp_security_begin(struct ib_qp_security *sec) { + /* Return if not IB */ + if (!sec) + return; + mutex_lock(&sec->mutex); /* Remove the QP from the lists so it won't get added to @@ -470,6 +483,10 @@ void ib_destroy_qp_security_abort(struct ib_qp_security *sec) int ret; int i; + /* Return if not IB */ + if (!sec) + return; + /* If a concurrent cache update is in progress this * QP security could be marked for an error state * transition. Wait for this to complete. 
@@ -505,6 +522,10 @@ void ib_destroy_qp_security_end(struct ib_qp_security *sec) { int i; + /* Return if not IB */ + if (!sec) + return; + /* If a concurrent cache update is occurring we must * wait until this QP security structure is processed * in the QP to error flow before destroying it because @@ -557,7 +578,7 @@ int ib_security_modify_qp(struct ib_qp *qp, { int ret = 0; struct ib_ports_pkeys *tmp_pps; - struct ib_ports_pkeys *new_pps; + struct ib_ports_pkeys *new_pps = NULL; struct ib_qp *real_qp = qp->real_qp; bool special_qp = (real_qp->qp_type == IB_QPT_SMI || real_qp->qp_type == IB_QPT_GSI || @@ -565,18 +586,27 @@ int ib_security_modify_qp(struct ib_qp *qp, bool pps_change = ((qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) || (qp_attr_mask & IB_QP_ALT_PATH)); + WARN_ONCE((qp_attr_mask & IB_QP_PORT && + rdma_protocol_ib(real_qp->device, qp_attr->port_num) && + !real_qp->qp_sec), + "%s: QP security is not initialized for IB QP: %d\n", + __func__, real_qp->qp_num); + /* The port/pkey settings are maintained only for the real QP. Open * handles on the real QP will be in the shared_qp_list. When * enforcing security on the real QP all the shared QPs will be * checked as well. */ - if (pps_change && !special_qp) { + if (pps_change && !special_qp && real_qp->qp_sec) { mutex_lock(&real_qp->qp_sec->mutex); new_pps = get_new_pps(real_qp, qp_attr, qp_attr_mask); - + if (!new_pps) { + mutex_unlock(&real_qp->qp_sec->mutex); + return -ENOMEM; + } /* Add this QP to the lists for the new port * and pkey settings before checking for permission * in case there is a concurrent cache update @@ -600,7 +630,7 @@ int ib_security_modify_qp(struct ib_qp *qp, qp_attr_mask, udata); - if (pps_change && !special_qp) { + if (new_pps) { /* Clean up the lists and free the appropriate * ports_pkeys structure. 
*/ @@ -631,6 +661,9 @@ int ib_security_pkey_access(struct ib_device *dev, u16 pkey; int ret; + if (!rdma_protocol_ib(dev, port_num)) + return 0; + ret = ib_get_cached_pkey(dev, port_num, pkey_index, &pkey); if (ret) return ret; @@ -665,6 +698,9 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, { int ret; + if (!rdma_protocol_ib(agent->device, agent->port_num)) + return 0; + ret = security_ib_alloc_security(&agent->security); if (ret) return ret; @@ -690,6 +726,9 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent) { + if (!rdma_protocol_ib(agent->device, agent->port_num)) + return; + security_ib_free_security(agent->security); if (agent->lsm_nb_reg) unregister_lsm_notifier(&agent->lsm_nb); @@ -697,6 +736,9 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent) int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) { + if (!rdma_protocol_ib(map->agent.device, map->agent.port_num)) + return 0; + if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed) return -EACCES;