From 285214409a9e5fceba2215461b4682b6069d8e77 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 20 Apr 2015 14:01:11 -0600 Subject: [PATCH 01/18] RDMA/CMA: Canonize IPv4 on IPV6 sockets properly When accepting a new IPv4 connect to an IPv6 socket, the CMA tries to canonize the address family to IPv4, but does not properly process the listening sockaddr to get the listening port, and does not properly set the address family of the canonized sockaddr. Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager") Cc: Reported-By: Yotam Kenneth Signed-off-by: Jason Gunthorpe Tested-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d570030d899c..06441a43c3aa 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -859,19 +859,27 @@ static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id memcpy(&ib->sib_addr, &path->dgid, 16); } +static __be16 ss_get_port(const struct sockaddr_storage *ss) +{ + if (ss->ss_family == AF_INET) + return ((struct sockaddr_in *)ss)->sin_port; + else if (ss->ss_family == AF_INET6) + return ((struct sockaddr_in6 *)ss)->sin6_port; + BUG(); +} + static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, struct cma_hdr *hdr) { - struct sockaddr_in *listen4, *ip4; + struct sockaddr_in *ip4; - listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr; ip4 = (struct sockaddr_in *) &id->route.addr.src_addr; - ip4->sin_family = listen4->sin_family; + ip4->sin_family = AF_INET; ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; - ip4->sin_port = listen4->sin_port; + ip4->sin_port = ss_get_port(&listen_id->route.addr.src_addr); ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr; - ip4->sin_family = listen4->sin_family; + ip4->sin_family = AF_INET; ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; ip4->sin_port = hdr->port; } @@ -879,16 +887,15 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, struct cma_hdr *hdr) { - struct sockaddr_in6 *listen6, *ip6; + struct sockaddr_in6 *ip6; - listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr; ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr; - ip6->sin6_family = listen6->sin6_family; + ip6->sin6_family = AF_INET6; ip6->sin6_addr = hdr->dst_addr.ip6; - ip6->sin6_port = listen6->sin6_port; + ip6->sin6_port = ss_get_port(&listen_id->route.addr.src_addr); ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr; - ip6->sin6_family = listen6->sin6_family; + ip6->sin6_family = AF_INET6; ip6->sin6_addr = hdr->src_addr.ip6; ip6->sin6_port = hdr->port; } From 0b7410471d59ce2ea30453e68c03bdb941d5951e Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 22 Apr 2015 01:44:58 +0530 Subject: [PATCH 02/18] iw_cxgb4: Cleanup register defines/MACROS Cleanup macros and register defines for consistency Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 57176ddd4c50..0493cca3ec15 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -675,7 +675,7 @@ static int send_connect(struct c4iw_ep *ep) if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); - opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + opt2 |= T5_ISS_F; } t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); @@ -2214,7 +2214,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); - opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + opt2 |= T5_ISS_F; rpl5 = (void *)rpl; memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); if (peer2peer) diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 5e53327fc647..343e8daf2270 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -848,6 +848,8 @@ enum { /* TCP congestion control algorithms */ #define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S) #define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M) -#define CONG_CNTRL_VALID (1 << 18) +#define T5_ISS_S 18 +#define T5_ISS_V(x) ((x) << T5_ISS_S) +#define T5_ISS_F T5_ISS_V(1U) #endif /* _T4FW_RI_API_H_ */ From 6198dd8d7a6a7f40dc4599cb0676101d9cb82776 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 22 Apr 2015 01:44:59 +0530 Subject: [PATCH 03/18] iw_cxgb4: 32b platform fixes - get_dma_mr() was using ~0UL which is should be ~0ULL. This causes the DMA MR to get setup incorrectly in hardware. - wr_log_show() needed a 64b divide function div64_u64() instead of doing division directly. - fixed warnings about recasting a pointer to a u64 Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- drivers/infiniband/hw/cxgb4/cq.c | 7 +++---- drivers/infiniband/hw/cxgb4/device.c | 6 +++--- drivers/infiniband/hw/cxgb4/mem.c | 6 +++--- drivers/infiniband/hw/cxgb4/qp.c | 10 +++++----- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0493cca3ec15..6fb31bacd5b4 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3571,7 +3571,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, * TP will ignore any value > 0 for MSS index. */ req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF)); - req->cookie = (unsigned long)skb; + req->cookie = (uintptr_t)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index ab7692ac2044..25dbd6986301 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -55,7 +55,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_NRES_V(1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_RESET; @@ -125,7 +125,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_NRES_V(1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_WRITE; @@ -970,8 +970,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, } PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n", __func__, chp->cq.cqid, chp, chp->cq.size, - chp->cq.memsize, - (unsigned long long) chp->cq.dma_addr); + chp->cq.memsize, (unsigned long long) chp->cq.dma_addr); return &chp->ibcq; err5: kfree(mm2); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 8fb295e4a9ab..7ed32537eb59 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -151,7 +151,7 @@ static int wr_log_show(struct seq_file *seq, void *v) int prev_ts_set = 0; int idx, end; -#define ts2ns(ts) div64_ul((ts) * dev->rdev.lldi.cclk_ps, 1000) +#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000) idx = atomic_read(&dev->rdev.wr_log_idx) & (dev->rdev.wr_log_size - 1); @@ -784,10 +784,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, rdev->lldi.vr->cq.size); - PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu " + PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " "qpmask 0x%x cqshift %lu cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), - (u64)pci_resource_start(rdev->lldi.pdev, 2), + (void *)pci_resource_start(rdev->lldi.pdev, 2), rdev->lldi.db_reg, rdev->lldi.gts_reg, rdev->qpshift, rdev->qpmask, diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 3ef0cf9f5c44..cff815b91707 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F); - req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait; + req->wr.wr_lo = (__force __be64)&wr_wait; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( @@ -676,12 +676,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) mhp->attr.zbva = 0; mhp->attr.va_fbo = 0; mhp->attr.page_size = 0; - mhp->attr.len = ~0UL; + mhp->attr.len = ~0ULL; mhp->attr.pbl_size = 0; ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0); + mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0); if (ret) goto err1; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 15cae5a31018..389ced335bc5 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -275,7 +275,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_NRES_V(2) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; @@ -1209,7 +1209,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, wqe->flowid_len16 = cpu_to_be32( FW_WR_FLOWID_V(ep->hwtid) | FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (unsigned long) &ep->com.wr_wait; + wqe->cookie = (uintptr_t)&ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; ret = c4iw_ofld_send(&rhp->rdev, skb); @@ -1279,7 +1279,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) FW_WR_FLOWID_V(qhp->ep->hwtid) | FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; + wqe->cookie = (uintptr_t)&qhp->ep->com.wr_wait; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = @@ -1766,11 +1766,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; - mm3->addr = (__force unsigned long) qhp->wq.sq.udb; + mm3->addr = (__force unsigned long)qhp->wq.sq.udb; mm3->len = PAGE_SIZE; insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; - mm4->addr = (__force unsigned long) qhp->wq.rq.udb; + mm4->addr = (__force unsigned long)qhp->wq.rq.udb; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); if (mm5) { From 09ece8b9e983fe858de6eab7a386d58d194227b6 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 22 Apr 2015 01:45:00 +0530 Subject: [PATCH 04/18] iw_cxgb4: use BAR2 GTS register for T5 kernel mode CQs For T5, we must not use the kdb/kgts registers, in order avoid db drops under extreme loads. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 15 +++++++++++---- drivers/infiniband/hw/cxgb4/t4.h | 7 ++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 25dbd6986301..68ddb3710215 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -156,12 +156,19 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, goto err4; cq->gen = 1; - cq->gts = rdev->lldi.gts_reg; cq->rdev = rdev; if (user) { - cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (cq->cqid << rdev->cqshift); - cq->ugts &= PAGE_MASK; + u32 off = (cq->cqid << rdev->cqshift) & PAGE_MASK; + + cq->ugts = (u64)rdev->bar2_pa + off; + } else if (is_t4(rdev->lldi.adapter_type)) { + cq->gts = rdev->lldi.gts_reg; + cq->qid_mask = -1U; + } else { + u32 off = ((cq->cqid << rdev->cqshift) & PAGE_MASK) + 12; + + cq->gts = rdev->bar2_kva + off; + cq->qid_mask = rdev->qpmask; } return 0; err4: diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 871cdcac7be2..7f2a6c244d25 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -539,6 +539,7 @@ struct t4_cq { size_t memsize; __be64 bits_type_ts; u32 cqid; + u32 qid_mask; int vector; u16 size; /* including status page */ u16 cidx; @@ -563,12 +564,12 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se) set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_M) { val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid); + INGRESSQID_V(cq->cqid & cq->qid_mask); writel(val, cq->gts); cq->cidx_inc -= CIDXINC_M; } val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6) | - INGRESSQID_V(cq->cqid); + INGRESSQID_V(cq->cqid & cq->qid_mask); writel(val, cq->gts); cq->cidx_inc = 0; return 0; @@ -601,7 +602,7 @@ static inline void t4_hwcq_consume(struct t4_cq *cq) u32 val; val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid); + INGRESSQID_V(cq->cqid & cq->qid_mask); writel(val, cq->gts); cq->cidx_inc = 0; } From 4a75a86c8d04390f268d7237cc49fe9a8e36efe7 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 22 Apr 2015 01:45:01 +0530 Subject: [PATCH 05/18] iw_cxgb4: enforce qp/cq id requirements Currently the iw_cxgb4 implementation requires the qp and cq qid densities to match as well as the qp and cq id ranges. So fail a device open if the device configuration doesn't meet the requirements. The reason for these restictions has to do with the fact that IQ qid X has a UGTS register in the same bar2 page as EQ qid X. Thus both qids need to be allocated to the same user process for security reasons. The logic that does this (the qpid allocator in iw_cxgb4/resource.c) handles this but requires the above restrictions. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/device.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 7ed32537eb59..83209bb38285 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -764,6 +764,29 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) c4iw_init_dev_ucontext(rdev, &rdev->uctx); + /* + * This implementation assumes udb_density == ucq_density! Eventually + * we might need to support this but for now fail the open. Also the + * cqid and qpid range must match for now. + */ + if (rdev->lldi.udb_density != rdev->lldi.ucq_density) { + pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.udb_density, + rdev->lldi.ucq_density); + err = -EINVAL; + goto err1; + } + if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start || + rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) { + pr_err(MOD "%s: unsupported qp and cq id ranges " + "qp start %u size %u cq start %u size %u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size, + rdev->lldi.vr->cq.size); + err = -EINVAL; + goto err1; + } + /* * qpshift is the number of bits to shift the qpid left in order * to get the correct address of the doorbell for that qp. From 6eec177461751f0fe191cf9977cde692b9481d0a Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 21 Apr 2015 16:28:10 -0400 Subject: [PATCH 06/18] RDMA/core: Enable the iWarp Port Mapper to provide the actual address of the connecting peer to its clients Add functionality to enable the port mapper on the passive side to provide to its clients the actual (non-mapped) ip/tcp address information of the connecting peer 1) Adding remote_info_cb() to process the address info of the connecting peer The address info is provided by the user space port mapper service when the connection is initiated by the peer 2) Adding a hash list to store the remote address info 3) Adding functionality to add/remove the remote address info After the info has been provided to the port mapper client, it is removed from the hash list Signed-off-by: Tatyana Nikolova Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_msg.c | 73 +++++++++- drivers/infiniband/core/iwpm_util.c | 208 +++++++++++++++++++++++----- drivers/infiniband/core/iwpm_util.h | 15 ++ include/rdma/iw_portmap.h | 25 ++++ include/uapi/rdma/rdma_netlink.h | 1 + 5 files changed, 288 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index b85ddbc979e0..ab081702566f 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -468,7 +468,8 @@ add_mapping_response_exit: } EXPORT_SYMBOL(iwpm_add_mapping_cb); -/* netlink attribute policy for the response to add and query mapping request */ +/* netlink attribute policy for the response to add and query mapping request + * and response with remote address info */ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = { [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 }, [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) }, @@ -559,6 +560,76 @@ query_mapping_response_exit: } EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); +/* + * iwpm_remote_info_cb - Process a port mapper message, containing + * the remote connecting peer address info + */ +int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX]; + struct sockaddr_storage *local_sockaddr, *remote_sockaddr; + struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr; + struct iwpm_remote_info *rem_info; + const char *msg_type; + u8 nl_client; + int ret = -EINVAL; + + msg_type = "Remote Mapping info"; + if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX, + resp_query_policy, nltb, msg_type)) + return ret; + + nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid port mapper client = %d\n", + __func__, nl_client); + return ret; + } + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + + local_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); + remote_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); + mapped_loc_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); + mapped_rem_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]); + + if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family || + mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) { + pr_info("%s: Sockaddr family doesn't match the requested one\n", + __func__); + return ret; + } + rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC); + if (!rem_info) { + pr_err("%s: Unable to allocate a remote info\n", __func__); + ret = -ENOMEM; + return ret; + } + memcpy(&rem_info->mapped_loc_sockaddr, mapped_loc_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&rem_info->remote_sockaddr, remote_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&rem_info->mapped_rem_sockaddr, mapped_rem_sockaddr, + sizeof(struct sockaddr_storage)); + rem_info->nl_client = nl_client; + + iwpm_add_remote_info(rem_info); + + iwpm_print_sockaddr(local_sockaddr, + "remote_info: Local sockaddr:"); + iwpm_print_sockaddr(mapped_loc_sockaddr, + "remote_info: Mapped local sockaddr:"); + iwpm_print_sockaddr(remote_sockaddr, + "remote_info: Remote sockaddr:"); + iwpm_print_sockaddr(mapped_rem_sockaddr, + "remote_info: Mapped remote sockaddr:"); + return ret; +} +EXPORT_SYMBOL(iwpm_remote_info_cb); + /* netlink attribute policy for the received request for mapping info */ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING, diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 69e9f84c1605..a626795bf9c7 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -33,8 +33,10 @@ #include "iwpm_util.h" -#define IWPM_HASH_BUCKET_SIZE 512 -#define IWPM_HASH_BUCKET_MASK (IWPM_HASH_BUCKET_SIZE - 1) +#define IWPM_MAPINFO_HASH_SIZE 512 +#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) +#define IWPM_REMINFO_HASH_SIZE 64 +#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) static LIST_HEAD(iwpm_nlmsg_req_list); static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); @@ -42,31 +44,49 @@ static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); static struct hlist_head *iwpm_hash_bucket; static DEFINE_SPINLOCK(iwpm_mapinfo_lock); +static struct hlist_head *iwpm_reminfo_bucket; +static DEFINE_SPINLOCK(iwpm_reminfo_lock); + static DEFINE_MUTEX(iwpm_admin_lock); static struct iwpm_admin_data iwpm_admin; int iwpm_init(u8 nl_client) { + int ret = 0; if (iwpm_valid_client(nl_client)) return -EINVAL; mutex_lock(&iwpm_admin_lock); if (atomic_read(&iwpm_admin.refcount) == 0) { - iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE * + iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); if (!iwpm_hash_bucket) { - mutex_unlock(&iwpm_admin_lock); + ret = -ENOMEM; pr_err("%s Unable to create mapinfo hash table\n", __func__); - return -ENOMEM; + goto init_exit; + } + iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE * + sizeof(struct hlist_head), GFP_KERNEL); + if (!iwpm_reminfo_bucket) { + kfree(iwpm_hash_bucket); + ret = -ENOMEM; + pr_err("%s Unable to create reminfo hash table\n", __func__); + goto init_exit; } } atomic_inc(&iwpm_admin.refcount); +init_exit: mutex_unlock(&iwpm_admin_lock); - iwpm_set_valid(nl_client, 1); - return 0; + if (!ret) { + iwpm_set_valid(nl_client, 1); + pr_debug("%s: Mapinfo and reminfo tables are created\n", + __func__); + } + return ret; } EXPORT_SYMBOL(iwpm_init); static void free_hash_bucket(void); +static void free_reminfo_bucket(void); int iwpm_exit(u8 nl_client) { @@ -81,7 +101,8 @@ int iwpm_exit(u8 nl_client) } if (atomic_dec_and_test(&iwpm_admin.refcount)) { free_hash_bucket(); - pr_debug("%s: Mapinfo hash table is destroyed\n", __func__); + free_reminfo_bucket(); + pr_debug("%s: Resources are destroyed\n", __func__); } mutex_unlock(&iwpm_admin_lock); iwpm_set_valid(nl_client, 0); @@ -89,7 +110,7 @@ int iwpm_exit(u8 nl_client) } EXPORT_SYMBOL(iwpm_exit); -static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *, +static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, struct sockaddr_storage *); int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, @@ -99,9 +120,10 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, struct hlist_head *hash_bucket_head; struct iwpm_mapping_info *map_info; unsigned long flags; + int ret = -EINVAL; if (!iwpm_valid_client(nl_client)) - return -EINVAL; + return ret; map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); if (!map_info) { pr_err("%s: Unable to allocate a mapping info\n", __func__); @@ -115,13 +137,16 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - hash_bucket_head = get_hash_bucket_head( + hash_bucket_head = get_mapinfo_hash_bucket( &map_info->local_sockaddr, &map_info->mapped_sockaddr); - hlist_add_head(&map_info->hlist_node, hash_bucket_head); + if (hash_bucket_head) { + hlist_add_head(&map_info->hlist_node, hash_bucket_head); + ret = 0; + } } spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); - return 0; + return ret; } EXPORT_SYMBOL(iwpm_create_mapinfo); @@ -136,9 +161,12 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - hash_bucket_head = get_hash_bucket_head( + hash_bucket_head = get_mapinfo_hash_bucket( local_sockaddr, mapped_local_addr); + if (!hash_bucket_head) + goto remove_mapinfo_exit; + hlist_for_each_entry_safe(map_info, tmp_hlist_node, hash_bucket_head, hlist_node) { @@ -152,6 +180,7 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, } } } +remove_mapinfo_exit: spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return ret; } @@ -166,7 +195,7 @@ static void free_hash_bucket(void) /* remove all the mapinfo data from the list */ spin_lock_irqsave(&iwpm_mapinfo_lock, flags); - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry_safe(map_info, tmp_hlist_node, &iwpm_hash_bucket[i], hlist_node) { @@ -180,6 +209,96 @@ static void free_hash_bucket(void) spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); } +static void free_reminfo_bucket(void) +{ + struct hlist_node *tmp_hlist_node; + struct iwpm_remote_info *rem_info; + unsigned long flags; + int i; + + /* remove all the remote info from the list */ + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + for (i = 0; i < IWPM_REMINFO_HASH_SIZE; i++) { + hlist_for_each_entry_safe(rem_info, tmp_hlist_node, + &iwpm_reminfo_bucket[i], hlist_node) { + + hlist_del_init(&rem_info->hlist_node); + kfree(rem_info); + } + } + /* free the hash list */ + kfree(iwpm_reminfo_bucket); + iwpm_reminfo_bucket = NULL; + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); +} + +static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage *, + struct sockaddr_storage *); + +void iwpm_add_remote_info(struct iwpm_remote_info *rem_info) +{ + struct hlist_head *hash_bucket_head; + unsigned long flags; + + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + if (iwpm_reminfo_bucket) { + hash_bucket_head = get_reminfo_hash_bucket( + &rem_info->mapped_loc_sockaddr, + &rem_info->mapped_rem_sockaddr); + if (hash_bucket_head) + hlist_add_head(&rem_info->hlist_node, hash_bucket_head); + } + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); +} + +int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, + u8 nl_client) +{ + struct hlist_node *tmp_hlist_node; + struct hlist_head *hash_bucket_head; + struct iwpm_remote_info *rem_info = NULL; + unsigned long flags; + int ret = -EINVAL; + + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid client = %d\n", __func__, nl_client); + return ret; + } + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + if (iwpm_reminfo_bucket) { + hash_bucket_head = get_reminfo_hash_bucket( + mapped_loc_addr, + mapped_rem_addr); + if (!hash_bucket_head) + goto get_remote_info_exit; + hlist_for_each_entry_safe(rem_info, tmp_hlist_node, + hash_bucket_head, hlist_node) { + + if (!iwpm_compare_sockaddr(&rem_info->mapped_loc_sockaddr, + mapped_loc_addr) && + !iwpm_compare_sockaddr(&rem_info->mapped_rem_sockaddr, + mapped_rem_addr)) { + + memcpy(remote_addr, &rem_info->remote_sockaddr, + sizeof(struct sockaddr_storage)); + iwpm_print_sockaddr(remote_addr, + "get_remote_info: Remote sockaddr:"); + + hlist_del_init(&rem_info->hlist_node); + kfree(rem_info); + ret = 0; + break; + } + } + } +get_remote_info_exit: + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); + return ret; +} +EXPORT_SYMBOL(iwpm_get_remote_info); + struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, u8 nl_client, gfp_t gfp) { @@ -409,31 +528,54 @@ static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr) return hash; } -static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage - *local_sockaddr, - struct sockaddr_storage - *mapped_sockaddr) +static int get_hash_bucket(struct sockaddr_storage *a_sockaddr, + struct sockaddr_storage *b_sockaddr, u32 *hash) { - u32 local_hash, mapped_hash, hash; + u32 a_hash, b_hash; - if (local_sockaddr->ss_family == AF_INET) { - local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr); - mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr); + if (a_sockaddr->ss_family == AF_INET) { + a_hash = iwpm_ipv4_jhash((struct sockaddr_in *) a_sockaddr); + b_hash = iwpm_ipv4_jhash((struct sockaddr_in *) b_sockaddr); - } else if (local_sockaddr->ss_family == AF_INET6) { - local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr); - mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr); + } else if (a_sockaddr->ss_family == AF_INET6) { + a_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) a_sockaddr); + b_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) b_sockaddr); } else { pr_err("%s: Invalid sockaddr family\n", __func__); - return NULL; + return -EINVAL; } - if (local_hash == mapped_hash) /* if port mapper isn't available */ - hash = local_hash; + if (a_hash == b_hash) /* if port mapper isn't available */ + *hash = a_hash; else - hash = jhash_2words(local_hash, mapped_hash, 0); + *hash = jhash_2words(a_hash, b_hash, 0); + return 0; +} - return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK]; +static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage + *local_sockaddr, struct sockaddr_storage + *mapped_sockaddr) +{ + u32 hash; + int ret; + + ret = get_hash_bucket(local_sockaddr, mapped_sockaddr, &hash); + if (ret) + return NULL; + return &iwpm_hash_bucket[hash & IWPM_MAPINFO_HASH_MASK]; +} + +static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage + *mapped_loc_sockaddr, struct sockaddr_storage + *mapped_rem_sockaddr) +{ + u32 hash; + int ret; + + ret = get_hash_bucket(mapped_loc_sockaddr, mapped_rem_sockaddr, &hash); + if (ret) + return NULL; + return &iwpm_reminfo_bucket[hash & IWPM_REMINFO_HASH_MASK]; } static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) @@ -512,7 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) } skb_num++; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], hlist_node) { if (map_info->nl_client != nl_client) @@ -595,7 +737,7 @@ int iwpm_mapinfo_available(void) spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { if (!hlist_empty(&iwpm_hash_bucket[i])) { full_bucket = 1; break; diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 9777c869a140..ee2d9ff095be 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -76,6 +76,14 @@ struct iwpm_mapping_info { u8 nl_client; }; +struct iwpm_remote_info { + struct hlist_node hlist_node; + struct sockaddr_storage remote_sockaddr; + struct sockaddr_storage mapped_loc_sockaddr; + struct sockaddr_storage mapped_rem_sockaddr; + u8 nl_client; +}; + struct iwpm_admin_data { atomic_t refcount; atomic_t nlmsg_seq; @@ -127,6 +135,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); */ int iwpm_get_nlmsg_seq(void); +/** + * iwpm_add_reminfo - Add remote address info of the connecting peer + * to the remote info hash table + * @reminfo: The remote info to be added + */ +void iwpm_add_remote_info(struct iwpm_remote_info *reminfo); + /** * iwpm_valid_client - Check if the port mapper client is valid * @nl_client: The index of the netlink client diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h index 928b2775e992..fda31673a562 100644 --- a/include/rdma/iw_portmap.h +++ b/include/rdma/iw_portmap.h @@ -147,6 +147,16 @@ int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *); */ int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *); +/** + * iwpm_remote_info_cb - Process remote connecting peer address info, which + * the port mapper has received from the connecting peer + * + * @cb: Contains the received message (payload and netlink header) + * + * Stores the IPv4/IPv6 address info in a hash table + */ +int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *); + /** * iwpm_mapping_error_cb - Process port mapper notification for error * @@ -174,6 +184,21 @@ int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *); */ int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *); +/** + * iwpm_get_remote_info - Get the remote connecting peer address info + * + * @mapped_loc_addr: Mapped local address of the listening peer + * @mapped_rem_addr: Mapped remote address of the connecting peer + * @remote_addr: To store the remote address of the connecting peer + * @nl_client: The index of the netlink client + * + * The remote address info is retrieved and provided to the client in + * the remote_addr. After that it is removed from the hash table + */ +int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, u8 nl_client); + /** * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address * info in a hash table diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index de69170a30ce..6e4bb4270ca2 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -37,6 +37,7 @@ enum { RDMA_NL_IWPM_ADD_MAPPING, RDMA_NL_IWPM_QUERY_MAPPING, RDMA_NL_IWPM_REMOVE_MAPPING, + RDMA_NL_IWPM_REMOTE_INFO, RDMA_NL_IWPM_HANDLE_ERR, RDMA_NL_IWPM_MAPINFO, RDMA_NL_IWPM_MAPINFO_NUM, From 230da36ae919e690dbcc44d1be8a2154214c6e36 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 21 Apr 2015 16:28:25 -0400 Subject: [PATCH 07/18] RDMA/nes: Report the actual address of the remote connecting peer Get the actual (non-mapped) ip/tcp address of the connecting peer from the port mapper and report the address info to the user space application at the time of connection establishment Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 1 + drivers/infiniband/hw/nes/nes_cm.c | 63 ++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 3b2a6dc8ea99..9f9d5c563a61 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -116,6 +116,7 @@ static struct ibnl_client_cbs nes_nl_cb_table[] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6f09a72e78d7..72b43417cbe3 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -596,27 +596,52 @@ static void nes_form_reg_msg(struct nes_vnic *nesvnic, memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE); } +static void record_sockaddr_info(struct sockaddr_storage *addr_info, + nes_addr_t *ip_addr, u16 *port_num) +{ + struct sockaddr_in *in_addr = (struct sockaddr_in *)addr_info; + + if (in_addr->sin_family == AF_INET) { + *ip_addr = ntohl(in_addr->sin_addr.s_addr); + *port_num = ntohs(in_addr->sin_port); + } +} + /* * nes_record_pm_msg - Save the received mapping info */ static void nes_record_pm_msg(struct nes_cm_info *cm_info, struct iwpm_sa_data *pm_msg) { - struct sockaddr_in *mapped_loc_addr = - (struct sockaddr_in *)&pm_msg->mapped_loc_addr; - struct sockaddr_in *mapped_rem_addr = - (struct sockaddr_in *)&pm_msg->mapped_rem_addr; + record_sockaddr_info(&pm_msg->mapped_loc_addr, + &cm_info->mapped_loc_addr, &cm_info->mapped_loc_port); - if (mapped_loc_addr->sin_family == AF_INET) { - cm_info->mapped_loc_addr = - ntohl(mapped_loc_addr->sin_addr.s_addr); - cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port); - } - if (mapped_rem_addr->sin_family == AF_INET) { - cm_info->mapped_rem_addr = - ntohl(mapped_rem_addr->sin_addr.s_addr); - cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port); - } + record_sockaddr_info(&pm_msg->mapped_rem_addr, + &cm_info->mapped_rem_addr, &cm_info->mapped_rem_port); +} + +/* + * nes_get_reminfo - Get the address info of the remote connecting peer + */ +static int nes_get_remote_addr(struct nes_cm_node *cm_node) +{ + struct sockaddr_storage mapped_loc_addr, mapped_rem_addr; + struct sockaddr_storage remote_addr; + int ret; + + nes_create_sockaddr(htonl(cm_node->mapped_loc_addr), + htons(cm_node->mapped_loc_port), &mapped_loc_addr); + nes_create_sockaddr(htonl(cm_node->mapped_rem_addr), + htons(cm_node->mapped_rem_port), &mapped_rem_addr); + + ret = iwpm_get_remote_info(&mapped_loc_addr, &mapped_rem_addr, + &remote_addr, RDMA_NL_NES); + if (ret) + nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n"); + else + record_sockaddr_info(&remote_addr, &cm_node->rem_addr, + &cm_node->rem_port); + return ret; } /** @@ -1566,9 +1591,14 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, return NULL; /* set our node specific transport info */ - cm_node->loc_addr = cm_info->loc_addr; + if (listener) { + cm_node->loc_addr = listener->loc_addr; + cm_node->loc_port = listener->loc_port; + } else { + cm_node->loc_addr = cm_info->loc_addr; + cm_node->loc_port = cm_info->loc_port; + } cm_node->rem_addr = cm_info->rem_addr; - cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; cm_node->mapped_loc_addr = cm_info->mapped_loc_addr; @@ -2151,6 +2181,7 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->state = NES_CM_STATE_ESTABLISHED; if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + nes_get_remote_addr(cm_node); handle_rcv_mpa(cm_node, skb); } else { /* rcvd ACK only */ dev_kfree_skb_any(skb); From 5b6b8fe64053b2649660ded2f3c5be25ebddbfdb Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 21 Apr 2015 16:28:41 -0400 Subject: [PATCH 08/18] RDMA/cxgb4: Report the actual address of the remote connecting peer Get the actual (non-mapped) ip/tcp address of the connecting peer from the port mapper Also setup the passive side endpoint to correctly display the actual and mapped addresses for the new connection. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 54 +++++++++++++++++++++++++--- drivers/infiniband/hw/cxgb4/device.c | 1 + 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6fb31bacd5b4..3c3b00e4e7af 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -583,6 +583,22 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep, sizeof(ep->com.mapped_remote_addr)); } +static int get_remote_addr(struct c4iw_ep *ep) +{ + int ret; + + print_addr(&ep->com, __func__, "get_remote_addr"); + + ret = iwpm_get_remote_info(&ep->com.mapped_local_addr, + &ep->com.mapped_remote_addr, + &ep->com.remote_addr, RDMA_NL_C4IW); + if (ret) + pr_info(MOD "Unable to find remote peer addr info - err %d\n", + ret); + + return ret; +} + static void best_mtu(const unsigned short *mtus, unsigned short mtu, unsigned int *idx, int use_ts, int ipv6) { @@ -2352,27 +2368,57 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; + + /* + * The mapped_local and mapped_remote addresses get setup with + * the actual 4-tuple. The local address will be based on the + * actual local address of the connection, but on the port number + * of the parent listening endpoint. The remote address is + * setup based on a query to the IWPM since we don't know what it + * originally was before mapping. If no mapping was done, then + * mapped_remote == remote, and mapped_local == local. + */ if (iptype == 4) { struct sockaddr_in *sin = (struct sockaddr_in *) - &child_ep->com.local_addr; + &child_ep->com.mapped_local_addr; + sin->sin_family = PF_INET; sin->sin_port = local_port; sin->sin_addr.s_addr = *(__be32 *)local_ip; - sin = (struct sockaddr_in *)&child_ep->com.remote_addr; + + sin = (struct sockaddr_in *)&child_ep->com.local_addr; + sin->sin_family = PF_INET; + sin->sin_port = ((struct sockaddr_in *) + &parent_ep->com.local_addr)->sin_port; + sin->sin_addr.s_addr = *(__be32 *)local_ip; + + sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr; sin->sin_family = PF_INET; sin->sin_port = peer_port; sin->sin_addr.s_addr = *(__be32 *)peer_ip; } else { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &child_ep->com.local_addr; + &child_ep->com.mapped_local_addr; + sin6->sin6_family = PF_INET6; sin6->sin6_port = local_port; memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); - sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; + + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_port; + memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); + + sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr; sin6->sin6_family = PF_INET6; sin6->sin6_port = peer_port; memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); } + memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr, + sizeof(child_ep->com.remote_addr)); + get_remote_addr(child_ep); + c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 83209bb38285..1ffbd038c0ae 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -93,6 +93,7 @@ static struct ibnl_client_cbs c4iw_nl_cb_table[] = { [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} }; From c1d383b5785b1e0fb5fb862864712a7208219e6a Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Wed, 15 Apr 2015 18:17:56 +0300 Subject: [PATCH 09/18] IB/core: dma map/unmap locking optimizations Currently, while mapping or unmapping pages for ODP, the umem mutex is locked and unlocked once for each page. Such lock/unlock operation take few tens to hundreds of nsecs. This makes a significant impact when mapping or unmapping few MBs of memory. To avoid this, the mutex should be locked only once per operation, and not per page. Signed-off-by: Guy Shapiro Acked-by: Shachar Raindel Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem_odp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 8b8cc6fa0ab0..aba47398880d 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -446,7 +446,6 @@ static int ib_umem_odp_map_dma_single_page( int remove_existing_mapping = 0; int ret = 0; - mutex_lock(&umem->odp_data->umem_mutex); /* * Note: we avoid writing if seq is different from the initial seq, to * handle case of a racing notifier. This check also allows us to bail @@ -479,8 +478,6 @@ static int ib_umem_odp_map_dma_single_page( } out: - mutex_unlock(&umem->odp_data->umem_mutex); - /* On Demand Paging - avoid pinning the page */ if (umem->context->invalidate_range || !stored_page) put_page(page); @@ -586,6 +583,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); user_virt += npages << PAGE_SHIFT; + mutex_lock(&umem->odp_data->umem_mutex); for (j = 0; j < npages; ++j) { ret = ib_umem_odp_map_dma_single_page( umem, k, base_virt_addr, local_page_list[j], @@ -594,6 +592,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, break; k++; } + mutex_unlock(&umem->odp_data->umem_mutex); if (ret < 0) { /* Release left over pages when handling errors. */ @@ -633,9 +632,9 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ + mutex_lock(&umem->odp_data->umem_mutex); for (addr = virt; addr < bound; addr += (u64)umem->page_size) { idx = (addr - ib_umem_start(umem)) / PAGE_SIZE; - mutex_lock(&umem->odp_data->umem_mutex); if (umem->odp_data->page_list[idx]) { struct page *page = umem->odp_data->page_list[idx]; struct page *head_page = compound_head(page); @@ -663,7 +662,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, umem->odp_data->page_list[idx] = NULL; umem->odp_data->dma_list[idx] = 0; } - mutex_unlock(&umem->odp_data->umem_mutex); } + mutex_unlock(&umem->odp_data->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); From 325ad0617adaf163e32dd2d857b90baf65a25b5b Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Wed, 15 Apr 2015 18:17:57 +0300 Subject: [PATCH 10/18] IB/core: dma unmap optimizations While unmapping an ODP writable page, the dirty bit of the page is set. In order to do so, the head of the compound page is found. Currently, the compound head is found even on non-writable pages, where it is never used, leading to unnecessary cpu barrier that impacts performance. This patch moves the search for the compound head to be done only when needed. Signed-off-by: Guy Shapiro Acked-by: Shachar Raindel Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem_odp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index aba47398880d..40becdb3196e 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -637,7 +637,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, idx = (addr - ib_umem_start(umem)) / PAGE_SIZE; if (umem->odp_data->page_list[idx]) { struct page *page = umem->odp_data->page_list[idx]; - struct page *head_page = compound_head(page); dma_addr_t dma = umem->odp_data->dma_list[idx]; dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; @@ -645,7 +644,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma & ODP_WRITE_ALLOWED_BIT) + if (dma & ODP_WRITE_ALLOWED_BIT) { + struct page *head_page = compound_head(page); /* * set_page_dirty prefers being called with * the page lock. However, MMU notifiers are @@ -656,6 +656,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, * be removed. */ set_page_dirty(head_page); + } /* on demand pinning support */ if (!umem->context->invalidate_range) put_page(page); From 87a26e976cb93e26742224bdd39f51f7861aa9b7 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 21 Apr 2015 14:50:34 -0700 Subject: [PATCH 11/18] IB/qib: add acounting for MTRR There is no good reason not to, we eventually delete it as well. Cc: Toshi Kani Cc: Suresh Siddha Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Juergen Gross Cc: Daniel Vetter Cc: Andy Lutomirski Cc: Dave Airlie Cc: Antonino Daplas Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Cc: Mike Marciniszyn Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Cc: linux-fbdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_wc_x86_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c index 81b225f2300a..fe0850ac6883 100644 --- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -118,7 +118,7 @@ int qib_enable_wc(struct qib_devdata *dd) if (!ret) { int cookie; - cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); + cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 1); if (cookie < 0) { { qib_devinfo(dd->pcidev, From d4988623cc605131bed8c77f007082c3555c39ee Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 22 Apr 2015 11:38:24 -0700 Subject: [PATCH 12/18] IB/qib: use arch_phys_wc_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver already makes use of ioremap_wc() on PIO buffers, so convert it to use arch_phys_wc_add(). The qib driver uses a mmap() special case for when PAT is not used, this behaviour used to be determined with a module parameter but since we have been asked to just remove that module parameter this checks for the WC cookie, if not set we can assume PAT was used. If its set we do what we used to do for the mmap for when MTRR was enabled. The removal of the module parameter is OK given that Andy notes that even if users of module parameter are still around it will not prevent loading of the module on recent kernels. Cc: Doug Ledford Cc: Toshi Kani Cc: Rickard Strandqvist Cc: Mike Marciniszyn Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: Dennis Dalessandro Cc: Andy Lutomirski Cc: Suresh Siddha Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Juergen Gross Cc: Daniel Vetter Cc: Dave Airlie Cc: Bjorn Helgaas Cc: Antonino Daplas Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Cc: Dave Hansen Cc: Arnd Bergmann Cc: Michael S. Tsirkin Cc: Stefan Bader Cc: konrad.wilk@oracle.com Cc: ville.syrjala@linux.intel.com Cc: david.vrabel@citrix.com Cc: jbeulich@suse.com Cc: Roger Pau Monné Cc: infinipath@intel.com Cc: linux-rdma@vger.kernel.org Cc: linux-fbdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xensource.com Signed-off-by: Luis R. Rodriguez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib.h | 1 - drivers/infiniband/hw/qib/qib_file_ops.c | 3 +- drivers/infiniband/hw/qib/qib_iba6120.c | 8 ++--- drivers/infiniband/hw/qib/qib_iba7220.c | 8 ++--- drivers/infiniband/hw/qib/qib_iba7322.c | 41 +++++++++++------------ drivers/infiniband/hw/qib/qib_init.c | 26 ++++---------- drivers/infiniband/hw/qib/qib_wc_x86_64.c | 31 +++-------------- 7 files changed, 39 insertions(+), 79 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index ffd48bfc4923..ba5173e24973 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1136,7 +1136,6 @@ extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; -extern unsigned qib_wc_pat; extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); int init_chip_wc_pat(struct qib_devdata *dd, u32); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 9ea6c440a00c..725881890c4a 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -835,7 +835,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, vma->vm_flags &= ~VM_MAYREAD; vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - if (qib_wc_pat) + /* We used PAT if wc_cookie == 0 */ + if (!dd->wc_cookie) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 0d2ba59af30a..4b927809d1a1 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3315,11 +3315,9 @@ static int init_6120_variables(struct qib_devdata *dd) qib_6120_config_ctxts(dd); qib_set_ctxtcnt(dd); - if (qib_wc_pat) { - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; - } + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; set_6120_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 22affda8af88..00b2af211157 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4126,11 +4126,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) qib_7220_config_ctxts(dd); qib_set_ctxtcnt(dd); /* needed for PAT setup */ - if (qib_wc_pat) { - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; - } + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; set_7220_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ef97b71c8f7d..f32b4628e991 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6429,6 +6429,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) unsigned features, pidx, sbufcnt; int ret, mtu; u32 sbufs, updthresh; + resource_size_t vl15off; /* pport structs are contiguous, allocated after devdata */ ppd = (struct qib_pportdata *)(dd + 1); @@ -6677,29 +6678,27 @@ static int qib_init_7322_variables(struct qib_devdata *dd) qib_7322_config_ctxts(dd); qib_set_ctxtcnt(dd); - if (qib_wc_pat) { - resource_size_t vl15off; - /* - * We do not set WC on the VL15 buffers to avoid - * a rare problem with unaligned writes from - * interrupt-flushed store buffers, so we need - * to map those separately here. We can't solve - * this for the rarely used mtrr case. - */ - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; + /* + * We do not set WC on the VL15 buffers to avoid + * a rare problem with unaligned writes from + * interrupt-flushed store buffers, so we need + * to map those separately here. We can't solve + * this for the rarely used mtrr case. + */ + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; - /* vl15 buffers start just after the 4k buffers */ - vl15off = dd->physaddr + (dd->piobufbase >> 32) + - dd->piobcnt4k * dd->align4k; - dd->piovl15base = ioremap_nocache(vl15off, - NUM_VL15_BUFS * dd->align4k); - if (!dd->piovl15base) { - ret = -ENOMEM; - goto bail; - } + /* vl15 buffers start just after the 4k buffers */ + vl15off = dd->physaddr + (dd->piobufbase >> 32) + + dd->piobcnt4k * dd->align4k; + dd->piovl15base = ioremap_nocache(vl15off, + NUM_VL15_BUFS * dd->align4k); + if (!dd->piovl15base) { + ret = -ENOMEM; + goto bail; } + qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 2ee36953e234..7e00470adc30 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -91,15 +91,6 @@ MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); unsigned qib_cc_table_size; module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); -/* - * qib_wc_pat parameter: - * 0 is WC via MTRR - * 1 is WC via PAT - * If PAT initialization fails, code reverts back to MTRR - */ -unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ -module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); -MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); static void verify_interrupt(unsigned long); @@ -1377,8 +1368,7 @@ static void cleanup_device_data(struct qib_devdata *dd) spin_unlock(&dd->pport[pidx].cc_shadow_lock); } - if (!qib_wc_pat) - qib_disable_wc(dd); + qib_disable_wc(dd); if (dd->pioavailregs_dma) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, @@ -1547,14 +1537,12 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } - if (!qib_wc_pat) { - ret = qib_enable_wc(dd); - if (ret) { - qib_dev_err(dd, - "Write combining not enabled (err %d): performance may be poor\n", - -ret); - ret = 0; - } + ret = qib_enable_wc(dd); + if (ret) { + qib_dev_err(dd, + "Write combining not enabled (err %d): performance may be poor\n", + -ret); + ret = 0; } qib_verify_pioperf(dd); diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c index fe0850ac6883..6d61ef98721c 100644 --- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -116,21 +116,9 @@ int qib_enable_wc(struct qib_devdata *dd) } if (!ret) { - int cookie; - - cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 1); - if (cookie < 0) { - { - qib_devinfo(dd->pcidev, - "mtrr_add() WC for PIO bufs failed (%d)\n", - cookie); - ret = -EINVAL; - } - } else { - dd->wc_cookie = cookie; - dd->wc_base = (unsigned long) pioaddr; - dd->wc_len = (unsigned long) piolen; - } + dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen); + if (dd->wc_cookie < 0) + ret = -EINVAL; } return ret; @@ -142,18 +130,7 @@ int qib_enable_wc(struct qib_devdata *dd) */ void qib_disable_wc(struct qib_devdata *dd) { - if (dd->wc_cookie) { - int r; - - r = mtrr_del(dd->wc_cookie, dd->wc_base, - dd->wc_len); - if (r < 0) - qib_devinfo(dd->pcidev, - "mtrr_del(%lx, %lx, %lx) failed: %d\n", - dd->wc_cookie, dd->wc_base, - dd->wc_len, r); - dd->wc_cookie = 0; /* even on failure */ - } + arch_phys_wc_del(dd->wc_cookie); } /** From 471e70583217728955436a3fa6e5201e5c8c296a Mon Sep 17 00:00:00 2001 From: Honggang LI Date: Wed, 29 Apr 2015 17:40:44 +0800 Subject: [PATCH 13/18] IB/core: change rdma_gid2ip into void function as it always return zero Signed-off-by: Honggang Li Acked-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 13 +++---------- include/rdma/ib_addr.h | 3 +-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f80da50d84a5..38339d220d7f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -472,13 +472,8 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, } sgid_addr, dgid_addr; - ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); - if (ret) - return ret; - - ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); - if (ret) - return ret; + rdma_gid2ip(&sgid_addr._sockaddr, sgid); + rdma_gid2ip(&dgid_addr._sockaddr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); @@ -512,10 +507,8 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) struct sockaddr_in6 _sockaddr_in6; } gid_addr; - ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); + rdma_gid2ip(&gid_addr._sockaddr, sgid); - if (ret) - return ret; memset(&dev_addr, 0, sizeof(dev_addr)); ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); if (ret) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ce55906b54a0..ac54c27a2bfd 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -160,7 +160,7 @@ static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) } /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ -static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) +static inline void rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) { if (ipv6_addr_v4mapped((struct in6_addr *)gid)) { struct sockaddr_in *out_in = (struct sockaddr_in *)out; @@ -173,7 +173,6 @@ static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) out_in->sin6_family = AF_INET6; memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); } - return 0; } static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, From 0d0f738f6a11856a704dcd8fd3a008b200f17625 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 3 May 2015 09:48:26 -0400 Subject: [PATCH 14/18] IB/core: Fix unaligned accesses Addresses the following kernel logs seen during boot of sparc systems: Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Signed-off-by: David Ahern Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 23 +++++++++++------------ drivers/infiniband/core/cm_msgs.h | 4 ++-- include/rdma/ib_cm.h | 7 ++++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index e28a494e2a3a..0c1419105ff0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -437,39 +437,38 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) return cm_id_priv; } -static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask) +static void cm_mask_copy(u32 *dst, const u32 *src, const u32 *mask) { int i; - for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++) - ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] & - ((unsigned long *) mask)[i]; + for (i = 0; i < IB_CM_COMPARE_SIZE; i++) + dst[i] = src[i] & mask[i]; } static int cm_compare_data(struct ib_cm_compare_data *src_data, struct ib_cm_compare_data *dst_data) { - u8 src[IB_CM_COMPARE_SIZE]; - u8 dst[IB_CM_COMPARE_SIZE]; + u32 src[IB_CM_COMPARE_SIZE]; + u32 dst[IB_CM_COMPARE_SIZE]; if (!src_data || !dst_data) return 0; cm_mask_copy(src, src_data->data, dst_data->mask); cm_mask_copy(dst, dst_data->data, src_data->mask); - return memcmp(src, dst, IB_CM_COMPARE_SIZE); + return memcmp(src, dst, sizeof(src)); } -static int cm_compare_private_data(u8 *private_data, +static int cm_compare_private_data(u32 *private_data, struct ib_cm_compare_data *dst_data) { - u8 src[IB_CM_COMPARE_SIZE]; + u32 src[IB_CM_COMPARE_SIZE]; if (!dst_data) return 0; cm_mask_copy(src, private_data, dst_data->mask); - return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE); + return memcmp(src, dst_data->data, sizeof(src)); } /* @@ -538,7 +537,7 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) static struct cm_id_private * cm_find_listen(struct ib_device *device, __be64 service_id, - u8 *private_data) + u32 *private_data) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -953,7 +952,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, cm_mask_copy(cm_id_priv->compare_data->data, compare_data->data, compare_data->mask); memcpy(cm_id_priv->compare_data->mask, compare_data->mask, - IB_CM_COMPARE_SIZE); + sizeof(compare_data->mask)); } cm_id->state = IB_CM_LISTEN; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index be068f47e47e..8b76f0ef965e 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -103,7 +103,7 @@ struct cm_req_msg { /* local ACK timeout:5, rsvd:3 */ u8 alt_offset139; - u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE]; + u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; } __attribute__ ((packed)); @@ -801,7 +801,7 @@ struct cm_sidr_req_msg { __be16 rsvd; __be64 service_id; - u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE]; + u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; } __attribute__ ((packed)); struct cm_sidr_rep_msg { diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 0e3ff30647d5..39ed2d2fbd51 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -105,7 +105,8 @@ enum ib_cm_data_size { IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, IB_CM_SIDR_REP_INFO_LENGTH = 72, - IB_CM_COMPARE_SIZE = 64 + /* compare done u32 at a time */ + IB_CM_COMPARE_SIZE = (64 / sizeof(u32)) }; struct ib_cm_id; @@ -337,8 +338,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); #define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL) struct ib_cm_compare_data { - u8 data[IB_CM_COMPARE_SIZE]; - u8 mask[IB_CM_COMPARE_SIZE]; + u32 data[IB_CM_COMPARE_SIZE]; + u32 mask[IB_CM_COMPARE_SIZE]; }; /** From 179d03bbfd2ebc63934753a696467d28bf9f5b64 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Tue, 5 May 2015 03:55:24 +0530 Subject: [PATCH 15/18] iw_cxgb4: Remove negative advice dmesg warnings Remove these log messages in favor of per-endpoint counters as well as device-global counters that can be inspected via debugfs. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 27 +++++++++++++++++--------- drivers/infiniband/hw/cxgb4/device.c | 7 +++++++ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 7 +++++++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3c3b00e4e7af..bb95a6c0477b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2058,9 +2058,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) status, status2errno(status)); if (is_neg_adv(status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Connection problems for atid %u status %u (%s)\n", - atid, status, neg_adv_str(status)); + PDBG("%s Connection problems for atid %u status %u (%s)\n", + __func__, atid, status, neg_adv_str(status)); + ep->stats.connect_neg_adv++; + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.neg_adv++; + mutex_unlock(&dev->rdev.stats.lock); return 0; } @@ -2566,9 +2569,13 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); if (is_neg_adv(req->status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Negative advice on abort - tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", + __func__, ep->hwtid, req->status, + neg_adv_str(req->status)); + ep->stats.abort_neg_adv++; + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.neg_adv++; + mutex_unlock(&dev->rdev.stats.lock); return 0; } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, @@ -3977,9 +3984,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } if (is_neg_adv(req->status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Negative advice on abort - tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", + __func__, ep->hwtid, req->status, + neg_adv_str(req->status)); + ep->stats.abort_neg_adv++; + dev->rdev.stats.neg_adv++; kfree_skb(skb); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 1ffbd038c0ae..cf54d6922dc4 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -490,6 +490,7 @@ static int stats_show(struct seq_file *seq, void *v) dev->rdev.stats.act_ofld_conn_fails); seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", dev->rdev.stats.pas_ofld_conn_fails); + seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv); seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird); return 0; } @@ -561,10 +562,13 @@ static int dump_ep(int id, void *p, void *data) cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " "history 0x%lx hwtid %d atid %d " + "conn_na %u abort_na %u " "%pI4:%d/%d <-> %pI4:%d/%d\n", ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + ep->stats.connect_neg_adv, + ep->stats.abort_neg_adv, &lsin->sin_addr, ntohs(lsin->sin_port), ntohs(mapped_lsin->sin_port), &rsin->sin_addr, ntohs(rsin->sin_port), @@ -582,10 +586,13 @@ static int dump_ep(int id, void *p, void *data) cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " "history 0x%lx hwtid %d atid %d " + "conn_na %u abort_na %u " "%pI6:%d/%d <-> %pI6:%d/%d\n", ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + ep->stats.connect_neg_adv, + ep->stats.abort_neg_adv, &lsin6->sin6_addr, ntohs(lsin6->sin6_port), ntohs(mapped_lsin6->sin6_port), &rsin6->sin6_addr, ntohs(rsin6->sin6_port), diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index d87e1650f643..97bb5550a6cf 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -137,6 +137,7 @@ struct c4iw_stats { u64 tcam_full; u64 act_ofld_conn_fails; u64 pas_ofld_conn_fails; + u64 neg_adv; }; struct c4iw_hw_queue { @@ -814,6 +815,11 @@ struct c4iw_listen_ep { int backlog; }; +struct c4iw_ep_stats { + unsigned connect_neg_adv; + unsigned abort_neg_adv; +}; + struct c4iw_ep { struct c4iw_ep_common com; struct c4iw_ep *parent_ep; @@ -846,6 +852,7 @@ struct c4iw_ep { unsigned int retry_count; int snd_win; int rcv_win; + struct c4iw_ep_stats stats; }; static inline void print_addr(struct c4iw_ep_common *epc, const char *func, From 8f71c1a27b84948720be17fffba71a67a1f0942d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 5 May 2015 13:01:39 +0200 Subject: [PATCH 16/18] IPoIB/CM: Fix indentation level See also patch "IPoIB/cm: Add connected mode support for devices without SRQs" (commit ID 68e995a29572). Detected by smatch. Signed-off-by: Bart Van Assche Cc: Pradeep Satyanarayana Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 56959adb6c7d..cf32a778e7d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -386,8 +386,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i rx->rx_ring[i].mapping, GFP_KERNEL)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); - ret = -ENOMEM; - goto err_count; + ret = -ENOMEM; + goto err_count; } ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i); if (ret) { From 954138dc25dca0263f315c5a3450059df05a4ea1 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 4 May 2015 14:31:03 +0200 Subject: [PATCH 17/18] MAINTAINERS: add include/rdma/ to InfiniBand subsystem Most headers for InfiniBand/RDMA are located under include/rdma/ and include/uapi/rdma. Signed-off-by: Yann Droneaud Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 781e099495d3..c80714ad3114 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5053,6 +5053,8 @@ S: Supported F: Documentation/infiniband/ F: drivers/infiniband/ F: include/uapi/linux/if_infiniband.h +F: include/uapi/rdma/ +F: include/rdma/ INOTIFY M: John McCutchan From b6b2bbe65b2117afd9766faa7cffea4d8f681455 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Tue, 5 May 2015 12:57:09 -0400 Subject: [PATCH 18/18] MAINTAINERS: Update InfiniBand subsystem maintainer Since Roland stepped down, the community asked me to take his place, and the nomination was followed by sufficient votes and no dissensions that we can move forward with the change. Signed-off-by: Doug Ledford --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c80714ad3114..40c14f14204d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5042,13 +5042,13 @@ S: Orphan F: drivers/video/fbdev/imsttfb.c INFINIBAND SUBSYSTEM -M: Roland Dreier +M: Doug Ledford M: Sean Hefty M: Hal Rosenstock L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org/ Q: http://patchwork.kernel.org/project/linux-rdma/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git +T: git git://github.com/dledford/linux.git S: Supported F: Documentation/infiniband/ F: drivers/infiniband/