From 675a027c3db25a439f6ea744bb0c284f983dbfb9 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Fri, 29 Sep 2006 11:47:06 -0700 Subject: [PATCH 1/8] RDMA/cma: Fix leak of cm_ids in case of failures cma_connect_ib() and cma_connect_iw() leak cm_id's in failure cases. Signed-off-by: Krishna Kumar Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1178bd434d1b..69bb0892e887 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1862,6 +1862,11 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: + if (ret && !IS_ERR(id_priv->cm_id.ib)) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } + kfree(private_data); return ret; } @@ -1889,10 +1894,8 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, cm_id->remote_addr = *sin; ret = cma_modify_qp_rtr(&id_priv->id); - if (ret) { - iw_destroy_cm_id(cm_id); - return ret; - } + if (ret) + goto out; iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; @@ -1904,6 +1907,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, iw_param.qpn = conn_param->qp_num; ret = iw_cm_connect(cm_id, &iw_param); out: + if (ret && !IS_ERR(cm_id)) { + iw_destroy_cm_id(cm_id); + id_priv->cm_id.iw = NULL; + } return ret; } From 6e35aabee125999f4b3c01326f5339fa74a89259 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Fri, 29 Sep 2006 11:51:49 -0700 Subject: [PATCH 2/8] RDMA/cma: Fix device removal race The race is as follows: A process : cma_process_remove() calls cma_remove_id_dev(), which sets id state to CMA_DEVICE_REMOVAL and calls wait_event(dev_remove). B process : cma_req_handler() had incremented dev_remove, and calls cma_acquire_ib_dev() and on failure calls cma_release_remove(), which does a wake_up of cma_process_remove(). Then cma_req_handler() calls rdma_destroy_id(); A Process : cma_remove_id_dev() gets woken and checks the state of id, and since it is still (wrongly) CMA_DEVICE_REMOVAL, it calls notify_user(id) and if that fails, the caller - cma_process_remove() calls rdma_destroy_id(id). Two processes can call rdma_destroy_id(), resulting in one de-referencing kfreed id_priv. Fix is for process B to set CMA_DESTROYING in cma_req_handler() so that process A will return instead of doing a rdma_destroy_id(). Signed-off-by: Krishna Kumar Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 69bb0892e887..f383a4f50ab0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -932,6 +932,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) mutex_unlock(&lock); if (ret) { ret = -ENODEV; + cma_exch(conn_id, CMA_DESTROYING); cma_release_remove(conn_id); rdma_destroy_id(&conn_id->id); goto out; From 8f0472d331619d5d74927978d0dde5b4935e41a5 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Fri, 29 Sep 2006 11:57:09 -0700 Subject: [PATCH 3/8] RDMA/cma: Set status correctly on route resolution error On reporting a route error, also include the status for the error, rather than indicating a status of 0 when an error has occurred. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f383a4f50ab0..d10fdf1419b1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1308,6 +1308,7 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, work->old_state = CMA_ROUTE_QUERY; work->new_state = CMA_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + work->event.status = status; } queue_work(cma_wq, &work->work); From 94de178ac636e9d6f89b11cb3dd400b777942ac9 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Fri, 29 Sep 2006 12:03:35 -0700 Subject: [PATCH 4/8] RDMA/cma: Eliminate unnecessary remove_list Eliminate remove_list by using list_del_init() instead during device removal handling. Signed-off-by: Krishna Kumar Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d10fdf1419b1..3982b81d33cf 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2151,12 +2151,9 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) static void cma_process_remove(struct cma_device *cma_dev) { - struct list_head remove_list; struct rdma_id_private *id_priv; int ret; - INIT_LIST_HEAD(&remove_list); - mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { id_priv = list_entry(cma_dev->id_list.next, @@ -2167,8 +2164,7 @@ static void cma_process_remove(struct cma_device *cma_dev) continue; } - list_del(&id_priv->list); - list_add_tail(&id_priv->list, &remove_list); + list_del_init(&id_priv->list); atomic_inc(&id_priv->refcount); mutex_unlock(&lock); From 3f168d2b66d2314fea40614a3b966c1a0b6241a9 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Fri, 29 Sep 2006 12:09:51 -0700 Subject: [PATCH 5/8] RDMA/cma: Optimize error handling Reorganize code relating to cma_get_net_info() and rdam_create_id() to optimize error case handling (no need to alloc memory/etc. as part of rdma_create_id() if input parameters are wrong). Signed-off-by: Krishna Kumar Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3982b81d33cf..9ae4f3a67c70 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -874,23 +874,25 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, __u16 port; u8 ip_ver; - id = rdma_create_id(listen_id->event_handler, listen_id->context, - listen_id->ps); - if (IS_ERR(id)) - return NULL; - - rt = &id->route; - rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; - rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); - if (!rt->path_rec) - goto err; - if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) goto err; + id = rdma_create_id(listen_id->event_handler, listen_id->context, + listen_id->ps); + if (IS_ERR(id)) + goto err; + cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); + + rt = &id->route; + rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; + rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, + GFP_KERNEL); + if (!rt->path_rec) + goto destroy_id; + rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; @@ -903,8 +905,10 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; return id_priv; -err: + +destroy_id: rdma_destroy_id(id); +err: return NULL; } From 13b18c86176cab34ef30ef0a5962fcb0305f7269 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 29 Sep 2006 14:37:51 -0700 Subject: [PATCH 6/8] IB/ipath: Fix RDMA reads The PSN used to generate the request following a RDMA read was incorrect and some state booking wasn't maintained correctly. This patch fixes that. Signed-off-by: Ralph Campbell Signed-off-by: Bryan O'Sullivan --- drivers/infiniband/hw/ipath/ipath_rc.c | 59 ++++++++++++++------------ 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index a504cf67f272..ce6038743c5c 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -241,10 +241,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, * original work request since we may need to resend * it. */ - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - qp->s_len = len = wqe->length; + len = wqe->length; ss = &qp->s_sge; bth2 = 0; switch (wqe->wr.opcode) { @@ -368,14 +365,23 @@ int ipath_make_rc_req(struct ipath_qp *qp, default: goto done; } + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_len = wqe->length; if (newreq) { qp->s_tail++; if (qp->s_tail >= qp->s_size) qp->s_tail = 0; } - bth2 |= qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; + bth2 |= qp->s_psn & IPATH_PSN_MASK; + if (wqe->wr.opcode == IB_WR_RDMA_READ) + qp->s_psn = wqe->lpsn + 1; + else { + qp->s_psn++; + if ((int)(qp->s_psn - qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + } /* * Put the QP on the pending list so lost ACKs will cause * a retry. More than one request can be pending so the @@ -690,13 +696,6 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); struct ipath_ibdev *dev; - /* - * If there are no requests pending, we are done. - */ - if (ipath_cmp24(psn, qp->s_next_psn) >= 0 || - qp->s_last == qp->s_tail) - goto done; - if (qp->s_retry == 0) { wc->wr_id = wqe->wr.wr_id; wc->status = IB_WC_RETRY_EXC_ERR; @@ -731,8 +730,6 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) dev->n_rc_resends += (int)qp->s_psn - (int)psn; reset_psn(qp, psn); - -done: tasklet_hi_schedule(&qp->s_task); bail: @@ -765,6 +762,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) struct ib_wc wc; struct ipath_swqe *wqe; int ret = 0; + u32 ack_psn; /* * Remove the QP from the timeout queue (or RNR timeout queue). @@ -777,26 +775,26 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) list_del_init(&qp->timerwait); spin_unlock(&dev->pending_lock); + /* Nothing is pending to ACK/NAK. */ + if (unlikely(qp->s_last == qp->s_tail)) + goto bail; + /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued * before the NAK'ed request. The MSN won't include the NAK'ed * request but will include an ACK'ed request(s). */ + ack_psn = psn; + if (aeth >> 29) + ack_psn--; wqe = get_swqe_ptr(qp, qp->s_last); - /* Nothing is pending to ACK/NAK. */ - if (qp->s_last == qp->s_tail) - goto bail; - /* * The MSN might be for a later WQE than the PSN indicates so * only complete WQEs that the PSN finishes. */ - while (ipath_cmp24(psn, wqe->lpsn) >= 0) { - /* If we are ACKing a WQE, the MSN should be >= the SSN. */ - if (ipath_cmp24(aeth, wqe->ssn) < 0) - break; + while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) { /* * If this request is a RDMA read or atomic, and the ACK is * for a later operation, this ACK NAKs the RDMA read or @@ -807,7 +805,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * is sent but before the response is received. */ if ((wqe->wr.opcode == IB_WR_RDMA_READ && - opcode != OP(RDMA_READ_RESPONSE_LAST)) || + (opcode != OP(RDMA_READ_RESPONSE_LAST) || + ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && (opcode != OP(ATOMIC_ACKNOWLEDGE) || @@ -825,6 +824,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ goto bail; } + if (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + tasklet_hi_schedule(&qp->s_task); /* Post a send completion queue entry if requested. */ if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { @@ -1055,7 +1058,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + if (qp->s_last != qp->s_tail) + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } rdma_read: @@ -1091,7 +1095,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* ACKs READ req. */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + if (qp->s_last != qp->s_tail) + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } /* FALLTHROUGH */ From 0f248d9cde673a481eb3182909b54d07e9d58f72 Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Mon, 2 Oct 2006 14:52:17 -0700 Subject: [PATCH 7/8] IB/ehca: Fix device registration Move the call to ib_register_device() later, since a device should not be registered until it is completely read to be used. This fixes crashes that occur if an upper-layer driver such as IPoIB is loaded before the ehca module. Signed-off-by: Hoang-Nam Nguyen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_main.c | 36 ++++++++++++++------------ 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 2380994418a5..024d511c4b58 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -49,7 +49,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch "); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0016"); +MODULE_VERSION("SVNEHCA_0017"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -239,7 +239,7 @@ init_node_guid1: return ret; } -int ehca_register_device(struct ehca_shca *shca) +int ehca_init_device(struct ehca_shca *shca) { int ret; @@ -317,11 +317,6 @@ int ehca_register_device(struct ehca_shca *shca) /* shca->ib_device.process_mad = ehca_process_mad; */ shca->ib_device.mmap = ehca_mmap; - ret = ib_register_device(&shca->ib_device); - if (ret) - ehca_err(&shca->ib_device, - "ib_register_device() failed ret=%x", ret); - return ret; } @@ -561,9 +556,9 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, goto probe1; } - ret = ehca_register_device(shca); + ret = ehca_init_device(shca); if (ret) { - ehca_gen_err("Cannot register Infiniband device"); + ehca_gen_err("Cannot init ehca device struct"); goto probe1; } @@ -571,7 +566,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); if (ret) { ehca_err(&shca->ib_device, "Cannot create EQ."); - goto probe2; + goto probe1; } ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); @@ -600,6 +595,13 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, goto probe5; } + ret = ib_register_device(&shca->ib_device); + if (ret) { + ehca_err(&shca->ib_device, + "ib_register_device() failed ret=%x", ret); + goto probe6; + } + /* create AQP1 for port 1 */ if (ehca_open_aqp1 == 1) { shca->sport[0].port_state = IB_PORT_DOWN; @@ -607,7 +609,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, if (ret) { ehca_err(&shca->ib_device, "Cannot create AQP1 for port 1."); - goto probe6; + goto probe7; } } @@ -618,7 +620,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, if (ret) { ehca_err(&shca->ib_device, "Cannot create AQP1 for port 2."); - goto probe7; + goto probe8; } } @@ -630,12 +632,15 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, return 0; -probe7: +probe8: ret = ehca_destroy_aqp1(&shca->sport[0]); if (ret) ehca_err(&shca->ib_device, "Cannot destroy AQP1 for port 1. ret=%x", ret); +probe7: + ib_unregister_device(&shca->ib_device); + probe6: ret = ehca_dereg_internal_maxmr(shca); if (ret) @@ -660,9 +665,6 @@ probe3: ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret); -probe2: - ib_unregister_device(&shca->ib_device); - probe1: ib_dealloc_device(&shca->ib_device); @@ -750,7 +752,7 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0016)\n"); + "(Rel.: SVNEHCA_0017)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); spin_lock_init(&ehca_qp_idr_lock); From e5a010690141ab805b059ba10f7401b80e0be831 Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Mon, 2 Oct 2006 14:52:17 -0700 Subject: [PATCH 8/8] IB/ehca: Tweak trace message format Add an extra space to make things more readable. Signed-off-by: Hoang-Nam Nguyen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_tools.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 9f56bb846d93..809da3ef706b 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -117,7 +117,7 @@ extern int ehca_debug_level; unsigned int l = (unsigned int)(len); \ unsigned char *deb = (unsigned char*)(adr); \ for (x = 0; x < l; x += 16) { \ - printk("EHCA_DMP:%s" format \ + printk("EHCA_DMP:%s " format \ " adr=%p ofs=%04x %016lx %016lx\n", \ __FUNCTION__, ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \