From cc18b939e1efbc2a47f62dbd2b1df53d974df6b7 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 24 Apr 2014 14:31:53 -0500 Subject: [PATCH 1/4] RDMA/cxgb4: Fix endpoint mutex deadlocks In cases where the cm calls c4iw_modify_rc_qp() with the endpoint mutex held, they must be called with internal == 1. rx_data() and process_mpa_reply() are not doing this. This causes a deadlock because c4iw_modify_rc_qp() might call c4iw_ep_disconnect() in some !internal cases, and c4iw_ep_disconnect() acquires the endpoint mutex. The design was intended to only do the disconnect for !internal calls. Change rx_data(), FPDU_MODE case, to call c4iw_modify_rc_qp() with internal == 1, and then disconnect only after releasing the mutex. Change process_mpa_reply() to call c4iw_modify_rc_qp(TERMINATE) with internal == 1 and set a new attr flag telling it to send a TERMINATE message. Previously this was implied by !internal. Change process_mpa_reply() to return whether the caller should disconnect after releasing the endpoint mutex. Now rx_data() will do the disconnect in the cases where process_mpa_reply() wants to disconnect after the TERMINATE is sent. Change c4iw_modify_rc_qp() RTS->TERM to only disconnect if !internal, and to send a TERMINATE message if attrs->send_term is 1. Change abort_connection() to not aquire the ep mutex for setting the state, and make all calls to abort_connection() do so with the mutex held. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 31 +++++++++++++++++--------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/qp.c | 9 ++++---- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 185452abf32c..f9b04bc7e602 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -996,7 +996,7 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status) static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -1154,7 +1154,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) return credits; } -static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) +static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params; @@ -1164,6 +1164,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; int err; + int disconnect = 0; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); @@ -1173,7 +1174,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * will abort the connection. */ if (stop_ep_timer(ep)) - return; + return 0; /* * If we get more than the supported amount of private data @@ -1195,7 +1196,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * if we don't even have the mpa message, then bail. */ if (ep->mpa_pkt_len < sizeof(*mpa)) - return; + return 0; mpa = (struct mpa_message *) ep->mpa_pkt; /* Validate MPA header. */ @@ -1235,7 +1236,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; + return 0; if (mpa->flags & MPA_REJECT) { err = -ECONNREFUSED; @@ -1337,9 +1338,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_NOMATCH_RTR; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; + disconnect = 1; goto out; } @@ -1355,9 +1358,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_INSUFF_IRD; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; + disconnect = 1; goto out; } goto out; @@ -1366,7 +1371,7 @@ err: send_abort(ep, skb, GFP_KERNEL); out: connect_reply_upcall(ep, err); - return; + return disconnect; } static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) @@ -1524,6 +1529,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; __u8 status = hdr->status; + int disconnect = 0; ep = lookup_tid(t, tid); if (!ep) @@ -1539,7 +1545,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) switch (ep->com.state) { case MPA_REQ_SENT: ep->rcv_seq += dlen; - process_mpa_reply(ep, skb); + disconnect = process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: ep->rcv_seq += dlen; @@ -1555,13 +1561,16 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) ep->com.state, ep->hwtid, status); attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + disconnect = 1; break; } default: break; } mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); return 0; } @@ -3482,9 +3491,9 @@ static void process_timeout(struct c4iw_ep *ep) __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - mutex_unlock(&ep->com.mutex); if (abort) abort_connection(ep, NULL, GFP_KERNEL); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7b8c5806a09d..7474b490760a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -435,6 +435,7 @@ struct c4iw_qp_attributes { u8 ecode; u16 sq_db_inc; u16 rq_db_inc; + u8 send_term; }; struct c4iw_qp { diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 7b5114cb486f..f18ef34e8184 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1388,11 +1388,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; ep = qhp->ep; - disconnect = 1; - c4iw_get_ep(&qhp->ep->com); - if (!internal) + if (!internal) { + c4iw_get_ep(&qhp->ep->com); terminate = 1; - else { + disconnect = 1; + } else { + terminate = qhp->attr.send_term; ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; From 92e5011ab0e073ab8fbb726c11529021e5e63973 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 24 Apr 2014 14:31:59 -0500 Subject: [PATCH 2/4] RDMA/cxgb4: Force T5 connections to use TAHOE congestion control This is required to work around a T5 HW issue. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 8 ++++++++ drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index f9b04bc7e602..1f863a96a480 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -587,6 +587,10 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + } t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { @@ -2018,6 +2022,10 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN(1); } + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + } rpl = cplhdr(skb); INIT_TP_WR(rpl, ep->hwtid); diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index dc193c292671..6121ca08fe58 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -836,4 +836,18 @@ struct ulptx_idata { #define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) #define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) +enum { /* TCP congestion control algorithms */ + CONG_ALG_RENO, + CONG_ALG_TAHOE, + CONG_ALG_NEWRENO, + CONG_ALG_HIGHSPEED +}; + +#define S_CONG_CNTRL 14 +#define M_CONG_CNTRL 0x3 +#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) +#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) + +#define T5_OPT_2_VALID (1 << 31) + #endif /* _T4FW_RI_API_H_ */ From c2f9da92f2fd6dbf8f40ef4d5e00db688cc0416a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 24 Apr 2014 14:32:04 -0500 Subject: [PATCH 3/4] RDMA/cxgb4: Only allow kernel db ringing for T4 devs The whole db drop avoidance stuff is for T4 only. So we cannot allow that to be enabled for T5 devices. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f18ef34e8184..086f62f5dc9e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1777,11 +1777,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /* * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for * ringing the queue db when we're in DB_FULL mode. + * Only allow this on T4 devices. */ attrs.sq_db_inc = attr->sq_psn; attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) + return -EINVAL; return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } From 7d0a73a40c5ceb7524aa6a43f108de0dd8dbe3f0 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Sat, 26 Apr 2014 00:51:18 +0530 Subject: [PATCH 4/4] RDMA/cxgb4: Update Kconfig to include Chelsio T5 adapter Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index d4e8983fba53..23f38cf2c5cd 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_CXGB4 - tristate "Chelsio T4 RDMA Driver" + tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) select GENERIC_ALLOCATOR ---help--- - This is an iWARP/RDMA driver for the Chelsio T4 1GbE and - 10GbE adapters. + This is an iWARP/RDMA driver for the Chelsio T4 and T5 + 1GbE, 10GbE adapters and T5 40GbE adapter. For general information about Chelsio and our products, visit our website at .