From 21f9024355e58772ec5d7fc3534aa5e29d72a8b6 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 7 Apr 2020 11:02:28 +0000 Subject: [PATCH] nvmet-rdma: fix double free of rdma queue In case rdma accept fails at nvmet_rdma_queue_connect(), release work is scheduled. Later on, a new RDMA CM event may arrive since we didn't destroy the cm-id and call nvmet_rdma_queue_connect_fail(), which schedule another release work. This will cause calling nvmet_rdma_free_queue twice. To fix this we implicitly destroy the cm_id with non-zero ret code, which guarantees that new rdma_cm events will not arrive afterwards. Also add a qp pointer to nvmet_rdma_queue structure, so we can use it when the cm_id pointer is NULL or was destroyed. Signed-off-by: Israel Rukshin Suggested-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index f78201421978..ab867f32fb0d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -78,6 +78,7 @@ enum nvmet_rdma_queue_state { struct nvmet_rdma_queue { struct rdma_cm_id *cm_id; + struct ib_qp *qp; struct nvmet_port *port; struct ib_cq *cq; atomic_t sq_wr_avail; @@ -474,7 +475,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, if (ndev->srq) ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); else - ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL); + ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL); if (unlikely(ret)) pr_err("post_recv cmd failed\n"); @@ -513,7 +514,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); if (rsp->n_rdma) { - rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, + rdma_rw_ctx_destroy(&rsp->rw, queue->qp, queue->cm_id->port_num, rsp->req.sg, rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); } @@ -597,7 +598,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(rsp->n_rdma <= 0); atomic_add(rsp->n_rdma, &queue->sq_wr_avail); - rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, + rdma_rw_ctx_destroy(&rsp->rw, queue->qp, queue->cm_id->port_num, rsp->req.sg, rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->n_rdma = 0; @@ -752,7 +753,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp) } if (nvmet_rdma_need_data_in(rsp)) { - if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp, + if (rdma_rw_ctx_post(&rsp->rw, queue->qp, queue->cm_id->port_num, &rsp->read_cqe, NULL)) nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR); } else { @@ -1038,6 +1039,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) pr_err("failed to create_qp ret= %d\n", ret); goto err_destroy_cq; } + queue->qp = queue->cm_id->qp; atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr); @@ -1066,11 +1068,10 @@ err_destroy_cq: static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) { - struct ib_qp *qp = queue->cm_id->qp; - - ib_drain_qp(qp); - rdma_destroy_id(queue->cm_id); - ib_destroy_qp(qp); + ib_drain_qp(queue->qp); + if (queue->cm_id) + rdma_destroy_id(queue->cm_id); + ib_destroy_qp(queue->qp); ib_free_cq(queue->cq); } @@ -1305,9 +1306,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) { - schedule_work(&queue->release_work); - /* Destroying rdma_cm id is not needed here */ - return 0; + /* + * Don't destroy the cm_id in free path, as we implicitly + * destroy the cm_id here with non-zero ret code. + */ + queue->cm_id = NULL; + goto free_queue; } mutex_lock(&nvmet_rdma_queue_mutex); @@ -1316,6 +1320,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, return 0; +free_queue: + nvmet_rdma_free_queue(queue); put_device: kref_put(&ndev->ref, nvmet_rdma_free_dev);