nvmet-rdma: fix double free of rdma queue
If the RDMA accept step fails in nvmet_rdma_queue_connect(), the release work is scheduled. Later on, a new RDMA CM event may still arrive, because the cm_id was not destroyed, and nvmet_rdma_queue_connect_fail() then schedules another release work. As a result, nvmet_rdma_free_queue() is called twice. Fix this by destroying the cm_id implicitly, by returning a non-zero code from the CM event handler, which guarantees that no new rdma_cm events arrive afterwards. Also add a qp pointer to the nvmet_rdma_queue structure, so it can be used when the cm_id pointer is NULL or has already been destroyed.

Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
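Editor's note: the fix relies on the rdma_cm handler contract that returning a non-zero value from a CM event handler makes the RDMA CM core destroy the cm_id itself and stop delivering events for it. The following is a minimal, hypothetical sketch of that contract only, not code from this patch; the names example_cm_handler and example_accept are illustrative.

/*
 * Hypothetical illustration only -- not part of this patch.
 * If a CM event handler returns a non-zero value, the rdma_cm core
 * destroys the cm_id on the handler's behalf and delivers no further
 * events for it, so the consumer's teardown path must not call
 * rdma_destroy_id() on that id again.
 */
#include <linux/errno.h>
#include <rdma/rdma_cm.h>

/* Illustrative stand-in for an accept step that may fail. */
static int example_accept(struct rdma_cm_id *cm_id)
{
	return -ENOMEM;	/* pretend the accept failed */
}

static int example_cm_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST &&
	    example_accept(cm_id)) {
		/*
		 * Returning non-zero here implicitly destroys cm_id;
		 * any stored pointer to it must be cleared before the
		 * queue is freed, so teardown does not destroy it again.
		 */
		return -ECONNREFUSED;
	}
	return 0;	/* returning zero keeps ownership of cm_id */
}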
parent 8c5c660529
commit 21f9024355
@@ -78,6 +78,7 @@ enum nvmet_rdma_queue_state {
 
 struct nvmet_rdma_queue {
 	struct rdma_cm_id	*cm_id;
+	struct ib_qp		*qp;
 	struct nvmet_port	*port;
 	struct ib_cq		*cq;
 	atomic_t		sq_wr_avail;
@@ -474,7 +475,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
 	if (ndev->srq)
 		ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
 	else
-		ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
+		ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
 
 	if (unlikely(ret))
 		pr_err("post_recv cmd failed\n");
@@ -513,7 +514,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
 	atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
 
 	if (rsp->n_rdma) {
-		rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
+		rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
 				queue->cm_id->port_num, rsp->req.sg,
 				rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	}
@@ -597,7 +598,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
 
 	WARN_ON(rsp->n_rdma <= 0);
 	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
-	rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
+	rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
 			queue->cm_id->port_num, rsp->req.sg,
 			rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	rsp->n_rdma = 0;
@@ -752,7 +753,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
 	}
 
 	if (nvmet_rdma_need_data_in(rsp)) {
-		if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp,
+		if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
 				queue->cm_id->port_num, &rsp->read_cqe, NULL))
 			nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
 	} else {
@@ -1038,6 +1039,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 		pr_err("failed to create_qp ret= %d\n", ret);
 		goto err_destroy_cq;
 	}
+	queue->qp = queue->cm_id->qp;
 
 	atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
 
@@ -1066,11 +1068,10 @@ err_destroy_cq:
 
 static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
 {
-	struct ib_qp *qp = queue->cm_id->qp;
-
-	ib_drain_qp(qp);
-	rdma_destroy_id(queue->cm_id);
-	ib_destroy_qp(qp);
+	ib_drain_qp(queue->qp);
+	if (queue->cm_id)
+		rdma_destroy_id(queue->cm_id);
+	ib_destroy_qp(queue->qp);
 	ib_free_cq(queue->cq);
 }
 
@@ -1305,9 +1306,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret) {
-		schedule_work(&queue->release_work);
-		/* Destroying rdma_cm id is not needed here */
-		return 0;
+		/*
+		 * Don't destroy the cm_id in free path, as we implicitly
+		 * destroy the cm_id here with non-zero ret code.
+		 */
+		queue->cm_id = NULL;
+		goto free_queue;
 	}
 
 	mutex_lock(&nvmet_rdma_queue_mutex);
@@ -1316,6 +1320,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
 	return 0;
 
+free_queue:
+	nvmet_rdma_free_queue(queue);
 put_device:
 	kref_put(&ndev->ref, nvmet_rdma_free_dev);
 