diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a37a1f9d1204..6a5ccd4c7e63 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -468,7 +468,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) struct ib_qp *qp; struct ib_fmr_pool *fmr_pool = NULL; struct srp_fr_pool *fr_pool = NULL; - const int m = dev->use_fast_reg ? 3 : 1; + const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); @@ -849,7 +849,7 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch) for (i = 0; i < target->req_ring_size; ++i) { req = &ch->req_ring[i]; - mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), + mr_list = kmalloc(target->mr_per_cmd * sizeof(void *), GFP_KERNEL); if (!mr_list) goto out; @@ -1298,9 +1298,16 @@ static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) srp_handle_qp_err(cq, wc, "FAST REG"); } +/* + * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset + * where to start in the first element. If sg_offset_p != NULL then + * *sg_offset_p is updated to the offset in state->sg[retval] of the first + * byte that has not yet been mapped. + */ static int srp_map_finish_fr(struct srp_map_state *state, struct srp_request *req, - struct srp_rdma_ch *ch, int sg_nents) + struct srp_rdma_ch *ch, int sg_nents, + unsigned int *sg_offset_p) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; @@ -1316,9 +1323,13 @@ static int srp_map_finish_fr(struct srp_map_state *state, WARN_ON_ONCE(!dev->use_fast_reg); if (sg_nents == 1 && target->global_mr) { - srp_map_desc(state, sg_dma_address(state->sg), - sg_dma_len(state->sg), + unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; + + srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, + sg_dma_len(state->sg) - sg_offset, target->global_mr->rkey); + if (sg_offset_p) + *sg_offset_p = 0; return 1; } @@ -1329,15 +1340,18 @@ static int srp_map_finish_fr(struct srp_map_state *state, rkey = ib_inc_rkey(desc->mr->rkey); ib_update_fast_reg_key(desc->mr, rkey); - n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, NULL, dev->mr_page_size); + n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, + dev->mr_page_size); if (unlikely(n < 0)) { srp_fr_pool_put(ch->fr_pool, &desc, 1); - pr_debug("%s: ib_map_mr_sg(%d) returned %d.\n", + pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", dev_name(&req->scmnd->device->sdev_gendev), sg_nents, - n); + sg_offset_p ? *sg_offset_p : -1, n); return n; } + WARN_ON_ONCE(desc->mr->length == 0); + req->reg_cqe.done = srp_reg_mr_err_done; wr.wr.next = NULL; @@ -1358,8 +1372,10 @@ static int srp_map_finish_fr(struct srp_map_state *state, desc->mr->length, desc->mr->rkey); err = ib_post_send(ch->qp, &wr.wr, &bad_wr); - if (unlikely(err)) + if (unlikely(err)) { + WARN_ON_ONCE(err == -ENOMEM); return err; + } return n; } @@ -1416,7 +1432,7 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, state->pages = req->map_page; state->fmr.next = req->fmr_list; - state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt; + state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; for_each_sg(scat, sg, count, i) { ret = srp_map_sg_entry(state, ch, sg, i); @@ -1435,9 +1451,11 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, struct srp_request *req, struct scatterlist *scat, int count) { + unsigned int sg_offset = 0; + state->desc = req->indirect_desc; state->fr.next = req->fr_list; - state->fr.end = req->fr_list + ch->target->cmd_sg_cnt; + state->fr.end = req->fr_list + ch->target->mr_per_cmd; state->sg = scat; if (count == 0) @@ -1446,7 +1464,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, while (count) { int i, n; - n = srp_map_finish_fr(state, req, ch, count); + n = srp_map_finish_fr(state, req, ch, count, &sg_offset); if (unlikely(n < 0)) return n; @@ -1511,9 +1529,10 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, #ifdef CONFIG_NEED_SG_DMA_LENGTH idb_sg->dma_length = idb_sg->length; /* hack^2 */ #endif - ret = srp_map_finish_fr(&state, req, ch, 1); + ret = srp_map_finish_fr(&state, req, ch, 1, NULL); if (ret < 0) return ret; + WARN_ON_ONCE(ret < 1); } else if (dev->use_fmr) { state.pages = idb_pages; state.pages[0] = (req->indirect_dma_addr & @@ -1531,6 +1550,32 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, return 0; } +#if defined(DYNAMIC_DATA_DEBUG) +static void srp_check_mapping(struct srp_map_state *state, + struct srp_rdma_ch *ch, struct srp_request *req, + struct scatterlist *scat, int count) +{ + struct srp_device *dev = ch->target->srp_host->srp_dev; + struct srp_fr_desc **pfr; + u64 desc_len = 0, mr_len = 0; + int i; + + for (i = 0; i < state->ndesc; i++) + desc_len += be32_to_cpu(req->indirect_desc[i].len); + if (dev->use_fast_reg) + for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) + mr_len += (*pfr)->mr->length; + else if (dev->use_fmr) + for (i = 0; i < state->nmdesc; i++) + mr_len += be32_to_cpu(req->indirect_desc[i].len); + if (desc_len != scsi_bufflen(req->scmnd) || + mr_len > scsi_bufflen(req->scmnd)) + pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", + scsi_bufflen(req->scmnd), desc_len, mr_len, + state->ndesc, state->nmdesc); +} +#endif + /** * srp_map_data() - map SCSI data buffer onto an SRP request * @scmnd: SCSI command to map @@ -1616,6 +1661,15 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, if (ret < 0) goto unmap; +#if defined(DYNAMIC_DEBUG) + { + DEFINE_DYNAMIC_DEBUG_METADATA(ddm, + "Memory mapping consistency check"); + if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT)) + srp_check_mapping(&state, ch, req, scat, count); + } +#endif + /* We've mapped the request, now pull as much of the indirect * descriptor table as we can into the command buffer. If this * target is not using an external indirect table, we are @@ -2580,6 +2634,20 @@ static int srp_reset_host(struct scsi_cmnd *scmnd) return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; } +static int srp_slave_alloc(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + struct srp_target_port *target = host_to_target(shost); + struct srp_device *srp_dev = target->srp_host->srp_dev; + struct ib_device *ibdev = srp_dev->dev; + + if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + blk_queue_virt_boundary(sdev->request_queue, + ~srp_dev->mr_page_mask); + + return 0; +} + static int srp_slave_configure(struct scsi_device *sdev) { struct Scsi_Host *shost = sdev->host; @@ -2771,6 +2839,7 @@ static struct scsi_host_template srp_template = { .module = THIS_MODULE, .name = "InfiniBand SRP initiator", .proc_name = DRV_NAME, + .slave_alloc = srp_slave_alloc, .slave_configure = srp_slave_configure, .info = srp_target_info, .queuecommand = srp_queuecommand, @@ -3177,6 +3246,7 @@ static ssize_t srp_create_target(struct device *dev, struct srp_device *srp_dev = host->srp_dev; struct ib_device *ibdev = srp_dev->dev; int ret, node_idx, node, cpu, i; + unsigned int max_sectors_per_mr, mr_per_cmd = 0; bool multich = false; target_host = scsi_host_alloc(&srp_template, @@ -3233,8 +3303,33 @@ static ssize_t srp_create_target(struct device *dev, target->sg_tablesize = target->cmd_sg_cnt; } + if (srp_dev->use_fast_reg || srp_dev->use_fmr) { + /* + * FR and FMR can only map one HCA page per entry. If the + * start address is not aligned on a HCA page boundary two + * entries will be used for the head and the tail although + * these two entries combined contain at most one HCA page of + * data. Hence the "+ 1" in the calculation below. + * + * The indirect data buffer descriptor is contiguous so the + * memory for that buffer will only be registered if + * register_always is true. Hence add one to mr_per_cmd if + * register_always has been set. + */ + max_sectors_per_mr = srp_dev->max_pages_per_mr << + (ilog2(srp_dev->mr_page_size) - 9); + mr_per_cmd = register_always + + (target->scsi_host->max_sectors + 1 + + max_sectors_per_mr - 1) / max_sectors_per_mr; + pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", + target->scsi_host->max_sectors, + srp_dev->max_pages_per_mr, srp_dev->mr_page_size, + max_sectors_per_mr, mr_per_cmd); + } + target_host->sg_tablesize = target->sg_tablesize; - target->mr_pool_size = target->scsi_host->can_queue; + target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; + target->mr_per_cmd = mr_per_cmd; target->indirect_size = target->sg_tablesize * sizeof (struct srp_direct_buf); target->max_iu_len = sizeof (struct srp_cmd) + @@ -3441,6 +3536,9 @@ static void srp_add_one(struct ib_device *device) srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); max_pages_per_mr = device->attrs.max_mr_size; do_div(max_pages_per_mr, srp_dev->mr_page_size); + pr_debug("%s: %llu / %u = %llu <> %u\n", __func__, + device->attrs.max_mr_size, srp_dev->mr_page_size, + max_pages_per_mr, SRP_MAX_PAGES_PER_MR); srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, max_pages_per_mr); @@ -3448,12 +3546,13 @@ static void srp_add_one(struct ib_device *device) device->map_phys_fmr && device->unmap_fmr); srp_dev->has_fr = (device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS); - if (!srp_dev->has_fmr && !srp_dev->has_fr) + if (!srp_dev->has_fmr && !srp_dev->has_fr) { dev_warn(&device->dev, "neither FMR nor FR is supported\n"); - - srp_dev->use_fast_reg = (srp_dev->has_fr && - (!srp_dev->has_fmr || prefer_fr)); - srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; + } else if (device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) { + srp_dev->use_fast_reg = (srp_dev->has_fr && + (!srp_dev->has_fmr || prefer_fr)); + srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; + } if (srp_dev->use_fast_reg) { srp_dev->max_pages_per_mr = diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index a00914cdd44e..26bb9b0a7a63 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -203,6 +203,7 @@ struct srp_target_port { unsigned int scsi_id; unsigned int sg_tablesize; int mr_pool_size; + int mr_per_cmd; int queue_size; int req_ring_size; int comp_vector;