diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c15a26096038..c5dc833212e1 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -883,8 +883,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
 	/* this prevents anyone from attaching or migrating to this blkcg */
 	wb_blkcg_offline(blkcg);
 
-	/* put the base cgwb reference allowing step 2 to be triggered */
-	blkcg_cgwb_put(blkcg);
+	/* put the base online pin allowing step 2 to be triggered */
+	blkcg_unpin_online(blkcg);
 }
 
 /**
@@ -983,11 +983,11 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 	}
 
 	spin_lock_init(&blkcg->lock);
+	refcount_set(&blkcg->online_pin, 1);
 	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&blkcg->cgwb_list);
-	refcount_set(&blkcg->cgwb_refcnt, 1);
 #endif
 
 	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
@@ -1006,6 +1006,21 @@ unlock:
 	return ret;
 }
 
+static int blkcg_css_online(struct cgroup_subsys_state *css)
+{
+	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg *parent = blkcg_parent(blkcg);
+
+	/*
+	 * blkcg_pin_online() is used to delay blkcg offline so that blkgs
+	 * don't go offline while cgwbs are still active on them. Pin the
+	 * parent so that offline always happens towards the root.
+	 */
+	if (parent)
+		blkcg_pin_online(parent);
+	return 0;
+}
+
 /**
  * blkcg_init_queue - initialize blkcg part of request queue
  * @q: request_queue to initialize
@@ -1199,6 +1214,7 @@ static void blkcg_exit(struct task_struct *tsk)
 
 struct cgroup_subsys io_cgrp_subsys = {
 	.css_alloc = blkcg_css_alloc,
+	.css_online = blkcg_css_online,
 	.css_offline = blkcg_css_offline,
 	.css_free = blkcg_css_free,
 	.can_attach = blkcg_can_attach,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f6291ceedee4..8e56884fd2e9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1289,7 +1289,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		 * the driver there was more coming, but that turned out to
 		 * be a lie.
 		 */
-		if (q->mq_ops->commit_rqs)
+		if (q->mq_ops->commit_rqs && queued)
 			q->mq_ops->commit_rqs(hctx);
 
 		spin_lock(&hctx->lock);
@@ -1911,6 +1911,8 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
 void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 		struct list_head *list)
 {
+	int queued = 0;
+
 	while (!list_empty(list)) {
 		blk_status_t ret;
 		struct request *rq = list_first_entry(list, struct request,
@@ -1926,7 +1928,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 				break;
 			}
 			blk_mq_end_request(rq, ret);
-		}
+		} else
+			queued++;
 	}
 
 	/*
@@ -1934,7 +1937,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 	 * the driver there was more coming, but that turned out to
 	 * be a lie.
 	 */
-	if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
+	if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs && queued)
 		hctx->queue->mq_ops->commit_rqs(hctx);
 }
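The two blk-mq hunks above tighten the ->commit_rqs() contract: the hook compensates for a bd->last hint that never arrived, so it is only meaningful once at least one request actually reached the driver. A minimal sketch of the driver side that relies on this contract (the mydrv_* names are hypothetical, not part of this patch):

static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct mydrv_queue *mq = hctx->driver_data;

	mydrv_write_sq_entry(mq, bd->rq);
	if (bd->last)			/* end of batch: ring the doorbell */
		mydrv_ring_doorbell(mq);
	return BLK_STS_OK;
}

/*
 * Called when dispatch stopped early after bd->last was reported false.
 * With the fix above, this runs only if mydrv_queue_rq() accepted at
 * least one request, so the doorbell never fires for an empty batch.
 */
static void mydrv_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	mydrv_ring_doorbell(hctx->driver_data);
}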
diff --git a/block/partitions/core.c b/block/partitions/core.c
index b79c4513629b..bc1ded1331b1 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -496,7 +496,7 @@ int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (!disk_part_scan_enabled(disk))
 		return 0;
 
-	if (bdev->bd_part_count || bdev->bd_super)
+	if (bdev->bd_part_count || bdev->bd_openers > 1)
 		return -EBUSY;
 	res = invalidate_partition(disk, 0);
 	if (res)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a42c49e04954..da693e6a834e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -429,11 +429,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
 	 * information.
 	 */
 	struct file *file = lo->lo_backing_file;
+	struct request_queue *q = lo->lo_queue;
 	int ret;
 
 	mode |= FALLOC_FL_KEEP_SIZE;
 
-	if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
+	if (!blk_queue_discard(q)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -463,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
 
 	if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
 	    req_op(rq) != REQ_OP_READ) {
 		if (cmd->ret < 0)
-			ret = BLK_STS_IOERR;
+			ret = errno_to_blk_status(cmd->ret);
 		goto end_io;
 	}
@@ -867,28 +868,47 @@ static void loop_config_discard(struct loop_device *lo)
 	struct inode *inode = file->f_mapping->host;
 	struct request_queue *q = lo->lo_queue;
 
+	/*
+	 * If the backing device is a block device, mirror its zeroing
+	 * capability. Set the discard sectors to the block device's zeroing
+	 * capabilities because loop discards result in blkdev_issue_zeroout(),
+	 * not blkdev_issue_discard(). This maintains consistent behavior with
+	 * file-backed loop devices: discarded regions read back as zero.
+	 */
+	if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) {
+		struct request_queue *backingq;
+
+		backingq = bdev_get_queue(inode->i_bdev);
+		blk_queue_max_discard_sectors(q,
+			backingq->limits.max_write_zeroes_sectors);
+
+		blk_queue_max_write_zeroes_sectors(q,
+			backingq->limits.max_write_zeroes_sectors);
+
 	/*
 	 * We use punch hole to reclaim the free space used by the
 	 * image a.k.a. discard. However we do not support discard if
 	 * encryption is enabled, because it may give an attacker
 	 * useful information.
 	 */
-	if ((!file->f_op->fallocate) ||
-	    lo->lo_encrypt_key_size) {
+	} else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
 		q->limits.discard_granularity = 0;
 		q->limits.discard_alignment = 0;
 		blk_queue_max_discard_sectors(q, 0);
 		blk_queue_max_write_zeroes_sectors(q, 0);
-		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
-		return;
+
+	} else {
+		q->limits.discard_granularity = inode->i_sb->s_blocksize;
+		q->limits.discard_alignment = 0;
+
+		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
+		blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
 	}
 
-	q->limits.discard_granularity = inode->i_sb->s_blocksize;
-	q->limits.discard_alignment = 0;
-
-	blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	if (q->limits.max_write_zeroes_sectors)
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
 }
 
 static void loop_unprepare_queue(struct loop_device *lo)
@@ -1955,7 +1975,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 failed:
 	/* complete non-aio request */
 	if (!cmd->use_aio || ret) {
-		cmd->ret = ret ? -EIO : 0;
+		if (ret == -EOPNOTSUPP)
+			cmd->ret = ret;
+		else
+			cmd->ret = ret ? -EIO : 0;
 		blk_mq_complete_request(rq);
 	}
 }
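The loop hunks make discard behavior uniform: whatever the backing store, a discarded range must read back as zeroes, and an unsupported request must surface as EOPNOTSUPP rather than a generic EIO (hence errno_to_blk_status() in lo_complete_rq() and the -EOPNOTSUPP special case in loop_handle_cmd()). A user-visible sketch of that promise; the device path and sizes are illustrative assumptions:

#include <fcntl.h>
#include <linux/fs.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	uint64_t range[2] = { 0, 65536 };	/* offset, length in bytes */
	char buf[4096];
	int fd = open("/dev/loop0", O_RDWR);

	if (fd < 0)
		return 1;
	/* Fails with EOPNOTSUPP if the queue advertises no discard. */
	if (ioctl(fd, BLKDISCARD, &range) == 0) {
		/* Reads back as zeroes for file- and block-backed loops alike. */
		pread(fd, buf, sizeof(buf), 0);
	}
	close(fd);
	return 0;
}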
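Returning to the blk_drop_partitions() hunk earlier in this section: bd_super only catches a filesystem mounted directly on the whole device, so other holders slipped through. bd_openers is a direct count of opens, and the process requesting the rescan necessarily contributes one of them, which is why the threshold is "> 1" rather than "> 0". An illustrative userspace view (hypothetical /dev/sdX):

#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/sdX", O_RDONLY);	/* we are opener number one */

	if (fd < 0)
		return 1;
	if (ioctl(fd, BLKRRPART) < 0)	/* EBUSY once anyone else has it open */
		perror("BLKRRPART");
	close(fd);
	return 0;
}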
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4f907e3beda1..91c1bd659947 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -6,6 +6,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/compat.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/hdreg.h>
@@ -1252,6 +1253,18 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl)
 	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 
+/*
+ * Convert integer values from ioctl structures to user pointers, silently
+ * ignoring the upper bits in the compat case to match behaviour of 32-bit
+ * kernels.
+ */
+static void __user *nvme_to_user_ptr(uintptr_t ptrval)
+{
+	if (in_compat_syscall())
+		ptrval = (compat_uptr_t)ptrval;
+	return (void __user *)ptrval;
+}
+
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_user_io io;
@@ -1275,7 +1288,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
 	length = (io.nblocks + 1) << ns->lba_shift;
 	meta_len = (io.nblocks + 1) * ns->ms;
-	metadata = (void __user *)(uintptr_t)io.metadata;
+	metadata = nvme_to_user_ptr(io.metadata);
 
 	if (ns->ext) {
 		length += meta_len;
@@ -1298,7 +1311,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.appmask = cpu_to_le16(io.appmask);
 
 	return nvme_submit_user_cmd(ns->queue, &c,
-			(void __user *)(uintptr_t)io.addr, length,
+			nvme_to_user_ptr(io.addr), length,
 			metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
 }
 
@@ -1418,9 +1431,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 
 	effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
 	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
-			(void __user *)(uintptr_t)cmd.metadata,
-			cmd.metadata_len, 0, &result, timeout);
+			nvme_to_user_ptr(cmd.addr), cmd.data_len,
+			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
+			0, &result, timeout);
 	nvme_passthru_end(ctrl, effects);
 
 	if (status >= 0) {
@@ -1465,8 +1478,8 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 
 	effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
 	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
-			(void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len,
+			nvme_to_user_ptr(cmd.addr), cmd.data_len,
+			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
 			0, &cmd.result, timeout);
 	nvme_passthru_end(ctrl, effects);
 
@@ -1884,6 +1897,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	if (ns->head->disk) {
 		nvme_update_disk_info(ns->head->disk, ns, id);
 		blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
+		if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
+			struct backing_dev_info *info =
+				ns->head->disk->queue->backing_dev_info;
+
+			info->capabilities |= BDI_CAP_STABLE_WRITES;
+		}
+
 		revalidate_disk(ns->head->disk);
 	}
 #endif
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index a8bf2fb1287b..7dfc4a2ecf1e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -342,8 +342,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
 	    !template->ls_req || !template->fcp_io ||
 	    !template->ls_abort || !template->fcp_abort ||
 	    !template->max_hw_queues || !template->max_sgl_segments ||
-	    !template->max_dif_sgl_segments || !template->dma_boundary ||
-	    !template->module) {
+	    !template->max_dif_sgl_segments || !template->dma_boundary) {
 		ret = -EINVAL;
 		goto out_reghost_failed;
 	}
@@ -2016,7 +2015,6 @@ nvme_fc_ctrl_free(struct kref *ref)
 {
 	struct nvme_fc_ctrl *ctrl =
 		container_of(ref, struct nvme_fc_ctrl, ref);
-	struct nvme_fc_lport *lport = ctrl->lport;
 	unsigned long flags;
 
 	if (ctrl->ctrl.tagset) {
@@ -2043,7 +2041,6 @@ nvme_fc_ctrl_free(struct kref *ref)
 	if (ctrl->ctrl.opts)
 		nvmf_free_options(ctrl->ctrl.opts);
 	kfree(ctrl);
-	module_put(lport->ops->module);
 }
 
 static void
@@ -3074,15 +3071,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		goto out_fail;
 	}
 
-	if (!try_module_get(lport->ops->module)) {
-		ret = -EUNATCH;
-		goto out_free_ctrl;
-	}
-
 	idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
 	if (idx < 0) {
 		ret = -ENOSPC;
-		goto out_mod_put;
+		goto out_free_ctrl;
 	}
 
 	ctrl->ctrl.opts = opts;
@@ -3232,8 +3224,6 @@ out_free_queues:
 out_free_ida:
 	put_device(ctrl->dev);
 	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
-out_mod_put:
-	module_put(lport->ops->module);
 out_free_ctrl:
 	kfree(ctrl);
 out_fail:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 61bf87592570..54603bd3e02d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -510,7 +510,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	if (!nr_nsids)
 		return 0;
 
-	down_write(&ctrl->namespaces_rwsem);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		unsigned nsid = le32_to_cpu(desc->nsids[n]);
 
@@ -521,7 +521,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 		if (++n == nr_nsids)
 			break;
 	}
-	up_write(&ctrl->namespaces_rwsem);
+	up_read(&ctrl->namespaces_rwsem);
 	return 0;
 }
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 76dbb55625ac..cac8a930396a 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1342,7 +1342,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	int ret;
 
 	sge->addr = qe->dma;
-	sge->length = sizeof(struct nvme_command),
+	sge->length = sizeof(struct nvme_command);
 	sge->lkey = queue->device->pd->local_dma_lkey;
 
 	wr.next = NULL;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 0ef14f0fad86..c15a92163c1f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -174,16 +174,14 @@ static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
 static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
 {
 	struct request *rq;
-	unsigned int bytes;
 
 	if (unlikely(nvme_tcp_async_req(req)))
 		return false; /* async events don't have a request */
 
 	rq = blk_mq_rq_from_pdu(req);
-	bytes = blk_rq_payload_bytes(rq);
 
-	return rq_data_dir(rq) == WRITE && bytes &&
-		bytes <= nvme_tcp_inline_data_size(req->queue);
+	return rq_data_dir(rq) == WRITE && req->data_len &&
+		req->data_len <= nvme_tcp_inline_data_size(req->queue);
 }
 
 static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
@@ -1075,7 +1073,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 		if (result > 0)
 			pending = true;
 		else if (unlikely(result < 0))
-			break;
+			return;
 
 		if (!pending)
 			return;
@@ -2164,7 +2162,9 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
-	if (rq_data_dir(rq) == WRITE && req->data_len &&
+	if (!blk_rq_nr_phys_segments(rq))
+		nvme_tcp_set_sg_null(c);
+	else if (rq_data_dir(rq) == WRITE &&
 	    req->data_len <= nvme_tcp_inline_data_size(queue))
 		nvme_tcp_set_sg_inline(queue, c, req->data_len);
 	else
@@ -2191,7 +2191,8 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 	req->data_sent = 0;
 	req->pdu_len = 0;
 	req->pdu_sent = 0;
-	req->data_len = blk_rq_payload_bytes(rq);
+	req->data_len = blk_rq_nr_phys_segments(rq) ?
+				blk_rq_payload_bytes(rq) : 0;
 	req->curr_bio = rq->bio;
 
 	if (rq_data_dir(rq) == WRITE &&
@@ -2298,6 +2299,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
 	struct nvme_tcp_queue *queue = hctx->driver_data;
 	struct sock *sk = queue->sock->sk;
 
+	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
+		return 0;
+
 	if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
 		sk_busy_loop(sk, true);
 	nvme_tcp_try_recv(queue);
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 7aa10788b7c8..58cabd7b6fc5 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1098,12 +1098,19 @@ static struct configfs_attribute *nvmet_referral_attrs[] = {
 	NULL,
 };
 
-static void nvmet_referral_release(struct config_item *item)
+static void nvmet_referral_notify(struct config_group *group,
+		struct config_item *item)
 {
 	struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent);
 	struct nvmet_port *port = to_nvmet_port(item);
 
 	nvmet_referral_disable(parent, port);
+}
+
+static void nvmet_referral_release(struct config_item *item)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+
 	kfree(port);
 }
 
@@ -1134,6 +1141,7 @@ static struct config_group *nvmet_referral_make(
 
 static struct configfs_group_operations nvmet_referral_group_ops = {
 	.make_group		= nvmet_referral_make,
+	.disconnect_notify	= nvmet_referral_notify,
 };
 
 static const struct config_item_type nvmet_referrals_type = {
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index a0db6371b43e..a8ceb7721640 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -684,7 +684,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 	disconnect = atomic_xchg(&queue->connected, 0);
 
 	spin_lock_irqsave(&queue->qlock, flags);
-	/* about outstanding io's */
+	/* abort outstanding io's */
 	for (i = 0; i < queue->sqsize; fod++, i++) {
 		if (fod->active) {
 			spin_lock(&fod->flock);
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 1c50af6219f3..f69ce66e2d44 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -198,10 +198,13 @@ struct fcloop_lport_priv {
 };
 
 struct fcloop_rport {
-	struct nvme_fc_remote_port *remoteport;
-	struct nvmet_fc_target_port *targetport;
-	struct fcloop_nport *nport;
-	struct fcloop_lport *lport;
+	struct nvme_fc_remote_port	*remoteport;
+	struct nvmet_fc_target_port	*targetport;
+	struct fcloop_nport		*nport;
+	struct fcloop_lport		*lport;
+	spinlock_t			lock;
+	struct list_head		ls_list;
+	struct work_struct		ls_work;
 };
 
 struct fcloop_tport {
@@ -224,11 +227,10 @@ struct fcloop_nport {
 };
 
 struct fcloop_lsreq {
-	struct fcloop_tport		*tport;
 	struct nvmefc_ls_req		*lsreq;
-	struct work_struct		work;
 	struct nvmefc_tgt_ls_req	tgt_ls_req;
 	int				status;
+	struct list_head		ls_list; /* fcloop_rport->ls_list */
 };
 
 struct fcloop_rscn {
@@ -292,21 +294,32 @@ fcloop_delete_queue(struct nvme_fc_local_port *localport,
 {
 }
 
-
-/*
- * Transmit of LS RSP done (e.g. buffers all set). call back up
- * initiator "done" flows.
- */
 static void
-fcloop_tgt_lsrqst_done_work(struct work_struct *work)
+fcloop_rport_lsrqst_work(struct work_struct *work)
 {
-	struct fcloop_lsreq *tls_req =
-		container_of(work, struct fcloop_lsreq, work);
-	struct fcloop_tport *tport = tls_req->tport;
-	struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+	struct fcloop_rport *rport =
+		container_of(work, struct fcloop_rport, ls_work);
+	struct fcloop_lsreq *tls_req;
 
-	if (!tport || tport->remoteport)
-		lsreq->done(lsreq, tls_req->status);
+	spin_lock(&rport->lock);
+	for (;;) {
+		tls_req = list_first_entry_or_null(&rport->ls_list,
+				struct fcloop_lsreq, ls_list);
+		if (!tls_req)
+			break;
+
+		list_del(&tls_req->ls_list);
+		spin_unlock(&rport->lock);
+
+		tls_req->lsreq->done(tls_req->lsreq, tls_req->status);
+		/*
+		 * callee may free memory containing tls_req.
+		 * do not reference lsreq after this.
+		 */
+
+		spin_lock(&rport->lock);
+	}
+	spin_unlock(&rport->lock);
 }
 
 static int
@@ -319,17 +332,18 @@ fcloop_ls_req(struct nvme_fc_local_port *localport,
 	int ret = 0;
 
 	tls_req->lsreq = lsreq;
-	INIT_WORK(&tls_req->work, fcloop_tgt_lsrqst_done_work);
+	INIT_LIST_HEAD(&tls_req->ls_list);
 
 	if (!rport->targetport) {
 		tls_req->status = -ECONNREFUSED;
-		tls_req->tport = NULL;
-		schedule_work(&tls_req->work);
+		spin_lock(&rport->lock);
+		list_add_tail(&rport->ls_list, &tls_req->ls_list);
+		spin_unlock(&rport->lock);
+		schedule_work(&rport->ls_work);
 		return ret;
 	}
 
 	tls_req->status = 0;
-	tls_req->tport = rport->targetport->private;
 	ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req,
 				 lsreq->rqstaddr, lsreq->rqstlen);
 
@@ -337,18 +351,28 @@ fcloop_ls_req(struct nvme_fc_local_port *localport,
 }
 
 static int
-fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport,
+fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
 			struct nvmefc_tgt_ls_req *tgt_lsreq)
 {
 	struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq);
 	struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+	struct fcloop_tport *tport = targetport->private;
+	struct nvme_fc_remote_port *remoteport = tport->remoteport;
+	struct fcloop_rport *rport;
 
 	memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf,
 		((lsreq->rsplen < tgt_lsreq->rsplen) ?
			lsreq->rsplen : tgt_lsreq->rsplen));
+
 	tgt_lsreq->done(tgt_lsreq);
 
-	schedule_work(&tls_req->work);
+	if (remoteport) {
+		rport = remoteport->private;
+		spin_lock(&rport->lock);
+		list_add_tail(&rport->ls_list, &tls_req->ls_list);
+		spin_unlock(&rport->lock);
+		schedule_work(&rport->ls_work);
+	}
 
 	return 0;
 }
@@ -834,6 +858,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
 {
 	struct fcloop_rport *rport = remoteport->private;
 
+	flush_work(&rport->ls_work);
 	fcloop_nport_put(rport->nport);
 }
 
@@ -850,7 +875,6 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
 #define FCLOOP_DMABOUND_4G 0xFFFFFFFF
 
 static struct nvme_fc_port_template fctemplate = {
-	.module			= THIS_MODULE,
 	.localport_delete	= fcloop_localport_delete,
 	.remoteport_delete	= fcloop_remoteport_delete,
 	.create_queue		= fcloop_create_queue,
@@ -1136,6 +1160,9 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
 	rport->nport = nport;
 	rport->lport = nport->lport;
 	nport->rport = rport;
+	spin_lock_init(&rport->lock);
+	INIT_WORK(&rport->ls_work, fcloop_rport_lsrqst_work);
+	INIT_LIST_HEAD(&rport->ls_list);
 
 	return count;
 }
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index c90c06839d64..fd47de0e4e4e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -78,6 +78,7 @@ enum nvmet_rdma_queue_state {
 
 struct nvmet_rdma_queue {
 	struct rdma_cm_id	*cm_id;
+	struct ib_qp		*qp;
 	struct nvmet_port	*port;
 	struct ib_cq		*cq;
 	atomic_t		sq_wr_avail;
@@ -105,6 +106,13 @@ struct nvmet_rdma_queue {
 	struct list_head	queue_list;
 };
 
+struct nvmet_rdma_port {
+	struct nvmet_port	*nport;
+	struct sockaddr_storage addr;
+	struct rdma_cm_id	*cm_id;
+	struct delayed_work	repair_work;
+};
+
 struct nvmet_rdma_device {
 	struct ib_device	*device;
 	struct ib_pd		*pd;
@@ -461,7 +469,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
 	if (ndev->srq)
 		ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
 	else
-		ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
+		ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
 
 	if (unlikely(ret))
 		pr_err("post_recv cmd failed\n");
@@ -500,7 +508,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
 	atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
 
 	if (rsp->n_rdma) {
-		rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
+		rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
 				queue->cm_id->port_num, rsp->req.sg,
 				rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	}
@@ -584,7 +592,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
 
 	WARN_ON(rsp->n_rdma <= 0);
 	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
-	rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
+	rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
 			queue->cm_id->port_num, rsp->req.sg,
 			rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	rsp->n_rdma = 0;
@@ -739,7 +747,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
 	}
 
 	if (nvmet_rdma_need_data_in(rsp)) {
-		if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp,
+		if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
 				queue->cm_id->port_num, &rsp->read_cqe, NULL))
 			nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
 	} else {
@@ -911,7 +919,8 @@ static void nvmet_rdma_free_dev(struct kref *ref)
 static struct nvmet_rdma_device *
 nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 {
-	struct nvmet_port *port = cm_id->context;
+	struct nvmet_rdma_port *port = cm_id->context;
+	struct nvmet_port *nport = port->nport;
 	struct nvmet_rdma_device *ndev;
 	int inline_page_count;
 	int inline_sge_count;
@@ -928,17 +937,17 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 	if (!ndev)
 		goto out_err;
 
-	inline_page_count = num_pages(port->inline_data_size);
+	inline_page_count = num_pages(nport->inline_data_size);
 	inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
				cm_id->device->attrs.max_recv_sge) - 1;
 	if (inline_page_count > inline_sge_count) {
 		pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
-			port->inline_data_size, cm_id->device->name,
+			nport->inline_data_size, cm_id->device->name,
 			inline_sge_count * PAGE_SIZE);
-		port->inline_data_size = inline_sge_count * PAGE_SIZE;
+		nport->inline_data_size = inline_sge_count * PAGE_SIZE;
 		inline_page_count = inline_sge_count;
 	}
-	ndev->inline_data_size = port->inline_data_size;
+	ndev->inline_data_size = nport->inline_data_size;
 	ndev->inline_page_count = inline_page_count;
 	ndev->device = cm_id->device;
 	kref_init(&ndev->ref);
@@ -1024,6 +1033,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 		pr_err("failed to create_qp ret= %d\n", ret);
 		goto err_destroy_cq;
 	}
+	queue->qp = queue->cm_id->qp;
 
 	atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
 
@@ -1052,11 +1062,10 @@ err_destroy_cq:
 
 static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
 {
-	struct ib_qp *qp = queue->cm_id->qp;
-
-	ib_drain_qp(qp);
-	rdma_destroy_id(queue->cm_id);
-	ib_destroy_qp(qp);
+	ib_drain_qp(queue->qp);
+	if (queue->cm_id)
+		rdma_destroy_id(queue->cm_id);
+	ib_destroy_qp(queue->qp);
 	ib_free_cq(queue->cq);
 }
 
@@ -1266,6 +1275,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
 static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
+	struct nvmet_rdma_port *port = cm_id->context;
 	struct nvmet_rdma_device *ndev;
 	struct nvmet_rdma_queue *queue;
 	int ret = -EINVAL;
@@ -1281,7 +1291,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 		ret = -ENOMEM;
 		goto put_device;
 	}
-	queue->port = cm_id->context;
+	queue->port = port->nport;
 
 	if (queue->host_qid == 0) {
 		/* Let inflight controller teardown complete */
@@ -1290,9 +1300,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret) {
-		schedule_work(&queue->release_work);
-		/* Destroying rdma_cm id is not needed here */
-		return 0;
+		/*
+		 * Don't destroy the cm_id in free path, as we implicitly
+		 * destroy the cm_id here with non-zero ret code.
+		 */
+		queue->cm_id = NULL;
+		goto free_queue;
 	}
 
 	mutex_lock(&nvmet_rdma_queue_mutex);
@@ -1301,6 +1314,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
 	return 0;
 
+free_queue:
+	nvmet_rdma_free_queue(queue);
 put_device:
 	kref_put(&ndev->ref, nvmet_rdma_free_dev);
 
@@ -1406,7 +1421,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
 		struct nvmet_rdma_queue *queue)
 {
-	struct nvmet_port *port;
+	struct nvmet_rdma_port *port;
 
 	if (queue) {
 		/*
@@ -1425,7 +1440,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
 	 * cm_id destroy. use atomic xchg to make sure
 	 * we don't compete with remove_port.
 	 */
-	if (xchg(&port->priv, NULL) != cm_id)
+	if (xchg(&port->cm_id, NULL) != cm_id)
 		return 0;
 
 	/*
@@ -1456,6 +1471,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		nvmet_rdma_queue_established(queue);
 		break;
 	case RDMA_CM_EVENT_ADDR_CHANGE:
+		if (!queue) {
+			struct nvmet_rdma_port *port = cm_id->context;
+
+			schedule_delayed_work(&port->repair_work, 0);
+			break;
+		}
+		/* FALLTHROUGH */
 	case RDMA_CM_EVENT_DISCONNECTED:
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
 		nvmet_rdma_queue_disconnect(queue);
@@ -1498,43 +1520,20 @@ restart:
 	mutex_unlock(&nvmet_rdma_queue_mutex);
 }
 
-static int nvmet_rdma_add_port(struct nvmet_port *port)
+static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
 {
+	struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL);
+
+	if (cm_id)
+		rdma_destroy_id(cm_id);
+}
+
+static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
+{
+	struct sockaddr *addr = (struct sockaddr *)&port->addr;
 	struct rdma_cm_id *cm_id;
-	struct sockaddr_storage addr = { };
-	__kernel_sa_family_t af;
 	int ret;
 
-	switch (port->disc_addr.adrfam) {
-	case NVMF_ADDR_FAMILY_IP4:
-		af = AF_INET;
-		break;
-	case NVMF_ADDR_FAMILY_IP6:
-		af = AF_INET6;
-		break;
-	default:
-		pr_err("address family %d not supported\n",
-				port->disc_addr.adrfam);
-		return -EINVAL;
-	}
-
-	if (port->inline_data_size < 0) {
-		port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
-	} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
-		pr_warn("inline_data_size %u is too large, reducing to %u\n",
-			port->inline_data_size,
-			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
-		port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
-	}
-
-	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
-			port->disc_addr.trsvcid, &addr);
-	if (ret) {
-		pr_err("malformed ip/port passed: %s:%s\n",
-			port->disc_addr.traddr, port->disc_addr.trsvcid);
-		return ret;
-	}
-
 	cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
 			RDMA_PS_TCP, IB_QPT_RC);
 	if (IS_ERR(cm_id)) {
@@ -1552,23 +1551,19 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 		goto out_destroy_id;
 	}
 
-	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr);
+	ret = rdma_bind_addr(cm_id, addr);
 	if (ret) {
-		pr_err("binding CM ID to %pISpcs failed (%d)\n",
-			(struct sockaddr *)&addr, ret);
+		pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret);
 		goto out_destroy_id;
 	}
 
 	ret = rdma_listen(cm_id, 128);
 	if (ret) {
-		pr_err("listening to %pISpcs failed (%d)\n",
-			(struct sockaddr *)&addr, ret);
+		pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
 		goto out_destroy_id;
 	}
 
-	pr_info("enabling port %d (%pISpcs)\n",
-		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
-	port->priv = cm_id;
+	port->cm_id = cm_id;
 	return 0;
 
 out_destroy_id:
@@ -1576,18 +1571,92 @@ out_destroy_id:
 	return ret;
 }
 
-static void nvmet_rdma_remove_port(struct nvmet_port *port)
+static void nvmet_rdma_repair_port_work(struct work_struct *w)
 {
-	struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
+	struct nvmet_rdma_port *port = container_of(to_delayed_work(w),
+			struct nvmet_rdma_port, repair_work);
+	int ret;
 
-	if (cm_id)
-		rdma_destroy_id(cm_id);
+	nvmet_rdma_disable_port(port);
+	ret = nvmet_rdma_enable_port(port);
+	if (ret)
+		schedule_delayed_work(&port->repair_work, 5 * HZ);
+}
+
+static int nvmet_rdma_add_port(struct nvmet_port *nport)
+{
+	struct nvmet_rdma_port *port;
+	__kernel_sa_family_t af;
+	int ret;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	nport->priv = port;
+	port->nport = nport;
+	INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work);
+
+	switch (nport->disc_addr.adrfam) {
+	case NVMF_ADDR_FAMILY_IP4:
+		af = AF_INET;
+		break;
+	case NVMF_ADDR_FAMILY_IP6:
+		af = AF_INET6;
+		break;
+	default:
+		pr_err("address family %d not supported\n",
+				nport->disc_addr.adrfam);
+		ret = -EINVAL;
+		goto out_free_port;
+	}
+
+	if (nport->inline_data_size < 0) {
+		nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
+	} else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
+		pr_warn("inline_data_size %u is too large, reducing to %u\n",
+			nport->inline_data_size,
+			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
+		nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
+	}
+
+	ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
+			nport->disc_addr.trsvcid, &port->addr);
+	if (ret) {
+		pr_err("malformed ip/port passed: %s:%s\n",
+			nport->disc_addr.traddr, nport->disc_addr.trsvcid);
+		goto out_free_port;
+	}
+
+	ret = nvmet_rdma_enable_port(port);
+	if (ret)
+		goto out_free_port;
+
+	pr_info("enabling port %d (%pISpcs)\n",
+		le16_to_cpu(nport->disc_addr.portid),
+		(struct sockaddr *)&port->addr);
+
+	return 0;
+
+out_free_port:
+	kfree(port);
+	return ret;
+}
+
+static void nvmet_rdma_remove_port(struct nvmet_port *nport)
+{
+	struct nvmet_rdma_port *port = nport->priv;
+
+	cancel_delayed_work_sync(&port->repair_work);
+	nvmet_rdma_disable_port(port);
+	kfree(port);
 }
 
 static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
-		struct nvmet_port *port, char *traddr)
+		struct nvmet_port *nport, char *traddr)
 {
-	struct rdma_cm_id *cm_id = port->priv;
+	struct nvmet_rdma_port *port = nport->priv;
+	struct rdma_cm_id *cm_id = port->cm_id;
 
 	if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
 		struct nvmet_rdma_rsp *rsp =
@@ -1597,7 +1666,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
 
 		sprintf(traddr, "%pISc", addr);
 	} else {
-		memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+		memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
 	}
 }
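nvme_to_user_ptr() above exists because a 32-bit kernel truncates the 64-bit addr/metadata fields to a 32-bit pointer anyway, so old 32-bit programs could get away with leaving stale data in the upper half of those fields; under compat on a 64-bit kernel the full value used to be taken as-is and the ioctl failed. A hypothetical 32-bit userspace fragment showing the pattern being accommodated (the struct, union, and buf are illustrative, not from the patch):

#include <stdint.h>

struct fake_io { uint64_t addr; };	/* stand-in for the real ioctl struct */

void fill_io(struct fake_io *io, void *buf)
{
	union { uint64_t val; void *ptr; } u;

	u.val = UINT64_MAX;	/* stale stack data in the upper 32 bits */
	u.ptr = buf;		/* a 32-bit ABI store fills only the low half */
	io->addr = u.val;	/* worked on 32-bit kernels, broke under compat */
}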
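The nvme-tcp hunks encode one rule: only the physical segment count says whether bytes actually travel on the wire. A command such as Write Zeroes carries a nonzero blk_rq_payload_bytes() (the length being zeroed) but zero blk_rq_nr_phys_segments() (no pages), so keying data_len and the SGL choice off payload bytes mis-classified it as an inline write. A short sketch restating the rule (hypothetical helper, not patch code):

/*
 * Bytes that will actually be sent or received on the wire: zero for
 * page-less requests such as Flush or Write Zeroes.
 */
static inline u32 nvme_tcp_wire_bytes(struct request *rq)
{
	return blk_rq_nr_phys_segments(rq) ? blk_rq_payload_bytes(rq) : 0;
}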
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index f6c8963c915d..db4a04a207ec 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1985,8 +1985,6 @@ out_unlock:
 
 /* Declare and initialization an instance of the FC NVME template. */
 static struct nvme_fc_port_template lpfc_nvme_template = {
-	.module	= THIS_MODULE,
-
 	/* initiator-based functions */
 	.localport_delete  = lpfc_nvme_localport_delete,
 	.remoteport_delete = lpfc_nvme_remoteport_delete,
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 84e2a980dea0..4886d247df6f 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -610,7 +610,6 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport)
 }
 
 static struct nvme_fc_port_template qla_nvme_fc_transport = {
-	.module	= THIS_MODULE,
 	.localport_delete = qla_nvme_localport_delete,
 	.remoteport_delete = qla_nvme_remoteport_delete,
 	.create_queue = qla_nvme_alloc_queue,
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index e4a6949fd171..35f8ffe92b70 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -46,6 +46,7 @@ struct blkcg_gq;
 struct blkcg {
 	struct cgroup_subsys_state	css;
 	spinlock_t			lock;
+	refcount_t			online_pin;
 
 	struct radix_tree_root		blkg_tree;
 	struct blkcg_gq	__rcu		*blkg_hint;
@@ -56,7 +57,6 @@ struct blkcg {
 	struct list_head		all_blkcgs_node;
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct list_head		cgwb_list;
-	refcount_t			cgwb_refcnt;
 #endif
 };
 
@@ -412,47 +412,38 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
 
 extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
 
-#ifdef CONFIG_CGROUP_WRITEBACK
-
 /**
- * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
+ * blkcg_pin_online - pin online state
  * @blkcg: blkcg of interest
  *
- * This is used to track the number of active wb's related to a blkcg.
+ * While pinned, a blkcg is kept online.  This is primarily used to
+ * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
+ * while an associated cgwb is still active.
  */
-static inline void blkcg_cgwb_get(struct blkcg *blkcg)
+static inline void blkcg_pin_online(struct blkcg *blkcg)
 {
-	refcount_inc(&blkcg->cgwb_refcnt);
+	refcount_inc(&blkcg->online_pin);
 }
 
 /**
- * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
+ * blkcg_unpin_online - unpin online state
  * @blkcg: blkcg of interest
  *
- * This is used to track the number of active wb's related to a blkcg.
- * When this count goes to zero, all active wb has finished so the
+ * This is primarily used to impedance-match blkg and cgwb lifetimes so
+ * that blkg doesn't go offline while an associated cgwb is still active.
+ * When this count goes to zero, all active cgwbs have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
- * This work may occur in cgwb_release_workfn() on the cgwb_release
- * workqueue.
 */
-static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+static inline void blkcg_unpin_online(struct blkcg *blkcg)
 {
-	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
+	do {
+		if (!refcount_dec_and_test(&blkcg->online_pin))
+			break;
 		blkcg_destroy_blkgs(blkcg);
+		blkcg = blkcg_parent(blkcg);
+	} while (blkcg);
 }
 
-#else
-
-static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
-
-static inline void blkcg_cgwb_put(struct blkcg *blkcg)
-{
-	/* wb isn't being accounted, so trigger destruction right away */
-	blkcg_destroy_blkgs(blkcg);
-}
-
-#endif
-
 /**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
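The blk-cgroup.h hunk above turns the cgwb refcount into a general online pin that cascades rootward. A worked trace of the arithmetic for a parent P and child C (a toy walk-through, not a kernel code path):

/*
 *   css_alloc(P)           P.online_pin = 1   (base pin)
 *   css_alloc(C)           C.online_pin = 1   (base pin)
 *   css_online(C)          P.online_pin = 2   (child pins parent)
 *   cgwb attached to C     C.online_pin = 2   (blkcg_pin_online(C))
 *   css_offline(C)         C.online_pin = 1   (base pin dropped)
 *   cgwb released          C.online_pin = 0   -> blkcg_destroy_blkgs(C);
 *                          the loop then unpins P: P.online_pin = 1
 *   css_offline(P)         P.online_pin = 0   -> blkcg_destroy_blkgs(P)
 *
 * Blkg destruction therefore always completes child-first, towards the
 * root, which is what the blkcg_css_online() hook added in blk-cgroup.c
 * guarantees.
 */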
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 6d0d70f3219c..10f81629b9ce 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -270,8 +270,6 @@ struct nvme_fc_remote_port {
 *
 * Host/Initiator Transport Entrypoints/Parameters:
 *
- * @module:  The LLDD module using the interface
- *
 * @localport_delete:  The LLDD initiates deletion of a localport via
 *       nvme_fc_deregister_localport(). However, the teardown is
 *       asynchronous. This routine is called upon the completion of the
@@ -385,8 +383,6 @@
 *       Value is Mandatory. Allowed to be zero.
 */
 struct nvme_fc_port_template {
-	struct module	*module;
-
 	/* initiator-based functions */
 	void	(*localport_delete)(struct nvme_fc_local_port *);
 	void	(*remoteport_delete)(struct nvme_fc_remote_port *);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 62f05f605fb5..c81b4f3a7268 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -491,8 +491,8 @@ static void cgwb_release_workfn(struct work_struct *work)
 	css_put(wb->blkcg_css);
 	mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
-	/* triggers blkg destruction if cgwb_refcnt becomes zero */
-	blkcg_cgwb_put(blkcg);
+	/* triggers blkg destruction if no online users left */
+	blkcg_unpin_online(blkcg);
 
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
@@ -592,7 +592,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 		list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
 		list_add(&wb->memcg_node, memcg_cgwb_list);
 		list_add(&wb->blkcg_node, blkcg_cgwb_list);
-		blkcg_cgwb_get(blkcg);
+		blkcg_pin_online(blkcg);
 		css_get(memcg_css);
 		css_get(blkcg_css);
 	}