diff --git a/block/blk-core.c b/block/blk-core.c index edc276899116..29b08428ae45 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -374,6 +374,7 @@ void blk_clear_preempt_only(struct request_queue *q) spin_lock_irqsave(q->queue_lock, flags); queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); + wake_up_all(&q->mq_freeze_wq); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL_GPL(blk_clear_preempt_only); @@ -795,15 +796,38 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask) } EXPORT_SYMBOL(blk_alloc_queue); -int blk_queue_enter(struct request_queue *q, bool nowait) +/** + * blk_queue_enter() - try to increase q->q_usage_counter + * @q: request queue pointer + * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT + */ +int blk_queue_enter(struct request_queue *q, unsigned int flags) { + const bool preempt = flags & BLK_MQ_REQ_PREEMPT; + while (true) { + bool success = false; int ret; - if (percpu_ref_tryget_live(&q->q_usage_counter)) + rcu_read_lock_sched(); + if (percpu_ref_tryget_live(&q->q_usage_counter)) { + /* + * The code that sets the PREEMPT_ONLY flag is + * responsible for ensuring that that flag is globally + * visible before the queue is unfrozen. + */ + if (preempt || !blk_queue_preempt_only(q)) { + success = true; + } else { + percpu_ref_put(&q->q_usage_counter); + } + } + rcu_read_unlock_sched(); + + if (success) return 0; - if (nowait) + if (flags & BLK_MQ_REQ_NOWAIT) return -EBUSY; /* @@ -816,7 +840,8 @@ int blk_queue_enter(struct request_queue *q, bool nowait) smp_rmb(); ret = wait_event_interruptible(q->mq_freeze_wq, - !atomic_read(&q->mq_freeze_depth) || + (atomic_read(&q->mq_freeze_depth) == 0 && + (preempt || !blk_queue_preempt_only(q))) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; @@ -1445,8 +1470,7 @@ static struct request *blk_old_get_request(struct request_queue *q, /* create ioc upfront */ create_io_context(gfp_mask, q->node); - ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM) || - (op & REQ_NOWAIT)); + ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); spin_lock_irq(q->queue_lock); @@ -2267,8 +2291,10 @@ blk_qc_t generic_make_request(struct bio *bio) current->bio_list = bio_list_on_stack; do { struct request_queue *q = bio->bi_disk->queue; + unsigned int flags = bio->bi_opf & REQ_NOWAIT ? + BLK_MQ_REQ_NOWAIT : 0; - if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) { + if (likely(blk_queue_enter(q, flags) == 0)) { struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ @@ -2327,7 +2353,7 @@ blk_qc_t direct_make_request(struct bio *bio) if (!generic_make_request_checks(bio)) return BLK_QC_T_NONE; - if (unlikely(blk_queue_enter(q, nowait))) { + if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) { if (nowait && !blk_queue_dying(q)) bio->bi_status = BLK_STS_AGAIN; else diff --git a/block/blk-mq.c b/block/blk-mq.c index e21876778cec..211bc8a3e2cc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -389,7 +389,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, struct request *rq; int ret; - ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); + ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); @@ -428,7 +428,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, if (hctx_idx >= q->nr_hw_queues) return ERR_PTR(-EIO); - ret = blk_queue_enter(q, true); + ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eb129dfc2ebe..f907e2f8c1dd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2947,21 +2947,37 @@ static void scsi_wait_for_queuecommand(struct scsi_device *sdev) int scsi_device_quiesce(struct scsi_device *sdev) { + struct request_queue *q = sdev->request_queue; int err; + /* + * It is allowed to call scsi_device_quiesce() multiple times from + * the same context but concurrent scsi_device_quiesce() calls are + * not allowed. + */ + WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); + + blk_set_preempt_only(q); + + blk_mq_freeze_queue(q); + /* + * Ensure that the effect of blk_set_preempt_only() will be visible + * for percpu_ref_tryget() callers that occur after the queue + * unfreeze even if the queue was already frozen before this function + * was called. See also https://lwn.net/Articles/573497/. + */ + synchronize_rcu(); + blk_mq_unfreeze_queue(q); + mutex_lock(&sdev->state_mutex); err = scsi_device_set_state(sdev, SDEV_QUIESCE); + if (err == 0) + sdev->quiesced_by = current; + else + blk_clear_preempt_only(q); mutex_unlock(&sdev->state_mutex); - if (err) - return err; - - scsi_run_queue(sdev->request_queue); - while (atomic_read(&sdev->device_busy)) { - msleep_interruptible(200); - scsi_run_queue(sdev->request_queue); - } - return 0; + return err; } EXPORT_SYMBOL(scsi_device_quiesce); @@ -2981,9 +2997,11 @@ void scsi_device_resume(struct scsi_device *sdev) * device deleted during suspend) */ mutex_lock(&sdev->state_mutex); - if (sdev->sdev_state == SDEV_QUIESCE && - scsi_device_set_state(sdev, SDEV_RUNNING) == 0) - scsi_run_queue(sdev->request_queue); + WARN_ON_ONCE(!sdev->quiesced_by); + sdev->quiesced_by = NULL; + blk_clear_preempt_only(sdev->request_queue); + if (sdev->sdev_state == SDEV_QUIESCE) + scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); } EXPORT_SYMBOL(scsi_device_resume); diff --git a/fs/block_dev.c b/fs/block_dev.c index 4afa4d5ff969..04973f484422 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -662,7 +662,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, if (!ops->rw_page || bdev_get_integrity(bdev)) return result; - result = blk_queue_enter(bdev->bd_queue, false); + result = blk_queue_enter(bdev->bd_queue, 0); if (result) return result; result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false); @@ -698,7 +698,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; - result = blk_queue_enter(bdev->bd_queue, false); + result = blk_queue_enter(bdev->bd_queue, 0); if (result) return result; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2147e2381a22..402c9d536ae1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -959,7 +959,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int blk_queue_enter(struct request_queue *q, bool nowait); +extern int blk_queue_enter(struct request_queue *q, unsigned int flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_start_queue_async(struct request_queue *q); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 82e93ee94708..6f0f1e242e23 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -219,6 +219,7 @@ struct scsi_device { unsigned char access_state; struct mutex state_mutex; enum scsi_device_state sdev_state; + struct task_struct *quiesced_by; unsigned long sdev_data[0]; } __attribute__((aligned(sizeof(unsigned long))));