From c0332694903a37cf8ecdc9102d5c9e09cf8643d0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 1 May 2017 08:58:49 -0700 Subject: [PATCH 01/28] block: Remove elevator_change() Since commit 84253394927c ("remove the mg_disk driver") removed the only caller of elevator_change(), also remove the elevator_change() function itself. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Markus Trippelsdorf Signed-off-by: Jens Axboe --- block/elevator.c | 13 ------------- include/linux/elevator.h | 1 - 2 files changed, 14 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index bf11e70f008b..80f485451096 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1088,19 +1088,6 @@ static int __elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, e); } -int elevator_change(struct request_queue *q, const char *name) -{ - int ret; - - /* Protect q->elevator from elevator_init() */ - mutex_lock(&q->sysfs_lock); - ret = __elevator_change(q, name); - mutex_unlock(&q->sysfs_lock); - - return ret; -} -EXPORT_SYMBOL(elevator_change); - static inline bool elv_support_iosched(struct request_queue *q) { if (q->mq_ops && q->tag_set && (q->tag_set->flags & diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 3a216318ae73..d44840368ee7 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -214,7 +214,6 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct request_queue *, struct elevator_queue *); -extern int elevator_change(struct request_queue *, const char *); extern bool elv_bio_merge_ok(struct request *, struct bio *); extern struct elevator_queue *elevator_alloc(struct request_queue *, struct elevator_type *); From 0f6422a2c57c6afcf66ead903dc3fa6641184aa4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Apr 2017 16:29:26 -0600 Subject: [PATCH 02/28] mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command() All callers can safely block. Kill the atomic/block argument, and remove the argument from all callers. 
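As an illustrative sketch (not part of the patch itself), a blocking caller such as mtip_get_identify() now simply drops the gfp_t argument; the argument values below are taken from the existing IDENTIFY call site and the surrounding error handling is assumed:

	/* sketch: issue IDENTIFY and block until it completes or times out */
	if (mtip_exec_internal_command(port, &fis, 5,
				       port->identify_dma,
				       sizeof(u16) * ATA_ID_WORDS,
				       0,
				       MTIP_INT_CMD_TIMEOUT_MS) < 0)
		return -1;	/* identify failed or timed out */
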
Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 148 +++++++++--------------------- 1 file changed, 43 insertions(+), 105 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 02804cc79d82..d81d797ee65d 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -609,11 +609,6 @@ static void mtip_completion(struct mtip_port *port, complete(waiting); } -static void mtip_null_completion(struct mtip_port *port, - int tag, struct mtip_cmd *command, int status) -{ -} - static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, dma_addr_t buffer_dma, unsigned int sectors); static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, @@ -1117,7 +1112,6 @@ static int mtip_exec_internal_command(struct mtip_port *port, dma_addr_t buffer, int buf_len, u32 opts, - gfp_t atomic, unsigned long timeout) { struct mtip_cmd_sg *command_sg; @@ -1146,30 +1140,22 @@ static int mtip_exec_internal_command(struct mtip_port *port, clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); - if (atomic == GFP_KERNEL) { - if (fis->command != ATA_CMD_STANDBYNOW1) { - /* wait for io to complete if non atomic */ - if (mtip_quiesce_io(port, - MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) { - dev_warn(&dd->pdev->dev, - "Failed to quiesce IO\n"); - mtip_put_int_command(dd, int_cmd); - clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); - return -EBUSY; - } + if (fis->command != ATA_CMD_STANDBYNOW1) { + /* wait for io to complete if non atomic */ + if (mtip_quiesce_io(port, + MTIP_QUIESCE_IO_TIMEOUT_MS, GFP_KERNEL) < 0) { + dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); + mtip_put_int_command(dd, int_cmd); + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); + wake_up_interruptible(&port->svc_wait); + return -EBUSY; } - - /* Set the completion function and data for the command. */ - int_cmd->comp_data = &wait; - int_cmd->comp_func = mtip_completion; - - } else { - /* Clear completion - we're going to poll */ - int_cmd->comp_data = NULL; - int_cmd->comp_func = mtip_null_completion; } + /* Set the completion function and data for the command. */ + int_cmd->comp_data = &wait; + int_cmd->comp_func = mtip_completion; + /* Copy the command to the command table */ memcpy(int_cmd->command, fis, fis_len*4); @@ -1198,81 +1184,41 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Issue the command to the hardware */ mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); - if (atomic == GFP_KERNEL) { - /* Wait for the command to complete or timeout. */ - if ((rv = wait_for_completion_interruptible_timeout( - &wait, - msecs_to_jiffies(timeout))) <= 0) { + /* Wait for the command to complete or timeout. 
*/ + rv = wait_for_completion_interruptible_timeout(&wait, + msecs_to_jiffies(timeout)); + if (rv <= 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ + dev_err(&dd->pdev->dev, + "Internal command [%02X] was interrupted after %u ms\n", + fis->command, + jiffies_to_msecs(jiffies - start)); + rv = -EINTR; + goto exec_ic_exit; + } else if (rv == 0) /* timeout */ + dev_err(&dd->pdev->dev, + "Internal command did not complete [%02X] within timeout of %lu ms\n", + fis->command, timeout); + else + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", + fis->command, rv, timeout); - if (rv == -ERESTARTSYS) { /* interrupted */ - dev_err(&dd->pdev->dev, - "Internal command [%02X] was interrupted after %u ms\n", - fis->command, - jiffies_to_msecs(jiffies - start)); - rv = -EINTR; - goto exec_ic_exit; - } else if (rv == 0) /* timeout */ - dev_err(&dd->pdev->dev, - "Internal command did not complete [%02X] within timeout of %lu ms\n", - fis->command, timeout); - else - dev_err(&dd->pdev->dev, - "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", - fis->command, rv, timeout); - - if (mtip_check_surprise_removal(dd->pdev) || - test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &dd->dd_flag)) { - dev_err(&dd->pdev->dev, - "Internal command [%02X] wait returned due to SR\n", - fis->command); - rv = -ENXIO; - goto exec_ic_exit; - } - mtip_device_reset(dd); /* recover from timeout issue */ - rv = -EAGAIN; + if (mtip_check_surprise_removal(dd->pdev) || + test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &dd->dd_flag)) { + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned due to SR\n", + fis->command); + rv = -ENXIO; goto exec_ic_exit; } - } else { - u32 hba_stat, port_stat; - - /* Spin for checking if command still outstanding */ - timeout = jiffies + msecs_to_jiffies(timeout); - while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL)) - && time_before(jiffies, timeout)) { - if (mtip_check_surprise_removal(dd->pdev)) { - rv = -ENXIO; - goto exec_ic_exit; - } - if ((fis->command != ATA_CMD_STANDBYNOW1) && - test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &dd->dd_flag)) { - rv = -ENXIO; - goto exec_ic_exit; - } - port_stat = readl(port->mmio + PORT_IRQ_STAT); - if (!port_stat) - continue; - - if (port_stat & PORT_IRQ_ERR) { - dev_err(&dd->pdev->dev, - "Internal command [%02X] failed\n", - fis->command); - mtip_device_reset(dd); - rv = -EIO; - goto exec_ic_exit; - } else { - writel(port_stat, port->mmio + PORT_IRQ_STAT); - hba_stat = readl(dd->mmio + HOST_IRQ_STAT); - if (hba_stat) - writel(hba_stat, - dd->mmio + HOST_IRQ_STAT); - } - break; - } + mtip_device_reset(dd); /* recover from timeout issue */ + rv = -EAGAIN; + goto exec_ic_exit; } + rv = 0; if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { rv = -ENXIO; @@ -1391,7 +1337,6 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) port->identify_dma, sizeof(u16) * ATA_ID_WORDS, 0, - GFP_KERNEL, MTIP_INT_CMD_TIMEOUT_MS) < 0) { rv = -1; @@ -1477,7 +1422,6 @@ static int mtip_standby_immediate(struct mtip_port *port) 0, 0, 0, - GFP_ATOMIC, timeout); dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n", jiffies_to_msecs(jiffies - start)); @@ -1523,7 +1467,6 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, buffer_dma, sectors * ATA_SECT_SIZE, 0, - GFP_ATOMIC, MTIP_INT_CMD_TIMEOUT_MS); } @@ -1558,7 +1501,6 @@ static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer, 
buffer_dma, ATA_SECT_SIZE, 0, - GFP_ATOMIC, 15000); } @@ -1686,7 +1628,6 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, dma_addr, ATA_SECT_SIZE, 0, - GFP_KERNEL, MTIP_TRIM_TIMEOUT_MS) < 0) rv = -EIO; @@ -1850,7 +1791,6 @@ static int exec_drive_task(struct mtip_port *port, u8 *command) 0, 0, 0, - GFP_KERNEL, to) < 0) { return -1; } @@ -1946,7 +1886,6 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, (xfer_sz ? dma_addr : 0), (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0), 0, - GFP_KERNEL, to) < 0) { rv = -EFAULT; @@ -2189,7 +2128,6 @@ static int exec_drive_taskfile(struct driver_data *dd, dma_buffer, transfer_size, 0, - GFP_KERNEL, timeout) < 0) { err = -EIO; goto abort; From 8afdd94c74e416de74a8ee61d79e4bf93466420b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Apr 2017 16:32:41 -0600 Subject: [PATCH 03/28] mtip32xx: kill atomic argument to mtip_quiesce_io() All callers now pass in GFP_KERNEL, get rid of the argument. Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index d81d797ee65d..36f3d34f2156 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1035,14 +1035,12 @@ static bool mtip_pause_ncq(struct mtip_port *port, * * @port Pointer to port data structure * @timeout Max duration to wait (ms) - * @atomic gfp_t flag to indicate blockable context or not * * return value * 0 Success * -EBUSY Commands still active */ -static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout, - gfp_t atomic) +static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) { unsigned long to; unsigned int n; @@ -1053,18 +1051,12 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout, to = jiffies + msecs_to_jiffies(timeout); do { if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) && - atomic == GFP_KERNEL) { + test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { msleep(20); continue; /* svc thd is actively issuing commands */ } - if (atomic == GFP_KERNEL) - msleep(100); - else { - cpu_relax(); - udelay(100); - } + msleep(100); if (mtip_check_surprise_removal(port->dd->pdev)) goto err_fault; @@ -1142,8 +1134,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, if (fis->command != ATA_CMD_STANDBYNOW1) { /* wait for io to complete if non atomic */ - if (mtip_quiesce_io(port, - MTIP_QUIESCE_IO_TIMEOUT_MS, GFP_KERNEL) < 0) { + if (mtip_quiesce_io(port, MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) { dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); mtip_put_int_command(dd, int_cmd); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -4106,8 +4097,7 @@ static int mtip_block_remove(struct driver_data *dd) * Explicitly wait here for IOs to quiesce, * as mtip_standby_drive usually won't wait for IOs. */ - if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS, - GFP_KERNEL)) + if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS)) mtip_standby_drive(dd); } else From baed548a98397f57a71d61917177f7d42ab17881 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Apr 2017 07:54:06 -0600 Subject: [PATCH 04/28] mtip32xx: abstract out "are any commands active" helper This is a prep patch for backoff in ->queue_rq() for non-ncq commands. 
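A hedged sketch of the intended use (the actual hookup happens in the next patch): ->queue_rq() can now back an internal command off while NCQ commands are still outstanding, and blk-mq retries the request later:

	/* sketch: defer the reserved/internal command until the port is idle */
	if (mtip_commands_active(dd->port))
		return BLK_MQ_RQ_QUEUE_BUSY;
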
Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 36f3d34f2156..aee94f260725 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1030,6 +1030,22 @@ static bool mtip_pause_ncq(struct mtip_port *port, return false; } +static bool mtip_commands_active(struct mtip_port *port) +{ + unsigned int active; + unsigned int n; + + /* + * Ignore s_active bit 0 of array element 0. + * This bit will always be set + */ + active = readl(port->s_active[0]) & 0xFFFFFFFE; + for (n = 1; n < port->dd->slot_groups; n++) + active |= readl(port->s_active[n]); + + return active != 0; +} + /* * Wait for port to quiesce * @@ -1043,8 +1059,7 @@ static bool mtip_pause_ncq(struct mtip_port *port, static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) { unsigned long to; - unsigned int n; - unsigned int active = 1; + bool active = true; blk_mq_stop_hw_queues(port->dd->queue); @@ -1061,14 +1076,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) if (mtip_check_surprise_removal(port->dd->pdev)) goto err_fault; - /* - * Ignore s_active bit 0 of array element 0. - * This bit will always be set - */ - active = readl(port->s_active[0]) & 0xFFFFFFFE; - for (n = 1; n < port->dd->slot_groups; n++) - active |= readl(port->s_active[n]); - + active = mtip_commands_active(port); if (!active) break; } while (time_before(jiffies, to)); From 3f5e6a35774cd6bb4fd6b32edb4efd2a3f90e4dd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Apr 2017 10:45:08 -0600 Subject: [PATCH 05/28] mtip32xx: convert internal command issue to block IO path The driver special cases certain things for command issue, depending on whether it's an internal command or not. Make the internal commands use the regular infrastructure for issuing IO. Since this is an 8-group souped up AHCI variant, we have to deal with NCQ vs non-queueable commands. Do this from the queue_rq handler, by backing off unless the drive is idle. Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 103 +++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 30 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index aee94f260725..9749b099a914 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -195,7 +195,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) if (mtip_check_surprise_removal(dd->pdev)) return NULL; - rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED); + rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED); if (IS_ERR(rq)) return NULL; @@ -1088,6 +1088,13 @@ err_fault: return -EFAULT; } +struct mtip_int_cmd { + int fis_len; + dma_addr_t buffer; + int buf_len; + u32 opts; +}; + /* * Execute an internal command and wait for the completion. 
* @@ -1114,10 +1121,16 @@ static int mtip_exec_internal_command(struct mtip_port *port, u32 opts, unsigned long timeout) { - struct mtip_cmd_sg *command_sg; DECLARE_COMPLETION_ONSTACK(wait); struct mtip_cmd *int_cmd; struct driver_data *dd = port->dd; + struct request *rq; + struct mtip_int_cmd icmd = { + .fis_len = fis_len, + .buffer = buffer, + .buf_len = buf_len, + .opts = opts + }; int rv = 0; unsigned long start; @@ -1132,6 +1145,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n"); return -EFAULT; } + rq = blk_mq_rq_from_pdu(int_cmd); + rq->end_io_data = &icmd; set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1158,35 +1173,16 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Copy the command to the command table */ memcpy(int_cmd->command, fis, fis_len*4); - /* Populate the SG list */ - int_cmd->command_header->opts = - __force_bit2int cpu_to_le32(opts | fis_len); - if (buf_len) { - command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ; - - command_sg->info = - __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF); - command_sg->dba = - __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF); - command_sg->dba_upper = - __force_bit2int cpu_to_le32((buffer >> 16) >> 16); - - int_cmd->command_header->opts |= - __force_bit2int cpu_to_le32((1 << 16)); - } - - /* Populate the command header */ - int_cmd->command_header->byte_count = 0; - start = jiffies; + rq->timeout = timeout; - /* Issue the command to the hardware */ - mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); + /* insert request and run queue */ + blk_execute_rq_nowait(rq->q, NULL, rq, true, NULL); - /* Wait for the command to complete or timeout. */ - rv = wait_for_completion_interruptible_timeout(&wait, - msecs_to_jiffies(timeout)); - if (rv <= 0) { + wait_for_completion(&wait); + rv = int_cmd->status; + + if (rv < 0) { if (rv == -ERESTARTSYS) { /* interrupted */ dev_err(&dd->pdev->dev, "Internal command [%02X] was interrupted after %u ms\n", @@ -1217,7 +1213,6 @@ static int mtip_exec_internal_command(struct mtip_port *port, goto exec_ic_exit; } - rv = 0; if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { rv = -ENXIO; @@ -3762,6 +3757,44 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, return false; } +static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + struct driver_data *dd = hctx->queue->queuedata; + struct mtip_int_cmd *icmd = rq->end_io_data; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct mtip_cmd_sg *command_sg; + + if (mtip_commands_active(dd->port)) + return BLK_MQ_RQ_QUEUE_BUSY; + + rq->end_io_data = NULL; + + /* Populate the SG list */ + cmd->command_header->opts = + __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len); + if (icmd->buf_len) { + command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ; + + command_sg->info = + __force_bit2int cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF); + command_sg->dba = + __force_bit2int cpu_to_le32(icmd->buffer & 0xFFFFFFFF); + command_sg->dba_upper = + __force_bit2int cpu_to_le32((icmd->buffer >> 16) >> 16); + + cmd->command_header->opts |= + __force_bit2int cpu_to_le32((1 << 16)); + } + + /* Populate the command header */ + cmd->command_header->byte_count = 0; + + blk_mq_start_request(rq); + mtip_issue_non_ncq_command(dd->port, rq->tag); + return BLK_MQ_RQ_QUEUE_OK; +} + static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -3770,6 +3803,9 @@ static int 
mtip_queue_rq(struct blk_mq_hw_ctx *hctx, mtip_init_cmd_header(rq); + if (blk_rq_is_passthrough(rq)) + return mtip_issue_reserved_cmd(hctx, rq); + if (unlikely(mtip_check_unal_depth(hctx, rq))) return BLK_MQ_RQ_QUEUE_BUSY; @@ -3825,8 +3861,14 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, { struct driver_data *dd = req->q->queuedata; - if (reserved) + if (reserved) { + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); + + cmd->status = -ETIME; + if (cmd->comp_func) + cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, cmd, -ETIME); goto exit_handler; + } if (test_bit(req->tag, dd->port->cmds_to_issue)) goto exit_handler; @@ -4063,6 +4105,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) } else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); + cmd->status = -ENODEV; if (cmd->comp_func) cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, cmd, -ENODEV); From 9f2779bff2f178496fb00b89797734ee245d2c93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Apr 2017 08:53:01 -0600 Subject: [PATCH 06/28] blk-mq-sched: remove hack that bypasses scheduler for reserved requests We have update the troublesome driver (mtip32xx) to deal with this appropriately. So kill the hack that bypassed scheduler allocation and insertion for reserved requests. Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 8b361e192e8a..e79e9f18d7c2 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -82,11 +82,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, if (likely(!data->hctx)) data->hctx = blk_mq_map_queue(q, data->ctx->cpu); - /* - * For a reserved tag, allocate a normal request since we might - * have driver dependencies on the value of the internal tag. - */ - if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) { + if (e) { data->flags |= BLK_MQ_REQ_INTERNAL; /* From a800ce8ba53da88571872cbccb0e2fff8e374752 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Apr 2017 16:46:02 -0600 Subject: [PATCH 07/28] Revert "mtip32xx: pass BLK_MQ_F_NO_SCHED" This reverts commit 4981d04dd8f1ab19e2cce008da556d7f099b6e68. The driver has been converted to using the proper infrastructure for issuing internal commands. This means it's now safe to use with the scheduling infrastruture, so we can now revert the change that turned off scheduling for mtip32xx. 
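For context, a minimal sketch of what the flag controls at queue init time; this mirrors the BLK_MQ_F_NO_SCHED check visible later in this series in blk_mq_init_allocated_queue(), with the exact error handling assumed:

	/* sketch: an I/O scheduler is only attached when NO_SCHED is not set */
	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
		int ret;

		ret = blk_mq_sched_init(q);
		if (ret)
			return ERR_PTR(ret);
	}
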
Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 9749b099a914..9108be601a64 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3961,7 +3961,7 @@ static int mtip_block_initialize(struct driver_data *dd) dd->tags.reserved_tags = 1; dd->tags.cmd_size = sizeof(struct mtip_cmd); dd->tags.numa_node = dd->numa_node; - dd->tags.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED; + dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; dd->tags.driver_data = dd; dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; From d6296d39e90c9075bc2fc15f1e86dac44930d4b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 May 2017 10:19:08 -0600 Subject: [PATCH 08/28] blk-mq: update ->init_request and ->exit_request prototypes Remove the request_idx parameter, which can't be used safely now that we support I/O schedulers with blk-mq. Except for a superflous check in mtip32xx it was unused anyway. Also pass the tag_set instead of just the driver data - this allows drivers to avoid some code duplication in a follow on cleanup. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 18 +++++------------- drivers/block/loop.c | 5 ++--- drivers/block/mtip32xx/mtip32xx.c | 20 ++++++-------------- drivers/block/nbd.c | 7 +++---- drivers/block/rbd.c | 5 ++--- drivers/block/virtio_blk.c | 7 +++---- drivers/md/dm-rq.c | 7 +++---- drivers/mtd/ubi/block.c | 7 +++---- drivers/nvme/host/fc.c | 20 +++++++++----------- drivers/nvme/host/pci.c | 15 +++++++-------- drivers/nvme/host/rdma.c | 28 ++++++++++++++-------------- drivers/nvme/target/loop.c | 17 +++++++++-------- drivers/scsi/scsi_lib.c | 13 ++++++------- include/linux/blk-mq.h | 4 ++-- 14 files changed, 74 insertions(+), 99 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index bf90684a007a..b81e4a7cd7f2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1655,8 +1655,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, if (!rq) continue; - set->ops->exit_request(set->driver_data, rq, - hctx_idx, i); + set->ops->exit_request(set, rq, hctx_idx); tags->static_rqs[i] = NULL; } } @@ -1787,8 +1786,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, tags->static_rqs[i] = rq; if (set->ops->init_request) { - if (set->ops->init_request(set->driver_data, - rq, hctx_idx, i, + if (set->ops->init_request(set, rq, hctx_idx, node)) { tags->static_rqs[i] = NULL; goto fail; @@ -1849,14 +1847,10 @@ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - unsigned flush_start_tag = set->queue_depth; - blk_mq_tag_idle(hctx); if (set->ops->exit_request) - set->ops->exit_request(set->driver_data, - hctx->fq->flush_rq, hctx_idx, - flush_start_tag + hctx_idx); + set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); blk_mq_sched_exit_hctx(q, hctx, hctx_idx); @@ -1889,7 +1883,6 @@ static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) { int node; - unsigned flush_start_tag = set->queue_depth; node = hctx->numa_node; if (node == NUMA_NO_NODE) @@ -1933,9 +1926,8 @@ static int blk_mq_init_hctx(struct request_queue *q, goto sched_exit_hctx; if (set->ops->init_request && - set->ops->init_request(set->driver_data, - 
hctx->fq->flush_rq, hctx_idx, - flush_start_tag + hctx_idx, node)) + set->ops->init_request(set, hctx->fq->flush_rq, hctx_idx, + node)) goto free_fq; if (hctx->flags & BLK_MQ_F_BLOCKING) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 994403efee19..28d932906f24 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1697,9 +1697,8 @@ static void loop_queue_work(struct kthread_work *work) loop_handle_cmd(cmd); } -static int loop_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 9108be601a64..96fe6500e941 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3818,10 +3818,10 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_ERROR; } -static void mtip_free_cmd(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx) +static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx) { - struct driver_data *dd = data; + struct driver_data *dd = set->driver_data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); if (!cmd->command) @@ -3831,20 +3831,12 @@ static void mtip_free_cmd(void *data, struct request *rq, cmd->command, cmd->command_dma); } -static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, - unsigned int request_idx, unsigned int numa_node) +static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct driver_data *dd = data; + struct driver_data *dd = set->driver_data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - /* - * For flush requests, request_idx starts at the end of the - * tag space. Since we don't support FLUSH/FUA, simply return - * 0 as there's nothing to be done. 
- */ - if (request_idx >= MTIP_MAX_COMMAND_SLOTS) - return 0; - cmd->command = dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, &cmd->command_dma, GFP_KERNEL); if (!cmd->command) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ac376b9b852d..6b98ec2a3824 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1396,12 +1396,11 @@ static void nbd_dbg_close(void) #endif -static int nbd_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->nbd = data; + cmd->nbd = set->driver_data; return 0; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 089ac4179919..3670e8dd03fe 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4307,9 +4307,8 @@ out: return ret; } -static int rbd_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int rbd_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct work_struct *work = blk_mq_rq_to_pdu(rq); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f94614257462..94173de1efaa 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -573,11 +573,10 @@ static const struct device_attribute dev_attr_cache_type_rw = __ATTR(cache_type, S_IRUGO|S_IWUSR, virtblk_cache_type_show, virtblk_cache_type_store); -static int virtblk_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct virtio_blk *vblk = data; + struct virtio_blk *vblk = set->driver_data; struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); #ifdef CONFIG_VIRTIO_BLK_SCSI diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index bff7e3bdb4ed..522d4fa8db64 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -719,11 +719,10 @@ int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t) return 0; } -static int dm_mq_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - return __dm_rq_init_rq(data, rq); + return __dm_rq_init_rq(set->driver_data, rq); } static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 51f2be8889b5..5497e65439df 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -334,10 +334,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, } -static int ubiblock_init_request(void *data, struct request *req, - unsigned int hctx_idx, - unsigned int request_idx, - unsigned int numa_node) +static int ubiblock_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 4976db56e351..70e689bf1cad 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1172,12 +1172,12 @@ __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, } static void 
-nvme_fc_exit_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - return __nvme_fc_exit_request(data, op); + return __nvme_fc_exit_request(set->driver_data, op); } static int @@ -1434,11 +1434,10 @@ out_on_error: } static int -nvme_fc_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = data; + struct nvme_fc_ctrl *ctrl = set->driver_data; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1]; @@ -1446,11 +1445,10 @@ nvme_fc_init_request(void *data, struct request *rq, } static int -nvme_fc_init_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = data; + struct nvme_fc_ctrl *ctrl = set->driver_data; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_queue *queue = &ctrl->queues[0]; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c8541c3dcd19..56a315bd4d96 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -356,11 +356,11 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i nvmeq->tags = NULL; } -static int nvme_admin_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_admin_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = dev->queues[0]; @@ -383,11 +383,10 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static int nvme_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 29cf88ac3f61..dd1c6deef82f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -315,16 +315,16 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl, DMA_TO_DEVICE); } -static void nvme_rdma_exit_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx) { - return __nvme_rdma_exit_request(data, rq, hctx_idx + 1); + return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1); } -static void nvme_rdma_exit_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx) { - return __nvme_rdma_exit_request(data, 
rq, 0); + return __nvme_rdma_exit_request(set->driver_data, rq, 0); } static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl, @@ -358,18 +358,18 @@ out_free_qe: return -ENOMEM; } -static int nvme_rdma_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_rdma_init_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx, + unsigned int numa_node) { - return __nvme_rdma_init_request(data, rq, hctx_idx + 1); + return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1); } -static int nvme_rdma_init_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx, + unsigned int numa_node) { - return __nvme_rdma_init_request(data, rq, 0); + return __nvme_rdma_init_request(set->driver_data, rq, 0); } static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 304f1c87c160..feb497134aee 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -230,18 +230,19 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, return 0; } -static int nvme_loop_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_loop_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), hctx_idx + 1); + return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), + hctx_idx + 1); } -static int nvme_loop_init_admin_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), 0); + return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0); } static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1c3e87d6c48f..327b10206d63 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1999,11 +1999,10 @@ static enum blk_eh_timer_return scsi_timeout(struct request *req, return scsi_times_out(req); } -static int scsi_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int scsi_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct Scsi_Host *shost = data; + struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); cmd->sense_buffer = @@ -2014,10 +2013,10 @@ static int scsi_init_request(void *data, struct request *rq, return 0; } -static void scsi_exit_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx) +static void scsi_exit_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx) { - struct Scsi_Host *shost = data; + struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); scsi_free_sense_buffer(shost, cmd->sense_buffer); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f3e5e1de1bdb..a104832e7ae5 100644 
--- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -86,9 +86,9 @@ typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); -typedef int (init_request_fn)(void *, struct request *, unsigned int, +typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); -typedef void (exit_request_fn)(void *, struct request *, unsigned int, +typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); typedef int (reinit_request_fn)(void *, struct request *); From 7a148c2fcff83309748bfaafe121aa85b724624f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 2 May 2017 07:28:02 +0800 Subject: [PATCH 09/28] block: don't call blk_mq_quiesce_queue() after queue is frozen After queue is frozen, no request in this queue can be in use at all, so there can't be any .queue_rq() running on this queue. It isn't necessary to call blk_mq_quiesce_queue() any more, so remove it in both elevator_switch_mq() and blk_mq_update_nr_requests(). Cc: Bart Van Assche Signed-off-by: Ming Lei Fixed up the description a bit. Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 -- block/elevator.c | 3 --- 2 files changed, 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index b81e4a7cd7f2..e339247a2570 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2609,7 +2609,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return -EINVAL; blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); ret = 0; queue_for_each_hw_ctx(q, hctx, i) { @@ -2635,7 +2634,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) q->nr_requests = nr; blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); return ret; } diff --git a/block/elevator.c b/block/elevator.c index 80f485451096..ab726a5c0bf6 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -950,7 +950,6 @@ static int elevator_switch_mq(struct request_queue *q, int ret; blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); if (q->elevator) { if (q->elevator->registered) @@ -978,9 +977,7 @@ static int elevator_switch_mq(struct request_queue *q, out: blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); return ret; - } /* From 994ff079e8f6399e1f8cd43141da0f79ce7a179a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 May 2017 09:53:04 -0600 Subject: [PATCH 10/28] mtip32xx: cleanup internal tag assumptions We don't decode the internal tag to the proper group or tag indx. This works fine because we have hard wired it as 0 for now, but could break if we get rid of that. 
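A hedged sketch of the tag decoding this patch starts using consistently; the real macros live in mtip32xx.h, and the 32-tags-per-slot-group shift/mask shown here is an assumption for illustration only:

	/* sketch: a tag names a 32-bit slot group plus a bit within it */
	#define MTIP_TAG_INDEX(tag)	((tag) >> 5)	/* slot group register */
	#define MTIP_TAG_BIT(tag)	((tag) & 0x1f)	/* bit inside the group */

	group  = MTIP_TAG_INDEX(MTIP_TAG_INTERNAL);
	status = readl(port->cmd_issue[group]);
	busy   = status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL));
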
Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 96fe6500e941..3204623f746a 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -847,16 +847,15 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) struct mtip_port *port = dd->port; struct mtip_cmd *cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); - if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && - (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL))) { - if (cmd->comp_func) { - cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); - return; + if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && cmd) { + int group = MTIP_TAG_INDEX(MTIP_TAG_INTERNAL); + int status = readl(port->cmd_issue[group]); + + if (!(status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL)))) { + if (cmd->comp_func) + cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); } } - - return; } /* @@ -1213,8 +1212,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, goto exec_ic_exit; } - if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL)) { + if (readl(port->cmd_issue[MTIP_TAG_INDEX(MTIP_TAG_INTERNAL)]) + & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL))) { rv = -ENXIO; if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { mtip_device_reset(dd); From 6f63503c8af569fcd60bb27dfe740d13be0030f6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 May 2017 09:56:00 -0600 Subject: [PATCH 11/28] mtip32xx: convert internal commands to regular block infrastructure Get rid of the private end_io handlers and data, and just use the regular block IO path for these requests. This removes a lot of redundant code. Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 198 +++++------------------------- drivers/block/mtip32xx/mtip32xx.h | 10 -- 2 files changed, 30 insertions(+), 178 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3204623f746a..3a779a4f5653 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -205,66 +205,12 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) return blk_mq_rq_to_pdu(rq); } -static void mtip_put_int_command(struct driver_data *dd, struct mtip_cmd *cmd) -{ - blk_put_request(blk_mq_rq_from_pdu(cmd)); -} - -/* - * Once we add support for one hctx per mtip group, this will change a bit - */ -static struct request *mtip_rq_from_tag(struct driver_data *dd, - unsigned int tag) -{ - struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0]; - - return blk_mq_tag_to_rq(hctx->tags, tag); -} - static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd, unsigned int tag) { - struct request *rq = mtip_rq_from_tag(dd, tag); + struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0]; - return blk_mq_rq_to_pdu(rq); -} - -/* - * IO completion function. - * - * This completion function is called by the driver ISR when a - * command that was issued by the kernel completes. It first calls the - * asynchronous completion function which normally calls back into the block - * layer passing the asynchronous callback data, then unmaps the - * scatter list associated with the completed command, and finally - * clears the allocated bit associated with the completed command. - * - * @port Pointer to the port data structure. - * @tag Tag of the command. 
- * @data Pointer to driver_data. - * @status Completion status. - * - * return value - * None - */ -static void mtip_async_complete(struct mtip_port *port, - int tag, struct mtip_cmd *cmd, int status) -{ - struct driver_data *dd = port->dd; - struct request *rq; - - if (unlikely(!dd) || unlikely(!port)) - return; - - if (unlikely(status == PORT_IRQ_TF_ERR)) { - dev_warn(&port->dd->pdev->dev, - "Command tag %d failed due to TFE\n", tag); - } - - rq = mtip_rq_from_tag(dd, tag); - - cmd->status = status; - blk_mq_complete_request(rq); + return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(hctx->tags, tag)); } /* @@ -581,38 +527,19 @@ static void print_tags(struct driver_data *dd, "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap); } -/* - * Internal command completion callback function. - * - * This function is normally called by the driver ISR when an internal - * command completed. This function signals the command completion by - * calling complete(). - * - * @port Pointer to the port data structure. - * @tag Tag of the command that has completed. - * @data Pointer to a completion structure. - * @status Completion status. - * - * return value - * None - */ -static void mtip_completion(struct mtip_port *port, - int tag, struct mtip_cmd *command, int status) -{ - struct completion *waiting = command->comp_data; - if (unlikely(status == PORT_IRQ_TF_ERR)) - dev_warn(&port->dd->pdev->dev, - "Internal command %d completed with TFE\n", tag); - - command->comp_func = NULL; - command->comp_data = NULL; - complete(waiting); -} - static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, dma_addr_t buffer_dma, unsigned int sectors); static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, struct smart_attr *attrib); + +static void mtip_complete_command(struct mtip_cmd *cmd, int status) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + + cmd->status = status; + blk_mq_complete_request(req); +} + /* * Handle an error. * @@ -641,11 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd) if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); - - if (cmd->comp_data && cmd->comp_func) { - cmd->comp_func(port, MTIP_TAG_INTERNAL, - cmd, PORT_IRQ_TF_ERR); - } + mtip_complete_command(cmd, -EIO); return; } @@ -672,19 +595,9 @@ static void mtip_handle_tfe(struct driver_data *dd) continue; cmd = mtip_cmd_from_tag(dd, tag); - if (likely(cmd->comp_func)) { - set_bit(tag, tagaccum); - cmd_cnt++; - cmd->comp_func(port, tag, cmd, 0); - } else { - dev_err(&port->dd->pdev->dev, - "Missing completion func for tag %d", - tag); - if (mtip_check_surprise_removal(dd->pdev)) { - /* don't proceed further */ - return; - } - } + mtip_complete_command(cmd, 0); + set_bit(tag, tagaccum); + cmd_cnt++; } } @@ -754,10 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd) tag, fail_reason != NULL ? 
fail_reason : "unknown"); - if (cmd->comp_func) { - cmd->comp_func(port, tag, - cmd, -ENODATA); - } + mtip_complete_command(cmd, -ENODATA); continue; } } @@ -780,12 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd) dev_warn(&port->dd->pdev->dev, "retiring tag %d\n", tag); - if (cmd->comp_func) - cmd->comp_func(port, tag, cmd, PORT_IRQ_TF_ERR); - else - dev_warn(&port->dd->pdev->dev, - "Bad completion for tag %d\n", - tag); + mtip_complete_command(cmd, -EIO); } } print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); @@ -818,18 +723,7 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, continue; command = mtip_cmd_from_tag(dd, tag); - if (likely(command->comp_func)) - command->comp_func(port, tag, command, 0); - else { - dev_dbg(&dd->pdev->dev, - "Null completion for tag %d", - tag); - - if (mtip_check_surprise_removal( - dd->pdev)) { - return; - } - } + mtip_complete_command(command, 0); } completed >>= 1; } @@ -851,10 +745,8 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) int group = MTIP_TAG_INDEX(MTIP_TAG_INTERNAL); int status = readl(port->cmd_issue[group]); - if (!(status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL)))) { - if (cmd->comp_func) - cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); - } + if (!(status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL)))) + mtip_complete_command(cmd, 0); } } @@ -863,7 +755,6 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) */ static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) { - if (unlikely(port_stat & PORT_IRQ_CONNECT)) { dev_warn(&dd->pdev->dev, "Clearing PxSERR.DIAG.x\n"); @@ -990,8 +881,7 @@ static irqreturn_t mtip_irq_handler(int irq, void *instance) static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) { - writel(1 << MTIP_TAG_BIT(tag), - port->cmd_issue[MTIP_TAG_INDEX(tag)]); + writel(1 << MTIP_TAG_BIT(tag), port->cmd_issue[MTIP_TAG_INDEX(tag)]); } static bool mtip_pause_ncq(struct mtip_port *port, @@ -1120,7 +1010,6 @@ static int mtip_exec_internal_command(struct mtip_port *port, u32 opts, unsigned long timeout) { - DECLARE_COMPLETION_ONSTACK(wait); struct mtip_cmd *int_cmd; struct driver_data *dd = port->dd; struct request *rq; @@ -1145,7 +1034,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, return -EFAULT; } rq = blk_mq_rq_from_pdu(int_cmd); - rq->end_io_data = &icmd; + rq->special = &icmd; set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1158,17 +1047,13 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* wait for io to complete if non atomic */ if (mtip_quiesce_io(port, MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) { dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); - mtip_put_int_command(dd, int_cmd); + blk_mq_free_request(rq); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); return -EBUSY; } } - /* Set the completion function and data for the command. 
*/ - int_cmd->comp_data = &wait; - int_cmd->comp_func = mtip_completion; - /* Copy the command to the command table */ memcpy(int_cmd->command, fis, fis_len*4); @@ -1176,11 +1061,9 @@ static int mtip_exec_internal_command(struct mtip_port *port, rq->timeout = timeout; /* insert request and run queue */ - blk_execute_rq_nowait(rq->q, NULL, rq, true, NULL); + blk_execute_rq(rq->q, NULL, rq, true); - wait_for_completion(&wait); rv = int_cmd->status; - if (rv < 0) { if (rv == -ERESTARTSYS) { /* interrupted */ dev_err(&dd->pdev->dev, @@ -1222,7 +1105,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, } exec_ic_exit: /* Clear the allocated and active bits for the internal command. */ - mtip_put_int_command(dd, int_cmd); + blk_mq_free_request(rq); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); if (rv >= 0 && mtip_pause_ncq(port, fis)) { /* NCQ paused */ @@ -2377,12 +2260,6 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, (nents << 16) | 5 | AHCI_CMD_PREFETCH); command->command_header->byte_count = 0; - /* - * Set the completion function and data for the command - * within this layer. - */ - command->comp_data = dd; - command->comp_func = mtip_async_complete; command->direction = dma_dir; /* @@ -3760,15 +3637,13 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, struct request *rq) { struct driver_data *dd = hctx->queue->queuedata; - struct mtip_int_cmd *icmd = rq->end_io_data; + struct mtip_int_cmd *icmd = rq->special; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); struct mtip_cmd_sg *command_sg; if (mtip_commands_active(dd->port)) return BLK_MQ_RQ_QUEUE_BUSY; - rq->end_io_data = NULL; - /* Populate the SG list */ cmd->command_header->opts = __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len); @@ -3856,9 +3731,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); cmd->status = -ETIME; - if (cmd->comp_func) - cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, cmd, -ETIME); - goto exit_handler; + return BLK_EH_HANDLED; } if (test_bit(req->tag, dd->port->cmds_to_issue)) @@ -4086,21 +3959,10 @@ protocol_init_error: static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) { - struct driver_data *dd = (struct driver_data *)data; - struct mtip_cmd *cmd; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - if (likely(!reserv)) { - cmd = blk_mq_rq_to_pdu(rq); - cmd->status = -ENODEV; - blk_mq_complete_request(rq); - } else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { - - cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); - cmd->status = -ENODEV; - if (cmd->comp_func) - cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, - cmd, -ENODEV); - } + cmd->status = -ENODEV; + blk_mq_complete_request(rq); } /* diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 57b41528a824..37b8e3e0bb78 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -333,16 +333,6 @@ struct mtip_cmd { dma_addr_t command_dma; /* corresponding physical address */ - void *comp_data; /* data passed to completion function comp_func() */ - /* - * Completion function called by the ISR upon completion of - * a command. 
- */ - void (*comp_func)(struct mtip_port *port, - int tag, - struct mtip_cmd *cmd, - int status); - int scatter_ents; /* Number of scatter list entries used */ int unaligned; /* command is unaligned on 4k boundary */ From 2719aa217e0d025dbfce74ac777815776ccec072 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 3 May 2017 11:08:14 -0600 Subject: [PATCH 12/28] blk-mq: don't use sync workqueue flushing from drivers A previous commit introduced the sync flush, which we need from internal callers like blk_mq_quiesce_queue(). However, we also call the stop helpers from drivers, particularly from ->queue_rq() when we have to stop processing for a bit. We can't block from those locations, and we don't have to guarantee that we're fully flushed. Fixes: 9f993737906b ("blk-mq: unify hctx delayed_run_work and run_work") Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e339247a2570..dec70ca0aafd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -41,6 +41,7 @@ static LIST_HEAD(all_q_list); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); +static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync); static int blk_mq_poll_stats_bkt(const struct request *rq) { @@ -166,7 +167,7 @@ void blk_mq_quiesce_queue(struct request_queue *q) unsigned int i; bool rcu = false; - blk_mq_stop_hw_queues(q); + __blk_mq_stop_hw_queues(q, true); queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) @@ -1218,20 +1219,34 @@ bool blk_mq_queue_stopped(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_queue_stopped); +static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) +{ + if (sync) + cancel_delayed_work_sync(&hctx->run_work); + else + cancel_delayed_work(&hctx->run_work); + + set_bit(BLK_MQ_S_STOPPED, &hctx->state); +} + void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) { - cancel_delayed_work_sync(&hctx->run_work); - set_bit(BLK_MQ_S_STOPPED, &hctx->state); + __blk_mq_stop_hw_queue(hctx, false); } EXPORT_SYMBOL(blk_mq_stop_hw_queue); -void blk_mq_stop_hw_queues(struct request_queue *q) +void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync) { struct blk_mq_hw_ctx *hctx; int i; queue_for_each_hw_ctx(q, hctx, i) - blk_mq_stop_hw_queue(hctx); + __blk_mq_stop_hw_queue(hctx, sync); +} + +void blk_mq_stop_hw_queues(struct request_queue *q) +{ + __blk_mq_stop_hw_queues(q, false); } EXPORT_SYMBOL(blk_mq_stop_hw_queues); From 2e13f33a2464fc3aa7783022a90309cabeca8935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Wed, 3 May 2017 11:19:04 +0200 Subject: [PATCH 13/28] lightnvm: create cmd before allocating request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create nvme command before allocating a request using nvme_alloc_request, which uses the command direction. Up until now, the command has been zeroized, so all commands have been allocated as a read operation. 
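A hedged sketch of why the ordering matters: nvme_alloc_request() derives the request's data direction from the command it is handed, so the command must be filled in first. The body below is a simplified assumption of that helper, not part of the patch:

	/* sketch: direction comes from the already-built command */
	unsigned int op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
	struct request *req = blk_mq_alloc_request(q, op, flags);

With the command still zeroed at allocation time, nvme_is_write() always evaluated false, which is why every LightNVM I/O was set up as a read.
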
Signed-off-by: Javier González Reviewed-by: Matias Bjørling Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index e4e4e60b1224..8c4adac6fafc 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -503,6 +503,8 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) if (!cmd) return -ENOMEM; + nvme_nvm_rqtocmd(rq, rqd, ns, cmd); + rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); if (IS_ERR(rq)) { kfree(cmd); @@ -517,8 +519,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) rq->__data_len = 0; } - nvme_nvm_rqtocmd(rq, rqd, ns, cmd); - rq->end_io_data = rqd; blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io); From 507f7d68fe5c24973dcd6e48f011bdfbd2197f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Wed, 3 May 2017 11:19:05 +0200 Subject: [PATCH 14/28] lightnvm: fix bad back free on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free memory correctly when an allocation fails on a loop and we free backwards previously successful allocations. Signed-off-by: Javier González Reviewed-by: Matias Bjørling Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 54a06c3a2b8c..6a4aa608ad95 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -74,7 +74,7 @@ static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end) return 0; err: - while (--i > lun_begin) + while (--i >= lun_begin) clear_bit(i, dev->lun_map); return -EBUSY; @@ -211,7 +211,7 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, return tgt_dev; err_ch: - while (--i > 0) + while (--i >= 0) kfree(dev_map->chnls[i].lun_offs); kfree(luns); err_luns: From eabe06595d62cfa9278e2cd012df614bc68a7042 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 May 2017 15:05:26 +0200 Subject: [PATCH 15/28] block/mq: Cure cpu hotplug lock inversion By poking at /debug/sched_features I triggered the following splat: [] ====================================================== [] WARNING: possible circular locking dependency detected [] 4.11.0-00873-g964c8b7-dirty #694 Not tainted [] ------------------------------------------------------ [] bash/2109 is trying to acquire lock: [] (cpu_hotplug_lock.rw_sem){++++++}, at: [] static_key_slow_dec+0x1b/0x50 [] [] but task is already holding lock: [] (&sb->s_type->i_mutex_key#4){+++++.}, at: [] sched_feat_write+0x86/0x170 [] [] which lock already depends on the new lock. 
[] [] [] the existing dependency chain (in reverse order) is: [] [] -> #2 (&sb->s_type->i_mutex_key#4){+++++.}: [] lock_acquire+0x100/0x210 [] down_write+0x28/0x60 [] start_creating+0x5e/0xf0 [] debugfs_create_dir+0x13/0x110 [] blk_mq_debugfs_register+0x21/0x70 [] blk_mq_register_dev+0x64/0xd0 [] blk_register_queue+0x6a/0x170 [] device_add_disk+0x22d/0x440 [] loop_add+0x1f3/0x280 [] loop_init+0x104/0x142 [] do_one_initcall+0x43/0x180 [] kernel_init_freeable+0x1de/0x266 [] kernel_init+0xe/0x100 [] ret_from_fork+0x31/0x40 [] [] -> #1 (all_q_mutex){+.+.+.}: [] lock_acquire+0x100/0x210 [] __mutex_lock+0x6c/0x960 [] mutex_lock_nested+0x1b/0x20 [] blk_mq_init_allocated_queue+0x37c/0x4e0 [] blk_mq_init_queue+0x3a/0x60 [] loop_add+0xe5/0x280 [] loop_init+0x104/0x142 [] do_one_initcall+0x43/0x180 [] kernel_init_freeable+0x1de/0x266 [] kernel_init+0xe/0x100 [] ret_from_fork+0x31/0x40 [] *** DEADLOCK *** [] [] 3 locks held by bash/2109: [] #0: (sb_writers#11){.+.+.+}, at: [] vfs_write+0x17d/0x1a0 [] #1: (debugfs_srcu){......}, at: [] full_proxy_write+0x5d/0xd0 [] #2: (&sb->s_type->i_mutex_key#4){+++++.}, at: [] sched_feat_write+0x86/0x170 [] [] stack backtrace: [] CPU: 9 PID: 2109 Comm: bash Not tainted 4.11.0-00873-g964c8b7-dirty #694 [] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013 [] Call Trace: [] lock_acquire+0x100/0x210 [] get_online_cpus+0x2a/0x90 [] static_key_slow_dec+0x1b/0x50 [] static_key_disable+0x20/0x30 [] sched_feat_write+0x131/0x170 [] full_proxy_write+0x97/0xd0 [] __vfs_write+0x28/0x120 [] vfs_write+0xb5/0x1a0 [] SyS_write+0x49/0xa0 [] entry_SYSCALL_64_fastpath+0x23/0xc2 This is because of the cpu hotplug lock rework. Break the chain at #1 by reversing the lock acquisition order. This way i_mutex_key#4 no longer depends on cpu_hotplug_lock and things are good. Cc: Jens Axboe Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index dec70ca0aafd..03a747105682 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2336,15 +2336,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, blk_mq_init_cpu_queues(q, set->nr_hw_queues); - get_online_cpus(); mutex_lock(&all_q_mutex); + get_online_cpus(); list_add_tail(&q->all_q_node, &all_q_list); blk_mq_add_queue_tag_set(set, q); blk_mq_map_swqueue(q, cpu_online_mask); - mutex_unlock(&all_q_mutex); put_online_cpus(); + mutex_unlock(&all_q_mutex); if (!(set->flags & BLK_MQ_F_NO_SCHED)) { int ret; From 9052c7cf492d8c52c556f9536c4a9483fbfe4d73 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 May 2017 09:02:42 +0200 Subject: [PATCH 16/28] nfs: Fix bdi handling for cloned superblocks In commit 0d3b12584972 "nfs: Convert to separately allocated bdi" I have wrongly cloned bdi reference in nfs_clone_super(). Further inspection has shown that originally the code was actually allocating a new bdi (in ->clone_server callback) which was later registered in nfs_fs_mount_common() and used for sb->s_bdi in nfs_initialise_sb(). 
This could later result in bdi for the original superblock not getting unregistered when that superblock got shutdown (as the cloned sb still held bdi reference) and later when a new superblock was created under the same anonymous device number, a clash in sysfs has happened on bdi registration: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 10284 at /linux-next/fs/sysfs/dir.c:31 sysfs_warn_dup+0x64/0x74 sysfs: cannot create duplicate filename '/devices/virtual/bdi/0:32' Modules linked in: axp20x_usb_power gpio_axp209 nvmem_sunxi_sid sun4i_dma sun4i_ss virt_dma CPU: 1 PID: 10284 Comm: mount.nfs Not tainted 4.11.0-rc4+ #14 Hardware name: Allwinner sun7i (A20) Family [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x78/0x8c) [] (dump_stack) from [] (__warn+0xe8/0x100) [] (__warn) from [] (warn_slowpath_fmt+0x38/0x48) [] (warn_slowpath_fmt) from [] (sysfs_warn_dup+0x64/0x74) [] (sysfs_warn_dup) from [] (sysfs_create_dir_ns+0x84/0x94) [] (sysfs_create_dir_ns) from [] (kobject_add_internal+0x9c/0x2ec) [] (kobject_add_internal) from [] (kobject_add+0x48/0x98) [] (kobject_add) from [] (device_add+0xe4/0x5a0) [] (device_add) from [] (device_create_groups_vargs+0xac/0xbc) [] (device_create_groups_vargs) from [] (device_create_vargs+0x20/0x28) [] (device_create_vargs) from [] (bdi_register_va+0x44/0xfc) [] (bdi_register_va) from [] (super_setup_bdi_name+0x48/0xa4) [] (super_setup_bdi_name) from [] (nfs_fill_super+0x1a4/0x204) [] (nfs_fill_super) from [] (nfs_fs_mount_common+0x140/0x1e8) [] (nfs_fs_mount_common) from [] (nfs4_remote_mount+0x50/0x58) [] (nfs4_remote_mount) from [] (mount_fs+0x14/0xa4) [] (mount_fs) from [] (vfs_kern_mount+0x54/0x128) [] (vfs_kern_mount) from [] (nfs_do_root_mount+0x80/0xa0) [] (nfs_do_root_mount) from [] (nfs4_try_mount+0x28/0x3c) [] (nfs4_try_mount) from [] (nfs_fs_mount+0x2cc/0x8c4) [] (nfs_fs_mount) from [] (mount_fs+0x14/0xa4) [] (mount_fs) from [] (vfs_kern_mount+0x54/0x128) [] (vfs_kern_mount) from [] (do_mount+0x158/0xc7c) [] (do_mount) from [] (SyS_mount+0x8c/0xb4) [] (SyS_mount) from [] (ret_fast_syscall+0x0/0x3c) Fix the problem by always creating new bdi for a superblock as we used to do. 
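Condensed, the fix moves the bdi setup into the common mount path so that every superblock, cloned or not, registers its own bdi under its anonymous device number. A sketch of that shape (the helper name is hypothetical; the calls mirror the nfs_fs_mount_common() hunk below):

/*
 * Hypothetical helper, for illustration only; in the patch these calls sit
 * directly in nfs_fs_mount_common().
 */
static int nfs_setup_sb_bdi(struct super_block *s, struct nfs_server *server)
{
	int error;

	/* One fresh bdi per superblock, named after its anonymous dev_t. */
	error = super_setup_bdi_name(s, "%u:%u",
				     MAJOR(server->s_dev),
				     MINOR(server->s_dev));
	if (error)
		return error;

	s->s_bdi->ra_pages = server->rpages * NFS_MAX_READAHEAD;
	return 0;
}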
Reported-and-tested-by: Corentin Labbe Fixes: 0d3b12584972ce5781179ad3f15cca3cdb5cae05 Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/nfs/internal.h | 6 +++--- fs/nfs/super.c | 28 ++++++++++------------------ 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9dc65d7ae754..7b38fedb7e03 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -139,7 +139,7 @@ struct nfs_mount_request { }; struct nfs_mount_info { - int (*fill_super)(struct super_block *, struct nfs_mount_info *); + void (*fill_super)(struct super_block *, struct nfs_mount_info *); int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); struct nfs_parsed_mount_data *parsed; struct nfs_clone_mount *cloned; @@ -407,7 +407,7 @@ struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void * struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, const char *, struct nfs_mount_info *); void nfs_kill_super(struct super_block *); -int nfs_fill_super(struct super_block *, struct nfs_mount_info *); +void nfs_fill_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; @@ -458,7 +458,7 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ -int nfs_clone_super(struct super_block *, struct nfs_mount_info *); +void nfs_clone_super(struct super_block *, struct nfs_mount_info *); void nfs_umount_begin(struct super_block *); int nfs_statfs(struct dentry *, struct kstatfs *); int nfs_show_options(struct seq_file *, struct dentry *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dc69314d455e..2f3822a4a7d5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2321,11 +2321,10 @@ inline void nfs_initialise_sb(struct super_block *sb) /* * Finish setting up an NFS2/3 superblock */ -int nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) +void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) { struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); - int ret; sb->s_blocksize_bits = 0; sb->s_blocksize = 0; @@ -2343,21 +2342,13 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) } nfs_initialise_sb(sb); - - ret = super_setup_bdi_name(sb, "%u:%u", MAJOR(server->s_dev), - MINOR(server->s_dev)); - if (ret) - return ret; - sb->s_bdi->ra_pages = server->rpages * NFS_MAX_READAHEAD; - return 0; - } EXPORT_SYMBOL_GPL(nfs_fill_super); /* * Finish setting up a cloned NFS2/3/4 superblock */ -int nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) +void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) { const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); @@ -2377,10 +2368,6 @@ int nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) } nfs_initialise_sb(sb); - - sb->s_bdi = bdi_get(old_sb->s_bdi); - - return 0; } static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) @@ -2600,14 +2587,19 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, nfs_free_server(server); server = NULL; } else { + error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev), + MINOR(server->s_dev)); + if (error) { + mntroot = ERR_PTR(error); + goto error_splat_super; + } + s->s_bdi->ra_pages = server->rpages * 
NFS_MAX_READAHEAD; server->super = s; } if (!s->s_root) { /* initial superblock/root creation */ - error = mount_info->fill_super(s, mount_info); - if (error) - goto error_splat_super; + mount_info->fill_super(s, mount_info); nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); } From bec03d6b92264bb2a06cc3d6f2da5815f0585107 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:23 -0700 Subject: [PATCH 17/28] blk-mq-debugfs: separate flags with | This reads more naturally than spaces. Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index bcd2a7d4a3a5..1ff3d726fb03 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -53,7 +53,7 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, if (!(flags & BIT(i))) continue; if (sep) - seq_puts(m, " "); + seq_puts(m, "|"); sep = true; if (i < flag_name_count && flag_name[i]) seq_puts(m, flag_name[i]); From 1a435111f8eb30b370e3891caebb1d1ca61f41ec Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:24 -0700 Subject: [PATCH 18/28] blk-mq-debugfs: clean up flag definitions Make sure the spelled out flag names match the definition. This also adds a missing hctx state, BLK_MQ_S_START_ON_RUN, and a missing cmd_flag, __REQ_NOUNMAP. Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 189 ++++++++++++++++++++++------------------- 1 file changed, 102 insertions(+), 87 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 1ff3d726fb03..f58a116d6cca 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -63,37 +63,39 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, return 0; } +#define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name static const char *const blk_queue_flag_name[] = { - [QUEUE_FLAG_QUEUED] = "QUEUED", - [QUEUE_FLAG_STOPPED] = "STOPPED", - [QUEUE_FLAG_SYNCFULL] = "SYNCFULL", - [QUEUE_FLAG_ASYNCFULL] = "ASYNCFULL", - [QUEUE_FLAG_DYING] = "DYING", - [QUEUE_FLAG_BYPASS] = "BYPASS", - [QUEUE_FLAG_BIDI] = "BIDI", - [QUEUE_FLAG_NOMERGES] = "NOMERGES", - [QUEUE_FLAG_SAME_COMP] = "SAME_COMP", - [QUEUE_FLAG_FAIL_IO] = "FAIL_IO", - [QUEUE_FLAG_STACKABLE] = "STACKABLE", - [QUEUE_FLAG_NONROT] = "NONROT", - [QUEUE_FLAG_IO_STAT] = "IO_STAT", - [QUEUE_FLAG_DISCARD] = "DISCARD", - [QUEUE_FLAG_NOXMERGES] = "NOXMERGES", - [QUEUE_FLAG_ADD_RANDOM] = "ADD_RANDOM", - [QUEUE_FLAG_SECERASE] = "SECERASE", - [QUEUE_FLAG_SAME_FORCE] = "SAME_FORCE", - [QUEUE_FLAG_DEAD] = "DEAD", - [QUEUE_FLAG_INIT_DONE] = "INIT_DONE", - [QUEUE_FLAG_NO_SG_MERGE] = "NO_SG_MERGE", - [QUEUE_FLAG_POLL] = "POLL", - [QUEUE_FLAG_WC] = "WC", - [QUEUE_FLAG_FUA] = "FUA", - [QUEUE_FLAG_FLUSH_NQ] = "FLUSH_NQ", - [QUEUE_FLAG_DAX] = "DAX", - [QUEUE_FLAG_STATS] = "STATS", - [QUEUE_FLAG_POLL_STATS] = "POLL_STATS", - [QUEUE_FLAG_REGISTERED] = "REGISTERED", + QUEUE_FLAG_NAME(QUEUED), + QUEUE_FLAG_NAME(STOPPED), + QUEUE_FLAG_NAME(SYNCFULL), + QUEUE_FLAG_NAME(ASYNCFULL), + QUEUE_FLAG_NAME(DYING), + QUEUE_FLAG_NAME(BYPASS), + QUEUE_FLAG_NAME(BIDI), + QUEUE_FLAG_NAME(NOMERGES), + QUEUE_FLAG_NAME(SAME_COMP), + QUEUE_FLAG_NAME(FAIL_IO), + QUEUE_FLAG_NAME(STACKABLE), + QUEUE_FLAG_NAME(NONROT), + QUEUE_FLAG_NAME(IO_STAT), + QUEUE_FLAG_NAME(DISCARD), + QUEUE_FLAG_NAME(NOXMERGES), + QUEUE_FLAG_NAME(ADD_RANDOM), + 
QUEUE_FLAG_NAME(SECERASE), + QUEUE_FLAG_NAME(SAME_FORCE), + QUEUE_FLAG_NAME(DEAD), + QUEUE_FLAG_NAME(INIT_DONE), + QUEUE_FLAG_NAME(NO_SG_MERGE), + QUEUE_FLAG_NAME(POLL), + QUEUE_FLAG_NAME(WC), + QUEUE_FLAG_NAME(FUA), + QUEUE_FLAG_NAME(FLUSH_NQ), + QUEUE_FLAG_NAME(DAX), + QUEUE_FLAG_NAME(STATS), + QUEUE_FLAG_NAME(POLL_STATS), + QUEUE_FLAG_NAME(REGISTERED), }; +#undef QUEUE_FLAG_NAME static int blk_queue_flags_show(struct seq_file *m, void *v) { @@ -180,13 +182,16 @@ static const struct file_operations queue_poll_stat_fops = { .release = single_release, }; +#define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name static const char *const hctx_state_name[] = { - [BLK_MQ_S_STOPPED] = "STOPPED", - [BLK_MQ_S_TAG_ACTIVE] = "TAG_ACTIVE", - [BLK_MQ_S_SCHED_RESTART] = "SCHED_RESTART", - [BLK_MQ_S_TAG_WAITING] = "TAG_WAITING", - + HCTX_STATE_NAME(STOPPED), + HCTX_STATE_NAME(TAG_ACTIVE), + HCTX_STATE_NAME(SCHED_RESTART), + HCTX_STATE_NAME(TAG_WAITING), + HCTX_STATE_NAME(START_ON_RUN), }; +#undef HCTX_STATE_NAME + static int hctx_state_show(struct seq_file *m, void *v) { struct blk_mq_hw_ctx *hctx = m->private; @@ -209,18 +214,22 @@ static const struct file_operations hctx_state_fops = { .release = single_release, }; +#define BLK_TAG_ALLOC_NAME(name) [BLK_TAG_ALLOC_##name] = #name static const char *const alloc_policy_name[] = { - [BLK_TAG_ALLOC_FIFO] = "fifo", - [BLK_TAG_ALLOC_RR] = "rr", + BLK_TAG_ALLOC_NAME(FIFO), + BLK_TAG_ALLOC_NAME(RR), }; +#undef BLK_TAG_ALLOC_NAME +#define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { - [ilog2(BLK_MQ_F_SHOULD_MERGE)] = "SHOULD_MERGE", - [ilog2(BLK_MQ_F_TAG_SHARED)] = "TAG_SHARED", - [ilog2(BLK_MQ_F_SG_MERGE)] = "SG_MERGE", - [ilog2(BLK_MQ_F_BLOCKING)] = "BLOCKING", - [ilog2(BLK_MQ_F_NO_SCHED)] = "NO_SCHED", + HCTX_FLAG_NAME(SHOULD_MERGE), + HCTX_FLAG_NAME(TAG_SHARED), + HCTX_FLAG_NAME(SG_MERGE), + HCTX_FLAG_NAME(BLOCKING), + HCTX_FLAG_NAME(NO_SCHED), }; +#undef HCTX_FLAG_NAME static int hctx_flags_show(struct seq_file *m, void *v) { @@ -253,60 +262,66 @@ static const struct file_operations hctx_flags_fops = { .release = single_release, }; +#define REQ_OP_NAME(name) [REQ_OP_##name] = #name static const char *const op_name[] = { - [REQ_OP_READ] = "READ", - [REQ_OP_WRITE] = "WRITE", - [REQ_OP_FLUSH] = "FLUSH", - [REQ_OP_DISCARD] = "DISCARD", - [REQ_OP_ZONE_REPORT] = "ZONE_REPORT", - [REQ_OP_SECURE_ERASE] = "SECURE_ERASE", - [REQ_OP_ZONE_RESET] = "ZONE_RESET", - [REQ_OP_WRITE_SAME] = "WRITE_SAME", - [REQ_OP_WRITE_ZEROES] = "WRITE_ZEROES", - [REQ_OP_SCSI_IN] = "SCSI_IN", - [REQ_OP_SCSI_OUT] = "SCSI_OUT", - [REQ_OP_DRV_IN] = "DRV_IN", - [REQ_OP_DRV_OUT] = "DRV_OUT", + REQ_OP_NAME(READ), + REQ_OP_NAME(WRITE), + REQ_OP_NAME(FLUSH), + REQ_OP_NAME(DISCARD), + REQ_OP_NAME(ZONE_REPORT), + REQ_OP_NAME(SECURE_ERASE), + REQ_OP_NAME(ZONE_RESET), + REQ_OP_NAME(WRITE_SAME), + REQ_OP_NAME(WRITE_ZEROES), + REQ_OP_NAME(SCSI_IN), + REQ_OP_NAME(SCSI_OUT), + REQ_OP_NAME(DRV_IN), + REQ_OP_NAME(DRV_OUT), }; +#undef REQ_OP_NAME +#define CMD_FLAG_NAME(name) [__REQ_##name] = #name static const char *const cmd_flag_name[] = { - [__REQ_FAILFAST_DEV] = "FAILFAST_DEV", - [__REQ_FAILFAST_TRANSPORT] = "FAILFAST_TRANSPORT", - [__REQ_FAILFAST_DRIVER] = "FAILFAST_DRIVER", - [__REQ_SYNC] = "SYNC", - [__REQ_META] = "META", - [__REQ_PRIO] = "PRIO", - [__REQ_NOMERGE] = "NOMERGE", - [__REQ_IDLE] = "IDLE", - [__REQ_INTEGRITY] = "INTEGRITY", - [__REQ_FUA] = "FUA", - [__REQ_PREFLUSH] = "PREFLUSH", - [__REQ_RAHEAD] = "RAHEAD", - [__REQ_BACKGROUND] = 
"BACKGROUND", - [__REQ_NR_BITS] = "NR_BITS", + CMD_FLAG_NAME(FAILFAST_DEV), + CMD_FLAG_NAME(FAILFAST_TRANSPORT), + CMD_FLAG_NAME(FAILFAST_DRIVER), + CMD_FLAG_NAME(SYNC), + CMD_FLAG_NAME(META), + CMD_FLAG_NAME(PRIO), + CMD_FLAG_NAME(NOMERGE), + CMD_FLAG_NAME(IDLE), + CMD_FLAG_NAME(INTEGRITY), + CMD_FLAG_NAME(FUA), + CMD_FLAG_NAME(PREFLUSH), + CMD_FLAG_NAME(RAHEAD), + CMD_FLAG_NAME(BACKGROUND), + CMD_FLAG_NAME(NOUNMAP), }; +#undef CMD_FLAG_NAME +#define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name static const char *const rqf_name[] = { - [ilog2((__force u32)RQF_SORTED)] = "SORTED", - [ilog2((__force u32)RQF_STARTED)] = "STARTED", - [ilog2((__force u32)RQF_QUEUED)] = "QUEUED", - [ilog2((__force u32)RQF_SOFTBARRIER)] = "SOFTBARRIER", - [ilog2((__force u32)RQF_FLUSH_SEQ)] = "FLUSH_SEQ", - [ilog2((__force u32)RQF_MIXED_MERGE)] = "MIXED_MERGE", - [ilog2((__force u32)RQF_MQ_INFLIGHT)] = "MQ_INFLIGHT", - [ilog2((__force u32)RQF_DONTPREP)] = "DONTPREP", - [ilog2((__force u32)RQF_PREEMPT)] = "PREEMPT", - [ilog2((__force u32)RQF_COPY_USER)] = "COPY_USER", - [ilog2((__force u32)RQF_FAILED)] = "FAILED", - [ilog2((__force u32)RQF_QUIET)] = "QUIET", - [ilog2((__force u32)RQF_ELVPRIV)] = "ELVPRIV", - [ilog2((__force u32)RQF_IO_STAT)] = "IO_STAT", - [ilog2((__force u32)RQF_ALLOCED)] = "ALLOCED", - [ilog2((__force u32)RQF_PM)] = "PM", - [ilog2((__force u32)RQF_HASHED)] = "HASHED", - [ilog2((__force u32)RQF_STATS)] = "STATS", - [ilog2((__force u32)RQF_SPECIAL_PAYLOAD)] = "SPECIAL_PAYLOAD", + RQF_NAME(SORTED), + RQF_NAME(STARTED), + RQF_NAME(QUEUED), + RQF_NAME(SOFTBARRIER), + RQF_NAME(FLUSH_SEQ), + RQF_NAME(MIXED_MERGE), + RQF_NAME(MQ_INFLIGHT), + RQF_NAME(DONTPREP), + RQF_NAME(PREEMPT), + RQF_NAME(COPY_USER), + RQF_NAME(FAILED), + RQF_NAME(QUIET), + RQF_NAME(ELVPRIV), + RQF_NAME(IO_STAT), + RQF_NAME(ALLOCED), + RQF_NAME(PM), + RQF_NAME(HASHED), + RQF_NAME(STATS), + RQF_NAME(SPECIAL_PAYLOAD), }; +#undef RQF_NAME static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) { From c7e4145ae11b45931f117aa64c26be6cf58302df Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:25 -0700 Subject: [PATCH 19/28] blk-mq-debugfs: error on long write to queue "state" file blk_queue_flags_store() currently truncates and returns a short write if the operation being written is too long. This can give us weird results, like here: $ echo "run bar" echo: write error: invalid argument $ dmesg [ 1103.075435] blk_queue_flags_store: unsupported operation bar. Use either 'run' or 'start' Instead, return an error if the user does this. While we're here, make the argument names consistent with everywhere else in this file. 
Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index f58a116d6cca..2a19237455d4 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -107,14 +107,18 @@ static int blk_queue_flags_show(struct seq_file *m, void *v) return 0; } -static ssize_t blk_queue_flags_store(struct file *file, const char __user *ubuf, - size_t len, loff_t *offp) +static ssize_t blk_queue_flags_store(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { struct request_queue *q = file_inode(file)->i_private; char op[16] = { }, *s; - len = min(len, sizeof(op) - 1); - if (copy_from_user(op, ubuf, len)) + if (count >= sizeof(op)) { + pr_err("%s: operation too long\n", __func__); + goto inval; + } + + if (copy_from_user(op, buf, count)) return -EFAULT; s = op; strsep(&s, " \t\n"); /* strip trailing whitespace */ @@ -123,11 +127,12 @@ static ssize_t blk_queue_flags_store(struct file *file, const char __user *ubuf, } else if (strcmp(op, "start") == 0) { blk_mq_start_stopped_hw_queues(q, true); } else { - pr_err("%s: unsupported operation %s. Use either 'run' or 'start'\n", - __func__, op); + pr_err("%s: unsupported operation '%s'\n", __func__, op); +inval: + pr_err("%s: use either 'run' or 'start'\n", __func__); return -EINVAL; } - return len; + return count; } static int blk_queue_flags_open(struct inode *inode, struct file *file) From 71b90511cb170dd3e13b640cc1a453b2dd92e023 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:26 -0700 Subject: [PATCH 20/28] blk-mq-debugfs: don't open code strstrip() Slightly more readable, plus we also strip leading spaces. Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 2a19237455d4..109ae9f3d400 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -111,17 +111,16 @@ static ssize_t blk_queue_flags_store(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct request_queue *q = file_inode(file)->i_private; - char op[16] = { }, *s; + char opbuf[16] = { }, *op; - if (count >= sizeof(op)) { + if (count >= sizeof(opbuf)) { pr_err("%s: operation too long\n", __func__); goto inval; } - if (copy_from_user(op, buf, count)) + if (copy_from_user(opbuf, buf, count)) return -EFAULT; - s = op; - strsep(&s, " \t\n"); /* strip trailing whitespace */ + op = strstrip(opbuf); if (strcmp(op, "run") == 0) { blk_mq_run_hw_queues(q, true); } else if (strcmp(op, "start") == 0) { From 88aabbd7e7ace77fb3fbf06d22dd3cc0bfc0ca79 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:27 -0700 Subject: [PATCH 21/28] blk-mq-debugfs: rename hw queue directories from to hctx It's not clear what these numbered directories represent unless you consult the code. We're about to get rid of the intermediate "mq" directory, so these would be even more confusing without that context. 
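With debugfs mounted at its usual /sys/kernel/debug, hardware queue 0 of an example disk nvme0n1 now shows up as block/nvme0n1/mq/hctx0 instead of block/nvme0n1/mq/0 (the intermediate "mq" level is dropped later in this series). The change itself is the one-line snprintf below; shown here in isolation:

/* Illustration of the new naming; hctx->queue_num is the hw queue index. */
static void hctx_dir_name(struct blk_mq_hw_ctx *hctx, char *name, size_t len)
{
	snprintf(name, len, "hctx%u", hctx->queue_num);
}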
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 109ae9f3d400..2eae3238fb35 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -936,7 +936,7 @@ static int blk_mq_debugfs_register_hctx(struct request_queue *q, char name[20]; int i; - snprintf(name, sizeof(name), "%u", hctx->queue_num); + snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir); if (!hctx_dir) return -ENOMEM; From f57de23ac9019ea84c548a1637d5562ef07a8f7e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:28 -0700 Subject: [PATCH 22/28] blk-mq-debugfs: get rid of a bunch of boilerplate A large part of blk-mq-debugfs.c is file_operations and seq_file boilerplate. This sucks as is but will suck even more when schedulers can define their own debugfs entries. Factor it all out into a single blk_mq_debugfs_fops which multiplexes as needed. We store the request_queue, blk_mq_hw_ctx, or blk_mq_ctx in the parent directory dentry, which is kind of hacky, but it works. Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 466 ++++++++++++----------------------------- 1 file changed, 137 insertions(+), 329 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 2eae3238fb35..1579af6fcbed 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -26,23 +26,12 @@ struct blk_mq_debugfs_attr { const char *name; umode_t mode; - const struct file_operations *fops; + int (*show)(void *, struct seq_file *); + ssize_t (*write)(void *, const char __user *, size_t, loff_t *); + /* Set either .show or .seq_ops. 
*/ + const struct seq_operations *seq_ops; }; -static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file, - const struct seq_operations *ops) -{ - struct seq_file *m; - int ret; - - ret = seq_open(file, ops); - if (!ret) { - m = file->private_data; - m->private = inode->i_private; - } - return ret; -} - static int blk_flags_show(struct seq_file *m, const unsigned long flags, const char *const *flag_name, int flag_name_count) { @@ -97,9 +86,9 @@ static const char *const blk_queue_flag_name[] = { }; #undef QUEUE_FLAG_NAME -static int blk_queue_flags_show(struct seq_file *m, void *v) +static int queue_state_show(void *data, struct seq_file *m) { - struct request_queue *q = m->private; + struct request_queue *q = data; blk_flags_show(m, q->queue_flags, blk_queue_flag_name, ARRAY_SIZE(blk_queue_flag_name)); @@ -107,10 +96,10 @@ static int blk_queue_flags_show(struct seq_file *m, void *v) return 0; } -static ssize_t blk_queue_flags_store(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t queue_state_write(void *data, const char __user *buf, + size_t count, loff_t *ppos) { - struct request_queue *q = file_inode(file)->i_private; + struct request_queue *q = data; char opbuf[16] = { }, *op; if (count >= sizeof(opbuf)) { @@ -134,19 +123,6 @@ inval: return count; } -static int blk_queue_flags_open(struct inode *inode, struct file *file) -{ - return single_open(file, blk_queue_flags_show, inode->i_private); -} - -static const struct file_operations blk_queue_flags_fops = { - .open = blk_queue_flags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = blk_queue_flags_store, -}; - static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) { if (stat->nr_samples) { @@ -157,9 +133,9 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) } } -static int queue_poll_stat_show(struct seq_file *m, void *v) +static int queue_poll_stat_show(void *data, struct seq_file *m) { - struct request_queue *q = m->private; + struct request_queue *q = data; int bucket; for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) { @@ -174,18 +150,6 @@ static int queue_poll_stat_show(struct seq_file *m, void *v) return 0; } -static int queue_poll_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, queue_poll_stat_show, inode->i_private); -} - -static const struct file_operations queue_poll_stat_fops = { - .open = queue_poll_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name static const char *const hctx_state_name[] = { HCTX_STATE_NAME(STOPPED), @@ -196,9 +160,9 @@ static const char *const hctx_state_name[] = { }; #undef HCTX_STATE_NAME -static int hctx_state_show(struct seq_file *m, void *v) +static int hctx_state_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; blk_flags_show(m, hctx->state, hctx_state_name, ARRAY_SIZE(hctx_state_name)); @@ -206,18 +170,6 @@ static int hctx_state_show(struct seq_file *m, void *v) return 0; } -static int hctx_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_state_show, inode->i_private); -} - -static const struct file_operations hctx_state_fops = { - .open = hctx_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #define BLK_TAG_ALLOC_NAME(name) [BLK_TAG_ALLOC_##name] = #name static const char *const 
alloc_policy_name[] = { BLK_TAG_ALLOC_NAME(FIFO), @@ -235,9 +187,9 @@ static const char *const hctx_flag_name[] = { }; #undef HCTX_FLAG_NAME -static int hctx_flags_show(struct seq_file *m, void *v) +static int hctx_flags_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; const int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(hctx->flags); seq_puts(m, "alloc_policy="); @@ -254,18 +206,6 @@ static int hctx_flags_show(struct seq_file *m, void *v) return 0; } -static int hctx_flags_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_flags_show, inode->i_private); -} - -static const struct file_operations hctx_flags_fops = { - .open = hctx_flags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #define REQ_OP_NAME(name) [REQ_OP_##name] = #name static const char *const op_name[] = { REQ_OP_NAME(READ), @@ -383,38 +323,14 @@ static const struct seq_operations hctx_dispatch_seq_ops = { .show = blk_mq_debugfs_rq_show, }; -static int hctx_dispatch_open(struct inode *inode, struct file *file) +static int hctx_ctx_map_show(void *data, struct seq_file *m) { - return blk_mq_debugfs_seq_open(inode, file, &hctx_dispatch_seq_ops); -} - -static const struct file_operations hctx_dispatch_fops = { - .open = hctx_dispatch_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int hctx_ctx_map_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; sbitmap_bitmap_show(&hctx->ctx_map, m); return 0; } -static int hctx_ctx_map_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_ctx_map_show, inode->i_private); -} - -static const struct file_operations hctx_ctx_map_fops = { - .open = hctx_ctx_map_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void blk_mq_debugfs_tags_show(struct seq_file *m, struct blk_mq_tags *tags) { @@ -432,9 +348,9 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m, } } -static int hctx_tags_show(struct seq_file *m, void *v) +static int hctx_tags_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -449,21 +365,9 @@ out: return res; } -static int hctx_tags_open(struct inode *inode, struct file *file) +static int hctx_tags_bitmap_show(void *data, struct seq_file *m) { - return single_open(file, hctx_tags_show, inode->i_private); -} - -static const struct file_operations hctx_tags_fops = { - .open = hctx_tags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_tags_bitmap_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -478,21 +382,9 @@ out: return res; } -static int hctx_tags_bitmap_open(struct inode *inode, struct file *file) +static int hctx_sched_tags_show(void *data, struct seq_file *m) { - return single_open(file, hctx_tags_bitmap_show, inode->i_private); -} - -static const struct file_operations hctx_tags_bitmap_fops = { - .open = hctx_tags_bitmap_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_sched_tags_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; 
int res; @@ -507,21 +399,9 @@ out: return res; } -static int hctx_sched_tags_open(struct inode *inode, struct file *file) +static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) { - return single_open(file, hctx_sched_tags_show, inode->i_private); -} - -static const struct file_operations hctx_sched_tags_fops = { - .open = hctx_sched_tags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -536,21 +416,9 @@ out: return res; } -static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file) +static int hctx_io_poll_show(void *data, struct seq_file *m) { - return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private); -} - -static const struct file_operations hctx_sched_tags_bitmap_fops = { - .open = hctx_sched_tags_bitmap_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_io_poll_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "considered=%lu\n", hctx->poll_considered); seq_printf(m, "invoked=%lu\n", hctx->poll_invoked); @@ -558,32 +426,18 @@ static int hctx_io_poll_show(struct seq_file *m, void *v) return 0; } -static int hctx_io_poll_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_io_poll_show, inode->i_private); -} - -static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf, +static ssize_t hctx_io_poll_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0; return count; } -static const struct file_operations hctx_io_poll_fops = { - .open = hctx_io_poll_open, - .read = seq_read, - .write = hctx_io_poll_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_dispatched_show(struct seq_file *m, void *v) +static int hctx_dispatched_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; int i; seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]); @@ -598,16 +452,10 @@ static int hctx_dispatched_show(struct seq_file *m, void *v) return 0; } -static int hctx_dispatched_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_dispatched_show, inode->i_private); -} - -static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf, +static ssize_t hctx_dispatched_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; int i; for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++) @@ -615,96 +463,48 @@ static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations hctx_dispatched_fops = { - .open = hctx_dispatched_open, - .read = seq_read, - .write = hctx_dispatched_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_queued_show(struct seq_file *m, void *v) +static int hctx_queued_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx 
*hctx = data; seq_printf(m, "%lu\n", hctx->queued); return 0; } -static int hctx_queued_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_queued_show, inode->i_private); -} - -static ssize_t hctx_queued_write(struct file *file, const char __user *buf, +static ssize_t hctx_queued_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->queued = 0; return count; } -static const struct file_operations hctx_queued_fops = { - .open = hctx_queued_open, - .read = seq_read, - .write = hctx_queued_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_run_show(struct seq_file *m, void *v) +static int hctx_run_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "%lu\n", hctx->run); return 0; } -static int hctx_run_open(struct inode *inode, struct file *file) +static ssize_t hctx_run_write(void *data, const char __user *buf, size_t count, + loff_t *ppos) { - return single_open(file, hctx_run_show, inode->i_private); -} - -static ssize_t hctx_run_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->run = 0; return count; } -static const struct file_operations hctx_run_fops = { - .open = hctx_run_open, - .read = seq_read, - .write = hctx_run_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_active_show(struct seq_file *m, void *v) +static int hctx_active_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "%d\n", atomic_read(&hctx->nr_active)); return 0; } -static int hctx_active_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_active_show, inode->i_private); -} - -static const struct file_operations hctx_active_fops = { - .open = hctx_active_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos) __acquires(&ctx->lock) { @@ -735,140 +535,146 @@ static const struct seq_operations ctx_rq_list_seq_ops = { .stop = ctx_rq_list_stop, .show = blk_mq_debugfs_rq_show, }; - -static int ctx_rq_list_open(struct inode *inode, struct file *file) +static int ctx_dispatched_show(void *data, struct seq_file *m) { - return blk_mq_debugfs_seq_open(inode, file, &ctx_rq_list_seq_ops); -} - -static const struct file_operations ctx_rq_list_fops = { - .open = ctx_rq_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int ctx_dispatched_show(struct seq_file *m, void *v) -{ - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]); return 0; } -static int ctx_dispatched_open(struct inode *inode, struct file *file) -{ - return single_open(file, ctx_dispatched_show, inode->i_private); -} - -static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf, +static ssize_t ctx_dispatched_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0; return 
count; } -static const struct file_operations ctx_dispatched_fops = { - .open = ctx_dispatched_open, - .read = seq_read, - .write = ctx_dispatched_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int ctx_merged_show(struct seq_file *m, void *v) +static int ctx_merged_show(void *data, struct seq_file *m) { - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu\n", ctx->rq_merged); return 0; } -static int ctx_merged_open(struct inode *inode, struct file *file) +static ssize_t ctx_merged_write(void *data, const char __user *buf, + size_t count, loff_t *ppos) { - return single_open(file, ctx_merged_show, inode->i_private); -} - -static ssize_t ctx_merged_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; ctx->rq_merged = 0; return count; } -static const struct file_operations ctx_merged_fops = { - .open = ctx_merged_open, - .read = seq_read, - .write = ctx_merged_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int ctx_completed_show(struct seq_file *m, void *v) +static int ctx_completed_show(void *data, struct seq_file *m) { - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]); return 0; } -static int ctx_completed_open(struct inode *inode, struct file *file) -{ - return single_open(file, ctx_completed_show, inode->i_private); -} - -static ssize_t ctx_completed_write(struct file *file, const char __user *buf, +static ssize_t ctx_completed_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; ctx->rq_completed[0] = ctx->rq_completed[1] = 0; return count; } -static const struct file_operations ctx_completed_fops = { - .open = ctx_completed_open, +static int blk_mq_debugfs_show(struct seq_file *m, void *v) +{ + const struct blk_mq_debugfs_attr *attr = m->private; + void *data = d_inode(m->file->f_path.dentry->d_parent)->i_private; + + return attr->show(data, m); +} + +static ssize_t blk_mq_debugfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + const struct blk_mq_debugfs_attr *attr = m->private; + void *data = d_inode(file->f_path.dentry->d_parent)->i_private; + + if (!attr->write) + return -EPERM; + + return attr->write(data, buf, count, ppos); +} + +static int blk_mq_debugfs_open(struct inode *inode, struct file *file) +{ + const struct blk_mq_debugfs_attr *attr = inode->i_private; + void *data = d_inode(file->f_path.dentry->d_parent)->i_private; + struct seq_file *m; + int ret; + + if (attr->seq_ops) { + ret = seq_open(file, attr->seq_ops); + if (!ret) { + m = file->private_data; + m->private = data; + } + return ret; + } + + if (WARN_ON_ONCE(!attr->show)) + return -EPERM; + + return single_open(file, blk_mq_debugfs_show, inode->i_private); +} + +static int blk_mq_debugfs_release(struct inode *inode, struct file *file) +{ + const struct blk_mq_debugfs_attr *attr = inode->i_private; + + if (attr->show) + return single_release(inode, file); + else + return seq_release(inode, file); +} + +const struct file_operations blk_mq_debugfs_fops = { + .open = blk_mq_debugfs_open, .read = seq_read, - .write = ctx_completed_write, + .write = blk_mq_debugfs_write, .llseek = seq_lseek, 
- .release = single_release, + .release = blk_mq_debugfs_release, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { - {"poll_stat", 0400, &queue_poll_stat_fops}, - {"state", 0600, &blk_queue_flags_fops}, + {"poll_stat", 0400, queue_poll_stat_show}, + {"state", 0600, queue_state_show, queue_state_write}, {}, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { - {"state", 0400, &hctx_state_fops}, - {"flags", 0400, &hctx_flags_fops}, - {"dispatch", 0400, &hctx_dispatch_fops}, - {"ctx_map", 0400, &hctx_ctx_map_fops}, - {"tags", 0400, &hctx_tags_fops}, - {"tags_bitmap", 0400, &hctx_tags_bitmap_fops}, - {"sched_tags", 0400, &hctx_sched_tags_fops}, - {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops}, - {"io_poll", 0600, &hctx_io_poll_fops}, - {"dispatched", 0600, &hctx_dispatched_fops}, - {"queued", 0600, &hctx_queued_fops}, - {"run", 0600, &hctx_run_fops}, - {"active", 0400, &hctx_active_fops}, + {"state", 0400, hctx_state_show}, + {"flags", 0400, hctx_flags_show}, + {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, + {"ctx_map", 0400, hctx_ctx_map_show}, + {"tags", 0400, hctx_tags_show}, + {"tags_bitmap", 0400, hctx_tags_bitmap_show}, + {"sched_tags", 0400, hctx_sched_tags_show}, + {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, + {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, + {"dispatched", 0600, hctx_dispatched_show, hctx_dispatched_write}, + {"queued", 0600, hctx_queued_show, hctx_queued_write}, + {"run", 0600, hctx_run_show, hctx_run_write}, + {"active", 0400, hctx_active_show}, {}, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { - {"rq_list", 0400, &ctx_rq_list_fops}, - {"dispatched", 0600, &ctx_dispatched_fops}, - {"merged", 0600, &ctx_merged_fops}, - {"completed", 0600, &ctx_completed_fops}, + {"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops}, + {"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write}, + {"merged", 0600, ctx_merged_show, ctx_merged_write}, + {"completed", 0600, ctx_completed_show, ctx_completed_write}, {}, }; @@ -900,11 +706,13 @@ void blk_mq_debugfs_unregister(struct request_queue *q) } static bool debugfs_create_files(struct dentry *parent, void *data, - const struct blk_mq_debugfs_attr *attr) + const struct blk_mq_debugfs_attr *attr) { + d_inode(parent)->i_private = data; + for (; attr->name; attr++) { if (!debugfs_create_file(attr->name, attr->mode, parent, - data, attr->fops)) + (void *)attr, &blk_mq_debugfs_fops)) return false; } return true; From 18d4d7d0571f5acc9de638ea3a33e8064deaceca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 4 May 2017 00:31:29 -0700 Subject: [PATCH 23/28] blk-mq: Do not invoke queue operations on a dead queue In commit e869b5462f83 ("blk-mq: Unregister debugfs attributes earlier"), we shuffled the debugfs cleanup around so that the "state" attribute was removed before we freed the blk-mq data structures. However, later changes are going to undo that, so we need to explicitly disallow running a dead queue. 
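The guard amounts to a single early return on a dead queue; a condensed sketch (the function name is made up, the check mirrors the hunk added below):

/* Sketch: refuse the "state" write once the queue has been marked dead. */
static ssize_t queue_state_write_guarded(struct request_queue *q,
					 const char __user *buf, size_t count)
{
	if (blk_queue_dead(q))
		return -ENOENT;	/* queue is being torn down */

	/* ... normal "run" / "start" handling follows ... */
	return count;
}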
[Omar: rebased and updated commit message] Signed-off-by: Omar Sandoval Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 1579af6fcbed..347fbb8e059c 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -102,6 +102,14 @@ static ssize_t queue_state_write(void *data, const char __user *buf, struct request_queue *q = data; char opbuf[16] = { }, *op; + /* + * The "state" attribute is removed after blk_cleanup_queue() has called + * blk_mq_free_queue(). Return if QUEUE_FLAG_DEAD has been set to avoid + * triggering a use-after-free. + */ + if (blk_queue_dead(q)) + return -ENOENT; + if (count >= sizeof(opbuf)) { pr_err("%s: operation too long\n", __func__); goto inval; From d173a25165c124442182f6b21d0c2ec381a0eebe Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:30 -0700 Subject: [PATCH 24/28] blk-mq: move debugfs declarations to a separate header file Preparation for adding more declarations. Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + block/blk-mq-debugfs.c | 1 + block/blk-mq-debugfs.h | 29 +++++++++++++++++++++++++++++ block/blk-mq-sysfs.c | 1 + block/blk-mq.h | 28 ---------------------------- block/blk-sysfs.c | 1 + 6 files changed, 33 insertions(+), 28 deletions(-) create mode 100644 block/blk-mq-debugfs.h diff --git a/block/blk-core.c b/block/blk-core.c index 24886b69690f..acdca6536562 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -40,6 +40,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-wbt.h" diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 347fbb8e059c..1dc1847b5363 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -21,6 +21,7 @@ #include #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" struct blk_mq_debugfs_attr { diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h new file mode 100644 index 000000000000..00b0f71d0ae9 --- /dev/null +++ b/block/blk-mq-debugfs.h @@ -0,0 +1,29 @@ +#ifndef INT_BLK_MQ_DEBUGFS_H +#define INT_BLK_MQ_DEBUGFS_H + +#ifdef CONFIG_BLK_DEBUG_FS +int blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_unregister(struct request_queue *q); +int blk_mq_debugfs_register_mq(struct request_queue *q); +void blk_mq_debugfs_unregister_mq(struct request_queue *q); +#else +static inline int blk_mq_debugfs_register(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_mq(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_mq(struct request_queue *q) +{ +} +#endif + +#endif diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index ec0afdf765e3..71a237a90d43 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -11,6 +11,7 @@ #include #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" static void blk_mq_sysfs_release(struct kobject *kobj) diff --git a/block/blk-mq.h b/block/blk-mq.h index 2814a14e529c..cc67b48e3551 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -83,34 +83,6 @@ extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct 
blk_mq_hw_ctx *hctx); -/* - * debugfs helpers - */ -#ifdef CONFIG_BLK_DEBUG_FS -int blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); -int blk_mq_debugfs_register_mq(struct request_queue *q); -void blk_mq_debugfs_unregister_mq(struct request_queue *q); -#else -static inline int blk_mq_debugfs_register(struct request_queue *q) -{ - return 0; -} - -static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - -static inline int blk_mq_debugfs_register_mq(struct request_queue *q) -{ - return 0; -} - -static inline void blk_mq_debugfs_unregister_mq(struct request_queue *q) -{ -} -#endif - extern void blk_mq_rq_timed_out(struct request *req, bool reserved); void blk_mq_release(struct request_queue *q); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3f37813ccbaf..9995355121d7 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -13,6 +13,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-wbt.h" struct queue_sysfs_entry { From 9c1051aacde828073dbbab5e8e59c0fc802efa9a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 08:17:21 -0600 Subject: [PATCH 25/28] blk-mq: untangle debugfs and sysfs Originally, I tied debugfs registration/unregistration together with sysfs. There's no reason to do this, and it's getting in the way of letting schedulers define their own debugfs attributes. Instead, tie the debugfs registration to the lifetime of the structures themselves. The saner lifetimes mean we can also get rid of the extra mq directory and move everything one level up. I.e., nvme0n1/mq/hctx0/tags is now just nvme0n1/hctx0/tags. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 9 +-- block/blk-mq-debugfs.c | 146 ++++++++++++++++++++++------------------- block/blk-mq-debugfs.h | 21 ++++-- block/blk-mq-sysfs.c | 11 ---- block/blk-mq.c | 7 ++ block/blk-sysfs.c | 2 + include/linux/blk-mq.h | 4 ++ include/linux/blkdev.h | 1 - 8 files changed, 112 insertions(+), 89 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index acdca6536562..c580b0138a7f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -40,7 +40,6 @@ #include "blk.h" #include "blk-mq.h" -#include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-wbt.h" @@ -562,13 +561,9 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. 
*/ blk_freeze_queue(q); - if (!q->mq_ops) { - spin_lock_irq(lock); + spin_lock_irq(lock); + if (!q->mq_ops) __blk_drain_queue(q, true); - } else { - blk_mq_debugfs_unregister_mq(q); - spin_lock_irq(lock); - } queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 1dc1847b5363..260cf76e0705 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -687,33 +687,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { {}, }; -int blk_mq_debugfs_register(struct request_queue *q) -{ - if (!blk_debugfs_root) - return -ENOENT; - - q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), - blk_debugfs_root); - if (!q->debugfs_dir) - goto err; - - if (blk_mq_debugfs_register_mq(q)) - goto err; - - return 0; - -err: - blk_mq_debugfs_unregister(q); - return -ENOMEM; -} - -void blk_mq_debugfs_unregister(struct request_queue *q) -{ - debugfs_remove_recursive(q->debugfs_dir); - q->mq_debugfs_dir = NULL; - q->debugfs_dir = NULL; -} - static bool debugfs_create_files(struct dentry *parent, void *data, const struct blk_mq_debugfs_attr *attr) { @@ -727,15 +700,54 @@ static bool debugfs_create_files(struct dentry *parent, void *data, return true; } -static int blk_mq_debugfs_register_ctx(struct request_queue *q, - struct blk_mq_ctx *ctx, - struct dentry *hctx_dir) +int blk_mq_debugfs_register(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + if (!blk_debugfs_root) + return -ENOENT; + + q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), + blk_debugfs_root); + if (!q->debugfs_dir) + return -ENOMEM; + + if (!debugfs_create_files(q->debugfs_dir, q, + blk_mq_debugfs_queue_attrs)) + goto err; + + /* + * blk_mq_init_hctx() attempted to do this already, but q->debugfs_dir + * didn't exist yet (because we don't know what to name the directory + * until the queue is registered to a gendisk). 
+ */ + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx)) + goto err; + } + + return 0; + +err: + blk_mq_debugfs_unregister(q); + return -ENOMEM; +} + +void blk_mq_debugfs_unregister(struct request_queue *q) +{ + debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; +} + +static int blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx) { struct dentry *ctx_dir; char name[20]; snprintf(name, sizeof(name), "cpu%u", ctx->cpu); - ctx_dir = debugfs_create_dir(name, hctx_dir); + ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir); if (!ctx_dir) return -ENOMEM; @@ -745,59 +757,61 @@ static int blk_mq_debugfs_register_ctx(struct request_queue *q, return 0; } -static int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { struct blk_mq_ctx *ctx; - struct dentry *hctx_dir; char name[20]; int i; - snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); - hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir); - if (!hctx_dir) - return -ENOMEM; - - if (!debugfs_create_files(hctx_dir, hctx, blk_mq_debugfs_hctx_attrs)) - return -ENOMEM; - - hctx_for_each_ctx(hctx, ctx, i) { - if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir)) - return -ENOMEM; - } - - return 0; -} - -int blk_mq_debugfs_register_mq(struct request_queue *q) -{ - struct blk_mq_hw_ctx *hctx; - int i; - if (!q->debugfs_dir) return -ENOENT; - q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir); - if (!q->mq_debugfs_dir) + snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); + hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); + if (!hctx->debugfs_dir) + return -ENOMEM; + + if (!debugfs_create_files(hctx->debugfs_dir, hctx, + blk_mq_debugfs_hctx_attrs)) goto err; - if (!debugfs_create_files(q->mq_debugfs_dir, q, blk_mq_debugfs_queue_attrs)) - goto err; - - queue_for_each_hw_ctx(q, hctx, i) { - if (blk_mq_debugfs_register_hctx(q, hctx)) + hctx_for_each_ctx(hctx, ctx, i) { + if (blk_mq_debugfs_register_ctx(hctx, ctx)) goto err; } return 0; err: - blk_mq_debugfs_unregister_mq(q); + blk_mq_debugfs_unregister_hctx(hctx); return -ENOMEM; } -void blk_mq_debugfs_unregister_mq(struct request_queue *q) +void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { - debugfs_remove_recursive(q->mq_debugfs_dir); - q->mq_debugfs_dir = NULL; + debugfs_remove_recursive(hctx->debugfs_dir); + hctx->debugfs_dir = NULL; +} + +int blk_mq_debugfs_register_hctxs(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (blk_mq_debugfs_register_hctx(q, hctx)) + return -ENOMEM; + } + + return 0; +} + +void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_debugfs_unregister_hctx(hctx); } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 00b0f71d0ae9..596e9b16d3d1 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -4,8 +4,11 @@ #ifdef CONFIG_BLK_DEBUG_FS int blk_mq_debugfs_register(struct request_queue *q); void blk_mq_debugfs_unregister(struct request_queue *q); -int blk_mq_debugfs_register_mq(struct request_queue *q); -void blk_mq_debugfs_unregister_mq(struct request_queue *q); +int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); +int 
blk_mq_debugfs_register_hctxs(struct request_queue *q); +void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); #else static inline int blk_mq_debugfs_register(struct request_queue *q) { @@ -16,12 +19,22 @@ static inline void blk_mq_debugfs_unregister(struct request_queue *q) { } -static inline int blk_mq_debugfs_register_mq(struct request_queue *q) +static inline int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { return 0; } -static inline void blk_mq_debugfs_unregister_mq(struct request_queue *q) +static inline void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) +{ +} + +static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) { } #endif diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 71a237a90d43..79969c3c234f 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -11,7 +11,6 @@ #include #include "blk-mq.h" -#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" static void blk_mq_sysfs_release(struct kobject *kobj) @@ -259,8 +258,6 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - blk_mq_debugfs_unregister_mq(q); - kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); kobject_put(&dev->kobj); @@ -319,8 +316,6 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q) kobject_uevent(&q->mq_kobj, KOBJ_ADD); - blk_mq_debugfs_register(q); - queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) @@ -336,8 +331,6 @@ unreg: while (--i >= 0) blk_mq_unregister_hctx(q->queue_hw_ctx[i]); - blk_mq_debugfs_unregister_mq(q); - kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); kobject_put(&dev->kobj); @@ -365,8 +358,6 @@ void blk_mq_sysfs_unregister(struct request_queue *q) if (!q->mq_sysfs_init_done) goto unlock; - blk_mq_debugfs_unregister_mq(q); - queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); @@ -383,8 +374,6 @@ int blk_mq_sysfs_register(struct request_queue *q) if (!q->mq_sysfs_init_done) goto unlock; - blk_mq_debugfs_register_mq(q); - queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) diff --git a/block/blk-mq.c b/block/blk-mq.c index 03a747105682..5d4ce7eb8dbf 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -31,6 +31,7 @@ #include #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" #include "blk-stat.h" #include "blk-wbt.h" @@ -1862,6 +1863,8 @@ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { + blk_mq_debugfs_unregister_hctx(hctx); + blk_mq_tag_idle(hctx); if (set->ops->exit_request) @@ -1948,6 +1951,8 @@ static int blk_mq_init_hctx(struct request_queue *q, if (hctx->flags & BLK_MQ_F_BLOCKING) init_srcu_struct(&hctx->queue_rq_srcu); + blk_mq_debugfs_register_hctx(q, hctx); + return 0; free_fq: @@ -2385,6 +2390,7 @@ static void blk_mq_queue_reinit(struct request_queue *q, { WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); + blk_mq_debugfs_unregister_hctxs(q); blk_mq_sysfs_unregister(q); /* @@ -2396,6 +2402,7 @@ static void blk_mq_queue_reinit(struct request_queue *q, blk_mq_map_swqueue(q, online_mask); blk_mq_sysfs_register(q); + blk_mq_debugfs_register_hctxs(q); } /* diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9995355121d7..504fee940052 100644 --- 
a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -890,6 +890,8 @@ int blk_register_queue(struct gendisk *disk) if (q->mq_ops) __blk_mq_register_dev(dev, q); + blk_mq_debugfs_register(q); + kobject_uevent(&q->kobj, KOBJ_ADD); wbt_enable_default(q); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a104832e7ae5..de8ed9aaa156 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -57,6 +57,10 @@ struct blk_mq_hw_ctx { unsigned long poll_considered; unsigned long poll_invoked; unsigned long poll_success; + +#ifdef CONFIG_BLK_DEBUG_FS + struct dentry *debugfs_dir; +#endif }; struct blk_mq_tag_set { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 83d28623645f..b49a79a29e58 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -579,7 +579,6 @@ struct request_queue { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; - struct dentry *mq_debugfs_dir; #endif bool mq_sysfs_init_done; From d332ce091813d11a46144354baa72b755833392f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 08:24:40 -0600 Subject: [PATCH 26/28] blk-mq-debugfs: allow schedulers to register debugfs attributes This provides the infrastructure for schedulers to expose their internal state through debugfs. We add a list of queue attributes and a list of hctx attributes to struct elevator_type and wire them up when switching schedulers. Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Add missing seq_file.h header in blk-mq-debugfs.h Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 74 +++++++++++++++++++++++++++++++++++----- block/blk-mq-debugfs.h | 37 ++++++++++++++++++++ block/blk-mq-sched.c | 24 ++++++++----- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 1 + include/linux/elevator.h | 7 ++++ 6 files changed, 127 insertions(+), 17 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 260cf76e0705..a3b887109310 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -24,15 +24,6 @@ #include "blk-mq-debugfs.h" #include "blk-mq-tag.h" -struct blk_mq_debugfs_attr { - const char *name; - umode_t mode; - int (*show)(void *, struct seq_file *); - ssize_t (*write)(void *, const char __user *, size_t, loff_t *); - /* Set either .show or .seq_ops. 
*/ - const struct seq_operations *seq_ops; -}; - static int blk_flags_show(struct seq_file *m, const unsigned long flags, const char *const *flag_name, int flag_name_count) { @@ -725,6 +716,9 @@ int blk_mq_debugfs_register(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx)) goto err; + if (q->elevator && !hctx->sched_debugfs_dir && + blk_mq_debugfs_register_sched_hctx(q, hctx)) + goto err; } return 0; @@ -737,6 +731,7 @@ err: void blk_mq_debugfs_unregister(struct request_queue *q) { debugfs_remove_recursive(q->debugfs_dir); + q->sched_debugfs_dir = NULL; q->debugfs_dir = NULL; } @@ -791,6 +786,7 @@ err: void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { debugfs_remove_recursive(hctx->debugfs_dir); + hctx->sched_debugfs_dir = NULL; hctx->debugfs_dir = NULL; } @@ -815,3 +811,63 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_unregister_hctx(hctx); } + +int blk_mq_debugfs_register_sched(struct request_queue *q) +{ + struct elevator_type *e = q->elevator->type; + + if (!q->debugfs_dir) + return -ENOENT; + + if (!e->queue_debugfs_attrs) + return 0; + + q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir); + if (!q->sched_debugfs_dir) + return -ENOMEM; + + if (!debugfs_create_files(q->sched_debugfs_dir, q, + e->queue_debugfs_attrs)) + goto err; + + return 0; + +err: + blk_mq_debugfs_unregister_sched(q); + return -ENOMEM; +} + +void blk_mq_debugfs_unregister_sched(struct request_queue *q) +{ + debugfs_remove_recursive(q->sched_debugfs_dir); + q->sched_debugfs_dir = NULL; +} + +int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + struct elevator_type *e = q->elevator->type; + + if (!hctx->debugfs_dir) + return -ENOENT; + + if (!e->hctx_debugfs_attrs) + return 0; + + hctx->sched_debugfs_dir = debugfs_create_dir("sched", + hctx->debugfs_dir); + if (!hctx->sched_debugfs_dir) + return -ENOMEM; + + if (!debugfs_create_files(hctx->sched_debugfs_dir, hctx, + e->hctx_debugfs_attrs)) + return -ENOMEM; + + return 0; +} + +void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) +{ + debugfs_remove_recursive(hctx->sched_debugfs_dir); + hctx->sched_debugfs_dir = NULL; +} diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 596e9b16d3d1..a5ac21c81ea3 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -2,6 +2,18 @@ #define INT_BLK_MQ_DEBUGFS_H #ifdef CONFIG_BLK_DEBUG_FS + +#include + +struct blk_mq_debugfs_attr { + const char *name; + umode_t mode; + int (*show)(void *, struct seq_file *); + ssize_t (*write)(void *, const char __user *, size_t, loff_t *); + /* Set either .show or .seq_ops. 
*/ + const struct seq_operations *seq_ops; +}; + int blk_mq_debugfs_register(struct request_queue *q); void blk_mq_debugfs_unregister(struct request_queue *q); int blk_mq_debugfs_register_hctx(struct request_queue *q, @@ -9,6 +21,12 @@ int blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); int blk_mq_debugfs_register_hctxs(struct request_queue *q); void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); + +int blk_mq_debugfs_register_sched(struct request_queue *q); +void blk_mq_debugfs_unregister_sched(struct request_queue *q); +int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); #else static inline int blk_mq_debugfs_register(struct request_queue *q) { @@ -37,6 +55,25 @@ static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q) static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) { } + +static inline int blk_mq_debugfs_register_sched(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_sched(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) +{ +} #endif #endif diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index e79e9f18d7c2..1f5b692526ae 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -11,6 +11,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" #include "blk-wbt.h" @@ -472,6 +473,8 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, } } + blk_mq_debugfs_register_sched_hctx(q, hctx); + return 0; } @@ -483,6 +486,8 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, if (!e) return; + blk_mq_debugfs_unregister_sched_hctx(hctx); + if (e->type->ops.mq.exit_hctx && hctx->sched_data) { e->type->ops.mq.exit_hctx(hctx, hctx_idx); hctx->sched_data = NULL; @@ -519,8 +524,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err; - if (e->ops.mq.init_hctx) { - queue_for_each_hw_ctx(q, hctx, i) { + blk_mq_debugfs_register_sched(q); + + queue_for_each_hw_ctx(q, hctx, i) { + if (e->ops.mq.init_hctx) { ret = e->ops.mq.init_hctx(hctx, i); if (ret) { eq = q->elevator; @@ -529,6 +536,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + blk_mq_debugfs_register_sched_hctx(q, hctx); } return 0; @@ -544,14 +552,14 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) struct blk_mq_hw_ctx *hctx; unsigned int i; - if (e->type->ops.mq.exit_hctx) { - queue_for_each_hw_ctx(q, hctx, i) { - if (hctx->sched_data) { - e->type->ops.mq.exit_hctx(hctx, i); - hctx->sched_data = NULL; - } + queue_for_each_hw_ctx(q, hctx, i) { + blk_mq_debugfs_unregister_sched_hctx(hctx); + if (e->type->ops.mq.exit_hctx && hctx->sched_data) { + e->type->ops.mq.exit_hctx(hctx, i); + hctx->sched_data = NULL; } } + blk_mq_debugfs_unregister_sched(q); if (e->type->ops.mq.exit_sched) e->type->ops.mq.exit_sched(e); blk_mq_sched_tags_teardown(q); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index de8ed9aaa156..c47aa248c640 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -60,6 +60,7 @@ struct 
blk_mq_hw_ctx { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; + struct dentry *sched_debugfs_dir; #endif }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b49a79a29e58..80ae958717a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -579,6 +579,7 @@ struct request_queue { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; + struct dentry *sched_debugfs_dir; #endif bool mq_sysfs_init_done; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d44840368ee7..9ec5e22846e0 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -8,6 +8,9 @@ struct io_cq; struct elevator_type; +#ifdef CONFIG_BLK_DEBUG_FS +struct blk_mq_debugfs_attr; +#endif /* * Return values from elevator merger @@ -144,6 +147,10 @@ struct elevator_type char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; bool uses_mq; +#ifdef CONFIG_BLK_DEBUG_FS + const struct blk_mq_debugfs_attr *queue_debugfs_attrs; + const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; +#endif /* managed by elevator core */ char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ From 16b738f651c83a01db057e5db02ec4b830af9130 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:33 -0700 Subject: [PATCH 27/28] kyber: add debugfs attributes Expose the domain token pools, asynchronous sbitmap depth, domain request lists, and batching state. Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 3 +- block/blk-mq-debugfs.h | 2 + block/kyber-iosched.c | 130 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index a3b887109310..8ec738f872e5 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -267,7 +267,7 @@ static const char *const rqf_name[] = { }; #undef RQF_NAME -static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) +int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) { struct request *rq = list_entry_rq(v); const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; @@ -291,6 +291,7 @@ static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) seq_puts(m, "}\n"); return 0; } +EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) __acquires(&hctx->lock) diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index a5ac21c81ea3..dd3bbfe74f46 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -14,6 +14,8 @@ struct blk_mq_debugfs_attr { const struct seq_operations *seq_ops; }; +int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); + int blk_mq_debugfs_register(struct request_queue *q); void blk_mq_debugfs_unregister(struct request_queue *q); int blk_mq_debugfs_register_hctx(struct request_queue *q, diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 3b0090bc5dd1..b9faabc75fdb 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -26,6 +26,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" #include "blk-stat.h" @@ -683,6 +684,131 @@ static struct elv_fs_entry kyber_sched_attrs[] = { }; #undef KYBER_LAT_ATTR +#ifdef CONFIG_BLK_DEBUG_FS +#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name) \ +static int kyber_##name##_tokens_show(void *data, struct seq_file *m) \ +{ \ + struct request_queue *q = data; \ + struct kyber_queue_data *kqd = q->elevator->elevator_data; \ + \ + 
sbitmap_queue_show(&kqd->domain_tokens[domain], m); \ + return 0; \ +} \ + \ +static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos) \ + __acquires(&khd->lock) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + spin_lock(&khd->lock); \ + return seq_list_start(&khd->rqs[domain], *pos); \ +} \ + \ +static void *kyber_##name##_rqs_next(struct seq_file *m, void *v, \ + loff_t *pos) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + return seq_list_next(v, &khd->rqs[domain], pos); \ +} \ + \ +static void kyber_##name##_rqs_stop(struct seq_file *m, void *v) \ + __releases(&khd->lock) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + spin_unlock(&khd->lock); \ +} \ + \ +static const struct seq_operations kyber_##name##_rqs_seq_ops = { \ + .start = kyber_##name##_rqs_start, \ + .next = kyber_##name##_rqs_next, \ + .stop = kyber_##name##_rqs_stop, \ + .show = blk_mq_debugfs_rq_show, \ +}; \ + \ +static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ +{ \ + struct blk_mq_hw_ctx *hctx = data; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + wait_queue_t *wait = &khd->domain_wait[domain]; \ + \ + seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list)); \ + return 0; \ +} +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other) +#undef KYBER_DEBUGFS_DOMAIN_ATTRS + +static int kyber_async_depth_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct kyber_queue_data *kqd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", kqd->async_depth); + return 0; +} + +static int kyber_cur_domain_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct kyber_hctx_data *khd = hctx->sched_data; + + switch (khd->cur_domain) { + case KYBER_READ: + seq_puts(m, "READ\n"); + break; + case KYBER_SYNC_WRITE: + seq_puts(m, "SYNC_WRITE\n"); + break; + case KYBER_OTHER: + seq_puts(m, "OTHER\n"); + break; + default: + seq_printf(m, "%u\n", khd->cur_domain); + break; + } + return 0; +} + +static int kyber_batching_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct kyber_hctx_data *khd = hctx->sched_data; + + seq_printf(m, "%u\n", khd->batching); + return 0; +} + +#define KYBER_QUEUE_DOMAIN_ATTRS(name) \ + {#name "_tokens", 0400, kyber_##name##_tokens_show} +static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { + KYBER_QUEUE_DOMAIN_ATTRS(read), + KYBER_QUEUE_DOMAIN_ATTRS(sync_write), + KYBER_QUEUE_DOMAIN_ATTRS(other), + {"async_depth", 0400, kyber_async_depth_show}, + {}, +}; +#undef KYBER_QUEUE_DOMAIN_ATTRS + +#define KYBER_HCTX_DOMAIN_ATTRS(name) \ + {#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops}, \ + {#name "_waiting", 0400, kyber_##name##_waiting_show} +static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = { + KYBER_HCTX_DOMAIN_ATTRS(read), + KYBER_HCTX_DOMAIN_ATTRS(sync_write), + KYBER_HCTX_DOMAIN_ATTRS(other), + {"cur_domain", 0400, kyber_cur_domain_show}, + {"batching", 0400, kyber_batching_show}, + {}, +}; +#undef KYBER_HCTX_DOMAIN_ATTRS +#endif + static struct elevator_type kyber_sched = { .ops.mq = { .init_sched = kyber_init_sched, @@ -696,6 +822,10 @@ static struct elevator_type kyber_sched = { .has_work = kyber_has_work, }, .uses_mq = true, +#ifdef 
CONFIG_BLK_DEBUG_FS + .queue_debugfs_attrs = kyber_queue_debugfs_attrs, + .hctx_debugfs_attrs = kyber_hctx_debugfs_attrs, +#endif .elevator_attrs = kyber_sched_attrs, .elevator_name = "kyber", .elevator_owner = THIS_MODULE, From daaadb3e9453ab89c2e113a2d1df8e19e30944cc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 00:31:34 -0700 Subject: [PATCH 28/28] mq-deadline: add debugfs attributes Expose the fifo lists, cached next requests, batching state, and dispatch list. It'd also be possible to add the sorted lists, but there aren't already seq_file helpers for rbtrees. Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 9 ++- block/blk-mq-debugfs.h | 1 + block/mq-deadline.c | 123 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 2 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 8ec738f872e5..803aed4d7221 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -267,9 +267,8 @@ static const char *const rqf_name[] = { }; #undef RQF_NAME -int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) +int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) { - struct request *rq = list_entry_rq(v); const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; const unsigned int op = rq->cmd_flags & REQ_OP_MASK; @@ -291,6 +290,12 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) seq_puts(m, "}\n"); return 0; } +EXPORT_SYMBOL_GPL(__blk_mq_debugfs_rq_show); + +int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) +{ + return __blk_mq_debugfs_rq_show(m, list_entry_rq(v)); +} EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index dd3bbfe74f46..a182e6f97565 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -14,6 +14,7 @@ struct blk_mq_debugfs_attr { const struct seq_operations *seq_ops; }; +int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); int blk_mq_debugfs_register(struct request_queue *q); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 236121633ca0..1b964a387afe 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -19,6 +19,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" #include "blk-mq-sched.h" @@ -517,6 +518,125 @@ static struct elv_fs_entry deadline_attrs[] = { __ATTR_NULL }; +#ifdef CONFIG_BLK_DEBUG_FS +#define DEADLINE_DEBUGFS_DDIR_ATTRS(ddir, name) \ +static void *deadline_##name##_fifo_start(struct seq_file *m, \ + loff_t *pos) \ + __acquires(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + spin_lock(&dd->lock); \ + return seq_list_start(&dd->fifo_list[ddir], *pos); \ +} \ + \ +static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \ + loff_t *pos) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + return seq_list_next(v, &dd->fifo_list[ddir], pos); \ +} \ + \ +static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \ + __releases(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + spin_unlock(&dd->lock); \ +} \ + \ +static const struct seq_operations deadline_##name##_fifo_seq_ops = { \ + .start = 
deadline_##name##_fifo_start, \ + .next = deadline_##name##_fifo_next, \ + .stop = deadline_##name##_fifo_stop, \ + .show = blk_mq_debugfs_rq_show, \ +}; \ + \ +static int deadline_##name##_next_rq_show(void *data, \ + struct seq_file *m) \ +{ \ + struct request_queue *q = data; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + struct request *rq = dd->next_rq[ddir]; \ + \ + if (rq) \ + __blk_mq_debugfs_rq_show(m, rq); \ + return 0; \ +} +DEADLINE_DEBUGFS_DDIR_ATTRS(READ, read) +DEADLINE_DEBUGFS_DDIR_ATTRS(WRITE, write) +#undef DEADLINE_DEBUGFS_DDIR_ATTRS + +static int deadline_batching_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->batching); + return 0; +} + +static int deadline_starved_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->starved); + return 0; +} + +static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos) + __acquires(&dd->lock) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + spin_lock(&dd->lock); + return seq_list_start(&dd->dispatch, *pos); +} + +static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + return seq_list_next(v, &dd->dispatch, pos); +} + +static void deadline_dispatch_stop(struct seq_file *m, void *v) + __releases(&dd->lock) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + spin_unlock(&dd->lock); +} + +static const struct seq_operations deadline_dispatch_seq_ops = { + .start = deadline_dispatch_start, + .next = deadline_dispatch_next, + .stop = deadline_dispatch_stop, + .show = blk_mq_debugfs_rq_show, +}; + +#define DEADLINE_QUEUE_DDIR_ATTRS(name) \ + {#name "_fifo_list", 0400, .seq_ops = &deadline_##name##_fifo_seq_ops}, \ + {#name "_next_rq", 0400, deadline_##name##_next_rq_show} +static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { + DEADLINE_QUEUE_DDIR_ATTRS(read), + DEADLINE_QUEUE_DDIR_ATTRS(write), + {"batching", 0400, deadline_batching_show}, + {"starved", 0400, deadline_starved_show}, + {"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops}, + {}, +}; +#undef DEADLINE_QUEUE_DDIR_ATTRS +#endif + static struct elevator_type mq_deadline = { .ops.mq = { .insert_requests = dd_insert_requests, @@ -533,6 +653,9 @@ static struct elevator_type mq_deadline = { }, .uses_mq = true, +#ifdef CONFIG_BLK_DEBUG_FS + .queue_debugfs_attrs = deadline_queue_debugfs_attrs, +#endif .elevator_attrs = deadline_attrs, .elevator_name = "mq-deadline", .elevator_owner = THIS_MODULE,
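
[Usage note -- not part of the series.] Patches 26-28 above show the full wiring for kyber and mq-deadline. For a quick orientation, the sketch below shows the minimum a blk-mq scheduler needs in order to expose a debugfs attribute through this infrastructure: a show() callback matching struct blk_mq_debugfs_attr, a NULL-terminated attribute array, and the queue_debugfs_attrs pointer in struct elevator_type. The scheduler name ("example"), its private struct example_queue_data, and the depth field are placeholders invented here for illustration; only blk_mq_debugfs_attr and the queue_debugfs_attrs/hctx_debugfs_attrs members come from the patches in this series.

	/*
	 * Illustrative sketch only. Assumes a hypothetical scheduler whose
	 * q->elevator->elevator_data points to struct example_queue_data.
	 */
	#include <linux/blkdev.h>
	#include <linux/elevator.h>
	#include <linux/seq_file.h>

	#include "blk-mq-debugfs.h"

	struct example_queue_data {
		unsigned int depth;	/* hypothetical per-queue state */
	};

	/* Queue-level .show callback: 'data' is the struct request_queue. */
	static int example_depth_show(void *data, struct seq_file *m)
	{
		struct request_queue *q = data;
		struct example_queue_data *eqd = q->elevator->elevator_data;

		seq_printf(m, "%u\n", eqd->depth);
		return 0;
	}

	#ifdef CONFIG_BLK_DEBUG_FS
	/* Terminated with an empty entry, as in kyber and mq-deadline. */
	static const struct blk_mq_debugfs_attr example_queue_debugfs_attrs[] = {
		{"depth", 0400, example_depth_show},
		{},
	};
	#endif

	static struct elevator_type example_sched = {
		/* .ops.mq, .elevator_name, .uses_mq = true, etc. elided */
	#ifdef CONFIG_BLK_DEBUG_FS
		.queue_debugfs_attrs = example_queue_debugfs_attrs,
	#endif
		.elevator_owner = THIS_MODULE,
	};

With this in place, blk_mq_debugfs_register_sched() creates the "sched" directory and its files automatically when the scheduler is switched in, so the attribute typically appears as /sys/kernel/debug/block/<disk>/sched/depth. Per-hctx attributes work the same way via hctx_debugfs_attrs, except that the .show callback receives the struct blk_mq_hw_ctx instead of the request queue, as the kyber and mq-deadline patches above demonstrate.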