block: move queue types to the block layer
Having another indirect call in the fast path doesn't really help in our post-Spectre world. Also, having too many queue types is just going to create confusion, so I'd rather manage them centrally. Note that the queue type naming and ordering changes a bit - the first index now is the default queue for everything not explicitly marked, the optional ones are read and poll queues. Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
154989e45f
commit
e20ba6e1da
|
@ -173,9 +173,16 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *const hctx_types[] = {
|
||||||
|
[HCTX_TYPE_DEFAULT] = "default",
|
||||||
|
[HCTX_TYPE_READ] = "read",
|
||||||
|
[HCTX_TYPE_POLL] = "poll",
|
||||||
|
};
|
||||||
|
|
||||||
static ssize_t blk_mq_hw_sysfs_type_show(struct blk_mq_hw_ctx *hctx, char *page)
|
static ssize_t blk_mq_hw_sysfs_type_show(struct blk_mq_hw_ctx *hctx, char *page)
|
||||||
{
|
{
|
||||||
return sprintf(page, "%u\n", hctx->type);
|
BUILD_BUG_ON(ARRAY_SIZE(hctx_types) != HCTX_MAX_TYPES);
|
||||||
|
return sprintf(page, "%s\n", hctx_types[hctx->type]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct attribute *default_ctx_attrs[] = {
|
static struct attribute *default_ctx_attrs[] = {
|
||||||
|
|
|
@ -81,16 +81,14 @@ extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int);
|
||||||
/*
|
/*
|
||||||
* blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue
|
* blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue
|
||||||
* @q: request queue
|
* @q: request queue
|
||||||
* @hctx_type: the hctx type index
|
* @type: the hctx type index
|
||||||
* @cpu: CPU
|
* @cpu: CPU
|
||||||
*/
|
*/
|
||||||
static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q,
|
static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q,
|
||||||
unsigned int hctx_type,
|
enum hctx_type type,
|
||||||
unsigned int cpu)
|
unsigned int cpu)
|
||||||
{
|
{
|
||||||
struct blk_mq_tag_set *set = q->tag_set;
|
return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
|
||||||
|
|
||||||
return q->queue_hw_ctx[set->map[hctx_type].mq_map[cpu]];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -103,12 +101,17 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
|
||||||
unsigned int flags,
|
unsigned int flags,
|
||||||
unsigned int cpu)
|
unsigned int cpu)
|
||||||
{
|
{
|
||||||
int hctx_type = 0;
|
enum hctx_type type = HCTX_TYPE_DEFAULT;
|
||||||
|
|
||||||
if (q->mq_ops->rq_flags_to_type)
|
if (q->tag_set->nr_maps > HCTX_TYPE_POLL &&
|
||||||
hctx_type = q->mq_ops->rq_flags_to_type(q, flags);
|
((flags & REQ_HIPRI) && test_bit(QUEUE_FLAG_POLL, &q->queue_flags)))
|
||||||
|
type = HCTX_TYPE_POLL;
|
||||||
|
|
||||||
return blk_mq_map_queue_type(q, hctx_type, cpu);
|
else if (q->tag_set->nr_maps > HCTX_TYPE_READ &&
|
||||||
|
((flags & REQ_OP_MASK) == REQ_OP_READ))
|
||||||
|
type = HCTX_TYPE_READ;
|
||||||
|
|
||||||
|
return blk_mq_map_queue_type(q, type, cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -95,13 +95,6 @@ struct nvme_queue;
|
||||||
|
|
||||||
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
|
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
|
||||||
|
|
||||||
enum {
|
|
||||||
NVMEQ_TYPE_READ,
|
|
||||||
NVMEQ_TYPE_WRITE,
|
|
||||||
NVMEQ_TYPE_POLL,
|
|
||||||
NVMEQ_TYPE_NR,
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Represents an NVM Express device. Each nvme_dev is a PCI function.
|
* Represents an NVM Express device. Each nvme_dev is a PCI function.
|
||||||
*/
|
*/
|
||||||
|
@ -115,7 +108,7 @@ struct nvme_dev {
|
||||||
struct dma_pool *prp_small_pool;
|
struct dma_pool *prp_small_pool;
|
||||||
unsigned online_queues;
|
unsigned online_queues;
|
||||||
unsigned max_qid;
|
unsigned max_qid;
|
||||||
unsigned io_queues[NVMEQ_TYPE_NR];
|
unsigned io_queues[HCTX_MAX_TYPES];
|
||||||
unsigned int num_vecs;
|
unsigned int num_vecs;
|
||||||
int q_depth;
|
int q_depth;
|
||||||
u32 db_stride;
|
u32 db_stride;
|
||||||
|
@ -499,10 +492,10 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
|
||||||
|
|
||||||
map->nr_queues = dev->io_queues[i];
|
map->nr_queues = dev->io_queues[i];
|
||||||
if (!map->nr_queues) {
|
if (!map->nr_queues) {
|
||||||
BUG_ON(i == NVMEQ_TYPE_READ);
|
BUG_ON(i == HCTX_TYPE_DEFAULT);
|
||||||
|
|
||||||
/* shared set, reuse read set parameters */
|
/* shared set, reuse default set parameters */
|
||||||
map->nr_queues = dev->io_queues[NVMEQ_TYPE_READ];
|
map->nr_queues = dev->io_queues[HCTX_TYPE_DEFAULT];
|
||||||
qoff = 0;
|
qoff = 0;
|
||||||
offset = queue_irq_offset(dev);
|
offset = queue_irq_offset(dev);
|
||||||
}
|
}
|
||||||
|
@ -512,7 +505,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
|
||||||
* affinity), so use the regular blk-mq cpu mapping
|
* affinity), so use the regular blk-mq cpu mapping
|
||||||
*/
|
*/
|
||||||
map->queue_offset = qoff;
|
map->queue_offset = qoff;
|
||||||
if (i != NVMEQ_TYPE_POLL)
|
if (i != HCTX_TYPE_POLL)
|
||||||
blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
|
blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
|
||||||
else
|
else
|
||||||
blk_mq_map_queues(map);
|
blk_mq_map_queues(map);
|
||||||
|
@ -961,16 +954,6 @@ out_free_cmd:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int nvme_rq_flags_to_type(struct request_queue *q, unsigned int flags)
|
|
||||||
{
|
|
||||||
if ((flags & REQ_HIPRI) && test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
|
|
||||||
return NVMEQ_TYPE_POLL;
|
|
||||||
if ((flags & REQ_OP_MASK) == REQ_OP_READ)
|
|
||||||
return NVMEQ_TYPE_READ;
|
|
||||||
|
|
||||||
return NVMEQ_TYPE_WRITE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void nvme_pci_complete_rq(struct request *req)
|
static void nvme_pci_complete_rq(struct request *req)
|
||||||
{
|
{
|
||||||
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
||||||
|
@ -1634,7 +1617,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
|
||||||
#define NVME_SHARED_MQ_OPS \
|
#define NVME_SHARED_MQ_OPS \
|
||||||
.queue_rq = nvme_queue_rq, \
|
.queue_rq = nvme_queue_rq, \
|
||||||
.commit_rqs = nvme_commit_rqs, \
|
.commit_rqs = nvme_commit_rqs, \
|
||||||
.rq_flags_to_type = nvme_rq_flags_to_type, \
|
|
||||||
.complete = nvme_pci_complete_rq, \
|
.complete = nvme_pci_complete_rq, \
|
||||||
.init_hctx = nvme_init_hctx, \
|
.init_hctx = nvme_init_hctx, \
|
||||||
.init_request = nvme_init_request, \
|
.init_request = nvme_init_request, \
|
||||||
|
@ -1785,9 +1767,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
|
||||||
}
|
}
|
||||||
|
|
||||||
max = min(dev->max_qid, dev->ctrl.queue_count - 1);
|
max = min(dev->max_qid, dev->ctrl.queue_count - 1);
|
||||||
if (max != 1 && dev->io_queues[NVMEQ_TYPE_POLL]) {
|
if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) {
|
||||||
rw_queues = dev->io_queues[NVMEQ_TYPE_READ] +
|
rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] +
|
||||||
dev->io_queues[NVMEQ_TYPE_WRITE];
|
dev->io_queues[HCTX_TYPE_READ];
|
||||||
} else {
|
} else {
|
||||||
rw_queues = max;
|
rw_queues = max;
|
||||||
}
|
}
|
||||||
|
@ -2076,9 +2058,9 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues)
|
||||||
* Setup read/write queue split
|
* Setup read/write queue split
|
||||||
*/
|
*/
|
||||||
if (nr_io_queues == 1) {
|
if (nr_io_queues == 1) {
|
||||||
dev->io_queues[NVMEQ_TYPE_READ] = 1;
|
dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
|
||||||
dev->io_queues[NVMEQ_TYPE_WRITE] = 0;
|
dev->io_queues[HCTX_TYPE_READ] = 0;
|
||||||
dev->io_queues[NVMEQ_TYPE_POLL] = 0;
|
dev->io_queues[HCTX_TYPE_POLL] = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2095,10 +2077,10 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues)
|
||||||
this_p_queues = nr_io_queues - 1;
|
this_p_queues = nr_io_queues - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
dev->io_queues[NVMEQ_TYPE_POLL] = this_p_queues;
|
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
|
||||||
nr_io_queues -= this_p_queues;
|
nr_io_queues -= this_p_queues;
|
||||||
} else
|
} else
|
||||||
dev->io_queues[NVMEQ_TYPE_POLL] = 0;
|
dev->io_queues[HCTX_TYPE_POLL] = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If 'write_queues' is set, ensure it leaves room for at least
|
* If 'write_queues' is set, ensure it leaves room for at least
|
||||||
|
@ -2112,11 +2094,11 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues)
|
||||||
* a queue set.
|
* a queue set.
|
||||||
*/
|
*/
|
||||||
if (!this_w_queues) {
|
if (!this_w_queues) {
|
||||||
dev->io_queues[NVMEQ_TYPE_WRITE] = 0;
|
dev->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues;
|
||||||
dev->io_queues[NVMEQ_TYPE_READ] = nr_io_queues;
|
dev->io_queues[HCTX_TYPE_READ] = 0;
|
||||||
} else {
|
} else {
|
||||||
dev->io_queues[NVMEQ_TYPE_WRITE] = this_w_queues;
|
dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
|
||||||
dev->io_queues[NVMEQ_TYPE_READ] = nr_io_queues - this_w_queues;
|
dev->io_queues[HCTX_TYPE_READ] = nr_io_queues - this_w_queues;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2138,8 +2120,8 @@ static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues)
|
||||||
*/
|
*/
|
||||||
do {
|
do {
|
||||||
nvme_calc_io_queues(dev, nr_io_queues);
|
nvme_calc_io_queues(dev, nr_io_queues);
|
||||||
irq_sets[0] = dev->io_queues[NVMEQ_TYPE_READ];
|
irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT];
|
||||||
irq_sets[1] = dev->io_queues[NVMEQ_TYPE_WRITE];
|
irq_sets[1] = dev->io_queues[HCTX_TYPE_READ];
|
||||||
if (!irq_sets[1])
|
if (!irq_sets[1])
|
||||||
affd.nr_sets = 1;
|
affd.nr_sets = 1;
|
||||||
|
|
||||||
|
@ -2226,12 +2208,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
|
||||||
|
|
||||||
dev->num_vecs = result;
|
dev->num_vecs = result;
|
||||||
result = max(result - 1, 1);
|
result = max(result - 1, 1);
|
||||||
dev->max_qid = result + dev->io_queues[NVMEQ_TYPE_POLL];
|
dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];
|
||||||
|
|
||||||
dev_info(dev->ctrl.device, "%d/%d/%d read/write/poll queues\n",
|
dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
|
||||||
dev->io_queues[NVMEQ_TYPE_READ],
|
dev->io_queues[HCTX_TYPE_DEFAULT],
|
||||||
dev->io_queues[NVMEQ_TYPE_WRITE],
|
dev->io_queues[HCTX_TYPE_READ],
|
||||||
dev->io_queues[NVMEQ_TYPE_POLL]);
|
dev->io_queues[HCTX_TYPE_POLL]);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Should investigate if there's a performance win from allocating
|
* Should investigate if there's a performance win from allocating
|
||||||
|
@ -2332,13 +2314,13 @@ static int nvme_dev_add(struct nvme_dev *dev)
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!dev->ctrl.tagset) {
|
if (!dev->ctrl.tagset) {
|
||||||
if (!dev->io_queues[NVMEQ_TYPE_POLL])
|
if (!dev->io_queues[HCTX_TYPE_POLL])
|
||||||
dev->tagset.ops = &nvme_mq_ops;
|
dev->tagset.ops = &nvme_mq_ops;
|
||||||
else
|
else
|
||||||
dev->tagset.ops = &nvme_mq_poll_noirq_ops;
|
dev->tagset.ops = &nvme_mq_poll_noirq_ops;
|
||||||
|
|
||||||
dev->tagset.nr_hw_queues = dev->online_queues - 1;
|
dev->tagset.nr_hw_queues = dev->online_queues - 1;
|
||||||
dev->tagset.nr_maps = NVMEQ_TYPE_NR;
|
dev->tagset.nr_maps = HCTX_MAX_TYPES;
|
||||||
dev->tagset.timeout = NVME_IO_TIMEOUT;
|
dev->tagset.timeout = NVME_IO_TIMEOUT;
|
||||||
dev->tagset.numa_node = dev_to_node(dev->dev);
|
dev->tagset.numa_node = dev_to_node(dev->dev);
|
||||||
dev->tagset.queue_depth =
|
dev->tagset.queue_depth =
|
||||||
|
|
|
@ -81,8 +81,12 @@ struct blk_mq_queue_map {
|
||||||
unsigned int queue_offset;
|
unsigned int queue_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum hctx_type {
|
||||||
HCTX_MAX_TYPES = 3,
|
HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
|
||||||
|
HCTX_TYPE_READ, /* just for READ I/O */
|
||||||
|
HCTX_TYPE_POLL, /* polled I/O of any kind */
|
||||||
|
|
||||||
|
HCTX_MAX_TYPES,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct blk_mq_tag_set {
|
struct blk_mq_tag_set {
|
||||||
|
@ -118,8 +122,6 @@ struct blk_mq_queue_data {
|
||||||
typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
|
typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
|
||||||
const struct blk_mq_queue_data *);
|
const struct blk_mq_queue_data *);
|
||||||
typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *);
|
typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *);
|
||||||
/* takes rq->cmd_flags as input, returns a hardware type index */
|
|
||||||
typedef int (rq_flags_to_type_fn)(struct request_queue *, unsigned int);
|
|
||||||
typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
|
typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
|
||||||
typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
|
typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
|
||||||
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
|
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
|
||||||
|
@ -154,11 +156,6 @@ struct blk_mq_ops {
|
||||||
*/
|
*/
|
||||||
commit_rqs_fn *commit_rqs;
|
commit_rqs_fn *commit_rqs;
|
||||||
|
|
||||||
/*
|
|
||||||
* Return a queue map type for the given request/bio flags
|
|
||||||
*/
|
|
||||||
rq_flags_to_type_fn *rq_flags_to_type;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reserve budget before queue request, once .queue_rq is
|
* Reserve budget before queue request, once .queue_rq is
|
||||||
* run, it is driver's responsibility to release the
|
* run, it is driver's responsibility to release the
|
||||||
|
|
Loading…
Reference in New Issue