Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block updates and fixes from Jens Axboe:

 - NVMe updates and fixes that missed the first pull request. This
   includes bug fixes, and support for autonomous power management.

 - Fix from Christoph for missing clear of the request payload, causing
   a problem with (at least) the storvsc driver.

 - Further fixes for the queue/bdi life time issues from Jan.

 - The Kconfig mq scheduler update from me.

 - Fixing a use-after-free in dm-rq, spotted by Bart, introduced in this
   merge window.

 - Three fixes for nbd from Josef.

 - Bug fix from Omar, fixing a bug in sas transport code that oopses
   when bsg ioctls were used. From Omar.

 - Improvements to the queue restart and tag wait from from Omar.

 - Set of fixes for the sed/opal code from Scott.

 - Three trivial patches to cciss from Tobin

* 'for-linus' of git://git.kernel.dk/linux-block: (41 commits)
  dm-rq: don't dereference request payload after ending request
  blk-mq-sched: separate mark hctx and queue restart operations
  blk-mq: use sbq wait queues instead of restart for driver tags
  block/sed-opal: Propagate original error message to userland.
  nvme/pci: re-check security protocol support after reset
  block/sed-opal: Introduce free_opal_dev to free the structure and clean up state
  nvme: detect NVMe controller in recent MacBooks
  nvme-rdma: add support for host_traddr
  nvmet-rdma: Fix error handling
  nvmet-rdma: use nvme cm status helper
  nvme-rdma: move nvme cm status helper to .h file
  nvme-fc: don't bother to validate ioccsz and iorcsz
  nvme/pci: No special case for queue busy on IO
  nvme/core: Fix race kicking freed request_queue
  nvme/pci: Disable on removal when disconnected
  nvme: Enable autonomous power state transitions
  nvme: Add a quirk mechanism that uses identify_ctrl
  nvme: make nvmf_register_transport require a create_ctrl callback
  nvme: Use CNS as 8-bit field and avoid endianness conversion
  nvme: add semicolon in nvme_command setting
  ...
This commit is contained in:
Linus Torvalds 2017-02-24 14:13:34 -08:00
commit 1802979ab1
32 changed files with 991 additions and 766 deletions

View File

@ -69,50 +69,6 @@ config MQ_IOSCHED_DEADLINE
---help---
MQ version of the deadline IO scheduler.
config MQ_IOSCHED_NONE
bool
default y
choice
prompt "Default single-queue blk-mq I/O scheduler"
default DEFAULT_SQ_NONE
help
Select the I/O scheduler which will be used by default for blk-mq
managed block devices with a single queue.
config DEFAULT_SQ_DEADLINE
bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
config DEFAULT_SQ_NONE
bool "None"
endchoice
config DEFAULT_SQ_IOSCHED
string
default "mq-deadline" if DEFAULT_SQ_DEADLINE
default "none" if DEFAULT_SQ_NONE
choice
prompt "Default multi-queue blk-mq I/O scheduler"
default DEFAULT_MQ_NONE
help
Select the I/O scheduler which will be used by default for blk-mq
managed block devices with multiple queues.
config DEFAULT_MQ_DEADLINE
bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
config DEFAULT_MQ_NONE
bool "None"
endchoice
config DEFAULT_MQ_IOSCHED
string
default "mq-deadline" if DEFAULT_MQ_DEADLINE
default "none" if DEFAULT_MQ_NONE
endmenu
endif

View File

@ -205,7 +205,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
* needing a restart in that case.
*/
if (!list_empty(&rq_list)) {
blk_mq_sched_mark_restart(hctx);
blk_mq_sched_mark_restart_hctx(hctx);
did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
} else if (!has_sched_dispatch) {
blk_mq_flush_busy_ctxs(hctx, &rq_list);
@ -331,20 +331,16 @@ static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
unsigned int i;
if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_sched_restart_hctx(hctx);
}
} else {
blk_mq_sched_restart_hctx(hctx);
else {
struct request_queue *q = hctx->queue;
if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
return;
clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_sched_restart_hctx(hctx);
}
}
@ -498,15 +494,6 @@ int blk_mq_sched_init(struct request_queue *q)
{
int ret;
#if defined(CONFIG_DEFAULT_SQ_NONE)
if (q->nr_hw_queues == 1)
return 0;
#endif
#if defined(CONFIG_DEFAULT_MQ_NONE)
if (q->nr_hw_queues > 1)
return 0;
#endif
mutex_lock(&q->sysfs_lock);
ret = elevator_init(q, NULL);
mutex_unlock(&q->sysfs_lock);

View File

@ -122,17 +122,27 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
return false;
}
static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
/*
* Mark a hardware queue as needing a restart.
*/
static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;
}
if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
}
}
/*
* Mark a hardware queue and the request queue it belongs to as needing a
* restart.
*/
static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
}
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)

View File

@ -904,6 +904,44 @@ static bool reorder_tags_to_front(struct list_head *list)
return first != NULL;
}
static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags,
void *key)
{
struct blk_mq_hw_ctx *hctx;
hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
list_del(&wait->task_list);
clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
blk_mq_run_hw_queue(hctx, true);
return 1;
}
static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
{
struct sbq_wait_state *ws;
/*
* The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.
* The thread which wins the race to grab this bit adds the hardware
* queue to the wait queue.
*/
if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||
test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))
return false;
init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
/*
* As soon as this returns, it's no longer safe to fiddle with
* hctx->dispatch_wait, since a completion can wake up the wait queue
* and unlock the bit.
*/
add_wait_queue(&ws->wait, &hctx->dispatch_wait);
return true;
}
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
{
struct request_queue *q = hctx->queue;
@ -931,15 +969,22 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
continue;
/*
* We failed getting a driver tag. Mark the queue(s)
* as needing a restart. Retry getting a tag again,
* in case the needed IO completed right before we
* marked the queue as needing a restart.
* The initial allocation attempt failed, so we need to
* rerun the hardware queue when a tag is freed.
*/
blk_mq_sched_mark_restart(hctx);
if (!blk_mq_get_driver_tag(rq, &hctx, false))
if (blk_mq_dispatch_wait_add(hctx)) {
/*
* It's possible that a tag was freed in the
* window between the allocation failure and
* adding the hardware queue to the wait queue.
*/
if (!blk_mq_get_driver_tag(rq, &hctx, false))
break;
} else {
break;
}
}
list_del_init(&rq->queuelist);
bd.rq = rq;
@ -995,10 +1040,11 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
*
* blk_mq_run_hw_queue() already checks the STOPPED bit
*
* If RESTART is set, then let completion restart the queue
* instead of potentially looping here.
* If RESTART or TAG_WAITING is set, then let completion restart
* the queue instead of potentially looping here.
*/
if (!blk_mq_sched_needs_restart(hctx))
if (!blk_mq_sched_needs_restart(hctx) &&
!test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
blk_mq_run_hw_queue(hctx, true);
}

View File

@ -220,17 +220,24 @@ int elevator_init(struct request_queue *q, char *name)
}
if (!e) {
if (q->mq_ops && q->nr_hw_queues == 1)
e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
else if (q->mq_ops)
e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
else
/*
* For blk-mq devices, we default to using mq-deadline,
* if available, for single queue devices. If deadline
* isn't available OR we have multiple queues, default
* to "none".
*/
if (q->mq_ops) {
if (q->nr_hw_queues == 1)
e = elevator_get("mq-deadline", false);
if (!e)
return 0;
} else
e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
if (!e) {
printk(KERN_ERR
"Default I/O scheduler not found. " \
"Using noop/none.\n");
"Using noop.\n");
e = elevator_get("noop", false);
}
}

View File

@ -669,14 +669,14 @@ void del_gendisk(struct gendisk *disk)
disk_part_iter_init(&piter, disk,
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
while ((part = disk_part_iter_next(&piter))) {
bdev_unhash_inode(MKDEV(disk->major,
disk->first_minor + part->partno));
invalidate_partition(disk, part->partno);
bdev_unhash_inode(part_devt(part));
delete_partition(disk, part->partno);
}
disk_part_iter_exit(&piter);
invalidate_partition(disk, 0);
bdev_unhash_inode(disk_devt(disk));
set_capacity(disk, 0);
disk->flags &= ~GENHD_FL_UP;

File diff suppressed because it is too large Load Diff

View File

@ -17,15 +17,15 @@
* 02111-1307, USA.
*
* Questions/Comments/Bugfixes to iss_storagedev@hp.com
*
*
* Author: Stephen M. Cameron
*/
#ifdef CONFIG_CISS_SCSI_TAPE
/* Here we have code to present the driver as a scsi driver
as it is simultaneously presented as a block driver. The
/* Here we have code to present the driver as a scsi driver
as it is simultaneously presented as a block driver. The
reason for doing this is to allow access to SCSI tape drives
through the array controller. Note in particular, neither
through the array controller. Note in particular, neither
physical nor logical disks are presented through the scsi layer. */
#include <linux/timer.h>
@ -37,7 +37,7 @@
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_host.h>
#include "cciss_scsi.h"
@ -120,7 +120,7 @@ struct cciss_scsi_adapter_data_t {
struct cciss_scsi_cmd_stack_t cmd_stack;
SGDescriptor_struct **cmd_sg_list;
int registered;
spinlock_t lock; // to protect ccissscsi[ctlr];
spinlock_t lock; // to protect ccissscsi[ctlr];
};
#define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \
@ -143,36 +143,36 @@ scsi_cmd_alloc(ctlr_info_t *h)
u64bit temp64;
sa = h->scsi_ctlr;
stk = &sa->cmd_stack;
stk = &sa->cmd_stack;
if (stk->top < 0)
if (stk->top < 0)
return NULL;
c = stk->elem[stk->top];
c = stk->elem[stk->top];
/* memset(c, 0, sizeof(*c)); */
memset(&c->cmd, 0, sizeof(c->cmd));
memset(&c->Err, 0, sizeof(c->Err));
/* set physical addr of cmd and addr of scsi parameters */
c->cmd.busaddr = c->busaddr;
c->cmd.busaddr = c->busaddr;
c->cmd.cmdindex = c->cmdindex;
/* (__u32) (stk->cmd_pool_handle +
/* (__u32) (stk->cmd_pool_handle +
(sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top)); */
temp64.val = (__u64) (c->busaddr + sizeof(CommandList_struct));
/* (__u64) (stk->cmd_pool_handle +
/* (__u64) (stk->cmd_pool_handle +
(sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top) +
sizeof(CommandList_struct)); */
stk->top--;
c->cmd.ErrDesc.Addr.lower = temp64.val32.lower;
c->cmd.ErrDesc.Addr.upper = temp64.val32.upper;
c->cmd.ErrDesc.Len = sizeof(ErrorInfo_struct);
c->cmd.ctlr = h->ctlr;
c->cmd.err_info = &c->Err;
return (CommandList_struct *) c;
}
static void
static void
scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
{
/* assume only one process in here at a time, locking done by caller. */
@ -183,7 +183,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
struct cciss_scsi_cmd_stack_t *stk;
sa = h->scsi_ctlr;
stk = &sa->cmd_stack;
stk = &sa->cmd_stack;
stk->top++;
if (stk->top >= stk->nelems) {
dev_err(&h->pdev->dev,
@ -228,7 +228,7 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
}
for (i = 0; i < stk->nelems; i++) {
stk->elem[i] = &stk->pool[i];
stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle +
stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle +
(sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
stk->elem[i]->cmdindex = i;
}
@ -244,7 +244,7 @@ scsi_cmd_stack_free(ctlr_info_t *h)
size_t size;
sa = h->scsi_ctlr;
stk = &sa->cmd_stack;
stk = &sa->cmd_stack;
if (stk->top != stk->nelems-1) {
dev_warn(&h->pdev->dev,
"bug: %d scsi commands are still outstanding.\n",
@ -266,7 +266,7 @@ print_cmd(CommandList_struct *cp)
printk("queue:%d\n", cp->Header.ReplyQueue);
printk("sglist:%d\n", cp->Header.SGList);
printk("sgtot:%d\n", cp->Header.SGTotal);
printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper,
printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper,
cp->Header.Tag.lower);
printk("LUN:0x%8phN\n", cp->Header.LUN.LunAddrBytes);
printk("CDBLen:%d\n", cp->Request.CDBLen);
@ -275,8 +275,8 @@ print_cmd(CommandList_struct *cp)
printk(" Dir:%d\n",cp->Request.Type.Direction);
printk("Timeout:%d\n",cp->Request.Timeout);
printk("CDB: %16ph\n", cp->Request.CDB);
printk("edesc.Addr: 0x%08x/0%08x, Len = %d\n",
cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower,
printk("edesc.Addr: 0x%08x/0%08x, Len = %d\n",
cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower,
cp->ErrDesc.Len);
printk("sgs..........Errorinfo:\n");
printk("scsistatus:%d\n", cp->err_info->ScsiStatus);
@ -289,7 +289,7 @@ print_cmd(CommandList_struct *cp)
}
#endif
static int
static int
find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
{
/* finds an unused bus, target, lun for a new device */
@ -299,24 +299,24 @@ find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA);
target_taken[SELF_SCSI_ID] = 1;
target_taken[SELF_SCSI_ID] = 1;
for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++)
target_taken[ccissscsi[h->ctlr].dev[i].target] = 1;
for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) {
if (!target_taken[i]) {
*bus = 0; *target=i; *lun = 0; found=1;
break;
}
}
return (!found);
return (!found);
}
struct scsi2map {
char scsi3addr[8];
int bus, target, lun;
};
static int
static int
cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
struct cciss_scsi_dev_t *device,
struct scsi2map *added, int *nadded)
@ -381,8 +381,8 @@ cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
ccissscsi[h->ctlr].ndevices++;
/* initially, (before registering with scsi layer) we don't
know our hostno and we don't want to print anything first
/* initially, (before registering with scsi layer) we don't
know our hostno and we don't want to print anything first
time anyway (the scsi layer's inquiries will show that info) */
if (hostno != -1)
dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
@ -467,7 +467,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
/* sd contains scsi3 addresses and devtypes, but
bus target and lun are not filled in. This funciton
takes what's in sd to be the current and adjusts
ccissscsi[] to be in line with what's in sd. */
ccissscsi[] to be in line with what's in sd. */
int i,j, found, changes=0;
struct cciss_scsi_dev_t *csd;
@ -492,7 +492,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
if (hostno != -1) /* if it's not the first time... */
sh = h->scsi_ctlr->scsi_host;
/* find any devices in ccissscsi[] that are not in
/* find any devices in ccissscsi[] that are not in
sd[] and remove them from ccissscsi[] */
i = 0;
@ -512,7 +512,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
}
}
if (found == 0) { /* device no longer present. */
if (found == 0) { /* device no longer present. */
changes++;
cciss_scsi_remove_entry(h, hostno, i,
removed, &nremoved);
@ -641,14 +641,13 @@ lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr)
return -1;
}
static void
static void
cciss_scsi_setup(ctlr_info_t *h)
{
struct cciss_scsi_adapter_data_t * shba;
ccissscsi[h->ctlr].ndevices = 0;
shba = (struct cciss_scsi_adapter_data_t *)
kmalloc(sizeof(*shba), GFP_KERNEL);
shba = kmalloc(sizeof(*shba), GFP_KERNEL);
if (shba == NULL)
return;
shba->scsi_host = NULL;
@ -693,20 +692,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
/* copy the sense data whether we need to or not. */
memcpy(cmd->sense_buffer, ei->SenseInfo,
memcpy(cmd->sense_buffer, ei->SenseInfo,
ei->SenseLen > SCSI_SENSE_BUFFERSIZE ?
SCSI_SENSE_BUFFERSIZE :
SCSI_SENSE_BUFFERSIZE :
ei->SenseLen);
scsi_set_resid(cmd, ei->ResidualCnt);
if(ei->CommandStatus != 0)
{ /* an error has occurred */
switch(ei->CommandStatus)
{
if (ei->CommandStatus != 0) { /* an error has occurred */
switch (ei->CommandStatus) {
case CMD_TARGET_STATUS:
/* Pass it up to the upper layers... */
if (!ei->ScsiStatus) {
/* Ordinarily, this case should never happen, but there is a bug
in some released firmware revisions that allows it to happen
if, for example, a 4100 backplane loses power and the tape
@ -731,7 +728,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
print_cmd(c);
*/
/* We get CMD_INVALID if you address a non-existent tape drive instead
of a selection timeout (no response). You will see this if you yank
of a selection timeout (no response). You will see this if you yank
out a tape drive, then try to access it. This is kind of a shame
because it means that any other CMD_INVALID (e.g. driver bug) will
get interpreted as a missing target. */
@ -780,7 +777,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
cmd->result = DID_ERROR << 16;
dev_warn(&h->pdev->dev,
"%p returned unknown status %x\n", c,
ei->CommandStatus);
ei->CommandStatus);
}
}
cmd->scsi_done(cmd);
@ -796,15 +793,15 @@ cciss_scsi_detect(ctlr_info_t *h)
sh = scsi_host_alloc(&cciss_driver_template, sizeof(struct ctlr_info *));
if (sh == NULL)
goto fail;
sh->io_port = 0; // good enough? FIXME,
sh->io_port = 0; // good enough? FIXME,
sh->n_io_port = 0; // I don't think we use these two...
sh->this_id = SELF_SCSI_ID;
sh->this_id = SELF_SCSI_ID;
sh->can_queue = cciss_tape_cmds;
sh->sg_tablesize = h->maxsgentries;
sh->max_cmd_len = MAX_COMMAND_SIZE;
sh->max_sectors = h->cciss_max_sectors;
((struct cciss_scsi_adapter_data_t *)
((struct cciss_scsi_adapter_data_t *)
h->scsi_ctlr)->scsi_host = sh;
sh->hostdata[0] = (unsigned long) h;
sh->irq = h->intr[SIMPLE_MODE_INT];
@ -856,7 +853,7 @@ cciss_map_one(struct pci_dev *pdev,
static int
cciss_scsi_do_simple_cmd(ctlr_info_t *h,
CommandList_struct *c,
unsigned char *scsi3addr,
unsigned char *scsi3addr,
unsigned char *cdb,
unsigned char cdblen,
unsigned char *buf, int bufsize,
@ -871,7 +868,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */
// Fill in the request block...
/* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n",
/* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n",
scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */
@ -885,7 +882,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
/* Fill in the SG list and do dma mapping */
cciss_map_one(h->pdev, c, (unsigned char *) buf,
bufsize, DMA_FROM_DEVICE);
bufsize, DMA_FROM_DEVICE);
c->waiting = &wait;
enqueue_cmd_and_start_io(h, c);
@ -896,14 +893,13 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
return(0);
}
static void
static void
cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
{
ErrorInfo_struct *ei;
ei = c->err_info;
switch(ei->CommandStatus)
{
switch (ei->CommandStatus) {
case CMD_TARGET_STATUS:
dev_warn(&h->pdev->dev,
"cmd %p has completed with errors\n", c);
@ -1005,7 +1001,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
if (rc != 0) return rc; /* something went wrong */
if (ei->CommandStatus != 0 &&
if (ei->CommandStatus != 0 &&
ei->CommandStatus != CMD_DATA_UNDERRUN) {
cciss_scsi_interpret_error(h, c);
rc = -1;
@ -1013,7 +1009,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
spin_lock_irqsave(&h->lock, flags);
scsi_cmd_free(h, c);
spin_unlock_irqrestore(&h->lock, flags);
return rc;
return rc;
}
/* Get the device id from inquiry page 0x83 */
@ -1042,7 +1038,7 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
int rc;
CommandList_struct *c;
unsigned char cdb[12];
unsigned char scsi3addr[8];
unsigned char scsi3addr[8];
ErrorInfo_struct *ei;
unsigned long flags;
@ -1069,14 +1065,14 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
cdb[11] = 0;
rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr,
cdb, 12,
(unsigned char *) buf,
cdb, 12,
(unsigned char *) buf,
bufsize, XFER_READ);
if (rc != 0) return rc; /* something went wrong */
ei = c->err_info;
if (ei->CommandStatus != 0 &&
if (ei->CommandStatus != 0 &&
ei->CommandStatus != CMD_DATA_UNDERRUN) {
cciss_scsi_interpret_error(h, c);
rc = -1;
@ -1084,36 +1080,36 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
spin_lock_irqsave(&h->lock, flags);
scsi_cmd_free(h, c);
spin_unlock_irqrestore(&h->lock, flags);
return rc;
return rc;
}
static void
cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
{
/* the idea here is we could get notified from /proc
that some devices have changed, so we do a report
physical luns cmd, and adjust our list of devices
that some devices have changed, so we do a report
physical luns cmd, and adjust our list of devices
accordingly. (We can't rely on the scsi-mid layer just
doing inquiries, because the "busses" that the scsi
doing inquiries, because the "busses" that the scsi
mid-layer probes are totally fabricated by this driver,
so new devices wouldn't show up.
the scsi3addr's of devices won't change so long as the
adapter is not reset. That means we can rescan and
tell which devices we already know about, vs. new
the scsi3addr's of devices won't change so long as the
adapter is not reset. That means we can rescan and
tell which devices we already know about, vs. new
devices, vs. disappearing devices.
Also, if you yank out a tape drive, then put in a disk
in it's place, (say, a configured volume from another
array controller for instance) _don't_ poke this driver
(so it thinks it's still a tape, but _do_ poke the scsi
mid layer, so it does an inquiry... the scsi mid layer
in it's place, (say, a configured volume from another
array controller for instance) _don't_ poke this driver
(so it thinks it's still a tape, but _do_ poke the scsi
mid layer, so it does an inquiry... the scsi mid layer
will see the physical disk. This would be bad. Need to
think about how to prevent that. One idea would be to
think about how to prevent that. One idea would be to
snoop all scsi responses and if an inquiry repsonse comes
back that reports a disk, chuck it an return selection
timeout instead and adjust our table... Not sure i like
that though.
that though.
*/
#define OBDR_TAPE_INQ_SIZE 49
@ -1141,9 +1137,9 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
ch = &ld_buff->LUNListLength[0];
num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
if (num_luns > CISS_MAX_PHYS_LUN) {
printk(KERN_WARNING
printk(KERN_WARNING
"cciss: Maximum physical LUNs (%d) exceeded. "
"%d LUNs ignored.\n", CISS_MAX_PHYS_LUN,
"%d LUNs ignored.\n", CISS_MAX_PHYS_LUN,
num_luns - CISS_MAX_PHYS_LUN);
num_luns = CISS_MAX_PHYS_LUN;
}
@ -1154,7 +1150,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
}
/* adjust our table of devices */
/* adjust our table of devices */
for (i = 0; i < num_luns; i++) {
/* for each physical lun, do an inquiry */
if (ld_buff->LUN[i][3] & 0xC0) continue;
@ -1182,8 +1178,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
cciss_scsi_get_device_id(h, scsi3addr,
this_device->device_id, sizeof(this_device->device_id));
switch (this_device->devtype)
{
switch (this_device->devtype) {
case 0x05: /* CD-ROM */ {
/* We don't *really* support actual CD-ROM devices,
@ -1213,7 +1208,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
currentsd[ncurrent] = *this_device;
ncurrent++;
break;
default:
default:
break;
}
}
@ -1258,8 +1253,8 @@ cciss_scsi_write_info(struct Scsi_Host *sh,
return -EINVAL;
return cciss_scsi_user_command(h, sh->host_no,
buffer, length);
}
buffer, length);
}
static int
cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh)
@ -1297,8 +1292,8 @@ cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh)
return 0;
}
/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
dma mapping and fills in the scatter gather entries of the
/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
dma mapping and fills in the scatter gather entries of the
cciss command, c. */
static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
@ -1394,7 +1389,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
// Fill in the command list header
cmd->scsi_done = done; // save this for use by completion code
cmd->scsi_done = done; // save this for use by completion code
/* save c in case we have to abort it */
cmd->host_scribble = (unsigned char *) c;
@ -1404,7 +1399,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
c->Header.ReplyQueue = 0; /* unused in simple mode */
memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */
// Fill in the request block...
c->Request.Timeout = 0;
@ -1414,8 +1409,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len);
c->Request.Type.Type = TYPE_CMD;
c->Request.Type.Attribute = ATTR_SIMPLE;
switch(cmd->sc_data_direction)
{
switch (cmd->sc_data_direction) {
case DMA_TO_DEVICE:
c->Request.Type.Direction = XFER_WRITE;
break;
@ -1432,15 +1426,15 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
c->Request.Type.Direction = XFER_RSVD;
// This is technically wrong, and cciss controllers should
// reject it with CMD_INVALID, which is the most correct
// response, but non-fibre backends appear to let it
// reject it with CMD_INVALID, which is the most correct
// response, but non-fibre backends appear to let it
// slide by, and give the same results as if this field
// were set correctly. Either way is acceptable for
// our purposes here.
break;
default:
default:
dev_warn(&h->pdev->dev, "unknown data direction: %d\n",
cmd->sc_data_direction);
BUG();
@ -1464,9 +1458,9 @@ static void cciss_unregister_scsi(ctlr_info_t *h)
spin_lock_irqsave(&h->lock, flags);
sa = h->scsi_ctlr;
stk = &sa->cmd_stack;
stk = &sa->cmd_stack;
/* if we weren't ever actually registered, don't unregister */
/* if we weren't ever actually registered, don't unregister */
if (sa->registered) {
spin_unlock_irqrestore(&h->lock, flags);
scsi_remove_host(sa->scsi_host);
@ -1474,7 +1468,7 @@ static void cciss_unregister_scsi(ctlr_info_t *h)
spin_lock_irqsave(&h->lock, flags);
}
/* set scsi_host to NULL so our detect routine will
/* set scsi_host to NULL so our detect routine will
find us on register */
sa->scsi_host = NULL;
spin_unlock_irqrestore(&h->lock, flags);
@ -1490,7 +1484,7 @@ static int cciss_engage_scsi(ctlr_info_t *h)
spin_lock_irqsave(&h->lock, flags);
sa = h->scsi_ctlr;
stk = &sa->cmd_stack;
stk = &sa->cmd_stack;
if (sa->registered) {
dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n");
@ -1586,13 +1580,13 @@ retry_tur:
return rc;
}
/* Need at least one of these error handlers to keep ../scsi/hosts.c from
* complaining. Doing a host- or bus-reset can't do anything good here.
/* Need at least one of these error handlers to keep ../scsi/hosts.c from
* complaining. Doing a host- or bus-reset can't do anything good here.
* Despite what it might say in scsi_error.c, there may well be commands
* on the controller, as the cciss driver registers twice, once as a block
* device for the logical drives, and once as a scsi device, for any tape
* drives. So we know there are no commands out on the tape drives, but we
* don't know there are no commands on the controller, and it is likely
* don't know there are no commands on the controller, and it is likely
* that there probably are, as the cciss block device is most commonly used
* as a boot device (embedded controller on HP/Compaq systems.)
*/

View File

@ -96,6 +96,10 @@ static int max_part;
static struct workqueue_struct *recv_workqueue;
static int part_shift;
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
return disk_to_dev(nbd->disk);
@ -120,7 +124,7 @@ static const char *nbdcmd_to_ascii(int cmd)
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{
bdev->bd_inode->i_size = 0;
bd_set_size(bdev, 0);
set_capacity(nbd->disk, 0);
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
@ -129,29 +133,20 @@ static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
{
if (!nbd_is_connected(nbd))
return;
bdev->bd_inode->i_size = nbd->bytesize;
blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
bd_set_size(bdev, nbd->bytesize);
set_capacity(nbd->disk, nbd->bytesize >> 9);
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}
static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
loff_t blocksize, loff_t nr_blocks)
{
int ret;
ret = set_blocksize(bdev, blocksize);
if (ret)
return ret;
nbd->blksize = blocksize;
nbd->bytesize = blocksize * nr_blocks;
nbd_size_update(nbd, bdev);
return 0;
if (nbd_is_connected(nbd))
nbd_size_update(nbd, bdev);
}
static void nbd_end_request(struct nbd_cmd *cmd)
@ -571,10 +566,17 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_MQ_RQ_QUEUE_OK;
}
static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
unsigned long arg)
{
struct socket *sock;
struct nbd_sock **socks;
struct nbd_sock *nsock;
int err;
sock = sockfd_lookup(arg, &err);
if (!sock)
return err;
if (!nbd->task_setup)
nbd->task_setup = current;
@ -598,26 +600,20 @@ static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
nsock->sock = sock;
socks[nbd->num_connections++] = nsock;
if (max_part)
bdev->bd_invalidated = 1;
return 0;
}
/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
int i;
for (i = 0; i < nbd->num_connections; i++)
kfree(nbd->socks[i]);
kfree(nbd->socks);
nbd->socks = NULL;
nbd->runtime_flags = 0;
nbd->blksize = 1024;
nbd->bytesize = 0;
set_capacity(nbd->disk, 0);
nbd->flags = 0;
nbd->tag_set.timeout = 0;
nbd->num_connections = 0;
nbd->task_setup = NULL;
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
}
@ -659,81 +655,143 @@ static void send_disconnects(struct nbd_device *nbd)
}
}
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
{
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
if (!nbd->socks)
return -EINVAL;
mutex_unlock(&nbd->config_lock);
fsync_bdev(bdev);
mutex_lock(&nbd->config_lock);
/* Check again after getting mutex back. */
if (!nbd->socks)
return -EINVAL;
if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
&nbd->runtime_flags))
send_disconnects(nbd);
return 0;
}
static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
{
sock_shutdown(nbd);
nbd_clear_que(nbd);
kill_bdev(bdev);
nbd_bdev_reset(bdev);
/*
* We want to give the run thread a chance to wait for everybody
* to clean up and then do it's own cleanup.
*/
if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
nbd->num_connections) {
int i;
for (i = 0; i < nbd->num_connections; i++)
kfree(nbd->socks[i]);
kfree(nbd->socks);
nbd->socks = NULL;
nbd->num_connections = 0;
}
nbd->task_setup = NULL;
return 0;
}
static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
{
struct recv_thread_args *args;
int num_connections = nbd->num_connections;
int error = 0, i;
if (nbd->task_recv)
return -EBUSY;
if (!nbd->socks)
return -EINVAL;
if (num_connections > 1 &&
!(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
error = -EINVAL;
goto out_err;
}
set_bit(NBD_RUNNING, &nbd->runtime_flags);
blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
if (!args) {
error = -ENOMEM;
goto out_err;
}
nbd->task_recv = current;
mutex_unlock(&nbd->config_lock);
nbd_parse_flags(nbd, bdev);
error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (error) {
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
goto out_recv;
}
nbd_size_update(nbd, bdev);
nbd_dev_dbg_init(nbd);
for (i = 0; i < num_connections; i++) {
sk_set_memalloc(nbd->socks[i]->sock->sk);
atomic_inc(&nbd->recv_threads);
INIT_WORK(&args[i].work, recv_work);
args[i].nbd = nbd;
args[i].index = i;
queue_work(recv_workqueue, &args[i].work);
}
wait_event_interruptible(nbd->recv_wq,
atomic_read(&nbd->recv_threads) == 0);
for (i = 0; i < num_connections; i++)
flush_work(&args[i].work);
nbd_dev_dbg_close(nbd);
nbd_size_clear(nbd, bdev);
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
out_recv:
mutex_lock(&nbd->config_lock);
nbd->task_recv = NULL;
out_err:
clear_bit(NBD_RUNNING, &nbd->runtime_flags);
nbd_clear_sock(nbd, bdev);
/* user requested, ignore socket errors */
if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
error = 0;
if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
error = -ETIMEDOUT;
nbd_reset(nbd);
return error;
}
/* Must be called with config_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case NBD_DISCONNECT: {
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
if (!nbd->socks)
return -EINVAL;
mutex_unlock(&nbd->config_lock);
fsync_bdev(bdev);
mutex_lock(&nbd->config_lock);
/* Check again after getting mutex back. */
if (!nbd->socks)
return -EINVAL;
if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
&nbd->runtime_flags))
send_disconnects(nbd);
return 0;
}
case NBD_DISCONNECT:
return nbd_disconnect(nbd, bdev);
case NBD_CLEAR_SOCK:
sock_shutdown(nbd);
nbd_clear_que(nbd);
kill_bdev(bdev);
nbd_bdev_reset(bdev);
/*
* We want to give the run thread a chance to wait for everybody
* to clean up and then do it's own cleanup.
*/
if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) {
int i;
for (i = 0; i < nbd->num_connections; i++)
kfree(nbd->socks[i]);
kfree(nbd->socks);
nbd->socks = NULL;
nbd->num_connections = 0;
nbd->task_setup = NULL;
}
return nbd_clear_sock(nbd, bdev);
case NBD_SET_SOCK:
return nbd_add_socket(nbd, bdev, arg);
case NBD_SET_BLKSIZE:
nbd_size_set(nbd, bdev, arg,
div_s64(nbd->bytesize, arg));
return 0;
case NBD_SET_SOCK: {
int err;
struct socket *sock = sockfd_lookup(arg, &err);
if (!sock)
return err;
err = nbd_add_socket(nbd, sock);
if (!err && max_part)
bdev->bd_invalidated = 1;
return err;
}
case NBD_SET_BLKSIZE: {
loff_t bsize = div_s64(nbd->bytesize, arg);
return nbd_size_set(nbd, bdev, arg, bsize);
}
case NBD_SET_SIZE:
return nbd_size_set(nbd, bdev, nbd->blksize,
div_s64(arg, nbd->blksize));
nbd_size_set(nbd, bdev, nbd->blksize,
div_s64(arg, nbd->blksize));
return 0;
case NBD_SET_SIZE_BLOCKS:
return nbd_size_set(nbd, bdev, nbd->blksize, arg);
nbd_size_set(nbd, bdev, nbd->blksize, arg);
return 0;
case NBD_SET_TIMEOUT:
nbd->tag_set.timeout = arg * HZ;
return 0;
@ -741,85 +799,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_FLAGS:
nbd->flags = arg;
return 0;
case NBD_DO_IT: {
struct recv_thread_args *args;
int num_connections = nbd->num_connections;
int error = 0, i;
if (nbd->task_recv)
return -EBUSY;
if (!nbd->socks)
return -EINVAL;
if (num_connections > 1 &&
!(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
error = -EINVAL;
goto out_err;
}
set_bit(NBD_RUNNING, &nbd->runtime_flags);
blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
if (!args) {
error = -ENOMEM;
goto out_err;
}
nbd->task_recv = current;
mutex_unlock(&nbd->config_lock);
nbd_parse_flags(nbd, bdev);
error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (error) {
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
goto out_recv;
}
nbd_size_update(nbd, bdev);
nbd_dev_dbg_init(nbd);
for (i = 0; i < num_connections; i++) {
sk_set_memalloc(nbd->socks[i]->sock->sk);
atomic_inc(&nbd->recv_threads);
INIT_WORK(&args[i].work, recv_work);
args[i].nbd = nbd;
args[i].index = i;
queue_work(recv_workqueue, &args[i].work);
}
wait_event_interruptible(nbd->recv_wq,
atomic_read(&nbd->recv_threads) == 0);
for (i = 0; i < num_connections; i++)
flush_work(&args[i].work);
nbd_dev_dbg_close(nbd);
nbd_size_clear(nbd, bdev);
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
out_recv:
mutex_lock(&nbd->config_lock);
nbd->task_recv = NULL;
out_err:
sock_shutdown(nbd);
nbd_clear_que(nbd);
kill_bdev(bdev);
nbd_bdev_reset(bdev);
/* user requested, ignore socket errors */
if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
error = 0;
if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
error = -ETIMEDOUT;
nbd_reset(nbd);
return error;
}
case NBD_DO_IT:
return nbd_start_device(nbd, bdev);
case NBD_CLEAR_QUE:
/*
* This is for compatibility only. The queue is always cleared
* by NBD_DO_IT or NBD_CLEAR_SOCK.
*/
return 0;
case NBD_PRINT_DEBUG:
/*
* For compatibility only, we no longer keep a list of
@ -1134,8 +1121,10 @@ static int __init nbd_init(void)
if (!recv_workqueue)
return -ENOMEM;
if (register_blkdev(NBD_MAJOR, "nbd"))
if (register_blkdev(NBD_MAJOR, "nbd")) {
destroy_workqueue(recv_workqueue);
return -EIO;
}
nbd_dbg_init();

View File

@ -328,13 +328,15 @@ static void dm_softirq_done(struct request *rq)
int rw;
if (!clone) {
rq_end_stats(tio->md, rq);
struct mapped_device *md = tio->md;
rq_end_stats(md, rq);
rw = rq_data_dir(rq);
if (!rq->q->mq_ops)
blk_end_request_all(rq, tio->error);
else
blk_mq_end_request(rq, tio->error);
rq_completed(tio->md, rw, false);
rq_completed(md, rw, false);
return;
}

View File

@ -26,6 +26,7 @@
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
#include <scsi/sg.h>
#include <asm/unaligned.h>
@ -56,6 +57,11 @@ EXPORT_SYMBOL_GPL(nvme_max_retries);
static int nvme_char_major;
module_param(nvme_char_major, int, 0);
static unsigned long default_ps_max_latency_us = 25000;
module_param(default_ps_max_latency_us, ulong, 0644);
MODULE_PARM_DESC(default_ps_max_latency_us,
"max power saving latency for new devices; use PM QOS to change per device");
static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock);
@ -560,7 +566,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify;
c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL);
c.identify.cns = NVME_ID_CNS_CTRL;
*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
if (!*id)
@ -578,7 +584,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
struct nvme_command c = { };
c.identify.opcode = nvme_admin_identify;
c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST);
c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
c.identify.nsid = cpu_to_le32(nsid);
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
@ -590,8 +596,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
int error;
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify,
c.identify.nsid = cpu_to_le32(nsid),
c.identify.opcode = nvme_admin_identify;
c.identify.nsid = cpu_to_le32(nsid);
c.identify.cns = NVME_ID_CNS_NS;
*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
if (!*id)
@ -1251,6 +1258,176 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_write_cache(q, vwc, vwc);
}
static void nvme_configure_apst(struct nvme_ctrl *ctrl)
{
/*
* APST (Autonomous Power State Transition) lets us program a
* table of power state transitions that the controller will
* perform automatically. We configure it with a simple
* heuristic: we are willing to spend at most 2% of the time
* transitioning between power states. Therefore, when running
* in any given state, we will enter the next lower-power
* non-operational state after waiting 100 * (enlat + exlat)
* microseconds, as long as that state's total latency is under
* the requested maximum latency.
*
* We will not autonomously enter any non-operational state for
* which the total latency exceeds ps_max_latency_us. Users
* can set ps_max_latency_us to zero to turn off APST.
*/
unsigned apste;
struct nvme_feat_auto_pst *table;
int ret;
/*
* If APST isn't supported or if we haven't been initialized yet,
* then don't do anything.
*/
if (!ctrl->apsta)
return;
if (ctrl->npss > 31) {
dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
return;
}
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return;
if (ctrl->ps_max_latency_us == 0) {
/* Turn off APST. */
apste = 0;
} else {
__le64 target = cpu_to_le64(0);
int state;
/*
* Walk through all states from lowest- to highest-power.
* According to the spec, lower-numbered states use more
* power. NPSS, despite the name, is the index of the
* lowest-power state, not the number of states.
*/
for (state = (int)ctrl->npss; state >= 0; state--) {
u64 total_latency_us, transition_ms;
if (target)
table->entries[state] = target;
/*
* Is this state a useful non-operational state for
* higher-power states to autonomously transition to?
*/
if (!(ctrl->psd[state].flags &
NVME_PS_FLAGS_NON_OP_STATE))
continue;
total_latency_us =
(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
+ le32_to_cpu(ctrl->psd[state].exit_lat);
if (total_latency_us > ctrl->ps_max_latency_us)
continue;
/*
* This state is good. Use it as the APST idle
* target for higher power states.
*/
transition_ms = total_latency_us + 19;
do_div(transition_ms, 20);
if (transition_ms > (1 << 24) - 1)
transition_ms = (1 << 24) - 1;
target = cpu_to_le64((state << 3) |
(transition_ms << 8));
}
apste = 1;
}
ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
table, sizeof(*table), NULL);
if (ret)
dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
kfree(table);
}
static void nvme_set_latency_tolerance(struct device *dev, s32 val)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
u64 latency;
switch (val) {
case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT:
case PM_QOS_LATENCY_ANY:
latency = U64_MAX;
break;
default:
latency = val;
}
if (ctrl->ps_max_latency_us != latency) {
ctrl->ps_max_latency_us = latency;
nvme_configure_apst(ctrl);
}
}
struct nvme_core_quirk_entry {
/*
* NVMe model and firmware strings are padded with spaces. For
* simplicity, strings in the quirk table are padded with NULLs
* instead.
*/
u16 vid;
const char *mn;
const char *fr;
unsigned long quirks;
};
static const struct nvme_core_quirk_entry core_quirks[] = {
/*
* Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes
* the controller to go out to lunch. It dies when the watchdog
* timer reads CSTS and gets 0xffffffff.
*/
{
.vid = 0x144d,
.fr = "BXW75D0Q",
.quirks = NVME_QUIRK_NO_APST,
},
};
/* match is null-terminated but idstr is space-padded. */
static bool string_matches(const char *idstr, const char *match, size_t len)
{
size_t matchlen;
if (!match)
return true;
matchlen = strlen(match);
WARN_ON_ONCE(matchlen > len);
if (memcmp(idstr, match, matchlen))
return false;
for (; matchlen < len; matchlen++)
if (idstr[matchlen] != ' ')
return false;
return true;
}
static bool quirk_matches(const struct nvme_id_ctrl *id,
const struct nvme_core_quirk_entry *q)
{
return q->vid == le16_to_cpu(id->vid) &&
string_matches(id->mn, q->mn, sizeof(id->mn)) &&
string_matches(id->fr, q->fr, sizeof(id->fr));
}
/*
* Initialize the cached copies of the Identify data and various controller
* register in our nvme_ctrl structure. This should be called as soon as
@ -1262,6 +1439,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
u64 cap;
int ret, page_shift;
u32 max_hw_sectors;
u8 prev_apsta;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) {
@ -1285,6 +1463,24 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
return -EIO;
}
if (!ctrl->identified) {
/*
* Check for quirks. Quirk can depend on firmware version,
* so, in principle, the set of quirks present can change
* across a reset. As a possible future enhancement, we
* could re-scan for quirks every time we reinitialize
* the device, but we'd have to make sure that the driver
* behaves intelligently if the quirks change.
*/
int i;
for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
if (quirk_matches(id, &core_quirks[i]))
ctrl->quirks |= core_quirks[i].quirks;
}
}
ctrl->oacs = le16_to_cpu(id->oacs);
ctrl->vid = le16_to_cpu(id->vid);
ctrl->oncs = le16_to_cpup(&id->oncs);
@ -1305,6 +1501,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
ctrl->npss = id->npss;
prev_apsta = ctrl->apsta;
ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta;
memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
if (ctrl->ops->is_fabrics) {
ctrl->icdoff = le16_to_cpu(id->icdoff);
ctrl->ioccsz = le32_to_cpu(id->ioccsz);
@ -1328,6 +1529,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
kfree(id);
if (ctrl->apsta && !prev_apsta)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apsta && prev_apsta)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
nvme_configure_apst(ctrl);
ctrl->identified = true;
return ret;
}
EXPORT_SYMBOL_GPL(nvme_init_identify);
@ -1577,6 +1788,29 @@ static ssize_t nvme_sysfs_show_transport(struct device *dev,
}
static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
static ssize_t nvme_sysfs_show_state(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_RECONNECTING]= "reconnecting",
[NVME_CTRL_DELETING] = "deleting",
[NVME_CTRL_DEAD] = "dead",
};
if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
state_name[ctrl->state])
return sprintf(buf, "%s\n", state_name[ctrl->state]);
return sprintf(buf, "unknown state\n");
}
static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -1609,6 +1843,7 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_transport.attr,
&dev_attr_subsysnqn.attr,
&dev_attr_address.attr,
&dev_attr_state.attr,
NULL
};
@ -2065,6 +2300,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
list_add_tail(&ctrl->node, &nvme_ctrl_list);
spin_unlock(&dev_list_lock);
/*
* Initialize latency tolerance controls. The sysfs files won't
* be visible to userspace unless the device actually supports APST.
*/
ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance;
dev_pm_qos_update_user_latency_tolerance(ctrl->device,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
return 0;
out_release_instance:
nvme_release_instance(ctrl);
@ -2090,9 +2333,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
* Revalidating a dead namespace sets capacity to 0. This will
* end buffered writers dirtying pages that can't be synced.
*/
if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
revalidate_disk(ns->disk);
if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
continue;
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
blk_mq_abort_requeue_list(ns->queue);
blk_mq_start_stopped_hw_queues(ns->queue, true);

View File

@ -480,11 +480,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
* being implemented to the common NVMe fabrics library. Part of
* the overall init sequence of starting up a fabrics driver.
*/
void nvmf_register_transport(struct nvmf_transport_ops *ops)
int nvmf_register_transport(struct nvmf_transport_ops *ops)
{
if (!ops->create_ctrl)
return -EINVAL;
mutex_lock(&nvmf_transports_mutex);
list_add_tail(&ops->entry, &nvmf_transports);
mutex_unlock(&nvmf_transports_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(nvmf_register_transport);

View File

@ -128,7 +128,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
void nvmf_register_transport(struct nvmf_transport_ops *ops);
int nvmf_register_transport(struct nvmf_transport_ops *ops);
void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);

View File

@ -2353,18 +2353,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
/* sanity checks */
/* FC-NVME supports 64-byte SQE only */
if (ctrl->ctrl.ioccsz != 4) {
dev_err(ctrl->ctrl.device, "ioccsz %d is not supported!\n",
ctrl->ctrl.ioccsz);
goto out_remove_admin_queue;
}
/* FC-NVME supports 16-byte CQE only */
if (ctrl->ctrl.iorcsz != 1) {
dev_err(ctrl->ctrl.device, "iorcsz %d is not supported!\n",
ctrl->ctrl.iorcsz);
goto out_remove_admin_queue;
}
/* FC-NVME does not have other data in the capsule */
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
@ -2562,8 +2550,7 @@ static int __init nvme_fc_init_module(void)
if (!nvme_fc_wq)
return -ENOMEM;
nvmf_register_transport(&nvme_fc_transport);
return 0;
return nvmf_register_transport(&nvme_fc_transport);
}
static void __exit nvme_fc_exit_module(void)

View File

@ -78,6 +78,11 @@ enum nvme_quirks {
* readiness, which is done by reading the NVME_CSTS_RDY bit.
*/
NVME_QUIRK_DELAY_BEFORE_CHK_RDY = (1 << 3),
/*
* APST should not be used.
*/
NVME_QUIRK_NO_APST = (1 << 4),
};
/*
@ -112,6 +117,7 @@ enum nvme_ctrl_state {
struct nvme_ctrl {
enum nvme_ctrl_state state;
bool identified;
spinlock_t lock;
const struct nvme_ctrl_ops *ops;
struct request_queue *admin_q;
@ -147,13 +153,19 @@ struct nvme_ctrl {
u32 vs;
u32 sgls;
u16 kas;
u8 npss;
u8 apsta;
unsigned int kato;
bool subsystem;
unsigned long quirks;
struct nvme_id_power_state psd[32];
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
/* Power saving configuration */
u64 ps_max_latency_us;
/* Fabrics only */
u16 sqsize;
u32 ioccsz;

View File

@ -613,10 +613,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
ret = BLK_MQ_RQ_QUEUE_BUSY;
else
ret = BLK_MQ_RQ_QUEUE_ERROR;
ret = BLK_MQ_RQ_QUEUE_ERROR;
spin_unlock_irq(&nvmeq->q_lock);
goto out_cleanup_iod;
}
@ -1739,7 +1736,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
kfree(dev->ctrl.opal_dev);
free_opal_dev(dev->ctrl.opal_dev);
kfree(dev);
}
@ -1789,14 +1786,17 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) {
dev->ctrl.opal_dev =
init_opal_dev(&dev->ctrl, &nvme_sec_submit);
if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
if (!dev->ctrl.opal_dev)
dev->ctrl.opal_dev =
init_opal_dev(&dev->ctrl, &nvme_sec_submit);
else if (was_suspend)
opal_unlock_from_suspend(dev->ctrl.opal_dev);
} else {
free_opal_dev(dev->ctrl.opal_dev);
dev->ctrl.opal_dev = NULL;
}
if (was_suspend)
opal_unlock_from_suspend(dev->ctrl.opal_dev);
result = nvme_setup_io_queues(dev);
if (result)
goto out;
@ -2001,8 +2001,10 @@ static void nvme_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
if (!pci_device_is_present(pdev))
if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
nvme_dev_disable(dev, false);
}
flush_work(&dev->reset_work);
nvme_uninit_ctrl(&dev->ctrl);
@ -2121,6 +2123,7 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);

View File

@ -42,28 +42,6 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
static const char *const nvme_rdma_cm_status_strs[] = {
[NVME_RDMA_CM_INVALID_LEN] = "invalid length",
[NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format",
[NVME_RDMA_CM_INVALID_QID] = "invalid queue ID",
[NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size",
[NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size",
[NVME_RDMA_CM_NO_RSC] = "resource not found",
[NVME_RDMA_CM_INVALID_IRD] = "invalid IRD",
[NVME_RDMA_CM_INVALID_ORD] = "Invalid ORD",
};
static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
{
size_t index = status;
if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
nvme_rdma_cm_status_strs[index])
return nvme_rdma_cm_status_strs[index];
else
return "unrecognized reason";
};
/*
* We handle AEN commands ourselves and don't even let the
* block layer know about them.
@ -155,6 +133,10 @@ struct nvme_rdma_ctrl {
struct sockaddr addr;
struct sockaddr_in addr_in;
};
union {
struct sockaddr src_addr;
struct sockaddr_in src_addr_in;
};
struct nvme_ctrl ctrl;
};
@ -567,6 +549,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
int idx, size_t queue_size)
{
struct nvme_rdma_queue *queue;
struct sockaddr *src_addr = NULL;
int ret;
queue = &ctrl->queues[idx];
@ -589,7 +572,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
}
queue->cm_error = -ETIMEDOUT;
ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr,
if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
src_addr = &ctrl->src_addr;
ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
NVME_RDMA_CONNECT_TIMEOUT_MS);
if (ret) {
dev_info(ctrl->ctrl.device,
@ -1905,6 +1891,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
goto out_free_ctrl;
}
if (opts->mask & NVMF_OPT_HOST_TRADDR) {
ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
opts->host_traddr);
if (ret) {
pr_err("malformed src IP address passed: %s\n",
opts->host_traddr);
goto out_free_ctrl;
}
}
if (opts->mask & NVMF_OPT_TRSVCID) {
u16 port;
@ -2016,7 +2012,8 @@ out_free_ctrl:
static struct nvmf_transport_ops nvme_rdma_transport = {
.name = "rdma",
.required_opts = NVMF_OPT_TRADDR,
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY,
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR,
.create_ctrl = nvme_rdma_create_ctrl,
};
@ -2063,8 +2060,7 @@ static int __init nvme_rdma_init_module(void)
return ret;
}
nvmf_register_transport(&nvme_rdma_transport);
return 0;
return nvmf_register_transport(&nvme_rdma_transport);
}
static void __exit nvme_rdma_cleanup_module(void)

View File

@ -41,7 +41,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
if (!ns) {
status = NVME_SC_INVALID_NS;
pr_err("nvmet : Counld not find namespace id : %d\n",
pr_err("nvmet : Could not find namespace id : %d\n",
le32_to_cpu(req->cmd->get_log_page.nsid));
goto out;
}
@ -509,7 +509,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req)
break;
case nvme_admin_identify:
req->data_len = 4096;
switch (le32_to_cpu(cmd->identify.cns)) {
switch (cmd->identify.cns) {
case NVME_ID_CNS_NS:
req->execute = nvmet_execute_identify_ns;
return 0;

View File

@ -17,6 +17,7 @@
#include "nvmet.h"
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);
/*
* This read/write semaphore is used to synchronize access to configuration
@ -749,7 +750,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
if (!ctrl->sqs)
goto out_free_cqs;
ret = ida_simple_get(&subsys->cntlid_ida,
ret = ida_simple_get(&cntlid_ida,
NVME_CNTLID_MIN, NVME_CNTLID_MAX,
GFP_KERNEL);
if (ret < 0) {
@ -819,7 +820,7 @@ static void nvmet_ctrl_free(struct kref *ref)
flush_work(&ctrl->async_event_work);
cancel_work_sync(&ctrl->fatal_err_work);
ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
ida_simple_remove(&cntlid_ida, ctrl->cntlid);
nvmet_subsys_put(subsys);
kfree(ctrl->sqs);
@ -918,9 +919,6 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
mutex_init(&subsys->lock);
INIT_LIST_HEAD(&subsys->namespaces);
INIT_LIST_HEAD(&subsys->ctrls);
ida_init(&subsys->cntlid_ida);
INIT_LIST_HEAD(&subsys->hosts);
return subsys;
@ -933,7 +931,6 @@ static void nvmet_subsys_free(struct kref *ref)
WARN_ON_ONCE(!list_empty(&subsys->namespaces));
ida_destroy(&subsys->cntlid_ida);
kfree(subsys->subsysnqn);
kfree(subsys);
}
@ -976,6 +973,7 @@ static void __exit nvmet_exit(void)
{
nvmet_exit_configfs();
nvmet_exit_discovery();
ida_destroy(&cntlid_ida);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);

View File

@ -186,14 +186,14 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
}
case nvme_admin_identify:
req->data_len = 4096;
switch (le32_to_cpu(cmd->identify.cns)) {
switch (cmd->identify.cns) {
case NVME_ID_CNS_CTRL:
req->execute =
nvmet_execute_identify_disc_ctrl;
return 0;
default:
pr_err("nvmet: unsupported identify cns %d\n",
le32_to_cpu(cmd->identify.cns));
cmd->identify.cns);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
default:

View File

@ -153,8 +153,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
pr_info("creating controller %d for NQN %s.\n",
ctrl->cntlid, ctrl->hostnqn);
pr_info("creating controller %d for subsystem %s for NQN %s.\n",
ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
@ -220,7 +220,7 @@ int nvmet_parse_connect_cmd(struct nvmet_req *req)
req->ns = NULL;
if (req->cmd->common.opcode != nvme_fabrics_command) {
if (cmd->common.opcode != nvme_fabrics_command) {
pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;

View File

@ -1817,16 +1817,14 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
/* data no longer needed */
nvmet_fc_free_tgt_pgs(fod);
if (fcpreq->fcp_error || abort)
nvmet_req_complete(&fod->req, fcpreq->fcp_error);
nvmet_req_complete(&fod->req, fcpreq->fcp_error);
return;
}
switch (fcpreq->op) {
case NVMET_FCOP_WRITEDATA:
if (abort || fcpreq->fcp_error ||
if (fcpreq->fcp_error ||
fcpreq->transferred_length != fcpreq->transfer_length) {
nvmet_req_complete(&fod->req,
NVME_SC_FC_TRANSPORT_ERROR);
@ -1849,7 +1847,7 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
case NVMET_FCOP_READDATA:
case NVMET_FCOP_READDATA_RSP:
if (abort || fcpreq->fcp_error ||
if (fcpreq->fcp_error ||
fcpreq->transferred_length != fcpreq->transfer_length) {
/* data no longer needed */
nvmet_fc_free_tgt_pgs(fod);

View File

@ -724,8 +724,7 @@ static int __init nvme_loop_init_module(void)
ret = nvmet_register_transport(&nvme_loop_ops);
if (ret)
return ret;
nvmf_register_transport(&nvme_loop_transport);
return 0;
return nvmf_register_transport(&nvme_loop_transport);
}
static void __exit nvme_loop_cleanup_module(void)

View File

@ -142,7 +142,6 @@ struct nvmet_subsys {
unsigned int max_nsid;
struct list_head ctrls;
struct ida cntlid_ida;
struct list_head hosts;
bool allow_any_host;

View File

@ -1041,6 +1041,9 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id,
{
struct nvme_rdma_cm_rej rej;
pr_debug("rejecting connect request: status %d (%s)\n",
status, nvme_rdma_cm_msg(status));
rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
rej.sts = cpu_to_le16(status);
@ -1091,7 +1094,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
if (queue->idx < 0) {
ret = NVME_RDMA_CM_NO_RSC;
goto out_free_queue;
goto out_destroy_sq;
}
ret = nvmet_rdma_alloc_rsps(queue);
@ -1135,7 +1138,6 @@ out_destroy_sq:
out_free_queue:
kfree(queue);
out_reject:
pr_debug("rejecting connect request with status code %d\n", ret);
nvmet_rdma_cm_reject(cm_id, ret);
return NULL;
}
@ -1188,7 +1190,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ndev = nvmet_rdma_find_get_device(cm_id);
if (!ndev) {
pr_err("no client data!\n");
nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC);
return -ECONNREFUSED;
}

View File

@ -1167,7 +1167,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
/* zero out the cmd, except for the embedded scsi_request */
memset((char *)cmd + sizeof(cmd->req), 0,
sizeof(*cmd) - sizeof(cmd->req));
sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size);
cmd->device = dev;
cmd->sense_buffer = buf;

View File

@ -227,27 +227,31 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
return 0;
}
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
q->cmd_size = sizeof(struct scsi_request);
if (rphy) {
q = blk_init_queue(sas_non_host_smp_request, NULL);
q->request_fn = sas_non_host_smp_request;
dev = &rphy->dev;
name = dev_name(dev);
release = NULL;
} else {
q = blk_init_queue(sas_host_smp_request, NULL);
q->request_fn = sas_host_smp_request;
dev = &shost->shost_gendev;
snprintf(namebuf, sizeof(namebuf),
"sas_host%d", shost->host_no);
name = namebuf;
release = sas_host_release;
}
if (!q)
return -ENOMEM;
error = blk_init_allocated_queue(q);
if (error)
goto out_cleanup_queue;
error = bsg_register_queue(q, dev, name, release);
if (error) {
blk_cleanup_queue(q);
return -ENOMEM;
}
if (error)
goto out_cleanup_queue;
if (rphy)
rphy->q = q;
@ -261,6 +265,10 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
return 0;
out_cleanup_queue:
blk_cleanup_queue(q);
return error;
}
static void sas_bsg_remove(struct Scsi_Host *shost, struct sas_rphy *rphy)

View File

@ -1043,13 +1043,22 @@ static struct block_device *bd_acquire(struct inode *inode)
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
if (bdev) {
if (bdev && !inode_unhashed(bdev->bd_inode)) {
bdgrab(bdev);
spin_unlock(&bdev_lock);
return bdev;
}
spin_unlock(&bdev_lock);
/*
* i_bdev references block device inode that was already shut down
* (corresponding device got removed). Remove the reference and look
* up block device inode again just in case new device got
* reestablished under the same device number.
*/
if (bdev)
bd_forget(inode);
bdev = bdget(inode->i_rdev);
if (bdev) {
spin_lock(&bdev_lock);

View File

@ -33,6 +33,7 @@ struct blk_mq_hw_ctx {
struct blk_mq_ctx **ctxs;
unsigned int nr_ctx;
wait_queue_t dispatch_wait;
atomic_t wait_index;
struct blk_mq_tags *tags;
@ -160,6 +161,7 @@ enum {
BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1,
BLK_MQ_S_SCHED_RESTART = 2,
BLK_MQ_S_TAG_WAITING = 3,
BLK_MQ_MAX_DEPTH = 10240,

View File

@ -29,6 +29,30 @@ enum nvme_rdma_cm_status {
NVME_RDMA_CM_INVALID_ORD = 0x08,
};
static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
{
switch (status) {
case NVME_RDMA_CM_INVALID_LEN:
return "invalid length";
case NVME_RDMA_CM_INVALID_RECFMT:
return "invalid record format";
case NVME_RDMA_CM_INVALID_QID:
return "invalid queue ID";
case NVME_RDMA_CM_INVALID_HSQSIZE:
return "invalid host SQ size";
case NVME_RDMA_CM_INVALID_HRQSIZE:
return "invalid host RQ size";
case NVME_RDMA_CM_NO_RSC:
return "resource not found";
case NVME_RDMA_CM_INVALID_IRD:
return "invalid IRD";
case NVME_RDMA_CM_INVALID_ORD:
return "Invalid ORD";
default:
return "unrecognized reason";
}
}
/**
* struct nvme_rdma_cm_req - rdma connect request
*

View File

@ -579,6 +579,12 @@ struct nvme_write_zeroes_cmd {
__le16 appmask;
};
/* Features */
struct nvme_feat_auto_pst {
__le64 entries[32];
};
/* Admin commands */
enum nvme_admin_opcode {
@ -644,7 +650,9 @@ struct nvme_identify {
__le32 nsid;
__u64 rsvd2[2];
union nvme_data_ptr dptr;
__le32 cns;
__u8 cns;
__u8 rsvd3;
__le16 ctrlid;
__u32 rsvd11[5];
};

View File

@ -27,6 +27,7 @@ typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer,
size_t len, bool send);
#ifdef CONFIG_BLK_SED_OPAL
void free_opal_dev(struct opal_dev *dev);
bool opal_unlock_from_suspend(struct opal_dev *dev);
struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv);
int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
@ -51,6 +52,10 @@ static inline bool is_sed_ioctl(unsigned int cmd)
return false;
}
#else
static inline void free_opal_dev(struct opal_dev *dev)
{
}
static inline bool is_sed_ioctl(unsigned int cmd)
{
return false;