From d0985394e7fee6b25a7cc8335d45bc1c1a8ab2d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 10 Nov 2011 09:03:55 +0100 Subject: [PATCH 01/12] block: Revert "[SCSI] genhd: add a new attribute "alias" in gendisk" This reverts commit a72c5e5eb738033938ab30d6a634b74d1d060f10. The commit introduced alias for block devices which is intended to be used during logging although actual usage hasn't been committed yet. This approach adds very limited benefit (raw log might be easier to follow) which can be trivially implemented in userland but has a lot of problems. It is much worse than netif renames because it doesn't rename the actual device but just adds a convenience name which isn't used universally or enforced. Everything internal including device lookup and sysfs still uses the internal name and nothing prevents two devices from using conflicting alias - ie. sda can have sdb as its alias. This has been nacked by people working on device driver core, block layer and kernel-userland interface and shouldn't have been upstreamed. Revert it. http://thread.gmane.org/gmane.linux.kernel/1155104 http://thread.gmane.org/gmane.linux.scsi/68632 http://thread.gmane.org/gmane.linux.scsi/69776 Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Acked-by: Kay Sievers Cc: "James E.J. Bottomley" Cc: Nao Nishijima Cc: Alan Cox Cc: Al Viro Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-block | 13 ----- block/genhd.c | 71 --------------------------- include/linux/genhd.h | 4 -- 3 files changed, 88 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 2b5d56127fce..c1eb41cb9876 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -206,16 +206,3 @@ Description: when a discarded area is read the discard_zeroes_data parameter will be set to one. Otherwise it will be 0 and the result of reading a discarded area is undefined. 
-What: /sys/block//alias -Date: Aug 2011 -Contact: Nao Nishijima -Description: - A raw device name of a disk does not always point a same disk - each boot-up time. Therefore, users have to use persistent - device names, which udev creates when the kernel finds a disk, - instead of raw device name. However, kernel doesn't show those - persistent names on its messages (e.g. dmesg). - This file can store an alias of the disk and it would be - appeared in kernel messages if it is set. A disk can have an - alias which length is up to 255bytes. Users can use alphabets, - numbers, "-" and "_" in alias name. This file is writeonce. diff --git a/block/genhd.c b/block/genhd.c index 9253839714ff..02e9fca80825 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "blk.h" @@ -916,74 +915,6 @@ static int __init genhd_device_init(void) subsys_initcall(genhd_device_init); -static ssize_t alias_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gendisk *disk = dev_to_disk(dev); - ssize_t ret = 0; - - if (disk->alias) - ret = snprintf(buf, ALIAS_LEN, "%s\n", disk->alias); - return ret; -} - -static ssize_t alias_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct gendisk *disk = dev_to_disk(dev); - char *alias; - char *envp[] = { NULL, NULL }; - unsigned char c; - int i; - ssize_t ret = count; - - if (!count) - return -EINVAL; - - if (count >= ALIAS_LEN) { - printk(KERN_ERR "alias: alias is too long\n"); - return -EINVAL; - } - - /* Validation check */ - for (i = 0; i < count; i++) { - c = buf[i]; - if (i == count - 1 && c == '\n') - break; - if (!isalnum(c) && c != '_' && c != '-') { - printk(KERN_ERR "alias: invalid alias\n"); - return -EINVAL; - } - } - - if (disk->alias) { - printk(KERN_INFO "alias: %s is already assigned (%s)\n", - disk->disk_name, disk->alias); - return -EINVAL; - } - - alias = kasprintf(GFP_KERNEL, "%s", buf); - if (!alias) 
- return -ENOMEM; - - if (alias[count - 1] == '\n') - alias[count - 1] = '\0'; - - envp[0] = kasprintf(GFP_KERNEL, "ALIAS=%s", alias); - if (!envp[0]) { - kfree(alias); - return -ENOMEM; - } - - disk->alias = alias; - printk(KERN_INFO "alias: assigned %s to %s\n", alias, disk->disk_name); - - kobject_uevent_env(&dev->kobj, KOBJ_ADD, envp); - - kfree(envp[0]); - return ret; -} - static ssize_t disk_range_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1043,7 +974,6 @@ static ssize_t disk_discard_alignment_show(struct device *dev, return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue)); } -static DEVICE_ATTR(alias, S_IRUGO|S_IWUSR, alias_show, alias_store); static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); @@ -1066,7 +996,6 @@ static struct device_attribute dev_attr_fail_timeout = #endif static struct attribute *disk_attrs[] = { - &dev_attr_alias.attr, &dev_attr_range.attr, &dev_attr_ext_range.attr, &dev_attr_removable.attr, diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9de31bc98c88..6d18f3531f18 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -21,8 +21,6 @@ #define dev_to_part(device) container_of((device), struct hd_struct, __dev) #define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) -#define alias_name(disk) ((disk)->alias ? (disk)->alias : \ - (disk)->disk_name) extern struct device_type part_type; extern struct kobject *block_depr; @@ -60,7 +58,6 @@ enum { #define DISK_MAX_PARTS 256 #define DISK_NAME_LEN 32 -#define ALIAS_LEN 256 #include #include @@ -166,7 +163,6 @@ struct gendisk { * disks that can't be partitioned. 
*/ char disk_name[DISK_NAME_LEN]; /* name of major driver */ - char *alias; /* alias name of disk */ char *(*devnode)(struct gendisk *gd, mode_t *mode); unsigned int events; /* supported events */ From 7a401a972df8e184b3d1a3fc958c0a4ddee8d312 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 11 Nov 2011 13:29:04 +0100 Subject: [PATCH 02/12] backing-dev: ensure wakeup_timer is deleted bdi_prune_sb() in bdi_unregister() attempts to remove the bdi links from all super_blocks and then del_timer_sync() the writeback timer. However, this can race with __mark_inode_dirty(), leading to bdi_wakeup_thread_delayed() rearming the writeback timer on the bdi we're unregistering, after we've called del_timer_sync(). This can end up with the bdi being freed with an active timer inside it, as in the case of the following dump after the removal of an SD card. Fix this by redoing the del_timer_sync() in bdi_destroy(). ------------[ cut here ]------------ WARNING: at /home/rabin/kernel/arm/lib/debugobjects.c:262 debug_print_object+0x9c/0xc8() ODEBUG: free active (active state 0) object type: timer_list hint: wakeup_timer_fn+0x0/0x180 Modules linked in: Backtrace: [] (dump_backtrace+0x0/0x110) from [] (dump_stack+0x18/0x1c) r6:c02bc638 r5:00000106 r4:c79f5d18 r3:00000000 [] (dump_stack+0x0/0x1c) from [] (warn_slowpath_common+0x54/0x6c) [] (warn_slowpath_common+0x0/0x6c) from [] (warn_slowpath_fmt+0x38/0x40) r8:20000013 r7:c780c6f0 r6:c031613c r5:c780c6f0 r4:c02b1b29 r3:00000009 [] (warn_slowpath_fmt+0x0/0x40) from [] (debug_print_object+0x9c/0xc8) r3:c02b1b29 r2:c02bc662 [] (debug_print_object+0x0/0xc8) from [] (debug_check_no_obj_freed+0xac/0x1dc) r6:c7964000 r5:00000001 r4:c7964000 [] (debug_check_no_obj_freed+0x0/0x1dc) from [] (kmem_cache_free+0x88/0x1f8) [] (kmem_cache_free+0x0/0x1f8) from [] (blk_release_queue+0x70/0x78) [] (blk_release_queue+0x0/0x78) from [] (kobject_release+0x70/0x84) r5:c79641f0 r4:c796420c [] (kobject_release+0x0/0x84) from [] (kref_put+0x68/0x80) 
r7:00000083 r6:c74083d0 r5:c015289c r4:c796420c [] (kref_put+0x0/0x80) from [] (kobject_put+0x48/0x5c) r5:c79643b4 r4:c79641f0 [] (kobject_put+0x0/0x5c) from [] (blk_cleanup_queue+0x68/0x74) r4:c7964000 [] (blk_cleanup_queue+0x0/0x74) from [] (mmc_blk_put+0x78/0xe8) r5:00000000 r4:c794c400 [] (mmc_blk_put+0x0/0xe8) from [] (mmc_blk_release+0x24/0x38) r5:c794c400 r4:c0322824 [] (mmc_blk_release+0x0/0x38) from [] (__blkdev_put+0xe8/0x170) r5:c78d5e00 r4:c74083c0 [] (__blkdev_put+0x0/0x170) from [] (blkdev_put+0x11c/0x12c) r8:c79f5f70 r7:00000001 r6:c74083d0 r5:00000083 r4:c74083c0 r3:00000000 [] (blkdev_put+0x0/0x12c) from [] (kill_block_super+0x60/0x6c) r7:c7942300 r6:c79f4000 r5:00000083 r4:c74083c0 [] (kill_block_super+0x0/0x6c) from [] (deactivate_locked_super+0x44/0x70) r6:c79f4000 r5:c031af64 r4:c794dc00 r3:c00b06c4 [] (deactivate_locked_super+0x0/0x70) from [] (deactivate_super+0x6c/0x70) r5:c794dc00 r4:c794dc00 [] (deactivate_super+0x0/0x70) from [] (mntput_no_expire+0x188/0x194) r5:c794dc00 r4:c7942300 [] (mntput_no_expire+0x0/0x194) from [] (sys_umount+0x2e4/0x310) r6:c7942300 r5:00000000 r4:00000000 r3:00000000 [] (sys_umount+0x0/0x310) from [] (ret_fast_syscall+0x0/0x30) ---[ end trace e5c83c92ada51c76 ]--- Cc: stable@kernel.org Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Jens Axboe --- mm/backing-dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a0860640378d..71034f41a2ba 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -724,6 +724,14 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); + /* + * If bdi_unregister() had already been called earlier, the + * wakeup_timer could still be armed because bdi_prune_sb() + * can race with the bdi_wakeup_thread_delayed() calls from + * __mark_inode_dirty(). 
+ */ + del_timer_sync(&bdi->wb.wakeup_timer); + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); From 193733585692301f38d489b8ad8724c2f88349c0 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 11 Nov 2011 22:05:54 +0100 Subject: [PATCH 03/12] The Windows driver .inf disables ASPM on all cciss devices. Do the same. Signed-off-by: Matthew Garrett Cc: iss_storagedev@hp.com Acked-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 486f94ef24d4..5b690194dd99 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -4319,6 +4320,10 @@ static int __devinit cciss_pci_init(ctlr_info_t *h) dev_warn(&h->pdev->dev, "controller appears to be disabled\n"); return -ENODEV; } + + pci_disable_link_state(h->pdev, PCIE_LINK_STATE_L0S | + PCIE_LINK_STATE_L1 | PCIE_LINK_STATE_CLKPM); + err = pci_enable_device(h->pdev); if (err) { dev_warn(&h->pdev->dev, "Unable to Enable PCI device\n"); From 6b76106d8ef31111d6fc469564b83b5f5542794f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 13 Nov 2011 19:58:09 +0100 Subject: [PATCH 04/12] block: Always check length of all iov entries in blk_rq_map_user_iov() Even after commit 5478755616ae2ef1ce144dded589b62b2a50d575 ("block: check for proper length of iov entries earlier ...") we still won't check for zero-length entries after an unaligned entry. Remove the break-statement, so all entries are checked. 
Signed-off-by: Ben Hutchings Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- block/blk-map.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index e663ac2d8e68..164cd0059706 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -204,10 +204,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, if (!iov[i].iov_len) return -EINVAL; - if (uaddr & queue_dma_alignment(q)) { + /* + * Keep going so we check length of all segments + */ + if (uaddr & queue_dma_alignment(q)) unaligned = 1; - break; - } } if (unaligned || (q->dma_pad_mask & len) || map_data) From 3bb9068278ea524581237abadd41377a14717e7d Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 16 Nov 2011 09:21:48 +0100 Subject: [PATCH 05/12] loop: prevent information leak after failed read If the read was not fully successful we have to fail the whole bio to prevent an information leak of old pages ##Testcase_begin dd if=/dev/zero of=./file bs=1M count=1 losetup /dev/loop0 ./file -o 4096 truncate -s 0 ./file # OOps loop offset is now beyond i_size, so read will silently fail. # So bio's pages would not be cleared, which may result in an information leak. 
hexdump -C /dev/loop0 ##testcase_end Signed-off-by: Dmitry Monakhov Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/loop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3d806820280e..0d567397c254 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -372,7 +372,8 @@ do_lo_receive(struct loop_device *lo, if (retval < 0) return retval; - + if (retval != bvec->bv_len) + return -EIO; return 0; } From 0c614e2d3e6ee6ff13c6181f380787cea1d82d1d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 16 Nov 2011 09:21:48 +0100 Subject: [PATCH 06/12] include/linux/bio.h: use a static inline function for bio_integrity_clone() When CONFIG_BLK_DEV_INTEGRITY is not set, we get these warnings: drivers/md/dm.c: In function 'split_bvec': drivers/md/dm.c:1061:3: warning: statement with no effect drivers/md/dm.c: In function 'clone_bio': drivers/md/dm.c:1088:3: warning: statement with no effect Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index a3c071c9e189..d2a3cc23d828 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -519,7 +519,11 @@ extern void bio_integrity_init(void); #define bioset_integrity_create(a, b) (0) #define bio_integrity_prep(a) (0) #define bio_integrity_enabled(a) (0) -#define bio_integrity_clone(a, b, c, d) (0) +static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, + gfp_t gfp_mask, struct bio_set *bs) +{ + return 0; +} #define bioset_integrity_free(a) do { } while (0) #define bio_integrity_free(a, b) do { } while (0) #define bio_integrity_endio(a, b) do { } while (0) From 7035b5df3c071ccaf2f1694b96bd8958b0eb37ca Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 16 Nov 2011 09:21:49 +0100 Subject: [PATCH 07/12] loop: cleanup set_status 
interface 1) Anyone who has read access to loopdev has permission to call set_status and may change important parameters such as lo_offset, lo_sizelimit and so on, which contradicts to read access pattern and definitely equals to write access pattern. 2) Add lo_offset over i_size check to prevent blkdev_size overflow. ##Testcase_bagin #dd if=/dev/zero of=./file bs=1k count=1 #losetup /dev/loop0 ./file /* userspace_application */ struct loop_info64 loinf; fd = open("/dev/loop0", O_RDONLY); ioctl(fd, LOOP_GET_STATUS64, &loinf); /* Set offset to any value which is bigger than i_size, and sizelimit * to nonzero value*/ loinf.lo_offset = 4096*1024; loinf.lo_sizelimit = 1024; ioctl(fd, LOOP_SET_STATUS64, &loinf); /* After this loop device will have size similar to 0x7fffffffffxxxx */ #blockdev --getsz /dev/loop0 ##OUTPUT: 36028797018955968 ##Testcase_end [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Dmitry Monakhov Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/loop.c | 46 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 0d567397c254..68b205a9338f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -161,17 +161,19 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { &xor_funcs }; -static loff_t get_loop_size(struct loop_device *lo, struct file *file) +static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) { - loff_t size, offset, loopsize; + loff_t size, loopsize; /* Compute loopsize in bytes */ size = i_size_read(file->f_mapping->host); - offset = lo->lo_offset; loopsize = size - offset; - if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) - loopsize = lo->lo_sizelimit; + /* offset is beyond i_size, wierd but possible */ + if (loopsize < 0) + return 0; + if (sizelimit > 0 && sizelimit < loopsize) + loopsize = sizelimit; /* * Unfortunately, if we want to do I/O on the 
device, * the number of 512-byte sectors has to fit into a sector_t. @@ -179,17 +181,25 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file) return loopsize >> 9; } -static int -figure_loop_size(struct loop_device *lo) +static loff_t get_loop_size(struct loop_device *lo, struct file *file) { - loff_t size = get_loop_size(lo, lo->lo_backing_file); + return get_size(lo->lo_offset, lo->lo_sizelimit, file); +} + +static int +figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) +{ + loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; if (unlikely((loff_t)x != size)) return -EFBIG; - + if (lo->lo_offset != offset) + lo->lo_offset = offset; + if (lo->lo_sizelimit != sizelimit) + lo->lo_sizelimit = sizelimit; set_capacity(lo->lo_disk, x); - return 0; + return 0; } static inline int @@ -1059,9 +1069,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { - lo->lo_offset = info->lo_offset; - lo->lo_sizelimit = info->lo_sizelimit; - if (figure_loop_size(lo)) + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) return -EFBIG; } loop_config_discard(lo); @@ -1247,7 +1255,7 @@ static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) err = -ENXIO; if (unlikely(lo->lo_state != Lo_bound)) goto out; - err = figure_loop_size(lo); + err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); if (unlikely(err)) goto out; sec = get_capacity(lo->lo_disk); @@ -1285,13 +1293,19 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, goto out_unlocked; break; case LOOP_SET_STATUS: - err = loop_set_status_old(lo, (struct loop_info __user *) arg); + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_status_old(lo, + (struct loop_info __user *)arg); break; case LOOP_GET_STATUS: err = loop_get_status_old(lo, (struct loop_info __user *) 
arg); break; case LOOP_SET_STATUS64: - err = loop_set_status64(lo, (struct loop_info64 __user *) arg); + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_status64(lo, + (struct loop_info64 __user *) arg); break; case LOOP_GET_STATUS64: err = loop_get_status64(lo, (struct loop_info64 __user *) arg); From 0007a4c90a11a5371c8b3f80b220fa402a399189 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Wed, 16 Nov 2011 09:21:49 +0100 Subject: [PATCH 08/12] cciss: auto engage SCSI mid layer at driver load time A long time ago, probably in 2002, one of the distros, or maybe more than one, loaded block drivers prior to loading the SCSI mid layer. This meant that the cciss driver, being a block driver, could not engage the SCSI mid layer at init time without panicking, and relied on being poked by a userland program after the system was up (and the SCSI mid layer was therefore present) to engage the SCSI mid layer. This is no longer the case, and cciss can safely rely on the SCSI mid layer being present at init time and engage the SCSI mid layer straight away. This means that users will see their tape drives and medium changers at driver load time without need for a script in /etc/rc.d that does this: for x in /proc/driver/cciss/cciss* do echo "engage scsi" > $x done However, if no tape drives or medium changers are detected, the SCSI mid layer will not be engaged. If a tape drive or medium changer is later hot-added to the system it will then be necessary to use the above script or similar for the device(s) to be accessible. Signed-off-by: Stephen M. 
Cameron Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- Documentation/blockdev/cciss.txt | 14 ++++++-------- drivers/block/cciss.c | 1 + drivers/block/cciss_scsi.c | 1 + 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/blockdev/cciss.txt b/Documentation/blockdev/cciss.txt index 71464e09ec18..b79d0a13e7cd 100644 --- a/Documentation/blockdev/cciss.txt +++ b/Documentation/blockdev/cciss.txt @@ -98,14 +98,12 @@ You must enable "SCSI tape drive support for Smart Array 5xxx" and "SCSI support" in your kernel configuration to be able to use SCSI tape drives with your Smart Array 5xxx controller. -Additionally, note that the driver will not engage the SCSI core at init -time. The driver must be directed to dynamically engage the SCSI core via -the /proc filesystem entry which the "block" side of the driver creates as -/proc/driver/cciss/cciss* at runtime. This is because at driver init time, -the SCSI core may not yet be initialized (because the driver is a block -driver) and attempting to register it with the SCSI core in such a case -would cause a hang. This is best done via an initialization script -(typically in /etc/init.d, but could vary depending on distribution). +Additionally, note that the driver will engage the SCSI core at init +time if any tape drives or medium changers are detected. The driver may +also be directed to dynamically engage the SCSI core via the /proc filesystem +entry which the "block" side of the driver creates as +/proc/driver/cciss/cciss* at runtime. This is best done via a script. 
+ For example: for x in /proc/driver/cciss/cciss[0-9]* diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 5b690194dd99..8004ac30a7a8 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -5163,6 +5163,7 @@ reinit_after_soft_reset: h->cciss_max_sectors = 8192; rebuild_lun_table(h, 1, 0); + cciss_engage_scsi(h); h->busy_initializing = 0; return 1; diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 951a4e33b92b..e820b68d2f6c 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -1720,5 +1720,6 @@ static int cciss_eh_abort_handler(struct scsi_cmnd *scsicmd) /* If no tape support, then these become defined out of existence */ #define cciss_scsi_setup(cntl_num) +#define cciss_engage_scsi(h) #endif /* CONFIG_CISS_SCSI_TAPE */ From 3540d5e89b2ac268fcfc9b07a50a9ba4acc2e5e5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 16 Nov 2011 09:21:50 +0100 Subject: [PATCH 09/12] block: avoid unnecessary plug list flush get_request_wait() could sleep and flush the plug list. If the list is already flushed, don't flush again. 
Signed-off-by: Shaohua Li Reviewed-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/blk-core.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index f43c8a5840ae..6403c12f8aad 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1379,15 +1379,17 @@ get_rq: */ if (list_empty(&plug->list)) trace_block_plug(q); - else if (!plug->should_sort) { - struct request *__rq; + else { + if (!plug->should_sort) { + struct request *__rq; - __rq = list_entry_rq(plug->list.prev); - if (__rq->q != q) - plug->should_sort = 1; + __rq = list_entry_rq(plug->list.prev); + if (__rq->q != q) + plug->should_sort = 1; + } + if (request_count >= BLK_MAX_REQUEST_COUNT) + blk_flush_plug_list(plug, false); } - if (request_count >= BLK_MAX_REQUEST_COUNT) - blk_flush_plug_list(plug, false); list_add_tail(&req->queuelist, &plug->list); drive_stat_acct(req, 1); } else { From 121f099412bd6576dfb3d94222e89d9341362177 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Nov 2011 09:21:50 +0100 Subject: [PATCH 10/12] bio: change some signed vars to unsigned This is just a cleanup patch to silence a static checker warning. The problem is that we cap "nr_iovecs" so it can't be larger than "UIO_MAXIOV" but we don't check for negative values. It turns out this is prevented at other layers, but logically it doesn't make sense to have negative nr_iovecs so making it unsigned is nicer. Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/bio.c | 7 ++++--- include/linux/bio.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index 41c93c722244..b1fe82cf88cf 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -337,7 +337,7 @@ static void bio_fs_destructor(struct bio *bio) * RETURNS: * Pointer to new bio on success, NULL on failure. 
*/ -struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) +struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); @@ -365,7 +365,7 @@ static void bio_kmalloc_destructor(struct bio *bio) * %__GFP_WAIT, the allocation is guaranteed to succeed. * **/ -struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) +struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) { struct bio *bio; @@ -696,7 +696,8 @@ static void bio_free_map_data(struct bio_map_data *bmd) kfree(bmd); } -static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, +static struct bio_map_data *bio_alloc_map_data(int nr_segs, + unsigned int iov_count, gfp_t gfp_mask) { struct bio_map_data *bmd; diff --git a/include/linux/bio.h b/include/linux/bio.h index d2a3cc23d828..847994aef0e9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -211,8 +211,8 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); -extern struct bio *bio_alloc(gfp_t, int); -extern struct bio *bio_kmalloc(gfp_t, int); +extern struct bio *bio_alloc(gfp_t, unsigned int); +extern struct bio *bio_kmalloc(gfp_t, unsigned int); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); From a2c2a0e668e26e020731ce2a40e6474d1d37210a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Nov 2011 09:21:50 +0100 Subject: [PATCH 11/12] paride: fix potential information leak in pg_read() Smatch has a new check for Rosenberg type information leaks where structs are copied to the user with uninitialized stack data in them. In this case, the pg_write_hdr struct has a hole in it. 
struct pg_write_hdr { char magic; /* 0 1 */ char func; /* 1 1 */ /* XXX 2 bytes hole, try to pack */ int dlen; /* 4 4 */ Signed-off-by: Dan Carpenter Cc: Tim Waugh Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/paride/pg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 6b9a2000d56a..a79fb4f7ff62 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -630,6 +630,7 @@ static ssize_t pg_read(struct file *filp, char __user *buf, size_t count, loff_t if (dev->status & 0x10) return -ETIME; + memset(&hdr, 0, sizeof(hdr)); hdr.magic = PG_MAGIC; hdr.dlen = dev->dlen; copy = 0; From 019ceb7d5d252ce71001a157cf29f4ac28501b72 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 16 Nov 2011 09:21:50 +0100 Subject: [PATCH 12/12] block: add missed trace_block_plug After flush plug list, the list has no request, so we need to add a trace_block_plug(). Signed-off-by: Shaohua Li Reviewed-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 6403c12f8aad..ea70e6c80cd3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1387,8 +1387,10 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } - if (request_count >= BLK_MAX_REQUEST_COUNT) + if (request_count >= BLK_MAX_REQUEST_COUNT) { blk_flush_plug_list(plug, false); + trace_block_plug(q); + } } list_add_tail(&req->queuelist, &plug->list); drive_stat_acct(req, 1);