From b62b75905d571c29262a6c38cf9e5f089c203871 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Oct 2008 13:25:21 +1100 Subject: [PATCH 1/3] md: use sysfs_notify_dirent to notify changes to md/array_state Now that we have sysfs_notify_dirent, use it to notify changes to md/array_state. As sysfs_notify_dirent can be called in atomic context, we can remove the delayed notify and the MD_NOTIFY_ARRAY_STATE flag. Signed-off-by: NeilBrown --- drivers/md/md.c | 30 ++++++++++++++++-------------- include/linux/raid/md_k.h | 5 ++++- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index aaa3d465de4e..feea72dc4b69 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -222,6 +222,9 @@ static void mddev_put(mddev_t *mddev) list_del(&mddev->all_mddevs); spin_unlock(&all_mddevs_lock); blk_cleanup_queue(mddev->queue); + if (mddev->sysfs_state) + sysfs_put(mddev->sysfs_state); + mddev->sysfs_state = NULL; kobject_put(&mddev->kobj); } else spin_unlock(&all_mddevs_lock); @@ -2770,7 +2773,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) if (err) return err; else { - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); return len; } } @@ -3465,8 +3468,10 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) if (error) printk(KERN_WARNING "md: cannot register %s/md - name in use\n", disk->disk_name); - else + else { kobject_uevent(&mddev->kobj, KOBJ_ADD); + mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); + } return NULL; } @@ -3477,7 +3482,7 @@ static void md_safemode_timeout(unsigned long data) if (!atomic_read(&mddev->writes_pending)) { mddev->safemode = 1; if (mddev->external) - set_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags); + sysfs_notify_dirent(mddev->sysfs_state); } md_wakeup_thread(mddev->thread); } @@ -3740,7 +3745,7 @@ static int do_md_run(mddev_t * mddev) mddev->changed = 1; md_new_event(mddev); - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify(&mddev->kobj, NULL, "degraded"); kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); @@ -3767,7 +3772,7 @@ static int restart_array(mddev_t *mddev) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); return 0; } @@ -3847,7 +3852,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) module_put(mddev->pers->owner); mddev->pers = NULL; /* tell userspace to handle 'inactive' */ - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); set_capacity(disk, 0); mddev->changed = 1; @@ -3933,7 +3938,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mdname(mddev)); err = 0; md_new_event(mddev); - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); out: return err; } @@ -4938,7 +4943,7 @@ static int md_ioctl(struct inode *inode, struct file *file, if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { if (mddev->ro == 2) { mddev->ro = 0; - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); } else { @@ -5612,7 +5617,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) spin_unlock_irq(&mddev->write_lock); } if (did_change) - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); wait_event(mddev->sb_wait, !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && !test_bit(MD_CHANGE_PENDING, &mddev->flags)); @@ -5655,7 +5660,7 @@ int md_allow_write(mddev_t *mddev) mddev->safemode = 1; spin_unlock_irq(&mddev->write_lock); md_update_sb(mddev, 0); - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); } else spin_unlock_irq(&mddev->write_lock); @@ -6048,9 +6053,6 @@ void md_check_recovery(mddev_t *mddev) if (mddev->bitmap) bitmap_daemon_work(mddev->bitmap); - if (test_and_clear_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags)) - sysfs_notify(&mddev->kobj, NULL, "array_state"); - if (mddev->ro) return; @@ -6103,7 +6105,7 @@ void md_check_recovery(mddev_t *mddev) mddev->safemode = 0; spin_unlock_irq(&mddev->write_lock); if (did_change) - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); } if (mddev->flags) diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index c200b9a34aff..b16ad867e944 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -128,7 +128,6 @@ struct mddev_s #define MD_CHANGE_DEVS 0 /* Some device status has changed */ #define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ #define MD_CHANGE_PENDING 2 /* superblock update in progress */ -#define MD_NOTIFY_ARRAY_STATE 3 /* atomic context wants to notify userspace */ int ro; @@ -239,6 +238,10 @@ struct mddev_s sector_t resync_max; /* resync should pause * when it gets here */ + struct sysfs_dirent *sysfs_state; /* handle for 'array_state' + * file in sysfs. + */ + spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ atomic_t pending_writes; /* number of active superblock writes */ From 3c0ee63a64a20351ed6c16ec797e1f8c850741ea Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Oct 2008 13:25:28 +1100 Subject: [PATCH 2/3] md: use sysfs_notify_dirent to notify changes to md/dev-xxx/state The 'state' file for a device reports, for example, when the device has failed. Changes should be reported to userspace ASAP without the possibility of blocking on low-memory. sysfs_notify does have that possibility (as it takes a mutex which can be held across a kmalloc) so use sysfs_notify_dirent instead. Signed-off-by: NeilBrown --- drivers/md/md.c | 21 ++++++++++++--------- include/linux/raid/md_k.h | 3 +++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index feea72dc4b69..8b303477c77b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1462,6 +1462,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) kobject_del(&rdev->kobj); goto fail; } + rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state"); + list_add_rcu(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); return 0; @@ -1491,7 +1493,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); - + sysfs_put(rdev->sysfs_state); + rdev->sysfs_state = NULL; /* We need to delay this, otherwise we can deadlock when * writing to 'remove' to "dev/state". We also need * to delay it due to rcu usage. @@ -1926,8 +1929,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) err = 0; } - if (!err) - sysfs_notify(&rdev->kobj, NULL, "state"); + if (!err && rdev->sysfs_state) + sysfs_notify_dirent(rdev->sysfs_state); return err ? err : len; } static struct rdev_sysfs_entry rdev_state = @@ -2022,7 +2025,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) rdev->raid_disk = -1; return err; } else - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); sprintf(nm, "rd%d", rdev->raid_disk); if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) printk(KERN_WARNING @@ -2039,7 +2042,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) clear_bit(Faulty, &rdev->flags); clear_bit(WriteMostly, &rdev->flags); set_bit(In_sync, &rdev->flags); - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); } return len; } @@ -3583,7 +3586,7 @@ static int do_md_run(mddev_t * mddev) return -EINVAL; } } - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); } md_probe(mddev->unit, NULL, NULL); @@ -4302,7 +4305,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (err) export_rdev(rdev); else - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); md_update_sb(mddev, 1); if (mddev->degraded) @@ -6113,7 +6116,7 @@ void md_check_recovery(mddev_t *mddev) rdev_for_each(rdev, rtmp, mddev) if (test_and_clear_bit(StateChanged, &rdev->flags)) - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && @@ -6223,7 +6226,7 @@ void md_check_recovery(mddev_t *mddev) void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) { - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); wait_event_timeout(rdev->blocked_wait, !test_bit(Blocked, &rdev->flags), msecs_to_jiffies(5000)); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index b16ad867e944..8fc909ef6787 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -115,6 +115,9 @@ struct mdk_rdev_s * in superblock. */ struct work_struct del_work; /* used for delayed sysfs removal */ + + struct sysfs_dirent *sysfs_state; /* handle for 'state' + * sysfs entry */ }; struct mddev_s From 92850bbd71228730c80efd491e7427650188d359 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Oct 2008 13:25:32 +1100 Subject: [PATCH 3/3] md: allow extended partitions on md devices. The new extended partition support provides a much nicer was to have partitions on md devices that the 'mdp' alternate major. We cannot really get rid of 'mdp' at this time, but we can enable extended partitions as that will probably make life easier for sysadmins. Signed-off-by: NeilBrown --- drivers/md/md.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8b303477c77b..d255997bf635 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3463,6 +3463,11 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->fops = &md_fops; disk->private_data = mddev; disk->queue = mddev->queue; + /* Allow extended partitions. This makes the + * 'mdp' device redundant, but we can really + * remove it now. + */ + disk->flags |= GENHD_FL_EXT_DEVT; add_disk(disk); mddev->gendisk = disk; error = kobject_init_and_add(&mddev->kobj, &md_ktype,