From 8c2e870a625bd336b2e7a65a97c1836acef07322 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 28 Jun 2008 08:30:52 +1000 Subject: [PATCH 1/3] Ensure interrupted recovery completed properly (v1 metadata plus bitmap) If, while assembling an array, we find a device which is not fully in-sync with the array, it is important to set the "fullsync" flags. This is an exact analog to the setting of this flag in hot_add_disk methods. Currently, only v1.x metadata supports having devices in an array which are not fully in-sync (it keep track of how in sync they are). The 'fullsync' flag only makes a difference when a write-intent bitmap is being used. In this case it tells recovery to ignore the bitmap and recovery all blocks. This fix is already in place for raid1, but not raid5/6 or raid10. So without this fix, a raid1 ir raid4/5/6 array with version 1.x metadata and a write intent bitmaps, that is stopped in the middle of a recovery, will appear to complete the recovery instantly after it is reassembled, but the recovery will not be correct. If you might have an array like that, issueing echo repair > /sys/block/mdXX/md/sync_action will make sure recovery completes properly. Cc: Signed-off-by: Neil Brown --- drivers/md/raid10.c | 2 ++ drivers/md/raid5.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1de17da34a95..a71277b640ab 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2137,6 +2137,8 @@ static int run(mddev_t *mddev) !test_bit(In_sync, &disk->rdev->flags)) { disk->head_position = 0; mddev->degraded++; + if (disk->rdev) + conf->fullsync = 1; } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c37e256b1176..475fba4d371e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4305,7 +4305,9 @@ static int run(mddev_t *mddev) " disk %d\n", bdevname(rdev->bdev,b), raid_disk); working_disks++; - } + } else + /* Cannot rely on bitmap to complete recovery */ + conf->fullsync = 1; } /* From efe311431869b40d67911820a309f9a1a41306f3 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 28 Jun 2008 08:31:14 +1000 Subject: [PATCH 2/3] Don't acknowlege that stripe-expand is complete until it really is. We shouldn't acknowledge that a stripe has been expanded (When reshaping a raid5 by adding a device) until the moved data has actually been written out. However we are currently acknowledging (by calling md_done_sync) when the POST_XOR is complete and before the write. So track in s.locked whether there are pending writes, and don't call md_done_sync yet if there are. Note: we all set R5_LOCKED on devices which are are about to read from. This probably isn't technically necessary, but is usually done when writing a block, and justifies the use of s.locked here. This bug can lead to a crash if an array is stopped while an reshape is in progress. Cc: Signed-off-by: Neil Brown --- drivers/md/raid5.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 475fba4d371e..54c8ee28fcc4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2898,6 +2898,8 @@ static void handle_stripe5(struct stripe_head *sh) for (i = conf->raid_disks; i--; ) { set_bit(R5_Wantwrite, &sh->dev[i].flags); + set_bit(R5_LOCKED, &dev->flags); + s.locked++; if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) sh->ops.count++; } @@ -2911,6 +2913,7 @@ static void handle_stripe5(struct stripe_head *sh) conf->raid_disks); s.locked += handle_write_operations5(sh, 1, 1); } else if (s.expanded && + s.locked == 0 && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); From 9bbbca3a0ee09293108b67835c6bdf6196d7bcb3 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 28 Jun 2008 08:31:17 +1000 Subject: [PATCH 3/3] Fix error paths if md_probe fails. md_probe can fail (e.g. alloc_disk could fail) without returning an error (as it alway returns NULL). So when we call mddev_find immediately afterwards, we need to check that md_probe actually succeeded. This means checking that mdev->gendisk is non-NULL. cc: Cc: Dave Jones Signed-off-by: Neil Brown --- drivers/md/md.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 7cf512a34ccf..2580ac1b9b0f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3897,8 +3897,10 @@ static void autorun_devices(int part) md_probe(dev, NULL, NULL); mddev = mddev_find(dev); - if (!mddev) { - printk(KERN_ERR + if (!mddev || !mddev->gendisk) { + if (mddev) + mddev_put(mddev); + printk(KERN_ERR "md: cannot allocate memory for md drive.\n"); break; }