md: make error_handler functions more uniform and correct.

- There is no need to check the Faulty bit with test_bit(), as that was
  already done in md_error(), which is the only caller of these functions.
- MD_CHANGE_DEVS should be set *after* Faulty is set, to ensure the
  metadata is updated correctly.
- The conf->device_lock spinlock should be held while updating mddev->degraded.

Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
NeilBrown 2011-05-11 14:38:44 +10:00
parent 92f861a72a
commit 6f8d0c77ce
2 changed files with 40 additions and 38 deletions

View File

@ -186,6 +186,7 @@ static int multipath_congested(void *data, int bits)
static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
{ {
multipath_conf_t *conf = mddev->private; multipath_conf_t *conf = mddev->private;
char b[BDEVNAME_SIZE];
if (conf->raid_disks - mddev->degraded <= 1) { if (conf->raid_disks - mddev->degraded <= 1) {
/* /*
@ -194,26 +195,27 @@ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
* which has just failed. * which has just failed.
*/ */
printk(KERN_ALERT printk(KERN_ALERT
"multipath: only one IO path left and IO error.\n"); "multipath: only one IO path left and IO error.\n");
/* leave it active... it's all we have */ /* leave it active... it's all we have */
} else { return;
/*
* Mark disk as unusable
*/
if (!test_bit(Faulty, &rdev->flags)) {
char b[BDEVNAME_SIZE];
clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
mddev->degraded++;
printk(KERN_ALERT "multipath: IO failure on %s,"
" disabling IO path.\n"
"multipath: Operation continuing"
" on %d IO paths.\n",
bdevname (rdev->bdev,b),
conf->raid_disks - mddev->degraded);
}
} }
/*
* Mark disk as unusable
*/
if (test_and_clear_bit(In_sync, &rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
spin_unlock_irqrestore(&conf->device_lock, flags);
}
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "multipath: IO failure on %s,"
" disabling IO path.\n"
"multipath: Operation continuing"
" on %d IO paths.\n",
bdevname(rdev->bdev, b),
conf->raid_disks - mddev->degraded);
} }
static void print_multipath_conf (multipath_conf_t *conf) static void print_multipath_conf (multipath_conf_t *conf)
@ -273,9 +275,11 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
PAGE_CACHE_SIZE - 1); PAGE_CACHE_SIZE - 1);
} }
spin_lock_irq(&conf->device_lock);
mddev->degraded--; mddev->degraded--;
rdev->raid_disk = path; rdev->raid_disk = path;
set_bit(In_sync, &rdev->flags); set_bit(In_sync, &rdev->flags);
spin_unlock_irq(&conf->device_lock);
rcu_assign_pointer(p->rdev, rdev); rcu_assign_pointer(p->rdev, rdev);
err = 0; err = 0;
md_integrity_add_rdev(rdev, mddev); md_integrity_add_rdev(rdev, mddev);

View File

@ -1700,27 +1700,25 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
raid5_conf_t *conf = mddev->private; raid5_conf_t *conf = mddev->private;
pr_debug("raid456: error called\n"); pr_debug("raid456: error called\n");
if (!test_bit(Faulty, &rdev->flags)) { if (test_and_clear_bit(In_sync, &rdev->flags)) {
set_bit(MD_CHANGE_DEVS, &mddev->flags); unsigned long flags;
if (test_and_clear_bit(In_sync, &rdev->flags)) { spin_lock_irqsave(&conf->device_lock, flags);
unsigned long flags; mddev->degraded++;
spin_lock_irqsave(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
mddev->degraded++; /*
spin_unlock_irqrestore(&conf->device_lock, flags); * if recovery was running, make sure it aborts.
/* */
* if recovery was running, make sure it aborts. set_bit(MD_RECOVERY_INTR, &mddev->recovery);
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
set_bit(Faulty, &rdev->flags);
printk(KERN_ALERT
"md/raid:%s: Disk failure on %s, disabling device.\n"
"md/raid:%s: Operation continuing on %d devices.\n",
mdname(mddev),
bdevname(rdev->bdev, b),
mdname(mddev),
conf->raid_disks - mddev->degraded);
} }
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT
"md/raid:%s: Disk failure on %s, disabling device.\n"
"md/raid:%s: Operation continuing on %d devices.\n",
mdname(mddev),
bdevname(rdev->bdev, b),
mdname(mddev),
conf->raid_disks - mddev->degraded);
} }
/* /*