diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 51f76ddbe265..1b1ab4a1d132 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -96,7 +96,6 @@ struct r5l_log { spinlock_t no_space_stripes_lock; bool need_cache_flush; - bool in_teardown; }; /* @@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, mddev = log->rdev->mddev; /* - * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and - * wait for this thread to finish. This thread waits for - * MD_CHANGE_PENDING clear, which is supposed to be done in - * md_check_recovery(). md_check_recovery() tries to get - * reconfig_mutex. Since r5l_quiesce already holds the mutex, - * md_check_recovery() fails, so the PENDING never get cleared. The - * in_teardown check workaround this issue. + * Discard could zero data, so before discard we must make sure + * superblock is updated to new log tail. Updating superblock (either + * directly call md_update_sb() or depend on md thread) must hold + * reconfig mutex. On the other hand, raid5_quiesce is called with + * reconfig_mutex hold. The first step of raid5_quiesce() is waitting + * for all IO finish, hence waitting for reclaim thread, while reclaim + * thread is calling this function and waitting for reconfig mutex. So + * there is a deadlock. We workaround this issue with a trylock. + * FIXME: we could miss discard if we can't take reconfig mutex */ - if (!log->in_teardown) { - set_mask_bits(&mddev->flags, 0, - BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); - md_wakeup_thread(mddev->thread); - wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_PENDING, &mddev->flags) || - log->in_teardown); - /* - * r5l_quiesce could run after in_teardown check and hold - * mutex first. Superblock might get updated twice. - */ - if (log->in_teardown) - md_update_sb(mddev, 1); - } else { - WARN_ON(!mddev_is_locked(mddev)); - md_update_sb(mddev, 1); - } + set_mask_bits(&mddev->flags, 0, + BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); + if (!mddev_trylock(mddev)) + return; + md_update_sb(mddev, 1); + mddev_unlock(mddev); /* discard IO error really doesn't matter, ignore it */ if (log->last_checkpoint < end) { @@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state) if (!log || state == 2) return; if (state == 0) { - log->in_teardown = 0; /* * This is a special case for hotadd. In suspend, the array has * no journal. In resume, journal is initialized as well as the @@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state) log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); } else if (state == 1) { - /* - * at this point all stripes are finished, so io_unit is at - * least in STRIPE_END state - */ - log->in_teardown = 1; /* make sure r5l_write_super_and_discard_space exits */ mddev = log->rdev->mddev; wake_up(&mddev->sb_wait);