md: Don't ignore read-only array in md_check_recovery()
Usually if the array is not read-write, md_check_recovery() won't register a new sync_thread in the first place. And if the array is read-write and sync_thread is registered, md_set_readonly() will unregister sync_thread before setting the array read-only. md/raid follows this behavior, hence there is no problem.

After commit f52f5c71f3d4 ("md: fix stopping sync thread"), the following hang can be triggered by test shell/integrity-caching.sh:

1) array is read-only. dm-raid updates the super block:
rs_update_sbs
 ro = mddev->ro
 mddev->ro = 0
  -> set array read-write
 md_update_sb

2) register new sync thread concurrently.

3) dm-raid sets the array back to read-only:
rs_update_sbs
 mddev->ro = ro

4) stop the array:
raid_dtr
 md_stop
  stop_sync_thread
    set_bit(MD_RECOVERY_INTR, &mddev->recovery);
    md_wakeup_thread_directly(mddev->sync_thread);
    wait_event(..., !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))

5) sync thread done:
 md_do_sync
 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 md_wakeup_thread(mddev->thread);

6) daemon thread can't unregister sync thread:
 md_check_recovery
  if (!md_is_rdwr(mddev) &&
      !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
   return;
  -> -> MD_RECOVERY_RUNNING can't be cleared, hence step 4 hangs;

The root cause is that dm-raid manipulates 'mddev->ro' by itself; however, dm-raid really should stop the sync thread before setting the array read-only. Unfortunately, I need to read more code before I can refactor the handler of 'mddev->ro' in dm-raid, hence let's fix the problem the easy way for now to prevent a dm-raid regression.

Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Closes: https://lore.kernel.org/all/9801e40-8ac7-e225-6a71-309dcf9dc9aa@redhat.com/
Fixes: ecbfb9f118bc ("dm raid: add raid level takeover support")
Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
Cc: stable@vger.kernel.org # v6.7+
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240201092559.910982-3-yukuai1@huaweicloud.com
This commit is contained in:
parent
1baae052cc
commit
55a48ad2db
@ -9449,6 +9449,20 @@ not_running:
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
||||
}
|
||||
|
||||
static void unregister_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
|
||||
/* resync/recovery still happening */
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
return;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(!mddev->sync_thread))
|
||||
return;
|
||||
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is regularly called by all per-raid-array threads to
|
||||
* deal with generic issues like resync and super-block update.
|
||||
@ -9486,7 +9500,8 @@ void md_check_recovery(struct mddev *mddev)
|
||||
}
|
||||
|
||||
if (!md_is_rdwr(mddev) &&
|
||||
!test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
|
||||
!test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_DONE, &mddev->recovery))
|
||||
return;
|
||||
if ( ! (
|
||||
(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
|
||||
@ -9508,8 +9523,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||
struct md_rdev *rdev;
|
||||
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
/* sync_work already queued. */
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
unregister_sync_thread(mddev);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
@ -9572,16 +9586,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||
* still set.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
|
||||
/* resync/recovery still happening */
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(!mddev->sync_thread))
|
||||
goto unlock;
|
||||
|
||||
md_reap_sync_thread(mddev);
|
||||
unregister_sync_thread(mddev);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user