md: use MD_RECOVERY_INTR instead of kthread_should_stop in resync thread.
We currently use kthread_should_stop() in various places in the sync/reshape code to abort early. However, some places set MD_RECOVERY_INTR but don't immediately call md_reap_sync_thread() (and we will shortly get another one). When this happens we are relying on md_check_recovery() to reap the thread, and that only happens when it finishes normally. So MD_RECOVERY_INTR must lead to a normal finish without the kthread_should_stop() test. So replace all relevant tests, and be more careful when the thread is interrupted not to acknowledge the latest step in a reshape, as it may not be fully committed yet. Also add a test on MD_RECOVERY_INTR in the 'is_mddev_idle' loop so we don't have to wait for the speed to drop before we can abort. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
29f097c4d9
commit
c91abf5a35
@ -7410,9 +7410,6 @@ void md_do_sync(struct md_thread *thread)
|
|||||||
mddev->curr_resync = 2;
|
mddev->curr_resync = 2;
|
||||||
|
|
||||||
try_again:
|
try_again:
|
||||||
if (kthread_should_stop())
|
|
||||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
|
||||||
|
|
||||||
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||||
goto skip;
|
goto skip;
|
||||||
for_each_mddev(mddev2, tmp) {
|
for_each_mddev(mddev2, tmp) {
|
||||||
@ -7437,7 +7434,7 @@ void md_do_sync(struct md_thread *thread)
|
|||||||
* be caught by 'softlockup'
|
* be caught by 'softlockup'
|
||||||
*/
|
*/
|
||||||
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
|
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
|
||||||
if (!kthread_should_stop() &&
|
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||||
mddev2->curr_resync >= mddev->curr_resync) {
|
mddev2->curr_resync >= mddev->curr_resync) {
|
||||||
printk(KERN_INFO "md: delaying %s of %s"
|
printk(KERN_INFO "md: delaying %s of %s"
|
||||||
" until %s has finished (they"
|
" until %s has finished (they"
|
||||||
@ -7513,7 +7510,7 @@ void md_do_sync(struct md_thread *thread)
|
|||||||
last_check = 0;
|
last_check = 0;
|
||||||
|
|
||||||
if (j>2) {
|
if (j>2) {
|
||||||
printk(KERN_INFO
|
printk(KERN_INFO
|
||||||
"md: resuming %s of %s from checkpoint.\n",
|
"md: resuming %s of %s from checkpoint.\n",
|
||||||
desc, mdname(mddev));
|
desc, mdname(mddev));
|
||||||
mddev->curr_resync = j;
|
mddev->curr_resync = j;
|
||||||
@ -7550,7 +7547,8 @@ void md_do_sync(struct md_thread *thread)
|
|||||||
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
||||||
}
|
}
|
||||||
|
|
||||||
while (j >= mddev->resync_max && !kthread_should_stop()) {
|
while (j >= mddev->resync_max &&
|
||||||
|
!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||||
/* As this condition is controlled by user-space,
|
/* As this condition is controlled by user-space,
|
||||||
* we can block indefinitely, so use '_interruptible'
|
* we can block indefinitely, so use '_interruptible'
|
||||||
* to avoid triggering warnings.
|
* to avoid triggering warnings.
|
||||||
@ -7558,17 +7556,18 @@ void md_do_sync(struct md_thread *thread)
|
|||||||
flush_signals(current); /* just in case */
|
flush_signals(current); /* just in case */
|
||||||
wait_event_interruptible(mddev->recovery_wait,
|
wait_event_interruptible(mddev->recovery_wait,
|
||||||
mddev->resync_max > j
|
mddev->resync_max > j
|
||||||
|| kthread_should_stop());
|
|| test_bit(MD_RECOVERY_INTR,
|
||||||
|
&mddev->recovery));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kthread_should_stop())
|
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||||
goto interrupted;
|
break;
|
||||||
|
|
||||||
sectors = mddev->pers->sync_request(mddev, j, &skipped,
|
sectors = mddev->pers->sync_request(mddev, j, &skipped,
|
||||||
currspeed < speed_min(mddev));
|
currspeed < speed_min(mddev));
|
||||||
if (sectors == 0) {
|
if (sectors == 0) {
|
||||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||||
goto out;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!skipped) { /* actual IO requested */
|
if (!skipped) { /* actual IO requested */
|
||||||
@ -7605,10 +7604,8 @@ void md_do_sync(struct md_thread *thread)
|
|||||||
last_mark = next;
|
last_mark = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||||
if (kthread_should_stop())
|
break;
|
||||||
goto interrupted;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this loop exits only if either when we are slower than
|
* this loop exits only if either when we are slower than
|
||||||
@ -7631,11 +7628,12 @@ void md_do_sync(struct md_thread *thread)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
|
printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
|
||||||
|
test_bit(MD_RECOVERY_INTR, &mddev->recovery)
|
||||||
|
? "interrupted" : "done");
|
||||||
/*
|
/*
|
||||||
* this also signals 'finished resyncing' to md_stop
|
* this also signals 'finished resyncing' to md_stop
|
||||||
*/
|
*/
|
||||||
out:
|
|
||||||
blk_finish_plug(&plug);
|
blk_finish_plug(&plug);
|
||||||
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
|
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
|
||||||
|
|
||||||
@ -7689,16 +7687,6 @@ void md_do_sync(struct md_thread *thread)
|
|||||||
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
|
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
|
||||||
md_wakeup_thread(mddev->thread);
|
md_wakeup_thread(mddev->thread);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
interrupted:
|
|
||||||
/*
|
|
||||||
* got a signal, exit.
|
|
||||||
*/
|
|
||||||
printk(KERN_INFO
|
|
||||||
"md: md_do_sync() got signal ... exiting\n");
|
|
||||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(md_do_sync);
|
EXPORT_SYMBOL_GPL(md_do_sync);
|
||||||
|
|
||||||
|
@ -4386,7 +4386,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
|
|||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
md_wakeup_thread(mddev->thread);
|
md_wakeup_thread(mddev->thread);
|
||||||
wait_event(mddev->sb_wait, mddev->flags == 0 ||
|
wait_event(mddev->sb_wait, mddev->flags == 0 ||
|
||||||
kthread_should_stop());
|
test_bit(MD_RECOVERY_INTR, &mddev->recovery));
|
||||||
|
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||||
|
allow_barrier(conf);
|
||||||
|
return sectors_done;
|
||||||
|
}
|
||||||
conf->reshape_safe = mddev->reshape_position;
|
conf->reshape_safe = mddev->reshape_position;
|
||||||
allow_barrier(conf);
|
allow_barrier(conf);
|
||||||
}
|
}
|
||||||
|
@ -4842,14 +4842,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
|||||||
time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
|
time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
|
||||||
/* Cannot proceed until we've updated the superblock... */
|
/* Cannot proceed until we've updated the superblock... */
|
||||||
wait_event(conf->wait_for_overlap,
|
wait_event(conf->wait_for_overlap,
|
||||||
atomic_read(&conf->reshape_stripes)==0);
|
atomic_read(&conf->reshape_stripes)==0
|
||||||
|
|| test_bit(MD_RECOVERY_INTR, &mddev->recovery));
|
||||||
|
if (atomic_read(&conf->reshape_stripes) != 0)
|
||||||
|
return 0;
|
||||||
mddev->reshape_position = conf->reshape_progress;
|
mddev->reshape_position = conf->reshape_progress;
|
||||||
mddev->curr_resync_completed = sector_nr;
|
mddev->curr_resync_completed = sector_nr;
|
||||||
conf->reshape_checkpoint = jiffies;
|
conf->reshape_checkpoint = jiffies;
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
md_wakeup_thread(mddev->thread);
|
md_wakeup_thread(mddev->thread);
|
||||||
wait_event(mddev->sb_wait, mddev->flags == 0 ||
|
wait_event(mddev->sb_wait, mddev->flags == 0 ||
|
||||||
kthread_should_stop());
|
test_bit(MD_RECOVERY_INTR, &mddev->recovery));
|
||||||
|
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||||
|
return 0;
|
||||||
spin_lock_irq(&conf->device_lock);
|
spin_lock_irq(&conf->device_lock);
|
||||||
conf->reshape_safe = mddev->reshape_position;
|
conf->reshape_safe = mddev->reshape_position;
|
||||||
spin_unlock_irq(&conf->device_lock);
|
spin_unlock_irq(&conf->device_lock);
|
||||||
@ -4932,7 +4937,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
|||||||
>= mddev->resync_max - mddev->curr_resync_completed) {
|
>= mddev->resync_max - mddev->curr_resync_completed) {
|
||||||
/* Cannot proceed until we've updated the superblock... */
|
/* Cannot proceed until we've updated the superblock... */
|
||||||
wait_event(conf->wait_for_overlap,
|
wait_event(conf->wait_for_overlap,
|
||||||
atomic_read(&conf->reshape_stripes) == 0);
|
atomic_read(&conf->reshape_stripes) == 0
|
||||||
|
|| test_bit(MD_RECOVERY_INTR, &mddev->recovery));
|
||||||
|
if (atomic_read(&conf->reshape_stripes) != 0)
|
||||||
|
goto ret;
|
||||||
mddev->reshape_position = conf->reshape_progress;
|
mddev->reshape_position = conf->reshape_progress;
|
||||||
mddev->curr_resync_completed = sector_nr;
|
mddev->curr_resync_completed = sector_nr;
|
||||||
conf->reshape_checkpoint = jiffies;
|
conf->reshape_checkpoint = jiffies;
|
||||||
@ -4940,13 +4948,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
|||||||
md_wakeup_thread(mddev->thread);
|
md_wakeup_thread(mddev->thread);
|
||||||
wait_event(mddev->sb_wait,
|
wait_event(mddev->sb_wait,
|
||||||
!test_bit(MD_CHANGE_DEVS, &mddev->flags)
|
!test_bit(MD_CHANGE_DEVS, &mddev->flags)
|
||||||
|| kthread_should_stop());
|
|| test_bit(MD_RECOVERY_INTR, &mddev->recovery));
|
||||||
|
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||||
|
goto ret;
|
||||||
spin_lock_irq(&conf->device_lock);
|
spin_lock_irq(&conf->device_lock);
|
||||||
conf->reshape_safe = mddev->reshape_position;
|
conf->reshape_safe = mddev->reshape_position;
|
||||||
spin_unlock_irq(&conf->device_lock);
|
spin_unlock_irq(&conf->device_lock);
|
||||||
wake_up(&conf->wait_for_overlap);
|
wake_up(&conf->wait_for_overlap);
|
||||||
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
||||||
}
|
}
|
||||||
|
ret:
|
||||||
return reshape_sectors;
|
return reshape_sectors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user