md: use MD_RECOVERY_INTR instead of kthread_should_stop in resync thread.

We currently use kthread_should_stop() in various places in the
sync/reshape code to abort early.
However some places set MD_RECOVERY_INTR but don't immediately call
md_reap_sync_thread() (and we will shortly get another one).
When this happens we are relying on md_check_recovery() to reap the
thread and that only happens when it finishes normally.
So MD_RECOVERY_INTR must lead to a normal finish without the
kthread_should_stop() test.

So replace all relevant tests, and be more careful when the thread is
interrupted not to acknowledge the latest step in a reshape as it may
not be fully committed yet.

Also add a test on MD_RECOVERY_INTR in the 'is_mddev_idle' loop
so we don't have to wait for the speed to drop before we can abort.

Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
NeilBrown 2013-11-19 12:02:01 +11:00
parent 29f097c4d9
commit c91abf5a35
3 changed files with 34 additions and 31 deletions

View File

@ -7410,9 +7410,6 @@ void md_do_sync(struct md_thread *thread)
mddev->curr_resync = 2; mddev->curr_resync = 2;
try_again: try_again:
if (kthread_should_stop())
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
goto skip; goto skip;
for_each_mddev(mddev2, tmp) { for_each_mddev(mddev2, tmp) {
@ -7437,7 +7434,7 @@ void md_do_sync(struct md_thread *thread)
* be caught by 'softlockup' * be caught by 'softlockup'
*/ */
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
if (!kthread_should_stop() && if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev2->curr_resync >= mddev->curr_resync) { mddev2->curr_resync >= mddev->curr_resync) {
printk(KERN_INFO "md: delaying %s of %s" printk(KERN_INFO "md: delaying %s of %s"
" until %s has finished (they" " until %s has finished (they"
@ -7513,7 +7510,7 @@ void md_do_sync(struct md_thread *thread)
last_check = 0; last_check = 0;
if (j>2) { if (j>2) {
printk(KERN_INFO printk(KERN_INFO
"md: resuming %s of %s from checkpoint.\n", "md: resuming %s of %s from checkpoint.\n",
desc, mdname(mddev)); desc, mdname(mddev));
mddev->curr_resync = j; mddev->curr_resync = j;
@ -7550,7 +7547,8 @@ void md_do_sync(struct md_thread *thread)
sysfs_notify(&mddev->kobj, NULL, "sync_completed"); sysfs_notify(&mddev->kobj, NULL, "sync_completed");
} }
while (j >= mddev->resync_max && !kthread_should_stop()) { while (j >= mddev->resync_max &&
!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* As this condition is controlled by user-space, /* As this condition is controlled by user-space,
* we can block indefinitely, so use '_interruptible' * we can block indefinitely, so use '_interruptible'
* to avoid triggering warnings. * to avoid triggering warnings.
@ -7558,17 +7556,18 @@ void md_do_sync(struct md_thread *thread)
flush_signals(current); /* just in case */ flush_signals(current); /* just in case */
wait_event_interruptible(mddev->recovery_wait, wait_event_interruptible(mddev->recovery_wait,
mddev->resync_max > j mddev->resync_max > j
|| kthread_should_stop()); || test_bit(MD_RECOVERY_INTR,
&mddev->recovery));
} }
if (kthread_should_stop()) if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
goto interrupted; break;
sectors = mddev->pers->sync_request(mddev, j, &skipped, sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev)); currspeed < speed_min(mddev));
if (sectors == 0) { if (sectors == 0) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out; break;
} }
if (!skipped) { /* actual IO requested */ if (!skipped) { /* actual IO requested */
@ -7605,10 +7604,8 @@ void md_do_sync(struct md_thread *thread)
last_mark = next; last_mark = next;
} }
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
if (kthread_should_stop()) break;
goto interrupted;
/* /*
* this loop exits only if either when we are slower than * this loop exits only if either when we are slower than
@ -7631,11 +7628,12 @@ void md_do_sync(struct md_thread *thread)
} }
} }
} }
printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc); printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
test_bit(MD_RECOVERY_INTR, &mddev->recovery)
? "interrupted" : "done");
/* /*
* this also signals 'finished resyncing' to md_stop * this also signals 'finished resyncing' to md_stop
*/ */
out:
blk_finish_plug(&plug); blk_finish_plug(&plug);
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
@ -7689,16 +7687,6 @@ void md_do_sync(struct md_thread *thread)
set_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
return; return;
interrupted:
/*
* got a signal, exit.
*/
printk(KERN_INFO
"md: md_do_sync() got signal ... exiting\n");
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out;
} }
EXPORT_SYMBOL_GPL(md_do_sync); EXPORT_SYMBOL_GPL(md_do_sync);

View File

@ -4386,7 +4386,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait, mddev->flags == 0 || wait_event(mddev->sb_wait, mddev->flags == 0 ||
kthread_should_stop()); test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
allow_barrier(conf);
return sectors_done;
}
conf->reshape_safe = mddev->reshape_position; conf->reshape_safe = mddev->reshape_position;
allow_barrier(conf); allow_barrier(conf);
} }

View File

@ -4842,14 +4842,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
/* Cannot proceed until we've updated the superblock... */ /* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap, wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes)==0); atomic_read(&conf->reshape_stripes)==0
|| test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (atomic_read(&conf->reshape_stripes) != 0)
return 0;
mddev->reshape_position = conf->reshape_progress; mddev->reshape_position = conf->reshape_progress;
mddev->curr_resync_completed = sector_nr; mddev->curr_resync_completed = sector_nr;
conf->reshape_checkpoint = jiffies; conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait, mddev->flags == 0 || wait_event(mddev->sb_wait, mddev->flags == 0 ||
kthread_should_stop()); test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
return 0;
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
conf->reshape_safe = mddev->reshape_position; conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
@ -4932,7 +4937,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
>= mddev->resync_max - mddev->curr_resync_completed) { >= mddev->resync_max - mddev->curr_resync_completed) {
/* Cannot proceed until we've updated the superblock... */ /* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap, wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes) == 0); atomic_read(&conf->reshape_stripes) == 0
|| test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (atomic_read(&conf->reshape_stripes) != 0)
goto ret;
mddev->reshape_position = conf->reshape_progress; mddev->reshape_position = conf->reshape_progress;
mddev->curr_resync_completed = sector_nr; mddev->curr_resync_completed = sector_nr;
conf->reshape_checkpoint = jiffies; conf->reshape_checkpoint = jiffies;
@ -4940,13 +4948,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait, wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_DEVS, &mddev->flags) !test_bit(MD_CHANGE_DEVS, &mddev->flags)
|| kthread_should_stop()); || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
goto ret;
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
conf->reshape_safe = mddev->reshape_position; conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap); wake_up(&conf->wait_for_overlap);
sysfs_notify(&mddev->kobj, NULL, "sync_completed"); sysfs_notify(&mddev->kobj, NULL, "sync_completed");
} }
ret:
return reshape_sectors; return reshape_sectors;
} }