Merge tag 'md-next-20230927' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.7/block
Pull MD updates from Song:

 "1. Make rdev add/remove independent from daemon thread, by Yu Kuai;
  2. Refactor code around quiesce() and mddev_suspend(), by Yu Kuai."

* tag 'md-next-20230927' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: replace deprecated strncpy with memcpy
  md/md-linear: Annotate struct linear_conf with __counted_by
  md: don't check 'mddev->pers' and 'pers->quiesce' from suspend_lo_store()
  md: don't check 'mddev->pers' from suspend_hi_store()
  md-bitmap: suspend array earlier in location_store()
  md-bitmap: remove the checking of 'pers->quiesce' from location_store()
  md: don't rely on 'mddev->pers' to be set in mddev_suspend()
  md: initialize 'writes_pending' while allocating mddev
  md: initialize 'active_io' while allocating mddev
  md: delay remove_and_add_spares() for read only array to md_start_sync()
  md: factor out a helper rdev_addable() from remove_and_add_spares()
  md: factor out a helper rdev_is_spare() from remove_and_add_spares()
  md: factor out a helper rdev_removeable() from remove_and_add_spares()
  md: delay choosing sync action to md_start_sync()
  md: factor out a helper to choose sync action from md_check_recovery()
  md: use separate work_struct for md_start_sync()
commit 03f7b57a59
drivers/md/dm-raid.c

@@ -749,7 +749,11 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
                 return ERR_PTR(-ENOMEM);
         }
 
-        mddev_init(&rs->md);
+        if (mddev_init(&rs->md)) {
+                kfree(rs);
+                ti->error = "Cannot initialize raid context";
+                return ERR_PTR(-ENOMEM);
+        }
 
         rs->raid_disks = raid_devs;
         rs->delta_disks = 0;
@@ -798,6 +802,7 @@ static void raid_set_free(struct raid_set *rs)
                         dm_put_device(rs->ti, rs->dev[i].data_dev);
         }
 
+        mddev_destroy(&rs->md);
         kfree(rs);
 }
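mddev_init() can now fail (it takes over the percpu-ref allocations, per the md.c hunks below), so raid_set_alloc() grows an unwind branch and raid_set_free() gains the matching mddev_destroy(). A minimal userspace sketch of the same init/destroy pairing; the ctx_* names are illustrative, not kernel API:

    #include <stdlib.h>
    #include <errno.h>

    struct ctx { void *buf; };

    static int ctx_init(struct ctx *c)
    {
            c->buf = malloc(64);
            return c->buf ? 0 : -ENOMEM;
    }

    static void ctx_destroy(struct ctx *c)
    {
            free(c->buf);
    }

    /* alloc + init, undoing the allocation if init fails */
    static struct ctx *ctx_alloc(void)
    {
            struct ctx *c = calloc(1, sizeof(*c));

            if (!c)
                    return NULL;
            if (ctx_init(c)) {
                    free(c);
                    return NULL;
            }
            return c;
    }

    /* free must tear down what init acquired before releasing the object */
    static void ctx_free(struct ctx *c)
    {
            ctx_destroy(c);
            free(c);
    }

    int main(void)
    {
            struct ctx *c = ctx_alloc();

            if (c)
                    ctx_free(c);
            return 0;
    }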
drivers/md/md-bitmap.c

@@ -2351,11 +2351,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
         rv = mddev_lock(mddev);
         if (rv)
                 return rv;
+
+        mddev_suspend(mddev);
         if (mddev->pers) {
-                if (!mddev->pers->quiesce) {
-                        rv = -EBUSY;
-                        goto out;
-                }
                 if (mddev->recovery || mddev->sync_thread) {
                         rv = -EBUSY;
                         goto out;
@@ -2369,11 +2367,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                         rv = -EBUSY;
                         goto out;
                 }
-                if (mddev->pers) {
-                        mddev_suspend(mddev);
-                        md_bitmap_destroy(mddev);
-                        mddev_resume(mddev);
-                }
+
+                md_bitmap_destroy(mddev);
                 mddev->bitmap_info.offset = 0;
                 if (mddev->bitmap_info.file) {
                         struct file *f = mddev->bitmap_info.file;
@@ -2383,6 +2378,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
         } else {
                 /* No bitmap, OK to set a location */
                 long long offset;
+                struct bitmap *bitmap;
+
                 if (strncmp(buf, "none", 4) == 0)
                         /* nothing to be done */;
                 else if (strncmp(buf, "file:", 5) == 0) {
@@ -2406,25 +2403,20 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                         rv = -EINVAL;
                         goto out;
                 }
 
                 mddev->bitmap_info.offset = offset;
                 if (mddev->pers) {
-                        struct bitmap *bitmap;
                         bitmap = md_bitmap_create(mddev, -1);
-                        mddev_suspend(mddev);
-                        if (IS_ERR(bitmap))
-                                rv = PTR_ERR(bitmap);
-                        else {
-                                mddev->bitmap = bitmap;
-                                rv = md_bitmap_load(mddev);
-                                if (rv)
-                                        mddev->bitmap_info.offset = 0;
-                        }
-                        if (rv) {
-                                md_bitmap_destroy(mddev);
-                                mddev_resume(mddev);
-                                goto out;
-                        }
-                        mddev_resume(mddev);
+                        if (IS_ERR(bitmap)) {
+                                rv = PTR_ERR(bitmap);
+                                goto out;
+                        }
+
+                        mddev->bitmap = bitmap;
+                        rv = md_bitmap_load(mddev);
+                        if (rv) {
+                                mddev->bitmap_info.offset = 0;
+                                md_bitmap_destroy(mddev);
+                                goto out;
+                        }
                 }
         }
@@ -2437,6 +2429,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
         }
         rv = 0;
 out:
+        mddev_resume(mddev);
         mddev_unlock(mddev);
         if (rv)
                 return rv;
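Net effect on location_store(): one mddev_suspend() right after taking the lock and one mddev_resume() at the shared out: label, instead of suspend/resume pairs scattered across the branches, so every exit path is balanced by construction. A standalone sketch of that shape (lock/suspend/resume here are stand-ins, not the md API):

    #include <errno.h>
    #include <stdio.h>

    struct dev { int suspended; int value; };

    static int lock(struct dev *d)      { (void)d; return 0; }
    static void unlock(struct dev *d)   { (void)d; }
    static void suspend(struct dev *d)  { d->suspended = 1; }
    static void resume(struct dev *d)   { d->suspended = 0; }

    static int store(struct dev *d, int busy, int new_value)
    {
            int rv = lock(d);

            if (rv)
                    return rv;

            suspend(d);             /* one suspend covers every branch below */
            if (busy) {
                    rv = -EBUSY;    /* error paths just set rv and fall out */
                    goto out;
            }
            d->value = new_value;
            rv = 0;
    out:
            resume(d);              /* single resume keeps the pair balanced */
            unlock(d);
            return rv;
    }

    int main(void)
    {
            struct dev d = { 0, 0 };

            printf("%d %d\n", store(&d, 0, 7), store(&d, 1, 9)); /* 0, then -EBUSY */
            return 0;
    }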
drivers/md/md-linear.c

@@ -69,6 +69,19 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
         if (!conf)
                 return NULL;
 
+        /*
+         * conf->raid_disks is copy of mddev->raid_disks. The reason to
+         * keep a copy of mddev->raid_disks in struct linear_conf is,
+         * mddev->raid_disks may not be consistent with pointers number of
+         * conf->disks[] when it is updated in linear_add() and used to
+         * iterate old conf->disks[] earray in linear_congested().
+         * Here conf->raid_disks is always consitent with number of
+         * pointers in conf->disks[] array, and mddev->private is updated
+         * with rcu_assign_pointer() in linear_addr(), such race can be
+         * avoided.
+         */
+        conf->raid_disks = raid_disks;
+
         cnt = 0;
         conf->array_sectors = 0;
 
@@ -112,19 +125,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
                         conf->disks[i-1].end_sector +
                         conf->disks[i].rdev->sectors;
 
-        /*
-         * conf->raid_disks is copy of mddev->raid_disks. The reason to
-         * keep a copy of mddev->raid_disks in struct linear_conf is,
-         * mddev->raid_disks may not be consistent with pointers number of
-         * conf->disks[] when it is updated in linear_add() and used to
-         * iterate old conf->disks[] earray in linear_congested().
-         * Here conf->raid_disks is always consitent with number of
-         * pointers in conf->disks[] array, and mddev->private is updated
-         * with rcu_assign_pointer() in linear_addr(), such race can be
-         * avoided.
-         */
-        conf->raid_disks = raid_disks;
-
         return conf;
 
 out:
drivers/md/md-linear.h

@@ -12,6 +12,6 @@ struct linear_conf
         struct rcu_head         rcu;
         sector_t                array_sectors;
         int                     raid_disks; /* a copy of mddev->raid_disks */
-        struct dev_info         disks[];
+        struct dev_info         disks[] __counted_by(raid_disks);
 };
 #endif
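__counted_by(raid_disks) ties the bounds of the flexible array to the raid_disks member, so FORTIFY_SOURCE and __builtin_dynamic_object_size() can catch out-of-bounds accesses; that is also why the md-linear.c hunk above moves the conf->raid_disks assignment ahead of the first use of conf->disks[]. A small standalone sketch of the annotation, with a fallback macro so it compiles on toolchains without the attribute:

    #include <stdlib.h>

    #if defined(__has_attribute)
    # if __has_attribute(__counted_by__)
    #  define __counted_by(member) __attribute__((__counted_by__(member)))
    # endif
    #endif
    #ifndef __counted_by
    # define __counted_by(member)   /* no-op on older toolchains */
    #endif

    struct flex {
            int n;                          /* element count for items[] */
            int items[] __counted_by(n);    /* bounds tied to 'n' */
    };

    static struct flex *flex_alloc(int n)
    {
            struct flex *f = malloc(sizeof(*f) + n * sizeof(f->items[0]));

            if (!f)
                    return NULL;
            f->n = n;       /* set the counter before touching items[] */
            for (int i = 0; i < n; i++)
                    f->items[i] = i;
            return f;
    }

    int main(void)
    {
            struct flex *f = flex_alloc(4);

            free(f);
            return 0;
    }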
drivers/md/md.c
@@ -449,7 +449,7 @@ void mddev_suspend(struct mddev *mddev)
         set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
         percpu_ref_kill(&mddev->active_io);
 
-        if (mddev->pers->prepare_suspend)
+        if (mddev->pers && mddev->pers->prepare_suspend)
                 mddev->pers->prepare_suspend(mddev);
 
         wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
@@ -631,16 +631,39 @@ void mddev_put(struct mddev *mddev)
                  * flush_workqueue() after mddev_find will succeed in waiting
                  * for the work to be done.
                  */
-                INIT_WORK(&mddev->del_work, mddev_delayed_delete);
                 queue_work(md_misc_wq, &mddev->del_work);
         }
         spin_unlock(&all_mddevs_lock);
 }
 
 static void md_safemode_timeout(struct timer_list *t);
+static void md_start_sync(struct work_struct *ws);
+
+static void active_io_release(struct percpu_ref *ref)
+{
+        struct mddev *mddev = container_of(ref, struct mddev, active_io);
+
+        wake_up(&mddev->sb_wait);
+}
+
+static void no_op(struct percpu_ref *r) {}
 
-void mddev_init(struct mddev *mddev)
+int mddev_init(struct mddev *mddev)
 {
+        if (percpu_ref_init(&mddev->active_io, active_io_release,
+                            PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+                return -ENOMEM;
+
+        if (percpu_ref_init(&mddev->writes_pending, no_op,
+                            PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
+                percpu_ref_exit(&mddev->active_io);
+                return -ENOMEM;
+        }
+
+        /* We want to start with the refcount at zero */
+        percpu_ref_put(&mddev->writes_pending);
+
         mutex_init(&mddev->open_mutex);
         mutex_init(&mddev->reconfig_mutex);
         mutex_init(&mddev->sync_mutex);
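One idiom worth noting above: percpu_ref_init() leaves the new ref holding one reference, so the percpu_ref_put() right after the writes_pending init drops it to zero, and because the release callback is no_op, reaching zero has no side effect. A plain-counter model of that trick (struct ref is a mock, not the percpu_ref API):

    #include <stdio.h>

    struct ref { long count; void (*release)(struct ref *); };

    static void no_op(struct ref *r) { (void)r; }

    static void ref_init(struct ref *r, void (*release)(struct ref *))
    {
            r->count = 1;           /* like percpu_ref_init(): born holding one ref */
            r->release = release;
    }

    static void ref_put(struct ref *r)
    {
            if (--r->count == 0)
                    r->release(r);  /* no_op here, so reaching zero is harmless */
    }

    int main(void)
    {
            struct ref writes_pending;

            ref_init(&writes_pending, no_op);
            ref_put(&writes_pending);               /* start with the count at zero */
            printf("count = %ld\n", writes_pending.count);  /* prints 0 */
            return 0;
    }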
@@ -662,9 +685,21 @@ void mddev_init(struct mddev *mddev)
         mddev->resync_min = 0;
         mddev->resync_max = MaxSector;
         mddev->level = LEVEL_NONE;
+
+        INIT_WORK(&mddev->sync_work, md_start_sync);
+        INIT_WORK(&mddev->del_work, mddev_delayed_delete);
+
+        return 0;
 }
 EXPORT_SYMBOL_GPL(mddev_init);
 
+void mddev_destroy(struct mddev *mddev)
+{
+        percpu_ref_exit(&mddev->active_io);
+        percpu_ref_exit(&mddev->writes_pending);
+}
+EXPORT_SYMBOL_GPL(mddev_destroy);
+
 static struct mddev *mddev_find_locked(dev_t unit)
 {
         struct mddev *mddev;
@@ -708,13 +743,16 @@ static struct mddev *mddev_alloc(dev_t unit)
         new = kzalloc(sizeof(*new), GFP_KERNEL);
         if (!new)
                 return ERR_PTR(-ENOMEM);
-        mddev_init(new);
+
+        error = mddev_init(new);
+        if (error)
+                goto out_free_new;
 
         spin_lock(&all_mddevs_lock);
         if (unit) {
                 error = -EEXIST;
                 if (mddev_find_locked(unit))
-                        goto out_free_new;
+                        goto out_destroy_new;
                 new->unit = unit;
                 if (MAJOR(unit) == MD_MAJOR)
                         new->md_minor = MINOR(unit);
@@ -725,7 +763,7 @@ static struct mddev *mddev_alloc(dev_t unit)
                 error = -ENODEV;
                 new->unit = mddev_alloc_unit();
                 if (!new->unit)
-                        goto out_free_new;
+                        goto out_destroy_new;
                 new->md_minor = MINOR(new->unit);
                 new->hold_active = UNTIL_STOP;
         }
@@ -733,8 +771,11 @@ static struct mddev *mddev_alloc(dev_t unit)
         list_add(&new->all_mddevs, &all_mddevs);
         spin_unlock(&all_mddevs_lock);
         return new;
-out_free_new:
+
+out_destroy_new:
         spin_unlock(&all_mddevs_lock);
+        mddev_destroy(new);
+out_free_new:
         kfree(new);
         return ERR_PTR(error);
 }
@@ -745,6 +786,7 @@ static void mddev_free(struct mddev *mddev)
         list_del(&mddev->all_mddevs);
         spin_unlock(&all_mddevs_lock);
 
+        mddev_destroy(mddev);
         kfree(mddev);
 }
@@ -3879,7 +3921,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
                 return rv;
 
         if (mddev->pers == NULL) {
-                strncpy(mddev->clevel, buf, slen);
+                memcpy(mddev->clevel, buf, slen);
                 if (mddev->clevel[slen-1] == '\n')
                         slen--;
                 mddev->clevel[slen] = 0;
@@ -3912,7 +3954,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
         }
 
         /* Now find the new personality */
-        strncpy(clevel, buf, slen);
+        memcpy(clevel, buf, slen);
         if (clevel[slen-1] == '\n')
                 slen--;
         clevel[slen] = 0;
@@ -4698,7 +4740,7 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len)
                 size_t namelen = len-9;
                 if (namelen >= sizeof(mddev->metadata_type))
                         namelen = sizeof(mddev->metadata_type)-1;
-                strncpy(mddev->metadata_type, buf+9, namelen);
+                memcpy(mddev->metadata_type, buf+9, namelen);
                 mddev->metadata_type[namelen] = 0;
                 if (namelen && mddev->metadata_type[namelen-1] == '\n')
                         mddev->metadata_type[--namelen] = 0;
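All three converted sites already clamp the copy length to the destination and NUL-terminate by hand, so memcpy() is a drop-in that sheds strncpy()'s zero-padding and its may-not-terminate hazard. A standalone sketch of the metadata_type pattern; the 16-byte buffer size is arbitrary here:

    #include <string.h>
    #include <stdio.h>

    static void set_type(char dst[16], const char *src, size_t len)
    {
            if (len >= 16)
                    len = 16 - 1;           /* clamp, leaving room for the NUL */
            memcpy(dst, src, len);          /* bounded copy, no implicit padding */
            dst[len] = '\0';                /* explicit termination */
            if (len && dst[len - 1] == '\n')
                    dst[--len] = '\0';      /* strip a trailing newline, as above */
    }

    int main(void)
    {
            char buf[16];

            set_type(buf, "external:foo\n", 13);
            printf("%s\n", buf);            /* prints "external:foo" */
            return 0;
    }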
@@ -4872,6 +4914,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
                 /* A write to sync_action is enough to justify
                  * canceling read-auto mode
                  */
+                flush_work(&mddev->sync_work);
                 mddev->ro = MD_RDWR;
                 md_wakeup_thread(mddev->sync_thread);
         }
@@ -5146,18 +5189,13 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
         err = mddev_lock(mddev);
         if (err)
                 return err;
-        err = -EINVAL;
-        if (mddev->pers == NULL ||
-            mddev->pers->quiesce == NULL)
-                goto unlock;
+
         mddev_suspend(mddev);
         mddev->suspend_lo = new;
         mddev_resume(mddev);
-        err = 0;
-unlock:
+
         mddev_unlock(mddev);
-        return err ?: len;
+        return len;
 }
 static struct md_sysfs_entry md_suspend_lo =
 __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
@@ -5183,18 +5221,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
         err = mddev_lock(mddev);
         if (err)
                 return err;
-        err = -EINVAL;
-        if (mddev->pers == NULL)
-                goto unlock;
+
         mddev_suspend(mddev);
         mddev->suspend_hi = new;
         mddev_resume(mddev);
-        err = 0;
-unlock:
+
         mddev_unlock(mddev);
-        return err ?: len;
+        return len;
 }
 static struct md_sysfs_entry md_suspend_hi =
 __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
@@ -5597,21 +5630,6 @@ static void mddev_delayed_delete(struct work_struct *ws)
         kobject_put(&mddev->kobj);
 }
 
-static void no_op(struct percpu_ref *r) {}
-
-int mddev_init_writes_pending(struct mddev *mddev)
-{
-        if (mddev->writes_pending.percpu_count_ptr)
-                return 0;
-        if (percpu_ref_init(&mddev->writes_pending, no_op,
-                            PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
-                return -ENOMEM;
-        /* We want to start with the refcount at zero */
-        percpu_ref_put(&mddev->writes_pending);
-        return 0;
-}
-EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
-
 struct mddev *md_alloc(dev_t dev, char *name)
 {
         /*
@@ -5783,12 +5801,6 @@ static void md_safemode_timeout(struct timer_list *t)
 }
 
 static int start_dirty_degraded;
 
-static void active_io_release(struct percpu_ref *ref)
-{
-        struct mddev *mddev = container_of(ref, struct mddev, active_io);
-
-        wake_up(&mddev->sb_wait);
-}
-
 int md_run(struct mddev *mddev)
 {
@@ -5869,15 +5881,10 @@ int md_run(struct mddev *mddev)
                 nowait = nowait && bdev_nowait(rdev->bdev);
         }
 
-        err = percpu_ref_init(&mddev->active_io, active_io_release,
-                              PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
-        if (err)
-                return err;
-
         if (!bioset_initialized(&mddev->bio_set)) {
                 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
                 if (err)
-                        goto exit_active_io;
+                        return err;
         }
         if (!bioset_initialized(&mddev->sync_set)) {
                 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
@@ -6074,8 +6081,6 @@ exit_sync_set:
         bioset_exit(&mddev->sync_set);
 exit_bio_set:
         bioset_exit(&mddev->bio_set);
-exit_active_io:
-        percpu_ref_exit(&mddev->active_io);
         return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6291,7 +6296,6 @@ static void __md_stop(struct mddev *mddev)
         module_put(pers->owner);
         clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 
-        percpu_ref_exit(&mddev->active_io);
         bioset_exit(&mddev->bio_set);
         bioset_exit(&mddev->sync_set);
         bioset_exit(&mddev->io_clone_set);
@@ -6306,7 +6310,6 @@ void md_stop(struct mddev *mddev)
          */
         __md_stop_writes(mddev);
         __md_stop(mddev);
-        percpu_ref_exit(&mddev->writes_pending);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
@@ -7646,6 +7649,10 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
                 mutex_unlock(&mddev->open_mutex);
                 sync_blockdev(bdev);
         }
+
+        if (!md_is_rdwr(mddev))
+                flush_work(&mddev->sync_work);
+
         err = mddev_lock(mddev);
         if (err) {
                 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
@@ -7886,7 +7893,6 @@ static void md_free_disk(struct gendisk *disk)
 {
         struct mddev *mddev = disk->private_data;
 
-        percpu_ref_exit(&mddev->writes_pending);
         mddev_free(mddev);
 }
 
@@ -8570,6 +8576,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
         BUG_ON(mddev->ro == MD_RDONLY);
         if (mddev->ro == MD_AUTO_READ) {
                 /* need to switch to read/write */
+                flush_work(&mddev->sync_work);
                 mddev->ro = MD_RDWR;
                 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                 md_wakeup_thread(mddev->thread);
@@ -9161,6 +9168,85 @@ void md_do_sync(struct md_thread *thread)
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
+static bool rdev_removeable(struct md_rdev *rdev)
+{
+        /* rdev is not used. */
+        if (rdev->raid_disk < 0)
+                return false;
+
+        /* There are still inflight io, don't remove this rdev. */
+        if (atomic_read(&rdev->nr_pending))
+                return false;
+
+        /*
+         * An error occurred but has not yet been acknowledged by the metadata
+         * handler, don't remove this rdev.
+         */
+        if (test_bit(Blocked, &rdev->flags))
+                return false;
+
+        /* Fautly rdev is not used, it's safe to remove it. */
+        if (test_bit(Faulty, &rdev->flags))
+                return true;
+
+        /* Journal disk can only be removed if it's faulty. */
+        if (test_bit(Journal, &rdev->flags))
+                return false;
+
+        /*
+         * 'In_sync' is cleared while 'raid_disk' is valid, which means
+         * replacement has just become active from pers->spare_active(), and
+         * then pers->hot_remove_disk() will replace this rdev with replacement.
+         */
+        if (!test_bit(In_sync, &rdev->flags))
+                return true;
+
+        return false;
+}
+
+static bool rdev_is_spare(struct md_rdev *rdev)
+{
+        return !test_bit(Candidate, &rdev->flags) && rdev->raid_disk >= 0 &&
+               !test_bit(In_sync, &rdev->flags) &&
+               !test_bit(Journal, &rdev->flags) &&
+               !test_bit(Faulty, &rdev->flags);
+}
+
+static bool rdev_addable(struct md_rdev *rdev)
+{
+        /* rdev is already used, don't add it again. */
+        if (test_bit(Candidate, &rdev->flags) || rdev->raid_disk >= 0 ||
+            test_bit(Faulty, &rdev->flags))
+                return false;
+
+        /* Allow to add journal disk. */
+        if (test_bit(Journal, &rdev->flags))
+                return true;
+
+        /* Allow to add if array is read-write. */
+        if (md_is_rdwr(rdev->mddev))
+                return true;
+
+        /*
+         * For read-only array, only allow to readd a rdev. And if bitmap is
+         * used, don't allow to readd a rdev that is too old.
+         */
+        if (rdev->saved_raid_disk >= 0 && !test_bit(Bitmap_sync, &rdev->flags))
+                return true;
+
+        return false;
+}
+
+static bool md_spares_need_change(struct mddev *mddev)
+{
+        struct md_rdev *rdev;
+
+        rdev_for_each(rdev, mddev)
+                if (rdev_removeable(rdev) || rdev_addable(rdev))
+                        return true;
+        return false;
+}
+
 static int remove_and_add_spares(struct mddev *mddev,
                                  struct md_rdev *this)
 {
@@ -9193,12 +9279,8 @@ static int remove_and_add_spares(struct mddev *mddev,
         synchronize_rcu();
         rdev_for_each(rdev, mddev) {
                 if ((this == NULL || rdev == this) &&
-                    rdev->raid_disk >= 0 &&
-                    !test_bit(Blocked, &rdev->flags) &&
-                    ((test_bit(RemoveSynchronized, &rdev->flags) ||
-                     (!test_bit(In_sync, &rdev->flags) &&
-                      !test_bit(Journal, &rdev->flags))) &&
-                    atomic_read(&rdev->nr_pending)==0)) {
+                    (test_bit(RemoveSynchronized, &rdev->flags) ||
+                     rdev_removeable(rdev))) {
                         if (mddev->pers->hot_remove_disk(
                                     mddev, rdev) == 0) {
                                 sysfs_unlink_rdev(mddev, rdev);
@@ -9220,25 +9302,12 @@ static int remove_and_add_spares(struct mddev *mddev,
         rdev_for_each(rdev, mddev) {
                 if (this && this != rdev)
                         continue;
-                if (test_bit(Candidate, &rdev->flags))
-                        continue;
-                if (rdev->raid_disk >= 0 &&
-                    !test_bit(In_sync, &rdev->flags) &&
-                    !test_bit(Journal, &rdev->flags) &&
-                    !test_bit(Faulty, &rdev->flags))
+                if (rdev_is_spare(rdev))
                         spares++;
-                if (rdev->raid_disk >= 0)
-                        continue;
-                if (test_bit(Faulty, &rdev->flags))
+                if (!rdev_addable(rdev))
                         continue;
-                if (!test_bit(Journal, &rdev->flags)) {
-                        if (!md_is_rdwr(mddev) &&
-                            !(rdev->saved_raid_disk >= 0 &&
-                              !test_bit(Bitmap_sync, &rdev->flags)))
-                                continue;
-
+                if (!test_bit(Journal, &rdev->flags))
                         rdev->recovery_offset = 0;
-                }
                 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
                         /* failure here is OK */
                         sysfs_link_rdev(mddev, rdev);
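The three predicates turn the old open-coded flag tests into named policy, which is what lets md_spares_need_change() and the rewritten loops above stay short. A userspace mock of rdev_is_spare() (simplified flag bits, not the kernel's md_rdev) makes the truth table easy to poke at:

    #include <stdbool.h>
    #include <stdio.h>

    enum { CANDIDATE = 1 << 0, IN_SYNC = 1 << 1, JOURNAL = 1 << 2, FAULTY = 1 << 3 };

    struct rdev { unsigned flags; int raid_disk; };

    static bool rdev_is_spare(const struct rdev *r)
    {
            return !(r->flags & CANDIDATE) && r->raid_disk >= 0 &&
                   !(r->flags & IN_SYNC) &&
                   !(r->flags & JOURNAL) &&
                   !(r->flags & FAULTY);
    }

    int main(void)
    {
            struct rdev recovering = { .flags = 0,       .raid_disk = 2 };
            struct rdev active     = { .flags = IN_SYNC, .raid_disk = 2 };
            struct rdev unused     = { .flags = 0,       .raid_disk = -1 };

            printf("%d %d %d\n",
                   rdev_is_spare(&recovering),  /* 1: has a slot, not in sync */
                   rdev_is_spare(&active),      /* 0: already in sync */
                   rdev_is_spare(&unused));     /* 0: no slot assigned */
            return 0;
    }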
@@ -9254,9 +9323,81 @@ no_add:
         return spares;
 }
 
+static bool md_choose_sync_action(struct mddev *mddev, int *spares)
+{
+        /* Check if reshape is in progress first. */
+        if (mddev->reshape_position != MaxSector) {
+                if (mddev->pers->check_reshape == NULL ||
+                    mddev->pers->check_reshape(mddev) != 0)
+                        return false;
+
+                set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+                clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+                return true;
+        }
+
+        /*
+         * Remove any failed drives, then add spares if possible. Spares are
+         * also removed and re-added, to allow the personality to fail the
+         * re-add.
+         */
+        *spares = remove_and_add_spares(mddev, NULL);
+        if (*spares) {
+                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+                clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+                clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+
+                /* Start new recovery. */
+                set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+                return true;
+        }
+
+        /* Check if recovery is in progress. */
+        if (mddev->recovery_cp < MaxSector) {
+                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+                clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+                return true;
+        }
+
+        /* Delay to choose resync/check/repair in md_do_sync(). */
+        if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+                return true;
+
+        /* Nothing to be done */
+        return false;
+}
+
 static void md_start_sync(struct work_struct *ws)
 {
-        struct mddev *mddev = container_of(ws, struct mddev, del_work);
+        struct mddev *mddev = container_of(ws, struct mddev, sync_work);
+        int spares = 0;
+
+        mddev_lock_nointr(mddev);
+
+        if (!md_is_rdwr(mddev)) {
+                /*
+                 * On a read-only array we can:
+                 * - remove failed devices
+                 * - add already-in_sync devices if the array itself is in-sync.
+                 * As we only add devices that are already in-sync, we can
+                 * activate the spares immediately.
+                 */
+                remove_and_add_spares(mddev, NULL);
+                goto not_running;
+        }
+
+        if (!md_choose_sync_action(mddev, &spares))
+                goto not_running;
+
+        if (!mddev->pers->sync_request)
+                goto not_running;
+
+        /*
+         * We are adding a device or devices to an array which has the bitmap
+         * stored on all devices. So make sure all bitmap pages get written.
+         */
+        if (spares)
+                md_bitmap_write_all(mddev->bitmap);
 
         rcu_assign_pointer(mddev->sync_thread,
                            md_register_thread(md_do_sync, mddev, "resync"));
@@ -9264,20 +9405,27 @@ static void md_start_sync(struct work_struct *ws)
                 pr_warn("%s: could not start resync thread...\n",
                         mdname(mddev));
                 /* leave the spares where they are, it shouldn't hurt */
-                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
-                clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-                clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
-                clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
-                clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-                wake_up(&resync_wait);
-                if (test_and_clear_bit(MD_RECOVERY_RECOVER,
-                                       &mddev->recovery))
-                        if (mddev->sysfs_action)
-                                sysfs_notify_dirent_safe(mddev->sysfs_action);
-        } else
-                md_wakeup_thread(mddev->sync_thread);
+                goto not_running;
+        }
+
+        mddev_unlock(mddev);
+        md_wakeup_thread(mddev->sync_thread);
         sysfs_notify_dirent_safe(mddev->sysfs_action);
         md_new_event();
+        return;
+
+not_running:
+        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+        clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+        clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+        clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+        mddev_unlock(mddev);
+
+        wake_up(&resync_wait);
+        if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
+            mddev->sysfs_action)
+                sysfs_notify_dirent_safe(mddev->sysfs_action);
 }
 
 /*
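Why the dedicated sync_work matters: queue_work() on a work_struct that is still pending is a no-op, so reusing del_work for both delayed deletion and sync-thread startup could silently drop one of the two jobs. The container_of() pattern md_start_sync() uses to recover its mddev, in a standalone mock (struct work_struct here is a stub, not the kernel's):

    #include <stddef.h>
    #include <stdio.h>

    struct work_struct { void (*func)(struct work_struct *); };

    struct mydev {
            struct work_struct del_work;    /* delayed teardown */
            struct work_struct sync_work;   /* sync-thread startup: its own item */
            const char *name;
    };

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    static void start_sync(struct work_struct *ws)
    {
            struct mydev *dev = container_of(ws, struct mydev, sync_work);

            printf("starting sync for %s\n", dev->name);
    }

    int main(void)
    {
            struct mydev dev = { .sync_work = { .func = start_sync }, .name = "md0" };

            dev.sync_work.func(&dev.sync_work);     /* what the workqueue would do */
            return 0;
    }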
@@ -9345,7 +9493,6 @@ void md_check_recovery(struct mddev *mddev)
                 return;
 
         if (mddev_trylock(mddev)) {
-                int spares = 0;
                 bool try_set_sync = mddev->safemode != 0;
 
                 if (!mddev->external && mddev->safemode == 1)
@@ -9353,30 +9500,43 @@ void md_check_recovery(struct mddev *mddev)
 
                 if (!md_is_rdwr(mddev)) {
                         struct md_rdev *rdev;
 
+                        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+                                /* sync_work already queued. */
+                                clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                                goto unlock;
+                        }
+
                         if (!mddev->external && mddev->in_sync)
-                                /* 'Blocked' flag not needed as failed devices
+                                /*
+                                 * 'Blocked' flag not needed as failed devices
                                  * will be recorded if array switched to read/write.
                                  * Leaving it set will prevent the device
                                  * from being removed.
                                  */
                                 rdev_for_each(rdev, mddev)
                                         clear_bit(Blocked, &rdev->flags);
-                        /* On a read-only array we can:
-                         * - remove failed devices
-                         * - add already-in_sync devices if the array itself
-                         *   is in-sync.
-                         * As we only add devices that are already in-sync,
-                         * we can activate the spares immediately.
-                         */
-                        remove_and_add_spares(mddev, NULL);
-                        /* There is no thread, but we need to call
+
+                        /*
+                         * There is no thread, but we need to call
                          * ->spare_active and clear saved_raid_disk
                          */
                         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                         md_reap_sync_thread(mddev);
+
+                        /*
+                         * Let md_start_sync() to remove and add rdevs to the
+                         * array.
+                         */
+                        if (md_spares_need_change(mddev)) {
+                                set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+                                queue_work(md_misc_wq, &mddev->sync_work);
+                        }
+
+                        clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
                         clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                         clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
 
                         goto unlock;
                 }
@@ -9432,56 +9592,14 @@ void md_check_recovery(struct mddev *mddev)
                 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
                 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 
-                if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
-                    test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
-                        goto not_running;
-                /* no recovery is running.
-                 * remove any failed drives, then
-                 * add spares if possible.
-                 * Spares are also removed and re-added, to allow
-                 * the personality to fail the re-add.
-                 */
-
-                if (mddev->reshape_position != MaxSector) {
-                        if (mddev->pers->check_reshape == NULL ||
-                            mddev->pers->check_reshape(mddev) != 0)
-                                /* Cannot proceed */
-                                goto not_running;
-                        set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-                        clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-                } else if ((spares = remove_and_add_spares(mddev, NULL))) {
-                        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
-                        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
-                        clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
-                        set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-                } else if (mddev->recovery_cp < MaxSector) {
-                        set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
-                        clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-                } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
-                        /* nothing to be done ... */
-                        goto not_running;
-
-                if (mddev->pers->sync_request) {
-                        if (spares) {
-                                /* We are adding a device or devices to an array
-                                 * which has the bitmap stored on all devices.
-                                 * So make sure all bitmap pages get written
-                                 */
-                                md_bitmap_write_all(mddev->bitmap);
-                        }
-                        INIT_WORK(&mddev->del_work, md_start_sync);
-                        queue_work(md_misc_wq, &mddev->del_work);
-                        goto unlock;
-                }
-        not_running:
-                if (!mddev->sync_thread) {
+                if (test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
+                    !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+                        queue_work(md_misc_wq, &mddev->sync_work);
+                } else {
                         clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                         wake_up(&resync_wait);
                         if (test_and_clear_bit(MD_RECOVERY_RECOVER,
                                                &mddev->recovery))
                                 if (mddev->sysfs_action)
                                         sysfs_notify_dirent_safe(mddev->sysfs_action);
                 }
 
         unlock:
                 wake_up(&mddev->sb_wait);
                 mddev_unlock(mddev);
drivers/md/md.h

@@ -453,7 +453,10 @@ struct mddev {
         struct kernfs_node              *sysfs_degraded;        /*handle for 'degraded' */
         struct kernfs_node              *sysfs_level;           /*handle for 'level' */
 
-        struct work_struct del_work;    /* used for delayed sysfs removal */
+        /* used for delayed sysfs removal */
+        struct work_struct del_work;
+        /* used for register new sync thread */
+        struct work_struct sync_work;
 
         /* "lock" protects:
          * flush_bio transition from NULL to !NULL
@@ -768,7 +771,6 @@ extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **t
 extern void md_wakeup_thread(struct md_thread __rcu *thread);
 extern void md_check_recovery(struct mddev *mddev);
 extern void md_reap_sync_thread(struct mddev *mddev);
-extern int mddev_init_writes_pending(struct mddev *mddev);
 extern bool md_write_start(struct mddev *mddev, struct bio *bi);
 extern void md_write_inc(struct mddev *mddev, struct bio *bi);
 extern void md_write_end(struct mddev *mddev);
@@ -795,7 +797,8 @@ extern int md_integrity_register(struct mddev *mddev);
 extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
 extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
 
-extern void mddev_init(struct mddev *mddev);
+extern int mddev_init(struct mddev *mddev);
+extern void mddev_destroy(struct mddev *mddev);
 struct mddev *md_alloc(dev_t dev, char *name);
 void mddev_put(struct mddev *mddev);
 extern int md_run(struct mddev *mddev);
drivers/md/raid1.c

@@ -3122,8 +3122,7 @@ static int raid1_run(struct mddev *mddev)
                         mdname(mddev));
                 return -EIO;
         }
-        if (mddev_init_writes_pending(mddev) < 0)
-                return -ENOMEM;
+
         /*
          * copy the already verified devices into our private RAID1
          * bookkeeping area. [whatever we allocate in run(),
drivers/md/raid10.c

@@ -4154,9 +4154,6 @@ static int raid10_run(struct mddev *mddev)
         sector_t min_offset_diff = 0;
         int first = 1;
 
-        if (mddev_init_writes_pending(mddev) < 0)
-                return -ENOMEM;
-
         if (mddev->private == NULL) {
                 conf = setup_conf(mddev);
                 if (IS_ERR(conf))
drivers/md/raid5.c

@@ -7778,9 +7778,6 @@ static int raid5_run(struct mddev *mddev)
         long long min_offset_diff = 0;
         int first = 1;
 
-        if (mddev_init_writes_pending(mddev) < 0)
-                return -ENOMEM;
-
         if (mddev->recovery_cp != MaxSector)
                 pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
                           mdname(mddev));