Merge branch 'md-next-rcu-cleanup' into md-next
From Yu Kuai: md: remove rcu protection to access rdev from conf

The lifetime of an rdev:

1. md_import_device() generates an rdev based on the underlying disk:

     mddev_lock()
     rdev = kzalloc();
     rdev->bdev = blkdev_get_by_dev();
     mddev_unlock()

2. bind_rdev_to_array() adds this rdev to mddev->disks:

     mddev_lock()
     kobject_add(&rdev->kobj, &mddev->kobj, ...);
     list_add_rcu(&rdev->same_set, &mddev->disks);
     mddev_unlock()

3. remove_and_add_spares() adds this rdev to conf:

     mddev_lock()
     rdev_addable();
     pers->hot_add_disk();
     rcu_assign_pointer(conf->rdev, rdev);
     mddev_unlock()

4. The array is used with this rdev.

5. remove_and_add_spares() removes this rdev from conf:

     // triggered by sysfs/ioctl
     mddev_lock()
     rdev_removeable();
     pers->hot_remove_disk();
     rcu_assign_pointer(conf->rdev, NULL);
     synchronize_rcu();
     mddev_unlock()

     // triggered by the daemon
     mddev_lock()
     rdev_removeable();
     synchronize_rcu();       -> this can't protect accessing rdev from conf
     pers->hot_remove_disk();
     rcu_assign_pointer(conf->rdev, NULL);
     mddev_unlock()

6. md_kick_rdev_from_array() removes this rdev from mddev->disks:

     mddev_lock()
     list_del_rcu(&rdev->same_set);
     synchronize_rcu();
     list_add(&rdev->same_set, &mddev->deleting);
     mddev_unlock()
     export_rdev();

There are two separate rcu protections for rdev, and this patchset removes
the protection through conf (steps 3 and 5), because it is safe to access
rdev from conf in the following cases:

- If 'reconfig_mutex' is held, because rdev can't be added to or removed
  from conf;
- If there is normal IO inflight, because mddev_suspend() will wait for IO
  to be done and prevents rdev from being added to or removed from conf;
- If the sync thread is running, because remove_and_add_spares() can only
  be called from the daemon thread when the sync thread is done, and
  'MD_RECOVERY_RUNNING' is also checked for ioctl/sysfs;
- If any spinlock or rcu_read_lock() is held, because synchronize_rcu()
  from step 6 prevents rdev from being freed until the spinlock is
  released or rcu_read_unlock() is called.
commit 726a9b67e9
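The pattern repeated throughout the diff below can be summarised in a short
sketch. This is kernel-style C but not code from the kernel tree; the
'mirror_info' structure is a hypothetical stand-in for the per-slot
structures (multipath_info, raid1_info, raid10_info, disk_info) that the
series touches:

        /*
         * Minimal sketch of the before/after access pattern under the
         * lifetime rules listed above.
         */
        struct mirror_info {
                struct md_rdev *rdev;   /* was: struct md_rdev __rcu *rdev */
        };

        /* Before: readers pinned the rdev inside an RCU read-side section. */
        static struct md_rdev *rdev_get_rcu(struct mirror_info *mirror)
        {
                struct md_rdev *rdev;

                rcu_read_lock();
                rdev = rcu_dereference(mirror->rdev);
                if (rdev)
                        atomic_inc(&rdev->nr_pending); /* pin before unlocking */
                rcu_read_unlock();

                return rdev;
        }

        /*
         * After: a plain load is enough while 'reconfig_mutex' is held or
         * normal IO is inflight; lockless readers such as the *_status()
         * seq_file paths keep a READ_ONCE() only to avoid load tearing.
         */
        static struct md_rdev *rdev_get_plain(struct mirror_info *mirror)
        {
                return READ_ONCE(mirror->rdev);
        }

The writers change accordingly: rcu_assign_pointer() becomes WRITE_ONCE(),
and the synchronize_rcu() calls on the removal paths disappear, as the hunks
below show.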
@@ -32,17 +32,15 @@ static int multipath_map (struct mpconf *conf)
          * now we use the first available disk.
          */
 
-        rcu_read_lock();
         for (i = 0; i < disks; i++) {
-                struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
+                struct md_rdev *rdev = conf->multipaths[i].rdev;
+
                 if (rdev && test_bit(In_sync, &rdev->flags) &&
                     !test_bit(Faulty, &rdev->flags)) {
                         atomic_inc(&rdev->nr_pending);
-                        rcu_read_unlock();
                         return i;
                 }
         }
-        rcu_read_unlock();
 
         pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
         return (-1);
@@ -137,14 +135,16 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
         struct mpconf *conf = mddev->private;
         int i;
 
+        lockdep_assert_held(&mddev->lock);
+
         seq_printf (seq, " [%d/%d] [", conf->raid_disks,
                     conf->raid_disks - mddev->degraded);
-        rcu_read_lock();
         for (i = 0; i < conf->raid_disks; i++) {
-                struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
-                seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
+                struct md_rdev *rdev = READ_ONCE(conf->multipaths[i].rdev);
+
+                seq_printf(seq, "%s",
+                           rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
         }
-        rcu_read_unlock();
         seq_putc(seq, ']');
 }
 
@@ -182,7 +182,7 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
                     conf->raid_disks - mddev->degraded);
 }
 
-static void print_multipath_conf (struct mpconf *conf)
+static void print_multipath_conf(struct mpconf *conf)
 {
         int i;
         struct multipath_info *tmp;
@@ -195,6 +195,7 @@ static void print_multipath_conf (struct mpconf *conf)
         pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
                  conf->raid_disks);
 
+        lockdep_assert_held(&conf->mddev->reconfig_mutex);
         for (i = 0; i < conf->raid_disks; i++) {
                 tmp = conf->multipaths + i;
                 if (tmp->rdev)
@@ -231,7 +232,7 @@ static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                         rdev->raid_disk = path;
                         set_bit(In_sync, &rdev->flags);
                         spin_unlock_irq(&conf->device_lock);
-                        rcu_assign_pointer(p->rdev, rdev);
+                        WRITE_ONCE(p->rdev, rdev);
                         err = 0;
                         break;
                 }
@@ -257,16 +258,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                         err = -EBUSY;
                         goto abort;
                 }
-                p->rdev = NULL;
-                if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-                        synchronize_rcu();
-                        if (atomic_read(&rdev->nr_pending)) {
-                                /* lost the race, try later */
-                                err = -EBUSY;
-                                p->rdev = rdev;
-                                goto abort;
-                        }
-                }
+                WRITE_ONCE(p->rdev, NULL);
                 err = md_integrity_register(mddev);
         }
 abort:
@@ -9244,46 +9244,21 @@ static int remove_and_add_spares(struct mddev *mddev,
         struct md_rdev *rdev;
         int spares = 0;
         int removed = 0;
-        bool remove_some = false;
 
         if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                 /* Mustn't remove devices when resync thread is running */
                 return 0;
 
         rdev_for_each(rdev, mddev) {
-                if ((this == NULL || rdev == this) &&
-                    rdev->raid_disk >= 0 &&
-                    !test_bit(Blocked, &rdev->flags) &&
-                    test_bit(Faulty, &rdev->flags) &&
-                    atomic_read(&rdev->nr_pending)==0) {
-                        /* Faulty non-Blocked devices with nr_pending == 0
-                         * never get nr_pending incremented,
-                         * never get Faulty cleared, and never get Blocked set.
-                         * So we can synchronize_rcu now rather than once per device
-                         */
-                        remove_some = true;
-                        set_bit(RemoveSynchronized, &rdev->flags);
+                if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
+                    !mddev->pers->hot_remove_disk(mddev, rdev)) {
+                        sysfs_unlink_rdev(mddev, rdev);
+                        rdev->saved_raid_disk = rdev->raid_disk;
+                        rdev->raid_disk = -1;
+                        removed++;
                 }
         }
 
-        if (remove_some)
-                synchronize_rcu();
-        rdev_for_each(rdev, mddev) {
-                if ((this == NULL || rdev == this) &&
-                    (test_bit(RemoveSynchronized, &rdev->flags) ||
-                     rdev_removeable(rdev))) {
-                        if (mddev->pers->hot_remove_disk(
-                                    mddev, rdev) == 0) {
-                                sysfs_unlink_rdev(mddev, rdev);
-                                rdev->saved_raid_disk = rdev->raid_disk;
-                                rdev->raid_disk = -1;
-                                removed++;
-                        }
-                }
-                if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
-                        clear_bit(RemoveSynchronized, &rdev->flags);
-        }
-
         if (removed && mddev->kobj.sd)
                 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
 
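The per-personality hot-remove paths in the hunks above and below all follow
the same shape, so the ordering change is worth spelling out. The sketch
below is illustrative only (reusing the hypothetical mirror_info from the
earlier sketch); rdev_removeable() is the real helper already used by
remove_and_add_spares():

        /* Old: publish NULL, wait a grace period, then check for readers. */
        static int rdev_clear_old(struct mirror_info *mirror,
                                  struct md_rdev *rdev)
        {
                mirror->rdev = NULL;
                if (!test_bit(RemoveSynchronized, &rdev->flags)) {
                        synchronize_rcu();
                        if (atomic_read(&rdev->nr_pending)) {
                                /* lost the race, try later */
                                mirror->rdev = rdev;
                                return -EBUSY;
                        }
                }
                return 0;
        }

        /*
         * New: by the time hot_remove_disk() runs, rdev_removeable() has
         * already checked nr_pending under 'reconfig_mutex' with IO
         * quiesced, so a single tear-free store is sufficient.
         */
        static int rdev_clear_new(struct mirror_info *mirror,
                                  struct md_rdev *rdev)
        {
                if (!rdev_removeable(rdev))
                        return -EBUSY;
                WRITE_ONCE(mirror->rdev, NULL);
                return 0;
        }

With the retry dance gone, the RemoveSynchronized flag has no remaining
user, which is why the md.h hunk that follows deletes it.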
@@ -190,11 +190,6 @@ enum flag_bits
                                  * than other devices in the array
                                  */
         ClusterRemove,
-        RemoveSynchronized,     /* synchronize_rcu() was called after
-                                 * this device was known to be faulty,
-                                 * so it is safe to remove without
-                                 * another synchronize_rcu() call.
-                                 */
         ExternalBbl,            /* External metadata provides bad
                                  * block management for a disk
                                  */
@@ -609,7 +609,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
         int choose_first;
         int choose_next_idle;
 
-        rcu_read_lock();
         /*
          * Check if we can balance. We can balance on the whole
          * device if no resync is going on, or below the resync window.
@@ -642,7 +641,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
                 unsigned int pending;
                 bool nonrot;
 
-                rdev = rcu_dereference(conf->mirrors[disk].rdev);
+                rdev = conf->mirrors[disk].rdev;
                 if (r1_bio->bios[disk] == IO_BLOCKED
                     || rdev == NULL
                     || test_bit(Faulty, &rdev->flags))
@@ -773,7 +772,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
         }
 
         if (best_disk >= 0) {
-                rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
+                rdev = conf->mirrors[best_disk].rdev;
                 if (!rdev)
                         goto retry;
                 atomic_inc(&rdev->nr_pending);
@@ -784,7 +783,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
 
                 conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
         }
-        rcu_read_unlock();
         *max_sectors = sectors;
 
         return best_disk;
@@ -1235,14 +1233,12 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
         if (r1bio_existed) {
                 /* Need to get the block device name carefully */
-                struct md_rdev *rdev;
-                rcu_read_lock();
-                rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
+                struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
+
                 if (rdev)
                         snprintf(b, sizeof(b), "%pg", rdev->bdev);
                 else
                         strcpy(b, "???");
-                rcu_read_unlock();
         }
 
         /*
@@ -1396,10 +1392,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
         disks = conf->raid_disks * 2;
         blocked_rdev = NULL;
-        rcu_read_lock();
         max_sectors = r1_bio->sectors;
         for (i = 0; i < disks; i++) {
-                struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+                struct md_rdev *rdev = conf->mirrors[i].rdev;
 
                 /*
                  * The write-behind io is only attempted on drives marked as
@@ -1465,7 +1460,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                 }
                 r1_bio->bios[i] = bio;
         }
-        rcu_read_unlock();
 
         if (unlikely(blocked_rdev)) {
                 /* Wait for this device to become unblocked */
@@ -1617,15 +1611,16 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
         struct r1conf *conf = mddev->private;
         int i;
 
+        lockdep_assert_held(&mddev->lock);
+
         seq_printf(seq, " [%d/%d] [", conf->raid_disks,
                    conf->raid_disks - mddev->degraded);
-        rcu_read_lock();
         for (i = 0; i < conf->raid_disks; i++) {
-                struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+                struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
+
                 seq_printf(seq, "%s",
                            rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
         }
-        rcu_read_unlock();
         seq_printf(seq, "]");
 }
 
@@ -1691,16 +1686,15 @@ static void print_conf(struct r1conf *conf)
         pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
                  conf->raid_disks);
 
-        rcu_read_lock();
+        lockdep_assert_held(&conf->mddev->reconfig_mutex);
         for (i = 0; i < conf->raid_disks; i++) {
-                struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+                struct md_rdev *rdev = conf->mirrors[i].rdev;
+
                 if (rdev)
                         pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
                                  i, !test_bit(In_sync, &rdev->flags),
                                  !test_bit(Faulty, &rdev->flags),
                                  rdev->bdev);
         }
-        rcu_read_unlock();
 }
 
 static void close_sync(struct r1conf *conf)
@@ -1810,7 +1804,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                          */
                         if (rdev->saved_raid_disk < 0)
                                 conf->fullsync = 1;
-                        rcu_assign_pointer(p->rdev, rdev);
+                        WRITE_ONCE(p->rdev, rdev);
                         break;
                 }
                 if (test_bit(WantReplacement, &p->rdev->flags) &&
@@ -1826,7 +1820,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 rdev->raid_disk = repl_slot;
                 err = 0;
                 conf->fullsync = 1;
-                rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
+                WRITE_ONCE(p[conf->raid_disks].rdev, rdev);
         }
 
         print_conf(conf);
@@ -1862,16 +1856,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                         err = -EBUSY;
                         goto abort;
                 }
-                p->rdev = NULL;
-                if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-                        synchronize_rcu();
-                        if (atomic_read(&rdev->nr_pending)) {
-                                /* lost the race, try later */
-                                err = -EBUSY;
-                                p->rdev = rdev;
-                                goto abort;
-                        }
-                }
+                WRITE_ONCE(p->rdev, NULL);
                 if (conf->mirrors[conf->raid_disks + number].rdev) {
                         /* We just removed a device that is being replaced.
                          * Move down the replacement. We drain all IO before
@@ -1892,7 +1877,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                                 goto abort;
                         }
                         clear_bit(Replacement, &repl->flags);
-                        p->rdev = repl;
+                        WRITE_ONCE(p->rdev, repl);
                         conf->mirrors[conf->raid_disks + number].rdev = NULL;
                         unfreeze_array(conf);
                 }
@@ -2290,8 +2275,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
                         sector_t first_bad;
                         int bad_sectors;
 
-                        rcu_read_lock();
-                        rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        rdev = conf->mirrors[d].rdev;
                         if (rdev &&
                             (test_bit(In_sync, &rdev->flags) ||
                              (!test_bit(Faulty, &rdev->flags) &&
@@ -2299,15 +2283,14 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
                             is_badblock(rdev, sect, s,
                                         &first_bad, &bad_sectors) == 0) {
                                 atomic_inc(&rdev->nr_pending);
-                                rcu_read_unlock();
                                 if (sync_page_io(rdev, sect, s<<9,
                                                  conf->tmppage, REQ_OP_READ, false))
                                         success = 1;
                                 rdev_dec_pending(rdev, mddev);
                                 if (success)
                                         break;
-                        } else
-                                rcu_read_unlock();
+                        }
+
                         d++;
                         if (d == conf->raid_disks * 2)
                                 d = 0;
@@ -2326,29 +2309,24 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
                         if (d==0)
                                 d = conf->raid_disks * 2;
                         d--;
-                        rcu_read_lock();
-                        rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        rdev = conf->mirrors[d].rdev;
                         if (rdev &&
                             !test_bit(Faulty, &rdev->flags)) {
                                 atomic_inc(&rdev->nr_pending);
-                                rcu_read_unlock();
                                 r1_sync_page_io(rdev, sect, s,
                                                 conf->tmppage, WRITE);
                                 rdev_dec_pending(rdev, mddev);
-                        } else
-                                rcu_read_unlock();
+                        }
                 }
                 d = start;
                 while (d != read_disk) {
                         if (d==0)
                                 d = conf->raid_disks * 2;
                         d--;
-                        rcu_read_lock();
-                        rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        rdev = conf->mirrors[d].rdev;
                         if (rdev &&
                             !test_bit(Faulty, &rdev->flags)) {
                                 atomic_inc(&rdev->nr_pending);
-                                rcu_read_unlock();
                                 if (r1_sync_page_io(rdev, sect, s,
                                                     conf->tmppage, READ)) {
                                         atomic_add(s, &rdev->corrected_errors);
@@ -2359,8 +2337,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
                                                 rdev->bdev);
                                 }
                                 rdev_dec_pending(rdev, mddev);
-                        } else
-                                rcu_read_unlock();
+                        }
                 }
         sectors -= s;
         sect += s;
@@ -2741,7 +2718,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 
         r1_bio = raid1_alloc_init_r1buf(conf);
 
-        rcu_read_lock();
         /*
          * If we get a correctably read error during resync or recovery,
          * we might want to read from a different device. So we
@@ -2762,7 +2738,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                 struct md_rdev *rdev;
                 bio = r1_bio->bios[i];
 
-                rdev = rcu_dereference(conf->mirrors[i].rdev);
+                rdev = conf->mirrors[i].rdev;
                 if (rdev == NULL ||
                     test_bit(Faulty, &rdev->flags)) {
                         if (i < conf->raid_disks)
@@ -2820,7 +2796,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                                 bio->bi_opf |= MD_FAILFAST;
                 }
         }
-        rcu_read_unlock();
         if (disk < 0)
                 disk = wonly;
         r1_bio->read_disk = disk;
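The raid10 hunks that follow also delete dereference_rdev_and_rrdev() and the
smp_mb()/smp_rmb() pairing around the "replacement replaces rdev" transition.
A sketch of what the barrier used to guarantee, again with the hypothetical
mirror_info (extended here with a replacement slot):

        struct mirror_info {
                struct md_rdev *rdev;
                struct md_rdev *replacement;
        };

        /*
         * Old writer side (simplified): 'rdev' takes over before
         * 'replacement' is cleared, with a full barrier in between, so a
         * reader that loads 'replacement' first could never observe both
         * slots as NULL.
         */
        static void promote_replacement_old(struct mirror_info *mirror)
        {
                mirror->rdev = mirror->replacement;
                smp_mb();
                mirror->replacement = NULL;
        }

        /*
         * With this series the transition only happens while the array is
         * suspended and no IO is inflight, so the IO paths are reduced to
         * two plain loads and the barriers are dropped.
         */
        static struct md_rdev *pick_rdev(struct mirror_info *mirror, bool repl)
        {
                return repl ? mirror->replacement : mirror->rdev;
        }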
@@ -743,7 +743,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
         struct geom *geo = &conf->geo;
 
         raid10_find_phys(conf, r10_bio);
-        rcu_read_lock();
         best_dist_slot = -1;
         min_pending = UINT_MAX;
         best_dist_rdev = NULL;
@@ -775,18 +774,11 @@ static struct md_rdev *read_balance(struct r10conf *conf,
                 if (r10_bio->devs[slot].bio == IO_BLOCKED)
                         continue;
                 disk = r10_bio->devs[slot].devnum;
-                rdev = rcu_dereference(conf->mirrors[disk].replacement);
+                rdev = conf->mirrors[disk].replacement;
                 if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
                     r10_bio->devs[slot].addr + sectors >
-                    rdev->recovery_offset) {
-                        /*
-                         * Read replacement first to prevent reading both rdev
-                         * and replacement as NULL during replacement replace
-                         * rdev.
-                         */
-                        smp_mb();
-                        rdev = rcu_dereference(conf->mirrors[disk].rdev);
-                }
+                    rdev->recovery_offset)
+                        rdev = conf->mirrors[disk].rdev;
                 if (rdev == NULL ||
                     test_bit(Faulty, &rdev->flags))
                         continue;
@@ -876,7 +868,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
                 r10_bio->read_slot = slot;
         } else
                 rdev = NULL;
-        rcu_read_unlock();
         *max_sectors = best_good_sectors;
 
         return rdev;
@@ -1198,9 +1189,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
                  */
                 gfp = GFP_NOIO | __GFP_HIGH;
 
-                rcu_read_lock();
                 disk = r10_bio->devs[slot].devnum;
-                err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
+                err_rdev = conf->mirrors[disk].rdev;
                 if (err_rdev)
                         snprintf(b, sizeof(b), "%pg", err_rdev->bdev);
                 else {
@@ -1208,7 +1198,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
                         /* This never gets dereferenced */
                         err_rdev = r10_bio->devs[slot].rdev;
                 }
-                rcu_read_unlock();
         }
 
         if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
@@ -1279,15 +1268,8 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
         int devnum = r10_bio->devs[n_copy].devnum;
         struct bio *mbio;
 
-        if (replacement) {
-                rdev = conf->mirrors[devnum].replacement;
-                if (rdev == NULL) {
-                        /* Replacement just got moved to main 'rdev' */
-                        smp_mb();
-                        rdev = conf->mirrors[devnum].rdev;
-                }
-        } else
-                rdev = conf->mirrors[devnum].rdev;
+        rdev = replacement ? conf->mirrors[devnum].replacement :
+                             conf->mirrors[devnum].rdev;
 
         mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
         if (replacement)
@@ -1321,25 +1303,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
         }
 }
 
-static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
-                                                  struct md_rdev **prrdev)
-{
-        struct md_rdev *rdev, *rrdev;
-
-        rrdev = rcu_dereference(mirror->replacement);
-        /*
-         * Read replacement first to prevent reading both rdev and
-         * replacement as NULL during replacement replace rdev.
-         */
-        smp_mb();
-        rdev = rcu_dereference(mirror->rdev);
-        if (rdev == rrdev)
-                rrdev = NULL;
-
-        *prrdev = rrdev;
-        return rdev;
-}
-
 static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
 {
         int i;
@@ -1348,11 +1311,11 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
 
 retry_wait:
         blocked_rdev = NULL;
-        rcu_read_lock();
         for (i = 0; i < conf->copies; i++) {
                 struct md_rdev *rdev, *rrdev;
 
-                rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev);
+                rdev = conf->mirrors[i].rdev;
+                rrdev = conf->mirrors[i].replacement;
                 if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
                         atomic_inc(&rdev->nr_pending);
                         blocked_rdev = rdev;
@@ -1391,7 +1354,6 @@ retry_wait:
                         }
                 }
         }
-        rcu_read_unlock();
 
         if (unlikely(blocked_rdev)) {
                 /* Have to wait for this device to get unblocked, then retry */
@@ -1474,14 +1436,14 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
         wait_blocked_dev(mddev, r10_bio);
 
-        rcu_read_lock();
         max_sectors = r10_bio->sectors;
 
         for (i = 0; i < conf->copies; i++) {
                 int d = r10_bio->devs[i].devnum;
                 struct md_rdev *rdev, *rrdev;
 
-                rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev);
+                rdev = conf->mirrors[d].rdev;
+                rrdev = conf->mirrors[d].replacement;
                 if (rdev && (test_bit(Faulty, &rdev->flags)))
                         rdev = NULL;
                 if (rrdev && (test_bit(Faulty, &rrdev->flags)))
@@ -1535,7 +1497,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                         atomic_inc(&rrdev->nr_pending);
                 }
         }
-        rcu_read_unlock();
 
         if (max_sectors < r10_bio->sectors)
                 r10_bio->sectors = max_sectors;
@@ -1625,17 +1586,8 @@ static void raid10_end_discard_request(struct bio *bio)
                 set_bit(R10BIO_Uptodate, &r10_bio->state);
 
         dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
-        if (repl)
-                rdev = conf->mirrors[dev].replacement;
-        if (!rdev) {
-                /*
-                 * raid10_remove_disk uses smp_mb to make sure rdev is set to
-                 * replacement before setting replacement to NULL. It can read
-                 * rdev first without barrier protect even replacement is NULL
-                 */
-                smp_rmb();
-                rdev = conf->mirrors[dev].rdev;
-        }
+        rdev = repl ? conf->mirrors[dev].replacement :
+                      conf->mirrors[dev].rdev;
 
         raid_end_discard_bio(r10_bio);
         rdev_dec_pending(rdev, conf->mddev);
@@ -1785,11 +1737,11 @@ retry_discard:
          * inc refcount on their rdev. Record them by setting
          * bios[x] to bio
          */
-        rcu_read_lock();
         for (disk = 0; disk < geo->raid_disks; disk++) {
                 struct md_rdev *rdev, *rrdev;
 
-                rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev);
+                rdev = conf->mirrors[disk].rdev;
+                rrdev = conf->mirrors[disk].replacement;
                 r10_bio->devs[disk].bio = NULL;
                 r10_bio->devs[disk].repl_bio = NULL;
 
@@ -1809,7 +1761,6 @@ retry_discard:
                         atomic_inc(&rrdev->nr_pending);
                 }
         }
-        rcu_read_unlock();
 
         atomic_set(&r10_bio->remaining, 1);
         for (disk = 0; disk < geo->raid_disks; disk++) {
@@ -1939,6 +1890,8 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
         struct r10conf *conf = mddev->private;
         int i;
 
+        lockdep_assert_held(&mddev->lock);
+
         if (conf->geo.near_copies < conf->geo.raid_disks)
                 seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2);
         if (conf->geo.near_copies > 1)
@@ -1953,12 +1906,11 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
         }
         seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
                                         conf->geo.raid_disks - mddev->degraded);
-        rcu_read_lock();
         for (i = 0; i < conf->geo.raid_disks; i++) {
-                struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+                struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
+
                 seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
         }
-        rcu_read_unlock();
         seq_printf(seq, "]");
 }
 
@@ -1980,7 +1932,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
                 ncopies = conf->geo.near_copies;
         }
 
-        rcu_read_lock();
         do {
                 int n = conf->copies;
                 int cnt = 0;
@@ -1988,7 +1939,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
                 while (n--) {
                         struct md_rdev *rdev;
                         if (this != ignore &&
-                            (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
+                            (rdev = conf->mirrors[this].rdev) &&
                             test_bit(In_sync, &rdev->flags))
                                 cnt++;
                         this = (this+1) % disks;
@@ -1999,7 +1950,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
         } while (first != 0);
         has_enough = 1;
 out:
-        rcu_read_unlock();
         return has_enough;
 }
 
@@ -2072,8 +2022,7 @@ static void print_conf(struct r10conf *conf)
         pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
                  conf->geo.raid_disks);
 
-        /* This is only called with ->reconfix_mutex held, so
-         * rcu protection of rdev is not needed */
+        lockdep_assert_held(&conf->mddev->reconfig_mutex);
         for (i = 0; i < conf->geo.raid_disks; i++) {
                 rdev = conf->mirrors[i].rdev;
                 if (rdev)
@@ -2190,7 +2139,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                         err = 0;
                         if (rdev->saved_raid_disk != mirror)
                                 conf->fullsync = 1;
-                        rcu_assign_pointer(p->rdev, rdev);
+                        WRITE_ONCE(p->rdev, rdev);
                         break;
                 }
 
@@ -2204,7 +2153,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 disk_stack_limits(mddev->gendisk, rdev->bdev,
                                   rdev->data_offset << 9);
                 conf->fullsync = 1;
-                rcu_assign_pointer(p->replacement, rdev);
+                WRITE_ONCE(p->replacement, rdev);
         }
 
         print_conf(conf);
@@ -2246,24 +2195,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                 err = -EBUSY;
                 goto abort;
         }
-        *rdevp = NULL;
-        if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-                synchronize_rcu();
-                if (atomic_read(&rdev->nr_pending)) {
-                        /* lost the race, try later */
-                        err = -EBUSY;
-                        *rdevp = rdev;
-                        goto abort;
-                }
-        }
+        WRITE_ONCE(*rdevp, NULL);
         if (p->replacement) {
                 /* We must have just cleared 'rdev' */
-                p->rdev = p->replacement;
+                WRITE_ONCE(p->rdev, p->replacement);
                 clear_bit(Replacement, &p->replacement->flags);
-                smp_mb(); /* Make sure other CPUs may see both as identical
-                           * but will never see neither -- if they are careful.
-                           */
-                p->replacement = NULL;
+                WRITE_ONCE(p->replacement, NULL);
         }
 
         clear_bit(WantReplacement, &rdev->flags);
@@ -2763,20 +2700,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
                 if (s > (PAGE_SIZE>>9))
                         s = PAGE_SIZE >> 9;
 
-                rcu_read_lock();
                 do {
                         sector_t first_bad;
                         int bad_sectors;
 
                         d = r10_bio->devs[sl].devnum;
-                        rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        rdev = conf->mirrors[d].rdev;
                         if (rdev &&
                             test_bit(In_sync, &rdev->flags) &&
                             !test_bit(Faulty, &rdev->flags) &&
                             is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
                                         &first_bad, &bad_sectors) == 0) {
                                 atomic_inc(&rdev->nr_pending);
-                                rcu_read_unlock();
                                 success = sync_page_io(rdev,
                                                        r10_bio->devs[sl].addr +
                                                        sect,
@@ -2784,7 +2719,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
                                                        conf->tmppage,
                                                        REQ_OP_READ, false);
                                 rdev_dec_pending(rdev, mddev);
-                                rcu_read_lock();
                                 if (success)
                                         break;
                         }
@@ -2792,7 +2726,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
                         if (sl == conf->copies)
                                 sl = 0;
                 } while (sl != slot);
-                rcu_read_unlock();
 
                 if (!success) {
                         /* Cannot read from anywhere, just mark the block
@@ -2816,20 +2749,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
 
                 start = sl;
                 /* write it back and re-read */
-                rcu_read_lock();
                 while (sl != slot) {
                         if (sl==0)
                                 sl = conf->copies;
                         sl--;
                         d = r10_bio->devs[sl].devnum;
-                        rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        rdev = conf->mirrors[d].rdev;
                         if (!rdev ||
                             test_bit(Faulty, &rdev->flags) ||
                             !test_bit(In_sync, &rdev->flags))
                                 continue;
 
                         atomic_inc(&rdev->nr_pending);
-                        rcu_read_unlock();
                         if (r10_sync_page_io(rdev,
                                              r10_bio->devs[sl].addr +
                                              sect,
@@ -2848,7 +2779,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
                                          rdev->bdev);
                         }
                         rdev_dec_pending(rdev, mddev);
-                        rcu_read_lock();
                 }
                 sl = start;
                 while (sl != slot) {
@@ -2856,14 +2786,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
                                 sl = conf->copies;
                         sl--;
                         d = r10_bio->devs[sl].devnum;
-                        rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        rdev = conf->mirrors[d].rdev;
                         if (!rdev ||
                             test_bit(Faulty, &rdev->flags) ||
                             !test_bit(In_sync, &rdev->flags))
                                 continue;
 
                         atomic_inc(&rdev->nr_pending);
-                        rcu_read_unlock();
                         switch (r10_sync_page_io(rdev,
                                                  r10_bio->devs[sl].addr +
                                                  sect,
@@ -2891,9 +2820,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
                         }
 
                         rdev_dec_pending(rdev, mddev);
-                        rcu_read_lock();
                 }
-                rcu_read_unlock();
 
                 sectors -= s;
                 sect += s;
@@ -3367,14 +3294,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         /* Completed a full sync so the replacements
                          * are now fully recovered.
                          */
-                        rcu_read_lock();
                         for (i = 0; i < conf->geo.raid_disks; i++) {
                                 struct md_rdev *rdev =
-                                        rcu_dereference(conf->mirrors[i].replacement);
+                                        conf->mirrors[i].replacement;
+
                                 if (rdev)
                                         rdev->recovery_offset = MaxSector;
                         }
-                        rcu_read_unlock();
                 }
                 conf->fullsync = 0;
         }
@@ -3455,9 +3381,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         struct raid10_info *mirror = &conf->mirrors[i];
                         struct md_rdev *mrdev, *mreplace;
 
-                        rcu_read_lock();
-                        mrdev = rcu_dereference(mirror->rdev);
-                        mreplace = rcu_dereference(mirror->replacement);
+                        mrdev = mirror->rdev;
+                        mreplace = mirror->replacement;
 
                         if (mrdev && (test_bit(Faulty, &mrdev->flags) ||
                             test_bit(In_sync, &mrdev->flags)))
@@ -3465,22 +3390,18 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         if (mreplace && test_bit(Faulty, &mreplace->flags))
                                 mreplace = NULL;
 
-                        if (!mrdev && !mreplace) {
-                                rcu_read_unlock();
+                        if (!mrdev && !mreplace)
                                 continue;
-                        }
 
                         still_degraded = 0;
                         /* want to reconstruct this device */
                         rb2 = r10_bio;
                         sect = raid10_find_virt(conf, sector_nr, i);
-                        if (sect >= mddev->resync_max_sectors) {
+                        if (sect >= mddev->resync_max_sectors)
                                 /* last stripe is not complete - don't
                                  * try to recover this sector.
                                  */
-                                rcu_read_unlock();
                                 continue;
-                        }
                         /* Unless we are doing a full sync, or a replacement
                          * we only need to recover the block if it is set in
                          * the bitmap
@@ -3496,14 +3417,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                  * that there will never be anything to do here
                                  */
                                 chunks_skipped = -1;
-                                rcu_read_unlock();
                                 continue;
                         }
                         if (mrdev)
                                 atomic_inc(&mrdev->nr_pending);
                         if (mreplace)
                                 atomic_inc(&mreplace->nr_pending);
-                        rcu_read_unlock();
 
                         r10_bio = raid10_alloc_init_r10buf(conf);
                         r10_bio->state = 0;
@@ -3522,10 +3441,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         /* Need to check if the array will still be
                          * degraded
                          */
-                        rcu_read_lock();
                         for (j = 0; j < conf->geo.raid_disks; j++) {
-                                struct md_rdev *rdev = rcu_dereference(
-                                        conf->mirrors[j].rdev);
+                                struct md_rdev *rdev = conf->mirrors[j].rdev;
+
                                 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
                                         still_degraded = 1;
                                         break;
@@ -3540,8 +3458,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                 int k;
                                 int d = r10_bio->devs[j].devnum;
                                 sector_t from_addr, to_addr;
-                                struct md_rdev *rdev =
-                                        rcu_dereference(conf->mirrors[d].rdev);
+                                struct md_rdev *rdev = conf->mirrors[d].rdev;
                                 sector_t sector, first_bad;
                                 int bad_sectors;
                                 if (!rdev ||
@@ -3620,7 +3537,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                         atomic_inc(&r10_bio->remaining);
                                         break;
                                 }
-                                rcu_read_unlock();
                                 if (j == conf->copies) {
                                         /* Cannot recover, so abort the recovery or
                                          * record a bad block */
@@ -3747,12 +3663,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
                         bio = r10_bio->devs[i].bio;
                         bio->bi_status = BLK_STS_IOERR;
-                        rcu_read_lock();
-                        rdev = rcu_dereference(conf->mirrors[d].rdev);
-                        if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
-                                rcu_read_unlock();
+                        rdev = conf->mirrors[d].rdev;
+                        if (rdev == NULL || test_bit(Faulty, &rdev->flags))
                                 continue;
-                        }
+
                         sector = r10_bio->devs[i].addr;
                         if (is_badblock(rdev, sector, max_sync,
                                         &first_bad, &bad_sectors)) {
@@ -3762,7 +3676,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                         bad_sectors -= (sector - first_bad);
                                         if (max_sync > bad_sectors)
                                                 max_sync = bad_sectors;
-                                        rcu_read_unlock();
                                         continue;
                                 }
                         }
@@ -3778,11 +3691,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         bio_set_dev(bio, rdev->bdev);
                         count++;
 
-                        rdev = rcu_dereference(conf->mirrors[d].replacement);
-                        if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
-                                rcu_read_unlock();
+                        rdev = conf->mirrors[d].replacement;
+                        if (rdev == NULL || test_bit(Faulty, &rdev->flags))
                                 continue;
-                        }
+
                         atomic_inc(&rdev->nr_pending);
 
                         /* Need to set up for writing to the replacement */
@@ -3799,7 +3711,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         bio->bi_iter.bi_sector = sector + rdev->data_offset;
                         bio_set_dev(bio, rdev->bdev);
                         count++;
-                        rcu_read_unlock();
                 }
 
                 if (count < 2) {
@@ -4509,11 +4420,11 @@ static int calc_degraded(struct r10conf *conf)
         int degraded, degraded2;
         int i;
 
-        rcu_read_lock();
         degraded = 0;
         /* 'prev' section first */
         for (i = 0; i < conf->prev.raid_disks; i++) {
-                struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+                struct md_rdev *rdev = conf->mirrors[i].rdev;
+
                 if (!rdev || test_bit(Faulty, &rdev->flags))
                         degraded++;
                 else if (!test_bit(In_sync, &rdev->flags))
@@ -4523,13 +4434,12 @@ static int calc_degraded(struct r10conf *conf)
                          */
                         degraded++;
         }
-        rcu_read_unlock();
         if (conf->geo.raid_disks == conf->prev.raid_disks)
                 return degraded;
-        rcu_read_lock();
         degraded2 = 0;
         for (i = 0; i < conf->geo.raid_disks; i++) {
-                struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+                struct md_rdev *rdev = conf->mirrors[i].rdev;
+
                 if (!rdev || test_bit(Faulty, &rdev->flags))
                         degraded2++;
                 else if (!test_bit(In_sync, &rdev->flags)) {
@@ -4542,7 +4452,6 @@ static int calc_degraded(struct r10conf *conf)
                                 degraded2++;
                 }
         }
-        rcu_read_unlock();
         if (degraded2 > degraded)
                 return degraded2;
         return degraded;
@@ -4974,16 +4883,15 @@ read_more:
         blist = read_bio;
         read_bio->bi_next = NULL;
 
-        rcu_read_lock();
         for (s = 0; s < conf->copies*2; s++) {
                 struct bio *b;
                 int d = r10_bio->devs[s/2].devnum;
                 struct md_rdev *rdev2;
                 if (s&1) {
-                        rdev2 = rcu_dereference(conf->mirrors[d].replacement);
+                        rdev2 = conf->mirrors[d].replacement;
                         b = r10_bio->devs[s/2].repl_bio;
                 } else {
-                        rdev2 = rcu_dereference(conf->mirrors[d].rdev);
+                        rdev2 = conf->mirrors[d].rdev;
                         b = r10_bio->devs[s/2].bio;
                 }
                 if (!rdev2 || test_bit(Faulty, &rdev2->flags))
@@ -5017,7 +4925,6 @@ read_more:
                 sector_nr += len >> 9;
                 nr_sectors += len >> 9;
         }
-        rcu_read_unlock();
         r10_bio->sectors = nr_sectors;
 
         /* Now submit the read */
@@ -5070,20 +4977,17 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                 struct bio *b;
                 int d = r10_bio->devs[s/2].devnum;
                 struct md_rdev *rdev;
-                rcu_read_lock();
                 if (s&1) {
-                        rdev = rcu_dereference(conf->mirrors[d].replacement);
+                        rdev = conf->mirrors[d].replacement;
                         b = r10_bio->devs[s/2].repl_bio;
                 } else {
-                        rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        rdev = conf->mirrors[d].rdev;
                         b = r10_bio->devs[s/2].bio;
                 }
-                if (!rdev || test_bit(Faulty, &rdev->flags)) {
-                        rcu_read_unlock();
+                if (!rdev || test_bit(Faulty, &rdev->flags))
                         continue;
-                }
+
                 atomic_inc(&rdev->nr_pending);
-                rcu_read_unlock();
                 md_sync_acct_bio(b, r10_bio->sectors);
                 atomic_inc(&r10_bio->remaining);
                 b->bi_next = NULL;
@@ -5154,10 +5058,9 @@ static int handle_reshape_read_error(struct mddev *mddev,
                 if (s > (PAGE_SIZE >> 9))
                         s = PAGE_SIZE >> 9;
 
-                rcu_read_lock();
                 while (!success) {
                         int d = r10b->devs[slot].devnum;
-                        struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        struct md_rdev *rdev = conf->mirrors[d].rdev;
                         sector_t addr;
                         if (rdev == NULL ||
                             test_bit(Faulty, &rdev->flags) ||
@@ -5166,14 +5069,12 @@ static int handle_reshape_read_error(struct mddev *mddev,
 
                         addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
                         atomic_inc(&rdev->nr_pending);
-                        rcu_read_unlock();
                         success = sync_page_io(rdev,
                                                addr,
                                                s << 9,
                                                pages[idx],
                                                REQ_OP_READ, false);
                         rdev_dec_pending(rdev, mddev);
-                        rcu_read_lock();
                         if (success)
                                 break;
 failed:
@@ -5183,7 +5084,6 @@ failed:
                         if (slot == first_slot)
                                 break;
                 }
-                rcu_read_unlock();
                 if (!success) {
                         /* couldn't read this block, must give up */
                         set_bit(MD_RECOVERY_INTR,
@@ -5209,12 +5109,8 @@ static void end_reshape_write(struct bio *bio)
         struct md_rdev *rdev = NULL;
 
         d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
-        if (repl)
-                rdev = conf->mirrors[d].replacement;
-        if (!rdev) {
-                smp_mb();
-                rdev = conf->mirrors[d].rdev;
-        }
+        rdev = repl ? conf->mirrors[d].replacement :
+                      conf->mirrors[d].rdev;
 
         if (bio->bi_status) {
                 /* FIXME should record badblock */
@@ -5249,18 +5145,16 @@ static void raid10_finish_reshape(struct mddev *mddev)
                 mddev->resync_max_sectors = mddev->array_sectors;
         } else {
                 int d;
-                rcu_read_lock();
                 for (d = conf->geo.raid_disks ;
                      d < conf->geo.raid_disks - mddev->delta_disks;
                      d++) {
-                        struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
+                        struct md_rdev *rdev = conf->mirrors[d].rdev;
                         if (rdev)
                                 clear_bit(In_sync, &rdev->flags);
-                        rdev = rcu_dereference(conf->mirrors[d].replacement);
+                        rdev = conf->mirrors[d].replacement;
                         if (rdev)
                                 clear_bit(In_sync, &rdev->flags);
                 }
-                rcu_read_unlock();
         }
         mddev->layout = mddev->new_layout;
         mddev->chunk_sectors = 1 << conf->geo.chunk_shift;
@@ -1890,28 +1890,22 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
                         continue;
 
                 /* in case device is broken */
-                rcu_read_lock();
-                rdev = rcu_dereference(conf->disks[disk_index].rdev);
+                rdev = conf->disks[disk_index].rdev;
                 if (rdev) {
                         atomic_inc(&rdev->nr_pending);
-                        rcu_read_unlock();
                         sync_page_io(rdev, sh->sector, PAGE_SIZE,
                                      sh->dev[disk_index].page, REQ_OP_WRITE,
                                      false);
                         rdev_dec_pending(rdev, rdev->mddev);
-                        rcu_read_lock();
                 }
-                rrdev = rcu_dereference(conf->disks[disk_index].replacement);
+                rrdev = conf->disks[disk_index].replacement;
                 if (rrdev) {
                         atomic_inc(&rrdev->nr_pending);
-                        rcu_read_unlock();
                         sync_page_io(rrdev, sh->sector, PAGE_SIZE,
                                      sh->dev[disk_index].page, REQ_OP_WRITE,
                                      false);
                         rdev_dec_pending(rrdev, rrdev->mddev);
-                        rcu_read_lock();
                 }
-                rcu_read_unlock();
         }
         ctx->data_parity_stripes++;
 out:
@@ -2948,7 +2942,6 @@ bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect)
         if (!log)
                 return false;
 
-        WARN_ON_ONCE(!rcu_read_lock_held());
         tree_index = r5c_tree_index(conf, sect);
         slot = radix_tree_lookup(&log->big_stripe_tree, tree_index);
         return slot != NULL;
@@ -620,11 +620,9 @@ static void ppl_do_flush(struct ppl_io_unit *io)
                 struct md_rdev *rdev;
                 struct block_device *bdev = NULL;
 
-                rcu_read_lock();
-                rdev = rcu_dereference(conf->disks[i].rdev);
+                rdev = conf->disks[i].rdev;
                 if (rdev && !test_bit(Faulty, &rdev->flags))
                         bdev = rdev->bdev;
-                rcu_read_unlock();
 
                 if (bdev) {
                         struct bio *bio;
@@ -882,9 +880,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
                          (unsigned long long)r_sector, dd_idx,
                          (unsigned long long)sector);
 
-                /* Array has not started so rcu dereference is safe */
-                rdev = rcu_dereference_protected(
-                                conf->disks[dd_idx].rdev, 1);
+                rdev = conf->disks[dd_idx].rdev;
                 if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
                               sector >= rdev->recovery_offset)) {
                         pr_debug("%s:%*s data member disk %d missing\n",
@@ -936,9 +932,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
                                   0, &disk, &sh);
                 BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
 
-                /* Array has not started so rcu dereference is safe */
-                parity_rdev = rcu_dereference_protected(
-                                conf->disks[sh.pd_idx].rdev, 1);
+                parity_rdev = conf->disks[sh.pd_idx].rdev;
 
                 BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
                 pr_debug("%s:%*s write parity at sector %llu, disk %pg\n",
@@ -1404,9 +1398,7 @@ int ppl_init_log(struct r5conf *conf)
 
         for (i = 0; i < ppl_conf->count; i++) {
                 struct ppl_log *log = &ppl_conf->child_logs[i];
-                /* Array has not started so rcu dereference is safe */
-                struct md_rdev *rdev =
-                        rcu_dereference_protected(conf->disks[i].rdev, 1);
+                struct md_rdev *rdev = conf->disks[i].rdev;
 
                 mutex_init(&log->io_mutex);
                 spin_lock_init(&log->io_list_lock);
@ -693,12 +693,12 @@ int raid5_calc_degraded(struct r5conf *conf)
|
||||
int degraded, degraded2;
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
degraded = 0;
|
||||
for (i = 0; i < conf->previous_raid_disks; i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
|
||||
|
||||
if (rdev && test_bit(Faulty, &rdev->flags))
|
||||
rdev = rcu_dereference(conf->disks[i].replacement);
|
||||
rdev = READ_ONCE(conf->disks[i].replacement);
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||
degraded++;
|
||||
else if (test_bit(In_sync, &rdev->flags))
|
||||
@ -716,15 +716,14 @@ int raid5_calc_degraded(struct r5conf *conf)
|
||||
if (conf->raid_disks >= conf->previous_raid_disks)
|
||||
degraded++;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (conf->raid_disks == conf->previous_raid_disks)
|
||||
return degraded;
|
||||
rcu_read_lock();
|
||||
degraded2 = 0;
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
|
||||
|
||||
if (rdev && test_bit(Faulty, &rdev->flags))
|
||||
rdev = rcu_dereference(conf->disks[i].replacement);
|
||||
rdev = READ_ONCE(conf->disks[i].replacement);
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||
degraded2++;
|
||||
else if (test_bit(In_sync, &rdev->flags))
|
||||
@ -738,7 +737,6 @@ int raid5_calc_degraded(struct r5conf *conf)
|
||||
if (conf->raid_disks <= conf->previous_raid_disks)
|
||||
degraded2++;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (degraded2 > degraded)
|
||||
return degraded2;
|
||||
return degraded;
|
||||
@ -1183,14 +1181,8 @@ again:
|
||||
bi = &dev->req;
|
||||
rbi = &dev->rreq; /* For writing to replacement */
|
||||
|
||||
rcu_read_lock();
|
||||
rrdev = rcu_dereference(conf->disks[i].replacement);
|
||||
smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
|
||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
if (!rdev) {
|
||||
rdev = rrdev;
|
||||
rrdev = NULL;
|
||||
}
|
||||
rdev = conf->disks[i].rdev;
|
||||
rrdev = conf->disks[i].replacement;
|
||||
if (op_is_write(op)) {
|
||||
if (replace_only)
|
||||
rdev = NULL;
|
||||
@ -1211,7 +1203,6 @@ again:
|
||||
rrdev = NULL;
|
||||
if (rrdev)
|
||||
atomic_inc(&rrdev->nr_pending);
|
||||
rcu_read_unlock();
|
||||
|
||||
/* We have already checked bad blocks for reads. Now
|
||||
* need to check for writes. We never accept write errors
|
||||
@ -2730,28 +2721,6 @@ static void shrink_stripes(struct r5conf *conf)
|
||||
conf->slab_cache = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This helper wraps rcu_dereference_protected() and can be used when
|
||||
* it is known that the nr_pending of the rdev is elevated.
|
||||
*/
|
||||
static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
|
||||
{
|
||||
return rcu_dereference_protected(rdev,
|
||||
atomic_read(&rcu_access_pointer(rdev)->nr_pending));
|
||||
}
|
||||
|
||||
/*
|
||||
* This helper wraps rcu_dereference_protected() and should be used
|
||||
* when it is known that the mddev_lock() is held. This is safe
|
||||
* seeing raid5_remove_disk() has the same lock held.
|
||||
*/
|
||||
static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
|
||||
struct md_rdev __rcu *rdev)
|
||||
{
|
||||
return rcu_dereference_protected(rdev,
|
||||
lockdep_is_held(&mddev->reconfig_mutex));
|
||||
}
|
||||
|
||||
static void raid5_end_read_request(struct bio * bi)
|
||||
{
|
||||
struct stripe_head *sh = bi->bi_private;
|
||||
@ -2777,9 +2746,9 @@ static void raid5_end_read_request(struct bio * bi)
|
||||
* In that case it moved down to 'rdev'.
|
||||
* rdev is not removed until all requests are finished.
|
||||
*/
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
rdev = conf->disks[i].replacement;
|
||||
if (!rdev)
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev = conf->disks[i].rdev;
|
||||
|
||||
if (use_new_offset(conf, sh))
|
||||
s = sh->sector + rdev->new_data_offset;
|
||||
@ -2892,11 +2861,11 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
|
||||
for (i = 0 ; i < disks; i++) {
|
||||
if (bi == &sh->dev[i].req) {
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev = conf->disks[i].rdev;
|
||||
break;
|
||||
}
|
||||
if (bi == &sh->dev[i].rreq) {
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
rdev = conf->disks[i].replacement;
|
||||
if (rdev)
|
||||
replacement = 1;
|
||||
else
|
||||
@ -2904,7 +2873,7 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
* replaced it. rdev is not removed
|
||||
* until all requests are finished.
|
||||
*/
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev = conf->disks[i].rdev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -3666,15 +3635,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
int bitmap_end = 0;
|
||||
|
||||
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
|
||||
struct md_rdev *rdev;
|
||||
rcu_read_lock();
|
||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||
|
||||
if (rdev && test_bit(In_sync, &rdev->flags) &&
|
||||
!test_bit(Faulty, &rdev->flags))
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
else
|
||||
rdev = NULL;
|
||||
rcu_read_unlock();
|
||||
if (rdev) {
|
||||
if (!rdev_set_badblocks(
|
||||
rdev,
|
||||
@ -3792,16 +3759,17 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
||||
/* During recovery devices cannot be removed, so
|
||||
* locking and refcounting of rdevs is not needed
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||
|
||||
if (rdev
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_bit(In_sync, &rdev->flags)
|
||||
&& !rdev_set_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0))
|
||||
abort = 1;
|
||||
rdev = rcu_dereference(conf->disks[i].replacement);
|
||||
rdev = conf->disks[i].replacement;
|
||||
|
||||
if (rdev
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_bit(In_sync, &rdev->flags)
|
||||
@ -3809,7 +3777,6 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
||||
RAID5_STRIPE_SECTORS(conf), 0))
|
||||
abort = 1;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (abort)
|
||||
conf->recovery_disabled =
|
||||
conf->mddev->recovery_disabled;
|
||||
@ -3822,15 +3789,13 @@ static int want_replace(struct stripe_head *sh, int disk_idx)
|
||||
struct md_rdev *rdev;
|
||||
int rv = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement);
|
||||
rdev = sh->raid_conf->disks[disk_idx].replacement;
|
||||
if (rdev
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_bit(In_sync, &rdev->flags)
|
||||
&& (rdev->recovery_offset <= sh->sector
|
||||
|| rdev->mddev->recovery_cp <= sh->sector))
|
||||
rv = 1;
|
||||
rcu_read_unlock();
|
||||
return rv;
|
||||
}
|
||||
|
||||
@ -4707,7 +4672,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
s->log_failed = r5l_log_disk_error(conf);
|
||||
|
||||
/* Now to look around and see what can be done */
|
||||
rcu_read_lock();
|
||||
for (i=disks; i--; ) {
|
||||
struct md_rdev *rdev;
|
||||
sector_t first_bad;
|
||||
@ -4752,7 +4716,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
/* Prefer to use the replacement for reads, but only
|
||||
* if it is recovered enough and has no bad blocks.
|
||||
*/
|
||||
rdev = rcu_dereference(conf->disks[i].replacement);
|
||||
rdev = conf->disks[i].replacement;
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags) &&
|
||||
rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
|
||||
!is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||
@ -4763,7 +4727,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
set_bit(R5_NeedReplace, &dev->flags);
|
||||
else
|
||||
clear_bit(R5_NeedReplace, &dev->flags);
|
||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
rdev = conf->disks[i].rdev;
|
||||
clear_bit(R5_ReadRepl, &dev->flags);
|
||||
}
|
||||
if (rdev && test_bit(Faulty, &rdev->flags))
|
||||
@ -4810,8 +4774,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
if (test_bit(R5_WriteError, &dev->flags)) {
|
||||
/* This flag does not apply to '.replacement'
|
||||
* only to .rdev, so make sure to check that*/
|
||||
struct md_rdev *rdev2 = rcu_dereference(
|
||||
conf->disks[i].rdev);
|
||||
struct md_rdev *rdev2 = conf->disks[i].rdev;
|
||||
|
||||
if (rdev2 == rdev)
|
||||
clear_bit(R5_Insync, &dev->flags);
|
||||
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
||||
@ -4823,8 +4787,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
if (test_bit(R5_MadeGood, &dev->flags)) {
|
||||
/* This flag does not apply to '.replacement'
|
||||
* only to .rdev, so make sure to check that*/
|
||||
struct md_rdev *rdev2 = rcu_dereference(
|
||||
conf->disks[i].rdev);
|
||||
struct md_rdev *rdev2 = conf->disks[i].rdev;
|
||||
|
||||
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
||||
s->handle_bad_blocks = 1;
|
||||
atomic_inc(&rdev2->nr_pending);
|
||||
@ -4832,8 +4796,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
clear_bit(R5_MadeGood, &dev->flags);
|
||||
}
|
||||
if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
|
||||
struct md_rdev *rdev2 = rcu_dereference(
|
||||
conf->disks[i].replacement);
|
||||
struct md_rdev *rdev2 = conf->disks[i].replacement;
|
||||
|
||||
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
||||
s->handle_bad_blocks = 1;
|
||||
atomic_inc(&rdev2->nr_pending);
|
||||
@ -4854,8 +4818,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags))
|
||||
do_recovery = 1;
|
||||
else if (!rdev) {
|
||||
rdev = rcu_dereference(
|
||||
conf->disks[i].replacement);
|
||||
rdev = conf->disks[i].replacement;
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags))
|
||||
do_recovery = 1;
|
||||
}
|
||||
@ -4882,7 +4845,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
else
|
||||
s->replacing = 1;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5339,23 +5301,23 @@ finish:
|
||||
struct r5dev *dev = &sh->dev[i];
|
||||
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
|
||||
/* We own a safe reference to the rdev */
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev = conf->disks[i].rdev;
|
||||
if (!rdev_set_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0))
|
||||
md_error(conf->mddev, rdev);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev_clear_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
rdev = conf->disks[i].replacement;
|
||||
if (!rdev)
|
||||
/* rdev have been moved down */
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev_clear_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
@ -5514,24 +5476,22 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
&dd_idx, NULL);
|
||||
end_sector = sector + bio_sectors(raid_bio);
|
||||
|
||||
rcu_read_lock();
|
||||
if (r5c_big_stripe_cached(conf, sector))
|
||||
goto out_rcu_unlock;
|
||||
return 0;
|
||||
|
||||
rdev = rcu_dereference(conf->disks[dd_idx].replacement);
|
||||
rdev = conf->disks[dd_idx].replacement;
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags) ||
|
||||
rdev->recovery_offset < end_sector) {
|
||||
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
|
||||
rdev = conf->disks[dd_idx].rdev;
|
||||
if (!rdev)
|
||||
goto out_rcu_unlock;
|
||||
return 0;
|
||||
if (test_bit(Faulty, &rdev->flags) ||
|
||||
!(test_bit(In_sync, &rdev->flags) ||
|
||||
rdev->recovery_offset >= end_sector))
|
||||
goto out_rcu_unlock;
|
||||
return 0;
|
||||
}
|
||||
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
|
||||
&bad_sectors)) {
|
||||
@ -5575,10 +5535,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
raid_bio->bi_iter.bi_sector);
|
||||
submit_bio_noacct(align_bio);
|
||||
return 1;
|
||||
|
||||
out_rcu_unlock:
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
|
||||
@ -6581,14 +6537,12 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
	 * Note in case of > 1 drive failures it's possible we're rebuilding
	 * one drive while leaving another faulty drive in array.
	 */
	rcu_read_lock();
	for (i = 0; i < conf->raid_disks; i++) {
		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
		struct md_rdev *rdev = conf->disks[i].rdev;

		if (rdev == NULL || test_bit(Faulty, &rdev->flags))
			still_degraded = 1;
	}
	rcu_read_unlock();

	md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);

@ -7899,18 +7853,10 @@ static int raid5_run(struct mddev *mddev)

	for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
	     i++) {
		rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
		if (!rdev && conf->disks[i].replacement) {
			/* The replacement is all we have yet */
			rdev = rdev_mdlock_deref(mddev,
						 conf->disks[i].replacement);
			conf->disks[i].replacement = NULL;
			clear_bit(Replacement, &rdev->flags);
			rcu_assign_pointer(conf->disks[i].rdev, rdev);
		}
		rdev = conf->disks[i].rdev;
		if (!rdev)
			continue;
		if (rcu_access_pointer(conf->disks[i].replacement) &&
		if (conf->disks[i].replacement &&
		    conf->reshape_progress != MaxSector) {
			/* replacements and reshape simply do not mix. */
			pr_warn("md: cannot handle concurrent replacement and reshape.\n");
@ -8094,15 +8040,16 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
	struct r5conf *conf = mddev->private;
	int i;

	lockdep_assert_held(&mddev->lock);

	seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
		   conf->chunk_sectors / 2, mddev->layout);
	seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
	rcu_read_lock();
	for (i = 0; i < conf->raid_disks; i++) {
		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
		struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);

		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
	}
	rcu_read_unlock();
	seq_printf (seq, "]");
}

@ -8140,9 +8087,8 @@ static int raid5_spare_active(struct mddev *mddev)
	unsigned long flags;

	for (i = 0; i < conf->raid_disks; i++) {
		rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
		replacement = rdev_mdlock_deref(mddev,
						conf->disks[i].replacement);
		rdev = conf->disks[i].rdev;
		replacement = conf->disks[i].replacement;
		if (replacement
		    && replacement->recovery_offset == MaxSector
		    && !test_bit(Faulty, &replacement->flags)
@ -8181,7 +8127,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
	struct r5conf *conf = mddev->private;
	int err = 0;
	int number = rdev->raid_disk;
	struct md_rdev __rcu **rdevp;
	struct md_rdev **rdevp;
	struct disk_info *p;
	struct md_rdev *tmp;

@ -8204,9 +8150,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
	if (unlikely(number >= conf->pool_size))
		return 0;
	p = conf->disks + number;
	if (rdev == rcu_access_pointer(p->rdev))
	if (rdev == p->rdev)
		rdevp = &p->rdev;
	else if (rdev == rcu_access_pointer(p->replacement))
	else if (rdev == p->replacement)
		rdevp = &p->replacement;
	else
		return 0;
@ -8226,37 +8172,24 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
	if (!test_bit(Faulty, &rdev->flags) &&
	    mddev->recovery_disabled != conf->recovery_disabled &&
	    !has_failed(conf) &&
	    (!rcu_access_pointer(p->replacement) ||
	     rcu_access_pointer(p->replacement) == rdev) &&
	    (!p->replacement || p->replacement == rdev) &&
	    number < conf->raid_disks) {
		err = -EBUSY;
		goto abort;
	}
	*rdevp = NULL;
	if (!test_bit(RemoveSynchronized, &rdev->flags)) {
		lockdep_assert_held(&mddev->reconfig_mutex);
		synchronize_rcu();
		if (atomic_read(&rdev->nr_pending)) {
			/* lost the race, try later */
			err = -EBUSY;
			rcu_assign_pointer(*rdevp, rdev);
		}
	}
	WRITE_ONCE(*rdevp, NULL);
	if (!err) {
		err = log_modify(conf, rdev, false);
		if (err)
			goto abort;
	}

	tmp = rcu_access_pointer(p->replacement);
	tmp = p->replacement;
	if (tmp) {
		/* We must have just cleared 'rdev' */
		rcu_assign_pointer(p->rdev, tmp);
		WRITE_ONCE(p->rdev, tmp);
		clear_bit(Replacement, &tmp->flags);
		smp_mb(); /* Make sure other CPUs may see both as identical
			   * but will never see neither - if they are careful
			   */
		rcu_assign_pointer(p->replacement, NULL);
		WRITE_ONCE(p->replacement, NULL);

		if (!err)
			err = log_modify(conf, tmp, true);
@ -8324,7 +8257,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
		rdev->raid_disk = disk;
		if (rdev->saved_raid_disk != disk)
			conf->fullsync = 1;
		rcu_assign_pointer(p->rdev, rdev);
		WRITE_ONCE(p->rdev, rdev);

		err = log_modify(conf, rdev, true);

@ -8333,7 +8266,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
	}
	for (disk = first; disk <= last; disk++) {
		p = conf->disks + disk;
		tmp = rdev_mdlock_deref(mddev, p->rdev);
		tmp = p->rdev;
		if (test_bit(WantReplacement, &tmp->flags) &&
		    mddev->reshape_position == MaxSector &&
		    p->replacement == NULL) {
@ -8342,7 +8275,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
			rdev->raid_disk = disk;
			err = 0;
			conf->fullsync = 1;
			rcu_assign_pointer(p->replacement, rdev);
			WRITE_ONCE(p->replacement, rdev);
			break;
		}
	}
@ -8475,7 +8408,7 @@ static int raid5_start_reshape(struct mddev *mddev)
	if (mddev->recovery_cp < MaxSector)
		return -EBUSY;
	for (i = 0; i < conf->raid_disks; i++)
		if (rdev_mdlock_deref(mddev, conf->disks[i].replacement))
		if (conf->disks[i].replacement)
			return -EBUSY;

	rdev_for_each(rdev, mddev) {
@ -8646,12 +8579,10 @@ static void raid5_finish_reshape(struct mddev *mddev)
		for (d = conf->raid_disks ;
		     d < conf->raid_disks - mddev->delta_disks;
		     d++) {
			rdev = rdev_mdlock_deref(mddev,
						 conf->disks[d].rdev);
			rdev = conf->disks[d].rdev;
			if (rdev)
				clear_bit(In_sync, &rdev->flags);
			rdev = rdev_mdlock_deref(mddev,
						 conf->disks[d].replacement);
			rdev = conf->disks[d].replacement;
			if (rdev)
				clear_bit(In_sync, &rdev->flags);
		}

@ -473,8 +473,8 @@ enum {
 */

struct disk_info {
	struct md_rdev __rcu *rdev;
	struct md_rdev __rcu *replacement;
	struct md_rdev *rdev;
	struct md_rdev *replacement;
	struct page *extra_page; /* extra page to use in prexor */
};

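For readers following the conversion, below is a minimal, self-contained C sketch of the access pattern the series moves raid5 to: a path that cannot hold 'reconfig_mutex' pins the rdev by bumping nr_pending after a READ_ONCE() of the slot (as raid5_read_one_chunk() does above), and removal unpublishes the pointer with WRITE_ONCE() before waiting for pending references to drain. Everything here (disk_slot, get_active_rdev, remove_rdev) is an illustrative stand-in, not the md API, and the spin-wait is a toy substitute for the mddev_suspend()/synchronize_rcu() guarantees described in the cover letter.

/* toy_rdev.c - illustrative only; build with: cc -std=c11 toy_rdev.c */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace stand-ins for the kernel's READ_ONCE()/WRITE_ONCE(). */
#define READ_ONCE(x)	 atomic_load_explicit(&(x), memory_order_relaxed)
#define WRITE_ONCE(x, v) atomic_store_explicit(&(x), (v), memory_order_relaxed)

struct rdev {
	atomic_int nr_pending;		/* in-flight references */
	int in_sync;
};

struct disk_slot {
	_Atomic(struct rdev *) rdev;	/* was: struct md_rdev __rcu *rdev */
};

/* Hot-path lookup: pin the rdev via nr_pending before using it. */
static struct rdev *get_active_rdev(struct disk_slot *slot)
{
	struct rdev *rdev = READ_ONCE(slot->rdev);

	if (rdev && rdev->in_sync) {
		atomic_fetch_add(&rdev->nr_pending, 1);
		return rdev;
	}
	return NULL;
}

/* Removal: unpublish first so no new lookup sees the rdev, then drain. */
static void remove_rdev(struct disk_slot *slot)
{
	struct rdev *rdev = READ_ONCE(slot->rdev);

	if (!rdev)
		return;
	WRITE_ONCE(slot->rdev, NULL);
	while (atomic_load(&rdev->nr_pending) != 0)
		;	/* toy stand-in for suspend/synchronize_rcu */
	free(rdev);
}

int main(void)
{
	struct disk_slot slot = { NULL };
	struct rdev *rdev = calloc(1, sizeof(*rdev));
	struct rdev *ref;

	if (!rdev)
		return 1;
	rdev->in_sync = 1;
	WRITE_ONCE(slot.rdev, rdev);		/* hot_add_disk-style publish */

	ref = get_active_rdev(&slot);
	printf("pinned rdev: %s\n", ref ? "yes" : "no");
	if (ref)
		atomic_fetch_sub(&ref->nr_pending, 1);	/* rdev_dec_pending */

	remove_rdev(&slot);
	return 0;
}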