Merge tag 'md-next-20231219' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.8/block
Pull MD updates from Song: "1. Remove deprecated flavors, by Song Liu; 2. raid1 read error check support, by Li Nan; 3. Better handle events off-by-1 case, by Alex Lyakas." * tag 'md-next-20231219' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: md: Remove deprecated CONFIG_MD_FAULTY md: Remove deprecated CONFIG_MD_MULTIPATH md: Remove deprecated CONFIG_MD_LINEAR md/raid1: support read error check md: factor out a helper exceed_read_errors() to check read_errors md: Whenassemble the array, consult the superblock of the freshest device md/raid1: remove unnecessary null checking
This commit is contained in:
commit
0bd7c5d802
@ -61,19 +61,6 @@ config MD_BITMAP_FILE
|
||||
various kernel APIs and can only work with files on a file system not
|
||||
actually sitting on the MD device.
|
||||
|
||||
config MD_LINEAR
|
||||
tristate "Linear (append) mode (deprecated)"
|
||||
depends on BLK_DEV_MD
|
||||
help
|
||||
If you say Y here, then your multiple devices driver will be able to
|
||||
use the so-called linear mode, i.e. it will combine the hard disk
|
||||
partitions by simply appending one to the other.
|
||||
|
||||
To compile this as a module, choose M here: the module
|
||||
will be called linear.
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config MD_RAID0
|
||||
tristate "RAID-0 (striping) mode"
|
||||
depends on BLK_DEV_MD
|
||||
@ -172,27 +159,6 @@ config MD_RAID456
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config MD_MULTIPATH
|
||||
tristate "Multipath I/O support (deprecated)"
|
||||
depends on BLK_DEV_MD
|
||||
help
|
||||
MD_MULTIPATH provides a simple multi-path personality for use with
|
||||
the MD framework. It is not under active development. New
|
||||
projects should consider using DM_MULTIPATH which has more
|
||||
features and more testing.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config MD_FAULTY
|
||||
tristate "Faulty test module for MD (deprecated)"
|
||||
depends on BLK_DEV_MD
|
||||
help
|
||||
The "faulty" module allows for a block device that occasionally returns
|
||||
read or write errors. It is useful for testing.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
|
||||
config MD_CLUSTER
|
||||
tristate "Cluster Support for MD"
|
||||
depends on BLK_DEV_MD
|
||||
|
@ -29,22 +29,16 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
|
||||
|
||||
md-mod-y += md.o md-bitmap.o
|
||||
raid456-y += raid5.o raid5-cache.o raid5-ppl.o
|
||||
linear-y += md-linear.o
|
||||
multipath-y += md-multipath.o
|
||||
faulty-y += md-faulty.o
|
||||
|
||||
# Note: link order is important. All raid personalities
|
||||
# and must come before md.o, as they each initialise
|
||||
# themselves, and md.o may use the personalities when it
|
||||
# and must come before md.o, as they each initialise
|
||||
# themselves, and md.o may use the personalities when it
|
||||
# auto-initialised.
|
||||
|
||||
obj-$(CONFIG_MD_LINEAR) += linear.o
|
||||
obj-$(CONFIG_MD_RAID0) += raid0.o
|
||||
obj-$(CONFIG_MD_RAID1) += raid1.o
|
||||
obj-$(CONFIG_MD_RAID10) += raid10.o
|
||||
obj-$(CONFIG_MD_RAID456) += raid456.o
|
||||
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
|
||||
obj-$(CONFIG_MD_FAULTY) += faulty.o
|
||||
obj-$(CONFIG_MD_CLUSTER) += md-cluster.o
|
||||
obj-$(CONFIG_BCACHE) += bcache/
|
||||
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
|
||||
|
@ -49,7 +49,6 @@ static int md_setup_ents __initdata;
|
||||
* instead of just one. -- KTK
|
||||
* 18May2000: Added support for persistent-superblock arrays:
|
||||
* md=n,0,factor,fault,device-list uses RAID0 for device n
|
||||
* md=n,-1,factor,fault,device-list uses LINEAR for device n
|
||||
* md=n,device-list reads a RAID superblock from the devices
|
||||
* elements in device-list are read by name_to_kdev_t so can be
|
||||
* a hex number or something like /dev/hda1 /dev/sdb
|
||||
@ -88,7 +87,7 @@ static int __init md_setup(char *str)
|
||||
md_setup_ents++;
|
||||
switch (get_option(&str, &level)) { /* RAID level */
|
||||
case 2: /* could be 0 or -1.. */
|
||||
if (level == 0 || level == LEVEL_LINEAR) {
|
||||
if (level == 0) {
|
||||
if (get_option(&str, &factor) != 2 || /* Chunk Size */
|
||||
get_option(&str, &fault) != 2) {
|
||||
printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
|
||||
@ -96,10 +95,7 @@ static int __init md_setup(char *str)
|
||||
}
|
||||
md_setup_args[ent].level = level;
|
||||
md_setup_args[ent].chunk = 1 << (factor+12);
|
||||
if (level == LEVEL_LINEAR)
|
||||
pername = "linear";
|
||||
else
|
||||
pername = "raid0";
|
||||
pername = "raid0";
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
|
@ -1,365 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* faulty.c : Multiple Devices driver for Linux
|
||||
*
|
||||
* Copyright (C) 2004 Neil Brown
|
||||
*
|
||||
* faulty-device-simulator personality for md
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* The "faulty" personality causes some requests to fail.
|
||||
*
|
||||
* Possible failure modes are:
|
||||
* reads fail "randomly" but succeed on retry
|
||||
* writes fail "randomly" but succeed on retry
|
||||
* reads for some address fail and then persist until a write
|
||||
* reads for some address fail and then persist irrespective of write
|
||||
* writes for some address fail and persist
|
||||
* all writes fail
|
||||
*
|
||||
* Different modes can be active at a time, but only
|
||||
* one can be set at array creation. Others can be added later.
|
||||
* A mode can be one-shot or recurrent with the recurrence being
|
||||
* once in every N requests.
|
||||
* The bottom 5 bits of the "layout" indicate the mode. The
|
||||
* remainder indicate a period, or 0 for one-shot.
|
||||
*
|
||||
* There is an implementation limit on the number of concurrently
|
||||
* persisting-faulty blocks. When a new fault is requested that would
|
||||
* exceed the limit, it is ignored.
|
||||
* All current faults can be cleared using a layout of "0".
|
||||
*
|
||||
* Requests are always sent to the device. If they are to fail,
|
||||
* we clone the bio and insert a new b_end_io into the chain.
|
||||
*/
|
||||
|
||||
/*
 * Failure modes. Values below Modes index faulty_conf.period[] and
 * faulty_conf.counters[].
 */
#define WriteTransient	0
#define ReadTransient	1
#define WritePersistent	2
#define ReadPersistent	3
#define WriteAll	4 /* doesn't go to device */
#define ReadFixable	5
#define Modes	6

/* Layout commands acted on by faulty_reshape() instead of arming a mode. */
#define ClearErrors	31
#define ClearFaults	30

/* Values stored in faulty_conf.modes[] only. */
#define AllPersist	100 /* internal use only */
#define NoPersist	101

/* Layout encoding: mode in the bottom ModeShift bits, period above them. */
#define ModeMask	0x1f
#define ModeShift	5

/* Number of slots in faulty_conf.faults[] and faulty_conf.modes[]. */
#define MaxFault	50
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/raid/md_u.h>
|
||||
#include <linux/slab.h>
|
||||
#include "md.h"
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
|
||||
static void faulty_fail(struct bio *bio)
|
||||
{
|
||||
struct bio *b = bio->bi_private;
|
||||
|
||||
b->bi_iter.bi_size = bio->bi_iter.bi_size;
|
||||
b->bi_iter.bi_sector = bio->bi_iter.bi_sector;
|
||||
|
||||
bio_put(bio);
|
||||
|
||||
bio_io_error(b);
|
||||
}
|
||||
|
||||
struct faulty_conf {
|
||||
int period[Modes];
|
||||
atomic_t counters[Modes];
|
||||
sector_t faults[MaxFault];
|
||||
int modes[MaxFault];
|
||||
int nfaults;
|
||||
struct md_rdev *rdev;
|
||||
};
|
||||
|
||||
static int check_mode(struct faulty_conf *conf, int mode)
|
||||
{
|
||||
if (conf->period[mode] == 0 &&
|
||||
atomic_read(&conf->counters[mode]) <= 0)
|
||||
return 0; /* no failure, no decrement */
|
||||
|
||||
|
||||
if (atomic_dec_and_test(&conf->counters[mode])) {
|
||||
if (conf->period[mode])
|
||||
atomic_set(&conf->counters[mode], conf->period[mode]);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Look for a recorded fault inside [start, end) and decide whether a
 * request in direction @dir (READ or WRITE) must fail because of it.
 *
 * Only the first matching entry is considered. A write that hits a
 * ReadFixable entry "fixes" it: the entry becomes NoPersist and the
 * write is allowed through.
 *
 * Returns 1 if the request must fail, 0 otherwise.
 */
static int check_sector(struct faulty_conf *conf, sector_t start, sector_t end, int dir)
{
	int idx;

	for (idx = 0; idx < conf->nfaults; idx++) {
		sector_t bad = conf->faults[idx];

		if (bad < start || bad >= end)
			continue;

		/* found it ... */
		switch (conf->modes[idx] * 2 + dir) {
		case ReadFixable * 2 + WRITE:
			conf->modes[idx] = NoPersist;
			return 0;
		case WritePersistent * 2 + WRITE:
		case ReadPersistent * 2 + READ:
		case ReadFixable * 2 + READ:
		case AllPersist * 2 + READ:
		case AllPersist * 2 + WRITE:
			return 1;
		default:
			return 0;
		}
	}

	return 0;
}
|
||||
|
||||
/*
 * Record @start as a faulty sector with the given @mode.
 *
 * If the sector is already recorded, its stored mode is merged with the
 * new one (combinations of read and write persistence become AllPersist).
 * Otherwise the sector is stored in the last NoPersist slot seen, or
 * appended; when that would exceed MaxFault the request is ignored.
 */
static void add_sector(struct faulty_conf *conf, sector_t start, int mode)
{
	int slot = conf->nfaults;	/* default: append after the used slots */
	int idx;

	for (idx = 0; idx < conf->nfaults; idx++) {
		int *cur = &conf->modes[idx];

		if (conf->faults[idx] != start) {
			/* Remember a slot that can be recycled. */
			if (*cur == NoPersist)
				slot = idx;
			continue;
		}

		switch (mode) {
		case NoPersist:
			*cur = mode;
			return;
		case WritePersistent:
			*cur = (*cur == ReadPersistent || *cur == ReadFixable) ?
				AllPersist : WritePersistent;
			return;
		case ReadPersistent:
			*cur = (*cur == WritePersistent) ?
				AllPersist : ReadPersistent;
			return;
		case ReadFixable:
			*cur = (*cur == WritePersistent || *cur == ReadPersistent) ?
				AllPersist : ReadFixable;
			return;
		default:
			/* Any other mode leaves the entry alone; keep scanning. */
			break;
		}
	}

	if (slot >= MaxFault)
		return;

	conf->faults[slot] = start;
	conf->modes[slot] = mode;
	if (slot == conf->nfaults)
		conf->nfaults = slot + 1;
}
|
||||
|
||||
static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct faulty_conf *conf = mddev->private;
|
||||
int failit = 0;
|
||||
|
||||
if (bio_data_dir(bio) == WRITE) {
|
||||
/* write request */
|
||||
if (atomic_read(&conf->counters[WriteAll])) {
|
||||
/* special case - don't decrement, don't submit_bio_noacct,
|
||||
* just fail immediately
|
||||
*/
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (check_sector(conf, bio->bi_iter.bi_sector,
|
||||
bio_end_sector(bio), WRITE))
|
||||
failit = 1;
|
||||
if (check_mode(conf, WritePersistent)) {
|
||||
add_sector(conf, bio->bi_iter.bi_sector,
|
||||
WritePersistent);
|
||||
failit = 1;
|
||||
}
|
||||
if (check_mode(conf, WriteTransient))
|
||||
failit = 1;
|
||||
} else {
|
||||
/* read request */
|
||||
if (check_sector(conf, bio->bi_iter.bi_sector,
|
||||
bio_end_sector(bio), READ))
|
||||
failit = 1;
|
||||
if (check_mode(conf, ReadTransient))
|
||||
failit = 1;
|
||||
if (check_mode(conf, ReadPersistent)) {
|
||||
add_sector(conf, bio->bi_iter.bi_sector,
|
||||
ReadPersistent);
|
||||
failit = 1;
|
||||
}
|
||||
if (check_mode(conf, ReadFixable)) {
|
||||
add_sector(conf, bio->bi_iter.bi_sector,
|
||||
ReadFixable);
|
||||
failit = 1;
|
||||
}
|
||||
}
|
||||
|
||||
md_account_bio(mddev, &bio);
|
||||
if (failit) {
|
||||
struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
|
||||
&mddev->bio_set);
|
||||
|
||||
b->bi_private = bio;
|
||||
b->bi_end_io = faulty_fail;
|
||||
bio = b;
|
||||
} else
|
||||
bio_set_dev(bio, conf->rdev->bdev);
|
||||
|
||||
submit_bio_noacct(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void faulty_status(struct seq_file *seq, struct mddev *mddev)
|
||||
{
|
||||
struct faulty_conf *conf = mddev->private;
|
||||
int n;
|
||||
|
||||
if ((n=atomic_read(&conf->counters[WriteTransient])) != 0)
|
||||
seq_printf(seq, " WriteTransient=%d(%d)",
|
||||
n, conf->period[WriteTransient]);
|
||||
|
||||
if ((n=atomic_read(&conf->counters[ReadTransient])) != 0)
|
||||
seq_printf(seq, " ReadTransient=%d(%d)",
|
||||
n, conf->period[ReadTransient]);
|
||||
|
||||
if ((n=atomic_read(&conf->counters[WritePersistent])) != 0)
|
||||
seq_printf(seq, " WritePersistent=%d(%d)",
|
||||
n, conf->period[WritePersistent]);
|
||||
|
||||
if ((n=atomic_read(&conf->counters[ReadPersistent])) != 0)
|
||||
seq_printf(seq, " ReadPersistent=%d(%d)",
|
||||
n, conf->period[ReadPersistent]);
|
||||
|
||||
|
||||
if ((n=atomic_read(&conf->counters[ReadFixable])) != 0)
|
||||
seq_printf(seq, " ReadFixable=%d(%d)",
|
||||
n, conf->period[ReadFixable]);
|
||||
|
||||
if ((n=atomic_read(&conf->counters[WriteAll])) != 0)
|
||||
seq_printf(seq, " WriteAll");
|
||||
|
||||
seq_printf(seq, " nfaults=%d", conf->nfaults);
|
||||
}
|
||||
|
||||
|
||||
static int faulty_reshape(struct mddev *mddev)
|
||||
{
|
||||
int mode = mddev->new_layout & ModeMask;
|
||||
int count = mddev->new_layout >> ModeShift;
|
||||
struct faulty_conf *conf = mddev->private;
|
||||
|
||||
if (mddev->new_layout < 0)
|
||||
return 0;
|
||||
|
||||
/* new layout */
|
||||
if (mode == ClearFaults)
|
||||
conf->nfaults = 0;
|
||||
else if (mode == ClearErrors) {
|
||||
int i;
|
||||
for (i=0 ; i < Modes ; i++) {
|
||||
conf->period[i] = 0;
|
||||
atomic_set(&conf->counters[i], 0);
|
||||
}
|
||||
} else if (mode < Modes) {
|
||||
conf->period[mode] = count;
|
||||
if (!count) count++;
|
||||
atomic_set(&conf->counters[mode], count);
|
||||
} else
|
||||
return -EINVAL;
|
||||
mddev->new_layout = -1;
|
||||
mddev->layout = -1; /* makes sure further changes come through */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Size callback: returns @sectors, or mddev->dev_sectors when @sectors
 * is 0. Warns once if called with a non-zero @raid_disks.
 */
static sector_t faulty_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
	WARN_ONCE(raid_disks,
		  "%s does not support generic reshape\n", __func__);

	return sectors ? sectors : mddev->dev_sectors;
}
|
||||
|
||||
static int faulty_run(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
int i;
|
||||
struct faulty_conf *conf;
|
||||
|
||||
if (md_check_no_bitmap(mddev))
|
||||
return -EINVAL;
|
||||
|
||||
conf = kmalloc(sizeof(*conf), GFP_KERNEL);
|
||||
if (!conf)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i=0; i<Modes; i++) {
|
||||
atomic_set(&conf->counters[i], 0);
|
||||
conf->period[i] = 0;
|
||||
}
|
||||
conf->nfaults = 0;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
conf->rdev = rdev;
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
}
|
||||
|
||||
md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
|
||||
mddev->private = conf;
|
||||
|
||||
faulty_reshape(mddev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Free callback: release the faulty_conf passed in @priv. */
static void faulty_free(struct mddev *mddev, void *priv)
{
	kfree(priv);
}
|
||||
|
||||
static struct md_personality faulty_personality =
|
||||
{
|
||||
.name = "faulty",
|
||||
.level = LEVEL_FAULTY,
|
||||
.owner = THIS_MODULE,
|
||||
.make_request = faulty_make_request,
|
||||
.run = faulty_run,
|
||||
.free = faulty_free,
|
||||
.status = faulty_status,
|
||||
.check_reshape = faulty_reshape,
|
||||
.size = faulty_size,
|
||||
};
|
||||
|
||||
/* Module entry point: register the faulty personality with the md core. */
static int __init raid_init(void)
{
	int err = register_md_personality(&faulty_personality);

	return err;
}
|
||||
|
||||
static void raid_exit(void)
|
||||
{
|
||||
unregister_md_personality(&faulty_personality);
|
||||
}
|
||||
|
||||
module_init(raid_init);
|
||||
module_exit(raid_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Fault injection personality for MD (deprecated)");
|
||||
MODULE_ALIAS("md-personality-10"); /* faulty */
|
||||
MODULE_ALIAS("md-faulty");
|
||||
MODULE_ALIAS("md-level--5");
|
@ -1,318 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
linear.c : Multiple Devices driver for Linux
|
||||
Copyright (C) 1994-96 Marc ZYNGIER
|
||||
<zyngier@ufr-info-p7.ibp.fr> or
|
||||
<maz@gloups.fdn.fr>
|
||||
|
||||
Linear mode management functions.
|
||||
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/raid/md_u.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <trace/events/block.h>
|
||||
#include "md.h"
|
||||
#include "md-linear.h"
|
||||
|
||||
/*
|
||||
* find which device holds a particular offset
|
||||
*/
|
||||
/*
 * Find which device holds a particular array offset.
 *
 * Binary search over conf->disks[] for the first entry whose end_sector
 * is greater than @sector; if there is none, the last entry is returned.
 */
static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
{
	struct linear_conf *conf = mddev->private;
	int lo = 0;
	int hi = mddev->raid_disks - 1;

	while (lo < hi) {
		int mid = (lo + hi) / 2;

		if (sector < conf->disks[mid].end_sector)
			hi = mid;
		else
			lo = mid + 1;
	}

	return &conf->disks[lo];
}
|
||||
|
||||
/*
 * Size callback: report the array size stored in the linear_conf.
 * Warns once when called with a non-zero @sectors or @raid_disks.
 */
static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
	struct linear_conf *conf = mddev->private;

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

	return conf->array_sectors;
}
|
||||
|
||||
static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
|
||||
{
|
||||
struct linear_conf *conf;
|
||||
struct md_rdev *rdev;
|
||||
int i, cnt;
|
||||
|
||||
conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
|
||||
if (!conf)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* conf->raid_disks is copy of mddev->raid_disks. The reason to
|
||||
* keep a copy of mddev->raid_disks in struct linear_conf is,
|
||||
* mddev->raid_disks may not be consistent with pointers number of
|
||||
* conf->disks[] when it is updated in linear_add() and used to
|
||||
* iterate old conf->disks[] earray in linear_congested().
|
||||
* Here conf->raid_disks is always consitent with number of
|
||||
* pointers in conf->disks[] array, and mddev->private is updated
|
||||
* with rcu_assign_pointer() in linear_addr(), such race can be
|
||||
* avoided.
|
||||
*/
|
||||
conf->raid_disks = raid_disks;
|
||||
|
||||
cnt = 0;
|
||||
conf->array_sectors = 0;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
int j = rdev->raid_disk;
|
||||
struct dev_info *disk = conf->disks + j;
|
||||
sector_t sectors;
|
||||
|
||||
if (j < 0 || j >= raid_disks || disk->rdev) {
|
||||
pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
|
||||
mdname(mddev));
|
||||
goto out;
|
||||
}
|
||||
|
||||
disk->rdev = rdev;
|
||||
if (mddev->chunk_sectors) {
|
||||
sectors = rdev->sectors;
|
||||
sector_div(sectors, mddev->chunk_sectors);
|
||||
rdev->sectors = sectors * mddev->chunk_sectors;
|
||||
}
|
||||
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
|
||||
conf->array_sectors += rdev->sectors;
|
||||
cnt++;
|
||||
}
|
||||
if (cnt != raid_disks) {
|
||||
pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
|
||||
mdname(mddev));
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we calculate the device offsets.
|
||||
*/
|
||||
conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
|
||||
|
||||
for (i = 1; i < raid_disks; i++)
|
||||
conf->disks[i].end_sector =
|
||||
conf->disks[i-1].end_sector +
|
||||
conf->disks[i].rdev->sectors;
|
||||
|
||||
return conf;
|
||||
|
||||
out:
|
||||
kfree(conf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int linear_run (struct mddev *mddev)
|
||||
{
|
||||
struct linear_conf *conf;
|
||||
int ret;
|
||||
|
||||
if (md_check_no_bitmap(mddev))
|
||||
return -EINVAL;
|
||||
conf = linear_conf(mddev, mddev->raid_disks);
|
||||
|
||||
if (!conf)
|
||||
return 1;
|
||||
mddev->private = conf;
|
||||
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret) {
|
||||
kfree(conf);
|
||||
mddev->private = NULL;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
/* Adding a drive to a linear array allows the array to grow.
|
||||
* It is permitted if the new drive has a matching superblock
|
||||
* already on it, with raid_disk equal to raid_disks.
|
||||
* It is achieved by creating a new linear_private_data structure
|
||||
* and swapping it in in-place of the current one.
|
||||
* The current one is never freed until the array is stopped.
|
||||
* This avoids races.
|
||||
*/
|
||||
struct linear_conf *newconf, *oldconf;
|
||||
|
||||
if (rdev->saved_raid_disk != mddev->raid_disks)
|
||||
return -EINVAL;
|
||||
|
||||
rdev->raid_disk = rdev->saved_raid_disk;
|
||||
rdev->saved_raid_disk = -1;
|
||||
|
||||
newconf = linear_conf(mddev,mddev->raid_disks+1);
|
||||
|
||||
if (!newconf)
|
||||
return -ENOMEM;
|
||||
|
||||
/* newconf->raid_disks already keeps a copy of * the increased
|
||||
* value of mddev->raid_disks, WARN_ONCE() is just used to make
|
||||
* sure of this. It is possible that oldconf is still referenced
|
||||
* in linear_congested(), therefore kfree_rcu() is used to free
|
||||
* oldconf until no one uses it anymore.
|
||||
*/
|
||||
oldconf = rcu_dereference_protected(mddev->private,
|
||||
lockdep_is_held(&mddev->reconfig_mutex));
|
||||
mddev->raid_disks++;
|
||||
WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
|
||||
"copied raid_disks doesn't match mddev->raid_disks");
|
||||
rcu_assign_pointer(mddev->private, newconf);
|
||||
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
|
||||
set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
|
||||
kfree_rcu(oldconf, rcu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Free callback: release the linear_conf passed in @priv. */
static void linear_free(struct mddev *mddev, void *priv)
{
	kfree(priv);
}
|
||||
|
||||
static bool linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct dev_info *tmp_dev;
|
||||
sector_t start_sector, end_sector, data_offset;
|
||||
sector_t bio_sector = bio->bi_iter.bi_sector;
|
||||
|
||||
if (unlikely(bio->bi_opf & REQ_PREFLUSH)
|
||||
&& md_flush_request(mddev, bio))
|
||||
return true;
|
||||
|
||||
tmp_dev = which_dev(mddev, bio_sector);
|
||||
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
|
||||
end_sector = tmp_dev->end_sector;
|
||||
data_offset = tmp_dev->rdev->data_offset;
|
||||
|
||||
if (unlikely(bio_sector >= end_sector ||
|
||||
bio_sector < start_sector))
|
||||
goto out_of_bounds;
|
||||
|
||||
if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
|
||||
md_error(mddev, tmp_dev->rdev);
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (unlikely(bio_end_sector(bio) > end_sector)) {
|
||||
/* This bio crosses a device boundary, so we have to split it */
|
||||
struct bio *split = bio_split(bio, end_sector - bio_sector,
|
||||
GFP_NOIO, &mddev->bio_set);
|
||||
bio_chain(split, bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
}
|
||||
|
||||
md_account_bio(mddev, &bio);
|
||||
bio_set_dev(bio, tmp_dev->rdev->bdev);
|
||||
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
|
||||
start_sector + data_offset;
|
||||
|
||||
if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!bdev_max_discard_sectors(bio->bi_bdev))) {
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
} else {
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
|
||||
bio_sector);
|
||||
mddev_check_write_zeroes(mddev, bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
return true;
|
||||
|
||||
out_of_bounds:
|
||||
pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n",
|
||||
mdname(mddev),
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
tmp_dev->rdev->bdev,
|
||||
(unsigned long long)tmp_dev->rdev->sectors,
|
||||
(unsigned long long)start_sector);
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void linear_status (struct seq_file *seq, struct mddev *mddev)
|
||||
{
|
||||
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
|
||||
}
|
||||
|
||||
static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
|
||||
char *md_name = mdname(mddev);
|
||||
|
||||
pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
|
||||
md_name, rdev->bdev);
|
||||
}
|
||||
}
|
||||
|
||||
/* Quiesce callback: does nothing for either value of @state. */
static void linear_quiesce(struct mddev *mddev, int state)
{
	/* nothing to do */
}
|
||||
|
||||
static struct md_personality linear_personality =
|
||||
{
|
||||
.name = "linear",
|
||||
.level = LEVEL_LINEAR,
|
||||
.owner = THIS_MODULE,
|
||||
.make_request = linear_make_request,
|
||||
.run = linear_run,
|
||||
.free = linear_free,
|
||||
.status = linear_status,
|
||||
.hot_add_disk = linear_add,
|
||||
.size = linear_size,
|
||||
.quiesce = linear_quiesce,
|
||||
.error_handler = linear_error,
|
||||
};
|
||||
|
||||
/* Module entry point: register the linear personality with the md core. */
static int __init linear_init (void)
{
	int err = register_md_personality(&linear_personality);

	return err;
}
|
||||
|
||||
static void linear_exit (void)
|
||||
{
|
||||
unregister_md_personality (&linear_personality);
|
||||
}
|
||||
|
||||
module_init(linear_init);
|
||||
module_exit(linear_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
|
||||
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
|
||||
MODULE_ALIAS("md-linear");
|
||||
MODULE_ALIAS("md-level--1");
|
@ -1,463 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* multipath.c : Multiple Devices driver for Linux
|
||||
*
|
||||
* Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
|
||||
*
|
||||
* Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
|
||||
*
|
||||
* MULTIPATH management functions.
|
||||
*
|
||||
* derived from raid1.c.
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/raid/md_u.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/slab.h>
|
||||
#include "md.h"
|
||||
#include "md-multipath.h"
|
||||
|
||||
#define MAX_WORK_PER_DISK 128
|
||||
|
||||
#define NR_RESERVED_BUFS 32
|
||||
|
||||
static int multipath_map (struct mpconf *conf)
|
||||
{
|
||||
int i, disks = conf->raid_disks;
|
||||
|
||||
/*
|
||||
* Later we do read balancing on the read side
|
||||
* now we use the first available disk.
|
||||
*/
|
||||
|
||||
for (i = 0; i < disks; i++) {
|
||||
struct md_rdev *rdev = conf->multipaths[i].rdev;
|
||||
|
||||
if (rdev && test_bit(In_sync, &rdev->flags) &&
|
||||
!test_bit(Faulty, &rdev->flags)) {
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct mddev *mddev = mp_bh->mddev;
|
||||
struct mpconf *conf = mddev->private;
|
||||
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
list_add(&mp_bh->retry_list, &conf->retry_list);
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
|
||||
/*
|
||||
* multipath_end_bh_io() is called when we have finished servicing a multipathed
|
||||
* operation and are ready to return a success/failure code to the buffer
|
||||
* cache layer.
|
||||
*/
|
||||
static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
|
||||
{
|
||||
struct bio *bio = mp_bh->master_bio;
|
||||
struct mpconf *conf = mp_bh->mddev->private;
|
||||
|
||||
bio->bi_status = status;
|
||||
bio_endio(bio);
|
||||
mempool_free(mp_bh, &conf->pool);
|
||||
}
|
||||
|
||||
static void multipath_end_request(struct bio *bio)
|
||||
{
|
||||
struct multipath_bh *mp_bh = bio->bi_private;
|
||||
struct mpconf *conf = mp_bh->mddev->private;
|
||||
struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
|
||||
|
||||
if (!bio->bi_status)
|
||||
multipath_end_bh_io(mp_bh, 0);
|
||||
else if (!(bio->bi_opf & REQ_RAHEAD)) {
|
||||
/*
|
||||
* oops, IO error:
|
||||
*/
|
||||
md_error (mp_bh->mddev, rdev);
|
||||
pr_info("multipath: %pg: rescheduling sector %llu\n",
|
||||
rdev->bdev,
|
||||
(unsigned long long)bio->bi_iter.bi_sector);
|
||||
multipath_reschedule_retry(mp_bh);
|
||||
} else
|
||||
multipath_end_bh_io(mp_bh, bio->bi_status);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
|
||||
static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
|
||||
{
|
||||
struct mpconf *conf = mddev->private;
|
||||
struct multipath_bh * mp_bh;
|
||||
struct multipath_info *multipath;
|
||||
|
||||
if (unlikely(bio->bi_opf & REQ_PREFLUSH)
|
||||
&& md_flush_request(mddev, bio))
|
||||
return true;
|
||||
|
||||
md_account_bio(mddev, &bio);
|
||||
mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
|
||||
|
||||
mp_bh->master_bio = bio;
|
||||
mp_bh->mddev = mddev;
|
||||
|
||||
mp_bh->path = multipath_map(conf);
|
||||
if (mp_bh->path < 0) {
|
||||
bio_io_error(bio);
|
||||
mempool_free(mp_bh, &conf->pool);
|
||||
return true;
|
||||
}
|
||||
multipath = conf->multipaths + mp_bh->path;
|
||||
|
||||
bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO);
|
||||
|
||||
mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
|
||||
mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
|
||||
mp_bh->bio.bi_end_io = multipath_end_request;
|
||||
mp_bh->bio.bi_private = mp_bh;
|
||||
mddev_check_write_zeroes(mddev, &mp_bh->bio);
|
||||
submit_bio_noacct(&mp_bh->bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void multipath_status(struct seq_file *seq, struct mddev *mddev)
|
||||
{
|
||||
struct mpconf *conf = mddev->private;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&mddev->lock);
|
||||
|
||||
seq_printf (seq, " [%d/%d] [", conf->raid_disks,
|
||||
conf->raid_disks - mddev->degraded);
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
struct md_rdev *rdev = READ_ONCE(conf->multipaths[i].rdev);
|
||||
|
||||
seq_printf(seq, "%s",
|
||||
rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
|
||||
}
|
||||
seq_putc(seq, ']');
|
||||
}
|
||||
|
||||
/*
|
||||
* Careful, this can execute in IRQ contexts as well!
|
||||
*/
|
||||
static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct mpconf *conf = mddev->private;
|
||||
|
||||
if (conf->raid_disks - mddev->degraded <= 1) {
|
||||
/*
|
||||
* Uh oh, we can do nothing if this is our last path, but
|
||||
* first check if this is a queued request for a device
|
||||
* which has just failed.
|
||||
*/
|
||||
pr_warn("multipath: only one IO path left and IO error.\n");
|
||||
/* leave it active... it's all we have */
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Mark disk as unusable
|
||||
*/
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
mddev->degraded++;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
}
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
|
||||
pr_err("multipath: IO failure on %pg, disabling IO path.\n"
|
||||
"multipath: Operation continuing on %d IO paths.\n",
|
||||
rdev->bdev,
|
||||
conf->raid_disks - mddev->degraded);
|
||||
}
|
||||
|
||||
static void print_multipath_conf(struct mpconf *conf)
|
||||
{
|
||||
int i;
|
||||
struct multipath_info *tmp;
|
||||
|
||||
pr_debug("MULTIPATH conf printout:\n");
|
||||
if (!conf) {
|
||||
pr_debug("(conf==NULL)\n");
|
||||
return;
|
||||
}
|
||||
pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
|
||||
conf->raid_disks);
|
||||
|
||||
lockdep_assert_held(&conf->mddev->reconfig_mutex);
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
tmp = conf->multipaths + i;
|
||||
if (tmp->rdev)
|
||||
pr_debug(" disk%d, o:%d, dev:%pg\n",
|
||||
i,!test_bit(Faulty, &tmp->rdev->flags),
|
||||
tmp->rdev->bdev);
|
||||
}
|
||||
}
|
||||
|
||||
static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct mpconf *conf = mddev->private;
|
||||
int err = -EEXIST;
|
||||
int path;
|
||||
struct multipath_info *p;
|
||||
int first = 0;
|
||||
int last = mddev->raid_disks - 1;
|
||||
|
||||
if (rdev->raid_disk >= 0)
|
||||
first = last = rdev->raid_disk;
|
||||
|
||||
print_multipath_conf(conf);
|
||||
|
||||
for (path = first; path <= last; path++)
|
||||
if ((p=conf->multipaths+path)->rdev == NULL) {
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
|
||||
err = md_integrity_add_rdev(rdev, mddev);
|
||||
if (err)
|
||||
break;
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
mddev->degraded--;
|
||||
rdev->raid_disk = path;
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
WRITE_ONCE(p->rdev, rdev);
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
print_multipath_conf(conf);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct mpconf *conf = mddev->private;
|
||||
int err = 0;
|
||||
int number = rdev->raid_disk;
|
||||
struct multipath_info *p = conf->multipaths + number;
|
||||
|
||||
print_multipath_conf(conf);
|
||||
|
||||
if (rdev == p->rdev) {
|
||||
if (test_bit(In_sync, &rdev->flags) ||
|
||||
atomic_read(&rdev->nr_pending)) {
|
||||
pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
|
||||
err = -EBUSY;
|
||||
goto abort;
|
||||
}
|
||||
WRITE_ONCE(p->rdev, NULL);
|
||||
err = md_integrity_register(mddev);
|
||||
}
|
||||
abort:
|
||||
|
||||
print_multipath_conf(conf);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a kernel thread which:
|
||||
*
|
||||
* 1. Retries failed read operations on working multipaths.
|
||||
* 2. Updates the raid superblock when problems encounter.
|
||||
* 3. Performs writes following reads for array syncronising.
|
||||
*/
|
||||
|
||||
static void multipathd(struct md_thread *thread)
|
||||
{
|
||||
struct mddev *mddev = thread->mddev;
|
||||
struct multipath_bh *mp_bh;
|
||||
struct bio *bio;
|
||||
unsigned long flags;
|
||||
struct mpconf *conf = mddev->private;
|
||||
struct list_head *head = &conf->retry_list;
|
||||
|
||||
md_check_recovery(mddev);
|
||||
for (;;) {
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (list_empty(head))
|
||||
break;
|
||||
mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
|
||||
list_del(head->prev);
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
|
||||
bio = &mp_bh->bio;
|
||||
bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
|
||||
|
||||
if ((mp_bh->path = multipath_map (conf))<0) {
|
||||
pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n",
|
||||
bio->bi_bdev,
|
||||
(unsigned long long)bio->bi_iter.bi_sector);
|
||||
multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
|
||||
} else {
|
||||
pr_err("multipath: %pg: redirecting sector %llu to another IO path\n",
|
||||
bio->bi_bdev,
|
||||
(unsigned long long)bio->bi_iter.bi_sector);
|
||||
*bio = *(mp_bh->master_bio);
|
||||
bio->bi_iter.bi_sector +=
|
||||
conf->multipaths[mp_bh->path].rdev->data_offset;
|
||||
bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev);
|
||||
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
|
||||
bio->bi_end_io = multipath_end_request;
|
||||
bio->bi_private = mp_bh;
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
}
|
||||
|
||||
/*
 * multipath_size - report the array size in sectors.
 *
 * Always returns mddev->dev_sectors.  @sectors and @raid_disks are expected
 * to be zero; a non-zero value triggers a one-time warning because generic
 * reshape is not supported here.
 */
static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
	WARN_ONCE(raid_disks || sectors,
		  "%s does not support generic reshape\n", __func__);

	return mddev->dev_sectors;
}
|
||||
|
||||
static int multipath_run (struct mddev *mddev)
|
||||
{
|
||||
struct mpconf *conf;
|
||||
int disk_idx;
|
||||
struct multipath_info *disk;
|
||||
struct md_rdev *rdev;
|
||||
int working_disks;
|
||||
int ret;
|
||||
|
||||
if (md_check_no_bitmap(mddev))
|
||||
return -EINVAL;
|
||||
|
||||
if (mddev->level != LEVEL_MULTIPATH) {
|
||||
pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
|
||||
mdname(mddev), mddev->level);
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* copy the already verified devices into our private MULTIPATH
|
||||
* bookkeeping area. [whatever we allocate in multipath_run(),
|
||||
* should be freed in multipath_free()]
|
||||
*/
|
||||
|
||||
conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
|
||||
mddev->private = conf;
|
||||
if (!conf)
|
||||
goto out;
|
||||
|
||||
conf->multipaths = kcalloc(mddev->raid_disks,
|
||||
sizeof(struct multipath_info),
|
||||
GFP_KERNEL);
|
||||
if (!conf->multipaths)
|
||||
goto out_free_conf;
|
||||
|
||||
working_disks = 0;
|
||||
rdev_for_each(rdev, mddev) {
|
||||
disk_idx = rdev->raid_disk;
|
||||
if (disk_idx < 0 ||
|
||||
disk_idx >= mddev->raid_disks)
|
||||
continue;
|
||||
|
||||
disk = conf->multipaths + disk_idx;
|
||||
disk->rdev = rdev;
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
|
||||
if (!test_bit(Faulty, &rdev->flags))
|
||||
working_disks++;
|
||||
}
|
||||
|
||||
conf->raid_disks = mddev->raid_disks;
|
||||
conf->mddev = mddev;
|
||||
spin_lock_init(&conf->device_lock);
|
||||
INIT_LIST_HEAD(&conf->retry_list);
|
||||
|
||||
if (!working_disks) {
|
||||
pr_warn("multipath: no operational IO paths for %s\n",
|
||||
mdname(mddev));
|
||||
goto out_free_conf;
|
||||
}
|
||||
mddev->degraded = conf->raid_disks - working_disks;
|
||||
|
||||
ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
|
||||
sizeof(struct multipath_bh));
|
||||
if (ret)
|
||||
goto out_free_conf;
|
||||
|
||||
rcu_assign_pointer(mddev->thread,
|
||||
md_register_thread(multipathd, mddev, "multipath"));
|
||||
if (!mddev->thread)
|
||||
goto out_free_conf;
|
||||
|
||||
pr_info("multipath: array %s active with %d out of %d IO paths\n",
|
||||
mdname(mddev), conf->raid_disks - mddev->degraded,
|
||||
mddev->raid_disks);
|
||||
/*
|
||||
* Ok, everything is just fine now
|
||||
*/
|
||||
md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
|
||||
|
||||
if (md_integrity_register(mddev))
|
||||
goto out_free_conf;
|
||||
|
||||
return 0;
|
||||
|
||||
out_free_conf:
|
||||
mempool_exit(&conf->pool);
|
||||
kfree(conf->multipaths);
|
||||
kfree(conf);
|
||||
mddev->private = NULL;
|
||||
out:
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static void multipath_free(struct mddev *mddev, void *priv)
|
||||
{
|
||||
struct mpconf *conf = priv;
|
||||
|
||||
mempool_exit(&conf->pool);
|
||||
kfree(conf->multipaths);
|
||||
kfree(conf);
|
||||
}
|
||||
|
||||
static struct md_personality multipath_personality =
|
||||
{
|
||||
.name = "multipath",
|
||||
.level = LEVEL_MULTIPATH,
|
||||
.owner = THIS_MODULE,
|
||||
.make_request = multipath_make_request,
|
||||
.run = multipath_run,
|
||||
.free = multipath_free,
|
||||
.status = multipath_status,
|
||||
.error_handler = multipath_error,
|
||||
.hot_add_disk = multipath_add_disk,
|
||||
.hot_remove_disk= multipath_remove_disk,
|
||||
.size = multipath_size,
|
||||
};
|
||||
|
||||
/* Module init: register the multipath personality and return that result. */
static int __init multipath_init(void)
{
	int rc = register_md_personality(&multipath_personality);

	return rc;
}
|
||||
|
||||
/* Module exit: unregister the multipath personality. */
static void __exit multipath_exit(void)
{
	unregister_md_personality(&multipath_personality);
}
|
||||
|
||||
/* Use multipath_init()/multipath_exit() as the module's init and exit entry points. */
module_init(multipath_init);
|
||||
module_exit(multipath_exit);
|
||||
/* Module metadata. */
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)");
|
||||
/* Module aliases; the "--4" suffix presumably encodes LEVEL_MULTIPATH (-4). */
MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
|
||||
MODULE_ALIAS("md-multipath");
|
||||
MODULE_ALIAS("md-level--4");
|
239
drivers/md/md.c
239
drivers/md/md.c
@ -1206,6 +1206,7 @@ struct super_type {
|
||||
struct md_rdev *refdev,
|
||||
int minor_version);
|
||||
int (*validate_super)(struct mddev *mddev,
|
||||
struct md_rdev *freshest,
|
||||
struct md_rdev *rdev);
|
||||
void (*sync_super)(struct mddev *mddev,
|
||||
struct md_rdev *rdev);
|
||||
@ -1286,17 +1287,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
|
||||
rdev->sb_size = MD_SB_BYTES;
|
||||
rdev->badblocks.shift = -1;
|
||||
|
||||
if (sb->level == LEVEL_MULTIPATH)
|
||||
rdev->desc_nr = -1;
|
||||
else
|
||||
rdev->desc_nr = sb->this_disk.number;
|
||||
rdev->desc_nr = sb->this_disk.number;
|
||||
|
||||
/* not spare disk, or LEVEL_MULTIPATH */
|
||||
if (sb->level == LEVEL_MULTIPATH ||
|
||||
(rdev->desc_nr >= 0 &&
|
||||
rdev->desc_nr < MD_SB_DISKS &&
|
||||
sb->disks[rdev->desc_nr].state &
|
||||
((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
|
||||
/* not spare disk */
|
||||
if (rdev->desc_nr >= 0 && rdev->desc_nr < MD_SB_DISKS &&
|
||||
sb->disks[rdev->desc_nr].state & ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
|
||||
spare_disk = false;
|
||||
|
||||
if (!refdev) {
|
||||
@ -1343,8 +1338,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
|
||||
|
||||
/*
|
||||
* validate_super for 0.90.0
|
||||
* note: we are not using "freshest" for 0.9 superblock
|
||||
*/
|
||||
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
|
||||
static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
|
||||
{
|
||||
mdp_disk_t *desc;
|
||||
mdp_super_t *sb = page_address(rdev->sb_page);
|
||||
@ -1442,31 +1438,28 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (mddev->level != LEVEL_MULTIPATH) {
|
||||
desc = sb->disks + rdev->desc_nr;
|
||||
desc = sb->disks + rdev->desc_nr;
|
||||
|
||||
if (desc->state & (1<<MD_DISK_FAULTY))
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
else if (desc->state & (1<<MD_DISK_SYNC) /* &&
|
||||
desc->raid_disk < mddev->raid_disks */) {
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
rdev->raid_disk = desc->raid_disk;
|
||||
rdev->saved_raid_disk = desc->raid_disk;
|
||||
} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
|
||||
/* active but not in sync implies recovery up to
|
||||
* reshape position. We don't know exactly where
|
||||
* that is, so set to zero for now */
|
||||
if (mddev->minor_version >= 91) {
|
||||
rdev->recovery_offset = 0;
|
||||
rdev->raid_disk = desc->raid_disk;
|
||||
}
|
||||
}
|
||||
if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
|
||||
set_bit(WriteMostly, &rdev->flags);
|
||||
if (desc->state & (1<<MD_DISK_FAILFAST))
|
||||
set_bit(FailFast, &rdev->flags);
|
||||
} else /* MULTIPATH are always insync */
|
||||
if (desc->state & (1<<MD_DISK_FAULTY))
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
else if (desc->state & (1<<MD_DISK_SYNC)) {
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
rdev->raid_disk = desc->raid_disk;
|
||||
rdev->saved_raid_disk = desc->raid_disk;
|
||||
} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
|
||||
/* active but not in sync implies recovery up to
|
||||
* reshape position. We don't know exactly where
|
||||
* that is, so set to zero for now
|
||||
*/
|
||||
if (mddev->minor_version >= 91) {
|
||||
rdev->recovery_offset = 0;
|
||||
rdev->raid_disk = desc->raid_disk;
|
||||
}
|
||||
}
|
||||
if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
|
||||
set_bit(WriteMostly, &rdev->flags);
|
||||
if (desc->state & (1<<MD_DISK_FAILFAST))
|
||||
set_bit(FailFast, &rdev->flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1756,10 +1749,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
|
||||
&& rdev->new_data_offset < sb_start + (rdev->sb_size/512))
|
||||
return -EINVAL;
|
||||
|
||||
if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
|
||||
rdev->desc_nr = -1;
|
||||
else
|
||||
rdev->desc_nr = le32_to_cpu(sb->dev_number);
|
||||
rdev->desc_nr = le32_to_cpu(sb->dev_number);
|
||||
|
||||
if (!rdev->bb_page) {
|
||||
rdev->bb_page = alloc_page(GFP_KERNEL);
|
||||
@ -1812,12 +1802,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
|
||||
sb->level != 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* not spare disk, or LEVEL_MULTIPATH */
|
||||
if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
|
||||
(rdev->desc_nr >= 0 &&
|
||||
rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
|
||||
(le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
|
||||
le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
|
||||
/* not spare disk */
|
||||
if (rdev->desc_nr >= 0 && rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
|
||||
(le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
|
||||
le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
|
||||
spare_disk = false;
|
||||
|
||||
if (!refdev) {
|
||||
@ -1856,10 +1844,11 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
|
||||
static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
|
||||
{
|
||||
struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
|
||||
__u64 ev1 = le64_to_cpu(sb->events);
|
||||
int role;
|
||||
|
||||
rdev->raid_disk = -1;
|
||||
clear_bit(Faulty, &rdev->flags);
|
||||
@ -1952,13 +1941,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
|
||||
}
|
||||
} else if (mddev->pers == NULL) {
|
||||
/* Insist on a good event counter while assembling, except for
|
||||
* spares (which don't need an event count) */
|
||||
++ev1;
|
||||
* spares (which don't need an event count).
|
||||
* Similar to mdadm, we allow event counter difference of 1
|
||||
* from the freshest device.
|
||||
*/
|
||||
if (rdev->desc_nr >= 0 &&
|
||||
rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
|
||||
(le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
|
||||
le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
|
||||
if (ev1 < mddev->events)
|
||||
if (ev1 + 1 < mddev->events)
|
||||
return -EINVAL;
|
||||
} else if (mddev->bitmap) {
|
||||
/* If adding to array with a bitmap, then we can accept an
|
||||
@ -1973,58 +1964,85 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
|
||||
/* just a hot-add of a new device, leave raid_disk at -1 */
|
||||
return 0;
|
||||
}
|
||||
if (mddev->level != LEVEL_MULTIPATH) {
|
||||
int role;
|
||||
if (rdev->desc_nr < 0 ||
|
||||
rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
|
||||
role = MD_DISK_ROLE_SPARE;
|
||||
rdev->desc_nr = -1;
|
||||
} else
|
||||
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
||||
switch(role) {
|
||||
case MD_DISK_ROLE_SPARE: /* spare */
|
||||
break;
|
||||
case MD_DISK_ROLE_FAULTY: /* faulty */
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
break;
|
||||
case MD_DISK_ROLE_JOURNAL: /* journal device */
|
||||
if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
|
||||
/* journal device without journal feature */
|
||||
pr_warn("md: journal device provided without journal feature, ignoring the device\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
set_bit(Journal, &rdev->flags);
|
||||
rdev->journal_tail = le64_to_cpu(sb->journal_tail);
|
||||
rdev->raid_disk = 0;
|
||||
break;
|
||||
default:
|
||||
rdev->saved_raid_disk = role;
|
||||
if ((le32_to_cpu(sb->feature_map) &
|
||||
MD_FEATURE_RECOVERY_OFFSET)) {
|
||||
rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
|
||||
if (!(le32_to_cpu(sb->feature_map) &
|
||||
MD_FEATURE_RECOVERY_BITMAP))
|
||||
rdev->saved_raid_disk = -1;
|
||||
} else {
|
||||
/*
|
||||
* If the array is FROZEN, then the device can't
|
||||
* be in_sync with rest of array.
|
||||
*/
|
||||
if (!test_bit(MD_RECOVERY_FROZEN,
|
||||
&mddev->recovery))
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
}
|
||||
rdev->raid_disk = role;
|
||||
break;
|
||||
|
||||
if (rdev->desc_nr < 0 ||
|
||||
rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
|
||||
role = MD_DISK_ROLE_SPARE;
|
||||
rdev->desc_nr = -1;
|
||||
} else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
|
||||
/*
|
||||
* If we are assembling, and our event counter is smaller than the
|
||||
* highest event counter, we cannot trust our superblock about the role.
|
||||
* It could happen that our rdev was marked as Faulty, and all other
|
||||
* superblocks were updated with +1 event counter.
|
||||
* Then, before the next superblock update, which typically happens when
|
||||
* remove_and_add_spares() removes the device from the array, there was
|
||||
* a crash or reboot.
|
||||
* If we allow current rdev without consulting the freshest superblock,
|
||||
* we could cause data corruption.
|
||||
* Note that in this case our event counter is smaller by 1 than the
|
||||
* highest, otherwise, this rdev would not be allowed into array;
|
||||
* both kernel and mdadm allow event counter difference of 1.
|
||||
*/
|
||||
struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
|
||||
u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);
|
||||
|
||||
if (rdev->desc_nr >= freshest_max_dev) {
|
||||
/* this is unexpected, better not proceed */
|
||||
pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
|
||||
mdname(mddev), rdev->bdev, rdev->desc_nr,
|
||||
freshest->bdev, freshest_max_dev);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (sb->devflags & WriteMostly1)
|
||||
set_bit(WriteMostly, &rdev->flags);
|
||||
if (sb->devflags & FailFast1)
|
||||
set_bit(FailFast, &rdev->flags);
|
||||
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
} else /* MULTIPATH are always insync */
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
|
||||
role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
|
||||
pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n",
|
||||
mdname(mddev), rdev->bdev, role, role, freshest->bdev);
|
||||
} else {
|
||||
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
||||
}
|
||||
switch (role) {
|
||||
case MD_DISK_ROLE_SPARE: /* spare */
|
||||
break;
|
||||
case MD_DISK_ROLE_FAULTY: /* faulty */
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
break;
|
||||
case MD_DISK_ROLE_JOURNAL: /* journal device */
|
||||
if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
|
||||
/* journal device without journal feature */
|
||||
pr_warn("md: journal device provided without journal feature, ignoring the device\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
set_bit(Journal, &rdev->flags);
|
||||
rdev->journal_tail = le64_to_cpu(sb->journal_tail);
|
||||
rdev->raid_disk = 0;
|
||||
break;
|
||||
default:
|
||||
rdev->saved_raid_disk = role;
|
||||
if ((le32_to_cpu(sb->feature_map) &
|
||||
MD_FEATURE_RECOVERY_OFFSET)) {
|
||||
rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
|
||||
if (!(le32_to_cpu(sb->feature_map) &
|
||||
MD_FEATURE_RECOVERY_BITMAP))
|
||||
rdev->saved_raid_disk = -1;
|
||||
} else {
|
||||
/*
|
||||
* If the array is FROZEN, then the device can't
|
||||
* be in_sync with rest of array.
|
||||
*/
|
||||
if (!test_bit(MD_RECOVERY_FROZEN,
|
||||
&mddev->recovery))
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
}
|
||||
rdev->raid_disk = role;
|
||||
break;
|
||||
}
|
||||
if (sb->devflags & WriteMostly1)
|
||||
set_bit(WriteMostly, &rdev->flags);
|
||||
if (sb->devflags & FailFast1)
|
||||
set_bit(FailFast, &rdev->flags);
|
||||
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -2842,10 +2860,6 @@ rewrite:
|
||||
} else
|
||||
pr_debug("md: %pg (skipping faulty)\n",
|
||||
rdev->bdev);
|
||||
|
||||
if (mddev->level == LEVEL_MULTIPATH)
|
||||
/* only need to write one superblock... */
|
||||
break;
|
||||
}
|
||||
if (md_super_wait(mddev) < 0)
|
||||
goto rewrite;
|
||||
@ -2887,7 +2901,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
|
||||
* and should be added immediately.
|
||||
*/
|
||||
super_types[mddev->major_version].
|
||||
validate_super(mddev, rdev);
|
||||
validate_super(mddev, NULL/*freshest*/, rdev);
|
||||
err = mddev->pers->hot_add_disk(mddev, rdev);
|
||||
if (err) {
|
||||
md_kick_rdev_from_array(rdev);
|
||||
@ -3824,7 +3838,7 @@ static int analyze_sbs(struct mddev *mddev)
|
||||
}
|
||||
|
||||
super_types[mddev->major_version].
|
||||
validate_super(mddev, freshest);
|
||||
validate_super(mddev, NULL/*freshest*/, freshest);
|
||||
|
||||
i = 0;
|
||||
rdev_for_each_safe(rdev, tmp, mddev) {
|
||||
@ -3839,20 +3853,15 @@ static int analyze_sbs(struct mddev *mddev)
|
||||
}
|
||||
if (rdev != freshest) {
|
||||
if (super_types[mddev->major_version].
|
||||
validate_super(mddev, rdev)) {
|
||||
validate_super(mddev, freshest, rdev)) {
|
||||
pr_warn("md: kicking non-fresh %pg from array!\n",
|
||||
rdev->bdev);
|
||||
md_kick_rdev_from_array(rdev);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (mddev->level == LEVEL_MULTIPATH) {
|
||||
rdev->desc_nr = i++;
|
||||
rdev->raid_disk = rdev->desc_nr;
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
} else if (rdev->raid_disk >=
|
||||
(mddev->raid_disks - min(0, mddev->delta_disks)) &&
|
||||
!test_bit(Journal, &rdev->flags)) {
|
||||
if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks)) &&
|
||||
!test_bit(Journal, &rdev->flags)) {
|
||||
rdev->raid_disk = -1;
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
}
|
||||
@ -6847,7 +6856,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
||||
rdev->saved_raid_disk = rdev->raid_disk;
|
||||
} else
|
||||
super_types[mddev->major_version].
|
||||
validate_super(mddev, rdev);
|
||||
validate_super(mddev, NULL/*freshest*/, rdev);
|
||||
if ((info->state & (1<<MD_DISK_SYNC)) &&
|
||||
rdev->raid_disk != info->raid_disk) {
|
||||
/* This was a hot-add request, but events doesn't
|
||||
@ -8090,7 +8099,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
return;
|
||||
mddev->pers->error_handler(mddev, rdev);
|
||||
|
||||
if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
|
||||
if (mddev->pers->level == 0)
|
||||
return;
|
||||
|
||||
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
|
||||
|
@ -173,3 +173,57 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
|
||||
else
|
||||
md_bitmap_unplug(bitmap);
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by fix_read_error() to decay the per rdev read_errors.
|
||||
* We halve the read error count for every hour that has elapsed
|
||||
* since the last recorded read error.
|
||||
*/
|
||||
static inline void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
long cur_time_mon;
|
||||
unsigned long hours_since_last;
|
||||
unsigned int read_errors = atomic_read(&rdev->read_errors);
|
||||
|
||||
cur_time_mon = ktime_get_seconds();
|
||||
|
||||
if (rdev->last_read_error == 0) {
|
||||
/* first time we've seen a read error */
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
return;
|
||||
}
|
||||
|
||||
hours_since_last = (long)(cur_time_mon -
|
||||
rdev->last_read_error) / 3600;
|
||||
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
|
||||
/*
|
||||
* if hours_since_last is > the number of bits in read_errors
|
||||
* just set read errors to 0. We do this to avoid
|
||||
* overflowing the shift of read_errors by hours_since_last.
|
||||
*/
|
||||
if (hours_since_last >= 8 * sizeof(read_errors))
|
||||
atomic_set(&rdev->read_errors, 0);
|
||||
else
|
||||
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
|
||||
}
|
||||
|
||||
static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
|
||||
int read_errors;
|
||||
|
||||
check_decay_read_errors(mddev, rdev);
|
||||
read_errors = atomic_inc_return(&rdev->read_errors);
|
||||
if (read_errors > max_read_errors) {
|
||||
pr_notice("md/"RAID_1_10_NAME":%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
|
||||
mdname(mddev), rdev->bdev, read_errors, max_read_errors);
|
||||
pr_notice("md/"RAID_1_10_NAME":%s: %pg: Failing raid device\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
md_error(mddev, rdev);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
|
||||
#define raid1_log(md, fmt, args...) \
|
||||
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
|
||||
|
||||
#define RAID_1_10_NAME "raid1"
|
||||
#include "raid1-10.c"
|
||||
|
||||
#define START(node) ((node)->start)
|
||||
@ -1124,8 +1125,6 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
|
||||
|
||||
behind_bio = bio_alloc_bioset(NULL, vcnt, 0, GFP_NOIO,
|
||||
&r1_bio->mddev->bio_set);
|
||||
if (!behind_bio)
|
||||
return;
|
||||
|
||||
/* discard op, we don't support writezero/writesame yet */
|
||||
if (!bio_has_data(bio)) {
|
||||
@ -2257,16 +2256,24 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
|
||||
* 3. Performs writes following reads for array synchronising.
|
||||
*/
|
||||
|
||||
static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||
sector_t sect, int sectors)
|
||||
static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
{
|
||||
sector_t sect = r1_bio->sector;
|
||||
int sectors = r1_bio->sectors;
|
||||
int read_disk = r1_bio->read_disk;
|
||||
struct mddev *mddev = conf->mddev;
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[read_disk].rdev);
|
||||
|
||||
if (exceed_read_errors(mddev, rdev)) {
|
||||
r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED;
|
||||
return;
|
||||
}
|
||||
|
||||
while(sectors) {
|
||||
int s = sectors;
|
||||
int d = read_disk;
|
||||
int success = 0;
|
||||
int start;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
if (s > (PAGE_SIZE>>9))
|
||||
s = PAGE_SIZE >> 9;
|
||||
@ -2507,8 +2514,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
if (mddev->ro == 0
|
||||
&& !test_bit(FailFast, &rdev->flags)) {
|
||||
freeze_array(conf, 1);
|
||||
fix_read_error(conf, r1_bio->read_disk,
|
||||
r1_bio->sector, r1_bio->sectors);
|
||||
fix_read_error(conf, r1_bio);
|
||||
unfreeze_array(conf);
|
||||
} else if (mddev->ro == 0 && test_bit(FailFast, &rdev->flags)) {
|
||||
md_error(mddev, rdev);
|
||||
|
@ -19,6 +19,8 @@
|
||||
#include <linux/raid/md_p.h>
|
||||
#include <trace/events/block.h>
|
||||
#include "md.h"
|
||||
|
||||
#define RAID_1_10_NAME "raid10"
|
||||
#include "raid10.h"
|
||||
#include "raid0.h"
|
||||
#include "md-bitmap.h"
|
||||
@ -2592,42 +2594,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by fix_read_error() to decay the per rdev read_errors.
|
||||
* We halve the read error count for every hour that has elapsed
|
||||
* since the last recorded read error.
|
||||
*
|
||||
*/
|
||||
static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
long cur_time_mon;
|
||||
unsigned long hours_since_last;
|
||||
unsigned int read_errors = atomic_read(&rdev->read_errors);
|
||||
|
||||
cur_time_mon = ktime_get_seconds();
|
||||
|
||||
if (rdev->last_read_error == 0) {
|
||||
/* first time we've seen a read error */
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
return;
|
||||
}
|
||||
|
||||
hours_since_last = (long)(cur_time_mon -
|
||||
rdev->last_read_error) / 3600;
|
||||
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
|
||||
/*
|
||||
* if hours_since_last is > the number of bits in read_errors
|
||||
* just set read errors to 0. We do this to avoid
|
||||
* overflowing the shift of read_errors by hours_since_last.
|
||||
*/
|
||||
if (hours_since_last >= 8 * sizeof(read_errors))
|
||||
atomic_set(&rdev->read_errors, 0);
|
||||
else
|
||||
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
|
||||
}
|
||||
|
||||
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
|
||||
int sectors, struct page *page, enum req_op op)
|
||||
{
|
||||
@ -2665,7 +2631,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
int sect = 0; /* Offset from r10_bio->sector */
|
||||
int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
|
||||
struct md_rdev *rdev;
|
||||
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
|
||||
int d = r10_bio->devs[slot].devnum;
|
||||
|
||||
/* still own a reference to this rdev, so it cannot
|
||||
@ -2678,15 +2643,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
more fix_read_error() attempts */
|
||||
return;
|
||||
|
||||
check_decay_read_errors(mddev, rdev);
|
||||
atomic_inc(&rdev->read_errors);
|
||||
if (atomic_read(&rdev->read_errors) > max_read_errors) {
|
||||
pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
|
||||
mdname(mddev), rdev->bdev,
|
||||
atomic_read(&rdev->read_errors), max_read_errors);
|
||||
pr_notice("md/raid10:%s: %pg: Failing raid device\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
md_error(mddev, rdev);
|
||||
if (exceed_read_errors(mddev, rdev)) {
|
||||
r10_bio->devs[slot].bio = IO_BLOCKED;
|
||||
return;
|
||||
}
|
||||
|
@ -2,15 +2,11 @@
|
||||
/*
|
||||
md_p.h : physical layout of Linux RAID devices
|
||||
Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
|
||||
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
(for example /usr/src/linux/COPYING); if not, write to the Free
|
||||
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#ifndef _MD_P_H
|
||||
@ -237,7 +233,7 @@ struct mdp_superblock_1 {
|
||||
char set_name[32]; /* set and interpreted by user-space */
|
||||
|
||||
__le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
|
||||
__le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */
|
||||
__le32 level; /* 0,1,4,5 */
|
||||
__le32 layout; /* only for raid5 and raid10 currently */
|
||||
__le64 size; /* used size of component devices, in 512byte sectors */
|
||||
|
||||
|
@ -2,15 +2,11 @@
|
||||
/*
|
||||
md_u.h : user <=> kernel API between Linux raidtools and RAID drivers
|
||||
Copyright (C) 1998 Ingo Molnar
|
||||
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
(for example /usr/src/linux/COPYING); if not, write to the Free
|
||||
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#ifndef _UAPI_MD_U_H
|
||||
@ -107,11 +103,6 @@ typedef struct mdu_array_info_s {
|
||||
|
||||
} mdu_array_info_t;
|
||||
|
||||
/* non-obvious values for 'level' */
|
||||
#define LEVEL_MULTIPATH (-4)
|
||||
#define LEVEL_LINEAR (-1)
|
||||
#define LEVEL_FAULTY (-5)
|
||||
|
||||
/* we need a value for 'no level specified' and 0
|
||||
* means 'raid0', so we need something else. This is
|
||||
* for internal use only
|
||||
|
Loading…
x
Reference in New Issue
Block a user