1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-12-21 13:34:40 +03:00

Support the ability to replace specific devices in a RAID array.

RAID is not like traditional LVM mirroring.  LVM mirroring required failed
devices to be removed or the logical volume would simply hang.  RAID arrays can
keep on running with failed devices.  In fact, for RAID types other than RAID1,
removing a device would mean substituting an error target or converting to a
lower level RAID (e.g. RAID6 -> RAID5, or RAID4/5 to RAID0).  Therefore, rather
than removing a failed device unconditionally and potentially allocating a
replacement, RAID allows the user to "replace" a device with a new one.  This
approach is a 1-step solution vs the current 2-step solution.

example> lvconvert --replace <dev_to_remove> vg/lv [possible_replacement_PVs]

'--replace' can be specified more than once.

example> lvconvert --replace /dev/sdb1 --replace /dev/sdc1 vg/lv
This commit is contained in:
Jonathan Earl Brassow 2011-11-30 02:02:10 +00:00
parent 910440212b
commit 0c506d9a40
10 changed files with 317 additions and 11 deletions

View File

@ -1,5 +1,6 @@
Version 2.02.89 -
==================================
Support the ability to replace specific devices in a RAID array via lvconvert.
Add activation/use_linear_target enabled by default.
Use gcc warning options only with .c to .o compilation.
Move y/n prompts to stderr and repeat if response has both 'n' and 'y'.

View File

@ -57,6 +57,7 @@ static const struct flag _lv_flags[] = {
{PVMOVE, "PVMOVE", STATUS_FLAG},
{LOCKED, "LOCKED", STATUS_FLAG},
{LV_NOTSYNCED, "NOTSYNCED", STATUS_FLAG},
{LV_REBUILD, "REBUILD", STATUS_FLAG},
{RAID, NULL, 0},
{RAID_META, NULL, 0},
{RAID_IMAGE, NULL, 0},

View File

@ -61,7 +61,9 @@
//#define VIRTUAL UINT64_C(0x00010000) /* LV - internal use only */
#define MIRROR_LOG UINT64_C(0x00020000) /* LV */
#define MIRROR_IMAGE UINT64_C(0x00040000) /* LV */
#define LV_NOTSYNCED UINT64_C(0x00080000) /* LV */
#define LV_REBUILD UINT64_C(0x00100000) /* LV - internal use only */
//#define PRECOMMITTED UINT64_C(0x00200000) /* VG - internal use only */
#define CONVERTING UINT64_C(0x00400000) /* LV */
@ -788,6 +790,8 @@ int lv_raid_split_and_track(struct logical_volume *lv,
int lv_raid_merge(struct logical_volume *lv);
int lv_raid_reshape(struct logical_volume *lv,
const struct segment_type *new_segtype);
int lv_raid_replace(struct logical_volume *lv, struct dm_list *remove_pvs,
struct dm_list *allocate_pvs);
/* -- metadata/raid_manip.c */

View File

@ -440,7 +440,7 @@ static int _alloc_image_component(struct logical_volume *lv,
return 0;
}
status = LVM_READ | LVM_WRITE | LV_NOTSYNCED | type;
status = LVM_READ | LVM_WRITE | LV_REBUILD | type;
tmp_lv = lv_create_empty(img_name, NULL, status, ALLOC_INHERIT, lv->vg);
if (!tmp_lv) {
log_error("Failed to allocate new raid component, %s", img_name);
@ -569,6 +569,7 @@ static int _alloc_rmeta_for_lv(struct logical_volume *data_lv,
static int _raid_add_images(struct logical_volume *lv,
uint32_t new_count, struct dm_list *pvs)
{
int rebuild_flag_cleared = 0;
uint32_t s;
uint32_t old_count = lv_raid_image_count(lv);
uint32_t count = new_count - old_count;
@ -588,7 +589,7 @@ static int _raid_add_images(struct logical_volume *lv,
*/
if (seg_is_linear(seg)) {
/* A complete resync will be done, no need to mark each sub-lv */
status_mask = ~(LV_NOTSYNCED);
status_mask = ~(LV_REBUILD);
if (!(lvl = dm_pool_alloc(lv->vg->vgmem, sizeof(*lvl)))) {
log_error("Memory allocation failed");
@ -751,6 +752,27 @@ to be left for these sub-lvs.
return 0;
}
/*
* Now that the 'REBUILD' has made its way to the kernel, we must
* remove the flag so that the individual devices are not rebuilt
* upon every activation.
*/
seg = first_seg(lv);
for (s = 0; s < seg->area_count; s++) {
if ((seg_lv(seg, s)->status & LV_REBUILD) ||
(seg_metalv(seg, s)->status & LV_REBUILD)) {
seg_metalv(seg, s)->status &= ~LV_REBUILD;
seg_lv(seg, s)->status &= ~LV_REBUILD;
rebuild_flag_cleared = 1;
}
}
if (rebuild_flag_cleared &&
(!vg_write(lv->vg) || !vg_commit(lv->vg))) {
log_error("Failed to clear REBUILD flag for %s/%s components",
lv->vg->name, lv->name);
return 0;
}
return 1;
fail:
@ -1335,8 +1357,8 @@ static int _convert_mirror_to_raid1(struct logical_volume *lv,
log_debug("Adding %s to %s", lvl->lv->name, lv->name);
/* Images are known to be in-sync */
lvl->lv->status &= ~LV_NOTSYNCED;
first_seg(lvl->lv)->status &= ~LV_NOTSYNCED;
lvl->lv->status &= ~LV_REBUILD;
first_seg(lvl->lv)->status &= ~LV_REBUILD;
lv_set_hidden(lvl->lv);
if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0,
@ -1428,3 +1450,216 @@ int lv_raid_reshape(struct logical_volume *lv,
seg->segtype->name, new_segtype->name);
return 0;
}
/*
* lv_raid_replace
* @lv
* @replace_pvs
* @allocatable_pvs
*
* Replace the specified PVs.
*/
int lv_raid_replace(struct logical_volume *lv,
struct dm_list *remove_pvs,
struct dm_list *allocate_pvs)
{
uint32_t s, sd, match_count = 0;
struct dm_list old_meta_lvs, old_data_lvs;
struct dm_list new_meta_lvs, new_data_lvs;
struct lv_segment *raid_seg = first_seg(lv);
struct lv_list *lvl;
char *tmp_names[raid_seg->area_count * 2];
dm_list_init(&old_meta_lvs);
dm_list_init(&old_data_lvs);
dm_list_init(&new_meta_lvs);
dm_list_init(&new_data_lvs);
/*
* How many sub-LVs are being removed?
*/
for (s = 0; s < raid_seg->area_count; s++) {
if ((seg_type(raid_seg, s) == AREA_UNASSIGNED) ||
(seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
log_error("Unable to replace RAID images while the "
"array has unassigned areas");
return 0;
}
if (_lv_is_on_pvs(seg_lv(raid_seg, s), remove_pvs) ||
_lv_is_on_pvs(seg_metalv(raid_seg, s), remove_pvs))
match_count++;
}
if (!match_count) {
log_verbose("%s/%s does not contain devices specified"
" for replacement", lv->vg->name, lv->name);
return 1;
} else if (match_count == raid_seg->area_count) {
log_error("Unable to remove all PVs from %s/%s at once.",
lv->vg->name, lv->name);
return 0;
} else if (raid_seg->segtype->parity_devs &&
(match_count > raid_seg->segtype->parity_devs)) {
log_error("Unable to replace more than %u PVs from (%s) %s/%s",
raid_seg->segtype->parity_devs,
raid_seg->segtype->name, lv->vg->name, lv->name);
return 0;
}
/*
* Allocate the new image components first
* - This makes it easy to avoid all currently used devs
* - We can immediately tell if there is enough space
*
* - We need to change the LV names when we insert them.
*/
if (!_alloc_image_components(lv, allocate_pvs, match_count,
&new_meta_lvs, &new_data_lvs)) {
log_error("Failed to allocate replacement images for %s/%s",
lv->vg->name, lv->name);
return 0;
}
/*
* Remove the old images
* - If we did this before the allocate, we wouldn't have to rename
* the allocated images, but it'd be much harder to avoid the right
* PVs during allocation.
*/
if (!_raid_extract_images(lv, raid_seg->area_count - match_count,
remove_pvs, 0,
&old_meta_lvs, &old_data_lvs)) {
log_error("Failed to remove the specified images from %s/%s",
lv->vg->name, lv->name);
return 0;
}
/*
* Skip metadata operation normally done to clear the metadata sub-LVs.
*
* The LV_REBUILD flag is set on the new sub-LVs,
* so they will be rebuilt and we don't need to clear the metadata dev.
*/
for (s = 0; s < raid_seg->area_count; s++) {
tmp_names[s] = NULL;
sd = s + raid_seg->area_count;
tmp_names[sd] = NULL;
if ((seg_type(raid_seg, s) == AREA_UNASSIGNED) &&
(seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
/* Adjust the new metadata LV name */
lvl = dm_list_item(dm_list_first(&new_meta_lvs),
struct lv_list);
dm_list_del(&lvl->list);
tmp_names[s] = dm_pool_alloc(lv->vg->vgmem,
strlen(lvl->lv->name) + 1);
if (!tmp_names[s])
return_0;
if (dm_snprintf(tmp_names[s], strlen(lvl->lv->name) + 1,
"%s_rmeta_%u", lv->name, s) < 0)
return_0;
if (!set_lv_segment_area_lv(raid_seg, s, lvl->lv, 0,
lvl->lv->status)) {
log_error("Failed to add %s to %s",
lvl->lv->name, lv->name);
return 0;
}
lv_set_hidden(lvl->lv);
/* Adjust the new data LV name */
lvl = dm_list_item(dm_list_first(&new_data_lvs),
struct lv_list);
dm_list_del(&lvl->list);
tmp_names[sd] = dm_pool_alloc(lv->vg->vgmem,
strlen(lvl->lv->name) + 1);
if (!tmp_names[sd])
return_0;
if (dm_snprintf(tmp_names[sd], strlen(lvl->lv->name) + 1,
"%s_rimage_%u", lv->name, s) < 0)
return_0;
if (!set_lv_segment_area_lv(raid_seg, s, lvl->lv, 0,
lvl->lv->status)) {
log_error("Failed to add %s to %s",
lvl->lv->name, lv->name);
return 0;
}
lv_set_hidden(lvl->lv);
}
}
if (!vg_write(lv->vg)) {
log_error("Failed to write changes to %s in %s",
lv->name, lv->vg->name);
return 0;
}
if (!suspend_lv(lv->vg->cmd, lv)) {
log_error("Failed to suspend %s/%s before committing changes",
lv->vg->name, lv->name);
return 0;
}
if (!vg_commit(lv->vg)) {
log_error("Failed to commit changes to %s in %s",
lv->name, lv->vg->name);
return 0;
}
if (!resume_lv(lv->vg->cmd, lv)) {
log_error("Failed to resume %s/%s after committing changes",
lv->vg->name, lv->name);
return 0;
}
dm_list_iterate_items(lvl, &old_meta_lvs) {
if (!deactivate_lv(lv->vg->cmd, lvl->lv))
return_0;
if (!lv_remove(lvl->lv))
return_0;
}
dm_list_iterate_items(lvl, &old_data_lvs) {
if (!deactivate_lv(lv->vg->cmd, lvl->lv))
return_0;
if (!lv_remove(lvl->lv))
return_0;
}
/* Update new sub-LVs to correct name and clear REBUILD flag */
for (s = 0; s < raid_seg->area_count; s++) {
sd = s + raid_seg->area_count;
if (tmp_names[s] && tmp_names[sd]) {
seg_metalv(raid_seg, s)->name = tmp_names[s];
seg_lv(raid_seg, s)->name = tmp_names[sd];
seg_metalv(raid_seg, s)->status &= ~LV_REBUILD;
seg_lv(raid_seg, s)->status &= ~LV_REBUILD;
}
}
if (!vg_write(lv->vg)) {
log_error("Failed to write changes to %s in %s",
lv->name, lv->vg->name);
return 0;
}
if (!suspend_lv(lv->vg->cmd, lv)) {
log_error("Failed to suspend %s/%s before committing changes",
lv->vg->name, lv->name);
return 0;
}
if (!vg_commit(lv->vg)) {
log_error("Failed to commit changes to %s in %s",
lv->name, lv->vg->name);
return 0;
}
if (!resume_lv(lv->vg->cmd, lv)) {
log_error("Failed to resume %s/%s after committing changes",
lv->vg->name, lv->name);
return 0;
}
return 1;
}

View File

@ -183,7 +183,7 @@ static int _raid_add_target_line(struct dev_manager *dm __attribute__((unused)),
}
for (s = 0; s < seg->area_count; s++)
if (seg_lv(seg, s)->status & LV_NOTSYNCED)
if (seg_lv(seg, s)->status & LV_REBUILD)
rebuilds |= 1 << s;
if (!dm_tree_node_add_raid_target(node, len, _raid_name(seg),

View File

@ -1653,10 +1653,10 @@ static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command,
_cmd_data_v4[dmt->type].name,
strerror(errno));
else
log_error("device-mapper: %s ioctl "
log_error("device-mapper: %s ioctl on %s "
"failed: %s",
_cmd_data_v4[dmt->type].name,
strerror(errno));
dmi->name, strerror(errno));
/*
* It's sometimes worth retrying after EBUSY in case

View File

@ -52,6 +52,14 @@ LogicalVolume[Path]...
[\-\-version]
LogicalVolume[Path] [PhysicalVolume[Path]...]
.br
.B lvconvert
\-\-replace PhysicalVolume
[\-h|\-?|\-\-help]
[\-v|\-\-verbose]
[\-\-version]
LogicalVolume[Path] [PhysicalVolume[Path]...]
.SH DESCRIPTION
lvconvert is used to change the segment type (i.e. linear, mirror, etc) or
characteristics of a logical volume. For example, it can add or remove the
@ -181,6 +189,14 @@ Use \-f if you do not want any replacement. Additionally, you may use
viz. activation/mirror_log_fault_policy or
activation/mirror_device_fault_policy.
.br
.TP
.I \-\-replace PhysicalVolume
Remove the specified device (PhysicalVolume) and replace it with one that is
available in the volume group or from the specific list provided. This option
is only available to RAID segment types (e.g. "raid1", "raid5", etc).
.br
.SH Examples
"lvconvert -m1 vg00/lvol1"
.br
@ -270,6 +286,14 @@ Merge an image that was detached temporarily from its mirror with
the '\-\-trackchanges' argument back into its original mirror and
bring its contents back up-to-date.
.br
"lvconvert --replace /dev/sdb1 vg00/my_raid1 /dev/sdf1"
.br
Replace the physical volume "/dev/sdb1" in the RAID1 logical volume "my_raid1"
with the specified physical volume "/dev/sdf1". Had the argument "/dev/sdf1"
been left out, lvconvert would attempt to find a suitable device from those
available in the volume group.
.SH SEE ALSO
.BR lvm (8),
.BR vgcreate (8),

View File

@ -55,6 +55,7 @@ arg(corelog_ARG, '\0', "corelog", NULL, 0)
arg(mirrorlog_ARG, '\0', "mirrorlog", string_arg, 0)
arg(splitmirrors_ARG, '\0', "splitmirrors", int_arg, 0)
arg(trackchanges_ARG, '\0', "trackchanges", NULL, 0)
arg(replace_ARG, '\0', "replace", string_arg, ARG_GROUPABLE)
arg(repair_ARG, '\0', "repair", NULL, 0)
arg(use_policies_ARG, '\0', "use-policies", NULL, 0)
arg(monitor_ARG, '\0', "monitor", yes_no_arg, 0)

View File

@ -100,6 +100,7 @@ xx(lvconvert,
"[-m|--mirrors Mirrors [{--mirrorlog {disk|core|mirrored}|--corelog}]]\n"
"\t[--type SegmentType]\n"
"\t[--repair [--use-policies]]\n"
"\t[--replace PhysicalVolume]\n"
"\t[-R|--regionsize MirrorLogRegionSize]\n"
"\t[--alloc AllocationPolicy]\n"
"\t[-b|--background]\n"
@ -141,8 +142,8 @@ xx(lvconvert,
alloc_ARG, background_ARG, chunksize_ARG, corelog_ARG, interval_ARG,
merge_ARG, mirrorlog_ARG, mirrors_ARG, name_ARG, noudevsync_ARG,
regionsize_ARG, repair_ARG, snapshot_ARG, splitmirrors_ARG, trackchanges_ARG,
type_ARG, stripes_long_ARG, stripesize_ARG, test_ARG,
regionsize_ARG, repair_ARG, replace_ARG, snapshot_ARG, splitmirrors_ARG,
trackchanges_ARG, type_ARG, stripes_long_ARG, stripesize_ARG, test_ARG,
use_policies_ARG, yes_ARG, force_ARG, zero_ARG)
xx(lvcreate,

View File

@ -48,6 +48,10 @@ struct lvconvert_params {
char **pvs;
struct dm_list *pvh;
int replace_pv_count;
char **replace_pvs;
struct dm_list *replace_pvh;
struct logical_volume *lv_to_poll;
};
@ -122,6 +126,9 @@ static int _lvconvert_name_params(struct lvconvert_params *lp,
static int _read_params(struct lvconvert_params *lp, struct cmd_context *cmd,
int argc, char **argv)
{
int i;
const char *tmp_str;
struct arg_value_group_list *group;
int region_size;
int pagesize = lvm_getpagesize();
@ -243,7 +250,27 @@ static int _read_params(struct lvconvert_params *lp, struct cmd_context *cmd,
SEG_CANNOT_BE_ZEROED) ?
"n" : "y"), "n");
} else { /* Mirrors */
} else if (arg_count(cmd, replace_ARG)) { /* RAID device replacement */
lp->replace_pv_count = arg_count(cmd, replace_ARG);
lp->replace_pvs = dm_pool_alloc(cmd->mem, sizeof(char *) * lp->replace_pv_count);
if (!lp->replace_pvs)
return_0;
i = 0;
dm_list_iterate_items(group, &cmd->arg_value_groups) {
if (!grouped_arg_is_set(group->arg_values, replace_ARG))
continue;
if (!(tmp_str = grouped_arg_str_value(group->arg_values,
replace_ARG,
NULL))) {
log_error("Failed to get '--replace' argument");
return 0;
}
if (!(lp->replace_pvs[i++] = dm_pool_strdup(cmd->mem,
tmp_str)))
return_0;
}
} else { /* Mirrors (and some RAID functions) */
if (arg_count(cmd, chunksize_ARG)) {
log_error("--chunksize is only available with "
"snapshots");
@ -309,7 +336,7 @@ static int _read_params(struct lvconvert_params *lp, struct cmd_context *cmd,
return_0;
}
if (activation() && lp->segtype->ops->target_present &&
if (activation() && lp->segtype && lp->segtype->ops->target_present &&
!lp->segtype->ops->target_present(cmd, NULL, NULL)) {
log_error("%s: Required device-mapper target(s) not "
"detected in your kernel", lp->segtype->name);
@ -1455,6 +1482,9 @@ static int lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *lp
if (arg_count(cmd, type_ARG))
return lv_raid_reshape(lv, lp->segtype);
if (arg_count(cmd, replace_ARG))
return lv_raid_replace(lv, lp->replace_pvh, lp->pvh);
log_error("Conversion operation not yet supported.");
return 0;
}
@ -1646,6 +1676,9 @@ static int _lvconvert_single(struct cmd_context *cmd, struct logical_volume *lv,
return ECMD_FAILED;
}
if (!lp->segtype)
lp->segtype = first_seg(lv)->segtype;
if (lp->merge) {
if (!lv_is_cow(lv)) {
log_error("Logical volume \"%s\" is not a snapshot",
@ -1785,6 +1818,12 @@ static int lvconvert_single(struct cmd_context *cmd, struct lvconvert_params *lp
} else
lp->pvh = &lv->vg->pvs;
if (lp->replace_pv_count &&
!(lp->replace_pvh = create_pv_list(cmd->mem, lv->vg,
lp->replace_pv_count,
lp->replace_pvs, 0)))
goto_bad;
lp->lv_to_poll = lv;
ret = _lvconvert_single(cmd, lv, lp);
bad: