diff --git a/lib/activate/activate.c b/lib/activate/activate.c index ea567359b..a3978ada9 100644 --- a/lib/activate/activate.c +++ b/lib/activate/activate.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2016 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -272,10 +272,18 @@ int lv_raid_percent(const struct logical_volume *lv, dm_percent_t *percent) { return 0; } +int lv_raid_data_offset(const struct logical_volume *lv, uint64_t *data_offset) +{ + return 0; +} int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health) { return 0; } +int lv_raid_dev_count(const struct logical_volume *lv, uint32_t *dev_cnt) +{ + return 0; +} int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt) { return 0; @@ -984,6 +992,30 @@ int lv_raid_percent(const struct logical_volume *lv, dm_percent_t *percent) return lv_mirror_percent(lv->vg->cmd, lv, 0, percent, NULL); } +int lv_raid_data_offset(const struct logical_volume *lv, uint64_t *data_offset) +{ + int r; + struct dev_manager *dm; + struct dm_status_raid *status; + + if (!lv_info(lv->vg->cmd, lv, 0, NULL, 0, 0)) + return 0; + + log_debug_activation("Checking raid data offset and dev sectors for LV %s/%s", + lv->vg->name, lv->name); + if (!(dm = dev_manager_create(lv->vg->cmd, lv->vg->name, 1))) + return_0; + + if (!(r = dev_manager_raid_status(dm, lv, &status))) + stack; + + *data_offset = status->data_offset; + + dev_manager_destroy(dm); + + return r; +} + int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health) { int r; @@ -1013,6 +1045,32 @@ int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health) return r; } +int lv_raid_dev_count(const struct logical_volume *lv, uint32_t *dev_cnt) +{ + struct dev_manager *dm; + struct dm_status_raid *status; + + *dev_cnt = 0; + + if (!lv_info(lv->vg->cmd, lv, 0, NULL, 0, 0)) + 
return 0; + + log_debug_activation("Checking raid device count for LV %s/%s", + lv->vg->name, lv->name); + if (!(dm = dev_manager_create(lv->vg->cmd, lv->vg->name, 1))) + return_0; + + if (!dev_manager_raid_status(dm, lv, &status)) { + dev_manager_destroy(dm); + return_0; + } + *dev_cnt = status->dev_count; + + dev_manager_destroy(dm); + + return 1; +} + int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt) { struct dev_manager *dm; diff --git a/lib/activate/activate.h b/lib/activate/activate.h index 85c152171..09d25c5b3 100644 --- a/lib/activate/activate.h +++ b/lib/activate/activate.h @@ -168,6 +168,8 @@ int lv_snapshot_percent(const struct logical_volume *lv, dm_percent_t *percent); int lv_mirror_percent(struct cmd_context *cmd, const struct logical_volume *lv, int wait, dm_percent_t *percent, uint32_t *event_nr); int lv_raid_percent(const struct logical_volume *lv, dm_percent_t *percent); +int lv_raid_dev_count(const struct logical_volume *lv, uint32_t *dev_cnt); +int lv_raid_data_offset(const struct logical_volume *lv, uint64_t *data_offset); int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health); int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt); int lv_raid_sync_action(const struct logical_volume *lv, char **sync_action); diff --git a/lib/activate/dev_manager.c b/lib/activate/dev_manager.c index cde026e16..49d9ad31b 100644 --- a/lib/activate/dev_manager.c +++ b/lib/activate/dev_manager.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2016 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -214,6 +214,14 @@ typedef enum { STATUS, /* DM_DEVICE_STATUS ioctl */ } info_type_t; +/* Return length of segment depending on type and reshape_len */ +static uint32_t _seg_len(const struct lv_segment *seg) +{ + uint32_t reshape_len = seg_is_raid(seg) ? 
((seg->area_count - seg->segtype->parity_devs) * seg->reshape_len) : 0; + + return seg->len - reshape_len; +} + static int _info_run(const char *dlid, struct dm_info *dminfo, uint32_t *read_ahead, struct lv_seg_status *seg_status, @@ -250,7 +258,7 @@ static int _info_run(const char *dlid, struct dm_info *dminfo, if (seg_status && dminfo->exists) { start = length = seg_status->seg->lv->vg->extent_size; start *= seg_status->seg->le; - length *= seg_status->seg->len; + length *= _seg_len(seg_status->seg); do { target = dm_get_next_target(dmt, target, &target_start, @@ -2214,7 +2222,7 @@ static char *_add_error_or_zero_device(struct dev_manager *dm, struct dm_tree *d struct lv_segment *seg_i; struct dm_info info; int segno = -1, i = 0; - uint64_t size = (uint64_t) seg->len * seg->lv->vg->extent_size; + uint64_t size = (uint64_t) _seg_len(seg) * seg->lv->vg->extent_size; dm_list_iterate_items(seg_i, &seg->lv->segments) { if (seg == seg_i) { @@ -2500,7 +2508,7 @@ static int _add_target_to_dtree(struct dev_manager *dm, return seg->segtype->ops->add_target_line(dm, dm->mem, dm->cmd, &dm->target_state, seg, laopts, dnode, - extent_size * seg->len, + extent_size * _seg_len(seg), &dm->pvmove_mirror_count); } @@ -2693,7 +2701,7 @@ static int _add_segment_to_dtree(struct dev_manager *dm, /* Replace target and all its used devs with error mapping */ log_debug_activation("Using error for pending delete %s.", display_lvname(seg->lv)); - if (!dm_tree_node_add_error_target(dnode, (uint64_t)seg->lv->vg->extent_size * seg->len)) + if (!dm_tree_node_add_error_target(dnode, (uint64_t)seg->lv->vg->extent_size * _seg_len(seg))) return_0; } else if (!_add_target_to_dtree(dm, dnode, seg, laopts)) return_0; @@ -3165,7 +3173,6 @@ static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv, log_error(INTERNAL_ERROR "_tree_action: Action %u not supported.", action); goto out; } - r = 1; out: diff --git a/lib/config/defaults.h b/lib/config/defaults.h index 
26bbc69dc..362cb851d 100644 --- a/lib/config/defaults.h +++ b/lib/config/defaults.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -71,7 +71,7 @@ * FIXME: Increase these to 64 and further to the MD maximum * once the SubLVs split and name shift got enhanced */ -#define DEFAULT_RAID1_MAX_IMAGES 10 +#define DEFAULT_RAID1_MAX_IMAGES 64 #define DEFAULT_RAID_MAX_IMAGES 64 #define DEFAULT_ALLOCATION_STRIPE_ALL_DEVICES 0 /* Don't stripe across all devices if not -i/--stripes given */ diff --git a/lib/format1/import-extents.c b/lib/format1/import-extents.c index 3ab3ac443..4c259c126 100644 --- a/lib/format1/import-extents.c +++ b/lib/format1/import-extents.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. 
* @@ -225,8 +225,8 @@ static int _read_linear(struct cmd_context *cmd, struct lv_map *lvm) while (le < lvm->lv->le_count) { len = _area_length(lvm, le); - if (!(seg = alloc_lv_segment(segtype, lvm->lv, le, len, 0, 0, - NULL, 1, len, 0, 0, 0, NULL))) { + if (!(seg = alloc_lv_segment(segtype, lvm->lv, le, len, 0, 0, 0, + NULL, 1, len, 0, 0, 0, 0, NULL))) { log_error("Failed to allocate linear segment."); return 0; } @@ -297,10 +297,10 @@ static int _read_stripes(struct cmd_context *cmd, struct lv_map *lvm) if (!(seg = alloc_lv_segment(segtype, lvm->lv, lvm->stripes * first_area_le, - lvm->stripes * area_len, + lvm->stripes * area_len, 0, 0, lvm->stripe_size, NULL, lvm->stripes, - area_len, 0, 0, 0, NULL))) { + area_len, 0, 0, 0, 0, NULL))) { log_error("Failed to allocate striped segment."); return 0; } diff --git a/lib/format_pool/import_export.c b/lib/format_pool/import_export.c index 2f0f2ebda..f4097a7ae 100644 --- a/lib/format_pool/import_export.c +++ b/lib/format_pool/import_export.c @@ -1,6 +1,6 @@ /* * Copyright (C) 1997-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. 
* @@ -192,9 +192,9 @@ static int _add_stripe_seg(struct dm_pool *mem, return_0; if (!(seg = alloc_lv_segment(segtype, lv, *le_cur, - area_len * usp->num_devs, 0, + area_len * usp->num_devs, 0, 0, usp->striping, NULL, usp->num_devs, - area_len, 0, 0, 0, NULL))) { + area_len, 0, 0, 0, 0, NULL))) { log_error("Unable to allocate striped lv_segment structure"); return 0; } @@ -232,8 +232,8 @@ static int _add_linear_seg(struct dm_pool *mem, area_len = (usp->devs[j].blocks) / POOL_PE_SIZE; if (!(seg = alloc_lv_segment(segtype, lv, *le_cur, - area_len, 0, usp->striping, - NULL, 1, area_len, + area_len, 0, 0, usp->striping, + NULL, 1, area_len, 0, POOL_PE_SIZE, 0, 0, NULL))) { log_error("Unable to allocate linear lv_segment " "structure"); diff --git a/lib/format_text/export.c b/lib/format_text/export.c index 199c185ab..899ff45cb 100644 --- a/lib/format_text/export.c +++ b/lib/format_text/export.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -583,8 +583,10 @@ static int _print_segment(struct formatter *f, struct volume_group *vg, outf(f, "start_extent = %u", seg->le); outsize(f, (uint64_t) seg->len * vg->extent_size, "extent_count = %u", seg->len); - outnl(f); + if (seg->reshape_len) + outsize(f, (uint64_t) seg->reshape_len * vg->extent_size, + "reshape_count = %u", seg->reshape_len); outf(f, "type = \"%s\"", seg->segtype->name); if (!_out_list(f, &seg->tags, "tags")) diff --git a/lib/format_text/flags.c b/lib/format_text/flags.c index 75b905473..716e63201 100644 --- a/lib/format_text/flags.c +++ b/lib/format_text/flags.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2013 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. 
* @@ -61,6 +61,9 @@ static const struct flag _lv_flags[] = { {LOCKED, "LOCKED", STATUS_FLAG}, {LV_NOTSYNCED, "NOTSYNCED", STATUS_FLAG}, {LV_REBUILD, "REBUILD", STATUS_FLAG}, + {LV_RESHAPE_DELTA_DISKS_PLUS, "RESHAPE_DELTA_DISKS_PLUS", STATUS_FLAG}, + {LV_RESHAPE_DELTA_DISKS_MINUS, "RESHAPE_DELTA_DISKS_MINUS", STATUS_FLAG}, + {LV_REMOVE_AFTER_RESHAPE, "REMOVE_AFTER_RESHAPE", STATUS_FLAG}, {LV_WRITEMOSTLY, "WRITEMOSTLY", STATUS_FLAG}, {LV_ACTIVATION_SKIP, "ACTIVATION_SKIP", COMPATIBLE_FLAG}, {LV_ERROR_WHEN_FULL, "ERROR_WHEN_FULL", COMPATIBLE_FLAG}, diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index 00caf210f..e54500812 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -354,7 +354,7 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node struct lv_segment *seg; const struct dm_config_node *sn_child = sn->child; const struct dm_config_value *cv; - uint32_t start_extent, extent_count; + uint32_t area_extents, start_extent, extent_count, reshape_count, data_copies; struct segment_type *segtype; const char *segtype_str; @@ -375,6 +375,12 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node return 0; } + if (!_read_int32(sn_child, "reshape_count", &reshape_count)) + reshape_count = 0; + + if (!_read_int32(sn_child, "data_copies", &data_copies)) + data_copies = 1; + segtype_str = SEG_TYPE_NAME_STRIPED; if (!dm_config_get_str(sn_child, "type", &segtype_str)) { @@ -389,9 +395,11 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node !segtype->ops->text_import_area_count(sn_child, &area_count)) return_0; + area_extents = segtype->parity_devs ? 
+ raid_rimage_extents(segtype, extent_count, area_count - segtype->parity_devs, data_copies) : extent_count; if (!(seg = alloc_lv_segment(segtype, lv, start_extent, - extent_count, 0, 0, NULL, area_count, - extent_count, 0, 0, 0, NULL))) { + extent_count, reshape_count, 0, 0, NULL, area_count, + area_extents, data_copies, 0, 0, 0, NULL))) { log_error("Segment allocation failed"); return 0; } diff --git a/lib/metadata/lv.c b/lib/metadata/lv.c index 85879025a..bde082e58 100644 --- a/lib/metadata/lv.c +++ b/lib/metadata/lv.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2016 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -1278,6 +1278,8 @@ char *lv_attr_dup_with_info_and_seg_status(struct dm_pool *mem, const struct lv_ repstr[8] = 'm'; /* RAID has 'm'ismatches */ } else if (lv->status & LV_WRITEMOSTLY) repstr[8] = 'w'; /* sub-LV has 'w'ritemostly */ + else if (lv->status & LV_REMOVE_AFTER_RESHAPE) + repstr[8] = 'R'; /* sub-LV got 'R'emoved from raid set by reshaping */ } else if (lvdm->seg_status.type == SEG_STATUS_CACHE) { if (lvdm->seg_status.cache->fail) repstr[8] = 'F'; diff --git a/lib/metadata/lv_alloc.h b/lib/metadata/lv_alloc.h index f7bc71360..cf2c579c6 100644 --- a/lib/metadata/lv_alloc.h +++ b/lib/metadata/lv_alloc.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. 
* @@ -21,11 +21,13 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype, struct logical_volume *lv, uint32_t le, uint32_t len, + uint32_t reshape_len, uint64_t status, uint32_t stripe_size, struct logical_volume *log_lv, uint32_t area_count, uint32_t area_len, + uint32_t data_copies, uint32_t chunk_size, uint32_t region_size, uint32_t extents_copied, diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c index d8f869d7b..64232a861 100644 --- a/lib/metadata/lv_manip.c +++ b/lib/metadata/lv_manip.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -912,11 +912,13 @@ static uint32_t _round_to_stripe_boundary(struct volume_group *vg, uint32_t exte struct lv_segment *alloc_lv_segment(const struct segment_type *segtype, struct logical_volume *lv, uint32_t le, uint32_t len, + uint32_t reshape_len, uint64_t status, uint32_t stripe_size, struct logical_volume *log_lv, uint32_t area_count, uint32_t area_len, + uint32_t data_copies, uint32_t chunk_size, uint32_t region_size, uint32_t extents_copied, @@ -950,10 +952,12 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype, seg->lv = lv; seg->le = le; seg->len = len; + seg->reshape_len = reshape_len; seg->status = status; seg->stripe_size = stripe_size; seg->area_count = area_count; seg->area_len = area_len; + seg->data_copies = data_copies ? 
: lv_raid_data_copies(segtype, area_count); seg->chunk_size = chunk_size; seg->region_size = region_size; seg->extents_copied = extents_copied; @@ -1047,11 +1051,10 @@ static int _release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t if (lv_is_raid_image(lv)) { /* Calculate the amount of extents to reduce per rmate/rimage LV */ uint32_t rimage_extents; + struct lv_segment *seg1 = first_seg(lv); - /* FIXME: avoid extra seg_is_*() conditonals */ - area_reduction =_round_to_stripe_boundary(lv->vg, area_reduction, - (seg_is_raid1(seg) || seg_is_any_raid0(seg)) ? 0 : _raid_stripes_count(seg), 0); - rimage_extents = raid_rimage_extents(seg->segtype, area_reduction, seg_is_any_raid0(seg) ? 0 : _raid_stripes_count(seg), + /* FIXME: avoid extra seg_is_*() conditionals here */ + rimage_extents = raid_rimage_extents(seg1->segtype, area_reduction, seg_is_any_raid0(seg) ? 0 : _raid_stripes_count(seg), seg_is_raid10(seg) ? 1 :_raid_data_copies(seg)); if (!rimage_extents) return 0; @@ -1258,7 +1261,7 @@ static uint32_t _calc_area_multiple(const struct segment_type *segtype, * the 'stripes' argument will always need to * be given. */ - if (!strcmp(segtype->name, _lv_type_names[LV_TYPE_RAID10])) { + if (segtype_is_raid10(segtype)) { if (!stripes) return area_count / 2; return stripes; @@ -1278,25 +1281,35 @@ static uint32_t _calc_area_multiple(const struct segment_type *segtype, static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction) { uint32_t area_reduction, s; + uint32_t areas = (seg->area_count / (seg_is_raid10(seg) ? 
seg->data_copies : 1)) - seg->segtype->parity_devs; /* Caller must ensure exact divisibility */ - if (seg_is_striped(seg)) { - if (reduction % seg->area_count) { + // if (!seg_is_raid10(seg) && (seg_is_striped(seg) || seg_is_striped_raid(seg))) { + if (seg_is_striped(seg) || seg_is_striped_raid(seg)) { + if (reduction % areas) { log_error("Segment extent reduction %" PRIu32 " not divisible by #stripes %" PRIu32, reduction, seg->area_count); return 0; } - area_reduction = (reduction / seg->area_count); + area_reduction = reduction / areas; } else area_reduction = reduction; +//printf("%s[%u] seg->lv=%s seg->len=%u seg->area_len=%u area_reduction=%u\n", __func__, __LINE__, seg->lv ? seg->lv->name : "?", seg->len, seg->area_len, area_reduction); for (s = 0; s < seg->area_count; s++) if (!release_and_discard_lv_segment_area(seg, s, area_reduction)) return_0; +//printf("%s[%u] seg->lv=%s seg->len=%u seg->area_len=%u area_reduction=%u\n", __func__, __LINE__, seg->lv ? seg->lv->name : "?", seg->len, seg->area_len, area_reduction); seg->len -= reduction; - seg->area_len -= area_reduction; +//pprintf("%s[%u] seg->lv=%s seg->len=%u seg->area_len=%u area_reduction=%u\n", __func__, __LINE__, seg->lv ? seg->lv->name : "?", seg->len, seg->area_len, area_reduction); + + if (seg_is_raid(seg)) + seg->area_len = seg->len; + else + seg->area_len -= area_reduction; +//printf("%s[%u] seg->lv=%s seg->len=%u seg->area_len=%u area_reduction=%u\n", __func__, __LINE__, seg->lv ? 
seg->lv->name : "?", seg->len, seg->area_len, area_reduction); return 1; } @@ -1306,11 +1319,13 @@ static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction) */ static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete) { - struct lv_segment *seg; + struct lv_segment *seg = first_seg(lv);; uint32_t count = extents; uint32_t reduction; struct logical_volume *pool_lv; struct logical_volume *external_lv = NULL; + int is_raid10 = seg_is_any_raid10(seg) && seg->reshape_len; + uint32_t data_copies = seg->data_copies; if (lv_is_merging_origin(lv)) { log_debug_metadata("Dropping snapshot merge of %s to removed origin %s.", @@ -1318,6 +1333,7 @@ static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete) clear_snapshot_merge(lv); } +//printf("%s[%u] lv=%s is_raid10=%d le_count=%u extents=%u lv->size=%s seg->len=%u seg->area_len=%u seg->reshape_len=%u\n", __func__, __LINE__, lv->name, is_raid10, lv->le_count, extents, display_size(lv->vg->cmd, lv->size), seg ? seg->len : 4711, seg ? seg->area_len : 4711, seg->reshape_len); dm_list_iterate_back_items(seg, &lv->segments) { if (!count) break; @@ -1373,11 +1389,21 @@ static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete) count -= reduction; } - lv->le_count -= extents; + seg = first_seg(lv); +//printf("%s[%u] lv=%s le_count=%u extents=%u lv->size=%s seg->len=%u seg->area_len=%u\n", __func__, __LINE__, lv->name, lv->le_count, extents, display_size(lv->vg->cmd, lv->size), seg ? seg->len : 4711, seg ? seg->area_len : 4711); + if (is_raid10) { + lv->le_count -= extents * data_copies; + if (seg) + seg->len = seg->area_len = lv->le_count; + } else + lv->le_count -= extents; + lv->size = (uint64_t) lv->le_count * lv->vg->extent_size; +//printf("%s[%u] lv=%s le_count=%u lv->size=%s seg->len=%u seg->area_len=%u\n", __func__, __LINE__, lv->name, lv->le_count, display_size(lv->vg->cmd, lv->size), seg ? seg->len : 4711, seg ? 
seg->area_len : 4711); if (!delete) return 1; +//printf("%s[%u] lv=%s le_count=%u lv->size=%s seg->len=%u seg->area_len=%u\n", __func__, __LINE__, lv->name, lv->le_count, display_size(lv->vg->cmd, lv->size), seg ? seg->len : 4711, seg ? seg->area_len : 4711); if (lv == lv->vg->pool_metadata_spare_lv) { lv->status &= ~POOL_METADATA_SPARE; @@ -1793,10 +1819,10 @@ static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status, area_multiple = _calc_area_multiple(segtype, area_count, 0); extents = aa[0].len * area_multiple; - if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, + if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0, status, stripe_size, NULL, area_count, - aa[0].len, 0u, region_size, 0u, NULL))) { + aa[0].len, 0, 0u, region_size, 0u, NULL))) { log_error("Couldn't allocate new LV segment."); return 0; } @@ -1808,7 +1834,7 @@ static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status, dm_list_add(&lv->segments, &seg->list); extents = aa[0].len * area_multiple; - +//printf("%s[%u] le_count=%u extents=%u\n", __func__, __LINE__, lv->le_count, extents); if (!_setup_lv_size(lv, lv->le_count + extents)) return_0; @@ -3234,9 +3260,9 @@ int lv_add_virtual_segment(struct logical_volume *lv, uint64_t status, seg->area_len += extents; seg->len += extents; } else { - if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, + if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0, status, 0, NULL, 0, - extents, 0, 0, 0, NULL))) { + extents, 0, 0, 0, 0, NULL))) { log_error("Couldn't allocate new %s segment.", segtype->name); return 0; } @@ -3562,10 +3588,10 @@ static struct lv_segment *_convert_seg_to_mirror(struct lv_segment *seg, } if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, SEG_TYPE_NAME_MIRROR), - seg->lv, seg->le, seg->len, + seg->lv, seg->le, seg->len, 0, seg->status, seg->stripe_size, log_lv, - seg->area_count, seg->area_len, + seg->area_count, 
seg->area_len, 0, seg->chunk_size, region_size, seg->extents_copied, NULL))) { log_error("Couldn't allocate converted LV segment."); @@ -3667,8 +3693,8 @@ int lv_add_segmented_mirror_image(struct alloc_handle *ah, } if (!(new_seg = alloc_lv_segment(segtype, copy_lv, - seg->le, seg->len, PVMOVE, 0, - NULL, 1, seg->len, + seg->le, seg->len, 0, PVMOVE, 0, + NULL, 1, seg->len, 0, 0, 0, 0, NULL))) return_0; @@ -3863,9 +3889,9 @@ static int _lv_insert_empty_sublvs(struct logical_volume *lv, /* * First, create our top-level segment for our top-level LV */ - if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, lv->status, + if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, 0, lv->status, stripe_size, NULL, - devices, 0, 0, region_size, 0, NULL))) { + devices, 0, 0, 0, region_size, 0, NULL))) { log_error("Failed to create mapping segment for %s.", display_lvname(lv)); return 0; @@ -4063,9 +4089,13 @@ static int _lv_extend_layered_lv(struct alloc_handle *ah, lv_set_hidden(seg_metalv(seg, s)); } - seg->area_len += extents / area_multiple; seg->len += extents; + if (seg_is_raid(seg)) + seg->area_len = seg->len; + else + seg->area_len += extents / area_multiple; +//pprintf("%s[%u] le_count=%u extents=%u seg->len=%u seg-area_len=%u\n", __func__, __LINE__, lv->le_count, extents, seg->len, seg->area_len); if (!_setup_lv_size(lv, lv->le_count + extents)) return_0; @@ -6300,7 +6330,6 @@ static int _lv_update_and_reload(struct logical_volume *lv, int origin_only) log_very_verbose("Updating logical volume %s on disk(s)%s.", display_lvname(lock_lv), origin_only ? 
" (origin only)": ""); - if (!vg_write(vg)) return_0; @@ -6767,8 +6796,8 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd, return_NULL; /* allocate a new linear segment */ - if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count, - status, 0, NULL, 1, layer_lv->le_count, + if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count, 0, + status, 0, NULL, 1, layer_lv->le_count, 0, 0, 0, 0, NULL))) return_NULL; @@ -6824,8 +6853,8 @@ static int _extend_layer_lv_for_segment(struct logical_volume *layer_lv, /* allocate a new segment */ if (!(mapseg = alloc_lv_segment(segtype, layer_lv, layer_lv->le_count, - seg->area_len, status, 0, - NULL, 1, seg->area_len, 0, 0, 0, seg))) + seg->area_len, 0, status, 0, + NULL, 1, seg->area_len, 0, 0, 0, 0, seg))) return_0; /* map the new segment to the original underlying are */ diff --git a/lib/metadata/merge.c b/lib/metadata/merge.c index 63118182f..924d492cb 100644 --- a/lib/metadata/merge.c +++ b/lib/metadata/merge.c @@ -236,7 +236,7 @@ static void _check_raid_seg(struct lv_segment *seg, int *error_count) if (!seg->areas) raid_seg_error("zero areas"); - if (seg->extents_copied > seg->area_len) + if (seg->extents_copied > seg->len) raid_seg_error_val("extents_copied too large", seg->extents_copied); /* Default < 10, change once raid1 split shift and rename SubLVs works! */ @@ -475,7 +475,7 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg) struct lv_segment *seg, *seg2; uint32_t le = 0; unsigned seg_count = 0, seg_found, external_lv_found = 0; - uint32_t area_multiplier, s; + uint32_t data_rimage_count, s; struct seg_list *sl; struct glv_list *glvl; int error_count = 0; @@ -498,13 +498,14 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg) inc_error_count; } - area_multiplier = segtype_is_striped(seg->segtype) ? 
- seg->area_count : 1; - if (seg->area_len * area_multiplier != seg->len) { - log_error("LV %s: segment %u has inconsistent " - "area_len %u", - lv->name, seg_count, seg->area_len); + data_rimage_count = seg->area_count - seg->segtype->parity_devs; + /* FIXME: raid varies seg->area_len? */ + if (seg->len != seg->area_len && + seg->len != seg->area_len * data_rimage_count) { + log_error("LV %s: segment %u with len=%u " + " has inconsistent area_len %u", + lv->name, seg_count, seg->len, seg->area_len); inc_error_count; } @@ -766,10 +767,10 @@ static int _lv_split_segment(struct logical_volume *lv, struct lv_segment *seg, /* Clone the existing segment */ if (!(split_seg = alloc_lv_segment(seg->segtype, - seg->lv, seg->le, seg->len, + seg->lv, seg->le, seg->len, seg->reshape_len, seg->status, seg->stripe_size, seg->log_lv, - seg->area_count, seg->area_len, + seg->area_count, seg->area_len, seg->data_copies, seg->chunk_size, seg->region_size, seg->extents_copied, seg->pvmove_source_seg))) { log_error("Couldn't allocate cloned LV segment."); diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h index 573c09d90..b6fe2e9ec 100644 --- a/lib/metadata/metadata-exported.h +++ b/lib/metadata/metadata-exported.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2016 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -137,7 +137,11 @@ e.g. 
to prohibit allocation of a RAID image on a PV already holing an image of the RAID set */ #define LOCKD_SANLOCK_LV UINT64_C(0x0080000000000000) /* LV - Internal use only */ -/* Next unused flag: UINT64_C(0x0100000000000000) */ +#define LV_RESHAPE_DELTA_DISKS_PLUS UINT64_C(0x0100000000000000) /* LV reshape flag delta disks plus image(s) */ +#define LV_RESHAPE_DELTA_DISKS_MINUS UINT64_C(0x0200000000000000) /* LV reshape flag delta disks minus image(s) */ + +#define LV_REMOVE_AFTER_RESHAPE UINT64_C(0x0400000000000000) /* LV needs to be removed after a shrinking reshape */ +/* Next unused flag: UINT64_C(0x0800000000000000) */ /* Format features flags */ #define FMT_SEGMENTS 0x00000001U /* Arbitrary segment params? */ @@ -446,6 +450,7 @@ struct lv_segment { const struct segment_type *segtype; uint32_t le; uint32_t len; + uint32_t reshape_len; /* For RAID: user hidden additional out of place reshaping length off area_len and len */ uint64_t status; @@ -454,6 +459,7 @@ struct lv_segment { uint32_t writebehind; /* For RAID (RAID1 only) */ uint32_t min_recovery_rate; /* For RAID */ uint32_t max_recovery_rate; /* For RAID */ + uint32_t data_offset; /* For RAID: data offset in sectors on each data component image */ uint32_t area_count; uint32_t area_len; uint32_t chunk_size; /* For snapshots/thin_pool. In sectors. */ @@ -464,6 +470,7 @@ struct lv_segment { struct logical_volume *cow; struct dm_list origin_list; uint32_t region_size; /* For mirrors, replicators - in sectors */ + uint32_t data_copies; /* For RAID: number of data copies (e.g. 
3 for RAID 6 */ uint32_t extents_copied;/* Number of extents synced for raids/mirrors */ struct logical_volume *log_lv; struct lv_segment *pvmove_source_seg; @@ -1205,7 +1212,8 @@ struct logical_volume *first_replicator_dev(const struct logical_volume *lv); int lv_is_raid_with_tracking(const struct logical_volume *lv); uint32_t lv_raid_image_count(const struct logical_volume *lv); int lv_raid_change_image_count(struct logical_volume *lv, - uint32_t new_count, struct dm_list *allocate_pvs); + uint32_t new_count, const uint32_t region_size, + struct dm_list *allocate_pvs); int lv_raid_split(struct logical_volume *lv, const char *split_name, uint32_t new_count, struct dm_list *splittable_pvs); int lv_raid_split_and_track(struct logical_volume *lv, @@ -1232,6 +1240,7 @@ uint32_t raid_rimage_extents(const struct segment_type *segtype, uint32_t raid_ensure_min_region_size(const struct logical_volume *lv, uint64_t raid_size, uint32_t region_size); int lv_raid_change_region_size(struct logical_volume *lv, int yes, int force, uint32_t new_region_size); +uint32_t lv_raid_data_copies(const struct segment_type *segtype, uint32_t area_count); /* -- metadata/raid_manip.c */ /* ++ metadata/cache_manip.c */ diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c index cdd7af8d6..89ac8ce9b 100644 --- a/lib/metadata/raid_manip.c +++ b/lib/metadata/raid_manip.c @@ -22,6 +22,11 @@ #include "lv_alloc.h" #include "lvm-string.h" +typedef int (*fn_on_lv_t)(struct logical_volume *lv, void *data); +static int _lv_update_reload_fns_reset_eliminate_lvs(struct logical_volume *lv, int origin_only, ...); + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(*a)) + static int _check_restriping(uint32_t new_stripes, struct logical_volume *lv) { if (new_stripes && new_stripes != first_seg(lv)->area_count) { @@ -49,6 +54,23 @@ static int _check_num_areas_in_lv_segments(struct logical_volume *lv, unsigned n return 1; } +/* + * Check if reshape is supported in the kernel. 
+ */ +static int _reshape_is_supported(struct cmd_context *cmd, const struct segment_type *segtype) +{ + unsigned attrs; + + if (!segtype->ops->target_present || + !segtype->ops->target_present(cmd, NULL, &attrs) || + !(attrs & RAID_FEATURE_RESHAPE)) { + log_error("RAID module does not support reshape."); + return 0; + } + + return 1; +} + /* * Ensure region size exceeds the minimum for @lv because * MD's bitmap is limited to tracking 2^21 regions. @@ -119,7 +141,7 @@ char *top_level_lv_name(struct volume_group *vg, const char *lv_name) log_error("Failed to allocate string for new LV name."); return NULL; } - + if ((suffix = first_substring(new_lv_name, "_rimage_", "_rmeta_", "_mimage_", "_mlog_", NULL))) *suffix = '\0'; @@ -227,6 +249,59 @@ static int _deactivate_and_remove_lvs(struct volume_group *vg, struct dm_list *r return 1; } +/* + * HM Helper: + * + * report health string in @*raid_health for @lv from kernel reporting # of devs in @*kernel_devs + */ +static int _get_dev_health(struct logical_volume *lv, uint32_t *kernel_devs, + uint32_t *devs_health, uint32_t *devs_in_sync, + char **raid_health) +{ + unsigned d; + char *rh; + + *devs_health = *devs_in_sync = 0; + + if (!lv_raid_dev_count(lv, kernel_devs)) { + log_error("Failed to get device count"); + return_0; + } + + if (!lv_raid_dev_health(lv, &rh)) { + log_error("Failed to get device health"); + return_0; + } + + d = (unsigned) strlen(rh); + while (d--) { + (*devs_health)++; + if (rh[d] == 'A') + (*devs_in_sync)++; + } + + if (raid_health) + *raid_health = rh; + + return 1; +} + +__attribute__ ((__unused__)) +/* HM Helper: return number of devices in sync for (raid) @lv */ +static int _devs_in_sync_count(struct logical_volume *lv) +{ + uint32_t kernel_devs, devs_health, devs_in_sync; + struct lv_segment *seg = first_seg(lv); + + if (!seg_is_raid(seg)) + return seg->area_count; + + if (!_get_dev_health(lv, &kernel_devs, &devs_health, &devs_in_sync, NULL)) + return 0; + + return (int) devs_in_sync; +} + 
/* * _raid_in_sync * @lv @@ -237,7 +312,7 @@ static int _deactivate_and_remove_lvs(struct volume_group *vg, struct dm_list *r * * Returns: 1 if in-sync, 0 otherwise. */ -#define _RAID_IN_SYNC_RETRIES 6 +#define _RAID_IN_SYNC_RETRIES 10 static int _raid_in_sync(struct logical_volume *lv) { int retries = _RAID_IN_SYNC_RETRIES; @@ -372,7 +447,7 @@ static int _lv_update_and_reload_list(struct logical_volume *lv, int origin_only display_lvname(lock_lv)); vg_revert(vg); } else if (!(r = vg_commit(vg))) - stack; /* !vg_commit() has implict vg_revert() */ + stack; /* !vg_commit() has implicit vg_revert() */ if (r && lv_list) { dm_list_iterate_items(lvl, lv_list) { @@ -398,86 +473,88 @@ static int _lv_update_and_reload_list(struct logical_volume *lv, int origin_only return r; } -/* Makes on-disk metadata changes - * If LV is active: - * clear first block of device - * otherwise: - * activate, clear, deactivate +/* + * HM Helper + * + * clear first 4K of @lv + * + * We're holding an exclusive lock, so we can clear the + * first block of the (metadata) LV directly on the respective + * PV avoiding activation of the metadata lv altogether and + * hence superfluous latencies. * * Returns: 1 on success, 0 on failure + * + * HM FIXME: share with lv_manip.c! + */ +static int _clear_lv(struct logical_volume *lv) +{ + struct lv_segment *seg = first_seg(lv); + struct physical_volume *pv; + uint64_t offset; + + if (test_mode()) + return 1; + + if (seg->area_count != 1) + return_0; + if (seg_type(seg, 0) != AREA_PV) + return_0; + if (!(pv = seg_pv(seg, 0))) + return_0; + if (!pv->pe_start) + return_0; + + /* + * Rather than wiping lv->size, we can simply wipe the first 4KiB + * to remove the superblock of any previous RAID devices. It is much + * quicker than wiping a potentially larger metadata device completely. 
+ */ + log_verbose("Clearing metadata area of %s", display_lvname(lv)); + offset = (pv->pe_start + seg_pe(seg, 0) * lv->vg->extent_size) << 9; + + return dev_set(pv->dev, offset, 4096, 0); +} + +/* + * HM Helper: + * + * After commiting metadata with the rmeta images on + * @lv_list in, wipe all LVs first 4 KiB on @lv_list + * + * Returns 1 on success or 0 on failure */ static int _clear_lvs(struct dm_list *lv_list) { struct lv_list *lvl; - struct volume_group *vg = NULL; - unsigned i = 0, sz = dm_list_size(lv_list); - char *was_active; - int r = 1; + struct volume_group *vg; - if (!sz) { + if (!lv_list) + return_0; + + if (!dm_list_size(lv_list)) { log_debug_metadata(INTERNAL_ERROR "Empty list of LVs given for clearing."); return 1; } - dm_list_iterate_items(lvl, lv_list) { - if (!lv_is_visible(lvl->lv)) { - log_error(INTERNAL_ERROR - "LVs must be set visible before clearing."); - return 0; - } - vg = lvl->lv->vg; - } - - if (test_mode()) + lvl = dm_list_item(dm_list_first(lv_list), struct lv_list); + if (!lvl->lv || + test_mode()) return 1; /* * FIXME: only vg_[write|commit] if LVs are not already written * as visible in the LVM metadata (which is never the case yet). */ + vg = lvl->lv->vg; if (!vg || !vg_write(vg) || !vg_commit(vg)) return_0; - was_active = alloca(sz); - dm_list_iterate_items(lvl, lv_list) - if (!(was_active[i++] = lv_is_active_locally(lvl->lv))) { - lvl->lv->status |= LV_TEMPORARY; - if (!activate_lv_excl_local(vg->cmd, lvl->lv)) { - log_error("Failed to activate localy %s for clearing.", - display_lvname(lvl->lv)); - r = 0; - goto out; - } - lvl->lv->status &= ~LV_TEMPORARY; - } + if (!_clear_lv(lvl->lv)) + return 0; - dm_list_iterate_items(lvl, lv_list) { - log_verbose("Clearing metadata area %s.", display_lvname(lvl->lv)); - /* - * Rather than wiping lv->size, we can simply - * wipe the first sector to remove the superblock of any previous - * RAID devices. It is much quicker. 
- */ - if (!wipe_lv(lvl->lv, (struct wipe_params) { .do_zero = 1, .zero_sectors = 1 })) { - log_error("Failed to zero %s.", display_lvname(lvl->lv)); - r = 0; - goto out; - } - } -out: - /* TODO: deactivation is only needed with clustered locking - * in normal case we should keep device active - */ - sz = 0; - dm_list_iterate_items(lvl, lv_list) - if ((i > sz) && !was_active[sz++] && - !deactivate_lv(vg->cmd, lvl->lv)) { - log_error("Failed to deactivate %s.", display_lvname(lvl->lv)); - r = 0; /* continue deactivating */ - } - - return r; + return 1; } /* raid0* <-> raid10_near area reorder helper: swap 2 LV segment areas @a1 and @a2 */ @@ -495,7 +572,7 @@ static void _swap_areas(struct lv_segment_area *a1, struct lv_segment_area *a2) * * raid10_{near,far} can only be reordered to raid0 if !mod(#total_devs, #mirrors) * - * Examples with 6 disks indexed 0..5 with 3 stripes: + * Examples with 6 disks indexed 0..5 with 3 stripes and 2 data copies: * raid0 (012345) -> raid10_{near,far} (031425) order * idx 024135 * raid10_{near,far} (012345) -> raid0 (024135/135024) order depending on mirror leg selection (TBD) @@ -514,7 +591,7 @@ enum raid0_raid10_conversion { reorder_to_raid10_near, reorder_from_raid10_near static int _reorder_raid10_near_seg_areas(struct lv_segment *seg, enum raid0_raid10_conversion conv) { unsigned dc, idx1, idx1_sav, idx2, s, ss, str, xchg; - uint32_t data_copies = 2; /* seg->data_copies */ + uint32_t data_copies = seg->data_copies; uint32_t *idx, stripes = seg->area_count; unsigned i = 0; @@ -646,12 +723,11 @@ static int _reorder_raid10_near_seg_areas(struct lv_segment *seg, enum raid0_rai * * Returns: 1 on success, 0 on failure */ +static char *_generate_raid_name(struct logical_volume *lv, + const char *suffix, int count); static int _shift_and_rename_image_components(struct lv_segment *seg) { - int len; - char *shift_name; uint32_t s, missing; - struct cmd_context *cmd = seg->lv->vg->cmd; /* * All LVs must be properly named for their index 
before @@ -662,15 +738,6 @@ static int _shift_and_rename_image_components(struct lv_segment *seg) if (!seg_is_raid(seg)) return_0; - if (seg->area_count > 10) { - /* - * FIXME: Handling more would mean I'd have - * to handle double digits - */ - log_error("Unable handle arrays with more than 10 devices."); - return 0; - } - log_very_verbose("Shifting images in %s.", display_lvname(seg->lv)); for (s = 0, missing = 0; s < seg->area_count; s++) { @@ -691,24 +758,16 @@ static int _shift_and_rename_image_components(struct lv_segment *seg) display_lvname(seg_lv(seg, s)), missing); /* Alter rmeta name */ - shift_name = dm_pool_strdup(cmd->mem, seg_metalv(seg, s)->name); - if (!shift_name) { + if (!(seg_metalv(seg, s)->name = _generate_raid_name(seg->lv, "rmeta", s - missing))) { log_error("Memory allocation failed."); return 0; } - len = strlen(shift_name) - 1; - shift_name[len] -= missing; - seg_metalv(seg, s)->name = shift_name; /* Alter rimage name */ - shift_name = dm_pool_strdup(cmd->mem, seg_lv(seg, s)->name); - if (!shift_name) { + if (!(seg_lv(seg, s)->name = _generate_raid_name(seg->lv, "rimage", s - missing))) { log_error("Memory allocation failed."); return 0; } - len = strlen(shift_name) - 1; - shift_name[len] -= missing; - seg_lv(seg, s)->name = shift_name; seg->areas[s - missing] = seg->areas[s]; seg->meta_areas[s - missing] = seg->meta_areas[s]; @@ -832,7 +891,7 @@ static int _alloc_image_components(struct logical_volume *lv, region_size = seg->region_size; if (seg_is_raid(seg)) - segtype = seg->segtype; + segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID0_META); else if (!(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID1))) return_0; @@ -848,17 +907,13 @@ static int _alloc_image_components(struct logical_volume *lv, /* FIXME Workaround for segment type changes where new segtype is unknown here */ /* Only for raid0* to raid4 */ extents = (lv->le_count / seg->area_count) * count; - else if (segtype_is_raid10(segtype)) { - if 
(seg->area_count < 2) { - log_error(INTERNAL_ERROR "LV %s needs at least 2 areas.", - display_lvname(lv)); - return 0; - } - extents = lv->le_count / (seg->area_count / 2); /* we enforce 2 mirrors right now */ - } else - extents = (segtype->parity_devs) ? - (lv->le_count / (seg->area_count - segtype->parity_devs)) : - lv->le_count; + + else { + if (seg_type(seg, 0) == AREA_LV) + extents = seg_lv(seg, 0)->le_count * count; + else + extents = lv->le_count / (seg->area_count - segtype->parity_devs); + } /* Do we need to allocate any extents? */ if (pvs && !dm_list_empty(pvs) && @@ -955,8 +1010,7 @@ uint32_t raid_rimage_extents(const struct segment_type *segtype, uint64_t r; if (!extents || - segtype_is_mirror(segtype) || - segtype_is_raid1(segtype)) + !segtype_is_striped_raid(segtype)) return extents; r = extents; @@ -968,6 +1022,1291 @@ uint32_t raid_rimage_extents(const struct segment_type *segtype, return r > UINT_MAX ? 0 : (uint32_t) r; } +/* Return number of data copies for @segtype */ +uint32_t lv_raid_data_copies(const struct segment_type *segtype, uint32_t area_count) +{ + if (segtype_is_any_raid10(segtype)) + /* FIXME: change for variable number of data copies */ + return 2; + else if (segtype_is_mirrored(segtype)) + return area_count; + else if (segtype_is_striped_raid(segtype)) + return segtype->parity_devs + 1; + return 1; +} + + +/* Return data images count for @total_rimages depending on @seg's type */ +static uint32_t _data_rimages_count(const struct lv_segment *seg, const uint32_t total_rimages) +{ + if (!seg_is_thin(seg) && total_rimages <= seg->segtype->parity_devs) + return_0; + + return total_rimages - seg->segtype->parity_devs; +} + +/* Get total area len of @lv, i.e. 
sum of area_len of all segments */ +static uint32_t _lv_total_rimage_len(struct logical_volume *lv) +{ + uint32_t s; + struct lv_segment *seg = first_seg(lv); + + if (seg_is_raid(seg)) { + for (s = 0; s < seg->area_count; s++) + if (seg_lv(seg, s)) + return seg_lv(seg, s)->le_count; + } else + return lv->le_count; + + return_0; +} + +/* + * HM helper: + * + * Compare the raid levels in segtype @t1 and @t2 + * + * Return 1 if same, else 0 + */ +static int _cmp_level(const struct segment_type *t1, const struct segment_type *t2) +{ + if ((segtype_is_any_raid10(t1) && !segtype_is_any_raid10(t2)) || + (!segtype_is_any_raid10(t1) && segtype_is_any_raid10(t2))) + return 0; + + return !strncmp(t1->name, t2->name, 5); +} + +/* + * HM Helper: + * + * Check for same raid levels in segtype @t1 and @t2 + * + * Return 1 if same, else != 1 + */ +static int is_same_level(const struct segment_type *t1, const struct segment_type *t2) +{ + return _cmp_level(t1, t2); +} + +/* Return # of reshape LEs per device for @seg */ +static uint32_t _reshape_len_per_dev(struct lv_segment *seg) +{ + return seg->reshape_len; +} + +/* Return # of reshape LEs per @lv (sum of all sub LVs reshape LEs) */ +static uint32_t _reshape_len_per_lv(struct logical_volume *lv) +{ + struct lv_segment *seg = first_seg(lv); + + return _reshape_len_per_dev(seg) * _data_rimages_count(seg, seg->area_count); +} + +/* + * HM Helper: + * + * store the allocated reshape length per data image + * in the only segment of the top-level RAID @lv and + * in the first segment of each sub lv. 
+ */ +static int _lv_set_reshape_len(struct logical_volume *lv, uint32_t reshape_len) +{ + uint32_t s; + struct lv_segment *data_seg, *seg = first_seg(lv); + + if (reshape_len >= lv->le_count - 1) + return_0; + + seg->reshape_len = reshape_len; + + for (s = 0; s < seg->area_count; s++) { + if (!seg_lv(seg, s)) + return_0; + + reshape_len = seg->reshape_len; + dm_list_iterate_items(data_seg, &seg_lv(seg, s)->segments) { + data_seg->reshape_len = reshape_len; + reshape_len = 0; + } + } + + return 1; +} + +/* HM Helper: + * + * correct segments logical start extents in all sub LVs of @lv + * after having reordered any segments in sub LVs e.g. because of + * reshape space (re)allocation. + */ +static int _lv_set_image_lvs_start_les(struct logical_volume *lv) +{ + uint32_t le, s; + struct lv_segment *data_seg, *seg = first_seg(lv); + + + for (s = 0; s < seg->area_count; s++) { + if (!seg_lv(seg, s)) + return_0; + + le = 0; + dm_list_iterate_items(data_seg, &(seg_lv(seg, s)->segments)) { + data_seg->reshape_len = le ? 0 : seg->reshape_len; + data_seg->le = le; + le += data_seg->len; + } + } + + /* Try merging rimage sub LV segments _after_ adjusting start LEs */ + for (s = 0; s < seg->area_count; s++) + if (!lv_merge_segments(seg_lv(seg, s))) + return_0; + + return 1; +} + +/* + * Relocate @out_of_place_les_per_disk from @lv's data images begin <-> end depending on @where + * + * @where: + * alloc_begin: end -> begin + * alloc_end: begin -> end + */ +enum alloc_where { alloc_begin, alloc_end, alloc_anywhere, alloc_none }; +static int _lv_relocate_reshape_space(struct logical_volume *lv, enum alloc_where where) +{ + uint32_t le, begin, end, s; + struct logical_volume *dlv; + struct dm_list *insert; + struct lv_segment *data_seg, *seg = first_seg(lv); + + if (!_reshape_len_per_dev(seg)) + return_0; + + /* + * Move the reshape LEs of each stripe (i.e. 
the data image sub lv) + * in the first/last segment(s) across to the opposite end of the + * address space + */ + for (s = 0; s < seg->area_count; s++) { + if (!(dlv = seg_lv(seg, s))) + return_0; + + switch (where) { + case alloc_begin: + /* Move to the beginning -> start moving to the beginning from "end - reshape LEs" to end */ + begin = dlv->le_count - _reshape_len_per_dev(seg); + end = dlv->le_count; + break; + case alloc_end: + /* Move to the end -> start moving to the end from 0 and end with reshape LEs */ + begin = 0; + end = _reshape_len_per_dev(seg); + break; + default: + log_error(INTERNAL_ERROR "bogus reshape space reallocation request [%d]", where); + return 0; + } + + /* Ensure segment boundary at begin/end of reshape space */ + if (!lv_split_segment(dlv, begin ?: end)) + return_0; + + /* Select destination to move to (begin/end) */ + insert = begin ? dlv->segments.n : &dlv->segments; + if (!(data_seg = find_seg_by_le(dlv, begin))) + return_0; + + le = begin; + while (le < end) { + struct dm_list *n = data_seg->list.n; + + le += data_seg->len; + + dm_list_move(insert, &data_seg->list); + + /* If moving to the begin, adjust insertion point so that we don't reverse order */ + if (begin) + insert = data_seg->list.n; + + data_seg = dm_list_item(n, struct lv_segment); + } + + le = 0; + dm_list_iterate_items(data_seg, &dlv->segments) { + data_seg->reshape_len = le ? 0 : _reshape_len_per_dev(seg); + data_seg->le = le; + le += data_seg->len; + } + } + + return 1; +} + +/* + * Check if we've got out of space reshape + * capacity in @lv and allocate if necessary. 
+ * + * We inquire the targets status interface to retrieve + * the current data_offset and the device size and + * compare that to the size of the component image LV + * to tell if an extension of the LV is needed or + * existing space can just be used, + * + * Three different scenarios need to be covered: + * + * - we have to reshape forwards + * (true for adding disks to a raid set) -> + * add extent to each component image upfront + * or move an existing one at the end across; + * kernel will set component devs data_offset to + * the passed in one and new_data_offset to 0, + * i.e. the data starts at offset 0 after the reshape + * + * - we have to reshape backwards + * (true for removing disks form a raid set) -> + * add extent to each component image by the end + * or use already existing one from a previous reshape; + * kernel will leave the data_offset of each component dev + * at 0 and set new_data_offset to the passed in one, + * i.e. the data will be at offset new_data_offset != 0 + * after the reshape + * + * - we are free to reshape either way + * (true for layout changes keeping number of disks) -> + * let the kernel identify free out of place reshape space + * and select the appropriate data_offset and reshape direction + * + * Kernel will always be told to put data offset + * on an extent boundary. + * When we convert to mappings outside MD ones such as linear, + * striped and mirror _and_ data_offset != 0, split the first segment + * and adjust the rest to remove the reshape space. + * If it's at the end, just lv_reduce() and set seg->reshape_len to 0. + * + * Does not write metadata! 
+ */ +static int _lv_alloc_reshape_space(struct logical_volume *lv, + enum alloc_where where, + enum alloc_where *where_it_was, + struct dm_list *allocate_pvs) +{ + uint32_t out_of_place_les_per_disk; + uint64_t data_offset; + struct lv_segment *seg = first_seg(lv); + + if (!seg->stripe_size) + return_0; + + /* Ensure min out-of-place reshape space 1 MiB */ + out_of_place_les_per_disk = max(2048U, (unsigned) seg->stripe_size); + out_of_place_les_per_disk = (uint32_t) max(out_of_place_les_per_disk / (unsigned long long) lv->vg->extent_size, 1ULL); + + /* Get data_offset and dev_sectors from the kernel */ + if (!lv_raid_data_offset(lv, &data_offset)) { + log_error("Can't get data offset and dev size for %s from kernel", + display_lvname(lv)); + return 0; + } + + /* + * If we have reshape space allocated and it has to grow, + * relocate it to the end in case kernel says it is at the + * beginning in order to grow the LV. + */ + if (_reshape_len_per_dev(seg)) { + if (out_of_place_les_per_disk > _reshape_len_per_dev(seg)) { + /* Kernel says data is at data_offset > 0 -> relocate reshape space at the begin to the end */ + if (data_offset && !_lv_relocate_reshape_space(lv, alloc_end)) + return_0; + + data_offset = 0; + out_of_place_les_per_disk -= _reshape_len_per_dev(seg); + } else + out_of_place_les_per_disk = 0; + } + + /* + * If we don't reshape space allocated extend the LV. + * + * first_seg(lv)->reshape_len (only segment of top level raid LV) + * is accounting for the data rimages so that unchanged + * lv_extend()/lv_reduce() can be used to allocate/free, + * because seg->len etc. 
still holds the whole size as before + * including the reshape space + */ + if (out_of_place_les_per_disk) { + uint32_t data_rimages = _data_rimages_count(seg, seg->area_count); + uint32_t reshape_len = out_of_place_les_per_disk * data_rimages; + uint32_t prev_rimage_len = _lv_total_rimage_len(lv); + uint64_t lv_size = lv->size; + + if (!lv_extend(lv, seg->segtype, data_rimages, + seg->stripe_size, 1, seg->region_size, + reshape_len /* # of reshape LEs to add */, + allocate_pvs, lv->alloc, 0)) { + log_error("Failed to allocate out-of-place reshape space for %s.", + display_lvname(lv)); + return 0; + } + + lv->size = lv_size; + /* pay attention to lv_extend maybe having allocated more because of layout specific rounding */ + if (!_lv_set_reshape_len(lv, _lv_total_rimage_len(lv) - prev_rimage_len)) + return 0; + } + + /* Preset data offset in case we fail relocating reshape space below */ + seg->data_offset = 0; + + /* + * Handle reshape space relocation + */ + switch (where) { + case alloc_begin: + /* Kernel says data is at data_offset == 0 -> relocate reshape space at the end to the begin */ + if (!data_offset && !_lv_relocate_reshape_space(lv, where)) + return_0; + break; + + case alloc_end: + /* Kernel says data is at data_offset > 0 -> relocate reshape space at the begin to the end */ + if (data_offset && !_lv_relocate_reshape_space(lv, where)) + return_0; + break; + + case alloc_anywhere: + /* We don't care where the space is, kernel will just toggle data_offset accordingly */ + break; + + default: + log_error(INTERNAL_ERROR "Bogus reshape space allocation request"); + return 0; + } + + if (where_it_was) + *where_it_was = data_offset ? 
alloc_begin : alloc_end; + + /* Inform kernel about the reshape length in sectors */ + seg->data_offset = _reshape_len_per_dev(seg) * lv->vg->extent_size; + + return _lv_set_image_lvs_start_les(lv); +} + +/* Remove any reshape space from the data LVs of @lv */ +static int _lv_free_reshape_space_with_status(struct logical_volume *lv, enum alloc_where *where_it_was) +{ + uint32_t total_reshape_len; + struct lv_segment *seg = first_seg(lv); + + if ((total_reshape_len = _reshape_len_per_lv(lv))) { + enum alloc_where where; + /* + * raid10: + * + * the allocator will have added times #data_copies stripes, + * so we need to lv_reduce() less visible size. + */ + if (seg_is_any_raid10(seg)) { + if (total_reshape_len % seg->data_copies) + return_0; + + total_reshape_len /= seg->data_copies; + } + + /* + * Got reshape space on request to free it. + * + * If it happens to be at the beginning of + * the data LVs, remap it to the end in order + * to be able to free it via lv_reduce(). + */ + if (!_lv_alloc_reshape_space(lv, alloc_end, &where, NULL)) + return_0; + + seg->extents_copied = first_seg(lv)->area_len; + if (!lv_reduce(lv, total_reshape_len)) + return_0; + + seg->extents_copied = first_seg(lv)->area_len; + + if (!_lv_set_reshape_len(lv, 0)) + return 0; + + /* + * Only in case reshape space was freed at the beginning, + * which is indicated by "where == alloc_begin", + * tell kernel to adjust data_offsets on raid devices to 0. + * + * The special, unused value '1' for seg->data_offset will cause + * "data_offset 0" to be emitted in the segment line. + */ + seg->data_offset = (where == alloc_begin) ? 
1 : 0; + + } else if (where_it_was) + *where_it_was = alloc_none; + + return 1; +} + +static int _lv_free_reshape_space(struct logical_volume *lv) +{ + return _lv_free_reshape_space_with_status(lv, NULL); +} + +/* + * + * HM + * + * Compares current raid disk count of active RAID set @lv to + * requested @dev_count returning number of disks as of healths + * string in @devs_health and synced disks in @devs_in_sync + * + * Returns: + * + * 0: error + * 1: kernel dev count = @dev_count + * 2: kernel dev count < @dev_count + * 3: kernel dev count > @dev_count + * + */ +static int _reshaped_state(struct logical_volume *lv, const unsigned dev_count, + unsigned *devs_health, unsigned *devs_in_sync) +{ + uint32_t kernel_devs; + + if (!devs_health || !devs_in_sync) + return_0; + + if (!_get_dev_health(lv, &kernel_devs, devs_health, devs_in_sync, NULL)) + return 0; + + if (kernel_devs == dev_count) + return 1; + + return kernel_devs < dev_count ? 2 : 3; +} + +/* + * Return new length for @lv based on @old_image_count and @new_image_count in @*len + * + * Subtracts any reshape space and provide data length only! 
+ */ +static int _lv_reshape_get_new_len(struct logical_volume *lv, + uint32_t old_image_count, uint32_t new_image_count, + uint32_t *len) +{ + struct lv_segment *seg = first_seg(lv); + uint32_t di_old = _data_rimages_count(seg, old_image_count); + uint32_t di_new = _data_rimages_count(seg, new_image_count); + uint32_t old_lv_reshape_len, new_lv_reshape_len; + uint64_t r; + + if (!di_old || !di_new) + return_0; + + old_lv_reshape_len = di_old * _reshape_len_per_dev(seg); + new_lv_reshape_len = di_new * _reshape_len_per_dev(seg); + + r = (uint64_t) lv->le_count; + r -= old_lv_reshape_len; + if ((r = new_lv_reshape_len + r * di_new / di_old) > UINT_MAX) { + log_error("No proper new segment length for %s!", display_lvname(lv)); + return 0; + } + + *len = (uint32_t) r; + + return 1; +} + +/* + * Extend/reduce size of @lv and it's first segment during reshape to @extents + */ +static int _reshape_adjust_to_size(struct logical_volume *lv, + uint32_t old_image_count, uint32_t new_image_count) +{ + struct lv_segment *seg = first_seg(lv); + uint32_t new_le_count; + + if (!_lv_reshape_get_new_len(lv, old_image_count, new_image_count, &new_le_count)) + return 0; + + /* Externally visible LV size w/o reshape space */ + lv->le_count = seg->len = new_le_count; + lv->size = (lv->le_count - new_image_count * _reshape_len_per_dev(seg)) * lv->vg->extent_size; + /* seg->area_len does not change */ + + if (old_image_count < new_image_count) { + /* Extend from raid1 mapping */ + if (old_image_count == 2 && + !seg->stripe_size) + seg->stripe_size = DEFAULT_STRIPESIZE; + + /* Reduce to raid1 mapping */ + } else if (new_image_count == 2) + seg->stripe_size = 0; + + return 1; +} + +/* + * HM Helper: + * + * Reshape: add immages to existing raid lv + * + */ +static int _lv_raid_change_image_count(struct logical_volume *lv, uint32_t new_count, + struct dm_list *allocate_pvs, struct dm_list *removal_lvs, + int commit, int use_existing_area_len); +static int _raid_reshape_add_images(struct 
logical_volume *lv, + const struct segment_type *new_segtype, int yes, + uint32_t old_image_count, uint32_t new_image_count, + const unsigned new_stripes, const unsigned new_stripe_size, + struct dm_list *allocate_pvs) +{ + uint32_t grown_le_count, current_le_count, s; + struct volume_group *vg; + struct logical_volume *slv; + struct lv_segment *seg = first_seg(lv); + struct lvinfo info = { 0 }; + + if (new_image_count == old_image_count) { + log_error(INTERNAL_ERROR "No change of image count on LV %s.", display_lvname(lv)); + return_0; + } + + vg = lv->vg; + + if (!lv_info(vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) { + log_error("lv_info failed: aborting"); + return 0; + } + + if (seg->segtype != new_segtype) + log_print_unless_silent("Ignoring layout change on device adding reshape"); + + if (seg_is_any_raid10(seg) && (new_image_count % seg->data_copies)) { + log_error("Can't reshape %s LV %s to odd number of stripes.", + lvseg_name(seg), display_lvname(lv)); + return 0; + } + + if (!_lv_reshape_get_new_len(lv, old_image_count, new_image_count, &grown_le_count)) + return 0; + + current_le_count = lv->le_count - _reshape_len_per_lv(lv); + grown_le_count -= _reshape_len_per_dev(seg) * _data_rimages_count(seg, new_image_count); + log_warn("WARNING: Adding stripes to active%s logical volume %s " + "will grow it from %u to %u extents!", + info.open_count ? " and open" : "", + display_lvname(lv), current_le_count, grown_le_count); + log_print_unless_silent("Run \"lvresize -l%u %s\" to shrink it or use the additional capacity", + current_le_count, display_lvname(lv)); + if (!yes && yes_no_prompt("Are you sure you want to add %u images to %s LV %s? 
[y/n]: ", + new_image_count - old_image_count, lvseg_name(seg), display_lvname(lv)) == 'n') { + log_error("Logical volume %s NOT converted.", display_lvname(lv)); + return 0; + } + +//printf("%s[%u] lv=%s segtype=%s data_copies=%u lv_size=%s le_count=%u\n", __func__, __LINE__, display_lvname(lv), lvseg_name(seg), seg->data_copies, display_size(lv->vg->cmd, lv->size), lv->le_count); + /* Allocate new image component pairs for the additional stripes and grow LV size */ + log_debug_metadata("Adding %u data and metadata image LV pair%s to %s", + new_image_count - old_image_count, new_image_count - old_image_count > 1 ? "s" : "", + display_lvname(lv)); + if (!_lv_raid_change_image_count(lv, new_image_count, allocate_pvs, NULL, 0, 0)) + return 0; + + /* Reshape adding image component pairs -> change sizes/counters accordingly */ + if (!_reshape_adjust_to_size(lv, old_image_count, new_image_count)) { + log_error("Failed to adjust LV %s to new size!", display_lvname(lv)); + return 0; + } + + /* Allocate forward out of place reshape space at the beginning of all data image LVs */ + log_debug_metadata("(Re)allocating reshape space for %s", display_lvname(lv)); + if (!_lv_alloc_reshape_space(lv, alloc_begin, NULL, allocate_pvs)) + return 0; + + /* + * Reshape adding image component pairs: + * + * - reset rebuild flag on new image LVs + * - set delta disks plus flag on new image LVs + */ + if (old_image_count < seg->area_count) { + log_debug_metadata("Setting delta disk flag on new data LVs of %s", + display_lvname(lv)); + for (s = old_image_count; s < seg->area_count; s++) { + slv = seg_lv(seg, s); + slv->status &= ~LV_REBUILD; + slv->status |= LV_RESHAPE_DELTA_DISKS_PLUS; + } + } + + seg->stripe_size = new_stripe_size; +//printf("%s[%u] lv=%s segtype=%s data_copies=%u lv_size=%s le_count=%u\n", __func__, __LINE__, display_lvname(lv), lvseg_name(seg), seg->data_copies, display_size(lv->vg->cmd, lv->size), lv->le_count); + + return 1; +} + +/* + * HM Helper: + * + * Reshape: 
remove images from existing raid lv + * + */ +static int _raid_reshape_remove_images(struct logical_volume *lv, + const struct segment_type *new_segtype, + int yes, int force, + uint32_t old_image_count, uint32_t new_image_count, + const unsigned new_stripes, const unsigned new_stripe_size, + struct dm_list *allocate_pvs, struct dm_list *removal_lvs) +{ + uint32_t active_lvs, current_le_count, reduced_le_count, removed_lvs, s; + uint64_t extend_le_count; + unsigned devs_health, devs_in_sync; + struct lv_segment *seg = first_seg(lv); + struct lvinfo info = { 0 }; + + if (seg_is_any_raid6(seg) && new_stripes < 3) { + log_error("Minimum 3 stripes required for %s LV %s", + lvseg_name(seg), display_lvname(lv)); + return 0; + } + + if (new_image_count == old_image_count) { + log_error(INTERNAL_ERROR "No change of image count on LV %s.", display_lvname(lv)); + return_0; + } + + switch (_reshaped_state(lv, new_image_count, &devs_health, &devs_in_sync)) { + case 3: + /* + * Disk removal reshape step 1: + * + * we got more disks active than requested via @new_stripes + * + * -> flag the ones to remove + * + */ + if (seg->segtype != new_segtype) + log_print_unless_silent("Ignoring layout change on device removing reshape"); + + if (!lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) { + log_error("lv_info failed: aborting"); + return 0; + } + + if (!_lv_reshape_get_new_len(lv, old_image_count, new_image_count, &reduced_le_count)) + return 0; + + reduced_le_count -= seg->reshape_len * _data_rimages_count(seg, new_image_count); + current_le_count = lv->le_count - seg->reshape_len * _data_rimages_count(seg, old_image_count); + extend_le_count = current_le_count * current_le_count / reduced_le_count; + log_warn("WARNING: Removing stripes from active%s logical " + "volume %s will shrink it from %s to %s!", + info.open_count ? 
" and open" : "", display_lvname(lv), + display_size(lv->vg->cmd, (uint64_t) current_le_count * lv->vg->extent_size), + display_size(lv->vg->cmd, (uint64_t) reduced_le_count * lv->vg->extent_size)); + log_warn("THIS MAY DESTROY (PARTS OF) YOUR DATA!"); + if (!yes) + log_warn("Interrupt the conversion and run \"lvresize -y -l%u %s\" to " + "keep the current size if not done already!", + (uint32_t) extend_le_count, display_lvname(lv)); + log_print_unless_silent("If that leaves the logical volume larger than %llu extents due to stripe rounding,", + (unsigned long long) extend_le_count); + log_print_unless_silent("you may want to grow the content afterwards (filesystem etc.)"); + log_warn("WARNING: too remove freed stripes after the conversion has finished, you have to run \"lvconvert --stripes %u %s\"", + new_stripes, display_lvname(lv)); + + if (!force) { + log_warn("WARNING: Can't remove stripes without --force option"); + return 0; + } + + if (!yes && yes_no_prompt("Are you sure you want to remove %u images from %s LV %s? 
[y/n]: ", + old_image_count - new_image_count, lvseg_name(seg), display_lvname(lv)) == 'n') { + log_error("Logical volume %s NOT converted.", display_lvname(lv)); + return 0; + } + + /* + * Allocate backward out of place reshape space at the + * _end_ of all data image LVs, because MD reshapes backwards + * to remove disks from a raid set + */ + if (!_lv_alloc_reshape_space(lv, alloc_end, NULL, allocate_pvs)) + return 0; + + /* Flag all disks past new images as delta disks minus to kernel */ + for (s = new_image_count; s < old_image_count; s++) + seg_lv(seg, s)->status |= LV_RESHAPE_DELTA_DISKS_MINUS; + + if (seg_is_any_raid5(seg) && new_image_count == 2) + seg->data_copies = 2; + + break; + + case 1: + /* + * Disk removal reshape step 2: + * + * we got the proper (smaller) amount of devices active + * for a previously finished disk removal reshape + * + * -> remove the freed up images and reduce LV size + * + */ + for (active_lvs = removed_lvs = s = 0; s < seg->area_count; s++) { + struct logical_volume *slv; + + if (!seg_lv(seg, s) || !(slv = seg_lv(seg, s))) { + log_error("Missing image sub lv off LV %s", display_lvname(lv)); + return 0; + } + + if (slv->status & LV_REMOVE_AFTER_RESHAPE) + removed_lvs++; + else + active_lvs++; + } + + if (devs_in_sync != new_image_count) { + log_error("No correct kernel/lvm active LV count on %s", display_lvname(lv)); + return 0; + } + + if (active_lvs + removed_lvs != old_image_count) { + log_error ("No correct kernel/lvm total LV count on %s", display_lvname(lv)); + return 0; + } + + /* Reshape removing image component pairs -> change sizes accordingly */ + if (!_reshape_adjust_to_size(lv, old_image_count, new_image_count)) { + log_error("Failed to adjust LV %s to new size!", display_lvname(lv)); + return 0; + } + + log_debug_metadata("Removing %u data and metadata image LV pair%s from %s", + old_image_count - new_image_count, old_image_count - new_image_count > 1 ? 
"s" : "", + display_lvname(lv)); + if (!_lv_raid_change_image_count(lv, new_image_count, allocate_pvs, removal_lvs, 0, 0)) + return 0; + + seg->area_count = new_image_count; + + break; + + default: + log_error(INTERNAL_ERROR "Bad return provided to %s.", __func__); + return 0; + } + + seg->stripe_size = new_stripe_size; + + return 1; +} +/* + * HM Helper: + * + * Reshape: keep images in RAID @lv but change stripe size or data copies + * + */ +static int _raid_reshape_keep_images(struct logical_volume *lv, + const struct segment_type *new_segtype, + int yes, int force, int *force_repair, + const int new_data_copies, const unsigned new_stripe_size, + struct dm_list *allocate_pvs) +{ + int alloc_reshape_space = 1; + enum alloc_where where = alloc_anywhere; + struct lv_segment *seg = first_seg(lv); + + if (seg->segtype != new_segtype) + log_print_unless_silent("Converting %s LV %s to %s", + lvseg_name(seg), display_lvname(lv), new_segtype->name); + if (!yes && yes_no_prompt("Are you sure you want to convert %s LV %s? [y/n]: ", + lvseg_name(seg), display_lvname(lv)) == 'n') { + log_error("Logical volume %s NOT converted.", display_lvname(lv)); + return 0; + } + + seg->stripe_size = new_stripe_size; + + /* + * Reshape layout algorithm or chunksize: + * + * Allocate free out-of-place reshape space unless raid10_far. + * + * If other raid10, allocate it appropriately. + * + * Allocate it anywhere for raid4/5 to avoid remapping + * it in case it is already allocated. + * + * The dm-raid target is able to use the space wherever it + * is found by appropriately selecting forward or backward reshape. 
+ */ + if (seg->area_count != 2 && + alloc_reshape_space && + !_lv_alloc_reshape_space(lv, where, NULL, allocate_pvs)) + return 0; + + seg->segtype = new_segtype; + + return 1; +} + +/* HM Helper: write, optionally suspend @lv (origin), commit and optionally backup metadata of @vg */ +static int _vg_write_lv_suspend_commit_backup(struct volume_group *vg, + struct logical_volume *lv, + int origin_only, int do_backup) +{ + int r = 1; + + if (!vg_write(vg)) { + log_error("Write of VG %s failed", vg->name); + return_0; + } + + if (lv && !(r = (origin_only ? suspend_lv_origin(vg->cmd, lv_lock_holder(lv)) : + suspend_lv(vg->cmd, lv_lock_holder(lv))))) { + log_error("Failed to suspend %s before committing changes", + display_lvname(lv)); + vg_revert(lv->vg); + } else if (!(r = vg_commit(vg))) + stack; /* !vg_commit() has implicit vg_revert() */ + + if (r && do_backup && !backup(vg)) + log_error("Backup of VG %s failed; continuing", vg->name); + + return r; +} + +static int _vg_write_commit_backup(struct volume_group *vg) +{ + return _vg_write_lv_suspend_commit_backup(vg, NULL, 1, 1); +} + +__attribute__ ((__unused__)) +static int _vg_write_commit(struct volume_group *vg) +{ + return _vg_write_lv_suspend_commit_backup(vg, NULL, 1, 0); +} + +/* Write vg of @lv, suspend @lv and commit the vg */ +static int _vg_write_lv_suspend_vg_commit(struct logical_volume *lv, int origin_only) +{ + return _vg_write_lv_suspend_commit_backup(lv->vg, lv, origin_only, 0); +} + +/* Helper: function to activate @lv exclusively local */ +static int _activate_sub_lv_excl_local(struct logical_volume *lv) +{ + if (lv && !activate_lv_excl_local(lv->vg->cmd, lv)) { + log_error("Failed to activate %s.", display_lvname(lv)); + return 0; + } + + return 1; +} + +/* Helper: function to activate any sub LVs of @lv exclusively local starting with area indexed by @start_idx */ +static int _activate_sub_lvs_excl_local(struct logical_volume *lv, uint32_t start_idx) +{ + uint32_t s; + struct lv_segment *seg = 
first_seg(lv); + + /* seg->area_count may be 0 here! */ + log_debug_metadata("Activating %u image component%s of LV %s", + seg->area_count - start_idx, seg->meta_areas ? " pairs" : "s", + display_lvname(lv)); + for (s = start_idx; s < seg->area_count; s++) + if (!_activate_sub_lv_excl_local(seg_lv(seg, s)) || + !_activate_sub_lv_excl_local(seg_metalv(seg, s))) + return 0; + + return 1; +} + +/* Helper: function to activate any sub LVs of @lv exclusively local starting with area indexed by @start_idx */ +static int _activate_sub_lvs_excl_local_list(struct logical_volume *lv, struct dm_list *lv_list) +{ + int r = 1; + struct lv_list *lvl; + + if (lv_list) { +dm_list_iterate_items(lvl, lv_list) +printf("%s[%u] lv=%s\n", __func__, __LINE__, display_lvname(lvl->lv)); + dm_list_iterate_items(lvl, lv_list) { + log_very_verbose("Activating logical volume %s before %s in kernel.", + display_lvname(lvl->lv), display_lvname(lv_lock_holder(lv))); + if (!_activate_sub_lv_excl_local(lvl->lv)) + r = 0; /* But let's try with the rest */ + } + } + + return r; +} + +/* Helper: callback function to activate any new image component pairs @lv */ +static int _pre_raid_add_legs(struct logical_volume *lv, void *data) +{ + if (!_vg_write_lv_suspend_vg_commit(lv, 1)) + return 0; + + /* Reload any changed image component pairs for out-of-place reshape space */ + if (!_activate_sub_lvs_excl_local(lv, 0)) + return 0; + + return 2; /* 1: ok, 2: metadata committed */ +} + +/* Helper: callback function to activate any rmetas on @data list */ +static int _pre_raid0_remove_rmeta(struct logical_volume *lv, void *data) +{ + struct dm_list *lv_list = data; + + if (!_vg_write_lv_suspend_vg_commit(lv, 1)) + return 0; + + /* 1: ok, 2: metadata committed */ + return _activate_sub_lvs_excl_local_list(lv, lv_list) ? 
2 : 0; +} + +/* Helper: callback dummy needed for */ +static int _post_raid_dummy(struct logical_volume *lv, void *data) +{ + return 1; +} + +/* + * Reshape logical volume @lv by adding/removing stripes + * (absolute new stripes given in @new_stripes), changing + * layout (e.g. raid5_ls -> raid5_ra) or changing + * stripe size to @new_stripe_size. + * + * In case of disk addition, any PVs listed in mandatory + * @allocate_pvs will be used for allocation of new stripes. + */ +static int _raid_reshape(struct logical_volume *lv, + const struct segment_type *new_segtype, + int yes, int force, + const unsigned new_data_copies, + const unsigned new_region_size, + const unsigned new_stripes, + const unsigned new_stripe_size, + struct dm_list *allocate_pvs) +{ + int force_repair = 0, r, too_few = 0; + unsigned devs_health, devs_in_sync; + uint32_t new_image_count, old_image_count; + enum alloc_where where_it_was; + struct lv_segment *seg = first_seg(lv); + struct dm_list removal_lvs; + + if (!seg_is_reshapable_raid(seg)) + return_0; + + if (!is_same_level(seg->segtype, new_segtype)) + return_0; + + if (!(old_image_count = seg->area_count)) + return_0; + + if ((new_image_count = new_stripes + seg->segtype->parity_devs) < 2) + return_0; + + if (!_check_max_raid_devices(new_image_count)) + return_0; + + if (!_raid_in_sync(lv)) { + log_error("Unable to convert %s while it is not in-sync", + display_lvname(lv)); + return 0; + } + + dm_list_init(&removal_lvs); + + /* No change in layout requested ? 
*/ + if (seg->segtype == new_segtype && + seg->data_copies == new_data_copies && + seg->region_size == new_region_size && + old_image_count == new_image_count && + seg->stripe_size == new_stripe_size) { + /* + * No change in segment type, image count, region or stripe size has been requested -> + * user requests this to remove any reshape space from the @lv + */ + if (!_lv_free_reshape_space_with_status(lv, &where_it_was)) { + log_error(INTERNAL_ERROR "Failed to free reshape space of %s", + display_lvname(lv)); + return 0; + } + + log_print_unless_silent("No change in RAID LV %s layout, freeing reshape space", display_lvname(lv)); + + if (where_it_was == alloc_none) { + log_print_unless_silent("LV %s does not have reshape space allocated", + display_lvname(lv)); + return 1; + } + + if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, 0, NULL, NULL)) + return_0; + + return 1; + } + + /* raid4/5 with N image component pairs (i.e. N-1 stripes): allow for raid4/5 reshape to 2 devices, i.e. raid1 layout */ + if (seg_is_raid4(seg) || seg_is_any_raid5(seg)) { + if (new_stripes < 1) + too_few = 1; + + /* raid6 (raid10 can't shrink reshape) device count: check for 2 stripes minimum */ + } else if (new_stripes < 2) + too_few = 1; + + if (too_few) { + log_error("Too few stripes requested"); + return 0; + } + + switch ((r = _reshaped_state(lv, old_image_count, &devs_health, &devs_in_sync))) { + case 1: + /* + * old_image_count == kernel_dev_count + * + * Check for device health + */ + if (devs_in_sync < devs_health) { + log_error("Can't reshape out of sync LV %s", display_lvname(lv)); + return 0; + } + + /* device count and health are good -> ready to go */ + break; + + case 2: + if (devs_in_sync == new_image_count) + break; + + /* Possible after a shrinking reshape and forgotten device removal */ + log_error("Device count is incorrect. 
" + "Forgotten \"lvconvert --stripes %d %s\" to remove %u images after reshape?", + devs_in_sync - seg->segtype->parity_devs, display_lvname(lv), + old_image_count - devs_in_sync); + return 0; + + default: + log_error(INTERNAL_ERROR "Bad return=%d provided to %s.", r, __func__); + return 0; + } + + if (seg->stripe_size != new_stripe_size) + log_print_unless_silent("Converting stripesize %s of %s LV %s to %s", + display_size(lv->vg->cmd, seg->stripe_size), + lvseg_name(seg), display_lvname(lv), + display_size(lv->vg->cmd, new_stripe_size)); + + /* Handle disk addition reshaping */ + if (old_image_count < new_image_count) { + if (!_raid_reshape_add_images(lv, new_segtype, yes, + old_image_count, new_image_count, + new_stripes, new_stripe_size, allocate_pvs)) + return 0; + + /* Handle disk removal reshaping */ + } else if (old_image_count > new_image_count) { + if (!_raid_reshape_remove_images(lv, new_segtype, yes, force, + old_image_count, new_image_count, + new_stripes, new_stripe_size, + allocate_pvs, &removal_lvs)) + return 0; + + /* + * Handle raid set layout reshaping w/o changing # of legs (allocation algorithm or stripe size change) + * (e.g. raid5_ls -> raid5_n or stripe size change) + */ + } else if (!_raid_reshape_keep_images(lv, new_segtype, yes, force, &force_repair, + new_data_copies, new_stripe_size, allocate_pvs)) + return 0; + + /* HM FIXME: workaround for not resetting "nosync" flag */ + init_mirror_in_sync(0); + + if (seg->area_count != 2 || old_image_count != seg->area_count) { + if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, 0, &removal_lvs, + _post_raid_dummy, NULL, + _pre_raid_add_legs, NULL)) + return 0; + } if (!_vg_write_commit_backup(lv->vg)) + return 0; + + return 1; // force_repair ? _lv_cond_repair(lv) : 1; +} + +/* + * Check for reshape request defined by: + * + * - raid type is reshape capable + * - no raid level change + * - # of stripes requested to change + * (i.e. 
add/remove disks from a striped raid set) + * -or- + * - stripe size change requestd + * (e.g. 32K -> 128K) + * + * Returns: + * + * 0 -> no reshape request + * 1 -> allowed reshape request + * 2 -> prohibited reshape request + * 3 -> allowed region size change request + */ +static int _reshape_requested(const struct logical_volume *lv, const struct segment_type *segtype, + const int data_copies, const uint32_t region_size, + const uint32_t stripes, const uint32_t stripe_size) +{ + struct lv_segment *seg = first_seg(lv); + + /* This segment type is not reshapable */ + if (!seg_is_reshapable_raid(seg)) + return 0; + + if (!_reshape_is_supported(lv->vg->cmd, seg->segtype)) + return 0; + + /* Switching raid levels is a takeover, no reshape */ + if (!is_same_level(seg->segtype, segtype)) + return 0; + + /* Possible takeover in case #data_copies == #stripes */ + if (seg_is_raid10_near(seg) && segtype_is_raid1(segtype)) + return 0; + + /* No layout change -> allow for removal of reshape space */ + if (seg->segtype == segtype && + data_copies == seg->data_copies && + region_size == seg->region_size && + stripes == _data_rimages_count(seg, seg->area_count) && + stripe_size == seg->stripe_size) + return 1; + + /* Ensure region size is >= stripe size */ + if (!seg_is_striped(seg) && + !seg_is_any_raid0(seg) && + (region_size || stripe_size) && + ((region_size ?: seg->region_size) < (stripe_size ?: seg->stripe_size))) { + log_error("region size may not be smaller than stripe size on LV %s", + display_lvname(lv)); + return 2; + } +#if 0 + if ((_lv_is_duplicating(lv) || lv_is_duplicated(lv)) && + ((seg_is_raid1(seg) ? 
0 : (stripes != _data_rimages_count(seg, seg->area_count))) || + data_copies != seg->data_copies)) + goto err; + if ((!seg_is_striped(seg) && segtype_is_raid10_far(segtype)) || + (seg_is_raid10_far(seg) && !segtype_is_striped(segtype))) { + if (data_copies == seg->data_copies && + region_size == seg->region_size) { + log_error("Can't convert %sraid10_far", + seg_is_raid10_far(seg) ? "" : "to "); + goto err; + } + } + + if (seg_is_raid10_far(seg)) { + if (stripes != _data_rimages_count(seg, seg->area_count)) { + log_error("Can't change stripes in raid10_far"); + goto err; + } + + if (stripe_size != seg->stripe_size) { + log_error("Can't change stripe size in raid10_far"); + goto err; + } + } +#endif + + if (seg_is_any_raid10(seg) && seg->area_count > 2 && + stripes && stripes < seg->area_count - seg->segtype->parity_devs) { + log_error("Can't remove stripes from raid10"); + goto err; + } + + if (data_copies != seg->data_copies) { + if (seg_is_raid10_near(seg)) + return 0; +#if 0 + if (seg_is_raid10_far(seg)) + return segtype_is_raid10_far(segtype) ? 
1 : 0; + + if (seg_is_raid10_offset(seg)) { + log_error("Can't change number of data copies on %s LV %s", + lvseg_name(seg), display_lvname(lv)); + goto err; + } +#endif + } + +#if 0 + /* raid10_{near,offset} case */ + if ((seg_is_raid10_near(seg) && segtype_is_raid10_offset(segtype)) || + (seg_is_raid10_offset(seg) && segtype_is_raid10_near(segtype))) { + if (stripes >= seg->area_count) + return 1; + + goto err; + } + + /* + * raid10_far is not reshapable in MD at all; + * lvm/dm adds reshape capability to add/remove data_copies + */ + if (seg_is_raid10_far(seg) && segtype_is_raid10_far(segtype)) { + if (stripes && stripes == seg->area_count && + data_copies > 1 && + data_copies <= seg->area_count && + data_copies != seg->data_copies) + return 1; + + goto err; + + } else if (seg_is_any_raid10(seg) && segtype_is_any_raid10(segtype) && + data_copies > 1 && data_copies != seg->data_copies) + goto err; +#endif + + /* Change layout (e.g. raid5_ls -> raid5_ra) keeping # of stripes */ + if (seg->segtype != segtype) { + if (stripes && stripes != _data_rimages_count(seg, seg->area_count)) + goto err; + + return 1; + } + + if (stripes && stripes == _data_rimages_count(seg, seg->area_count) && + stripe_size == seg->stripe_size) { + log_error("LV %s already has %u stripes.", + display_lvname(lv), stripes); + return 2; + } + + return (stripes || stripe_size) ? 1 : 0; + +err: +#if 0 + if (lv_is_duplicated(lv)) + log_error("Conversion of duplicating sub LV %s rejected", display_lvname(lv)); + else + log_error("Use \"lvconvert --duplicate --type %s ... 
%s", segtype->name, display_lvname(lv)); +#endif + return 2; +} + /* * _alloc_rmeta_for_lv * @lv @@ -1551,8 +2890,13 @@ static int _lv_raid_change_image_count(struct logical_volume *lv, uint32_t new_c } int lv_raid_change_image_count(struct logical_volume *lv, uint32_t new_count, - struct dm_list *allocate_pvs) + const uint32_t region_size, struct dm_list *allocate_pvs) { + struct lv_segment *seg = first_seg(lv); + + if (region_size) + seg->region_size = region_size; + return _lv_raid_change_image_count(lv, new_count, allocate_pvs, NULL, 1, 0); } @@ -1689,7 +3033,7 @@ int lv_raid_split(struct logical_volume *lv, const char *split_name, * Eliminate the residual LVs */ if (!_deactivate_and_remove_lvs(lv->vg, &removal_lvs)) - return_0; + return_0; if (!vg_write(lv->vg) || !vg_commit(lv->vg)) return_0; @@ -1945,10 +3289,10 @@ static int _alloc_and_add_new_striped_segment(struct logical_volume *lv, return_0; /* Allocate a segment with seg->area_count areas */ - if (!(new_seg = alloc_lv_segment(striped_segtype, lv, le, area_len * seg->area_count, + if (!(new_seg = alloc_lv_segment(striped_segtype, lv, le, area_len * seg->area_count, 0, 0, seg->stripe_size, NULL, seg->area_count, - area_len, seg->chunk_size, 0, 0, NULL))) + area_len, 0, seg->chunk_size, 0, 0, NULL))) return_0; dm_list_add(new_segments, &new_seg->list); @@ -2182,8 +3526,16 @@ static int _raid0_add_or_remove_metadata_lvs(struct logical_volume *lv, return_0; if (update_and_reload) { +#if 0 if (!_lv_update_and_reload_list(lv, 1, removal_lvs)) return_0; +#else + if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, 1, removal_lvs, + _post_raid_dummy, NULL, + _pre_raid0_remove_rmeta, removal_lvs)) + return_0; +#endif + /* If any residual LVs, eliminate them, write VG, commit it and take a backup */ return _eliminate_extracted_lvs(lv->vg, removal_lvs); @@ -2192,52 +3544,108 @@ static int _raid0_add_or_remove_metadata_lvs(struct logical_volume *lv, return 1; } -/* - * Clear any rebuild disk flags on lv. 
- * If any flags were cleared, *flags_were_cleared is set to 1. - */ -/* FIXME Generalise into foreach_underlying_lv_segment_area. */ -static void _clear_rebuild_flags(struct logical_volume *lv, int *flags_were_cleared) +/* Reset any rebuild or reshape disk flags on @lv, first segment already passed to the kernel */ +static int _reset_flags_passed_to_kernel(struct logical_volume *lv, int *flags_reset) { + uint32_t lv_count = 0, s; + struct logical_volume *slv; struct lv_segment *seg = first_seg(lv); - struct logical_volume *sub_lv; - uint32_t s; - uint64_t flags_to_clear = LV_REBUILD; + uint64_t reset_flags = LV_REBUILD | LV_RESHAPE_DELTA_DISKS_PLUS | LV_RESHAPE_DELTA_DISKS_MINUS; for (s = 0; s < seg->area_count; s++) { if (seg_type(seg, s) == AREA_PV) continue; - sub_lv = seg_lv(seg, s); + if (!(slv = seg_lv(seg, s))) + return_0; /* Recurse into sub LVs */ - _clear_rebuild_flags(sub_lv, flags_were_cleared); + if (!_reset_flags_passed_to_kernel(slv, flags_reset)) + return 0; - if (sub_lv->status & flags_to_clear) { - sub_lv->status &= ~flags_to_clear; - *flags_were_cleared = 1; + if (slv->status & LV_RESHAPE_DELTA_DISKS_MINUS) { + *flags_reset = 1; + slv->status |= LV_REMOVE_AFTER_RESHAPE; + seg_metalv(seg, s)->status |= LV_REMOVE_AFTER_RESHAPE; } + + if (slv->status & reset_flags) { + *flags_reset = 1; + slv->status &= ~reset_flags; + } + + lv_count++; } + + /* Reset passed in data offset (reshaping) */ + if (lv_count) + seg->data_offset = 0; + + return 1; } /* + * HM Helper: + * + * Minimum 4 arguments! + * * Updates and reloads metadata, clears any flags passed to the kernel, * eliminates any residual LVs and updates and reloads metadata again. * - * lv removal_lvs + * @lv mandatory argument, rest variable: * - * This minimally involves 2 metadata commits. 
+ @lv @origin_only @removal_lvs/NULL @fn_post_on_lv/NULL [ @fn_post_data/NULL [ @fn_pre_on_lv/NULL @fn_pre_data/NULL ] ] + * + * Run optional variable args function fn_post_on_lv with fn_post_data on @lv before second metadata update + * Run optional variable args function fn_pre_on_lv with fn_pre_data on @lv before first metadata update + * + * This minimally involves 2 metadata commits or more, depending on + * pre and post functions carrying out any additional ones or not. + * + * WARNING: needs to be called with at least 4 arguments to suit va_list processing! */ -static int _lv_update_reload_fns_reset_eliminate_lvs(struct logical_volume *lv, struct dm_list *removal_lvs) +static int _lv_update_reload_fns_reset_eliminate_lvs(struct logical_volume *lv, int origin_only, ...) { - int flags_were_cleared = 0, r = 0; + int flags_reset = 0, r = 0; + va_list ap; + fn_on_lv_t fn_pre_on_lv = NULL, fn_post_on_lv; + void *fn_pre_data, *fn_post_data; + struct dm_list *removal_lvs; - if (!_lv_update_and_reload_list(lv, 1, removal_lvs)) - return_0; + va_start(ap, origin_only); + removal_lvs = va_arg(ap, struct dm_list *); + + /* Retrieve post/pre functions and post/pre data reference from variable arguments, if any */ + if ((fn_post_on_lv = va_arg(ap, fn_on_lv_t))) { + fn_post_data = va_arg(ap, void *); + if ((fn_pre_on_lv = va_arg(ap, fn_on_lv_t))) + fn_pre_data = va_arg(ap, void *); + } + + /* Call any fn_pre_on_lv before the first update and reload call (e.g. to rename LVs) */ + if (fn_pre_on_lv && !(r = fn_pre_on_lv(lv, fn_pre_data))) { + log_error(INTERNAL_ERROR "Pre callout function failed"); + goto err; + } + + if (r == 2) { + /* + * Returning 2 from pre function -> lv is suspended and + * metadata got updated, don't need to do it again + */ + if (!(origin_only ? 
resume_lv_origin(lv->vg->cmd, lv_lock_holder(lv)) : + resume_lv(lv->vg->cmd, lv_lock_holder(lv)))) { + log_error("Failed to resume %s", display_lvname(lv)); + goto err; + } + + /* Update metadata and reload mappings including flags (e.g. LV_REBUILD, LV_RESHAPE_DELTA_DISKS_PLUS) */ + } else if (!(origin_only ? lv_update_and_reload_origin(lv) : lv_update_and_reload(lv))) + goto err; /* Eliminate any residual LV and don't commit the metadata */ if (!_eliminate_extracted_lvs_optional_write_vg(lv->vg, removal_lvs, 0)) - return_0; + goto err; /* * Now that any 'REBUILD' or 'RESHAPE_DELTA_DISKS' etc. @@ -2248,20 +3656,27 @@ static int _lv_update_reload_fns_reset_eliminate_lvs(struct logical_volume *lv, * Writes and commits metadata if any flags have been reset * and if successful, performs metadata backup. */ - /* FIXME This needs to be done through hooks in the metadata */ - log_debug_metadata("Clearing any flags for %s passed to the kernel.", - display_lvname(lv)); - _clear_rebuild_flags(lv, &flags_were_cleared); + log_debug_metadata("Clearing any flags for %s passed to the kernel", display_lvname(lv)); + if (!_reset_flags_passed_to_kernel(lv, &flags_reset)) + goto err; - log_debug_metadata("Updating metadata and reloading mappings for %s.", - display_lvname(lv)); - if ((r != 2 || flags_were_cleared) && !lv_update_and_reload(lv)) { - log_error("Update and reload of LV %s failed.", - display_lvname(lv)); - return 0; + /* Call any @fn_post_on_lv before the second update call (e.g. to rename LVs back) */ + if (fn_post_on_lv && !(r = fn_post_on_lv(lv, fn_post_data))) { + log_error("Post callout function failed"); + goto err; } - return 1; + /* Update and reload to clear out reset flags in the metadata and in the kernel */ + log_debug_metadata("Updating metadata mappings for %s", display_lvname(lv)); + if ((r != 2 || flags_reset) && !(origin_only ? 
lv_update_and_reload_origin(lv) : lv_update_and_reload(lv))) { + log_error(INTERNAL_ERROR "Update of LV %s failed", display_lvname(lv)); + goto err; + } + + r = 1; +err: + va_end(ap); + return r; } /* @@ -2473,7 +3888,7 @@ static int _convert_raid1_to_mirror(struct logical_volume *lv, if (!attach_mirror_log(first_seg(lv), log_lv)) return_0; - return update_and_reload ? _lv_update_reload_fns_reset_eliminate_lvs(lv, removal_lvs) : 1; + return update_and_reload ? _lv_update_reload_fns_reset_eliminate_lvs(lv, 0, removal_lvs, NULL) : 1; } /* @@ -2501,11 +3916,11 @@ static int _striped_to_raid0_move_segs_to_raid0_lvs(struct logical_volume *lv, status = RAID | SEG_RAID | (seg_from->status & (LVM_READ | LVM_WRITE)); /* Allocate a data LV segment with one area for each segment in the striped LV */ - if (!(seg_new = alloc_lv_segment(segtype, dlv, - le, seg_from->area_len, + if (!(seg_new = alloc_lv_segment(segtype, dlv, + le, seg_from->area_len, 0, status, 0 /* stripe_size */, NULL, 1 /* area_count */, - seg_from->area_len, + seg_from->area_len, 0, 0 /* chunk_size */, 0 /* region_size */, 0, NULL))) return_0; @@ -2719,10 +4134,10 @@ static struct lv_segment *_convert_striped_to_raid0(struct logical_volume *lv, */ seg = first_seg(dm_list_item(dm_list_first(&data_lvs), struct lv_list)->lv); if (!(raid0_seg = alloc_lv_segment(segtype, lv, - 0 /* le */, lv->le_count /* len */, + 0 /* le */, lv->le_count /* len */, 0, 0, stripe_size, NULL /* log_lv */, - area_count, area_len, + area_count, area_len, 0, 0 /* chunk_size */, 0 /* seg->region_size */, 0u /* extents_copied */ , NULL /* pvmove_source_seg */))) { @@ -2818,37 +4233,68 @@ static struct possible_takeover_reshape_type _possible_takeover_reshape_types[] { .current_types = SEG_RAID4|SEG_RAID5_N|SEG_RAID6_N_6, .possible_types = SEG_RAID4|SEG_RAID5_N|SEG_RAID6_N_6, .current_areas = ~0U, - .options = ALLOW_REGION_SIZE }, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, + /* Reshape raid5* <-> raid5* */ + { 
.current_types = SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_LA|SEG_RAID5_N, + .possible_types = SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_LA|SEG_RAID5_N, + .current_areas = ~0U, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, + + /* Reshape raid6* <-> raid6* */ + { .current_types = SEG_RAID6_ZR|SEG_RAID6_NR|SEG_RAID6_NC|SEG_RAID6_LS_6|\ + SEG_RAID6_RS_6|SEG_RAID6_RA_6|SEG_RAID6_LA_6|SEG_RAID6_N_6, + .possible_types = SEG_RAID6_ZR|SEG_RAID6_NR|SEG_RAID6_NC|SEG_RAID6_LS_6|\ + SEG_RAID6_RS_6|SEG_RAID6_RA_6|SEG_RAID6_LA_6|SEG_RAID6_N_6, + .current_areas = ~0U, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, /* raid5_ls <-> raid6_ls_6 */ { .current_types = SEG_RAID5_LS|SEG_RAID6_LS_6, .possible_types = SEG_RAID5_LS|SEG_RAID6_LS_6, .current_areas = ~0U, - .options = ALLOW_REGION_SIZE }, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, /* raid5_rs -> raid6_rs_6 */ { .current_types = SEG_RAID5_RS|SEG_RAID6_RS_6, .possible_types = SEG_RAID5_RS|SEG_RAID6_RS_6, .current_areas = ~0U, - .options = ALLOW_REGION_SIZE }, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, /* raid5_ls -> raid6_la_6 */ { .current_types = SEG_RAID5_LA|SEG_RAID6_LA_6, .possible_types = SEG_RAID5_LA|SEG_RAID6_LA_6, .current_areas = ~0U, - .options = ALLOW_REGION_SIZE }, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, /* raid5_ls -> raid6_ra_6 */ { .current_types = SEG_RAID5_RA|SEG_RAID6_RA_6, .possible_types = SEG_RAID5_RA|SEG_RAID6_RA_6, .current_areas = ~0U, - .options = ALLOW_REGION_SIZE }, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, + + /* Reshape raid10 <-> raid10 */ + { .current_types = SEG_RAID10_NEAR, + .possible_types = SEG_RAID10_NEAR, + .current_areas = ~0U, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, /* mirror <-> raid1 with arbitrary number of legs */ { .current_types = SEG_MIRROR|SEG_RAID1, .possible_types = SEG_MIRROR|SEG_RAID1, .current_areas = 
~0U, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPES|ALLOW_STRIPE_SIZE }, + + /* raid1 -> raid5* with 2 legs */ + { .current_types = SEG_RAID1, + .possible_types = SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_LA|SEG_RAID5_N, + .current_areas = 2U, + .options = ALLOW_REGION_SIZE|ALLOW_STRIPE_SIZE }, + + /* raid5* -> raid1 with 2 legs */ + { .current_types = SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_LA|SEG_RAID5_N, + .possible_types = SEG_RAID1, + .current_areas = 2U, .options = ALLOW_REGION_SIZE }, /* END */ @@ -3092,7 +4538,7 @@ static int _raid0_meta_change_wrapper(struct logical_volume *lv, if (alloc_metadata_devs) return _raid0_add_or_remove_metadata_lvs(lv, 1, allocate_pvs, NULL); else - return _raid0_add_or_remove_metadata_lvs(lv, 1, allocate_pvs, &removal_lvs); + return _raid0_add_or_remove_metadata_lvs(lv, 1, NULL, &removal_lvs); } static int _raid0_to_striped_wrapper(struct logical_volume *lv, @@ -3154,6 +4600,7 @@ static int _raid1_to_mirrored_wrapper(TAKEOVER_FN_ARGS) * order to wipe them then reattach and set back to raid0_meta. * * Same applies to raid4 <-> raid5. + * Same applies to raid10 -> raid0_meta. 
*/ static int _clear_meta_lvs(struct logical_volume *lv) { @@ -3163,11 +4610,11 @@ static int _clear_meta_lvs(struct logical_volume *lv) const struct segment_type *tmp_segtype; struct dm_list meta_lvs; struct lv_list *lvl_array, *lvl; - int is_raid4_or_5N = seg_is_raid4(seg) || seg_is_raid5_n(seg); + int is_raid45n10 = seg_is_raid4(seg) || seg_is_raid5_n(seg) || seg_is_raid10(seg); /* Reject non-raid0_meta/raid4/raid5_n segment types cautiously */ if (!seg->meta_areas || - (!seg_is_raid0_meta(seg) && !is_raid4_or_5N)) + (!seg_is_raid0_meta(seg) && !is_raid45n10)) return_0; if (!(lvl_array = dm_pool_alloc(lv->vg->vgmem, seg->area_count * sizeof(*lvl_array)))) @@ -3325,12 +4772,11 @@ static int _shift_parity_dev(struct lv_segment *seg) /* raid456 -> raid0* / striped */ static int _raid45_to_raid54_wrapper(TAKEOVER_FN_ARGS); -static int _raid45610_to_raid0_or_striped_wrapper(TAKEOVER_FN_ARGS) +static int _takeover_downconvert_wrapper(TAKEOVER_FN_ARGS) { int rename_sublvs = 0; struct lv_segment *seg = first_seg(lv); struct dm_list removal_lvs; - uint32_t region_size = seg->region_size; dm_list_init(&removal_lvs); @@ -3341,6 +4787,26 @@ static int _raid45610_to_raid0_or_striped_wrapper(TAKEOVER_FN_ARGS) return 0; } + if (seg_is_any_raid10(seg) && (seg->area_count % seg->data_copies)) { + log_error("Can't convert %s LV %s to %s with odd number of stripes", + lvseg_name(seg), display_lvname(lv), new_segtype->name); + return 0; + } + + if (seg_is_any_raid5(seg) && + segtype_is_raid1(new_segtype)) { + if (seg->area_count != 2) { + log_error("Can't convert %s LV %s to %s with != 2 legs.", + lvseg_name(seg), display_lvname(lv), new_segtype->name); + return 0; + } + if (seg->area_count != new_image_count) { + log_error(INTERNAL_ERROR "Bogus new_image_count converting %s LV %s to %s.", + lvseg_name(seg), display_lvname(lv), new_segtype->name); + return 0; + } + } + if (!yes && yes_no_prompt("Are you sure you want to convert \"%s\" LV %s to \"%s\" " "type losing %s resilience? 
[y/n]: ", lvseg_name(seg), display_lvname(lv), new_segtype->name, @@ -3354,6 +4820,9 @@ static int _raid45610_to_raid0_or_striped_wrapper(TAKEOVER_FN_ARGS) if (!archive(lv->vg)) return_0; + if (!_lv_free_reshape_space(lv)) + return_0; + /* * raid4 (which actually gets mapped to raid5/dedicated first parity disk) * needs shifting of SubLVs to move the parity SubLV pair in the first area @@ -3365,28 +4834,35 @@ static int _raid45610_to_raid0_or_striped_wrapper(TAKEOVER_FN_ARGS) if (!_shift_parity_dev(seg)) return 0; - if (segtype_is_any_raid0(new_segtype) && - !(rename_sublvs = _rename_area_lvs(lv, "_"))) { - log_error("Failed to rename %s LV %s MetaLVs.", lvseg_name(seg), display_lvname(lv)); - return 0; - } } else if (seg_is_raid10_near(seg)) { log_debug_metadata("Reordering areas for raid10 -> raid0 takeover"); if (!_reorder_raid10_near_seg_areas(seg, reorder_from_raid10_near)) return 0; } - /* Remove meta and data LVs requested */ - if (!_lv_raid_change_image_count(lv, new_image_count, allocate_pvs, &removal_lvs, 0, 0)) + if (segtype_is_any_raid0(new_segtype) && + !(rename_sublvs = _rename_area_lvs(lv, "_"))) { + log_error("Failed to rename %s LV %s MetaLVs.", lvseg_name(seg), display_lvname(lv)); return 0; + } + + /* Remove meta and data LVs requested */ + if (new_image_count != seg->area_count) { + log_debug_metadata("Removing %" PRIu32 " component LV pair(s) to %s.", + lv_raid_image_count(lv) - new_image_count, + display_lvname(lv)); + if (!_lv_raid_change_image_count(lv, new_image_count, allocate_pvs, &removal_lvs, 0, 0)) + return 0; + + seg->area_count = new_image_count; + } /* FIXME Hard-coded raid4/5/6 to striped/raid0 */ if (segtype_is_striped_target(new_segtype) || segtype_is_any_raid0(new_segtype)) { - seg->area_len = seg->extents_copied = seg->area_len / seg->area_count; + seg->area_len = seg->extents_copied = seg->len / seg->area_count; + seg->region_size = 0; if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID0_META))) return_0; - - 
region_size = 0; } if (segtype_is_striped_target(new_segtype)) { @@ -3402,12 +4878,18 @@ static int _raid45610_to_raid0_or_striped_wrapper(TAKEOVER_FN_ARGS) } else seg->segtype = new_segtype; - seg->region_size = new_region_size ?: region_size; + if (seg_is_raid1(seg)) + seg->stripe_size = 0; - if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs)) + if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, 0, &removal_lvs, NULL)) return_0; if (rename_sublvs) { + /* Got to clear the meta lvs from raid10 content to be able to convert to e.g. raid6 */ + if (segtype_is_raid0_meta(new_segtype) && + !_clear_meta_lvs(lv)) + return_0; + if (!_rename_area_lvs(lv, NULL)) { log_error("Failed to rename %s LV %s MetaLVs.", lvseg_name(seg), display_lvname(lv)); return 0; @@ -3464,6 +4946,9 @@ static int _raid45_to_raid54_wrapper(TAKEOVER_FN_ARGS) if (!archive(lv->vg)) return_0; + if (!_lv_free_reshape_space(lv)) + return_0; + if (!_rename_area_lvs(lv, "_")) { log_error("Failed to rename %s LV %s MetaLVs.", lvseg_name(seg), display_lvname(lv)); return 0; @@ -3481,7 +4966,7 @@ static int _raid45_to_raid54_wrapper(TAKEOVER_FN_ARGS) seg->region_size = new_region_size ?: region_size; seg->segtype = new_segtype; - if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs)) + if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, 0, &removal_lvs, NULL)) return_0; init_mirror_in_sync(0); @@ -3517,8 +5002,8 @@ static int _striped_to_raid0_wrapper(struct logical_volume *lv, return 1; } -/* Helper: striped/raid0* -> raid4/5/6/10, raid45 -> raid6 wrapper */ -static int _striped_or_raid0_to_raid45610_wrapper(TAKEOVER_FN_ARGS) +/* Helper: striped/raid0/raid0_meta/raid1 -> raid4/5/6/10, raid45 -> raid6 wrapper */ +static int _takeover_upconvert_wrapper(TAKEOVER_FN_ARGS) { uint32_t extents_copied, region_size, seg_len, stripe_size; struct lv_segment *seg = first_seg(lv); @@ -3536,10 +5021,40 @@ static int _striped_or_raid0_to_raid45610_wrapper(TAKEOVER_FN_ARGS) return 0; } + if 
(seg_is_any_raid5(seg) && segtype_is_any_raid6(new_segtype) && seg->area_count < 4) { + log_error("Minimum of 3 stripes needed for conversion from %s to %s", + lvseg_name(seg), new_segtype->name); + return 0; + } + + if (seg_is_raid1(seg)) { + if (seg->area_count != 2) { + log_error("Can't convert %s LV %s to %s with != 2 legs.", + lvseg_name(seg), display_lvname(lv), new_segtype->name); + return 0; + } + if (!segtype_is_any_raid5(new_segtype)) { + log_error("Can't convert %s LV %s to %s.", + lvseg_name(seg), display_lvname(lv), new_segtype->name); + return 0; + } + if (seg->area_count != new_image_count) { + log_error(INTERNAL_ERROR "Bogus new_image_count converting %s LV %s to %s.", + lvseg_name(seg), display_lvname(lv), new_segtype->name); + return 0; + } + + if (!new_stripe_size) + new_stripe_size = 128; + } + /* Archive metadata */ if (!archive(lv->vg)) return_0; + if (!_lv_free_reshape_space(lv)) + return_0; + /* This helper can be used to convert from striped/raid0* -> raid10 too */ if (seg_is_striped_target(seg)) { log_debug_metadata("Converting LV %s from %s to %s.", @@ -3554,15 +5069,14 @@ static int _striped_or_raid0_to_raid45610_wrapper(TAKEOVER_FN_ARGS) if (!_raid0_add_or_remove_metadata_lvs(lv, 1 /* update_and_reload */, allocate_pvs, NULL)) return 0; /* raid0_meta -> raid4 needs clearing of MetaLVs in order to avoid raid disk role change issues in the kernel */ - } else if (segtype_is_raid4(new_segtype) && - !_clear_meta_lvs(lv)) + } + + if (seg_is_raid0_meta(seg) && + segtype_is_raid4(new_segtype) && + !_clear_meta_lvs(lv)) return_0; - /* Add the additional component LV pairs */ - log_debug_metadata("Adding %" PRIu32 " component LV pair(s) to %s.", - new_image_count - lv_raid_image_count(lv), - display_lvname(lv)); extents_copied = seg->extents_copied; region_size = seg->region_size; seg_len = seg->len; @@ -3573,11 +5087,21 @@ static int _striped_or_raid0_to_raid45610_wrapper(TAKEOVER_FN_ARGS) return_0; seg->area_len = seg_lv(seg, 0)->le_count; 
lv->le_count = seg->len = seg->area_len * seg->area_count; + seg->area_len = seg->len; seg->extents_copied = seg->region_size = 0; } - if (!_lv_raid_change_image_count(lv, new_image_count, allocate_pvs, NULL, 0, 1)) - return 0; + /* Add the additional component LV pairs */ + if (new_image_count != seg->area_count) { + log_debug_metadata("Adding %" PRIu32 " component LV pair(s) to %s.", + new_image_count - lv_raid_image_count(lv), + display_lvname(lv)); + if (!_lv_raid_change_image_count(lv, new_image_count, allocate_pvs, NULL, 0, 1)) + return 0; + + seg = first_seg(lv); + } + if (segtype_is_raid4(new_segtype) && (!_shift_parity_dev(seg) || @@ -3585,25 +5109,40 @@ static int _striped_or_raid0_to_raid45610_wrapper(TAKEOVER_FN_ARGS) log_error("Can't convert %s to %s.", display_lvname(lv), new_segtype->name); return 0; } else if (segtype_is_raid10_near(new_segtype)) { + uint32_t s; + + /* FIXME: raid10 ; needs to change once more than 2 data copies! */ + seg->data_copies = 2; + log_debug_metadata("Reordering areas for raid0 -> raid10 takeover"); if (!_reorder_raid10_near_seg_areas(seg, reorder_to_raid10_near)) return 0; + /* Set rebuild flags accordingly */ + for (s = 0; s < seg->area_count; s++) { + seg_lv(seg, s)->status &= ~LV_REBUILD; + seg_metalv(seg, s)->status &= ~LV_REBUILD; + if (s % seg->data_copies) + seg_lv(seg, s)->status |= LV_REBUILD; + } + } seg->segtype = new_segtype; seg->region_size = new_region_size ?: region_size; - - /* FIXME Hard-coded raid0 to raid4/5/6 */ - seg->stripe_size = stripe_size; - lv->le_count = seg->len = seg->area_len = seg_len; + seg->stripe_size = new_stripe_size ?: stripe_size; seg->extents_copied = extents_copied; + /* FIXME Hard-coded to raid4/5/6/10 */ + lv->le_count = seg->len = seg->area_len = seg_len; + _check_and_adjust_region_size(lv); log_debug_metadata("Updating VG metadata and reloading %s LV %s.", lvseg_name(seg), display_lvname(lv)); - if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, NULL)) - return_0; + if 
(!_lv_update_reload_fns_reset_eliminate_lvs(lv, 0, &removal_lvs, + _post_raid_dummy, NULL, + _pre_raid_add_legs, NULL)) + return 0; if (segtype_is_raid4(new_segtype)) { /* We had to rename SubLVs because of collision free shifting, rename back... */ @@ -3693,23 +5232,23 @@ static int _takeover_from_raid0_to_raid1(TAKEOVER_FN_ARGS) static int _takeover_from_raid0_to_raid10(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count * 2 /* new_image_count */, - 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count * 2 /* new_image_count */, + 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_raid0_to_raid45(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count + 1 /* new_image_count */, - 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count + 1 /* new_image_count */, + 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_raid0_to_raid6(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count + 2 /* new_image_count */, - 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count + 2 /* new_image_count */, + 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_raid0_to_striped(TAKEOVER_FN_ARGS) @@ -3732,7 +5271,7 @@ static int _takeover_from_raid0_meta_to_mirrored(TAKEOVER_FN_ARGS) static int _takeover_from_raid0_meta_to_raid0(TAKEOVER_FN_ARGS) { - if (!_raid0_meta_change_wrapper(lv, new_segtype, new_stripes, yes, force, 0, allocate_pvs)) + if (!_raid0_meta_change_wrapper(lv, 
new_segtype, new_stripes, yes, force, 0, NULL)) return_0; return 1; @@ -3745,23 +5284,23 @@ static int _takeover_from_raid0_meta_to_raid1(TAKEOVER_FN_ARGS) static int _takeover_from_raid0_meta_to_raid10(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count * 2 /* new_image_count */, - 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count * 2 /* new_image_count */, + 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_raid0_meta_to_raid45(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count + 1 /* new_image_count */, - 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count + 1 /* new_image_count */, + 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_raid0_meta_to_raid6(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count + 2 /* new_image_count */, - 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count + 2 /* new_image_count */, + 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_raid0_meta_to_striped(TAKEOVER_FN_ARGS) @@ -3779,7 +5318,9 @@ static int _takeover_from_raid1_to_linear(TAKEOVER_FN_ARGS) static int _takeover_from_raid1_to_mirrored(TAKEOVER_FN_ARGS) { - return _raid1_to_mirrored_wrapper(lv, new_segtype, yes, force, new_image_count, new_data_copies, new_stripes, new_stripe_size, new_region_size, allocate_pvs); + return _raid1_to_mirrored_wrapper(lv, new_segtype, yes, force, + new_image_count, new_data_copies, + new_stripes, new_stripe_size, 
new_region_size, allocate_pvs); } static int _takeover_from_raid1_to_raid0(TAKEOVER_FN_ARGS) @@ -3804,7 +5345,9 @@ static int _takeover_from_raid1_to_raid10(TAKEOVER_FN_ARGS) static int _takeover_from_raid1_to_raid45(TAKEOVER_FN_ARGS) { - return _takeover_unsupported_yet(lv, new_stripes, new_segtype); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count /* unchanged new_image_count */, + 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_raid1_to_striped(TAKEOVER_FN_ARGS) @@ -3824,17 +5367,23 @@ static int _takeover_from_raid45_to_mirrored(TAKEOVER_FN_ARGS) static int _takeover_from_raid45_to_raid0(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count - 1, 1 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count - 1, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } static int _takeover_from_raid45_to_raid0_meta(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count - 1, 1 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count - 1, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } static int _takeover_from_raid45_to_raid1(TAKEOVER_FN_ARGS) { - return _takeover_unsupported_yet(lv, new_stripes, new_segtype); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count, + 2 /* data_copies */, 0, 0, 0, allocate_pvs); } static int _takeover_from_raid45_to_raid54(TAKEOVER_FN_ARGS) @@ -3849,42 +5398,49 @@ static int _takeover_from_raid45_to_raid6(TAKEOVER_FN_ARGS) struct segment_type *segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID5_N); if (!segtype || - !_raid45_to_raid54_wrapper(lv, segtype, yes, force, first_seg(lv)->area_count, + 
!_raid45_to_raid54_wrapper(lv, segtype, yes, force, + first_seg(lv)->area_count, 1 /* data_copies */, 0, 0, 0, allocate_pvs)) return 0; } - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count + 1 /* new_image_count */, - 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count + 1 /* new_image_count */, + 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_raid45_to_striped(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count - 1, 1 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count - 1, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } static int _takeover_from_raid6_to_raid0(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count - 2, - 1 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count - 2, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } static int _takeover_from_raid6_to_raid0_meta(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count - 2, - 1 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count - 2, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } static int _takeover_from_raid6_to_raid45(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count - 1, - 2 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count - 1, + 2 /* data_copies */, 0, 0, 0, allocate_pvs); } static int 
_takeover_from_raid6_to_striped(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count - 2, - 2 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count - 2, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } static int _takeover_from_striped_to_raid0(TAKEOVER_FN_ARGS) @@ -3910,22 +5466,23 @@ static int _takeover_from_striped_to_raid0_meta(TAKEOVER_FN_ARGS) static int _takeover_from_striped_to_raid10(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count * 2 /* new_image_count */, - 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count * 2 /* new_image_count */, + 2 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_striped_to_raid45(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count + 1, - 2 /* data_copies*/, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count + 1, + 2 /* data_copies*/, 0, 0, new_region_size, allocate_pvs); } static int _takeover_from_striped_to_raid6(TAKEOVER_FN_ARGS) { - return _striped_or_raid0_to_raid45610_wrapper(lv, new_segtype, yes, force, - first_seg(lv)->area_count + 2 /* new_image_count */, - 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs); + return _takeover_upconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count + 2 /* new_image_count */, + 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs); } /* @@ -3959,8 +5516,9 @@ static int _takeover_from_raid10_to_mirrored(TAKEOVER_FN_ARGS) static int _takeover_from_raid10_to_raid0(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, 
yes, force, first_seg(lv)->area_count / 2, - 1 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count / 2, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } /* @@ -3973,8 +5531,9 @@ static int _takeover_from_raid10_to_raid01(TAKEOVER_FN_ARGS) static int _takeover_from_raid10_to_raid0_meta(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count / 2, - 1 /* data_copies */, 0, 0, 0, allocate_pvs); + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + first_seg(lv)->area_count / 2, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } static int _takeover_from_raid10_to_raid1(TAKEOVER_FN_ARGS) @@ -3993,8 +5552,11 @@ static int _takeover_from_raid10_to_raid10(TAKEOVER_FN_ARGS) static int _takeover_from_raid10_to_striped(TAKEOVER_FN_ARGS) { - return _raid45610_to_raid0_or_striped_wrapper(lv, new_segtype, yes, force, first_seg(lv)->area_count / 2, - 1 /* data_copies */, 0, 0, 0, allocate_pvs); + struct lv_segment *seg = first_seg(lv); + + return _takeover_downconvert_wrapper(lv, new_segtype, yes, force, + seg->area_count / seg->data_copies, + 1 /* data_copies */, 0, 0, 0, allocate_pvs); } /* @@ -4026,12 +5588,6 @@ static takeover_fn_t _get_takeover_fn(const struct lv_segment *seg, const struct return _takeover_fns[_segtype_ix(seg->segtype, seg->area_count)][_segtype_ix(new_segtype, new_image_count)]; } -/* Number of data (not parity) rimages */ -static uint32_t _data_rimages_count(const struct lv_segment *seg, const uint32_t total_rimages) -{ - return total_rimages - seg->segtype->parity_devs; -} - /* * Determine whether data_copies, stripes, stripe_size are * possible for conversion from seg_from to new_segtype. 
@@ -4050,11 +5606,52 @@ static int _log_prohibited_option(const struct lv_segment *seg_from, return 1; } +/* + * Find takeover raid flag for segment type flag of @seg + */ +/* Segment type flag correspondence for raid5 <-> raid6 conversions */ +static uint64_t _r5_to_r6[][2] = { + { SEG_RAID5_LS, SEG_RAID6_LS_6 }, + { SEG_RAID5_LA, SEG_RAID6_LA_6 }, + { SEG_RAID5_RS, SEG_RAID6_RS_6 }, + { SEG_RAID5_RA, SEG_RAID6_RA_6 }, + { SEG_RAID5_N, SEG_RAID6_N_6 }, +}; + + +/* Return segment type flag for raid5 -> raid6 conversions */ +static uint64_t _get_r56_flag(const struct lv_segment *seg, unsigned idx) +{ + unsigned elems = ARRAY_SIZE(_r5_to_r6); + + while (elems--) + if (seg->segtype->flags & _r5_to_r6[elems][idx]) + return _r5_to_r6[elems][!idx]; + + return 0; +} + +/* Return segment type flag for raid5 -> raid6 conversions */ +static uint64_t _raid_seg_flag_5_to_6(const struct lv_segment *seg) +{ + return _get_r56_flag(seg, 0); +} + +/* Return segment type flag for raid6 -> raid5 conversions */ +static uint64_t _raid_seg_flag_6_to_5(const struct lv_segment *seg) +{ + return _get_r56_flag(seg, 1); +} +/* End raid5 <-> raid6 takeover options */ + /* Change segtype for raid4 <-> raid5 <-> raid6 takeover where necessary. */ static int _set_convenient_raid456_segtype_to(const struct lv_segment *seg_from, - const struct segment_type **segtype) + const struct segment_type **segtype, + int yes) { size_t len = min(strlen((*segtype)->name), strlen(lvseg_name(seg_from))); + uint64_t seg_flag; + struct cmd_context *cmd = seg_from->lv->vg->cmd; const struct segment_type *segtype_sav = *segtype; /* Bail out if same RAID level is requested. 
*/ @@ -4064,54 +5661,60 @@ static int _set_convenient_raid456_segtype_to(const struct lv_segment *seg_from, /* Striped/raid0 -> raid5/6 */ if (seg_is_striped(seg_from) || seg_is_any_raid0(seg_from)) { /* If this is any raid5 conversion request -> enforce raid5_n, because we convert from striped */ - if (segtype_is_any_raid5(*segtype) && - !segtype_is_raid5_n(*segtype)) { - if (!(*segtype = get_segtype_from_flag(seg_from->lv->vg->cmd, SEG_RAID5_N))) - return_0; + if (segtype_is_any_raid5(*segtype) && !segtype_is_raid5_n(*segtype)) { + seg_flag = SEG_RAID5_N; goto replaced; /* If this is any raid6 conversion request -> enforce raid6_n_6, because we convert from striped */ - } else if (segtype_is_any_raid6(*segtype) && - !segtype_is_raid6_n_6(*segtype)) { - if (!(*segtype = get_segtype_from_flag(seg_from->lv->vg->cmd, SEG_RAID6_N_6))) - return_0; + } else if (segtype_is_any_raid6(*segtype) && !segtype_is_raid6_n_6(*segtype)) { + seg_flag = SEG_RAID6_N_6; goto replaced; } /* raid4 -> raid5_n */ - } else if (seg_is_raid4(seg_from) && - segtype_is_any_raid5(*segtype)) { - if (!(*segtype = get_segtype_from_flag(seg_from->lv->vg->cmd, SEG_RAID5_N))) - return_0; + } else if (seg_is_raid4(seg_from) && segtype_is_any_raid5(*segtype)) { + seg_flag = SEG_RAID5_N; goto replaced; /* raid4/raid5_n -> striped/raid0/raid6 */ } else if ((seg_is_raid4(seg_from) || seg_is_raid5_n(seg_from)) && !segtype_is_striped(*segtype) && !segtype_is_any_raid0(*segtype) && + !segtype_is_raid1(*segtype) && !segtype_is_raid4(*segtype) && !segtype_is_raid5_n(*segtype) && !segtype_is_raid6_n_6(*segtype)) { - if (!(*segtype = get_segtype_from_flag(seg_from->lv->vg->cmd, SEG_RAID6_N_6))) + seg_flag = SEG_RAID6_N_6; + goto replaced; + + /* Got to do check for raid5 -> raid6 ... */ + } else if (seg_is_any_raid5(seg_from) && segtype_is_any_raid6(*segtype)) { + if (!(seg_flag = _raid_seg_flag_5_to_6(seg_from))) return_0; goto replaced; - /* ... 
and raid6 -> striped/raid0/raid4/raid5_n */ - } else if (seg_is_raid6_n_6(seg_from) && - !segtype_is_striped(*segtype) && - !segtype_is_any_raid0(*segtype) && - !segtype_is_raid4(*segtype) && - !segtype_is_raid5_n(*segtype)) { - if (!(*segtype = get_segtype_from_flag(seg_from->lv->vg->cmd, SEG_RAID5_N))) - return_0; + /* ... and raid6 -> raid5 */ + } else if (seg_is_any_raid6(seg_from) && segtype_is_any_raid5(*segtype)) { + /* No result for raid6_{zr,nr,nc} */ + if (!(seg_flag = _raid_seg_flag_6_to_5(seg_from))) + return 0; goto replaced; } return 1; replaced: + if (!(*segtype = get_segtype_from_flag(cmd, seg_flag))) + return_0; log_warn("Replaced LV type %s with possible type %s.", segtype_sav->name, (*segtype)->name); + if (!yes && yes_no_prompt("Do you want to convert %s LV %s to %s? [y/n]: ", + segtype_sav->name, display_lvname(seg_from->lv), + (*segtype)->name) == 'n') { + log_error("Logical volume %s NOT converted", display_lvname(seg_from->lv)); + return 0; + } + return 1; } @@ -4197,6 +5800,7 @@ static int _region_size_change_requested(struct logical_volume *lv, int yes, con /* Check allowed conversion from seg_from to *segtype_to */ static int _conversion_options_allowed(const struct lv_segment *seg_from, const struct segment_type **segtype_to, + int yes, uint32_t new_image_count, int new_data_copies, int new_region_size, int stripes, unsigned new_stripe_size_supplied) @@ -4204,7 +5808,7 @@ static int _conversion_options_allowed(const struct lv_segment *seg_from, int r = 1; uint32_t opts; - if (!new_image_count && !_set_convenient_raid456_segtype_to(seg_from, segtype_to)) + if (!new_image_count && !_set_convenient_raid456_segtype_to(seg_from, segtype_to, yes)) return_0; if (!_get_allowed_conversion_options(seg_from, *segtype_to, new_image_count, &opts)) { @@ -4258,8 +5862,10 @@ int lv_raid_convert(struct logical_volume *lv, uint32_t stripes, stripe_size; uint32_t new_image_count = seg->area_count; uint32_t region_size = new_region_size; + uint32_t 
data_copies = seg->data_copies; takeover_fn_t takeover_fn; + new_segtype = new_segtype ? : seg->segtype; if (!new_segtype) { log_error(INTERNAL_ERROR "New segtype not specified."); return 0; @@ -4269,33 +5875,47 @@ int lv_raid_convert(struct logical_volume *lv, /* FIXME Ensure caller does *not* set wrong default value! */ /* Define new stripe size if not passed in */ - stripe_size = new_stripe_size ? : seg->stripe_size; + stripe_size = new_stripe_size_supplied ? new_stripe_size : seg->stripe_size; if (segtype_is_striped(new_segtype)) - new_image_count = stripes; + new_image_count = stripes ? : seg->area_count; if (segtype_is_raid(new_segtype) && !_check_max_raid_devices(new_image_count)) return_0; - /* Change RAID region size */ + region_size = new_region_size ? : seg->region_size; + region_size = region_size ? : get_default_region_size(lv->vg->cmd); + /* - * FIXME: workaround with new_region_size until the - * cli validation patches got merged when we'll change - * the API to have new_region_size_supplied to check for. + * reshape of capable raid type requested */ - if (new_region_size) { - if (new_segtype == seg->segtype && - new_region_size != seg->region_size && - seg_is_raid(seg) && !seg_is_any_raid0(seg)) - return _region_size_change_requested(lv, yes, new_region_size); - } else - region_size = seg->region_size ? 
: get_default_region_size(lv->vg->cmd); + switch (_reshape_requested(lv, new_segtype, data_copies, region_size, stripes, stripe_size)) { + case 0: + break; + case 1: + if (!_raid_reshape(lv, new_segtype, yes, force, + data_copies, region_size, + stripes, stripe_size, allocate_pvs)) { + log_error("Reshape request failed on LV %s", display_lvname(lv)); + return 0; + } + + return 1; + case 2: + log_error("Invalid conversion request on %s", display_lvname(lv)); + /* Error if we got here with stripes and/or stripe size change requested */ + return 0; + default: + log_error(INTERNAL_ERROR "_reshape_requested failed"); + return 0; + } /* * Check acceptible options mirrors, region_size, * stripes and/or stripe_size have been provided. */ - if (!_conversion_options_allowed(seg, &new_segtype, 0 /* Takeover */, 0 /*new_data_copies*/, new_region_size, + if (!_conversion_options_allowed(seg, &new_segtype, yes, + 0 /* Takeover */, 0 /*new_data_copies*/, new_region_size, new_stripes, new_stripe_size_supplied)) return _log_possible_conversion_types(lv, new_segtype); diff --git a/lib/metadata/segtype.c b/lib/metadata/segtype.c index b66ab0278..d0508ca35 100644 --- a/lib/metadata/segtype.c +++ b/lib/metadata/segtype.c @@ -43,7 +43,8 @@ struct segment_type *get_segtype_from_flag(struct cmd_context *cmd, uint64_t fla { struct segment_type *segtype; - dm_list_iterate_items(segtype, &cmd->segtypes) + /* Iterate backwards to provide aliases; e.g. raid5 instead of raid5_ls */ + dm_list_iterate_back_items(segtype, &cmd->segtypes) if (flag & segtype->flags) return segtype; diff --git a/lib/metadata/segtype.h b/lib/metadata/segtype.h index 921282449..bea714189 100644 --- a/lib/metadata/segtype.h +++ b/lib/metadata/segtype.h @@ -140,7 +140,11 @@ struct dev_manager; #define segtype_is_any_raid10(segtype) ((segtype)->flags & SEG_RAID10 ? 1 : 0) #define segtype_is_raid10(segtype) ((segtype)->flags & SEG_RAID10 ? 
1 : 0) #define segtype_is_raid10_near(segtype) segtype_is_raid10(segtype) +/* FIXME: once raid10_offset supported */ +#define segtype_is_raid10_offset(segtype) 0 // ((segtype)->flags & SEG_RAID10_OFFSET ? 1 : 0) #define segtype_is_raid_with_meta(segtype) (segtype_is_raid(segtype) && !segtype_is_raid0(segtype)) +#define segtype_is_striped_raid(segtype) (segtype_is_raid(segtype) && !segtype_is_raid1(segtype)) +#define segtype_is_reshapable_raid(segtype) ((segtype_is_striped_raid(segtype) && !segtype_is_any_raid0(segtype)) || segtype_is_raid10_near(segtype) || segtype_is_raid10_offset(segtype)) #define segtype_is_snapshot(segtype) ((segtype)->flags & SEG_SNAPSHOT ? 1 : 0) #define segtype_is_striped(segtype) ((segtype)->flags & SEG_AREAS_STRIPED ? 1 : 0) #define segtype_is_thin(segtype) ((segtype)->flags & (SEG_THIN_POOL|SEG_THIN_VOLUME) ? 1 : 0) @@ -190,6 +194,8 @@ struct dev_manager; #define seg_is_raid10(seg) segtype_is_raid10((seg)->segtype) #define seg_is_raid10_near(seg) segtype_is_raid10_near((seg)->segtype) #define seg_is_raid_with_meta(seg) segtype_is_raid_with_meta((seg)->segtype) +#define seg_is_striped_raid(seg) segtype_is_striped_raid((seg)->segtype) +#define seg_is_reshapable_raid(seg) segtype_is_reshapable_raid((seg)->segtype) #define seg_is_replicator(seg) ((seg)->segtype->flags & SEG_REPLICATOR ? 1 : 0) #define seg_is_replicator_dev(seg) ((seg)->segtype->flags & SEG_REPLICATOR_DEV ? 1 : 0) #define seg_is_snapshot(seg) segtype_is_snapshot((seg)->segtype) @@ -280,6 +286,7 @@ struct segment_type *init_unknown_segtype(struct cmd_context *cmd, #define RAID_FEATURE_RAID0 (1U << 1) /* version 1.7 */ #define RAID_FEATURE_RESHAPING (1U << 2) /* version 1.8 */ #define RAID_FEATURE_RAID4 (1U << 3) /* ! 
version 1.8 or 1.9.0 */ +#define RAID_FEATURE_RESHAPE (1U << 4) /* version 1.10.2 */ #ifdef RAID_INTERNAL int init_raid_segtypes(struct cmd_context *cmd, struct segtype_library *seglib); diff --git a/lib/metadata/snapshot_manip.c b/lib/metadata/snapshot_manip.c index b5fb60c4a..57fbef93b 100644 --- a/lib/metadata/snapshot_manip.c +++ b/lib/metadata/snapshot_manip.c @@ -238,8 +238,8 @@ static struct lv_segment *_alloc_snapshot_seg(struct logical_volume *lv) return NULL; } - if (!(seg = alloc_lv_segment(segtype, lv, 0, lv->le_count, 0, 0, - NULL, 0, lv->le_count, 0, 0, 0, NULL))) { + if (!(seg = alloc_lv_segment(segtype, lv, 0, lv->le_count, 0, 0, 0, + NULL, 0, lv->le_count, 0, 0, 0, 0, NULL))) { log_error("Couldn't allocate new snapshot segment."); return NULL; } diff --git a/lib/raid/raid.c b/lib/raid/raid.c index c679207af..111cc419b 100644 --- a/lib/raid/raid.c +++ b/lib/raid/raid.c @@ -137,6 +137,7 @@ static int _raid_text_import(struct lv_segment *seg, } raid_attr_import[] = { { "region_size", &seg->region_size }, { "stripe_size", &seg->stripe_size }, + { "data_copies", &seg->data_copies }, { "writebehind", &seg->writebehind }, { "min_recovery_rate", &seg->min_recovery_rate }, { "max_recovery_rate", &seg->max_recovery_rate }, @@ -146,6 +147,10 @@ static int _raid_text_import(struct lv_segment *seg, for (i = 0; i < DM_ARRAY_SIZE(raid_attr_import); i++, aip++) { if (dm_config_has_node(sn, aip->name)) { if (!dm_config_get_uint32(sn, aip->name, aip->var)) { + if (!strcmp(aip->name, "data_copies")) { + *aip->var = 0; + continue; + } log_error("Couldn't read '%s' for segment %s of logical volume %s.", aip->name, dm_config_parent_name(sn), seg->lv->name); return 0; @@ -165,6 +170,9 @@ static int _raid_text_import(struct lv_segment *seg, return 0; } + if (seg->data_copies < 2) + seg->data_copies = lv_raid_data_copies(seg->segtype, seg->area_count); + if (seg_is_any_raid0(seg)) seg->area_len /= seg->area_count; @@ -183,18 +191,31 @@ static int 
_raid_text_export_raid0(const struct lv_segment *seg, struct formatte static int _raid_text_export_raid(const struct lv_segment *seg, struct formatter *f) { - outf(f, "device_count = %u", seg->area_count); + int raid0 = seg_is_any_raid0(seg); + + if (raid0) + outfc(f, (seg->area_count == 1) ? "# linear" : NULL, + "stripe_count = %u", seg->area_count); + + else { + outf(f, "device_count = %u", seg->area_count); + if (seg_is_any_raid10(seg) && seg->data_copies > 0) + outf(f, "data_copies = %" PRIu32, seg->data_copies); + if (seg->region_size) + outf(f, "region_size = %" PRIu32, seg->region_size); + } if (seg->stripe_size) outf(f, "stripe_size = %" PRIu32, seg->stripe_size); - if (seg->region_size) - outf(f, "region_size = %" PRIu32, seg->region_size); - if (seg->writebehind) - outf(f, "writebehind = %" PRIu32, seg->writebehind); - if (seg->min_recovery_rate) - outf(f, "min_recovery_rate = %" PRIu32, seg->min_recovery_rate); - if (seg->max_recovery_rate) - outf(f, "max_recovery_rate = %" PRIu32, seg->max_recovery_rate); + + if (!raid0) { + if (seg_is_raid1(seg) && seg->writebehind) + outf(f, "writebehind = %" PRIu32, seg->writebehind); + if (seg->min_recovery_rate) + outf(f, "min_recovery_rate = %" PRIu32, seg->min_recovery_rate); + if (seg->max_recovery_rate) + outf(f, "max_recovery_rate = %" PRIu32, seg->max_recovery_rate); + } return out_areas(f, seg, "raid"); } @@ -216,14 +237,16 @@ static int _raid_add_target_line(struct dev_manager *dm __attribute__((unused)), struct dm_tree_node *node, uint64_t len, uint32_t *pvmove_mirror_count __attribute__((unused))) { + int delta_disks = 0, delta_disks_minus = 0, delta_disks_plus = 0, data_offset = 0; uint32_t s; uint64_t flags = 0; - uint64_t rebuilds = 0; - uint64_t writemostly = 0; + uint64_t rebuilds[4]; + uint64_t writemostly[4]; struct dm_tree_node_raid_params params; - int raid0 = seg_is_any_raid0(seg); memset(¶ms, 0, sizeof(params)); + memset(&rebuilds, 0, sizeof(rebuilds)); + memset(&writemostly, 0, 
sizeof(writemostly)); if (!seg->area_count) { log_error(INTERNAL_ERROR "_raid_add_target_line called " @@ -232,63 +255,84 @@ static int _raid_add_target_line(struct dev_manager *dm __attribute__((unused)), } /* - * 64 device restriction imposed by kernel as well. It is - * not strictly a userspace limitation. + * 253 device restriction imposed by kernel due to MD and dm-raid bitfield limitation in superblock. + * It is not strictly a userspace limitation. */ - if (seg->area_count > 64) { - log_error("Unable to handle more than 64 devices in a " - "single RAID array"); + if (seg->area_count > DEFAULT_RAID_MAX_IMAGES) { + log_error("Unable to handle more than %u devices in a " + "single RAID array", DEFAULT_RAID_MAX_IMAGES); return 0; } - if (!raid0) { + if (!seg_is_any_raid0(seg)) { if (!seg->region_size) { - log_error("Missing region size for mirror segment."); + log_error("Missing region size for raid segment in %s.", + seg_lv(seg, 0)->name); return 0; } - for (s = 0; s < seg->area_count; s++) - if (seg_lv(seg, s)->status & LV_REBUILD) - rebuilds |= 1ULL << s; + for (s = 0; s < seg->area_count; s++) { + uint64_t status = seg_lv(seg, s)->status; - for (s = 0; s < seg->area_count; s++) - if (seg_lv(seg, s)->status & LV_WRITEMOSTLY) - writemostly |= 1ULL << s; + if (status & LV_REBUILD) + rebuilds[s/64] |= 1ULL << (s%64); + + if (status & LV_RESHAPE_DELTA_DISKS_PLUS) { + delta_disks++; + delta_disks_plus++; + } else if (status & LV_RESHAPE_DELTA_DISKS_MINUS) { + delta_disks--; + delta_disks_minus++; + } + + if (delta_disks_plus && delta_disks_minus) { + log_error(INTERNAL_ERROR "Invalid request for delta disks minus and delta disks plus!"); + return 0; + } + + if (status & LV_WRITEMOSTLY) + writemostly[s/64] |= 1ULL << (s%64); + } + + data_offset = seg->data_offset; if (mirror_in_sync()) flags = DM_NOSYNC; } params.raid_type = lvseg_name(seg); - params.stripe_size = seg->stripe_size; - params.flags = flags; - if (raid0) { - params.mirrors = 1; - params.stripes = 
seg->area_count; - } else if (seg->segtype->parity_devs) { + if (seg->segtype->parity_devs) { /* RAID 4/5/6 */ params.mirrors = 1; params.stripes = seg->area_count - seg->segtype->parity_devs; - } else if (seg_is_raid10(seg)) { - /* RAID 10 only supports 2 mirrors now */ - params.mirrors = 2; - params.stripes = seg->area_count / 2; + } else if (seg_is_any_raid0(seg)) { + params.mirrors = 1; + params.stripes = seg->area_count; + } else if (seg_is_any_raid10(seg)) { + params.data_copies = seg->data_copies; + params.stripes = seg->area_count; } else { /* RAID 1 */ - params.mirrors = seg->area_count; + params.mirrors = seg->data_copies; params.stripes = 1; params.writebehind = seg->writebehind; + memcpy(params.writemostly, writemostly, sizeof(params.writemostly)); } - if (!raid0) { + /* RAID 0 doesn't have a bitmap, thus no region_size, rebuilds etc. */ + if (!seg_is_any_raid0(seg)) { params.region_size = seg->region_size; - params.rebuilds = rebuilds; - params.writemostly = writemostly; + memcpy(params.rebuilds, rebuilds, sizeof(params.rebuilds)); params.min_recovery_rate = seg->min_recovery_rate; params.max_recovery_rate = seg->max_recovery_rate; + params.delta_disks = delta_disks; + params.data_offset = data_offset; } + params.stripe_size = seg->stripe_size; + params.flags = flags; + if (!dm_tree_node_add_raid_target_with_params(node, len, ¶ms)) return_0; @@ -450,6 +494,10 @@ static int _raid_target_present(struct cmd_context *cmd, else log_very_verbose("Target raid does not support %s.", SEG_TYPE_NAME_RAID4); + + if (maj > 1 || + (maj == 1 && (min > 10 || (min == 10 && patchlevel >= 2)))) + _raid_attrs |= RAID_FEATURE_RESHAPE; } if (attributes) diff --git a/libdm/libdevmapper.h b/libdm/libdevmapper.h index 9a1025202..cbd391c4c 100644 --- a/libdm/libdevmapper.h +++ b/libdm/libdevmapper.h @@ -331,6 +331,7 @@ struct dm_status_raid { char *dev_health; /* idle, frozen, resync, recover, check, repair */ char *sync_action; + uint64_t data_offset; /* RAID out-of-place 
reshaping */ }; int dm_get_status_raid(struct dm_pool *mem, const char *params, @@ -1719,7 +1720,7 @@ int dm_tree_node_add_raid_target(struct dm_tree_node *node, const char *raid_type, uint32_t region_size, uint32_t stripe_size, - uint64_t rebuilds, + uint64_t *rebuilds, uint64_t flags); /* @@ -1746,18 +1747,22 @@ struct dm_tree_node_raid_params { uint32_t region_size; uint32_t stripe_size; + int delta_disks; /* +/- number of disks to add/remove (reshaping) */ + int data_offset; /* data offset to set (out-of-place reshaping) */ + /* * 'rebuilds' and 'writemostly' are bitfields that signify * which devices in the array are to be rebuilt or marked * writemostly. By choosing a 'uint64_t', we limit ourself * to RAID arrays with 64 devices. */ - uint64_t rebuilds; - uint64_t writemostly; - uint32_t writebehind; /* I/Os (kernel default COUNTER_MAX / 2) */ + uint64_t rebuilds[4]; + uint64_t writemostly[4]; + uint32_t writebehind; /* I/Os (kernel default COUNTER_MAX / 2) */ uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */ uint32_t max_recovery_rate; /* kB/sec/disk */ uint32_t min_recovery_rate; /* kB/sec/disk */ + uint32_t data_copies; /* RAID # of data copies */ uint32_t stripe_cache; /* sectors */ uint64_t flags; /* [no]sync */ diff --git a/libdm/libdm-common.h b/libdm/libdm-common.h index 4dc1870fc..a064db846 100644 --- a/libdm/libdm-common.h +++ b/libdm/libdm-common.h @@ -23,6 +23,8 @@ #define DEV_NAME(dmt) (dmt->mangled_dev_name ? : dmt->dev_name) #define DEV_UUID(DMT) (dmt->mangled_uuid ? 
: dmt->uuid) +#define RAID_BITMAP_SIZE 4 + int mangle_string(const char *str, const char *str_name, size_t len, char *buf, size_t buf_len, dm_string_mangling_t mode); diff --git a/libdm/libdm-deptree.c b/libdm/libdm-deptree.c index d658bf99d..a26cfcc0f 100644 --- a/libdm/libdm-deptree.c +++ b/libdm/libdm-deptree.c @@ -205,11 +205,14 @@ struct load_segment { struct dm_tree_node *replicator;/* Replicator-dev */ uint64_t rdevice_index; /* Replicator-dev */ - uint64_t rebuilds; /* raid */ - uint64_t writemostly; /* raid */ + int delta_disks; /* raid reshape number of disks */ + int data_offset; /* raid reshape data offset on disk to set */ + uint64_t rebuilds[RAID_BITMAP_SIZE]; /* raid */ + uint64_t writemostly[RAID_BITMAP_SIZE]; /* raid */ uint32_t writebehind; /* raid */ uint32_t max_recovery_rate; /* raid kB/sec/disk */ uint32_t min_recovery_rate; /* raid kB/sec/disk */ + uint32_t data_copies; /* raid10 data_copies */ struct dm_tree_node *metadata; /* Thin_pool + Cache */ struct dm_tree_node *pool; /* Thin_pool, Thin */ @@ -2353,16 +2356,21 @@ static int _mirror_emit_segment_line(struct dm_task *dmt, struct load_segment *s return 1; } -/* Is parameter non-zero? */ -#define PARAM_IS_SET(p) ((p) ? 1 : 0) +static int _2_if_value(unsigned p) +{ + return p ? 
2 : 0; +} -/* Return number of bits assuming 4 * 64 bit size */ -static int _get_params_count(uint64_t bits) +/* Return number of bits passed in @bits assuming 2 * 64 bit size */ +static int _get_params_count(uint64_t *bits) { int r = 0; + int i = RAID_BITMAP_SIZE; - r += 2 * hweight32(bits & 0xFFFFFFFF); - r += 2 * hweight32(bits >> 32); + while (i--) { + r += 2 * hweight32(bits[i] & 0xFFFFFFFF); + r += 2 * hweight32(bits[i] >> 32); + } return r; } @@ -2373,32 +2381,60 @@ static int _raid_emit_segment_line(struct dm_task *dmt, uint32_t major, size_t paramsize) { uint32_t i; + uint32_t area_count = seg->area_count / 2; int param_count = 1; /* mandatory 'chunk size'/'stripe size' arg */ int pos = 0; - unsigned type = seg->type; + unsigned type; + + if (seg->area_count % 2) + return 0; if ((seg->flags & DM_NOSYNC) || (seg->flags & DM_FORCESYNC)) param_count++; - param_count += 2 * (PARAM_IS_SET(seg->region_size) + - PARAM_IS_SET(seg->writebehind) + - PARAM_IS_SET(seg->min_recovery_rate) + - PARAM_IS_SET(seg->max_recovery_rate)); + param_count += _2_if_value(seg->data_offset) + + _2_if_value(seg->delta_disks) + + _2_if_value(seg->region_size) + + _2_if_value(seg->writebehind) + + _2_if_value(seg->min_recovery_rate) + + _2_if_value(seg->max_recovery_rate) + + _2_if_value(seg->data_copies > 1); - /* rebuilds and writemostly are 64 bits */ + /* rebuilds and writemostly are BITMAP_SIZE * 64 bits */ param_count += _get_params_count(seg->rebuilds); param_count += _get_params_count(seg->writemostly); - if ((type == SEG_RAID1) && seg->stripe_size) - log_error("WARNING: Ignoring RAID1 stripe size"); + if ((seg->type == SEG_RAID1) && seg->stripe_size) + log_info("WARNING: Ignoring RAID1 stripe size"); /* Kernel only expects "raid0", not "raid0_meta" */ + type = seg->type; if (type == SEG_RAID0_META) type = SEG_RAID0; +#if 0 + /* Kernel only expects "raid10", not "raid10_{far,offset}" */ + else if (type == SEG_RAID10_FAR || + type == SEG_RAID10_OFFSET) { + param_count += 2; + 
type = SEG_RAID10_NEAR; + } +#endif - EMIT_PARAMS(pos, "%s %d %u", _dm_segtypes[type].target, + EMIT_PARAMS(pos, "%s %d %u", + // type == SEG_RAID10_NEAR ? "raid10" : _dm_segtypes[type].target, + type == SEG_RAID10 ? "raid10" : _dm_segtypes[type].target, param_count, seg->stripe_size); +#if 0 + if (seg->type == SEG_RAID10_FAR) + EMIT_PARAMS(pos, " raid10_format far"); + else if (seg->type == SEG_RAID10_OFFSET) + EMIT_PARAMS(pos, " raid10_format offset"); +#endif + + if (seg->data_copies > 1 && type == SEG_RAID10) + EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies); + if (seg->flags & DM_NOSYNC) EMIT_PARAMS(pos, " nosync"); else if (seg->flags & DM_FORCESYNC) @@ -2407,27 +2443,38 @@ static int _raid_emit_segment_line(struct dm_task *dmt, uint32_t major, if (seg->region_size) EMIT_PARAMS(pos, " region_size %u", seg->region_size); - for (i = 0; i < (seg->area_count / 2); i++) - if (seg->rebuilds & (1ULL << i)) + /* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */ + if (seg->data_offset) + EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 
0 : seg->data_offset); + + if (seg->delta_disks) + EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks); + + for (i = 0; i < area_count; i++) + if (seg->rebuilds[i/64] & (1ULL << (i%64))) EMIT_PARAMS(pos, " rebuild %u", i); - if (seg->min_recovery_rate) - EMIT_PARAMS(pos, " min_recovery_rate %u", - seg->min_recovery_rate); - - if (seg->max_recovery_rate) - EMIT_PARAMS(pos, " max_recovery_rate %u", - seg->max_recovery_rate); - - for (i = 0; i < (seg->area_count / 2); i++) - if (seg->writemostly & (1ULL << i)) + for (i = 0; i < area_count; i++) + if (seg->writemostly[i/64] & (1ULL << (i%64))) EMIT_PARAMS(pos, " write_mostly %u", i); if (seg->writebehind) EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind); + /* + * Has to be before "min_recovery_rate" or the kernels + * check will fail when both set and min > previous max + */ + if (seg->max_recovery_rate) + EMIT_PARAMS(pos, " max_recovery_rate %u", + seg->max_recovery_rate); + + if (seg->min_recovery_rate) + EMIT_PARAMS(pos, " min_recovery_rate %u", + seg->min_recovery_rate); + /* Print number of metadata/data device pairs */ - EMIT_PARAMS(pos, " %u", seg->area_count/2); + EMIT_PARAMS(pos, " %u", area_count); if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0) return_0; @@ -3267,11 +3314,14 @@ int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node, seg->region_size = p->region_size; seg->stripe_size = p->stripe_size; seg->area_count = 0; - seg->rebuilds = p->rebuilds; - seg->writemostly = p->writemostly; + seg->delta_disks = p->delta_disks; + seg->data_offset = p->data_offset; + memcpy(seg->rebuilds, p->rebuilds, sizeof(seg->rebuilds)); + memcpy(seg->writemostly, p->writemostly, sizeof(seg->writemostly)); seg->writebehind = p->writebehind; seg->min_recovery_rate = p->min_recovery_rate; seg->max_recovery_rate = p->max_recovery_rate; + seg->data_copies = p->data_copies; seg->flags = p->flags; return 1; @@ -3282,17 +3332,18 @@ int dm_tree_node_add_raid_target(struct dm_tree_node *node, 
const char *raid_type, uint32_t region_size, uint32_t stripe_size, - uint64_t rebuilds, + uint64_t *rebuilds, uint64_t flags) { struct dm_tree_node_raid_params params = { .raid_type = raid_type, .region_size = region_size, .stripe_size = stripe_size, - .rebuilds = rebuilds, .flags = flags }; + memcpy(params.rebuilds, rebuilds, sizeof(params.rebuilds)); + return dm_tree_node_add_raid_target_with_params(node, size, ¶ms); } diff --git a/libdm/libdm-targets.c b/libdm/libdm-targets.c index c94e05753..6b8337234 100644 --- a/libdm/libdm-targets.c +++ b/libdm/libdm-targets.c @@ -89,6 +89,8 @@ static unsigned _count_fields(const char *p) * <#devs> * Versions 1.5.0+ (6 fields): * <#devs> + * Versions 1.9.0+ (7 fields): + * <#devs> */ int dm_get_status_raid(struct dm_pool *mem, const char *params, struct dm_status_raid **status) @@ -147,6 +149,22 @@ int dm_get_status_raid(struct dm_pool *mem, const char *params, if (sscanf(p, "%s %" PRIu64, s->sync_action, &s->mismatch_count) != 2) goto_bad; + if (num_fields < 7) + goto out; + + /* + * All pre-1.9.0 version parameters are read. Now we check + * for additional 1.9.0+ parameters (i.e. nr_fields at least 7). + * + * Note that data_offset will be 0 if the + * kernel returns a pre-1.9.0 status. 
+ */ + msg_fields = ""; + if (!(p = _skip_fields(params, 6))) /* skip pre-1.9.0 params */ + goto bad; + if (sscanf(p, "%" PRIu64, &s->data_offset) != 1) + goto bad; + out: *status = s; diff --git a/test/lib/aux.sh b/test/lib/aux.sh index 5108a5268..6ec2c389a 100644 --- a/test/lib/aux.sh +++ b/test/lib/aux.sh @@ -1306,7 +1306,7 @@ udev_wait() { wait_for_sync() { local i for i in {1..100} ; do - check in_sync $1 $2 && return + check in_sync $1 $2 $3 && return sleep .2 done diff --git a/test/lib/check.sh b/test/lib/check.sh index 916d2df39..64812fbd1 100644 --- a/test/lib/check.sh +++ b/test/lib/check.sh @@ -178,7 +178,7 @@ linear() { $(lvl $lv -o+devices) } -# in_sync +# in_sync # Works for "mirror" and "raid*" in_sync() { local a @@ -187,8 +187,11 @@ in_sync() { local type local snap="" local lvm_name="$1/$2" + local ignore_a="$3" local dm_name=$(echo $lvm_name | sed s:-:--: | sed s:/:-:) + [ -z "$ignore_a" ] && ignore_a=0 + a=( $(dmsetup status $dm_name) ) || \ die "Unable to get sync status of $1" @@ -225,7 +228,7 @@ in_sync() { return 1 fi - [[ ${a[$(($idx - 1))]} =~ a ]] && \ + [[ ${a[$(($idx - 1))]} =~ a ]] && [ $ignore_a -eq 0 ] && \ die "$lvm_name ($type$snap) in-sync, but 'a' characters in health status" echo "$lvm_name ($type$snap) is in-sync \"${a[@]}\"" @@ -310,6 +313,12 @@ lv_field() { die "lv_field: lv=$1, field=\"$2\", actual=\"$actual\", expected=\"$3\"" } +lv_first_seg_field() { + local actual=$(get lv_first_seg_field "$1" "$2" "${@:4}") + test "$actual" = "$3" || \ + die "lv_field: lv=$1, field=\"$2\", actual=\"$actual\", expected=\"$3\"" +} + lvh_field() { local actual=$(get lvh_field "$1" "$2" "${@:4}") test "$actual" = "$3" || \ diff --git a/test/lib/get.sh b/test/lib/get.sh index f6504172c..0a8c943fd 100644 --- a/test/lib/get.sh +++ b/test/lib/get.sh @@ -42,6 +42,11 @@ lv_field() { trim_ "$r" } +lv_first_seg_field() { + local r=$(lvs --config 'log{prefix=""}' --noheadings -o "$2" "${@:3}" "$1" | head -1) + trim_ "$r" +} + lvh_field() { local 
r=$(lvs -H --config 'log{prefix=""}' --noheadings -o "$2" "${@:3}" "$1") trim_ "$r" diff --git a/test/shell/lvconvert-linear_to_striped.sh b/test/shell/lvconvert-linear_to_striped.sh new file mode 100644 index 000000000..629830f36 --- /dev/null +++ b/test/shell/lvconvert-linear_to_striped.sh @@ -0,0 +1,63 @@ +#!/bin/sh +# Copyright (C) 2017 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA2110-1301 USA + +SKIP_WITH_LVMLOCKD=1 +SKIP_WITH_LVMPOLLD=1 + +. lib/inittest + +which mkfs.ext4 || skip +aux have_raid 1 10 2 || skip + +aux prepare_vg 4 + +# Create linear LV +lvcreate -aey -L16M -n$lv1 $vg +check lv_field $vg/$lv1 segtype "linear" +echo y|mkfs -t ext4 $DM_DEV_DIR/$vg/$lv1 +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Upconvert it to 2-legged raid1 +lvconvert -y -m 1 --ty raid1 --regionsize 512K $vg/$lv1 +check lv_field $vg/$lv1 segtype "raid1" +check lv_field $vg/$lv1 stripes 2 +check lv_field $vg/$lv1 regionsize "512.00k" +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Convert 2-legged raid1 to raid5_n +lvconvert -y --ty raid5_n $vg/$lv1 +check lv_field $vg/$lv1 segtype "raid5_n" +check lv_field $vg/$lv1 stripes 2 +check lv_field $vg/$lv1 stripesize "64.00k" +check lv_field $vg/$lv1 regionsize "512.00k" +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Reshape it to to 3 stripes and 256K stripe size +lvconvert -y --stripes 3 --stripesize 256K $vg/$lv1 +check lv_first_seg_field $vg/$lv1 stripes 4 +check lv_first_seg_field $vg/$lv1 stripesize "256.00k" +fsck -fn $DM_DEV_DIR/$vg/$lv1 +aux wait_for_sync $vg $lv1 +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Use the additonal space gained by adding stripes +resize2fs $DM_DEV_DIR/$vg/$lv1 
+fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Convert it to striped +# FIXME: _lvconvert fails here? +lvconvert -y --ty striped $vg/$lv1 +fsck -fn $DM_DEV_DIR/$vg/$lv1 +check lv_first_seg_field $vg/$lv1 segtype "striped" +check lv_first_seg_field $vg/$lv1 stripes 3 +check lv_first_seg_field $vg/$lv1 stripesize "256.00k" + +vgremove -ff $vg diff --git a/test/shell/lvconvert-raid-reshape.sh b/test/shell/lvconvert-raid-reshape.sh new file mode 100644 index 000000000..636a6d370 --- /dev/null +++ b/test/shell/lvconvert-raid-reshape.sh @@ -0,0 +1,195 @@ +#!/bin/sh +# Copyright (C) 2017 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA2110-1301 USA + +SKIP_WITH_LVMLOCKD=1 +SKIP_WITH_LVMPOLLD=1 + +. lib/inittest + +which mkfs.ext4 || skip +aux have_raid 1 10 2 || skip + +aux prepare_vg 64 + +function _lvcreate +{ + local level=$1 + local req_stripes=$2 + local stripes=$3 + local size=$4 + local vg=$5 + local lv=$6 + + lvcreate -y -aey --type $level -i $req_stripes -L $size -n $lv $vg + check lv_first_seg_field $vg/$lv segtype "$level" + check lv_first_seg_field $vg/$lv stripes $stripes + mkfs.ext4 "$DM_DEV_DIR/$vg/$lv" + fsck -fn "$DM_DEV_DIR/$vg/$lv" +} + +function _lvconvert +{ + local req_level=$1 + local level=$2 + local stripes=$3 + local vg=$4 + local lv=$5 + local region_size=$6 + local wait_and_check=1 + local R="" + + [ -n "$region_size" ] && R="-R $region_size" + [ "${level:0:7}" = "striped" ] && wait_and_check=0 + [ "${level:0:5}" = "raid0" ] && wait_and_check=0 + + lvconvert -y --ty $req_level $R $vg/$lv + [ $? -ne 0 ] && return $? 
+ check lv_first_seg_field $vg/$lv segtype "$level" + check lv_first_seg_field $vg/$lv stripes $stripes + [ -n "$region_size" ] && check lv_field $vg/$lv regionsize $region_size + if [ "$wait_and_check" -eq 1 ] + then + fsck -fn "$DM_DEV_DIR/$vg/$lv" + aux wait_for_sync $vg $lv + fi + fsck -fn "$DM_DEV_DIR/$vg/$lv" +} + +function _reshape_layout +{ + local type=$1 + shift + local stripes=$1 + shift + local vg=$1 + shift + local lv=$1 + shift + local opts="$*" + local ignore_a_chars=0 + + [[ "$opts" =~ "--stripes" ]] && ignore_a_chars=1 + + lvconvert -vvvv -y --ty $type $opts $vg/$lv + check lv_first_seg_field $vg/$lv segtype "$type" + check lv_first_seg_field $vg/$lv stripes $stripes + aux wait_for_sync $vg $lv $ignore_a_chars + fsck -fn "$DM_DEV_DIR/$vg/$lv" +} + +# Delay leg so that rebuilding status characters +# can be read before resync finished too quick. +# aux delay_dev "$dev1" 1 + +# +# Start out with raid5(_ls) +# + +# Create 3-way striped raid5 (4 legs total) +_lvcreate raid5_ls 3 4 16M $vg $lv1 +check lv_first_seg_field $vg/$lv1 segtype "raid5_ls" +aux wait_for_sync $vg $lv1 + +# Reshape it to 256K stripe size +_reshape_layout raid5_ls 4 $vg $lv1 --stripesize 256K +check lv_first_seg_field $vg/$lv1 stripesize "256.00k" + +# Convert raid5(_n) -> striped +not _lvconvert striped striped 3 $vg $lv1 512k +_reshape_layout raid5_n 4 $vg $lv1 +_lvconvert striped striped 3 $vg $lv1 + +# Convert striped -> raid5_n +_lvconvert raid5_n raid5_n 4 $vg $lv1 "" 1 + +# Convert raid5_n -> raid5_ls +_reshape_layout raid5_ls 4 $vg $lv1 + +# Convert raid5_ls to 5 stripes +_reshape_layout raid5_ls 6 $vg $lv1 --stripes 5 + +# Convert raid5_ls back to 3 stripes +_reshape_layout raid5_ls 6 $vg $lv1 --stripes 3 --force +_reshape_layout raid5_ls 4 $vg $lv1 --stripes 3 + +# Convert raid5_ls to 7 stripes +_reshape_layout raid5_ls 8 $vg $lv1 --stripes 7 + +# Convert raid5_ls to 9 stripes +_reshape_layout raid5_ls 10 $vg $lv1 --stripes 9 + +# Convert raid5_ls to 14 stripes 
+_reshape_layout raid5_ls 15 $vg $lv1 --stripes 14 + +# Convert raid5_ls to 63 stripes +_reshape_layout raid5_ls 64 $vg $lv1 --stripes 63 + +# Convert raid5_ls back to 4 stripes +_reshape_layout raid5_ls 15 $vg $lv1 --stripes 4 --force +_reshape_layout raid5_ls 5 $vg $lv1 --stripes 4 + +# Convert raid5_ls back to 3 stripes +_reshape_layout raid5_ls 5 $vg $lv1 --stripes 3 --force +_reshape_layout raid5_ls 4 $vg $lv1 --stripes 3 + +# Convert raid5_ls -> raid5_rs +_reshape_layout raid5_rs 4 $vg $lv1 + +# Convert raid5_rs -> raid5_la +_reshape_layout raid5_la 4 $vg $lv1 + +# Convert raid5_la -> raid5_ra +_reshape_layout raid5_ra 4 $vg $lv1 + +# Convert raid5_ra -> raid6_ra_6 +_lvconvert raid6_ra_6 raid6_ra_6 5 $vg $lv1 "4.00m" 1 + +# Convert raid5_la -> raid6(_zr) +_reshape_layout raid6 5 $vg $lv1 + +# Convert raid6(_zr) -> raid6_nc +_reshape_layout raid6_nc 5 $vg $lv1 + +# Convert raid6(_nc) -> raid6_nr +_reshape_layout raid6_nr 5 $vg $lv1 + +# Convert raid6_nr) -> raid6_rs_6 +_reshape_layout raid6_rs_6 5 $vg $lv1 + +# Convert raid6_rs_6 to 5 stripes +_reshape_layout raid6_rs_6 7 $vg $lv1 --stripes 5 + +# Convert raid6_rs_6 to 4 stripes +_reshape_layout raid6_rs_6 7 $vg $lv1 --stripes 4 --force +_reshape_layout raid6_rs_6 6 $vg $lv1 --stripes 4 +check lv_first_seg_field $vg/$lv1 stripesize "256.00k" + +# Convert raid6_rs_6 to raid6_n_6 +_reshape_layout raid6_n_6 6 $vg $lv1 + +# Convert raid6_n_6 -> striped +_lvconvert striped striped 4 $vg $lv1 +check lv_first_seg_field $vg/$lv1 stripesize "256.00k" + +# Convert striped -> raid10(_near) +_lvconvert raid10 raid10 8 $vg $lv1 + +# Convert raid10 to 10 stripes and 64K stripesize +# FIXME: change once we support odd numbers of raid10 stripes +not _reshape_layout raid10 9 $vg $lv1 --stripes 9 --stripesize 64K +_reshape_layout raid10 10 $vg $lv1 --stripes 10 --stripesize 64K +check lv_first_seg_field $vg/$lv1 stripesize "64.00k" + +# Convert raid6_n_6 -> striped +_lvconvert striped striped 5 $vg $lv1 +check 
lv_first_seg_field $vg/$lv1 stripesize "64.00k" + +vgremove -ff $vg diff --git a/test/shell/lvconvert-raid-takeover.sh b/test/shell/lvconvert-raid-takeover.sh index aa41dba80..5d8f8588a 100644 --- a/test/shell/lvconvert-raid-takeover.sh +++ b/test/shell/lvconvert-raid-takeover.sh @@ -117,8 +117,7 @@ fsck -fn "$DM_DEV_DIR/$vg/$lv1" lvconvert -m 4 -R 128K $vg/$lv1 check lv_field $vg/$lv1 segtype "raid1" check lv_field $vg/$lv1 stripes 5 -# FIXME: once lv_raid_chanage_image_count() supports region_size changes -not check lv_field $vg/$lv1 regionsize "128.00k" +check lv_field $vg/$lv1 regionsize "128.00k" fsck -fn "$DM_DEV_DIR/$vg/$lv1" aux wait_for_sync $vg $lv1 fsck -fn "$DM_DEV_DIR/$vg/$lv1" @@ -258,7 +257,13 @@ _lvconvert raid0 raid0 3 $vg $lv1 # Convert raid0 -> raid10 _lvconvert raid10 raid10 6 $vg $lv1 -# Convert raid10 -> raid0 +# Convert raid10 -> raid0_meta +_lvconvert raid0_meta raid0_meta 3 $vg $lv1 + +# Convert raid0_meta -> raid5 +_lvconvert raid5_n raid5_n 4 $vg $lv1 + +# Convert raid5_n -> raid0_meta _lvconvert raid0_meta raid0_meta 3 $vg $lv1 # Convert raid0_meta -> raid10 diff --git a/test/shell/lvconvert-striped_to_linear.sh b/test/shell/lvconvert-striped_to_linear.sh new file mode 100644 index 000000000..1f13eb8b9 --- /dev/null +++ b/test/shell/lvconvert-striped_to_linear.sh @@ -0,0 +1,65 @@ +#!/bin/sh +# Copyright (C) 2017 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA2110-1301 USA + +SKIP_WITH_LVMLOCKD=1 +SKIP_WITH_LVMPOLLD=1 + +. 
lib/inittest + +which mkfs.ext4 || skip +aux have_raid 1 10 2 || skip + +aux prepare_vg 5 + +# Create 4-way striped LV +lvcreate -aey --ty striped -i4 -L16M -n $lv1 $vg +check lv_field $vg/$lv1 segtype "striped" +check lv_field $vg/$lv1 stripes 4 +echo y|mkfs -t ext4 $DM_DEV_DIR/$vg/$lv1 +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Upconvert to raid5_n +lvconvert -y --ty raid5 $vg/$lv1 +check lv_field $vg/$lv1 segtype "raid5_n" +check lv_field $vg/$lv1 stripes 5 +check lv_field $vg/$lv1 stripesize "64.00k" +fsck -fn $DM_DEV_DIR/$vg/$lv1 +aux wait_for_sync $vg $lv1 +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Grow it *4 to keep the given fs +lvresize -L64M $vg/$lv1 +fsck -fn $DM_DEV_DIR/$vg/$lv1 +check lv_first_seg_field $vg/$lv1 lv_size "64.00m" +aux wait_for_sync $vg $lv1 + +# Convert to 1 stripe +lvconvert -y -f --stripes 1 $vg/$lv1 +fsck -fn $DM_DEV_DIR/$vg/$lv1 +aux wait_for_sync $vg $lv1 1 +lvconvert --stripes 1 $vg/$lv1 +check lv_first_seg_field $vg/$lv1 stripes 2 +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Convert to raid1 +lvconvert -y --ty raid1 $vg/$lv1 +check lv_first_seg_field $vg/$lv1 segtype "raid1" +check lv_first_seg_field $vg/$lv1 stripes 2 +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +# Convert to linear +lvconvert -y --ty linear $vg/$lv1 +# lvconvert -y -m 0 $vg/$lv1 +check lv_first_seg_field $vg/$lv1 segtype "linear" +check lv_first_seg_field $vg/$lv1 stripes 1 +fsck -fn $DM_DEV_DIR/$vg/$lv1 + +vgremove -ff $vg diff --git a/tools/command-lines.in b/tools/command-lines.in index 606d2e7ba..34ae718bf 100644 --- a/tools/command-lines.in +++ b/tools/command-lines.in @@ -359,6 +359,13 @@ ID: lvconvert_raid_types DESC: Convert LV to raid. RULE: all not lv_is_locked lv_is_pvmove +lvconvert --type raid LV_raid +OO: OO_LVCONVERT_RAID, OO_LVCONVERT +ID: lvconvert_raid_types +DESC: Convert raid LV to different layout algorithm. 
+RULE: all not lv_is_locked lv_is_pvmove +RULE: all not LV_raid0 LV_raid1 + lvconvert --mirrors SNumber LV OO: OO_LVCONVERT_RAID, OO_LVCONVERT, --mirrorlog MirrorLog OP: PV ... @@ -366,6 +373,21 @@ ID: lvconvert_raid_types DESC: Convert LV to raid1 or mirror, or change number of mirror images. RULE: all not lv_is_locked lv_is_pvmove +lvconvert --stripes_long SNumber LV_raid +OO: OO_LVCONVERT_RAID, OO_LVCONVERT +OP: PV ... +ID: lvconvert_raid_types +DESC: Convert raid LV to change number of stripe images. +RULE: all not lv_is_locked lv_is_pvmove +RULE: all not LV_raid0 LV_raid1 + +lvconvert --stripesize SizeKB LV_raid +OO: OO_LVCONVERT_RAID, OO_LVCONVERT +ID: lvconvert_raid_types +DESC: Convert raid LV to change the stripe size. +RULE: all not lv_is_locked lv_is_pvmove +RULE: all not LV_raid0 LV_raid1 + lvconvert --regionsize RegionSize LV_raid OO: OO_LVCONVERT ID: lvconvert_change_region_size @@ -373,6 +395,13 @@ DESC: Change the region size of an LV. RULE: all not lv_is_locked lv_is_pvmove RULE: all not LV_raid0 +lvconvert LV_mirror_raid +OO: OO_LVCONVERT +ID: lvconvert_raid_types +DESC: Remove out-of-place reshape space +RULE: all not lv_is_locked lv_is_pvmove +RULE: all not LV_raid0 LV_raid1 + --- # lvconvert raid-related utilities diff --git a/tools/lvchange.c b/tools/lvchange.c index 1c00b76f1..08d31d716 100644 --- a/tools/lvchange.c +++ b/tools/lvchange.c @@ -816,6 +816,7 @@ static int _lvchange_writemostly(struct logical_volume *lv) } } + /* FIXME: prohibit on primary if not in-sync! 
*/ if (!lv_update_and_reload(lv)) return_0; diff --git a/tools/lvconvert.c b/tools/lvconvert.c index b9e0665c7..0db10cbd9 100644 --- a/tools/lvconvert.c +++ b/tools/lvconvert.c @@ -1228,6 +1228,9 @@ static int _lvconvert_mirrors(struct cmd_context *cmd, static int _is_valid_raid_conversion(const struct segment_type *from_segtype, const struct segment_type *to_segtype) { + if (!from_segtype) + return 1; + if (from_segtype == to_segtype) return 1; @@ -1356,7 +1359,7 @@ static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *l DEFAULT_RAID1_MAX_IMAGES, lp->segtype->name, display_lvname(lv)); return 0; } - if (!lv_raid_change_image_count(lv, image_count, lp->pvh)) + if (!lv_raid_change_image_count(lv, image_count, lp->region_size, lp->pvh)) return_0; log_print_unless_silent("Logical volume %s successfully converted.", @@ -1365,10 +1368,13 @@ static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *l return 1; } goto try_new_takeover_or_reshape; - } else if (!*lp->type_str || seg->segtype == lp->segtype) { + } +#if 0 + } else if ((!*lp->type_str || seg->segtype == lp->segtype) && !lp->stripe_size_supplied) { log_error("Conversion operation not yet supported."); return 0; } +#endif if ((seg_is_linear(seg) || seg_is_striped(seg) || seg_is_mirrored(seg) || lv_is_raid(lv)) && (lp->type_str && lp->type_str[0])) { @@ -1390,10 +1396,14 @@ static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *l return 0; } + /* FIXME This needs changing globally. 
*/ if (!arg_is_set(cmd, stripes_long_ARG)) lp->stripes = 0; + if (!arg_is_set(cmd, type_ARG)) + lp->segtype = NULL; - if (!lv_raid_convert(lv, lp->segtype, lp->yes, lp->force, lp->stripes, lp->stripe_size_supplied, lp->stripe_size, + if (!lv_raid_convert(lv, lp->segtype, + lp->yes, lp->force, lp->stripes, lp->stripe_size_supplied, lp->stripe_size, lp->region_size, lp->pvh)) return_0; @@ -1410,12 +1420,16 @@ try_new_takeover_or_reshape: /* FIXME This needs changing globally. */ if (!arg_is_set(cmd, stripes_long_ARG)) lp->stripes = 0; + if (!arg_is_set(cmd, type_ARG)) + lp->segtype = NULL; /* Only let raid4 through for now. */ - if (lp->type_str && lp->type_str[0] && lp->segtype != seg->segtype && - ((seg_is_raid4(seg) && seg_is_striped(lp) && lp->stripes > 1) || - (seg_is_striped(seg) && seg->area_count > 1 && seg_is_raid4(lp)))) { - if (!lv_raid_convert(lv, lp->segtype, lp->yes, lp->force, lp->stripes, lp->stripe_size_supplied, lp->stripe_size, + if (!lp->segtype || + (lp->type_str && lp->type_str[0] && lp->segtype != seg->segtype && + ((seg_is_raid4(seg) && seg_is_striped(lp) && lp->stripes > 1) || + (seg_is_striped(seg) && seg->area_count > 1 && seg_is_raid4(lp))))) { + if (!lv_raid_convert(lv, lp->segtype, + lp->yes, lp->force, lp->stripes, lp->stripe_size_supplied, lp->stripe_size, lp->region_size, lp->pvh)) return_0; @@ -1700,6 +1714,8 @@ static int _lvconvert_raid_types(struct cmd_context *cmd, struct logical_volume /* FIXME This is incomplete */ if (_mirror_or_raid_type_requested(cmd, lp->type_str) || _raid0_type_requested(lp->type_str) || _striped_type_requested(lp->type_str) || lp->mirrorlog || lp->corelog) { + if (!arg_is_set(cmd, type_ARG)) + lp->segtype = first_seg(lv)->segtype; /* FIXME Handle +/- adjustments too? 
*/ if (!get_stripe_params(cmd, lp->segtype, &lp->stripes, &lp->stripe_size, &lp->stripes_supplied, &lp->stripe_size_supplied)) goto_out; @@ -2990,9 +3006,9 @@ static int _lvconvert_to_pool(struct cmd_context *cmd, } /* Allocate a new pool segment */ - if (!(seg = alloc_lv_segment(pool_segtype, pool_lv, 0, data_lv->le_count, + if (!(seg = alloc_lv_segment(pool_segtype, pool_lv, 0, data_lv->le_count, 0, pool_lv->status, 0, NULL, 1, - data_lv->le_count, 0, 0, 0, NULL))) + data_lv->le_count, 0, 0, 0, 0, NULL))) return_0; /* Add the new segment to the layer LV */ diff --git a/tools/toollib.c b/tools/toollib.c index 842f67c02..68f6d394a 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -1305,6 +1305,7 @@ static int _validate_stripe_params(struct cmd_context *cmd, const struct segment return 0; } +// printf("%s[%u] *stripe_size=%u\n", __func__, __LINE__, *stripe_size); return 1; } @@ -1324,6 +1325,7 @@ int get_stripe_params(struct cmd_context *cmd, const struct segment_type *segtyp *stripes_supplied = arg_is_set(cmd, stripes_long_ARG) ? : arg_is_set(cmd, stripes_ARG); *stripe_size = arg_uint_value(cmd, stripesize_ARG, 0); +// printf("%s[%u] *stripe_size=%u\n", __func__, __LINE__, *stripe_size); *stripe_size_supplied = arg_is_set(cmd, stripesize_ARG); if (*stripe_size) { if (arg_sign_value(cmd, stripesize_ARG, SIGN_NONE) == SIGN_MINUS) { @@ -1338,6 +1340,7 @@ int get_stripe_params(struct cmd_context *cmd, const struct segment_type *segtyp } } +// printf("%s[%u] *stripe_size=%u\n", __func__, __LINE__, *stripe_size); return _validate_stripe_params(cmd, segtype, stripes, stripe_size); }