1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-10-28 11:55:55 +03:00
lvm2/lib/metadata/mirror.c

2354 lines
63 KiB
C
Raw Normal View History

2003-05-06 16:22:24 +04:00
/*
2008-01-30 17:00:02 +03:00
* Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
2003-05-06 16:22:24 +04:00
*
2004-03-30 23:35:44 +04:00
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
2004-03-30 23:35:44 +04:00
*
* You should have received a copy of the GNU Lesser General Public License
2004-03-30 23:35:44 +04:00
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2003-05-06 16:22:24 +04:00
*/
#include "lib.h"
#include "metadata.h"
#include "toolcontext.h"
2004-09-16 22:40:56 +04:00
#include "segtype.h"
2004-05-05 01:25:57 +04:00
#include "display.h"
#include "archiver.h"
#include "activate.h"
2005-04-22 19:43:02 +04:00
#include "lv_alloc.h"
#include "lvm-string.h"
#include "str_list.h"
2005-11-29 21:20:23 +03:00
#include "locking.h" /* FIXME Should not be used in this file */
2006-05-11 23:45:53 +04:00
#include "defaults.h" /* FIXME: should this be defaults.h? */
2007-12-21 01:37:42 +03:00
/* These are necessary for _write_log_header() */
#include "xlate.h"
/* Values stored in the on-disk mirror log header by _write_log_header() */
#define MIRROR_MAGIC 0x4D695272		/* ASCII bytes 'M' 'i' 'R' 'r' */
#define MIRROR_DISK_VERSION 2

/* These are the flags that represent the mirror failure restoration policies */
#define MIRROR_REMOVE		 0
#define MIRROR_ALLOCATE		 1
#define MIRROR_ALLOCATE_ANYWHERE 2
2007-12-20 21:55:46 +03:00
/*
 * Returns true if the lv is temporary mirror layer for resync
 *
 * An LV qualifies when it is a mirror image, is itself mirrored,
 * and is not locked.
 *
 * Returns: 1 if all three conditions hold, 0 otherwise
 */
int is_temporary_mirror_layer(const struct logical_volume *lv)
{
	if (lv_is_mirror_image(lv) && lv_is_mirrored(lv) && !lv_is_locked(lv))
		return 1;

	return 0;
}
2008-01-16 22:09:35 +03:00
/*
* Return a temporary LV for resyncing added mirror image.
* Add other mirror legs to lvs list.
*/
2008-01-16 22:13:51 +03:00
struct logical_volume *find_temporary_mirror(const struct logical_volume *lv)
2008-01-16 22:09:35 +03:00
{
struct lv_segment *seg;
if (!lv_is_mirrored(lv))
2008-01-16 22:09:35 +03:00
return NULL;
seg = first_seg(lv);
/* Temporary mirror is always area_num == 0 */
if (seg_type(seg, 0) == AREA_LV &&
is_temporary_mirror_layer(seg_lv(seg, 0)))
return seg_lv(seg, 0);
return NULL;
}
/*
 * cluster_mirror_is_available
 *
 * Check if the proper kernel module and log daemon are running.
 * Caller should check for 'vg_is_clustered(lv->vg)' before making
 * this call.
 *
 * The "mirror" segment type is looked up and asked, through its
 * target_present operation, for its attributes; the cluster mirror is
 * reported available only when MIRROR_LOG_CLUSTERED is among them.
 *
 * Returns: 1 if available, 0 otherwise
 */
int cluster_mirror_is_available(struct cmd_context *cmd)
{
	const struct segment_type *mirror_type;
	unsigned target_attr = 0;

	mirror_type = get_segtype_from_string(cmd, "mirror");
	if (!mirror_type)
		return_0;

	/* The segment type may not provide a presence check at all */
	if (!mirror_type->ops->target_present)
		return_0;

	if (!mirror_type->ops->target_present(cmd, NULL, &target_attr))
		return_0;

	return (target_attr & MIRROR_LOG_CLUSTERED) ? 1 : 0;
}
/*
* Returns the number of mirrors of the LV
*/
2007-12-20 21:55:46 +03:00
uint32_t lv_mirror_count(const struct logical_volume *lv)
{
2007-12-20 21:55:46 +03:00
struct lv_segment *seg;
uint32_t s, mirrors;
if (!lv_is_mirrored(lv))
2007-12-20 21:55:46 +03:00
return 1;
seg = first_seg(lv);
/* FIXME: RAID10 only supports 2 copies right now */
if (!strcmp(seg->segtype->name, "raid10"))
return 2;
if (lv_is_pvmove(lv))
return seg->area_count;
mirrors = 0;
2007-12-20 21:55:46 +03:00
for (s = 0; s < seg->area_count; s++) {
if (seg_type(seg, s) != AREA_LV)
continue;
if (is_temporary_mirror_layer(seg_lv(seg, s)))
mirrors += lv_mirror_count(seg_lv(seg, s));
else
mirrors++;
2007-12-20 21:55:46 +03:00
}
return mirrors ? mirrors : 1;
}
/*
 * Find the mirrored segment that uses the LV owning 'seg'.
 *
 * Returns: the single segment using seg->lv when that segment is mirrored;
 *          NULL (after logging an error) when no such segment is found or
 *          the segment found is not mirrored
 */
struct lv_segment *find_mirror_seg(struct lv_segment *seg)
{
	struct lv_segment *user_seg = get_only_segment_using_this_lv(seg->lv);

	if (!user_seg) {
		log_error("Failed to find mirror_seg for %s", seg->lv->name);
		return NULL;
	}

	if (seg_is_mirrored(user_seg))
		return user_seg;

	log_error("%s on %s is not a mirror segments",
		  user_seg->lv->name, seg->lv->name);

	return NULL;
}
/*
 * Reduce the region size if necessary to ensure
 * the volume size is a multiple of the region size.
 *
 * For internal use only log only in verbose mode
 *
 * @extent_size: extent size in sectors
 * @extents:     number of extents in the volume
 * @region_size: requested region size in sectors
 * @internal:    non-zero when called internally; adjustments are then
 *               reported with log_verbose() instead of being printed
 * @clustered:   non-zero for cluster mirrors; only has an effect when
 *               CMIRROR_REGION_COUNT_LIMIT is defined
 *
 * Returns: the (possibly adjusted) region size in sectors
 */
uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents,
				     uint32_t region_size, int internal, int clustered)
{
	/*
	 * 'x & -x' isolates the lowest set bit of x, i.e. the largest power
	 * of two dividing x.  Unlike '1 << (ffs(x) - 1)' it is well defined
	 * for x == 0 (result 0) - ffs(0) is 0, which made the old expression
	 * shift by -1.
	 */
	const uint64_t extents_pow2 = extents & (uint32_t) (~extents + 1u);
	const uint64_t extent_size_pow2 = extent_size & (uint32_t) (~extent_size + 1u);
	/* Largest power of two dividing the volume size (0 if size is 0) */
	const uint64_t region_max = extents_pow2 * extent_size_pow2;
#ifdef CMIRROR_REGION_COUNT_LIMIT
	uint64_t region_min, region_min_pow2;
#endif

	/* A zero region_max means "no constraint known": leave region_size alone */
	if (region_max && region_max < UINT32_MAX && region_size > region_max) {
		region_size = (uint32_t) region_max;
		if (!internal)
			log_print_unless_silent("Using reduced mirror region size of %"
						PRIu32 " sectors.", region_size);
		else
			log_verbose("Using reduced mirror region size of %"
				    PRIu32 " sectors.", region_size);
	}

#ifdef CMIRROR_REGION_COUNT_LIMIT
	if (clustered) {
		/*
		 * The CPG code used by cluster mirrors can only handle a
		 * payload of < 1MB currently.  (This deficiency is tracked by
		 * http://bugzilla.redhat.com/682771.)  The region size for cluster
		 * mirrors must be restricted in such a way as to limit the
		 * size of the bitmap to < 512kB, because there are two bitmaps
		 * which get sent around during checkpointing while a cluster
		 * mirror starts up.  Ergo, the number of regions must not
		 * exceed 512k * 8.  We also need some room for the other
		 * checkpointing structures as well, so we reduce by another
		 * factor of two.
		 *
		 * This code should be removed when the CPG restriction is
		 * lifted.
		 */
		region_min = (uint64_t) extents * extent_size / CMIRROR_REGION_COUNT_LIMIT;

		/* Round the minimum up to the next power of two */
		region_min_pow2 = 1;
		while (region_min_pow2 < region_min)
			region_min_pow2 *= 2;

		if (region_size < region_min_pow2) {
			/* Same reporting policy as above: print unless internal */
			if (!internal)
				log_print_unless_silent("Increasing mirror region size from %"
							PRIu32 " to %" PRIu64 " sectors.",
							region_size, region_min_pow2);
			else
				log_verbose("Increasing mirror region size from %"
					    PRIu32 " to %" PRIu64 " sectors.",
					    region_size, region_min_pow2);
			region_size = (uint32_t) region_min_pow2;
		}
	}
#endif /* CMIRROR_REGION_COUNT_LIMIT */

	return region_size;
}
/*
 * shift_mirror_images
 * @mirrored_seg
 * @mimage:  The position (index) of the image to move to the end
 *
 * When dealing with removal of legs, we often move a 'removable leg'
 * to the back of the 'areas' array.  It is critically important not
 * to simply swap it for the last area in the array.  This would have
 * the effect of reordering the remaining legs - altering position of
 * the primary.  So, we must shuffle all of the areas in the array
 * to maintain their relative position before moving the 'removable
 * leg' to the end.
 *
 * Short illustration of the problem:
 *   - Mirror consists of legs A, B, C and we want to remove A
 *   - We swap A and C and then remove A, leaving C, B
 * This scenario is problematic in failure cases where A dies, because
 * B becomes the primary.  If the above happens, we effectively throw
 * away any changes made between the time of failure and the time of
 * restructuring the mirror.
 *
 * So, any time we want to move areas to the end to be removed, use
 * this function.
 *
 * Returns: 1 on success, 0 if 'mimage' is not a valid area index
 */
int shift_mirror_images(struct lv_segment *mirrored_seg, unsigned mimage)
{
	unsigned i;
	struct lv_segment_area area;

	if (mimage >= mirrored_seg->area_count) {
		log_error("Invalid index (%u) of mirror image supplied "
			  "to shift_mirror_images()", mimage);
		return 0;
	}

	area = mirrored_seg->areas[mimage];

	/* Shift remaining images down to fill the hole */
	for (i = mimage + 1; i < mirrored_seg->area_count; i++)
		mirrored_seg->areas[i-1] = mirrored_seg->areas[i];

	/* Place this one at the end (i == area_count here) */
	mirrored_seg->areas[i-1] = area;

	return 1;
}
2008-01-16 22:09:35 +03:00
/*
 * Write a fresh mirror log header to the start of the device backing 'lv'.
 *
 * The header holds the mirror magic, the disk version and an nr_regions
 * field set to all ones, each passed through xlate32()/xlate64().  The
 * device is looked up by the path <dev_dir><vg name>/<lv name>.
 *
 * Returns: 1 on success, 0 on failure
 */
static int _write_log_header(struct cmd_context *cmd, struct logical_volume *lv)
{
	struct { /* The mirror log header */
		uint32_t magic;
		uint32_t version;
		uint64_t nr_regions;
	} log_header = {
		.magic = xlate32(MIRROR_MAGIC),
		.version = xlate32(MIRROR_DISK_VERSION),
		.nr_regions = xlate64((uint64_t)-1),
	};
	struct device *log_dev;
	char *path;
	int written;

	path = dm_pool_alloc(cmd->mem, PATH_MAX);
	if (!path) {
		log_error("Name allocation failed - log header not written (%s)",
			  lv->name);
		return 0;
	}

	if (dm_snprintf(path, PATH_MAX, "%s%s/%s", cmd->dev_dir,
			lv->vg->name, lv->name) < 0) {
		log_error("Name too long - log header not written (%s)", lv->name);
		return 0;
	}

	log_verbose("Writing log header to device, %s", lv->name);

	log_dev = dev_cache_get(path, NULL);
	if (!log_dev) {
		log_error("%s: not found: log header not written", path);
		return 0;
	}

	if (!dev_open_quiet(log_dev))
		return 0;

	/* The header goes at offset 0 of the device */
	written = dev_write(log_dev, UINT64_C(0), sizeof(log_header), &log_header);
	if (!written)
		log_error("Failed to write log header to %s", path);

	/* The device is closed whether or not the write succeeded */
	dev_close_immediate(log_dev);

	return written ? 1 : 0;
}
/*
* Initialize mirror log contents
*/
static int _init_mirror_log(struct cmd_context *cmd,
struct logical_volume *log_lv, int in_sync,
struct dm_list *tagsl, int remove_on_failure)
2008-01-16 22:09:35 +03:00
{
struct dm_str_list *sl;
2010-01-08 13:50:11 +03:00
uint64_t orig_status = log_lv->status;
2008-01-17 16:37:51 +03:00
int was_active = 0;
2008-01-16 22:09:35 +03:00
2011-01-24 17:19:05 +03:00
if (test_mode()) {
log_verbose("Test mode: Skipping mirror log initialisation.");
return 1;
}
2008-01-16 22:09:35 +03:00
if (!activation() && in_sync) {
log_error("Aborting. Unable to create in-sync mirror log "
"while activation is disabled.");
return 0;
}
2008-01-17 16:37:51 +03:00
/* If the LV is active, deactivate it first. */
if (lv_is_active(log_lv)) {
(void) deactivate_lv(cmd, log_lv);
/*
* FIXME: workaround to fail early
* Ensure that log is really deactivated because deactivate_lv
* on cluster do not fail if there is log_lv with different UUID.
*/
if (lv_is_active(log_lv)) {
log_error("Aborting. Unable to deactivate mirror log.");
goto revert_new_lv;
}
2008-01-17 16:37:51 +03:00
was_active = 1;
}
/* Temporary make it visible for set_lv() */
lv_set_visible(log_lv);
2008-01-17 16:37:51 +03:00
2008-01-16 22:09:35 +03:00
/* Temporary tag mirror log for activation */
dm_list_iterate_items(sl, tagsl)
2008-01-16 22:09:35 +03:00
if (!str_list_add(cmd->mem, &log_lv->tags, sl->str)) {
log_error("Aborting. Unable to tag mirror log.");
2008-01-17 16:37:51 +03:00
goto activate_lv;
2008-01-16 22:09:35 +03:00
}
/* store mirror log on disk(s) */
if (!vg_write(log_lv->vg) || !vg_commit(log_lv->vg))
2008-01-17 16:37:51 +03:00
goto activate_lv;
2008-01-16 22:09:35 +03:00
backup(log_lv->vg);
2012-02-01 19:05:53 +04:00
/* Wait for events following any deactivation before reactivating */
if (!sync_local_dev_names(cmd)) {
log_error("Aborting. Failed to sync local devices before initialising mirror log %s.",
display_lvname(log_lv));
goto revert_new_lv;
}
2012-02-01 19:05:53 +04:00
2008-01-16 22:09:35 +03:00
if (!activate_lv(cmd, log_lv)) {
log_error("Aborting. Failed to activate mirror log.");
goto revert_new_lv;
}
/* Remove the temporary tags */
dm_list_iterate_items(sl, tagsl)
str_list_del(&log_lv->tags, sl->str);
2008-01-16 22:09:35 +03:00
if (activation()) {
if (!wipe_lv(log_lv, (struct wipe_params)
{ .do_zero = 1, .zero_sectors = log_lv->size,
.zero_value = in_sync ? -1 : 0 })) {
log_error("Aborting. Failed to wipe mirror log.");
goto deactivate_and_revert_new_lv;
}
2008-01-16 22:09:35 +03:00
}
if (activation() && !_write_log_header(cmd, log_lv)) {
log_error("Aborting. Failed to write mirror log header.");
goto deactivate_and_revert_new_lv;
}
if (!deactivate_lv(cmd, log_lv)) {
log_error("Aborting. Failed to deactivate mirror log. "
"Manual intervention required.");
return 0;
}
2009-05-21 07:04:52 +04:00
lv_set_hidden(log_lv);
2008-01-16 22:09:35 +03:00
2008-01-17 16:37:51 +03:00
if (was_active && !activate_lv(cmd, log_lv))
return_0;
2008-01-16 22:09:35 +03:00
return 1;
deactivate_and_revert_new_lv:
if (!deactivate_lv(cmd, log_lv)) {
log_error("Unable to deactivate mirror log LV. "
"Manual intervention required.");
return 0;
}
revert_new_lv:
2008-01-17 16:37:51 +03:00
log_lv->status = orig_status;
dm_list_iterate_items(sl, tagsl)
str_list_del(&log_lv->tags, sl->str);
2008-01-17 16:37:51 +03:00
if (remove_on_failure && !lv_remove(log_lv)) {
2008-01-16 22:09:35 +03:00
log_error("Manual intervention may be required to remove "
"abandoned log LV before retrying.");
2008-01-17 16:37:51 +03:00
return 0;
}
if (!vg_write(log_lv->vg) || !vg_commit(log_lv->vg))
2008-01-17 16:37:51 +03:00
log_error("Manual intervention may be required to "
"remove/restore abandoned log LV before retrying.");
else
backup(log_lv->vg);
2008-01-17 16:37:51 +03:00
activate_lv:
if (was_active && !remove_on_failure && !activate_lv(cmd, log_lv))
return_0;
2008-01-16 22:09:35 +03:00
return 0;
}
/*
 * Activate an LV similarly (i.e. SH or EX) to a given "model" LV
 *
 * If 'model' is active exclusively, 'lv' is activated exclusively;
 * otherwise a plain activation is used.
 *
 * Returns: 1 on success, 0 if the activation failed
 */
static int _activate_lv_like_model(struct logical_volume *model,
				   struct logical_volume *lv)
{
	int activated;

	activated = lv_is_active_exclusive(model) ?
		activate_lv_excl(lv->vg->cmd, lv) :
		activate_lv(lv->vg->cmd, lv);

	if (!activated)
		return_0;

	return 1;
}
/*
 * Delete independent/orphan LV, it must acquire lock.
 *
 * @mirror_lv:  the mirror LV whose tags 'lv' inherits before removal
 * @lv:         the LV to remove
 * @reactivate: if non-zero, 'lv' is activated and deactivated again
 *              before it is removed
 *
 * Returns: 1 on success, 0 on failure
 */
static int _delete_lv(struct logical_volume *mirror_lv, struct logical_volume *lv,
		      int reactivate)
{
	struct cmd_context *cmd = mirror_lv->vg->cmd;
	struct dm_str_list *sl;

	/* Inherit tags - maybe needed for activation */
	if (!str_list_match_list(&mirror_lv->tags, &lv->tags, NULL)) {
		dm_list_iterate_items(sl, &mirror_lv->tags)
			if (!str_list_add(cmd->mem, &lv->tags, sl->str)) {
				log_error("Aborting. Unable to tag.");
				return 0;
			}

		/* Persist the added tags before touching activation state */
		if (!vg_write(mirror_lv->vg) ||
		    !vg_commit(mirror_lv->vg)) {
			log_error("Intermediate VG commit for orphan volume failed.");
			return 0;
		}
	}

	if (reactivate) {
		/* FIXME: the 'model' should be 'mirror_lv' not 'lv', I think. */
		if (!_activate_lv_like_model(lv, lv))
			return_0;

		/* FIXME Is this superfluous now? */
		if (!sync_local_dev_names(cmd)) {
			log_error("Failed to sync local devices when reactivating %s.",
				  display_lvname(lv));
			return 0;
		}

		if (!deactivate_lv(cmd, lv))
			return_0;
	}

	if (!lv_remove(lv))
		return_0;

	return 1;
}
static int _merge_mirror_images(struct logical_volume *lv,
const struct dm_list *mimages)
{
uint32_t addition = dm_list_size(mimages);
struct logical_volume **img_lvs;
struct lv_list *lvl;
int i = 0;
if (!addition)
return 1;
if (!(img_lvs = alloca(sizeof(*img_lvs) * addition)))
return_0;
dm_list_iterate_items(lvl, mimages)
img_lvs[i++] = lvl->lv;
return lv_add_mirror_lvs(lv, img_lvs, addition,
MIRROR_IMAGE, first_seg(lv)->region_size);
}
/* Unlink the relationship between the segment and its log_lv */
2008-01-26 03:25:04 +03:00
struct logical_volume *detach_mirror_log(struct lv_segment *mirrored_seg)
{
struct logical_volume *log_lv;
if (!mirrored_seg->log_lv)
2008-01-26 03:25:04 +03:00
return NULL;
log_lv = mirrored_seg->log_lv;
mirrored_seg->log_lv = NULL;
lv_set_visible(log_lv);
log_lv->status &= ~MIRROR_LOG;
if (!remove_seg_from_segs_using_this_lv(log_lv, mirrored_seg))
return_0;
2008-01-26 03:25:04 +03:00
return log_lv;
}
2008-01-16 22:11:39 +03:00
/* Check if mirror image LV is removable with regard to given removable_pvs */
int is_mirror_image_removable(struct logical_volume *mimage_lv, void *baton)
2008-01-16 22:11:39 +03:00
{
struct physical_volume *pv;
struct lv_segment *seg;
int pv_found;
struct pv_list *pvl;
uint32_t s;
struct dm_list *removable_pvs = baton;
if (!baton || dm_list_empty(removable_pvs))
return 1;
2008-01-16 22:11:39 +03:00
dm_list_iterate_items(seg, &mimage_lv->segments) {
2008-01-16 22:11:39 +03:00
for (s = 0; s < seg->area_count; s++) {
if (seg_type(seg, s) != AREA_PV) {
/* FIXME Recurse for AREA_LV? */
/* Structure of seg_lv is unknown.
* Not removing this LV for safety. */
return 0;
}
2008-01-16 22:11:39 +03:00
pv = seg_pv(seg, s);
pv_found = 0;
dm_list_iterate_items(pvl, removable_pvs) {
if (id_equal(&pv->id, &pvl->pv->id)) {
pv_found = 1;
break;
}
if (pvl->pv->dev && pv->dev &&
pv->dev->dev == pvl->pv->dev->dev) {
2008-01-16 22:11:39 +03:00
pv_found = 1;
break;
}
}
if (!pv_found)
return 0;
}
}
return 1;
}
2010-01-09 01:00:31 +03:00
/*
* _move_removable_mimages_to_end
*
* We always detach mimage LVs from the end of the areas array.
* This function will push 'count' mimages to the end of the array
* based on if their PVs are removable.
*
* This is an all or nothing function. Either the user specifies
* enough removable PVs to satisfy count, or they don't specify
* any removable_pvs at all (in which case all PVs in the mirror
* are considered removable).
*/
static int _move_removable_mimages_to_end(struct logical_volume *lv,
uint32_t count,
struct dm_list *removable_pvs)
{
int i;
2010-01-09 01:00:31 +03:00
struct logical_volume *sub_lv;
struct lv_segment *mirrored_seg = first_seg(lv);
if (!removable_pvs)
return 1;
for (i = mirrored_seg->area_count - 1; (i >= 0) && count; i--) {
sub_lv = seg_lv(mirrored_seg, i);
2010-01-09 01:00:31 +03:00
if (!is_temporary_mirror_layer(sub_lv) &&
is_mirror_image_removable(sub_lv, removable_pvs)) {
if (!shift_mirror_images(mirrored_seg, i))
return_0;
count--;
}
2010-01-09 01:00:31 +03:00
}
return !count;
}
2010-04-20 16:14:28 +04:00
static int _mirrored_lv_in_sync(struct logical_volume *lv)
{
dm_percent_t sync_percent;
2010-04-20 16:14:28 +04:00
if (!lv_mirror_percent(lv->vg->cmd, lv, 0, &sync_percent,
NULL)) {
if (lv_is_active_but_not_locally(lv))
log_error("Unable to determine mirror sync status of"
" remotely active LV, %s/%s",
lv->vg->name, lv->name);
else
log_error("Unable to determine mirror "
"sync status of %s/%s.",
lv->vg->name, lv->name);
2010-04-20 16:14:28 +04:00
return 0;
}
return (sync_percent == DM_PERCENT_100) ? 1 : 0;
2010-04-20 16:14:28 +04:00
}
2010-01-09 01:00:31 +03:00
/*
* Split off 'split_count' legs from a mirror
*
* Returns: 0 on error, 1 on success
*/
static int _split_mirror_images(struct logical_volume *lv,
const char *split_name,
uint32_t split_count,
struct dm_list *removable_pvs)
{
uint32_t i;
struct logical_volume *sub_lv = NULL;
struct logical_volume *new_lv = NULL;
2010-01-09 01:00:31 +03:00
struct logical_volume *detached_log_lv = NULL;
struct lv_segment *mirrored_seg = first_seg(lv);
struct dm_list split_images;
struct lv_list *lvl;
struct cmd_context *cmd = lv->vg->cmd;
2010-01-09 01:00:31 +03:00
if (!lv_is_mirrored(lv)) {
2010-01-09 01:00:31 +03:00
log_error("Unable to split non-mirrored LV, %s",
lv->name);
return 0;
}
if (!split_count) {
log_error(INTERNAL_ERROR "split_count is zero!");
2010-01-09 01:00:31 +03:00
return 0;
}
log_verbose("Detaching %d images from mirror, %s",
split_count, lv->name);
if (!_move_removable_mimages_to_end(lv, split_count, removable_pvs)) {
/*
* FIXME: Allow incomplete specification of removable PVs?
*
* I am forcing the user to either specify no
* removable PVs or all of them. Should we allow
* them to just specify some - making us pick the rest?
*/
log_error("Insufficient removable PVs given"
" to satisfy request");
return 0;
}
/*
* Step 1:
* Remove the images from the mirror.
* Make them visible, independent LVs (don't change names yet).
* Track them in a list for later instantiation.
*/
2010-01-09 01:00:31 +03:00
dm_list_init(&split_images);
for (i = 0; i < split_count; i++) {
mirrored_seg->area_count--;
sub_lv = seg_lv(mirrored_seg, mirrored_seg->area_count);
sub_lv->status &= ~MIRROR_IMAGE;
if (!release_lv_segment_area(mirrored_seg, mirrored_seg->area_count, mirrored_seg->area_len))
return_0;
2010-01-09 01:00:31 +03:00
log_very_verbose("%s assigned to be split", sub_lv->name);
if (!new_lv) {
lv_set_visible(sub_lv);
new_lv = sub_lv;
continue;
}
/* If there is more than one image being split, add to list */
lvl = dm_pool_alloc(lv->vg->vgmem, sizeof(*lvl));
if (!lvl) {
log_error("lv_list alloc failed");
return 0;
}
lvl->lv = sub_lv;
dm_list_add(&split_images, &lvl->list);
}
new_lv->name = dm_pool_strdup(lv->vg->vgmem, split_name);
if (!new_lv->name) {
log_error("Unable to rename newly split LV");
return 0;
2010-01-09 01:00:31 +03:00
}
if (!dm_list_empty(&split_images)) {
size_t len = strlen(new_lv->name) + 32;
char *layer_name, format[len];
/*
* A number of images have been split and
* a new mirror layer must be formed
*/
if (!insert_layer_for_lv(cmd, new_lv, 0, "_mimage_%d")) {
2010-01-09 01:00:31 +03:00
log_error("Failed to build new mirror, %s",
new_lv->name);
return 0;
}
first_seg(new_lv)->region_size = mirrored_seg->region_size;
dm_list_iterate_items(lvl, &split_images) {
sub_lv = lvl->lv;
if (dm_snprintf(format, len, "%s_mimage_%%d",
new_lv->name) < 0) {
log_error("Failed to build new image name.");
return 0;
}
layer_name = dm_pool_alloc(lv->vg->vgmem, len);
2010-01-09 01:00:31 +03:00
if (!layer_name) {
log_error("Unable to allocate memory");
return 0;
}
if (!generate_lv_name(lv->vg, format, layer_name, len)||
sscanf(layer_name, format, &i) != 1) {
log_error("Failed to generate new image names");
return 0;
}
sub_lv->name = layer_name;
}
if (!_merge_mirror_images(new_lv, &split_images)) {
log_error("Failed to group split "
"images into new mirror");
return 0;
}
/*
* We don't allow splitting a mirror that is not in-sync,
* so we can bring the newly split mirror up without a
* resync. (It will be a 'core' log mirror after all.)
*/
init_mirror_in_sync(1);
}
sub_lv = NULL;
/*
* If no more mirrors, remove mirror layer.
* The sub_lv is removed entirely later - leaving
* only the top-level (now linear) LV.
*/
if (mirrored_seg->area_count == 1) {
sub_lv = seg_lv(mirrored_seg, 0);
sub_lv->status &= ~MIRROR_IMAGE;
lv_set_visible(sub_lv);
detached_log_lv = detach_mirror_log(mirrored_seg);
if (!remove_layer_from_lv(lv, sub_lv))
return_0;
lv->status &= ~MIRROR;
lv->status &= ~MIRRORED;
lv->status &= ~LV_NOTSYNCED;
}
if (!vg_write(mirrored_seg->lv->vg)) {
log_error("Intermediate VG metadata write failed.");
return 0;
}
2010-01-09 01:00:31 +03:00
/*
* Suspend the mirror - this includes all the sub-LVs and
* soon-to-be-split sub-LVs
*/
if (!suspend_lv(cmd, mirrored_seg->lv)) {
log_error("Failed to lock %s", mirrored_seg->lv->name);
vg_revert(mirrored_seg->lv->vg);
2010-01-09 01:00:31 +03:00
return 0;
}
if (!vg_commit(mirrored_seg->lv->vg)) {
resume_lv(cmd, mirrored_seg->lv);
return 0;
}
log_very_verbose("Updating \"%s\" in kernel", mirrored_seg->lv->name);
/*
* Resume the mirror - this also activates the visible, independent
* soon-to-be-split sub-LVs
*/
if (!resume_lv(cmd, mirrored_seg->lv)) {
log_error("Problem resuming %s", mirrored_seg->lv->name);
return 0;
}
/*
* Recycle newly split LV so it is properly renamed.
* Cluster requires the extra deactivate/activate calls.
*/
if (vg_is_clustered(lv->vg) &&
(!deactivate_lv(cmd, new_lv) ||
!_activate_lv_like_model(lv, new_lv))) {
log_error("Failed to rename newly split LV in the kernel");
return 0;
}
if (!suspend_lv(cmd, new_lv) || !resume_lv(cmd, new_lv)) {
log_error("Failed to rename newly split LV in the kernel");
return 0;
}
/* Remove original mirror layer if it has been converted to linear */
if (sub_lv && !_delete_lv(lv, sub_lv, 1))
return_0;
/* Remove the log if it has been converted to linear */
if (detached_log_lv && !_delete_lv(lv, detached_log_lv, 1))
return_0;
2010-01-09 01:00:31 +03:00
return 1;
}
/*
2007-12-20 21:55:46 +03:00
* Remove num_removed images from mirrored_seg
2008-01-17 16:13:54 +03:00
*
* Arguments:
* num_removed: the requested (maximum) number of mirrors to be removed
* removable_pvs: if not NULL and list not empty, only mirrors using PVs
* in this list will be removed
2008-01-17 16:13:54 +03:00
* remove_log: if non-zero, log_lv will be removed
* (even if it's 0, log_lv will be removed if there is no
* mirror remaining after the removal)
* collapse: if non-zero, instead of removing, remove the temporary
* mirror layer and merge mirrors to the original LV.
* removable_pvs should be NULL and num_removed should be
* seg->area_count - 1.
* removed: if non NULL, the number of removed mirror images is set
* as a result
*
* If collapse is non-zero, <removed> is guaranteed to be equal to num_removed.
*
* Return values:
* Failure (0) means something unexpected has happened and
* the caller should abort.
* Even if no mirror was removed (e.g. no LV matches to 'removable_pvs'),
* returns success (1).
*/
2007-12-20 21:55:46 +03:00
static int _remove_mirror_images(struct logical_volume *lv,
uint32_t num_removed,
int (*is_removable)(struct logical_volume *, void *),
void *removable_baton,
2008-01-17 16:13:54 +03:00
unsigned remove_log, unsigned collapse,
uint32_t *removed, int preferred_only)
{
uint32_t m;
int32_t s;
2005-11-29 21:20:23 +03:00
struct logical_volume *sub_lv;
2008-01-26 03:25:04 +03:00
struct logical_volume *detached_log_lv = NULL;
struct logical_volume *temp_layer_lv = NULL;
pvmove: Enable all-or-nothing (atomic) pvmoves pvmove can be used to move single LVs by name or multiple LVs that lie within the specified PV range (e.g. /dev/sdb1:0-1000). When moving more than one LV, the portions of those LVs that are in the range to be moved are added to a new temporary pvmove LV. The LVs then point to the range in the pvmove LV, rather than the PV range. Example 1: We have two LVs in this example. After they were created, the first LV was grown, yeilding two segments in LV1. So, there are two LVs with a total of three segments. Before pvmove: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- After pvmove inserts the temporary pvmove LV: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- Each of the affected LV segments now point to a range of blocks in the pvmove LV, which purposefully corresponds to the segments moved from the original LVs into the temporary pvmove LV. The current implementation goes on from here to mirror the temporary pvmove LV by segment. Further, as the pvmove LV is activated, only one of its segments is actually mirrored (i.e. "moving") at a time. The rest are either complete or not addressed yet. If the pvmove is aborted, those segments that are completed will remain on the destination and those that are not yet addressed or in the process of moving will stay on the source PV. Thus, it is possible to have a partially completed move - some LVs (or certain segments of LVs) on the source PV and some on the destination. 
Example 2: What 'example 1' might look if it was half-way through the move. --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | | ------------------------- source PV | | 256 - 511 | 512 - 767 | | ------------------------- | || ------------------------- dest PV | 000 - 255 | 256 - 511 | ------------------------- This update allows the user to specify that they would like the pvmove mirror created "by LV" rather than "by segment". That is, the pvmove LV becomes an image in an encapsulating mirror along with the allocated copy image. Example 3: A pvmove that is performed "by LV" rather than "by segment". --------- --------- | LV1s0 | | LV2s0 | --------- --------- | | ------------------------- pvmove0 | * LV-level mirror * | ------------------------- / \ pvmove_mimage0 / pvmove_mimage1 ------------------------- ------------------------- | seg 0 | seg 1 | | seg 0 | seg 1 | ------------------------- ------------------------- | | | | ------------------------- ------------------------- | 000 - 255 | 256 - 511 | | 000 - 255 | 256 - 511 | ------------------------- ------------------------- source PV dest PV The thing that differentiates a pvmove done in this way and a simple "up-convert" from linear to mirror is the preservation of the distinct segments. A normal up-convert would simply allocate the necessary space with no regard for segment boundaries. The pvmove operation must preserve the segments because they are the critical boundary between the segments of the LVs being moved. So, when the pvmove copy image is allocated, all corresponding segments must be allocated. The code that merges ajoining segments that are part of the same LV when the metadata is written must also be avoided in this case. This method of mirroring is unique enough to warrant its own definitional macro, MIRROR_BY_SEGMENTED_LV. 
This joins the two existing macros: MIRROR_BY_SEG (for original pvmove) and MIRROR_BY_LV (for user created mirrors). The advantages of performing pvmove in this way is that all of the LVs affected can be moved together. It is an all-or-nothing approach that leaves all LV segments on the source PV if the move is aborted. Additionally, a mirror log can be used (in the future) to provide tracking of progress; allowing the copy to continue where it left off in the event there is a deactivation.
2014-06-18 07:59:36 +04:00
struct lv_segment *pvmove_seg, *mirrored_seg = first_seg(lv);
2005-11-29 21:20:23 +03:00
uint32_t old_area_count = mirrored_seg->area_count;
uint32_t new_area_count = mirrored_seg->area_count;
2007-12-20 21:55:46 +03:00
struct lv_list *lvl;
struct dm_list tmp_orphan_lvs;
2013-11-25 16:42:30 +04:00
uint32_t orig_removed = num_removed;
int reactivate;
2005-11-29 21:20:23 +03:00
2008-01-17 16:13:54 +03:00
if (removed)
*removed = 0;
log_very_verbose("Reducing mirror set %s from %" PRIu32 " to %"
PRIu32 " image(s)%s.", lv->name,
2007-12-20 21:55:46 +03:00
old_area_count, old_area_count - num_removed,
2005-11-29 21:20:23 +03:00
remove_log ? " and no log volume" : "");
if (collapse && (old_area_count - num_removed != 1)) {
log_error("Incompatible parameters to _remove_mirror_images");
return 0;
}
num_removed = 0;
2005-11-29 21:20:23 +03:00
/* Move removable_pvs to end of array */
for (s = mirrored_seg->area_count - 1;
s >= 0 && old_area_count - new_area_count < orig_removed;
s--) {
sub_lv = seg_lv(mirrored_seg, s);
if (!(is_temporary_mirror_layer(sub_lv) && lv_mirror_count(sub_lv) != 1)) {
if (!is_removable) {
log_error(INTERNAL_ERROR "_remove_mirror_images called incorrectly with is_removable undefined.");
return 0;
}
if (!is_removable(sub_lv, removable_baton))
continue;
/*
* Check if the user is trying to pull the
* primary mirror image when the mirror is
* not in-sync.
*/
if ((s == 0) && !_mirrored_lv_in_sync(lv) &&
!(lv->status & PARTIAL_LV)) {
log_error("Unable to remove primary mirror image while mirror is not in-sync");
return 0;
2005-11-29 21:20:23 +03:00
}
if (!shift_mirror_images(mirrored_seg, s))
return_0;
--new_area_count;
++num_removed;
2005-11-29 21:20:23 +03:00
}
}
if (!preferred_only)
num_removed = orig_removed;
/*
* If removable_pvs were specified, then they have been shifted
* to the end to ensure they are removed. The remaining balance
* of images left to remove will be taken from the unspecified.
*/
new_area_count = old_area_count - num_removed;
if (num_removed && old_area_count == new_area_count)
return 1;
2007-12-20 21:55:46 +03:00
/* Remove mimage LVs from the segment */
dm_list_init(&tmp_orphan_lvs);
2007-12-20 21:55:46 +03:00
for (m = new_area_count; m < mirrored_seg->area_count; m++) {
2005-11-29 21:20:23 +03:00
seg_lv(mirrored_seg, m)->status &= ~MIRROR_IMAGE;
lv_set_visible(seg_lv(mirrored_seg, m));
2007-12-20 21:55:46 +03:00
if (!(lvl = dm_pool_alloc(lv->vg->cmd->mem, sizeof(*lvl)))) {
log_error("lv_list alloc failed");
return 0;
}
lvl->lv = seg_lv(mirrored_seg, m);
dm_list_add(&tmp_orphan_lvs, &lvl->list);
if (!release_lv_segment_area(mirrored_seg, m, mirrored_seg->area_len))
return_0;
2005-11-29 21:20:23 +03:00
}
2007-12-20 21:55:46 +03:00
mirrored_seg->area_count = new_area_count;
2005-11-29 21:20:23 +03:00
/* If no more mirrors, remove mirror layer */
2008-01-17 16:13:54 +03:00
/* As an exceptional case, if the lv is temporary layer,
* leave the LV as mirrored and let the lvconvert completion
* to remove the layer. */
if (new_area_count == 1 && !is_temporary_mirror_layer(lv)) {
temp_layer_lv = seg_lv(mirrored_seg, 0);
temp_layer_lv->status &= ~MIRROR_IMAGE;
lv_set_visible(temp_layer_lv);
2008-01-26 03:25:04 +03:00
detached_log_lv = detach_mirror_log(mirrored_seg);
if (!remove_layer_from_lv(lv, temp_layer_lv))
return_0;
if (collapse && !_merge_mirror_images(lv, &tmp_orphan_lvs)) {
log_error("Failed to add mirror images");
return 0;
}
/*
* No longer a mirror? Even though new_area_count was 1,
* _merge_mirror_images may have resulted into lv being still a
* mirror. Fix up the flags if we only have one image left.
*/
if (lv_mirror_count(lv) == 1) {
lv->status &= ~MIRROR;
lv->status &= ~MIRRORED;
lv->status &= ~LV_NOTSYNCED;
}
mirrored_seg = first_seg(lv);
if (remove_log && !detached_log_lv)
detached_log_lv = detach_mirror_log(mirrored_seg);
pvmove: Enable all-or-nothing (atomic) pvmoves pvmove can be used to move single LVs by name or multiple LVs that lie within the specified PV range (e.g. /dev/sdb1:0-1000). When moving more than one LV, the portions of those LVs that are in the range to be moved are added to a new temporary pvmove LV. The LVs then point to the range in the pvmove LV, rather than the PV range. Example 1: We have two LVs in this example. After they were created, the first LV was grown, yeilding two segments in LV1. So, there are two LVs with a total of three segments. Before pvmove: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- After pvmove inserts the temporary pvmove LV: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- Each of the affected LV segments now point to a range of blocks in the pvmove LV, which purposefully corresponds to the segments moved from the original LVs into the temporary pvmove LV. The current implementation goes on from here to mirror the temporary pvmove LV by segment. Further, as the pvmove LV is activated, only one of its segments is actually mirrored (i.e. "moving") at a time. The rest are either complete or not addressed yet. If the pvmove is aborted, those segments that are completed will remain on the destination and those that are not yet addressed or in the process of moving will stay on the source PV. Thus, it is possible to have a partially completed move - some LVs (or certain segments of LVs) on the source PV and some on the destination. 
Example 2: What 'example 1' might look if it was half-way through the move. --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | | ------------------------- source PV | | 256 - 511 | 512 - 767 | | ------------------------- | || ------------------------- dest PV | 000 - 255 | 256 - 511 | ------------------------- This update allows the user to specify that they would like the pvmove mirror created "by LV" rather than "by segment". That is, the pvmove LV becomes an image in an encapsulating mirror along with the allocated copy image. Example 3: A pvmove that is performed "by LV" rather than "by segment". --------- --------- | LV1s0 | | LV2s0 | --------- --------- | | ------------------------- pvmove0 | * LV-level mirror * | ------------------------- / \ pvmove_mimage0 / pvmove_mimage1 ------------------------- ------------------------- | seg 0 | seg 1 | | seg 0 | seg 1 | ------------------------- ------------------------- | | | | ------------------------- ------------------------- | 000 - 255 | 256 - 511 | | 000 - 255 | 256 - 511 | ------------------------- ------------------------- source PV dest PV The thing that differentiates a pvmove done in this way and a simple "up-convert" from linear to mirror is the preservation of the distinct segments. A normal up-convert would simply allocate the necessary space with no regard for segment boundaries. The pvmove operation must preserve the segments because they are the critical boundary between the segments of the LVs being moved. So, when the pvmove copy image is allocated, all corresponding segments must be allocated. The code that merges ajoining segments that are part of the same LV when the metadata is written must also be avoided in this case. This method of mirroring is unique enough to warrant its own definitional macro, MIRROR_BY_SEGMENTED_LV. 
This joins the two existing macros: MIRROR_BY_SEG (for original pvmove) and MIRROR_BY_LV (for user-created mirrors). The advantage of performing pvmove in this way is that all of the LVs affected can be moved together. It is an all-or-nothing approach that leaves all LV segments on the source PV if the move is aborted. Additionally, a mirror log can be used (in the future) to provide tracking of progress, allowing the copy to continue where it left off in the event there is a deactivation.
2014-06-18 07:59:36 +04:00
if (lv_is_pvmove(lv))
pvmove: Enable all-or-nothing (atomic) pvmoves pvmove can be used to move single LVs by name or multiple LVs that lie within the specified PV range (e.g. /dev/sdb1:0-1000). When moving more than one LV, the portions of those LVs that are in the range to be moved are added to a new temporary pvmove LV. The LVs then point to the range in the pvmove LV, rather than the PV range. Example 1: We have two LVs in this example. After they were created, the first LV was grown, yeilding two segments in LV1. So, there are two LVs with a total of three segments. Before pvmove: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- After pvmove inserts the temporary pvmove LV: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- Each of the affected LV segments now point to a range of blocks in the pvmove LV, which purposefully corresponds to the segments moved from the original LVs into the temporary pvmove LV. The current implementation goes on from here to mirror the temporary pvmove LV by segment. Further, as the pvmove LV is activated, only one of its segments is actually mirrored (i.e. "moving") at a time. The rest are either complete or not addressed yet. If the pvmove is aborted, those segments that are completed will remain on the destination and those that are not yet addressed or in the process of moving will stay on the source PV. Thus, it is possible to have a partially completed move - some LVs (or certain segments of LVs) on the source PV and some on the destination. 
Example 2: What 'example 1' might look if it was half-way through the move. --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | | ------------------------- source PV | | 256 - 511 | 512 - 767 | | ------------------------- | || ------------------------- dest PV | 000 - 255 | 256 - 511 | ------------------------- This update allows the user to specify that they would like the pvmove mirror created "by LV" rather than "by segment". That is, the pvmove LV becomes an image in an encapsulating mirror along with the allocated copy image. Example 3: A pvmove that is performed "by LV" rather than "by segment". --------- --------- | LV1s0 | | LV2s0 | --------- --------- | | ------------------------- pvmove0 | * LV-level mirror * | ------------------------- / \ pvmove_mimage0 / pvmove_mimage1 ------------------------- ------------------------- | seg 0 | seg 1 | | seg 0 | seg 1 | ------------------------- ------------------------- | | | | ------------------------- ------------------------- | 000 - 255 | 256 - 511 | | 000 - 255 | 256 - 511 | ------------------------- ------------------------- source PV dest PV The thing that differentiates a pvmove done in this way and a simple "up-convert" from linear to mirror is the preservation of the distinct segments. A normal up-convert would simply allocate the necessary space with no regard for segment boundaries. The pvmove operation must preserve the segments because they are the critical boundary between the segments of the LVs being moved. So, when the pvmove copy image is allocated, all corresponding segments must be allocated. The code that merges ajoining segments that are part of the same LV when the metadata is written must also be avoided in this case. This method of mirroring is unique enough to warrant its own definitional macro, MIRROR_BY_SEGMENTED_LV. 
This joins the two existing macros: MIRROR_BY_SEG (for original pvmove) and MIRROR_BY_LV (for user-created mirrors). The advantage of performing pvmove in this way is that all of the LVs affected can be moved together. It is an all-or-nothing approach that leaves all LV segments on the source PV if the move is aborted. Additionally, a mirror log can be used (in the future) to provide tracking of progress, allowing the copy to continue where it left off in the event there is a deactivation.
2014-06-18 07:59:36 +04:00
dm_list_iterate_items(pvmove_seg, &lv->segments)
pvmove_seg->status |= PVMOVE;
2008-01-17 16:13:54 +03:00
} else if (new_area_count == 0) {
2008-01-19 01:00:46 +03:00
log_very_verbose("All mimages of %s are gone", lv->name);
2008-01-17 16:13:54 +03:00
/* All mirror images are gone.
* It can happen for vgreduce --removemissing. */
2008-01-26 03:25:04 +03:00
detached_log_lv = detach_mirror_log(mirrored_seg);
lv->status &= ~MIRROR;
2008-01-17 16:13:54 +03:00
lv->status &= ~MIRRORED;
lv->status &= ~LV_NOTSYNCED;
2008-01-17 16:54:05 +03:00
if (!replace_lv_with_error_segment(lv))
2008-01-17 16:13:54 +03:00
return_0;
Taka's fix for handling failure of all mirrored log devices and all but one mirror leg. <patch header> To handle a double failure of a mirrored log, Jon's two patches are commited, however, lvconvert command can't still handle an error when mirror leg and mirrored log got failure at the same time. [Patch]: Handle both devices of a mirrored log failing (bug 607347) posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00009.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00027.html [Patch]: Handle both devices of a mirrored log failing (bug 607347) - additional fix posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00093.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00101.html In the second patch, the target type of mirrored log is replaced with error target when remove_log is set to 1, but this procedure should be also used in other cases such as the number of mirror leg is 1. This patch relocates the procedure to the main path. In addition, I added following three changes. - Removed tmp_orphan_lvs handling procedure It seems that _delete_lv() can handle detached_log_lv properly without adding mirror legs in mirrored log to tmp_orphan_lvs. Therefore, I removed the procedure. - Removed vg_write()/vg_commit() Metadata is saved by vg_write()/vg_commit() just after detached_log_lv is handled. Therefore, I removed vg_write()/vg_commit(). - With Jon's second patch, we think that we don't have to call remove_mirror_log() in _lv_update_mirrored_log() because will be handled remove_mirror_images() in _lvconvert_mirrors_repaire(). </patch header> Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com> Reviewed-by: Petr Rockai <prockai@redhat.com> Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-08-03 01:07:40 +04:00
} else if (remove_log)
2008-01-26 03:25:04 +03:00
detached_log_lv = detach_mirror_log(mirrored_seg);
Taka's fix for handling failure of all mirrored log devices and all but one mirror leg. <patch header> To handle a double failure of a mirrored log, Jon's two patches are commited, however, lvconvert command can't still handle an error when mirror leg and mirrored log got failure at the same time. [Patch]: Handle both devices of a mirrored log failing (bug 607347) posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00009.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00027.html [Patch]: Handle both devices of a mirrored log failing (bug 607347) - additional fix posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00093.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00101.html In the second patch, the target type of mirrored log is replaced with error target when remove_log is set to 1, but this procedure should be also used in other cases such as the number of mirror leg is 1. This patch relocates the procedure to the main path. In addition, I added following three changes. - Removed tmp_orphan_lvs handling procedure It seems that _delete_lv() can handle detached_log_lv properly without adding mirror legs in mirrored log to tmp_orphan_lvs. Therefore, I removed the procedure. - Removed vg_write()/vg_commit() Metadata is saved by vg_write()/vg_commit() just after detached_log_lv is handled. Therefore, I removed vg_write()/vg_commit(). - With Jon's second patch, we think that we don't have to call remove_mirror_log() in _lv_update_mirrored_log() because will be handled remove_mirror_images() in _lvconvert_mirrors_repaire(). </patch header> Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com> Reviewed-by: Petr Rockai <prockai@redhat.com> Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-08-03 01:07:40 +04:00
/*
* The log may be removed due to repair. If the log
* happens to be a mirrored log, then there is a special
* case we need to consider. One of the images of a
* mirrored log can fail followed shortly afterwards by
* a failure of the second. This means that the top-level
* mirror is waiting for writes to the log to finish, but
* they never will unless the mirrored log can be repaired
* or replaced with an error target. Since both the devices
* have failed, we must replace with error target - it is
* the only way to release the pending writes.
*/
if (detached_log_lv && lv_is_mirrored(detached_log_lv) &&
(detached_log_lv->status & PARTIAL_LV)) {
struct lv_segment *seg = first_seg(detached_log_lv);
Taka's fix for handling failure of all mirrored log devices and all but one mirror leg. <patch header> To handle a double failure of a mirrored log, Jon's two patches are commited, however, lvconvert command can't still handle an error when mirror leg and mirrored log got failure at the same time. [Patch]: Handle both devices of a mirrored log failing (bug 607347) posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00009.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00027.html [Patch]: Handle both devices of a mirrored log failing (bug 607347) - additional fix posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00093.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00101.html In the second patch, the target type of mirrored log is replaced with error target when remove_log is set to 1, but this procedure should be also used in other cases such as the number of mirror leg is 1. This patch relocates the procedure to the main path. In addition, I added following three changes. - Removed tmp_orphan_lvs handling procedure It seems that _delete_lv() can handle detached_log_lv properly without adding mirror legs in mirrored log to tmp_orphan_lvs. Therefore, I removed the procedure. - Removed vg_write()/vg_commit() Metadata is saved by vg_write()/vg_commit() just after detached_log_lv is handled. Therefore, I removed vg_write()/vg_commit(). - With Jon's second patch, we think that we don't have to call remove_mirror_log() in _lv_update_mirrored_log() because will be handled remove_mirror_images() in _lvconvert_mirrors_repaire(). </patch header> Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com> Reviewed-by: Petr Rockai <prockai@redhat.com> Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-08-03 01:07:40 +04:00
log_very_verbose("%s being removed due to failures",
detached_log_lv->name);
/*
* We are going to replace the mirror with an
* error segment, but before we do, we must remember
* all of the LVs that must be deleted later (i.e.
* the sub-lv's)
*/
for (m = 0; m < seg->area_count; m++) {
seg_lv(seg, m)->status &= ~MIRROR_IMAGE;
lv_set_visible(seg_lv(seg, m));
if (!(lvl = dm_pool_alloc(lv->vg->cmd->mem,
sizeof(*lvl)))) {
log_error("dm_pool_alloc failed");
return 0;
}
lvl->lv = seg_lv(seg, m);
dm_list_add(&tmp_orphan_lvs, &lvl->list);
}
Taka's fix for handling failure of all mirrored log devices and all but one mirror leg. <patch header> To handle a double failure of a mirrored log, Jon's two patches are commited, however, lvconvert command can't still handle an error when mirror leg and mirrored log got failure at the same time. [Patch]: Handle both devices of a mirrored log failing (bug 607347) posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00009.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00027.html [Patch]: Handle both devices of a mirrored log failing (bug 607347) - additional fix posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00093.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00101.html In the second patch, the target type of mirrored log is replaced with error target when remove_log is set to 1, but this procedure should be also used in other cases such as the number of mirror leg is 1. This patch relocates the procedure to the main path. In addition, I added following three changes. - Removed tmp_orphan_lvs handling procedure It seems that _delete_lv() can handle detached_log_lv properly without adding mirror legs in mirrored log to tmp_orphan_lvs. Therefore, I removed the procedure. - Removed vg_write()/vg_commit() Metadata is saved by vg_write()/vg_commit() just after detached_log_lv is handled. Therefore, I removed vg_write()/vg_commit(). - With Jon's second patch, we think that we don't have to call remove_mirror_log() in _lv_update_mirrored_log() because will be handled remove_mirror_images() in _lvconvert_mirrors_repaire(). </patch header> Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com> Reviewed-by: Petr Rockai <prockai@redhat.com> Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-08-03 01:07:40 +04:00
if (!replace_lv_with_error_segment(detached_log_lv)) {
log_error("Failed error target substitution for %s",
detached_log_lv->name);
return 0;
}
if (!vg_write(detached_log_lv->vg)) {
log_error("intermediate VG write failed.");
return 0;
}
Taka's fix for handling failure of all mirrored log devices and all but one mirror leg. <patch header> To handle a double failure of a mirrored log, Jon's two patches are commited, however, lvconvert command can't still handle an error when mirror leg and mirrored log got failure at the same time. [Patch]: Handle both devices of a mirrored log failing (bug 607347) posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00009.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00027.html [Patch]: Handle both devices of a mirrored log failing (bug 607347) - additional fix posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00093.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00101.html In the second patch, the target type of mirrored log is replaced with error target when remove_log is set to 1, but this procedure should be also used in other cases such as the number of mirror leg is 1. This patch relocates the procedure to the main path. In addition, I added following three changes. - Removed tmp_orphan_lvs handling procedure It seems that _delete_lv() can handle detached_log_lv properly without adding mirror legs in mirrored log to tmp_orphan_lvs. Therefore, I removed the procedure. - Removed vg_write()/vg_commit() Metadata is saved by vg_write()/vg_commit() just after detached_log_lv is handled. Therefore, I removed vg_write()/vg_commit(). - With Jon's second patch, we think that we don't have to call remove_mirror_log() in _lv_update_mirrored_log() because will be handled remove_mirror_images() in _lvconvert_mirrors_repaire(). </patch header> Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com> Reviewed-by: Petr Rockai <prockai@redhat.com> Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-08-03 01:07:40 +04:00
if (!suspend_lv(detached_log_lv->vg->cmd,
detached_log_lv)) {
log_error("Failed to suspend %s",
detached_log_lv->name);
return 0;
}
if (!vg_commit(detached_log_lv->vg)) {
if (!resume_lv(detached_log_lv->vg->cmd,
detached_log_lv))
stack;
return_0;
}
if (!resume_lv(detached_log_lv->vg->cmd, detached_log_lv)) {
Taka's fix for handling failure of all mirrored log devices and all but one mirror leg. <patch header> To handle a double failure of a mirrored log, Jon's two patches are commited, however, lvconvert command can't still handle an error when mirror leg and mirrored log got failure at the same time. [Patch]: Handle both devices of a mirrored log failing (bug 607347) posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00009.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00027.html [Patch]: Handle both devices of a mirrored log failing (bug 607347) - additional fix posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00093.html commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00101.html In the second patch, the target type of mirrored log is replaced with error target when remove_log is set to 1, but this procedure should be also used in other cases such as the number of mirror leg is 1. This patch relocates the procedure to the main path. In addition, I added following three changes. - Removed tmp_orphan_lvs handling procedure It seems that _delete_lv() can handle detached_log_lv properly without adding mirror legs in mirrored log to tmp_orphan_lvs. Therefore, I removed the procedure. - Removed vg_write()/vg_commit() Metadata is saved by vg_write()/vg_commit() just after detached_log_lv is handled. Therefore, I removed vg_write()/vg_commit(). - With Jon's second patch, we think that we don't have to call remove_mirror_log() in _lv_update_mirrored_log() because will be handled remove_mirror_images() in _lvconvert_mirrors_repaire(). </patch header> Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com> Reviewed-by: Petr Rockai <prockai@redhat.com> Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-08-03 01:07:40 +04:00
log_error("Failed to resume %s",
detached_log_lv->name);
return 0;
}
}
2005-11-29 21:20:23 +03:00
/*
* To successfully remove these unwanted LVs we need to
* remove the LVs from the mirror set, commit that metadata
* then deactivate and remove them fully.
*/
2005-11-29 21:20:23 +03:00
if (!vg_write(mirrored_seg->lv->vg)) {
log_error("intermediate VG write failed.");
return 0;
}
if (!suspend_lv_origin(mirrored_seg->lv->vg->cmd, mirrored_seg->lv)) {
2005-11-29 21:20:23 +03:00
log_error("Failed to lock %s", mirrored_seg->lv->name);
vg_revert(mirrored_seg->lv->vg);
return 0;
}
/* FIXME: second suspend should not be needed
2011-04-29 00:29:59 +04:00
* Explicitly suspend temporary LV.
* This balances critical_section_inc() calls with critical_section_dec()
* in resume (both local and cluster) and also properly propagates precommitted
* metadata into dm table on other nodes.
2011-04-29 00:29:59 +04:00
* FIXME: check propagation of suspend with visible flag
*/
if (temp_layer_lv && !suspend_lv(temp_layer_lv->vg->cmd, temp_layer_lv))
log_error("Problem suspending temporary LV %s", temp_layer_lv->name);
2005-11-29 21:20:23 +03:00
if (!vg_commit(mirrored_seg->lv->vg)) {
if (!resume_lv(mirrored_seg->lv->vg->cmd, mirrored_seg->lv))
stack;
return_0;
2005-11-29 21:20:23 +03:00
}
2005-11-29 21:20:23 +03:00
log_very_verbose("Updating \"%s\" in kernel", mirrored_seg->lv->name);
/*
* Avoid having same mirror target loaded twice simultaneously by first
* resuming the removed LV which now contains an error segment.
* As it's now detached from mirrored_seg->lv we must resume it
* explicitly.
*/
if (temp_layer_lv && !resume_lv(temp_layer_lv->vg->cmd, temp_layer_lv)) {
log_error("Problem resuming temporary LV, %s", temp_layer_lv->name);
return 0;
}
if (!resume_lv_origin(mirrored_seg->lv->vg->cmd, mirrored_seg->lv)) {
2005-11-29 21:20:23 +03:00
log_error("Problem reactivating %s", mirrored_seg->lv->name);
return 0;
}
2007-12-20 21:55:46 +03:00
/* Save or delete the 'orphan' LVs */
reactivate = lv_is_active(lv_lock_holder(lv));
if (!collapse) {
dm_list_iterate_items(lvl, &tmp_orphan_lvs)
if (!_delete_lv(lv, lvl->lv, reactivate))
2008-01-26 03:25:04 +03:00
return_0;
2007-12-20 21:55:46 +03:00
}
2006-07-21 00:37:10 +04:00
if (temp_layer_lv && !_delete_lv(lv, temp_layer_lv, reactivate))
2008-01-26 03:25:04 +03:00
return_0;
2005-11-29 21:20:23 +03:00
if (detached_log_lv && !_delete_lv(lv, detached_log_lv, reactivate))
2008-01-26 03:25:04 +03:00
return_0;
2005-11-29 21:20:23 +03:00
2008-01-17 16:13:54 +03:00
/* Mirror with only 1 area is 'in sync'. */
2008-01-26 03:25:04 +03:00
if (new_area_count == 1 && is_temporary_mirror_layer(lv)) {
if (first_seg(lv)->log_lv &&
!_init_mirror_log(lv->vg->cmd, first_seg(lv)->log_lv,
1, &lv->tags, 0)) {
2008-01-17 16:13:54 +03:00
/* As a result, unnecessary sync may run after
* collapsing. But safe.*/
log_error("Failed to initialize log device");
return 0;
2008-01-17 16:13:54 +03:00
}
}
if (removed)
*removed = old_area_count - new_area_count;
log_very_verbose(FMTu32 " image(s) removed from %s",
old_area_count - new_area_count, lv->name);
2008-01-19 01:00:46 +03:00
2005-11-29 21:20:23 +03:00
return 1;
}
2007-12-20 21:55:46 +03:00
/*
* Remove the number of mirror images from the LV
*/
int remove_mirror_images(struct logical_volume *lv, uint32_t num_mirrors,
int (*is_removable)(struct logical_volume *, void *),
void *removable_baton, unsigned remove_log)
2007-12-20 21:55:46 +03:00
{
2008-01-17 16:13:54 +03:00
uint32_t num_removed, removed_once, r;
2007-12-20 21:55:46 +03:00
uint32_t existing_mirrors = lv_mirror_count(lv);
2008-01-17 16:13:54 +03:00
struct logical_volume *next_lv = lv;
2007-12-20 21:55:46 +03:00
int preferred_only = 1;
int retries = 0;
2007-12-20 21:55:46 +03:00
num_removed = existing_mirrors - num_mirrors;
/* num_removed can be 0 if the function is called just to remove log */
do {
2008-01-17 16:13:54 +03:00
if (num_removed < first_seg(next_lv)->area_count)
2007-12-20 21:55:46 +03:00
removed_once = num_removed;
else
2008-01-17 16:13:54 +03:00
removed_once = first_seg(next_lv)->area_count - 1;
2007-12-20 21:55:46 +03:00
2008-01-17 16:13:54 +03:00
if (!_remove_mirror_images(next_lv, removed_once,
is_removable, removable_baton,
remove_log, 0, &r, preferred_only))
2007-12-20 21:55:46 +03:00
return_0;
if (r < removed_once || !removed_once) {
2008-01-17 16:13:54 +03:00
/* Some mirrors are removed from the temporary mirror,
* but the temporary layer still exists.
* Down the stack and retry for remainder. */
2008-01-17 16:54:05 +03:00
next_lv = find_temporary_mirror(next_lv);
if (!next_lv) {
preferred_only = 0;
next_lv = lv;
}
2008-01-17 16:13:54 +03:00
}
num_removed -= r;
/*
* if there are still images to be removed, try again; this is
* required since some temporary layers may have been reduced
* to 1, at which point they are made removable, just like
* normal images
*/
if (!next_lv && !preferred_only && !retries && num_removed) {
++retries;
preferred_only = 1;
}
2008-01-17 16:13:54 +03:00
} while (next_lv && num_removed);
if (num_removed) {
if (num_removed == existing_mirrors - num_mirrors)
log_error("No mirror images found using specified PVs.");
else {
log_error("%u images are removed out of requested %u.",
existing_mirrors - lv_mirror_count(lv),
existing_mirrors - num_mirrors);
}
return 0;
}
2007-12-20 21:55:46 +03:00
return 1;
}
/*
 * is_removable callback that rejects every image: both arguments are
 * ignored and the result is always 0.
 */
static int _no_removable_images(struct logical_volume *lv __attribute__((unused)),
				void *baton __attribute__((unused)))
{
	return 0;
}
2007-12-20 21:55:46 +03:00
/*
* Collapsing temporary mirror layers.
*
* When mirrors are added to already-mirrored LV, a temporary mirror layer
* is inserted at the top of the stack to reduce resync work.
* The function will remove the intermediate layer and collapse the stack
* as far as mirrors are in-sync.
*
* The function is destructive: to remove intermediate mirror layers,
* VG metadata commits and suspend/resume are necessary.
*/
int collapse_mirrored_lv(struct logical_volume *lv)
{
struct logical_volume *tmp_lv;
struct lv_segment *mirror_seg;
2007-12-20 21:55:46 +03:00
2008-01-16 22:13:51 +03:00
while ((tmp_lv = find_temporary_mirror(lv))) {
mirror_seg = find_mirror_seg(first_seg(tmp_lv));
if (!mirror_seg) {
log_error("Failed to find mirrored LV for %s",
tmp_lv->name);
return 0;
}
if (!_mirrored_lv_in_sync(mirror_seg->lv)) {
2007-12-20 21:55:46 +03:00
log_verbose("Not collapsing %s: out-of-sync",
mirror_seg->lv->name);
2007-12-20 21:55:46 +03:00
return 1;
}
if (!_remove_mirror_images(mirror_seg->lv,
mirror_seg->area_count - 1,
_no_removable_images, NULL, 0, 1, NULL, 0)) {
2007-12-20 21:55:46 +03:00
log_error("Failed to release mirror images");
return 0;
}
}
return 1;
}
#if 0
/* FIXME: reconfigure_mirror_images: remove this code? */
static int _get_mirror_fault_policy(struct cmd_context *cmd __attribute__((unused)),
int log_policy)
2006-05-11 23:45:53 +04:00
{
const char *policy = NULL;
/*
2006-05-11 23:45:53 +04:00
if (log_policy)
policy = find_config_tree_str(cmd, activation_mirror_log_fault_policy_CFG);
else {
policy = find_config_tree_str(cmd, activation_mirror_image_fault_policy_CFG);
if (!policy)
policy = find_config_tree_str(cmd, activation_mirror_device_fault_policy_CFG);
}
*/
2006-05-11 23:45:53 +04:00
if (!strcmp(policy, "remove"))
return MIRROR_REMOVE;
else if (!strcmp(policy, "allocate"))
return MIRROR_ALLOCATE;
else if (!strcmp(policy, "allocate_anywhere"))
return MIRROR_ALLOCATE_ANYWHERE;
if (log_policy)
log_error("Bad activation/mirror_log_fault_policy");
else
2006-05-12 23:47:40 +04:00
log_error("Bad activation/mirror_device_fault_policy");
2006-05-11 23:45:53 +04:00
return MIRROR_REMOVE;
}
/* Query the fault policy that applies to the mirror log. */
static int _get_mirror_log_fault_policy(struct cmd_context *cmd)
{
	const int want_log_policy = 1;

	return _get_mirror_fault_policy(cmd, want_log_policy);
}
/* Query the fault policy that applies to mirror images/devices. */
static int _get_mirror_device_fault_policy(struct cmd_context *cmd)
{
	const int want_log_policy = 0;

	return _get_mirror_fault_policy(cmd, want_log_policy);
}
/*
* replace_mirror_images
* @mirrored_seg: segment (which may be linear now) to restore
* @num_mirrors: number of copies we should end up with
* @replace_log: replace log if not present
* @in_sync: was the original mirror in-sync?
*
* in_sync will be set to 0 if new mirror devices are being added
* In other words, it is only useful if the log (and only the log)
* is being restored.
*
* Returns: 0 on failure, 1 on reconfig, -1 if no reconfig done
*/
static int _replace_mirror_images(struct lv_segment *mirrored_seg,
2006-05-11 23:45:53 +04:00
uint32_t num_mirrors,
int log_policy, int in_sync)
{
int r = -1;
struct logical_volume *lv = mirrored_seg->lv;
/* FIXME: Use lvconvert rather than duplicating its code */
if (mirrored_seg->area_count < num_mirrors) {
log_warn("WARNING: Failed to replace mirror device in %s/%s",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
2006-05-11 23:45:53 +04:00
if ((mirrored_seg->area_count > 1) && !mirrored_seg->log_lv)
log_warn("WARNING: Use 'lvconvert -m %d %s/%s --corelog' to replace failed devices",
num_mirrors - 1, lv->vg->name, lv->name);
2006-05-11 23:45:53 +04:00
else
log_warn("WARNING: Use 'lvconvert -m %d %s/%s' to replace failed devices",
num_mirrors - 1, lv->vg->name, lv->name);
2006-05-11 23:45:53 +04:00
r = 0;
/* REMEMBER/FIXME: set in_sync to 0 if a new mirror device was added */
in_sync = 0;
}
/*
* FIXME: right now, we ignore the allocation policy specified to
* allocate the new log.
*/
if ((mirrored_seg->area_count > 1) && !mirrored_seg->log_lv &&
(log_policy != MIRROR_REMOVE)) {
log_warn("WARNING: Failed to replace mirror log device in %s/%s",
lv->vg->name, lv->name);
2006-05-11 23:45:53 +04:00
log_warn("WARNING: Use 'lvconvert -m %d %s/%s' to replace failed devices",
mirrored_seg->area_count - 1 , lv->vg->name, lv->name);
2006-05-11 23:45:53 +04:00
r = 0;
}
return r;
}
int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
struct dm_list *removable_pvs, unsigned remove_log)
{
int r;
2007-12-20 21:55:46 +03:00
int in_sync;
2006-05-11 23:45:53 +04:00
int log_policy, dev_policy;
uint32_t old_num_mirrors = mirrored_seg->area_count;
int had_log = (mirrored_seg->log_lv) ? 1 : 0;
/* was the mirror in-sync before problems? */
2007-12-20 21:55:46 +03:00
in_sync = _mirrored_lv_in_sync(mirrored_seg->lv);
/*
* While we are only removing devices, we can have sync set.
* Setting this is only useful if we are moving to core log
* otherwise the disk log will contain the sync information
*/
init_mirror_in_sync(in_sync);
2008-01-17 16:13:54 +03:00
r = _remove_mirror_images(mirrored_seg->lv, old_num_mirrors - num_mirrors,
is_mirror_image_removable, removable_pvs,
remove_log, 0, NULL, 0);
if (!r)
/* Unable to remove bad devices */
return 0;
log_warn("WARNING: Bad device removed from mirror volume, %s/%s",
2006-05-11 23:45:53 +04:00
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
log_policy = _get_mirror_log_fault_policy(mirrored_seg->lv->vg->cmd);
dev_policy = _get_mirror_device_fault_policy(mirrored_seg->lv->vg->cmd);
2006-05-11 23:45:53 +04:00
r = _replace_mirror_images(mirrored_seg,
2006-05-11 23:45:53 +04:00
(dev_policy != MIRROR_REMOVE) ?
old_num_mirrors : num_mirrors,
log_policy, in_sync);
2006-05-11 23:45:53 +04:00
if (!r)
/* Failed to replace device(s) */
log_warn("WARNING: Unable to find substitute device for mirror volume, %s/%s",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
2006-05-11 23:45:53 +04:00
else if (r > 0)
/* Success in replacing device(s) */
log_warn("WARNING: Mirror volume, %s/%s restored - substitute for failed device found.",
2006-05-11 23:45:53 +04:00
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
else
/* Bad device removed, but not replaced because of policy */
if (mirrored_seg->area_count == 1) {
log_warn("WARNING: Mirror volume, %s/%s converted to linear due to device failure.",
2006-05-11 23:45:53 +04:00
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
} else if (had_log && !mirrored_seg->log_lv) {
log_warn("WARNING: Mirror volume, %s/%s disk log removed due to device failure.",
2006-05-11 23:45:53 +04:00
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
}
/*
* If we made it here, we at least removed the bad device.
* Consider this success.
*/
return 1;
}
#endif
static int _create_mimage_lvs(struct alloc_handle *ah,
uint32_t num_mirrors,
uint32_t stripes,
uint32_t stripe_size,
struct logical_volume *lv,
struct logical_volume **img_lvs,
int log)
{
uint32_t m, first_area;
char *img_name;
size_t len;
len = strlen(lv->name) + 32;
if (!(img_name = alloca(len))) {
log_error("img_name allocation failed. "
"Remove new LV and retry.");
return 0;
}
2006-08-21 16:54:53 +04:00
if (dm_snprintf(img_name, len, "%s_mimage_%%d", lv->name) < 0) {
log_error("img_name allocation failed. "
"Remove new LV and retry.");
return 0;
}
for (m = 0; m < num_mirrors; m++) {
if (!(img_lvs[m] = lv_create_empty(img_name,
NULL, LVM_READ | LVM_WRITE,
ALLOC_INHERIT, lv->vg))) {
log_error("Aborting. Failed to create mirror image LV. "
"Remove new LV and retry.");
return 0;
}
if (log) {
first_area = m * stripes + (log - 1);
if (!lv_add_log_segment(ah, first_area, img_lvs[m], 0)) {
log_error("Failed to add mirror image segment"
" to %s. Remove new LV and retry.",
img_lvs[m]->name);
return 0;
}
} else {
if (!lv_add_segment(ah, m * stripes, stripes, img_lvs[m],
get_segtype_from_string(lv->vg->cmd,
"striped"),
stripe_size, 0, 0)) {
log_error("Aborting. Failed to add mirror image segment "
"to %s. Remove new LV and retry.",
img_lvs[m]->name);
return 0;
}
}
}
2005-10-28 16:48:50 +04:00
return 1;
}
/*
* Remove mirrors from each segment.
* 'new_mirrors' is the number of mirrors after the removal. '0' for linear.
* If 'status_mask' is non-zero, the removal happens only when all segments
* has the status bits on.
2003-05-06 16:22:24 +04:00
*/
int remove_mirrors_from_segments(struct logical_volume *lv,
uint32_t new_mirrors, uint64_t status_mask)
2003-05-06 16:22:24 +04:00
{
struct lv_segment *seg;
uint32_t s;
/* Check the segment params are compatible */
dm_list_iterate_items(seg, &lv->segments) {
if (!seg_is_mirrored(seg)) {
log_error("Segment is not mirrored: %s:%" PRIu32,
lv->name, seg->le);
return 0;
} if ((seg->status & status_mask) != status_mask) {
log_error("Segment status does not match: %s:%" PRIu32
" status:0x%" PRIx64 "/0x%" PRIx64, lv->name, seg->le,
seg->status, status_mask);
return 0;
}
}
2003-05-06 16:22:24 +04:00
/* Convert the segments */
dm_list_iterate_items(seg, &lv->segments) {
if (!new_mirrors && seg->extents_copied == seg->area_len) {
if (!move_lv_segment_area(seg, 0, seg, 1))
return_0;
2003-05-06 16:22:24 +04:00
}
for (s = new_mirrors + 1; s < seg->area_count; s++)
if (!release_and_discard_lv_segment_area(seg, s, seg->area_len))
return_0;
seg->area_count = new_mirrors + 1;
2003-05-06 16:22:24 +04:00
if (!new_mirrors)
seg->segtype = get_segtype_from_string(lv->vg->cmd,
"striped");
}
2003-05-06 16:22:24 +04:00
return 1;
}
const char *get_pvmove_pvname_from_lv_mirr(const struct logical_volume *lv_mirr)
2003-05-06 16:22:24 +04:00
{
struct lv_segment *seg;
dm_list_iterate_items(seg, &lv_mirr->segments) {
if (!seg_is_mirrored(seg))
2003-05-06 16:22:24 +04:00
continue;
if (seg_type(seg, 0) == AREA_PV)
return dev_name(seg_dev(seg, 0));
if (seg_type(seg, 0) == AREA_LV)
return dev_name(seg_dev(first_seg(seg_lv(seg, 0)), 0));
2003-05-06 16:22:24 +04:00
}
return NULL;
}
/*
* Find first pvmove LV referenced by a segment of an LV.
*/
const struct logical_volume *find_pvmove_lv_in_lv(const struct logical_volume *lv)
2003-05-06 16:22:24 +04:00
{
const struct lv_segment *seg;
2003-05-06 16:22:24 +04:00
uint32_t s;
dm_list_iterate_items(seg, &lv->segments) {
2003-05-06 16:22:24 +04:00
for (s = 0; s < seg->area_count; s++) {
2005-06-01 20:51:55 +04:00
if (seg_type(seg, s) != AREA_LV)
2003-05-06 16:22:24 +04:00
continue;
if (lv_is_pvmove(seg_lv(seg, s)))
return seg_lv(seg, s);
2003-05-06 16:22:24 +04:00
}
}
return NULL;
}
/*
 * Look up the pvmove LV referenced by 'lv' and return the device name
 * reported for it by get_pvmove_pvname_from_lv_mirr(), or NULL when
 * 'lv' references no pvmove LV.
 */
const char *get_pvmove_pvname_from_lv(const struct logical_volume *lv)
{
	const struct logical_volume *pvmove_lv = find_pvmove_lv_in_lv(lv);

	return pvmove_lv ? get_pvmove_pvname_from_lv_mirr(pvmove_lv) : NULL;
}
2003-05-06 16:22:24 +04:00
struct logical_volume *find_pvmove_lv(struct volume_group *vg,
2004-05-05 21:56:20 +04:00
struct device *dev,
uint64_t lv_type)
2003-05-06 16:22:24 +04:00
{
2005-06-01 20:51:55 +04:00
struct lv_list *lvl;
2003-05-06 16:22:24 +04:00
struct logical_volume *lv;
struct lv_segment *seg;
/* Loop through all LVs */
dm_list_iterate_items(lvl, &vg->lvs) {
2005-06-01 20:51:55 +04:00
lv = lvl->lv;
2003-05-06 16:22:24 +04:00
2004-05-05 21:56:20 +04:00
if (!(lv->status & lv_type))
2003-05-06 16:22:24 +04:00
continue;
/*
* If this is an atomic pvmove, the first
* segment will be a mirror containing
* mimages (i.e. AREA_LVs)
*/
if (seg_type(first_seg(lv), 0) == AREA_LV) {
seg = first_seg(lv); /* the mirror segment */
seg = first_seg(seg_lv(seg, 0)); /* mimage_0 segment0 */
if (seg_dev(seg, 0) != dev)
continue;
return lv;
}
/*
* If this is a normal pvmove, check all the segments'
* first areas for the requested device
*/
dm_list_iterate_items(seg, &lv->segments) {
if (seg_type(seg, 0) != AREA_PV)
2003-05-06 16:22:24 +04:00
continue;
2005-06-01 20:51:55 +04:00
if (seg_dev(seg, 0) != dev)
2003-05-06 16:22:24 +04:00
continue;
2003-05-06 16:22:24 +04:00
return lv;
}
}
return NULL;
}
2004-05-05 21:56:20 +04:00
struct logical_volume *find_pvmove_lv_from_pvname(struct cmd_context *cmd,
struct volume_group *vg,
const char *name,
const char *uuid __attribute__((unused)),
uint64_t lv_type)
2004-05-05 21:56:20 +04:00
{
struct physical_volume *pv;
struct logical_volume *lv;
2004-05-05 21:56:20 +04:00
if (!(pv = find_pv_by_name(cmd, name, 0, 0)))
2008-01-30 16:19:47 +03:00
return_NULL;
2004-05-05 21:56:20 +04:00
lv = find_pvmove_lv(vg, pv->dev, lv_type);
free_pv_fid(pv);
return lv;
2004-05-05 21:56:20 +04:00
}
struct dm_list *lvs_using_lv(struct cmd_context *cmd, struct volume_group *vg,
2003-05-06 16:22:24 +04:00
struct logical_volume *lv)
{
struct dm_list *lvs;
2003-05-06 16:22:24 +04:00
struct logical_volume *lv1;
2005-06-01 20:51:55 +04:00
struct lv_list *lvl, *lvl1;
2003-05-06 16:22:24 +04:00
struct lv_segment *seg;
uint32_t s;
if (!(lvs = dm_pool_alloc(cmd->mem, sizeof(*lvs)))) {
2003-05-06 16:22:24 +04:00
log_error("lvs list alloc failed");
return NULL;
}
dm_list_init(lvs);
2003-05-06 16:22:24 +04:00
/* Loop through all LVs except the one supplied */
dm_list_iterate_items(lvl1, &vg->lvs) {
2005-06-01 20:51:55 +04:00
lv1 = lvl1->lv;
2003-05-06 16:22:24 +04:00
if (lv1 == lv)
continue;
2004-05-05 22:35:04 +04:00
/* Find whether any segment points at the supplied LV */
dm_list_iterate_items(seg, &lv1->segments) {
2003-05-06 16:22:24 +04:00
for (s = 0; s < seg->area_count; s++) {
2005-06-01 20:51:55 +04:00
if (seg_type(seg, s) != AREA_LV ||
seg_lv(seg, s) != lv)
2003-05-06 16:22:24 +04:00
continue;
if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
2003-05-06 16:22:24 +04:00
log_error("lv_list alloc failed");
return NULL;
}
lvl->lv = lv1;
dm_list_add(lvs, &lvl->list);
2003-05-06 16:22:24 +04:00
goto next_lv;
}
}
next_lv:
;
2003-05-06 16:22:24 +04:00
}
return lvs;
}
2005-10-28 01:51:28 +04:00
/*
 * Fixup mirror pointers after single-pass segment import.
 *
 * Every "mirror" segment in 'vg' that has a log LV is registered as a
 * user of that log LV.  Returns 1 on success, 0 if a registration fails.
 */
int fixup_imported_mirrors(struct volume_group *vg)
{
	struct lv_list *lvl;
	struct lv_segment *seg;

	dm_list_iterate_items(lvl, &vg->lvs) {
		dm_list_iterate_items(seg, &lvl->lv->segments) {
			if (seg->segtype == get_segtype_from_string(vg->cmd, "mirror") &&
			    seg->log_lv &&
			    !add_seg_to_segs_using_this_lv(seg->log_lv, seg))
				return_0;
		}
	}

	return 1;
}
pvmove: Enable all-or-nothing (atomic) pvmoves pvmove can be used to move single LVs by name or multiple LVs that lie within the specified PV range (e.g. /dev/sdb1:0-1000). When moving more than one LV, the portions of those LVs that are in the range to be moved are added to a new temporary pvmove LV. The LVs then point to the range in the pvmove LV, rather than the PV range. Example 1: We have two LVs in this example. After they were created, the first LV was grown, yeilding two segments in LV1. So, there are two LVs with a total of three segments. Before pvmove: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- After pvmove inserts the temporary pvmove LV: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- Each of the affected LV segments now point to a range of blocks in the pvmove LV, which purposefully corresponds to the segments moved from the original LVs into the temporary pvmove LV. The current implementation goes on from here to mirror the temporary pvmove LV by segment. Further, as the pvmove LV is activated, only one of its segments is actually mirrored (i.e. "moving") at a time. The rest are either complete or not addressed yet. If the pvmove is aborted, those segments that are completed will remain on the destination and those that are not yet addressed or in the process of moving will stay on the source PV. Thus, it is possible to have a partially completed move - some LVs (or certain segments of LVs) on the source PV and some on the destination. 
Example 2: What 'example 1' might look if it was half-way through the move. --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | | ------------------------- source PV | | 256 - 511 | 512 - 767 | | ------------------------- | || ------------------------- dest PV | 000 - 255 | 256 - 511 | ------------------------- This update allows the user to specify that they would like the pvmove mirror created "by LV" rather than "by segment". That is, the pvmove LV becomes an image in an encapsulating mirror along with the allocated copy image. Example 3: A pvmove that is performed "by LV" rather than "by segment". --------- --------- | LV1s0 | | LV2s0 | --------- --------- | | ------------------------- pvmove0 | * LV-level mirror * | ------------------------- / \ pvmove_mimage0 / pvmove_mimage1 ------------------------- ------------------------- | seg 0 | seg 1 | | seg 0 | seg 1 | ------------------------- ------------------------- | | | | ------------------------- ------------------------- | 000 - 255 | 256 - 511 | | 000 - 255 | 256 - 511 | ------------------------- ------------------------- source PV dest PV The thing that differentiates a pvmove done in this way and a simple "up-convert" from linear to mirror is the preservation of the distinct segments. A normal up-convert would simply allocate the necessary space with no regard for segment boundaries. The pvmove operation must preserve the segments because they are the critical boundary between the segments of the LVs being moved. So, when the pvmove copy image is allocated, all corresponding segments must be allocated. The code that merges ajoining segments that are part of the same LV when the metadata is written must also be avoided in this case. This method of mirroring is unique enough to warrant its own definitional macro, MIRROR_BY_SEGMENTED_LV. 
This joins the two existing macros: MIRROR_BY_SEG (for original pvmove) and MIRROR_BY_LV (for user created mirrors). The advantages of performing pvmove in this way is that all of the LVs affected can be moved together. It is an all-or-nothing approach that leaves all LV segments on the source PV if the move is aborted. Additionally, a mirror log can be used (in the future) to provide tracking of progress; allowing the copy to continue where it left off in the event there is a deactivation.
2014-06-18 07:59:36 +04:00
/*
 * Allocate 'mirrors' additional copies of 'lv' without disturbing its
 * existing segment boundaries, then attach them.
 *
 * 'flags' selects how the copies are attached:
 *   MIRROR_BY_SEG          - mirror areas are added segment by segment
 *                            (the original pvmove layout);
 *   MIRROR_BY_SEGMENTED_LV - a segmented mirror image is added, i.e. the
 *                            LV-level mirror used for atomic
 *                            (all-or-nothing) pvmove.
 * Any other value is an internal error.
 *
 * Returns 1 on success, 0 on failure.  The allocation handle is always
 * destroyed before returning.
 */
static int _add_mirrors_that_preserve_segments(struct logical_volume *lv,
					       uint32_t flags,
					       uint32_t mirrors,
					       uint32_t region_size,
					       struct dm_list *allocatable_pvs,
					       alloc_policy_t alloc)
{
	struct cmd_context *cmd = lv->vg->cmd;
	struct alloc_handle *ah;
	const struct segment_type *segtype;
	struct dm_list *parallel_areas;
	uint32_t adjusted_region_size;
	int added;

	if (!(parallel_areas = build_parallel_areas_from_lv(lv, 1, 0)))
		return_0;

	if (!(segtype = get_segtype_from_string(cmd, "mirror")))
		return_0;

	adjusted_region_size = adjusted_mirror_region_size(lv->vg->extent_size,
							   lv->le_count,
							   region_size, 1,
							   vg_is_clustered(lv->vg));

	if (!(ah = allocate_extents(lv->vg, NULL, segtype, 1, mirrors, 0, 0,
				    lv->le_count, allocatable_pvs, alloc, 0,
				    parallel_areas))) {
		log_error("Unable to allocate mirror extents for %s.", lv->name);
		return 0;
	}

	if (flags & MIRROR_BY_SEG)
		added = lv_add_mirror_areas(ah, lv, 0, adjusted_region_size);
	else if (flags & MIRROR_BY_SEGMENTED_LV)
		added = lv_add_segmented_mirror_image(ah, lv, 0,
						      adjusted_region_size);
	else {
		log_error(INTERNAL_ERROR "Unknown mirror flag");
		alloc_destroy(ah);
		return 0;
	}

	if (!added)
		log_error("Failed to add mirror areas to %s", lv->name);

	alloc_destroy(ah);

	return added ? 1 : 0;
}
pvmove: Enable all-or-nothing (atomic) pvmoves pvmove can be used to move single LVs by name or multiple LVs that lie within the specified PV range (e.g. /dev/sdb1:0-1000). When moving more than one LV, the portions of those LVs that are in the range to be moved are added to a new temporary pvmove LV. The LVs then point to the range in the pvmove LV, rather than the PV range. Example 1: We have two LVs in this example. After they were created, the first LV was grown, yeilding two segments in LV1. So, there are two LVs with a total of three segments. Before pvmove: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- After pvmove inserts the temporary pvmove LV: --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | ------------------------------------- PV | 000 - 255 | 256 - 511 | 512 - 767 | ------------------------------------- Each of the affected LV segments now point to a range of blocks in the pvmove LV, which purposefully corresponds to the segments moved from the original LVs into the temporary pvmove LV. The current implementation goes on from here to mirror the temporary pvmove LV by segment. Further, as the pvmove LV is activated, only one of its segments is actually mirrored (i.e. "moving") at a time. The rest are either complete or not addressed yet. If the pvmove is aborted, those segments that are completed will remain on the destination and those that are not yet addressed or in the process of moving will stay on the source PV. Thus, it is possible to have a partially completed move - some LVs (or certain segments of LVs) on the source PV and some on the destination. 
Example 2: What 'example 1' might look if it was half-way through the move. --------- --------- --------- | LV1s0 | | LV2s0 | | LV1s1 | --------- --------- --------- | | | ------------------------------------- pvmove0 | seg 0 | seg 1 | seg 2 | ------------------------------------- | | | | ------------------------- source PV | | 256 - 511 | 512 - 767 | | ------------------------- | || ------------------------- dest PV | 000 - 255 | 256 - 511 | ------------------------- This update allows the user to specify that they would like the pvmove mirror created "by LV" rather than "by segment". That is, the pvmove LV becomes an image in an encapsulating mirror along with the allocated copy image. Example 3: A pvmove that is performed "by LV" rather than "by segment". --------- --------- | LV1s0 | | LV2s0 | --------- --------- | | ------------------------- pvmove0 | * LV-level mirror * | ------------------------- / \ pvmove_mimage0 / pvmove_mimage1 ------------------------- ------------------------- | seg 0 | seg 1 | | seg 0 | seg 1 | ------------------------- ------------------------- | | | | ------------------------- ------------------------- | 000 - 255 | 256 - 511 | | 000 - 255 | 256 - 511 | ------------------------- ------------------------- source PV dest PV The thing that differentiates a pvmove done in this way and a simple "up-convert" from linear to mirror is the preservation of the distinct segments. A normal up-convert would simply allocate the necessary space with no regard for segment boundaries. The pvmove operation must preserve the segments because they are the critical boundary between the segments of the LVs being moved. So, when the pvmove copy image is allocated, all corresponding segments must be allocated. The code that merges ajoining segments that are part of the same LV when the metadata is written must also be avoided in this case. This method of mirroring is unique enough to warrant its own definitional macro, MIRROR_BY_SEGMENTED_LV. 
This joins the two existing macros: MIRROR_BY_SEG (for original pvmove) and MIRROR_BY_LV (for user created mirrors). The advantages of performing pvmove in this way is that all of the LVs affected can be moved together. It is an all-or-nothing approach that leaves all LV segments on the source PV if the move is aborted. Additionally, a mirror log can be used (in the future) to provide tracking of progress; allowing the copy to continue where it left off in the event there is a deactivation.
2014-06-18 07:59:36 +04:00
/*
 * Add mirrors to "linear" or "mirror" segments.
 *
 * Wrapper that requests segment-by-segment mirroring (MIRROR_BY_SEG).
 * 'cmd' is not used here; the helper takes the command context from
 * the LV's volume group.
 */
int add_mirrors_to_segments(struct cmd_context *cmd, struct logical_volume *lv,
			    uint32_t mirrors, uint32_t region_size,
			    struct dm_list *allocatable_pvs, alloc_policy_t alloc)
{
	const uint32_t how = MIRROR_BY_SEG;

	return _add_mirrors_that_preserve_segments(lv, how, mirrors,
						   region_size,
						   allocatable_pvs, alloc);
}
/*
* Convert mirror log
*
* FIXME: Can't handle segment-by-segment mirror (like pvmove)
*/
int remove_mirror_log(struct cmd_context *cmd,
struct logical_volume *lv,
struct dm_list *removable_pvs,
int force)
{
dm_percent_t sync_percent;
struct volume_group *vg = lv->vg;
/* Unimplemented features */
if (dm_list_size(&lv->segments) != 1) {
log_error("Multiple-segment mirror is not supported");
return 0;
}
/* Had disk log, switch to core. */
if (lv_is_active_locally(lv)) {
if (!lv_mirror_percent(cmd, lv, 0, &sync_percent,
NULL)) {
log_error("Unable to determine mirror sync status.");
return 0;
}
} else if (lv_is_active(lv)) {
log_error("Unable to determine sync status of"
" remotely active mirror, %s", lv->name);
return 0;
} else if (vg_is_clustered(vg)) {
log_error("Unable to convert the log of an inactive "
"cluster mirror, %s", lv->name);
return 0;
} else if (force || yes_no_prompt("Full resync required to convert "
"inactive mirror %s to core log. "
"Proceed? [y/n]: ", lv->name) == 'y')
sync_percent = 0;
else {
log_error("Logical volume %s NOT converted.", lv->name);
return 0;
}
if (sync_percent == DM_PERCENT_100)
init_mirror_in_sync(1);
else {
/* A full resync will take place */
lv->status &= ~LV_NOTSYNCED;
init_mirror_in_sync(0);
}
2007-12-20 21:55:46 +03:00
if (!remove_mirror_images(lv, lv_mirror_count(lv),
is_mirror_image_removable, removable_pvs, 1U))
return_0;
return 1;
}
2007-12-21 01:37:42 +03:00
/*
 * Create a new log LV in lv's volume group and give it a log segment
 * taken from the allocation handle 'ah'.
 *
 * The name handed to lv_create_empty() is lv_name immediately followed
 * by suffix.  The suffix is copied verbatim (it is passed through "%s"),
 * so a suffix such as "_mlogtmp_%d" keeps its "%d".
 * NOTE(review): presumably lv_create_empty() expands that "%d" itself -
 * confirm against its definition.
 *
 * Returns the new log LV, or NULL on failure.
 */
static struct logical_volume *_create_mirror_log(struct logical_volume *lv,
						 struct alloc_handle *ah,
						 alloc_policy_t alloc,
						 const char *lv_name,
						 const char *suffix)
{
	struct logical_volume *log_lv;
	char *log_name;
	size_t len;

	/*
	 * Size the buffer from both components (plus the terminator)
	 * rather than assuming the suffix fits in a fixed amount of slack.
	 */
	len = strlen(lv_name) + strlen(suffix) + 1;

	/* alloca() has no error return, so there is no result to test. */
	log_name = alloca(len);

	if (dm_snprintf(log_name, len, "%s%s", lv_name, suffix) < 0) {
		log_error("Failed to build mirror log name from %s and %s.",
			  lv_name, suffix);
		return NULL;
	}

	if (!(log_lv = lv_create_empty(log_name, NULL,
				       VISIBLE_LV | LVM_READ | LVM_WRITE,
				       alloc, lv->vg)))
		return_NULL;

	if (!lv_add_log_segment(ah, 0, log_lv, MIRROR_LOG))
		return_NULL;

	return log_lv;
}
/*
* Returns: 1 on success, 0 on error
*/
static int _form_mirror(struct cmd_context *cmd, struct alloc_handle *ah,
struct logical_volume *lv,
uint32_t mirrors, uint32_t stripes,
uint32_t stripe_size, uint32_t region_size, int log)
{
struct logical_volume **img_lvs;
/*
* insert a mirror layer
*/
if (dm_list_size(&lv->segments) != 1 ||
seg_type(first_seg(lv), 0) != AREA_LV)
if (!insert_layer_for_lv(cmd, lv, 0, "_mimage_%d"))
return 0;
/*
* create mirror image LVs
*/
if (!(img_lvs = alloca(sizeof(*img_lvs) * mirrors))) {
log_error("img_lvs allocation failed. "
"Remove new LV and retry.");
return 0;
}
if (!_create_mimage_lvs(ah, mirrors, stripes, stripe_size, lv, img_lvs, log))
2015-05-07 13:37:15 +03:00
return_0;
if (!lv_add_mirror_lvs(lv, img_lvs, mirrors,
MIRROR_IMAGE | (lv->status & LOCKED),
region_size)) {
log_error("Aborting. Failed to add mirror segment. "
"Remove new LV and retry.");
return 0;
}
return 1;
}
/*
 * Create a log LV for 'lv', mirror the log itself when log_count > 1,
 * and initialise it.  The global in-sync flag is set from 'in_sync'
 * before anything else is done.
 *
 * Returns the log LV, or NULL on failure.
 */
static struct logical_volume *_set_up_mirror_log(struct cmd_context *cmd,
						 struct alloc_handle *ah,
						 struct logical_volume *lv,
						 uint32_t log_count,
						 uint32_t region_size,
						 alloc_policy_t alloc,
						 int in_sync)
{
	struct logical_volume *log_lv;
	struct lv_segment *seg;
	const char *base_name, *log_suffix, *layer_pos;
	char *prefix;
	size_t prefix_len;

	init_mirror_in_sync(in_sync);

	/*
	 * The log is named base_name + log_suffix:
	 *   - first area is an LV whose name contains MIRROR_SYNC_LAYER
	 *     (temporary mirror): lv->name + "_mlogtmp_%d"
	 *   - lv->name itself contains MIRROR_SYNC_LAYER: the part of
	 *     lv->name in front of it (the top-level LV name) + "_mlog"
	 *   - anything else: lv->name + "_mlog"
	 */
	base_name = lv->name;
	log_suffix = "_mlog";

	seg = first_seg(lv);
	if (seg_type(seg, 0) == AREA_LV &&
	    strstr(seg_lv(seg, 0)->name, MIRROR_SYNC_LAYER))
		log_suffix = "_mlogtmp_%d";
	else if ((layer_pos = strstr(lv->name, MIRROR_SYNC_LAYER))) {
		/* Keep only the characters preceding the layer marker */
		prefix_len = layer_pos - lv->name;
		prefix = alloca(prefix_len + 1);
		memcpy(prefix, lv->name, prefix_len);
		prefix[prefix_len] = '\0';
		base_name = prefix;
	}

	log_lv = _create_mirror_log(lv, ah, alloc, base_name, log_suffix);
	if (!log_lv) {
		log_error("Failed to create mirror log.");
		return NULL;
	}

	/* One log copy already exists; add the remaining log_count - 1 */
	if (log_count > 1) {
		if (!_form_mirror(cmd, ah, log_lv, log_count-1, 1, 0, region_size, 2)) {
			log_error("Failed to form mirrored log.");
			return NULL;
		}
	}

	if (!_init_mirror_log(cmd, log_lv, in_sync, &lv->tags, 1)) {
		log_error("Failed to initialise mirror log.");
		return NULL;
	}

	return log_lv;
}
2008-01-26 03:25:04 +03:00
int attach_mirror_log(struct lv_segment *seg, struct logical_volume *log_lv)
{
2008-01-26 03:25:04 +03:00
seg->log_lv = log_lv;
log_lv->status |= MIRROR_LOG;
2009-05-21 07:04:52 +04:00
lv_set_hidden(log_lv);
2008-01-26 03:25:04 +03:00
return add_seg_to_segs_using_this_lv(log_lv, seg);
}
/*
 * Give the single-segment mirror 'lv' a log made of 'log_count' copies.
 *
 * If the mirror already has that many log copies, nothing is done and 1
 * is returned.  If a log already exists, it is converted to a mirrored
 * log.  Otherwise a new log is created, initialised and attached to the
 * first segment of 'lv'.
 *
 * Refused for: mirrored logs (log_count > 1) in a clustered VG, LVs with
 * more than one segment, and LVs that are active remotely but not
 * locally.
 *
 * Returns 1 on success, 0 on error.  Once the allocation handle exists,
 * every exit path releases it.
 */
int add_mirror_log(struct cmd_context *cmd, struct logical_volume *lv,
		   uint32_t log_count, uint32_t region_size,
		   struct dm_list *allocatable_pvs, alloc_policy_t alloc)
{
	struct alloc_handle *ah;
	const struct segment_type *segtype;
	struct dm_list *parallel_areas;
	dm_percent_t sync_percent;
	int in_sync;
	struct logical_volume *log_lv;
	unsigned old_log_count;
	int r = 0;

	if (vg_is_clustered(lv->vg) && (log_count > 1)) {
		log_error("Log type, \"mirrored\", is unavailable to cluster mirrors");
		return 0;
	}

	if (dm_list_size(&lv->segments) != 1) {
		log_error("Multiple-segment mirror is not supported");
		return 0;
	}

	if (lv_is_active_but_not_locally(lv)) {
		log_error("Unable to convert the log of a mirror, %s, that is "
			  "active remotely but not locally", lv->name);
		return 0;
	}

	log_lv = first_seg(lv)->log_lv;
	old_log_count = (log_lv) ? lv_mirror_count(log_lv) : 0;
	if (old_log_count == log_count) {
		log_verbose("Mirror already has a %s log",
			    !log_count ? "core" :
			    (log_count == 1) ? "disk" : "mirrored");
		return 1;
	}

	if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0, 0)))
		return_0;

	if (!(segtype = get_segtype_from_string(cmd, "mirror")))
		return_0;

	if (activation() && segtype->ops->target_present &&
	    !segtype->ops->target_present(cmd, NULL, NULL)) {
		log_error("%s: Required device-mapper target(s) not "
			  "detected in your kernel", segtype->name);
		return 0;
	}

	/*
	 * allocate destination extents
	 *
	 * NOTE(review): log_count - old_log_count is an unsigned
	 * subtraction; this assumes callers only ask for more log copies
	 * than currently exist - confirm against the callers.
	 */
	ah = allocate_extents(lv->vg, NULL, segtype,
			      0, 0, log_count - old_log_count, region_size,
			      lv->le_count, allocatable_pvs,
			      alloc, 0, parallel_areas);
	if (!ah) {
		log_error("Unable to allocate extents for mirror log.");
		return 0;
	}

	if (old_log_count) {
		/*
		 * Converting from disk to mirrored log.
		 * Both outcomes go through 'out' so that the allocation
		 * handle is released on failure as well as on success.
		 */
		if (!_form_mirror(cmd, ah, log_lv, log_count - 1, 1, 0,
				  region_size, 1))
			log_error("Failed to convert mirror log");
		else
			r = 1;
		goto out;
	}

	/*
	 * check sync status: in sync if the global flag says so, or if
	 * the mirror's sync percentage can be read and is 100%.
	 */
	if (mirror_in_sync() ||
	    (lv_mirror_percent(cmd, lv, 0, &sync_percent, NULL) &&
	     (sync_percent == DM_PERCENT_100)))
		in_sync = 1;
	else
		in_sync = 0;

	if (!(log_lv = _set_up_mirror_log(cmd, ah, lv, log_count,
					  region_size, alloc, in_sync)))
		goto_out;

	if (!attach_mirror_log(first_seg(lv), log_lv))
		goto_out;

	r = 1;
out:
	alloc_destroy(ah);
	return r;
}
/*
 * Convert "linear" LV to "mirror".
 *
 * Allocates extents for the new images (and the log, when log_count is
 * non-zero), sets up the log first, then forms the mirror and attaches
 * the log to it.
 *
 * Returns 1 on success, 0 on error.
 */
int add_mirror_images(struct cmd_context *cmd, struct logical_volume *lv,
		      uint32_t mirrors, uint32_t stripes,
		      uint32_t stripe_size, uint32_t region_size,
		      struct dm_list *allocatable_pvs, alloc_policy_t alloc,
		      uint32_t log_count)
{
	struct alloc_handle *ah;
	const struct segment_type *segtype;
	struct dm_list *parallel_areas;
	struct logical_volume *log_lv = NULL;
	uint32_t log_region_size;

	/* Gather what the allocator needs, then allocate destination extents */
	parallel_areas = build_parallel_areas_from_lv(lv, 0, 0);
	if (!parallel_areas)
		return_0;

	segtype = get_segtype_from_string(cmd, "mirror");
	if (!segtype)
		return_0;

	ah = allocate_extents(lv->vg, NULL, segtype,
			      stripes, mirrors, log_count, region_size, lv->le_count,
			      allocatable_pvs, alloc, 0, parallel_areas);
	if (!ah) {
		log_error("Unable to allocate extents for mirror(s).");
		return 0;
	}

	/* Create and initialize the mirror log, if one was requested */
	if (log_count) {
		/* The log's region size never exceeds the VG extent size */
		log_region_size = region_size;
		if (log_region_size > lv->vg->extent_size)
			log_region_size = lv->vg->extent_size;

		log_lv = _set_up_mirror_log(cmd, ah, lv, log_count,
					    log_region_size,
					    alloc, mirror_in_sync());
		if (!log_lv) {
			stack;
			goto bad;
		}
	}

	/*
	 * The log initialization involves vg metadata commit.
	 * So from here on, if failure occurs, the log must be explicitly
	 * removed and the updated vg metadata should be committed.
	 */
	if (_form_mirror(cmd, ah, lv, mirrors, stripes, stripe_size, region_size, 0)) {
		if (log_count && !attach_mirror_log(first_seg(lv), log_lv))
			stack;

		alloc_destroy(ah);
		return 1;
	}

	/* Forming the mirror failed: take the log back out again */
	if (log_lv) {
		if (lv_remove(log_lv) &&
		    vg_write(log_lv->vg) &&
		    vg_commit(log_lv->vg))
			backup(log_lv->vg);
		else
			log_error("Manual intervention may be required to remove "
				  "abandoned log LV before retrying.");
	}

bad:
	alloc_destroy(ah);
	return 0;
}
/*
 * Generic interface for adding mirror and/or mirror log.
 * 'mirrors' is the number of mirrors to be added.
 * 'pvs' is the list of allocatable pvs.
 * 'flags' selects the kind of mirroring (MIRROR_BY_SEG,
 * MIRROR_BY_SEGMENTED_LV or MIRROR_BY_LV) and may carry
 * MIRROR_SKIP_INIT_SYNC.
 *
 * Returns 1 on success, 0 on error.
 */
int lv_add_mirrors(struct cmd_context *cmd, struct logical_volume *lv,
		   uint32_t mirrors, uint32_t stripes, uint32_t stripe_size,
		   uint32_t region_size, uint32_t log_count,
		   struct dm_list *pvs, alloc_policy_t alloc, uint32_t flags)
{
	if (!mirrors && !log_count) {
		log_error("No conversion is requested");
		return 0;
	}

	if (vg_is_clustered(lv->vg)) {
		/* FIXME: move this test out of this function */
		/* Skip test for pvmove mirrors, it can use local mirror */
		if (!lv_is_pvmove(lv) && !lv_is_locked(lv) &&
		    lv_is_active(lv) &&
		    !lv_is_active_exclusive_locally(lv) && /* lv_is_active_remotely */
		    !cluster_mirror_is_available(lv->vg->cmd)) {
			log_error("Shared cluster mirrors are not available.");
			return 0;
		}

		/*
		 * No mirrored logs for cluster mirrors until
		 * log daemon is multi-threaded.
		 */
		if (log_count > 1) {
			log_error("Log type, \"mirrored\", is unavailable to cluster mirrors");
			return 0;
		}
	}

	/*
	 * For a corelog mirror the activation code depends on the global
	 * mirror_in_sync status.  A newly added mirror has to start out
	 * 'out-of-sync' so that the sync starts - unless
	 * MIRROR_SKIP_INIT_SYNC was given, which overrides that.
	 */
	if (flags & MIRROR_SKIP_INIT_SYNC)
		init_mirror_in_sync(1);
	else if (!log_count)
		init_mirror_in_sync(0);

	/* Segment-by-segment mirroring: no log, no stripes */
	if (flags & MIRROR_BY_SEG) {
		if (log_count) {
			log_error("Persistent log is not supported on "
				  "segment-by-segment mirroring");
			return 0;
		}

		if (stripes > 1) {
			log_error("Striped-mirroring is not supported on "
				  "segment-by-segment mirroring");
			return 0;
		}

		return _add_mirrors_that_preserve_segments(lv, MIRROR_BY_SEG,
							   mirrors, region_size,
							   pvs, alloc);
	}

	/* LV-level mirror that keeps the segment layout: no stripes */
	if (flags & MIRROR_BY_SEGMENTED_LV) {
		if (stripes > 1) {
			log_error("Striped-mirroring is not supported on "
				  "segment-by-segment mirroring");
			return 0;
		}

		return _add_mirrors_that_preserve_segments(lv, MIRROR_BY_SEGMENTED_LV,
							   mirrors, region_size,
							   pvs, alloc);
	}

	/* LV-level mirror: with no images to add, only the log changes */
	if (flags & MIRROR_BY_LV) {
		if (mirrors)
			return add_mirror_images(cmd, lv, mirrors,
						 stripes, stripe_size, region_size,
						 pvs, alloc, log_count);

		return add_mirror_log(cmd, lv, log_count,
				      region_size, pvs, alloc);
	}

	log_error("Unsupported mirror conversion type");

	return 0;
}
2010-01-09 01:00:31 +03:00
int lv_split_mirror_images(struct logical_volume *lv, const char *split_name,
uint32_t split_count, struct dm_list *removable_pvs)
{
int r;
if (find_lv_in_vg(lv->vg, split_name)) {
log_error("Logical Volume \"%s\" already exists in "
"volume group \"%s\"", split_name, lv->vg->name);
return 0;
}
2010-01-09 01:00:31 +03:00
/* Can't split a mirror that is not in-sync... unless force? */
if (!_mirrored_lv_in_sync(lv)) {
log_error("Unable to split mirror that is not in-sync.");
return 0;
2010-01-09 01:00:31 +03:00
}
/*
* FIXME: Generate default name when not supplied.
*
* If we were going to generate a default name, we would
* do it here. Better to wait for a decision on the form
* of the default name when '--track_deltas' (the ability
* to merge a split leg back in and only copy the changes)
* is being implemented. For now, we force the user to
* come up with a name for their LV.
*/
r = _split_mirror_images(lv, split_name, split_count, removable_pvs);
if (!r)
2014-05-20 14:53:51 +04:00
return_0;
2010-01-09 01:00:31 +03:00
return 1;
}
/*
* Generic interface for removing mirror and/or mirror log.
* 'mirror' is the number of mirrors to be removed.
* 'pvs' is removable pvs.
*/
int lv_remove_mirrors(struct cmd_context *cmd __attribute__((unused)),
2007-12-21 01:37:42 +03:00
struct logical_volume *lv,
uint32_t mirrors, uint32_t log_count,
int (*is_removable)(struct logical_volume *, void *),
void *removable_baton,
uint64_t status_mask)
{
uint32_t new_mirrors;
struct lv_segment *seg;
if (!mirrors && !log_count) {
log_error("No conversion is requested");
return 0;
}
seg = first_seg(lv);
if (!seg_is_mirrored(seg)) {
log_error("Not a mirror segment");
return 0;
}
2007-12-20 21:55:46 +03:00
if (lv_mirror_count(lv) <= mirrors) {
log_error("Removing more than existing: %d <= %d",
seg->area_count, mirrors);
return 0;
}
2007-12-20 21:55:46 +03:00
new_mirrors = lv_mirror_count(lv) - mirrors - 1;
/* MIRROR_BY_LV */
if (seg_type(seg, 0) == AREA_LV &&
lv_is_mirror_image(seg_lv(seg, 0)))
2007-12-20 21:55:46 +03:00
return remove_mirror_images(lv, new_mirrors + 1,
is_removable, removable_baton,
log_count ? 1U : 0);
/* MIRROR_BY_SEG */
if (log_count) {
log_error("Persistent log is not supported on "
"segment-by-segment mirroring");
return 0;
}
return remove_mirrors_from_segments(lv, new_mirrors, status_mask);
}
2014-11-08 03:28:38 +03:00
/*
 * Translate a mirror log type name ("core", "disk" or "mirrored") into
 * its MIRROR_LOG_* value, stored through 'log_count'.
 *
 * Returns 1 on success.  For an unknown name an error is logged,
 * *log_count is left untouched and 0 is returned.
 */
int set_mirror_log_count(int *log_count, const char *mirrorlog)
{
	static const struct {
		const char *name;
		int count;
	} log_types[] = {
		{ "core", MIRROR_LOG_CORE },
		{ "disk", MIRROR_LOG_DISK },
		{ "mirrored", MIRROR_LOG_MIRRORED },
	};
	size_t i;

	for (i = 0; i < sizeof(log_types) / sizeof(log_types[0]); i++) {
		if (!strcmp(log_types[i].name, mirrorlog)) {
			*log_count = log_types[i].count;
			return 1;
		}
	}

	log_error("Mirror log type \"%s\" is unknown.", mirrorlog);

	return 0;
}
/*
 * Translate a MIRROR_LOG_* value into its name ("core", "disk" or
 * "mirrored").
 *
 * Any other value is reported as an internal error and yields NULL.
 */
const char *get_mirror_log_name(int log_count)
{
	if (log_count == MIRROR_LOG_CORE)
		return "core";

	if (log_count == MIRROR_LOG_DISK)
		return "disk";

	if (log_count == MIRROR_LOG_MIRRORED)
		return "mirrored";

	log_error(INTERNAL_ERROR "Unknown mirror log count %d.", log_count);

	return NULL;
}