/*
 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "lib/misc/lib.h"
#include "lib/metadata/metadata.h"
#include "lib/locking/locking.h"
#include "pv_map.h"
#include "lib/misc/lvm-string.h"
#include "lib/commands/toolcontext.h"
#include "lib/metadata/lv_alloc.h"
#include "lib/metadata/pv_alloc.h"
#include "lib/display/display.h"
#include "lib/metadata/segtype.h"
#include "lib/format_text/archiver.h"
#include "lib/activate/activate.h"
#include "lib/datastruct/str_list.h"
#include "lib/config/defaults.h"
#include "lib/misc/lvm-exec.h"
#include "lib/mm/memlock.h"
#include "lib/locking/lvmlockd.h"
#include "lib/label/label.h"
#include "lib/misc/lvm-signal.h"

#ifdef HAVE_BLKZEROOUT
#include <sys/ioctl.h>
#include <linux/fs.h>
#endif

typedef enum {
	PREFERRED,
	USE_AREA,
	NEXT_PV,
	NEXT_AREA
} area_use_t;

/* FIXME: remove RAID_METADATA_AREA_LEN macro after defining 'raid_log_extents' */
#define RAID_METADATA_AREA_LEN 1

/* FIXME These ended up getting used differently from first intended. Refactor. */
/* Only one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG, A_CLING_TO_ALLOCED may be set */
#define A_CONTIGUOUS_TO_LVSEG	0x01	/* Must be contiguous to an existing segment */
#define A_CLING_TO_LVSEG	0x02	/* Must use same disks as existing LV segment */
#define A_CLING_TO_ALLOCED	0x04	/* Must use same disks as already-allocated segment */

#define A_CLING_BY_TAGS		0x08	/* Must match tags against existing segment */
#define A_CAN_SPLIT		0x10
#define A_AREA_COUNT_MATCHES	0x20	/* Existing lvseg has same number of areas as new segment */

#define A_POSITIONAL_FILL	0x40	/* Slots are positional and filled using PREFERRED */
#define A_PARTITION_BY_TAGS	0x80	/* No allocated area may share any tag with any other */

/*
 * Constant parameters during a single allocation attempt.
 */
struct alloc_parms {
	alloc_policy_t alloc;
	unsigned flags;		/* Holds A_* */
	struct lv_segment *prev_lvseg;
	uint32_t extents_still_needed;
};

/*
 * Holds varying state of each allocation attempt.
 */
struct alloc_state {
	const struct alloc_parms *alloc_parms;
	struct pv_area_used *areas;
	uint32_t areas_size;
	uint32_t log_area_count_still_needed;	/* Number of areas still needing to be allocated for the log */
	uint32_t allocated;	/* Total number of extents allocated so far */
	uint32_t num_positional_areas;	/* Number of parallel allocations that must be contiguous/cling */
};

struct lv_names {
	const char *old;
	const char *new;
};

enum {
	LV_TYPE_UNKNOWN,
	LV_TYPE_NONE,
	LV_TYPE_PUBLIC,
	LV_TYPE_PRIVATE,
	LV_TYPE_HISTORY,
	LV_TYPE_LINEAR,
	LV_TYPE_STRIPED,
	LV_TYPE_MIRROR,
	LV_TYPE_RAID,
	LV_TYPE_THIN,
	LV_TYPE_CACHE,
	LV_TYPE_SPARSE,
	LV_TYPE_ORIGIN,
	LV_TYPE_THINORIGIN,
	LV_TYPE_MULTITHINORIGIN,
	LV_TYPE_THICKORIGIN,
	LV_TYPE_MULTITHICKORIGIN,
	LV_TYPE_CACHEORIGIN,
	LV_TYPE_EXTTHINORIGIN,
	LV_TYPE_MULTIEXTTHINORIGIN,
	LV_TYPE_SNAPSHOT,
	LV_TYPE_THINSNAPSHOT,
	LV_TYPE_THICKSNAPSHOT,
	LV_TYPE_PVMOVE,
	LV_TYPE_IMAGE,
	LV_TYPE_LOG,
	LV_TYPE_METADATA,
	LV_TYPE_POOL,
	LV_TYPE_DATA,
	LV_TYPE_SPARE,
	LV_TYPE_VDO,
	LV_TYPE_VIRTUAL,
	LV_TYPE_RAID0,
	LV_TYPE_RAID0_META,
	LV_TYPE_RAID1,
	LV_TYPE_RAID10,
	LV_TYPE_RAID4,
	LV_TYPE_RAID5,
	LV_TYPE_RAID5_N,
	LV_TYPE_RAID5_LA,
	LV_TYPE_RAID5_RA,
	LV_TYPE_RAID5_LS,
	LV_TYPE_RAID5_RS,
	LV_TYPE_RAID6,
	LV_TYPE_RAID6_ZR,
	LV_TYPE_RAID6_NR,
	LV_TYPE_RAID6_NC,
	LV_TYPE_LOCKD,
	LV_TYPE_SANLOCK,
	LV_TYPE_CACHEVOL,
	LV_TYPE_WRITECACHE,
	LV_TYPE_WRITECACHEORIGIN,
	LV_TYPE_INTEGRITY,
	LV_TYPE_INTEGRITYORIGIN
};

static const char *_lv_type_names[] = {
	[LV_TYPE_UNKNOWN] = "unknown",
	[LV_TYPE_NONE] = "none",
	[LV_TYPE_PUBLIC] = "public",
	[LV_TYPE_PRIVATE] = "private",
	[LV_TYPE_HISTORY] = "history",
	[LV_TYPE_LINEAR] = "linear",
	[LV_TYPE_STRIPED] = "striped",
	[LV_TYPE_MIRROR] = "mirror",
	[LV_TYPE_RAID] = "raid",
	[LV_TYPE_THIN] = "thin",
	[LV_TYPE_CACHE] = "cache",
	[LV_TYPE_SPARSE] = "sparse",
	[LV_TYPE_ORIGIN] = "origin",
	[LV_TYPE_THINORIGIN] = "thinorigin",
	[LV_TYPE_MULTITHINORIGIN] = "multithinorigin",
	[LV_TYPE_THICKORIGIN] = "thickorigin",
	[LV_TYPE_MULTITHICKORIGIN] = "multithickorigin",
	[LV_TYPE_CACHEORIGIN] = "cacheorigin",
	[LV_TYPE_EXTTHINORIGIN] = "extthinorigin",
	[LV_TYPE_MULTIEXTTHINORIGIN] = "multiextthinorigin",
	[LV_TYPE_SNAPSHOT] = "snapshot",
	[LV_TYPE_THINSNAPSHOT] = "thinsnapshot",
	[LV_TYPE_THICKSNAPSHOT] = "thicksnapshot",
	[LV_TYPE_PVMOVE] = "pvmove",
	[LV_TYPE_IMAGE] = "image",
	[LV_TYPE_LOG] = "log",
	[LV_TYPE_METADATA] = "metadata",
	[LV_TYPE_POOL] = "pool",
	[LV_TYPE_DATA] = "data",
	[LV_TYPE_SPARE] = "spare",
	[LV_TYPE_VDO] = "vdo",
	[LV_TYPE_VIRTUAL] = "virtual",
	[LV_TYPE_RAID0] = SEG_TYPE_NAME_RAID0,
	[LV_TYPE_RAID0_META] = SEG_TYPE_NAME_RAID0_META,
	[LV_TYPE_RAID1] = SEG_TYPE_NAME_RAID1,
	[LV_TYPE_RAID10] = SEG_TYPE_NAME_RAID10,
	[LV_TYPE_RAID4] = SEG_TYPE_NAME_RAID4,
	[LV_TYPE_RAID5] = SEG_TYPE_NAME_RAID5,
	[LV_TYPE_RAID5_N] = SEG_TYPE_NAME_RAID5_N,
	[LV_TYPE_RAID5_LA] = SEG_TYPE_NAME_RAID5_LA,
	[LV_TYPE_RAID5_RA] = SEG_TYPE_NAME_RAID5_RA,
	[LV_TYPE_RAID5_LS] = SEG_TYPE_NAME_RAID5_LS,
	[LV_TYPE_RAID5_RS] = SEG_TYPE_NAME_RAID5_RS,
	[LV_TYPE_RAID6] = SEG_TYPE_NAME_RAID6,
	[LV_TYPE_RAID6_ZR] = SEG_TYPE_NAME_RAID6_ZR,
	[LV_TYPE_RAID6_NR] = SEG_TYPE_NAME_RAID6_NR,
	[LV_TYPE_RAID6_NC] = SEG_TYPE_NAME_RAID6_NC,
	[LV_TYPE_LOCKD] = "lockd",
	[LV_TYPE_SANLOCK] = "sanlock",
	[LV_TYPE_CACHEVOL] = "cachevol",
	[LV_TYPE_WRITECACHE] = "writecache",
	[LV_TYPE_WRITECACHEORIGIN] = "writecacheorigin",
	[LV_TYPE_INTEGRITY] = "integrity",
	[LV_TYPE_INTEGRITYORIGIN] = "integrityorigin",
};

static int _lv_layout_and_role_mirror(struct dm_pool *mem,
				      const struct logical_volume *lv,
				      struct dm_list *layout,
				      struct dm_list *role,
				      int *public_lv)
{
	int top_level = 0;

	/* non-top-level LVs */
	if (lv_is_mirror_image(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE]))
			goto_bad;
	} else if (lv_is_mirror_log(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_LOG]))
			goto_bad;
		if (lv_is_mirrored(lv) &&
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
			goto_bad;
	} else if (lv_is_pvmove(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
			goto_bad;
	} else
		top_level = 1;

	if (!top_level) {
		*public_lv = 0;
		return 1;
	}

	/* top-level LVs */
	if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
		goto_bad;

	return 1;
bad:
	return 0;
}

static int _lv_layout_and_role_raid(struct dm_pool *mem,
				    const struct logical_volume *lv,
				    struct dm_list *layout,
				    struct dm_list *role,
				    int *public_lv)
{
	int top_level = 0;
	const struct segment_type *segtype;

	/* non-top-level LVs */
	if (lv_is_raid_image(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE]))
			goto_bad;
	} else if (lv_is_raid_metadata(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
			goto_bad;
	} else if (lv_is_pvmove(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID]))
			goto_bad;
	} else
		top_level = 1;

	if (!top_level) {
		*public_lv = 0;
		return 1;
	}

	/* top-level LVs */
	if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID]))
		goto_bad;

	segtype = first_seg(lv)->segtype;

	if (segtype_is_raid0(segtype)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID0]))
			goto_bad;
	} else if (segtype_is_raid1(segtype)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID1]))
			goto_bad;
	} else if (segtype_is_raid10(segtype)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID10]))
			goto_bad;
	} else if (segtype_is_raid4(segtype)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID4]))
			goto_bad;
	} else if (segtype_is_any_raid5(segtype)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5]))
			goto_bad;

		if (segtype_is_raid5_la(segtype)) {
			if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LA]))
				goto_bad;
		} else if (segtype_is_raid5_ra(segtype)) {
			if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RA]))
				goto_bad;
		} else if (segtype_is_raid5_ls(segtype)) {
			if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LS]))
				goto_bad;
		} else if (segtype_is_raid5_rs(segtype)) {
			if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RS]))
				goto_bad;
		}
	} else if (segtype_is_any_raid6(segtype)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6]))
			goto_bad;

		if (segtype_is_raid6_zr(segtype)) {
			if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_ZR]))
				goto_bad;
		} else if (segtype_is_raid6_nr(segtype)) {
			if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NR]))
				goto_bad;
		} else if (segtype_is_raid6_nc(segtype)) {
			if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NC]))
				goto_bad;
		}
	}

	return 1;
bad:
	return 0;
}

static int _lv_layout_and_role_thin(struct dm_pool *mem,
				    const struct logical_volume *lv,
				    struct dm_list *layout,
				    struct dm_list *role,
				    int *public_lv)
{
	int top_level = 0;
	unsigned snap_count;

	/* non-top-level LVs */
	if (lv_is_thin_pool_metadata(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
			goto_bad;
	} else if (lv_is_thin_pool_data(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
			goto_bad;
	} else
		top_level = 1;

	if (!top_level) {
		*public_lv = 0;
		return 1;
	}

	/* top-level LVs */
	if (lv_is_thin_volume(lv)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_SPARSE]))
			goto_bad;
		if (lv_is_thin_origin(lv, &snap_count)) {
			if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
			    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINORIGIN]))
				goto_bad;
			if (snap_count > 1 &&
			    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHINORIGIN]))
				goto_bad;
		}
		if (lv_is_thin_snapshot(lv))
			if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) ||
			    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINSNAPSHOT]))
				goto_bad;
	} else if (lv_is_thin_pool(lv)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
			goto_bad;
		*public_lv = 0;
	}

	if (lv_is_external_origin(lv)) {
		if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_EXTTHINORIGIN]))
			goto_bad;
		if (lv->external_count > 1 &&
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTIEXTTHINORIGIN]))
			goto_bad;
	}

	return 1;
bad:
	return 0;
}

static int _lv_layout_and_role_cache(struct dm_pool *mem,
				     const struct logical_volume *lv,
				     struct dm_list *layout,
				     struct dm_list *role,
				     int *public_lv)
{
	int top_level = 0;

	/* non-top-level LVs */
	if (lv_is_cache_pool_metadata(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
			goto_bad;
	} else if (lv_is_cache_pool_data(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
			goto_bad;
		if (lv_is_cache(lv) &&
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
			goto_bad;
	} else if (lv_is_cache_origin(lv)) {
		if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
		    !str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHEORIGIN]))
			goto_bad;
		if (lv_is_cache(lv) &&
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
			goto_bad;
	} else if (lv_is_writecache_origin(lv)) {
		if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_WRITECACHE]) ||
		    !str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_WRITECACHEORIGIN]))
			goto_bad;
		if (lv_is_writecache(lv) &&
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]))
			goto_bad;
	} else
		top_level = 1;

	if (!top_level) {
		*public_lv = 0;
		return 1;
	}

	/* top-level LVs */
	if (lv_is_cache(lv) &&
	    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
		goto_bad;
	else if (lv_is_writecache(lv) &&
		 !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]))
		goto_bad;
	else if (lv_is_writecache_cachevol(lv)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHEVOL]))
			goto_bad;
		*public_lv = 0;
	} else if (lv_is_cache_vol(lv)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHEVOL]))
			goto_bad;
		*public_lv = 0;
	} else if (lv_is_cache_pool(lv)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
			goto_bad;
		*public_lv = 0;
	}

	return 1;
bad:
	return 0;
}

static int _lv_layout_and_role_integrity(struct dm_pool *mem,
					 const struct logical_volume *lv,
					 struct dm_list *layout,
					 struct dm_list *role,
					 int *public_lv)
{
	int top_level = 0;

	/* non-top-level LVs */
	if (lv_is_integrity_metadata(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITY]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
			goto_bad;
	} else if (lv_is_integrity_origin(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITY]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITYORIGIN]))
			goto_bad;
	} else
		top_level = 1;

	if (!top_level) {
		*public_lv = 0;
		return 1;
	}

	/* top-level LVs */
	if (lv_is_integrity(lv)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_INTEGRITY]))
			goto_bad;
	}

	return 1;
bad:
	return 0;
}

static int _lv_layout_and_role_thick_origin_snapshot(struct dm_pool *mem,
						     const struct logical_volume *lv,
						     struct dm_list *layout,
						     struct dm_list *role,
						     int *public_lv)
{
	if (lv_is_origin(lv)) {
		if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKORIGIN]))
			goto_bad;
		/*
		 * Thin volumes are also marked with virtual flag, but we don't show "virtual"
		 * layout for thin LVs as they have their own keyword for layout - "thin"!
		 * So rule thin LVs out here!
		 */
		if (lv_is_virtual(lv) && !lv_is_thin_volume(lv)) {
			if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VIRTUAL]))
				goto_bad;
			*public_lv = 0;
		}
		if (lv->origin_count > 1 &&
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHICKORIGIN]))
			goto_bad;
	} else if (lv_is_cow(lv)) {
		if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKSNAPSHOT]))
			goto_bad;
	}

	return 1;
bad:
	return 0;
}

static int _lv_layout_and_role_vdo(struct dm_pool *mem,
				   const struct logical_volume *lv,
				   struct dm_list *layout,
				   struct dm_list *role,
				   int *public_lv)
{
	int top_level = 0;

	/* non-top-level LVs */
	if (lv_is_vdo_pool(lv)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VDO]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
			goto_bad;
	} else if (lv_is_vdo_pool_data(lv)) {
		if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_VDO]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
		    !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
			goto_bad;
	} else
		top_level = 1;

	if (!top_level) {
		*public_lv = 0;
		return 1;
	}

	/* top-level LVs */
	if (lv_is_vdo(lv)) {
		if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VDO]) ||
		    !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_SPARSE]))
			goto_bad;
	}

	return 1;
bad:
	return 0;
}

int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
		       struct dm_list **layout, struct dm_list **role)
{
	int linear, striped;
	struct lv_segment *seg;
	int public_lv = 1;

	*layout = *role = NULL;

	if (!(*layout = str_list_create(mem))) {
		log_error("LV layout list allocation failed");
		return 0;
	}

	if (!(*role = str_list_create(mem))) {
		log_error("LV role list allocation failed");
		goto bad;
	}

	if (lv_is_historical(lv)) {
		if (!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_NONE]) ||
		    !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_HISTORY]))
			goto_bad;
	}

	/* Mirrors and related */
	if ((lv_is_mirror_type(lv) || lv_is_pvmove(lv)) &&
	    !_lv_layout_and_role_mirror(mem, lv, *layout, *role, &public_lv))
		goto_bad;

	/* RAIDs and related */
	if (lv_is_raid_type(lv) &&
	    !_lv_layout_and_role_raid(mem, lv, *layout, *role, &public_lv))
		goto_bad;

	/* Thins and related */
	if ((lv_is_thin_type(lv) || lv_is_external_origin(lv)) &&
	    !_lv_layout_and_role_thin(mem, lv, *layout, *role, &public_lv))
		goto_bad;

	/* Caches and related */
	if ((lv_is_cache_type(lv) || lv_is_cache_origin(lv) || lv_is_writecache(lv) || lv_is_writecache_origin(lv)) &&
	    !_lv_layout_and_role_cache(mem, lv, *layout, *role, &public_lv))
		goto_bad;

	/* Integrity related */
	if ((lv_is_integrity(lv) || lv_is_integrity_origin(lv) || lv_is_integrity_metadata(lv)) &&
	    !_lv_layout_and_role_integrity(mem, lv, *layout, *role, &public_lv))
		goto_bad;

	/* VDO and related */
	if (lv_is_vdo_type(lv) &&
	    !_lv_layout_and_role_vdo(mem, lv, *layout, *role, &public_lv))
		goto_bad;

	/* Pool-specific */
	if (lv_is_pool_metadata_spare(lv)) {
		if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_POOL]) ||
		    !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SPARE]))
			goto_bad;
		public_lv = 0;
	}

	/* Old-style origins/snapshots, virtual origins */
	if (!_lv_layout_and_role_thick_origin_snapshot(mem, lv, *layout, *role, &public_lv))
		goto_bad;

	if (lv_is_lockd_sanlock_lv(lv)) {
		if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_LOCKD]) ||
		    !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SANLOCK]))
			goto_bad;
		public_lv = 0;
	}

	/*
	 * If the layout is not yet determined, it must be either
	 * linear or striped or a mixture of these two.
	 */
	if (dm_list_empty(*layout)) {
		linear = striped = 0;
		dm_list_iterate_items(seg, &lv->segments) {
			if (seg_is_linear(seg))
				linear = 1;
			else if (seg_is_striped(seg))
				striped = 1;
			else {
				/*
				 * This should not happen, but if it does
				 * we'll see that an "unknown" layout is
				 * present. This means we forgot to detect
				 * the role above and need to add proper
				 * detection for such a role!
				 */
				log_warn(INTERNAL_ERROR "WARNING: Failed to properly detect "
					 "layout and role for LV %s/%s.",
					 lv->vg->name, lv->name);
			}
		}

		if (linear &&
		    !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_LINEAR]))
			goto_bad;

		if (striped &&
		    !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_STRIPED]))
			goto_bad;

		if (!linear && !striped &&
		    !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_UNKNOWN]))
			goto_bad;
	}

	/* Finally, add either the 'public' or the 'private' role to the LV */
	if (public_lv) {
		if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PUBLIC]))
			goto_bad;
	} else {
		if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PRIVATE]))
			goto_bad;
	}

	return 1;
bad:
	dm_pool_free(mem, *layout);

	return 0;
}
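
/*
 * Illustrative note (added by the editor, not from the original source):
 * for a typical stacked setup the lists built above might come out as, e.g.:
 *   - a thin pool LV:      layout = { "thin", "pool" },  role = { "private" }
 *   - its _tdata sub-LV:   layout = { "linear" } (or "striped"),
 *                          role = { "private", "thin", "pool", "data" }
 *   - a plain striped LV:  layout = { "striped" },       role = { "public" }
 * The exact strings are the _lv_type_names[] entries added above; "public"
 * or "private" is always prepended last via str_list_add_h_no_dup_check().
 */
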
struct dm_list_and_mempool {
	struct dm_list *list;
	struct dm_pool *mem;
};

static int _get_pv_list_for_lv(struct logical_volume *lv, void *data)
{
	int dup_found;
	uint32_t s;
	struct pv_list *pvl;
	struct lv_segment *seg;
	struct dm_list *pvs = ((struct dm_list_and_mempool *)data)->list;
	struct dm_pool *mem = ((struct dm_list_and_mempool *)data)->mem;

	dm_list_iterate_items(seg, &lv->segments) {
		for (s = 0; s < seg->area_count; s++) {
			dup_found = 0;

			if (seg_type(seg, s) != AREA_PV)
				continue;

			/* do not add duplicates */
			dm_list_iterate_items(pvl, pvs)
				if (pvl->pv == seg_pv(seg, s))
					dup_found = 1;

			if (dup_found)
				continue;

			if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
				log_error("Failed to allocate memory");
				return 0;
			}

			pvl->pv = seg_pv(seg, s);
			log_debug_metadata("  %s/%s uses %s", lv->vg->name,
					   lv->name, pv_dev_name(pvl->pv));

			dm_list_add(pvs, &pvl->list);
		}
	}

	return 1;
}

/*
 * get_pv_list_for_lv
 * @mem - mempool to allocate the list from.
 * @lv
 * @pvs - The list to add pv_list items to.
 *
 * 'pvs' is filled with 'pv_list' items for PVs that compose the LV.
 * If the 'pvs' list already has items in it, duplicates will not be
 * added. So, it is safe to repeatedly call this function for different
 * LVs and build up a list of PVs for them all.
 *
 * Memory to create the list is obtained from the mempool provided.
 *
 * Returns: 1 on success, 0 on error
 */
int get_pv_list_for_lv(struct dm_pool *mem,
		       struct logical_volume *lv, struct dm_list *pvs)
{
	struct dm_list_and_mempool context = { pvs, mem };

	log_debug_metadata("Generating list of PVs that %s/%s uses:",
			   lv->vg->name, lv->name);

	if (!_get_pv_list_for_lv(lv, &context))
		return_0;

	return for_each_sub_lv(lv, &_get_pv_list_for_lv, &context);
}

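/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * code; lv_a and lv_b are hypothetical LVs): because duplicates are skipped,
 * a caller can accumulate the PVs of several LVs into one list:
 *
 *	struct dm_list pvs;
 *	dm_list_init(&pvs);
 *	if (!get_pv_list_for_lv(mem, lv_a, &pvs) ||
 *	    !get_pv_list_for_lv(mem, lv_b, &pvs))
 *		return_0;
 *	// pvs now holds one pv_list entry per distinct PV used by lv_a or lv_b
 */
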
/*
 * get_default_region_size
 * @cmd
 *
 * 'mirror_region_size' and 'raid_region_size' are effectively the same thing.
 * However, "raid" is more inclusive than "mirror", so the name has been
 * changed. This function checks for the old setting and warns the user if
 * it is being overridden by the new setting (i.e. warns if both settings are
 * present).
 *
 * Note that the config files give defaults in kiB terms, but we
 * return the value in terms of sectors.
 *
 * Returns: default region_size in sectors
 */
static int _get_default_region_size(struct cmd_context *cmd)
{
	int mrs, rrs;

	/*
	 * 'mirror_region_size' is the old setting. It is overridden
	 * by the new setting, 'raid_region_size'.
	 */
	mrs = 2 * find_config_tree_int(cmd, activation_mirror_region_size_CFG, NULL);
	rrs = 2 * find_config_tree_int(cmd, activation_raid_region_size_CFG, NULL);

	if (!mrs && !rrs)
		return DEFAULT_RAID_REGION_SIZE * 2;

	if (!mrs)
		return rrs;

	if (!rrs)
		return mrs;

	if (mrs != rrs)
		log_verbose("Overriding default 'mirror_region_size' setting"
			    " with 'raid_region_size' setting of %u kiB",
			    rrs / 2);

	return rrs;
}

static int _round_down_pow2(int r)
{
	/* Set all bits to the right of the leftmost set bit */
	r |= (r >> 1);
	r |= (r >> 2);
	r |= (r >> 4);
	r |= (r >> 8);
	r |= (r >> 16);

	/* Pull out the leftmost set bit */
	return r & ~(r >> 1);
}

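/*
 * Worked example (added by the editor, illustrative only): for r = 1000
 * (binary 1111101000) the shifts above smear the top bit rightwards, giving
 * r = 1111111111 (1023); "r & ~(r >> 1)" then keeps only the highest bit,
 * so the result is 512 - the largest power of 2 that is <= 1000.
 */
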
uint32_t get_default_region_size(struct cmd_context *cmd)
{
	int pagesize = lvm_getpagesize();
	int region_size = _get_default_region_size(cmd);

	if (!is_power_of_2(region_size)) {
		region_size = _round_down_pow2(region_size);
		log_verbose("Reducing region size to %u kiB (power of 2).",
			    region_size / 2);
	}

	if (region_size % (pagesize >> SECTOR_SHIFT)) {
		region_size = DEFAULT_RAID_REGION_SIZE * 2;
		log_verbose("Using default region size %u kiB (multiple of page size).",
			    region_size / 2);
	}

	return (uint32_t) region_size;
}

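/*
 * Illustrative example (added by the editor, assuming a 4 KiB page size):
 * with raid_region_size = 2048 (KiB) configured, _get_default_region_size()
 * returns 2 * 2048 = 4096 sectors (2 MiB); that is already a power of 2 and
 * a multiple of a page (8 sectors), so it is used unchanged.  A value such
 * as 3000 KiB (6000 sectors) would first be rounded down to 4096 sectors.
 */
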
int add_seg_to_segs_using_this_lv(struct logical_volume *lv,
				  struct lv_segment *seg)
{
	struct seg_list *sl;

	dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
		if (sl->seg == seg) {
			sl->count++;
			return 1;
		}
	}

	log_very_verbose("Adding %s:" FMTu32 " as a user of %s.",
			 display_lvname(seg->lv), seg->le, display_lvname(lv));

	if (!(sl = dm_pool_zalloc(lv->vg->vgmem, sizeof(*sl)))) {
		log_error("Failed to allocate segment list.");
		return 0;
	}

	sl->count = 1;
	sl->seg = seg;
	dm_list_add(&lv->segs_using_this_lv, &sl->list);

	return 1;
}

int remove_seg_from_segs_using_this_lv(struct logical_volume *lv,
				       struct lv_segment *seg)
{
	struct seg_list *sl;

	dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
		if (sl->seg != seg)
			continue;
		if (sl->count > 1)
			sl->count--;
		else {
			log_very_verbose("%s:" FMTu32 " is no longer a user of %s.",
					 display_lvname(seg->lv), seg->le,
					 display_lvname(lv));
			dm_list_del(&sl->list);
		}
		return 1;
	}

	log_error(INTERNAL_ERROR "Segment %s:" FMTu32 " is not a user of %s.",
		  display_lvname(seg->lv), seg->le, display_lvname(lv));
	return 0;
}

/*
 * This is a function specialized for the common case where there is
 * only one segment which uses the LV.
 * e.g. the LV is a layer inserted by insert_layer_for_lv().
 *
 * In general, walk through lv->segs_using_this_lv.
 */
struct lv_segment *get_only_segment_using_this_lv(const struct logical_volume *lv)
{
	struct seg_list *sl;

	if (!lv) {
		log_error(INTERNAL_ERROR "get_only_segment_using_this_lv() called with NULL LV.");
		return NULL;
	}

	dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
		/* Needs to be the only item in the list */
		if (!dm_list_end(&lv->segs_using_this_lv, &sl->list))
			break;

		if (sl->count != 1) {
			log_error("%s is expected to have only one segment using it, "
				  "while %s:" FMTu32 " uses it %d times.",
				  display_lvname(lv), display_lvname(sl->seg->lv),
				  sl->seg->le, sl->count);
			return NULL;
		}

		return sl->seg;
	}

	log_error("%s is expected to have only one segment using it, while it has %d.",
		  display_lvname(lv), dm_list_size(&lv->segs_using_this_lv));

	return NULL;
}

/*
 * PVs used by a segment of an LV
 */
struct seg_pvs {
	struct dm_list list;

	struct dm_list pvs;	/* struct pv_list */

	uint32_t le;
	uint32_t len;
};

static struct seg_pvs *_find_seg_pvs_by_le(struct dm_list *list, uint32_t le)
{
	struct seg_pvs *spvs;

	dm_list_iterate_items(spvs, list)
		if (le >= spvs->le && le < spvs->le + spvs->len)
			return spvs;

	return NULL;
}

/*
 * Find first unused LV number.
 */
uint32_t find_free_lvnum(struct logical_volume *lv)
{
	int lvnum_used[MAX_RESTRICTED_LVS + 1] = { 0 };
	uint32_t i = 0;
	struct lv_list *lvl;
	int lvnum;

	dm_list_iterate_items(lvl, &lv->vg->lvs) {
		lvnum = lvnum_from_lvid(&lvl->lv->lvid);
		if (lvnum <= MAX_RESTRICTED_LVS)
			lvnum_used[lvnum] = 1;
	}

	while (lvnum_used[i])
		i++;

	/* FIXME What if none are free? */

	return i;
}

dm_percent_t copy_percent(const struct logical_volume *lv)
{
	uint32_t numerator = 0u, denominator = 0u;
	struct lv_segment *seg;

	dm_list_iterate_items(seg, &lv->segments) {
		denominator += seg->area_len;

		/* FIXME Generalise name of 'extents_copied' field */
		if (((seg_is_raid(seg) && !seg_is_any_raid0(seg)) || seg_is_mirrored(seg)) &&
		    (seg->area_count > 1))
			numerator += seg->extents_copied;
		else
			numerator += seg->area_len;
	}

	return denominator ? dm_make_percent(numerator, denominator) : DM_PERCENT_100;
}

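/*
 * Illustrative example (added by the editor, not from the original source):
 * for a 2-way mirror segment of area_len 100 with extents_copied 40 the loop
 * above yields numerator = 40 and denominator = 100, i.e. 40% synced; a plain
 * linear segment adds the same amount to both counters and so always reads
 * as 100%.
 */
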
/* Round up extents to next stripe boundary for number of stripes */
static uint32_t _round_to_stripe_boundary(struct volume_group *vg, uint32_t extents,
					  uint32_t stripes, int extend)
{
	uint32_t size_rest, new_extents = extents;

	if (!stripes)
		return extents;

	/* Round up extents to stripe divisible amount */
	if ((size_rest = extents % stripes)) {
		new_extents += extend ? stripes - size_rest : -size_rest;
		log_print_unless_silent("Rounding size %s (%u extents) %s to stripe boundary size %s (%u extents).",
					display_size(vg->cmd, (uint64_t) extents * vg->extent_size), extents,
					new_extents < extents ? "down" : "up",
					display_size(vg->cmd, (uint64_t) new_extents * vg->extent_size), new_extents);
	}

	return new_extents;
}

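/*
 * Illustrative example (added by the editor): extending an LV to 100 extents
 * across 3 stripes leaves a remainder of 1, so the size is rounded up to
 * 102 extents (extend = 1); reducing to 100 extents would instead be rounded
 * down to 99 (extend = 0).
 */
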
/*
 * All lv_segments get created here.
 */
struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
				    struct logical_volume *lv,
				    uint32_t le, uint32_t len,
				    uint32_t reshape_len,
				    uint64_t status,
				    uint32_t stripe_size,
				    struct logical_volume *log_lv,
				    uint32_t area_count,
				    uint32_t area_len,
				    uint32_t data_copies,
				    uint32_t chunk_size,
				    uint32_t region_size,
				    uint32_t extents_copied,
				    struct lv_segment *pvmove_source_seg)
{
	struct lv_segment *seg;
	struct dm_pool *mem = lv->vg->vgmem;
	uint32_t areas_sz = area_count * sizeof(*seg->areas);

	if (!segtype) {
		log_error(INTERNAL_ERROR "alloc_lv_segment: Missing segtype.");
		return NULL;
	}

	if (!(seg = dm_pool_zalloc(mem, sizeof(*seg))))
		return_NULL;

	if (!(seg->areas = dm_pool_zalloc(mem, areas_sz))) {
		dm_pool_free(mem, seg);
		return_NULL;
	}

	if (segtype_is_raid_with_meta(segtype) &&
	    !(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
		dm_pool_free(mem, seg); /* frees everything alloced since seg */
		return_NULL;
	}

	seg->segtype = segtype;
	seg->lv = lv;
	seg->le = le;
	seg->len = len;
	seg->reshape_len = reshape_len;
	seg->status = status;
	seg->stripe_size = stripe_size;
	seg->area_count = area_count;
	seg->area_len = area_len;
	seg->data_copies = data_copies ? : lv_raid_data_copies(segtype, area_count);
	seg->chunk_size = chunk_size;
	seg->region_size = region_size;
	seg->extents_copied = extents_copied;
	seg->pvmove_source_seg = pvmove_source_seg;
	dm_list_init(&seg->tags);
	dm_list_init(&seg->origin_list);
	dm_list_init(&seg->thin_messages);

	if (log_lv && !attach_mirror_log(seg, log_lv))
		return_NULL;

	if (segtype_is_mirror(segtype))
		lv->status |= MIRROR;

	if (segtype_is_mirrored(segtype))
		lv->status |= MIRRORED;

	return seg;
}

/*
 * Temporary helper to return number of data copies for
 * RAID segment @seg until seg->data_copies got added
 */
static uint32_t _raid_data_copies(struct lv_segment *seg)
{
	/*
	 * FIXME: needs to change once more than 2 are supported.
	 * I.e. use seg->data_copies then
	 */
	if (seg_is_raid10(seg))
		return 2;

	if (seg_is_raid1(seg))
		return seg->area_count;

	return seg->segtype->parity_devs + 1;
}

/* Data image count for RAID segment @seg */
static uint32_t _raid_stripes_count(struct lv_segment *seg)
{
	/*
	 * FIXME: raid10 needs to change once more than
	 * 2 data_copies and odd # of legs supported.
	 */
	if (seg_is_raid10(seg))
		return seg->area_count / _raid_data_copies(seg);

	return seg->area_count - seg->segtype->parity_devs;
}

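/*
 * Illustrative example (added by the editor, not from the original source):
 * a raid10 segment with 6 areas has 2 data copies, hence 6 / 2 = 3 stripes;
 * a raid6 segment with 5 areas has 2 parity devices, hence 5 - 2 = 3 stripes
 * and the helper above reports 2 + 1 = 3 data copies for it.
 */
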
static int _release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s,
						uint32_t area_reduction, int with_discard)
{
	struct lv_segment *cache_seg;
	struct logical_volume *lv = seg_lv(seg, s);

	if (seg_type(seg, s) == AREA_UNASSIGNED)
		return 1;

	if (seg_type(seg, s) == AREA_PV) {
		if (with_discard && !discard_pv_segment(seg_pvseg(seg, s), area_reduction))
			return_0;

		if (!release_pv_segment(seg_pvseg(seg, s), area_reduction))
			return_0;

		if (seg->area_len == area_reduction)
			seg_type(seg, s) = AREA_UNASSIGNED;

		return 1;
	}

	if (lv_is_mirror_image(lv) ||
	    lv_is_thin_pool_data(lv) ||
	    lv_is_vdo_pool_data(lv) ||
	    lv_is_cache_pool_data(lv)) {
		if (!lv_reduce(lv, area_reduction))
			return_0; /* FIXME: any upper level reporting */
		return 1;
	}

	if (seg_is_cache_pool(seg) &&
	    !dm_list_empty(&seg->lv->segs_using_this_lv)) {
		if (!(cache_seg = get_only_segment_using_this_lv(seg->lv)))
			return_0;

		if (!lv_cache_remove(cache_seg->lv))
			return_0;
	}

	if (lv_is_raid_image(lv)) {
		/* Calculate the amount of extents to reduce per rmeta/rimage LV */
		uint32_t rimage_extents;
		struct lv_segment *seg1 = first_seg(lv);

		/* FIXME: avoid extra seg_is_*() conditionals here */
		rimage_extents = raid_rimage_extents(seg1->segtype, area_reduction,
						     seg_is_any_raid0(seg) ? 0 : _raid_stripes_count(seg),
						     seg_is_raid10(seg) ? 1 : _raid_data_copies(seg));
		if (!rimage_extents)
			return 0;

		if (seg->meta_areas) {
			uint32_t meta_area_reduction;
			struct logical_volume *mlv;
			struct volume_group *vg = lv->vg;

			if (seg_metatype(seg, s) != AREA_LV ||
			    !(mlv = seg_metalv(seg, s)))
				return 0;

			meta_area_reduction = raid_rmeta_extents_delta(vg->cmd, lv->le_count, lv->le_count - rimage_extents,
								       seg->region_size, vg->extent_size);
			/* Limit for raid0_meta not having region size set */
			if (meta_area_reduction > mlv->le_count ||
			    !(lv->le_count - rimage_extents))
				meta_area_reduction = mlv->le_count;

			if (meta_area_reduction &&
			    !lv_reduce(mlv, meta_area_reduction))
				return_0; /* FIXME: any upper level reporting */
		}

		if (!lv_reduce(lv, rimage_extents))
			return_0; /* FIXME: any upper level reporting */

		return 1;
	}

	if (area_reduction == seg->area_len) {
		log_very_verbose("Remove %s:" FMTu32 "[" FMTu32 "] from "
				 "the top of LV %s:" FMTu32 ".",
				 display_lvname(seg->lv), seg->le, s,
				 display_lvname(lv), seg_le(seg, s));

		if (!remove_seg_from_segs_using_this_lv(lv, seg))
			return_0;

		seg_lv(seg, s) = NULL;
		seg_le(seg, s) = 0;
		seg_type(seg, s) = AREA_UNASSIGNED;
	}

	/* When the last VDO user is removed, the VDO pool is removed automatically */
	if (lv_is_vdo_pool(lv) && dm_list_empty(&(lv->segs_using_this_lv))) {
		struct volume_group *vg = lv->vg;

		if (!lv_remove(lv)) /* FIXME: any upper level reporting */
			return_0;

		if (vg_is_shared(vg)) {
			if (!lockd_lv_name(vg->cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args, "un", LDLV_PERSISTENT))
				log_error("Failed to unlock vdo pool in lvmlockd.");
			lockd_free_lv(vg->cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args);
		}
		return 1;
	}

	return 1;
}

int release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s, uint32_t area_reduction)
{
	return _release_and_discard_lv_segment_area(seg, s, area_reduction, 1);
}

int release_lv_segment_area(struct lv_segment *seg, uint32_t s, uint32_t area_reduction)
{
	return _release_and_discard_lv_segment_area(seg, s, area_reduction, 0);
}

/*
 * Move a segment area from one segment to another
 */
int move_lv_segment_area(struct lv_segment *seg_to, uint32_t area_to,
			 struct lv_segment *seg_from, uint32_t area_from)
{
	struct physical_volume *pv;
	struct logical_volume *lv;
	uint32_t pe, le;

	switch (seg_type(seg_from, area_from)) {
	case AREA_PV:
		pv = seg_pv(seg_from, area_from);
		pe = seg_pe(seg_from, area_from);

		if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len))
			return_0;

		if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
			return_0;

		if (!set_lv_segment_area_pv(seg_to, area_to, pv, pe))
			return_0;

		break;

	case AREA_LV:
		lv = seg_lv(seg_from, area_from);
		le = seg_le(seg_from, area_from);

		if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len))
			return_0;

		if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
			return_0;

		if (!set_lv_segment_area_lv(seg_to, area_to, lv, le, 0))
			return_0;

		break;

	case AREA_UNASSIGNED:
		if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
			return_0;
	}

	return 1;
}

/*
 * Link part of a PV to an LV segment.
 */
int set_lv_segment_area_pv(struct lv_segment *seg, uint32_t area_num,
			   struct physical_volume *pv, uint32_t pe)
{
	seg->areas[area_num].type = AREA_PV;

	if (!(seg_pvseg(seg, area_num) =
	      assign_peg_to_lvseg(pv, pe, seg->area_len, seg, area_num)))
		return_0;

	return 1;
}

/*
 * Link one LV segment to another. Assumes sizes already match.
 */
int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
			   struct logical_volume *lv, uint32_t le,
			   uint64_t status)
{
	log_very_verbose("Stack %s:" FMTu32 "[" FMTu32 "] on LV %s:" FMTu32 ".",
			 display_lvname(seg->lv), seg->le, area_num,
			 display_lvname(lv), le);

	if (area_num >= seg->area_count) {
		log_error(INTERNAL_ERROR "Trying to set too high an area number (%u >= %u) for LV %s.",
			  area_num, seg->area_count, display_lvname(seg->lv));
		return 0;
	}
	lv->status |= status;
	if (lv_is_raid_metadata(lv)) {
		seg->meta_areas[area_num].type = AREA_LV;
		seg_metalv(seg, area_num) = lv;
		if (le) {
			log_error(INTERNAL_ERROR "Meta le != 0.");
			return 0;
		}
		seg_metale(seg, area_num) = 0;
	} else {
		seg->areas[area_num].type = AREA_LV;
		seg_lv(seg, area_num) = lv;
		seg_le(seg, area_num) = le;
	}

	if (!add_seg_to_segs_using_this_lv(lv, seg))
		return_0;

	return 1;
}

/*
 * Prepare for adding parallel areas to an existing segment.
 */
int add_lv_segment_areas(struct lv_segment *seg, uint32_t new_area_count)
{
	struct lv_segment_area *newareas;
	uint32_t areas_sz = new_area_count * sizeof(*newareas);

	if (!(newareas = dm_pool_zalloc(seg->lv->vg->vgmem, areas_sz))) {
		log_error("Failed to allocate widened LV segment for %s.",
			  display_lvname(seg->lv));
		return 0;
	}

	if (seg->area_count)
		memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas));

	seg->areas = newareas;
	seg->area_count = new_area_count;

	return 1;
}

static uint32_t _calc_area_multiple(const struct segment_type *segtype,
				    const uint32_t area_count,
				    const uint32_t stripes)
{
	if (!area_count)
		return 1;

	/* Striped */
	if (segtype_is_striped(segtype))
		return area_count;

	/* Parity RAID (e.g. RAID 4/5/6) */
	if (segtype_is_raid(segtype) && segtype->parity_devs) {
		/*
		 * As articulated in _alloc_init, we can tell by
		 * the area_count whether a replacement drive is
		 * being allocated; and if this is the case, then
		 * there is no area_multiple that should be used.
		 */
		if (area_count <= segtype->parity_devs)
			return 1;

		return area_count - segtype->parity_devs;
	}

	/*
	 * RAID10 - only has 2-way mirror right now.
	 * If we are to move beyond 2-way RAID10, then
	 * the 'stripes' argument will always need to
	 * be given.
	 */
	if (segtype_is_raid10(segtype)) {
		if (!stripes)
			return area_count / 2;
		return stripes;
	}

	/* Mirrored stripes */
	if (stripes)
		return stripes;

	/* Mirrored */
	return 1;
}

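/*
 * Illustrative example (added by the editor, not from the original source):
 * a raid5 segment allocated with 4 areas (3 stripes + 1 parity) has
 * parity_devs = 1, so the area multiple is 4 - 1 = 3: a 300-extent LV then
 * needs 100 extents per area (seg->len = area_len * area_multiple).
 * For raid10 with 4 areas and no explicit stripe count, the multiple is
 * 4 / 2 = 2.
 */
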
/*
 * Reduce the size of an lv_segment. New size can be zero.
 */
static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
{
	uint32_t area_reduction, s;
	uint32_t areas = (seg->area_count / (seg_is_raid10(seg) ? seg->data_copies : 1)) - seg->segtype->parity_devs;

	/* Caller must ensure exact divisibility */
	if (seg_is_striped(seg) || seg_is_striped_raid(seg)) {
		if (reduction % areas) {
			log_error("Segment extent reduction %" PRIu32
				  " not divisible by #stripes %" PRIu32,
				  reduction, seg->area_count);
			return 0;
		}
		area_reduction = reduction / areas;
	} else
		area_reduction = reduction;

	for (s = 0; s < seg->area_count; s++)
		if (!release_and_discard_lv_segment_area(seg, s, area_reduction))
			return_0;

	seg->len -= reduction;

	if (seg_is_raid(seg))
		seg->area_len = seg->len;
	else
		seg->area_len -= area_reduction;

	return 1;
}

/* Find the bottommost resizable LV in the stack.
 * For the command-line tool it does not matter which LV in the stack was given. */
static struct logical_volume *_get_resizable_layer_lv(struct logical_volume *lv)
{
	while (lv_is_cache(lv) || /* _corig */
	       lv_is_integrity(lv) ||
	       lv_is_thin_pool(lv) || /* _tdata */
	       lv_is_vdo_pool(lv) || /* _vdata */
	       lv_is_writecache(lv)) /* _worigin */
		lv = seg_lv(first_seg(lv), 0); /* component-level down */

	return lv;
}

/* Check if LV is a component of a resizable LV.
 * When a resize changes the size of an LV, this also changes the size of the whole stack above it.
 * Supports syntax sugar - so the user can pick any LV in the stack for resize. */
static int _is_layered_lv(struct logical_volume *lv)
{
	return (lv_is_cache_origin(lv) ||
		lv_is_integrity_origin(lv) ||
		lv_is_thin_pool_data(lv) ||
		lv_is_vdo_pool_data(lv) ||
		lv_is_writecache_origin(lv));
}

/* Find the topmost LV in the stack - usually such LV is visible. */
static struct logical_volume *_get_top_layer_lv(struct logical_volume *lv)
{
	struct lv_segment *seg;

	while (_is_layered_lv(lv)) {
		if (!(seg = get_only_segment_using_this_lv(lv))) {
			log_error(INTERNAL_ERROR "No single component user of logical volume %s.",
				  display_lvname(lv));
			return NULL;
		}
		lv = seg->lv; /* component-level up */
	}

	return lv;
}

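/*
 * Illustrative example (added by the editor; names are hypothetical): for a
 * thin pool whose data sub-LV is cached, the stack might look like
 *	pool (thin-pool)  ->  pool_tdata (cache)  ->  pool_tdata_corig
 * _get_resizable_layer_lv() walks down to pool_tdata_corig, while
 * _get_top_layer_lv() walks back up from any of these LVs to "pool".
 */
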
/* Also handles stacking */
static int _setup_lv_size(struct logical_volume *lv, uint32_t extents)
{
	struct lv_segment *seg;

	lv->le_count = extents;
	lv->size = (uint64_t) extents * lv->vg->extent_size;

	while (lv->size && _is_layered_lv(lv)) {
		if (!(seg = get_only_segment_using_this_lv(lv)))
			return_0;

		seg->lv->le_count =
			seg->len =
			seg->area_len = lv->le_count;
		seg->lv->size = lv->size;
		lv = seg->lv;
	}

	return 1;
}

/*
 * Entry point for all LV reductions in size.
 */
static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
{
	struct lv_segment *seg = NULL;
	uint32_t count = extents;
	uint32_t reduction;
	struct logical_volume *pool_lv;
	struct logical_volume *external_lv = NULL;
	int is_raid10 = 0;
	uint32_t data_copies = 0;
	struct lv_list *lvl;
	int is_last_pool = lv_is_pool(lv);

	if (!dm_list_empty(&lv->segments)) {
		seg = first_seg(lv);
		is_raid10 = seg_is_any_raid10(seg) && seg->reshape_len;
		data_copies = seg->data_copies;
	}

	if (lv_is_merging_origin(lv)) {
		log_debug_metadata("Dropping snapshot merge of %s to removed origin %s.",
				   find_snapshot(lv)->lv->name, lv->name);
		clear_snapshot_merge(lv);
	}

	dm_list_iterate_back_items(seg, &lv->segments) {
		if (!count)
			break;

		if (seg->external_lv)
			external_lv = seg->external_lv;

		if (seg->len <= count) {
			if (seg->merge_lv) {
				log_debug_metadata("Dropping snapshot merge of removed %s to origin %s.",
						   seg->lv->name, seg->merge_lv->name);
				clear_snapshot_merge(seg->merge_lv);
			}

			/* remove this segment completely */
			/* FIXME Check this is safe */
			if (seg->log_lv && !lv_remove(seg->log_lv))
				return_0;

			if (seg->metadata_lv && !lv_remove(seg->metadata_lv))
				return_0;

			/* Remove cache origin only when removing (not on lv_empty()) */
			if (delete && seg_is_cache(seg)) {
				if (lv_is_pending_delete(seg->lv)) {
					/* Just dropping reference on origin when pending delete */
					if (!remove_seg_from_segs_using_this_lv(seg_lv(seg, 0), seg))
						return_0;
					seg_lv(seg, 0) = NULL;
					seg_le(seg, 0) = 0;
					seg_type(seg, 0) = AREA_UNASSIGNED;
					if (seg->pool_lv && !detach_pool_lv(seg))
						return_0;
				} else if (!lv_remove(seg_lv(seg, 0)))
					return_0;
			}

			if (delete && seg_is_integrity(seg)) {
				/* Remove integrity origin in addition to integrity layer. */
				if (!lv_remove(seg_lv(seg, 0)))
					return_0;
				/* Remove integrity metadata. */
				if (seg->integrity_meta_dev && !lv_remove(seg->integrity_meta_dev))
					return_0;
			}

			if ((pool_lv = seg->pool_lv)) {
				if (!detach_pool_lv(seg))
					return_0;
				/* When removing cached LV, remove pool as well */
				if (seg_is_cache(seg) && !lv_remove(pool_lv))
					return_0;
			}

			if (seg_is_thin_pool(seg)) {
				/* For some segtypes the size may differ between the segment size and its layered LV,
				 * i.e. thin-pool and tdata.
				 *
				 * This can become useful once multiple commits are supported
				 * while resizing a stacked LV.
				 */
				if (seg->len != seg_lv(seg, 0)->le_count) {
					seg->len = seg_lv(seg, 0)->le_count;
					/* FIXME: ATM capture as error as it should not happen. */
					log_debug(INTERNAL_ERROR "Pool size mismatched data size for %s",
						  display_lvname(seg->lv));
				}
			}

			dm_list_del(&seg->list);
			reduction = seg->len;
		} else
			reduction = count;

		if (!_lv_segment_reduce(seg, reduction))
			return_0;
		count -= reduction;
	}

	if (!_setup_lv_size(lv, lv->le_count - extents * (is_raid10 ? data_copies : 1)))
		return_0;

	if ((seg = first_seg(lv))) {
		if (is_raid10)
			seg->len = seg->area_len = lv->le_count;

		seg->extents_copied = seg->len;
	}

	if (!delete)
		return 1;

	if (lv == lv->vg->pool_metadata_spare_lv) {
		lv->status &= ~POOL_METADATA_SPARE;
		lv->vg->pool_metadata_spare_lv = NULL;
	}

	/* Remove the LV if it is now empty */
	if (!lv->le_count && !unlink_lv_from_vg(lv))
		return_0;
	else if (lv->vg->fid->fmt->ops->lv_setup &&
		 !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
		return_0;

	/* Removal of last user enforces refresh */
	if (external_lv && !lv_is_external_origin(external_lv) &&
	    lv_is_active(external_lv) &&
	    !lv_update_and_reload(external_lv))
		return_0;

	/* When removing the last pool, automatically drop the spare volume */
	if (is_last_pool && lv->vg->pool_metadata_spare_lv) {
		/* TODO: maybe use a list of pools or a counter to avoid linear search through VG */
		dm_list_iterate_items(lvl, &lv->vg->lvs)
			if (lv_is_thin_type(lvl->lv) ||
			    lv_is_cache_type(lvl->lv)) {
				is_last_pool = 0;
				break;
			}

		if (is_last_pool) {
			/* This is a purely internal LV volume, no question */
			if (!deactivate_lv(lv->vg->cmd, lv->vg->pool_metadata_spare_lv)) {
				log_error("Unable to deactivate spare logical volume %s.",
					  display_lvname(lv->vg->pool_metadata_spare_lv));
				return 0;
			}
			if (!lv_remove(lv->vg->pool_metadata_spare_lv))
				return_0;
		}
	}

	return 1;
}

/*
 * Empty an LV.
 */
int lv_empty(struct logical_volume *lv)
{
	return _lv_reduce(lv, lv->le_count, 0);
}

/*
 * Empty an LV and add error segment.
 */
int replace_lv_with_error_segment(struct logical_volume *lv)
{
	uint32_t len = lv->le_count;

	if (len && !lv_empty(lv))
		return_0;

	/* Minimum size required for a table. */
	if (!len)
		len = 1;

	/*
	 * Since we are replacing whatever was there with
	 * an error segment, we should also clear any flags
	 * that suggest it is anything other than "error".
	 */
	/* FIXME Check for other flags that need removing */
	lv->status &= ~(MIRROR|MIRRORED|PVMOVE|LOCKED);

	/* FIXME Check for any attached LVs that will become orphans e.g. mirror logs */

	if (!lv_add_virtual_segment(lv, 0, len, get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_ERROR)))
		return_0;

	return 1;
}

static int _lv_refresh_suspend_resume(const struct logical_volume *lv)
{
	struct cmd_context *cmd = lv->vg->cmd;
	int r = 1;

	if (!cmd->partial_activation && lv_is_partial(lv)) {
		log_error("Refusing refresh of partial LV %s."
			  " Use '--activationmode partial' to override.",
			  display_lvname(lv));
		return 0;
	}

	if (!suspend_lv(cmd, lv)) {
		log_error("Failed to suspend %s.", display_lvname(lv));
		r = 0;
	}

	if (!resume_lv(cmd, lv)) {
		log_error("Failed to reactivate %s.", display_lvname(lv));
		r = 0;
	}

	return r;
}

int lv_refresh_suspend_resume(const struct logical_volume *lv)
{
	if (!_lv_refresh_suspend_resume(lv))
		return 0;

	/*
	 * Remove any transiently activated error
	 * devices which aren't used any more.
	 */
	if (lv_is_raid(lv) && !lv_deactivate_any_missing_subdevs(lv)) {
		log_error("Failed to remove temporary SubLVs from %s", display_lvname(lv));
		return 0;
	}

	return 1;
}

/*
 * Remove given number of extents from LV.
 */
int lv_reduce(struct logical_volume *lv, uint32_t extents)
{
	struct lv_segment *seg = first_seg(lv);

	/* Ensure stripe boundary extents on RAID LVs */
	if (lv_is_raid(lv) && extents != lv->le_count)
		extents = _round_to_stripe_boundary(lv->vg, extents,
						    seg_is_raid1(seg) ? 0 : _raid_stripes_count(seg), 0);

	if ((extents == lv->le_count) && lv_is_component(lv) && lv_is_active(lv)) {
		/* When LV is removed, make sure it is inactive */
		log_error(INTERNAL_ERROR "Removing still active LV %s.", display_lvname(lv));
		return 0;
	}

	return _lv_reduce(lv, extents, 1);
}

int historical_glv_remove(struct generic_logical_volume *glv)
{
	struct generic_logical_volume *origin_glv;
	struct glv_list *glvl, *user_glvl;
	struct historical_logical_volume *hlv;
	int reconnected;

	if (!glv || !glv->is_historical)
		return_0;

	hlv = glv->historical;

	if (!(glv = find_historical_glv(hlv->vg, hlv->name, 0, &glvl))) {
		if (!(find_historical_glv(hlv->vg, hlv->name, 1, NULL))) {
			log_error(INTERNAL_ERROR "historical_glv_remove: historical LV %s/-%s not found ",
				  hlv->vg->name, hlv->name);
			return 0;
		}

		log_verbose("Historical LV %s/-%s already on removed list ",
			    hlv->vg->name, hlv->name);
		return 1;
	}

	if ((origin_glv = hlv->indirect_origin) &&
	    !remove_glv_from_indirect_glvs(origin_glv, glv))
		return_0;

	dm_list_iterate_items(user_glvl, &hlv->indirect_glvs) {
		reconnected = 0;
		if ((origin_glv && !origin_glv->is_historical) && !user_glvl->glv->is_historical)
			log_verbose("Removing historical connection between %s and %s.",
				    origin_glv->live->name, user_glvl->glv->live->name);
		else if (hlv->vg->cmd->record_historical_lvs) {
			if (!add_glv_to_indirect_glvs(hlv->vg->vgmem, origin_glv, user_glvl->glv))
				return_0;
			reconnected = 1;
		}

		if (!reconnected) {
			/*
			 * Break ancestry chain if we're removing historical LV and tracking
			 * historical LVs is switched off either via:
			 *   - "metadata/record_lvs_history=0" config
			 *   - "--nohistory" cmd line option
			 *
			 * Also, break the chain if we're unable to store such connection at all
			 * because we're removing the very last historical LV that was in between
			 * live LVs - pure live LVs can't store any indirect origin relation in
			 * metadata - we need at least one historical LV to do that!
			 */
			if (user_glvl->glv->is_historical)
				user_glvl->glv->historical->indirect_origin = NULL;
			else
				first_seg(user_glvl->glv->live)->indirect_origin = NULL;
		}
	}

	dm_list_move(&hlv->vg->removed_historical_lvs, &glvl->list);
	return 1;
}

/*
 * Completely remove an LV.
 */
int lv_remove(struct logical_volume *lv)
{
	if (lv_is_historical(lv))
		return historical_glv_remove(lv->this_glv);

	if (!lv_reduce(lv, lv->le_count))
		return_0;

	return 1;
}

/*
 * A set of contiguous physical extents allocated
 */
struct alloced_area {
	struct dm_list list;

	struct physical_volume *pv;
	uint32_t pe;
	uint32_t len;
};

/*
 * Details of an allocation attempt
 */
struct alloc_handle {
	struct cmd_context *cmd;
	struct dm_pool *mem;

	alloc_policy_t alloc;		/* Overall policy */
	int approx_alloc;		/* get as much as possible up to new_extents */
	uint32_t new_extents;		/* Number of new extents required */
	uint32_t area_count;		/* Number of parallel areas */
	uint32_t parity_count;		/* Adds to area_count, but not area_multiple */
	uint32_t area_multiple;		/* seg->len = area_len * area_multiple */
	uint32_t log_area_count;	/* Number of parallel logs */
	uint32_t metadata_area_count;	/* Number of parallel metadata areas */
	uint32_t log_len;		/* Length of log/metadata_area */
	uint32_t region_size;		/* Mirror region size */
	uint32_t total_area_len;	/* Total number of parallel extents */

	unsigned maximise_cling;
	unsigned mirror_logs_separate;	/* Force mirror logs on separate PVs? */

	/*
	 * RAID devices require a metadata area that accompanies each
	 * device. During initial creation, it is best to look for space
	 * that is new_extents + log_len and then split that between two
	 * allocated areas when found. 'alloc_and_split_meta' indicates
	 * that this is the desired dynamic.
	 *
	 * This same idea is used by cache LVs to get the metadata device
	 * and data device allocated together.
	 */
	unsigned alloc_and_split_meta;
	unsigned split_metadata_is_allocated;	/* Metadata has been allocated */

	const struct dm_config_node *cling_tag_list_cn;

	struct dm_list *parallel_areas;	/* PVs to avoid */

	/*
	 * Contains area_count lists of areas allocated to data stripes
	 * followed by log_area_count lists of areas allocated to log stripes.
	 */
	struct dm_list alloced_areas[];
};

/*
 * Returns log device size in extents, algorithm from kernel code
 */
#define BYTE_SHIFT 3
static uint32_t _mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len)
{
	uint64_t area_size, region_count, bitset_size, log_size;

	area_size = (uint64_t) area_len * pe_size;
	region_count = dm_div_up(area_size, region_size);

	/* Work out how many "unsigned long"s we need to hold the bitset. */
	bitset_size = dm_round_up(region_count, sizeof(uint32_t) << BYTE_SHIFT);
	bitset_size >>= BYTE_SHIFT;

	/* Log device holds both header and bitset. */
	log_size = dm_round_up((MIRROR_LOG_OFFSET << SECTOR_SHIFT) + bitset_size, 1 << SECTOR_SHIFT);
	log_size >>= SECTOR_SHIFT;
	log_size = dm_div_up(log_size, pe_size);

	if (log_size > UINT32_MAX) {
		log_error("Log size needs too many extents "FMTu64" with region size of %u sectors.",
			  log_size, region_size);
		log_size = UINT32_MAX;
		/* VG likely will not have enough free space for this allocation -> error */
	}

	return (uint32_t) log_size;
}

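/*
 * Worked example (illustrative only, figures assumed rather than taken from
 * the code above): with 4 MiB extents (pe_size = 8192 sectors), a 512 KiB
 * region_size (1024 sectors) and area_len = 2560 extents, area_size is
 * 20971520 sectors and region_count is 20480.  The bitset rounds up to
 * 20480 bits = 2560 bytes; assuming MIRROR_LOG_OFFSET is 2 sectors, the
 * header plus bitset is 1024 + 2560 = 3584 bytes = 7 sectors, which
 * dm_div_up() against pe_size turns into a single extent for the log.
 */
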
/* Is there enough total space or should we give up immediately? */
static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms,
				uint32_t allocated, uint32_t log_still_needed,
				uint32_t extents_still_needed)
{
	uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
	uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple;
	uint32_t metadata_extents_needed = (ah->alloc_and_split_meta ? 0 : ah->metadata_area_count * RAID_METADATA_AREA_LEN) +
					   (log_still_needed ? ah->log_len : 0); /* One each */
	uint64_t total_extents_needed = (uint64_t) area_extents_needed + parity_extents_needed + metadata_extents_needed;
	uint32_t free_pes = pv_maps_size(pvms);

	if (total_extents_needed > free_pes) {
		log_error("Insufficient free space: %" PRIu64 " extents needed,"
			  " but only %" PRIu32 " available",
			  total_extents_needed, free_pes);
		return 0;
	}

	return 1;
}

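/*
 * Arithmetic sketch (all values assumed for illustration): with
 * area_count = 3, parity_count = 1, area_multiple = 3,
 * metadata_area_count = 4, alloc_and_split_meta = 0 and no log needed,
 * asking for 300 more extents requires 300 * 3 / 3 = 300 data extents,
 * 300 * 1 / 3 = 100 parity extents and 4 * RAID_METADATA_AREA_LEN = 4
 * metadata extents, so at least 404 free PEs must exist in pvms.
 */
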
/* For striped mirrors, all the areas are counted, through the mirror layer */
static uint32_t _stripes_per_mimage(struct lv_segment *seg)
{
	struct lv_segment *last_lvseg;

	if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) {
		last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment);
		if (seg_is_striped(last_lvseg))
			return last_lvseg->area_count;
	}

	return 1;
}

static void _init_alloc_parms(struct alloc_handle *ah,
			      struct alloc_parms *alloc_parms,
			      alloc_policy_t alloc,
			      struct lv_segment *prev_lvseg, unsigned can_split,
			      uint32_t allocated, uint32_t extents_still_needed)
{
	alloc_parms->alloc = alloc;
	alloc_parms->prev_lvseg = prev_lvseg;
	alloc_parms->flags = 0;
	alloc_parms->extents_still_needed = extents_still_needed;

	/*
	 * Only attempt contiguous/cling allocation to previous segment
	 * areas if the number of areas matches.
	 */
	if (alloc_parms->prev_lvseg &&
	    ((ah->area_count + ah->parity_count) == prev_lvseg->area_count)) {
		alloc_parms->flags |= A_AREA_COUNT_MATCHES;

		/* Are there any preceding segments we must follow on from? */
		if (alloc_parms->alloc == ALLOC_CONTIGUOUS) {
			alloc_parms->flags |= A_CONTIGUOUS_TO_LVSEG;
			alloc_parms->flags |= A_POSITIONAL_FILL;
		} else if ((alloc_parms->alloc == ALLOC_CLING) ||
			   (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
			alloc_parms->flags |= A_CLING_TO_LVSEG;
			alloc_parms->flags |= A_POSITIONAL_FILL;
		}
	} else
		/*
		 * A cling allocation that follows a successful contiguous
		 * allocation must use the same PVs (or else fail).
		 */
		if ((alloc_parms->alloc == ALLOC_CLING) ||
		    (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
			alloc_parms->flags |= A_CLING_TO_ALLOCED;
			alloc_parms->flags |= A_POSITIONAL_FILL;
		}

	if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)
		alloc_parms->flags |= A_CLING_BY_TAGS;

	if (!(alloc_parms->flags & A_POSITIONAL_FILL) &&
	    (alloc_parms->alloc == ALLOC_CONTIGUOUS) &&
	    ah->cling_tag_list_cn)
		alloc_parms->flags |= A_PARTITION_BY_TAGS;

	/*
	 * For normal allocations, if any extents have already been found
	 * for allocation, prefer to place further extents on the same disks as
	 * have already been used.
	 */
	if (ah->maximise_cling &&
	    (alloc_parms->alloc == ALLOC_NORMAL) &&
	    (allocated != alloc_parms->extents_still_needed))
		alloc_parms->flags |= A_CLING_TO_ALLOCED;

	if (can_split)
		alloc_parms->flags |= A_CAN_SPLIT;
}

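/*
 * Example of the resulting flags (illustrative): extending an LV with
 * --alloc cling where the new segment has the same number of areas as
 * prev_lvseg and splitting is allowed ends up with A_AREA_COUNT_MATCHES,
 * A_CLING_TO_LVSEG, A_POSITIONAL_FILL and A_CAN_SPLIT set.  A retry with
 * --alloc normal, maximise_cling enabled and some extents already
 * allocated carries A_CLING_TO_ALLOCED (plus A_CAN_SPLIT) instead.
 */
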
static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
				  uint32_t area_count,
				  uint32_t stripe_size,
				  const struct segment_type *segtype,
				  struct alloced_area *aa,
				  uint32_t region_size)
{
	uint32_t s, extents, area_multiple;
	struct lv_segment *seg;

	area_multiple = _calc_area_multiple(segtype, area_count, 0);
	extents = aa[0].len * area_multiple;

	if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0,
				     status, stripe_size, NULL,
				     area_count,
				     aa[0].len, 0, 0u, region_size, 0u, NULL))) {
		log_error("Couldn't allocate new LV segment.");
		return 0;
	}

	for (s = 0; s < area_count; s++)
		if (!set_lv_segment_area_pv(seg, s, aa[s].pv, aa[s].pe))
			return_0;

	dm_list_add(&lv->segments, &seg->list);

	extents = aa[0].len * area_multiple;

	if (!_setup_lv_size(lv, lv->le_count + extents))
		return_0;

	return 1;
}

static int _setup_alloced_segments(struct logical_volume *lv,
				   struct dm_list *alloced_areas,
				   uint32_t area_count,
				   uint64_t status,
				   uint32_t stripe_size,
				   const struct segment_type *segtype,
				   uint32_t region_size)
{
	struct alloced_area *aa;

	dm_list_iterate_items(aa, &alloced_areas[0]) {
		if (!_setup_alloced_segment(lv, status, area_count,
					    stripe_size, segtype, aa,
					    region_size))
			return_0;
	}

	return 1;
}

/*
 * This function takes a list of pv_areas and adds them to allocated_areas.
 * If the complete area is not needed then it gets split.
 * The part used is removed from the pv_map so it can't be allocated twice.
 */
static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocate,
				struct alloc_state *alloc_state, uint32_t ix_log_offset)
{
	uint32_t area_len, len;
	uint32_t s, smeta;
	uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */
	uint32_t total_area_count;
	struct alloced_area *aa;
	struct pv_area *pva;

	total_area_count = ah->area_count + ah->parity_count + alloc_state->log_area_count_still_needed;
	if (!total_area_count) {
		log_warn(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do.");
		return 1;
	}

	area_len = max_to_allocate / ah->area_multiple;

	/* Reduce area_len to the smallest of the areas */
	for (s = 0; s < ah->area_count + ah->parity_count; s++)
		if (area_len > alloc_state->areas[s].used)
			area_len = alloc_state->areas[s].used;

	len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? total_area_count * 2 : total_area_count;
	len *= sizeof(*aa);
	if (!(aa = dm_pool_alloc(ah->mem, len))) {
		log_error("alloced_area allocation failed");
		return 0;
	}

	/*
	 * Areas consists of area_count areas for data stripes, then
	 * ix_log_skip areas to skip, then log_area_count areas to use for the
	 * log, then some areas too small for the log.
	 */
	len = area_len;
	for (s = 0; s < total_area_count; s++) {
		if (s == (ah->area_count + ah->parity_count)) {
			ix_log_skip = ix_log_offset - ah->area_count;
			len = ah->log_len;
		}

		pva = alloc_state->areas[s + ix_log_skip].pva;
		if (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) {
			/*
			 * The metadata area goes at the front of the allocated
			 * space for now, but could easily go at the end (or
			 * middle!).
			 *
			 * Even though we split these two from the same
			 * allocation, we store the images at the beginning
			 * of the areas array and the metadata at the end.
			 */
			smeta = s + ah->area_count + ah->parity_count;
			aa[smeta].pv = pva->map->pv;
			aa[smeta].pe = pva->start;
			aa[smeta].len = ah->log_len;
			if (aa[smeta].len > pva->count) {
				log_error("Metadata does not fit on a single PV.");
				return 0;
			}
			log_debug_alloc("Allocating parallel metadata area %" PRIu32
					" on %s start PE %" PRIu32
					" length %" PRIu32 ".",
					(smeta - (ah->area_count + ah->parity_count)),
					pv_dev_name(aa[smeta].pv), aa[smeta].pe,
					aa[smeta].len);

			consume_pv_area(pva, aa[smeta].len);
			dm_list_add(&ah->alloced_areas[smeta], &aa[smeta].list);
		}
		aa[s].len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? len - ah->log_len : len;
		/* Skip empty allocations */
		if (!aa[s].len)
			continue;

		aa[s].pv = pva->map->pv;
		aa[s].pe = pva->start;

		log_debug_alloc("Allocating parallel area %" PRIu32
				" on %s start PE %" PRIu32 " length %" PRIu32 ".",
				s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len);

		consume_pv_area(pva, aa[s].len);

		dm_list_add(&ah->alloced_areas[s], &aa[s].list);
	}

	/* Only need to alloc metadata from the first batch */
	if (ah->alloc_and_split_meta)
		ah->split_metadata_is_allocated = 1;

	ah->total_area_len += area_len;

	alloc_state->allocated += area_len * ah->area_multiple;

	return 1;
}

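/*
 * Illustrative layout (numbers assumed): with area_count = 2,
 * parity_count = 0 and one log area still needed, alloc_state->areas[]
 * is consumed as [data0][data1][ix_log_skip areas skipped][log0] and the
 * resulting alloced_area entries land on ah->alloced_areas[0], [1] and
 * [2] respectively.  With alloc_and_split_meta set, the first log_len
 * extents of each allocated area are carved off as the matching metadata
 * area and stored at the end of the aa[] array instead.
 */
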
/*
 * Call fn for each AREA_PV used by the LV segment at lv:le of length *max_seg_len.
 * If any constituent area contains more than one segment, max_seg_len is
 * reduced to cover only the first.
 * fn should return 0 on error, 1 to continue scanning or >1 to terminate without error.
 * In the last case, this function passes on the return code.
 * FIXME I think some callers are expecting this to check all PV segments used by an LV.
 */
static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
			uint32_t le, uint32_t len, struct lv_segment *seg,
			uint32_t *max_seg_len,
			uint32_t first_area, uint32_t max_areas,
			int top_level_area_index,
			int only_single_area_segments,
			int (*fn)(struct cmd_context *cmd,
				  struct pv_segment *peg, uint32_t s,
				  void *data),
			void *data)
{
	uint32_t s;
	uint32_t remaining_seg_len, area_len, area_multiple;
	uint32_t stripes_per_mimage = 1;
	int r = 1;

	if (!seg && !(seg = find_seg_by_le(lv, le))) {
		log_error("Failed to find segment for %s extent %" PRIu32,
			  lv->name, le);
		return 0;
	}

	/* Remaining logical length of segment */
	remaining_seg_len = seg->len - (le - seg->le);

	if (remaining_seg_len > len)
		remaining_seg_len = len;

	if (max_seg_len && *max_seg_len > remaining_seg_len)
		*max_seg_len = remaining_seg_len;

	area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0);
	area_len = (remaining_seg_len / area_multiple) ? : 1;

	/* For striped mirrors, all the areas are counted, through the mirror layer */
	if (top_level_area_index == -1)
		stripes_per_mimage = _stripes_per_mimage(seg);

	for (s = first_area;
	     s < seg->area_count && (!max_areas || s <= max_areas);
	     s++) {
		if (seg_type(seg, s) == AREA_LV) {
			if (!(r = _for_each_pv(cmd, seg_lv(seg, s),
					       seg_le(seg, s) +
					       (le - seg->le) / area_multiple,
					       area_len, NULL, max_seg_len, 0,
					       (stripes_per_mimage == 1) && only_single_area_segments ? 1U : 0U,
					       (top_level_area_index != -1) ? top_level_area_index : (int) (s * stripes_per_mimage),
					       only_single_area_segments, fn,
					       data)))
				stack;
		} else if (seg_type(seg, s) == AREA_PV)
			if (!(r = fn(cmd, seg_pvseg(seg, s), top_level_area_index != -1 ? (uint32_t) top_level_area_index + s : s, data)))
				stack;
		if (r != 1)
			return r;
	}

	/* FIXME only_single_area_segments used as workaround to skip log LV - needs new param? */
	if (!only_single_area_segments && seg_is_mirrored(seg) && seg->log_lv) {
		if (!(r = _for_each_pv(cmd, seg->log_lv, 0, seg->log_lv->le_count, NULL,
				       NULL, 0, 0, 0, only_single_area_segments,
				       fn, data)))
			stack;
		if (r != 1)
			return r;
	}

	/* FIXME Add snapshot cow, thin meta etc. */

	/*
	if (!only_single_area_segments && !max_areas && seg_is_raid(seg)) {
		for (s = first_area; s < seg->area_count; s++) {
			if (seg_metalv(seg, s))
				if (!(r = _for_each_pv(cmd, seg_metalv(seg, s), 0, seg_metalv(seg, s)->le_count, NULL,
						       NULL, 0, 0, 0, 0, fn, data)))
					stack;
			if (r != 1)
				return r;
		}
	}
	*/

	return 1;
}

static int _comp_area(const void *l, const void *r)
{
	const struct pv_area_used *lhs = (const struct pv_area_used *) l;
	const struct pv_area_used *rhs = (const struct pv_area_used *) r;

	if (lhs->used < rhs->used)
		return 1;

	if (lhs->used > rhs->used)
		return -1;

	return 0;
}

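/*
 * Note: returning 1 when lhs->used < rhs->used makes a qsort(3)-style
 * sort order pv_area_used entries by descending 'used', i.e. the largest
 * candidate areas come first.  Illustrative call (variable names assumed):
 *
 *	qsort(areas, num_areas, sizeof(*areas), _comp_area);
 */
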
/*
 * Search for pvseg that matches condition
 */
struct pv_match {
	int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva);

	struct alloc_handle *ah;
	struct alloc_state *alloc_state;
	struct pv_area *pva;
	const struct dm_config_node *cling_tag_list_cn;
	int s;	/* Area index of match */
};

/*
 * Is PV area on the same PV?
 */
static int _is_same_pv(struct pv_match *pvmatch __attribute__((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
	if (pvseg->pv != pva->map->pv)
		return 0;

	return 1;
}

/*
 * Does PV area have a tag listed in allocation/cling_tag_list that
 * matches EITHER a tag of the PV of the existing segment OR a tag in pv_tags?
 * If mem is set, then instead we append a list of matching tags for printing to the object there.
 */
static int _match_pv_tags(const struct dm_config_node *cling_tag_list_cn,
			  struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
			  struct physical_volume *pv2, struct dm_list *pv_tags, unsigned validate_only,
			  struct dm_pool *mem, unsigned parallel_pv)
{
	const struct dm_config_value *cv;
	const char *str;
	const char *tag_matched;
	struct dm_list *tags_to_match = mem ? NULL : pv_tags ? : &pv2->tags;
	struct dm_str_list *sl;
	unsigned first_tag = 1;

	for (cv = cling_tag_list_cn->v; cv; cv = cv->next) {
		if (cv->type != DM_CFG_STRING) {
			if (validate_only)
				log_warn("WARNING: Ignoring invalid string in config file entry "
					 "allocation/cling_tag_list");
			continue;
		}
		str = cv->v.str;
		if (!*str) {
			if (validate_only)
				log_warn("WARNING: Ignoring empty string in config file entry "
					 "allocation/cling_tag_list");
			continue;
		}

		if (*str != '@') {
			if (validate_only)
				log_warn("WARNING: Ignoring string not starting with @ in config file entry "
					 "allocation/cling_tag_list: %s", str);
			continue;
		}

		str++;

		if (!*str) {
			if (validate_only)
				log_warn("WARNING: Ignoring empty tag in config file entry "
					 "allocation/cling_tag_list");
			continue;
		}

		if (validate_only)
			continue;

		/* Wildcard matches any tag against any tag. */
		if (!strcmp(str, "*")) {
			if (mem) {
				dm_list_iterate_items(sl, &pv1->tags) {
					if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
						log_error("PV tags string extension failed.");
						return 0;
					}
					first_tag = 0;
					if (!dm_pool_grow_object(mem, sl->str, 0)) {
						log_error("PV tags string extension failed.");
						return 0;
					}
				}
				continue;
			}

			if (!str_list_match_list(&pv1->tags, tags_to_match, &tag_matched))
				continue;

			if (!pv_tags) {
				if (parallel_pv)
					log_debug_alloc("Not using free space on %s: Matched allocation PV tag %s on existing parallel PV %s.",
							pv_dev_name(pv1), tag_matched, pv2 ? pv_dev_name(pv2) : "-");
				else
					log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
							tag_matched, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-");
			} else
				log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
						" from consideration: PV tag %s already used.",
						area_num, pv_dev_name(pv1), pv1_start_pe, tag_matched);
			return 1;
		}

		if (!str_list_match_item(&pv1->tags, str) ||
		    (tags_to_match && !str_list_match_item(tags_to_match, str)))
			continue;

		if (mem) {
			if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
				log_error("PV tags string extension failed.");
				return 0;
			}
			first_tag = 0;
			if (!dm_pool_grow_object(mem, str, 0)) {
				log_error("PV tags string extension failed.");
				return 0;
			}
			continue;
		}

		if (!pv_tags) {
			if (parallel_pv)
				log_debug_alloc("Not using free space on %s: Matched allocation PV tag %s on existing parallel PV %s.",
						pv2 ? pv_dev_name(pv2) : "-", str, pv_dev_name(pv1));
			else
				log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
						str, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-");
		} else
			log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
					" from consideration: PV tag %s already used.",
					area_num, pv_dev_name(pv1), pv1_start_pe, str);

		return 1;
	}

	if (mem)
		return 1;

	return 0;
}

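/*
 * Illustrative lvm.conf fragment accepted by the parser above (tag names
 * are examples only).  Every entry must start with '@'; "@*" matches any
 * tag against any tag:
 *
 *	allocation {
 *		cling_tag_list = [ "@site_a", "@site_b" ]
 *	}
 */
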
static int _validate_tag_list(const struct dm_config_node *cling_tag_list_cn)
{
	return _match_pv_tags(cling_tag_list_cn, NULL, 0, 0, NULL, NULL, 1, NULL, 0);
}

static int _tags_list_str(struct dm_pool *mem, struct physical_volume *pv1, const struct dm_config_node *cling_tag_list_cn)
{
	if (!_match_pv_tags(cling_tag_list_cn, pv1, 0, 0, NULL, NULL, 0, mem, 0)) {
		dm_pool_abandon_object(mem);
		return_0;
	}

	return 1;
}

/*
 * Does PV area have a tag listed in allocation/cling_tag_list that
 * matches a tag in the pv_tags list?
 */
static int _pv_has_matching_tag(const struct dm_config_node *cling_tag_list_cn,
				struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
				struct dm_list *pv_tags)
{
	return _match_pv_tags(cling_tag_list_cn, pv1, pv1_start_pe, area_num, NULL, pv_tags, 0, NULL, 0);
}

/*
 * Does PV area have a tag listed in allocation/cling_tag_list that
 * matches a tag of the PV of the existing segment?
 */
static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn,
				  struct physical_volume *pv1, struct physical_volume *pv2,
				  unsigned parallel_pv)
{
	return _match_pv_tags(cling_tag_list_cn, pv1, 0, 0, pv2, NULL, 0, NULL, parallel_pv);
}

static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva)
{
	return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv, 0);
}

static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas,
			       const struct dm_config_node *cling_tag_list_cn)
{
	struct seg_pvs *spvs;
	struct pv_list *pvl;
	char *pvnames;
	unsigned first;

	if (!parallel_areas)
		return 1;

	dm_list_iterate_items(spvs, parallel_areas) {
		first = 1;

		if (!dm_pool_begin_object(mem, 256)) {
			log_error("dm_pool_begin_object failed");
			return 0;
		}

		dm_list_iterate_items(pvl, &spvs->pvs) {
			if (!first && !dm_pool_grow_object(mem, " ", 1)) {
				log_error("dm_pool_grow_object failed");
				dm_pool_abandon_object(mem);
				return 0;
			}

			if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) {
				log_error("dm_pool_grow_object failed");
				dm_pool_abandon_object(mem);
				return 0;
			}

			if (cling_tag_list_cn) {
				if (!dm_pool_grow_object(mem, "(", 1)) {
					log_error("dm_pool_grow_object failed");
					dm_pool_abandon_object(mem);
					return 0;
				}
				if (!_tags_list_str(mem, pvl->pv, cling_tag_list_cn)) {
					dm_pool_abandon_object(mem);
					return_0;
				}
				if (!dm_pool_grow_object(mem, ")", 1)) {
					log_error("dm_pool_grow_object failed");
					dm_pool_abandon_object(mem);
					return 0;
				}
			}

			first = 0;
		}

		if (!dm_pool_grow_object(mem, "\0", 1)) {
			log_error("dm_pool_grow_object failed");
			dm_pool_abandon_object(mem);
			return 0;
		}

		pvnames = dm_pool_end_object(mem);
		log_debug_alloc("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s",
				spvs->le, spvs->len, pvnames);
		dm_pool_free(mem, pvnames);
	}

	return 1;
}

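/*
 * Example of the debug line produced above (device names and tags are
 * hypothetical):
 *
 *	Parallel PVs at LE 0 length 1280: /dev/sda1(site_a) /dev/sdb1(site_b)
 *
 * The parenthesised tag lists only appear when a cling_tag_list is
 * configured.
 */
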
/*
 * Is PV area contiguous to PV segment?
 */
static int _is_contiguous(struct pv_match *pvmatch __attribute__((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
	if (pvseg->pv != pva->map->pv)
		return 0;

	if (pvseg->pe + pvseg->len != pva->start)
		return 0;

	return 1;
}

static int _reserve_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
			 uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
	struct pv_area_used *area_used = &alloc_state->areas[ix_pva];
	const char *pv_tag_list = NULL;

	if (ah->cling_tag_list_cn) {
		if (!dm_pool_begin_object(ah->mem, 256)) {
			log_error("PV tags string allocation failed.");
			return 0;
		} else if (!_tags_list_str(ah->mem, pva->map->pv, ah->cling_tag_list_cn))
			dm_pool_abandon_object(ah->mem);
		else if (!dm_pool_grow_object(ah->mem, "\0", 1)) {
			dm_pool_abandon_object(ah->mem);
			log_error("PV tags string extension failed.");
			return 0;
		} else
			pv_tag_list = dm_pool_end_object(ah->mem);
	}

	log_debug_alloc("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
			" length %" PRIu32 " leaving %" PRIu32 "%s%s.",
			area_used->pva ? "Changing " : "Considering",
			ix_pva, area_used->pva ? "to" : "as",
			dev_name(pva->map->pv->dev), pva->start, required, unreserved,
			pv_tag_list ? " with PV tags: " : "",
			pv_tag_list ? : "");

	if (pv_tag_list)
		dm_pool_free(ah->mem, (void *)pv_tag_list);

	area_used->pva = pva;
	area_used->used = required;

	return 1;
}

static int _reserve_required_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
				  uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
	uint32_t s;
	struct pv_area_used *new_state;

	/* Expand areas array if needed after an area was split. */
	if (ix_pva >= alloc_state->areas_size) {
		alloc_state->areas_size *= 2;
		if (!(new_state = realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
			log_error("Memory reallocation for parallel areas failed.");
			return 0;
		}
		alloc_state->areas = new_state;
		for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
			alloc_state->areas[s].pva = NULL;
	}

	if (!_reserve_area(ah, alloc_state, pva, required, ix_pva, unreserved))
		return_0;

	return 1;
}

static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
			 struct pv_segment *pvseg, uint32_t s,
			 void *data)
{
	struct pv_match *pvmatch = data;
	int positional = pvmatch->alloc_state->alloc_parms->flags & A_POSITIONAL_FILL;

	if (positional && pvmatch->alloc_state->areas[s].pva)
		return 1;	/* Area already assigned */

	if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
		return 1;	/* Continue */

	if (positional && (s >= pvmatch->alloc_state->num_positional_areas))
		return 1;

	/* FIXME The previous test should make this one redundant. */
	if (positional && (s >= pvmatch->alloc_state->areas_size))
		return 1;

	/*
	 * Only used for cling and contiguous policies (which only make one allocation per PV)
	 * so it's safe to say all the available space is used.
	 */
	if (positional &&
	    !_reserve_required_area(pvmatch->ah, pvmatch->alloc_state, pvmatch->pva, pvmatch->pva->count, s, 0))
		return_0;

	return 2;	/* Finished */
}

/*
 * Is pva on same PV as any existing areas?
 */
static int _check_cling(struct alloc_handle *ah,
			const struct dm_config_node *cling_tag_list_cn,
			struct lv_segment *prev_lvseg, struct pv_area *pva,
			struct alloc_state *alloc_state)
{
	struct pv_match pvmatch;
	int r;
	uint32_t le, len;

	pvmatch.ah = ah;
	pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
	pvmatch.alloc_state = alloc_state;
	pvmatch.pva = pva;
	pvmatch.cling_tag_list_cn = cling_tag_list_cn;

	if (ah->maximise_cling) {
		/* Check entire LV */
		le = 0;
		len = prev_lvseg->le + prev_lvseg->len;
	} else {
		/* Only check 1 LE at end of previous LV segment */
		le = prev_lvseg->le + prev_lvseg->len - 1;
		len = 1;
	}

	/* FIXME Cope with stacks by flattening */
	if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, le, len, NULL, NULL,
			       0, 0, -1, 1,
			       _is_condition, &pvmatch)))
		stack;

	if (r != 2)
		return 0;

	return 1;
}

/*
 * Is pva contiguous to any existing areas or on the same PV?
 */
static int _check_contiguous(struct alloc_handle *ah,
			     struct lv_segment *prev_lvseg, struct pv_area *pva,
			     struct alloc_state *alloc_state)
{
	struct pv_match pvmatch;
	int r;

	pvmatch.ah = ah;
	pvmatch.condition = _is_contiguous;
	pvmatch.alloc_state = alloc_state;