lvm2/lib/metadata/lv_manip.c
Zdenek Kabelac 2c6a2b6e86 vdo: support vdo_pool_header_size
Add a profilable, configurable setting for the vdo pool header size, which is
used as 'extra' empty space at the front and end of the vdo-pool device
to avoid having a disk in the system that may present the same data as the
real vdo LV.

For some conversion cases, however, we may need to allow using a '0' header size.

TODO: in this case we may eventually avoid adding the 'linear' mapping layer
in the future - but this requires further modifications across the lvm code base.
2021-06-28 20:41:07 +02:00


/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "lib/misc/lib.h"
#include "lib/metadata/metadata.h"
#include "lib/locking/locking.h"
#include "pv_map.h"
#include "lib/misc/lvm-string.h"
#include "lib/commands/toolcontext.h"
#include "lib/metadata/lv_alloc.h"
#include "lib/metadata/pv_alloc.h"
#include "lib/display/display.h"
#include "lib/metadata/segtype.h"
#include "lib/format_text/archiver.h"
#include "lib/activate/activate.h"
#include "lib/datastruct/str_list.h"
#include "lib/config/defaults.h"
#include "lib/misc/lvm-exec.h"
#include "lib/mm/memlock.h"
#include "lib/locking/lvmlockd.h"
#include "lib/label/label.h"
#include "lib/misc/lvm-signal.h"
#ifdef HAVE_BLKZEROOUT
#include <sys/ioctl.h>
#include <linux/fs.h>
#endif
typedef enum {
PREFERRED,
USE_AREA,
NEXT_PV,
NEXT_AREA
} area_use_t;
/* FIXME: remove RAID_METADATA_AREA_LEN macro after defining 'raid_log_extents' */
#define RAID_METADATA_AREA_LEN 1
/* FIXME These ended up getting used differently from first intended. Refactor. */
/* Only one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG, A_CLING_TO_ALLOCED may be set */
#define A_CONTIGUOUS_TO_LVSEG 0x01 /* Must be contiguous to an existing segment */
#define A_CLING_TO_LVSEG 0x02 /* Must use same disks as existing LV segment */
#define A_CLING_TO_ALLOCED 0x04 /* Must use same disks as already-allocated segment */
#define A_CLING_BY_TAGS 0x08 /* Must match tags against existing segment */
#define A_CAN_SPLIT 0x10
#define A_AREA_COUNT_MATCHES 0x20 /* Existing lvseg has same number of areas as new segment */
#define A_POSITIONAL_FILL 0x40 /* Slots are positional and filled using PREFERRED */
#define A_PARTITION_BY_TAGS 0x80 /* No allocated area may share any tag with any other */
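/*
 * Example: a contiguous allocation following an existing segment is set up
 * in _init_alloc_parms() below with
 * (A_AREA_COUNT_MATCHES | A_CONTIGUOUS_TO_LVSEG | A_POSITIONAL_FILL).
 */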
/*
* Constant parameters during a single allocation attempt.
*/
struct alloc_parms {
alloc_policy_t alloc;
unsigned flags; /* Holds A_* */
struct lv_segment *prev_lvseg;
uint32_t extents_still_needed;
};
/*
* Holds varying state of each allocation attempt.
*/
struct alloc_state {
const struct alloc_parms *alloc_parms;
struct pv_area_used *areas;
uint32_t areas_size;
uint32_t log_area_count_still_needed; /* Number of areas still needing to be allocated for the log */
uint32_t allocated; /* Total number of extents allocated so far */
uint32_t num_positional_areas; /* Number of parallel allocations that must be contiguous/cling */
};
struct lv_names {
const char *old;
const char *new;
};
enum {
LV_TYPE_UNKNOWN,
LV_TYPE_NONE,
LV_TYPE_PUBLIC,
LV_TYPE_PRIVATE,
LV_TYPE_HISTORY,
LV_TYPE_LINEAR,
LV_TYPE_STRIPED,
LV_TYPE_MIRROR,
LV_TYPE_RAID,
LV_TYPE_THIN,
LV_TYPE_CACHE,
LV_TYPE_SPARSE,
LV_TYPE_ORIGIN,
LV_TYPE_THINORIGIN,
LV_TYPE_MULTITHINORIGIN,
LV_TYPE_THICKORIGIN,
LV_TYPE_MULTITHICKORIGIN,
LV_TYPE_CACHEORIGIN,
LV_TYPE_EXTTHINORIGIN,
LV_TYPE_MULTIEXTTHINORIGIN,
LV_TYPE_SNAPSHOT,
LV_TYPE_THINSNAPSHOT,
LV_TYPE_THICKSNAPSHOT,
LV_TYPE_PVMOVE,
LV_TYPE_IMAGE,
LV_TYPE_LOG,
LV_TYPE_METADATA,
LV_TYPE_POOL,
LV_TYPE_DATA,
LV_TYPE_SPARE,
LV_TYPE_VDO,
LV_TYPE_VIRTUAL,
LV_TYPE_RAID0,
LV_TYPE_RAID0_META,
LV_TYPE_RAID1,
LV_TYPE_RAID10,
LV_TYPE_RAID4,
LV_TYPE_RAID5,
LV_TYPE_RAID5_N,
LV_TYPE_RAID5_LA,
LV_TYPE_RAID5_RA,
LV_TYPE_RAID5_LS,
LV_TYPE_RAID5_RS,
LV_TYPE_RAID6,
LV_TYPE_RAID6_ZR,
LV_TYPE_RAID6_NR,
LV_TYPE_RAID6_NC,
LV_TYPE_LOCKD,
LV_TYPE_SANLOCK,
LV_TYPE_CACHEVOL,
LV_TYPE_WRITECACHE,
LV_TYPE_WRITECACHEORIGIN,
LV_TYPE_INTEGRITY,
LV_TYPE_INTEGRITYORIGIN
};
static const char *_lv_type_names[] = {
[LV_TYPE_UNKNOWN] = "unknown",
[LV_TYPE_NONE] = "none",
[LV_TYPE_PUBLIC] = "public",
[LV_TYPE_PRIVATE] = "private",
[LV_TYPE_HISTORY] = "history",
[LV_TYPE_LINEAR] = "linear",
[LV_TYPE_STRIPED] = "striped",
[LV_TYPE_MIRROR] = "mirror",
[LV_TYPE_RAID] = "raid",
[LV_TYPE_THIN] = "thin",
[LV_TYPE_CACHE] = "cache",
[LV_TYPE_SPARSE] = "sparse",
[LV_TYPE_ORIGIN] = "origin",
[LV_TYPE_THINORIGIN] = "thinorigin",
[LV_TYPE_MULTITHINORIGIN] = "multithinorigin",
[LV_TYPE_THICKORIGIN] = "thickorigin",
[LV_TYPE_MULTITHICKORIGIN] = "multithickorigin",
[LV_TYPE_CACHEORIGIN] = "cacheorigin",
[LV_TYPE_EXTTHINORIGIN] = "extthinorigin",
[LV_TYPE_MULTIEXTTHINORIGIN] = "multiextthinorigin",
[LV_TYPE_SNAPSHOT] = "snapshot",
[LV_TYPE_THINSNAPSHOT] = "thinsnapshot",
[LV_TYPE_THICKSNAPSHOT] = "thicksnapshot",
[LV_TYPE_PVMOVE] = "pvmove",
[LV_TYPE_IMAGE] = "image",
[LV_TYPE_LOG] = "log",
[LV_TYPE_METADATA] = "metadata",
[LV_TYPE_POOL] = "pool",
[LV_TYPE_DATA] = "data",
[LV_TYPE_SPARE] = "spare",
[LV_TYPE_VDO] = "vdo",
[LV_TYPE_VIRTUAL] = "virtual",
[LV_TYPE_RAID0] = SEG_TYPE_NAME_RAID0,
[LV_TYPE_RAID0_META] = SEG_TYPE_NAME_RAID0_META,
[LV_TYPE_RAID1] = SEG_TYPE_NAME_RAID1,
[LV_TYPE_RAID10] = SEG_TYPE_NAME_RAID10,
[LV_TYPE_RAID4] = SEG_TYPE_NAME_RAID4,
[LV_TYPE_RAID5] = SEG_TYPE_NAME_RAID5,
[LV_TYPE_RAID5_N] = SEG_TYPE_NAME_RAID5_N,
[LV_TYPE_RAID5_LA] = SEG_TYPE_NAME_RAID5_LA,
[LV_TYPE_RAID5_RA] = SEG_TYPE_NAME_RAID5_RA,
[LV_TYPE_RAID5_LS] = SEG_TYPE_NAME_RAID5_LS,
[LV_TYPE_RAID5_RS] = SEG_TYPE_NAME_RAID5_RS,
[LV_TYPE_RAID6] = SEG_TYPE_NAME_RAID6,
[LV_TYPE_RAID6_ZR] = SEG_TYPE_NAME_RAID6_ZR,
[LV_TYPE_RAID6_NR] = SEG_TYPE_NAME_RAID6_NR,
[LV_TYPE_RAID6_NC] = SEG_TYPE_NAME_RAID6_NC,
[LV_TYPE_LOCKD] = "lockd",
[LV_TYPE_SANLOCK] = "sanlock",
[LV_TYPE_CACHEVOL] = "cachevol",
[LV_TYPE_WRITECACHE] = "writecache",
[LV_TYPE_WRITECACHEORIGIN] = "writecacheorigin",
[LV_TYPE_INTEGRITY] = "integrity",
[LV_TYPE_INTEGRITYORIGIN] = "integrityorigin",
};
static int _lv_layout_and_role_mirror(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
/* non-top-level LVs */
if (lv_is_mirror_image(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE]))
goto_bad;
} else if (lv_is_mirror_log(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_LOG]))
goto_bad;
if (lv_is_mirrored(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
goto_bad;
} else if (lv_is_pvmove(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
goto_bad;
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_raid(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
const struct segment_type *segtype;
/* non-top-level LVs */
if (lv_is_raid_image(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE]))
goto_bad;
} else if (lv_is_raid_metadata(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
goto_bad;
} else if (lv_is_pvmove(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID]))
goto_bad;
segtype = first_seg(lv)->segtype;
if (segtype_is_raid0(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID0]))
goto_bad;
} else if (segtype_is_raid1(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID1]))
goto_bad;
} else if (segtype_is_raid10(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID10]))
goto_bad;
} else if (segtype_is_raid4(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID4]))
goto_bad;
} else if (segtype_is_any_raid5(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5]))
goto_bad;
if (segtype_is_raid5_la(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LA]))
goto_bad;
} else if (segtype_is_raid5_ra(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RA]))
goto_bad;
} else if (segtype_is_raid5_ls(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LS]))
goto_bad;
} else if (segtype_is_raid5_rs(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RS]))
goto_bad;
}
} else if (segtype_is_any_raid6(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6]))
goto_bad;
if (segtype_is_raid6_zr(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_ZR]))
goto_bad;
} else if (segtype_is_raid6_nr(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NR]))
goto_bad;
} else if (segtype_is_raid6_nc(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NC]))
goto_bad;
}
}
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_thin(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
unsigned snap_count;
/* non-top-level LVs */
if (lv_is_thin_pool_metadata(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
goto_bad;
} else if (lv_is_thin_pool_data(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (lv_is_thin_volume(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_SPARSE]))
goto_bad;
if (lv_is_thin_origin(lv, &snap_count)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINORIGIN]))
goto_bad;
if (snap_count > 1 &&
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHINORIGIN]))
goto_bad;
}
if (lv_is_thin_snapshot(lv))
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINSNAPSHOT]))
goto_bad;
} else if (lv_is_thin_pool(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
goto_bad;
*public_lv = 0;
}
if (lv_is_external_origin(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_EXTTHINORIGIN]))
goto_bad;
if (lv->external_count > 1 &&
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTIEXTTHINORIGIN]))
goto_bad;
}
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_cache(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
/* non-top-level LVs */
if (lv_is_cache_pool_metadata(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
goto_bad;
} else if (lv_is_cache_pool_data(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
goto_bad;
if (lv_is_cache(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
goto_bad;
} else if (lv_is_cache_origin(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHEORIGIN]))
goto_bad;
if (lv_is_cache(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
goto_bad;
} else if (lv_is_writecache_origin(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_WRITECACHE]) ||
!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_WRITECACHEORIGIN]))
goto_bad;
if (lv_is_writecache(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (lv_is_cache(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
goto_bad;
else if (lv_is_writecache(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]))
goto_bad;
else if (lv_is_writecache_cachevol(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHEVOL]))
goto_bad;
*public_lv = 0;
} else if (lv_is_cache_vol(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHEVOL]))
goto_bad;
*public_lv = 0;
} else if (lv_is_cache_pool(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
goto_bad;
*public_lv = 0;
}
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_integrity(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
/* non-top-level LVs */
if (lv_is_integrity_metadata(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITY]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
goto_bad;
} else if (lv_is_integrity_origin(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITY]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITYORIGIN]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (lv_is_integrity(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_INTEGRITY]))
goto_bad;
}
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_thick_origin_snapshot(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
if (lv_is_origin(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKORIGIN]))
goto_bad;
/*
* Thin volumes are also marked with the virtual flag, but we don't show "virtual"
* layout for thin LVs as they have their own keyword for layout - "thin"!
* So rule thin LVs out here!
*/
if (lv_is_virtual(lv) && !lv_is_thin_volume(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VIRTUAL]))
goto_bad;
*public_lv = 0;
}
if (lv->origin_count > 1 &&
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHICKORIGIN]))
goto_bad;
} else if (lv_is_cow(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKSNAPSHOT]))
goto_bad;
}
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_vdo(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
/* non-top-level LVs */
if (lv_is_vdo_pool(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VDO]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
goto_bad;
} else if (lv_is_vdo_pool_data(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_VDO]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (lv_is_vdo(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VDO]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_SPARSE]))
goto_bad;
}
return 1;
bad:
return 0;
}
int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
struct dm_list **layout, struct dm_list **role)
{
int linear, striped;
struct lv_segment *seg;
int public_lv = 1;
*layout = *role = NULL;
if (!(*layout = str_list_create(mem))) {
log_error("LV layout list allocation failed");
return 0;
}
if (!(*role = str_list_create(mem))) {
log_error("LV role list allocation failed");
goto bad;
}
if (lv_is_historical(lv)) {
if (!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_NONE]) ||
!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_HISTORY]))
goto_bad;
}
/* Mirrors and related */
if ((lv_is_mirror_type(lv) || lv_is_pvmove(lv)) &&
!_lv_layout_and_role_mirror(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* RAIDs and related */
if (lv_is_raid_type(lv) &&
!_lv_layout_and_role_raid(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* Thins and related */
if ((lv_is_thin_type(lv) || lv_is_external_origin(lv)) &&
!_lv_layout_and_role_thin(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* Caches and related */
if ((lv_is_cache_type(lv) || lv_is_cache_origin(lv) || lv_is_writecache(lv) || lv_is_writecache_origin(lv)) &&
!_lv_layout_and_role_cache(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* Integrity related */
if ((lv_is_integrity(lv) || lv_is_integrity_origin(lv) || lv_is_integrity_metadata(lv)) &&
!_lv_layout_and_role_integrity(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* VDO and related */
if (lv_is_vdo_type(lv) &&
!_lv_layout_and_role_vdo(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* Pool-specific */
if (lv_is_pool_metadata_spare(lv)) {
if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SPARE]))
goto_bad;
public_lv = 0;
}
/* Old-style origins/snapshots, virtual origins */
if (!_lv_layout_and_role_thick_origin_snapshot(mem, lv, *layout, *role, &public_lv))
goto_bad;
if (lv_is_lockd_sanlock_lv(lv)) {
if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_LOCKD]) ||
!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SANLOCK]))
goto_bad;
public_lv = 0;
}
/*
* If layout not yet determined, it must be either
* linear or striped or mixture of these two.
*/
if (dm_list_empty(*layout)) {
linear = striped = 0;
dm_list_iterate_items(seg, &lv->segments) {
if (seg_is_linear(seg))
linear = 1;
else if (seg_is_striped(seg))
striped = 1;
else {
/*
* This should not happen but if it does
* we'll see that there's "unknown" layout
* present. This means we forgot to detect
the role above and we need to add proper
detection for such a role!
*/
log_warn(INTERNAL_ERROR "WARNING: Failed to properly detect "
"layout and role for LV %s/%s.",
lv->vg->name, lv->name);
}
}
if (linear &&
!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_LINEAR]))
goto_bad;
if (striped &&
!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_STRIPED]))
goto_bad;
if (!linear && !striped &&
!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_UNKNOWN]))
goto_bad;
}
/* finally, add either 'public' or 'private' role to the LV */
if (public_lv) {
if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PUBLIC]))
goto_bad;
} else {
if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PRIVATE]))
goto_bad;
}
return 1;
bad:
dm_pool_free(mem, *layout);
return 0;
}
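/*
 * Minimal usage sketch for lv_layout_and_role() (illustrative only):
 *
 *	struct dm_list *layout, *role;
 *	struct dm_str_list *sl;
 *
 *	if (!lv_layout_and_role(mem, lv, &layout, &role))
 *		return_0;
 *	dm_list_iterate_items(sl, layout)
 *		log_print("layout: %s", sl->str);
 *	dm_list_iterate_items(sl, role)
 *		log_print("role: %s", sl->str);
 */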
struct dm_list_and_mempool {
struct dm_list *list;
struct dm_pool *mem;
};
static int _get_pv_list_for_lv(struct logical_volume *lv, void *data)
{
int dup_found;
uint32_t s;
struct pv_list *pvl;
struct lv_segment *seg;
struct dm_list *pvs = ((struct dm_list_and_mempool *)data)->list;
struct dm_pool *mem = ((struct dm_list_and_mempool *)data)->mem;
dm_list_iterate_items(seg, &lv->segments) {
for (s = 0; s < seg->area_count; s++) {
dup_found = 0;
if (seg_type(seg, s) != AREA_PV)
continue;
/* do not add duplicates */
dm_list_iterate_items(pvl, pvs)
if (pvl->pv == seg_pv(seg, s))
dup_found = 1;
if (dup_found)
continue;
if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
log_error("Failed to allocate memory");
return 0;
}
pvl->pv = seg_pv(seg, s);
log_debug_metadata(" %s/%s uses %s", lv->vg->name,
lv->name, pv_dev_name(pvl->pv));
dm_list_add(pvs, &pvl->list);
}
}
return 1;
}
/*
* get_pv_list_for_lv
* @mem - mempool to allocate the list from.
* @lv
* @pvs - The list to add pv_list items to.
*
* 'pvs' is filled with 'pv_list' items for PVs that compose the LV.
* If the 'pvs' list already has items in it, duplicates will not be
* added. So, it is safe to repeatedly call this function for different
* LVs and build up a list of PVs for them all.
*
* Memory to create the list is obtained from the mempool provided.
*
* Returns: 1 on success, 0 on error
*/
int get_pv_list_for_lv(struct dm_pool *mem,
struct logical_volume *lv, struct dm_list *pvs)
{
struct dm_list_and_mempool context = { pvs, mem };
log_debug_metadata("Generating list of PVs that %s/%s uses:",
lv->vg->name, lv->name);
if (!_get_pv_list_for_lv(lv, &context))
return_0;
return for_each_sub_lv(lv, &_get_pv_list_for_lv, &context);
}
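/*
 * Minimal usage sketch: build one de-duplicated PV list covering several
 * LVs ('lv1' and 'lv2' are placeholders):
 *
 *	struct dm_list pvs;
 *	struct pv_list *pvl;
 *
 *	dm_list_init(&pvs);
 *	if (!get_pv_list_for_lv(mem, lv1, &pvs) ||
 *	    !get_pv_list_for_lv(mem, lv2, &pvs))
 *		return_0;
 *	dm_list_iterate_items(pvl, &pvs)
 *		log_debug("uses %s", pv_dev_name(pvl->pv));
 */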
/*
* get_default_region_size
* @cmd
*
* 'mirror_region_size' and 'raid_region_size' are effectively the same thing.
* However, "raid" is more inclusive than "mirror", so the name has been
* changed. This function checks for the old setting and warns the user if
* it is being overridden by the new setting (i.e. warn if both settings are
* present).
*
* Note that the config files give defaults in kiB terms, but we
* return the value in terms of sectors.
*
* Returns: default region_size in sectors
*/
static int _get_default_region_size(struct cmd_context *cmd)
{
int mrs, rrs;
/*
* 'mirror_region_size' is the old setting. It is overridden
* by the new setting, 'raid_region_size'.
*/
mrs = 2 * find_config_tree_int(cmd, activation_mirror_region_size_CFG, NULL);
rrs = 2 * find_config_tree_int(cmd, activation_raid_region_size_CFG, NULL);
if (!mrs && !rrs)
return DEFAULT_RAID_REGION_SIZE * 2;
if (!mrs)
return rrs;
if (!rrs)
return mrs;
if (mrs != rrs)
log_verbose("Overriding default 'mirror_region_size' setting"
" with 'raid_region_size' setting of %u kiB",
rrs / 2);
return rrs;
}
static int _round_down_pow2(int r)
{
/* Set all bits to the right of the leftmost set bit */
r |= (r >> 1);
r |= (r >> 2);
r |= (r >> 4);
r |= (r >> 8);
r |= (r >> 16);
/* Pull out the leftmost set bit */
return r & ~(r >> 1);
}
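/*
 * Example: r = 100 (0b1100100) is smeared to 0b1111111 (127) by the shifts;
 * 127 & ~(127 >> 1) keeps only the leftmost set bit, so the result is 64 -
 * the largest power of 2 not exceeding 100.
 */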
uint32_t get_default_region_size(struct cmd_context *cmd)
{
int pagesize = lvm_getpagesize();
int region_size = _get_default_region_size(cmd);
if (!is_power_of_2(region_size)) {
region_size = _round_down_pow2(region_size);
log_verbose("Reducing region size to %u kiB (power of 2).",
region_size / 2);
}
if (region_size % (pagesize >> SECTOR_SHIFT)) {
region_size = DEFAULT_RAID_REGION_SIZE * 2;
log_verbose("Using default region size %u kiB (multiple of page size).",
region_size / 2);
}
return (uint32_t) region_size;
}
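/*
 * Example: raid_region_size = 3000 (kiB) in the configuration becomes
 * 6000 sectors, which is not a power of 2 and is rounded down to
 * 4096 sectors (2048 kiB).  With a 4096-byte page (8 sectors),
 * 4096 % 8 == 0, so 4096 sectors is returned.
 */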
int add_seg_to_segs_using_this_lv(struct logical_volume *lv,
struct lv_segment *seg)
{
struct seg_list *sl;
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
if (sl->seg == seg) {
sl->count++;
return 1;
}
}
log_very_verbose("Adding %s:" FMTu32 " as an user of %s.",
display_lvname(seg->lv), seg->le, display_lvname(lv));
if (!(sl = dm_pool_zalloc(lv->vg->vgmem, sizeof(*sl)))) {
log_error("Failed to allocate segment list.");
return 0;
}
sl->count = 1;
sl->seg = seg;
dm_list_add(&lv->segs_using_this_lv, &sl->list);
return 1;
}
int remove_seg_from_segs_using_this_lv(struct logical_volume *lv,
struct lv_segment *seg)
{
struct seg_list *sl;
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
if (sl->seg != seg)
continue;
if (sl->count > 1)
sl->count--;
else {
log_very_verbose("%s:" FMTu32 " is no longer a user of %s.",
display_lvname(seg->lv), seg->le,
display_lvname(lv));
dm_list_del(&sl->list);
}
return 1;
}
log_error(INTERNAL_ERROR "Segment %s:" FMTu32 " is not a user of %s.",
display_lvname(seg->lv), seg->le, display_lvname(lv));
return 0;
}
/*
* This is a function specialized for the common case where there is
* only one segment which uses the LV.
* e.g. the LV is a layer inserted by insert_layer_for_lv().
*
* In general, walk through lv->segs_using_this_lv.
*/
struct lv_segment *get_only_segment_using_this_lv(const struct logical_volume *lv)
{
struct seg_list *sl;
if (!lv) {
log_error(INTERNAL_ERROR "get_only_segment_using_this_lv() called with NULL LV.");
return NULL;
}
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
/* Needs to be the only item in the list */
if (!dm_list_end(&lv->segs_using_this_lv, &sl->list))
break;
if (sl->count != 1) {
log_error("%s is expected to have only one segment using it, "
"while %s:" FMTu32 " uses it %d times.",
display_lvname(lv), display_lvname(sl->seg->lv),
sl->seg->le, sl->count);
return NULL;
}
return sl->seg;
}
log_error("%s is expected to have only one segment using it, while it has %d.",
display_lvname(lv), dm_list_size(&lv->segs_using_this_lv));
return NULL;
}
/*
* PVs used by a segment of an LV
*/
struct seg_pvs {
struct dm_list list;
struct dm_list pvs; /* struct pv_list */
uint32_t le;
uint32_t len;
};
static struct seg_pvs *_find_seg_pvs_by_le(struct dm_list *list, uint32_t le)
{
struct seg_pvs *spvs;
dm_list_iterate_items(spvs, list)
if (le >= spvs->le && le < spvs->le + spvs->len)
return spvs;
return NULL;
}
/*
* Find first unused LV number.
*/
uint32_t find_free_lvnum(struct logical_volume *lv)
{
int lvnum_used[MAX_RESTRICTED_LVS + 1] = { 0 };
uint32_t i = 0;
struct lv_list *lvl;
int lvnum;
dm_list_iterate_items(lvl, &lv->vg->lvs) {
lvnum = lvnum_from_lvid(&lvl->lv->lvid);
if (lvnum <= MAX_RESTRICTED_LVS)
lvnum_used[lvnum] = 1;
}
while (lvnum_used[i])
i++;
/* FIXME What if none are free? */
return i;
}
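/*
 * Example: if the VG already contains LVs numbered 0, 1 and 3,
 * the first unused number, 2, is returned.
 */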
dm_percent_t copy_percent(const struct logical_volume *lv)
{
uint32_t numerator = 0u, denominator = 0u;
struct lv_segment *seg;
dm_list_iterate_items(seg, &lv->segments) {
denominator += seg->area_len;
/* FIXME Generalise name of 'extents_copied' field */
if (((seg_is_raid(seg) && !seg_is_any_raid0(seg)) || seg_is_mirrored(seg)) &&
(seg->area_count > 1))
numerator += seg->extents_copied;
else
numerator += seg->area_len;
}
return denominator ? dm_make_percent(numerator, denominator) : DM_PERCENT_100;
}
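/*
 * Example: a 2-way mirrored segment with area_len = 100 and
 * extents_copied = 60 contributes 60/100, i.e. 60%; linear and striped
 * segments always count as fully copied.
 */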
/* Round up extents to next stripe boundary for number of stripes */
static uint32_t _round_to_stripe_boundary(struct volume_group *vg, uint32_t extents,
uint32_t stripes, int extend)
{
uint32_t size_rest, new_extents = extents;
if (!stripes)
return extents;
/* Round up extents to stripe divisible amount */
if ((size_rest = extents % stripes)) {
new_extents += extend ? stripes - size_rest : -size_rest;
log_print_unless_silent("Rounding size %s (%u extents) %s to stripe boundary size %s (%u extents).",
display_size(vg->cmd, (uint64_t) extents * vg->extent_size), extents,
new_extents < extents ? "down" : "up",
display_size(vg->cmd, (uint64_t) new_extents * vg->extent_size), new_extents);
}
return new_extents;
}
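/*
 * Example: extents = 10 with stripes = 3 rounds up to 12 when extending
 * (extend = 1) and down to 9 when reducing (extend = 0).
 */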
/*
* All lv_segments get created here.
*/
struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
struct logical_volume *lv,
uint32_t le, uint32_t len,
uint32_t reshape_len,
uint64_t status,
uint32_t stripe_size,
struct logical_volume *log_lv,
uint32_t area_count,
uint32_t area_len,
uint32_t data_copies,
uint32_t chunk_size,
uint32_t region_size,
uint32_t extents_copied,
struct lv_segment *pvmove_source_seg)
{
struct lv_segment *seg;
struct dm_pool *mem = lv->vg->vgmem;
uint32_t areas_sz = area_count * sizeof(*seg->areas);
if (!segtype) {
log_error(INTERNAL_ERROR "alloc_lv_segment: Missing segtype.");
return NULL;
}
if (!(seg = dm_pool_zalloc(mem, sizeof(*seg))))
return_NULL;
if (!(seg->areas = dm_pool_zalloc(mem, areas_sz))) {
dm_pool_free(mem, seg);
return_NULL;
}
if (segtype_is_raid_with_meta(segtype) &&
!(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
dm_pool_free(mem, seg); /* frees everything alloced since seg */
return_NULL;
}
seg->segtype = segtype;
seg->lv = lv;
seg->le = le;
seg->len = len;
seg->reshape_len = reshape_len;
seg->status = status;
seg->stripe_size = stripe_size;
seg->area_count = area_count;
seg->area_len = area_len;
seg->data_copies = data_copies ? : lv_raid_data_copies(segtype, area_count);
seg->chunk_size = chunk_size;
seg->region_size = region_size;
seg->extents_copied = extents_copied;
seg->pvmove_source_seg = pvmove_source_seg;
dm_list_init(&seg->tags);
dm_list_init(&seg->origin_list);
dm_list_init(&seg->thin_messages);
if (log_lv && !attach_mirror_log(seg, log_lv))
return_NULL;
if (segtype_is_mirror(segtype))
lv->status |= MIRROR;
if (segtype_is_mirrored(segtype))
lv->status |= MIRRORED;
return seg;
}
/*
* Temporary helper to return number of data copies for
* RAID segment @seg until seg->data_copies got added
*/
static uint32_t _raid_data_copies(struct lv_segment *seg)
{
/*
* FIXME: needs to change once more than 2 are supported.
* I.e. use seg->data_copies then
*/
if (seg_is_raid10(seg))
return 2;
if (seg_is_raid1(seg))
return seg->area_count;
return seg->segtype->parity_devs + 1;
}
/* Data image count for RAID segment @seg */
static uint32_t _raid_stripes_count(struct lv_segment *seg)
{
/*
* FIXME: raid10 needs to change once more than
* 2 data_copies and odd # of legs supported.
*/
if (seg_is_raid10(seg))
return seg->area_count / _raid_data_copies(seg);
return seg->area_count - seg->segtype->parity_devs;
}
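/*
 * Example: a raid10 segment with 4 areas has 2 data copies and
 * 4 / 2 = 2 stripes; a raid6 segment with 5 areas (parity_devs = 2)
 * has 3 data copies and 5 - 2 = 3 stripes.
 */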
static int _release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s,
uint32_t area_reduction, int with_discard)
{
struct lv_segment *cache_seg;
struct logical_volume *lv = seg_lv(seg, s);
if (seg_type(seg, s) == AREA_UNASSIGNED)
return 1;
if (seg_type(seg, s) == AREA_PV) {
if (with_discard && !discard_pv_segment(seg_pvseg(seg, s), area_reduction))
return_0;
if (!release_pv_segment(seg_pvseg(seg, s), area_reduction))
return_0;
if (seg->area_len == area_reduction)
seg_type(seg, s) = AREA_UNASSIGNED;
return 1;
}
if (lv_is_mirror_image(lv) ||
lv_is_thin_pool_data(lv) ||
lv_is_vdo_pool_data(lv) ||
lv_is_cache_pool_data(lv)) {
if (!lv_reduce(lv, area_reduction))
return_0; /* FIXME: any upper level reporting */
return 1;
}
if (seg_is_cache_pool(seg) &&
!dm_list_empty(&seg->lv->segs_using_this_lv)) {
if (!(cache_seg = get_only_segment_using_this_lv(seg->lv)))
return_0;
if (!lv_cache_remove(cache_seg->lv))
return_0;
}
if (lv_is_raid_image(lv)) {
/* Calculate the amount of extents to reduce per rmeta/rimage LV */
uint32_t rimage_extents;
struct lv_segment *seg1 = first_seg(lv);
/* FIXME: avoid extra seg_is_*() conditionals here */
rimage_extents = raid_rimage_extents(seg1->segtype, area_reduction,
seg_is_any_raid0(seg) ? 0 : _raid_stripes_count(seg),
seg_is_raid10(seg) ? 1 : _raid_data_copies(seg));
if (!rimage_extents)
return 0;
if (seg->meta_areas) {
uint32_t meta_area_reduction;
struct logical_volume *mlv;
struct volume_group *vg = lv->vg;
if (seg_metatype(seg, s) != AREA_LV ||
!(mlv = seg_metalv(seg, s)))
return 0;
meta_area_reduction = raid_rmeta_extents_delta(vg->cmd, lv->le_count, lv->le_count - rimage_extents,
seg->region_size, vg->extent_size);
/* Limit for raid0_meta not having region size set */
if (meta_area_reduction > mlv->le_count ||
!(lv->le_count - rimage_extents))
meta_area_reduction = mlv->le_count;
if (meta_area_reduction &&
!lv_reduce(mlv, meta_area_reduction))
return_0; /* FIXME: any upper level reporting */
}
if (!lv_reduce(lv, rimage_extents))
return_0; /* FIXME: any upper level reporting */
return 1;
}
if (area_reduction == seg->area_len) {
log_very_verbose("Remove %s:" FMTu32 "[" FMTu32 "] from "
"the top of LV %s:" FMTu32 ".",
display_lvname(seg->lv), seg->le, s,
display_lvname(lv), seg_le(seg, s));
if (!remove_seg_from_segs_using_this_lv(lv, seg))
return_0;
seg_lv(seg, s) = NULL;
seg_le(seg, s) = 0;
seg_type(seg, s) = AREA_UNASSIGNED;
}
/* When the last VDO user is removed, the VDO pool is removed automatically as well */
if (lv_is_vdo_pool(lv) && dm_list_empty(&(lv->segs_using_this_lv))) {
struct volume_group *vg = lv->vg;
if (!lv_remove(lv)) /* FIXME: any upper level reporting */
return_0;
if (vg_is_shared(vg)) {
if (!lockd_lv_name(vg->cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args, "un", LDLV_PERSISTENT))
log_error("Failed to unlock vdo pool in lvmlockd.");
lockd_free_lv(vg->cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args);
}
return 1;
}
return 1;
}
int release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s, uint32_t area_reduction)
{
return _release_and_discard_lv_segment_area(seg, s, area_reduction, 1);
}
int release_lv_segment_area(struct lv_segment *seg, uint32_t s, uint32_t area_reduction)
{
return _release_and_discard_lv_segment_area(seg, s, area_reduction, 0);
}
/*
* Move a segment area from one segment to another
*/
int move_lv_segment_area(struct lv_segment *seg_to, uint32_t area_to,
struct lv_segment *seg_from, uint32_t area_from)
{
struct physical_volume *pv;
struct logical_volume *lv;
uint32_t pe, le;
switch (seg_type(seg_from, area_from)) {
case AREA_PV:
pv = seg_pv(seg_from, area_from);
pe = seg_pe(seg_from, area_from);
if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len))
return_0;
if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
return_0;
if (!set_lv_segment_area_pv(seg_to, area_to, pv, pe))
return_0;
break;
case AREA_LV:
lv = seg_lv(seg_from, area_from);
le = seg_le(seg_from, area_from);
if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len))
return_0;
if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
return_0;
if (!set_lv_segment_area_lv(seg_to, area_to, lv, le, 0))
return_0;
break;
case AREA_UNASSIGNED:
if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
return_0;
}
return 1;
}
/*
* Link part of a PV to an LV segment.
*/
int set_lv_segment_area_pv(struct lv_segment *seg, uint32_t area_num,
struct physical_volume *pv, uint32_t pe)
{
seg->areas[area_num].type = AREA_PV;
if (!(seg_pvseg(seg, area_num) =
assign_peg_to_lvseg(pv, pe, seg->area_len, seg, area_num)))
return_0;
return 1;
}
/*
* Link one LV segment to another. Assumes sizes already match.
*/
int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
struct logical_volume *lv, uint32_t le,
uint64_t status)
{
log_very_verbose("Stack %s:" FMTu32 "[" FMTu32 "] on LV %s:" FMTu32 ".",
display_lvname(seg->lv), seg->le, area_num,
display_lvname(lv), le);
if (area_num >= seg->area_count) {
log_error(INTERNAL_ERROR "Try to set to high area number (%u >= %u) for LV %s.",
area_num, seg->area_count, display_lvname(seg->lv));
return 0;
}
lv->status |= status;
if (lv_is_raid_metadata(lv)) {
seg->meta_areas[area_num].type = AREA_LV;
seg_metalv(seg, area_num) = lv;
if (le) {
log_error(INTERNAL_ERROR "Meta le != 0.");
return 0;
}
seg_metale(seg, area_num) = 0;
} else {
seg->areas[area_num].type = AREA_LV;
seg_lv(seg, area_num) = lv;
seg_le(seg, area_num) = le;
}
if (!add_seg_to_segs_using_this_lv(lv, seg))
return_0;
return 1;
}
/*
* Prepare for adding parallel areas to an existing segment.
*/
int add_lv_segment_areas(struct lv_segment *seg, uint32_t new_area_count)
{
struct lv_segment_area *newareas;
uint32_t areas_sz = new_area_count * sizeof(*newareas);
if (!(newareas = dm_pool_zalloc(seg->lv->vg->vgmem, areas_sz))) {
log_error("Failed to allocate widened LV segment for %s.",
display_lvname(seg->lv));
return 0;
}
if (seg->area_count)
memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas));
seg->areas = newareas;
seg->area_count = new_area_count;
return 1;
}
static uint32_t _calc_area_multiple(const struct segment_type *segtype,
const uint32_t area_count,
const uint32_t stripes)
{
if (!area_count)
return 1;
/* Striped */
if (segtype_is_striped(segtype))
return area_count;
/* Parity RAID (e.g. RAID 4/5/6) */
if (segtype_is_raid(segtype) && segtype->parity_devs) {
/*
* As articulated in _alloc_init, we can tell by
* the area_count whether a replacement drive is
* being allocated; and if this is the case, then
* there is no area_multiple that should be used.
*/
if (area_count <= segtype->parity_devs)
return 1;
return area_count - segtype->parity_devs;
}
/*
* RAID10 - only has 2-way mirror right now.
* If we are to move beyond 2-way RAID10, then
* the 'stripes' argument will always need to
* be given.
*/
if (segtype_is_raid10(segtype)) {
if (!stripes)
return area_count / 2;
return stripes;
}
/* Mirrored stripes */
if (stripes)
return stripes;
/* Mirrored */
return 1;
}
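/*
 * Example: striped with 3 areas -> 3; raid5 with 4 areas (parity_devs = 1)
 * -> 3; raid10 with 4 areas and stripes unspecified -> 4 / 2 = 2;
 * plain mirror -> 1.  Hence seg->len = seg->area_len * area_multiple.
 */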
/*
* Reduce the size of an lv_segment. New size can be zero.
*/
static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
{
uint32_t area_reduction, s;
uint32_t areas = (seg->area_count / (seg_is_raid10(seg) ? seg->data_copies : 1)) - seg->segtype->parity_devs;
/* Caller must ensure exact divisibility */
if (seg_is_striped(seg) || seg_is_striped_raid(seg)) {
if (reduction % areas) {
log_error("Segment extent reduction %" PRIu32
" not divisible by #stripes %" PRIu32,
reduction, seg->area_count);
return 0;
}
area_reduction = reduction / areas;
} else
area_reduction = reduction;
for (s = 0; s < seg->area_count; s++)
if (!release_and_discard_lv_segment_area(seg, s, area_reduction))
return_0;
seg->len -= reduction;
if (seg_is_raid(seg))
seg->area_len = seg->len;
else
seg->area_len -= area_reduction;
return 1;
}
/* Find the bottommost resizable LV in the stack.
* For the command-line tools it does not matter which LV in the stack is used. */
static struct logical_volume *_get_resizable_layer_lv(struct logical_volume *lv)
{
while (lv_is_cache(lv) || /* _corig */
lv_is_integrity(lv) ||
lv_is_thin_pool(lv) || /* _tdata */
lv_is_vdo_pool(lv) || /* _vdata */
lv_is_writecache(lv)) /* _worigin */
lv = seg_lv(first_seg(lv), 0); /* component-level down */
return lv;
}
/* Check if the LV is a component of a resizable LV.
* When a resize changes the size of such an LV, it also changes the size of the whole stack upward.
* Supports syntax sugar - so the user can pick any LV in the stack to resize. */
static int _is_layered_lv(struct logical_volume *lv)
{
return (lv_is_cache_origin(lv) ||
lv_is_integrity_origin(lv) ||
lv_is_thin_pool_data(lv) ||
lv_is_vdo_pool_data(lv) ||
lv_is_writecache_origin(lv));
}
/* Find the topmost LV in the stack - usually such LV is visible. */
static struct logical_volume *_get_top_layer_lv(struct logical_volume *lv)
{
struct lv_segment *seg;
while (_is_layered_lv(lv)) {
if (!(seg = get_only_segment_using_this_lv(lv))) {
log_error(INTERNAL_ERROR "No single component user of logical volume %s.",
display_lvname(lv));
return NULL;
}
lv = seg->lv; /* component-level up */
}
return lv;
}
/* Handles also stacking */
static int _setup_lv_size(struct logical_volume *lv, uint32_t extents)
{
struct lv_segment *seg;
lv->le_count = extents;
lv->size = (uint64_t) extents * lv->vg->extent_size;
while (lv->size && _is_layered_lv(lv)) {
if (!(seg = get_only_segment_using_this_lv(lv)))
return_0;
seg->lv->le_count =
seg->len =
seg->area_len = lv->le_count;
seg->lv->size = lv->size;
lv = seg->lv;
}
return 1;
}
/*
* Entry point for all LV reductions in size.
*/
static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
{
struct lv_segment *seg = NULL;
uint32_t count = extents;
uint32_t reduction;
struct logical_volume *pool_lv;
struct logical_volume *external_lv = NULL;
int is_raid10 = 0;
uint32_t data_copies = 0;
struct lv_list *lvl;
int is_last_pool = lv_is_pool(lv);
if (!dm_list_empty(&lv->segments)) {
seg = first_seg(lv);
is_raid10 = seg_is_any_raid10(seg) && seg->reshape_len;
data_copies = seg->data_copies;
}
if (lv_is_merging_origin(lv)) {
log_debug_metadata("Dropping snapshot merge of %s to removed origin %s.",
find_snapshot(lv)->lv->name, lv->name);
clear_snapshot_merge(lv);
}
dm_list_iterate_back_items(seg, &lv->segments) {
if (!count)
break;
if (seg->external_lv)
external_lv = seg->external_lv;
if (seg->len <= count) {
if (seg->merge_lv) {
log_debug_metadata("Dropping snapshot merge of removed %s to origin %s.",
seg->lv->name, seg->merge_lv->name);
clear_snapshot_merge(seg->merge_lv);
}
/* remove this segment completely */
/* FIXME Check this is safe */
if (seg->log_lv && !lv_remove(seg->log_lv))
return_0;
if (seg->metadata_lv && !lv_remove(seg->metadata_lv))
return_0;
/* Remove cache origin only when removing (not on lv_empty()) */
if (delete && seg_is_cache(seg)) {
if (lv_is_pending_delete(seg->lv)) {
/* Just dropping reference on origin when pending delete */
if (!remove_seg_from_segs_using_this_lv(seg_lv(seg, 0), seg))
return_0;
seg_lv(seg, 0) = NULL;
seg_le(seg, 0) = 0;
seg_type(seg, 0) = AREA_UNASSIGNED;
if (seg->pool_lv && !detach_pool_lv(seg))
return_0;
} else if (!lv_remove(seg_lv(seg, 0)))
return_0;
}
if (delete && seg_is_integrity(seg)) {
/* Remove integrity origin in addition to integrity layer. */
if (!lv_remove(seg_lv(seg, 0)))
return_0;
/* Remove integrity metadata. */
if (seg->integrity_meta_dev && !lv_remove(seg->integrity_meta_dev))
return_0;
}
if ((pool_lv = seg->pool_lv)) {
if (!detach_pool_lv(seg))
return_0;
/* When removing cached LV, remove pool as well */
if (seg_is_cache(seg) && !lv_remove(pool_lv))
return_0;
}
if (seg_is_thin_pool(seg)) {
/* For some segtypes the size may differ between the segment size and its layered LV
* i.e. thin-pool and tdata.
*
* This can become useful once we support multiple commits
* while resizing a stacked LV.
*/
if (seg->len != seg_lv(seg, 0)->le_count) {
seg->len = seg_lv(seg, 0)->le_count;
/* FIXME: ATM capture as error as it should not happen. */
log_debug(INTERNAL_ERROR "Pool size mismatched data size for %s",
display_lvname(seg->lv));
}
}
dm_list_del(&seg->list);
reduction = seg->len;
} else
reduction = count;
if (!_lv_segment_reduce(seg, reduction))
return_0;
count -= reduction;
}
if (!_setup_lv_size(lv, lv->le_count - extents * (is_raid10 ? data_copies : 1)))
return_0;
if ((seg = first_seg(lv))) {
if (is_raid10)
seg->len = seg->area_len = lv->le_count;
seg->extents_copied = seg->len;
}
if (!delete)
return 1;
if (lv == lv->vg->pool_metadata_spare_lv) {
lv->status &= ~POOL_METADATA_SPARE;
lv->vg->pool_metadata_spare_lv = NULL;
}
/* Remove the LV if it is now empty */
if (!lv->le_count && !unlink_lv_from_vg(lv))
return_0;
else if (lv->vg->fid->fmt->ops->lv_setup &&
!lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
return_0;
/* Removal of last user enforces refresh */
if (external_lv && !lv_is_external_origin(external_lv) &&
lv_is_active(external_lv) &&
!lv_update_and_reload(external_lv))
return_0;
/* When removing last pool, automatically drop the spare volume */
if (is_last_pool && lv->vg->pool_metadata_spare_lv) {
/* TODO: maybe use a list of pools or a counter to avoid linear search through VG */
dm_list_iterate_items(lvl, &lv->vg->lvs)
if (lv_is_thin_type(lvl->lv) ||
lv_is_cache_type(lvl->lv)) {
is_last_pool = 0;
break;
}
if (is_last_pool) {
/* This is a purely internal LV, no question */
if (!deactivate_lv(lv->vg->cmd, lv->vg->pool_metadata_spare_lv)) {
log_error("Unable to deactivate spare logical volume %s.",
display_lvname(lv->vg->pool_metadata_spare_lv));
return 0;
}
if (!lv_remove(lv->vg->pool_metadata_spare_lv))
return_0;
}
}
return 1;
}
/*
* Empty an LV.
*/
int lv_empty(struct logical_volume *lv)
{
return _lv_reduce(lv, lv->le_count, 0);
}
/*
* Empty an LV and add error segment.
*/
int replace_lv_with_error_segment(struct logical_volume *lv)
{
uint32_t len = lv->le_count;
if (len && !lv_empty(lv))
return_0;
/* Minimum size required for a table. */
if (!len)
len = 1;
/*
* Since we are replacing whatever was there with
* an error segment, we should also clear any flags
* that suggest it is anything other than "error".
*/
/* FIXME Check for other flags that need removing */
lv->status &= ~(MIRROR|MIRRORED|PVMOVE|LOCKED);
/* FIXME Check for any attached LVs that will become orphans e.g. mirror logs */
if (!lv_add_virtual_segment(lv, 0, len, get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_ERROR)))
return_0;
return 1;
}
static int _lv_refresh_suspend_resume(const struct logical_volume *lv)
{
struct cmd_context *cmd = lv->vg->cmd;
int r = 1;
if (!cmd->partial_activation && lv_is_partial(lv)) {
log_error("Refusing refresh of partial LV %s."
" Use '--activationmode partial' to override.",
display_lvname(lv));
return 0;
}
if (!suspend_lv(cmd, lv)) {
log_error("Failed to suspend %s.", display_lvname(lv));
r = 0;
}
if (!resume_lv(cmd, lv)) {
log_error("Failed to reactivate %s.", display_lvname(lv));
r = 0;
}
return r;
}
int lv_refresh_suspend_resume(const struct logical_volume *lv)
{
if (!_lv_refresh_suspend_resume(lv))
return 0;
/*
* Remove any transiently activated error
* devices which aren't used any more.
*/
if (lv_is_raid(lv) && !lv_deactivate_any_missing_subdevs(lv)) {
log_error("Failed to remove temporary SubLVs from %s", display_lvname(lv));
return 0;
}
return 1;
}
/*
* Remove given number of extents from LV.
*/
int lv_reduce(struct logical_volume *lv, uint32_t extents)
{
struct lv_segment *seg = first_seg(lv);
/* Ensure stripe boundary extents on RAID LVs */
if (lv_is_raid(lv) && extents != lv->le_count)
extents = _round_to_stripe_boundary(lv->vg, extents,
seg_is_raid1(seg) ? 0 : _raid_stripes_count(seg), 0);
if ((extents == lv->le_count) && lv_is_component(lv) && lv_is_active(lv)) {
/* When LV is removed, make sure it is inactive */
log_error(INTERNAL_ERROR "Removing still active LV %s.", display_lvname(lv));
return 0;
}
return _lv_reduce(lv, extents, 1);
}
int historical_glv_remove(struct generic_logical_volume *glv)
{
struct generic_logical_volume *origin_glv;
struct glv_list *glvl, *user_glvl;
struct historical_logical_volume *hlv;
int reconnected;
if (!glv || !glv->is_historical)
return_0;
hlv = glv->historical;
if (!(glv = find_historical_glv(hlv->vg, hlv->name, 0, &glvl))) {
if (!(find_historical_glv(hlv->vg, hlv->name, 1, NULL))) {
log_error(INTERNAL_ERROR "historical_glv_remove: historical LV %s/-%s not found ",
hlv->vg->name, hlv->name);
return 0;
}
log_verbose("Historical LV %s/-%s already on removed list ",
hlv->vg->name, hlv->name);
return 1;
}
if ((origin_glv = hlv->indirect_origin) &&
!remove_glv_from_indirect_glvs(origin_glv, glv))
return_0;
dm_list_iterate_items(user_glvl, &hlv->indirect_glvs) {
reconnected = 0;
if ((origin_glv && !origin_glv->is_historical) && !user_glvl->glv->is_historical)
log_verbose("Removing historical connection between %s and %s.",
origin_glv->live->name, user_glvl->glv->live->name);
else if (hlv->vg->cmd->record_historical_lvs) {
if (!add_glv_to_indirect_glvs(hlv->vg->vgmem, origin_glv, user_glvl->glv))
return_0;
reconnected = 1;
}
if (!reconnected) {
/*
* Break ancestry chain if we're removing historical LV and tracking
* historical LVs is switched off either via:
* - "metadata/record_lvs_history=0" config
* - "--nohistory" cmd line option
*
* Also, break the chain if we're unable to store such connection at all
* because we're removing the very last historical LV that was in between
* live LVs - pure live LVs can't store any indirect origin relation in
* metadata - we need at least one historical LV to do that!
*/
if (user_glvl->glv->is_historical)
user_glvl->glv->historical->indirect_origin = NULL;
else
first_seg(user_glvl->glv->live)->indirect_origin = NULL;
}
}
dm_list_move(&hlv->vg->removed_historical_lvs, &glvl->list);
return 1;
}
/*
* Completely remove an LV.
*/
int lv_remove(struct logical_volume *lv)
{
if (lv_is_historical(lv))
return historical_glv_remove(lv->this_glv);
if (!lv_reduce(lv, lv->le_count))
return_0;
return 1;
}
/*
* A set of contiguous physical extents allocated
*/
struct alloced_area {
struct dm_list list;
struct physical_volume *pv;
uint32_t pe;
uint32_t len;
};
/*
* Details of an allocation attempt
*/
struct alloc_handle {
struct cmd_context *cmd;
struct dm_pool *mem;
alloc_policy_t alloc; /* Overall policy */
int approx_alloc; /* get as much as possible up to new_extents */
uint32_t new_extents; /* Number of new extents required */
uint32_t area_count; /* Number of parallel areas */
uint32_t parity_count; /* Adds to area_count, but not area_multiple */
uint32_t area_multiple; /* seg->len = area_len * area_multiple */
uint32_t log_area_count; /* Number of parallel logs */
uint32_t metadata_area_count; /* Number of parallel metadata areas */
uint32_t log_len; /* Length of log/metadata_area */
uint32_t region_size; /* Mirror region size */
uint32_t total_area_len; /* Total number of parallel extents */
unsigned maximise_cling;
unsigned mirror_logs_separate; /* Force mirror logs on separate PVs? */
/*
* RAID devices require a metadata area that accompanies each
* device. During initial creation, it is best to look for space
* that is new_extents + log_len and then split that between two
* allocated areas when found. 'alloc_and_split_meta' indicates
* that this is the desired dynamic.
*
* This same idea is used by cache LVs to get the metadata device
* and data device allocated together.
*/
unsigned alloc_and_split_meta;
unsigned split_metadata_is_allocated; /* Metadata has been allocated */
const struct dm_config_node *cling_tag_list_cn;
struct dm_list *parallel_areas; /* PVs to avoid */
/*
* Contains area_count lists of areas allocated to data stripes
* followed by log_area_count lists of areas allocated to log stripes.
*/
struct dm_list alloced_areas[];
};
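/*
 * The trailing flexible array member means an alloc_handle must be
 * allocated with room for one dm_list per parallel area.  A minimal
 * sketch of such an allocation (the real setup lives in _alloc_init(),
 * not shown in this file):
 *
 *	uint32_t s, total = area_count + parity_count + metadata_area_count;
 *	struct alloc_handle *ah;
 *
 *	if (!(ah = dm_pool_zalloc(mem, sizeof(*ah) +
 *				  total * sizeof(ah->alloced_areas[0]))))
 *		return_NULL;
 *	for (s = 0; s < total; s++)
 *		dm_list_init(&ah->alloced_areas[s]);
 */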
/*
* Returns log device size in extents, algorithm from kernel code
*/
#define BYTE_SHIFT 3
static uint32_t _mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len)
{
uint64_t area_size, region_count, bitset_size, log_size;
area_size = (uint64_t) area_len * pe_size;
region_count = dm_div_up(area_size, region_size);
/* Work out how many "unsigned long"s we need to hold the bitset. */
bitset_size = dm_round_up(region_count, sizeof(uint32_t) << BYTE_SHIFT);
bitset_size >>= BYTE_SHIFT;
/* Log device holds both header and bitset. */
log_size = dm_round_up((MIRROR_LOG_OFFSET << SECTOR_SHIFT) + bitset_size, 1 << SECTOR_SHIFT);
log_size >>= SECTOR_SHIFT;
log_size = dm_div_up(log_size, pe_size);
if (log_size > UINT32_MAX) {
log_error("Log size needs too many extents "FMTu64" with region size of %u sectors.",
log_size, region_size);
log_size = UINT32_MAX;
/* VG likely will not have enough free space for this allocation -> error */
}
return (uint32_t) log_size;
}
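/*
 * Worked example (assuming MIRROR_LOG_OFFSET is 2 sectors): with
 * region_size = 1024 sectors, pe_size = 8192 sectors (4 MiB extents) and
 * area_len = 2560 extents, area_size is 20971520 sectors, giving
 * region_count = 20480 and a 2560-byte bitset; header plus bitset round
 * up to 7 sectors, i.e. a single extent for the log.
 */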
/* Is there enough total space or should we give up immediately? */
static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms,
uint32_t allocated, uint32_t log_still_needed,
uint32_t extents_still_needed)
{
uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple;
uint32_t metadata_extents_needed = (ah->alloc_and_split_meta ? 0 : ah->metadata_area_count * RAID_METADATA_AREA_LEN) +
(log_still_needed ? ah->log_len : 0); /* One each */
uint64_t total_extents_needed = (uint64_t)area_extents_needed + parity_extents_needed + metadata_extents_needed;
uint32_t free_pes = pv_maps_size(pvms);
if (total_extents_needed > free_pes) {
log_error("Insufficient free space: %" PRIu64 " extents needed,"
" but only %" PRIu32 " available",
total_extents_needed, free_pes);
return 0;
}
return 1;
}
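/*
 * Example: with ah->area_count = 3, ah->parity_count = 1,
 * ah->area_multiple = 3 and 300 extents still needed, the check requires
 * 300 * 3 / 3 = 300 data extents plus 300 * 1 / 3 = 100 parity extents
 * (plus any log/metadata extents) to be free across the candidate PVs.
 */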
/* For striped mirrors, all the areas are counted, through the mirror layer */
static uint32_t _stripes_per_mimage(struct lv_segment *seg)
{
struct lv_segment *last_lvseg;
if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) {
last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment);
if (seg_is_striped(last_lvseg))
return last_lvseg->area_count;
}
return 1;
}
static void _init_alloc_parms(struct alloc_handle *ah,
struct alloc_parms *alloc_parms,
alloc_policy_t alloc,
struct lv_segment *prev_lvseg, unsigned can_split,
uint32_t allocated, uint32_t extents_still_needed)
{
alloc_parms->alloc = alloc;
alloc_parms->prev_lvseg = prev_lvseg;
alloc_parms->flags = 0;
alloc_parms->extents_still_needed = extents_still_needed;
/*
* Only attempt contiguous/cling allocation to previous segment
* areas if the number of areas matches.
*/
if (alloc_parms->prev_lvseg &&
((ah->area_count + ah->parity_count) == prev_lvseg->area_count)) {
alloc_parms->flags |= A_AREA_COUNT_MATCHES;
/* Are there any preceding segments we must follow on from? */
if (alloc_parms->alloc == ALLOC_CONTIGUOUS) {
alloc_parms->flags |= A_CONTIGUOUS_TO_LVSEG;
alloc_parms->flags |= A_POSITIONAL_FILL;
} else if ((alloc_parms->alloc == ALLOC_CLING) ||
(alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
alloc_parms->flags |= A_CLING_TO_LVSEG;
alloc_parms->flags |= A_POSITIONAL_FILL;
}
} else
/*
* A cling allocation that follows a successful contiguous
* allocation must use the same PVs (or else fail).
*/
if ((alloc_parms->alloc == ALLOC_CLING) ||
(alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
alloc_parms->flags |= A_CLING_TO_ALLOCED;
alloc_parms->flags |= A_POSITIONAL_FILL;
}
if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)
alloc_parms->flags |= A_CLING_BY_TAGS;
if (!(alloc_parms->alloc & A_POSITIONAL_FILL) &&
(alloc_parms->alloc == ALLOC_CONTIGUOUS) &&
ah->cling_tag_list_cn)
alloc_parms->flags |= A_PARTITION_BY_TAGS;
/*
* For normal allocations, if any extents have already been found
* for allocation, prefer to place further extents on the same disks as
* have already been used.
*/
if (ah->maximise_cling &&
(alloc_parms->alloc == ALLOC_NORMAL) &&
(allocated != alloc_parms->extents_still_needed))
alloc_parms->flags |= A_CLING_TO_ALLOCED;
if (can_split)
alloc_parms->flags |= A_CAN_SPLIT;
}
static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
uint32_t area_count,
uint32_t stripe_size,
const struct segment_type *segtype,
struct alloced_area *aa,
uint32_t region_size)
{
uint32_t s, extents, area_multiple;
struct lv_segment *seg;
area_multiple = _calc_area_multiple(segtype, area_count, 0);
extents = aa[0].len * area_multiple;
if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0,
status, stripe_size, NULL,
area_count,
aa[0].len, 0, 0u, region_size, 0u, NULL))) {
log_error("Couldn't allocate new LV segment.");
return 0;
}
for (s = 0; s < area_count; s++)
if (!set_lv_segment_area_pv(seg, s, aa[s].pv, aa[s].pe))
return_0;
dm_list_add(&lv->segments, &seg->list);
extents = aa[0].len * area_multiple;
if (!_setup_lv_size(lv, lv->le_count + extents))
return_0;
return 1;
}
static int _setup_alloced_segments(struct logical_volume *lv,
struct dm_list *alloced_areas,
uint32_t area_count,
uint64_t status,
uint32_t stripe_size,
const struct segment_type *segtype,
uint32_t region_size)
{
struct alloced_area *aa;
dm_list_iterate_items(aa, &alloced_areas[0]) {
if (!_setup_alloced_segment(lv, status, area_count,
stripe_size, segtype, aa,
region_size))
return_0;
}
return 1;
}
/*
* This function takes a list of pv_areas and adds them to allocated_areas.
* If the complete area is not needed then it gets split.
* The part used is removed from the pv_map so it can't be allocated twice.
*/
static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocate,
struct alloc_state *alloc_state, uint32_t ix_log_offset)
{
uint32_t area_len, len;
uint32_t s, smeta;
uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */
uint32_t total_area_count;
struct alloced_area *aa;
struct pv_area *pva;
total_area_count = ah->area_count + ah->parity_count + alloc_state->log_area_count_still_needed;
if (!total_area_count) {
log_warn(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do.");
return 1;
}
area_len = max_to_allocate / ah->area_multiple;
/* Reduce area_len to the smallest of the areas */
for (s = 0; s < ah->area_count + ah->parity_count; s++)
if (area_len > alloc_state->areas[s].used)
area_len = alloc_state->areas[s].used;
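
	/*
	 * When metadata is split off from the same allocation, reserve twice
	 * as many alloced_area slots: image areas fill the first half of aa,
	 * their metadata areas the second (see the comment below).
	 */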
len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? total_area_count * 2 : total_area_count;
len *= sizeof(*aa);
if (!(aa = dm_pool_alloc(ah->mem, len))) {
log_error("alloced_area allocation failed");
return 0;
}
	/*
	 * The areas array consists of area_count + parity_count areas for
	 * data stripes, then ix_log_skip areas to skip, then log_area_count
	 * areas to use for the log, then any areas too small for the log.
	 */
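	/*
	 * Illustrative layout of alloc_state->areas for one pass:
	 *
	 *   [ data + parity areas | ix_log_skip skipped | log areas | too small ]
	 */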
len = area_len;
for (s = 0; s < total_area_count; s++) {
if (s == (ah->area_count + ah->parity_count)) {
ix_log_skip = ix_log_offset - ah->area_count;
len = ah->log_len;
}
pva = alloc_state->areas[s + ix_log_skip].pva;
if (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) {
/*
* The metadata area goes at the front of the allocated
* space for now, but could easily go at the end (or
* middle!).
*
* Even though we split these two from the same
* allocation, we store the images at the beginning
* of the areas array and the metadata at the end.
*/
smeta = s + ah->area_count + ah->parity_count;
aa[smeta].pv = pva->map->pv;
aa[smeta].pe = pva->start;
aa[smeta].len = ah->log_len;
if (aa[smeta].len > pva->count) {
log_error("Metadata does not fit on a single PV.");
return 0;
}
log_debug_alloc("Allocating parallel metadata area %" PRIu32
" on %s start PE %" PRIu32
" length %" PRIu32 ".",
(smeta - (ah->area_count + ah->parity_count)),
pv_dev_name(aa[smeta].pv), aa[smeta].pe,
aa[smeta].len);
consume_pv_area(pva, aa[smeta].len);
dm_list_add(&ah->alloced_areas[smeta], &aa[smeta].list);
}
aa[s].len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? len - ah->log_len : len;
/* Skip empty allocations */
if (!aa[s].len)
continue;
aa[s].pv = pva->map->pv;
aa[s].pe = pva->start;
log_debug_alloc("Allocating parallel area %" PRIu32
" on %s start PE %" PRIu32 " length %" PRIu32 ".",
s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len);
consume_pv_area(pva, aa[s].len);
dm_list_add(&ah->alloced_areas[s], &aa[s].list);
}
/* Only need to alloc metadata from the first batch */
if (ah->alloc_and_split_meta)
ah->split_metadata_is_allocated = 1;
ah->total_area_len += area_len;
alloc_state->allocated += area_len * ah->area_multiple;
return 1;
}

/*
* Call fn for each AREA_PV used by the LV segment at lv:le of length *max_seg_len.
* If any constituent area contains more than one segment, max_seg_len is
* reduced to cover only the first.
* fn should return 0 on error, 1 to continue scanning or >1 to terminate without error.
* In the last case, this function passes on the return code.
* FIXME I think some callers are expecting this to check all PV segments used by an LV.
*/
static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
uint32_t le, uint32_t len, struct lv_segment *seg,
uint32_t *max_seg_len,
uint32_t first_area, uint32_t max_areas,
int top_level_area_index,
int only_single_area_segments,
int (*fn)(struct cmd_context *cmd,
struct pv_segment *peg, uint32_t s,
void *data),
void *data)
{
uint32_t s;
uint32_t remaining_seg_len, area_len, area_multiple;
uint32_t stripes_per_mimage = 1;
int r = 1;
if (!seg && !(seg = find_seg_by_le(lv, le))) {
log_error("Failed to find segment for %s extent %" PRIu32,
lv->name, le);
return 0;
}
/* Remaining logical length of segment */
remaining_seg_len = seg->len - (le - seg->le);
if (remaining_seg_len > len)
remaining_seg_len = len;
if (max_seg_len && *max_seg_len > remaining_seg_len)
*max_seg_len = remaining_seg_len;
area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0);
area_len = (remaining_seg_len / area_multiple) ? : 1;
/* For striped mirrors, all the areas are counted, through the mirror layer */
if (top_level_area_index == -1)
stripes_per_mimage = _stripes_per_mimage(seg);
for (s = first_area;
s < seg->area_count && (!max_areas || s <= max_areas);
s++) {
if (seg_type(seg, s) == AREA_LV) {
if (!(r = _for_each_pv(cmd, seg_lv(seg, s),
seg_le(seg, s) +
(le - seg->le) / area_multiple,
area_len, NULL, max_seg_len, 0,
(stripes_per_mimage == 1) && only_single_area_segments ? 1U : 0U,
(top_level_area_index != -1) ? top_level_area_index : (int) (s * stripes_per_mimage),
only_single_area_segments, fn,
data)))
stack;
} else if (seg_type(seg, s) == AREA_PV)
if (!(r = fn(cmd, seg_pvseg(seg, s), top_level_area_index != -1 ? (uint32_t) top_level_area_index + s : s, data)))
stack;
if (r != 1)
return r;
}
/* FIXME only_single_area_segments used as workaround to skip log LV - needs new param? */
if (!only_single_area_segments && seg_is_mirrored(seg) && seg->log_lv) {
if (!(r = _for_each_pv(cmd, seg->log_lv, 0, seg->log_lv->le_count, NULL,
NULL, 0, 0, 0, only_single_area_segments,
fn, data)))
stack;
if (r != 1)
return r;
}
/* FIXME Add snapshot cow, thin meta etc. */
/*
if (!only_single_area_segments && !max_areas && seg_is_raid(seg)) {
for (s = first_area; s < seg->area_count; s++) {
if (seg_metalv(seg, s))
if (!(r = _for_each_pv(cmd, seg_metalv(seg, s), 0, seg_metalv(seg, s)->le_count, NULL,
NULL, 0, 0, 0, 0, fn, data)))
stack;
if (r != 1)
return r;
}
}
*/
return 1;
}
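
/*
 * Hypothetical usage sketch for _for_each_pv() (callback name illustrative):
 * count the PV segments backing an LV.
 *
 *	static int _count_pvsegs(struct cmd_context *cmd,
 *				 struct pv_segment *peg, uint32_t s, void *data)
 *	{
 *		(*(uint32_t *) data)++;
 *		return 1;	<- 0 = error, 1 = keep scanning, >1 = stop cleanly
 *	}
 *
 *	uint32_t count = 0;
 *	if (!_for_each_pv(cmd, lv, 0, lv->le_count, NULL, NULL,
 *			  0, 0, -1, 0, _count_pvsegs, &count))
 *		stack;
 */

/* qsort(3) comparator: sorts areas into descending order of space used. */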
static int _comp_area(const void *l, const void *r)
{
const struct pv_area_used *lhs = (const struct pv_area_used *) l;
const struct pv_area_used *rhs = (const struct pv_area_used *) r;
if (lhs->used < rhs->used)
return 1;
if (lhs->used > rhs->used)
return -1;
return 0;
}

/*
 * State for searching for a pvseg that matches a condition.
 */
struct pv_match {
int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva);
struct alloc_handle *ah;
struct alloc_state *alloc_state;
struct pv_area *pva;
const struct dm_config_node *cling_tag_list_cn;
int s; /* Area index of match */
};

/*
* Is PV area on the same PV?
*/
static int _is_same_pv(struct pv_match *pvmatch __attribute__((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
if (pvseg->pv != pva->map->pv)
return 0;
return 1;
}

/*
* Does PV area have a tag listed in allocation/cling_tag_list that
* matches EITHER a tag of the PV of the existing segment OR a tag in pv_tags?
 * If mem is set, then instead a comma-separated list of the matching tags is
 * appended to the object being built there, for later printing.
*/
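
/*
 * Example lvm.conf(5) entry (illustrative tag names):
 *
 *     allocation {
 *         cling_tag_list = [ "@site_a", "@site_b" ]
 *     }
 *
 * Entries must start with '@'; "@*" matches any tag against any tag.
 */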
static int _match_pv_tags(const struct dm_config_node *cling_tag_list_cn,
struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
struct physical_volume *pv2, struct dm_list *pv_tags, unsigned validate_only,
struct dm_pool *mem, unsigned parallel_pv)
{
const struct dm_config_value *cv;
const char *str;
const char *tag_matched;
struct dm_list *tags_to_match = mem ? NULL : pv_tags ? : &pv2->tags;
struct dm_str_list *sl;
unsigned first_tag = 1;
for (cv = cling_tag_list_cn->v; cv; cv = cv->next) {
if (cv->type != DM_CFG_STRING) {
if (validate_only)
log_warn("WARNING: Ignoring invalid string in config file entry "
"allocation/cling_tag_list");
continue;
}
str = cv->v.str;
if (!*str) {
if (validate_only)
log_warn("WARNING: Ignoring empty string in config file entry "
"allocation/cling_tag_list");
continue;
}
if (*str != '@') {
if (validate_only)
log_warn("WARNING: Ignoring string not starting with @ in config file entry "
"allocation/cling_tag_list: %s", str);
continue;
}
str++;
if (!*str) {
if (validate_only)
log_warn("WARNING: Ignoring empty tag in config file entry "
"allocation/cling_tag_list");
continue;
}
if (validate_only)
continue;
/* Wildcard matches any tag against any tag. */
if (!strcmp(str, "*")) {
if (mem) {
dm_list_iterate_items(sl, &pv1->tags) {
if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
log_error("PV tags string extension failed.");
return 0;
}
first_tag = 0;
if (!dm_pool_grow_object(mem, sl->str, 0)) {
log_error("PV tags string extension failed.");
return 0;
}
}
continue;
}
if (!str_list_match_list(&pv1->tags, tags_to_match, &tag_matched))
continue;
if (!pv_tags) {
if (parallel_pv)
log_debug_alloc("Not using free space on %s: Matched allocation PV tag %s on existing parallel PV %s.",
pv_dev_name(pv1), tag_matched, pv2 ? pv_dev_name(pv2) : "-");
else
log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
tag_matched, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-");
} else
log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
" from consideration: PV tag %s already used.",
area_num, pv_dev_name(pv1), pv1_start_pe, tag_matched);
return 1;
}
if (!str_list_match_item(&pv1->tags, str) ||
(tags_to_match && !str_list_match_item(tags_to_match, str)))
continue;
if (mem) {
if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
log_error("PV tags string extension failed.");
return 0;
}
first_tag = 0;
if (!dm_pool_grow_object(mem, str, 0)) {
log_error("PV tags string extension failed.");
return 0;
}
continue;
}
if (!pv_tags) {
if (parallel_pv)
log_debug_alloc("Not using free space on %s: Matched allocation PV tag %s on existing parallel PV %s.",
pv2 ? pv_dev_name(pv2) : "-", str, pv_dev_name(pv1));
else
log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
str, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-");
} else
log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
" from consideration: PV tag %s already used.",
area_num, pv_dev_name(pv1), pv1_start_pe, str);
return 1;
}
if (mem)
return 1;
return 0;
}
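
/* Check the cling_tag_list config entries themselves; no PVs are examined (validate_only = 1). */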
static int _validate_tag_list(const struct dm_config_node *cling_tag_list_cn)
{
return _match_pv_tags(cling_tag_list_cn, NULL, 0, 0, NULL, NULL, 1, NULL, 0);
}

static int _tags_list_str(struct dm_pool *mem, struct physical_volume *pv1, const struct dm_config_node *cling_tag_list_cn)
{
if (!_match_pv_tags(cling_tag_list_cn, pv1, 0, 0, NULL, NULL, 0, mem, 0)) {
dm_pool_abandon_object(mem);
return_0;
}
return 1;
}

/*
* Does PV area have a tag listed in allocation/cling_tag_list that
* matches a tag in the pv_tags list?
*/
static int _pv_has_matching_tag(const struct dm_config_node *cling_tag_list_cn,
struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
struct dm_list *pv_tags)
{
return _match_pv_tags(cling_tag_list_cn, pv1, pv1_start_pe, area_num, NULL, pv_tags, 0, NULL, 0);
}

/*
* Does PV area have a tag listed in allocation/cling_tag_list that
* matches a tag of the PV of the existing segment?
*/
static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn,
struct physical_volume *pv1, struct physical_volume *pv2,
unsigned parallel_pv)
{
return _match_pv_tags(cling_tag_list_cn, pv1, 0, 0, pv2, NULL, 0, NULL, parallel_pv);
}

static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva)
{
return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv, 0);
}
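
/*
 * Log, for debugging, one line per parallel area set, e.g. (illustrative):
 *
 *     Parallel PVs at LE 0 length 1024: /dev/sda1(site_a) /dev/sdb1(site_b)
 *
 * PV tags in parentheses appear only when cling_tag_list is configured.
 */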
static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas,
const struct dm_config_node *cling_tag_list_cn)
{
struct seg_pvs *spvs;
struct pv_list *pvl;
char *pvnames;
unsigned first;
if (!parallel_areas)
return 1;
dm_list_iterate_items(spvs, parallel_areas) {
first = 1;
if (!dm_pool_begin_object(mem, 256)) {
log_error("dm_pool_begin_object failed");
return 0;
}
dm_list_iterate_items(pvl, &spvs->pvs) {
if (!first && !dm_pool_grow_object(mem, " ", 1)) {
log_error("dm_pool_grow_object failed");
dm_pool_abandon_object(mem);
return 0;
}
if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) {
log_error("dm_pool_grow_object failed");
dm_pool_abandon_object(mem);
return 0;
}
if (cling_tag_list_cn) {
if (!dm_pool_grow_object(mem, "(", 1)) {
log_error("dm_pool_grow_object failed");
dm_pool_abandon_object(mem);
return 0;
}
if (!_tags_list_str(mem, pvl->pv, cling_tag_list_cn)) {
dm_pool_abandon_object(mem);
return_0;
}
if (!dm_pool_grow_object(mem, ")", 1)) {
log_error("dm_pool_grow_object failed");
dm_pool_abandon_object(mem);
return 0;
}
}
first = 0;
}
if (!dm_pool_grow_object(mem, "\0", 1)) {
log_error("dm_pool_grow_object failed");
dm_pool_abandon_object(mem);
return 0;
}
pvnames = dm_pool_end_object(mem);
log_debug_alloc("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s",
spvs->le, spvs->len, pvnames);
dm_pool_free(mem, pvnames);
}
return 1;
}

/*
* Is PV area contiguous to PV segment?
*/
static int _is_contiguous(struct pv_match *pvmatch __attribute__((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
if (pvseg->pv != pva->map->pv)
return 0;
if (pvseg->pe + pvseg->len != pva->start)
return 0;
return 1;
}

static int _reserve_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
struct pv_area_used *area_used = &alloc_state->areas[ix_pva];
const char *pv_tag_list = NULL;
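
	/* Build a printable list of the PV's matching cling tags, used only in the debug message below. */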
if (ah->cling_tag_list_cn) {
if (!dm_pool_begin_object(ah->mem, 256)) {
log_error("PV tags string allocation failed.");
return 0;
} else if (!_tags_list_str(ah->mem, pva->map->pv, ah->cling_tag_list_cn))
dm_pool_abandon_object(ah->mem);
else if (!dm_pool_grow_object(ah->mem, "\0", 1)) {
dm_pool_abandon_object(ah->mem);
log_error("PV tags string extension failed.");
return 0;
} else
pv_tag_list = dm_pool_end_object(ah->mem);
}
log_debug_alloc("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
" length %" PRIu32 " leaving %" PRIu32 "%s%s.",
area_used->pva ? "Changing " : "Considering",
ix_pva, area_used->pva ? "to" : "as",
dev_name(pva->map->pv->dev), pva->start, required, unreserved,
pv_tag_list ? " with PV tags: " : "",
pv_tag_list ? : "");
if (pv_tag_list)
dm_pool_free(ah->mem, (void *)pv_tag_list);
area_used->pva = pva;
area_used->used = required;
return 1;
}

static int _reserve_required_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
uint32_t s;
struct pv_area_used *new_state;
/* Expand areas array if needed after an area was split. */
if (ix_pva >= alloc_state->areas_size) {
alloc_state->areas_size *= 2;
if (!(new_state = realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
log_error("Memory reallocation for parallel areas failed.");
return 0;
}
alloc_state->areas = new_state;
for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
alloc_state->areas[s].pva = NULL;
}
if (!_reserve_area(ah, alloc_state, pva, required, ix_pva, unreserved))
return_0;
return 1;
}
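
/*
 * Callback for _for_each_pv(): returns 0 on error, 1 to continue scanning,
 * and 2 once a matching area has been found (and, when filling positionally,
 * reserved).
 */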
static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
struct pv_segment *pvseg, uint32_t s,
void *data)
{
struct pv_match *pvmatch = data;
int positional = pvmatch->alloc_state->alloc_parms->flags & A_POSITIONAL_FILL;
if (positional && pvmatch->alloc_state->areas[s].pva)
return 1; /* Area already assigned */
if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
return 1; /* Continue */
if (positional && (s >= pvmatch->alloc_state->num_positional_areas))
return 1;
/* FIXME The previous test should make this one redundant. */
if (positional && (s >= pvmatch->alloc_state->areas_size))
return 1;
/*
* Only used for cling and contiguous policies (which only make one allocation per PV)
* so it's safe to say all the available space is used.
*/
if (positional &&
!_reserve_required_area(pvmatch->ah, pvmatch->alloc_state, pvmatch->pva, pvmatch->pva->count, s, 0))
return_0;
return 2; /* Finished */
}

/*
 * Is pva on the same PV (or, with cling_tag_list, a PV with a matching tag)
 * as any area already used by prev_lvseg?
 */
static int _check_cling(struct alloc_handle *ah,
const struct dm_config_node *cling_tag_list_cn,
struct lv_segment *prev_lvseg, struct pv_area *pva,
struct alloc_state *alloc_state)
{
struct pv_match pvmatch;
int r;
uint32_t le, len;
pvmatch.ah = ah;
pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
pvmatch.alloc_state = alloc_state;
pvmatch.pva = pva;
pvmatch.cling_tag_list_cn = cling_tag_list_cn;
if (ah->maximise_cling) {
/* Check entire LV */
le = 0;
len = prev_lvseg->le + prev_lvseg->len;
} else {
/* Only check 1 LE at end of previous LV segment */
le = prev_lvseg->le + prev_lvseg->len - 1;
len = 1;
}
/* FIXME Cope with stacks by flattening */
if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, le, len, NULL, NULL,
0, 0, -1, 1,
_is_condition, &pvmatch)))
stack;
if (r != 2)
return 0;
return 1;
}

/*
 * Is pva contiguous to any area used at the end of prev_lvseg?
 */
static int _check_contiguous(struct alloc_handle *ah,
struct lv_segment *prev_lvseg, struct pv_area *pva,
struct alloc_state *alloc_state)
{
struct pv_match pvmatch;
int r;
pvmatch.ah = ah;
pvmatch.condition = _is_contiguous;
	pvmatch.alloc_state = alloc_state;