1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-12-21 13:34:40 +03:00
lvm2/lib/raid/raid.c
Zdenek Kabelac 9a371639fc cleanup: relocate section
Move _features[] structure from .data.rel.ro to .rodata.
2024-05-27 15:16:26 +02:00

735 lines
20 KiB
C

/*
* Copyright (C) 2011-2017 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "base/memory/zalloc.h"
#include "lib/misc/lib.h"
#include "lib/metadata/segtype.h"
#include "lib/display/display.h"
#include "lib/format_text/text_export.h"
#include "lib/config/config.h"
#include "lib/datastruct/str_list.h"
#include "lib/activate/targets.h"
#include "lib/misc/lvm-string.h"
#include "lib/activate/activate.h"
#include "lib/metadata/metadata.h"
#include "lib/metadata/lv_alloc.h"
static void _raid_display(const struct lv_segment *seg)
{
unsigned s;
for (s = 0; s < seg->area_count; ++s) {
log_print(" Raid Data LV%2d", s);
display_stripe(seg, s, " ");
}
if (seg->meta_areas)
for (s = 0; s < seg->area_count; ++s)
if (seg_metalv(seg, s))
log_print(" Raid Metadata LV%2d\t%s", s, seg_metalv(seg, s)->name);
log_print(" ");
}
static int _raid_text_import_area_count(const struct dm_config_node *sn,
uint32_t *area_count)
{
uint32_t stripe_count = 0, device_count = 0;
int stripe_count_found, device_count_found;
device_count_found = dm_config_get_uint32(sn, "device_count", &device_count);
stripe_count_found = dm_config_get_uint32(sn, "stripe_count", &stripe_count);
if (!device_count_found && !stripe_count_found) {
log_error("Couldn't read 'device_count' or 'stripe_count' for "
"segment '%s'.", dm_config_parent_name(sn));
return 0;
}
if (device_count_found && stripe_count_found) {
log_error("Only one of 'device_count' and 'stripe_count' allowed for "
"segment '%s'.", dm_config_parent_name(sn));
return 0;
}
*area_count = stripe_count + device_count;
return 1;
}
static int _raid_text_import_areas(struct lv_segment *seg,
const struct dm_config_node *sn,
const struct dm_config_value *cv)
{
unsigned int s;
struct logical_volume *lv;
const char *seg_name = dm_config_parent_name(sn);
if (!seg->area_count) {
log_error("No areas found for segment %s", seg_name);
return 0;
}
for (s = 0; cv && s < seg->area_count; s++, cv = cv->next) {
if (cv->type != DM_CFG_STRING) {
log_error("Bad volume name in areas array for segment %s.", seg_name);
return 0;
}
/* Metadata device comes first. */
if (!(lv = find_lv(seg->lv->vg, cv->v.str))) {
log_error("Couldn't find volume '%s' for segment '%s'.",
cv->v.str ? : "NULL", seg_name);
return 0;
}
if (strstr(lv->name, "_rmeta_")) {
if (!set_lv_segment_area_lv(seg, s, lv, 0, RAID_META))
return_0;
cv = cv->next;
}
if (!cv) {
log_error("Missing data device in areas array for segment %s.", seg_name);
return 0;
}
/* Data device comes second */
if (!(lv = find_lv(seg->lv->vg, cv->v.str))) {
log_error("Couldn't find volume '%s' for segment '%s'.",
cv->v.str ? : "NULL", seg_name);
return 0;
}
if (!set_lv_segment_area_lv(seg, s, lv, 0, RAID_IMAGE))
return_0;
}
/*
* Check we read the correct number of RAID data/meta pairs.
*/
if (cv || (s < seg->area_count)) {
log_error("Incorrect number of areas in area array "
"for segment '%s'.", seg_name);
return 0;
}
return 1;
}
static int _raid_text_import(struct lv_segment *seg,
const struct dm_config_node *sn,
struct dm_hash_table *pv_hash)
{
const struct dm_config_value *cv;
const struct {
const char *name;
uint32_t *var;
} raid_attr_import[] = {
{ "region_size", &seg->region_size },
{ "stripe_size", &seg->stripe_size },
{ "data_copies", &seg->data_copies },
{ "writebehind", &seg->writebehind },
{ "min_recovery_rate", &seg->min_recovery_rate },
{ "max_recovery_rate", &seg->max_recovery_rate },
{ "data_offset", &seg->data_offset },
}, *aip = raid_attr_import;
unsigned i;
for (i = 0; i < DM_ARRAY_SIZE(raid_attr_import); i++, aip++) {
if (dm_config_has_node(sn, aip->name)) {
if (!dm_config_get_uint32(sn, aip->name, aip->var)) {
if (!strcmp(aip->name, "data_copies") ||
!strcmp(aip->name, "data_offset")) {
*aip->var = 0;
continue;
}
log_error("Couldn't read '%s' for segment %s of logical volume %s.",
aip->name, dm_config_parent_name(sn), seg->lv->name);
return 0;
}
if (!strcmp(aip->name, "data_offset") && !*aip->var)
*aip->var = 1;
}
}
if (!dm_config_get_list(sn, seg_is_raid0(seg) ? "raid0_lvs" : "raids", &cv)) {
log_error("Couldn't find RAID array for "
"segment %s of logical volume %s.",
dm_config_parent_name(sn), seg->lv->name);
return 0;
}
if (!_raid_text_import_areas(seg, sn, cv)) {
log_error("Failed to import RAID component pairs.");
return 0;
}
if (seg->data_copies < 2)
seg->data_copies = lv_raid_data_copies(seg->segtype, seg->area_count);
if (seg_is_any_raid0(seg))
seg->area_len /= seg->area_count;
return 1;
}
static int _raid_text_export_raid0(const struct lv_segment *seg, struct formatter *f)
{
outf(f, "stripe_count = %u", seg->area_count);
if (seg->stripe_size)
outf(f, "stripe_size = %" PRIu32, seg->stripe_size);
return out_areas(f, seg, seg_is_raid0(seg) ? "raid0_lv" : "raid");
}
static int _raid_text_export_raid(const struct lv_segment *seg, struct formatter *f)
{
int raid0 = seg_is_any_raid0(seg);
if (raid0)
outfc(f, (seg->area_count == 1) ? "# linear" : NULL,
"stripe_count = %u", seg->area_count);
else {
outf(f, "device_count = %u", seg->area_count);
if (seg_is_any_raid10(seg) && seg->data_copies > 0)
outf(f, "data_copies = %" PRIu32, seg->data_copies);
if (seg->region_size)
outf(f, "region_size = %" PRIu32, seg->region_size);
}
if (seg->stripe_size)
outf(f, "stripe_size = %" PRIu32, seg->stripe_size);
if (!raid0) {
if (seg_is_raid1(seg) && seg->writebehind)
outf(f, "writebehind = %" PRIu32, seg->writebehind);
if (seg->min_recovery_rate)
outf(f, "min_recovery_rate = %" PRIu32, seg->min_recovery_rate);
if (seg->max_recovery_rate)
outf(f, "max_recovery_rate = %" PRIu32, seg->max_recovery_rate);
if (seg->data_offset)
outf(f, "data_offset = %" PRIu32, seg->data_offset == 1 ? 0 : seg->data_offset);
}
return out_areas(f, seg, "raid");
}
static int _raid_text_export(const struct lv_segment *seg, struct formatter *f)
{
if (seg_is_any_raid0(seg))
return _raid_text_export_raid0(seg, f);
return _raid_text_export_raid(seg, f);
}
static int _raid_target_status_compatible(const char *type)
{
return (strstr(type, "raid") != NULL);
}
static void _raid_destroy(struct segment_type *segtype)
{
free((void *) segtype->dso);
free(segtype);
}
/* Check availability of raid10 taking data copies into consideration. */
static bool _raid10_is_available(const struct logical_volume *lv)
{
uint32_t i, rebuilds_per_group = 0, s;
const uint32_t copies = 2; /* FIXME: we only support 2-way mirrors (i.e. 2 data copies) in RAID10 for now. */
struct lv_segment *seg = first_seg(lv); /* We only have one segment in RaidLVs for now. */
for (i = 0; i < seg->area_count * copies; ++i) {
s = i % seg->area_count;
if (!(i % copies))
rebuilds_per_group = 0;
if (seg_type(seg, s) == AREA_LV &&
(lv_is_partial(seg_lv(seg, s)) ||
lv_is_virtual(seg_lv(seg, s))))
rebuilds_per_group++;
if (rebuilds_per_group >= copies)
return false;
}
return true;
}
/*
* Return true in case RaidLV with specific RAID level is available.
*
* - raid0: all legs have to be live
* - raid1 : minimum of 1 leg live
* - raid4/5: maximum of 1 leg unavailable
* - raid6: maximum of 2 legs unavailable
* - raid10: minimum of 1 leg per mirror group available
*
*/
bool raid_is_available(const struct logical_volume *lv)
{
uint32_t s, missing_legs = 0;
struct lv_segment *seg = first_seg(lv); /* We only have one segment in RaidLVs for now. */
/* Be cautious about bogus calls. */
if (!seg || !seg_is_raid(seg))
return false;
if (seg_is_any_raid10(seg))
return _raid10_is_available(lv);
/* Count missing RAID legs */
for (s = 0; s < seg->area_count; ++s)
if (seg_type(seg, s) == AREA_LV &&
lv_is_partial(seg_lv(seg, s)))
missing_legs++;
/* Degradation: segtype raid1 may miss legs-1, raid0/4/5/6 may loose parity devices. */
return missing_legs <= (seg_is_raid1(seg) ? seg->area_count - 1 : seg->segtype->parity_devs);
}
#ifdef DEVMAPPER_SUPPORT
static int _raid_target_present(struct cmd_context *cmd,
const struct lv_segment *seg __attribute__((unused)),
unsigned *attributes);
static int _raid_add_target_line(struct dev_manager *dm __attribute__((unused)),
struct dm_pool *mem __attribute__((unused)),
struct cmd_context *cmd __attribute__((unused)),
void **target_state __attribute__((unused)),
struct lv_segment *seg,
const struct lv_activate_opts *laopts __attribute__((unused)),
struct dm_tree_node *node, uint64_t len,
uint32_t *pvmove_mirror_count __attribute__((unused)))
{
int delta_disks = 0, delta_disks_minus = 0, delta_disks_plus = 0, data_offset = 0;
uint32_t s;
uint64_t flags = 0;
uint64_t rebuilds[RAID_BITMAP_SIZE] = { 0 };
uint64_t writemostly[RAID_BITMAP_SIZE] = { 0 };
struct dm_tree_node_raid_params_v2 params = { 0 };
unsigned attrs;
if (seg_is_raid4(seg)) {
if (!_raid_target_present(cmd, NULL, &attrs) ||
!(attrs & RAID_FEATURE_RAID4)) {
log_error("RAID target does not support RAID4 for LV %s.",
display_lvname(seg->lv));
return 0;
}
}
if (!seg->area_count) {
log_error(INTERNAL_ERROR "_raid_add_target_line called "
"with no areas for %s.", seg->lv->name);
return 0;
}
/*
* 253 device restriction imposed by kernel due to MD and dm-raid bitfield limitation in superblock.
* It is not strictly a userspace limitation.
*/
if (seg->area_count > DEFAULT_RAID_MAX_IMAGES) {
log_error("Unable to handle more than %u devices in a "
"single RAID array", DEFAULT_RAID_MAX_IMAGES);
return 0;
}
if (!seg_is_any_raid0(seg)) {
if (!seg->region_size) {
log_error("Missing region size for raid segment in %s.",
seg_lv(seg, 0)->name);
return 0;
}
for (s = 0; s < seg->area_count; s++) {
uint64_t status = seg_lv(seg, s)->status;
if (status & LV_REBUILD)
rebuilds[s/64] |= 1ULL << (s%64);
if (status & LV_RESHAPE_DELTA_DISKS_PLUS) {
delta_disks++;
delta_disks_plus++;
} else if (status & LV_RESHAPE_DELTA_DISKS_MINUS) {
delta_disks--;
delta_disks_minus++;
}
if (delta_disks_plus && delta_disks_minus) {
log_error(INTERNAL_ERROR "Invalid request for delta disks minus and delta disks plus!");
return 0;
}
if (status & LV_WRITEMOSTLY)
writemostly[s/64] |= 1ULL << (s%64);
}
data_offset = seg->data_offset;
if (mirror_in_sync())
flags = DM_NOSYNC;
}
params.raid_type = lvseg_name(seg);
if (seg->segtype->parity_devs) {
/* RAID 4/5/6 */
params.mirrors = 1;
params.stripes = seg->area_count - seg->segtype->parity_devs;
} else if (seg_is_any_raid0(seg)) {
params.mirrors = 1;
params.stripes = seg->area_count;
} else if (seg_is_any_raid10(seg)) {
params.data_copies = seg->data_copies;
params.stripes = seg->area_count;
} else {
/* RAID 1 */
params.mirrors = seg->data_copies;
params.stripes = 1;
params.writebehind = seg->writebehind;
memcpy(params.writemostly, writemostly, sizeof(params.writemostly));
}
/* RAID 0 doesn't have a bitmap, thus no region_size, rebuilds etc. */
if (!seg_is_any_raid0(seg)) {
params.region_size = seg->region_size;
memcpy(params.rebuilds, rebuilds, sizeof(params.rebuilds));
params.min_recovery_rate = seg->min_recovery_rate;
params.max_recovery_rate = seg->max_recovery_rate;
params.delta_disks = delta_disks;
params.data_offset = data_offset;
}
params.stripe_size = seg->stripe_size;
params.flags = flags;
if (!dm_tree_node_add_raid_target_with_params_v2(node, len, &params))
return_0;
return add_areas_line(dm, seg, node, 0u, seg->area_count);
}
static int _raid_target_percent(void **target_state,
dm_percent_t *percent,
struct dm_pool *mem,
struct cmd_context *cmd,
struct lv_segment *seg, char *params,
uint64_t *total_numerator,
uint64_t *total_denominator)
{
struct dm_status_raid *sr;
if (!dm_get_status_raid(mem, params, &sr))
return_0;
*total_numerator += sr->insync_regions;
*total_denominator += sr->total_regions;
*percent = dm_make_percent(sr->insync_regions, sr->total_regions);
if (seg)
seg->extents_copied = (uint64_t) seg->area_len * *percent / DM_PERCENT_100;
dm_pool_free(mem, sr);
return 1;
}
static int _raid_transient_status(struct dm_pool *mem,
struct lv_segment *seg,
char *params)
{
int failed = 0, r = 0;
unsigned i;
struct logical_volume *lv;
struct dm_status_raid *sr;
log_debug("Raid transient status %s.", params);
if (!dm_get_status_raid(mem, params, &sr))
return_0;
if (sr->dev_count != seg->area_count) {
log_error("Active raid has a wrong number of raid images!");
log_error("Metadata says %u, kernel says %u.",
seg->area_count, sr->dev_count);
goto out;
}
if (seg->meta_areas)
for (i = 0; i < seg->area_count; ++i) {
lv = seg_metalv(seg, i);
if (!lv_info(lv->vg->cmd, lv, 0, NULL, 0, 0)) {
log_error("Check for existence of raid meta %s failed.",
display_lvname(lv));
goto out;
}
}
for (i = 0; i < seg->area_count; ++i) {
lv = seg_lv(seg, i);
if (!lv_info(lv->vg->cmd, lv, 0, NULL, 0, 0)) {
log_error("Check for existence of raid image %s failed.",
display_lvname(lv));
goto out;
}
if (sr->dev_health[i] == 'D') {
lv->status |= PARTIAL_LV;
++failed;
}
}
/* Update PARTIAL_LV flags across the VG */
if (failed)
vg_mark_partial_lvs(lv->vg, 0);
r = 1;
out:
dm_pool_free(mem, sr);
return r;
}
/* Define raid feature based on the tuple(major, minor, patchlevel) of raid target */
struct raid_feature {
uint16_t maj;
uint16_t min;
uint16_t patchlevel;
uint16_t raid_feature;
const char feature[24];
};
/* Return true if tuple(@maj, @min, @patchlevel) is greater/equal to @*feature members */
static int _check_feature(const struct raid_feature *feature, uint32_t maj, uint32_t min, uint32_t patchlevel)
{
return (maj > feature->maj) ||
(maj == feature->maj && min > feature->min) ||
(maj == feature->maj && min == feature->min && patchlevel >= feature->patchlevel);
}
static int _raid_target_present(struct cmd_context *cmd,
const struct lv_segment *seg __attribute__((unused)),
unsigned *attributes)
{
/* List of features with their kernel target version */
const struct raid_feature _features[] = {
{ 1, 3, 0, RAID_FEATURE_RAID10, SEG_TYPE_NAME_RAID10 },
{ 1, 7, 0, RAID_FEATURE_RAID0, SEG_TYPE_NAME_RAID0 },
{ 1, 9, 0, RAID_FEATURE_SHRINK, "shrinking" },
{ 1, 9, 0, RAID_FEATURE_NEW_DEVICES_ACCEPT_REBUILD, "rebuild+emptymeta" },
{ 1, 12, 0, RAID_FEATURE_RESHAPE, "reshaping" },
};
static int _raid_checked = 0;
static int _raid_present = 0;
static unsigned _raid_attrs = 0;
uint32_t maj, min, patchlevel;
unsigned i;
if (!activation())
return 0;
if (!_raid_checked) {
_raid_checked = 1;
if (!(_raid_present = target_present_version(cmd, TARGET_NAME_RAID, 1,
&maj, &min, &patchlevel)))
return_0;
for (i = 0; i < DM_ARRAY_SIZE(_features); ++i)
if (_check_feature(_features + i, maj, min, patchlevel))
_raid_attrs |= _features[i].raid_feature;
else
log_very_verbose("Target raid does not support %s.",
_features[i].feature);
/*
* Seperate check for proper raid4 mapping supported
*
* If we get more of these range checks, avoid them
* altogether by enhancing 'struct raid_feature'
* and _check_feature() to handle them.
*/
if (!(maj == 1 && (min == 8 || (min == 9 && patchlevel == 0))))
_raid_attrs |= RAID_FEATURE_RAID4;
else
log_very_verbose("Target raid does not support %s.",
SEG_TYPE_NAME_RAID4);
}
if (attributes)
*attributes = _raid_attrs;
return _raid_present;
}
static int _raid_modules_needed(struct dm_pool *mem,
const struct lv_segment *seg __attribute__((unused)),
struct dm_list *modules)
{
if (!str_list_add(mem, modules, MODULE_NAME_RAID)) {
log_error("raid module string list allocation failed");
return 0;
}
return 1;
}
# ifdef DMEVENTD
static int _raid_target_monitored(struct lv_segment *seg, int *pending, int *monitored)
{
return target_registered_with_dmeventd(seg->lv->vg->cmd, seg->segtype->dso,
seg->lv, pending, monitored);
}
static int _raid_set_events(struct lv_segment *seg, int evmask, int set)
{
return target_register_events(seg->lv->vg->cmd, seg->segtype->dso,
seg->lv, evmask, set, 0);
}
static int _raid_target_monitor_events(struct lv_segment *seg, int events)
{
return _raid_set_events(seg, events, 1);
}
static int _raid_target_unmonitor_events(struct lv_segment *seg, int events)
{
return _raid_set_events(seg, events, 0);
}
# endif /* DMEVENTD */
#endif /* DEVMAPPER_SUPPORT */
static const struct segtype_handler _raid_ops = {
.display = _raid_display,
.text_import_area_count = _raid_text_import_area_count,
.text_import = _raid_text_import,
.text_export = _raid_text_export,
.target_status_compatible = _raid_target_status_compatible,
#ifdef DEVMAPPER_SUPPORT
.add_target_line = _raid_add_target_line,
.target_percent = _raid_target_percent,
.target_present = _raid_target_present,
.check_transient_status = _raid_transient_status,
.modules_needed = _raid_modules_needed,
# ifdef DMEVENTD
.target_monitored = _raid_target_monitored,
.target_monitor_events = _raid_target_monitor_events,
.target_unmonitor_events = _raid_target_unmonitor_events,
# endif /* DMEVENTD */
#endif
.destroy = _raid_destroy,
};
static const struct raid_type {
const char name[12];
unsigned parity;
uint64_t extra_flags;
} _raid_types[] = {
{ SEG_TYPE_NAME_RAID0, 0, SEG_RAID0 | SEG_AREAS_STRIPED },
{ SEG_TYPE_NAME_RAID0_META, 0, SEG_RAID0_META | SEG_AREAS_STRIPED },
{ SEG_TYPE_NAME_RAID1, 0, SEG_RAID1 | SEG_AREAS_MIRRORED },
{ SEG_TYPE_NAME_RAID10, 0, SEG_RAID10 | SEG_AREAS_MIRRORED },
{ SEG_TYPE_NAME_RAID10_NEAR,0, SEG_RAID10_NEAR | SEG_AREAS_MIRRORED },
{ SEG_TYPE_NAME_RAID4, 1, SEG_RAID4 },
{ SEG_TYPE_NAME_RAID5, 1, SEG_RAID5 },
{ SEG_TYPE_NAME_RAID5_N, 1, SEG_RAID5_N },
{ SEG_TYPE_NAME_RAID5_LA, 1, SEG_RAID5_LA },
{ SEG_TYPE_NAME_RAID5_LS, 1, SEG_RAID5_LS },
{ SEG_TYPE_NAME_RAID5_RA, 1, SEG_RAID5_RA },
{ SEG_TYPE_NAME_RAID5_RS, 1, SEG_RAID5_RS },
{ SEG_TYPE_NAME_RAID6, 2, SEG_RAID6 },
{ SEG_TYPE_NAME_RAID6_N_6, 2, SEG_RAID6_N_6 },
{ SEG_TYPE_NAME_RAID6_NC, 2, SEG_RAID6_NC },
{ SEG_TYPE_NAME_RAID6_NR, 2, SEG_RAID6_NR },
{ SEG_TYPE_NAME_RAID6_ZR, 2, SEG_RAID6_ZR },
{ SEG_TYPE_NAME_RAID6_LS_6, 2, SEG_RAID6_LS_6 },
{ SEG_TYPE_NAME_RAID6_RS_6, 2, SEG_RAID6_RS_6 },
{ SEG_TYPE_NAME_RAID6_LA_6, 2, SEG_RAID6_LA_6 },
{ SEG_TYPE_NAME_RAID6_RA_6, 2, SEG_RAID6_RA_6 }
};
static struct segment_type *_init_raid_segtype(struct cmd_context *cmd,
const struct raid_type *rt,
const char *dso,
uint64_t monitored)
{
struct segment_type *segtype = zalloc(sizeof(*segtype));
if (!segtype) {
log_error("Failed to allocate memory for %s segtype",
rt->name);
return NULL;
}
segtype->ops = &_raid_ops;
segtype->name = rt->name;
segtype->flags = SEG_RAID | SEG_ONLY_EXCLUSIVE | rt->extra_flags;
/* Never monitor raid0 or raid0_meta LVs */
if (!segtype_is_any_raid0(segtype) &&
dso && (dso = strdup(dso))) {
segtype->dso = dso;
segtype->flags |= monitored;
}
segtype->parity_devs = rt->parity;
log_very_verbose("Initialised segtype: %s", segtype->name);
return segtype;
}
#ifdef RAID_INTERNAL /* Shared */
int init_raid_segtypes(struct cmd_context *cmd, struct segtype_library *seglib)
#else
int init_multiple_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
int init_multiple_segtypes(struct cmd_context *cmd, struct segtype_library *seglib)
#endif
{
struct segment_type *segtype;
char *dso = NULL;
unsigned i;
uint64_t monitored = 0;
int r = 1;
#ifdef DEVMAPPER_SUPPORT
# ifdef DMEVENTD
dso = get_monitor_dso_path(cmd, dmeventd_raid_library_CFG);
if (dso)
monitored = SEG_MONITORED;
# endif
#endif
for (i = 0; i < DM_ARRAY_SIZE(_raid_types); ++i)
if ((segtype = _init_raid_segtype(cmd, &_raid_types[i], dso, monitored)) &&
!lvm_register_segtype(seglib, segtype)) {
/* segtype is already destroyed */
stack;
r = 0;
break;
}
free(dso);
return r;
}