
raid: count or clear transiently failed devices

Count or clear transiently failed devices in dm-raid superblocks.
Update debugging messages.
Use lvconvert --repair to repair transiently failed legs.
Activate all 'meta' LVs with a single sync_local_dev_names() call.
Use the proper DM path for the meta LV.

Modified-by: zkabelac@redhat.com
Heinz Mauelshagen
2024-11-05 18:33:19 +01:00
committed by Zdenek Kabelac
parent b66cc11b78
commit 03d8661657
6 changed files with 369 additions and 1 deletion
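
For orientation, a minimal, hypothetical sketch (not part of the commit) of how the two helpers this commit adds to the device_mapper library, dm_raid_count_failed_devices() and dm_raid_clear_failed_devices(), might be driven against the DM path of a single rmeta SubLV. The wrapper function and its arguments are invented for illustration; only the two prototypes come from the new header below.

#include <stdint.h>
#include <stdio.h>

#include "device_mapper/raid/target.h"

/* Hypothetical helper: report the failed-device bits recorded in one
 * dm-raid superblock and, if requested, clear them so the RaidLV can
 * be reactivated after a transient device failure. */
static int report_and_maybe_clear(const char *rmeta_dm_path, int clear)
{
	uint32_t nr_failed = 0;

	if (!dm_raid_count_failed_devices(rmeta_dm_path, &nr_failed))
		return 0;	/* open/read failed or no dm-raid signature */

	printf("%u failed device bit(s) recorded in %s\n",
	       (unsigned) nr_failed, rmeta_dm_path);

	/* Only clear once the transiently failed legs are back. */
	if (clear && nr_failed &&
	    !dm_raid_clear_failed_devices(rmeta_dm_path, &nr_failed))
		return 0;

	return 1;
}

Within LVM itself this functionality is reached through the lvconvert --repair code path described in the commit message rather than by calling the helpers directly.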

View File

@@ -25,6 +25,7 @@ DEVICE_MAPPER_SOURCE=\
	device_mapper/libdm-targets.c \
	device_mapper/libdm-timestamp.c \
	device_mapper/mm/pool.c \
	device_mapper/raid/raid_parser.c \
	device_mapper/regex/matcher.c \
	device_mapper/regex/parse_rx.c \
	device_mapper/regex/ttree.c \

View File

@@ -19,6 +19,7 @@
#include "base/data-struct/list.h"
#include "base/data-struct/hash.h"
#include "raid/target.h"
#include "vdo/target.h"
#include <inttypes.h>

View File

@@ -0,0 +1,164 @@
/*
* Copyright (C) 2024 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Support counting the number of failed-device bits in dm-raid superblock bit arrays, or clearing them out.
 */
#include "device_mapper/misc/dmlib.h"
#include "device_mapper/all.h"
#include "device_mapper/raid/target.h"
#include <fcntl.h>
#include <unistd.h>
/* Copied/derived from the kernel's drivers/md/dm-raid.c, so this is prone to getting out of sync (factor out to a header file?). */
#define MAX_RAID_DEVICES 253 /* md-raid kernel limit? */
#define UINT64_BITS (sizeof(uint64_t) * 8)
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (UINT64_BITS - 1)) / UINT64_BITS)
#define DM_RAID_SB_MAGIC 0x446D5264 /* "DmRd" */
#define FEATURE_FLAG_SUPPORTS_V190 0x1 /* Supports extended superblock */
/* RAID superblock at the beginning of rmeta SubLVs, trimmed down to the mandatory members. */
struct dm_raid_superblock {
	__le32 magic;		/* "DmRd" */
	__le32 compat_features;	/* Used to indicate compatible features (like 1.9.0 ondisk metadata extension) */
	__le32 dummy[4];
	__le64 failed_devices;	/* Pre 1.9.0 part of bit field of devices to */
				/* indicate device failures (see extension below) */
	__le32 dummy1[7];
	/********************************************************************
	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
	 *
	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
	 */
	__le32 flags;		/* Flags defining array states for reshaping */
	__le32 dummy2[14];
	__le64 extended_failed_devices[DISKS_ARRAY_ELEMS - 1];
	__le32 dummy3;
	/* Always set rest up to logical block size to 0 when writing ... */
} __packed;
/* END: Copied from ... */
/* Superblock I/O buffer size to be able to cope with 4K native devices... */
#define SB_BUFSZ 4096
static size_t _get_sb_size(const struct dm_raid_superblock *sb)
{
	return (FEATURE_FLAG_SUPPORTS_V190 & le32toh(sb->compat_features)) ?
		sizeof(*sb) : ((char *) &sb->flags - (char *) sb);
}
static uint32_t _hweight64(__le64 v)
{
	uint32_t r = 0;

	while (v) {
		r += v & 1;
		v >>= 1;
	}

	return r;
}
static uint32_t _hweight_failed(struct dm_raid_superblock *sb)
{
	uint32_t r = _hweight64(sb->failed_devices);

	if (_get_sb_size(sb) == sizeof(*sb)) {
		size_t i = DM_ARRAY_SIZE(sb->extended_failed_devices);

		while (i--)
			r = max(r, _hweight64(sb->extended_failed_devices[i]));
	}

	return r;
}
static void _clear_failed_devices(struct dm_raid_superblock *sb)
{
	sb->failed_devices = 0;

	if (_get_sb_size(sb) == sizeof(*sb))
		memset(sb->extended_failed_devices, 0, sizeof(sb->extended_failed_devices));
}
static int _count_or_clear_failed_devices(const char *dev_path, bool clear, uint32_t *nr_failed)
{
	struct dm_raid_superblock *sb = NULL;
	size_t sz;
	int fd, r = 0;

	if (posix_memalign((void *) &sb, SB_BUFSZ, SB_BUFSZ)) {
		log_sys_error("Failed to allocate RAID superblock buffer", dev_path);
		return 0;
	}

	fd = open(dev_path, O_EXCL | ((clear) ? O_RDWR : O_RDONLY) | O_DIRECT);
	if (fd < 0) {
		log_sys_error("Failed to open RAID metadata volume", dev_path);
		goto out;
	}

	if (read(fd, sb, SB_BUFSZ) != SB_BUFSZ) {
		log_sys_error("Failed to read RAID metadata volume", dev_path);
		goto out;
	}

	/* FIXME: big endian??? */
	if (sb->magic != htobe32(DM_RAID_SB_MAGIC)) {
		log_error("No RAID signature on %s.", dev_path);
		goto out;
	}

	if (nr_failed)
		*nr_failed = _hweight_failed(sb);

	if (clear) {
		if (lseek(fd, 0, SEEK_SET) < 0) {
			log_sys_error("Failed to seek RAID metadata volume", dev_path);
			goto out;
		}

		sz = _get_sb_size(sb);
		memset((void *)((char *) sb + sz), 0, SB_BUFSZ - sz);
		_clear_failed_devices(sb);

		if (write(fd, sb, SB_BUFSZ) != SB_BUFSZ) {
			log_sys_error("Failed to clear RAID metadata volume", dev_path);
			goto out;
		}
	}

	r = 1;
out:
	if ((fd >= 0) && close(fd))
		log_sys_debug("close", dev_path);

	free(sb);

	return r;
}
int dm_raid_count_failed_devices(const char *dev_path, uint32_t *nr_failed)
{
	return _count_or_clear_failed_devices(dev_path, false, nr_failed);
}

int dm_raid_clear_failed_devices(const char *dev_path, uint32_t *nr_failed)
{
	return _count_or_clear_failed_devices(dev_path, true, nr_failed);
}
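
As a sanity check on the bit-array sizing used above (a worked example, not part of the commit): with MAX_RAID_DEVICES = 253 and UINT64_BITS = 64, DISKS_ARRAY_ELEMS = (253 + 63) / 64 = 4, so the legacy failed_devices word plus the three extended_failed_devices words provide 256 bits, enough for every possible leg. A standalone C11 snippet expressing the same arithmetic:

#include <stdint.h>

#define MAX_RAID_DEVICES 253
#define UINT64_BITS (sizeof(uint64_t) * 8)
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (UINT64_BITS - 1)) / UINT64_BITS)

/* One legacy 64-bit word plus (DISKS_ARRAY_ELEMS - 1) extended words. */
_Static_assert(DISKS_ARRAY_ELEMS == 4, "expected one legacy plus three extended words");
_Static_assert(DISKS_ARRAY_ELEMS * UINT64_BITS >= MAX_RAID_DEVICES,
	       "bit arrays must cover every possible RAID device");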

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2024 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef DEVICE_MAPPER_RAID_TARGET_H
#define DEVICE_MAPPER_RAID_TARGET_H

#include <stdint.h>

int dm_raid_count_failed_devices(const char *dev_path, uint32_t *nr_failed);
int dm_raid_clear_failed_devices(const char *dev_path, uint32_t *nr_failed);

#endif

View File

@@ -197,6 +197,8 @@ int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt);
int lv_raid_sync_action(const struct logical_volume *lv, char **sync_action);
int lv_raid_message(const struct logical_volume *lv, const char *msg);
int lv_raid_status(const struct logical_volume *lv, struct lv_status_raid **status);
int lv_raid_clear_failed_devices(const struct logical_volume *lv);
int lv_raid_count_failed_devices(const struct logical_volume *lv, uint32_t *failed_cnt);
int lv_writecache_message(const struct logical_volume *lv, const char *msg);
int lv_cache_status(const struct logical_volume *cache_lv,
		    struct lv_status_cache **status);

View File

@@ -3242,7 +3242,7 @@ static int _raid_leg_degraded(struct lv_segment *raid_seg, uint32_t s)
		 _sublv_is_degraded(seg_metalv(raid_seg, s))));
}

/* Return failed component SubLV count for @lv. */
/* Return failed component SubLV pair count for @lv. */
static uint32_t _lv_get_nr_failed_components(const struct logical_volume *lv)
{
	uint32_t r = 0, s;
@@ -7328,6 +7328,183 @@ int lv_raid_remove_missing(struct logical_volume *lv)
	return 1;
}
/*
 * Count the number of failed-device bits in the dm-raid superblock bit arrays -or- clear them out.
 *
 * If there are any failed devices, return the maximum of the failed SubLV count and parity_devs
 * so that the caller will offer to clear them and retry activation of the RaidLV, unless more
 * than parity_devs component device pairs (rmeta and rimage) are still failed.  This allows an
 * early exit in the caller, preventing the MD kernel from rejecting activation of a RAID array
 * with more than parity_devs failed component device pairs.
 */
static int _count_or_clear_failed_devices_bits(struct logical_volume *meta_lv,
					       bool clear, uint32_t *nr_failed)
{
	char *meta_path = lv_dmpath_dup(meta_lv->vg->cmd->mem, meta_lv);

	if (!meta_path) {
		log_error("Failed to build device path for %s.",
			  display_lvname(meta_lv));
		return 0;
	}

	if (!clear) /* only counting */
		return dm_raid_count_failed_devices(meta_path, nr_failed);

	return dm_raid_clear_failed_devices(meta_path, nr_failed);
}
/* Count or clear failed-device bits in the RAID superblocks of
 * recovered, transiently failed component SubLV pairs. */
static int _raid_count_or_clear_failed_devices(const struct logical_volume *lv,
					       bool clear, uint32_t *failed_devices)
{
	uint32_t nr_failed = 0, nr_failed_tmp = 0, failed_sublvs = 0, s;
	struct lv_segment *raid_seg = first_seg(lv);
	struct logical_volume *meta_lv;
	const char *str;
	int r = 1, cleared_devs = 0;

	/* Prevent bogus use. */
	if (!seg_is_raid_with_meta(raid_seg)) {
		log_error("%s is not a RaidLV with metadata.", display_lvname(lv));
		return 0;
	}

	failed_sublvs = _lv_get_nr_failed_components(lv);

	if (clear && (failed_sublvs > raid_seg->segtype->parity_devs)) {
		log_error("Can't clear transiently failed devices on still failed %s.",
			  display_lvname(lv));
		return 0;
	}

	if (!raid_seg->meta_areas) {
		log_error(INTERNAL_ERROR "Missing metadata areas on %s!", display_lvname(lv));
		return 0;
	}
	/* Ensure that no meta LV is already active. */
	for (s = 0; s < raid_seg->area_count; s++) {
		if (_raid_leg_degraded(raid_seg, s))
			continue;

		meta_lv = seg_metalv(raid_seg, s);

		if (lv_is_active(meta_lv)) {
			/* The DM table is in some unknown condition, aborting... */
			log_error("Can't %s failed devices with active %s metadata volume %s.",
				  clear ? "clear" : "count",
				  lvseg_name(raid_seg), display_lvname(meta_lv));
			return 0;
		}
	}
	/* Activate all non-degraded meta LVs before counting or clearing. */
	for (s = 0; s < raid_seg->area_count; s++) {
		meta_lv = seg_metalv(raid_seg, s);

		if (_raid_leg_degraded(raid_seg, s)) {
			log_debug("Skipping activation of failed devices for degraded %s metadata volume %s.",
				  lvseg_name(raid_seg), display_lvname(meta_lv));
			continue;
		}

		if (!activate_lv(lv->vg->cmd, meta_lv)) {
			log_error("Failed to activate %s metadata volume %s.",
				  lvseg_name(raid_seg), display_lvname(meta_lv));
			r = 0; /* count as many as we can... */
		}
	}

	/* Wait for meta activation. */
	if (!sync_local_dev_names(lv->vg->cmd))
		stack;
	for (s = 0; s < raid_seg->area_count; s++) {
		meta_lv = seg_metalv(raid_seg, s);

		if (_raid_leg_degraded(raid_seg, s)) {
			if (clear)
				log_debug("Skipping clear of failed devices for degraded %s metadata volume %s.",
					  lvseg_name(raid_seg), display_lvname(meta_lv));
			continue;
		}

		if (lv_is_active(meta_lv) &&
		    !_count_or_clear_failed_devices_bits(meta_lv, clear,
							 &nr_failed_tmp)) {
			log_error("Failed to %s failed device(s) in superblock of %s metadata volume %s.",
				  clear ? "clear" : "count",
				  lvseg_name(raid_seg), display_lvname(meta_lv));
			r = 0;
			continue;
		}

		if (nr_failed_tmp) {
			log_verbose("%s %u failed device(s) in superblock of %s metadata volume %s.",
				    clear ? "Cleared" : "Counted", nr_failed_tmp,
				    lvseg_name(raid_seg), display_lvname(meta_lv));
			cleared_devs++;
		}

		if (nr_failed_tmp > nr_failed)
			nr_failed = nr_failed_tmp;
	}
	/* Deactivate meta LVs */
	for (s = 0; s < raid_seg->area_count; s++) {
		if (_raid_leg_degraded(raid_seg, s))
			continue;

		if (!deactivate_lv(lv->vg->cmd, seg_metalv(raid_seg, s))) {
			stack;
			r = 0;
		}
	}
	if (clear) {
		if (!failed_sublvs)
			str = "fully operational";
		else if (failed_sublvs <= raid_seg->segtype->parity_devs)
			str = "degraded";
		else
			str = "still failed";

		log_print_unless_silent("The %s volume %s is %s with %u transiently failed device(s).",
					lvseg_name(raid_seg), display_lvname(lv), str,
					nr_failed - failed_sublvs);

		if (r && cleared_devs &&
		    (failed_sublvs <= raid_seg->segtype->parity_devs))
			/* TODO: maybe we want to activate the RAID volume here? */
			log_print_unless_silent("Volume has been restored after clearing %u superblock(s). Once online, please check its content.",
						cleared_devs);
	}

	if (failed_devices)
		*failed_devices = max(failed_sublvs, raid_seg->segtype->parity_devs);

	return r;
}
/* Clear failed-device bits in the RAID superblocks of recovered,
 * transiently failed component SubLV pairs. */
int lv_raid_clear_failed_devices(const struct logical_volume *lv)
{
	return _raid_count_or_clear_failed_devices(lv, true, NULL);
}

/* Count failed-device bits in the RAID superblocks of recovered,
 * transiently failed component SubLV pairs.
 *
 * On success, @failed_cnt contains the current number.
 */
int lv_raid_count_failed_devices(const struct logical_volume *lv, uint32_t *failed_cnt)
{
	return _raid_count_or_clear_failed_devices(lv, false, failed_cnt);
}
/* Return 1 if a partial raid LV can be activated redundantly */
static int _partial_raid_lv_is_redundant(const struct logical_volume *lv)
{