1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-09 01:18:39 +03:00
lvm2/daemons/dmeventd/plugins/raid/dmeventd_raid.c
Zdenek Kabelac 0016b79c8b dmeventd: improve more raid status reporting
When we want to report primary leg failure, check for intial 'a',
since otherwice 'Aa idle' is normally visible.

Also reset array of bit flags marking dead devices, once
plugin detects raid is in sync.
2017-06-24 00:06:12 +02:00

187 lines
4.8 KiB
C

/*
* Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "lib.h"
#include "defaults.h"
#include "dmeventd_lvm.h"
#include "libdevmapper-event.h"
/* Hold enough elements for the mximum number of RAID images */
#define RAID_DEVS_ELEMS ((DEFAULT_RAID_MAX_IMAGES + 63) / 64)
struct dso_state {
struct dm_pool *mem;
char cmd_lvconvert[512];
uint64_t raid_devs[RAID_DEVS_ELEMS];
int failed;
int warned;
};
DM_EVENT_LOG_FN("raid")
/* FIXME Reformat to 80 char lines. */
static int _process_raid_event(struct dso_state *state, char *params, const char *device)
{
struct dm_status_raid *status;
const char *d;
int dead = 0, r = 1;
uint32_t dev;
if (!dm_get_status_raid(state->mem, params, &status)) {
log_error("Failed to process status line for %s.", device);
return 0;
}
d = status->dev_health;
while ((d = strchr(d, 'D'))) {
dev = (uint32_t)(d - status->dev_health);
if (!(state->raid_devs[dev / 64] & (UINT64_C(1) << (dev % 64)))) {
state->raid_devs[dev / 64] |= (UINT64_C(1) << (dev % 64));
log_warn("WARNING: Device #%u of %s array, %s, has failed.",
dev, status->raid_type, device);
}
d++;
dead = 1;
}
/*
* if we are converting from non-RAID to RAID (e.g. linear -> raid1)
* and too many original devices die, such that we cannot continue
* the "recover" operation, the sync action will go to "idle", the
* unsynced devs will remain at 'a', and the original devices will
* NOT SWITCH TO 'D', but will remain at 'A' - hoping to be revived.
*
* This is simply the way the kernel works...
*/
if (!strcmp(status->sync_action, "idle") &&
(status->dev_health[0] == 'a') &&
(status->insync_regions < status->total_regions)) {
log_error("Primary sources for new RAID, %s, have failed.",
device);
dead = 1; /* run it through LVM repair */
}
if (dead) {
if (status->insync_regions < status->total_regions) {
if (!state->warned) {
state->warned = 1;
log_warn("WARNING: waiting for resynchronization to finish "
"before initiating repair on RAID device %s.", device);
}
goto out; /* Not yet done syncing with accessible devices */
}
if (state->failed)
goto out; /* already reported */
state->failed = 1;
/* if repair goes OK, report success even if lvscan has failed */
if (!dmeventd_lvm2_run_with_lock(state->cmd_lvconvert)) {
log_error("Repair of RAID device %s failed.", device);
r = 0;
}
} else {
state->failed = 0;
if (status->insync_regions == status->total_regions)
memset(&state->raid_devs, 0, sizeof(state->raid_devs));
log_info("%s array, %s, is %s in-sync.",
status->raid_type, device,
(status->insync_regions == status->total_regions) ? "now" : "not");
}
out:
dm_pool_free(state->mem, status);
return r;
}
void process_event(struct dm_task *dmt,
enum dm_event_mask event __attribute__((unused)),
void **user)
{
struct dso_state *state = *user;
void *next = NULL;
uint64_t start, length;
char *target_type = NULL;
char *params;
const char *device = dm_task_get_name(dmt);
do {
next = dm_get_next_target(dmt, next, &start, &length,
&target_type, &params);
if (!target_type) {
log_info("%s mapping lost.", device);
continue;
}
if (strcmp(target_type, "raid")) {
log_info("%s has non-raid portion.", device);
continue;
}
if (!_process_raid_event(state, params, device))
log_error("Failed to process event for %s.",
device);
} while (next);
}
int register_device(const char *device,
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **user)
{
struct dso_state *state;
if (!dmeventd_lvm2_init_with_pool("raid_state", state))
goto_bad;
if (!dmeventd_lvm2_command(state->mem, state->cmd_lvconvert, sizeof(state->cmd_lvconvert),
"lvconvert --repair --use-policies", device))
goto_bad;
*user = state;
log_info("Monitoring RAID device %s for events.", device);
return 1;
bad:
log_error("Failed to monitor RAID %s.", device);
if (state)
dmeventd_lvm2_exit_with_pool(state);
return 0;
}
int unregister_device(const char *device,
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **user)
{
struct dso_state *state = *user;
dmeventd_lvm2_exit_with_pool(state);
log_info("No longer monitoring RAID device %s for events.",
device);
return 1;
}