mirror of
git://sourceware.org/git/lvm2.git
synced 2025-02-06 01:58:01 +03:00
Add mirror log fault-handling policy.
This commit is contained in:
parent
9723090c92
commit
aeb2c277a8
@ -1,5 +1,7 @@
|
||||
Version 2.02.06 -
|
||||
=================================
|
||||
Add mirror log fault-handling policy.
|
||||
Improve mirror warning messages and tidy dmeventd syslog output.
|
||||
Propagate nosync flag around cluster.
|
||||
Allow vgreduce to handle mirror log failures.
|
||||
Add --corelog to lvcreate and lvconvert.
|
||||
|
@ -99,10 +99,13 @@ static int _get_mirror_event(char *params)
|
||||
return rtn;
|
||||
}
|
||||
|
||||
static void _temporary_log_fn(int level, const char *file, int line, const char *format)
|
||||
static void _temporary_log_fn(int level, const char *file,
|
||||
int line, const char *format)
|
||||
{
|
||||
return;
|
||||
syslog(LOG_DEBUG, "%s", format);
|
||||
if (!strncmp(format, "WARNING: ", 9) && (level < 5))
|
||||
syslog(LOG_CRIT, "%s", format);
|
||||
else
|
||||
syslog(LOG_DEBUG, "%s", format);
|
||||
}
|
||||
|
||||
static int _remove_failed_devices(const char *device)
|
||||
@ -205,7 +208,7 @@ void process_event(const char *device, enum dm_event_type event)
|
||||
|
||||
int register_device(const char *device)
|
||||
{
|
||||
syslog(LOG_INFO, "Monitoring %s for events\n", device);
|
||||
syslog(LOG_INFO, "Monitoring mirror device, %s for events\n", device);
|
||||
|
||||
/*
|
||||
* Need some space for allocations. 1024 should be more
|
||||
@ -224,8 +227,6 @@ int register_device(const char *device)
|
||||
|
||||
int unregister_device(const char *device)
|
||||
{
|
||||
syslog(LOG_INFO, "Stopped monitoring %s for events\n", device);
|
||||
|
||||
if (!(--register_count)) {
|
||||
dm_pool_destroy(mem_pool);
|
||||
mem_pool = NULL;
|
||||
|
@ -99,10 +99,13 @@ static int _get_mirror_event(char *params)
|
||||
return rtn;
|
||||
}
|
||||
|
||||
static void _temporary_log_fn(int level, const char *file, int line, const char *format)
|
||||
static void _temporary_log_fn(int level, const char *file,
|
||||
int line, const char *format)
|
||||
{
|
||||
return;
|
||||
syslog(LOG_DEBUG, "%s", format);
|
||||
if (!strncmp(format, "WARNING: ", 9) && (level < 5))
|
||||
syslog(LOG_CRIT, "%s", format);
|
||||
else
|
||||
syslog(LOG_DEBUG, "%s", format);
|
||||
}
|
||||
|
||||
static int _remove_failed_devices(const char *device)
|
||||
@ -205,7 +208,7 @@ void process_event(const char *device, enum dm_event_type event)
|
||||
|
||||
int register_device(const char *device)
|
||||
{
|
||||
syslog(LOG_INFO, "Monitoring %s for events\n", device);
|
||||
syslog(LOG_INFO, "Monitoring mirror device, %s for events\n", device);
|
||||
|
||||
/*
|
||||
* Need some space for allocations. 1024 should be more
|
||||
@ -224,8 +227,6 @@ int register_device(const char *device)
|
||||
|
||||
int unregister_device(const char *device)
|
||||
{
|
||||
syslog(LOG_INFO, "Stopped monitoring %s for events\n", device);
|
||||
|
||||
if (!(--register_count)) {
|
||||
dm_pool_destroy(mem_pool);
|
||||
mem_pool = NULL;
|
||||
|
@ -232,9 +232,6 @@ activation {
|
||||
# target or make it return zeros.
|
||||
missing_stripe_filler = "/dev/ioerror"
|
||||
|
||||
# Size (in KB) of each copy operation when mirroring
|
||||
mirror_region_size = 512
|
||||
|
||||
# How much stack (in KB) to reserve for use while devices suspended
|
||||
reserved_stack = 256
|
||||
|
||||
@ -251,6 +248,54 @@ activation {
|
||||
# "@*" matches if any tag defined on the host is also set in the LV or VG
|
||||
#
|
||||
# volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
|
||||
|
||||
# Size (in KB) of each copy operation when mirroring
|
||||
mirror_region_size = 512
|
||||
|
||||
# 'mirror_dev_fault_policy' and 'mirror_log_fault_policy' define
|
||||
# how a device failure affecting a mirror is handled.
|
||||
# A mirror is composed of mirror images (copies) and a log.
|
||||
# A disk log ensures that a mirror does not need to be re-synced
|
||||
# (all copies made the same) every time a machine reboots or crashes.
|
||||
#
|
||||
# In the event of a failure, the specified policy will be used to
|
||||
# determine what happens:
|
||||
#
|
||||
# "remove" - Simply remove the faulty device and run without it. If
|
||||
# the log device fails, the mirror would convert to using
|
||||
# an in-memory log. This means the mirror will not
|
||||
# remember its sync status across crashes/reboots and
|
||||
# the entire mirror will be re-synced. If a
|
||||
# mirror image fails, the mirror will convert to a
|
||||
# non-mirrored device if there is only one remaining good
|
||||
# copy.
|
||||
#
|
||||
# "allocate" - Remove the faulty device and try to allocate space on
|
||||
# a new device to be a replacement for the failed device.
|
||||
# Using this policy for the log is fast and maintains the
|
||||
# ability to remember sync state through crashes/reboots.
|
||||
# Using this policy for a mirror device is slow, as it
|
||||
# requires the mirror to resynchronize the devices, but it
|
||||
# will preserve the mirror characteristic of the device.
|
||||
# This policy acts like "remove" if no suitable device and
|
||||
# space can be allocated for the replacement.
|
||||
# Currently this is not implemented properly and behaves
|
||||
# similarly to:
|
||||
#
|
||||
# "allocate_anywhere" - Operates like "allocate", but it does not
|
||||
# require that the new space being allocated be on a
|
||||
# device that is not part of the mirror. For a log device
|
||||
# failure, this could mean that the log is allocated on
|
||||
# the same device as a mirror device. For a mirror
|
||||
# device, this could mean that the mirror device is
|
||||
# allocated on the same device as another mirror device.
|
||||
# This policy would not be wise for mirror devices
|
||||
# because it would break the redundant nature of the
|
||||
# mirror. This policy acts like "remove" if no suitable
|
||||
# device and space can be allocated for the replacement.
|
||||
|
||||
mirror_log_fault_policy = "allocate"
|
||||
mirror_dev_fault_policy = "remove"
|
||||
}
|
||||
|
||||
|
||||
|
@ -34,6 +34,8 @@
|
||||
#define DEFAULT_LOCK_DIR "/var/lock/lvm"
|
||||
#define DEFAULT_LOCKING_LIB "lvm2_locking.so"
|
||||
|
||||
#define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate"
|
||||
#define DEFAULT_MIRROR_DEV_FAULT_POLICY "remove"
|
||||
#define DEFAULT_DMEVENTD_MIRROR_LIB "libdevmapper-event-lvm2mirror.so"
|
||||
|
||||
#define DEFAULT_UMASK 0077
|
||||
|
@ -23,6 +23,13 @@
|
||||
#include "lvm-string.h"
|
||||
#include "locking.h" /* FIXME Should not be used in this file */
|
||||
|
||||
#include "defaults.h" /* FIXME: should this be defaults.h? */
|
||||
|
||||
/* These are the flags that represent the mirror failure restoration policies */
|
||||
#define MIRROR_REMOVE 0
|
||||
#define MIRROR_ALLOCATE 1
|
||||
#define MIRROR_ALLOCATE_ANYWHERE 2
|
||||
|
||||
struct lv_segment *find_mirror_seg(struct lv_segment *seg)
|
||||
{
|
||||
return seg->mirror_seg;
|
||||
@ -230,18 +237,112 @@ int remove_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int get_mirror_fault_policy(struct cmd_context *cmd, int log_policy)
|
||||
{
|
||||
const char *policy;
|
||||
|
||||
if (log_policy)
|
||||
policy = find_config_str(NULL, "activation/mirror_log_fault_policy",
|
||||
DEFAULT_MIRROR_LOG_FAULT_POLICY);
|
||||
else
|
||||
policy = find_config_str(NULL, "activation/mirror_dev_fault_policy",
|
||||
DEFAULT_MIRROR_DEV_FAULT_POLICY);
|
||||
|
||||
if (!strcmp(policy, "remove"))
|
||||
return MIRROR_REMOVE;
|
||||
else if (!strcmp(policy, "allocate"))
|
||||
return MIRROR_ALLOCATE;
|
||||
else if (!strcmp(policy, "allocate_anywhere"))
|
||||
return MIRROR_ALLOCATE_ANYWHERE;
|
||||
|
||||
if (log_policy)
|
||||
log_error("Bad activation/mirror_log_fault_policy");
|
||||
else
|
||||
log_error("Bad activation/mirror_dev_fault_policy");
|
||||
|
||||
return MIRROR_REMOVE;
|
||||
}
|
||||
|
||||
static int get_mirror_log_fault_policy(struct cmd_context *cmd)
|
||||
{
|
||||
return get_mirror_fault_policy(cmd, 1);
|
||||
}
|
||||
|
||||
static int get_mirror_dev_fault_policy(struct cmd_context *cmd)
|
||||
{
|
||||
return get_mirror_fault_policy(cmd, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* replace_mirror_images
|
||||
* @mirrored_seg: segment (which may be linear now) to restore
|
||||
* @num_mirrors: number of copies we should end up with
|
||||
* @log_policy: mirror log fault policy (MIRROR_REMOVE, MIRROR_ALLOCATE or MIRROR_ALLOCATE_ANYWHERE)
|
||||
* @in_sync: was the original mirror in-sync?
|
||||
*
|
||||
* in_sync will be set to 0 if new mirror devices are being added
|
||||
* In other words, it is only useful if the log (and only the log)
|
||||
* is being restored.
|
||||
*
|
||||
* Returns: 0 on failure, 1 on reconfig, -1 if no reconfig done
|
||||
*/
|
||||
static int replace_mirror_images(struct lv_segment *mirrored_seg,
|
||||
uint32_t num_mirrors,
|
||||
int log_policy, int in_sync)
|
||||
{
|
||||
int r = -1;
|
||||
struct logical_volume *lv = mirrored_seg->lv;
|
||||
|
||||
/* FIXME: Use lvconvert rather than duplicating its code */
|
||||
|
||||
if (mirrored_seg->area_count < num_mirrors) {
|
||||
log_error("WARNING: Failed to replace mirror device in %s/%s",
|
||||
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||
|
||||
if ((mirrored_seg->area_count > 1) && !mirrored_seg->log_lv)
|
||||
log_error("WARNING: Use 'lvconvert -m %d %s/%s --corelog' to replace failed devices",
|
||||
num_mirrors - 1, lv->vg->name, lv->name);
|
||||
else
|
||||
log_error("WARNING: Use 'lvconvert -m %d %s/%s' to replace failed devices",
|
||||
num_mirrors - 1, lv->vg->name, lv->name);
|
||||
r = 0;
|
||||
|
||||
/* REMEMBER/FIXME: set in_sync to 0 if a new mirror device was added */
|
||||
in_sync = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* FIXME: right now, we ignore the allocation policy specified to
|
||||
* allocate the new log.
|
||||
*/
|
||||
if ((mirrored_seg->area_count > 1) && !mirrored_seg->log_lv &&
|
||||
(log_policy != MIRROR_REMOVE)) {
|
||||
log_error("WARNING: Failed to replace mirror log device in %s/%s",
|
||||
lv->vg->name, lv->name);
|
||||
|
||||
log_error("WARNING: Use 'lvconvert -m %d %s/%s' to replace failed devices",
|
||||
mirrored_seg->area_count - 1 , lv->vg->name, lv->name);
|
||||
r = 0;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
|
||||
struct list *removable_pvs, int remove_log)
|
||||
{
|
||||
int r;
|
||||
int insync = 0;
|
||||
// int mirror_dev_failed = (mirrored_seg->area_count != num_mirrors);
|
||||
int log_policy, dev_policy;
|
||||
uint32_t old_num_mirrors = mirrored_seg->area_count;
|
||||
int had_log = (mirrored_seg->log_lv) ? 1 : 0;
|
||||
float sync_percent = 0;
|
||||
|
||||
/* was the mirror in-sync before problems? */
|
||||
if (!lv_mirror_percent(mirrored_seg->lv->vg->cmd,
|
||||
mirrored_seg->lv, 0, &sync_percent, NULL))
|
||||
log_error("Unable to determine mirror sync status.");
|
||||
log_error("WARNING: Unable to determine mirror sync status of %s/%s.",
|
||||
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||
else if (sync_percent >= 100.0)
|
||||
insync = 1;
|
||||
|
||||
@ -258,6 +359,38 @@ int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirr
|
||||
/* Unable to remove bad devices */
|
||||
return 0;
|
||||
|
||||
log_print("WARNING: Bad device removed from mirror volume, %s/%s",
|
||||
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||
|
||||
log_policy = get_mirror_log_fault_policy(mirrored_seg->lv->vg->cmd);
|
||||
dev_policy = get_mirror_dev_fault_policy(mirrored_seg->lv->vg->cmd);
|
||||
|
||||
r = replace_mirror_images(mirrored_seg,
|
||||
(dev_policy != MIRROR_REMOVE) ?
|
||||
old_num_mirrors : num_mirrors,
|
||||
log_policy, insync);
|
||||
|
||||
if (!r)
|
||||
/* Failed to replace device(s) */
|
||||
log_error("WARNING: Unable to find substitute device for mirror volume, %s/%s",
|
||||
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||
else if (r > 0)
|
||||
/* Success in replacing device(s) */
|
||||
log_print("WARNING: Mirror volume, %s/%s restored - substitute for failed device found.",
|
||||
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||
else
|
||||
/* Bad device removed, but not replaced because of policy */
|
||||
if (mirrored_seg->area_count == 1) {
|
||||
log_print("WARNING: Mirror volume, %s/%s converted to linear due to device failure.",
|
||||
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||
} else if (had_log && !mirrored_seg->log_lv) {
|
||||
log_print("WARNING: Mirror volume, %s/%s disk log removed due to device failure.",
|
||||
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||
}
|
||||
/*
|
||||
* If we made it here, we at least removed the bad device.
|
||||
* Consider this success.
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user