1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-02-06 01:58:01 +03:00

Add mirror log fault-handling policy.

This commit is contained in:
Alasdair Kergon 2006-05-11 19:45:53 +00:00
parent 9723090c92
commit aeb2c277a8
6 changed files with 201 additions and 17 deletions

View File

@ -1,5 +1,7 @@
Version 2.02.06 -
=================================
Add mirror log fault-handling policy.
Improve mirror warning messages and tidy dmeventd syslog output.
Propagate nosync flag around cluster.
Allow vgreduce to handle mirror log failures.
Add --corelog to lvcreate and lvconvert.

View File

@ -99,10 +99,13 @@ static int _get_mirror_event(char *params)
return rtn;
}
static void _temporary_log_fn(int level, const char *file, int line, const char *format)
static void _temporary_log_fn(int level, const char *file,
int line, const char *format)
{
return;
syslog(LOG_DEBUG, "%s", format);
if (!strncmp(format, "WARNING: ", 9) && (level < 5))
syslog(LOG_CRIT, "%s", format);
else
syslog(LOG_DEBUG, "%s", format);
}
static int _remove_failed_devices(const char *device)
@ -205,7 +208,7 @@ void process_event(const char *device, enum dm_event_type event)
int register_device(const char *device)
{
syslog(LOG_INFO, "Monitoring %s for events\n", device);
syslog(LOG_INFO, "Monitoring mirror device, %s for events\n", device);
/*
* Need some space for allocations. 1024 should be more
@ -224,8 +227,6 @@ int register_device(const char *device)
int unregister_device(const char *device)
{
syslog(LOG_INFO, "Stopped monitoring %s for events\n", device);
if (!(--register_count)) {
dm_pool_destroy(mem_pool);
mem_pool = NULL;

View File

@ -99,10 +99,13 @@ static int _get_mirror_event(char *params)
return rtn;
}
static void _temporary_log_fn(int level, const char *file, int line, const char *format)
static void _temporary_log_fn(int level, const char *file,
int line, const char *format)
{
return;
syslog(LOG_DEBUG, "%s", format);
if (!strncmp(format, "WARNING: ", 9) && (level < 5))
syslog(LOG_CRIT, "%s", format);
else
syslog(LOG_DEBUG, "%s", format);
}
static int _remove_failed_devices(const char *device)
@ -205,7 +208,7 @@ void process_event(const char *device, enum dm_event_type event)
int register_device(const char *device)
{
syslog(LOG_INFO, "Monitoring %s for events\n", device);
syslog(LOG_INFO, "Monitoring mirror device, %s for events\n", device);
/*
* Need some space for allocations. 1024 should be more
@ -224,8 +227,6 @@ int register_device(const char *device)
int unregister_device(const char *device)
{
syslog(LOG_INFO, "Stopped monitoring %s for events\n", device);
if (!(--register_count)) {
dm_pool_destroy(mem_pool);
mem_pool = NULL;

View File

@ -232,9 +232,6 @@ activation {
# target or make it return zeros.
missing_stripe_filler = "/dev/ioerror"
# Size (in KB) of each copy operation when mirroring
mirror_region_size = 512
# How much stack (in KB) to reserve for use while devices suspended
reserved_stack = 256
@ -251,6 +248,54 @@ activation {
# "@*" matches if any tag defined on the host is also set in the LV or VG
#
# volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
# Size (in KB) of each copy operation when mirroring
mirror_region_size = 512
# 'mirror_dev_fault_policy' and 'mirror_log_fault_policy' define
# how a device failure affecting a mirror is handled.
# A mirror is composed of mirror images (copies) and a log.
# A disk log ensures that a mirror does not need to be re-synced
# (all copies made the same) every time a machine reboots or crashes.
#
# In the event of a failure, the specified policy will be used to
# determine what happens:
#
# "remove" - Simply remove the faulty device and run without it. If
# the log device fails, the mirror would convert to using
# an in-memory log. This means the mirror will not
# remember its sync status across crashes/reboots and
# the entire mirror will be re-synced. If a
# mirror image fails, the mirror will convert to a
# non-mirrored device if there is only one remaining good
# copy.
#
# "allocate" - Remove the faulty device and try to allocate space on
# a new device to be a replacement for the failed device.
# Using this policy for the log is fast and maintains the
# ability to remember sync state through crashes/reboots.
# Using this policy for a mirror device is slow, as it
# requires the mirror to resynchronize the devices, but it
# will preserve the mirror characteristic of the device.
# This policy acts like "remove" if no suitable device and
# space can be allocated for the replacement.
# Currently this is not implemented properly and behaves
# similarly to:
#
# "allocate_anywhere" - Operates like "allocate", but it does not
# require that the new space being allocated be on a
# device that is not part of the mirror. For a log device
# failure, this could mean that the log is allocated on
# the same device as a mirror device. For a mirror
# device, this could mean that the mirror device is
# allocated on the same device as another mirror device.
# This policy would not be wise for mirror devices
# because it would break the redundant nature of the
# mirror. This policy acts like "remove" if no suitable
# device and space can be allocated for the replacement.
mirror_log_fault_policy = "allocate"
mirror_dev_fault_policy = "remove"
}

View File

@ -34,6 +34,8 @@
#define DEFAULT_LOCK_DIR "/var/lock/lvm"
#define DEFAULT_LOCKING_LIB "lvm2_locking.so"
#define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate"
#define DEFAULT_MIRROR_DEV_FAULT_POLICY "remove"
#define DEFAULT_DMEVENTD_MIRROR_LIB "libdevmapper-event-lvm2mirror.so"
#define DEFAULT_UMASK 0077

View File

@ -23,6 +23,13 @@
#include "lvm-string.h"
#include "locking.h" /* FIXME Should not be used in this file */
#include "defaults.h" /* FIXME: should this be defaults.h? */
/* These are the flags that represent the mirror failure restoration policies */
#define MIRROR_REMOVE 0
#define MIRROR_ALLOCATE 1
#define MIRROR_ALLOCATE_ANYWHERE 2
struct lv_segment *find_mirror_seg(struct lv_segment *seg)
{
return seg->mirror_seg;
@ -230,18 +237,112 @@ int remove_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
return 1;
}
/*
 * get_mirror_fault_policy
 * @cmd: command context (not referenced by this function)
 * @log_policy: non-zero to look up the log fault policy,
 *              zero to look up the device fault policy
 *
 * Reads "activation/mirror_log_fault_policy" or
 * "activation/mirror_dev_fault_policy" and translates the configured
 * string into one of the MIRROR_* policy values.
 *
 * Returns: MIRROR_REMOVE, MIRROR_ALLOCATE or MIRROR_ALLOCATE_ANYWHERE.
 * An unrecognised string is reported with log_error() and treated as
 * MIRROR_REMOVE.
 */
static int get_mirror_fault_policy(struct cmd_context *cmd, int log_policy)
{
	const char *key;
	const char *fallback;
	const char *setting;

	if (log_policy) {
		key = "activation/mirror_log_fault_policy";
		fallback = DEFAULT_MIRROR_LOG_FAULT_POLICY;
	} else {
		key = "activation/mirror_dev_fault_policy";
		fallback = DEFAULT_MIRROR_DEV_FAULT_POLICY;
	}

	/*
	 * NOTE(review): the lookup is given NULL rather than anything derived
	 * from cmd - confirm find_config_str() consults the intended
	 * configuration when called this way.
	 */
	setting = find_config_str(NULL, key, fallback);

	if (strcmp(setting, "remove") == 0)
		return MIRROR_REMOVE;

	if (strcmp(setting, "allocate") == 0)
		return MIRROR_ALLOCATE;

	if (strcmp(setting, "allocate_anywhere") == 0)
		return MIRROR_ALLOCATE_ANYWHERE;

	/* Unknown policy string: complain and fall back to plain removal. */
	if (log_policy)
		log_error("Bad activation/mirror_log_fault_policy");
	else
		log_error("Bad activation/mirror_dev_fault_policy");

	return MIRROR_REMOVE;
}
/*
 * Look up the fault policy that applies to a failed mirror log.
 * Returns one of the MIRROR_* policy values.
 */
static int get_mirror_log_fault_policy(struct cmd_context *cmd)
{
	const int want_log_policy = 1;

	return get_mirror_fault_policy(cmd, want_log_policy);
}
/*
 * Look up the fault policy that applies to a failed mirror device.
 * Returns one of the MIRROR_* policy values.
 */
static int get_mirror_dev_fault_policy(struct cmd_context *cmd)
{
	const int want_log_policy = 0;

	return get_mirror_fault_policy(cmd, want_log_policy);
}
/*
 * replace_mirror_images
 * @mirrored_seg: segment (which may be linear now) to restore
 * @num_mirrors: number of copies we should end up with
 * @log_policy: MIRROR_* policy for the log; anything other than
 *              MIRROR_REMOVE means a missing log should be replaced
 * @in_sync: was the original mirror in-sync?
 *
 * No replacement is actually carried out here yet: when the segment has
 * fewer areas than @num_mirrors, or is a multi-area mirror without a log
 * while the log policy asks for one, warnings are logged telling the
 * user which lvconvert command to run.
 *
 * Returns: 0 when a wanted replacement was not made,
 *          -1 when nothing needed replacing.
 *          (The caller also treats a positive value as "reconfigured",
 *          but no path in this function returns one at present.)
 */
static int replace_mirror_images(struct lv_segment *mirrored_seg,
				 uint32_t num_mirrors,
				 int log_policy, int in_sync)
{
	struct logical_volume *lv = mirrored_seg->lv;
	int status = -1;
	/* Still a mirror (more than one area) but running without a log LV. */
	int lacks_disk_log = (mirrored_seg->area_count > 1) &&
			     !mirrored_seg->log_lv;

	/* FIXME: Use lvconvert rather than duplicating its code */
	if (mirrored_seg->area_count < num_mirrors) {
		log_error("WARNING: Failed to replace mirror device in %s/%s",
			  lv->vg->name, lv->name);

		if (lacks_disk_log)
			log_error("WARNING: Use 'lvconvert -m %d %s/%s --corelog' to replace failed devices",
				  num_mirrors - 1, lv->vg->name, lv->name);
		else
			log_error("WARNING: Use 'lvconvert -m %d %s/%s' to replace failed devices",
				  num_mirrors - 1, lv->vg->name, lv->name);

		status = 0;

		/*
		 * REMEMBER/FIXME: set in_sync to 0 if a new mirror device was added.
		 * in_sync is passed by value, so this only changes the local copy.
		 */
		in_sync = 0;
	}

	/*
	 * FIXME: right now, we ignore the allocation policy specified to
	 * allocate the new log.
	 */
	if (lacks_disk_log && (log_policy != MIRROR_REMOVE)) {
		log_error("WARNING: Failed to replace mirror log device in %s/%s",
			  lv->vg->name, lv->name);
		log_error("WARNING: Use 'lvconvert -m %d %s/%s' to replace failed devices",
			  mirrored_seg->area_count - 1 , lv->vg->name, lv->name);
		status = 0;
	}

	return status;
}
int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
struct list *removable_pvs, int remove_log)
{
int r;
int insync = 0;
// int mirror_dev_failed = (mirrored_seg->area_count != num_mirrors);
int log_policy, dev_policy;
uint32_t old_num_mirrors = mirrored_seg->area_count;
int had_log = (mirrored_seg->log_lv) ? 1 : 0;
float sync_percent = 0;
/* was the mirror in-sync before problems? */
if (!lv_mirror_percent(mirrored_seg->lv->vg->cmd,
mirrored_seg->lv, 0, &sync_percent, NULL))
log_error("Unable to determine mirror sync status.");
log_error("WARNING: Unable to determine mirror sync status of %s/%s.",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
else if (sync_percent >= 100.0)
insync = 1;
@ -258,6 +359,38 @@ int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirr
/* Unable to remove bad devices */
return 0;
log_print("WARNING: Bad device removed from mirror volume, %s/%s",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
log_policy = get_mirror_log_fault_policy(mirrored_seg->lv->vg->cmd);
dev_policy = get_mirror_dev_fault_policy(mirrored_seg->lv->vg->cmd);
r = replace_mirror_images(mirrored_seg,
(dev_policy != MIRROR_REMOVE) ?
old_num_mirrors : num_mirrors,
log_policy, insync);
if (!r)
/* Failed to replace device(s) */
log_error("WARNING: Unable to find substitute device for mirror volume, %s/%s",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
else if (r > 0)
/* Success in replacing device(s) */
log_print("WARNING: Mirror volume, %s/%s restored - substitute for failed device found.",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
else
/* Bad device removed, but not replaced because of policy */
if (mirrored_seg->area_count == 1) {
log_print("WARNING: Mirror volume, %s/%s converted to linear due to device failure.",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
} else if (had_log && !mirrored_seg->log_lv) {
log_print("WARNING: Mirror volume, %s/%s disk log removed due to device failure.",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
}
/*
* If we made it here, we at least removed the bad device.
* Consider this success.
*/
return 1;
}