1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-03-13 00:58:47 +03:00

Add mirror log fault-handling policy.

This commit is contained in:
Alasdair Kergon 2006-05-11 19:45:53 +00:00
parent 9723090c92
commit aeb2c277a8
6 changed files with 201 additions and 17 deletions

View File

@ -1,5 +1,7 @@
Version 2.02.06 - Version 2.02.06 -
================================= =================================
Add mirror log fault-handling policy.
Improve mirror warning messages and tidy dmeventd syslog output.
Propagate nosync flag around cluster. Propagate nosync flag around cluster.
Allow vgreduce to handle mirror log failures. Allow vgreduce to handle mirror log failures.
Add --corelog to lvcreate and lvconvert. Add --corelog to lvcreate and lvconvert.

View File

@ -99,10 +99,13 @@ static int _get_mirror_event(char *params)
return rtn; return rtn;
} }
static void _temporary_log_fn(int level, const char *file, int line, const char *format) static void _temporary_log_fn(int level, const char *file,
int line, const char *format)
{ {
return; if (!strncmp(format, "WARNING: ", 9) && (level < 5))
syslog(LOG_DEBUG, "%s", format); syslog(LOG_CRIT, "%s", format);
else
syslog(LOG_DEBUG, "%s", format);
} }
static int _remove_failed_devices(const char *device) static int _remove_failed_devices(const char *device)
@ -205,7 +208,7 @@ void process_event(const char *device, enum dm_event_type event)
int register_device(const char *device) int register_device(const char *device)
{ {
syslog(LOG_INFO, "Monitoring %s for events\n", device); syslog(LOG_INFO, "Monitoring mirror device, %s for events\n", device);
/* /*
* Need some space for allocations. 1024 should be more * Need some space for allocations. 1024 should be more
@ -224,8 +227,6 @@ int register_device(const char *device)
int unregister_device(const char *device) int unregister_device(const char *device)
{ {
syslog(LOG_INFO, "Stopped monitoring %s for events\n", device);
if (!(--register_count)) { if (!(--register_count)) {
dm_pool_destroy(mem_pool); dm_pool_destroy(mem_pool);
mem_pool = NULL; mem_pool = NULL;

View File

@ -99,10 +99,13 @@ static int _get_mirror_event(char *params)
return rtn; return rtn;
} }
static void _temporary_log_fn(int level, const char *file, int line, const char *format) static void _temporary_log_fn(int level, const char *file,
int line, const char *format)
{ {
return; if (!strncmp(format, "WARNING: ", 9) && (level < 5))
syslog(LOG_DEBUG, "%s", format); syslog(LOG_CRIT, "%s", format);
else
syslog(LOG_DEBUG, "%s", format);
} }
static int _remove_failed_devices(const char *device) static int _remove_failed_devices(const char *device)
@ -205,7 +208,7 @@ void process_event(const char *device, enum dm_event_type event)
int register_device(const char *device) int register_device(const char *device)
{ {
syslog(LOG_INFO, "Monitoring %s for events\n", device); syslog(LOG_INFO, "Monitoring mirror device, %s for events\n", device);
/* /*
* Need some space for allocations. 1024 should be more * Need some space for allocations. 1024 should be more
@ -224,8 +227,6 @@ int register_device(const char *device)
int unregister_device(const char *device) int unregister_device(const char *device)
{ {
syslog(LOG_INFO, "Stopped monitoring %s for events\n", device);
if (!(--register_count)) { if (!(--register_count)) {
dm_pool_destroy(mem_pool); dm_pool_destroy(mem_pool);
mem_pool = NULL; mem_pool = NULL;

View File

@ -232,9 +232,6 @@ activation {
# target or make it return zeros. # target or make it return zeros.
missing_stripe_filler = "/dev/ioerror" missing_stripe_filler = "/dev/ioerror"
# Size (in KB) of each copy operation when mirroring
mirror_region_size = 512
# How much stack (in KB) to reserve for use while devices suspended # How much stack (in KB) to reserve for use while devices suspended
reserved_stack = 256 reserved_stack = 256
@ -251,6 +248,54 @@ activation {
# "@*" matches if any tag defined on the host is also set in the LV or VG # "@*" matches if any tag defined on the host is also set in the LV or VG
# #
# volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ] # volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
# Size (in KB) of each copy operation when mirroring
mirror_region_size = 512
# 'mirror_dev_fault_policy' and 'mirror_log_fault_policy' define
# how a device failure affecting a mirror is handled.
# A mirror is composed of mirror images (copies) and a log.
# A disk log ensures that a mirror does not need to be re-synced
# (all copies made the same) every time a machine reboots or crashes.
#
# In the event of a failure, the specified policy will be used to
# determine what happens:
#
# "remove" - Simply remove the faulty device and run without it. If
# the log device fails, the mirror would convert to using
# an in-memory log. This means the mirror will not
# remember its sync status across crashes/reboots and
# the entire mirror will be re-synced. If a
# mirror image fails, the mirror will convert to a
# non-mirrored device if there is only one remaining good
# copy.
#
# "allocate" - Remove the faulty device and try to allocate space on
# a new device to be a replacement for the failed device.
# Using this policy for the log is fast and maintains the
# ability to remember sync state through crashes/reboots.
# Using this policy for a mirror device is slow, as it
# requires the mirror to resynchronize the devices, but it
# will preserve the mirror characteristic of the device.
# This policy acts like "remove" if no suitable device and
# space can be allocated for the replacement.
# Currently this is not implemented properly and behaves
# similarly to:
#
# "allocate_anywhere" - Operates like "allocate", but it does not
# require that the new space being allocated be on a
# device that is not part of the mirror. For a log device
# failure, this could mean that the log is allocated on
# the same device as a mirror device. For a mirror
# device, this could mean that the mirror device is
# allocated on the same device as another mirror device.
# This policy would not be wise for mirror devices
# because it would break the redundant nature of the
# mirror. This policy acts like "remove" if no suitable
# device and space can be allocated for the replacement.
mirror_log_fault_policy = "allocate"
mirror_dev_fault_policy = "remove"
} }

View File

@ -34,6 +34,8 @@
#define DEFAULT_LOCK_DIR "/var/lock/lvm" #define DEFAULT_LOCK_DIR "/var/lock/lvm"
#define DEFAULT_LOCKING_LIB "lvm2_locking.so" #define DEFAULT_LOCKING_LIB "lvm2_locking.so"
/*
 * Built-in mirror fault policies. These are the values passed as the last
 * argument of find_config_str() when activation/mirror_log_fault_policy and
 * activation/mirror_dev_fault_policy are looked up (presumably the value
 * returned when the setting is absent - confirm against find_config_str()).
 */
#define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate"
#define DEFAULT_MIRROR_DEV_FAULT_POLICY "remove"
#define DEFAULT_DMEVENTD_MIRROR_LIB "libdevmapper-event-lvm2mirror.so" #define DEFAULT_DMEVENTD_MIRROR_LIB "libdevmapper-event-lvm2mirror.so"
#define DEFAULT_UMASK 0077 #define DEFAULT_UMASK 0077

View File

@ -23,6 +23,13 @@
#include "lvm-string.h" #include "lvm-string.h"
#include "locking.h" /* FIXME Should not be used in this file */ #include "locking.h" /* FIXME Should not be used in this file */
#include "defaults.h" /* FIXME: should this be defaults.h? */
/* These are the flags that represent the mirror failure restoration policies */
#define MIRROR_REMOVE 0
#define MIRROR_ALLOCATE 1
#define MIRROR_ALLOCATE_ANYWHERE 2
struct lv_segment *find_mirror_seg(struct lv_segment *seg) struct lv_segment *find_mirror_seg(struct lv_segment *seg)
{ {
return seg->mirror_seg; return seg->mirror_seg;
@ -230,18 +237,112 @@ int remove_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
return 1; return 1;
} }
/*
 * get_mirror_fault_policy
 * @cmd: command context
 * @log_policy: non-zero to read activation/mirror_log_fault_policy,
 *              zero to read activation/mirror_dev_fault_policy
 *
 * Translates the configured policy string into one of the MIRROR_* flags.
 * An unrecognised string is reported with log_error() and treated as
 * MIRROR_REMOVE.
 *
 * NOTE(review): @cmd is not used here; the lookup passes NULL as the first
 * argument of find_config_str().
 *
 * Returns: MIRROR_REMOVE, MIRROR_ALLOCATE or MIRROR_ALLOCATE_ANYWHERE
 */
static int get_mirror_fault_policy(struct cmd_context *cmd, int log_policy)
{
	const char *value;

	value = log_policy ?
		find_config_str(NULL, "activation/mirror_log_fault_policy",
				DEFAULT_MIRROR_LOG_FAULT_POLICY) :
		find_config_str(NULL, "activation/mirror_dev_fault_policy",
				DEFAULT_MIRROR_DEV_FAULT_POLICY);

	if (!strcmp(value, "remove"))
		return MIRROR_REMOVE;

	if (!strcmp(value, "allocate"))
		return MIRROR_ALLOCATE;

	if (!strcmp(value, "allocate_anywhere"))
		return MIRROR_ALLOCATE_ANYWHERE;

	/* Unknown policy string: complain and fall back to plain removal */
	if (log_policy)
		log_error("Bad activation/mirror_log_fault_policy");
	else
		log_error("Bad activation/mirror_dev_fault_policy");

	return MIRROR_REMOVE;
}
/* Fault policy configured for a failed mirror log, as a MIRROR_* flag. */
static int get_mirror_log_fault_policy(struct cmd_context *cmd)
{
	const int want_log_policy = 1;

	return get_mirror_fault_policy(cmd, want_log_policy);
}
/* Fault policy configured for a failed mirror device, as a MIRROR_* flag. */
static int get_mirror_dev_fault_policy(struct cmd_context *cmd)
{
	const int want_log_policy = 0;

	return get_mirror_fault_policy(cmd, want_log_policy);
}
/*
 * replace_mirror_images
 * @mirrored_seg: segment (which may be linear now) to restore
 * @num_mirrors: number of copies we should end up with
 * @log_policy: log fault policy (one of the MIRROR_* flags); anything other
 *              than MIRROR_REMOVE asks for a missing log to be replaced
 * @in_sync: was the original mirror in-sync?
 *
 * in_sync will be set to 0 if new mirror devices are being added
 * In other words, it is only useful if the log (and only the log)
 * is being restored.
 *
 * No replacement is actually performed yet: when a device or log would
 * need replacing, the function only logs how to do it by hand with
 * lvconvert and reports failure.
 *
 * Returns: 0 on failure, 1 on reconfig, -1 if no reconfig done
 *          (1 is never returned by the current implementation)
 */
static int replace_mirror_images(struct lv_segment *mirrored_seg,
				 uint32_t num_mirrors,
				 int log_policy, int in_sync)
{
	int r = -1;
	struct logical_volume *lv = mirrored_seg->lv;

	/* FIXME: Use lvconvert rather than duplicating its code */
	if (mirrored_seg->area_count < num_mirrors) {
		log_error("WARNING: Failed to replace mirror device in %s/%s",
			  lv->vg->name, lv->name);

		/*
		 * The counts are unsigned, so print them with %u; the cast
		 * keeps the argument type matched to the conversion.
		 */
		if ((mirrored_seg->area_count > 1) && !mirrored_seg->log_lv)
			log_error("WARNING: Use 'lvconvert -m %u %s/%s --corelog' to replace failed devices",
				  (unsigned) (num_mirrors - 1), lv->vg->name, lv->name);
		else
			log_error("WARNING: Use 'lvconvert -m %u %s/%s' to replace failed devices",
				  (unsigned) (num_mirrors - 1), lv->vg->name, lv->name);
		r = 0;

		/*
		 * REMEMBER/FIXME: set in_sync to 0 if a new mirror device was added.
		 * in_sync is passed by value, so this only changes the local copy.
		 */
		in_sync = 0;
	}

	/*
	 * FIXME: right now, we ignore the allocation policy specified to
	 * allocate the new log.
	 */
	if ((mirrored_seg->area_count > 1) && !mirrored_seg->log_lv &&
	    (log_policy != MIRROR_REMOVE)) {
		log_error("WARNING: Failed to replace mirror log device in %s/%s",
			  lv->vg->name, lv->name);

		log_error("WARNING: Use 'lvconvert -m %u %s/%s' to replace failed devices",
			  (unsigned) (mirrored_seg->area_count - 1), lv->vg->name, lv->name);
		r = 0;
	}

	return r;
}
int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors, int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
struct list *removable_pvs, int remove_log) struct list *removable_pvs, int remove_log)
{ {
int r; int r;
int insync = 0; int insync = 0;
// int mirror_dev_failed = (mirrored_seg->area_count != num_mirrors); int log_policy, dev_policy;
uint32_t old_num_mirrors = mirrored_seg->area_count;
int had_log = (mirrored_seg->log_lv) ? 1 : 0;
float sync_percent = 0; float sync_percent = 0;
/* was the mirror in-sync before problems? */ /* was the mirror in-sync before problems? */
if (!lv_mirror_percent(mirrored_seg->lv->vg->cmd, if (!lv_mirror_percent(mirrored_seg->lv->vg->cmd,
mirrored_seg->lv, 0, &sync_percent, NULL)) mirrored_seg->lv, 0, &sync_percent, NULL))
log_error("Unable to determine mirror sync status."); log_error("WARNING: Unable to determine mirror sync status of %s/%s.",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
else if (sync_percent >= 100.0) else if (sync_percent >= 100.0)
insync = 1; insync = 1;
@ -258,6 +359,38 @@ int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirr
/* Unable to remove bad devices */ /* Unable to remove bad devices */
return 0; return 0;
log_print("WARNING: Bad device removed from mirror volume, %s/%s",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
log_policy = get_mirror_log_fault_policy(mirrored_seg->lv->vg->cmd);
dev_policy = get_mirror_dev_fault_policy(mirrored_seg->lv->vg->cmd);
r = replace_mirror_images(mirrored_seg,
(dev_policy != MIRROR_REMOVE) ?
old_num_mirrors : num_mirrors,
log_policy, insync);
if (!r)
/* Failed to replace device(s) */
log_error("WARNING: Unable to find substitute device for mirror volume, %s/%s",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
else if (r > 0)
/* Success in replacing device(s) */
log_print("WARNING: Mirror volume, %s/%s restored - substitute for failed device found.",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
else
/* Bad device removed, but not replaced because of policy */
if (mirrored_seg->area_count == 1) {
log_print("WARNING: Mirror volume, %s/%s converted to linear due to device failure.",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
} else if (had_log && !mirrored_seg->log_lv) {
log_print("WARNING: Mirror volume, %s/%s disk log removed due to device failure.",
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
}
/*
* If we made it here, we at least removed the bad device.
* Consider this success.
*/
return 1; return 1;
} }