mirror of
git://sourceware.org/git/lvm2.git
synced 2025-03-13 00:58:47 +03:00
Add mirror log fault-handling policy.
This commit is contained in:
parent
9723090c92
commit
aeb2c277a8
@ -1,5 +1,7 @@
|
|||||||
Version 2.02.06 -
|
Version 2.02.06 -
|
||||||
=================================
|
=================================
|
||||||
|
Add mirror log fault-handling policy.
|
||||||
|
Improve mirror warning messages and tidy dmeventd syslog output.
|
||||||
Propagate nosync flag around cluster.
|
Propagate nosync flag around cluster.
|
||||||
Allow vgreduce to handle mirror log failures.
|
Allow vgreduce to handle mirror log failures.
|
||||||
Add --corelog to lvcreate and lvconvert.
|
Add --corelog to lvcreate and lvconvert.
|
||||||
|
@ -99,10 +99,13 @@ static int _get_mirror_event(char *params)
|
|||||||
return rtn;
|
return rtn;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _temporary_log_fn(int level, const char *file, int line, const char *format)
|
static void _temporary_log_fn(int level, const char *file,
|
||||||
|
int line, const char *format)
|
||||||
{
|
{
|
||||||
return;
|
if (!strncmp(format, "WARNING: ", 9) && (level < 5))
|
||||||
syslog(LOG_DEBUG, "%s", format);
|
syslog(LOG_CRIT, "%s", format);
|
||||||
|
else
|
||||||
|
syslog(LOG_DEBUG, "%s", format);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int _remove_failed_devices(const char *device)
|
static int _remove_failed_devices(const char *device)
|
||||||
@ -205,7 +208,7 @@ void process_event(const char *device, enum dm_event_type event)
|
|||||||
|
|
||||||
int register_device(const char *device)
|
int register_device(const char *device)
|
||||||
{
|
{
|
||||||
syslog(LOG_INFO, "Monitoring %s for events\n", device);
|
syslog(LOG_INFO, "Monitoring mirror device, %s for events\n", device);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Need some space for allocations. 1024 should be more
|
* Need some space for allocations. 1024 should be more
|
||||||
@ -224,8 +227,6 @@ int register_device(const char *device)
|
|||||||
|
|
||||||
int unregister_device(const char *device)
|
int unregister_device(const char *device)
|
||||||
{
|
{
|
||||||
syslog(LOG_INFO, "Stopped monitoring %s for events\n", device);
|
|
||||||
|
|
||||||
if (!(--register_count)) {
|
if (!(--register_count)) {
|
||||||
dm_pool_destroy(mem_pool);
|
dm_pool_destroy(mem_pool);
|
||||||
mem_pool = NULL;
|
mem_pool = NULL;
|
||||||
|
@ -99,10 +99,13 @@ static int _get_mirror_event(char *params)
|
|||||||
return rtn;
|
return rtn;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _temporary_log_fn(int level, const char *file, int line, const char *format)
|
static void _temporary_log_fn(int level, const char *file,
|
||||||
|
int line, const char *format)
|
||||||
{
|
{
|
||||||
return;
|
if (!strncmp(format, "WARNING: ", 9) && (level < 5))
|
||||||
syslog(LOG_DEBUG, "%s", format);
|
syslog(LOG_CRIT, "%s", format);
|
||||||
|
else
|
||||||
|
syslog(LOG_DEBUG, "%s", format);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int _remove_failed_devices(const char *device)
|
static int _remove_failed_devices(const char *device)
|
||||||
@ -205,7 +208,7 @@ void process_event(const char *device, enum dm_event_type event)
|
|||||||
|
|
||||||
int register_device(const char *device)
|
int register_device(const char *device)
|
||||||
{
|
{
|
||||||
syslog(LOG_INFO, "Monitoring %s for events\n", device);
|
syslog(LOG_INFO, "Monitoring mirror device, %s for events\n", device);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Need some space for allocations. 1024 should be more
|
* Need some space for allocations. 1024 should be more
|
||||||
@ -224,8 +227,6 @@ int register_device(const char *device)
|
|||||||
|
|
||||||
int unregister_device(const char *device)
|
int unregister_device(const char *device)
|
||||||
{
|
{
|
||||||
syslog(LOG_INFO, "Stopped monitoring %s for events\n", device);
|
|
||||||
|
|
||||||
if (!(--register_count)) {
|
if (!(--register_count)) {
|
||||||
dm_pool_destroy(mem_pool);
|
dm_pool_destroy(mem_pool);
|
||||||
mem_pool = NULL;
|
mem_pool = NULL;
|
||||||
|
@ -232,9 +232,6 @@ activation {
|
|||||||
# target or make it return zeros.
|
# target or make it return zeros.
|
||||||
missing_stripe_filler = "/dev/ioerror"
|
missing_stripe_filler = "/dev/ioerror"
|
||||||
|
|
||||||
# Size (in KB) of each copy operation when mirroring
|
|
||||||
mirror_region_size = 512
|
|
||||||
|
|
||||||
# How much stack (in KB) to reserve for use while devices suspended
|
# How much stack (in KB) to reserve for use while devices suspended
|
||||||
reserved_stack = 256
|
reserved_stack = 256
|
||||||
|
|
||||||
@ -251,6 +248,54 @@ activation {
|
|||||||
# "@*" matches if any tag defined on the host is also set in the LV or VG
|
# "@*" matches if any tag defined on the host is also set in the LV or VG
|
||||||
#
|
#
|
||||||
# volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
|
# volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
|
||||||
|
|
||||||
|
# Size (in KB) of each copy operation when mirroring
|
||||||
|
mirror_region_size = 512
|
||||||
|
|
||||||
|
# 'mirror_dev_fault_policy' and 'mirror_log_fault_policy' define
|
||||||
|
# how a device failure affecting a mirror is handled.
|
||||||
|
# A mirror is composed of mirror images (copies) and a log.
|
||||||
|
# A disk log ensures that a mirror does not need to be re-synced
|
||||||
|
# (all copies made the same) every time a machine reboots or crashes.
|
||||||
|
#
|
||||||
|
# In the event of a failure, the specified policy will be used to
|
||||||
|
# determine what happens:
|
||||||
|
#
|
||||||
|
# "remove" - Simply remove the faulty device and run without it. If
|
||||||
|
# the log device fails, the mirror would convert to using
|
||||||
|
# an in-memory log. This means the mirror will not
|
||||||
|
# remember its sync status across crashes/reboots and
|
||||||
|
# the entire mirror will be re-synced. If a
|
||||||
|
# mirror image fails, the mirror will convert to a
|
||||||
|
# non-mirrored device if there is only one remaining good
|
||||||
|
# copy.
|
||||||
|
#
|
||||||
|
# "allocate" - Remove the faulty device and try to allocate space on
|
||||||
|
# a new device to be a replacement for the failed device.
|
||||||
|
# Using this policy for the log is fast and maintains the
|
||||||
|
# ability to remember sync state through crashes/reboots.
|
||||||
|
# Using this policy for a mirror device is slow, as it
|
||||||
|
# requires the mirror to resynchronize the devices, but it
|
||||||
|
# will preserve the mirror characteristic of the device.
|
||||||
|
# This policy acts like "remove" if no suitable device and
|
||||||
|
# space can be allocated for the replacement.
|
||||||
|
# Currently this is not implemented properly and behaves
|
||||||
|
# similarly to:
|
||||||
|
#
|
||||||
|
# "allocate_anywhere" - Operates like "allocate", but it does not
|
||||||
|
# require that the new space being allocated be on a
|
||||||
|
# device that is not part of the mirror. For a log device
|
||||||
|
# failure, this could mean that the log is allocated on
|
||||||
|
# the same device as a mirror device. For a mirror
|
||||||
|
# device, this could mean that the mirror device is
|
||||||
|
# allocated on the same device as another mirror device.
|
||||||
|
# This policy would not be wise for mirror devices
|
||||||
|
# because it would break the redundant nature of the
|
||||||
|
# mirror. This policy acts like "remove" if no suitable
|
||||||
|
# device and space can be allocated for the replacement.
|
||||||
|
|
||||||
|
mirror_log_fault_policy = "allocate"
|
||||||
|
mirror_dev_fault_policy = "remove"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,6 +34,8 @@
|
|||||||
#define DEFAULT_LOCK_DIR "/var/lock/lvm"
|
#define DEFAULT_LOCK_DIR "/var/lock/lvm"
|
||||||
#define DEFAULT_LOCKING_LIB "lvm2_locking.so"
|
#define DEFAULT_LOCKING_LIB "lvm2_locking.so"
|
||||||
|
|
||||||
|
#define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate"
|
||||||
|
#define DEFAULT_MIRROR_DEV_FAULT_POLICY "remove"
|
||||||
#define DEFAULT_DMEVENTD_MIRROR_LIB "libdevmapper-event-lvm2mirror.so"
|
#define DEFAULT_DMEVENTD_MIRROR_LIB "libdevmapper-event-lvm2mirror.so"
|
||||||
|
|
||||||
#define DEFAULT_UMASK 0077
|
#define DEFAULT_UMASK 0077
|
||||||
|
@ -23,6 +23,13 @@
|
|||||||
#include "lvm-string.h"
|
#include "lvm-string.h"
|
||||||
#include "locking.h" /* FIXME Should not be used in this file */
|
#include "locking.h" /* FIXME Should not be used in this file */
|
||||||
|
|
||||||
|
#include "defaults.h" /* FIXME: should this be defaults.h? */
|
||||||
|
|
||||||
|
/* These are the flags that represent the mirror failure restoration policies */
|
||||||
|
#define MIRROR_REMOVE 0
|
||||||
|
#define MIRROR_ALLOCATE 1
|
||||||
|
#define MIRROR_ALLOCATE_ANYWHERE 2
|
||||||
|
|
||||||
struct lv_segment *find_mirror_seg(struct lv_segment *seg)
|
struct lv_segment *find_mirror_seg(struct lv_segment *seg)
|
||||||
{
|
{
|
||||||
return seg->mirror_seg;
|
return seg->mirror_seg;
|
||||||
@ -230,18 +237,112 @@ int remove_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int get_mirror_fault_policy(struct cmd_context *cmd, int log_policy)
|
||||||
|
{
|
||||||
|
const char *policy;
|
||||||
|
|
||||||
|
if (log_policy)
|
||||||
|
policy = find_config_str(NULL, "activation/mirror_log_fault_policy",
|
||||||
|
DEFAULT_MIRROR_LOG_FAULT_POLICY);
|
||||||
|
else
|
||||||
|
policy = find_config_str(NULL, "activation/mirror_dev_fault_policy",
|
||||||
|
DEFAULT_MIRROR_DEV_FAULT_POLICY);
|
||||||
|
|
||||||
|
if (!strcmp(policy, "remove"))
|
||||||
|
return MIRROR_REMOVE;
|
||||||
|
else if (!strcmp(policy, "allocate"))
|
||||||
|
return MIRROR_ALLOCATE;
|
||||||
|
else if (!strcmp(policy, "allocate_anywhere"))
|
||||||
|
return MIRROR_ALLOCATE_ANYWHERE;
|
||||||
|
|
||||||
|
if (log_policy)
|
||||||
|
log_error("Bad activation/mirror_log_fault_policy");
|
||||||
|
else
|
||||||
|
log_error("Bad activation/mirror_dev_fault_policy");
|
||||||
|
|
||||||
|
return MIRROR_REMOVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int get_mirror_log_fault_policy(struct cmd_context *cmd)
|
||||||
|
{
|
||||||
|
return get_mirror_fault_policy(cmd, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int get_mirror_dev_fault_policy(struct cmd_context *cmd)
|
||||||
|
{
|
||||||
|
return get_mirror_fault_policy(cmd, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* replace_mirror_images
|
||||||
|
* @mirrored_seg: segment (which may be linear now) to restore
|
||||||
|
* @num_mirrors: number of copies we should end up with
|
||||||
|
* @log_policy: mirror log fault policy (MIRROR_REMOVE, MIRROR_ALLOCATE or MIRROR_ALLOCATE_ANYWHERE)
|
||||||
|
* @in_sync: was the original mirror in-sync?
|
||||||
|
*
|
||||||
|
* in_sync will be set to 0 if new mirror devices are being added
|
||||||
|
* In other words, it is only useful if the log (and only the log)
|
||||||
|
* is being restored.
|
||||||
|
*
|
||||||
|
* Returns: 0 on failure, 1 on reconfig, -1 if no reconfig done
|
||||||
|
*/
|
||||||
|
static int replace_mirror_images(struct lv_segment *mirrored_seg,
|
||||||
|
uint32_t num_mirrors,
|
||||||
|
int log_policy, int in_sync)
|
||||||
|
{
|
||||||
|
int r = -1;
|
||||||
|
struct logical_volume *lv = mirrored_seg->lv;
|
||||||
|
|
||||||
|
/* FIXME: Use lvconvert rather than duplicating its code */
|
||||||
|
|
||||||
|
if (mirrored_seg->area_count < num_mirrors) {
|
||||||
|
log_error("WARNING: Failed to replace mirror device in %s/%s",
|
||||||
|
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||||
|
|
||||||
|
if ((mirrored_seg->area_count > 1) && !mirrored_seg->log_lv)
|
||||||
|
log_error("WARNING: Use 'lvconvert -m %d %s/%s --corelog' to replace failed devices",
|
||||||
|
num_mirrors - 1, lv->vg->name, lv->name);
|
||||||
|
else
|
||||||
|
log_error("WARNING: Use 'lvconvert -m %d %s/%s' to replace failed devices",
|
||||||
|
num_mirrors - 1, lv->vg->name, lv->name);
|
||||||
|
r = 0;
|
||||||
|
|
||||||
|
/* REMEMBER/FIXME: set in_sync to 0 if a new mirror device was added */
|
||||||
|
in_sync = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME: right now, we ignore the allocation policy specified to
|
||||||
|
* allocate the new log.
|
||||||
|
*/
|
||||||
|
if ((mirrored_seg->area_count > 1) && !mirrored_seg->log_lv &&
|
||||||
|
(log_policy != MIRROR_REMOVE)) {
|
||||||
|
log_error("WARNING: Failed to replace mirror log device in %s/%s",
|
||||||
|
lv->vg->name, lv->name);
|
||||||
|
|
||||||
|
log_error("WARNING: Use 'lvconvert -m %d %s/%s' to replace failed devices",
|
||||||
|
mirrored_seg->area_count - 1 , lv->vg->name, lv->name);
|
||||||
|
r = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
|
int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirrors,
|
||||||
struct list *removable_pvs, int remove_log)
|
struct list *removable_pvs, int remove_log)
|
||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
int insync = 0;
|
int insync = 0;
|
||||||
// int mirror_dev_failed = (mirrored_seg->area_count != num_mirrors);
|
int log_policy, dev_policy;
|
||||||
|
uint32_t old_num_mirrors = mirrored_seg->area_count;
|
||||||
|
int had_log = (mirrored_seg->log_lv) ? 1 : 0;
|
||||||
float sync_percent = 0;
|
float sync_percent = 0;
|
||||||
|
|
||||||
/* was the mirror in-sync before problems? */
|
/* was the mirror in-sync before problems? */
|
||||||
if (!lv_mirror_percent(mirrored_seg->lv->vg->cmd,
|
if (!lv_mirror_percent(mirrored_seg->lv->vg->cmd,
|
||||||
mirrored_seg->lv, 0, &sync_percent, NULL))
|
mirrored_seg->lv, 0, &sync_percent, NULL))
|
||||||
log_error("Unable to determine mirror sync status.");
|
log_error("WARNING: Unable to determine mirror sync status of %s/%s.",
|
||||||
|
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||||
else if (sync_percent >= 100.0)
|
else if (sync_percent >= 100.0)
|
||||||
insync = 1;
|
insync = 1;
|
||||||
|
|
||||||
@ -258,6 +359,38 @@ int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirr
|
|||||||
/* Unable to remove bad devices */
|
/* Unable to remove bad devices */
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
log_print("WARNING: Bad device removed from mirror volume, %s/%s",
|
||||||
|
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||||
|
|
||||||
|
log_policy = get_mirror_log_fault_policy(mirrored_seg->lv->vg->cmd);
|
||||||
|
dev_policy = get_mirror_dev_fault_policy(mirrored_seg->lv->vg->cmd);
|
||||||
|
|
||||||
|
r = replace_mirror_images(mirrored_seg,
|
||||||
|
(dev_policy != MIRROR_REMOVE) ?
|
||||||
|
old_num_mirrors : num_mirrors,
|
||||||
|
log_policy, insync);
|
||||||
|
|
||||||
|
if (!r)
|
||||||
|
/* Failed to replace device(s) */
|
||||||
|
log_error("WARNING: Unable to find substitute device for mirror volume, %s/%s",
|
||||||
|
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||||
|
else if (r > 0)
|
||||||
|
/* Success in replacing device(s) */
|
||||||
|
log_print("WARNING: Mirror volume, %s/%s restored - substitute for failed device found.",
|
||||||
|
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||||
|
else
|
||||||
|
/* Bad device removed, but not replaced because of policy */
|
||||||
|
if (mirrored_seg->area_count == 1) {
|
||||||
|
log_print("WARNING: Mirror volume, %s/%s converted to linear due to device failure.",
|
||||||
|
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||||
|
} else if (had_log && !mirrored_seg->log_lv) {
|
||||||
|
log_print("WARNING: Mirror volume, %s/%s disk log removed due to device failure.",
|
||||||
|
mirrored_seg->lv->vg->name, mirrored_seg->lv->name);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* If we made it here, we at least removed the bad device.
|
||||||
|
* Consider this success.
|
||||||
|
*/
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user