1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-12-21 13:34:40 +03:00

cmirrord: Prevent secondary checkpoints from corrupting bitmaps

The bug addressed by this patch manifested itself during testing
by showing a mirror that never became 'in-sync' after creation.
The bug is isolated to distributions that do not have support
for openAIS checkpointing (i.e. releases newer than RHEL6 and Fedora 16).

When a node joins a group that is managing a mirror log, the other
machines in the group send it a checkpoint representing the current
state of the bitmap.  More than one machine can send a checkpoint,
but only the initial one should be imported.  Once the bitmap state
has been imported from the initial checkpoint, operations (such
as resync, mark, and clear operations) can begin.  If subsequent
checkpoints are also imported, importing them erases all the log
operations performed between the initial checkpoint and the ones
that follow.

When cmirrord was updated to handle the absence of openAIS
checkpointing (commit 62e38da133),
the new import_checkpoint() function failed to honor the 'no_read'
parameter.  This parameter was designed so that only the initial
checkpoint is read and every later one is skipped.  Honoring this
parameter has solved the
This commit is contained in:
Jonathan Brassow 2013-08-20 13:21:09 -05:00
parent 99fd710cfd
commit f0be9ac904
2 changed files with 42 additions and 2 deletions

View File

@ -1,5 +1,6 @@
Version 2.02.101 -
===================================
Prevent cluster mirror logs from being corrupted by redundant checkpoints.
Fix ignored lvmetad update on loop device configuration (2.02.99).
Use LVM_PATH instead of hardcoded value in lvm2 activation systemd generator.
Refresh existing VG before autoactivation (event retrigger/device reappeared).

View File

@ -25,7 +25,6 @@
#if CMIRROR_HAS_CHECKPOINT
#include <openais/saAis.h>
#include <openais/saCkpt.h>
#endif
/* Open AIS error codes */
#define str_ais_error(x) \
@ -57,6 +56,40 @@
((x) == SA_AIS_ERR_TOO_BIG) ? "SA_AIS_ERR_TOO_BIG" : \
((x) == SA_AIS_ERR_NO_SECTIONS) ? "SA_AIS_ERR_NO_SECTIONS" : \
"ais_error_unknown"
#else
/*
 * CS_* error codes.
 *
 * Expands to the string literal naming the CS_* constant that equals (x),
 * or "cs_error_unknown" when (x) matches none of the listed codes.
 *
 * The whole expansion is parenthesized so the macro can be used safely
 * inside a larger expression (indexing, comparison, negation, ...).
 * Note that (x) is evaluated once per comparison until a match is found,
 * so do not pass an argument with side effects.
 */
#define str_ais_error(x) \
	(((x) == CS_OK) ? "CS_OK" : \
	 ((x) == CS_ERR_LIBRARY) ? "CS_ERR_LIBRARY" : \
	 ((x) == CS_ERR_VERSION) ? "CS_ERR_VERSION" : \
	 ((x) == CS_ERR_INIT) ? "CS_ERR_INIT" : \
	 ((x) == CS_ERR_TIMEOUT) ? "CS_ERR_TIMEOUT" : \
	 ((x) == CS_ERR_TRY_AGAIN) ? "CS_ERR_TRY_AGAIN" : \
	 ((x) == CS_ERR_INVALID_PARAM) ? "CS_ERR_INVALID_PARAM" : \
	 ((x) == CS_ERR_NO_MEMORY) ? "CS_ERR_NO_MEMORY" : \
	 ((x) == CS_ERR_BAD_HANDLE) ? "CS_ERR_BAD_HANDLE" : \
	 ((x) == CS_ERR_BUSY) ? "CS_ERR_BUSY" : \
	 ((x) == CS_ERR_ACCESS) ? "CS_ERR_ACCESS" : \
	 ((x) == CS_ERR_NOT_EXIST) ? "CS_ERR_NOT_EXIST" : \
	 ((x) == CS_ERR_NAME_TOO_LONG) ? "CS_ERR_NAME_TOO_LONG" : \
	 ((x) == CS_ERR_EXIST) ? "CS_ERR_EXIST" : \
	 ((x) == CS_ERR_NO_SPACE) ? "CS_ERR_NO_SPACE" : \
	 ((x) == CS_ERR_INTERRUPT) ? "CS_ERR_INTERRUPT" : \
	 ((x) == CS_ERR_NAME_NOT_FOUND) ? "CS_ERR_NAME_NOT_FOUND" : \
	 ((x) == CS_ERR_NO_RESOURCES) ? "CS_ERR_NO_RESOURCES" : \
	 ((x) == CS_ERR_NOT_SUPPORTED) ? "CS_ERR_NOT_SUPPORTED" : \
	 ((x) == CS_ERR_BAD_OPERATION) ? "CS_ERR_BAD_OPERATION" : \
	 ((x) == CS_ERR_FAILED_OPERATION) ? "CS_ERR_FAILED_OPERATION" : \
	 ((x) == CS_ERR_MESSAGE_ERROR) ? "CS_ERR_MESSAGE_ERROR" : \
	 ((x) == CS_ERR_QUEUE_FULL) ? "CS_ERR_QUEUE_FULL" : \
	 ((x) == CS_ERR_QUEUE_NOT_AVAILABLE) ? "CS_ERR_QUEUE_NOT_AVAILABLE" : \
	 ((x) == CS_ERR_BAD_FLAGS) ? "CS_ERR_BAD_FLAGS" : \
	 ((x) == CS_ERR_TOO_BIG) ? "CS_ERR_TOO_BIG" : \
	 ((x) == CS_ERR_NO_SECTIONS) ? "CS_ERR_NO_SECTIONS" : \
	 ((x) == CS_ERR_CONTEXT_NOT_FOUND) ? "CS_ERR_CONTEXT_NOT_FOUND" : \
	 ((x) == CS_ERR_TOO_MANY_GROUPS) ? "CS_ERR_TOO_MANY_GROUPS" : \
	 ((x) == CS_ERR_SECURITY) ? "CS_ERR_SECURITY" : \
	 "cs_error_unknown")
#endif
#define _RQ_TYPE(x) \
((x) == DM_ULOG_CHECKPOINT_READY) ? "DM_ULOG_CHECKPOINT_READY": \
@ -803,6 +836,11 @@ static int import_checkpoint(struct clog_cpg *entry, int no_read,
{
int bitmap_size;
if (no_read) {
LOG_DBG("Checkpoint for this log already received");
return 0;
}
bitmap_size = (rq->u_rq.data_size - RECOVERING_REGION_SECTION_SIZE) / 2;
if (bitmap_size < 0) {
LOG_ERROR("Checkpoint has invalid payload size.");
@ -947,7 +985,8 @@ static int do_cluster_work(void *data __attribute__((unused)))
dm_list_iterate_items_safe(entry, tmp, &clog_cpg_list) {
r = cpg_dispatch(entry->handle, CS_DISPATCH_ALL);
if (r != CS_OK)
LOG_ERROR("cpg_dispatch failed: %d", r);
LOG_ERROR("cpg_dispatch failed: %s",
str_ais_error(r));
if (entry->free_me) {
free(entry);