1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-17 06:04:23 +03:00

cmirror: Adjust region size to work around CPG msg limit to avoid hang.

cmirror uses the CPG library to pass messages around the cluster and maintain
its bitmaps.  When a cluster mirror starts up, it must send the current state
to any joining members - a checkpoint.  When mirrors are large (or the region
size is small), the bitmap size can exceed the message limit of the CPG
library.  When this happens, the CPG library returns CPG_ERR_TRY_AGAIN.
(This is also a bug in CPG, since the message will never be successfully sent.)

There is an outstanding bug (bug 682771) that is meant to lift this message
length restriction in CPG, but for now we work around the issue by increasing
the mirror region size.  This limits the size of the bitmap and avoids any
issues we would otherwise have around checkpointing.

Since this issue only affects cluster mirrors, the region size adjustments
are only made on cluster mirrors.  This patch handles cluster mirror issues
involving pvmove, lvconvert (from linear to mirror), and lvcreate.  It also
ensures that when users convert a VG from single-machine to clustered, any
mirrors with too many regions (i.e. a bitmap that would be too large to
properly checkpoint) are detected and the conversion is refused with an error.
This commit is contained in:
Jonathan Brassow 2015-02-25 14:42:15 -06:00
parent a88430c6a1
commit dd0ee35378
7 changed files with 72 additions and 7 deletions

View File

@ -1,5 +1,6 @@
Version 2.02.117 - Version 2.02.117 -
==================================== ====================================
Fix hang by adjusting cluster mirror regionsize, avoiding CPG msg limit.
Do not crash when --cachepolicy is given without --cachesettings. Do not crash when --cachepolicy is given without --cachesettings.
Add NEEDS_FOREIGN_VGS flag to vgimport so --foreign is always supplied. Add NEEDS_FOREIGN_VGS flag to vgimport so --foreign is always supplied.
Add --foreign to the 6 display and reporting tools and vgcfgbackup. Add --foreign to the 6 display and reporting tools and vgcfgbackup.

View File

@ -6854,7 +6854,8 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
lp->region_size = adjusted_mirror_region_size(vg->extent_size, lp->region_size = adjusted_mirror_region_size(vg->extent_size,
lp->extents, lp->extents,
lp->region_size, 0); lp->region_size, 0,
vg_is_clustered(vg));
} else if (pool_lv && seg_is_thin_volume(lp)) { } else if (pool_lv && seg_is_thin_volume(lp)) {
if (!lv_is_thin_pool(pool_lv)) { if (!lv_is_thin_pool(pool_lv)) {
log_error("Logical volume %s is not a thin pool.", log_error("Logical volume %s is not a thin pool.",

View File

@ -1032,8 +1032,12 @@ int cluster_mirror_is_available(struct cmd_context *cmd);
int is_temporary_mirror_layer(const struct logical_volume *lv); int is_temporary_mirror_layer(const struct logical_volume *lv);
struct logical_volume * find_temporary_mirror(const struct logical_volume *lv); struct logical_volume * find_temporary_mirror(const struct logical_volume *lv);
uint32_t lv_mirror_count(const struct logical_volume *lv); uint32_t lv_mirror_count(const struct logical_volume *lv);
/* Remove CMIRROR_REGION_COUNT_LIMIT when http://bugzilla.redhat.com/682771 is fixed */
#define CMIRROR_REGION_COUNT_LIMIT (256*1024 * 8)
uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents, uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents,
uint32_t region_size, int internal); uint32_t region_size, int internal, int clustered);
int remove_mirrors_from_segments(struct logical_volume *lv, int remove_mirrors_from_segments(struct logical_volume *lv,
uint32_t new_mirrors, uint64_t status_mask); uint32_t new_mirrors, uint64_t status_mask);
int add_mirrors_to_segments(struct cmd_context *cmd, struct logical_volume *lv, int add_mirrors_to_segments(struct cmd_context *cmd, struct logical_volume *lv,

View File

@ -159,9 +159,10 @@ struct lv_segment *find_mirror_seg(struct lv_segment *seg)
* For internal use only log only in verbose mode * For internal use only log only in verbose mode
*/ */
uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents, uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents,
uint32_t region_size, int internal) uint32_t region_size, int internal, int clustered)
{ {
uint64_t region_max; uint64_t region_max;
uint64_t region_min, region_min_pow2;
region_max = (1 << (ffs((int)extents) - 1)) * (uint64_t) (1 << (ffs((int)extent_size) - 1)); region_max = (1 << (ffs((int)extents) - 1)) * (uint64_t) (1 << (ffs((int)extent_size) - 1));
@ -175,6 +176,44 @@ uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents,
PRIu32 " sectors.", region_size); PRIu32 " sectors.", region_size);
} }
#ifdef CMIRROR_REGION_COUNT_LIMIT
if (clustered) {
/*
* The CPG code used by cluster mirrors can only handle a
* payload of < 1MB currently. (This deficiency is tracked by
* http://bugzilla.redhat.com/682771.) The region size for cluster
* mirrors must be restricted in such a way as to limit the
* size of the bitmap to < 512kB, because there are two bitmaps
* which get sent around during checkpointing while a cluster
* mirror starts up. Ergo, the number of regions must not
* exceed 512k * 8. We need some room for the other
* checkpointing structures as well, so we reduce by another
* factor of two.
*
* This code should be removed when the CPG restriction is
* lifted.
*/
region_min = extents;
region_min *= extent_size;
region_min /= CMIRROR_REGION_COUNT_LIMIT;
region_min_pow2 = 1;
while (region_min_pow2 < region_min)
region_min_pow2 *= 2;
if (region_size < region_min_pow2) {
if (internal)
log_print_unless_silent("Increasing mirror region size from %"
PRIu32 " to %" PRIu32 " sectors.",
region_size, region_min_pow2);
else
log_verbose("Increasing mirror region size from %"
PRIu32 " to %" PRIu32 " sectors.",
region_size, region_min_pow2);
region_size = region_min_pow2;
}
}
#endif /* CMIRROR_REGION_COUNT_LIMIT */
return region_size; return region_size;
} }
@ -1708,7 +1747,8 @@ static int _add_mirrors_that_preserve_segments(struct logical_volume *lv,
adjusted_region_size = adjusted_mirror_region_size(lv->vg->extent_size, adjusted_region_size = adjusted_mirror_region_size(lv->vg->extent_size,
lv->le_count, lv->le_count,
region_size, 1); region_size, 1,
vg_is_clustered(lv->vg));
if (!(ah = allocate_extents(lv->vg, NULL, segtype, 1, mirrors, 0, 0, if (!(ah = allocate_extents(lv->vg, NULL, segtype, 1, mirrors, 0, 0,
lv->le_count, allocatable_pvs, alloc, 0, lv->le_count, allocatable_pvs, alloc, 0,

View File

@ -435,7 +435,8 @@ static int _mirrored_add_target_line(struct dev_manager *dm, struct dm_pool *mem
} else } else
region_size = adjusted_mirror_region_size(seg->lv->vg->extent_size, region_size = adjusted_mirror_region_size(seg->lv->vg->extent_size,
seg->area_len, seg->area_len,
mirr_state->default_region_size, 1); mirr_state->default_region_size, 1,
vg_is_clustered(seg->lv->vg));
if (!dm_tree_node_add_mirror_target(node, len)) if (!dm_tree_node_add_mirror_target(node, len))
return_0; return_0;

View File

@ -1209,7 +1209,8 @@ static int _lv_update_log_type(struct cmd_context *cmd,
if (old_log_count < log_count) { if (old_log_count < log_count) {
region_size = adjusted_mirror_region_size(lv->vg->extent_size, region_size = adjusted_mirror_region_size(lv->vg->extent_size,
lv->le_count, lv->le_count,
region_size, 0); region_size, 0,
vg_is_clustered(lv->vg));
if (!add_mirror_log(cmd, original_lv, log_count, if (!add_mirror_log(cmd, original_lv, log_count,
region_size, operable_pvs, alloc)) region_size, operable_pvs, alloc))
@ -1425,7 +1426,8 @@ static int _lvconvert_mirrors_aux(struct cmd_context *cmd,
region_size = adjusted_mirror_region_size(lv->vg->extent_size, region_size = adjusted_mirror_region_size(lv->vg->extent_size,
lv->le_count, lv->le_count,
lp->region_size, 0); lp->region_size, 0,
vg_is_clustered(lv->vg));
if (!operable_pvs) if (!operable_pvs)
operable_pvs = lp->pvh; operable_pvs = lp->pvh;

View File

@ -300,6 +300,8 @@ static int _vgchange_clustered(struct cmd_context *cmd,
struct volume_group *vg) struct volume_group *vg)
{ {
int clustered = arg_int_value(cmd, clustered_ARG, 0); int clustered = arg_int_value(cmd, clustered_ARG, 0);
struct lv_list *lvl;
struct lv_segment *mirror_seg;
if (clustered && vg_is_clustered(vg)) { if (clustered && vg_is_clustered(vg)) {
if (vg->system_id && *vg->system_id) if (vg->system_id && *vg->system_id)
@ -338,6 +340,20 @@ static int _vgchange_clustered(struct cmd_context *cmd,
log_error("No volume groups changed."); log_error("No volume groups changed.");
return 0; return 0;
} }
#ifdef CMIRROR_REGION_COUNT_LIMIT
dm_list_iterate_items(lvl, &vg->lvs) {
if (!lv_is_mirror(lvl->lv))
continue;
mirror_seg = first_seg(lvl->lv);
if ((lvl->lv->size / mirror_seg->region_size) >
CMIRROR_REGION_COUNT_LIMIT) {
log_error("Unable to convert %s to clustered mode:"
" Mirror region size of %s is too small.",
vg->name, lvl->lv->name);
return 0;
}
}
#endif
} }
if (!vg_set_system_id(vg, clustered ? NULL : cmd->system_id)) if (!vg_set_system_id(vg, clustered ? NULL : cmd->system_id))