From 7f29afdb06d9f3420b3d2174d6ed4c55a58ac706 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 27 Sep 2024 17:42:26 -0500 Subject: [PATCH] lvmlockd: configurable sanlock lease sizes on 4K disks New config setting sanlock_align_size can be used to configure the sanlock lease size that lvmlockd will use on 4K disks. By default, lvmlockd and sanlock use 8MiB align_size (lease size) on 4K disks, which supports up to 2000 hosts (and max host_id.) This can be reduced to 1, 2 or 4 (in MiB), to reduce lease i/o. The reduced sizes correspond to smaller max hosts/host_id: 1 MiB = 250 hosts 2 MiB = 500 hosts 4 MiB = 1000 hosts 8 MiB = 2000 hosts (default) (Disks with 512 byte sectors always use 1MiB leases and support 2000 hosts/host_id, and are not affected by this.) --- daemons/lvmlockd/lvmlockd-core.c | 38 ++--- daemons/lvmlockd/lvmlockd-internal.h | 15 +- daemons/lvmlockd/lvmlockd-sanlock.c | 228 +++++++++++++++++---------- lib/config/config_settings.h | 15 +- lib/config/defaults.h | 1 + lib/locking/lvmlockd.c | 56 ++++++- 6 files changed, 231 insertions(+), 122 deletions(-) diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c index 4b2d28c22..87a83d578 100644 --- a/daemons/lvmlockd/lvmlockd-core.c +++ b/daemons/lvmlockd/lvmlockd-core.c @@ -1177,12 +1177,12 @@ static void lm_rem_resource(struct lockspace *ls, struct resource *r) lm_rem_resource_idm(ls, r); } -static int lm_find_free_lock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) +static int lm_find_free_lock(struct lockspace *ls, uint64_t lv_size_bytes) { if (ls->lm_type == LD_LM_DLM) return 0; else if (ls->lm_type == LD_LM_SANLOCK) - return lm_find_free_lock_sanlock(ls, lv_size_bytes, free_offset, sector_size, align_size); + return lm_find_free_lock_sanlock(ls, lv_size_bytes); else if (ls->lm_type == LD_LM_IDM) return 0; return -1; @@ -2712,17 +2712,10 @@ static void *lockspace_thread_main(void *arg_in) } if (act->op == 
LD_OP_FIND_FREE_LOCK && act->rt == LD_RT_VG) { - uint64_t free_offset = 0; - int sector_size = 0; - int align_size = 0; - log_debug("S %s find free lock", ls->name); - rv = lm_find_free_lock(ls, act->lv_size_bytes, &free_offset, &sector_size, &align_size); - log_debug("S %s find free lock %d offset %llu sector_size %d align_size %d", - ls->name, rv, (unsigned long long)free_offset, sector_size, align_size); - ls->free_lock_offset = free_offset; - ls->free_lock_sector_size = sector_size; - ls->free_lock_align_size = align_size; + rv = lm_find_free_lock(ls, act->lv_size_bytes); + log_debug("S %s find free lock %d offset %llu", + ls->name, rv, (unsigned long long)ls->free_lock_offset); list_del(&act->list); act->result = rv; add_client_result(act); @@ -3556,7 +3549,7 @@ static int work_init_vg(struct action *act) } if (act->lm_type == LD_LM_SANLOCK) - rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args); + rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args, act->align_mb); else if (act->lm_type == LD_LM_DLM) rv = lm_init_vg_dlm(ls_name, act->vg_name, act->flags, act->vg_args); else if (act->lm_type == LD_LM_IDM) @@ -3622,9 +3615,6 @@ static int work_init_lv(struct action *act) char ls_name[MAX_NAME+1]; char vg_args[MAX_ARGS+1]; char lv_args[MAX_ARGS+1]; - uint64_t free_offset = 0; - int sector_size = 0; - int align_size = 0; int lm_type = 0; int rv = 0; @@ -3639,9 +3629,6 @@ static int work_init_lv(struct action *act) if (ls) { lm_type = ls->lm_type; memcpy(vg_args, ls->vg_args, MAX_ARGS); - free_offset = ls->free_lock_offset; - sector_size = ls->free_lock_sector_size; - align_size = ls->free_lock_align_size; } pthread_mutex_unlock(&lockspaces_mutex); @@ -3657,8 +3644,13 @@ static int work_init_lv(struct action *act) } if (lm_type == LD_LM_SANLOCK) { - rv = lm_init_lv_sanlock(ls_name, act->vg_name, act->lv_uuid, - vg_args, lv_args, sector_size, align_size, free_offset); + /* FIXME: can init_lv ever be called without the lockspace 
already started? */ + if (!ls) { + log_error("init_lv no lockspace found"); + return -EINVAL; + } + + rv = lm_init_lv_sanlock(ls, act->lv_uuid, vg_args, lv_args); memcpy(act->lv_args, lv_args, MAX_ARGS); return rv; @@ -5032,6 +5024,10 @@ static void client_recv_action(struct client *cl) if (val) act->host_id = val; + val = daemon_request_int(req, "align_mb", 0); + if (val) + act->align_mb = val; + act->lv_size_bytes = (uint64_t)dm_config_find_int64(req.cft->root, "lv_size_bytes", 0); /* Create PV list for idm */ diff --git a/daemons/lvmlockd/lvmlockd-internal.h b/daemons/lvmlockd/lvmlockd-internal.h index fbbefbeaa..fd822b7ca 100644 --- a/daemons/lvmlockd/lvmlockd-internal.h +++ b/daemons/lvmlockd/lvmlockd-internal.h @@ -141,6 +141,7 @@ struct action { int max_retries; int result; int lm_rv; /* return value from lm_ function */ + int align_mb; char *path; char vg_uuid[64]; char vg_name[MAX_NAME+1]; @@ -192,8 +193,6 @@ struct lockspace { void *lm_data; uint64_t host_id; uint64_t free_lock_offset; /* for sanlock, start search for free lock here */ - int free_lock_sector_size; /* for sanlock */ - int free_lock_align_size; /* for sanlock */ struct pvs pvs; /* for idm: PV list */ uint32_t start_client_id; /* client_id that started the lockspace */ @@ -506,8 +505,8 @@ static inline int lm_refresh_lv_check_dlm(struct action *act) #ifdef LOCKDSANLOCK_SUPPORT -int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args); -int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, int sector_size, int align_size, uint64_t free_offset); +int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb); +int lm_init_lv_sanlock(struct lockspace *ls, char *lv_name, char *vg_args, char *lv_args); int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r); int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args); int 
lm_prepare_lockspace_sanlock(struct lockspace *ls); @@ -528,7 +527,7 @@ int lm_gl_is_enabled(struct lockspace *ls); int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin); int lm_data_size_sanlock(void); int lm_is_running_sanlock(void); -int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size); +int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes); static inline int lm_support_sanlock(void) { @@ -537,12 +536,12 @@ static inline int lm_support_sanlock(void) #else -static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args) +static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb) { return -1; } -static inline int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, int sector_size, int align_size, uint64_t free_offset) +static inline int lm_init_lv_sanlock(struct lockspace *ls, char *lv_name, char *vg_args, char *lv_args) { return -1; } @@ -631,7 +630,7 @@ static inline int lm_is_running_sanlock(void) return 0; } -static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) +static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes) { return -1; } diff --git a/daemons/lvmlockd/lvmlockd-sanlock.c b/daemons/lvmlockd/lvmlockd-sanlock.c index 55743cffb..b761f0fa6 100644 --- a/daemons/lvmlockd/lvmlockd-sanlock.c +++ b/daemons/lvmlockd/lvmlockd-sanlock.c @@ -145,6 +145,8 @@ struct lm_sanlock { int sector_size; int align_size; int sock; /* sanlock daemon connection */ + uint32_t ss_flags; /* sector and align flags for lockspace */ + uint32_t rs_flags; /* sector and align flags for resource */ }; struct rd_sanlock { @@ -369,7 +371,7 @@ out: /* Select sector/align size for a new VG based on what the device reports for 
sector size of the lvmlock LV. */ -static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, int *align_size) +static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, int *align_size, int *align_mb) { unsigned int physical_block_size = 0; unsigned int logical_block_size = 0; @@ -395,12 +397,14 @@ static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, in if ((physical_block_size == 512) && (logical_block_size == 512)) { *sector_size = 512; *align_size = ONE_MB; + *align_mb = 1; return 0; } if ((physical_block_size == 4096) && (logical_block_size == 4096)) { *sector_size = 4096; *align_size = 8 * ONE_MB; + *align_mb = 8; return 0; } @@ -435,6 +439,7 @@ static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, in physical_block_size, logical_block_size, path); *sector_size = 4096; *align_size = 8 * ONE_MB; + *align_mb = 8; return 0; } @@ -443,18 +448,21 @@ static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, in physical_block_size, logical_block_size, path); *sector_size = 4096; *align_size = 8 * ONE_MB; + *align_mb = 8; return 0; } if (physical_block_size == 512) { *sector_size = 512; *align_size = ONE_MB; + *align_mb = 1; return 0; } if (physical_block_size == 4096) { *sector_size = 4096; *align_size = 8 * ONE_MB; + *align_mb = 8; return 0; } @@ -466,7 +474,8 @@ static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, in /* Get the sector/align sizes that were used to create an existing VG. sanlock encoded this in the lockspace/resource structs on disk. 
*/ -static int get_sizes_lockspace(char *path, int *sector_size, int *align_size) +static int get_sizes_lockspace(char *path, int *sector_size, int *align_size, int *align_mb, + uint32_t *ss_flags, uint32_t *rs_flags) { struct sanlk_lockspace ss; uint32_t io_timeout = 0; @@ -484,10 +493,38 @@ static int get_sizes_lockspace(char *path, int *sector_size, int *align_size) if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN8M)) { *sector_size = 4096; + *align_mb = 8; *align_size = 8 * ONE_MB; + *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M; + *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M; + + } else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN4M)) { + *sector_size = 4096; + *align_mb = 4; + *align_size = 4 * ONE_MB; + *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN4M; + *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN4M; + + } else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN2M)) { + *sector_size = 4096; + *align_mb = 2; + *align_size = 2 * ONE_MB; + *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN2M; + *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN2M; + + } else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN1M)) { + *sector_size = 4096; + *align_mb = 1; + *align_size = ONE_MB; + *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN1M; + *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN1M; + } else if ((ss.flags & SANLK_LSF_SECTOR512) && (ss.flags & SANLK_LSF_ALIGN1M)) { *sector_size = 512; + *align_mb = 1; *align_size = ONE_MB; + *ss_flags = SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M; + *rs_flags = SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M; } log_debug("get_sizes_lockspace found %d %d", *sector_size, *align_size); @@ -504,7 +541,7 @@ static int get_sizes_lockspace(char *path, int *sector_size, int *align_size) #define MAX_VERSION 16 -int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args) +int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, 
char *vg_args, int opt_align_mb) { struct sanlk_lockspace ss; struct sanlk_resourced rd; @@ -512,12 +549,14 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar char lock_lv_name[MAX_ARGS+1]; char lock_args_version[MAX_VERSION+1]; const char *gl_name = NULL; + uint32_t rs_flags; uint32_t daemon_version; uint32_t daemon_proto; uint64_t offset; uint64_t dev_size; int sector_size = 0; int align_size = 0; + int align_mb = 0; int i, rv; memset(&ss, 0, sizeof(ss)); @@ -542,7 +581,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar if ((rv = build_dm_path(disk.path, SANLK_PATH_LEN, vg_name, lock_lv_name))) return rv; - log_debug("S %s init_vg_san path %s", ls_name, disk.path); + log_debug("S %s init_vg_san path %s align %d", ls_name, disk.path, opt_align_mb); if (daemon_test) { if (!gl_lsname_sanlock[0]) @@ -563,7 +602,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar daemon_version, daemon_proto); /* Nothing formatted on disk yet, use what the device reports. */ - rv = get_sizes_device(disk.path, &dev_size, &sector_size, &align_size); + rv = get_sizes_device(disk.path, &dev_size, &sector_size, &align_size, &align_mb); if (rv < 0) { if (rv == -EACCES) { log_error("S %s init_vg_san sanlock error -EACCES: no permission to access %s", @@ -576,14 +615,48 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar } } + /* Non-default lease size is requested. 
*/ + if ((sector_size == 4096) && opt_align_mb && (opt_align_mb != 8)) { + if (opt_align_mb != 1 && opt_align_mb != 2 && opt_align_mb != 4) { + log_error("S %s init_vg_sanlock invalid align input %u", ls_name, opt_align_mb); + return -EARGS; + } + align_mb = opt_align_mb; + align_size = align_mb * ONE_MB; + } + log_debug("S %s init_vg_san %s dev_size %llu sector_size %u align_size %u", ls_name, disk.path, (unsigned long long)dev_size, sector_size, align_size); strcpy_name_len(ss.name, ls_name, SANLK_NAME_LEN); memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN); ss.host_id_disk.offset = 0; - ss.flags = (sector_size == 4096) ? (SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M) : - (SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M); + + if (sector_size == 512) { + ss.flags = SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M; + rs_flags = SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M; + } else if (sector_size == 4096) { + if (align_mb == 8) { + ss.flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M; + rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M; + } else if (align_mb == 4) { + ss.flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN4M; + rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN4M; + } else if (align_mb == 2) { + ss.flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN2M; + rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN2M; + } else if (align_mb == 1) { + ss.flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN1M; + rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN1M; + } + else { + log_error("Invalid sanlock align_size %d %d", align_size, align_mb); + return -EARGS; + } + } else { + log_error("Invalid sanlock sector_size %d", sector_size); + return -EARGS; + } rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout); if (rv < 0) { @@ -616,8 +689,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN); rd.rs.disks[0].offset = align_size * GL_LOCK_BEGIN; rd.rs.num_disks = 1; - rd.rs.flags = (sector_size == 4096) ? 
(SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); + rd.rs.flags = rs_flags; rv = sanlock_write_resource(&rd.rs, 0, 0, 0); if (rv < 0) { @@ -631,8 +703,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN); rd.rs.disks[0].offset = align_size * VG_LOCK_BEGIN; rd.rs.num_disks = 1; - rd.rs.flags = (sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); + rd.rs.flags = rs_flags; rv = sanlock_write_resource(&rd.rs, 0, 0, 0); if (rv < 0) { @@ -658,8 +729,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar memset(&rd, 0, sizeof(rd)); rd.rs.num_disks = 1; - rd.rs.flags = (sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); + rd.rs.flags = rs_flags; memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN); strcpy_name_len(rd.rs.lockspace_name, ls_name, SANLK_NAME_LEN); strcpy_name_len(rd.rs.name, "#unused", SANLK_NAME_LEN); @@ -700,14 +770,14 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar * can be saved in the lv's lock_args in the vg metadata. 
*/ -int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, - char *vg_args, char *lv_args, - int sector_size, int align_size, uint64_t free_offset) +int lm_init_lv_sanlock(struct lockspace *ls, char *lv_name, char *vg_args, char *lv_args) { + struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data; struct sanlk_resourced rd; char lock_lv_name[MAX_ARGS+1]; char lock_args_version[MAX_VERSION+1]; uint64_t offset; + int align_size = lms->align_size; int rv; memset(&rd, 0, sizeof(rd)); @@ -717,7 +787,7 @@ int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, rv = lock_lv_name_from_args(vg_args, lock_lv_name); if (rv < 0) { log_error("S %s init_lv_san lock_lv_name_from_args error %d %s", - ls_name, rv, vg_args); + ls->name, rv, vg_args); return rv; } @@ -725,7 +795,6 @@ int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, LV_LOCK_ARGS_MAJOR, LV_LOCK_ARGS_MINOR, LV_LOCK_ARGS_PATCH); if (daemon_test) { - align_size = ONE_MB; snprintf(lv_args, MAX_ARGS, "%s:%llu", lock_args_version, (unsigned long long)((align_size * LV_LOCK_BEGIN) + (align_size * daemon_test_lv_count))); @@ -733,42 +802,15 @@ int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, return 0; } - strcpy_name_len(rd.rs.lockspace_name, ls_name, SANLK_NAME_LEN); + strcpy_name_len(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN); rd.rs.num_disks = 1; - if ((rv = build_dm_path(rd.rs.disks[0].path, SANLK_PATH_LEN, vg_name, lock_lv_name))) + if ((rv = build_dm_path(rd.rs.disks[0].path, SANLK_PATH_LEN, ls->vg_name, lock_lv_name))) return rv; - /* - * These should not usually be zero, maybe only the first time this function is called? - * We need to use the same sector/align sizes that are already being used. 
- */ - if (!sector_size || !align_size) { - rv = get_sizes_lockspace(rd.rs.disks[0].path, &sector_size, &align_size); - if (rv < 0) { - log_error("S %s init_lv_san read_lockspace error %d %s", - ls_name, rv, rd.rs.disks[0].path); - return rv; - } + rd.rs.flags = lms->rs_flags; - if (sector_size) - log_debug("S %s init_lv_san found ls sector_size %d align_size %d", ls_name, sector_size, align_size); - else { - /* use the old method */ - align_size = sanlock_align(&rd.rs.disks[0]); - if (align_size <= 0) { - log_error("S %s init_lv_san align error %d", ls_name, align_size); - return -EINVAL; - } - sector_size = (align_size == ONE_MB) ? 512 : 4096; - log_debug("S %s init_lv_san found old sector_size %d align_size %d", ls_name, sector_size, align_size); - } - } - - rd.rs.flags = (sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); - - if (free_offset) - offset = free_offset; + if (ls->free_lock_offset) + offset = ls->free_lock_offset; else offset = align_size * LV_LOCK_BEGIN; rd.rs.disks[0].offset = offset; @@ -782,20 +824,20 @@ int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, if (rv == -EMSGSIZE || rv == -ENOSPC) { /* This indicates the end of the device is reached. 
*/ log_debug("S %s init_lv_san read limit offset %llu", - ls_name, (unsigned long long)offset); + ls->name, (unsigned long long)offset); rv = -EMSGSIZE; return rv; } if (rv && rv != SANLK_LEADER_MAGIC) { log_error("S %s init_lv_san read error %d offset %llu", - ls_name, rv, (unsigned long long)offset); + ls->name, rv, (unsigned long long)offset); break; } if (!strncmp(rd.rs.name, lv_name, SANLK_NAME_LEN)) { log_error("S %s init_lv_san resource name %s already exists at %llu", - ls_name, lv_name, (unsigned long long)offset); + ls->name, lv_name, (unsigned long long)offset); return -EEXIST; } @@ -806,11 +848,10 @@ int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, */ if ((rv == SANLK_LEADER_MAGIC) || !strcmp(rd.rs.name, "#unused")) { log_debug("S %s init_lv_san %s found unused area at %llu", - ls_name, lv_name, (unsigned long long)offset); + ls->name, lv_name, (unsigned long long)offset); strcpy_name_len(rd.rs.name, lv_name, SANLK_NAME_LEN); - rd.rs.flags = (sector_size == 4096) ? 
(SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); + rd.rs.flags = lms->rs_flags; rv = sanlock_write_resource(&rd.rs, 0, 0, 0); if (!rv) { @@ -818,7 +859,7 @@ int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, lock_args_version, (unsigned long long)offset); } else { log_error("S %s init_lv_san write error %d offset %llu", - ls_name, rv, (unsigned long long)rv); + ls->name, rv, (unsigned long long)rv); } break; } @@ -887,12 +928,19 @@ int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ return rv; } - if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN8M)) { - sector_size = 4096; - align_size = 8 * ONE_MB; - } else if ((ss.flags & SANLK_LSF_SECTOR512) && (ss.flags & SANLK_LSF_ALIGN1M)) { + if (ss.flags & SANLK_LSF_SECTOR512) { sector_size = 512; align_size = ONE_MB; + } else if (ss.flags & SANLK_LSF_SECTOR4K) { + sector_size = 4096; + if (ss.flags & SANLK_LSF_ALIGN8M) + align_size = 8 * ONE_MB; + else if (ss.flags & SANLK_LSF_ALIGN4M) + align_size = 4 * ONE_MB; + else if (ss.flags & SANLK_LSF_ALIGN2M) + align_size = 2 * ONE_MB; + else if (ss.flags & SANLK_LSF_ALIGN1M) + align_size = ONE_MB; } else { /* use the old method */ align_size = sanlock_align(&ss.host_id_disk); @@ -1061,10 +1109,8 @@ int lm_ex_disable_gl_sanlock(struct lockspace *ls) memcpy(rd1.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1); rd1.rs.disks[0].offset = lms->align_size * GL_LOCK_BEGIN; - rd1.rs.flags = (lms->sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); - rd2.rs.flags = (lms->sector_size == 4096) ? 
(SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); + rd1.rs.flags = lms->rs_flags; + rd2.rs.flags = lms->rs_flags; rv = sanlock_acquire(lms->sock, -1, 0, 1, &rs1, NULL); if (rv < 0) { @@ -1126,8 +1172,7 @@ int lm_able_gl_sanlock(struct lockspace *ls, int enable) rd.rs.num_disks = 1; memcpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1); rd.rs.disks[0].offset = lms->align_size * GL_LOCK_BEGIN; - rd.rs.flags = (lms->sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); + rd.rs.flags = lms->rs_flags; rv = sanlock_write_resource(&rd.rs, 0, 0, 0); if (rv < 0) { @@ -1211,7 +1256,7 @@ int lm_gl_is_enabled(struct lockspace *ls) * been disabled.) */ -int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) +int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes) { struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data; struct sanlk_resourced rd; @@ -1221,22 +1266,16 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint int round = 0; if (daemon_test) { - *free_offset = (ONE_MB * LV_LOCK_BEGIN) + (ONE_MB * (daemon_test_lv_count + 1)); - *sector_size = 512; - *align_size = ONE_MB; + ls->free_lock_offset = (ONE_MB * LV_LOCK_BEGIN) + (ONE_MB * (daemon_test_lv_count + 1)); return 0; } - *sector_size = lms->sector_size; - *align_size = lms->align_size; - memset(&rd, 0, sizeof(rd)); strcpy_name_len(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN); rd.rs.num_disks = 1; memcpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1); - rd.rs.flags = (lms->sector_size == 4096) ? 
(SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : - (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); + rd.rs.flags = lms->rs_flags; if (ls->free_lock_offset) offset = ls->free_lock_offset; @@ -1270,7 +1309,7 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint /* remember the NO SPACE offset, if no free area left, * search from this offset after extend */ - *free_offset = offset; + ls->free_lock_offset = offset; offset = lms->align_size * LV_LOCK_BEGIN; round = 1; @@ -1288,7 +1327,7 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint /* remember the NO SPACE offset, if no free area left, * search from this offset after extend */ - *free_offset = offset; + ls->free_lock_offset = offset; offset = lms->align_size * LV_LOCK_BEGIN; round = 1; @@ -1303,7 +1342,7 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint if (rv == SANLK_LEADER_MAGIC) { log_debug("S %s find_free_lock_san found empty area at %llu", ls->name, (unsigned long long)offset); - *free_offset = offset; + ls->free_lock_offset = offset; return 0; } @@ -1316,7 +1355,7 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint if (!strcmp(rd.rs.name, "#unused")) { log_debug("S %s find_free_lock_san found unused area at %llu", ls->name, (unsigned long long)offset); - *free_offset = offset; + ls->free_lock_offset = offset; return 0; } @@ -1358,8 +1397,11 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls) char disk_path[SANLK_PATH_LEN]; char killpath[SANLK_PATH_LEN]; char killargs[SANLK_PATH_LEN]; + uint32_t ss_flags = 0; + uint32_t rs_flags = 0; int sector_size = 0; int align_size = 0; + int align_mb = 0; int gl_found; int ret, rv; @@ -1447,6 +1489,8 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls) strncpy(gl_lsname_sanlock, lsname, MAX_NAME); log_debug("S %s prepare_lockspace_san use global lock", lsname); } + lms->align_size = ONE_MB; + lms->sector_size = 512; goto out; } @@ -1474,7 
+1518,7 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls) goto fail; } - rv = get_sizes_lockspace(disk_path, &sector_size, &align_size); + rv = get_sizes_lockspace(disk_path, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags); if (rv < 0) { log_error("S %s prepare_lockspace_san cannot get sector/align sizes %d", lsname, rv); ret = -EMANAGER; @@ -1494,13 +1538,27 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls) log_debug("S %s prepare_lockspace_san found old sector_size %d align_size %d", lsname, sector_size, align_size); } - log_debug("S %s prepare_lockspace_san sizes %d %d", lsname, sector_size, align_size); + log_debug("S %s prepare_lockspace_san sector_size %d align_mb %d align_size %d", + lsname, sector_size, align_mb, align_size); + + if (sector_size == 4096) { + if (((align_mb == 1) && (ls->host_id > 250)) || + ((align_mb == 2) && (ls->host_id > 500)) || + ((align_mb == 4) && (ls->host_id > 1000)) || + ((align_mb == 8) && (ls->host_id > 2000))) { + log_error("S %s prepare_lockspace_san invalid host_id %llu for align %d MiB", + lsname, (unsigned long long)ls->host_id, align_mb); + ret = -EHOSTID; + goto fail; + } + } lms->align_size = align_size; lms->sector_size = sector_size; + lms->ss_flags = ss_flags; + lms->rs_flags = rs_flags; - lms->ss.flags = (sector_size == 4096) ? (SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M) : - (SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M); + lms->ss.flags = ss_flags; gl_found = gl_is_enabled(ls, lms); if (gl_found < 0) { @@ -1642,7 +1700,7 @@ static int lm_add_resource_sanlock(struct lockspace *ls, struct resource *r) strcpy_name_len(rds->rs.name, r->name, SANLK_NAME_LEN); rds->rs.num_disks = 1; memcpy(rds->rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN); - rds->rs.flags = (lms->sector_size == 4096) ? 
(SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M); + rds->rs.flags = lms->rs_flags; if (r->type == LD_RT_GL) rds->rs.disks[0].offset = GL_LOCK_BEGIN * lms->align_size; diff --git a/lib/config/config_settings.h b/lib/config/config_settings.h index f782c482a..c99f9143e 100644 --- a/lib/config/config_settings.h +++ b/lib/config/config_settings.h @@ -1189,6 +1189,14 @@ cfg(global_sanlock_lv_extend_CFG, "sanlock_lv_extend", global_CFG_SECTION, CFG_D "and can cause lvcreate to fail. Applicable only if LVM is compiled\n" "with lockd support\n") +cfg(global_sanlock_align_size_CFG, "sanlock_align_size", global_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_SANLOCK_ALIGN_SIZE, vsn(2, 3, 27), NULL, 0, NULL, + "The sanlock lease size in MiB to use on disks with a 4K sector size.\n" + "Possible values are 1,2,4,8. The default is 8, which supports up to\n" + "2000 hosts (and max host_id 2000.) Smaller values support smaller\n" + "numbers of max hosts (and max host_ids): 250, 500, 1000, 2000 for\n" + "lease sizes 1,2,4,8. Disks with 512 byte sectors always use 1MiB\n" + "leases and support 2000 hosts, and are not affected by this setting.\n") + cfg(global_lvmlockctl_kill_command_CFG, "lvmlockctl_kill_command", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, "", vsn(2, 3, 12), NULL, 0, NULL, "The command that lvmlockctl --kill should use to force LVs offline.\n" "The lvmlockctl --kill command is run when a shared VG has lost\n" @@ -2267,8 +2275,9 @@ cfg_array(local_extra_system_ids_CFG, "extra_system_ids", local_CFG_SECTION, CFG "correct usage and possible dangers.\n") cfg(local_host_id_CFG, "host_id", local_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, 0, vsn(2, 2, 124), NULL, 0, NULL, - "The lvmlockd sanlock host_id.\n" - "This must be unique among all hosts, and must be between 1 and 2000.\n" - "Applicable only if LVM is compiled with lockd support\n") + "The sanlock host_id used by lvmlockd. 
This must be unique among all the hosts\n" + "using shared VGs with sanlock. Accepted values are 1-2000, except when sanlock_align_size\n" + "is configured to 1, 2 or 4, which correspond to max host_id values of 250, 500, or 1000.\n" + "Applicable only if LVM is compiled with support for lvmlockd+sanlock.\n") cfg(CFG_COUNT, NULL, root_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, 0, vsn(0, 0, 0), NULL, 0, NULL, NULL) diff --git a/lib/config/defaults.h b/lib/config/defaults.h index 5407282d3..54dba8191 100644 --- a/lib/config/defaults.h +++ b/lib/config/defaults.h @@ -72,6 +72,7 @@ #define DEFAULT_USE_AIO 1 #define DEFAULT_SANLOCK_LV_EXTEND_MB 256 +#define DEFAULT_SANLOCK_ALIGN_SIZE 8 /* in MiB, applies to 4K disks only */ #define DEFAULT_MIRRORLOG MIRROR_LOG_DISK #define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate" diff --git a/lib/locking/lvmlockd.c b/lib/locking/lvmlockd.c index 746aacfe5..1abdf1f0c 100644 --- a/lib/locking/lvmlockd.c +++ b/lib/locking/lvmlockd.c @@ -867,7 +867,9 @@ static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, in const char *opts = NULL; struct pv_list *pvl; uint32_t sector_size = 0; + uint32_t align_size = 0; unsigned int physical_block_size, logical_block_size; + int host_id; int num_mb = 0; int result; int ret; @@ -894,11 +896,54 @@ static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, in log_debug("Using sector size %u for sanlock LV", sector_size); - /* Base starting size of sanlock LV is 256MB/1GB for 512/4K sectors */ - switch (sector_size) { - case 512: num_mb = 256; break; - case 4096: num_mb = 1024; break; - default: log_error("Unknown sector size %u.", sector_size); return 0; + host_id = find_config_tree_int(cmd, local_host_id_CFG, NULL); + + /* + * Starting size of lvmlock LV is 256MB/512MB/1GB depending + * on sector_size/align_size, and max valid host_id depends + * on sector_size/align_size. 
+ */ + + if (sector_size == 4096) { + align_size = find_config_tree_int(cmd, global_sanlock_align_size_CFG, NULL); + + if (align_size == 1) { + num_mb = 256; + if (host_id < 1 || host_id > 250) { + log_error("Invalid host_id %d, use 1-250 (sanlock_align_size is 1MiB).", host_id); + return 0; + } + } else if (align_size == 2) { + num_mb = 512; + if (host_id < 1 || host_id > 500) { + log_error("Invalid host_id %d, use 1-500 (sanlock_align_size is 2MiB).", host_id); + return 0; + } + } else if (align_size == 4) { + num_mb = 1024; + if (host_id < 1 || host_id > 1000) { + log_error("Invalid host_id %d, use 1-1000 (sanlock_align_size is 4MiB).", host_id); + return 0; + } + } else if (align_size == 8) { + num_mb = 1024; + if (host_id < 1 || host_id > 2000) { + log_error("Invalid host_id %d, use 1-2000 (sanlock_align_size is 8MiB).", host_id); + return 0; + } + } else { + log_error("Invalid sanlock_align_size %u, use 1,2,4,8.", align_size); + return 0; + } + } else if (sector_size == 512) { + num_mb = 256; + if (host_id < 1 || host_id > 2000) { + log_error("Invalid host_id %d, use 1-2000.", host_id); + return 0; + } + } else { + log_error("Unsupported sector size %u.", sector_size); + return 0; } /* @@ -936,6 +981,7 @@ static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, in "vg_name = %s", vg->name, "vg_lock_type = %s", "sanlock", "vg_lock_args = %s", vg->sanlock_lv->name, + "align_mb = " FMTd64, (int64_t) align_size, "opts = %s", opts ?: "none", NULL);