diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c index 733e71821..4b2d28c22 100644 --- a/daemons/lvmlockd/lvmlockd-core.c +++ b/daemons/lvmlockd/lvmlockd-core.c @@ -1177,12 +1177,12 @@ static void lm_rem_resource(struct lockspace *ls, struct resource *r) lm_rem_resource_idm(ls, r); } -static int lm_find_free_lock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) +static int lm_find_free_lock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) { if (ls->lm_type == LD_LM_DLM) return 0; else if (ls->lm_type == LD_LM_SANLOCK) - return lm_find_free_lock_sanlock(ls, free_offset, sector_size, align_size); + return lm_find_free_lock_sanlock(ls, lv_size_bytes, free_offset, sector_size, align_size); else if (ls->lm_type == LD_LM_IDM) return 0; return -1; @@ -2717,7 +2717,7 @@ static void *lockspace_thread_main(void *arg_in) int align_size = 0; log_debug("S %s find free lock", ls->name); - rv = lm_find_free_lock(ls, &free_offset, &sector_size, &align_size); + rv = lm_find_free_lock(ls, act->lv_size_bytes, &free_offset, &sector_size, &align_size); log_debug("S %s find free lock %d offset %llu sector_size %d align_size %d", ls->name, rv, (unsigned long long)free_offset, sector_size, align_size); ls->free_lock_offset = free_offset; @@ -5032,6 +5032,8 @@ static void client_recv_action(struct client *cl) if (val) act->host_id = val; + act->lv_size_bytes = (uint64_t)dm_config_find_int64(req.cft->root, "lv_size_bytes", 0); + /* Create PV list for idm */ if (lm == LD_LM_IDM) { memset(&pvs, 0x0, sizeof(pvs)); diff --git a/daemons/lvmlockd/lvmlockd-internal.h b/daemons/lvmlockd/lvmlockd-internal.h index 6034349b2..fbbefbeaa 100644 --- a/daemons/lvmlockd/lvmlockd-internal.h +++ b/daemons/lvmlockd/lvmlockd-internal.h @@ -132,6 +132,7 @@ struct action { uint32_t flags; /* LD_AF_ */ uint32_t version; uint64_t host_id; + uint64_t lv_size_bytes; int8_t op; /* operation type LD_OP_ */ 
int8_t rt; /* resource type LD_RT_ */ int8_t mode; /* lock mode LD_LK_ */ @@ -527,7 +528,7 @@ int lm_gl_is_enabled(struct lockspace *ls); int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin); int lm_data_size_sanlock(void); int lm_is_running_sanlock(void); -int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size); +int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size); static inline int lm_support_sanlock(void) { @@ -630,7 +631,7 @@ static inline int lm_is_running_sanlock(void) return 0; } -static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) +static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) { return -1; } diff --git a/daemons/lvmlockd/lvmlockd-sanlock.c b/daemons/lvmlockd/lvmlockd-sanlock.c index 7492dbc90..55743cffb 100644 --- a/daemons/lvmlockd/lvmlockd-sanlock.c +++ b/daemons/lvmlockd/lvmlockd-sanlock.c @@ -339,14 +339,16 @@ fail: return rv; } -static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val) +static void _read_sysfs_size(dev_t devno, const char *name, uint64_t *val) { char path[PATH_MAX]; char buf[32]; FILE *fp; size_t len; - snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s", + *val = 0; + + snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/%s", (int)major(devno), (int)minor(devno), name); if (!(fp = fopen(path, "r"))) @@ -359,20 +361,19 @@ static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val) buf[--len] = '\0'; if (strlen(buf)) - *val = atoi(buf); + *val = strtoull(buf, NULL, 0); out: - if (fclose(fp)) - log_debug("Failed to fclose host id file %s (%s).", path, strerror(errno)); - + (void)fclose(fp); } /* Select sector/align size for a new VG based on what the device reports for sector size 
of the lvmlock LV. */ -static int get_sizes_device(char *path, int *sector_size, int *align_size) +static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, int *align_size) { unsigned int physical_block_size = 0; unsigned int logical_block_size = 0; + uint64_t val; struct stat st; int rv; @@ -382,8 +383,14 @@ static int get_sizes_device(char *path, int *sector_size, int *align_size) return -1; } - _read_sysfs_size(st.st_rdev, "physical_block_size", &physical_block_size); - _read_sysfs_size(st.st_rdev, "logical_block_size", &logical_block_size); + _read_sysfs_size(st.st_rdev, "size", &val); + *dev_size = val * 512; + + _read_sysfs_size(st.st_rdev, "queue/physical_block_size", &val); + physical_block_size = (unsigned int)val; + + _read_sysfs_size(st.st_rdev, "queue/logical_block_size", &val); + logical_block_size = (unsigned int)val; if ((physical_block_size == 512) && (logical_block_size == 512)) { *sector_size = 512; @@ -508,6 +515,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar uint32_t daemon_version; uint32_t daemon_proto; uint64_t offset; + uint64_t dev_size; int sector_size = 0; int align_size = 0; int i, rv; @@ -555,7 +563,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar daemon_version, daemon_proto); /* Nothing formatted on disk yet, use what the device reports. 
*/ - rv = get_sizes_device(disk.path, &sector_size, &align_size); + rv = get_sizes_device(disk.path, &dev_size, &sector_size, &align_size); if (rv < 0) { if (rv == -EACCES) { log_error("S %s init_vg_san sanlock error -EACCES: no permission to access %s", @@ -568,6 +576,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar } } + log_debug("S %s init_vg_san %s dev_size %llu sector_size %u align_size %u", + ls_name, disk.path, (unsigned long long)dev_size, sector_size, align_size); + strcpy_name_len(ss.name, ls_name, SANLK_NAME_LEN); memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN); ss.host_id_disk.offset = 0; @@ -658,6 +669,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar log_debug("S %s init_vg_san clearing lv lease areas", ls_name); for (i = 0; ; i++) { + if (dev_size && (offset + align_size > dev_size)) + break; + rd.rs.disks[0].offset = offset; rv = sanlock_write_resource(&rd.rs, 0, 0, 0); @@ -1197,7 +1211,7 @@ int lm_gl_is_enabled(struct lockspace *ls) * been disabled.) */ -int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) +int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) { struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data; struct sanlk_resourced rd; @@ -1244,9 +1258,31 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int * memset(rd.rs.name, 0, SANLK_NAME_LEN); + /* + * End of the device. Older lvm versions didn't pass lv_size_bytes + * and just relied on sanlock_read_resource returning an error when + * reading beyond the device. 
+ */ + if (lv_size_bytes && (offset + lms->align_size > lv_size_bytes)) { + /* end of the device */ + log_debug("S %s find_free_lock_san read limit offset %llu lv_size_bytes %llu", + ls->name, (unsigned long long)offset, (unsigned long long)lv_size_bytes); + + /* remember the NO SPACE offset, if no free area left, + * search from this offset after extend */ + *free_offset = offset; + + offset = lms->align_size * LV_LOCK_BEGIN; + round = 1; + continue; + } + rv = sanlock_read_resource(&rd.rs, 0); if (rv == -EMSGSIZE || rv == -ENOSPC) { - /* This indicates the end of the device is reached. */ + /* + * These errors indicate the end of the device is reached. + * Still check this in case lv_size_bytes is not provided. + */ log_debug("S %s find_free_lock_san read limit offset %llu", ls->name, (unsigned long long)offset); diff --git a/lib/locking/lvmlockd.c b/lib/locking/lvmlockd.c index 6ea93d747..746aacfe5 100644 --- a/lib/locking/lvmlockd.c +++ b/lib/locking/lvmlockd.c @@ -18,6 +18,7 @@ #include "daemons/lvmlockd/lvmlockd-client.h" #include +#include static daemon_handle _lvmlockd; static const char *_lvmlockd_socket = NULL; @@ -493,7 +494,7 @@ static int _lockd_request(struct cmd_context *cmd, static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, const char *lock_lv_name, int num_mb) { - uint32_t lv_size_bytes; + uint64_t lv_size_bytes; uint32_t extent_bytes; uint32_t total_extents; struct logical_volume *lv; @@ -511,6 +512,15 @@ static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, .zero = 1, }; + /* + * Make the lvmlock lv a multiple of 8 MB, i.e. a multiple of any + * sanlock align_size, to avoid having unused space at the end of the + * lvmlock LV. 
+ */ + + if (num_mb % 8) + num_mb += (8 - (num_mb % 8)); + lv_size_bytes = num_mb * ONE_MB_IN_BYTES; /* size of sanlock LV in bytes */ extent_bytes = vg->extent_size * SECTOR_SIZE; /* size of one extent in bytes */ total_extents = dm_div_up(lv_size_bytes, extent_bytes); /* number of extents in sanlock LV */ @@ -518,7 +528,8 @@ static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, lv_size_bytes = total_extents * extent_bytes; num_mb = lv_size_bytes / ONE_MB_IN_BYTES; - log_debug("Creating lvmlock LV for sanlock with size %um %ub %u extents", num_mb, lv_size_bytes, lp.extents); + log_debug("Creating lvmlock LV for sanlock with size %um %llub %u extents", + num_mb, (unsigned long long)lv_size_bytes, lp.extents); dm_list_init(&lp.tags); @@ -547,11 +558,9 @@ static int _remove_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) return 1; } -static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, unsigned extend_mb) +static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, unsigned extend_mb, char *lvmlock_path) { struct device *dev; - char path[PATH_MAX]; - char *name; uint64_t old_size_bytes; uint64_t new_size_bytes; uint32_t extend_bytes; @@ -594,23 +603,14 @@ static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, new_size_bytes = lv->size * SECTOR_SIZE; - if (!(name = dm_build_dm_name(lv->vg->cmd->mem, lv->vg->name, lv->name, NULL))) - return_0; - - if (dm_snprintf(path, sizeof(path), "%s/%s", dm_dir(), name) < 0) { - log_error("Extend sanlock LV %s name too long - extended size not zeroed.", - display_lvname(lv)); - return 0; - } - log_debug("Extend sanlock LV zeroing %u bytes from offset %llu to %llu", (uint32_t)(new_size_bytes - old_size_bytes), (unsigned long long)old_size_bytes, (unsigned long long)new_size_bytes); - log_print_unless_silent("Zeroing %u MiB on extended internal lvmlock LV...", extend_mb); + log_debug("Zeroing %u MiB on extended internal 
lvmlock LV...", extend_mb); - if (!(dev = dev_cache_get(cmd, path, NULL))) { + if (!(dev = dev_cache_get(cmd, lvmlock_path, NULL))) { log_error("Extend sanlock LV %s cannot find device.", display_lvname(lv)); return 0; } @@ -653,16 +653,27 @@ static int _refresh_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) { + struct logical_volume *lv = vg->sanlock_lv; daemon_reply reply; + char *lvmlock_name; + char lvmlock_path[PATH_MAX]; unsigned extend_mb; + uint64_t lv_size_bytes; + uint64_t dm_size_bytes; int result; int ret; + int fd; if (!_use_lvmlockd) return 1; if (!_lvmlockd_connected) return 0; + if (!lv) { + log_error("No internal lvmlock LV found."); + return 0; + } + extend_mb = (unsigned) find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL); /* @@ -672,17 +683,46 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) if (!extend_mb) return 1; - /* - * Another host may have extended the lvmlock LV already. - * Refresh so that we'll find the new space they added - * when we search for new space. - * - * FIXME: we should be able to check if the lvmlock size - * in VG metadata is smaller than lvmlock size reported - * by the kernel, and avoid refresh if they match. 
- */ - if (!_refresh_sanlock_lv(cmd, vg)) + lv_size_bytes = lv->size * SECTOR_SIZE; + + if (!(lvmlock_name = dm_build_dm_name(cmd->mem, vg->name, lv->name, NULL))) + return_0; + + if (dm_snprintf(lvmlock_path, sizeof(lvmlock_path), "%s/%s", dm_dir(), lvmlock_name) < 0) { + log_error("Handle sanlock LV %s path too long.", lvmlock_name); return 0; + } + + fd = open(lvmlock_path, O_RDONLY); + if (fd < 0) { + log_error("Cannot open sanlock LV %s.", lvmlock_path); + return 0; + } + + if (ioctl(fd, BLKGETSIZE64, &dm_size_bytes) < 0) { + log_error("Cannot get size of sanlock LV %s.", lvmlock_path); + if (close(fd)) + stack; + return 0; + } + + if (close(fd)) + stack; + + /* + * Another host may have extended the lvmlock LV. + * If so the lvmlock LV size in metadata will be + * larger than our active lvmlock LV, and we need + * to refresh our lvmlock LV to use the new space. + */ + if (lv_size_bytes > dm_size_bytes) { + log_debug("Refresh sanlock lv %llu dm %llu", + (unsigned long long)lv_size_bytes, + (unsigned long long)dm_size_bytes); + + if (!_refresh_sanlock_lv(cmd, vg)) + return 0; + } /* * Ask lvmlockd/sanlock to look for an unused lock. @@ -690,6 +730,7 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) reply = _lockd_send("find_free_lock", "pid = " FMTd64, (int64_t) getpid(), "vg_name = %s", vg->name, + "lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes, NULL); if (!_lockd_result(reply, &result, NULL)) { @@ -700,7 +741,7 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) /* No space on the lvmlock lv for a new lease. */ if (result == -EMSGSIZE) - ret = _extend_sanlock_lv(cmd, vg, extend_mb); + ret = _extend_sanlock_lv(cmd, vg, extend_mb, lvmlock_path); daemon_reply_destroy(reply);