1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-12-21 13:34:40 +03:00

lvmlockd: use lvmlock LV size

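Previously, lvmlockd detected the end of the lvmlock LV
by issuing I/O to it until an I/O error was returned.
This triggered sanlock warning messages, so the command
now passes the lvmlock LV size (lv_size_bytes) to lvmlockd,
which uses it to stop searching before accessing beyond
the end of the device. The old error-based detection is
kept as a fallback for when no size is provided.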

Previously, every lvcreate would refresh the lvmlock LV
in case another machine had extended it.  This involved
a lot of unnecessary work in most cases, so now the LV
size in the VG metadata is compared with the size of the
active device, and the lvmlock LV is refreshed only when
the metadata size is larger.
This commit is contained in:
David Teigland 2024-09-25 16:18:32 -05:00
parent 9d7c19c2ce
commit e9413fb211
4 changed files with 124 additions and 44 deletions

View File

@ -1177,12 +1177,12 @@ static void lm_rem_resource(struct lockspace *ls, struct resource *r)
lm_rem_resource_idm(ls, r); lm_rem_resource_idm(ls, r);
} }
static int lm_find_free_lock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) static int lm_find_free_lock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size)
{ {
if (ls->lm_type == LD_LM_DLM) if (ls->lm_type == LD_LM_DLM)
return 0; return 0;
else if (ls->lm_type == LD_LM_SANLOCK) else if (ls->lm_type == LD_LM_SANLOCK)
return lm_find_free_lock_sanlock(ls, free_offset, sector_size, align_size); return lm_find_free_lock_sanlock(ls, lv_size_bytes, free_offset, sector_size, align_size);
else if (ls->lm_type == LD_LM_IDM) else if (ls->lm_type == LD_LM_IDM)
return 0; return 0;
return -1; return -1;
@ -2717,7 +2717,7 @@ static void *lockspace_thread_main(void *arg_in)
int align_size = 0; int align_size = 0;
log_debug("S %s find free lock", ls->name); log_debug("S %s find free lock", ls->name);
rv = lm_find_free_lock(ls, &free_offset, &sector_size, &align_size); rv = lm_find_free_lock(ls, act->lv_size_bytes, &free_offset, &sector_size, &align_size);
log_debug("S %s find free lock %d offset %llu sector_size %d align_size %d", log_debug("S %s find free lock %d offset %llu sector_size %d align_size %d",
ls->name, rv, (unsigned long long)free_offset, sector_size, align_size); ls->name, rv, (unsigned long long)free_offset, sector_size, align_size);
ls->free_lock_offset = free_offset; ls->free_lock_offset = free_offset;
@ -5032,6 +5032,8 @@ static void client_recv_action(struct client *cl)
if (val) if (val)
act->host_id = val; act->host_id = val;
act->lv_size_bytes = (uint64_t)dm_config_find_int64(req.cft->root, "lv_size_bytes", 0);
/* Create PV list for idm */ /* Create PV list for idm */
if (lm == LD_LM_IDM) { if (lm == LD_LM_IDM) {
memset(&pvs, 0x0, sizeof(pvs)); memset(&pvs, 0x0, sizeof(pvs));

View File

@ -132,6 +132,7 @@ struct action {
uint32_t flags; /* LD_AF_ */ uint32_t flags; /* LD_AF_ */
uint32_t version; uint32_t version;
uint64_t host_id; uint64_t host_id;
uint64_t lv_size_bytes;
int8_t op; /* operation type LD_OP_ */ int8_t op; /* operation type LD_OP_ */
int8_t rt; /* resource type LD_RT_ */ int8_t rt; /* resource type LD_RT_ */
int8_t mode; /* lock mode LD_LK_ */ int8_t mode; /* lock mode LD_LK_ */
@ -527,7 +528,7 @@ int lm_gl_is_enabled(struct lockspace *ls);
int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin); int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin);
int lm_data_size_sanlock(void); int lm_data_size_sanlock(void);
int lm_is_running_sanlock(void); int lm_is_running_sanlock(void);
int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size); int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size);
static inline int lm_support_sanlock(void) static inline int lm_support_sanlock(void)
{ {
@ -630,7 +631,7 @@ static inline int lm_is_running_sanlock(void)
return 0; return 0;
} }
static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size)
{ {
return -1; return -1;
} }

View File

@ -339,14 +339,16 @@ fail:
return rv; return rv;
} }
static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val) static void _read_sysfs_size(dev_t devno, const char *name, uint64_t *val)
{ {
char path[PATH_MAX]; char path[PATH_MAX];
char buf[32]; char buf[32];
FILE *fp; FILE *fp;
size_t len; size_t len;
snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s", *val = 0;
snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/%s",
(int)major(devno), (int)minor(devno), name); (int)major(devno), (int)minor(devno), name);
if (!(fp = fopen(path, "r"))) if (!(fp = fopen(path, "r")))
@ -359,20 +361,19 @@ static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val)
buf[--len] = '\0'; buf[--len] = '\0';
if (strlen(buf)) if (strlen(buf))
*val = atoi(buf); *val = strtoull(buf, NULL, 0);
out: out:
if (fclose(fp)) (void)fclose(fp);
log_debug("Failed to fclose host id file %s (%s).", path, strerror(errno));
} }
/* Select sector/align size for a new VG based on what the device reports for /* Select sector/align size for a new VG based on what the device reports for
sector size of the lvmlock LV. */ sector size of the lvmlock LV. */
static int get_sizes_device(char *path, int *sector_size, int *align_size) static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, int *align_size)
{ {
unsigned int physical_block_size = 0; unsigned int physical_block_size = 0;
unsigned int logical_block_size = 0; unsigned int logical_block_size = 0;
uint64_t val;
struct stat st; struct stat st;
int rv; int rv;
@ -382,8 +383,14 @@ static int get_sizes_device(char *path, int *sector_size, int *align_size)
return -1; return -1;
} }
_read_sysfs_size(st.st_rdev, "physical_block_size", &physical_block_size); _read_sysfs_size(st.st_rdev, "size", &val);
_read_sysfs_size(st.st_rdev, "logical_block_size", &logical_block_size); *dev_size = val * 512;
_read_sysfs_size(st.st_rdev, "queue/physical_block_size", &val);
physical_block_size = (unsigned int)val;
_read_sysfs_size(st.st_rdev, "queue/logical_block_size", &val);
logical_block_size = (unsigned int)val;
if ((physical_block_size == 512) && (logical_block_size == 512)) { if ((physical_block_size == 512) && (logical_block_size == 512)) {
*sector_size = 512; *sector_size = 512;
@ -508,6 +515,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
uint32_t daemon_version; uint32_t daemon_version;
uint32_t daemon_proto; uint32_t daemon_proto;
uint64_t offset; uint64_t offset;
uint64_t dev_size;
int sector_size = 0; int sector_size = 0;
int align_size = 0; int align_size = 0;
int i, rv; int i, rv;
@ -555,7 +563,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
daemon_version, daemon_proto); daemon_version, daemon_proto);
/* Nothing formatted on disk yet, use what the device reports. */ /* Nothing formatted on disk yet, use what the device reports. */
rv = get_sizes_device(disk.path, &sector_size, &align_size); rv = get_sizes_device(disk.path, &dev_size, &sector_size, &align_size);
if (rv < 0) { if (rv < 0) {
if (rv == -EACCES) { if (rv == -EACCES) {
log_error("S %s init_vg_san sanlock error -EACCES: no permission to access %s", log_error("S %s init_vg_san sanlock error -EACCES: no permission to access %s",
@ -568,6 +576,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
} }
} }
log_debug("S %s init_vg_san %s dev_size %llu sector_size %u align_size %u",
ls_name, disk.path, (unsigned long long)dev_size, sector_size, align_size);
strcpy_name_len(ss.name, ls_name, SANLK_NAME_LEN); strcpy_name_len(ss.name, ls_name, SANLK_NAME_LEN);
memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN); memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN);
ss.host_id_disk.offset = 0; ss.host_id_disk.offset = 0;
@ -658,6 +669,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
log_debug("S %s init_vg_san clearing lv lease areas", ls_name); log_debug("S %s init_vg_san clearing lv lease areas", ls_name);
for (i = 0; ; i++) { for (i = 0; ; i++) {
if (dev_size && (offset + align_size > dev_size))
break;
rd.rs.disks[0].offset = offset; rd.rs.disks[0].offset = offset;
rv = sanlock_write_resource(&rd.rs, 0, 0, 0); rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
@ -1197,7 +1211,7 @@ int lm_gl_is_enabled(struct lockspace *ls)
* been disabled.) * been disabled.)
*/ */
int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size)
{ {
struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data; struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
struct sanlk_resourced rd; struct sanlk_resourced rd;
@ -1244,9 +1258,31 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *
memset(rd.rs.name, 0, SANLK_NAME_LEN); memset(rd.rs.name, 0, SANLK_NAME_LEN);
/*
* End of the device. Older lvm versions didn't pass lv_size_bytes
* and just relied on sanlock_read_resource returning an error when
* reading beyond the device.
*/
if (lv_size_bytes && (offset + lms->align_size > lv_size_bytes)) {
/* end of the device */
log_debug("S %s find_free_lock_san read limit offset %llu lv_size_bytes %llu",
ls->name, (unsigned long long)offset, (unsigned long long)lv_size_bytes);
/* remember the NO SPACE offset, if no free area left,
* search from this offset after extend */
*free_offset = offset;
offset = lms->align_size * LV_LOCK_BEGIN;
round = 1;
continue;
}
rv = sanlock_read_resource(&rd.rs, 0); rv = sanlock_read_resource(&rd.rs, 0);
if (rv == -EMSGSIZE || rv == -ENOSPC) { if (rv == -EMSGSIZE || rv == -ENOSPC) {
/* This indicates the end of the device is reached. */ /*
* These errors indicate the end of the device is reached.
* Still check this in case lv_size_bytes is not provided.
*/
log_debug("S %s find_free_lock_san read limit offset %llu", log_debug("S %s find_free_lock_san read limit offset %llu",
ls->name, (unsigned long long)offset); ls->name, (unsigned long long)offset);

View File

@ -18,6 +18,7 @@
#include "daemons/lvmlockd/lvmlockd-client.h" #include "daemons/lvmlockd/lvmlockd-client.h"
#include <mntent.h> #include <mntent.h>
#include <sys/ioctl.h>
static daemon_handle _lvmlockd; static daemon_handle _lvmlockd;
static const char *_lvmlockd_socket = NULL; static const char *_lvmlockd_socket = NULL;
@ -493,7 +494,7 @@ static int _lockd_request(struct cmd_context *cmd,
static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
const char *lock_lv_name, int num_mb) const char *lock_lv_name, int num_mb)
{ {
uint32_t lv_size_bytes; uint64_t lv_size_bytes;
uint32_t extent_bytes; uint32_t extent_bytes;
uint32_t total_extents; uint32_t total_extents;
struct logical_volume *lv; struct logical_volume *lv;
@ -511,6 +512,15 @@ static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
.zero = 1, .zero = 1,
}; };
/*
* Make the lvmlock lv a multiple of 8 MB, i.e. a multiple of any
* sanlock align_size, to avoid having unused space at the end of the
* lvmlock LV.
*/
if (num_mb % 8)
num_mb += (8 - (num_mb % 8));
lv_size_bytes = num_mb * ONE_MB_IN_BYTES; /* size of sanlock LV in bytes */ lv_size_bytes = num_mb * ONE_MB_IN_BYTES; /* size of sanlock LV in bytes */
extent_bytes = vg->extent_size * SECTOR_SIZE; /* size of one extent in bytes */ extent_bytes = vg->extent_size * SECTOR_SIZE; /* size of one extent in bytes */
total_extents = dm_div_up(lv_size_bytes, extent_bytes); /* number of extents in sanlock LV */ total_extents = dm_div_up(lv_size_bytes, extent_bytes); /* number of extents in sanlock LV */
@ -518,7 +528,8 @@ static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
lv_size_bytes = total_extents * extent_bytes; lv_size_bytes = total_extents * extent_bytes;
num_mb = lv_size_bytes / ONE_MB_IN_BYTES; num_mb = lv_size_bytes / ONE_MB_IN_BYTES;
log_debug("Creating lvmlock LV for sanlock with size %um %ub %u extents", num_mb, lv_size_bytes, lp.extents); log_debug("Creating lvmlock LV for sanlock with size %um %llub %u extents",
num_mb, (unsigned long long)lv_size_bytes, lp.extents);
dm_list_init(&lp.tags); dm_list_init(&lp.tags);
@ -547,11 +558,9 @@ static int _remove_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
return 1; return 1;
} }
static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, unsigned extend_mb) static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, unsigned extend_mb, char *lvmlock_path)
{ {
struct device *dev; struct device *dev;
char path[PATH_MAX];
char *name;
uint64_t old_size_bytes; uint64_t old_size_bytes;
uint64_t new_size_bytes; uint64_t new_size_bytes;
uint32_t extend_bytes; uint32_t extend_bytes;
@ -594,23 +603,14 @@ static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
new_size_bytes = lv->size * SECTOR_SIZE; new_size_bytes = lv->size * SECTOR_SIZE;
if (!(name = dm_build_dm_name(lv->vg->cmd->mem, lv->vg->name, lv->name, NULL)))
return_0;
if (dm_snprintf(path, sizeof(path), "%s/%s", dm_dir(), name) < 0) {
log_error("Extend sanlock LV %s name too long - extended size not zeroed.",
display_lvname(lv));
return 0;
}
log_debug("Extend sanlock LV zeroing %u bytes from offset %llu to %llu", log_debug("Extend sanlock LV zeroing %u bytes from offset %llu to %llu",
(uint32_t)(new_size_bytes - old_size_bytes), (uint32_t)(new_size_bytes - old_size_bytes),
(unsigned long long)old_size_bytes, (unsigned long long)old_size_bytes,
(unsigned long long)new_size_bytes); (unsigned long long)new_size_bytes);
log_print_unless_silent("Zeroing %u MiB on extended internal lvmlock LV...", extend_mb); log_debug("Zeroing %u MiB on extended internal lvmlock LV...", extend_mb);
if (!(dev = dev_cache_get(cmd, path, NULL))) { if (!(dev = dev_cache_get(cmd, lvmlock_path, NULL))) {
log_error("Extend sanlock LV %s cannot find device.", display_lvname(lv)); log_error("Extend sanlock LV %s cannot find device.", display_lvname(lv));
return 0; return 0;
} }
@ -653,16 +653,27 @@ static int _refresh_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
{ {
struct logical_volume *lv = vg->sanlock_lv;
daemon_reply reply; daemon_reply reply;
char *lvmlock_name;
char lvmlock_path[PATH_MAX];
unsigned extend_mb; unsigned extend_mb;
uint64_t lv_size_bytes;
uint64_t dm_size_bytes;
int result; int result;
int ret; int ret;
int fd;
if (!_use_lvmlockd) if (!_use_lvmlockd)
return 1; return 1;
if (!_lvmlockd_connected) if (!_lvmlockd_connected)
return 0; return 0;
if (!lv) {
log_error("No internal lvmlock LV found.");
return 0;
}
extend_mb = (unsigned) find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL); extend_mb = (unsigned) find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL);
/* /*
@ -672,17 +683,46 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
if (!extend_mb) if (!extend_mb)
return 1; return 1;
/* lv_size_bytes = lv->size * SECTOR_SIZE;
* Another host may have extended the lvmlock LV already.
* Refresh so that we'll find the new space they added if (!(lvmlock_name = dm_build_dm_name(cmd->mem, vg->name, lv->name, NULL)))
* when we search for new space. return_0;
*
* FIXME: we should be able to check if the lvmlock size if (dm_snprintf(lvmlock_path, sizeof(lvmlock_path), "%s/%s", dm_dir(), lvmlock_name) < 0) {
* in VG metadata is smaller than lvmlock size reported log_error("Handle sanlock LV %s path too long.", lvmlock_name);
* by the kernel, and avoid refresh if they match.
*/
if (!_refresh_sanlock_lv(cmd, vg))
return 0; return 0;
}
fd = open(lvmlock_path, O_RDONLY);
if (fd < 0) {
log_error("Cannot open sanlock LV %s.", lvmlock_path);
return 0;
}
if (ioctl(fd, BLKGETSIZE64, &dm_size_bytes) < 0) {
log_error("Cannot get size of sanlock LV %s.", lvmlock_path);
if (close(fd))
stack;
return 0;
}
if (close(fd))
stack;
/*
* Another host may have extended the lvmlock LV.
* If so the lvmlock LV size in metadata will be
* larger than our active lvmlock LV, and we need
* to refresh our lvmlock LV to use the new space.
*/
if (lv_size_bytes > dm_size_bytes) {
log_debug("Refresh sanlock lv %llu dm %llu",
(unsigned long long)lv_size_bytes,
(unsigned long long)dm_size_bytes);
if (!_refresh_sanlock_lv(cmd, vg))
return 0;
}
/* /*
* Ask lvmlockd/sanlock to look for an unused lock. * Ask lvmlockd/sanlock to look for an unused lock.
@ -690,6 +730,7 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
reply = _lockd_send("find_free_lock", reply = _lockd_send("find_free_lock",
"pid = " FMTd64, (int64_t) getpid(), "pid = " FMTd64, (int64_t) getpid(),
"vg_name = %s", vg->name, "vg_name = %s", vg->name,
"lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes,
NULL); NULL);
if (!_lockd_result(reply, &result, NULL)) { if (!_lockd_result(reply, &result, NULL)) {
@ -700,7 +741,7 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
/* No space on the lvmlock lv for a new lease. */ /* No space on the lvmlock lv for a new lease. */
if (result == -EMSGSIZE) if (result == -EMSGSIZE)
ret = _extend_sanlock_lv(cmd, vg, extend_mb); ret = _extend_sanlock_lv(cmd, vg, extend_mb, lvmlock_path);
daemon_reply_destroy(reply); daemon_reply_destroy(reply);