1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-09-13 05:44:19 +03:00

Compare commits

...

2 Commits

Author SHA1 Message Date
David Teigland
31e2c4c1a6 locking: allow using global lock for scanning
When vg metadata consumes a major percentage of the metadata area,
acquire the global lock prior to label_scan.  Many commands
already acquire the global lock prior to label_scan, but
VG-specific commands may not otherwise use the global lock.

This attempts to avoid the rare situation in which the metadata is
large enough to wrap around the metadata area and invalidate the
metadata location information that a single command gathered from
label_scan.  If this wrapping with large sizes occurs, the metadata
locations seen during label scan may be overwritten before the
same command is able to use them for vg_read(), causing vg_read
to see invalid metadata and the command to fail.

A large number of concurrent lvm commands is also a factor that
can lead to this problem due to longer delays between label_scan
and vg_read.

This problem can be avoided if all commands acquire the global lock
prior to label scan, and hold it across all the vg_read() calls.
This ensures that the results from label scan are unchanging during
label scan and remain valid for use in vg_read.
Commands modifying vg metadata take the global lock ex, and those
only reading vg metadata use sh.

This extra use of the global lock is usually unnecessary, so lvm
automatically detects when the extra locking may be needed before
starting to use it.  When vg metadata is a large enough percentage
of the total metadata area, lvm begins doing extra locking.
Currently this is 25% (could be configurable.)

When one command sees this threshold has been reached, it creates
the file /run/lvm/scan_lock_global.  When subsequent commands see
this file exists, they will acquire the global lock prior to their
label_scan.  When metadata goes below the threshold, the temp file
is removed, and commands no longer do the extra locking.
2021-06-14 16:18:33 -05:00
David Teigland
09b0eea6a0 locking: hint-based vg locking optimization
This adds an optimization for some common cases in which
the VG lock can be acquired early, prior to label scan.
This reduces the chance that devices may be changed
between label scan and the normal vg lock in vg_read.

This is a proof-of-concept / experimental patch for testing.
2021-06-14 11:56:37 -05:00
9 changed files with 223 additions and 40 deletions

115
lib/cache/lvmcache.c vendored
View File

@@ -88,6 +88,9 @@ static int _vgs_locked = 0;
static int _found_duplicate_vgnames = 0;
static int _outdated_warning = 0;
static const char *_scan_lock_global_file = DEFAULT_RUN_DIR "/scan_lock_global";
static int _scan_lock_global_file_exists = 0;
int lvmcache_init(struct cmd_context *cmd)
{
/*
@@ -2742,19 +2745,115 @@ bool lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const c
return true;
}
static uint64_t _max_metadata_size;
/*
* max_size_bytes and max_size_percent may come from different areas and
* different vgs because of different area sizes.
*/
static uint64_t _max_metadata_size_bytes;
static dm_percent_t _max_metadata_size_percent = DM_PERCENT_INVALID;
void lvmcache_save_metadata_size(uint64_t val)
void lvmcache_save_metadata_size_bytes(uint64_t val)
{
if (!_max_metadata_size)
_max_metadata_size = val;
else if (_max_metadata_size < val)
_max_metadata_size = val;
if (!_max_metadata_size_bytes)
_max_metadata_size_bytes = val;
else if (_max_metadata_size_bytes < val)
_max_metadata_size_bytes = val;
}
uint64_t lvmcache_max_metadata_size(void)
uint64_t lvmcache_max_metadata_size_bytes(void)
{
return _max_metadata_size;
return _max_metadata_size_bytes;
}
/*
* TODO: enable/disable scan_lock_global with config setting:
* y: always use it
* n: never use it
* auto (default): use based on /run/lvm/scan_lock_global
*/
/*
 * Remember the highest metadata-area usage seen so far.
 *
 * meta_size and mdah_size are passed to dm_make_percent() as numerator
 * and denominator.  A result equal to DM_PERCENT_INVALID,
 * DM_PERCENT_FAILED, DM_PERCENT_0 or DM_PERCENT_1 is ignored; any other
 * result replaces the saved value when nothing has been saved yet or
 * when it is larger than the saved value.
 */
void lvmcache_save_metadata_size_percent(uint64_t meta_size, uint64_t mdah_size)
{
	const dm_percent_t used = dm_make_percent(meta_size, mdah_size);

	/* Results that do not describe a usable percentage. */
	if (used == DM_PERCENT_INVALID || used == DM_PERCENT_FAILED)
		return;

	/* Exactly 0% and exactly 1% are not recorded. */
	if (used == DM_PERCENT_0 || used == DM_PERCENT_1)
		return;

	/* First usable value, or a new maximum. */
	if (_max_metadata_size_percent == DM_PERCENT_INVALID ||
	    _max_metadata_size_percent < used)
		_max_metadata_size_percent = used;
}
/*
* TODO: make the percent at which scan_lock_global is used
* configurable?
*/
#define SCAN_LOCK_GLOBAL_METADATA_PERCENT (DM_PERCENT_1 * 25)
/*
 * Create or remove the scan_lock_global file according to the largest
 * metadata percentage currently saved in _max_metadata_size_percent.
 *
 * - No percentage saved (DM_PERCENT_INVALID): do nothing.
 * - Saved percentage >= SCAN_LOCK_GLOBAL_METADATA_PERCENT: create the
 *   file unless it is already known to exist.
 * - Otherwise: unlink the file if it is known to exist.
 *
 * Failures are logged only; nothing is returned to the caller.
 *
 * _scan_lock_global_file_exists is kept in step with each successful
 * create/unlink so that a later call in the same process does not act on
 * stale state (e.g. trying to unlink a file this process already removed).
 *
 * cmd is not used by this function.
 */
void set_scan_lock_global(struct cmd_context *cmd)
{
	FILE *fp;

	if (_max_metadata_size_percent == DM_PERCENT_INVALID)
		return;

	if (_max_metadata_size_percent >= SCAN_LOCK_GLOBAL_METADATA_PERCENT) {
		if (_scan_lock_global_file_exists)
			return;

		log_debug("Creating %s.", _scan_lock_global_file);

		if (!(fp = fopen(_scan_lock_global_file, "w"))) {
			log_debug("Failed to create %s.", _scan_lock_global_file);
			return;
		}

		/* The file exists once fopen succeeds, whatever fclose returns. */
		_scan_lock_global_file_exists = 1;

		if (fclose(fp))
			stack;

		return;
	}

	/* Below the threshold: only remove a file we believe is present. */
	if (!_scan_lock_global_file_exists)
		return;

	log_debug("Unlinking %s.", _scan_lock_global_file);

	if (unlink(_scan_lock_global_file)) {
		stack;
		return;
	}

	_scan_lock_global_file_exists = 0;
}
/*
 * Decide whether the caller should take the global lock before scanning.
 *
 * Returns 0 without touching *gl_ex when cmd->nolocking is set, when
 * cmd->lockf_global_ex is already set, or when the scan_lock_global file
 * cannot be stat'ed.  Returns 1 when the file exists, and sets *gl_ex to
 * 0 (use sh) if cmd->lockd_vg_default_sh is set, otherwise to 1 (use ex).
 *
 * Side effect: whenever stat() is attempted, the cached
 * _scan_lock_global_file_exists flag is refreshed to match the result.
 * It is cleared on failure so that a long-lived process does not keep
 * believing the file exists after it has been removed.
 *
 * NOTE(review): the two early returns leave the cached flag unchanged,
 * so it may not reflect the file's presence for those commands.
 */
int do_scan_lock_global(struct cmd_context *cmd, int *gl_ex)
{
	struct stat buf;

	if (cmd->nolocking)
		return 0;

	/* global lock is already held */
	if (cmd->lockf_global_ex)
		return 0;

	if (stat(_scan_lock_global_file, &buf)) {
		/* Drop any stale "exists" state left by an earlier call. */
		_scan_lock_global_file_exists = 0;
		return 0;
	}

	_scan_lock_global_file_exists = 1;

	/*
	 * Tell the caller to use sh or ex.  A command that may write
	 * vg metadata should use ex, otherwise sh.
	 *
	 * lockd_vg_default_sh/LOCKD_VG_SH is set for commands that
	 * do not modify vg metadata.
	 *
	 * FIXME: this variable/flag was previously used only for
	 * lvmlockd locking logic, but is now more general, so
	 * it should be renamed.
	 */
	*gl_ex = cmd->lockd_vg_default_sh ? 0 : 1;

	return 1;
}
int lvmcache_vginfo_has_pvid(struct lvmcache_vginfo *vginfo, char *pvid)

View File

@@ -183,8 +183,9 @@ bool lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const c
int lvmcache_vginfo_has_pvid(struct lvmcache_vginfo *vginfo, char *pvid);
uint64_t lvmcache_max_metadata_size(void);
void lvmcache_save_metadata_size(uint64_t val);
uint64_t lvmcache_max_metadata_size_bytes(void);
void lvmcache_save_metadata_size_bytes(uint64_t val);
void lvmcache_save_metadata_size_percent(uint64_t meta_size, uint64_t mdah_size);
int dev_in_device_list(struct device *dev, struct dm_list *head);
@@ -226,4 +227,8 @@ void lvmcache_extra_md_component_checks(struct cmd_context *cmd);
unsigned int lvmcache_vg_info_count(void);
void set_scan_lock_global(struct cmd_context *cmd);
int do_scan_lock_global(struct cmd_context *cmd, int *gl_ex);
#endif

View File

@@ -256,6 +256,7 @@ struct cmd_context {
unsigned rand_seed;
struct dm_list pending_delete; /* list of LVs for removal */
struct dm_pool *pending_delete_mem; /* memory pool for pending deletes */
int early_lock_vg_mode;
};
/*

View File

@@ -1646,7 +1646,9 @@ int read_metadata_location_summary(const struct format_type *fmt,
vgsummary->mda_size = rlocn->size;
/* Keep track of largest metadata size we find. */
lvmcache_save_metadata_size(rlocn->size);
lvmcache_save_metadata_size_bytes(rlocn->size);
/* Keep track of the most full metadata area. */
lvmcache_save_metadata_size_percent(rlocn->size, mdah->size);
lvmcache_lookup_mda(vgsummary);

View File

@@ -1288,12 +1288,14 @@ check:
*/
int get_hints(struct cmd_context *cmd, struct dm_list *hints_out, int *newhints,
struct dm_list *devs_in, struct dm_list *devs_out)
struct dm_list *devs_in, struct dm_list *devs_out, char **vgname_out)
{
struct dm_list hints_list;
int needs_refresh = 0;
char *vgname = NULL;
*vgname_out = NULL;
dm_list_init(&hints_list);
/* Decide below if the caller should create new hints. */
@@ -1433,7 +1435,7 @@ int get_hints(struct cmd_context *cmd, struct dm_list *hints_out, int *newhints,
dm_list_splice(hints_out, &hints_list);
free(vgname);
*vgname_out = vgname;
return 1;
}

View File

@@ -33,7 +33,7 @@ void clear_hint_file(struct cmd_context *cmd);
void invalidate_hints(struct cmd_context *cmd);
int get_hints(struct cmd_context *cmd, struct dm_list *hints, int *newhints,
struct dm_list *devs_in, struct dm_list *devs_out);
struct dm_list *devs_in, struct dm_list *devs_out, char **vgname_out);
int validate_hints(struct cmd_context *cmd, struct dm_list *hints);

View File

@@ -1032,6 +1032,7 @@ int label_scan(struct cmd_context *cmd)
struct dev_iter *iter;
struct device_list *devl, *devl2;
struct device *dev;
char *vgname_hint = NULL;
uint64_t max_metadata_size_bytes;
int device_ids_invalid = 0;
int using_hints;
@@ -1137,21 +1138,54 @@ int label_scan(struct cmd_context *cmd)
* by using hints which tell us which devices are PVs, which
* are the only devices we actually need to scan. Without
* hints we need to scan all devs to find which are PVs.
*
* TODO: if the command is using hints and a single vgname
*/
if (!get_hints(cmd, &hints_list, &create_hints, &all_devs, &scan_devs, &vgname_hint)) {
dm_list_splice(&scan_devs, &all_devs);
dm_list_init(&hints_list);
using_hints = 0;
} else
using_hints = 1;
/*
* If the command is using hints and a single vgname
* arg, we can also take the vg lock here, prior to scanning.
* This means we would not need to rescan the PVs in the VG
* in vg_read (skip lvmcache_label_rescan_vg) after the
* vg lock is usually taken. (Some commands are already
* able to avoid rescan in vg_read, but locking early would
* apply to more cases.)
*
* TODO: we don't know exactly which vg lock mode (read or write)
* the command will use in vg_read() for the normal lock_vol(),
* but we could make a fairly accurate guess of READ/WRITE based
* on looking at the command name. If we guess wrong we can
* just unlock_vg and lock_vol again with the correct mode in
* vg_read().
*/
if (!get_hints(cmd, &hints_list, &create_hints, &all_devs, &scan_devs)) {
dm_list_splice(&scan_devs, &all_devs);
dm_list_init(&hints_list);
using_hints = 0;
} else
using_hints = 1;
if (vgname_hint) {
uint32_t lck_type = LCK_VG_WRITE;
log_debug("Early lock vg");
/* FIXME: borrowing this lockd flag which should be
quite close to what we want, based on the command name.
Need to do proper mode selection here, and then check
in case the later lock_vol in vg_read wants different. */
if (cmd->lockd_vg_default_sh)
lck_type = LCK_VG_READ;
if (!lock_vol(cmd, vgname_hint, lck_type, NULL)) {
log_warn("Could not pre-lock VG %s.", vgname_hint);
/* not an error since this is just an optimization */
} else {
/* Save some state indicating that the vg lock
is already held so that the normal lock_vol()
will know. */
cmd->early_lock_vg_mode = lck_type;
}
free(vgname_hint);
}
/*
* If the total number of devices exceeds the soft open file
@@ -1187,7 +1221,7 @@ int label_scan(struct cmd_context *cmd)
* If the largest metadata is within 1MB of the bcache size, then start
* warning.
*/
max_metadata_size_bytes = lvmcache_max_metadata_size();
max_metadata_size_bytes = lvmcache_max_metadata_size_bytes();
if (max_metadata_size_bytes + (1024 * 1024) > _current_bcache_size_bytes) {
/* we want bcache to be 1MB larger than the max metadata seen */
@@ -1202,6 +1236,14 @@ int label_scan(struct cmd_context *cmd)
(unsigned long long)want_size_kb);
}
/*
* If vg metadata is using a large percentage of a metadata area, then
* create /run/lvm/scan_lock_global to tell future lvm commands to
* begin doing lock_global() prior to scanning to avoid problems due to
* metadata wrapping between label_scan and vg_read.
*/
set_scan_lock_global(cmd);
dm_list_init(&cmd->hints);
/*

View File

@@ -203,6 +203,11 @@ int lock_vol(struct cmd_context *cmd, const char *vol, uint32_t flags, const str
if (is_orphan_vg(vol))
return 1;
if (!is_global && cmd->early_lock_vg_mode && (lck_type != LCK_UNLOCK)) {
log_debug("VG was locked early.");
return 1;
}
if (!_blocking_supported)
flags |= LCK_NONBLOCK;
@@ -354,10 +359,8 @@ static int _lockf_global(struct cmd_context *cmd, const char *mode, int convert,
if (!strcmp(mode, "ex")) {
flags |= LCK_WRITE;
if (cmd->lockf_global_ex) {
log_warn("global flock already held ex");
if (cmd->lockf_global_ex)
return 1;
}
ret = lock_vol(cmd, VG_GLOBAL, flags, NULL);
if (ret)

View File

@@ -2140,6 +2140,7 @@ int process_each_vg(struct cmd_context *cmd,
struct dm_list vgnameids_to_process; /* vgnameid_list */
int enable_all_vgs = (cmd->cname->flags & ALL_VGS_IS_DEFAULT);
int process_all_vgs_on_system = 0;
int gl_ex = 0;
int ret_max = ECMD_PROCESSED;
int ret;
@@ -2173,11 +2174,25 @@ int process_each_vg(struct cmd_context *cmd,
process_all_vgs_on_system = 1;
/*
* Needed for a current listing of the global VG namespace.
* The global lock will be taken prior to scanning if the
* /run/lvm/scan_lock_global file has been created by a prior command,
* indicating that vg metadata sizes are large enough to possibly wrap
* around the metadata area during label_scan or between label_scan and
* vg_read, which can invalidate the scan results (normally unlocked)
* and prevent a valid vg_read (which uses metadata locations saved by
* label_scan).
*/
if (process_all_vgs_on_system && !lock_global(cmd, "sh")) {
ret_max = ECMD_FAILED;
goto_out;
if (do_scan_lock_global(cmd, &gl_ex)) {
if (!lock_global(cmd, gl_ex ? "ex" : "sh")) {
ret_max = ECMD_FAILED;
goto_out;
}
} else if (process_all_vgs_on_system) {
/* Needed for a current listing of the global VG namespace. */
if (!lock_global(cmd, "sh")) {
ret_max = ECMD_FAILED;
goto_out;
}
}
/*
@@ -3668,6 +3683,7 @@ int process_each_lv(struct cmd_context *cmd,
struct dm_list vgnameids_to_process; /* vgnameid_list */
int enable_all_vgs = (cmd->cname->flags & ALL_VGS_IS_DEFAULT);
int process_all_vgs_on_system = 0;
int gl_ex = 0;
int ret_max = ECMD_PROCESSED;
int ret;
@@ -3722,12 +3738,17 @@ int process_each_lv(struct cmd_context *cmd,
else if (dm_list_empty(&arg_vgnames) && handle->internal_report_for_select)
process_all_vgs_on_system = 1;
/*
* Needed for a current listing of the global VG namespace.
*/
if (process_all_vgs_on_system && !lock_global(cmd, "sh")) {
ret_max = ECMD_FAILED;
goto_out;
if (do_scan_lock_global(cmd, &gl_ex)) {
if (!lock_global(cmd, gl_ex ? "ex" : "sh")) {
ret_max = ECMD_FAILED;
goto_out;
}
} else if (process_all_vgs_on_system) {
/* Needed for a current listing of the global VG namespace. */
if (!lock_global(cmd, "sh")) {
ret_max = ECMD_FAILED;
goto_out;
}
}
/*
@@ -4384,6 +4405,7 @@ int process_each_pv(struct cmd_context *cmd,
struct device_id_list *dil;
int process_all_pvs;
int process_all_devices;
int gl_ex = 0;
int ret_max = ECMD_PROCESSED;
int ret;
@@ -4434,10 +4456,17 @@ int process_each_pv(struct cmd_context *cmd,
process_all_devices = process_all_pvs && (cmd->cname->flags & ENABLE_ALL_DEVS) && all_is_set;
/* Needed for a current listing of the global VG namespace. */
if (!only_this_vgname && !lock_global(cmd, "sh")) {
ret_max = ECMD_FAILED;
goto_out;
if (do_scan_lock_global(cmd, &gl_ex)) {
if (!lock_global(cmd, gl_ex ? "ex" : "sh")) {
ret_max = ECMD_FAILED;
goto_out;
}
} else if (!only_this_vgname) {
/* Needed for a current listing of the global VG namespace. */
if (!lock_global(cmd, "sh")) {
ret_max = ECMD_FAILED;
goto_out;
}
}
if (!(read_flags & PROCESS_SKIP_SCAN))