1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-04 09:18:36 +03:00
lvm2/lib/misc/lvm-flock.c

257 lines
5.4 KiB
C
Raw Normal View History

/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "lib/misc/lib.h"
#include "lib/config/config.h"
#include "lib/misc/lvm-flock.h"
#include "lib/misc/lvm-signal.h"
#include "lib/locking/locking.h"
#include <sys/file.h>
#include <fcntl.h>
/*
 * One entry per lock file currently held by this process.
 * Entries are allocated in lock_file() and freed in _release_lock().
 */
struct lock_list {
/* Linkage into the file-scope _lock_list. */
struct dm_list list;
/* File descriptor on which the flock() is held. */
int lf;
/* Heap-allocated copy (strdup) of the lock file path; used as the lookup key. */
char *res;
};
/* Locks currently held through lock_file(); initialised by init_flock(). */
static struct dm_list _lock_list;
/*
 * Set in init_flock() from the global_prioritise_write_locks_CFG setting.
 * When non-zero, lock_file() acquires locks via _do_write_priority_flock().
 */
static int _prioritise_write_locks;
/*
 * Drop a lock known to be shared with another file descriptor.
 *
 * Only the descriptor is closed: no explicit unlock is issued and the
 * lock file itself is left in place.  A failing close() is logged at
 * debug level and otherwise ignored.
 */
static void _drop_shared_flock(const char *file, int fd)
{
	log_debug_locking("_drop_shared_flock %s.", file);

	if (close(fd) >= 0)
		return;

	log_sys_debug("close", file);
}
/*
 * Release the lock file 'file' held through descriptor 'fd'.
 *
 * The file is unlinked only when all of the following hold, checked in
 * this order: a non-blocking exclusive flock() on fd succeeds, 'file' can
 * be stat()ed, fd can be fstat()ed, and both refer to the same inode.
 * The descriptor is closed in every case.  unlink()/close() failures are
 * only logged at debug level.
 */
static void _undo_flock(const char *file, int fd)
{
	struct stat path_st, fd_st;
	int may_unlink;

	log_debug_locking("_undo_flock %s", file);

	/* Short-circuit evaluation keeps the syscall order of the checks. */
	may_unlink = !flock(fd, LOCK_NB | LOCK_EX) &&
		     !stat(file, &path_st) &&
		     !fstat(fd, &fd_st) &&
		     is_same_inode(path_st, fd_st);

	if (may_unlink) {
		if (unlink(file))
			log_sys_debug("unlink", file);
	}

	if (close(fd) < 0)
		log_sys_debug("close", file);
}
locking: unify global lock for flock and lockd There have been two file locks used to protect lvm "global state": "ORPHANS" and "GLOBAL". Commands that used the ORPHAN flock in exclusive mode: pvcreate, pvremove, vgcreate, vgextend, vgremove, vgcfgrestore Commands that used the ORPHAN flock in shared mode: vgimportclone, pvs, pvscan, pvresize, pvmove, pvdisplay, pvchange, fullreport Commands that used the GLOBAL flock in exclusive mode: pvchange, pvscan, vgimportclone, vgscan Commands that used the GLOBAL flock in shared mode: pvscan --cache, pvs The ORPHAN lock covers the important cases of serializing the use of orphan PVs. It also partially covers the reporting of orphan PVs (although not correctly as explained below.) The GLOBAL lock doesn't seem to have a clear purpose (it may have eroded over time.) Neither lock correctly protects the VG namespace, or orphan PV properties. To simplify and correct these issues, the two separate flocks are combined into the one GLOBAL flock, and this flock is used from the locking sites that are in place for the lvmlockd global lock. The logic behind the lvmlockd (distributed) global lock is that any command that changes "global state" needs to take the global lock in ex mode. Global state in lvm is: the list of VG names, the set of orphan PVs, and any properties of orphan PVs. Reading this global state can use the global lock in sh mode to ensure it doesn't change while being reported. The locking of global state now looks like: lockd_global() previously named lockd_gl(), acquires the distributed global lock through lvmlockd. This is unchanged. It serializes distributed lvm commands that are changing global state. This is a no-op when lvmlockd is not in use. lockf_global() acquires an flock on a local file. It serializes local lvm commands that are changing global state. 
lock_global() first calls lockf_global() to acquire the local flock for global state, and if this succeeds, it calls lockd_global() to acquire the distributed lock for global state. Replace instances of lockd_gl() with lock_global(), so that the existing sites for lvmlockd global state locking are now also used for local file locking of global state. Remove the previous file locking calls lock_vol(GLOBAL) and lock_vol(ORPHAN). The following commands which change global state are now serialized with the exclusive global flock: pvchange (of orphan), pvresize (of orphan), pvcreate, pvremove, vgcreate, vgextend, vgremove, vgreduce, vgrename, vgcfgrestore, vgimportclone, vgmerge, vgsplit Commands that use a shared flock to read global state (and will be serialized against the prior list) are those that use process_each functions that are based on processing a list of all VG names, or all PVs. The list of all VGs or all PVs is global state and the shared lock prevents those lists from changing while the command is processing them. The ORPHAN lock previously attempted to produce an accurate listing of orphan PVs, but it was only acquired at the end of the command during the fake vg_read of the fake orphan vg. This is not when orphan PVs were determined; they were determined by elimination beforehand by processing all real VGs, and subtracting the PVs in the real VGs from the list of all PVs that had been identified during the initial scan. This is fixed by holding the single global lock in shared mode while processing all VGs to determine the list of orphan PVs.
2019-04-18 23:01:19 +03:00
/*
 * Look up the held-lock entry whose path equals 'file'.
 *
 * Returns the first matching entry in _lock_list, or NULL when no lock
 * on that path is recorded.
 */
static struct lock_list *_get_lock_list_entry(const char *file)
{
	struct lock_list *found = NULL;
	struct lock_list *entry;
	struct dm_list *node;

	dm_list_iterate(node, &_lock_list) {
		entry = dm_list_item(node, struct lock_list);
		if (strcmp(entry->res, file) == 0) {
			found = entry;
			break;
		}
	}

	return found;
}
/*
 * Release held locks and free their bookkeeping entries.
 *
 * file:   path of the lock to release, or NULL to release every entry
 *         in _lock_list.
 * unlock: non-zero to explicitly unlock (LOCK_UN) and then hand the
 *         descriptor to _undo_flock(); zero to only close the descriptor
 *         via _drop_shared_flock().
 *
 * Returns 1 when 'file' is non-NULL and a matching entry was released.
 * Returns 0 otherwise, including after a release-all call (file == NULL).
 */
static int _release_lock(const char *file, int unlock)
{
	struct lock_list *ll;
	struct dm_list *llh, *llt;

	/* Safe iteration: the current node is unlinked and freed in the body. */
	dm_list_iterate_safe(llh, llt, &_lock_list) {
		ll = dm_list_item(llh, struct lock_list);

		/* With a specific file requested, skip non-matching entries. */
		if (file && strcmp(ll->res, file))
			continue;

		dm_list_del(llh);

		if (unlock) {
			log_very_verbose("Unlocking %s", ll->res);
			if (flock(ll->lf, LOCK_NB | LOCK_UN))
				log_sys_debug("flock", ll->res);
			_undo_flock(ll->res, ll->lf);
		} else
			_drop_shared_flock(ll->res, ll->lf);

		free(ll->res);
		/*
		 * Free the entry through the pointer that malloc() returned,
		 * not through the address of its embedded list node, so this
		 * stays correct if 'list' stops being the first member.
		 */
		free(ll);

		if (file)
			return 1;
	}

	return 0;
}
/*
 * Release every lock recorded in _lock_list.
 *
 * unlock: passed through to _release_lock(); non-zero unlocks each lock
 *         explicitly, zero only closes the descriptors.
 */
void release_flocks(int unlock)
{
	/* A NULL path selects all entries; the result carries no information here. */
	(void) _release_lock(NULL, unlock);
}
/*
 * Open (creating if necessary) the lock file 'file' and flock() it.
 *
 * file:      path of the lock file.
 * fd:        in/out descriptor; a value > -1 on entry (or left over from
 *            a previous attempt) is closed before the file is reopened.
 * operation: LOCK_SH or LOCK_EX.
 * nonblock:  non-zero to add LOCK_NB and make a single attempt; zero to
 *            block, with SIGINT allowed to interrupt the wait.
 *
 * After flock() succeeds the path is compared with the locked descriptor.
 * If they no longer refer to the same inode, a blocking caller retries
 * from the open() and a non-blocking caller fails.
 *
 * Returns 1 with *fd holding the locked descriptor, 0 on failure.  On
 * failure after a successful open() the descriptor is closed and *fd is
 * set to -1.
 */
static int _do_flock(const char *file, int *fd, int operation, uint32_t nonblock)
{
	struct stat path_st, fd_st;
	int rc;
	int saved_errno;

	log_debug_locking("_do_flock %s %c%c", file,
			  operation == LOCK_EX ? 'W' : 'R', nonblock ? ' ' : 'B');

	for (;;) {
		/* Discard the descriptor from a previous attempt, if any. */
		if ((*fd > -1) && close(*fd))
			log_sys_debug("close", file);

		*fd = open(file, O_CREAT | O_APPEND | O_RDWR, 0777);
		if (*fd < 0) {
			log_sys_error("open", file);
			return 0;
		}

		if (nonblock)
			operation |= LOCK_NB;
		else
			sigint_allow();

		rc = flock(*fd, operation);
		/* Keep flock()'s errno: the signal helpers below may change it. */
		saved_errno = errno;

		if (!nonblock) {
			sigint_restore();
			if (sigint_caught()) {
				log_error("Giving up waiting for lock.");
				break;
			}
		}

		if (rc) {
			errno = saved_errno;
			log_sys_error("flock", file);
			break;
		}

		/* Success only if the path still names the file we locked. */
		if (!stat(file, &path_st) && !fstat(*fd, &fd_st) &&
		    is_same_inode(path_st, fd_st))
			return 1;

		/* Inode mismatch: only blocking callers retry. */
		if (nonblock)
			break;
	}

	if (close(*fd))
		log_sys_debug("close", file);
	*fd = -1;

	return_0;
}
/* Suffix appended to a lock file path to name its auxiliary lock file. */
#define AUX_LOCK_SUFFIX ":aux"

/*
 * Acquire a lock on 'file' by first passing through an auxiliary lock.
 *
 * An exclusive lock is taken on "<file>:aux" first.  For an exclusive
 * request the main lock is acquired while the auxiliary lock is still
 * held, and the auxiliary lock is released afterwards.  For any other
 * request the auxiliary lock is released before the main lock is
 * acquired.
 *
 * Parameters and return value are those of _do_flock(); 0 is also
 * returned when the auxiliary lock cannot be obtained.
 */
static int _do_write_priority_flock(const char *file, int *fd, int operation, uint32_t nonblock)
{
	const size_t file_len = strlen(file);
	/* sizeof() of the suffix literal accounts for the terminating NUL. */
	char *aux_path = alloca(file_len + sizeof(AUX_LOCK_SUFFIX));
	int aux_fd = -1;
	int r;

	memcpy(aux_path, file, file_len);
	memcpy(aux_path + file_len, AUX_LOCK_SUFFIX, sizeof(AUX_LOCK_SUFFIX));

	r = _do_flock(aux_path, &aux_fd, LOCK_EX, nonblock);
	if (!r)
		return r;

	if (operation != LOCK_EX) {
		_undo_flock(aux_path, aux_fd);
		return _do_flock(file, fd, operation, nonblock);
	}

	r = _do_flock(file, fd, operation, nonblock);
	_undo_flock(aux_path, aux_fd);

	return r;
}
int lock_file(const char *file, uint32_t flags)
{
int operation;
uint32_t nonblock = flags & LCK_NONBLOCK;
locking: unify global lock for flock and lockd There have been two file locks used to protect lvm "global state": "ORPHANS" and "GLOBAL". Commands that used the ORPHAN flock in exclusive mode: pvcreate, pvremove, vgcreate, vgextend, vgremove, vgcfgrestore Commands that used the ORPHAN flock in shared mode: vgimportclone, pvs, pvscan, pvresize, pvmove, pvdisplay, pvchange, fullreport Commands that used the GLOBAL flock in exclusive mode: pvchange, pvscan, vgimportclone, vgscan Commands that used the GLOBAL flock in shared mode: pvscan --cache, pvs The ORPHAN lock covers the important cases of serializing the use of orphan PVs. It also partially covers the reporting of orphan PVs (although not correctly as explained below.) The GLOBAL lock doesn't seem to have a clear purpose (it may have eroded over time.) Neither lock correctly protects the VG namespace, or orphan PV properties. To simplify and correct these issues, the two separate flocks are combined into the one GLOBAL flock, and this flock is used from the locking sites that are in place for the lvmlockd global lock. The logic behind the lvmlockd (distributed) global lock is that any command that changes "global state" needs to take the global lock in ex mode. Global state in lvm is: the list of VG names, the set of orphan PVs, and any properties of orphan PVs. Reading this global state can use the global lock in sh mode to ensure it doesn't change while being reported. The locking of global state now looks like: lockd_global() previously named lockd_gl(), acquires the distributed global lock through lvmlockd. This is unchanged. It serializes distributed lvm commands that are changing global state. This is a no-op when lvmlockd is not in use. lockf_global() acquires an flock on a local file. It serializes local lvm commands that are changing global state. 
lock_global() first calls lockf_global() to acquire the local flock for global state, and if this succeeds, it calls lockd_global() to acquire the distributed lock for global state. Replace instances of lockd_gl() with lock_global(), so that the existing sites for lvmlockd global state locking are now also used for local file locking of global state. Remove the previous file locking calls lock_vol(GLOBAL) and lock_vol(ORPHAN). The following commands which change global state are now serialized with the exclusive global flock: pvchange (of orphan), pvresize (of orphan), pvcreate, pvremove, vgcreate, vgextend, vgremove, vgreduce, vgrename, vgcfgrestore, vgimportclone, vgmerge, vgsplit Commands that use a shared flock to read global state (and will be serialized against the prior list) are those that use process_each functions that are based on processing a list of all VG names, or all PVs. The list of all VGs or all PVs is global state and the shared lock prevents those lists from changing while the command is processing them. The ORPHAN lock previously attempted to produce an accurate listing of orphan PVs, but it was only acquired at the end of the command during the fake vg_read of the fake orphan vg. This is not when orphan PVs were determined; they were determined by elimination beforehand by processing all real VGs, and subtracting the PVs in the real VGs from the list of all PVs that had been identified during the initial scan. This is fixed by holding the single global lock in shared mode while processing all VGs to determine the list of orphan PVs.
2019-04-18 23:01:19 +03:00
uint32_t convert = flags & LCK_CONVERT;
int r;
struct lock_list *ll;
char state;
switch (flags & LCK_TYPE_MASK) {
case LCK_READ:
operation = LOCK_SH;
state = 'R';
break;
case LCK_WRITE:
operation = LOCK_EX;
state = 'W';
break;
case LCK_UNLOCK:
return _release_lock(file, 1);
default:
log_error("Unrecognised lock type: %d", flags & LCK_TYPE_MASK);
return 0;
}
locking: unify global lock for flock and lockd There have been two file locks used to protect lvm "global state": "ORPHANS" and "GLOBAL". Commands that used the ORPHAN flock in exclusive mode: pvcreate, pvremove, vgcreate, vgextend, vgremove, vgcfgrestore Commands that used the ORPHAN flock in shared mode: vgimportclone, pvs, pvscan, pvresize, pvmove, pvdisplay, pvchange, fullreport Commands that used the GLOBAL flock in exclusive mode: pvchange, pvscan, vgimportclone, vgscan Commands that used the GLOBAL flock in shared mode: pvscan --cache, pvs The ORPHAN lock covers the important cases of serializing the use of orphan PVs. It also partially covers the reporting of orphan PVs (although not correctly as explained below.) The GLOBAL lock doesn't seem to have a clear purpose (it may have eroded over time.) Neither lock correctly protects the VG namespace, or orphan PV properties. To simplify and correct these issues, the two separate flocks are combined into the one GLOBAL flock, and this flock is used from the locking sites that are in place for the lvmlockd global lock. The logic behind the lvmlockd (distributed) global lock is that any command that changes "global state" needs to take the global lock in ex mode. Global state in lvm is: the list of VG names, the set of orphan PVs, and any properties of orphan PVs. Reading this global state can use the global lock in sh mode to ensure it doesn't change while being reported. The locking of global state now looks like: lockd_global() previously named lockd_gl(), acquires the distributed global lock through lvmlockd. This is unchanged. It serializes distributed lvm commands that are changing global state. This is a no-op when lvmlockd is not in use. lockf_global() acquires an flock on a local file. It serializes local lvm commands that are changing global state. 
lock_global() first calls lockf_global() to acquire the local flock for global state, and if this succeeds, it calls lockd_global() to acquire the distributed lock for global state. Replace instances of lockd_gl() with lock_global(), so that the existing sites for lvmlockd global state locking are now also used for local file locking of global state. Remove the previous file locking calls lock_vol(GLOBAL) and lock_vol(ORPHAN). The following commands which change global state are now serialized with the exclusive global flock: pvchange (of orphan), pvresize (of orphan), pvcreate, pvremove, vgcreate, vgextend, vgremove, vgreduce, vgrename, vgcfgrestore, vgimportclone, vgmerge, vgsplit Commands that use a shared flock to read global state (and will be serialized against the prior list) are those that use process_each functions that are based on processing a list of all VG names, or all PVs. The list of all VGs or all PVs is global state and the shared lock prevents those lists from changing while the command is processing them. The ORPHAN lock previously attempted to produce an accurate listing of orphan PVs, but it was only acquired at the end of the command during the fake vg_read of the fake orphan vg. This is not when orphan PVs were determined; they were determined by elimination beforehand by processing all real VGs, and subtracting the PVs in the real VGs from the list of all PVs that had been identified during the initial scan. This is fixed by holding the single global lock in shared mode while processing all VGs to determine the list of orphan PVs.
2019-04-18 23:01:19 +03:00
if (convert) {
if (nonblock)
operation |= LOCK_NB;
if (!(ll = _get_lock_list_entry(file)))
return 0;
log_very_verbose("Locking %s %c%c convert", ll->res, state,
nonblock ? ' ' : 'B');
r = flock(ll->lf, operation);
if (!r)
return 1;
log_error("Failed to convert flock on %s %d", file, errno);
return 0;
}
if (!(ll = malloc(sizeof(struct lock_list))))
return_0;
if (!(ll->res = strdup(file))) {
free(ll);
return_0;
}
ll->lf = -1;
log_very_verbose("Locking %s %c%c", ll->res, state,
nonblock ? ' ' : 'B');
(void) dm_prepare_selinux_context(file, S_IFREG);
if (_prioritise_write_locks)
r = _do_write_priority_flock(file, &ll->lf, operation, nonblock);
else
r = _do_flock(file, &ll->lf, operation, nonblock);
(void) dm_prepare_selinux_context(NULL, 0);
if (r)
dm_list_add(&_lock_list, &ll->list);
else {
free(ll->res);
free(ll);
stack;
}
return r;
}
/*
 * Initialise the file-locking state of this module.
 *
 * Resets _lock_list to an empty list and reads the
 * global_prioritise_write_locks_CFG setting for 'cmd' into
 * _prioritise_write_locks.
 */
void init_flock(struct cmd_context *cmd)
{
	int prioritise;

	dm_list_init(&_lock_list);

	prioritise = find_config_tree_bool(cmd, global_prioritise_write_locks_CFG, NULL);
	_prioritise_write_locks = prioritise;
}