1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-12-21 13:34:40 +03:00
lvm2/lib/locking/lvmlockd.c
David Teigland c1f5ac3eca lockd: remove an unreachable global lock condition
There is no longer an "enable" option for the global lock,
so remove the bit of code that was checking for it.  It
was an optional variation anyway, and not one that was likely
to be used.

Also update the corresponding comment describing global lock
creation.
2015-07-24 10:56:08 -05:00

2502 lines
64 KiB
C

/*
* Copyright (C) 2014-2015 Red Hat, Inc.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*/
#include "lib.h"
#include "toolcontext.h"
#include "metadata.h"
#include "segtype.h"
#include "activate.h"
#include "lvmetad.h"
#include "lvmlockd.h"
#include "lvmcache.h"
#include "lvmlockd-client.h"
/* Client handle for the lvmlockd daemon; assigned by lvmlockd_connect(). */
static daemon_handle _lvmlockd;
/* Socket path handed to lvmlockd_open(); set through lvmlockd_set_socket(). */
static const char *_lvmlockd_socket = NULL;
/* Command context recorded by lvmlockd_init() and cleared by lvmlockd_disconnect(). */
static struct cmd_context *_lvmlockd_cmd = NULL;
static int _use_lvmlockd = 0; /* is 1 if command is configured to use lvmlockd */
static int _lvmlockd_connected = 0; /* is 1 if command is connected to lvmlockd */
/* When set by lvmlockd_init(), lvmlockd_connect() returns without connecting. */
static int _lvmlockd_init_failed = 0; /* used to suppress further warnings */
/*
 * Record the socket path that lvmlockd_connect() passes to lvmlockd_open().
 * The pointer is stored as given; the string is not copied.
 */
void lvmlockd_set_socket(const char *sock)
{
	_lvmlockd_socket = sock;
}
/*
 * Store the use_lvmlockd setting for this command.
 * Set directly from global/use_lvmlockd.
 */
void lvmlockd_set_use(int use)
{
	_use_lvmlockd = use;
}
/*
 * Report the global/use_lvmlockd value currently in effect for the
 * command, i.e. whatever was last passed to lvmlockd_set_use().
 */
int lvmlockd_use(void)
{
	return _use_lvmlockd;
}
/*
 * Check the preconditions for talking to lvmlockd and remember the command.
 *
 * _lvmlockd_init_failed is set when the socket path has not been configured
 * or when the lvmlockd pid file does not exist; lvmlockd_connect() then
 * returns without attempting a connection.
 *
 * The command continues even if init and/or connect fail,
 * because the command is allowed to use local VGs without lvmlockd,
 * and is allowed to read lockd VGs without locks from lvmlockd.
 */
void lvmlockd_init(struct cmd_context *cmd)
{
	int failed = 0;

	if (!_use_lvmlockd) {
		/* Should never happen, don't call init when not using lvmlockd. */
		log_error("Should not initialize lvmlockd with use_lvmlockd=0.");
	}

	if (!_lvmlockd_socket) {
		log_warn("WARNING: lvmlockd socket location is not configured.");
		failed = 1;
	}

	if (access(LVMLOCKD_PIDFILE, F_OK)) {
		log_warn("WARNING: lvmlockd process is not running.");
		failed = 1;
	}

	/*
	 * Assign the flag once, after both checks, so that a missing socket
	 * is not masked by a successful pid file check.
	 */
	_lvmlockd_init_failed = failed;
	_lvmlockd_cmd = cmd;
}
/*
 * Open the connection to lvmlockd using the configured socket path.
 *
 * Does nothing when lvmlockd_init() recorded a failure.  On success
 * _lvmlockd_connected is set to 1; on failure only a warning is logged
 * and the command carries on without lvmlockd.
 */
void lvmlockd_connect(void)
{
	if (!_use_lvmlockd) {
		/* Should never happen, don't call connect when not using lvmlockd. */
		log_error("Should not connect to lvmlockd with use_lvmlockd=0.");
	}

	if (_lvmlockd_connected) {
		/* Should never happen, only call connect once. */
		log_error("lvmlockd is already connected.");
		/*
		 * Keep the existing connection: opening again would overwrite
		 * _lvmlockd and leak the handle that is already open.
		 */
		return;
	}

	if (_lvmlockd_init_failed)
		return;

	_lvmlockd = lvmlockd_open(_lvmlockd_socket);

	if (_lvmlockd.socket_fd >= 0 && !_lvmlockd.error) {
		log_debug("Successfully connected to lvmlockd on fd %d.", _lvmlockd.socket_fd);
		_lvmlockd_connected = 1;
	} else {
		log_warn("WARNING: lvmlockd connect failed.");
	}
}
/*
 * Close the lvmlockd connection if one is open and reset the
 * connection state and the recorded command context.
 */
void lvmlockd_disconnect(void)
{
	if (_lvmlockd_connected)
		daemon_close(_lvmlockd);

	_lvmlockd_cmd = NULL;
	_lvmlockd_connected = 0;
}
/*
 * Translate the result strings from lvmlockd to bit flags.
 *
 * Each known flag name found as a substring of flags_str has its
 * LD_RF_* bit OR'ed into *lockd_flags; bits already set are kept.
 */
static void _flags_str_to_lockd_flags(const char *flags_str, uint32_t *lockd_flags)
{
	const struct {
		const char *name;
		uint32_t bit;
	} known[] = {
		{ "NO_LOCKSPACES", LD_RF_NO_LOCKSPACES },
		{ "NO_GL_LS", LD_RF_NO_GL_LS },
		{ "LOCAL_LS", LD_RF_LOCAL_LS },
		{ "DUP_GL_LS", LD_RF_DUP_GL_LS },
		{ "INACTIVE_LS", LD_RF_INACTIVE_LS },
		{ "ADD_LS_ERROR", LD_RF_ADD_LS_ERROR },
	};
	unsigned idx;

	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
		if (strstr(flags_str, known[idx].name))
			*lockd_flags |= known[idx].bit;
	}
}
/*
 * Evaluate the reply from lvmlockd: check it for errors, then extract
 * the result and lockd_flags that lvmlockd returned.
 *
 * Returns:
 *   0 failure (no result/lockd_flags set)
 *   1 success (result/lockd_flags set)
 *
 * lockd_flags may be NULL, in which case the flags are not extracted.
 * Otherwise the flags found are OR'ed into *lockd_flags.
 */

/*
 * This is an arbitrary number that we know lvmlockd
 * will not return.  daemon_reply_int reverts to this
 * value if it finds no result value.
 */
#define NO_LOCKD_RESULT -1000

static int _lockd_result(daemon_reply reply, int *result, uint32_t *lockd_flags)
{
	const char *lm_name;
	const char *flags = NULL;
	int op_result;

	if (reply.error) {
		log_error("lockd_result reply error %d", reply.error);
		return 0;
	}

	if (strcmp(daemon_reply_str(reply, "response", ""), "OK") != 0) {
		log_error("lockd_result bad response");
		return 0;
	}

	op_result = daemon_reply_int(reply, "op_result", NO_LOCKD_RESULT);
	if (op_result == NO_LOCKD_RESULT) {
		log_error("lockd_result no op_result");
		return 0;
	}

	*result = op_result;

	if (lockd_flags) {
		flags = daemon_reply_str(reply, "result_flags", NULL);
		if (flags)
			_flags_str_to_lockd_flags(flags, lockd_flags);
	}

	/* The lock_type that lvmlockd used for locking. */
	lm_name = daemon_reply_str(reply, "lock_type", "none");

	log_debug("lockd_result %d flags %s lm %s", op_result,
		  flags ? flags : "none", lm_name);
	return 1;
}
/*
 * Build a request named req_name from the NULL-terminated list of
 * "key = format", value pairs, send it over the lvmlockd connection
 * and return the reply.  The caller destroys the reply.
 */
static daemon_reply _lockd_send(const char *req_name, ...)
{
	daemon_request request = daemon_request_make(req_name);
	daemon_reply response;
	va_list args;

	va_start(args, req_name);
	daemon_request_extend_v(request, args);
	va_end(args);

	response = daemon_send(_lvmlockd, request);
	daemon_request_destroy(request);

	return response;
}
/*
 * Send a lock request (req_name, e.g. "lock_gl" or "lock_vg") to lvmlockd.
 *
 * Which fields are sent depends on the names supplied: VG and LV names
 * select an LV request, a VG name alone a VG request, and neither a
 * request carrying only the lock type.  NULL opts, vg_lock_type,
 * vg_lock_args and lv_lock_args are sent as "none".
 *
 * mode must not be NULL; mode "na" returns 1 without contacting lvmlockd.
 * result and lockd_flags must not be NULL; both are zeroed on entry.
 *
 * result/lockd_flags are values returned from lvmlockd.
 *
 * return 0 (failure)
 * return 1 (result/lockd_flags indicate success/failure)
 *
 * return 1 result 0   (success)
 * return 1 result < 0 (failure)
 *
 * caller may ignore result < 0 failure depending on
 * lockd_flags and the specific command/mode.
 *
 * When this function returns 0 (failure), no result/lockd_flags
 * were obtained from lvmlockd.
 *
 * When this function returns 1 (success), result/lockd_flags may
 * have been obtained from lvmlockd.  This lvmlockd result may
 * indicate a locking failure.
 */
static int _lockd_request(struct cmd_context *cmd,
			  const char *req_name,
			  const char *vg_name,
			  const char *vg_lock_type,
			  const char *vg_lock_args,
			  const char *lv_name,
			  const char *lv_uuid,
			  const char *lv_lock_args,
			  const char *mode,
			  const char *opts,
			  int *result,
			  uint32_t *lockd_flags)
{
	const char *cmd_name = get_cmd_name();
	daemon_reply reply;
	/*
	 * Integer values for "%d" keys are passed as int64_t, the same way
	 * the "version" field is passed in lockd_start_vg(); a plain int
	 * would not match the type the request builder is expected to read
	 * from the argument list.
	 */
	int64_t pid = getpid();

	*result = 0;
	*lockd_flags = 0;

	if (!strcmp(mode, "na"))
		return 1;

	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	/* cmd and pid are passed for informational and debugging purposes */

	if (!cmd_name || !cmd_name[0])
		cmd_name = "none";

	if (vg_name && lv_name) {
		reply = _lockd_send(req_name,
					"cmd = %s", cmd_name,
					"pid = %d", pid,
					"mode = %s", mode,
					"opts = %s", opts ?: "none",
					"vg_name = %s", vg_name,
					"lv_name = %s", lv_name,
					"lv_uuid = %s", lv_uuid,
					"vg_lock_type = %s", vg_lock_type ?: "none",
					"vg_lock_args = %s", vg_lock_args ?: "none",
					"lv_lock_args = %s", lv_lock_args ?: "none",
					NULL);

		if (!_lockd_result(reply, result, lockd_flags))
			goto fail;

		log_debug("lvmlockd %s %s vg %s lv %s result %d %x",
			  req_name, mode, vg_name, lv_name, *result, *lockd_flags);

	} else if (vg_name) {
		reply = _lockd_send(req_name,
					"cmd = %s", cmd_name,
					"pid = %d", pid,
					"mode = %s", mode,
					"opts = %s", opts ?: "none",
					"vg_name = %s", vg_name,
					"vg_lock_type = %s", vg_lock_type ?: "none",
					"vg_lock_args = %s", vg_lock_args ?: "none",
					NULL);

		if (!_lockd_result(reply, result, lockd_flags))
			goto fail;

		log_debug("lvmlockd %s %s vg %s result %d %x",
			  req_name, mode, vg_name, *result, *lockd_flags);

	} else {
		reply = _lockd_send(req_name,
					"cmd = %s", cmd_name,
					"pid = %d", pid,
					"mode = %s", mode,
					"opts = %s", opts ?: "none",
					"vg_lock_type = %s", vg_lock_type ?: "none",
					NULL);

		if (!_lockd_result(reply, result, lockd_flags))
			goto fail;

		log_debug("lvmlockd %s %s result %d %x",
			  req_name, mode, *result, *lockd_flags);
	}

	daemon_reply_destroy(reply);

	/* result/lockd_flags have lvmlockd result */
	return 1;

 fail:
	/* no result was obtained from lvmlockd */

	log_error("lvmlockd %s %s failed no result", req_name, mode);

	daemon_reply_destroy(reply);
	return 0;
}
/*
 * Create the internal sanlock LV named lock_lv_name in vg, extend_mb
 * megabytes in size (rounded up to whole extents), and record it in
 * vg->sanlock_lv.  Returns 1 on success, 0 on failure.
 *
 * Eventually add an option to specify which pv the lvmlock lv should be placed on.
 * FIXME: when converting a VG from lock_type none to sanlock, we need to count
 * the number of existing LVs to ensure that the new sanlock_lv is large enough
 * for all of them that need locks.
 */
static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
			      const char *lock_lv_name, int extend_mb)
{
	struct logical_volume *lv;
	uint64_t lv_size_bytes;
	uint64_t extent_bytes;
	struct lvcreate_params lp = {
		.activate = CHANGE_ALY,
		.alloc = ALLOC_INHERIT,
		.major = -1,
		.minor = -1,
		.permission = LVM_READ | LVM_WRITE,
		.pvh = &vg->pvs,
		.read_ahead = DM_READ_AHEAD_NONE,
		.stripes = 1,
		.vg_name = vg->name,
		.zero = 1,
	};

	if (extend_mb <= 0 || !vg->extent_size) {
		log_error("Invalid size for sanlock lv %s in vg %s", lock_lv_name, vg->name);
		return 0;
	}

	/*
	 * Compute in 64 bits: extend_mb * 1024 * 1024 overflows int for
	 * sizes of 2048 MB and above.  Round up so that an extent larger
	 * than the requested size still yields one extent instead of zero.
	 */
	lv_size_bytes = (uint64_t) extend_mb * 1024 * 1024;
	extent_bytes = (uint64_t) vg->extent_size * SECTOR_SIZE;
	lp.extents = (uint32_t) ((lv_size_bytes + extent_bytes - 1) / extent_bytes);

	if (!(lp.lv_name = dm_pool_strdup(cmd->mem, lock_lv_name))) {
		log_error("Failed to allocate name for sanlock lv %s in vg %s", lock_lv_name, vg->name);
		return 0;
	}

	dm_list_init(&lp.tags);

	if (!(lp.segtype = get_segtype_from_string(vg->cmd, "striped")))
		return_0;

	lv = lv_create_single(vg, &lp);
	if (!lv) {
		log_error("Failed to create sanlock lv %s in vg %s", lock_lv_name, vg->name);
		return 0;
	}

	vg->sanlock_lv = lv;

	return 1;
}
/*
 * Remove the VG's internal sanlock LV.
 * Returns 1 on success; logs an error and returns 0 otherwise.
 */
static int _remove_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
{
	if (lv_remove(vg->sanlock_lv))
		return 1;

	log_error("Failed to remove sanlock LV %s/%s", vg->name, vg->sanlock_lv->name);
	return 0;
}
/*
 * Grow the VG's internal sanlock LV by extend_mb megabytes.
 * Returns 1 on success; logs an error and returns 0 otherwise.
 */
static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, int extend_mb)
{
	struct logical_volume *lv = vg->sanlock_lv;
	struct lvresize_params lp = {
		.lv_name = vg->sanlock_lv->name,
		.sign = SIGN_NONE,
		.percent = PERCENT_NONE,
		.resize = LV_EXTEND,
		.ac_force = 1,
		.sizeargs = 1,
	};

	/*
	 * Widen before multiplying: extend_mb * 1024 * 1024 overflows int
	 * for sizes of 2048 MB and above.  The result is added to lv->size
	 * as a number of sectors.
	 */
	lp.size = lv->size + (((int64_t) extend_mb * 1024 * 1024) / SECTOR_SIZE);

	if (!lv_resize_prepare(cmd, lv, &lp, &vg->pvs) ||
	    !lv_resize(cmd, lv, &lp, &vg->pvs)) {
		log_error("Extend LV %s/%s to size %llu failed.",
			  vg->name, lv->name, (unsigned long long)lp.size);
		return 0;
	}

	return 1;
}
/*
 * When one host does _extend_sanlock_lv, the others need to refresh the size.
 * Returns 1 on success; logs an error and returns 0 otherwise.
 */
static int _refresh_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
{
	if (lv_refresh_suspend_resume(cmd, vg->sanlock_lv))
		return 1;

	log_error("Failed to refresh %s.", vg->sanlock_lv->name);
	return 0;
}
/*
* Called at the beginning of lvcreate in a sanlock VG to ensure
* that there is space in the sanlock LV for a new lock. If it's
* full, then this extends it.
*/
int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
{
daemon_reply reply;
int extend_mb;
int result;
int ret;
if (!_use_lvmlockd)
return 1;
if (!_lvmlockd_connected)
return 0;
extend_mb = find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL);
/*
* User can choose to not automatically extend the lvmlock LV
* so they can manually extend it.
*/
if (!extend_mb)
return 1;
/*
* Another host may have extended the lvmlock LV already.
* Refresh so that we'll find the new space they added
* when we search for new space.
*/
if (!_refresh_sanlock_lv(cmd, vg))
return 0;
/*
* Ask lvmlockd/sanlock to look for an unused lock.
*/
reply = _lockd_send("find_free_lock",
"pid = %d", getpid(),
"vg_name = %s", vg->name,
NULL);
if (!_lockd_result(reply, &result, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
}
/* No space on the lvmlock lv for a new lease. */
if (result == -EMSGSIZE)
ret = _extend_sanlock_lv(cmd, vg, extend_mb);
daemon_reply_destroy(reply);
return ret;
}
/*
 * Activate the VG's internal sanlock LV.
 * Returns 1 on success; logs an error and returns 0 otherwise.
 */
static int _activate_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
{
	if (activate_lv(cmd, vg->sanlock_lv))
		return 1;

	log_error("Failed to activate sanlock lv %s/%s", vg->name, vg->sanlock_lv->name);
	return 0;
}
/*
 * Deactivate the VG's internal sanlock LV.
 * Returns 1 on success; logs an error and returns 0 otherwise.
 */
static int _deactivate_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
{
	if (deactivate_lv(cmd, vg->sanlock_lv))
		return 1;

	log_error("Failed to deactivate sanlock lv %s/%s", vg->name, vg->sanlock_lv->name);
	return 0;
}
/*
 * vgcreate for a dlm VG: ask lvmlockd to initialize the VG, then store
 * lock_type "dlm" and the lock_args returned by lvmlockd in the VG and
 * write/commit the metadata.
 *
 * Returns 1 on success, 0 on failure (an error is logged).
 */
static int _init_vg_dlm(struct cmd_context *cmd, struct volume_group *vg)
{
	daemon_reply reply;
	const char *reply_str;
	const char *vg_lock_args = NULL;
	int result;
	int ret;

	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	/*
	 * The pid is cast to int64_t like the other integer request values
	 * (see the "version" field in lockd_start_vg()).
	 */
	reply = _lockd_send("init_vg",
				"pid = %d", (int64_t) getpid(),
				"vg_name = %s", vg->name,
				"vg_lock_type = %s", "dlm",
				NULL);

	if (!_lockd_result(reply, &result, NULL)) {
		ret = 0;
		/* No result from lvmlockd; report it as unavailable below. */
		result = -ELOCKD;
	} else {
		ret = (result < 0) ? 0 : 1;
	}

	switch (result) {
	case 0:
		break;
	case -ELOCKD:
		log_error("VG %s init failed: lvmlockd not available", vg->name);
		break;
	case -EARGS:
		log_error("VG %s init failed: invalid parameters for dlm", vg->name);
		break;
	case -EMANAGER:
		log_error("VG %s init failed: lock manager dlm is not running", vg->name);
		break;
	case -EPROTONOSUPPORT:
		log_error("VG %s init failed: lock manager dlm is not supported by lvmlockd", vg->name);
		break;
	default:
		log_error("VG %s init failed: %d", vg->name, result);
	}

	if (!ret)
		goto out;

	if (!(reply_str = daemon_reply_str(reply, "vg_lock_args", NULL))) {
		log_error("VG %s init failed: lock_args not returned", vg->name);
		ret = 0;
		goto out;
	}

	/* Copy the string: the reply is destroyed before returning. */
	if (!(vg_lock_args = dm_pool_strdup(cmd->mem, reply_str))) {
		log_error("VG %s init failed: lock_args alloc failed", vg->name);
		ret = 0;
		goto out;
	}

	vg->lock_type = "dlm";
	vg->lock_args = vg_lock_args;

	if (!vg_write(vg) || !vg_commit(vg)) {
		log_error("VG %s init failed: vg_write vg_commit", vg->name);
		ret = 0;
		goto out;
	}

	ret = 1;
out:
	daemon_reply_destroy(reply);
	return ret;
}
/*
 * vgcreate for a sanlock VG: create the internal lvmlock LV, ask lvmlockd
 * to initialize the VG on it, then hide the LV, store lock_type "sanlock"
 * and the lock_args returned by lvmlockd, and write/commit the metadata.
 *
 * If anything fails after the LV was created, the LV is deactivated and
 * removed again and the VG is rewritten.
 *
 * Returns 1 on success, 0 on failure (an error is logged).
 */
static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg)
{
	daemon_reply reply;
	const char *reply_str;
	const char *vg_lock_args = NULL;
	int extend_mb;
	int result;
	int ret;

	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	/*
	 * Automatic extension of the sanlock lv is disabled by
	 * setting sanlock_lv_extend to 0.  Zero won't work as
	 * an initial size, so in this case, use the default as
	 * the initial size.
	 */
	if (!(extend_mb = find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL)))
		extend_mb = DEFAULT_SANLOCK_LV_EXTEND_MB;

	/*
	 * Creating the sanlock LV writes the VG containing the new lvmlock
	 * LV, then activates the lvmlock LV.  The lvmlock LV must be active
	 * before we ask lvmlockd to initialize the VG because sanlock needs
	 * to initialize leases on the lvmlock LV.
	 */
	if (!_create_sanlock_lv(cmd, vg, LOCKD_SANLOCK_LV_NAME, extend_mb)) {
		log_error("Failed to create internal lv.");
		return 0;
	}

	/*
	 * N.B. this passes the sanlock lv name as vg_lock_args
	 * even though it is only part of the final args string
	 * which will be returned from lvmlockd.
	 *
	 * The pid is cast to int64_t like the other integer request values
	 * (see the "version" field in lockd_start_vg()).
	 */
	reply = _lockd_send("init_vg",
				"pid = %d", (int64_t) getpid(),
				"vg_name = %s", vg->name,
				"vg_lock_type = %s", "sanlock",
				"vg_lock_args = %s", vg->sanlock_lv->name,
				"opts = %s", "none",
				NULL);

	if (!_lockd_result(reply, &result, NULL)) {
		ret = 0;
		/* No result from lvmlockd; report it as unavailable below. */
		result = -ELOCKD;
	} else {
		ret = (result < 0) ? 0 : 1;
	}

	switch (result) {
	case 0:
		break;
	case -ELOCKD:
		log_error("VG %s init failed: lvmlockd not available", vg->name);
		break;
	case -EARGS:
		log_error("VG %s init failed: invalid parameters for sanlock", vg->name);
		break;
	case -EMANAGER:
		log_error("VG %s init failed: lock manager sanlock is not running", vg->name);
		break;
	case -EPROTONOSUPPORT:
		log_error("VG %s init failed: lock manager sanlock is not supported by lvmlockd", vg->name);
		break;
	case -EMSGSIZE:
		log_error("VG %s init failed: no disk space for leases", vg->name);
		break;
	default:
		log_error("VG %s init failed: %d", vg->name, result);
	}

	if (!ret)
		goto out;

	if (!(reply_str = daemon_reply_str(reply, "vg_lock_args", NULL))) {
		log_error("VG %s init failed: lock_args not returned", vg->name);
		ret = 0;
		goto out;
	}

	/* Copy the string: the reply is destroyed before returning. */
	if (!(vg_lock_args = dm_pool_strdup(cmd->mem, reply_str))) {
		log_error("VG %s init failed: lock_args alloc failed", vg->name);
		ret = 0;
		goto out;
	}

	lv_set_hidden(vg->sanlock_lv);
	vg->sanlock_lv->status |= LOCKD_SANLOCK_LV;

	vg->lock_type = "sanlock";
	vg->lock_args = vg_lock_args;

	if (!vg_write(vg) || !vg_commit(vg)) {
		log_error("VG %s init failed: vg_write vg_commit", vg->name);
		ret = 0;
		goto out;
	}

	ret = 1;
out:
	if (!ret) {
		/*
		 * The usleep delay gives sanlock time to close the lock lv,
		 * and usually avoids having an annoying error printed.
		 */
		usleep(1000000);
		_deactivate_sanlock_lv(cmd, vg);
		_remove_sanlock_lv(cmd, vg);
		if (!vg_write(vg) || !vg_commit(vg))
			stack;
	}

	daemon_reply_destroy(reply);
	return ret;
}
/*
 * Called after vg_remove on disk: release the VG lock and leave the
 * dlm lockspace.  Returns 1 on success, 0 on failure.
 */
static int _free_vg_dlm(struct cmd_context *cmd, struct volume_group *vg)
{
	uint32_t lockd_flags;
	int result;

	if (!_use_lvmlockd || !_lvmlockd_connected)
		return 0;

	/*
	 * Unlocking the vg lock here preempts the lvmlockd unlock in
	 * toollib.c which happens too late since the lockspace is
	 * left here.
	 */

	/* Equivalent to a standard unlock. */
	if (!_lockd_request(cmd, "lock_vg",
			    vg->name, NULL, NULL, NULL, NULL, NULL, "un", NULL,
			    &result, &lockd_flags) ||
	    result < 0) {
		log_error("_free_vg_dlm lvmlockd result %d", result);
		return 0;
	}

	/* Leave the dlm lockspace. */
	lockd_stop_vg(cmd, vg);

	return 1;
}
/* called before vg_remove on disk */
static int _free_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg)
{
daemon_reply reply;
int result;
int ret;
if (!_use_lvmlockd)
return 0;
if (!_lvmlockd_connected)
return 0;
if (!vg->lock_args || !strlen(vg->lock_args)) {
/* Shouldn't happen in general, but maybe in some error cases? */
log_debug("_free_vg_sanlock %s no lock_args", vg->name);
return 1;
}
reply = _lockd_send("free_vg",
"pid = %d", getpid(),
"vg_name = %s", vg->name,
"vg_lock_type = %s", vg->lock_type,
"vg_lock_args = %s", vg->lock_args,
NULL);
if (!_lockd_result(reply, &result, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
}
/*
* Other hosts could still be joined to the lockspace, which means they
* are using the internal sanlock LV, which means we cannot remove the
* VG. Once other hosts stop using the VG it can be removed.
*/
if (result == -EBUSY) {
log_error("Lockspace for \"%s\" not stopped on other hosts", vg->name);
goto out;
}
if (!ret) {
log_error("_free_vg_sanlock lvmlockd result %d", result);
goto out;
}
/*
* The usleep delay gives sanlock time to close the lock lv,
* and usually avoids having an annoying error printed.
*/
usleep(1000000);
_deactivate_sanlock_lv(cmd, vg);
_remove_sanlock_lv(cmd, vg);
out:
daemon_reply_destroy(reply);
return ret;
}
/*
 * Tell lvmlockd to forget about an old VG name.
 * lvmlockd remembers previous lockd VGs so that it can provide more
 * informative error messages (see INACTIVE_LS, ADD_LS_ERROR).
 *
 * If a new local VG is created with the same name as a previous lockd VG,
 * lvmlockd's memory of the previous lockd VG interferes (causes incorrect
 * lockd_vg failures).
 *
 * We could also remove the list of inactive (old) VG names from lvmlockd,
 * and then this function would not be needed, but this would also reduce
 * the ability to have helpful error messages.
 *
 * The reply is not examined; this is a best-effort notification.
 */
static void _forget_vg_name(struct cmd_context *cmd, struct volume_group *vg)
{
	daemon_reply reply;

	if (!_use_lvmlockd)
		return;
	if (!_lvmlockd_connected)
		return;

	/*
	 * The pid is cast to int64_t like the other integer request values
	 * (see the "version" field in lockd_start_vg()).
	 */
	reply = _lockd_send("forget_vg_name",
				"pid = %d", (int64_t) getpid(),
				"vg_name = %s", vg->name,
				NULL);

	daemon_reply_destroy(reply);
}
/*
 * vgcreate: perform the lock-type specific initialization for a new VG.
 * Returns 1 on success, 0 on failure or for an unknown lock_type.
 */
int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
		  const char *lock_type)
{
	int type = get_lock_type_from_string(lock_type);

	if (type == LOCK_TYPE_NONE) {
		/* A local VG: make lvmlockd drop any memory of this name. */
		_forget_vg_name(cmd, vg);
		return 1;
	}

	if (type == LOCK_TYPE_CLVM)
		return 1;

	if (type == LOCK_TYPE_DLM)
		return _init_vg_dlm(cmd, vg);

	if (type == LOCK_TYPE_SANLOCK)
		return _init_vg_sanlock(cmd, vg);

	log_error("Unknown lock_type.");
	return 0;
}
/*
 * vgremove, before the vg is removed.  Only sanlock VGs need work at
 * this point.  Returns 1 to let the removal proceed, 0 to prevent it.
 */
int lockd_free_vg_before(struct cmd_context *cmd, struct volume_group *vg)
{
	int type = get_lock_type_from_string(vg->lock_type);

	if (type == LOCK_TYPE_SANLOCK) {
		/* returning an error will prevent vg_remove() */
		return _free_vg_sanlock(cmd, vg);
	}

	if (type == LOCK_TYPE_NONE || type == LOCK_TYPE_CLVM || type == LOCK_TYPE_DLM)
		return 1;

	log_error("Unknown lock_type.");
	return 0;
}
/*
 * vgremove, after the vg is removed.  Only dlm VGs need work at this
 * point; an unknown lock_type is logged as an error.
 */
void lockd_free_vg_final(struct cmd_context *cmd, struct volume_group *vg)
{
	int type = get_lock_type_from_string(vg->lock_type);

	if (type == LOCK_TYPE_DLM) {
		_free_vg_dlm(cmd, vg);
		return;
	}

	if (type == LOCK_TYPE_NONE || type == LOCK_TYPE_CLVM || type == LOCK_TYPE_SANLOCK)
		return;

	log_error("Unknown lock_type.");
}
/*
 * Starting a vg involves:
 * 1. reading the vg without a lock
 * 2. getting the lock_type/lock_args from the vg metadata
 * 3. doing start_vg in lvmlockd for the lock_type;
 *    this means joining the lockspace
 *
 * The vg read in step 1 should not be used for anything
 * other than getting the lock_type/lock_args/uuid necessary
 * for starting the lockspace.  To use the vg after starting
 * the lockspace, follow the standard method which is:
 * lock the vg, read/use/write the vg, unlock the vg.
 *
 * Returns 1 when the VG is not a lockd type, when the start request
 * succeeded, or when the lockspace was already started; 0 otherwise.
 */
int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg)
{
	char uuid[64] __attribute__((aligned(8)));
	daemon_reply reply;
	int host_id = 0;
	int result;
	int ret;

	memset(uuid, 0, sizeof(uuid));

	if (!is_lockd_type(vg->lock_type))
		return 1;

	if (!_use_lvmlockd) {
		log_error("VG %s start failed: lvmlockd is not enabled", vg->name);
		return 0;
	}
	if (!_lvmlockd_connected) {
		log_error("VG %s start failed: lvmlockd is not running", vg->name);
		return 0;
	}

	log_debug("lockd start VG %s lock_type %s",
		  vg->name, vg->lock_type ? vg->lock_type : "empty");

	if (!id_write_format(&vg->id, uuid, sizeof(uuid)))
		return_0;

	if (vg->lock_type && !strcmp(vg->lock_type, "sanlock")) {
		/*
		 * This is the big difference between starting
		 * sanlock vgs vs starting dlm vgs: the internal
		 * sanlock lv needs to be activated before lvmlockd
		 * does the start because sanlock needs to use the lv
		 * to access locks.
		 */
		if (!_activate_sanlock_lv(cmd, vg))
			return 0;

		host_id = find_config_tree_int(cmd, local_host_id_CFG, NULL);
	}

	/*
	 * All integer values for "%d" keys are cast to int64_t, as was
	 * already done for "version"; pid and host_id were previously
	 * passed as plain int.
	 */
	reply = _lockd_send("start_vg",
				"pid = %d", (int64_t) getpid(),
				"vg_name = %s", vg->name,
				"vg_lock_type = %s", vg->lock_type,
				"vg_lock_args = %s", vg->lock_args ?: "none",
				"vg_uuid = %s", uuid[0] ? uuid : "none",
				"version = %d", (int64_t) vg->seqno,
				"host_id = %d", (int64_t) host_id,
				NULL);

	if (!_lockd_result(reply, &result, NULL)) {
		ret = 0;
		/* No result from lvmlockd; report it as unavailable below. */
		result = -ELOCKD;
	} else {
		ret = (result < 0) ? 0 : 1;
	}

	switch (result) {
	case 0:
		log_print_unless_silent("VG %s starting %s lockspace", vg->name, vg->lock_type);
		break;
	case -ELOCKD:
		log_error("VG %s start failed: lvmlockd not available", vg->name);
		break;
	case -EEXIST:
		/* Already started is treated as success. */
		log_debug("VG %s start error: already started", vg->name);
		ret = 1;
		break;
	case -EARGS:
		log_error("VG %s start failed: invalid parameters for %s", vg->name, vg->lock_type);
		break;
	case -EHOSTID:
		log_error("VG %s start failed: invalid sanlock host_id, set in lvmlocal.conf", vg->name);
		break;
	case -EMANAGER:
		log_error("VG %s start failed: lock manager %s is not running", vg->name, vg->lock_type);
		break;
	case -EPROTONOSUPPORT:
		log_error("VG %s start failed: lock manager %s is not supported by lvmlockd", vg->name, vg->lock_type);
		break;
	default:
		log_error("VG %s start failed: %d", vg->name, result);
	}

	daemon_reply_destroy(reply);

	return ret;
}
int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg)
{
daemon_reply reply;
int result;
int ret;
if (!is_lockd_type(vg->lock_type))
return 1;
if (!_use_lvmlockd)
return 0;
if (!_lvmlockd_connected)
return 0;
log_debug("lockd stop VG %s lock_type %s",
vg->name, vg->lock_type ? vg->lock_type : "empty");
reply = _lockd_send("stop_vg",
"pid = %d", getpid(),
"vg_name = %s", vg->name,
NULL);
if (!_lockd_result(reply, &result, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
}
if (result == -ENOLS) {
ret = 1;
goto out;
}
if (result == -EBUSY) {
log_error("VG %s stop failed: LVs must first be deactivated", vg->name);
goto out;
}
if (!ret) {
log_error("VG %s stop failed: %d", vg->name, result);
goto out;
}
if (!strcmp(vg->lock_type, "sanlock")) {
log_debug("lockd_stop_vg deactivate sanlock lv");
_deactivate_sanlock_lv(cmd, vg);
}
out:
daemon_reply_destroy(reply);
return ret;
}
/*
 * Send a "start_wait" request to lvmlockd and report its outcome.
 * Returns 1 when lvmlockd returns a non-negative result, 0 otherwise
 * (including when lvmlockd is not in use or not connected).
 */
int lockd_start_wait(struct cmd_context *cmd)
{
	daemon_reply reply;
	int result;
	int ret;

	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	/*
	 * The pid is cast to int64_t like the other integer request values
	 * (see the "version" field in lockd_start_vg()).
	 */
	reply = _lockd_send("start_wait",
			"pid = %d", (int64_t) getpid(),
			NULL);

	if (!_lockd_result(reply, &result, NULL)) {
		ret = 0;
	} else {
		ret = (result < 0) ? 0 : 1;
	}

	if (!ret)
		log_error("Lock start failed");

	/*
	 * FIXME: get a list of vgs that started so we can
	 * better report what worked and what didn't?
	 */

	daemon_reply_destroy(reply);

	return ret;
}
/*
 * lockd_gl_create() is a variation of lockd_gl() used only by vgcreate.
 * It handles the case that when using sanlock, the global lock does
 * not exist until after the first vgcreate is complete, since the global
 * lock exists on storage within an actual VG.  So, the first vgcreate
 * needs special logic to detect this bootstrap case.
 *
 * When the vgcreate is not creating the first VG, then lockd_gl_create()
 * behaves the same as lockd_gl().
 *
 * vgcreate will have a lock_type for the new VG which lockd_gl_create()
 * can provide in the lock-gl call.
 *
 * lockd_gl() and lockd_gl_create() differ in the specific cases where
 * ENOLS (no lockspace found) is overridden.  In the vgcreate case, the
 * override cases are related to sanlock bootstrap, and the lock_type of
 * the vg being created is needed.
 *
 * 1. vgcreate of the first lockd-type vg calls lockd_gl_create()
 *    to acquire the global lock.
 *
 * 2. vgcreate/lockd_gl_create passes gl lock request to lvmlockd,
 *    along with lock_type of the new vg.
 *
 * 3. lvmlockd finds no global lockspace/lock.
 *
 * 4. dlm:
 *    If the lock_type from vgcreate is dlm, lvmlockd creates the
 *    dlm global lockspace, and queues the global lock request
 *    for vgcreate.  lockd_gl_create returns success with the gl held.
 *
 *    sanlock:
 *    If the lock_type from vgcreate is sanlock, lvmlockd returns -ENOLS
 *    with the NO_GL_LS flag.  lvmlockd cannot create or acquire a sanlock
 *    global lock until the VG exists on disk (the locks live within the VG).
 *
 *    lockd_gl_create sees sanlock/ENOLS/NO_GL_LS/NO_LOCKSPACES,
 *    determines that this is the sanlock bootstrap special case, and
 *    returns success without the global lock.
 *
 *    vgcreate creates the VG on disk, and calls lockd_init_vg() which
 *    initializes/enables a global lock on the new VG's internal sanlock lv.
 *    Future lockd_gl/lockd_gl_create calls will acquire the existing gl.
 *
 * def_mode is the lock mode to request; vg_lock_type is the lock type of
 * the VG being created and may be NULL.
 *
 * Returns 1 if the command may proceed, 0 on failure (an error is logged).
 */
int lockd_gl_create(struct cmd_context *cmd, const char *def_mode, const char *vg_lock_type)
{
	const char *mode = def_mode;
	/* NULL-safe: _lockd_request() also accepts a NULL lock type. */
	const int is_sanlock = vg_lock_type && !strcmp(vg_lock_type, "sanlock");
	uint32_t lockd_flags;
	int retries = 0;
	int result;

	/*
	 * There are four variations of creating a local/lockd VG
	 * with/without use_lvmlockd set.
	 *
	 * use_lvmlockd=1, lockd VG:
	 * This function should acquire or create the global lock.
	 *
	 * use_lvmlockd=0, local VG:
	 * This function is a no-op, just returns 1.
	 *
	 * use_lvmlockd=0, lockd VG
	 * An error is returned in vgcreate_params_set_from_args (before this is called).
	 *
	 * use_lvmlockd=1, local VG
	 * This function should acquire the global lock.
	 */
	if (!_use_lvmlockd) {
		if (!is_lockd_type(vg_lock_type))
			return 1;
		log_error("Cannot create VG with lock_type %s without lvmlockd.", vg_lock_type);
		return 0;
	}

	log_debug("lockd global lock_type %s", vg_lock_type ?: "none");

	if (!mode) {
		log_error("Unknown lock-gl mode");
		return 0;
	}

 req:
	if (!_lockd_request(cmd, "lock_gl",
			      NULL, vg_lock_type, NULL, NULL, NULL, NULL, mode, NULL,
			      &result, &lockd_flags)) {
		/* No result from lvmlockd, it is probably not running. */
		log_error("Global lock failed: check that lvmlockd is running.");
		return 0;
	}

	/* The lock is held elsewhere: retry up to the configured limit. */
	if (result == -EAGAIN) {
		if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
			log_warn("Retrying %s global lock", mode);
			sleep(1);
			retries++;
			goto req;
		}
	}

	/*
	 * ENOLS: no lockspace was found with a global lock.
	 * It may not exist (perhaps this command is creating the first),
	 * or it may not be visible or started on the system yet.
	 */

	if (result == -ENOLS) {
		if (!strcmp(mode, "un"))
			return 1;

		/*
		 * This is the sanlock bootstrap condition for proceeding
		 * without the global lock: a chicken/egg case for the first
		 * sanlock VG that is created.  When creating the first
		 * sanlock VG, there is no global lock to acquire because
		 * the gl will exist in the VG being created.  So, we
		 * skip acquiring the global lock when creating this initial
		 * VG, and enable the global lock in this VG.
		 *
		 * Here we assume that this is an initial bootstrap condition
		 * based on the fact that lvmlockd has seen no lockd VGs.
		 * (A command line option could be added to allow the user
		 * to make this initial bootstrap condition explicit.)
		 *
		 * That assumption might be wrong.  It is possible that a global
		 * lock does exist in a VG that has not yet been seen.  If that
		 * VG appears after this creates a new VG with a new enabled
		 * global lock, then there will be two VGs containing enabled
		 * global locks, and one will need to be disabled by the user.
		 */
		if ((lockd_flags & LD_RF_NO_GL_LS) &&
		    (lockd_flags & LD_RF_NO_LOCKSPACES) &&
		    is_sanlock) {
			log_print_unless_silent("Enabling sanlock global lock");
			lvmetad_validate_global_cache(cmd, 1);
			return 1;
		}

		if (is_sanlock)
			log_error("Global lock failed: check that VG holding global lock exists and is started.");
		else
			log_error("Global lock failed: check that global lockspace is started.");
		return 0;
	}

	/*
	 * Check for each specific error that can be returned so a helpful
	 * message can be printed for it.
	 */
	if (result < 0) {
		if (result == -ESTARTING)
			log_error("Global lock failed: lockspace is starting.");
		else if (result == -EAGAIN)
			log_error("Global lock failed: held by other host.");
		else if (result == -EPROTONOSUPPORT)
			log_error("VG create failed: lock manager %s is not supported by lvmlockd.",
				  vg_lock_type ?: "none");
		else
			log_error("Global lock failed: error %d", result);
		return 0;
	}

	lvmetad_validate_global_cache(cmd, 1);

	return 1;
}
/*
* The global lock protects:
*
* - The global VG namespace. Two VGs cannot have the same name.
* Used by any command that creates or removes a VG name,
* e.g. vgcreate, vgremove, vgrename, vgsplit, vgmerge.
*
* - The set of orphan PVs.
* Used by any command that changes a non-PV device into an orphan PV,
* an orphan PV into a device, a non-orphan PV (in a VG) into an orphan PV
* (not in a VG), or an orphan PV into a non-orphan PV,
* e.g. pvcreate, pvremove, vgcreate, vgremove, vgextend, vgreduce.
*
* - The properties of orphan PVs. It is possible to make changes to the
* properties of an orphan PV, e.g. pvresize, pvchange.
*
* These are things that cannot be protected by a VG lock alone, since
* orphan PVs do not belong to a real VG (an artificial VG does not
* apply since a sanlock lock only exists on real storage.)
*
* If a command will change any of the things above, it must first acquire
* the global lock in exclusive mode.
*
* If command is reading any of the things above, it must acquire the global
* lock in shared mode. A number of commands read the things above, including:
*
* - Reporting/display commands which show all VGs. Any command that
* will iterate through the entire VG namespace must first acquire the
* global lock shared so that it has an accurate view of the namespace.
*
* - A command where a tag name is used to identify what to process.
* A tag requires reading all VGs to check if they match the tag.
*
* In these cases, the global lock must be acquired before the list of
* all VGs is created.
*
* The global lock is not generally unlocked explicitly in the code.
* When the command disconnects from lvmlockd, lvmlockd automatically
* releases the locks held by the command. The exception is if a command
* will continue running for a long time while not needing the global lock,
* e.g. commands that poll to report progress.
*
* Acquiring the global lock also updates the local lvmetad cache if
* necessary. lockd_gl() first acquires the lock via lvmlockd, then
* before returning to the caller, it checks that the global information
* (e.g. VG namespace, set of orphans) is up to date in lvmetad. If
* not, it scans disks and updates the lvmetad cache before returning
* to the caller. It does this checking using a version number associated
* with the global lock. The version number is incremented each time
* a change is made to the state associated with the global lock, and
* if the local version number is lower than the version number in the
* lock, then the local lvmetad state must be updated.
*
* There are two cases where the global lock can be taken in shared mode,
* and then later converted to ex. pvchange and pvresize use process_each_pv
* which does lockd_gl("sh") to get the list of VGs. Later, in the "_single"
* function called within process_each_pv, the PV may be an orphan, in which
* case the ex global lock is needed, so it's converted to ex at that point.
*
* Effects of misconfiguring use_lvmlockd.
*
* - Setting use_lvmlockd=1 tells lvm commands to use the global lock.
* This should not be set unless a lock manager and lockd VGs will
* be used. Setting use_lvmlockd=1 without setting up a lock manager
* or using lockd VGs will cause lvm commands to fail when they attempt
* to change any global state (requiring the ex global lock), and will
* cause warnings when the commands read global state (requiring the sh
* global lock). In this condition, lvm is nominally useful, and existing
* local VGs can continue to be used mostly as usual. But, the
* warnings/errors should lead a user to either set up a lock manager
* and lockd VGs, or set use_lvmlockd to 0.
*
* - Setting use_lvmlockd=0 tells lvm commands to not use the global lock.
* If use_lvmlockd=0 when lockd VGs exist which require lvmlockd, the
* lockd_gl() calls become no-ops, but the lockd_vg() calls for the lockd
* VGs will fail. The warnings/errors from accessing the lockd VGs
* should lead the user to set use_lvmlockd to 1 and run the necessary
* lock manager. In this condition, lvm reverts to the behavior of
* the following case, in which system ID largely protects shared
* devices, but has limitations.
*
* - Setting use_lvmlockd=0 with shared devices, no lockd VGs and
* no lock manager is a recognized mode of operation that is
* described in the lvmsystemid man page. Using lvm on shared
* devices this way is made safe by using system IDs to assign
* ownership of VGs to single hosts. The main limitation of this
* mode (among others outlined in the man page), is that orphan PVs
* are unprotected.
*/
/*
 * Request the global lock from lvmlockd in the given mode.
 *
 * def_mode: lock mode string.  "un" and "sh" get lenient treatment on
 *           failure; any other mode must be granted or the call fails.
 *           NULL is rejected.
 * flags:    LDGL_SKIP_CACHE_VALIDATE suppresses the normal lvmetad global
 *           cache validation done when the request ends without error.
 *
 * Returns 1 if the command may continue, 0 if it must stop.
 */
int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
{
	const char *mode = def_mode;
	const char *opts = NULL;
	uint32_t lockd_flags;
	int attempts = 0;
	int result;
	int is_unlock;
	int is_shared;

	/* Not using lvmlockd: the global lock is a no-op. */
	if (!_use_lvmlockd)
		return 1;

	/* With --readonly an ex lock must never be requested. */
	if (cmd->metadata_read_only && def_mode && !strcmp(def_mode, "ex")) {
		log_error("Exclusive locks are not allowed with readonly option.");
		return 0;
	}

	if (cmd->lockd_gl_disable)
		return 1;

	if (!mode) {
		log_error("Unknown lock-gl mode");
		return 0;
	}

	is_unlock = !strcmp(mode, "un");
	is_shared = !strcmp(mode, "sh");

	/* Send the request, repeating it while lvmlockd answers -EAGAIN. */
	for (;;) {
		log_debug("lockd global mode %s", mode);

		if (!_lockd_request(cmd, "lock_gl",
				    NULL, NULL, NULL, NULL, NULL, NULL, mode, opts,
				    &result, &lockd_flags)) {
			/* No result from lvmlockd, it is probably not running. */

			/* A failed unlock is of no interest. */
			if (is_unlock)
				return 1;

			/* Reading may go on without the shared lock. */
			if (is_shared) {
				log_warn("Reading without shared global lock.");
				lvmetad_validate_global_cache(cmd, 1);
				return 1;
			}

			log_error("Global lock failed: check that lvmlockd is running.");
			return 0;
		}

		if (result != -EAGAIN ||
		    attempts >= find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL))
			break;

		log_warn("Retrying %s global lock", mode);
		sleep(1);
		attempts++;
	}

	/*
	 * ENOLS: no lockspace holding a global lock was found; the VG that
	 * has it may not be visible or started yet (expected to be temporary).
	 *
	 * ESTARTING: the lockspace holding the global lock is still starting
	 * and should be ready shortly.
	 */
	if (result == -ENOLS || result == -ESTARTING) {
		if (is_unlock)
			return 1;

		/* Anything other than a sh request fails the command. */
		if (!is_shared) {
			switch (result) {
			case -ESTARTING:
				log_error("Global lock failed: lockspace is starting.");
				break;
			case -ENOLS:
				log_error("Global lock failed: check that global lockspace is started.");
				break;
			default:
				log_error("Global lock failed: error %d", result);
				break;
			}
			return 0;
		}

		/*
		 * A sh request may proceed unlocked, with a warning and a
		 * forced global cache validation.
		 */
		if (result == -ESTARTING) {
			log_warn("Skipping global lock: lockspace is starting");
			lvmetad_validate_global_cache(cmd, 1);
			return 1;
		}

		if (lockd_flags & (LD_RF_NO_GL_LS | LD_RF_NO_LOCKSPACES)) {
			log_warn("Skipping global lock: lockspace not found or started");
			lvmetad_validate_global_cache(cmd, 1);
			return 1;
		}

		/*
		 * Completeness only: reaching this point means a dedicated
		 * check with a more helpful message belongs above.
		 */
		log_error("Global lock failed: error %d", result);
		return 0;
	}

	if ((lockd_flags & LD_RF_DUP_GL_LS) && !is_unlock)
		log_warn("Duplicate sanlock global locks should be corrected");

	/* Request ended without error. */
	if (result >= 0) {
		if (!(flags & LDGL_SKIP_CACHE_VALIDATE))
			lvmetad_validate_global_cache(cmd, 0);
		return 1;
	}

	if (ignorelockingfailure()) {
		log_debug("Ignore failed locking for global lock");
		lvmetad_validate_global_cache(cmd, 1);
		return 1;
	}

	if (result == -EAGAIN) {
		/* Retries normally prevent getting here. */
		log_error("Global lock failed: held by other host.");
	} else {
		/*
		 * Not intended to be reached: every known error should have
		 * its own check and message.  Kept for completeness.
		 */
		log_error("Global lock failed: error %d.", result);
	}

	return 0;
}
/*
* VG lock
*
* Return 1: continue, lockd_state may still indicate an error
* Return 0: failure, do not continue
*
* lvmlockd could also return the lock_type that it used for the VG,
* and we could encode that in lockd_state, and verify later that it
* matches vg->lock_type.
*
* The result of the VG lock operation needs to be saved in lockd_state
* because the result needs to be passed into vg_read so it can be
* assessed in combination with vg->lock_state.
*
* The VG lock protects the VG metadata on disk from concurrent access
* among hosts. The VG lock also ensures that the local lvmetad cache
* contains the latest version of the VG metadata from disk. (Since
* another host may have changed the VG since it was last read.)
*
* The VG lock must be acquired before the VG is read, i.e. before vg_read().
* The result from lockd_vg() is saved in the "lockd_state" variable, and
* this result is passed into vg_read(). After vg_read() reads the VG,
* it checks if the VG lock_type (sanlock or dlm) requires a lock to be
* held, and if so, it verifies that the lock was correctly acquired by
* looking at lockd_state. If vg_read() sees that the VG is a local VG,
* i.e. lock_type is not sanlock or dlm, then no lock is required, and it
* ignores lockd_state (which would indicate no lock was found.)
*
* When acquiring the VG lock, lvmlockd checks if the local cached copy
* of the VG metadata in lvmetad is up to date. If not, it invalidates
* the VG cached in lvmetad. This would happen if another host changed
* the VG since it was last read. When lvm commands read the VG from
* lvmetad, they will check if the metadata is invalid, and if so they
* will reread it from disk, and update the copy in lvmetad.
*/
int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
	     uint32_t flags, uint32_t *lockd_state)
{
	/* State left behind by the earlier sh/ex call; used for unlock only. */
	const uint32_t prev_state = *lockd_state;
	const char *mode;
	uint32_t lockd_flags;
	int attempts = 0;
	int is_unlock;
	int is_shared;
	int result;
	int ret = 1;

	*lockd_state = 0;

	if (!is_real_vg(vg_name))
		return 1;

	/*
	 * With --readonly no ex lock may be used, whether it is asked for
	 * explicitly or would be picked as the command default.
	 */
	if (cmd->metadata_read_only) {
		int wants_ex = def_mode ? !strcmp(def_mode, "ex")
					: !cmd->lockd_vg_default_sh;

		if (wants_ex) {
			log_error("Exclusive locks are not allowed with readonly option.");
			return 0;
		}
	}

	/* Some special cases need the vg lock disabled. */
	if (cmd->lockd_vg_disable)
		return 1;

	/*
	 * When the caller gives no mode (process_each functions shared by
	 * several commands), fall back to the command default: "sh" when
	 * lockd_vg_default_sh is set, otherwise "ex".
	 */
	mode = def_mode;
	if (!mode)
		mode = cmd->lockd_vg_default_sh ? "sh" : "ex";

	is_unlock = !strcmp(mode, "un");
	is_shared = !strcmp(mode, "sh");

	if (is_unlock) {
		/*
		 * If the preceding sh/ex request failed (as it does for
		 * local VGs), there is nothing to unlock, so do not bother
		 * lvmlockd.  This relies on the caller passing back the
		 * lockd_state produced by that earlier call.
		 */
		if (prev_state & LDST_FAIL) {
			log_debug("VG %s unlock skipped: lockd_state is failed", vg_name);
			return 1;
		}
	} else if (!strcmp(mode, "ex")) {
		*lockd_state |= LDST_EX;
	}

	/* Send the request, repeating it while lvmlockd answers -EAGAIN. */
	for (;;) {
		/*
		 * Checked here rather than on entry so LDST_EX is already
		 * recorded; it is used later to decide whether the failure
		 * can be ignored.  Whether the VG is local or a lockd VG is
		 * not known yet, so report success and let the check happen
		 * after the VG has been read.
		 */
		if (!_use_lvmlockd) {
			*lockd_state |= LDST_FAIL_REQUEST;
			return 1;
		}

		log_debug("lockd VG %s mode %s", vg_name, mode);

		if (!_lockd_request(cmd, "lock_vg",
				    vg_name, NULL, NULL, NULL, NULL, NULL, mode, NULL,
				    &result, &lockd_flags)) {
			/*
			 * No result from lvmlockd, it is probably not running.
			 * Whether continuing without a lock is acceptable is
			 * decided in access_vg_lock_type() once the VG has been
			 * read: the error is irrelevant for local VGs but not
			 * for lockd VGs.
			 */
			*lockd_state |= LDST_FAIL_REQUEST;
			return 1;
		}

		if (result != -EAGAIN ||
		    attempts >= find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL))
			break;

		log_warn("Retrying %s lock on VG %s", mode, vg_name);
		sleep(1);
		attempts++;
	}

	/*
	 * Record the failure class in lockd_state and pick the return value.
	 * An unlock never fails the command and never logs; a sh request is
	 * downgraded to a warning; any other mode fails unless noted below.
	 */
	if (result == -ESTARTING) {
		/*
		 * The lockspace for the VG is starting (so the VG is not
		 * local) and cannot do locking yet.  Reading without a sh
		 * lock is allowed during this period.
		 */
		*lockd_state |= LDST_FAIL_STARTING;

		if (!is_unlock) {
			if (is_shared) {
				log_warn("VG %s lock skipped: lock start in progress", vg_name);
			} else {
				log_error("VG %s lock failed: lock start in progress", vg_name);
				ret = 0;
			}
		}
	} else if (result == -ENOLS) {
		*lockd_state |= LDST_FAIL_NOLS;

		if (is_unlock) {
			/* nothing to report */
		} else if (lockd_flags & LD_RF_INACTIVE_LS) {
			/*
			 * An unused/previous lockspace for the VG exists, so
			 * this is a lockd VG whose lockspace must be started.
			 */
			if (is_shared) {
				log_warn("VG %s lock skipped: lockspace is inactive", vg_name);
			} else {
				log_error("VG %s lock failed: lockspace is inactive", vg_name);
				ret = 0;
			}
		} else if (lockd_flags & LD_RF_ADD_LS_ERROR) {
			/*
			 * An unused lockspace exists and its previous start
			 * failed, which allows a more specific message.
			 */
			if (is_shared) {
				log_warn("VG %s lock skipped: lockspace start error", vg_name);
			} else {
				log_error("VG %s lock failed: lockspace start error", vg_name);
				ret = 0;
			}
		}
		/*
		 * Otherwise no lockspace is known at all: either a local VG
		 * that lvmlockd does not track, or a lockd VG not started
		 * yet.  Continue, and decide once the VG has been read and
		 * its lock_type is visible.
		 */
	} else if (result) {
		/*
		 * Some other error.  Not meant to be reached: each specific
		 * error should be handled above with a helpful message.
		 */
		*lockd_state |= LDST_FAIL_OTHER;

		if (!is_unlock) {
			if (is_shared) {
				log_warn("VG %s lock skipped: error %d", vg_name, result);
			} else {
				log_error("VG %s lock failed: error %d", vg_name, result);
				ret = 0;
			}
		}
	}

	/*
	 * lvmlockd reports that duplicate gl locks were found; the user
	 * would do well to disable one of them.
	 */
	if ((lockd_flags & LD_RF_DUP_GL_LS) && !is_unlock)
		log_warn("Duplicate sanlock global lock in VG %s", vg_name);

	if (!ret && ignorelockingfailure()) {
		log_debug("Ignore failed locking for VG %s", vg_name);
		return 1;
	}

	return ret;
}
/*
 * Call this before a new version of the VG metadata is written to disk.
 *
 * Local VGs: nothing to do.  Lockd VGs: lvmlockd is told the new VG seqno
 * so that it can store it in the VG lock's LVB.  Other hosts compare the
 * seqno in the LVB with their cached metadata and treat the cache as stale
 * when the cached seqno is lower.
 *
 * Returns 1 for a non-lockd VG or when lvmlockd reports a non-negative
 * result; returns 0 otherwise (lvmlockd unused, not connected, no usable
 * reply, or a negative result).
 */
int lockd_vg_update(struct volume_group *vg)
{
	daemon_reply reply;
	int result;
	int ok;

	if (!is_lockd_type(vg->lock_type))
		return 1;

	if (!_use_lvmlockd || !_lvmlockd_connected)
		return 0;

	reply = _lockd_send("vg_update",
			    "pid = %d", getpid(),
			    "vg_name = %s", vg->name,
			    "version = %d", (int64_t)vg->seqno,
			    NULL);

	/* result is only examined when the reply actually carried one. */
	ok = _lockd_result(reply, &result, NULL) && result >= 0;

	daemon_reply_destroy(reply);

	return ok;
}
/*
 * Send an LV lock request ("lock_lv") to lvmlockd for the named LV.
 *
 * A caller using this directly, rather than through lockd_lv(), knows
 * that the LV has a lock.
 *
 * def_mode: "sh", "ex" or "un"; NULL selects "ex".
 * flags:    LDLV_MODE_NO_SH rejects a "sh" request,
 *           LDLV_PERSISTENT adds the "persistent" option to the request.
 *
 * Returns 1 on success (or when LV locking is disabled for the command),
 * 0 on failure.
 */
int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
		  const char *lv_name, struct id *lv_id,
		  const char *lock_args, const char *def_mode, uint32_t flags)
{
	char lv_uuid[64] __attribute__((aligned(8)));
	const char *mode;
	const char *opts;
	uint32_t lockd_flags;
	int refreshed = 0;
	int result;

	/* With --readonly no LV may be activated or used. */
	if (cmd->metadata_read_only) {
		log_error("LV locks are not allowed with readonly option.");
		return 0;
	}

	if (cmd->lockd_lv_disable)
		return 1;

	if (!_use_lvmlockd)
		return 0;

	if (!_lvmlockd_connected)
		return 0;

	if (!id_write_format(lv_id, lv_uuid, sizeof(lv_uuid)))
		return_0;

	/*
	 * lvchange/vgchange activation passes "sh" or "ex" according to the
	 * -a{e,s}y designation; with no e/s designation def_mode is NULL.
	 */
	if (def_mode && !strcmp(def_mode, "sh") && (flags & LDLV_MODE_NO_SH)) {
		log_error("Shared activation not compatible with LV type: %s/%s",
			  vg->name, lv_name);
		return 0;
	}

	mode = def_mode ? def_mode : "ex";
	opts = (flags & LDLV_PERSISTENT) ? "persistent" : NULL;

	for (;;) {
		log_debug("lockd LV %s/%s mode %s uuid %s", vg->name, lv_name, mode, lv_uuid);

		if (!_lockd_request(cmd, "lock_lv",
				    vg->name, vg->lock_type, vg->lock_args,
				    lv_name, lv_uuid, lock_args, mode, opts,
				    &result, &lockd_flags)) {
			/* No result from lvmlockd, it is probably not running. */
			log_error("Locking failed for LV %s/%s", vg->name, lv_name);
			return 0;
		}

		/*
		 * -EMSGSIZE: another host probably extended lvmlock.
		 * Refresh it and repeat the request, but only once.
		 */
		if (result != -EMSGSIZE || refreshed)
			break;

		refreshed = 1;
		log_debug("Refresh lvmlock");
		_refresh_sanlock_lv(cmd, vg);
	}

	if (result == -ENOENT && !strcmp(mode, "un")) {
		/* The lv was not active/locked. */
		return 1;
	} else if (result == -EALREADY) {
		return 1;
	} else if (result == -EAGAIN) {
		log_error("LV locked by other host: %s/%s", vg->name, lv_name);
		return 0;
	} else if (result == -ENOLS) {
		log_error("LV %s/%s lock failed: lockspace is inactive", vg->name, lv_name);
		return 0;
	} else if (result < 0) {
		log_error("LV %s/%s lock failed: error %d", vg->name, lv_name, result);
		return 0;
	}

	return 1;
}
/*
 * Redirect a lock request for a thin LV to its pool LV.
 *
 * A thin pool and all of its thin volumes share one ex lock, the one
 * specified in the metadata of the pool data lv.
 */
static int _lockd_lv_thin(struct cmd_context *cmd, struct logical_volume *lv,
			  const char *def_mode, uint32_t flags)
{
	struct logical_volume *pool_lv = NULL;
	struct lv_segment *seg;

	if (lv_is_thin_volume(lv)) {
		seg = first_seg(lv);
		if (seg)
			pool_lv = seg->pool_lv;
	} else if (lv_is_thin_pool(lv)) {
		pool_lv = lv;
	} else {
		/* This should not happen AFAIK. */
		log_error("Lock on incorrect thin lv type %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!pool_lv) {
		/* This should not happen. */
		log_error("Cannot find thin pool for %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Locking an already locked pool is a no-op, so only the unlock
	 * needs care: keep the lock for as long as the pool is active.
	 */
	if (def_mode && !strcmp(def_mode, "un") && pool_is_active(pool_lv))
		return 1;

	/* The pool lock is never taken in shared mode. */
	return lockd_lv_name(cmd, pool_lv->vg, pool_lv->name, &pool_lv->lvid.id[1],
			     pool_lv->lock_args, def_mode, flags | LDLV_MODE_NO_SH);
}
/*
* If the VG has no lock_type, then this function can return immediately.
* The LV itself may have no lock (NULL lv->lock_args), but the lock request
* may be directed to another lock, e.g. the pool LV lock in _lockd_lv_thin.
* If the lock request is not directed to another LV, and the LV has no
* lock_type set, it means that the LV has no lock, and no locking is done
* for it.
*
* An LV lock is acquired before the LV is activated, and released
* after the LV is deactivated. If the LV lock cannot be acquired,
* it means that the LV is active on another host and the activation
* fails. Commands that modify an inactive LV also acquire the LV lock.
*
* In non-lockd VGs, this is a no-op.
*
* In lockd VGs, normal LVs each have their own lock, but other
* LVs do not have their own lock, e.g. the lock for a thin LV is
* acquired on the thin pool LV, and a thin LV does not have a lock
* of its own. A cache pool LV does not have a lock of its own.
* When the cache pool LV is linked to an origin LV, the lock of
* the origin LV protects the combined origin + cache pool.
*/
int lockd_lv(struct cmd_context *cmd, struct logical_volume *lv,
const char *def_mode, uint32_t flags)
{
if (!is_lockd_type(lv->vg->lock_type))
return 1;
if (!_use_lvmlockd) {
log_error("LV in VG %s with lock_type %s requires lvmlockd.",
lv->vg->name, lv->vg->lock_type);
return 0;
}
if (!_lvmlockd_connected)
return 0;
if (lv_is_thin_type(lv))
return _lockd_lv_thin(cmd, lv, def_mode, flags);
/*
* An LV with NULL lock_args does not have a lock of its own.
*/
if (!lv->lock_args)
return 1;
/*
* LV type cannot be active concurrently on multiple hosts,
* so shared mode activation is not allowed.
*/
if (lv_is_external_origin(lv) ||
lv_is_thin_type(lv) ||
lv_is_mirror_type(lv) ||
lv_is_raid_type(lv) ||
lv_is_cache_type(lv)) {
flags |= LDLV_MODE_NO_SH;
}
return lockd_lv_name(cmd, lv->vg, lv->name, &lv->lvid.id[1],
lv->lock_args, def_mode, flags);
}
/*
 * Send an "init_lv" request to lvmlockd for a new LV in a sanlock VG and
 * hand back the "lv_lock_args" string from the reply.
 *
 * lock_args_ret: once the request has been sent, set to a copy of the
 *                returned lv_lock_args allocated from cmd->mem, or to NULL
 *                on failure.  Left untouched on the early returns that
 *                happen before the request is sent.
 *
 * Returns 1 on success, 0 on failure.
 */
static int _init_lv_sanlock(struct cmd_context *cmd, struct volume_group *vg,
			    const char *lv_name, struct id *lv_id,
			    const char **lock_args_ret)
{
	char lv_uuid[64] __attribute__((aligned(8)));
	daemon_reply reply;
	const char *reply_str;
	const char *lv_lock_args = NULL;
	int result;
	int ret = 0;

	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	if (!id_write_format(lv_id, lv_uuid, sizeof(lv_uuid)))
		return_0;

	reply = _lockd_send("init_lv",
			    "pid = %d", getpid(),
			    "vg_name = %s", vg->name,
			    "lv_name = %s", lv_name,
			    "lv_uuid = %s", lv_uuid,
			    "vg_lock_type = %s", "sanlock",
			    "vg_lock_args = %s", vg->lock_args,
			    NULL);

	/*
	 * When _lockd_result() fails, 'result' may never have been assigned
	 * (the helper is not required to set it), so it must not be compared
	 * or printed on this path.
	 */
	if (!_lockd_result(reply, &result, NULL)) {
		log_error("_init_lv_sanlock lvmlockd returned no result");
		goto out;
	}

	if (result == -EEXIST) {
		log_error("Lock already exists for LV %s/%s", vg->name, lv_name);
		goto out;
	}

	if (result == -EMSGSIZE) {
		/*
		 * No space on the lvmlock lv for a new lease, this should be
		 * detected by handle_sanlock_lv() called before.
		 */
		log_error("No sanlock space for lock for LV %s/%s", vg->name, lv_name);
		goto out;
	}

	if (result < 0) {
		log_error("_init_lv_sanlock lvmlockd result %d", result);
		goto out;
	}

	if (!(reply_str = daemon_reply_str(reply, "lv_lock_args", NULL))) {
		log_error("lv_lock_args not returned");
		goto out;
	}

	/* Copy out of the reply, which is destroyed below. */
	if (!(lv_lock_args = dm_pool_strdup(cmd->mem, reply_str))) {
		log_error("lv_lock_args allocation failed");
		goto out;
	}

	ret = 1;
out:
	daemon_reply_destroy(reply);

	*lock_args_ret = lv_lock_args;
	return ret;
}
/*
 * Send a "free_lv" request to lvmlockd for the given LV.
 * A NULL lock_args is sent as the string "none".
 *
 * Returns 1 when lvmlockd reports a non-negative result, 0 otherwise.
 */
static int _free_lv(struct cmd_context *cmd, struct volume_group *vg,
		    const char *lv_name, struct id *lv_id, const char *lock_args)
{
	char lv_uuid[64] __attribute__((aligned(8)));
	daemon_reply reply;
	int result;
	int ret;

	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	if (!id_write_format(lv_id, lv_uuid, sizeof(lv_uuid)))
		return_0;

	reply = _lockd_send("free_lv",
			    "pid = %d", getpid(),
			    "vg_name = %s", vg->name,
			    "lv_name = %s", lv_name,
			    "lv_uuid = %s", lv_uuid,
			    "vg_lock_type = %s", vg->lock_type,
			    "vg_lock_args = %s", vg->lock_args,
			    "lv_lock_args = %s", lock_args ? lock_args : "none",
			    NULL);

	/*
	 * 'result' is only meaningful when _lockd_result() succeeds; it may
	 * be left unassigned otherwise, so it is not printed on that path.
	 */
	if (!_lockd_result(reply, &result, NULL)) {
		log_error("_free_lv lvmlockd returned no result");
		ret = 0;
	} else if (result < 0) {
		log_error("_free_lv lvmlockd result %d", result);
		ret = 0;
	} else {
		ret = 1;
	}

	daemon_reply_destroy(reply);

	return ret;
}
/*
 * Complete per-LV lock initialization for the given lock_type.
 * Only sanlock sets per-LV lock_args; every other type succeeds at once
 * without touching *lock_args.
 */
int lockd_init_lv_args(struct cmd_context *cmd, struct volume_group *vg,
		       struct logical_volume *lv,
		       const char *lock_type, const char **lock_args)
{
	if (strcmp(lock_type, "sanlock") != 0)
		return 1;

	return _init_lv_sanlock(cmd, vg, lv->name, &lv->lvid.id[1], lock_args);
}
/*
* lvcreate
*
* An LV created in a lockd VG inherits the lock_type of the VG. In some
* cases, e.g. thin LVs, this function may decide that the LV should not be
* given a lock, in which case it sets lp lock_args to NULL, which will cause
* the LV to not have lock_args set in its metadata. A lockd_lv() request on
* an LV with no lock_args will do nothing (unless the LV type causes the lock
* request to be directed to another LV with a lock, e.g. to the thin pool LV
* for thin LVs.)
*
* Current limitations:
* - cache-type LV's in a lockd VG must be created with lvconvert.
* - creating a thin pool and thin lv in one command is not allowed.
*/
int lockd_init_lv(struct cmd_context *cmd, struct volume_group *vg, struct logical_volume *lv,
		  struct lvcreate_params *lp)
{
	struct logical_volume *origin_lv;
	struct lv_list *pool_lvl;
	int lock_type_num = get_lock_type_from_string(vg->lock_type);

	/* Only sanlock and dlm VGs need anything done here. */
	switch (lock_type_num) {
	case LOCK_TYPE_NONE:
	case LOCK_TYPE_CLVM:
		return 1;
	case LOCK_TYPE_SANLOCK:
	case LOCK_TYPE_DLM:
		break;
	default:
		log_error("lockd_init_lv: unknown lock_type.");
		return 0;
	}

	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	/* needs_lockd_init is set for LVs that need a lockd lock. */
	if (!lp->needs_lockd_init)
		return 1;

	if (seg_is_cache(lp) || seg_is_cache_pool(lp)) {
		log_error("Use lvconvert for cache with lock type %s", vg->lock_type);
		return 0;
	}

	if (!seg_is_thin_volume(lp) && lp->snapshot) {
		/*
		 * A COW snapshot is tied to its origin LV.  Only the origin
		 * has a lock, and that lock stands for the origin together
		 * with all of its cow snapshots.
		 */
		if (!(origin_lv = find_lv(vg, lp->origin_name))) {
			log_error("Failed to find origin LV %s/%s", vg->name, lp->origin_name);
			return 0;
		}
		if (!lockd_lv(cmd, origin_lv, "ex", LDLV_PERSISTENT)) {
			log_error("Failed to lock origin LV %s/%s", vg->name, lp->origin_name);
			return 0;
		}
		lv->lock_args = NULL;
		return 1;
	}

	if (seg_is_thin(lp)) {
		if (seg_is_thin_volume(lp)) {
			if (lp->create_pool) {
				/*
				 * Thin pool plus thin lv in one command.
				 * This could probably be made to work.
				 */
				log_error("Create thin pool and thin LV separately with lock type %s",
					  vg->lock_type);
				return 0;
			}

			/*
			 * New thin lv or thin snapshot in an existing pool.
			 * It uses the pool lock instead of one of its own,
			 * which is expressed by a NULL lock_args.
			 */
			if (!(pool_lvl = find_lv_in_vg(vg, lp->pool_name))) {
				log_error("Failed to find thin pool %s/%s", vg->name, lp->pool_name);
				return 0;
			}
			if (!lockd_lv(cmd, pool_lvl->lv, "ex", LDLV_PERSISTENT)) {
				log_error("Failed to lock thin pool %s/%s", vg->name, lp->pool_name);
				return 0;
			}
			lv->lock_args = NULL;
			return 1;
		}

		/*
		 * Not a thin volume, and the snapshot case was dealt with
		 * above, so the only valid request left is a thin pool.
		 */
		if (!lp->create_pool) {
			log_error("Unknown thin options for lock init.");
			return 0;
		}

		/* Creating a thin pool only: it gets its own lock below. */
	}

	/*
	 * The LV (a normal lv or a thin pool) gets its own lock, so its
	 * lock_args must be non-NULL.
	 *
	 * For sanlock the on-disk lease is not allocated here.  The
	 * placeholder set below is completed by lockd_init_lv_args() during
	 * vg_write().  Deferring that step means an lvcreate that bails out
	 * early has no sanlock allocation to revert; allocating here would
	 * work too, but every early exit path of lvcreate would then have
	 * to free the lease.
	 */
	if (!strcmp(vg->lock_type, "sanlock"))
		lv->lock_args = "pending";
	else if (!strcmp(vg->lock_type, "dlm"))
		lv->lock_args = "dlm";

	return 1;
}
/*
 * lvremove
 *
 * Free the lvmlockd lock of an LV.  Nothing is done for VGs with lock
 * type none/clvm, nor for an LV without lock_args.
 * Returns 1 on success or when nothing had to be done, 0 on failure.
 */
int lockd_free_lv(struct cmd_context *cmd, struct volume_group *vg,
		  const char *lv_name, struct id *lv_id, const char *lock_args)
{
	int lock_type_num = get_lock_type_from_string(vg->lock_type);

	if (lock_type_num == LOCK_TYPE_NONE || lock_type_num == LOCK_TYPE_CLVM)
		return 1;

	if (lock_type_num != LOCK_TYPE_DLM && lock_type_num != LOCK_TYPE_SANLOCK) {
		log_error("lockd_free_lv: unknown lock_type.");
		return 0;
	}

	/* No lock_args: the LV has no lock of its own to free. */
	if (!lock_args)
		return 1;

	return _free_lv(cmd, vg, lv_name, lv_id, lock_args);
}
/*
 * First lvmlockd step of vgrename, run before the VG is renamed.
 *
 * For a lockd VG: require that no LV is active locally, verify through
 * ex lock/unlock cycles that no LV is locked elsewhere, send
 * "rename_vg_before" to lvmlockd, and for sanlock deactivate the sanlock lv.
 *
 * Returns 1 on success or for a non-lockd VG, 0 on failure.
 */
int lockd_rename_vg_before(struct cmd_context *cmd, struct volume_group *vg)
{
	struct lv_list *lvl;
	daemon_reply reply;
	int have_result;
	int result;

	if (!is_lockd_type(vg->lock_type))
		return 1;
	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	if (lvs_in_vg_activated(vg)) {
		log_error("LVs must be inactive before vgrename.");
		return 0;
	}

	/* Check that no LVs are active on other hosts. */
	dm_list_iterate_items(lvl, &vg->lvs) {
		if (!lockd_lv(cmd, lvl->lv, "ex", 0)) {
			log_error("LV %s/%s must be inactive on all hosts before vgrename.",
				  vg->name, lvl->lv->name);
			return 0;
		}

		if (!lockd_lv(cmd, lvl->lv, "un", 0)) {
			log_error("Failed to unlock LV %s/%s.", vg->name, lvl->lv->name);
			return 0;
		}
	}

	/*
	 * lvmlockd:
	 * checks for other hosts in lockspace
	 * leaves the lockspace
	 */
	reply = _lockd_send("rename_vg_before",
			    "pid = %d", getpid(),
			    "vg_name = %s", vg->name,
			    "vg_lock_type = %s", vg->lock_type,
			    "vg_lock_args = %s", vg->lock_args,
			    NULL);

	have_result = _lockd_result(reply, &result, NULL);

	daemon_reply_destroy(reply);

	/*
	 * 'result' may be left unassigned when _lockd_result() fails, so it
	 * is only inspected and printed when a result was obtained.
	 */
	if (!have_result) {
		log_error("lockd_rename_vg_before lvmlockd returned no result");
		return 0;
	}

	if (result < 0) {
		log_error("lockd_rename_vg_before lvmlockd result %d", result);
		return 0;
	}

	if (!strcmp(vg->lock_type, "sanlock")) {
		log_debug("lockd_rename_vg_before deactivate sanlock lv");
		_deactivate_sanlock_lv(cmd, vg);
	}

	return 1;
}
/*
 * Last lvmlockd step of vgrename.
 *
 * success: non-zero if the rename itself succeeded.  When zero, only a
 *          restart of the VG lockspace is attempted.
 *
 * For a sanlock VG the sanlock lv is activated and "rename_vg_final" is
 * sent so that lvmlockd rewrites the leases under the new name; then the
 * lockspace is started.  A failure to (re)start the lockspace is logged
 * but does not change the return value.
 *
 * Returns 1 on success or for a non-lockd VG, 0 on failure.
 */
int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int success)
{
	daemon_reply reply;
	int have_result;
	int result;

	if (!is_lockd_type(vg->lock_type))
		return 1;
	if (!_use_lvmlockd)
		return 0;
	if (!_lvmlockd_connected)
		return 0;

	if (!success) {
		/*
		 * Depending on the problem that caused the rename to
		 * fail, it may make sense to not restart the VG here.
		 */
		if (!lockd_start_vg(cmd, vg))
			log_error("Failed to restart VG %s lockspace.", vg->name);
		return 1;
	}

	if (!strcmp(vg->lock_type, "sanlock")) {
		if (!_activate_sanlock_lv(cmd, vg))
			return 0;

		/*
		 * lvmlockd needs to rewrite the leases on disk
		 * with the new VG (lockspace) name.
		 */
		reply = _lockd_send("rename_vg_final",
				    "pid = %d", getpid(),
				    "vg_name = %s", vg->name,
				    "vg_lock_type = %s", vg->lock_type,
				    "vg_lock_args = %s", vg->lock_args,
				    NULL);

		have_result = _lockd_result(reply, &result, NULL);

		daemon_reply_destroy(reply);

		/*
		 * On either failure below the VG has already been renamed on
		 * disk but the sanlock leases have not.  Cleaning this up can
		 * probably be done by converting the VG to lock_type none,
		 * then converting back to sanlock.
		 *
		 * 'result' may be left unassigned when _lockd_result() fails,
		 * so it is only printed when a result was obtained.
		 */
		if (!have_result) {
			log_error("lockd_rename_vg_final lvmlockd returned no result");
			return 0;
		}

		if (result < 0) {
			log_error("lockd_rename_vg_final lvmlockd result %d", result);
			return 0;
		}
	}

	if (!lockd_start_vg(cmd, vg))
		log_error("Failed to start VG %s lockspace.", vg->name);

	return 1;
}
/*
 * Ask lvmlockd ("running_lm") which lock manager is running.
 * Returns "sanlock" or "dlm", or NULL when lvmlockd is unused or not
 * connected, gave no usable result, or reported none or several lock
 * managers.
 */
const char *lockd_running_lock_type(struct cmd_context *cmd)
{
	daemon_reply reply;
	const char *found = NULL;
	int result;

	if (!_use_lvmlockd)
		return NULL;
	if (!_lvmlockd_connected)
		return NULL;

	reply = _lockd_send("running_lm",
			    "pid = %d", getpid(),
			    NULL);

	if (!_lockd_result(reply, &result, NULL)) {
		log_error("Failed to get result from lvmlockd");
	} else if (result == -EXFULL) {
		log_error("lvmlockd found multiple lock managers, use --lock-type to select one.");
	} else if (result == -ENOLCK) {
		log_error("lvmlockd found no lock manager running.");
	} else if (result == LOCK_TYPE_SANLOCK) {
		log_debug("lvmlockd found sanlock");
		found = "sanlock";
	} else if (result == LOCK_TYPE_DLM) {
		log_debug("lvmlockd found dlm");
		found = "dlm";
	} else {
		log_error("Failed to find a running lock manager.");
	}

	daemon_reply_destroy(reply);

	return found;
}
/*
 * Some LV types have no lock.
 * Returns 0 for any of the LV kinds tested below, 1 for everything else.
 */
int lockd_lv_uses_lock(struct logical_volume *lv)
{
	/* Thin volumes and the internal parts of thin pools. */
	if (lv_is_thin_volume(lv) ||
	    lv_is_thin_pool_data(lv) ||
	    lv_is_thin_pool_metadata(lv) ||
	    lv_is_pool_metadata_spare(lv))
		return 0;

	/* Cache pools and their parts. */
	if (lv_is_cache_pool(lv) ||
	    lv_is_cache_pool_data(lv) ||
	    lv_is_cache_pool_metadata(lv))
		return 0;

	/* Snapshot related LVs. */
	if (lv_is_cow(lv) || (lv->status & SNAPSHOT))
		return 0;

	/* FIXME: lv_is_virtual_origin ? */

	/* The sanlock lv itself. */
	if (lv_is_lockd_sanlock_lv(lv))
		return 0;

	/* Mirror and raid sub LVs. */
	if (lv_is_mirror_image(lv) ||
	    lv_is_mirror_log(lv) ||
	    lv_is_raid_image(lv) ||
	    lv_is_raid_metadata(lv))
		return 0;

	/* Anything not visible. */
	if (!lv_is_visible(lv))
		return 0;

	return 1;
}