mirror of
git://sourceware.org/git/lvm2.git
synced 2025-07-29 19:41:56 +03:00
lvmlockd: use new sanlock_acquire2 to return owner info
Use the new sanlock_acquire2() which returns info about the owner of a lease. Pass this info back to the lvm command, where it's initially used to print the host_id of a host holding a lock when it cannot be acquired.
This commit is contained in:
@ -57,5 +57,6 @@ static inline void lvmlockd_close(daemon_handle h)
|
||||
#define EORPHAN 222
|
||||
#define EADOPT_NONE 223
|
||||
#define EADOPT_RETRY 224
|
||||
#define EIOTIMEOUT 225
|
||||
|
||||
#endif /* _LVM_LVMLOCKD_CLIENT_H */
|
||||
|
@ -1108,14 +1108,15 @@ static int lm_add_resource(struct lockspace *ls, struct resource *r)
|
||||
}
|
||||
|
||||
static int lm_lock(struct lockspace *ls, struct resource *r, int mode, struct action *act,
|
||||
struct val_blk *vb_out, int *retry, int adopt_only, int adopt_ok)
|
||||
struct val_blk *vb_out, int *retry, struct owner *owner,
|
||||
int adopt_only, int adopt_ok)
|
||||
{
|
||||
int rv = -1;
|
||||
|
||||
if (ls->lm_type == LD_LM_DLM)
|
||||
rv = lm_lock_dlm(ls, r, mode, vb_out, adopt_only, adopt_ok);
|
||||
else if (ls->lm_type == LD_LM_SANLOCK)
|
||||
rv = lm_lock_sanlock(ls, r, mode, vb_out, retry, adopt_only, adopt_ok);
|
||||
rv = lm_lock_sanlock(ls, r, mode, vb_out, retry, owner, adopt_only, adopt_ok);
|
||||
else if (ls->lm_type == LD_LM_IDM)
|
||||
rv = lm_lock_idm(ls, r, mode, vb_out, act->lv_uuid,
|
||||
&act->pvs, adopt_only, adopt_ok);
|
||||
@ -1264,7 +1265,7 @@ static void add_work_action(struct action *act)
|
||||
pthread_mutex_unlock(&worker_mutex);
|
||||
}
|
||||
|
||||
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry)
|
||||
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry, struct owner *owner)
|
||||
{
|
||||
struct lock *lk;
|
||||
struct val_blk vb;
|
||||
@ -1289,7 +1290,7 @@ static int res_lock(struct lockspace *ls, struct resource *r, struct action *act
|
||||
if (r->type == LD_RT_LV && act->lv_args[0])
|
||||
memcpy(r->lv_args, act->lv_args, MAX_ARGS);
|
||||
|
||||
rv = lm_lock(ls, r, act->mode, act, &vb, retry,
|
||||
rv = lm_lock(ls, r, act->mode, act, &vb, retry, owner,
|
||||
act->flags & LD_AF_ADOPT_ONLY ? 1 : 0,
|
||||
act->flags & LD_AF_ADOPT ? 1 : 0);
|
||||
|
||||
@ -1901,6 +1902,7 @@ out:
|
||||
static void res_process(struct lockspace *ls, struct resource *r,
|
||||
struct list_head *act_close_list, int *retry_out)
|
||||
{
|
||||
struct owner owner = { 0 };
|
||||
struct action *act, *safe, *act_close;
|
||||
struct lock *lk;
|
||||
uint32_t unlock_by_client_id = 0;
|
||||
@ -2189,8 +2191,15 @@ static void res_process(struct lockspace *ls, struct resource *r,
|
||||
|
||||
if (act->op == LD_OP_LOCK && act->mode == LD_LK_SH) {
|
||||
lm_retry = 0;
|
||||
memset(&owner, 0, sizeof(owner));
|
||||
|
||||
rv = res_lock(ls, r, act, &lm_retry, &owner);
|
||||
|
||||
/* TODO: if lock fails because it's owned by a failed host,
|
||||
and persistent reservations are enabled, then remove the
|
||||
pr of failed host_id, tell sanlock the host_id is now
|
||||
dead, and retry lock request. */
|
||||
|
||||
rv = res_lock(ls, r, act, &lm_retry);
|
||||
if ((rv == -EAGAIN) &&
|
||||
(act->retries <= act->max_retries) &&
|
||||
(lm_retry || (r->type != LD_RT_LV))) {
|
||||
@ -2199,6 +2208,8 @@ static void res_process(struct lockspace *ls, struct resource *r,
|
||||
act->retries++;
|
||||
*retry_out = 1;
|
||||
} else {
|
||||
if (rv == -EAGAIN)
|
||||
memcpy(&act->owner, &owner, sizeof(owner));
|
||||
act->result = rv;
|
||||
list_del(&act->list);
|
||||
add_client_result(act);
|
||||
@ -2222,8 +2233,10 @@ static void res_process(struct lockspace *ls, struct resource *r,
|
||||
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
||||
if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX) {
|
||||
lm_retry = 0;
|
||||
memset(&owner, 0, sizeof(owner));
|
||||
|
||||
rv = res_lock(ls, r, act, &lm_retry, &owner);
|
||||
|
||||
rv = res_lock(ls, r, act, &lm_retry);
|
||||
if ((rv == -EAGAIN) &&
|
||||
(act->retries <= act->max_retries) &&
|
||||
(lm_retry || (r->type != LD_RT_LV))) {
|
||||
@ -2232,6 +2245,8 @@ static void res_process(struct lockspace *ls, struct resource *r,
|
||||
act->retries++;
|
||||
*retry_out = 1;
|
||||
} else {
|
||||
if (rv == -EAGAIN)
|
||||
memcpy(&act->owner, &owner, sizeof(owner));
|
||||
act->result = rv;
|
||||
list_del(&act->list);
|
||||
add_client_result(act);
|
||||
@ -4235,6 +4250,31 @@ static int client_send_result(struct client *cl, struct action *act)
|
||||
"result = " FMTd64, (int64_t) act->result,
|
||||
"dump_len = " FMTd64, (int64_t) dump_len,
|
||||
NULL);
|
||||
} else if (act->op == LD_OP_LOCK && act->owner.host_id) {
|
||||
|
||||
/*
|
||||
* lock reply with owner info
|
||||
*/
|
||||
|
||||
log_debug("send %s[%d][%u] %s%s%s result %d owner %u %u %u %s %s",
|
||||
cl->name[0] ? cl->name : "client", cl->pid, cl->id,
|
||||
op_mode_str(act->op, act->mode), act->rt ? "_" : "", rt_str(act->rt), act->result,
|
||||
act->owner.host_id, act->owner.generation, act->owner.timestamp,
|
||||
act->owner.state[0] ? act->owner.state : "",
|
||||
act->owner.name[0] ? act->owner.name : "");
|
||||
|
||||
res = daemon_reply_simple("OK",
|
||||
"op = " FMTd64, (int64_t) act->op,
|
||||
"lock_type = %s", lm_str(act->lm_type),
|
||||
"op_result = " FMTd64, (int64_t) act->result,
|
||||
"lm_result = " FMTd64, (int64_t) act->lm_rv,
|
||||
"owner_host_id = " FMTd64, (int64_t) act->owner.host_id,
|
||||
"owner_generation = " FMTd64, (int64_t) act->owner.generation,
|
||||
"owner_timestamp = " FMTd64, (int64_t) act->owner.timestamp,
|
||||
"owner_state = %s", act->owner.state[0] ? act->owner.state : "none",
|
||||
"owner_name = %s", act->owner.name[0] ? act->owner.name : "none",
|
||||
"result_flags = %s", result_flags[0] ? result_flags : "none",
|
||||
NULL);
|
||||
} else {
|
||||
/*
|
||||
* A normal reply.
|
||||
|
@ -130,6 +130,17 @@ struct pvs {
|
||||
int num;
|
||||
};
|
||||
|
||||
#define OWNER_NAME_SIZE 64
|
||||
#define OWNER_STATE_SIZE 32
|
||||
|
||||
/*
 * Information about the host that currently holds a lease.
 * lm_lock_sanlock() fills this in from the sanlk_host and owner name
 * returned by sanlock_acquire2() when the lock is held by another host;
 * client_send_result() forwards it to the command as owner_* reply fields
 * when host_id is non-zero.
 */
struct owner {
|
||||
uint32_t host_id; /* owner's sanlock host_id; 0 means no owner info is available */
|
||||
uint32_t generation; /* owner's host generation, narrowed to 32 bits from sanlk_host */
|
||||
uint32_t timestamp; /* owner's timestamp, narrowed to 32 bits from sanlk_host */
|
||||
char state[OWNER_STATE_SIZE]; /* host state string from _host_flags_to_str(), e.g. "LIVE" or "FAIL" */
|
||||
char name[OWNER_NAME_SIZE]; /* owner name string returned by sanlock_acquire2(), may be empty */
|
||||
};
|
||||
|
||||
struct action {
|
||||
struct list_head list;
|
||||
uint32_t client_id;
|
||||
@ -154,6 +165,7 @@ struct action {
|
||||
char vg_args[MAX_ARGS+1];
|
||||
char lv_args[MAX_ARGS+1];
|
||||
char prev_lv_args[MAX_ARGS+1];
|
||||
struct owner owner;
|
||||
struct pvs pvs; /* PV list for idm */
|
||||
};
|
||||
|
||||
@ -553,7 +565,7 @@ int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt_only, int adopt_ok,
|
||||
int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg);
|
||||
int lm_add_resource_sanlock(struct lockspace *ls, struct resource *r);
|
||||
int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||
struct val_blk *vb_out, int *retry,
|
||||
struct val_blk *vb_out, int *retry, struct owner *owner,
|
||||
int adopt_only, int adopt_ok);
|
||||
int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
|
||||
int ld_mode, uint32_t r_version);
|
||||
@ -617,7 +629,7 @@ static inline int lm_add_resource_sanlock(struct lockspace *ls, struct resource
|
||||
}
|
||||
|
||||
static inline int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||
struct val_blk *vb_out, int *retry,
|
||||
struct val_blk *vb_out, int *retry, struct owner *owner,
|
||||
int adopt_only, int adopt_ok)
|
||||
{
|
||||
return -1;
|
||||
|
@ -27,6 +27,8 @@
|
||||
/* FIXME: copied from sanlock header until the sanlock update is more widespread */
|
||||
#define SANLK_ADD_NODELAY 0x00000002
|
||||
|
||||
#define SANLOCK_HAS_ACQUIRE2 1
|
||||
|
||||
#include <stddef.h>
|
||||
#include <poll.h>
|
||||
#include <errno.h>
|
||||
@ -1797,13 +1799,33 @@ int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *_host_flags_to_str(uint32_t flags)
|
||||
{
|
||||
int val = flags & SANLK_HOST_MASK;
|
||||
|
||||
if (val == SANLK_HOST_FREE)
|
||||
return "FREE";
|
||||
if (val == SANLK_HOST_LIVE)
|
||||
return "LIVE";
|
||||
if (val == SANLK_HOST_FAIL)
|
||||
return "FAIL";
|
||||
if (val == SANLK_HOST_DEAD)
|
||||
return "DEAD";
|
||||
if (val == SANLK_HOST_UNKNOWN)
|
||||
return "UNKNOWN";
|
||||
return "ERROR";
|
||||
}
|
||||
|
||||
int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||
struct val_blk *vb_out, int *retry, int adopt_only, int adopt_ok)
|
||||
struct val_blk *vb_out, int *retry, struct owner *owner,
|
||||
int adopt_only, int adopt_ok)
|
||||
{
|
||||
struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
|
||||
struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
|
||||
struct sanlk_resource *rs;
|
||||
struct sanlk_options opt;
|
||||
struct sanlk_host owner_host = { 0 };
|
||||
char *owner_name = NULL;
|
||||
uint64_t lock_lv_offset;
|
||||
uint32_t flags = 0;
|
||||
struct val_blk vb = { 0 };
|
||||
@ -1907,7 +1929,11 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||
memset(&opt, 0, sizeof(opt));
|
||||
sprintf(opt.owner_name, "%s", "lvmlockd");
|
||||
|
||||
#ifdef SANLOCK_HAS_ACQUIRE2
|
||||
rv = sanlock_acquire2(lms->sock, -1, flags, rs, &opt, &owner_host, &owner_name);
|
||||
#else
|
||||
rv = sanlock_acquire(lms->sock, -1, flags, 1, &rs, &opt);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* errors: translate the sanlock error number to an lvmlockd error.
|
||||
@ -1915,17 +1941,6 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||
* this function to code that doesn't recognize sanlock error numbers.
|
||||
*/
|
||||
|
||||
if (rv == -EAGAIN) {
|
||||
/*
|
||||
* It appears that sanlock_acquire returns EAGAIN when we request
|
||||
* a shared lock but the lock is held ex by another host.
|
||||
* There's no point in retrying this case, just return an error.
|
||||
*/
|
||||
log_debug("%s:%s lock_san acquire mode %d rv EAGAIN", ls->name, r->name, ld_mode);
|
||||
*retry = 0;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if ((rv == -EMSGSIZE) && (r->type == LD_RT_LV)) {
|
||||
/*
|
||||
* sanlock tried to read beyond the end of the device,
|
||||
@ -1962,64 +1977,68 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||
return -EADOPT_NONE;
|
||||
}
|
||||
|
||||
if (rv == SANLK_ACQUIRE_IDLIVE || rv == SANLK_ACQUIRE_OWNED || rv == SANLK_ACQUIRE_OTHER) {
|
||||
if (rv == SANLK_ACQUIRE_IDLIVE ||
|
||||
rv == SANLK_ACQUIRE_OWNED ||
|
||||
rv == SANLK_ACQUIRE_OTHER ||
|
||||
rv == SANLK_ACQUIRE_OWNED_RETRY ||
|
||||
rv == -EAGAIN) {
|
||||
|
||||
/*
|
||||
* The lock is held by another host. These failures can
|
||||
* happen while multiple hosts are concurrently acquiring
|
||||
* shared locks. We want to retry a couple times in this
|
||||
* case because we'll probably get the sh lock.
|
||||
* EAGAIN: when a shared lock is held, and we request an ex lock.
|
||||
*
|
||||
* I believe these are also the errors when requesting an
|
||||
* ex lock that another host holds ex. We want to report
|
||||
* something like: "lock is held by another host" in this case.
|
||||
* Retry is pointless here.
|
||||
* OWNED_RETRY: the lock is held by a failed but not yet dead host.
|
||||
* Retrying will eventually find the host is dead (and the lock is
|
||||
* granted), or another host has acquired it.
|
||||
*
|
||||
* We can't distinguish between the two cases above,
|
||||
* so if requesting a sh lock, retry a couple times,
|
||||
* otherwise don't.
|
||||
* Multiple hosts all requesting shared locks can also result in
|
||||
* some transient errors here (shared locks involve acquiring the
|
||||
* paxos lease ex for a short period, which means two hosts both
|
||||
* requesting sh at once can cause one to fail here.)
|
||||
* Retry here to attempt to cover these transient failures.
|
||||
*
|
||||
* The command also has its own configurable retry logic.
|
||||
* The intention is to handle actual lock contention retries
|
||||
* from the command, and the transient failures from concurrent
|
||||
* shared requests here. We don't actually know when a failure
|
||||
* was related to the transient concurrent sh, so we just guess
|
||||
* it was if we were requesting a sh lock.
|
||||
*/
|
||||
log_debug("%s:%s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
|
||||
|
||||
*retry = (ld_mode == LD_LK_SH) ? 1 : 0;
|
||||
|
||||
if (rv == SANLK_ACQUIRE_OWNED_RETRY)
|
||||
*retry = 0;
|
||||
|
||||
if (owner && owner_host.host_id) {
|
||||
const char *host_state;
|
||||
|
||||
owner->host_id = (uint32_t)owner_host.host_id;
|
||||
owner->generation = (uint32_t)owner_host.generation;
|
||||
owner->timestamp = (uint32_t)owner_host.timestamp;
|
||||
|
||||
if ((host_state = _host_flags_to_str(owner_host.flags)))
|
||||
dm_strncpy(owner->state, host_state, OWNER_STATE_SIZE-1);
|
||||
|
||||
if (owner_name) {
|
||||
dm_strncpy(owner->name, owner_name, OWNER_NAME_SIZE-1);
|
||||
free(owner_name);
|
||||
}
|
||||
|
||||
log_debug("%s:%s lock_san acquire mode %d lock held %d owner %u %u %u %s %s",
|
||||
ls->name, r->name, ld_mode, rv,
|
||||
owner->host_id, owner->generation, owner->timestamp,
|
||||
owner->state, owner->name ?: "");
|
||||
} else {
|
||||
log_debug("%s:%s lock_san acquire mode %d lock held %d",
|
||||
ls->name, r->name, ld_mode, rv);
|
||||
}
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (rv == SANLK_AIO_TIMEOUT) {
|
||||
/*
|
||||
* sanlock got an i/o timeout when trying to acquire the
|
||||
* lease on disk.
|
||||
*/
|
||||
log_debug("%s:%s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
|
||||
log_debug("%s:%s lock_san acquire mode %d io timeout", ls->name, r->name, ld_mode);
|
||||
*retry = 0;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (rv == SANLK_DBLOCK_LVER || rv == SANLK_DBLOCK_MBAL) {
|
||||
/*
|
||||
* There was contention with another host for the lease,
|
||||
* and we lost.
|
||||
*/
|
||||
log_debug("%s:%s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
|
||||
*retry = 0;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (rv == SANLK_ACQUIRE_OWNED_RETRY) {
|
||||
/*
|
||||
* The lock is held by a failed host, and will eventually
|
||||
* expire. If we retry we'll eventually acquire the lock
|
||||
* (or find someone else has acquired it). The EAGAIN retry
|
||||
* attempts for SH locks above would not be sufficient for
|
||||
* the length of expiration time. We could add a longer
|
||||
* retry time here to cover the full expiration time and block
|
||||
* the activation command for that long. For now just return
|
||||
* the standard error indicating that another host still owns
|
||||
* the lease. FIXME: return a different error number so the
|
||||
command can print a different error indicating that the
|
||||
* owner of the lease is in the process of expiring?
|
||||
*/
|
||||
log_debug("%s:%s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
|
||||
*retry = 0;
|
||||
return -EAGAIN;
|
||||
return -EIOTIMEOUT;
|
||||
}
|
||||
|
||||
if (rv < 0) {
|
||||
@ -2162,8 +2181,6 @@ int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
|
||||
case SANLK_ACQUIRE_OWNED_RETRY:
|
||||
case SANLK_ACQUIRE_OTHER:
|
||||
case SANLK_AIO_TIMEOUT:
|
||||
case SANLK_DBLOCK_LVER:
|
||||
case SANLK_DBLOCK_MBAL:
|
||||
/* expected errors from known/normal cases like lock contention or io timeouts */
|
||||
log_debug("%s:%s convert_san error %d", ls->name, r->name, rv);
|
||||
return -EAGAIN;
|
||||
|
@ -32,6 +32,12 @@ struct lvmlockd_pvs {
|
||||
int num;
|
||||
};
|
||||
|
||||
/*
 * Command-side view of the lock owner reported by lvmlockd.
 * _lockd_result() fills this in from the owner_host_id, owner_generation
 * and owner_name fields of the daemon reply; _owner_str() formats it for
 * messages.
 */
struct owner {
|
||||
uint32_t host_id; /* host_id of the lock holder; 0 means the reply carried no owner info */
|
||||
uint32_t generation; /* generation of the lock holder */
|
||||
char *name; /* copy of owner_name made with dm_pool_strdup() from cmd->mem in _lockd_result() */
|
||||
};
|
||||
|
||||
void lvmlockd_set_socket(const char *sock)
|
||||
{
|
||||
_lvmlockd_socket = sock;
|
||||
@ -132,6 +138,21 @@ static void _flags_str_to_lockd_flags(const char *flags_str, uint32_t *lockd_fla
|
||||
*lockd_flags |= LD_RF_SH_EXISTS;
|
||||
}
|
||||
|
||||
static char *_owner_str(struct owner *owner)
|
||||
{
|
||||
static char log_owner_str[128];
|
||||
|
||||
if (!owner || !owner->host_id)
|
||||
return (char *)"";
|
||||
|
||||
log_owner_str[0] = '\0';
|
||||
|
||||
/* Use a --lockopt setting to print all owner details? */
|
||||
|
||||
snprintf(log_owner_str, sizeof(log_owner_str)-1, " (host_id %u)", owner->host_id);
|
||||
return log_owner_str;
|
||||
}
|
||||
|
||||
/*
|
||||
* evaluate the reply from lvmlockd, check for errors, extract
|
||||
* the result and lockd_flags returned by lvmlockd.
|
||||
@ -146,10 +167,11 @@ static void _flags_str_to_lockd_flags(const char *flags_str, uint32_t *lockd_fla
|
||||
*/
|
||||
#define NO_LOCKD_RESULT (-1000)
|
||||
|
||||
static int _lockd_result(const char *req_name, daemon_reply reply, int *result, uint32_t *lockd_flags)
|
||||
static int _lockd_result(struct cmd_context *cmd, const char *req_name, daemon_reply reply,
|
||||
int *result, uint32_t *lockd_flags, struct owner *owner)
|
||||
{
|
||||
int reply_result;
|
||||
const char *flags_str = NULL;
|
||||
const char *str;
|
||||
|
||||
*result = -1;
|
||||
|
||||
@ -172,8 +194,15 @@ static int _lockd_result(const char *req_name, daemon_reply reply, int *result,
|
||||
*result = reply_result;
|
||||
|
||||
if (lockd_flags) {
|
||||
if ((flags_str = daemon_reply_str(reply, "result_flags", NULL)))
|
||||
_flags_str_to_lockd_flags(flags_str, lockd_flags);
|
||||
if ((str = daemon_reply_str(reply, "result_flags", NULL)))
|
||||
_flags_str_to_lockd_flags(str, lockd_flags);
|
||||
}
|
||||
|
||||
if (owner) {
|
||||
owner->host_id = (uint32_t)daemon_reply_int(reply, "owner_host_id", 0);
|
||||
owner->generation = (uint32_t)daemon_reply_int(reply, "owner_generation", 0);
|
||||
if ((str = daemon_reply_str(reply, "owner_name", "none")))
|
||||
owner->name = dm_pool_strdup(cmd->mem, str);
|
||||
}
|
||||
|
||||
log_debug("lockd %s result: %d", req_name, reply_result);
|
||||
@ -389,7 +418,8 @@ static int _lockd_request(struct cmd_context *cmd,
|
||||
const char *opts,
|
||||
const struct lvmlockd_pvs *lock_pvs,
|
||||
int *result,
|
||||
uint32_t *lockd_flags)
|
||||
uint32_t *lockd_flags,
|
||||
struct owner *owner)
|
||||
{
|
||||
const char *cmd_name = get_cmd_name();
|
||||
daemon_reply reply;
|
||||
@ -426,7 +456,7 @@ static int _lockd_request(struct cmd_context *cmd,
|
||||
"lv_lock_args = %s", lv_lock_args ?: "none",
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result(req_name, reply, result, lockd_flags))
|
||||
if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
@ -446,7 +476,7 @@ static int _lockd_request(struct cmd_context *cmd,
|
||||
"vg_lock_args = %s", vg_lock_args ?: "none",
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result(req_name, reply, result, lockd_flags))
|
||||
if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
@ -464,7 +494,7 @@ static int _lockd_request(struct cmd_context *cmd,
|
||||
"vg_lock_type = %s", vg_lock_type ?: "none",
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result(req_name, reply, result, lockd_flags))
|
||||
if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
|
||||
goto fail;
|
||||
|
||||
log_debug("lockd %s %s result %d %x",
|
||||
@ -735,7 +765,7 @@ static int _handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
|
||||
"lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("find_free_lock", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "find_free_lock", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -790,7 +820,7 @@ static int _init_vg(struct cmd_context *cmd, struct volume_group *vg,
|
||||
"vg_lock_type = %s", lock_type,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("init_vg", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
result = -ELOCKD;
|
||||
} else {
|
||||
@ -987,7 +1017,7 @@ static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, in
|
||||
"opts = %s", opts ?: "none",
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("init_vg", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
result = -ELOCKD;
|
||||
} else {
|
||||
@ -1089,7 +1119,7 @@ static int _free_vg(struct cmd_context *cmd, struct volume_group *vg)
|
||||
"vg_lock_args = %s", vg->lock_args,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("free_vg", reply, &result, &lockd_flags)) {
|
||||
if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -1143,7 +1173,7 @@ static int _busy_vg(struct cmd_context *cmd, struct volume_group *vg)
|
||||
"vg_lock_args = %s", vg->lock_args,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("busy_vg", reply, &result, &lockd_flags)) {
|
||||
if (!_lockd_result(cmd, "busy_vg", reply, &result, &lockd_flags, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -1217,7 +1247,7 @@ static int _free_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg)
|
||||
"vg_lock_args = %s", vg->lock_args,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("free_vg", reply, &result, &lockd_flags)) {
|
||||
if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -1497,7 +1527,7 @@ int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, int *exists
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (!_lockd_result("start_vg", reply, &result, &lockd_flags)) {
|
||||
if (!_lockd_result(cmd, "start_vg", reply, &result, &lockd_flags, NULL)) {
|
||||
ret = 0;
|
||||
result = -ELOCKD;
|
||||
} else {
|
||||
@ -1566,7 +1596,7 @@ int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg)
|
||||
"vg_name = %s", vg->name,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("stop_vg", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "stop_vg", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -1612,7 +1642,7 @@ int lockd_start_wait(struct cmd_context *cmd)
|
||||
"pid = " FMTd64, (int64_t) getpid(),
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("start_wait", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "start_wait", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -1683,6 +1713,7 @@ int lockd_start_wait(struct cmd_context *cmd)
|
||||
|
||||
int lockd_global_create(struct cmd_context *cmd, const char *def_mode, const char *vg_lock_type)
|
||||
{
|
||||
struct owner owner = { 0 };
|
||||
const char *mode = NULL;
|
||||
uint32_t lockd_flags;
|
||||
int retries = 0;
|
||||
@ -1730,15 +1761,18 @@ int lockd_global_create(struct cmd_context *cmd, const char *def_mode, const cha
|
||||
req:
|
||||
if (!_lockd_request(cmd, "lock_gl",
|
||||
NULL, vg_lock_type, NULL, NULL, NULL, NULL, mode, NULL,
|
||||
NULL, &result, &lockd_flags)) {
|
||||
NULL, &result, &lockd_flags, &owner)) {
|
||||
/* No result from lvmlockd, it is probably not running. */
|
||||
log_error("Global lock failed: check that lvmlockd is running.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (result == -EAGAIN) {
|
||||
if (result == -EAGAIN || result == -EIOTIMEOUT) {
|
||||
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
||||
log_warn("Retrying %s global lock", mode);
|
||||
if (result == -EIOTIMEOUT)
|
||||
log_warn("Retrying global lock: io timeout");
|
||||
else
|
||||
log_warn("Retrying global lock: held by other host%s", _owner_str(&owner));
|
||||
sleep(1);
|
||||
retries++;
|
||||
goto req;
|
||||
@ -1821,8 +1855,10 @@ int lockd_global_create(struct cmd_context *cmd, const char *def_mode, const cha
|
||||
if (result < 0) {
|
||||
if (result == -ESTARTING)
|
||||
log_error("Global lock failed: lockspace is starting.");
|
||||
else if (result == -EIOTIMEOUT)
|
||||
log_error("Global lock failed: io timeout");
|
||||
else if (result == -EAGAIN)
|
||||
log_error("Global lock failed: held by other host.");
|
||||
log_error("Global lock failed: held by other host%s", _owner_str(&owner));
|
||||
else if (result == -EPROTONOSUPPORT)
|
||||
log_error("VG create failed: lock manager %s is not supported by lvmlockd.", vg_lock_type);
|
||||
else
|
||||
@ -1925,6 +1961,7 @@ out:
|
||||
|
||||
int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
{
|
||||
struct owner owner = { 0 };
|
||||
const char *mode = NULL;
|
||||
const char *opts = NULL;
|
||||
uint32_t lockd_flags;
|
||||
@ -1976,7 +2013,7 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
|
||||
if (!_lockd_request(cmd, "lock_gl",
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, mode, opts,
|
||||
NULL, &result, &lockd_flags)) {
|
||||
NULL, &result, &lockd_flags, &owner)) {
|
||||
/* No result from lvmlockd, it is probably not running. */
|
||||
|
||||
/* We don't care if an unlock fails. */
|
||||
@ -1993,9 +2030,12 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (result == -EAGAIN) {
|
||||
if (result == -EAGAIN || result == -EIOTIMEOUT) {
|
||||
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
||||
log_warn("Retrying %s global lock", mode);
|
||||
if (result == -EIOTIMEOUT)
|
||||
log_warn("Retrying global lock: io timeout");
|
||||
else
|
||||
log_warn("Retrying global lock: held by other host%s", _owner_str(&owner));
|
||||
sleep(1);
|
||||
retries++;
|
||||
goto req;
|
||||
@ -2038,10 +2078,12 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
result == -ESTARTING ||
|
||||
result == -EVGKILLED ||
|
||||
result == -ELOCKIO ||
|
||||
result == -EIOTIMEOUT ||
|
||||
result == -ELMERR ||
|
||||
result == -EORPHAN ||
|
||||
result == -EADOPT_RETRY ||
|
||||
result == -EADOPT_NONE) {
|
||||
result == -EADOPT_NONE ||
|
||||
result == -EAGAIN) {
|
||||
/*
|
||||
* If an ex global lock fails, then the command fails.
|
||||
*/
|
||||
@ -2052,6 +2094,8 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
log_error("Global lock failed: check that global lockspace is started");
|
||||
else if (result == -ELOCKIO)
|
||||
log_error("Global lock failed: storage errors for sanlock leases");
|
||||
else if (result == -EIOTIMEOUT)
|
||||
log_error("Global lock failed: io timeout");
|
||||
else if (result == -ELMERR)
|
||||
log_error("Global lock failed: lock manager error");
|
||||
else if (result == -EVGKILLED)
|
||||
@ -2062,6 +2106,8 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
log_error("Global lock failed: adopt found no orphan");
|
||||
else if (result == -EADOPT_RETRY)
|
||||
log_error("Global lock failed: adopt found other mode");
|
||||
else if (result == -EAGAIN)
|
||||
log_error("Global lock failed: held by other host%s", _owner_str(&owner));
|
||||
else
|
||||
log_error("Global lock failed: error %d", result);
|
||||
|
||||
@ -2085,6 +2131,11 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
goto allow;
|
||||
}
|
||||
|
||||
if (result == -EIOTIMEOUT) {
|
||||
log_warn("Skipping global lock: io timeout");
|
||||
goto allow;
|
||||
}
|
||||
|
||||
if ((lockd_flags & LD_RF_NO_GL_LS) && (lockd_flags & LD_RF_WARN_GL_REMOVED)) {
|
||||
log_warn("Skipping global lock: VG with global lock was removed");
|
||||
goto allow;
|
||||
@ -2110,12 +2161,16 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
goto allow;
|
||||
}
|
||||
|
||||
if (result == -EAGAIN) {
|
||||
log_warn("Skipping global lock: held by other host%s", _owner_str(&owner));
|
||||
goto allow;
|
||||
}
|
||||
|
||||
if ((lockd_flags & LD_RF_NO_GL_LS) || (lockd_flags & LD_RF_NO_LOCKSPACES)) {
|
||||
log_debug("Skipping global lock: lockspace not found or started");
|
||||
goto allow;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This is for completeness. If we reach here, then
|
||||
* a specific check for the error should be added above
|
||||
@ -2129,21 +2184,13 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
log_warn("Duplicate sanlock global locks should be corrected");
|
||||
|
||||
if (result < 0) {
|
||||
if (result == -EAGAIN) {
|
||||
/*
|
||||
* Most of the time, retries should avoid this case.
|
||||
*/
|
||||
log_error("Global lock failed: held by other host.");
|
||||
return 0;
|
||||
} else {
|
||||
/*
|
||||
* We don't intend to reach this. We should check
|
||||
* any known/possible error specifically and print
|
||||
* a more helpful message. This is for completeness.
|
||||
*/
|
||||
log_error("Global lock failed: error %d.", result);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* We don't intend to reach this. We should check
|
||||
* any known/possible error specifically and print
|
||||
* a more helpful message. This is for completeness.
|
||||
*/
|
||||
log_error("Global lock failed: error %d.", result);
|
||||
return 0;
|
||||
}
|
||||
|
||||
allow:
|
||||
@ -2194,6 +2241,7 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||
int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
||||
uint32_t flags, uint32_t *lockd_state)
|
||||
{
|
||||
struct owner owner = { 0 };
|
||||
const char *mode = NULL;
|
||||
const char *opts = NULL;
|
||||
uint32_t lockd_flags;
|
||||
@ -2293,7 +2341,7 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
||||
|
||||
if (!_lockd_request(cmd, "lock_vg",
|
||||
vg_name, NULL, NULL, NULL, NULL, NULL, mode, opts,
|
||||
NULL, &result, &lockd_flags)) {
|
||||
NULL, &result, &lockd_flags, &owner)) {
|
||||
/*
|
||||
* No result from lvmlockd, it is probably not running.
|
||||
* Decide if it is ok to continue without a lock in
|
||||
@ -2305,9 +2353,12 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (result == -EAGAIN) {
|
||||
if (result == -EAGAIN || result == -EIOTIMEOUT) {
|
||||
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
||||
log_warn("Retrying %s lock on VG %s", mode, vg_name);
|
||||
if (result == -EIOTIMEOUT)
|
||||
log_warn("Retrying lock on VG %s: io timeout", vg_name);
|
||||
else
|
||||
log_warn("Retrying lock on VG %s: held by other host%s", vg_name, _owner_str(&owner));
|
||||
sleep(1);
|
||||
retries++;
|
||||
goto req;
|
||||
@ -2381,6 +2432,19 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
||||
}
|
||||
}
|
||||
|
||||
if (result == -EIOTIMEOUT) {
|
||||
if (!strcmp(mode, "un"))
|
||||
goto out;
|
||||
else if (!strcmp(mode, "sh")) {
|
||||
log_warn("VG %s lock skipped: io timeout", vg_name);
|
||||
goto out;
|
||||
} else {
|
||||
log_error("VG %s lock failed: io timeout", vg_name);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The lock is held by another host, and retries have been unsuccessful.
|
||||
*/
|
||||
@ -2388,10 +2452,10 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
||||
if (!strcmp(mode, "un"))
|
||||
goto out;
|
||||
else if (!strcmp(mode, "sh")) {
|
||||
log_warn("VG %s lock skipped: held by other host.", vg_name);
|
||||
log_warn("VG %s lock skipped: held by other host%s", vg_name, _owner_str(&owner));
|
||||
goto out;
|
||||
} else {
|
||||
log_error("VG %s lock failed: held by other host.", vg_name);
|
||||
log_error("VG %s lock failed: held by other host%s", vg_name, _owner_str(&owner));
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
@ -2522,7 +2586,7 @@ int lockd_vg_update(struct volume_group *vg)
|
||||
"version = " FMTd64, (int64_t) vg->seqno,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("vg_update", reply, &result, NULL)) {
|
||||
if (!_lockd_result(vg->cmd, "vg_update", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -2554,7 +2618,7 @@ static int _query_lv(struct cmd_context *cmd, struct volume_group *vg,
|
||||
"lv_lock_args = %s", lock_args ?: "none",
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("query_lock_lv", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "query_lock_lv", reply, &result, NULL, NULL)) {
|
||||
/* No result from lvmlockd, it is probably not running. */
|
||||
log_error("Lock query failed for LV %s/%s", vg->name, lv_name);
|
||||
return 0;
|
||||
@ -2621,6 +2685,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
|
||||
const char *lv_name, struct id *lv_id,
|
||||
const char *lock_args, const char *def_mode, uint32_t flags)
|
||||
{
|
||||
struct owner owner = { 0 };
|
||||
char lv_uuid[64] __attribute__((aligned(8)));
|
||||
char opt_buf[64] = {};
|
||||
const char *opts = NULL;
|
||||
@ -2718,7 +2783,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
|
||||
if (!_lockd_request(cmd, "lock_lv",
|
||||
vg->name, vg->lock_type, vg->lock_args,
|
||||
lv_name, lv_uuid, lock_args, mode, opts,
|
||||
&lock_pvs, &result, &lockd_flags)) {
|
||||
&lock_pvs, &result, &lockd_flags, NULL)) {
|
||||
_lockd_free_pv_list(&lock_pvs);
|
||||
/* No result from lvmlockd, it is probably not running. */
|
||||
log_error("Locking failed for LV %s/%s", vg->name, lv_name);
|
||||
@ -2729,7 +2794,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
|
||||
if (!_lockd_request(cmd, "lock_lv",
|
||||
vg->name, vg->lock_type, vg->lock_args,
|
||||
lv_name, lv_uuid, lock_args, mode, opts,
|
||||
NULL, &result, &lockd_flags)) {
|
||||
NULL, &result, &lockd_flags, &owner)) {
|
||||
/* No result from lvmlockd, it is probably not running. */
|
||||
log_error("Locking failed for LV %s/%s", vg->name, lv_name);
|
||||
return 0;
|
||||
@ -2744,7 +2809,12 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
|
||||
return 1;
|
||||
|
||||
if (result == -EAGAIN) {
|
||||
log_error("LV locked by other host: %s/%s", vg->name, lv_name);
|
||||
log_error("LV locked by other host: %s/%s%s", vg->name, lv_name, _owner_str(&owner));
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (result == -EIOTIMEOUT) {
|
||||
log_error("LV %s/%s lock failed: io timeout.", vg->name, lv_name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3649,7 +3719,7 @@ static int _init_lv_sanlock(struct cmd_context *cmd, struct volume_group *vg,
|
||||
"vg_lock_args = %s", vg->lock_args,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("init_lv", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "init_lv", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -3724,7 +3794,7 @@ static int _free_lv(struct cmd_context *cmd, struct volume_group *vg,
|
||||
"lv_lock_args = %s", lock_args ?: "none",
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("free_lv", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "free_lv", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -3989,7 +4059,7 @@ int lockd_rename_vg_before(struct cmd_context *cmd, struct volume_group *vg)
|
||||
"vg_lock_args = %s", vg->lock_args,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("rename_vg_before", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "rename_vg_before", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -4054,7 +4124,7 @@ int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int
|
||||
"vg_lock_args = %s", vg->lock_args,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("rename_vg_final", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "rename_vg_final", reply, &result, NULL, NULL)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = (result < 0) ? 0 : 1;
|
||||
@ -4095,7 +4165,7 @@ const char *lockd_running_lock_type(struct cmd_context *cmd, int *found_multiple
|
||||
"pid = " FMTd64, (int64_t) getpid(),
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("running_lm", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "running_lm", reply, &result, NULL, NULL)) {
|
||||
log_error("Failed to get result from lvmlockd");
|
||||
goto out;
|
||||
}
|
||||
@ -4216,7 +4286,7 @@ int lockd_lv_refresh(struct cmd_context *cmd, struct lvresize_params *lp)
|
||||
"path = %s", path,
|
||||
NULL);
|
||||
|
||||
if (!_lockd_result("refresh_lv", reply, &result, NULL)) {
|
||||
if (!_lockd_result(cmd, "refresh_lv", reply, &result, NULL, NULL)) {
|
||||
/* No result from lvmlockd, it is probably not running. */
|
||||
log_error("LV refresh failed for LV %s", path);
|
||||
return 0;
|
||||
|
Reference in New Issue
Block a user