mirror of
git://sourceware.org/git/lvm2.git
synced 2025-08-03 08:22:00 +03:00
lvmlockd: use new sanlock_acquire2 to return owner info
Use the new sanlock_acquire2() which returns info about the owner of a lease. Pass this info back to the lvm command, where it's initially used to print the host_id of a host holding a lock when it cannot be acquired.
This commit is contained in:
@ -57,5 +57,6 @@ static inline void lvmlockd_close(daemon_handle h)
|
|||||||
#define EORPHAN 222
|
#define EORPHAN 222
|
||||||
#define EADOPT_NONE 223
|
#define EADOPT_NONE 223
|
||||||
#define EADOPT_RETRY 224
|
#define EADOPT_RETRY 224
|
||||||
|
#define EIOTIMEOUT 225
|
||||||
|
|
||||||
#endif /* _LVM_LVMLOCKD_CLIENT_H */
|
#endif /* _LVM_LVMLOCKD_CLIENT_H */
|
||||||
|
@ -1108,14 +1108,15 @@ static int lm_add_resource(struct lockspace *ls, struct resource *r)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int lm_lock(struct lockspace *ls, struct resource *r, int mode, struct action *act,
|
static int lm_lock(struct lockspace *ls, struct resource *r, int mode, struct action *act,
|
||||||
struct val_blk *vb_out, int *retry, int adopt_only, int adopt_ok)
|
struct val_blk *vb_out, int *retry, struct owner *owner,
|
||||||
|
int adopt_only, int adopt_ok)
|
||||||
{
|
{
|
||||||
int rv = -1;
|
int rv = -1;
|
||||||
|
|
||||||
if (ls->lm_type == LD_LM_DLM)
|
if (ls->lm_type == LD_LM_DLM)
|
||||||
rv = lm_lock_dlm(ls, r, mode, vb_out, adopt_only, adopt_ok);
|
rv = lm_lock_dlm(ls, r, mode, vb_out, adopt_only, adopt_ok);
|
||||||
else if (ls->lm_type == LD_LM_SANLOCK)
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
||||||
rv = lm_lock_sanlock(ls, r, mode, vb_out, retry, adopt_only, adopt_ok);
|
rv = lm_lock_sanlock(ls, r, mode, vb_out, retry, owner, adopt_only, adopt_ok);
|
||||||
else if (ls->lm_type == LD_LM_IDM)
|
else if (ls->lm_type == LD_LM_IDM)
|
||||||
rv = lm_lock_idm(ls, r, mode, vb_out, act->lv_uuid,
|
rv = lm_lock_idm(ls, r, mode, vb_out, act->lv_uuid,
|
||||||
&act->pvs, adopt_only, adopt_ok);
|
&act->pvs, adopt_only, adopt_ok);
|
||||||
@ -1264,7 +1265,7 @@ static void add_work_action(struct action *act)
|
|||||||
pthread_mutex_unlock(&worker_mutex);
|
pthread_mutex_unlock(&worker_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry)
|
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry, struct owner *owner)
|
||||||
{
|
{
|
||||||
struct lock *lk;
|
struct lock *lk;
|
||||||
struct val_blk vb;
|
struct val_blk vb;
|
||||||
@ -1289,7 +1290,7 @@ static int res_lock(struct lockspace *ls, struct resource *r, struct action *act
|
|||||||
if (r->type == LD_RT_LV && act->lv_args[0])
|
if (r->type == LD_RT_LV && act->lv_args[0])
|
||||||
memcpy(r->lv_args, act->lv_args, MAX_ARGS);
|
memcpy(r->lv_args, act->lv_args, MAX_ARGS);
|
||||||
|
|
||||||
rv = lm_lock(ls, r, act->mode, act, &vb, retry,
|
rv = lm_lock(ls, r, act->mode, act, &vb, retry, owner,
|
||||||
act->flags & LD_AF_ADOPT_ONLY ? 1 : 0,
|
act->flags & LD_AF_ADOPT_ONLY ? 1 : 0,
|
||||||
act->flags & LD_AF_ADOPT ? 1 : 0);
|
act->flags & LD_AF_ADOPT ? 1 : 0);
|
||||||
|
|
||||||
@ -1901,6 +1902,7 @@ out:
|
|||||||
static void res_process(struct lockspace *ls, struct resource *r,
|
static void res_process(struct lockspace *ls, struct resource *r,
|
||||||
struct list_head *act_close_list, int *retry_out)
|
struct list_head *act_close_list, int *retry_out)
|
||||||
{
|
{
|
||||||
|
struct owner owner = { 0 };
|
||||||
struct action *act, *safe, *act_close;
|
struct action *act, *safe, *act_close;
|
||||||
struct lock *lk;
|
struct lock *lk;
|
||||||
uint32_t unlock_by_client_id = 0;
|
uint32_t unlock_by_client_id = 0;
|
||||||
@ -2189,8 +2191,15 @@ static void res_process(struct lockspace *ls, struct resource *r,
|
|||||||
|
|
||||||
if (act->op == LD_OP_LOCK && act->mode == LD_LK_SH) {
|
if (act->op == LD_OP_LOCK && act->mode == LD_LK_SH) {
|
||||||
lm_retry = 0;
|
lm_retry = 0;
|
||||||
|
memset(&owner, 0, sizeof(owner));
|
||||||
|
|
||||||
|
rv = res_lock(ls, r, act, &lm_retry, &owner);
|
||||||
|
|
||||||
|
/* TODO: if lock fails because it's owned by a failed host,
|
||||||
|
and persistent reservations are enabled, then remove the
|
||||||
|
pr of failed host_id, tell sanlock the host_id is now
|
||||||
|
dead, and retry lock request. */
|
||||||
|
|
||||||
rv = res_lock(ls, r, act, &lm_retry);
|
|
||||||
if ((rv == -EAGAIN) &&
|
if ((rv == -EAGAIN) &&
|
||||||
(act->retries <= act->max_retries) &&
|
(act->retries <= act->max_retries) &&
|
||||||
(lm_retry || (r->type != LD_RT_LV))) {
|
(lm_retry || (r->type != LD_RT_LV))) {
|
||||||
@ -2199,6 +2208,8 @@ static void res_process(struct lockspace *ls, struct resource *r,
|
|||||||
act->retries++;
|
act->retries++;
|
||||||
*retry_out = 1;
|
*retry_out = 1;
|
||||||
} else {
|
} else {
|
||||||
|
if (rv == -EAGAIN)
|
||||||
|
memcpy(&act->owner, &owner, sizeof(owner));
|
||||||
act->result = rv;
|
act->result = rv;
|
||||||
list_del(&act->list);
|
list_del(&act->list);
|
||||||
add_client_result(act);
|
add_client_result(act);
|
||||||
@ -2222,8 +2233,10 @@ static void res_process(struct lockspace *ls, struct resource *r,
|
|||||||
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
||||||
if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX) {
|
if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX) {
|
||||||
lm_retry = 0;
|
lm_retry = 0;
|
||||||
|
memset(&owner, 0, sizeof(owner));
|
||||||
|
|
||||||
|
rv = res_lock(ls, r, act, &lm_retry, &owner);
|
||||||
|
|
||||||
rv = res_lock(ls, r, act, &lm_retry);
|
|
||||||
if ((rv == -EAGAIN) &&
|
if ((rv == -EAGAIN) &&
|
||||||
(act->retries <= act->max_retries) &&
|
(act->retries <= act->max_retries) &&
|
||||||
(lm_retry || (r->type != LD_RT_LV))) {
|
(lm_retry || (r->type != LD_RT_LV))) {
|
||||||
@ -2232,6 +2245,8 @@ static void res_process(struct lockspace *ls, struct resource *r,
|
|||||||
act->retries++;
|
act->retries++;
|
||||||
*retry_out = 1;
|
*retry_out = 1;
|
||||||
} else {
|
} else {
|
||||||
|
if (rv == -EAGAIN)
|
||||||
|
memcpy(&act->owner, &owner, sizeof(owner));
|
||||||
act->result = rv;
|
act->result = rv;
|
||||||
list_del(&act->list);
|
list_del(&act->list);
|
||||||
add_client_result(act);
|
add_client_result(act);
|
||||||
@ -4235,6 +4250,31 @@ static int client_send_result(struct client *cl, struct action *act)
|
|||||||
"result = " FMTd64, (int64_t) act->result,
|
"result = " FMTd64, (int64_t) act->result,
|
||||||
"dump_len = " FMTd64, (int64_t) dump_len,
|
"dump_len = " FMTd64, (int64_t) dump_len,
|
||||||
NULL);
|
NULL);
|
||||||
|
} else if (act->op == LD_OP_LOCK && act->owner.host_id) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* lock reply with owner info
|
||||||
|
*/
|
||||||
|
|
||||||
|
log_debug("send %s[%d][%u] %s%s%s result %d owner %u %u %u %s %s",
|
||||||
|
cl->name[0] ? cl->name : "client", cl->pid, cl->id,
|
||||||
|
op_mode_str(act->op, act->mode), act->rt ? "_" : "", rt_str(act->rt), act->result,
|
||||||
|
act->owner.host_id, act->owner.generation, act->owner.timestamp,
|
||||||
|
act->owner.state[0] ? act->owner.state : "",
|
||||||
|
act->owner.name[0] ? act->owner.name : "");
|
||||||
|
|
||||||
|
res = daemon_reply_simple("OK",
|
||||||
|
"op = " FMTd64, (int64_t) act->op,
|
||||||
|
"lock_type = %s", lm_str(act->lm_type),
|
||||||
|
"op_result = " FMTd64, (int64_t) act->result,
|
||||||
|
"lm_result = " FMTd64, (int64_t) act->lm_rv,
|
||||||
|
"owner_host_id = " FMTd64, (int64_t) act->owner.host_id,
|
||||||
|
"owner_generation = " FMTd64, (int64_t) act->owner.generation,
|
||||||
|
"owner_timestamp = " FMTd64, (int64_t) act->owner.timestamp,
|
||||||
|
"owner_state = %s", act->owner.state[0] ? act->owner.state : "none",
|
||||||
|
"owner_name = %s", act->owner.name[0] ? act->owner.name : "none",
|
||||||
|
"result_flags = %s", result_flags[0] ? result_flags : "none",
|
||||||
|
NULL);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* A normal reply.
|
* A normal reply.
|
||||||
|
@ -130,6 +130,17 @@ struct pvs {
|
|||||||
int num;
|
int num;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define OWNER_NAME_SIZE 64
|
||||||
|
#define OWNER_STATE_SIZE 32
|
||||||
|
|
||||||
|
/*
 * Description of the host that currently holds a lease.
 * lm_lock_sanlock() fills this in from the sanlk_host and owner name
 * returned by sanlock_acquire2() when the lock is held by another host,
 * and client_send_result() forwards the fields to the command as the
 * owner_* entries of the lock reply.
 */
struct owner {
|
||||||
|
uint32_t host_id; /* sanlock host_id of the holder; zero means no owner info is available */
|
||||||
|
uint32_t generation; /* generation reported by sanlock for that host, truncated to 32 bits */
|
||||||
|
uint32_t timestamp; /* timestamp reported by sanlock for that host, truncated to 32 bits */
|
||||||
|
char state[OWNER_STATE_SIZE]; /* host state name produced by _host_flags_to_str(), e.g. "LIVE" */
|
||||||
|
char name[OWNER_NAME_SIZE]; /* owner name string returned by sanlock_acquire2(), possibly truncated */
|
||||||
|
};
|
||||||
|
|
||||||
struct action {
|
struct action {
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
uint32_t client_id;
|
uint32_t client_id;
|
||||||
@ -154,6 +165,7 @@ struct action {
|
|||||||
char vg_args[MAX_ARGS+1];
|
char vg_args[MAX_ARGS+1];
|
||||||
char lv_args[MAX_ARGS+1];
|
char lv_args[MAX_ARGS+1];
|
||||||
char prev_lv_args[MAX_ARGS+1];
|
char prev_lv_args[MAX_ARGS+1];
|
||||||
|
struct owner owner;
|
||||||
struct pvs pvs; /* PV list for idm */
|
struct pvs pvs; /* PV list for idm */
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -553,7 +565,7 @@ int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt_only, int adopt_ok,
|
|||||||
int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg);
|
int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg);
|
||||||
int lm_add_resource_sanlock(struct lockspace *ls, struct resource *r);
|
int lm_add_resource_sanlock(struct lockspace *ls, struct resource *r);
|
||||||
int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||||
struct val_blk *vb_out, int *retry,
|
struct val_blk *vb_out, int *retry, struct owner *owner,
|
||||||
int adopt_only, int adopt_ok);
|
int adopt_only, int adopt_ok);
|
||||||
int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
|
int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
|
||||||
int ld_mode, uint32_t r_version);
|
int ld_mode, uint32_t r_version);
|
||||||
@ -617,7 +629,7 @@ static inline int lm_add_resource_sanlock(struct lockspace *ls, struct resource
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
static inline int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||||
struct val_blk *vb_out, int *retry,
|
struct val_blk *vb_out, int *retry, struct owner *owner,
|
||||||
int adopt_only, int adopt_ok)
|
int adopt_only, int adopt_ok)
|
||||||
{
|
{
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
/* FIXME: copied from sanlock header until the sanlock update is more widespread */
|
/* FIXME: copied from sanlock header until the sanlock update is more widespread */
|
||||||
#define SANLK_ADD_NODELAY 0x00000002
|
#define SANLK_ADD_NODELAY 0x00000002
|
||||||
|
|
||||||
|
#define SANLOCK_HAS_ACQUIRE2 1
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <poll.h>
|
#include <poll.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
@ -1797,13 +1799,33 @@ int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *_host_flags_to_str(uint32_t flags)
|
||||||
|
{
|
||||||
|
int val = flags & SANLK_HOST_MASK;
|
||||||
|
|
||||||
|
if (val == SANLK_HOST_FREE)
|
||||||
|
return "FREE";
|
||||||
|
if (val == SANLK_HOST_LIVE)
|
||||||
|
return "LIVE";
|
||||||
|
if (val == SANLK_HOST_FAIL)
|
||||||
|
return "FAIL";
|
||||||
|
if (val == SANLK_HOST_DEAD)
|
||||||
|
return "DEAD";
|
||||||
|
if (val == SANLK_HOST_UNKNOWN)
|
||||||
|
return "UNKNOWN";
|
||||||
|
return "ERROR";
|
||||||
|
}
|
||||||
|
|
||||||
int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
||||||
struct val_blk *vb_out, int *retry, int adopt_only, int adopt_ok)
|
struct val_blk *vb_out, int *retry, struct owner *owner,
|
||||||
|
int adopt_only, int adopt_ok)
|
||||||
{
|
{
|
||||||
struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
|
struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
|
||||||
struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
|
struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
|
||||||
struct sanlk_resource *rs;
|
struct sanlk_resource *rs;
|
||||||
struct sanlk_options opt;
|
struct sanlk_options opt;
|
||||||
|
struct sanlk_host owner_host = { 0 };
|
||||||
|
char *owner_name = NULL;
|
||||||
uint64_t lock_lv_offset;
|
uint64_t lock_lv_offset;
|
||||||
uint32_t flags = 0;
|
uint32_t flags = 0;
|
||||||
struct val_blk vb = { 0 };
|
struct val_blk vb = { 0 };
|
||||||
@ -1907,7 +1929,11 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
|||||||
memset(&opt, 0, sizeof(opt));
|
memset(&opt, 0, sizeof(opt));
|
||||||
sprintf(opt.owner_name, "%s", "lvmlockd");
|
sprintf(opt.owner_name, "%s", "lvmlockd");
|
||||||
|
|
||||||
|
#ifdef SANLOCK_HAS_ACQUIRE2
|
||||||
|
rv = sanlock_acquire2(lms->sock, -1, flags, rs, &opt, &owner_host, &owner_name);
|
||||||
|
#else
|
||||||
rv = sanlock_acquire(lms->sock, -1, flags, 1, &rs, &opt);
|
rv = sanlock_acquire(lms->sock, -1, flags, 1, &rs, &opt);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* errors: translate the sanlock error number to an lvmlockd error.
|
* errors: translate the sanlock error number to an lvmlockd error.
|
||||||
@ -1915,17 +1941,6 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
|||||||
* this function to code that doesn't recognize sanlock error numbers.
|
* this function to code that doesn't recognize sanlock error numbers.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (rv == -EAGAIN) {
|
|
||||||
/*
|
|
||||||
* It appears that sanlock_acquire returns EAGAIN when we request
|
|
||||||
* a shared lock but the lock is held ex by another host.
|
|
||||||
* There's no point in retrying this case, just return an error.
|
|
||||||
*/
|
|
||||||
log_debug("%s:%s lock_san acquire mode %d rv EAGAIN", ls->name, r->name, ld_mode);
|
|
||||||
*retry = 0;
|
|
||||||
return -EAGAIN;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((rv == -EMSGSIZE) && (r->type == LD_RT_LV)) {
|
if ((rv == -EMSGSIZE) && (r->type == LD_RT_LV)) {
|
||||||
/*
|
/*
|
||||||
* sanlock tried to read beyond the end of the device,
|
* sanlock tried to read beyond the end of the device,
|
||||||
@ -1962,64 +1977,68 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
|
|||||||
return -EADOPT_NONE;
|
return -EADOPT_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rv == SANLK_ACQUIRE_IDLIVE || rv == SANLK_ACQUIRE_OWNED || rv == SANLK_ACQUIRE_OTHER) {
|
if (rv == SANLK_ACQUIRE_IDLIVE ||
|
||||||
|
rv == SANLK_ACQUIRE_OWNED ||
|
||||||
|
rv == SANLK_ACQUIRE_OTHER ||
|
||||||
|
rv == SANLK_ACQUIRE_OWNED_RETRY ||
|
||||||
|
rv == -EAGAIN) {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The lock is held by another host. These failures can
|
* EAGAIN: when a shared lock is held, and we request an ex lock.
|
||||||
* happen while multiple hosts are concurrently acquiring
|
|
||||||
* shared locks. We want to retry a couple times in this
|
|
||||||
* case because we'll probably get the sh lock.
|
|
||||||
*
|
*
|
||||||
* I believe these are also the errors when requesting an
|
* OWNED_RETRY: the lock is held by a failed but not yet dead host.
|
||||||
* ex lock that another host holds ex. We want to report
|
* Retrying will eventually find the host is dead (and the lock is
|
||||||
* something like: "lock is held by another host" in this case.
|
* granted), or another host has acquired it.
|
||||||
* Retry is pointless here.
|
|
||||||
*
|
*
|
||||||
* We can't distinguish between the two cases above,
|
* Multiple hosts all requesting shared locks can also result in
|
||||||
* so if requesting a sh lock, retry a couple times,
|
* some transient errors here (shared locks involve acquiring the
|
||||||
* otherwise don't.
|
* paxos lease ex for a short period, which means two hosts both
|
||||||
|
* requesting sh at once can cause one to fail here.)
|
||||||
|
* Retry here to attempt to cover these transient failures.
|
||||||
|
*
|
||||||
|
* The command also has its own configurable retry logic.
|
||||||
|
* The intention is to handle actual lock contention retries
|
||||||
|
* from the command, and the transient failures from concurrent
|
||||||
|
* shared requests here. We don't actually know when a failure
|
||||||
|
* was related to the transient concurrent sh, so we just guess
|
||||||
|
* it was if we were requesting a sh lock.
|
||||||
*/
|
*/
|
||||||
log_debug("%s:%s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
|
|
||||||
*retry = (ld_mode == LD_LK_SH) ? 1 : 0;
|
*retry = (ld_mode == LD_LK_SH) ? 1 : 0;
|
||||||
|
|
||||||
|
if (rv == SANLK_ACQUIRE_OWNED_RETRY)
|
||||||
|
*retry = 0;
|
||||||
|
|
||||||
|
if (owner && owner_host.host_id) {
|
||||||
|
const char *host_state;
|
||||||
|
|
||||||
|
owner->host_id = (uint32_t)owner_host.host_id;
|
||||||
|
owner->generation = (uint32_t)owner_host.generation;
|
||||||
|
owner->timestamp = (uint32_t)owner_host.timestamp;
|
||||||
|
|
||||||
|
if ((host_state = _host_flags_to_str(owner_host.flags)))
|
||||||
|
dm_strncpy(owner->state, host_state, OWNER_STATE_SIZE-1);
|
||||||
|
|
||||||
|
if (owner_name) {
|
||||||
|
dm_strncpy(owner->name, owner_name, OWNER_NAME_SIZE-1);
|
||||||
|
free(owner_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_debug("%s:%s lock_san acquire mode %d lock held %d owner %u %u %u %s %s",
|
||||||
|
ls->name, r->name, ld_mode, rv,
|
||||||
|
owner->host_id, owner->generation, owner->timestamp,
|
||||||
|
owner->state, owner->name ?: "");
|
||||||
|
} else {
|
||||||
|
log_debug("%s:%s lock_san acquire mode %d lock held %d",
|
||||||
|
ls->name, r->name, ld_mode, rv);
|
||||||
|
}
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rv == SANLK_AIO_TIMEOUT) {
|
if (rv == SANLK_AIO_TIMEOUT) {
|
||||||
/*
|
log_debug("%s:%s lock_san acquire mode %d io timeout", ls->name, r->name, ld_mode);
|
||||||
* sanlock got an i/o timeout when trying to acquire the
|
|
||||||
* lease on disk.
|
|
||||||
*/
|
|
||||||
log_debug("%s:%s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
|
|
||||||
*retry = 0;
|
*retry = 0;
|
||||||
return -EAGAIN;
|
return -EIOTIMEOUT;
|
||||||
}
|
|
||||||
|
|
||||||
if (rv == SANLK_DBLOCK_LVER || rv == SANLK_DBLOCK_MBAL) {
|
|
||||||
/*
|
|
||||||
* There was contention with another host for the lease,
|
|
||||||
* and we lost.
|
|
||||||
*/
|
|
||||||
log_debug("%s:%s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
|
|
||||||
*retry = 0;
|
|
||||||
return -EAGAIN;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rv == SANLK_ACQUIRE_OWNED_RETRY) {
|
|
||||||
/*
|
|
||||||
* The lock is held by a failed host, and will eventually
|
|
||||||
* expire. If we retry we'll eventually acquire the lock
|
|
||||||
* (or find someone else has acquired it). The EAGAIN retry
|
|
||||||
* attempts for SH locks above would not be sufficient for
|
|
||||||
* the length of expiration time. We could add a longer
|
|
||||||
* retry time here to cover the full expiration time and block
|
|
||||||
* the activation command for that long. For now just return
|
|
||||||
* the standard error indicating that another host still owns
|
|
||||||
* the lease. FIXME: return a different error number so the
|
|
||||||
* command can print a different error indicating that the
|
|
||||||
* owner of the lease is in the process of expiring?
|
|
||||||
*/
|
|
||||||
log_debug("%s:%s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
|
|
||||||
*retry = 0;
|
|
||||||
return -EAGAIN;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rv < 0) {
|
if (rv < 0) {
|
||||||
@ -2162,8 +2181,6 @@ int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
|
|||||||
case SANLK_ACQUIRE_OWNED_RETRY:
|
case SANLK_ACQUIRE_OWNED_RETRY:
|
||||||
case SANLK_ACQUIRE_OTHER:
|
case SANLK_ACQUIRE_OTHER:
|
||||||
case SANLK_AIO_TIMEOUT:
|
case SANLK_AIO_TIMEOUT:
|
||||||
case SANLK_DBLOCK_LVER:
|
|
||||||
case SANLK_DBLOCK_MBAL:
|
|
||||||
/* expected errors from known/normal cases like lock contention or io timeouts */
|
/* expected errors from known/normal cases like lock contention or io timeouts */
|
||||||
log_debug("%s:%s convert_san error %d", ls->name, r->name, rv);
|
log_debug("%s:%s convert_san error %d", ls->name, r->name, rv);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
|
@ -32,6 +32,12 @@ struct lvmlockd_pvs {
|
|||||||
int num;
|
int num;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Lock owner details extracted from an lvmlockd reply by _lockd_result().
 * Used by the command to report which host holds a lock that could not
 * be acquired.
 */
struct owner {
|
||||||
|
uint32_t host_id; /* from the "owner_host_id" reply field; zero when the reply has none */
|
||||||
|
uint32_t generation; /* from the "owner_generation" reply field; zero when the reply has none */
|
||||||
|
char *name; /* copy of the "owner_name" reply field, allocated from cmd->mem */
|
||||||
|
};
|
||||||
|
|
||||||
void lvmlockd_set_socket(const char *sock)
|
void lvmlockd_set_socket(const char *sock)
|
||||||
{
|
{
|
||||||
_lvmlockd_socket = sock;
|
_lvmlockd_socket = sock;
|
||||||
@ -132,6 +138,21 @@ static void _flags_str_to_lockd_flags(const char *flags_str, uint32_t *lockd_fla
|
|||||||
*lockd_flags |= LD_RF_SH_EXISTS;
|
*lockd_flags |= LD_RF_SH_EXISTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static char *_owner_str(struct owner *owner)
|
||||||
|
{
|
||||||
|
static char log_owner_str[128];
|
||||||
|
|
||||||
|
if (!owner || !owner->host_id)
|
||||||
|
return (char *)"";
|
||||||
|
|
||||||
|
log_owner_str[0] = '\0';
|
||||||
|
|
||||||
|
/* Use a --lockopt setting to print all owner details? */
|
||||||
|
|
||||||
|
snprintf(log_owner_str, sizeof(log_owner_str)-1, " (host_id %u)", owner->host_id);
|
||||||
|
return log_owner_str;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* evaluate the reply from lvmlockd, check for errors, extract
|
* evaluate the reply from lvmlockd, check for errors, extract
|
||||||
* the result and lockd_flags returned by lvmlockd.
|
* the result and lockd_flags returned by lvmlockd.
|
||||||
@ -146,10 +167,11 @@ static void _flags_str_to_lockd_flags(const char *flags_str, uint32_t *lockd_fla
|
|||||||
*/
|
*/
|
||||||
#define NO_LOCKD_RESULT (-1000)
|
#define NO_LOCKD_RESULT (-1000)
|
||||||
|
|
||||||
static int _lockd_result(const char *req_name, daemon_reply reply, int *result, uint32_t *lockd_flags)
|
static int _lockd_result(struct cmd_context *cmd, const char *req_name, daemon_reply reply,
|
||||||
|
int *result, uint32_t *lockd_flags, struct owner *owner)
|
||||||
{
|
{
|
||||||
int reply_result;
|
int reply_result;
|
||||||
const char *flags_str = NULL;
|
const char *str;
|
||||||
|
|
||||||
*result = -1;
|
*result = -1;
|
||||||
|
|
||||||
@ -172,8 +194,15 @@ static int _lockd_result(const char *req_name, daemon_reply reply, int *result,
|
|||||||
*result = reply_result;
|
*result = reply_result;
|
||||||
|
|
||||||
if (lockd_flags) {
|
if (lockd_flags) {
|
||||||
if ((flags_str = daemon_reply_str(reply, "result_flags", NULL)))
|
if ((str = daemon_reply_str(reply, "result_flags", NULL)))
|
||||||
_flags_str_to_lockd_flags(flags_str, lockd_flags);
|
_flags_str_to_lockd_flags(str, lockd_flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (owner) {
|
||||||
|
owner->host_id = (uint32_t)daemon_reply_int(reply, "owner_host_id", 0);
|
||||||
|
owner->generation = (uint32_t)daemon_reply_int(reply, "owner_generation", 0);
|
||||||
|
if ((str = daemon_reply_str(reply, "owner_name", "none")))
|
||||||
|
owner->name = dm_pool_strdup(cmd->mem, str);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("lockd %s result: %d", req_name, reply_result);
|
log_debug("lockd %s result: %d", req_name, reply_result);
|
||||||
@ -389,7 +418,8 @@ static int _lockd_request(struct cmd_context *cmd,
|
|||||||
const char *opts,
|
const char *opts,
|
||||||
const struct lvmlockd_pvs *lock_pvs,
|
const struct lvmlockd_pvs *lock_pvs,
|
||||||
int *result,
|
int *result,
|
||||||
uint32_t *lockd_flags)
|
uint32_t *lockd_flags,
|
||||||
|
struct owner *owner)
|
||||||
{
|
{
|
||||||
const char *cmd_name = get_cmd_name();
|
const char *cmd_name = get_cmd_name();
|
||||||
daemon_reply reply;
|
daemon_reply reply;
|
||||||
@ -426,7 +456,7 @@ static int _lockd_request(struct cmd_context *cmd,
|
|||||||
"lv_lock_args = %s", lv_lock_args ?: "none",
|
"lv_lock_args = %s", lv_lock_args ?: "none",
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result(req_name, reply, result, lockd_flags))
|
if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -446,7 +476,7 @@ static int _lockd_request(struct cmd_context *cmd,
|
|||||||
"vg_lock_args = %s", vg_lock_args ?: "none",
|
"vg_lock_args = %s", vg_lock_args ?: "none",
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result(req_name, reply, result, lockd_flags))
|
if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -464,7 +494,7 @@ static int _lockd_request(struct cmd_context *cmd,
|
|||||||
"vg_lock_type = %s", vg_lock_type ?: "none",
|
"vg_lock_type = %s", vg_lock_type ?: "none",
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result(req_name, reply, result, lockd_flags))
|
if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
log_debug("lockd %s %s result %d %x",
|
log_debug("lockd %s %s result %d %x",
|
||||||
@ -735,7 +765,7 @@ static int _handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
|
|||||||
"lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes,
|
"lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("find_free_lock", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "find_free_lock", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -790,7 +820,7 @@ static int _init_vg(struct cmd_context *cmd, struct volume_group *vg,
|
|||||||
"vg_lock_type = %s", lock_type,
|
"vg_lock_type = %s", lock_type,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("init_vg", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
result = -ELOCKD;
|
result = -ELOCKD;
|
||||||
} else {
|
} else {
|
||||||
@ -987,7 +1017,7 @@ static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, in
|
|||||||
"opts = %s", opts ?: "none",
|
"opts = %s", opts ?: "none",
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("init_vg", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
result = -ELOCKD;
|
result = -ELOCKD;
|
||||||
} else {
|
} else {
|
||||||
@ -1089,7 +1119,7 @@ static int _free_vg(struct cmd_context *cmd, struct volume_group *vg)
|
|||||||
"vg_lock_args = %s", vg->lock_args,
|
"vg_lock_args = %s", vg->lock_args,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("free_vg", reply, &result, &lockd_flags)) {
|
if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -1143,7 +1173,7 @@ static int _busy_vg(struct cmd_context *cmd, struct volume_group *vg)
|
|||||||
"vg_lock_args = %s", vg->lock_args,
|
"vg_lock_args = %s", vg->lock_args,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("busy_vg", reply, &result, &lockd_flags)) {
|
if (!_lockd_result(cmd, "busy_vg", reply, &result, &lockd_flags, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -1217,7 +1247,7 @@ static int _free_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg)
|
|||||||
"vg_lock_args = %s", vg->lock_args,
|
"vg_lock_args = %s", vg->lock_args,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("free_vg", reply, &result, &lockd_flags)) {
|
if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -1497,7 +1527,7 @@ int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, int *exists
|
|||||||
NULL);
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!_lockd_result("start_vg", reply, &result, &lockd_flags)) {
|
if (!_lockd_result(cmd, "start_vg", reply, &result, &lockd_flags, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
result = -ELOCKD;
|
result = -ELOCKD;
|
||||||
} else {
|
} else {
|
||||||
@ -1566,7 +1596,7 @@ int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg)
|
|||||||
"vg_name = %s", vg->name,
|
"vg_name = %s", vg->name,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("stop_vg", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "stop_vg", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -1612,7 +1642,7 @@ int lockd_start_wait(struct cmd_context *cmd)
|
|||||||
"pid = " FMTd64, (int64_t) getpid(),
|
"pid = " FMTd64, (int64_t) getpid(),
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("start_wait", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "start_wait", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -1683,6 +1713,7 @@ int lockd_start_wait(struct cmd_context *cmd)
|
|||||||
|
|
||||||
int lockd_global_create(struct cmd_context *cmd, const char *def_mode, const char *vg_lock_type)
|
int lockd_global_create(struct cmd_context *cmd, const char *def_mode, const char *vg_lock_type)
|
||||||
{
|
{
|
||||||
|
struct owner owner = { 0 };
|
||||||
const char *mode = NULL;
|
const char *mode = NULL;
|
||||||
uint32_t lockd_flags;
|
uint32_t lockd_flags;
|
||||||
int retries = 0;
|
int retries = 0;
|
||||||
@ -1730,15 +1761,18 @@ int lockd_global_create(struct cmd_context *cmd, const char *def_mode, const cha
|
|||||||
req:
|
req:
|
||||||
if (!_lockd_request(cmd, "lock_gl",
|
if (!_lockd_request(cmd, "lock_gl",
|
||||||
NULL, vg_lock_type, NULL, NULL, NULL, NULL, mode, NULL,
|
NULL, vg_lock_type, NULL, NULL, NULL, NULL, mode, NULL,
|
||||||
NULL, &result, &lockd_flags)) {
|
NULL, &result, &lockd_flags, &owner)) {
|
||||||
/* No result from lvmlockd, it is probably not running. */
|
/* No result from lvmlockd, it is probably not running. */
|
||||||
log_error("Global lock failed: check that lvmlockd is running.");
|
log_error("Global lock failed: check that lvmlockd is running.");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result == -EAGAIN) {
|
if (result == -EAGAIN || result == -EIOTIMEOUT) {
|
||||||
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
||||||
log_warn("Retrying %s global lock", mode);
|
if (result == -EIOTIMEOUT)
|
||||||
|
log_warn("Retrying global lock: io timeout");
|
||||||
|
else
|
||||||
|
log_warn("Retrying global lock: held by other host%s", _owner_str(&owner));
|
||||||
sleep(1);
|
sleep(1);
|
||||||
retries++;
|
retries++;
|
||||||
goto req;
|
goto req;
|
||||||
@ -1821,8 +1855,10 @@ int lockd_global_create(struct cmd_context *cmd, const char *def_mode, const cha
|
|||||||
if (result < 0) {
|
if (result < 0) {
|
||||||
if (result == -ESTARTING)
|
if (result == -ESTARTING)
|
||||||
log_error("Global lock failed: lockspace is starting.");
|
log_error("Global lock failed: lockspace is starting.");
|
||||||
|
else if (result == -EIOTIMEOUT)
|
||||||
|
log_error("Global lock failed: io timeout");
|
||||||
else if (result == -EAGAIN)
|
else if (result == -EAGAIN)
|
||||||
log_error("Global lock failed: held by other host.");
|
log_error("Global lock failed: held by other host%s", _owner_str(&owner));
|
||||||
else if (result == -EPROTONOSUPPORT)
|
else if (result == -EPROTONOSUPPORT)
|
||||||
log_error("VG create failed: lock manager %s is not supported by lvmlockd.", vg_lock_type);
|
log_error("VG create failed: lock manager %s is not supported by lvmlockd.", vg_lock_type);
|
||||||
else
|
else
|
||||||
@ -1925,6 +1961,7 @@ out:
|
|||||||
|
|
||||||
int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
||||||
{
|
{
|
||||||
|
struct owner owner = { 0 };
|
||||||
const char *mode = NULL;
|
const char *mode = NULL;
|
||||||
const char *opts = NULL;
|
const char *opts = NULL;
|
||||||
uint32_t lockd_flags;
|
uint32_t lockd_flags;
|
||||||
@ -1976,7 +2013,7 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
|
|
||||||
if (!_lockd_request(cmd, "lock_gl",
|
if (!_lockd_request(cmd, "lock_gl",
|
||||||
NULL, NULL, NULL, NULL, NULL, NULL, mode, opts,
|
NULL, NULL, NULL, NULL, NULL, NULL, mode, opts,
|
||||||
NULL, &result, &lockd_flags)) {
|
NULL, &result, &lockd_flags, &owner)) {
|
||||||
/* No result from lvmlockd, it is probably not running. */
|
/* No result from lvmlockd, it is probably not running. */
|
||||||
|
|
||||||
/* We don't care if an unlock fails. */
|
/* We don't care if an unlock fails. */
|
||||||
@ -1993,9 +2030,12 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result == -EAGAIN) {
|
if (result == -EAGAIN || result == -EIOTIMEOUT) {
|
||||||
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
||||||
log_warn("Retrying %s global lock", mode);
|
if (result == -EIOTIMEOUT)
|
||||||
|
log_warn("Retrying global lock: io timeout");
|
||||||
|
else
|
||||||
|
log_warn("Retrying global lock: held by other host%s", _owner_str(&owner));
|
||||||
sleep(1);
|
sleep(1);
|
||||||
retries++;
|
retries++;
|
||||||
goto req;
|
goto req;
|
||||||
@ -2038,10 +2078,12 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
result == -ESTARTING ||
|
result == -ESTARTING ||
|
||||||
result == -EVGKILLED ||
|
result == -EVGKILLED ||
|
||||||
result == -ELOCKIO ||
|
result == -ELOCKIO ||
|
||||||
|
result == -EIOTIMEOUT ||
|
||||||
result == -ELMERR ||
|
result == -ELMERR ||
|
||||||
result == -EORPHAN ||
|
result == -EORPHAN ||
|
||||||
result == -EADOPT_RETRY ||
|
result == -EADOPT_RETRY ||
|
||||||
result == -EADOPT_NONE) {
|
result == -EADOPT_NONE ||
|
||||||
|
result == -EAGAIN) {
|
||||||
/*
|
/*
|
||||||
* If an ex global lock fails, then the command fails.
|
* If an ex global lock fails, then the command fails.
|
||||||
*/
|
*/
|
||||||
@ -2052,6 +2094,8 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
log_error("Global lock failed: check that global lockspace is started");
|
log_error("Global lock failed: check that global lockspace is started");
|
||||||
else if (result == -ELOCKIO)
|
else if (result == -ELOCKIO)
|
||||||
log_error("Global lock failed: storage errors for sanlock leases");
|
log_error("Global lock failed: storage errors for sanlock leases");
|
||||||
|
else if (result == -EIOTIMEOUT)
|
||||||
|
log_error("Global lock failed: io timeout");
|
||||||
else if (result == -ELMERR)
|
else if (result == -ELMERR)
|
||||||
log_error("Global lock failed: lock manager error");
|
log_error("Global lock failed: lock manager error");
|
||||||
else if (result == -EVGKILLED)
|
else if (result == -EVGKILLED)
|
||||||
@ -2062,6 +2106,8 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
log_error("Global lock failed: adopt found no orphan");
|
log_error("Global lock failed: adopt found no orphan");
|
||||||
else if (result == -EADOPT_RETRY)
|
else if (result == -EADOPT_RETRY)
|
||||||
log_error("Global lock failed: adopt found other mode");
|
log_error("Global lock failed: adopt found other mode");
|
||||||
|
else if (result == -EAGAIN)
|
||||||
|
log_error("Global lock failed: held by other host%s", _owner_str(&owner));
|
||||||
else
|
else
|
||||||
log_error("Global lock failed: error %d", result);
|
log_error("Global lock failed: error %d", result);
|
||||||
|
|
||||||
@ -2085,6 +2131,11 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
goto allow;
|
goto allow;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (result == -EIOTIMEOUT) {
|
||||||
|
log_warn("Skipping global lock: io timeout");
|
||||||
|
goto allow;
|
||||||
|
}
|
||||||
|
|
||||||
if ((lockd_flags & LD_RF_NO_GL_LS) && (lockd_flags & LD_RF_WARN_GL_REMOVED)) {
|
if ((lockd_flags & LD_RF_NO_GL_LS) && (lockd_flags & LD_RF_WARN_GL_REMOVED)) {
|
||||||
log_warn("Skipping global lock: VG with global lock was removed");
|
log_warn("Skipping global lock: VG with global lock was removed");
|
||||||
goto allow;
|
goto allow;
|
||||||
@ -2110,12 +2161,16 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
goto allow;
|
goto allow;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (result == -EAGAIN) {
|
||||||
|
log_warn("Skipping global lock: held by other host%s", _owner_str(&owner));
|
||||||
|
goto allow;
|
||||||
|
}
|
||||||
|
|
||||||
if ((lockd_flags & LD_RF_NO_GL_LS) || (lockd_flags & LD_RF_NO_LOCKSPACES)) {
|
if ((lockd_flags & LD_RF_NO_GL_LS) || (lockd_flags & LD_RF_NO_LOCKSPACES)) {
|
||||||
log_debug("Skipping global lock: lockspace not found or started");
|
log_debug("Skipping global lock: lockspace not found or started");
|
||||||
goto allow;
|
goto allow;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is for completeness. If we reach here, then
|
* This is for completeness. If we reach here, then
|
||||||
* a specific check for the error should be added above
|
* a specific check for the error should be added above
|
||||||
@ -2129,21 +2184,13 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
log_warn("Duplicate sanlock global locks should be corrected");
|
log_warn("Duplicate sanlock global locks should be corrected");
|
||||||
|
|
||||||
if (result < 0) {
|
if (result < 0) {
|
||||||
if (result == -EAGAIN) {
|
/*
|
||||||
/*
|
* We don't intend to reach this. We should check
|
||||||
* Most of the time, retries should avoid this case.
|
* any known/possible error specifically and print
|
||||||
*/
|
* a more helpful message. This is for completeness.
|
||||||
log_error("Global lock failed: held by other host.");
|
*/
|
||||||
return 0;
|
log_error("Global lock failed: error %d.", result);
|
||||||
} else {
|
return 0;
|
||||||
/*
|
|
||||||
* We don't intend to reach this. We should check
|
|
||||||
* any known/possible error specifically and print
|
|
||||||
* a more helpful message. This is for completeness.
|
|
||||||
*/
|
|
||||||
log_error("Global lock failed: error %d.", result);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
allow:
|
allow:
|
||||||
@ -2194,6 +2241,7 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
|
|||||||
int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
||||||
uint32_t flags, uint32_t *lockd_state)
|
uint32_t flags, uint32_t *lockd_state)
|
||||||
{
|
{
|
||||||
|
struct owner owner = { 0 };
|
||||||
const char *mode = NULL;
|
const char *mode = NULL;
|
||||||
const char *opts = NULL;
|
const char *opts = NULL;
|
||||||
uint32_t lockd_flags;
|
uint32_t lockd_flags;
|
||||||
@ -2293,7 +2341,7 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
|||||||
|
|
||||||
if (!_lockd_request(cmd, "lock_vg",
|
if (!_lockd_request(cmd, "lock_vg",
|
||||||
vg_name, NULL, NULL, NULL, NULL, NULL, mode, opts,
|
vg_name, NULL, NULL, NULL, NULL, NULL, mode, opts,
|
||||||
NULL, &result, &lockd_flags)) {
|
NULL, &result, &lockd_flags, &owner)) {
|
||||||
/*
|
/*
|
||||||
* No result from lvmlockd, it is probably not running.
|
* No result from lvmlockd, it is probably not running.
|
||||||
* Decide if it is ok to continue without a lock in
|
* Decide if it is ok to continue without a lock in
|
||||||
@ -2305,9 +2353,12 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result == -EAGAIN) {
|
if (result == -EAGAIN || result == -EIOTIMEOUT) {
|
||||||
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
|
||||||
log_warn("Retrying %s lock on VG %s", mode, vg_name);
|
if (result == -EIOTIMEOUT)
|
||||||
|
log_warn("Retrying lock on VG %s: io timeout", vg_name);
|
||||||
|
else
|
||||||
|
log_warn("Retrying lock on VG %s: held by other host%s", vg_name, _owner_str(&owner));
|
||||||
sleep(1);
|
sleep(1);
|
||||||
retries++;
|
retries++;
|
||||||
goto req;
|
goto req;
|
||||||
@ -2381,6 +2432,19 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (result == -EIOTIMEOUT) {
|
||||||
|
if (!strcmp(mode, "un"))
|
||||||
|
goto out;
|
||||||
|
else if (!strcmp(mode, "sh")) {
|
||||||
|
log_warn("VG %s lock skipped: io timeout", vg_name);
|
||||||
|
goto out;
|
||||||
|
} else {
|
||||||
|
log_error("VG %s lock failed: io timeout", vg_name);
|
||||||
|
ret = 0;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The lock is held by another host, and retries have been unsuccessful.
|
* The lock is held by another host, and retries have been unsuccessful.
|
||||||
*/
|
*/
|
||||||
@ -2388,10 +2452,10 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
|
|||||||
if (!strcmp(mode, "un"))
|
if (!strcmp(mode, "un"))
|
||||||
goto out;
|
goto out;
|
||||||
else if (!strcmp(mode, "sh")) {
|
else if (!strcmp(mode, "sh")) {
|
||||||
log_warn("VG %s lock skipped: held by other host.", vg_name);
|
log_warn("VG %s lock skipped: held by other host%s", vg_name, _owner_str(&owner));
|
||||||
goto out;
|
goto out;
|
||||||
} else {
|
} else {
|
||||||
log_error("VG %s lock failed: held by other host.", vg_name);
|
log_error("VG %s lock failed: held by other host%s", vg_name, _owner_str(&owner));
|
||||||
ret = 0;
|
ret = 0;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -2522,7 +2586,7 @@ int lockd_vg_update(struct volume_group *vg)
|
|||||||
"version = " FMTd64, (int64_t) vg->seqno,
|
"version = " FMTd64, (int64_t) vg->seqno,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("vg_update", reply, &result, NULL)) {
|
if (!_lockd_result(vg->cmd, "vg_update", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -2554,7 +2618,7 @@ static int _query_lv(struct cmd_context *cmd, struct volume_group *vg,
|
|||||||
"lv_lock_args = %s", lock_args ?: "none",
|
"lv_lock_args = %s", lock_args ?: "none",
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("query_lock_lv", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "query_lock_lv", reply, &result, NULL, NULL)) {
|
||||||
/* No result from lvmlockd, it is probably not running. */
|
/* No result from lvmlockd, it is probably not running. */
|
||||||
log_error("Lock query failed for LV %s/%s", vg->name, lv_name);
|
log_error("Lock query failed for LV %s/%s", vg->name, lv_name);
|
||||||
return 0;
|
return 0;
|
||||||
@ -2621,6 +2685,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
|
|||||||
const char *lv_name, struct id *lv_id,
|
const char *lv_name, struct id *lv_id,
|
||||||
const char *lock_args, const char *def_mode, uint32_t flags)
|
const char *lock_args, const char *def_mode, uint32_t flags)
|
||||||
{
|
{
|
||||||
|
struct owner owner = { 0 };
|
||||||
char lv_uuid[64] __attribute__((aligned(8)));
|
char lv_uuid[64] __attribute__((aligned(8)));
|
||||||
char opt_buf[64] = {};
|
char opt_buf[64] = {};
|
||||||
const char *opts = NULL;
|
const char *opts = NULL;
|
||||||
@ -2718,7 +2783,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
|
|||||||
if (!_lockd_request(cmd, "lock_lv",
|
if (!_lockd_request(cmd, "lock_lv",
|
||||||
vg->name, vg->lock_type, vg->lock_args,
|
vg->name, vg->lock_type, vg->lock_args,
|
||||||
lv_name, lv_uuid, lock_args, mode, opts,
|
lv_name, lv_uuid, lock_args, mode, opts,
|
||||||
&lock_pvs, &result, &lockd_flags)) {
|
&lock_pvs, &result, &lockd_flags, NULL)) {
|
||||||
_lockd_free_pv_list(&lock_pvs);
|
_lockd_free_pv_list(&lock_pvs);
|
||||||
/* No result from lvmlockd, it is probably not running. */
|
/* No result from lvmlockd, it is probably not running. */
|
||||||
log_error("Locking failed for LV %s/%s", vg->name, lv_name);
|
log_error("Locking failed for LV %s/%s", vg->name, lv_name);
|
||||||
@ -2729,7 +2794,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
|
|||||||
if (!_lockd_request(cmd, "lock_lv",
|
if (!_lockd_request(cmd, "lock_lv",
|
||||||
vg->name, vg->lock_type, vg->lock_args,
|
vg->name, vg->lock_type, vg->lock_args,
|
||||||
lv_name, lv_uuid, lock_args, mode, opts,
|
lv_name, lv_uuid, lock_args, mode, opts,
|
||||||
NULL, &result, &lockd_flags)) {
|
NULL, &result, &lockd_flags, &owner)) {
|
||||||
/* No result from lvmlockd, it is probably not running. */
|
/* No result from lvmlockd, it is probably not running. */
|
||||||
log_error("Locking failed for LV %s/%s", vg->name, lv_name);
|
log_error("Locking failed for LV %s/%s", vg->name, lv_name);
|
||||||
return 0;
|
return 0;
|
||||||
@ -2744,7 +2809,12 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
|
|||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
if (result == -EAGAIN) {
|
if (result == -EAGAIN) {
|
||||||
log_error("LV locked by other host: %s/%s", vg->name, lv_name);
|
log_error("LV locked by other host: %s/%s%s", vg->name, lv_name, _owner_str(&owner));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result == -EIOTIMEOUT) {
|
||||||
|
log_error("LV %s/%s lock failed: io timeout.", vg->name, lv_name);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3649,7 +3719,7 @@ static int _init_lv_sanlock(struct cmd_context *cmd, struct volume_group *vg,
|
|||||||
"vg_lock_args = %s", vg->lock_args,
|
"vg_lock_args = %s", vg->lock_args,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("init_lv", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "init_lv", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -3724,7 +3794,7 @@ static int _free_lv(struct cmd_context *cmd, struct volume_group *vg,
|
|||||||
"lv_lock_args = %s", lock_args ?: "none",
|
"lv_lock_args = %s", lock_args ?: "none",
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("free_lv", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "free_lv", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -3989,7 +4059,7 @@ int lockd_rename_vg_before(struct cmd_context *cmd, struct volume_group *vg)
|
|||||||
"vg_lock_args = %s", vg->lock_args,
|
"vg_lock_args = %s", vg->lock_args,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("rename_vg_before", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "rename_vg_before", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -4054,7 +4124,7 @@ int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int
|
|||||||
"vg_lock_args = %s", vg->lock_args,
|
"vg_lock_args = %s", vg->lock_args,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("rename_vg_final", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "rename_vg_final", reply, &result, NULL, NULL)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else {
|
} else {
|
||||||
ret = (result < 0) ? 0 : 1;
|
ret = (result < 0) ? 0 : 1;
|
||||||
@ -4095,7 +4165,7 @@ const char *lockd_running_lock_type(struct cmd_context *cmd, int *found_multiple
|
|||||||
"pid = " FMTd64, (int64_t) getpid(),
|
"pid = " FMTd64, (int64_t) getpid(),
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("running_lm", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "running_lm", reply, &result, NULL, NULL)) {
|
||||||
log_error("Failed to get result from lvmlockd");
|
log_error("Failed to get result from lvmlockd");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -4216,7 +4286,7 @@ int lockd_lv_refresh(struct cmd_context *cmd, struct lvresize_params *lp)
|
|||||||
"path = %s", path,
|
"path = %s", path,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (!_lockd_result("refresh_lv", reply, &result, NULL)) {
|
if (!_lockd_result(cmd, "refresh_lv", reply, &result, NULL, NULL)) {
|
||||||
/* No result from lvmlockd, it is probably not running. */
|
/* No result from lvmlockd, it is probably not running. */
|
||||||
log_error("LV refresh failed for LV %s", path);
|
log_error("LV refresh failed for LV %s", path);
|
||||||
return 0;
|
return 0;
|
||||||
|
Reference in New Issue
Block a user