1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-04 09:18:36 +03:00

lvmlockd: handle loss of sanlock lease storage

This adds the infrastructure, code paths, error reporting,
etc. to handle storage errors, or storage loss, under the
sanlock leases in a VG that is being used.  The loss of
storage means sanlock cannot renew its leases, which means
that the host needs to stop using the shared VG before its
leases expire.

This still requires manually shutting down a VG that has
lost lease storage, e.g. unmounting file systems,
deactivating LVs in the VG.  The next step is to
automatically use a command like blkdeactivate to do that.
This commit is contained in:
David Teigland 2015-07-31 13:38:38 -05:00
parent 559ca8bc65
commit fd1782b5fc
7 changed files with 415 additions and 85 deletions

View File

@ -17,6 +17,7 @@
#include <signal.h> #include <signal.h>
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <syslog.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/un.h> #include <sys/un.h>
@ -26,14 +27,16 @@ static int info = 0;
static int dump = 0; static int dump = 0;
static int wait_opt = 0; static int wait_opt = 0;
static int force_opt = 0; static int force_opt = 0;
static int kill_vg = 0;
static int drop_vg = 0;
static int gl_enable = 0; static int gl_enable = 0;
static int gl_disable = 0; static int gl_disable = 0;
static int stop_lockspaces = 0; static int stop_lockspaces = 0;
static char *able_vg_name = NULL; static char *arg_vg_name = NULL;
#define DUMP_SOCKET_NAME "lvmlockd-dump.sock" #define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
#define DUMP_BUF_SIZE (1024 * 1024) #define DUMP_BUF_SIZE (1024 * 1024)
static char dump_buf[DUMP_BUF_SIZE]; static char dump_buf[DUMP_BUF_SIZE+1];
static int dump_len; static int dump_len;
static struct sockaddr_un dump_addr; static struct sockaddr_un dump_addr;
static socklen_t dump_addrlen; static socklen_t dump_addrlen;
@ -446,9 +449,9 @@ static int do_able(const char *req_name)
int rv; int rv;
reply = _lvmlockd_send(req_name, reply = _lvmlockd_send(req_name,
"cmd = %s", "lvmlock", "cmd = %s", "lvmlockctl",
"pid = %d", getpid(), "pid = %d", getpid(),
"vg_name = %s", able_vg_name, "vg_name = %s", arg_vg_name,
NULL); NULL);
if (!_lvmlockd_result(reply, &result)) { if (!_lvmlockd_result(reply, &result)) {
@ -477,7 +480,7 @@ static int do_stop_lockspaces(void)
strcat(opts, "force "); strcat(opts, "force ");
reply = _lvmlockd_send("stop_all", reply = _lvmlockd_send("stop_all",
"cmd = %s", "lvmlock", "cmd = %s", "lvmlockctl",
"pid = %d", getpid(), "pid = %d", getpid(),
"opts = %s", opts[0] ? opts : "none", "opts = %s", opts[0] ? opts : "none",
NULL); NULL);
@ -493,6 +496,87 @@ static int do_stop_lockspaces(void)
return rv; return rv;
} }
/*
 * Handle "lvmlockctl --kill <vg_name>".
 *
 * Reports the loss of sanlock lease storage for arg_vg_name through syslog
 * at LOG_EMERG, then sends a "kill_vg" request for that VG to lvmlockd.
 *
 * Returns 0 when _lvmlockd_result() reports success for the reply,
 * otherwise the value left in 'result' (-1 if nothing was stored there).
 */
static int do_kill(void)
{
	daemon_reply reply;
	/*
	 * Pre-set so the failure path below never logs or returns an
	 * indeterminate value; whether _lvmlockd_result() stores anything
	 * in 'result' when it fails is not visible from this function.
	 */
	int result = -1;
	int rv;

	syslog(LOG_EMERG, "Lost access to sanlock lease storage in VG %s.", arg_vg_name);
	/* These two lines explain the manual alternative to the FIXME below. */
	syslog(LOG_EMERG, "Immediately deactivate LVs in VG %s.", arg_vg_name);
	syslog(LOG_EMERG, "Once VG is unused, run lvmlockctl --drop %s.", arg_vg_name);

	/*
	 * It may not be strictly necessary to notify lvmlockd of the kill, but
	 * lvmlockd can use this information to avoid attempting any new lock
	 * requests in the VG (which would fail anyway), and can return an
	 * error indicating that the VG has been killed.
	 */
	reply = _lvmlockd_send("kill_vg",
				"cmd = %s", "lvmlockctl",
				"pid = %d", getpid(),
				"vg_name = %s", arg_vg_name,
				NULL);

	if (!_lvmlockd_result(reply, &result)) {
		log_error("lvmlockd result %d", result);
		rv = result;
	} else {
		rv = 0;
	}

	daemon_reply_destroy(reply);

	/*
	 * FIXME: here is where we should implement a strong form of
	 * blkdeactivate, and if it completes successfully, automatically call
	 * do_drop() afterward.  (The drop step may not always be necessary
	 * if the lvm commands run while shutting things down release all the
	 * leases.)
	 *
	 * run_strong_blkdeactivate();
	 * do_drop();
	 */

	return rv;
}
/*
 * Handle "lvmlockctl --drop <vg_name>".
 *
 * Logs a LOG_WARNING message through syslog and sends a "drop_vg" request
 * for arg_vg_name to lvmlockd.
 *
 * Returns 0 when _lvmlockd_result() reports success for the reply,
 * otherwise the value left in 'result' (-1 if nothing was stored there).
 */
static int do_drop(void)
{
	daemon_reply reply;
	/*
	 * Pre-set so the failure path below never logs or returns an
	 * indeterminate value; whether _lvmlockd_result() stores anything
	 * in 'result' when it fails is not visible from this function.
	 */
	int result = -1;
	int rv;

	syslog(LOG_WARNING, "Dropping locks for VG %s.", arg_vg_name);

	/*
	 * Check for misuse by looking for any active LVs in the VG
	 * and refusing this operation if found?  One possible way
	 * to kill LVs (e.g. if fs cannot be unmounted) is to suspend
	 * them, or replace them with the error target.  In that
	 * case the LV will still appear to be active, but it is
	 * safe to release the lock.
	 */
	reply = _lvmlockd_send("drop_vg",
				"cmd = %s", "lvmlockctl",
				"pid = %d", getpid(),
				"vg_name = %s", arg_vg_name,
				NULL);

	if (!_lvmlockd_result(reply, &result)) {
		log_error("lvmlockd result %d", result);
		rv = result;
	} else {
		rv = 0;
	}

	daemon_reply_destroy(reply);

	return rv;
}
static void print_usage(void) static void print_usage(void)
{ {
printf("lvmlockctl options\n"); printf("lvmlockctl options\n");
@ -509,12 +593,16 @@ static void print_usage(void)
printf(" Wait option for other commands.\n"); printf(" Wait option for other commands.\n");
printf("--force | -f 0|1>\n"); printf("--force | -f 0|1>\n");
printf(" Force option for other commands.\n"); printf(" Force option for other commands.\n");
printf("--stop-lockspaces | -S\n"); printf("--kill | -k <vg_name>\n");
printf(" Stop all lockspaces.\n"); printf(" Kill access to the vg when sanlock cannot renew lease.\n");
printf("--drop | -r <vg_name>\n");
printf(" Clear locks for the vg after it has been killed and is no longer used.\n");
printf("--gl-enable <vg_name>\n"); printf("--gl-enable <vg_name>\n");
printf(" Tell lvmlockd to enable the global lock in a sanlock vg.\n"); printf(" Tell lvmlockd to enable the global lock in a sanlock vg.\n");
printf("--gl-disable <vg_name>\n"); printf("--gl-disable <vg_name>\n");
printf(" Tell lvmlockd to disable the global lock in a sanlock vg.\n"); printf(" Tell lvmlockd to disable the global lock in a sanlock vg.\n");
printf("--stop-lockspaces | -S\n");
printf(" Stop all lockspaces.\n");
} }
static int read_options(int argc, char *argv[]) static int read_options(int argc, char *argv[])
@ -529,6 +617,8 @@ static int read_options(int argc, char *argv[])
{"dump", no_argument, 0, 'd' }, {"dump", no_argument, 0, 'd' },
{"wait", required_argument, 0, 'w' }, {"wait", required_argument, 0, 'w' },
{"force", required_argument, 0, 'f' }, {"force", required_argument, 0, 'f' },
{"kill", required_argument, 0, 'k' },
{"drop", required_argument, 0, 'r' },
{"gl-enable", required_argument, 0, 'E' }, {"gl-enable", required_argument, 0, 'E' },
{"gl-disable", required_argument, 0, 'D' }, {"gl-disable", required_argument, 0, 'D' },
{"stop-lockspaces", no_argument, 0, 'S' }, {"stop-lockspaces", no_argument, 0, 'S' },
@ -541,7 +631,7 @@ static int read_options(int argc, char *argv[])
} }
while (1) { while (1) {
c = getopt_long(argc, argv, "hqidE:D:w:S", long_options, &option_index); c = getopt_long(argc, argv, "hqidE:D:w:k:r:S", long_options, &option_index);
if (c == -1) if (c == -1)
break; break;
@ -565,13 +655,21 @@ static int read_options(int argc, char *argv[])
case 'w': case 'w':
wait_opt = atoi(optarg); wait_opt = atoi(optarg);
break; break;
case 'k':
kill_vg = 1;
arg_vg_name = strdup(optarg);
break;
case 'r':
drop_vg = 1;
arg_vg_name = strdup(optarg);
break;
case 'E': case 'E':
gl_enable = 1; gl_enable = 1;
able_vg_name = strdup(optarg); arg_vg_name = strdup(optarg);
break; break;
case 'D': case 'D':
gl_disable = 1; gl_disable = 1;
able_vg_name = strdup(optarg); arg_vg_name = strdup(optarg);
break; break;
case 'S': case 'S':
stop_lockspaces = 1; stop_lockspaces = 1;
@ -616,6 +714,16 @@ int main(int argc, char **argv)
goto out; goto out;
} }
if (kill_vg) {
rv = do_kill();
goto out;
}
if (drop_vg) {
rv = do_drop();
goto out;
}
if (gl_enable) { if (gl_enable) {
rv = do_able("enable_gl"); rv = do_able("enable_gl");
goto out; goto out;

View File

@ -45,5 +45,7 @@ static inline void lvmlockd_close(daemon_handle h)
#define EMANAGER 214 #define EMANAGER 214
#define EPREPARE 215 #define EPREPARE 215
#define ELOCKD 216 #define ELOCKD 216
#define EVGKILLED 217 /* sanlock lost access to leases and VG is killed. */
#define ELOCKIO 218 /* sanlock io errors during lock op, may be transient. */
#endif /* _LVM_LVMLOCKD_CLIENT_H */ #endif /* _LVM_LVMLOCKD_CLIENT_H */

View File

@ -735,6 +735,10 @@ static const char *op_str(int x)
return "find_free_lock"; return "find_free_lock";
case LD_OP_FORGET_VG_NAME: case LD_OP_FORGET_VG_NAME:
return "forget_vg_name"; return "forget_vg_name";
case LD_OP_KILL_VG:
return "kill_vg";
case LD_OP_DROP_VG:
return "drop_vg";
default: default:
return "op_unknown"; return "op_unknown";
}; };
@ -786,6 +790,7 @@ int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsi
char *major_str, *minor_str, *patch_str; char *major_str, *minor_str, *patch_str;
char *n, *d1, *d2; char *n, *d1, *d2;
memset(version, 0, sizeof(version));
strncpy(version, args, MAX_ARGS); strncpy(version, args, MAX_ARGS);
version[MAX_ARGS] = '\0'; version[MAX_ARGS] = '\0';
@ -1828,7 +1833,7 @@ static int for_each_lock(struct lockspace *ls, int locks_do)
return 0; return 0;
} }
static int clear_locks(struct lockspace *ls, int free_vg) static int clear_locks(struct lockspace *ls, int free_vg, int drop_vg)
{ {
struct resource *r, *r_safe; struct resource *r, *r_safe;
struct lock *lk, *lk_safe; struct lock *lk, *lk_safe;
@ -1847,10 +1852,10 @@ static int clear_locks(struct lockspace *ls, int free_vg)
/* /*
* Stopping a lockspace shouldn't happen with LV locks * Stopping a lockspace shouldn't happen with LV locks
* still held, but it will be stopped with GL and VG * still held, but it will be stopped with GL and VG
* locks held. * locks held. The drop_vg case may see LV locks.
*/ */
if (lk->flags & LD_LF_PERSISTENT) if (lk->flags & LD_LF_PERSISTENT && !drop_vg)
log_error("S %s R %s clear lock persistent", ls->name, r->name); log_error("S %s R %s clear lock persistent", ls->name, r->name);
else else
log_debug("S %s R %s clear lock mode %s client %d", ls->name, r->name, mode_str(lk->mode), lk->client_id); log_debug("S %s R %s clear lock mode %s client %d", ls->name, r->name, mode_str(lk->mode), lk->client_id);
@ -1884,8 +1889,8 @@ static int clear_locks(struct lockspace *ls, int free_vg)
rv = lm_unlock(ls, r, NULL, r_version, free_vg ? LMUF_FREE_VG : 0); rv = lm_unlock(ls, r, NULL, r_version, free_vg ? LMUF_FREE_VG : 0);
if (rv < 0) { if (rv < 0) {
/* should never happen */ /* should never happen */
log_error("S %s R %s clear_locks free %d lm unlock error %d", log_error("S %s R %s clear_locks free %d drop %d lm unlock error %d",
ls->name, r->name, free_vg, rv); ls->name, r->name, free_vg, drop_vg, rv);
} }
list_for_each_entry_safe(act, act_safe, &r->actions, list) { list_for_each_entry_safe(act, act_safe, &r->actions, list) {
@ -1990,6 +1995,28 @@ static int other_sanlock_vgs_exist(struct lockspace *ls_rem)
return 0; return 0;
} }
/*
 * Decide whether an action may still be processed once the lockspace
 * has been marked killed.  Returns 1 to process the action, 0 to refuse it.
 *
 * LOCK is the main op of interest (the others are unlikely): unlock
 * requests are still processed, any other lock mode is refused.
 */
static int process_op_during_kill(struct action *act)
{
	switch (act->op) {
	case LD_OP_LOCK:
		/* Only the unlock form of a lock op is let through. */
		return (act->mode == LD_LK_UN) ? 1 : 0;

	case LD_OP_ENABLE:
	case LD_OP_DISABLE:
	case LD_OP_UPDATE:
	case LD_OP_RENAME_BEFORE:
	case LD_OP_RENAME_FINAL:
	case LD_OP_FIND_FREE_LOCK:
		return 0;

	default:
		/* Every op not listed above is still processed. */
		return 1;
	}
}
/* /*
* Process actions queued for this lockspace by * Process actions queued for this lockspace by
* client_recv_action / add_lock_action. * client_recv_action / add_lock_action.
@ -2010,6 +2037,7 @@ static void *lockspace_thread_main(void *arg_in)
struct list_head tmp_act; struct list_head tmp_act;
struct list_head act_close; struct list_head act_close;
int free_vg = 0; int free_vg = 0;
int drop_vg = 0;
int error = 0; int error = 0;
int adopt_flag = 0; int adopt_flag = 0;
int wait_flag = 0; int wait_flag = 0;
@ -2114,7 +2142,43 @@ static void *lockspace_thread_main(void *arg_in)
act = list_first_entry(&ls->actions, struct action, list); act = list_first_entry(&ls->actions, struct action, list);
if (act->op == LD_OP_KILL_VG && act->rt == LD_RT_VG) {
/* Continue processing until DROP_VG arrives. */
log_debug("S %s kill_vg", ls->name);
ls->kill_vg = 1;
list_del(&act->list);
act->result = 0;
add_client_result(act);
continue;
}
if (ls->kill_vg && !process_op_during_kill(act)) {
log_debug("S %s disallow op %s after kill_vg", ls->name, op_str(act->op));
list_del(&act->list);
act->result = -EVGKILLED;
add_client_result(act);
continue;
}
if (act->op == LD_OP_DROP_VG && act->rt == LD_RT_VG) {
/*
* If leases are released after i/o errors begin
* but before lvmlockctl --kill, then the VG is not
* killed, but drop is still needed to clean up the
* VG, so in that case there would be a drop op without
* a preceding kill op.
*/
if (!ls->kill_vg)
log_debug("S %s received drop without kill", ls->name);
log_debug("S %s drop_vg", ls->name);
ls->thread_work = 0;
ls->thread_stop = 1;
drop_vg = 1;
break;
}
if (act->op == LD_OP_STOP) { if (act->op == LD_OP_STOP) {
/* thread_stop is already set */
ls->thread_work = 0; ls->thread_work = 0;
break; break;
} }
@ -2238,6 +2302,9 @@ out_rem:
* allowed in emergency/force situations, otherwise it's * allowed in emergency/force situations, otherwise it's
* obviously dangerous, since the lock holders are still * obviously dangerous, since the lock holders are still
* operating under the assumption that they hold the lock. * operating under the assumption that they hold the lock.
* drop_vg drops all existing locks, but should only
* happen when the VG access has been forcibly and
* successfully terminated.
* *
* For vgremove of a sanlock vg, the vg lock will be held, * For vgremove of a sanlock vg, the vg lock will be held,
* and possibly the gl lock if this vg holds the gl. * and possibly the gl lock if this vg holds the gl.
@ -2246,7 +2313,7 @@ out_rem:
log_debug("S %s clearing locks", ls->name); log_debug("S %s clearing locks", ls->name);
rv = clear_locks(ls, free_vg); rv = clear_locks(ls, free_vg, drop_vg);
/* /*
* Tell any other hosts in the lockspace to leave it * Tell any other hosts in the lockspace to leave it
@ -2284,6 +2351,8 @@ out_act:
act->result = 0; act->result = 0;
} else if (act->op == LD_OP_STOP) } else if (act->op == LD_OP_STOP)
act->result = 0; act->result = 0;
else if (act->op == LD_OP_DROP_VG)
act->result = 0;
else if (act->op == LD_OP_RENAME_BEFORE) else if (act->op == LD_OP_RENAME_BEFORE)
act->result = 0; act->result = 0;
else else
@ -2317,6 +2386,7 @@ out_act:
pthread_mutex_lock(&lockspaces_mutex); pthread_mutex_lock(&lockspaces_mutex);
ls->thread_done = 1; ls->thread_done = 1;
ls->free_vg = free_vg; ls->free_vg = free_vg;
ls->drop_vg = drop_vg;
pthread_mutex_unlock(&lockspaces_mutex); pthread_mutex_unlock(&lockspaces_mutex);
/* /*
@ -3539,7 +3609,6 @@ static int add_lock_action(struct action *act)
if (ls_create_fail) if (ls_create_fail)
act->flags |= LD_AF_ADD_LS_ERROR; act->flags |= LD_AF_ADD_LS_ERROR;
return -ENOLS; return -ENOLS;
} else { } else {
log_debug("lockspace not found %s", ls_name); log_debug("lockspace not found %s", ls_name);
return -ENOLS; return -ENOLS;
@ -3714,6 +3783,16 @@ static int str_to_op_rt(const char *req_name, int *op, int *rt)
*rt = LD_RT_VG; *rt = LD_RT_VG;
return 0; return 0;
} }
if (!strcmp(req_name, "kill_vg")) {
*op = LD_OP_KILL_VG;
*rt = LD_RT_VG;
return 0;
}
if (!strcmp(req_name, "drop_vg")) {
*op = LD_OP_DROP_VG;
*rt = LD_RT_VG;
return 0;
}
out: out:
return -1; return -1;
} }
@ -3864,6 +3943,8 @@ static int print_lockspace(struct lockspace *ls, const char *prefix, int pos, in
"thread_work=%d " "thread_work=%d "
"thread_stop=%d " "thread_stop=%d "
"thread_done=%d " "thread_done=%d "
"kill_vg=%d "
"drop_vg=%d "
"sanlock_gl_enabled=%d\n", "sanlock_gl_enabled=%d\n",
prefix, prefix,
ls->name, ls->name,
@ -3878,6 +3959,8 @@ static int print_lockspace(struct lockspace *ls, const char *prefix, int pos, in
ls->thread_work ? 1 : 0, ls->thread_work ? 1 : 0,
ls->thread_stop ? 1 : 0, ls->thread_stop ? 1 : 0,
ls->thread_done ? 1 : 0, ls->thread_done ? 1 : 0,
ls->kill_vg,
ls->drop_vg,
ls->sanlock_gl_enabled ? 1 : 0); ls->sanlock_gl_enabled ? 1 : 0);
} }
@ -4273,6 +4356,8 @@ static void client_recv_action(struct client *cl)
case LD_OP_FREE: case LD_OP_FREE:
case LD_OP_RENAME_BEFORE: case LD_OP_RENAME_BEFORE:
case LD_OP_FIND_FREE_LOCK: case LD_OP_FIND_FREE_LOCK:
case LD_OP_KILL_VG:
case LD_OP_DROP_VG:
rv = add_lock_action(act); rv = add_lock_action(act);
break; break;
case LD_OP_FORGET_VG_NAME: case LD_OP_FORGET_VG_NAME:

View File

@ -51,6 +51,8 @@ enum {
LD_OP_RUNNING_LM, LD_OP_RUNNING_LM,
LD_OP_FIND_FREE_LOCK, LD_OP_FIND_FREE_LOCK,
LD_OP_FORGET_VG_NAME, LD_OP_FORGET_VG_NAME,
LD_OP_KILL_VG,
LD_OP_DROP_VG,
}; };
/* resource types */ /* resource types */
@ -184,6 +186,8 @@ struct lockspace {
unsigned int sanlock_gl_enabled: 1; unsigned int sanlock_gl_enabled: 1;
unsigned int sanlock_gl_dup: 1; unsigned int sanlock_gl_dup: 1;
unsigned int free_vg: 1; unsigned int free_vg: 1;
unsigned int kill_vg: 1;
unsigned int drop_vg: 1;
struct list_head actions; /* new client actions */ struct list_head actions; /* new client actions */
struct list_head resources; /* resource/lock state for gl/vg/lv */ struct list_head resources; /* resource/lock state for gl/vg/lv */

View File

@ -33,52 +33,101 @@
#include <sys/socket.h> #include <sys/socket.h>
/* /*
* If access to the pv containing the vg's leases is lost, sanlock cannot renew -------------------------------------------------------------------------------
* the leases we have acquired for locked LVs. This means that we could soon For each VG, lvmlockd creates a sanlock lockspace that holds the leases for
* loose the lease to another host which could activate our LV exclusively. We that VG. There's a lease for the VG lock, and there's a lease for each active
* do not want to get to the point of two hosts having the same LV active LV. sanlock maintains (reads/writes) these leases, which exist on storage.
* exclusively (it obviously violates the purpose of LV locks.) That storage is a hidden LV within the VG: /dev/vg/lvmlock. lvmlockd gives the
* path of this internal LV to sanlock, which then reads/writes the leases on it.
* The default method of preventing this problem is for lvmlockd to do nothing,
* which produces a safe but potentially inconvenient result. Doing nothing # lvs -a cc -o+uuid
* leads to our LV leases not being released, which leads to sanlock using the LV VG Attr LSize LV UUID
* local watchdog to reset us before another host can acquire our lock. It lv1 cc -wi-a----- 2.00g 7xoDtu-yvNM-iwQx-C94t-BbYs-UzBl-o8hAIa
* would often be preferrable to avoid the abrupt hard reset from the watchdog. lv2 cc -wi-a----- 100.00g exxNPX-wZdO-uCNy-yiGa-aJGT-JKVl-arfcYT
* [lvmlock] cc -wi-ao---- 256.00m iLpDel-hR0T-hJ3u-rnVo-PcDh-mcjt-sF9egM
* There are other options to avoid being reset by our watchdog. If we can
* quickly stop using the LVs in question and release the locks for them, then # sanlock status
* we could avoid a reset (there's a certain grace period of about 40 seconds s lvm_cc:1:/dev/mapper/cc-lvmlock:0
* in which we can attempt this.) To do this, we can tell sanlock to run a r lvm_cc:exxNPX-wZdO-uCNy-yiGa-aJGT-JKVl-arfcYT:/dev/mapper/cc-lvmlock:71303168:13 p 26099
* specific program when it has lost access to our leases. We could use this r lvm_cc:7xoDtu-yvNM-iwQx-C94t-BbYs-UzBl-o8hAIa:/dev/mapper/cc-lvmlock:70254592:3 p 26099
* program to:
* This shows that sanlock is maintaining leases on /dev/mapper/cc-lvmlock.
* 1. Deactivate all lvs in the effected vg. If all the leases are
* deactivated, then our LV locks would be released and sanlock would no longer sanlock acquires a lockspace lease when the lockspace is joined, i.e. when the
* use the watchdog to reset us. If file systems are mounted on the active VG is started by 'vgchange --lock-start cc'. This lockspace lease exists at
* lvs, then deactivating them would fail, so this option would be of limited /dev/mapper/cc-lvmlock offset 0, and sanlock regularly writes to it to maintain
* usefulness. ownership of it. Joining the lockspace (by acquiring the lockspace lease in
* it) then allows standard resource leases to be acquired in the lockspace for
* 2. Option 1 could be extended to kill pids using the fs on the lv, unmount whatever the application wants. lvmlockd uses resource leases for the VG lock
* the fs, and deactivate the lv. This is probably out of scope for lvm and LV locks.
* directly, and would likely need the help of another system service.
* sanlock acquires a resource lease for each actual lock that lvm commands use.
* 3. Use dmsetup suspend to block access to lvs in the effected vg. If this Above, there are two LV locks that are held because the two LVs are active.
* was successful, the local host could no longer write to the lvs, we could These are on /dev/mapper/cc-lvmlock at offsets 71303168 and 70254592. sanlock
* safely release the LV locks, and sanlock would no longer reset us. At this does not write to these resource leases except when acquiring and releasing
* point, with suspended lvs, the host would be in a fairly hobbled state, and them (e.g. lvchange -ay/-an). The renewal of the lockspace lease maintains
* would almost certainly need a manual, forcible reset. ownership of all the resource leases in the lockspace.
*
* 4. Option 3 could be extended to monitor the lost storage, and if it is If the host loses access to the disk that the sanlock lv lives on, then sanlock
* reconnected, the leases could be reacquired, and the suspended lvs resumed can no longer renew its lockspace lease. The lockspace lease will eventually
* (reacquiring leases will fail if another host has acquired them since they expire, at which point the host will lose ownership of it, and of all resource
* were released.) This complexity of this option, combined with the fact that leases it holds in the lockspace. Eventually, other hosts will be able to
* the error conditions are often not as simple as storage being lost and then acquire those leases. sanlock ensures that another host will not be able to
* later connecting, will result in this option being too unreliable. acquire one of the expired leases until the current host has quit using it.
*
* Add a config option that we could use to select a different behavior than It is important that the host "quit using" the leases it is holding if the
* the default. Then implement one of the simpler options as a proof of sanlock storage is lost and they begin expiring. If the host cannot quit using
* concept, which could be extended if needed. the leases and release them within a limited time, then sanlock will use the
*/ local watchdog to forcibly reset the host before any other host can acquire
them. This is severe, but preferable to possibly corrupting the data protected
by the lease. It ensures that two nodes will not be using the same lease at
once. For LV leases, that means that another host will not be able to activate
the LV while the original host still has it active.
sanlock notifies the application that it cannot renew the lockspace lease. The
application needs to quit using all leases in the lockspace and release them as
quickly as possible. In the initial version, lvmlockd ignored this
notification, so sanlock would eventually reach the point where it would use
the local watchdog to reset the host. However, it's better to attempt a
response. If that response succeeds, the host can avoid being reset. If the
response fails, then sanlock will eventually reset the host as the last resort.
sanlock gives the application about 40 seconds to complete its response and
release its leases before resetting the host.
An application can specify the path and args of a program that sanlock should
run to notify it if the lockspace lease cannot be renewed. This program should
carry out the application's response to the expiring leases: attempt to quit
using the leases and then release them. lvmlockd gives this command to sanlock
for each VG when that VG is started: 'lvmlockctl --kill vg_name'
If sanlock loses access to lease storage in that VG, it runs lvmlockctl --kill,
which:
1. Uses syslog to explain what is happening.
2. Notifies lvmlockd that the VG is being killed, so lvmlockd can
immediately return an error for this condition if any new lock
requests are made. (This step would not be strictly necessary.)
3. Attempts to quit using the VG. This is not yet implemented, but
will eventually use blkdeactivate on the VG (or a more forceful
equivalent.)
4. If step 3 was successful at terminating all use of the VG, then
lvmlockd is told to release all the leases for the VG. If this
is all done within about 40 seconds, the host can avoid being
reset.
Until steps 3 and 4 are fully implemented, manual steps can be substituted.
This is primarily for testing since the problem needs to be noticed and
responded to in a very short time. The manual alternative to step 3 is to kill
any processes using file systems on LV's in the VG, unmount all file systems on
the LVs, and deactivate all the LVs. Once this is done, the manual alternative
to step 4 is to run 'lvmlockctl --drop vg_name', which tells lvmlockd to
release all the leases for the VG.
-------------------------------------------------------------------------------
*/
/* /*
* Each lockspace thread has its own sanlock daemon connection. * Each lockspace thread has its own sanlock daemon connection.
@ -961,12 +1010,24 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls)
char lock_lv_name[MAX_ARGS+1]; char lock_lv_name[MAX_ARGS+1];
char lsname[SANLK_NAME_LEN + 1]; char lsname[SANLK_NAME_LEN + 1];
char disk_path[SANLK_PATH_LEN]; char disk_path[SANLK_PATH_LEN];
char killpath[SANLK_PATH_LEN];
char killargs[SANLK_PATH_LEN];
int gl_found; int gl_found;
int ret, rv; int ret, rv;
memset(disk_path, 0, sizeof(disk_path)); memset(disk_path, 0, sizeof(disk_path));
memset(lock_lv_name, 0, sizeof(lock_lv_name)); memset(lock_lv_name, 0, sizeof(lock_lv_name));
/*
* Construct the path to lvmlockctl by using the path to the lvm binary
* and appending "lockctl" to get /path/to/lvmlockctl.
*/
memset(killpath, 0, sizeof(killpath));
snprintf(killpath, SANLK_PATH_LEN - 1, "%slockctl", LVM_PATH);
memset(killargs, 0, sizeof(killargs));
snprintf(killargs, SANLK_PATH_LEN - 1, "--kill %s", ls->vg_name);
rv = check_args_version(ls->vg_args, VG_LOCK_ARGS_MAJOR); rv = check_args_version(ls->vg_args, VG_LOCK_ARGS_MAJOR);
if (rv < 0) { if (rv < 0) {
ret = -EARGS; ret = -EARGS;
@ -1051,6 +1112,15 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls)
goto fail; goto fail;
} }
log_debug("set killpath to %s %s", killpath, killargs);
rv = sanlock_killpath(lms->sock, 0, killpath, killargs);
if (rv < 0) {
log_error("S %s killpath error %d", lsname, rv);
ret = -EMANAGER;
goto fail;
}
rv = sanlock_restrict(lms->sock, SANLK_RESTRICT_SIGKILL); rv = sanlock_restrict(lms->sock, SANLK_RESTRICT_SIGKILL);
if (rv < 0) { if (rv < 0) {
log_error("S %s restrict error %d", lsname, rv); log_error("S %s restrict error %d", lsname, rv);
@ -1397,11 +1467,6 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
log_error("S %s R %s lock_san acquire error %d", log_error("S %s R %s lock_san acquire error %d",
ls->name, r->name, rv); ls->name, r->name, rv);
if (added) {
lm_rem_resource_sanlock(ls, r);
return rv;
}
/* if the gl has been disabled, remove and free the gl resource */ /* if the gl has been disabled, remove and free the gl resource */
if ((rv == SANLK_LEADER_RESOURCE) && (r->type == LD_RT_GL)) { if ((rv == SANLK_LEADER_RESOURCE) && (r->type == LD_RT_GL)) {
if (!lm_gl_is_enabled(ls)) { if (!lm_gl_is_enabled(ls)) {
@ -1413,6 +1478,22 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
} }
} }
if (added)
lm_rem_resource_sanlock(ls, r);
/* sanlock gets i/o errors trying to read/write the leases. */
if (rv == -EIO)
rv = -ELOCKIO;
/*
* The sanlock lockspace can disappear if the lease storage fails,
* the delta lease renewals fail, the lockspace enters recovery,
* lvmlockd holds no leases in the lockspace, so sanlock can
* stop and free the lockspace.
*/
if (rv == -ENOSPC)
rv = -ELOCKIO;
return rv; return rv;
} }
@ -1594,9 +1675,11 @@ int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
} }
rv = sanlock_release(lms->sock, -1, 0, 1, &rs); rv = sanlock_release(lms->sock, -1, 0, 1, &rs);
if (rv < 0) { if (rv < 0)
log_error("S %s R %s unlock_san release error %d", ls->name, r->name, rv); log_error("S %s R %s unlock_san release error %d", ls->name, r->name, rv);
}
if (rv == -EIO)
rv = -ELOCKIO;
return rv; return rv;
} }

View File

@ -1357,6 +1357,7 @@ int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
const char *mode = NULL; const char *mode = NULL;
const char *opts = NULL; const char *opts = NULL;
uint32_t lockd_flags; uint32_t lockd_flags;
int force_cache_update = 0;
int retries = 0; int retries = 0;
int result; int result;
@ -1401,8 +1402,8 @@ int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
/* We can continue reading if a shared lock fails. */ /* We can continue reading if a shared lock fails. */
if (!strcmp(mode, "sh")) { if (!strcmp(mode, "sh")) {
log_warn("Reading without shared global lock."); log_warn("Reading without shared global lock.");
lvmetad_validate_global_cache(cmd, 1); force_cache_update = 1;
return 1; goto allow;
} }
log_error("Global lock failed: check that lvmlockd is running."); log_error("Global lock failed: check that lvmlockd is running.");
@ -1425,9 +1426,19 @@ int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
* *
* ESTARTING: the lockspace with the gl is starting. * ESTARTING: the lockspace with the gl is starting.
* The VG with the global lock is starting and should finish shortly. * The VG with the global lock is starting and should finish shortly.
*
* ELOCKIO: sanlock gets i/o errors when trying to read/write leases
* (This can progress to EVGKILLED.)
*
* EVGKILLED: the sanlock lockspace is being killed after losing
* access to lease storage.
*/ */
if (result == -ENOLS || result == -ESTARTING) { if (result == -ENOLS ||
result == -ESTARTING ||
result == -EVGKILLED ||
result == -ELOCKIO) {
if (!strcmp(mode, "un")) if (!strcmp(mode, "un"))
return 1; return 1;
@ -1436,9 +1447,13 @@ int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
*/ */
if (strcmp(mode, "sh")) { if (strcmp(mode, "sh")) {
if (result == -ESTARTING) if (result == -ESTARTING)
log_error("Global lock failed: lockspace is starting."); log_error("Global lock failed: lockspace is starting");
else if (result == -ENOLS) else if (result == -ENOLS)
log_error("Global lock failed: check that global lockspace is started."); log_error("Global lock failed: check that global lockspace is started");
else if (result == -ELOCKIO)
log_error("Global lock failed: storage errors for sanlock leases");
else if (result == -EVGKILLED)
log_error("Global lock failed: storage failed for sanlock leases");
else else
log_error("Global lock failed: error %d", result); log_error("Global lock failed: error %d", result);
return 0; return 0;
@ -1452,14 +1467,21 @@ int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
if (result == -ESTARTING) { if (result == -ESTARTING) {
log_warn("Skipping global lock: lockspace is starting"); log_warn("Skipping global lock: lockspace is starting");
lvmetad_validate_global_cache(cmd, 1); force_cache_update = 1;
return 1; goto allow;
}
if (result == -ELOCKIO || result == -EVGKILLED) {
log_warn("Skipping global lock: storage %s for sanlock leases",
result == -ELOCKIO ? "errors" : "failed");
force_cache_update = 1;
goto allow;
} }
if ((lockd_flags & LD_RF_NO_GL_LS) || (lockd_flags & LD_RF_NO_LOCKSPACES)) { if ((lockd_flags & LD_RF_NO_GL_LS) || (lockd_flags & LD_RF_NO_LOCKSPACES)) {
log_warn("Skipping global lock: lockspace not found or started"); log_warn("Skipping global lock: lockspace not found or started");
lvmetad_validate_global_cache(cmd, 1); force_cache_update = 1;
return 1; goto allow;
} }
/* /*
@ -1492,9 +1514,8 @@ int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
} }
} }
if (!(flags & LDGL_SKIP_CACHE_VALIDATE)) allow:
lvmetad_validate_global_cache(cmd, 0); lvmetad_validate_global_cache(cmd, force_cache_update);
return 1; return 1;
} }
@ -1510,7 +1531,7 @@ int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
* *
* The result of the VG lock operation needs to be saved in lockd_state * The result of the VG lock operation needs to be saved in lockd_state
* because the result needs to be passed into vg_read so it can be * because the result needs to be passed into vg_read so it can be
* assessed in combination with vg->lock_state. * assessed in combination with vg->lock_type.
* *
* The VG lock protects the VG metadata on disk from concurrent access * The VG lock protects the VG metadata on disk from concurrent access
* among hosts. The VG lock also ensures that the local lvmetad cache * among hosts. The VG lock also ensures that the local lvmetad cache
@ -1686,6 +1707,28 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
} }
} }
/*
* sanlock is getting i/o errors while reading/writing leases, or the
* lockspace/VG is being killed after failing to renew its lease for
* too long.
*/
if (result == -EVGKILLED || result == -ELOCKIO) {
const char *problem = (result == -ELOCKIO ? "errors" : "failed");
if (!strcmp(mode, "un")) {
ret = 1;
goto out;
} else if (!strcmp(mode, "sh")) {
log_warn("VG %s lock skipped: storage %s for sanlock leases", vg_name, problem);
ret = 1;
goto out;
} else {
log_error("VG %s lock failed: storage %s for sanlock leases", vg_name, problem);
ret = 0;
goto out;
}
}
/* /*
* An unused/previous lockspace for the VG was found. * An unused/previous lockspace for the VG was found.
* This means it must be a lockd VG, not local. The * This means it must be a lockd VG, not local. The
@ -1903,6 +1946,12 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
return 0; return 0;
} }
if (result == -EVGKILLED || result == -ELOCKIO) {
const char *problem = (result == -ELOCKIO ? "errors" : "failed");
log_error("LV %s/%s lock failed: storage %s for sanlock leases", vg->name, lv_name, problem);
return 0;
}
if (result < 0) { if (result < 0) {
log_error("LV %s/%s lock failed: error %d", vg->name, lv_name, result); log_error("LV %s/%s lock failed: error %d", vg->name, lv_name, result);
return 0; return 0;

View File

@ -17,8 +17,7 @@
#define LOCKD_SANLOCK_LV_NAME "lvmlock" #define LOCKD_SANLOCK_LV_NAME "lvmlock"
/* lockd_gl flags */ /* lockd_gl flags */
#define LDGL_SKIP_CACHE_VALIDATE 0x00000001 #define LDGL_UPDATE_NAMES 0x00000001
#define LDGL_UPDATE_NAMES 0x00000002
/* lockd_lv flags */ /* lockd_lv flags */
#define LDLV_MODE_NO_SH 0x00000001 #define LDLV_MODE_NO_SH 0x00000001