1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-06 17:18:29 +03:00
lvm2/daemons/lvmlockd/lvmlockd-core.c
David Teigland 597de9d586 lvmlockd: add LV name to some debug logging
The LV uuid is used as the lock name, and including
the LV name in some log messages makes it easier to
follow log messages.
2015-08-10 15:07:10 -05:00

5845 lines
139 KiB
C

/*
* Copyright (C) 2014-2015 Red Hat, Inc.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*/
#define _XOPEN_SOURCE 500 /* pthread */
#define _ISOC99_SOURCE
#define _REENTRANT
#include "tool.h"
#include "daemon-io.h"
#include "daemon-server.h"
#include "daemon-log.h"
#include "lvm-version.h"
#include "lvmetad-client.h"
#include "lvmlockd-client.h"
/* #include <assert.h> */
#include <errno.h>
#include <pthread.h>
#include <stddef.h>
#include <poll.h>
#include <signal.h>
#include <getopt.h>
#include <syslog.h>
#include <dirent.h>
#include <time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <sys/un.h>
#define EXTERN
#include "lvmlockd-internal.h"
/*
* Basic operation of lvmlockd
*
* lvmlockd main process runs main_loop() which uses poll().
* poll listens for new connections from lvm commands and for
* messages from existing connected lvm commands.
*
* lvm command starts and connects to lvmlockd.
*
* lvmlockd receives a connection request from command and adds a
* 'struct client' to keep track of the connection to the command.
* The client's fd is added to the set of fd's in poll().
*
* lvm command sends a lock request to lvmlockd. The lock request
* can be for the global lock, a vg lock, or an lv lock.
*
* lvmlockd main_loop/poll sees a message from an existing client.
* It sets client.recv = 1, then wakes up client_thread_main.
*
* client_thread_main iterates through client structs (cl), looking
* for any that need processing, finds the one with cl->recv set,
* and calls client_recv_action(cl).
*
* client_recv_action(cl) reads the message/request from the client,
* allocates a new 'struct action' (act) to represent the request,
* sets the act with what is found in the request, then looks at
* the specific operation in act->op (LD_OP_FOO) to decide what to
* do with the action:
*
* . If the action is to start a lockspace, create a new thread
* to manage that lockspace: add_lockspace(act).
*
* . If the action is a lock request, pass the act to the thread
* that is managing that lockspace: add_lock_action(act).
*
* . Other misc actions are passed to the worker_thread:
* add_work_action(act).
*
* Once the client_thread has passed the action off to another
* thread to process, it goes back to waiting for more client
* handling work to do.
*
* The thread that was given the action by the client_thread
* now processes that action according to the operation, act->op.
* This is either a lockspace_thread (for lock ops or ops that
* add/rem a lockspace), or the worker_thread. See below for
* how these ops are processed by these threads. When the
* given thread is done processing the action, the result is
* set in act->result, and the act struct for the completed action
* is passed back to the client_thread (client_results list).
*
* The client_thread takes completed actions (from client_results
* list), and sends the result back to the client that sent the
* request represented by the action. The act struct is then freed.
*
* This completes the cycle of work between lvm commands (clients)
* and lvmlockd. In summary:
*
* - main process polls for new client connections and new requests
* from lvm commands
* - client_thread reads requests from clients
* - client_thread creates an action struct for each request
* - client_thread passes the act to another thread for processing
* - other threads pass completed act structs back to client_thread
* - client_thread sends the act result back to the client and frees the act
*
*
* Lockspace threads:
* Each lockd VG has its own lockspace that contains locks for that VG.
* Each 'struct lockspace' is managed by a separate lockspace_thread.
* When the lockspace_thread is first created, the first thing it does
* is join the lockspace in the lock manager. This can take a long time.
* If the join fails, the thread exits. After the join, the thread
* enters a loop waiting for lock actions to perform in the lockspace.
*
* The request to remove/leave a lockspace causes a flag to be set in
* the lockspace struct. When the lockspace_thread sees this flag
* set, it leaves the lockspace, and exits.
*
* When the client_thread passes a new action to a lockspace_thread,
* i.e. a new lock request, the lockspace_thread identifies which resource
* is being locked (GL, VG, LV), and gets the 'struct resource' (r) for it.
* r->type will be LD_RT_GL, LD_RT_VG, or LD_RT_LV. r->name is the
* resource name, and is fixed for GL and VG resources, but is based on
* the LV name for LV resources. The act is added to the resource's
* list of actions: r->actions, i.e. outstanding lock requests on the
* resource.
*
* The lockspace thread then iterates through each resource in the
* lockspace, processing any outstanding actions on each: res_process(ls, r).
*
* res_process() compares the outstanding actions/requests in r->actions
* against any existing locks on the resource in r->locks. If the
* action is blocked by existing locks, it's left on r->actions. If not,
* the action/request is passed to the lock manager. If the result from
* the lock manager is success, a new 'struct lock' is created for the
* action and saved on r->locks. The result is set in act->result and
* the act is passed back to the client_thread to be returned to the client.
*/
static const char *lvmlockd_protocol = "lvmlockd";
static const int lvmlockd_protocol_version = 1;
static int daemon_quit;
static int adopt_opt;
static daemon_handle lvmetad_handle;
static pthread_mutex_t lvmetad_mutex;
static int lvmetad_connected;
/*
* We use a separate socket for dumping daemon info.
* This will not interfere with normal operations, and allows
* free-form debug data to be dumped instead of the libdaemon
* protocol that wants all data in the cft format.
* 1MB should fit all the info we need to dump.
*/
#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
#define DUMP_BUF_SIZE (1024 * 1024)
static char dump_buf[DUMP_BUF_SIZE];
static struct sockaddr_un dump_addr;
static socklen_t dump_addrlen;
/*
* Main program polls client connections, adds new clients,
* adds work for client thread.
*
* pollfd_mutex is used for adding vs removing entries,
* and for resume vs realloc.
*/
#define POLL_FD_UNUSED -1 /* slot is free */
#define POLL_FD_IGNORE -2 /* slot is used but ignore in poll */
#define ADD_POLL_SIZE 16 /* increment slots by this amount */
static pthread_mutex_t pollfd_mutex;
static struct pollfd *pollfd;
static int pollfd_size;
static int pollfd_maxi;
static int listen_pi;
static int listen_fd;
static int restart_pi;
static int restart_fds[2];
/*
* Each lockspace has its own thread to do locking.
* The lockspace thread makes synchronous lock requests to dlm/sanlock.
* Every vg with a lockd type, i.e. "dlm", "sanlock", should be on this list.
*
* lockspaces_inactive holds old ls structs for vgs that have been
* stopped, or for vgs that failed to start. The old ls structs
* are removed from the inactive list and freed when a new ls with
* the same name is started and added to the standard lockspaces list.
* Keeping this bit of "history" for the ls allows us to return a
* more informative error message if a vg lock request is made for
* an ls that has been stopped or failed to start.
*/
static pthread_mutex_t lockspaces_mutex;
static struct list_head lockspaces;
static struct list_head lockspaces_inactive;
/*
* Client thread reads client requests and writes client results.
*/
static pthread_t client_thread;
static pthread_mutex_t client_mutex;
static pthread_cond_t client_cond;
static struct list_head client_list; /* connected clients */
static struct list_head client_results; /* actions to send back to clients */
static uint32_t client_ids; /* 0 and ADOPT_CLIENT_ID are skipped */
static int client_stop; /* stop the thread */
static int client_work; /* a client on client_list has work to do */
#define ADOPT_CLIENT_ID 0xFFFFFFFF /* special client_id for adopt actions */
static struct list_head adopt_results; /* special start actions from adopt_locks() */
/*
* Worker thread performs misc non-locking actions, e.g. init/free.
*/
static pthread_t worker_thread;
static pthread_mutex_t worker_mutex;
static pthread_cond_t worker_cond;
static struct list_head worker_list; /* actions for worker_thread */
static int worker_stop; /* stop the thread */
static int worker_wake; /* wake the thread without adding work */
/*
* The content of every log_foo() statement is saved in the
* circular buffer, which can be dumped to a client and printed.
*/
#define LOG_LINE_SIZE 256
#define LOG_DUMP_SIZE DUMP_BUF_SIZE
#define LOG_SYSLOG_PRIO LOG_WARNING
static char log_dump[LOG_DUMP_SIZE];
static unsigned int log_point;
static unsigned int log_wrap;
static pthread_mutex_t log_mutex;
static int syslog_priority = LOG_SYSLOG_PRIO;
/*
* Structure pools to avoid repeated malloc/free.
*/
#define MAX_UNUSED_ACTION 64
#define MAX_UNUSED_CLIENT 64
#define MAX_UNUSED_RESOURCE 64
#define MAX_UNUSED_LOCK 64
static pthread_mutex_t unused_struct_mutex;
static struct list_head unused_action;
static struct list_head unused_client;
static struct list_head unused_resource;
static struct list_head unused_lock;
static int unused_action_count;
static int unused_client_count;
static int unused_resource_count;
static int unused_lock_count;
static int resource_lm_data_size; /* max size of lm_data from sanlock|dlm */
static int alloc_new_structs; /* used for initializing in setup_structs */
#define DO_STOP 1
#define NO_STOP 0
#define DO_FREE 1
#define NO_FREE 0
#define DO_FORCE 1
#define NO_FORCE 0
static int add_lock_action(struct action *act);
static int str_to_lm(const char *str);
static int clear_lockspace_inactive(char *name);
/*
 * Translate a syslog priority name into the matching LOG_* value.
 *
 * "error" is accepted as an alias of "err" and "warn" as an alias of
 * "warning".  A name that matches nothing yields LOG_WARNING.
 */
static int _syslog_name_to_num(const char *name)
{
	static const struct {
		const char *label;
		int prio;
	} known[] = {
		{ "emerg",   LOG_EMERG },
		{ "alert",   LOG_ALERT },
		{ "crit",    LOG_CRIT },
		{ "err",     LOG_ERR },
		{ "error",   LOG_ERR },
		{ "warning", LOG_WARNING },
		{ "warn",    LOG_WARNING },
		{ "notice",  LOG_NOTICE },
		{ "info",    LOG_INFO },
		{ "debug",   LOG_DEBUG },
	};
	unsigned int k;

	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
		if (strcmp(name, known[k].label) == 0)
			return known[k].prio;
	}

	/* unrecognised names fall back to the warning level */
	return LOG_WARNING;
}
/*
 * Translate a LOG_* priority value into its canonical name.
 * Values that are not one of the eight syslog priorities yield "unknown".
 */
static const char *_syslog_num_to_name(int num)
{
	static const struct {
		int prio;
		const char *label;
	} known[] = {
		{ LOG_EMERG,   "emerg" },
		{ LOG_ALERT,   "alert" },
		{ LOG_CRIT,    "crit" },
		{ LOG_ERR,     "err" },
		{ LOG_WARNING, "warning" },
		{ LOG_NOTICE,  "notice" },
		{ LOG_INFO,    "info" },
		{ LOG_DEBUG,   "debug" },
	};
	unsigned int k;

	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
		if (known[k].prio == num)
			return known[k].label;
	}

	return "unknown";
}
/*
 * Return the seconds field of the CLOCK_MONOTONIC clock.
 * The result of clock_gettime() is not checked.
 */
static uint64_t monotime(void)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);

	return (uint64_t)now.tv_sec;
}
/*
 * Append len bytes from line to the circular buffer log_buf.
 *
 * *point is the write offset and *wrap is set to 1 once the write
 * offset has gone past the end of the buffer and restarted at 0.
 * Both are updated before returning.
 */
static void log_save_line(int len, char *line,
			  char *log_buf, unsigned int *point, unsigned int *wrap)
{
	unsigned int pos = *point;
	unsigned int wrapped = *wrap;
	int n;

	if (len < LOG_DUMP_SIZE - pos) {
		/* the whole line fits before the end of the buffer */
		memcpy(log_buf + pos, line, len);
		pos += len;

		if (pos == LOG_DUMP_SIZE) {
			pos = 0;
			wrapped = 1;
		}
	} else {
		/* copy a byte at a time, restarting at offset 0 at the end */
		for (n = 0; n < len; n++) {
			log_buf[pos] = line[n];
			pos++;

			if (pos == LOG_DUMP_SIZE) {
				pos = 0;
				wrapped = 1;
			}
		}
	}

	*point = pos;
	*wrap = wrapped;
}
/*
 * Format one log line: the current time(NULL) value, a space, the
 * formatted message, then a newline.  Output longer than the line
 * buffer is truncated.
 *
 * The line is always saved in the log_dump circular buffer.  It is
 * also passed to syslog() when level <= syslog_priority, and written
 * to stderr when daemon_debug is set.
 */
void log_level(int level, const char *fmt, ...)
{
	char line[LOG_LINE_SIZE] = { 0 };
	const int limit = LOG_LINE_SIZE - 1;
	va_list args;
	int used;
	int n;

	used = snprintf(line, limit, "%llu ", (unsigned long long)time(NULL));

	va_start(args, fmt);
	n = vsnprintf(line + used, limit - used, fmt, args);
	va_end(args);

	/* on truncation leave room for the newline and the terminator */
	if (n < limit - used)
		used += n;
	else
		used = limit - 1;

	line[used] = '\n';
	line[used + 1] = '\0';

	/* the saved length includes the newline but not the terminator */
	pthread_mutex_lock(&log_mutex);
	log_save_line(used + 1, line, log_dump, &log_point, &log_wrap);
	pthread_mutex_unlock(&log_mutex);

	if (level <= syslog_priority)
		syslog(level, "%s", line);

	if (daemon_debug)
		fprintf(stderr, "%s", line);
}
/*
 * Copy the saved log from the log_dump circular buffer into dump_buf,
 * oldest data first, and store the number of bytes copied in *dump_len.
 * Always returns 0.
 */
static int dump_log(int *dump_len)
{
	int older_len;

	pthread_mutex_lock(&log_mutex);

	if (log_wrap) {
		/*
		 * The buffer has wrapped: the oldest data runs from
		 * log_point to the end, followed by the data from the
		 * start of the buffer up to log_point.
		 */
		older_len = LOG_DUMP_SIZE - log_point;
		memcpy(dump_buf, log_dump + log_point, older_len);

		if (log_point)
			memcpy(dump_buf + older_len, log_dump, log_point);

		*dump_len = LOG_DUMP_SIZE;
	} else if (log_point) {
		/* not wrapped: everything written so far, minus the final byte */
		memcpy(dump_buf, log_dump, log_point - 1);
		*dump_len = log_point - 1;
	} else {
		/* nothing has been logged yet */
		*dump_len = 0;
	}

	pthread_mutex_unlock(&log_mutex);

	return 0;
}
/*
 * Allocate a zero-filled lockspace with its action and resource lists,
 * mutex and condition variable initialized.
 * Returns NULL (after logging an error) when memory is not available.
 */
struct lockspace *alloc_lockspace(void)
{
	struct lockspace *ls = calloc(1, sizeof(*ls));

	if (!ls) {
		log_error("out of memory for lockspace");
		return NULL;
	}

	INIT_LIST_HEAD(&ls->actions);
	INIT_LIST_HEAD(&ls->resources);
	pthread_mutex_init(&ls->mutex, NULL);
	pthread_cond_init(&ls->cond, NULL);

	return ls;
}
/*
 * Get a zero-filled action struct.
 *
 * A struct is taken from the unused_action pool when the pool is not
 * empty and alloc_new_structs is clear; otherwise it is malloc'ed.
 * Returns NULL (after logging an error) when malloc fails.
 */
static struct action *alloc_action(void)
{
	struct action *new_act;

	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_action_count && !alloc_new_structs) {
		new_act = list_first_entry(&unused_action, struct action, list);
		list_del(&new_act->list);
		unused_action_count--;
	} else {
		new_act = malloc(sizeof(struct action));
	}
	pthread_mutex_unlock(&unused_struct_mutex);

	if (!new_act) {
		log_error("out of memory for action");
		return NULL;
	}

	memset(new_act, 0, sizeof(struct action));
	return new_act;
}
/*
 * Get a zero-filled client struct.
 *
 * A struct is taken from the unused_client pool when the pool is not
 * empty and alloc_new_structs is clear; otherwise it is malloc'ed.
 * Returns NULL (after logging an error) when malloc fails.
 */
static struct client *alloc_client(void)
{
	struct client *new_cl;

	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_client_count && !alloc_new_structs) {
		new_cl = list_first_entry(&unused_client, struct client, list);
		list_del(&new_cl->list);
		unused_client_count--;
	} else {
		new_cl = malloc(sizeof(struct client));
	}
	pthread_mutex_unlock(&unused_struct_mutex);

	if (!new_cl) {
		log_error("out of memory for client");
		return NULL;
	}

	memset(new_cl, 0, sizeof(struct client));
	return new_cl;
}
/*
 * Get a zero-filled resource struct with empty locks and actions lists.
 *
 * Each resource is sized as struct resource plus resource_lm_data_size
 * trailing bytes.  A struct is taken from the unused_resource pool when
 * the pool is not empty and alloc_new_structs is clear; otherwise it is
 * malloc'ed.  Returns NULL (after logging an error) when malloc fails.
 */
static struct resource *alloc_resource(void)
{
	struct resource *new_r;

	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_resource_count && !alloc_new_structs) {
		new_r = list_first_entry(&unused_resource, struct resource, list);
		list_del(&new_r->list);
		unused_resource_count--;
	} else {
		new_r = malloc(sizeof(struct resource) + resource_lm_data_size);
	}
	pthread_mutex_unlock(&unused_struct_mutex);

	if (!new_r) {
		log_error("out of memory for resource");
		return NULL;
	}

	memset(new_r, 0, sizeof(struct resource) + resource_lm_data_size);
	INIT_LIST_HEAD(&new_r->locks);
	INIT_LIST_HEAD(&new_r->actions);
	return new_r;
}
/*
 * Get a zero-filled lock struct.
 *
 * A struct is taken from the unused_lock pool when the pool is not
 * empty and alloc_new_structs is clear; otherwise it is malloc'ed.
 * Returns NULL (after logging an error) when malloc fails.
 */
static struct lock *alloc_lock(void)
{
	struct lock *new_lk;

	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_lock_count && !alloc_new_structs) {
		new_lk = list_first_entry(&unused_lock, struct lock, list);
		list_del(&new_lk->list);
		unused_lock_count--;
	} else {
		new_lk = malloc(sizeof(struct lock));
	}
	pthread_mutex_unlock(&unused_struct_mutex);

	if (!new_lk) {
		log_error("out of memory for lock");
		return NULL;
	}

	memset(new_lk, 0, sizeof(struct lock));
	return new_lk;
}
/*
 * Release an action struct: keep it on the unused_action pool while the
 * pool holds fewer than MAX_UNUSED_ACTION entries, otherwise free() it.
 */
static void free_action(struct action *act)
{
	pthread_mutex_lock(&unused_struct_mutex);

	if (unused_action_count < MAX_UNUSED_ACTION) {
		list_add_tail(&act->list, &unused_action);
		unused_action_count++;
	} else {
		free(act);
	}

	pthread_mutex_unlock(&unused_struct_mutex);
}
/*
 * Release a client struct: keep it on the unused_client pool while the
 * pool holds fewer than MAX_UNUSED_CLIENT entries, otherwise free() it.
 */
static void free_client(struct client *cl)
{
	pthread_mutex_lock(&unused_struct_mutex);

	if (unused_client_count < MAX_UNUSED_CLIENT) {
		list_add_tail(&cl->list, &unused_client);
		unused_client_count++;
	} else {
		free(cl);
	}

	pthread_mutex_unlock(&unused_struct_mutex);
}
/*
 * Release a resource struct: keep it on the unused_resource pool while
 * the pool holds fewer than MAX_UNUSED_RESOURCE entries, otherwise
 * free() it.
 */
static void free_resource(struct resource *r)
{
	pthread_mutex_lock(&unused_struct_mutex);

	if (unused_resource_count < MAX_UNUSED_RESOURCE) {
		list_add_tail(&r->list, &unused_resource);
		unused_resource_count++;
	} else {
		free(r);
	}

	pthread_mutex_unlock(&unused_struct_mutex);
}
/*
 * Release a lock struct: keep it on the unused_lock pool while the pool
 * holds fewer than MAX_UNUSED_LOCK entries, otherwise free() it.
 */
static void free_lock(struct lock *lk)
{
	pthread_mutex_lock(&unused_struct_mutex);

	if (unused_lock_count < MAX_UNUSED_LOCK) {
		list_add_tail(&lk->list, &unused_lock);
		unused_lock_count++;
	} else {
		free(lk);
	}

	pthread_mutex_unlock(&unused_struct_mutex);
}
/*
 * Initialize the unused-struct pools and fill each to half of its
 * maximum size.
 *
 * resource_lm_data_size is set to the larger of the sanlock and dlm
 * lm_data sizes before any resource is allocated.
 *
 * Returns 0 on success, -ENOMEM if any allocation fails.
 */
static int setup_structs(void)
{
	struct action *act;
	struct client *cl;
	struct resource *r;
	struct lock *lk;
	int san_size, dlm_size;
	int rv = -ENOMEM;
	int n;

	san_size = lm_data_size_sanlock();
	dlm_size = lm_data_size_dlm();
	resource_lm_data_size = (dlm_size < san_size) ? san_size : dlm_size;

	pthread_mutex_init(&unused_struct_mutex, NULL);
	INIT_LIST_HEAD(&unused_action);
	INIT_LIST_HEAD(&unused_client);
	INIT_LIST_HEAD(&unused_resource);
	INIT_LIST_HEAD(&unused_lock);

	/*
	 * While this flag is set the alloc_ functions always malloc a new
	 * struct rather than reusing a pooled one, so that each
	 * alloc/free pair below adds one entry to the pool.
	 */
	alloc_new_structs = 1;

	for (n = MAX_UNUSED_ACTION / 2; n > 0; n--) {
		act = alloc_action();
		if (!act)
			goto done;
		free_action(act);
	}

	for (n = MAX_UNUSED_CLIENT / 2; n > 0; n--) {
		cl = alloc_client();
		if (!cl)
			goto done;
		free_client(cl);
	}

	for (n = MAX_UNUSED_RESOURCE / 2; n > 0; n--) {
		r = alloc_resource();
		if (!r)
			goto done;
		free_resource(r);
	}

	for (n = MAX_UNUSED_LOCK / 2; n > 0; n--) {
		lk = alloc_lock();
		if (!lk)
			goto done;
		free_lock(lk);
	}

	rv = 0;
done:
	alloc_new_structs = 0;
	return rv;
}
/*
 * Register fd in the pollfd array for POLLIN events.
 *
 * The first slot marked POLL_FD_UNUSED is reused.  If there is none,
 * the array is grown by ADD_POLL_SIZE slots and the first new slot is
 * used.  pollfd_maxi is raised when the chosen slot is above it.
 *
 * Returns the index of the slot, or -ENOMEM if the array cannot grow.
 */
static int add_pollfd(int fd)
{
	struct pollfd *grown;
	int grown_size;
	int slot;
	int rv;

	pthread_mutex_lock(&pollfd_mutex);

	/* look for a free slot in the existing array */
	for (slot = 0; slot < pollfd_size; slot++) {
		if (pollfd[slot].fd == POLL_FD_UNUSED)
			break;
	}

	if (slot < pollfd_size) {
		pollfd[slot].fd = fd;
		pollfd[slot].events = POLLIN;
		pollfd[slot].revents = 0;

		if (pollfd_maxi < slot)
			pollfd_maxi = slot;

		rv = slot;
		goto out;
	}

	/* no free slot, extend the array */
	grown_size = pollfd_size + ADD_POLL_SIZE;

	grown = realloc(pollfd, grown_size * sizeof(struct pollfd));
	if (!grown) {
		log_error("can't alloc new size %d for pollfd", grown_size);
		rv = -ENOMEM;
		goto out;
	}
	pollfd = grown;

	for (slot = pollfd_size; slot < grown_size; slot++) {
		pollfd[slot].fd = POLL_FD_UNUSED;
		pollfd[slot].events = 0;
		pollfd[slot].revents = 0;
	}

	/* the first of the new slots takes the fd */
	slot = pollfd_size;
	pollfd[slot].fd = fd;
	pollfd[slot].events = POLLIN;
	pollfd[slot].revents = 0;

	pollfd_maxi = slot;
	pollfd_size = grown_size;
	rv = slot;
out:
	pthread_mutex_unlock(&pollfd_mutex);
	return rv;
}
/*
 * Mark pollfd slot pi as unused.
 * A negative index is logged as an error and otherwise ignored.
 */
static void rem_pollfd(int pi)
{
	struct pollfd *slot;

	if (pi < 0) {
		log_error("rem_pollfd %d", pi);
		return;
	}

	pthread_mutex_lock(&pollfd_mutex);
	slot = &pollfd[pi];
	slot->fd = POLL_FD_UNUSED;
	slot->events = 0;
	slot->revents = 0;
	pthread_mutex_unlock(&pollfd_mutex);
}
/*
 * Name of a lock manager type (LD_LM_*) for log messages.
 * Unrecognised values yield "lm_unknown".
 */
static const char *lm_str(int x)
{
	if (x == LD_LM_NONE)
		return "none";
	if (x == LD_LM_DLM)
		return "dlm";
	if (x == LD_LM_SANLOCK)
		return "sanlock";

	return "lm_unknown";
}
/*
 * Short name of a resource type (LD_RT_*) for log messages.
 * Unrecognised values yield ".".
 */
static const char *rt_str(int x)
{
	if (x == LD_RT_GL)
		return "gl";
	if (x == LD_RT_VG)
		return "vg";
	if (x == LD_RT_LV)
		return "lv";

	return ".";
}
/*
 * Name of an operation (LD_OP_*) for log messages.
 * Unrecognised values yield "op_unknown".
 */
static const char *op_str(int x)
{
	static const struct {
		int op;
		const char *text;
	} op_names[] = {
		{ LD_OP_INIT,           "init" },
		{ LD_OP_FREE,           "free" },
		{ LD_OP_START,          "start" },
		{ LD_OP_STOP,           "stop" },
		{ LD_OP_LOCK,           "lock" },
		{ LD_OP_UPDATE,         "update" },
		{ LD_OP_CLOSE,          "close" },
		{ LD_OP_ENABLE,         "enable" },
		{ LD_OP_DISABLE,        "disable" },
		{ LD_OP_START_WAIT,     "start_wait" },
		{ LD_OP_STOP_ALL,       "stop_all" },
		{ LD_OP_RENAME_BEFORE,  "rename_before" },
		{ LD_OP_RENAME_FINAL,   "rename_final" },
		{ LD_OP_RUNNING_LM,     "running_lm" },
		{ LD_OP_FIND_FREE_LOCK, "find_free_lock" },
		{ LD_OP_FORGET_VG_NAME, "forget_vg_name" },
		{ LD_OP_KILL_VG,        "kill_vg" },
		{ LD_OP_DROP_VG,        "drop_vg" },
	};
	unsigned int k;

	for (k = 0; k < sizeof(op_names) / sizeof(op_names[0]); k++) {
		if (op_names[k].op == x)
			return op_names[k].text;
	}

	return "op_unknown";
}
/*
 * Short name of a lock mode (LD_LK_*) for log messages.
 * Unrecognised values yield ".".
 */
static const char *mode_str(int x)
{
	if (x == LD_LK_IV)
		return "iv";
	if (x == LD_LK_UN)
		return "un";
	if (x == LD_LK_NL)
		return "nl";
	if (x == LD_LK_SH)
		return "sh";
	if (x == LD_LK_EX)
		return "ex";

	return ".";
}
/*
 * Copy the text following the last ':' of args_in into last.
 *
 * At most MAX_ARGS bytes, including the terminator, are written to last.
 * Returns 0 on success, or -1 (leaving last untouched) when args_in is
 * NULL or contains no ':'.
 */
int last_string_from_args(char *args_in, char *last)
{
	const char *sep;

	if (!args_in)
		return -1;

	sep = strrchr(args_in, ':');
	if (!sep)
		return -1;

	snprintf(last, MAX_ARGS, "%s", sep + 1);
	return 0;
}
/*
 * Parse a leading "major.minor.patch" version out of args.
 *
 * Only the text before the first ':' (if any) is examined, and at most
 * MAX_ARGS bytes of args are read.  Each of major, minor and patch may
 * be NULL, in which case that component is not stored.  Components are
 * converted with atoi().
 *
 * Returns 0 on success, -1 when fewer than two '.' separators are found.
 */
int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch)
{
	char version[MAX_ARGS+1] = { 0 };
	char *colon, *dot1, *dot2;

	strncpy(version, args, MAX_ARGS);
	version[MAX_ARGS] = '\0';

	/* drop everything from the first colon on */
	colon = strchr(version, ':');
	if (colon)
		*colon = '\0';

	dot1 = strchr(version, '.');
	if (!dot1)
		return -1;

	dot2 = strchr(dot1 + 1, '.');
	if (!dot2)
		return -1;

	/* split the buffer into three separate strings */
	*dot1 = '\0';
	*dot2 = '\0';

	if (major)
		*major = atoi(version);
	if (minor)
		*minor = atoi(dot1 + 1);
	if (patch)
		*patch = atoi(dot2 + 1);

	return 0;
}
/*
* These are few enough that arrays of function pointers can
* be avoided.
*/
/*
 * Dispatch "prepare lockspace" to the dlm or sanlock implementation
 * according to ls->lm_type.
 *
 * The lock manager result is returned and, when act is not NULL, also
 * stored in act->lm_rv.  Any other lm_type returns -1 without touching
 * act.
 */
static int lm_prepare_lockspace(struct lockspace *ls, struct action *act)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_prepare_lockspace_dlm(ls);
		break;
	case LD_LM_SANLOCK:
		rv = lm_prepare_lockspace_sanlock(ls);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;

	return rv;
}
/*
 * Dispatch "add lockspace" to the dlm or sanlock implementation
 * according to ls->lm_type, passing the adopt flag through.
 *
 * The lock manager result is returned and, when act is not NULL, also
 * stored in act->lm_rv.  Any other lm_type returns -1 without touching
 * act.
 */
static int lm_add_lockspace(struct lockspace *ls, struct action *act, int adopt)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_add_lockspace_dlm(ls, adopt);
		break;
	case LD_LM_SANLOCK:
		rv = lm_add_lockspace_sanlock(ls, adopt);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;

	return rv;
}
/*
 * Dispatch "remove lockspace" to the dlm or sanlock implementation
 * according to ls->lm_type, passing free_vg through.
 *
 * The lock manager result is returned and, when act is not NULL, also
 * stored in act->lm_rv.  Any other lm_type returns -1 without touching
 * act.
 */
static int lm_rem_lockspace(struct lockspace *ls, struct action *act, int free_vg)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_rem_lockspace_dlm(ls, free_vg);
		break;
	case LD_LM_SANLOCK:
		rv = lm_rem_lockspace_sanlock(ls, free_vg);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;

	return rv;
}
/*
 * Dispatch a lock request on resource r to the dlm or sanlock
 * implementation according to ls->lm_type.  The retry pointer is
 * passed to the sanlock implementation only.
 *
 * The lock manager result is returned and, when act is not NULL, also
 * stored in act->lm_rv.  Any other lm_type returns -1 without touching
 * act.
 */
static int lm_lock(struct lockspace *ls, struct resource *r, int mode, struct action *act,
		   uint32_t *r_version, int *retry, int adopt)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_lock_dlm(ls, r, mode, r_version, adopt);
		break;
	case LD_LM_SANLOCK:
		rv = lm_lock_sanlock(ls, r, mode, r_version, retry, adopt);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;

	return rv;
}
/*
 * Dispatch a lock mode conversion on resource r to the dlm or sanlock
 * implementation according to ls->lm_type.
 *
 * The lock manager result is returned and, when act is not NULL, also
 * stored in act->lm_rv.  Any other lm_type returns -1 without touching
 * act.
 */
static int lm_convert(struct lockspace *ls, struct resource *r,
		      int mode, struct action *act, uint32_t r_version)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_convert_dlm(ls, r, mode, r_version);
		break;
	case LD_LM_SANLOCK:
		rv = lm_convert_sanlock(ls, r, mode, r_version);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;

	return rv;
}
/*
 * Dispatch an unlock of resource r to the dlm or sanlock implementation
 * according to ls->lm_type.
 *
 * The lock manager result is returned and, when act is not NULL, also
 * stored in act->lm_rv.  Any other lm_type returns -1 without touching
 * act.
 */
static int lm_unlock(struct lockspace *ls, struct resource *r, struct action *act,
		     uint32_t r_version, uint32_t lmu_flags)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_unlock_dlm(ls, r, r_version, lmu_flags);
		break;
	case LD_LM_SANLOCK:
		rv = lm_unlock_sanlock(ls, r, r_version, lmu_flags);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;

	return rv;
}
/*
 * Host query for a lockspace: 0 for dlm without calling the lock
 * manager, the result of lm_hosts_sanlock() for sanlock, and -1 for
 * any other lm_type.
 */
static int lm_hosts(struct lockspace *ls, int notify)
{
	switch (ls->lm_type) {
	case LD_LM_DLM:
		return 0;
	case LD_LM_SANLOCK:
		return lm_hosts_sanlock(ls, notify);
	default:
		return -1;
	}
}
/*
 * Dispatch removal of resource r to the dlm or sanlock implementation
 * according to ls->lm_type.  Nothing is done for any other lm_type.
 */
static void lm_rem_resource(struct lockspace *ls, struct resource *r)
{
	switch (ls->lm_type) {
	case LD_LM_DLM:
		lm_rem_resource_dlm(ls, r);
		break;
	case LD_LM_SANLOCK:
		lm_rem_resource_sanlock(ls, r);
		break;
	default:
		break;
	}
}
/*
 * Find a free lock offset: 0 for dlm without touching *free_offset,
 * the result of lm_find_free_lock_sanlock() for sanlock, and -1 for
 * any other lm_type.
 */
static int lm_find_free_lock(struct lockspace *ls, uint64_t *free_offset)
{
	switch (ls->lm_type) {
	case LD_LM_DLM:
		return 0;
	case LD_LM_SANLOCK:
		return lm_find_free_lock_sanlock(ls, free_offset);
	default:
		return -1;
	}
}
/*
* While adopting locks, actions originate from the adopt_locks()
* function, not from a client. So, these actions (flagged ADOPT),
* should be passed back to the adopt_locks() function through the
* adopt_results list, and not be sent back to a client via the
* client_list/client_thread.
*/
/*
 * Queue a completed action and signal client_cond.
 * Actions flagged LD_AF_ADOPT go on adopt_results, all others on
 * client_results.
 */
static void add_client_result(struct action *act)
{
	struct list_head *dest;

	pthread_mutex_lock(&client_mutex);

	dest = (act->flags & LD_AF_ADOPT) ? &adopt_results : &client_results;
	list_add_tail(&act->list, dest);

	pthread_cond_signal(&client_cond);
	pthread_mutex_unlock(&client_mutex);
}
/*
 * Return the first lock on r->locks whose client_id equals client_id,
 * or NULL if there is none.
 */
static struct lock *find_lock_client(struct resource *r, uint32_t client_id)
{
	struct lock *cur;

	list_for_each_entry(cur, &r->locks, list) {
		if (cur->client_id != client_id)
			continue;
		return cur;
	}

	return NULL;
}
/*
 * Return the first lock on r->locks that has LD_LF_PERSISTENT set,
 * or NULL if there is none.
 */
static struct lock *find_lock_persistent(struct resource *r)
{
	struct lock *cur;

	list_for_each_entry(cur, &r->locks, list) {
		if (!(cur->flags & LD_LF_PERSISTENT))
			continue;
		return cur;
	}

	return NULL;
}
/*
 * Return the first action on r->actions whose client_id equals
 * client_id, or NULL if there is none.
 */
static struct action *find_action_client(struct resource *r, uint32_t client_id)
{
	struct action *cur;

	list_for_each_entry(cur, &r->actions, list) {
		if (cur->client_id == client_id)
			return cur;
	}

	return NULL;
}
/*
 * Hand an action to the worker thread: append it to worker_list and
 * signal worker_cond.
 *
 * When worker_stop is set the action is not queued.
 * NOTE(review): in that case the action is neither queued nor released
 * here - confirm the callers do not rely on it being consumed.
 */
static void add_work_action(struct action *act)
{
	pthread_mutex_lock(&worker_mutex);

	if (worker_stop)
		goto out;

	list_add_tail(&act->list, &worker_list);
	pthread_cond_signal(&worker_cond);
out:
	pthread_mutex_unlock(&worker_mutex);
}
/*
 * Acquire a lock on resource r for the client/mode given in act.
 *
 * If r is already held in sh mode and act asks for sh, the lock manager
 * is not called; a new struct lock is just added and r->sh_count
 * incremented.  Otherwise the request is passed to the lock manager via
 * lm_lock(), and on success r->mode is set and a struct lock is added to
 * r->locks.
 *
 * When the lock manager reports a newer r_version than the one recorded
 * in r (or no version has been recorded yet), r->version is updated and
 * lvmetad, if connected, is told to invalidate its cached copy.
 *
 * The struct lock is allocated before any state is changed, so an
 * out-of-memory failure cannot leave a lock held in the lock manager
 * (or a bumped sh_count) with no struct lock tracking it.
 *
 * ls:    lockspace containing r
 * r:     resource to lock
 * act:   the lock request (mode, client_id, flags, lv_args, lv_name)
 * retry: passed through to lm_lock()
 *
 * Returns 0 on success, -ENOMEM if no struct lock can be allocated,
 * -EAGAIN or another negative value as returned by lm_lock().
 */
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry)
{
	struct lock *lk;
	uint32_t r_version = 0;
	int rv;

	if (r->type == LD_RT_LV)
		log_debug("S %s R %s res_lock mode %s (%s)", ls->name, r->name, mode_str(act->mode), act->lv_name);
	else
		log_debug("S %s R %s res_lock mode %s", ls->name, r->name, mode_str(act->mode));

	/* allocate first so that -ENOMEM is returned with nothing changed */
	if (!(lk = alloc_lock()))
		return -ENOMEM;

	/* an additional sh lock needs no lock manager call */
	if (r->mode == LD_LK_SH && act->mode == LD_LK_SH)
		goto add_lk;

	if (r->type == LD_RT_LV && act->lv_args[0])
		memcpy(r->lv_args, act->lv_args, MAX_ARGS);

	rv = lm_lock(ls, r, act->mode, act, &r_version, retry, act->flags & LD_AF_ADOPT);
	if (rv == -EAGAIN) {
		free_lock(lk);
		return rv;
	}
	if (rv < 0) {
		log_error("S %s R %s res_lock lm error %d", ls->name, r->name, rv);
		free_lock(lk);
		return rv;
	}

	log_debug("S %s R %s res_lock lm done r_version %u",
		  ls->name, r->name, r_version);

	if (sanlock_gl_dup && ls->sanlock_gl_enabled)
		act->flags |= LD_AF_DUP_GL_LS;

	/* lm_lock() reads new r_version */

	if ((r_version > r->version) || (!r->version && !r->version_zero_valid)) {
		/*
		 * New r_version of the lock: means that another
		 * host has changed data protected by this lock
		 * since the last time we acquired it. We
		 * should invalidate any local cache of the data
		 * protected by this lock and reread it from disk.
		 */
		r->version = r_version;

		/*
		 * When a new global lock is enabled in a new vg,
		 * it will have version zero, and the first time
		 * we use it we need to validate the global cache
		 * since we don't have any version history to know
		 * the state of the cache. The version could remain
		 * zero for a long time if no global state is changed
		 * to cause the GL version to be incremented to 1.
		 */
		r->version_zero_valid = 1;

		/*
		 * r is vglk: tell lvmetad to set the vg invalid
		 * flag, and provide the new r_version. If lvmetad finds
		 * that its cached vg has seqno less than the value
		 * we send here, it will set the vg invalid flag.
		 * lvm commands that read the vg from lvmetad, will
		 * see the invalid flag returned, will reread the
		 * vg from disk, update the lvmetad copy, and go on.
		 *
		 * r is global: tell lvmetad to set the global invalid
		 * flag. When commands see this flag returned from lvmetad,
		 * they will reread metadata from disk, update the lvmetad
		 * caches, and tell lvmetad to set global invalid to 0.
		 */

		if ((r->type == LD_RT_VG) && lvmetad_connected) {
			daemon_reply reply;
			char *uuid;

			log_debug("S %s R %s res_lock set lvmetad vg version %u",
				  ls->name, r->name, r_version);

			/* fall back to the lockspace name when no vg uuid is known */
			if (!ls->vg_uuid[0] || !strcmp(ls->vg_uuid, "none"))
				uuid = ls->name;
			else
				uuid = ls->vg_uuid;

			pthread_mutex_lock(&lvmetad_mutex);
			reply = daemon_send_simple(lvmetad_handle, "set_vg_info",
						   "token = %s", "skip",
						   "uuid = %s", uuid,
						   "version = %d", (int)r_version,
						   NULL);
			pthread_mutex_unlock(&lvmetad_mutex);

			if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK"))
				log_error("set_vg_info in lvmetad failed %d", reply.error);
			daemon_reply_destroy(reply);
		}

		if ((r->type == LD_RT_GL) && lvmetad_connected) {
			daemon_reply reply;

			log_debug("S %s R %s res_lock set lvmetad global invalid",
				  ls->name, r->name);

			pthread_mutex_lock(&lvmetad_mutex);
			reply = daemon_send_simple(lvmetad_handle, "set_global_info",
						   "token = %s", "skip",
						   "global_invalid = %d", 1,
						   NULL);
			pthread_mutex_unlock(&lvmetad_mutex);

			if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK"))
				log_error("set_global_info in lvmetad failed %d", reply.error);
			daemon_reply_destroy(reply);
		}
	}

	r->mode = act->mode;

add_lk:
	if (r->mode == LD_LK_SH)
		r->sh_count++;

	lk->client_id = act->client_id;
	lk->mode = act->mode;

	/* persistent locks are not owned by any client */
	if (act->flags & LD_AF_PERSISTENT) {
		lk->flags |= LD_LF_PERSISTENT;
		lk->client_id = 0;
	}

	list_add_tail(&lk->list, &r->locks);

	return 0;
}
/*
 * Convert an existing lock lk on resource r to the mode in act->mode.
 *
 * Returns 0 on success, -EAGAIN when an sh->ex conversion is requested
 * while more than one sh lock is counted on r, the negative value from
 * lm_convert() when the lock manager fails, or -1 when the old/new mode
 * pair is neither ex->sh nor sh->ex.
 */
static int res_convert(struct lockspace *ls, struct resource *r,
struct lock *lk, struct action *act)
{
uint32_t r_version;
int rv;
log_debug("S %s R %s res_convert mode %d", ls->name, r->name, act->mode);
/* sh->ex is refused while r->sh_count shows more than one sh lock */
if (act->mode == LD_LK_EX && lk->mode == LD_LK_SH && r->sh_count > 1)
return -EAGAIN;
/*
* lm_convert() writes new version (from ex)
* Same as lm_unlock()
*/
if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
/* global lock held ex: the version is always incremented */
r->version++;
lk->version = r->version;
r_version = r->version;
log_debug("S %s R %s res_convert r_version inc %u",
ls->name, r->name, r_version);
} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
/* vg lock held ex with a newer version recorded in lk (see res_update) */
r->version = lk->version;
r_version = r->version;
log_debug("S %s R %s res_convert r_version new %u", ls->name, r->name, r_version);
} else {
/* no new version to pass to the lock manager */
r_version = 0;
}
rv = lm_convert(ls, r, act->mode, act, r_version);
if (rv < 0) {
log_error("S %s R %s res_convert lm error %d", ls->name, r->name, rv);
return rv;
}
log_debug("S %s R %s res_convert lm done", ls->name, r->name);
/* the lock manager conversion succeeded; update the local sh count */
if (lk->mode == LD_LK_EX && act->mode == LD_LK_SH) {
r->sh_count = 1;
} else if (lk->mode == LD_LK_SH && act->mode == LD_LK_EX) {
r->sh_count = 0;
} else {
/* should not be possible */
log_error("S %s R %s res_convert invalid modes %d %d",
ls->name, r->name, lk->mode, act->mode);
return -1;
}
r->mode = act->mode;
lk->mode = act->mode;
return 0;
}
/*
 * Cancel a lock request that is still queued on r->actions.
 *
 * If act has LD_AF_PERSISTENT set, the first queued action that also has
 * LD_AF_PERSISTENT set is cancelled; otherwise the first queued action
 * with the same client_id as act is cancelled.  The cancelled action
 * gets result -ECANCELED, is removed from r->actions and is handed to
 * add_client_result().
 *
 * Returns -ECANCELED when an action was cancelled, -ENOENT when no
 * matching action is queued.
 */
static int res_cancel(struct lockspace *ls, struct resource *r,
		      struct action *act)
{
	struct action *cact;

	/*
	 * a client can cancel its own non-persistent lock requests,
	 * when could this happen?
	 *
	 * a client can cancel other client's persistent lock requests,
	 * when could this happen?
	 */

	if (act->flags & LD_AF_PERSISTENT) {
		list_for_each_entry(cact, &r->actions, list) {
			if (!(cact->flags & LD_AF_PERSISTENT))
				continue;
			goto do_cancel;
		}
	} else {
		cact = find_action_client(r, act->client_id);
		if (cact)
			goto do_cancel;
	}

	return -ENOENT;

do_cancel:
	/* client_id is unsigned, so it is printed with %u */
	log_debug("S %s R %s res_cancel client %u", ls->name, r->name, cact->client_id);
	cact->result = -ECANCELED;
	list_del(&cact->list);
	add_client_result(cact);

	return -ECANCELED;
}
/*
* lm_unlock() writes a new r_version (from ex)
*
* The r_version of the vg resource is incremented if
* an "update" was received for the vg lock. The update
* contains the new vg seqno from the vg metadata which is
* used as the r_version.
*
* The r_version of the global resource is automatically
* incremented when it is unlocked from ex mode.
*
* r_version is incremented every time a command releases
* the global lock from ex.
*/
/*
* persistent locks will not be unlocked for OP_CLOSE/act_close
* because act_close->flags does not have the PERSISTENT flag
* set, and a persistent lk->client_id is zero, which will not
* match the client in act_close->client_id.
*/
/*
 * Release a lock on resource r for the request in act.
 *
 * The lock to release is the persistent lock when act has
 * LD_AF_PERSISTENT set, otherwise the lock owned by act->client_id.
 * The lock manager is only called when the lock being released is not
 * an sh lock, or is the last sh lock counted on r.
 *
 * Returns 0 on success, -ENOENT when no matching lock is found, or the
 * negative value from lm_unlock() when the lock manager fails.
 */
static int res_unlock(struct lockspace *ls, struct resource *r,
struct action *act)
{
struct lock *lk;
uint32_t r_version;
int rv;
if (act->flags & LD_AF_PERSISTENT) {
lk = find_lock_persistent(r);
if (lk)
goto do_unlock;
} else {
lk = find_lock_client(r, act->client_id);
if (lk)
goto do_unlock;
}
/* a missing lock is only logged as an error when the op is not a close */
if (act->op != LD_OP_CLOSE)
log_error("S %s R %s res_unlock no locks", ls->name, r->name);
return -ENOENT;
do_unlock:
if (act->op == LD_OP_CLOSE)
log_debug("S %s R %s res_unlock from close", ls->name, r->name);
else if (r->type == LD_RT_LV)
log_debug("S %s R %s res_unlock (%s)", ls->name, r->name, act->lv_name);
else
log_debug("S %s R %s res_unlock", ls->name, r->name);
/* send unlock to lm when last sh lock is unlocked */
if (lk->mode == LD_LK_SH) {
r->sh_count--;
if (r->sh_count > 0)
goto rem_lk;
}
if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
/* global lock held ex: the version is always incremented */
r->version++;
lk->version = r->version;
r_version = r->version;
log_debug("S %s R %s res_unlock r_version inc %u", ls->name, r->name, r_version);
} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
/* vg lock held ex with a newer version recorded in lk (see res_update) */
r->version = lk->version;
r_version = r->version;
log_debug("S %s R %s res_unlock r_version new %u",
ls->name, r->name, r_version);
} else {
/* no new version to pass to the lock manager */
r_version = 0;
}
rv = lm_unlock(ls, r, act, r_version, 0);
if (rv < 0) {
/* should never happen, retry? */
log_error("S %s R %s res_unlock lm error %d", ls->name, r->name, rv);
return rv;
}
log_debug("S %s R %s res_unlock lm done", ls->name, r->name);
rem_lk:
/* drop the local lock struct; r becomes unlocked when no locks remain */
list_del(&lk->list);
free_lock(lk);
if (list_empty(&r->locks))
r->mode = LD_LK_UN;
return 0;
}
static int res_update(struct lockspace *ls, struct resource *r,
struct action *act)
{
struct lock *lk;
lk = find_lock_client(r, act->client_id);
if (!lk) {
log_error("S %s R %s res_update client %u lock not found",
ls->name, r->name, act->client_id);
return -ENOENT;
}
if (r->mode != LD_LK_EX) {
log_error("S %s R %s res_update version on non-ex lock",
ls->name, r->name);
return -EINVAL;
}
/* lk version will be written to lm by unlock */
if (act->flags & LD_AF_NEXT_VERSION)
lk->version = r->version + 1;
else
lk->version = act->version;
log_debug("S %s R %s res_update lk version to %u", ls->name, r->name, lk->version);
return 0;
}
/*
 * Free the lock manager state of a removed LV, dispatching on the
 * lockspace's lock manager type.
 *
 * There is nothing to deallocate when freeing a dlm LV, the LV
 * will simply be unlocked by rem_resource.
 *
 * Returns the sanlock result, 0 for dlm, -EINVAL for any other type.
 */
static int free_lv(struct lockspace *ls, struct resource *r)
{
	switch (ls->lm_type) {
	case LD_LM_SANLOCK:
		return lm_free_lv_sanlock(ls, r);
	case LD_LM_DLM:
		return 0;
	default:
		return -EINVAL;
	}
}
/*
 * Enable or disable the sanlock global lock in this lockspace.
 * Only valid for a sanlock lockspace, on the global lock resource,
 * while that resource is unlocked.
 *
 * NB. we can't do this if sanlock is holding any locks on
 * the resource; we'd be rewriting the resource from under
 * sanlock and would confuse or break it badly.  We don't
 * know what another host is doing, so these must be used
 * very carefully.
 *
 * Returns -EINVAL when a precondition fails, otherwise the result of
 * the sanlock enable/disable call.
 */
static int res_able(struct lockspace *ls, struct resource *r,
		    struct action *act)
{
	if (ls->lm_type != LD_LM_SANLOCK) {
		log_error("enable/disable only applies to sanlock");
		return -EINVAL;
	}

	if (r->type != LD_RT_GL) {
		log_error("enable/disable only applies to global lock");
		return -EINVAL;
	}

	if (r->mode != LD_LK_UN) {
		log_error("enable/disable only allowed on unlocked resource");
		return -EINVAL;
	}

	/* a gl is already enabled in some lockspace: refuse a second one */
	if (act->op == LD_OP_ENABLE && gl_lsname_sanlock[0]) {
		log_error("disable global lock in %s before enable in %s",
			  gl_lsname_sanlock, ls->name);
		return -EINVAL;
	}

	if ((act->op == LD_OP_DISABLE) && (act->flags & LD_AF_EX_DISABLE))
		return lm_ex_disable_gl_sanlock(ls);

	return lm_able_gl_sanlock(ls, act->op == LD_OP_ENABLE);
}
/*
 * Go through queued actions, and make lock/unlock calls on the resource
 * based on the actions and the existing lock state.
 *
 * All lock operations sent to the lock manager are non-blocking.
 * This is because sanlock does not support lock queueing.
 * Eventually we could enhance this to take advantage of lock
 * queueing when available (i.e. for the dlm).
 *
 * act_close_list: list of CLOSE actions, identifying clients that have
 * closed/terminated their lvmlockd connection, and whose locks should
 * be released.  Do not remove these actions from act_close_list.
 *
 * retry_out: set to 1 if the lock manager said we should retry,
 * meaning we should call res_process() again in a short while to retry.
 *
 * Each action that is completed here is removed from r->actions and
 * passed to add_client_result().  An action must not be dereferenced
 * after that call: it no longer belongs to this thread.
 *
 * On the r_free path the resource itself is removed from the lockspace
 * and freed, so the caller must not use r after this function returns
 * unless it knows r_free was not taken (callers iterate with a "safe"
 * list iterator).
 */
static void res_process(struct lockspace *ls, struct resource *r,
			struct list_head *act_close_list, int *retry_out)
{
	struct action *act, *safe, *act_close;
	struct lock *lk;
	int is_disable;
	int lm_retry;
	int rv;

	/*
	 * handle version updates for ex locks
	 * (new version will be written by unlock)
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_UPDATE) {
			rv = res_update(ls, r, act);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * handle explicit unlock actions
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if ((act->op == LD_OP_LOCK) &&
		    (act->mode == LD_LK_IV || act->mode == LD_LK_NL)) {
			act->result = -EINVAL;
			list_del(&act->list);
			add_client_result(act);
			/* act has been handed off, do not look at it again */
			continue;
		}

		if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
			rv = res_unlock(ls, r, act);

			if (rv == -ENOENT && (act->flags & LD_AF_UNLOCK_CANCEL))
				rv = res_cancel(ls, r, act);

			/*
			 * possible unlock results:
			 * 0: unlock succeeded
			 * -ECANCELED: cancel succeeded
			 * -ENOENT: nothing to unlock or cancel
			 */

			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * handle implicit unlocks due to client exit,
	 * also clear any outstanding actions for the client
	 */

	list_for_each_entry(act_close, act_close_list, list) {
		res_unlock(ls, r, act_close);
		res_cancel(ls, r, act_close);
	}

	/*
	 * handle freeing a lock for an lv that has been removed
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_FREE && act->rt == LD_RT_LV) {
			log_debug("S %s R %s free_lv", ls->name, r->name);
			rv = free_lv(ls, r);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
			goto r_free;
		}
	}

	/*
	 * handle enable/disable
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE) {
			/* remember the op now, act is handed off below */
			is_disable = (act->op == LD_OP_DISABLE);

			rv = res_able(ls, r, act);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);

			if (!rv && is_disable) {
				log_debug("S %s R %s free disabled", ls->name, r->name);
				goto r_free;
			}
		}
	}

	/*
	 * transient requests on existing transient locks
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			continue;

		lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* convert below */
			/*
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
			*/
			continue;
		} else {
			/* success */
			act->result = -EALREADY;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * persistent requests on existing persistent locks
	 *
	 * persistent locks are not owned by a client, so any
	 * existing with matching mode satisfies a request.
	 * only one persistent lock is kept on a resource.
	 * a single "unowned" persistent lock satisfies
	 * any/multiple client requests for a persistent lock.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (!(act->flags & LD_AF_PERSISTENT))
			continue;

		lk = find_lock_persistent(r);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* convert below */
			/*
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
			*/
			continue;
		} else {
			/* success */
			act->result = -EALREADY;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * transient requests with existing persistent locks
	 *
	 * Just grant the transient request and do not
	 * keep a record of it.  Assume that the persistent
	 * lock will not go away while the transient lock
	 * is needed.
	 *
	 * This would be used when an ex, persistent lv lock
	 * exists from activation, and then something like
	 * lvextend asks for a transient ex lock to change
	 * the lv.  The lv could not be unlocked by deactivation
	 * while the lvextend was running.
	 *
	 * The logic here for mixing T/P locks is not general
	 * support; there are a number of cases where it will
	 * not work: updating version number (lv locks have
	 * none), ex locks from multiple clients will not
	 * conflict, explicit un of the transient lock will fail.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			continue;

		lk = find_lock_persistent(r);
		if (!lk)
			continue;

		if ((lk->mode == LD_LK_EX) ||
		    (lk->mode == LD_LK_SH && act->mode == LD_LK_SH)) {
			act->result = 0;
			list_del(&act->list);
			add_client_result(act);
		} else {
			/* persistent lock is sh, transient request is ex */
			/* FIXME: can we remove this case? do a convert here? */
			log_debug("res_process %s existing persistent lock new transient", r->name);
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * persistent requests with existing transient locks
	 *
	 * If a client requests a P (persistent) lock for a T (transient)
	 * lock it already holds, we can just change T to P.  Fail if the
	 * same happens for locks from different clients.  Changing
	 * another client's lock from T to P may cause problems
	 * if that client tries to unlock or update version.
	 *
	 * I don't think this P/T combination will be used.
	 * It might be used if a command was able to take a P
	 * vg lock, in which case the T vg lock would already
	 * be held for reading.  If the T lock was sh, it would
	 * be converted to P ex.  If the T/P modes matched, the
	 * lock could just be changed from T to P.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (!(act->flags & LD_AF_PERSISTENT))
			continue;

		lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* FIXME: convert and change to persistent? */
			log_debug("res_process %s existing transient lock new persistent", r->name);
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
		} else {
			lk->flags |= LD_LF_PERSISTENT;
			lk->client_id = 0;
			act->result = 0;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * convert mode of existing locks
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			lk = find_lock_persistent(r);
		else
			lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode == act->mode) {
			/* should never happen, should be found above */
			log_error("convert same mode");
			continue;
		}

		/* convert fails immediately, no EAGAIN retry */
		rv = res_convert(ls, r, lk, act);
		act->result = rv;
		list_del(&act->list);
		add_client_result(act);
	}

	/*
	 * Cases above are all requests addressed by existing locks.
	 * Below handles the rest.  Transient and persistent are
	 * handled the same, except
	 * - if mode of existing lock is incompat with requested,
	 *   leave the act on r->actions
	 * - if r mode is EX, any lock action is blocked, just quit
	 *
	 * Retry a lock request that fails due to a lock conflict (-EAGAIN):
	 * if we have not exceeded max retries and lm sets lm_retry (sanlock
	 * transient conflicts from shared lock implementation), or r type
	 * is gl or vg (transient real conflicts we want to hide from command).
	 * lv lock conflicts won't be transient so don't retry them.
	 */

	if (r->mode == LD_LK_EX)
		return;

	/*
	 * r mode is SH or UN, pass lock-sh actions to lm
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		/* grant in order, so break here */
		if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX)
			break;

		if (act->op == LD_OP_LOCK && act->mode == LD_LK_SH) {
			lm_retry = 0;

			rv = res_lock(ls, r, act, &lm_retry);
			if ((rv == -EAGAIN) &&
			    (act->retries <= act->max_retries) &&
			    (lm_retry || (r->type != LD_RT_LV))) {
				/* leave act on list */
				log_debug("S %s R %s res_lock EAGAIN retry", ls->name, r->name);
				act->retries++;
				*retry_out = 1;
			} else {
				act->result = rv;
				list_del(&act->list);
				add_client_result(act);
			}
			if (rv == -EUNATCH)
				goto r_free;
		}
	}

	/*
	 * r mode is SH, any ex lock action is blocked, just quit
	 */

	if (r->mode == LD_LK_SH)
		return;

	/*
	 * r mode is UN, pass lock-ex action to lm
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX) {
			lm_retry = 0;

			rv = res_lock(ls, r, act, &lm_retry);
			if ((rv == -EAGAIN) &&
			    (act->retries <= act->max_retries) &&
			    (lm_retry || (r->type != LD_RT_LV))) {
				/* leave act on list */
				log_debug("S %s R %s res_lock EAGAIN retry", ls->name, r->name);
				act->retries++;
				*retry_out = 1;
			} else {
				act->result = rv;
				list_del(&act->list);
				add_client_result(act);
			}
			if (rv == -EUNATCH)
				goto r_free;
			/* only one ex request is attempted per pass */
			break;
		}
	}

	return;

r_free:
	/* For the EUNATCH case it may be possible there are queued actions? */
	list_for_each_entry_safe(act, safe, &r->actions, list) {
		log_error("S %s R %s res_process r_free cancel %s client %d",
			  ls->name, r->name, op_str(act->op), act->client_id);
		act->result = -ECANCELED;
		list_del(&act->list);
		add_client_result(act);
	}
	log_debug("S %s R %s res_process free", ls->name, r->name);
	lm_rem_resource(ls, r);
	list_del(&r->list);
	free_resource(r);
}
/* selectors for for_each_lock(): which kind of resource to look at */
#define LOCKS_EXIST_ANY 1
#define LOCKS_EXIST_GL 2
#define LOCKS_EXIST_VG 3
#define LOCKS_EXIST_LV 4

/*
 * Report whether the lockspace has at least one lock on a resource of
 * the kind selected by locks_do (one of LOCKS_EXIST_*).
 *
 * Returns 1 if such a lock exists, 0 otherwise (including for an
 * unrecognised locks_do value).
 */
static int for_each_lock(struct lockspace *ls, int locks_do)
{
	struct resource *res;
	int selected;

	list_for_each_entry(res, &ls->resources, list) {
		switch (locks_do) {
		case LOCKS_EXIST_ANY:
			selected = 1;
			break;
		case LOCKS_EXIST_GL:
			selected = (res->type == LD_RT_GL);
			break;
		case LOCKS_EXIST_VG:
			selected = (res->type == LD_RT_VG);
			break;
		case LOCKS_EXIST_LV:
			selected = (res->type == LD_RT_LV);
			break;
		default:
			selected = 0;
			break;
		}

		/* any entry on the resource's lock list is enough */
		if (selected && !list_empty(&res->locks))
			return 1;
	}

	return 0;
}
/*
 * Drop every lock and every resource in the lockspace.
 *
 * For each resource: free all lock structs, and if the resource was
 * still locked, unlock it in the lock manager (passing LMUF_FREE_VG
 * when free_vg is set) and cancel any actions still queued on it with
 * -ECANCELED.  The resource is then removed and freed.
 *
 * Returns the number of lock structs that were found and freed.
 */
static int clear_locks(struct lockspace *ls, int free_vg, int drop_vg)
{
	struct resource *res, *res_next;
	struct lock *held, *held_next;
	struct action *pending, *pending_next;
	uint32_t max_lk_version;
	uint32_t new_version;
	int freed = 0;
	int err;

	list_for_each_entry_safe(res, res_next, &ls->resources, list) {
		max_lk_version = 0;

		list_for_each_entry_safe(held, held_next, &res->locks, list) {
			freed++;

			/*
			 * Stopping a lockspace shouldn't happen with LV locks
			 * still held, but it will be stopped with GL and VG
			 * locks held.  The drop_vg case may see LV locks.
			 */
			if ((held->flags & LD_LF_PERSISTENT) && !drop_vg)
				log_error("S %s R %s clear lock persistent", ls->name, res->name);
			else
				log_debug("S %s R %s clear lock mode %s client %d", ls->name, res->name, mode_str(held->mode), held->client_id);

			/* track the highest version recorded on any lock */
			if (held->version > max_lk_version)
				max_lk_version = held->version;

			list_del(&held->list);
			free_lock(held);
		}

		/* an unlocked resource has nothing to release in the lm */
		if (res->mode != LD_LK_UN) {
			new_version = 0;

			if ((res->mode == LD_LK_EX) && (res->type == LD_RT_GL)) {
				res->version++;
				new_version = res->version;
				log_debug("S %s R %s clear_locks r_version inc %u",
					  ls->name, res->name, new_version);
			} else if ((res->mode == LD_LK_EX) && (res->type == LD_RT_VG) &&
				   (max_lk_version > res->version)) {
				res->version = max_lk_version;
				new_version = res->version;
				log_debug("S %s R %s clear_locks r_version new %u",
					  ls->name, res->name, new_version);
			}

			err = lm_unlock(ls, res, NULL, new_version, free_vg ? LMUF_FREE_VG : 0);
			if (err < 0) {
				/* should never happen */
				log_error("S %s R %s clear_locks free %d drop %d lm unlock error %d",
					  ls->name, res->name, free_vg, drop_vg, err);
			}

			list_for_each_entry_safe(pending, pending_next, &res->actions, list) {
				log_error("S %s R %s clear_locks cancel %s client %d",
					  ls->name, res->name, op_str(pending->op), pending->client_id);
				pending->result = -ECANCELED;
				list_del(&pending->list);
				add_client_result(pending);
			}
		}

		log_debug("S %s R %s free", ls->name, res->name);
		lm_rem_resource(ls, res);
		list_del(&res->list);
		free_resource(res);
	}

	return freed;
}
/*
 * find and return the resource that is referenced by the action
 * - there is a single gl resource per lockspace
 * - there is a single vg resource per lockspace
 * - there can be many lv resources per lockspace, compare names
 *
 * When nothing matches and nocreate is zero, a new unlocked resource of
 * the action's type is allocated, named, and appended to ls->resources.
 *
 * Returns the resource, or NULL if not found with nocreate set, or if
 * allocation fails.
 */
static struct resource *find_resource_act(struct lockspace *ls,
					  struct action *act,
					  int nocreate)
{
	struct resource *res;
	const char *new_name = NULL;

	list_for_each_entry(res, &ls->resources, list) {
		if (res->type != act->rt)
			continue;

		switch (res->type) {
		case LD_RT_GL:
		case LD_RT_VG:
			return res;
		case LD_RT_LV:
			/* lv resources are named by the lv uuid */
			if (strcmp(res->name, act->lv_uuid) == 0)
				return res;
			break;
		default:
			break;
		}
	}

	if (nocreate)
		return NULL;

	res = alloc_resource();
	if (!res)
		return NULL;

	res->type = act->rt;
	res->mode = LD_LK_UN;

	switch (res->type) {
	case LD_RT_GL:
		new_name = R_NAME_GL;
		break;
	case LD_RT_VG:
		new_name = R_NAME_VG;
		break;
	case LD_RT_LV:
		new_name = act->lv_uuid;
		break;
	default:
		break;
	}

	if (new_name)
		strncpy(res->name, new_name, MAX_NAME);

	list_add_tail(&res->list, &ls->resources);

	return res;
}
static void free_ls_resources(struct lockspace *ls)
{
struct resource *r, *r_safe;
list_for_each_entry_safe(r, r_safe, &ls->resources, list) {
lm_rem_resource(ls, r);
list_del(&r->list);
free_resource(r);
}
}
/*
* ls is the vg being removed that holds the global lock.
* check if any other vgs will be left without a global lock.
*/
static int other_sanlock_vgs_exist(struct lockspace *ls_rem)
{
struct lockspace *ls;
list_for_each_entry(ls, &lockspaces_inactive, list) {
if (ls->lm_type != LD_LM_SANLOCK)
continue;
log_debug("other sanlock vg exists inactive %s", ls->name);
return 1;
}
list_for_each_entry(ls, &lockspaces, list) {
if (ls->lm_type != LD_LM_SANLOCK)
continue;
if (!strcmp(ls->name, ls_rem->name))
continue;
log_debug("other sanlock vg exists %s", ls->name);
return 1;
}
return 0;
}
/*
 * Decide whether an action may still be processed after kill_vg.
 * LOCK is the main thing we're interested in; the others are unlikely.
 *
 * Unlock requests (LOCK with mode UN) are always allowed.  Other LOCK
 * requests and the ops listed below are refused; every remaining op is
 * allowed.
 *
 * Returns 1 to process the action, 0 to refuse it.
 */
static int process_op_during_kill(struct action *act)
{
	switch (act->op) {
	case LD_OP_LOCK:
		return (act->mode == LD_LK_UN) ? 1 : 0;
	case LD_OP_ENABLE:
	case LD_OP_DISABLE:
	case LD_OP_UPDATE:
	case LD_OP_RENAME_BEFORE:
	case LD_OP_RENAME_FINAL:
	case LD_OP_FIND_FREE_LOCK:
		return 0;
	default:
		return 1;
	}
}
/*
 * Process actions queued for this lockspace by
 * client_recv_action / add_lock_action.
 *
 * The lockspace_thread can touch its own ls struct without holding
 * lockspaces_mutex until it sets ls->thread_done, after which it
 * cannot touch ls without holding lockspaces_mutex.
 *
 * Overall flow of the thread:
 * 1. take an optional initial START action off ls->actions,
 * 2. prepare and join the lock manager lockspace, reporting the result
 *    for the START action (early unless LD_AF_WAIT is set),
 * 3. loop: wait for thread_work, drain ls->actions (lockspace-level ops
 *    are handled inline, lock ops are queued on their resource), then
 *    run res_process() on every resource,
 * 4. on stop: clear all locks, leave the lockspace, return the
 *    remaining actions as results, set thread_done and wake the worker.
 *
 * arg_in is the struct lockspace for this thread.  Always returns NULL.
 */
#define LOCK_RETRY_MS 1000 /* milliseconds to delay between retry */
static void *lockspace_thread_main(void *arg_in)
{
struct lockspace *ls = arg_in;
struct resource *r, *r2;
struct action *add_act, *act, *safe;
struct action *act_op_free = NULL;
struct list_head tmp_act;
struct list_head act_close;
int free_vg = 0;
int drop_vg = 0;
int error = 0;
int adopt_flag = 0;
int wait_flag = 0;
int retry;
int rv;
INIT_LIST_HEAD(&act_close);
/* first action may be client add */
pthread_mutex_lock(&ls->mutex);
act = NULL;
add_act = NULL;
if (!list_empty(&ls->actions)) {
act = list_first_entry(&ls->actions, struct action, list);
if (act->op == LD_OP_START) {
add_act = act;
list_del(&add_act->list);
if (add_act->flags & LD_AF_WAIT)
wait_flag = 1;
if (add_act->flags & LD_AF_ADOPT)
adopt_flag = 1;
}
}
pthread_mutex_unlock(&ls->mutex);
log_debug("S %s lm_add_lockspace %s wait %d adopt %d",
ls->name, lm_str(ls->lm_type), wait_flag, adopt_flag);
/*
* The prepare step does not wait for anything and is quick;
* it tells us if the parameters are valid and the lm is running.
*/
error = lm_prepare_lockspace(ls, add_act);
/*
* Without the wait flag, or when prepare failed, the start action is
* answered now; add_act is cleared so it is not answered a second time.
*/
if (add_act && (!wait_flag || error)) {
/* send initial join result back to client */
add_act->result = error;
add_client_result(add_act);
add_act = NULL;
}
/*
* The actual lockspace join can take a while.
*/
if (!error) {
error = lm_add_lockspace(ls, add_act, adopt_flag);
log_debug("S %s lm_add_lockspace done %d", ls->name, error);
/* a second lockspace with the sanlock gl enabled: flag the duplicate */
if (ls->sanlock_gl_enabled && gl_lsname_sanlock[0] &&
strcmp(ls->name, gl_lsname_sanlock))
sanlock_gl_dup = 1;
if (add_act) {
/* send final join result back to client */
add_act->result = error;
add_client_result(add_act);
}
}
/* publish the outcome of the join under ls->mutex */
pthread_mutex_lock(&ls->mutex);
if (error) {
ls->thread_stop = 1;
ls->create_fail = 1;
} else {
ls->create_done = 1;
}
pthread_mutex_unlock(&ls->mutex);
/* the join failed: there is nothing to clear or leave, just finish up */
if (error)
goto out_act;
while (1) {
pthread_mutex_lock(&ls->mutex);
/* sleep until there is work; leave the main loop when asked to stop */
while (!ls->thread_work) {
if (ls->thread_stop) {
pthread_mutex_unlock(&ls->mutex);
goto out_rem;
}
pthread_cond_wait(&ls->cond, &ls->mutex);
}
/*
* Process all the actions queued for this lockspace.
* The client thread queues actions on ls->actions.
*
* Here, take all the actions off of ls->actions, and:
*
* - For lock operations, move the act to r->actions.
* These lock actions/operations processed by res_process().
*
* - For non-lock operations, e.g. related to managing
* the lockspace, process them in this loop.
*/
while (1) {
if (list_empty(&ls->actions)) {
ls->thread_work = 0;
break;
}
act = list_first_entry(&ls->actions, struct action, list);
if (act->op == LD_OP_KILL_VG && act->rt == LD_RT_VG) {
/* Continue processing until DROP_VG arrives. */
log_debug("S %s kill_vg", ls->name);
ls->kill_vg = 1;
list_del(&act->list);
act->result = 0;
add_client_result(act);
continue;
}
if (ls->kill_vg && !process_op_during_kill(act)) {
log_debug("S %s disallow op %s after kill_vg", ls->name, op_str(act->op));
list_del(&act->list);
act->result = -EVGKILLED;
add_client_result(act);
continue;
}
if (act->op == LD_OP_DROP_VG && act->rt == LD_RT_VG) {
/*
* If leases are released after i/o errors begin
* but before lvmlockctl --kill, then the VG is not
* killed, but drop is still needed to clean up the
* VG, so in that case there would be a drop op without
* a preceding kill op.
*/
if (!ls->kill_vg)
log_debug("S %s received drop without kill", ls->name);
log_debug("S %s drop_vg", ls->name);
/* the act stays on ls->actions; it is given its result at out_act */
ls->thread_work = 0;
ls->thread_stop = 1;
drop_vg = 1;
break;
}
if (act->op == LD_OP_STOP) {
/* thread_stop is already set */
ls->thread_work = 0;
break;
}
if (act->op == LD_OP_FREE && act->rt == LD_RT_VG) {
/* vgremove */
log_debug("S %s checking for lockspace hosts", ls->name);
rv = lm_hosts(ls, 1);
if (rv) {
/*
* Checking for hosts here in addition to after the
* main loop allows vgremove to fail and be rerun
* after the ls is stopped on other hosts.
*/
log_error("S %s lockspace hosts %d", ls->name, rv);
list_del(&act->list);
act->result = -EBUSY;
add_client_result(act);
continue;
}
ls->thread_work = 0;
ls->thread_stop = 1;
free_vg = 1;
break;
}
if (act->op == LD_OP_RENAME_BEFORE && act->rt == LD_RT_VG) {
/* vgrename */
log_debug("S %s checking for lockspace hosts", ls->name);
rv = lm_hosts(ls, 1);
if (rv) {
log_error("S %s lockspace hosts %d", ls->name, rv);
list_del(&act->list);
act->result = -EBUSY;
add_client_result(act);
continue;
}
ls->thread_work = 0;
ls->thread_stop = 1;
/* Do we want to check hosts again below like vgremove? */
break;
}
if (act->op == LD_OP_FIND_FREE_LOCK && act->rt == LD_RT_VG) {
uint64_t free_offset = 0;
log_debug("S %s find free lock", ls->name);
rv = lm_find_free_lock(ls, &free_offset);
log_debug("S %s find free lock %d offset %llu",
ls->name, rv, (unsigned long long)free_offset);
ls->free_lock_offset = free_offset;
list_del(&act->list);
act->result = rv;
add_client_result(act);
continue;
}
list_del(&act->list);
/* applies to all resources */
if (act->op == LD_OP_CLOSE) {
list_add(&act->list, &act_close);
continue;
}
/*
* All the other op's are for locking.
* Find the specific resource that the lock op is for,
* and add the act to the resource's list of lock ops.
*
* (This creates a new resource if the one named in
* the act is not found.)
*/
/* a FREE op never creates a resource (nocreate = 1) */
r = find_resource_act(ls, act, (act->op == LD_OP_FREE) ? 1 : 0);
if (!r) {
act->result = (act->op == LD_OP_FREE) ? -ENOENT : -ENOMEM;
add_client_result(act);
continue;
}
list_add_tail(&act->list, &r->actions);
log_debug("S %s R %s action %s %s", ls->name, r->name,
op_str(act->op), mode_str(act->mode));
}
pthread_mutex_unlock(&ls->mutex);
/*
* Process the lock operations that have been queued for each
* resource.
*/
retry = 0;
/* the safe iterator is needed: res_process() may remove and free r */
list_for_each_entry_safe(r, r2, &ls->resources, list)
res_process(ls, r, &act_close, &retry);
/* close actions have now been applied to every resource, free them */
list_for_each_entry_safe(act, safe, &act_close, list) {
list_del(&act->list);
free_action(act);
}
/*
* NOTE(review): thread_work is set here without holding ls->mutex,
* unlike the other writes in this function -- confirm this is
* intended.
*/
if (retry) {
ls->thread_work = 1;
usleep(LOCK_RETRY_MS * 1000);
}
}
out_rem:
log_debug("S %s stopping", ls->name);
/*
* For sanlock, we need to unlock any existing locks
* before removing the lockspace, otherwise the sanlock
* daemon will kill us when the lockspace goes away.
* For dlm, we leave with force, so all locks will
* automatically be dropped when we leave the lockspace,
* so unlocking all before leaving could be skipped.
*
* Blindly dropping all existing locks must only be
* allowed in emergency/force situations, otherwise it's
* obviously dangerous, since the lock holders are still
* operating under the assumption that they hold the lock.
* drop_vg drops all existing locks, but should only
* happen when the VG access has been forcibly and
* successfully terminated.
*
* For vgremove of a sanlock vg, the vg lock will be held,
* and possibly the gl lock if this vg holds the gl.
* sanlock vgremove wants to unlock-rename these locks.
*/
log_debug("S %s clearing locks", ls->name);
/* the lock count returned by clear_locks() is not used below */
rv = clear_locks(ls, free_vg, drop_vg);
/*
* Tell any other hosts in the lockspace to leave it
* before we remove it (for vgremove). We do this
* before leaving the lockspace ourself because we
* need to be in the lockspace to see others.
*/
if (free_vg) {
log_debug("S %s checking for lockspace hosts", ls->name);
rv = lm_hosts(ls, 1);
if (rv)
log_error("S %s other lockspace hosts %d", ls->name, rv);
}
/*
* Leave the lockspace.
*/
rv = lm_rem_lockspace(ls, NULL, free_vg);
log_debug("S %s rem_lockspace done %d", ls->name, rv);
out_act:
/*
* Move remaining actions to results; this will usually (always?)
* be only the stop action.
*/
INIT_LIST_HEAD(&tmp_act);
pthread_mutex_lock(&ls->mutex);
/* ops that caused the stop succeed, anything else gets -ENOLS */
list_for_each_entry_safe(act, safe, &ls->actions, list) {
if (act->op == LD_OP_FREE) {
act_op_free = act;
act->result = 0;
} else if (act->op == LD_OP_STOP)
act->result = 0;
else if (act->op == LD_OP_DROP_VG)
act->result = 0;
else if (act->op == LD_OP_RENAME_BEFORE)
act->result = 0;
else
act->result = -ENOLS;
list_del(&act->list);
list_add_tail(&act->list, &tmp_act);
}
pthread_mutex_unlock(&ls->mutex);
/*
* If this freed a sanlock vg that had gl enabled, and other sanlock
* vgs exist, return a flag so the command can warn that the gl has
* been removed and may need to be enabled in another sanlock vg.
*/
if (free_vg && ls->sanlock_gl_enabled && act_op_free) {
pthread_mutex_lock(&lockspaces_mutex);
if (other_sanlock_vgs_exist(ls))
act_op_free->flags |= LD_AF_WARN_GL_REMOVED;
pthread_mutex_unlock(&lockspaces_mutex);
}
/* hand all collected actions to client_results in one critical section */
pthread_mutex_lock(&client_mutex);
list_for_each_entry_safe(act, safe, &tmp_act, list) {
list_del(&act->list);
list_add_tail(&act->list, &client_results);
}
pthread_cond_signal(&client_cond);
pthread_mutex_unlock(&client_mutex);
/* after thread_done is set, ls may only be touched under lockspaces_mutex */
pthread_mutex_lock(&lockspaces_mutex);
ls->thread_done = 1;
ls->free_vg = free_vg;
ls->drop_vg = drop_vg;
pthread_mutex_unlock(&lockspaces_mutex);
/*
* worker_thread will join this thread, and free the
* ls or move it to lockspaces_inactive.
*/
pthread_mutex_lock(&worker_mutex);
worker_wake = 1;
pthread_cond_signal(&worker_cond);
pthread_mutex_unlock(&worker_mutex);
return NULL;
}
/*
 * Report whether the list of lockspaces is currently empty.
 * Takes lockspaces_mutex for the duration of the check.
 *
 * Returns non-zero if there are no lockspaces, 0 otherwise.
 */
int lockspaces_empty(void)
{
	int none_left;

	pthread_mutex_lock(&lockspaces_mutex);
	none_left = list_empty(&lockspaces);
	pthread_mutex_unlock(&lockspaces_mutex);

	return none_left;
}
/*
* lockspaces_mutex is locked
*
* When duplicate sanlock global locks have been seen,
* this function has a secondary job of counting the
* number of lockspaces that exist with the gl enabled,
* with the side effect of setting sanlock_gl_dup back to
* zero when the duplicates have been removed/disabled.
*/
static struct lockspace *find_lockspace_name(char *ls_name)
{
struct lockspace *ls_found = NULL;
struct lockspace *ls;
int gl_count = 0;
list_for_each_entry(ls, &lockspaces, list) {
if (!strcmp(ls->name, ls_name))
ls_found = ls;
if (!sanlock_gl_dup && ls_found)
return ls_found;
if (sanlock_gl_dup && ls->sanlock_gl_enabled)
gl_count++;
}
/* this is the side effect we want from this function */
if (sanlock_gl_dup && gl_count < 2)
sanlock_gl_dup = 0;
return ls_found;
}
/*
 * Build the lockspace name for a vg: LVM_LS_PREFIX followed by vg_name,
 * written into ls_name.
 *
 * If the vg name plus the 4 character prefix is longer than the max
 * lockspace name (MAX_NAME), an error is logged, ls_name is left
 * untouched and -1 is returned.  For sanlock vgs, the name is shortened
 * further to 48 in the sanlock code.
 *
 * Returns 0 on success, -1 if the vg name is too long.
 */
static int vg_ls_name(const char *vg_name, char *ls_name)
{
	if (strlen(vg_name) + 4 <= MAX_NAME) {
		snprintf(ls_name, MAX_NAME, "%s%s", LVM_LS_PREFIX, vg_name);
		return 0;
	}

	log_error("vg name too long %s", vg_name);
	return -1;
}
/*
 * Copy the name of the lockspace holding the global lock into ls_name:
 * the dlm name when gl_use_dlm is set, else the sanlock name when
 * gl_use_sanlock is set, else all zeros.  Exactly MAX_NAME bytes of
 * ls_name are written in every case.
 *
 * FIXME: add mutex for gl_lsname_ ?
 */
static void gl_ls_name(char *ls_name)
{
	const char *gl_name = NULL;

	if (gl_use_dlm)
		gl_name = gl_lsname_dlm;
	else if (gl_use_sanlock)
		gl_name = gl_lsname_sanlock;

	if (gl_name)
		memcpy(ls_name, gl_name, MAX_NAME);
	else
		memset(ls_name, 0, MAX_NAME);
}
/*
 * Allocate a new lockspace with its initial vg resource, add it to the
 * list of lockspaces and start the thread that will run it.
 *
 * ls_name:  name of the new lockspace
 * vg_name, vg_uuid, vg_args: optional (may be NULL), copied into ls
 * lm_type:  lock manager type for the lockspace
 * act:      optional start action; it supplies version, client_id and
 *           host_id, and is queued as the first action of the new thread
 *
 * When this function returns an error, the caller needs to deal
 * with act (in the cases where act exists).  On error act is not left
 * on any list owned by this function.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EAGAIN if a
 * lockspace with this name exists and is stopping, -EEXIST if it exists
 * and is running, or a negative errno value if the thread could not be
 * created.
 */
static int add_lockspace_thread(const char *ls_name,
				const char *vg_name,
				const char *vg_uuid,
				int lm_type, const char *vg_args,
				struct action *act)
{
	struct lockspace *ls, *ls2;
	struct resource *r;
	uint32_t version = 0;
	int rv;

	if (act)
		version = act->version;

	log_debug("add_lockspace_thread %s %s version %u",
		  lm_str(lm_type), ls_name, version);

	if (!(ls = alloc_lockspace()))
		return -ENOMEM;

	strncpy(ls->name, ls_name, MAX_NAME);
	ls->lm_type = lm_type;

	if (act)
		ls->start_client_id = act->client_id;

	if (vg_uuid)
		strncpy(ls->vg_uuid, vg_uuid, 64);

	if (vg_name)
		strncpy(ls->vg_name, vg_name, MAX_NAME);

	if (vg_args)
		strncpy(ls->vg_args, vg_args, MAX_ARGS);

	if (act)
		ls->host_id = act->host_id;

	/* every lockspace starts out with its vg resource, unlocked */
	if (!(r = alloc_resource())) {
		free(ls);
		return -ENOMEM;
	}

	r->type = LD_RT_VG;
	r->mode = LD_LK_UN;
	r->version = version;
	strncpy(r->name, R_NAME_VG, MAX_NAME);
	list_add_tail(&r->list, &ls->resources);

	pthread_mutex_lock(&lockspaces_mutex);
	ls2 = find_lockspace_name(ls->name);
	if (ls2) {
		/* a stopping lockspace of the same name can be retried later */
		if (ls2->thread_stop)
			rv = -EAGAIN;
		else
			rv = -EEXIST;
		pthread_mutex_unlock(&lockspaces_mutex);
		free_resource(r);
		free(ls);
		return rv;
	}

	/*
	 * act will be null when this lockspace is added automatically/internally
	 * and not by an explicit client action that wants a result.
	 */

	if (act)
		list_add(&act->list, &ls->actions);

	clear_lockspace_inactive(ls->name);

	list_add_tail(&ls->list, &lockspaces);
	pthread_mutex_unlock(&lockspaces_mutex);

	/*
	 * pthread_create() returns 0 on success and a positive error number
	 * on failure (it never returns a negative value), so any non-zero
	 * result means the thread was not started.
	 */
	rv = pthread_create(&ls->thread, NULL, lockspace_thread_main, ls);
	if (rv) {
		pthread_mutex_lock(&lockspaces_mutex);
		list_del(&ls->list);
		pthread_mutex_unlock(&lockspaces_mutex);
		/* ls is about to be freed, take act back off its list */
		if (act)
			list_del(&act->list);
		free_resource(r);
		free(ls);
		return -rv;
	}

	return 0;
}
/*
 * Start the dlm lockspace that holds the global lock.
 *
 * There is no add_sanlock_global_lockspace or
 * rem_sanlock_global_lockspace because with sanlock,
 * the global lockspace is one of the vg lockspaces.
 *
 * act is passed through to add_lockspace_thread() and may be NULL.
 *
 * Returns -EEXIST if the dlm global lockspace is already marked as
 * running, otherwise the result of add_lockspace_thread().
 */
static int add_dlm_global_lockspace(struct action *act)
{
	int err;

	if (gl_running_dlm)
		return -EEXIST;

	gl_running_dlm = 1;

	/*
	 * There's a short period after which a previous gl lockspace thread
	 * has set gl_running_dlm = 0, but before its ls struct has been
	 * deleted, during which this add_lockspace_thread() can fail with
	 * -EAGAIN.
	 */
	err = add_lockspace_thread(gl_lsname_dlm, NULL, NULL, LD_LM_DLM, NULL, act);
	if (err >= 0)
		return err;

	log_error("add_dlm_global_lockspace add_lockspace_thread %d", err);
	gl_running_dlm = 0;
	return err;
}
/*
* If dlm gl lockspace is the only one left, then stop it.
* This is not used for an explicit rem_lockspace action from
* the client, only for auto remove.
*/
static int rem_dlm_global_lockspace(void)
{
struct lockspace *ls, *ls_gl = NULL;
int others = 0;
int rv = 0;
pthread_mutex_lock(&lockspaces_mutex);
list_for_each_entry(ls, &lockspaces, list) {
if (!strcmp(ls->name, gl_lsname_dlm)) {
ls_gl = ls;
continue;
}
if (ls->thread_stop)
continue;
others++;
break;
}
if (others) {
rv = -EAGAIN;
goto out;
}
if (!ls_gl) {
rv = -ENOENT;
goto out;
}
ls = ls_gl;
pthread_mutex_lock(&ls->mutex);
ls->thread_stop = 1;
ls->thread_work = 1;
pthread_cond_signal(&ls->cond);
pthread_mutex_unlock(&ls->mutex);
rv = 0;
out:
pthread_mutex_unlock(&lockspaces_mutex);
return rv;
}
/*
 * Start the lockspace requested by a client action.
 *
 * When the first dlm lockspace is added for a vg, automatically add a separate
 * dlm lockspace for the global lock.
 *
 * For sanlock, a separate lockspace is not used for the global lock, but the
 * gl lock lives in a vg lockspace, (although it's recommended to create a
 * special vg dedicated to holding the gl).
 *
 * Returns 0 on success.  Errors: -EINVAL for a gl request without dlm
 * or for a vg name that is too long to form a lockspace name, -1 for an
 * unknown resource type, otherwise the error from starting the
 * lockspace thread.
 */
static int add_lockspace(struct action *act)
{
	char ls_name[MAX_NAME+1];
	int rv;

	memset(ls_name, 0, sizeof(ls_name));

	/*
	 * FIXME: I don't think this is used any more.
	 * Remove it, or add the ability to start the global
	 * dlm lockspace using lvmlockctl?
	 */
	if (act->rt == LD_RT_GL) {
		if (gl_use_dlm) {
			rv = add_dlm_global_lockspace(act);
			return rv;
		} else {
			return -EINVAL;
		}
	}

	if (act->rt == LD_RT_VG) {
		/*
		 * Build the name first, before anything is started: a name
		 * that cannot be formed must not start the global lockspace
		 * or create a vg lockspace with an empty name.
		 */
		if (vg_ls_name(act->vg_name, ls_name) < 0)
			return -EINVAL;

		if (gl_use_dlm) {
			/* -EEXIST only means the gl lockspace is already there */
			rv = add_dlm_global_lockspace(NULL);
			if (rv < 0 && rv != -EEXIST)
				return rv;
		}

		rv = add_lockspace_thread(ls_name, act->vg_name, act->vg_uuid,
					  act->lm_type, act->vg_args,
					  act);

		if (rv)
			log_error("add_lockspace %s add_lockspace_thread %d", ls_name, rv);
		return rv;
	}

	log_error("add_lockspace bad type %d", act->rt);
	return -1;
}
/*
 * Ask the thread of a lockspace to stop, queueing act on it.
 *
 * vgchange --lock-stop vgname will lock the vg ex, then send a stop,
 * so we expect to find the ex vg lock held here, and will automatically
 * unlock it when stopping.
 *
 * Should we attempt to stop the lockspace containing the gl last?
 *
 * Returns 0 once the stop has been queued; act must not be used by the
 * caller after that.  Errors: -EINVAL for a gl request that is not dlm,
 * -ENOLS if the lockspace is not found, -ESTALE if it is already
 * stopping, -EBUSY if lv locks exist and LD_AF_FORCE is not set.
 */
static int rem_lockspace(struct action *act)
{
	struct lockspace *ls;
	char ls_name[MAX_NAME+1];
	/* saved up front: act is not ours any more once it has been queued */
	int force = act->flags & LD_AF_FORCE;
	int rt = act->rt;
	int rv;

	if (act->rt == LD_RT_GL && act->lm_type != LD_LM_DLM)
		return -EINVAL;

	memset(ls_name, 0, sizeof(ls_name));

	if (act->rt == LD_RT_GL)
		gl_ls_name(ls_name);
	else
		vg_ls_name(act->vg_name, ls_name);

	pthread_mutex_lock(&lockspaces_mutex);

	ls = find_lockspace_name(ls_name);
	if (!ls) {
		rv = -ENOLS;
		goto unlock_all;
	}

	pthread_mutex_lock(&ls->mutex);
	if (ls->thread_stop) {
		rv = -ESTALE;
	} else if (!force && for_each_lock(ls, LOCKS_EXIST_LV)) {
		rv = -EBUSY;
	} else {
		ls->thread_work = 1;
		ls->thread_stop = 1;
		list_add_tail(&act->list, &ls->actions);
		pthread_cond_signal(&ls->cond);
		rv = 0;
	}
	pthread_mutex_unlock(&ls->mutex);

unlock_all:
	pthread_mutex_unlock(&lockspaces_mutex);

	if (rv)
		return rv;

	/*
	 * The dlm global lockspace was automatically added when
	 * the first dlm vg lockspace was added, now reverse that
	 * by automatically removing the dlm global lockspace when
	 * the last dlm vg lockspace is removed.
	 */
	if (rt == LD_RT_VG && gl_use_dlm)
		rem_dlm_global_lockspace();

	return 0;
}
/*
 * Count the lockspaces started by this client that have neither finished
 * nor failed their start yet.  The client uses the count to wait for all
 * of its start operations to complete (START_WAIT).
 *
 * The numbers of finished and failed starts are only logged.
 */
static int count_lockspace_starting(uint32_t client_id)
{
	struct lockspace *ls;
	int starting = 0;
	int finished = 0;
	int failed = 0;

	pthread_mutex_lock(&lockspaces_mutex);
	list_for_each_entry(ls, &lockspaces, list) {
		if (ls->start_client_id != client_id)
			continue;

		if (ls->create_done || ls->create_fail) {
			if (ls->create_done)
				finished++;
			if (ls->create_fail)
				failed++;
		} else {
			starting++;
		}
	}
	pthread_mutex_unlock(&lockspaces_mutex);

	log_debug("count_lockspace_starting client %u count %d done %d fail %d",
		  client_id, starting, finished, failed);

	return starting;
}
/* lockspaces_mutex is held */
static struct lockspace *find_lockspace_inactive(char *ls_name)
{
struct lockspace *ls;
list_for_each_entry(ls, &lockspaces_inactive, list) {
if (!strcmp(ls->name, ls_name))
return ls;
}
return NULL;
}
/*
 * Remove and free the inactive lockspace entry with the given name.
 * Returns 1 when an entry was removed, 0 when none matched.
 * lockspaces_mutex is held.
 */
static int clear_lockspace_inactive(char *ls_name)
{
	struct lockspace *stale = find_lockspace_inactive(ls_name);

	if (!stale)
		return 0;

	list_del(&stale->list);
	free(stale);
	return 1;
}
/*
 * Drop the inactive lockspace entry that corresponds to vg_name.
 * Takes lockspaces_mutex itself.
 * Returns 0 when an entry was dropped, -ENOENT when none was found.
 */
static int forget_lockspace_inactive(char *vg_name)
{
	char ls_name[MAX_NAME+1];
	int removed;

	memset(ls_name, 0, sizeof(ls_name));
	vg_ls_name(vg_name, ls_name);

	log_debug("forget_lockspace_inactive %s", ls_name);

	pthread_mutex_lock(&lockspaces_mutex);
	removed = clear_lockspace_inactive(ls_name);
	pthread_mutex_unlock(&lockspaces_mutex);

	return removed ? 0 : -ENOENT;
}
static void free_lockspaces_inactive(void)
{
struct lockspace *ls, *safe;
pthread_mutex_lock(&lockspaces_mutex);
list_for_each_entry_safe(ls, safe, &lockspaces_inactive, list) {
list_del(&ls->list);
free(ls);
}
pthread_mutex_unlock(&lockspaces_mutex);
}
/*
 * Loop through all lockspaces, and:
 * - if do_stop is set, stop any that are not stopped
 * - if do_free is set, join any that are done stopping (and free ls)
 *
 * do_stop will not stop an ls with lv locks unless force is set.
 *
 * This function does not block or wait for anything.
 *
 * do_stop (no do_free):
 * returns count of lockspaces that need stop (have locks and no force)
 *
 * do_free (no do_stop):
 * returns count of lockspaces that are stopped and need freeing
 *
 * do_stop and do_free:
 * returns sum of the previous two
 */
static int for_each_lockspace(int do_stop, int do_free, int do_force)
{
	struct lockspace *ls, *next;
	int need_stop = 0, need_free = 0;
	int stop_count = 0, free_count = 0;
	int is_done, is_stopping;

	pthread_mutex_lock(&lockspaces_mutex);

	if (do_stop) {
		list_for_each_entry(ls, &lockspaces, list) {
			pthread_mutex_lock(&ls->mutex);
			if (!ls->thread_stop) {
				if (do_force || !for_each_lock(ls, LOCKS_EXIST_ANY)) {
					/* Tell the lockspace thread to stop. */
					ls->thread_work = 1;
					ls->thread_stop = 1;
					pthread_cond_signal(&ls->cond);
					stop_count++;
				} else {
					/* Locks exist and force is not set. */
					need_stop++;
				}
			}
			pthread_mutex_unlock(&ls->mutex);
		}
	}

	if (do_free) {
		list_for_each_entry_safe(ls, next, &lockspaces, list) {
			pthread_mutex_lock(&ls->mutex);
			is_done = ls->thread_done;
			is_stopping = ls->thread_stop;
			pthread_mutex_unlock(&ls->mutex);

			/* This ls has locks and force is not set. */
			if (!is_stopping)
				continue;

			if (!is_done) {
				need_free++;
				continue;
			}

			/*
			 * Once thread_done is set, we know that the lockspace_thread
			 * will not be using/touching the ls struct. Any other
			 * thread touches the ls struct under lockspaces_mutex.
			 */
			pthread_join(ls->thread, NULL);
			list_del(&ls->list);

			/* In future we may need to free ls->actions here */
			free_ls_resources(ls);

			/* Keep the struct on the inactive list unless free_vg is set. */
			if (ls->free_vg)
				free(ls);
			else
				list_add(&ls->list, &lockspaces_inactive);
			free_count++;
		}
	}

	/* With no lockspaces left, forget which gl type was in use. */
	if (list_empty(&lockspaces) && !gl_type_static) {
		gl_use_dlm = 0;
		gl_use_sanlock = 0;
	}

	pthread_mutex_unlock(&lockspaces_mutex);

	if (stop_count || free_count || need_stop || need_free)
		log_debug("for_each_lockspace do_stop %d do_free %d "
			  "stop_count %d free_count %d need_stop %d need_free %d",
			  do_stop, do_free, stop_count, free_count, need_stop, need_free);

	return need_stop + need_free;
}
/*
 * Repeat for_each_lockspace() once a second until it reports nothing
 * left to stop or free.
 *
 * This is only called when the daemon is exiting so the sleep/retry
 * loop doesn't have any adverse impact.
 */
static void for_each_lockspace_retry(int do_stop, int do_free, int do_force)
{
	int remaining;

	while ((remaining = for_each_lockspace(do_stop, do_free, do_force)) != 0) {
		log_debug("for_each_lockspace_retry remaining %d", remaining);
		sleep(1);
	}
}
/*
 * Worker-thread handler for init_vg (vgcreate): check the new VG's
 * lockspace name against the running lockspaces, then let the lock
 * manager initialize the VG.
 *
 * Returns -EEXIST when the name collides with an existing lockspace,
 * -EINVAL for an unknown lm_type, otherwise the lock manager's result.
 */
static int work_init_vg(struct action *act)
{
	struct lockspace *ls;
	char ls_name[MAX_NAME+1];
	int rv = 0;

	memset(ls_name, 0, sizeof(ls_name));

	vg_ls_name(act->vg_name, ls_name);

	/*
	 * The max dlm ls name is 64 and the max sanlock ls name is 48. So,
	 * after the "lvm_" prefix, only the first 60/44 characters of the VG
	 * name are used for the lockspace name. This will cause a collision
	 * in the lock manager if two different VG names have the first 60/44
	 * chars in common. At the time of vgcreate (here), check if any other
	 * VG's are known that would collide. If the collision is not detected
	 * at vgcreate time, it will be detected at start time and add_lockspace
	 * will fail for the second of the two matching ls names.
	 */
	pthread_mutex_lock(&lockspaces_mutex);
	list_for_each_entry(ls, &lockspaces, list) {
		if (((ls->lm_type == LD_LM_SANLOCK) && !strncmp(ls->name, ls_name, 48)) ||
		    ((ls->lm_type == LD_LM_DLM) && !strcmp(ls->name, ls_name))) {
			/*
			 * Report the collision while lockspaces_mutex is still
			 * held: another thread may free ls as soon as the mutex
			 * is released, so ls must not be read after the unlock.
			 */
			log_error("Existing lockspace name %s matches new %s VG names %s %s",
				  ls->name, ls_name, ls->vg_name, act->vg_name);
			rv = -EEXIST;
			break;
		}
	}
	pthread_mutex_unlock(&lockspaces_mutex);

	if (rv == -EEXIST)
		return rv;

	if (act->lm_type == LD_LM_SANLOCK)
		rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args);
	else if (act->lm_type == LD_LM_DLM)
		rv = lm_init_vg_dlm(ls_name, act->vg_name, act->flags, act->vg_args);
	else
		rv = -EINVAL;

	return rv;
}
/*
 * Worker-thread handler for the final step of a VG rename.
 * sanlock gets lm_rename_vg_sanlock() called; for dlm nothing is done
 * here and 0 is returned; any other lm_type yields -EINVAL.
 */
static int work_rename_vg(struct action *act)
{
	char ls_name[MAX_NAME+1];

	memset(ls_name, 0, sizeof(ls_name));
	vg_ls_name(act->vg_name, ls_name);

	if (act->lm_type == LD_LM_SANLOCK)
		return lm_rename_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args);

	if (act->lm_type == LD_LM_DLM)
		return 0;

	return -EINVAL;
}
/*
 * Search the started sanlock lockspaces for one in which
 * lm_gl_is_enabled() reports the global lock as enabled.  The name of
 * the first such lockspace is copied into gl_lsname_sanlock.
 */
static void work_test_gl(void)
{
	struct lockspace *ls;
	int found = 0;

	pthread_mutex_lock(&lockspaces_mutex);
	list_for_each_entry(ls, &lockspaces, list) {
		if (ls->lm_type != LD_LM_SANLOCK)
			continue;

		pthread_mutex_lock(&ls->mutex);
		/* Only lockspaces that finished starting and are not stopping. */
		if (ls->create_done && !ls->thread_stop)
			found = lm_gl_is_enabled(ls);
		if (found) {
			log_debug("S %s worker found gl_is_enabled", ls->name);
			strncpy(gl_lsname_sanlock, ls->name, MAX_NAME);
		}
		pthread_mutex_unlock(&ls->mutex);

		if (found)
			break;
	}

	if (!found)
		log_debug("worker found no gl_is_enabled");

	pthread_mutex_unlock(&lockspaces_mutex);
}
/*
 * Worker-thread handler for init_lv.
 *
 * When the VG's lockspace is running, its lm_type, vg_args and
 * free_lock_offset are used (and free_lock_offset is reset to 0);
 * otherwise lm_type and vg_args come from the action itself.
 *
 * For sanlock, lm_init_lv_sanlock() is called and the lv_args it fills
 * in are copied into act->lv_args.  For dlm nothing is done and 0 is
 * returned.  Returns -EINVAL when the lm_type is unknown or does not
 * match the lockspace.
 */
static int work_init_lv(struct action *act)
{
	struct lockspace *ls;
	char ls_name[MAX_NAME+1];
	char vg_args[MAX_ARGS+1];
	char lv_args[MAX_ARGS+1];
	uint64_t free_offset = 0;
	int lm_type = 0;
	int have_ls;
	int rv;

	memset(ls_name, 0, sizeof(ls_name));
	memset(vg_args, 0, sizeof(vg_args));
	memset(lv_args, 0, sizeof(lv_args));

	vg_ls_name(act->vg_name, ls_name);

	pthread_mutex_lock(&lockspaces_mutex);
	ls = find_lockspace_name(ls_name);
	have_ls = (ls != NULL);
	if (have_ls) {
		lm_type = ls->lm_type;
		memcpy(vg_args, ls->vg_args, MAX_ARGS);
		free_offset = ls->free_lock_offset;
		ls->free_lock_offset = 0;
	}
	pthread_mutex_unlock(&lockspaces_mutex);

	if (!have_ls) {
		lm_type = act->lm_type;
		memcpy(vg_args, act->vg_args, MAX_ARGS);
	}

	if (act->lm_type != lm_type) {
		log_error("init_lv ls_name %s wrong lm_type %d %d",
			  ls_name, act->lm_type, lm_type);
		return -EINVAL;
	}

	if (lm_type == LD_LM_SANLOCK) {
		rv = lm_init_lv_sanlock(ls_name, act->vg_name, act->lv_uuid,
					vg_args, lv_args, free_offset);
		memcpy(act->lv_args, lv_args, MAX_ARGS);
		return rv;
	}

	if (act->lm_type == LD_LM_DLM)
		return 0;

	log_error("init_lv ls_name %s bad lm_type %d", ls_name, act->lm_type);
	return -EINVAL;
}
/*
* When an action is queued for the worker_thread, it is processed right away.
* After processing, some actions need to be retried again in a short while.
* These actions are put on the delayed_list, and the worker_thread will
* process these delayed actions again in SHORT_DELAY_PERIOD.
*/
/*
 * Minimum interval between two passes over the worker thread's
 * delayed_list, compared against monotime() differences
 * (presumably seconds -- confirm against monotime()).
 */
#define SHORT_DELAY_PERIOD 2
/*
 * Seconds the worker thread waits for new work when its delayed_list
 * is empty.
 */
#define LONG_DELAY_PERIOD 60
static void *worker_thread_main(void *arg_in)
{
struct list_head delayed_list;
struct timespec ts;
struct action *act, *safe;
uint64_t last_delayed_time = 0;
int delay_sec = LONG_DELAY_PERIOD;
int rv;
INIT_LIST_HEAD(&delayed_list);
while (1) {
pthread_mutex_lock(&worker_mutex);
clock_gettime(CLOCK_REALTIME, &ts);
ts.tv_sec += delay_sec;
rv = 0;
act = NULL;
while (list_empty(&worker_list) && !worker_stop && !worker_wake && !rv) {
rv = pthread_cond_timedwait(&worker_cond, &worker_mutex, &ts);
}
worker_wake = 0;
if (worker_stop) {
pthread_mutex_unlock(&worker_mutex);
goto out;
}
if (!list_empty(&worker_list)) {
act = list_first_entry(&worker_list, struct action, list);
list_del(&act->list);
}
pthread_mutex_unlock(&worker_mutex);
/*
* Do new work actions before processing delayed work actions.
*/
if (!act)
goto delayed_work;
if (act->op == LD_OP_RUNNING_LM) {
int run_sanlock = lm_is_running_sanlock();
int run_dlm = lm_is_running_dlm();
if (run_sanlock && run_dlm)
act->result = -EXFULL;
else if (!run_sanlock && !run_dlm)
act->result = -ENOLCK;
else if (run_sanlock)
act->result = LD_LM_SANLOCK;
else if (run_dlm)
act->result = LD_LM_DLM;
add_client_result(act);
} else if ((act->op == LD_OP_LOCK) && (act->flags & LD_AF_SEARCH_LS)) {
/*
* worker_thread used as a helper to search existing
* sanlock vgs for an enabled gl.
*/
log_debug("work search for gl");
work_test_gl();
/* try again to find a gl lockspace for this act */
rv = add_lock_action(act);
if (rv < 0) {
act->result = rv;
add_client_result(act);
}
} else if ((act->op == LD_OP_INIT) && (act->rt == LD_RT_VG)) {
log_debug("work init_vg %s", act->vg_name);
act->result = work_init_vg(act);
add_client_result(act);
} else if ((act->op == LD_OP_INIT) && (act->rt == LD_RT_LV)) {
log_debug("work init_lv %s/%s uuid %s", act->vg_name, act->lv_name, act->lv_uuid);
act->result = work_init_lv(act);
add_client_result(act);
} else if ((act->op == LD_OP_RENAME_FINAL) && (act->rt == LD_RT_VG)) {
log_debug("work rename_vg %s", act->vg_name);
act->result = work_rename_vg(act);
add_client_result(act);
} else if (act->op == LD_OP_START_WAIT) {
act->result = count_lockspace_starting(act->client_id);
if (!act->result)
add_client_result(act);
else
list_add(&act->list, &delayed_list);
} else if (act->op == LD_OP_STOP_ALL) {
act->result = for_each_lockspace(DO_STOP, DO_FREE, (act->flags & LD_AF_FORCE) ? DO_FORCE : NO_FORCE);
if (!act->result || !(act->flags & LD_AF_WAIT))
add_client_result(act);
else
list_add(&act->list, &delayed_list);
} else {
log_error("work unknown op %d", act->op);
act->result = -EINVAL;
add_client_result(act);
}
delayed_work:
/*
* We may want to track retry times per action so that
* we can delay different actions by different amounts.
*/
if (monotime() - last_delayed_time < SHORT_DELAY_PERIOD) {
delay_sec = 1;
continue;
}
last_delayed_time = monotime();
list_for_each_entry_safe(act, safe, &delayed_list, list) {
if (act->op == LD_OP_START_WAIT) {
log_debug("work delayed start_wait for client %u", act->client_id);
act->result = count_lockspace_starting(act->client_id);
if (!act->result) {
list_del(&act->list);
add_client_result(act);
}
} else if (act->op == LD_OP_STOP_ALL) {
log_debug("work delayed stop_all");
act->result = for_each_lockspace(DO_STOP, DO_FREE, (act->flags & LD_AF_FORCE) ? DO_FORCE : NO_FORCE);
if (!act->result) {
list_del(&act->list);
act->result = 0;
add_client_result(act);
}
}
}
/*
* This is not explicitly queued work, and not delayed work,
* but lockspace thread cleanup that's needed when a
* lockspace has been stopped/removed or failed to start.
*/
for_each_lockspace(NO_STOP, DO_FREE, NO_FORCE);
if (list_empty(&delayed_list))
delay_sec = LONG_DELAY_PERIOD;
else
delay_sec = 1;
}
out:
list_for_each_entry_safe(act, safe, &delayed_list, list) {
list_del(&act->list);
free_action(act);
}
pthread_mutex_lock(&worker_mutex);
list_for_each_entry_safe(act, safe, &worker_list, list) {
list_del(&act->list);
free_action(act);
}
pthread_mutex_unlock(&worker_mutex);
return NULL;
}
/*
 * Initialize the worker list, mutex and condition variable, then start
 * the worker thread.  Returns 0 on success, -1 if the thread could not
 * be created.
 */
static int setup_worker_thread(void)
{
	INIT_LIST_HEAD(&worker_list);
	pthread_mutex_init(&worker_mutex, NULL);
	pthread_cond_init(&worker_cond, NULL);

	return pthread_create(&worker_thread, NULL, worker_thread_main, NULL) ? -1 : 0;
}
/*
 * Ask the worker thread to stop and wait until it has exited.
 */
static void close_worker_thread(void)
{
	/* Set the stop flag and wake the thread under worker_mutex. */
	pthread_mutex_lock(&worker_mutex);
	worker_stop = 1;
	pthread_cond_signal(&worker_cond);
	pthread_mutex_unlock(&worker_mutex);

	pthread_join(worker_thread, NULL);
}
/*
 * Return the first client on client_list that has recv or dead set,
 * or NULL when there is none.
 * client_mutex is locked.
 */
static struct client *find_client_work(void)
{
	struct client *cur;
	struct client *match = NULL;

	list_for_each_entry(cur, &client_list, list) {
		if (cur->recv || cur->dead) {
			match = cur;
			break;
		}
	}

	return match;
}
/*
 * Return the client on client_list with the given id, or NULL.
 * client_mutex is locked.
 */
static struct client *find_client_id(uint32_t id)
{
	struct client *cur;
	struct client *match = NULL;

	list_for_each_entry(cur, &client_list, list) {
		if (cur->id == id) {
			match = cur;
			break;
		}
	}

	return match;
}
/*
 * Return the client on client_list whose pi field equals pi, or NULL.
 * client_mutex is locked.
 */
static struct client *find_client_pi(int pi)
{
	struct client *cur;
	struct client *match = NULL;

	list_for_each_entry(cur, &client_list, list) {
		if (cur->pi == pi) {
			match = cur;
			break;
		}
	}

	return match;
}
/*
 * wake up poll() because we have added an fd
 * back into pollfd and poll() needs to be restarted
 * to recognize it.
 *
 * A single byte is written to restart_fds[1]; a failed or empty write
 * is only logged.
 */
static void restart_poll(void)
{
	int written = write(restart_fds[1], "w", 1);

	if (written <= 0)
		log_debug("restart_poll write %d", errno);
}
/*
 * Put the client's fd back into its pollfd slot so that poll will take
 * requests from the client again, then restart poll.
 * Does nothing for a dead client.  cl->mutex must be held.
 */
static void client_resume(struct client *cl)
{
	int resumable;

	if (cl->dead)
		return;

	resumable = cl->poll_ignore && cl->fd != -1 && cl->pi != -1;
	if (!resumable) {
		/* shouldn't happen */
		log_error("client_resume %d bad state ig %d fd %d pi %d",
			  cl->id, cl->poll_ignore, cl->fd, cl->pi);
		return;
	}

	pthread_mutex_lock(&pollfd_mutex);
	/* The slot is expected to be parked as POLL_FD_IGNORE; log if not. */
	if (pollfd[cl->pi].fd != POLL_FD_IGNORE)
		log_error("client_resume %d pi %d fd %d not IGNORE",
			  cl->id, cl->pi, cl->fd);
	pollfd[cl->pi].fd = cl->fd;
	pollfd[cl->pi].events = POLLIN;
	pthread_mutex_unlock(&pollfd_mutex);

	restart_poll();
}
/*
 * Build the reply for a finished action, write it to the client's fd
 * and resume polling of the client.  Nothing is sent to a dead client.
 * Called from client_thread, cl->mutex is held.
 */
static void client_send_result(struct client *cl, struct action *act)
{
	/* Action flags that are reported back by name in result_flags. */
	static const struct {
		uint32_t flag;
		const char *text;
	} flag_names[] = {
		{ LD_AF_DUP_GL_LS,       "DUP_GL_LS," },
		{ LD_AF_INACTIVE_LS,     "INACTIVE_LS," },
		{ LD_AF_ADD_LS_ERROR,    "ADD_LS_ERROR," },
		{ LD_AF_WARN_GL_REMOVED, "WARN_GL_REMOVED," },
	};
	response res;
	char result_flags[128];
	const char *sender;
	const char *flags_str;
	size_t i;

	if (cl->dead) {
		log_debug("client send %d skip dead", cl->id);
		return;
	}

	memset(result_flags, 0, sizeof(result_flags));

	buffer_init(&res.buffer);

	/*
	 * EUNATCH is returned when the global lock existed,
	 * but had been disabled when we tried to lock it,
	 * so we removed it, and no longer have a gl to lock.
	 */
	if (act->result == -EUNATCH)
		act->result = -ENOLS;

	if (act->result == -ENOLS) {
		/*
		 * The lockspace could not be found, in which case
		 * the caller may want to know if any lockspaces exist
		 * or if lockspaces exist, but not one with the global lock.
		 * Given this detail, it may be able to proceed without
		 * the lock.
		 */
		pthread_mutex_lock(&lockspaces_mutex);
		if (list_empty(&lockspaces))
			strcat(result_flags, "NO_LOCKSPACES,");
		pthread_mutex_unlock(&lockspaces_mutex);

		/*
		 * NOTE(review): NO_GL_LS is reported with every ENOLS result;
		 * no distinction is made by gl_use_sanlock/gl_use_dlm or by
		 * whether a gl lockspace name is known -- confirm intended.
		 */
		strcat(result_flags, "NO_GL_LS,");
	}

	for (i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++) {
		if (act->flags & flag_names[i].flag)
			strcat(result_flags, flag_names[i].text);
	}

	sender = cl->name[0] ? cl->name : "client";
	flags_str = result_flags[0] ? result_flags : "none";

	if (act->op == LD_OP_INIT) {
		/*
		 * init is a special case where lock args need
		 * to be passed back to the client:
		 * init_vg with dlm|sanlock returns vg_args
		 * init_lv with sanlock returns lv_args
		 */
		const char *vg_args = act->vg_args[0] ? act->vg_args : "none";
		const char *lv_args = act->lv_args[0] ? act->lv_args : "none";

		log_debug("send %s[%d.%u] %s %s rv %d vg_args %s lv_args %s",
			  sender, cl->pid, cl->id,
			  op_str(act->op), rt_str(act->rt),
			  act->result, vg_args, lv_args);

		res = daemon_reply_simple("OK",
					  "op = %d", act->op,
					  "op_result = %d", act->result,
					  "lm_result = %d", act->lm_rv,
					  "vg_lock_args = %s", vg_args,
					  "lv_lock_args = %s", lv_args,
					  "result_flags = %s", flags_str,
					  NULL);
	} else {
		/*
		 * A normal reply.
		 */
		log_debug("send %s[%d.%u] %s %s rv %d %s %s",
			  sender, cl->pid, cl->id,
			  op_str(act->op), rt_str(act->rt),
			  act->result, (act->result == -ENOLS) ? "ENOLS" : "", result_flags);

		res = daemon_reply_simple("OK",
					  "op = %d", act->op,
					  "lock_type = %s", lm_str(act->lm_type),
					  "op_result = %d", act->result,
					  "lm_result = %d", act->lm_rv,
					  "result_flags = %s", flags_str,
					  NULL);
	}

	buffer_write(cl->fd, &res.buffer);
	buffer_destroy(&res.buffer);

	client_resume(cl);
}
/*
 * Queue an LD_OP_CLOSE action carrying this client's id on every
 * lockspace that is not stopping.  A lockspace for which no action can
 * be allocated is skipped.
 * Called from client_thread.
 */
static void client_purge(struct client *cl)
{
	struct lockspace *ls;
	struct action *close_act;

	pthread_mutex_lock(&lockspaces_mutex);
	list_for_each_entry(ls, &lockspaces, list) {
		close_act = alloc_action();
		if (!close_act)
			continue;

		close_act->op = LD_OP_CLOSE;
		close_act->client_id = cl->id;

		pthread_mutex_lock(&ls->mutex);
		if (ls->thread_stop) {
			/* The lockspace is stopping; the action is not queued. */
			free_action(close_act);
		} else {
			list_add_tail(&close_act->list, &ls->actions);
			ls->thread_work = 1;
			pthread_cond_signal(&ls->cond);
		}
		pthread_mutex_unlock(&ls->mutex);
	}
	pthread_mutex_unlock(&lockspaces_mutex);
}
/*
 * Route a lock-related action to the lockspace it belongs to.
 *
 * On success the action is appended to the lockspace's action list and
 * the lockspace thread is signalled (or, for a sanlock global lock with
 * no known gl lockspace, the action is handed to the worker thread);
 * 0 is returned in both cases.
 *
 * Negative returns:
 *   -ENOLS     no usable lockspace was found
 *   -EINVAL    act->lm_type disagrees with the lockspace's lm_type
 *   -ESTALE    the lockspace is stopping
 *   -ESTARTING the lockspace is still starting and the action does not
 *              have LD_AF_WAIT_STARTING set
 */
static int add_lock_action(struct action *act)
{
	struct lockspace *ls = NULL;
	char ls_name[MAX_NAME+1];

	memset(ls_name, 0, sizeof(ls_name));

	/*
	 * Determine which lockspace this action is for, and set ls_name.
	 * Enabling/disabling the sanlock global lock is addressed to the
	 * named VG's lockspace; other gl actions use the gl lockspace name.
	 */
	if (act->rt == LD_RT_GL && gl_use_sanlock &&
	    (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE))
		vg_ls_name(act->vg_name, ls_name);
	else if (act->rt == LD_RT_GL)
		gl_ls_name(ls_name);
	else
		vg_ls_name(act->vg_name, ls_name);

retry:
	pthread_mutex_lock(&lockspaces_mutex);
	if (ls_name[0])
		ls = find_lockspace_name(ls_name);
	if (!ls) {
		int ls_inactive = 0;
		int ls_create_fail = 0;

		/*
		 * No running lockspace: check the inactive list, keeping only
		 * what is needed from the entry before the mutex is dropped.
		 */
		if (ls_name[0])
			ls = find_lockspace_inactive(ls_name);
		if (ls) {
			ls_inactive = 1;
			ls_create_fail = ls->create_fail;
			ls = NULL;
		}
		pthread_mutex_unlock(&lockspaces_mutex);

		if (act->op == LD_OP_UPDATE && act->rt == LD_RT_VG) {
			log_debug("lockspace not found ignored for vg update");
			return -ENOLS;

		} else if (act->flags & LD_AF_SEARCH_LS) {
			/* fail if we've already tried searching for the ls */
			log_debug("lockspace search repeated %s", ls_name);
			return -ENOLS;

		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && gl_use_sanlock) {
			/* gl may have been enabled in an existing vg */
			log_debug("gl lockspace not found check sanlock vgs");
			/* SEARCH_LS marks the action so the search is done only once. */
			act->flags |= LD_AF_SEARCH_LS;
			add_work_action(act);
			return 0;

		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && gl_use_dlm) {
			log_debug("gl lockspace not found add dlm global");
			/*
			 * Add the dlm global lockspace and look it up again.
			 * SEARCH_LS stops a second attempt; WAIT_STARTING lets
			 * the action be queued while the lockspace is starting.
			 */
			act->flags |= LD_AF_SEARCH_LS;
			act->flags |= LD_AF_WAIT_STARTING;
			add_dlm_global_lockspace(NULL);
			gl_ls_name(ls_name);
			goto retry;

		} else if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
			log_debug("lockspace not found ignored for unlock");
			return -ENOLS;

		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_VG && ls_inactive) {
			/* ls has been stopped or previously failed to start */
			log_debug("lockspace inactive create_fail %d %s",
				  ls_create_fail, ls_name);
			act->flags |= LD_AF_INACTIVE_LS;
			if (ls_create_fail)
				act->flags |= LD_AF_ADD_LS_ERROR;
			return -ENOLS;

		} else {
			log_debug("lockspace not found %s", ls_name);
			return -ENOLS;
		}
	}

	/* lockspaces_mutex is still held from here on. */
	if (act->lm_type == LD_LM_NONE) {
		/* return to the command the type we are using */
		act->lm_type = ls->lm_type;
	} else if (act->lm_type != ls->lm_type) {
		/* should not happen */
		log_error("S %s add_lock_action bad lm_type %d ls %d",
			  ls_name, act->lm_type, ls->lm_type);
		pthread_mutex_unlock(&lockspaces_mutex);
		return -EINVAL;
	}

	pthread_mutex_lock(&ls->mutex);
	if (ls->thread_stop && ls->thread_done) {
		/*
		 * The lockspace thread has finished: join it, free the
		 * lockspace here, and search again by name.
		 */
		log_debug("lockspace is done finish cleanup %s", ls_name);
		pthread_join(ls->thread, NULL);
		list_del(&ls->list);
		pthread_mutex_unlock(&ls->mutex);
		free_ls_resources(ls);
		free(ls);
		pthread_mutex_unlock(&lockspaces_mutex);
		goto retry;
	}

	if (ls->thread_stop) {
		pthread_mutex_unlock(&ls->mutex);
		pthread_mutex_unlock(&lockspaces_mutex);
		log_error("lockspace is stopping %s", ls_name);
		return -ESTALE;
	}

	/* Start neither finished nor failed yet, and the action will not wait. */
	if (!ls->create_fail && !ls->create_done && !(act->flags & LD_AF_WAIT_STARTING)) {
		pthread_mutex_unlock(&ls->mutex);
		pthread_mutex_unlock(&lockspaces_mutex);
		log_debug("lockspace is starting %s", ls_name);
		return -ESTARTING;
	}

	/* Queue the action and wake the lockspace thread. */
	list_add_tail(&act->list, &ls->actions);
	ls->thread_work = 1;
	pthread_cond_signal(&ls->cond);
	pthread_mutex_unlock(&ls->mutex);
	pthread_mutex_unlock(&lockspaces_mutex);

	/* lockspace_thread_main / res_process take it from here */

	return 0;
}
/*
 * Translate a request name from a client message into an operation
 * (*op) and resource type (*rt).
 *
 * Returns 0 on a match.  Returns -1, leaving *op and *rt untouched,
 * when req_name is NULL or not a known request name.
 */
static int str_to_op_rt(const char *req_name, int *op, int *rt)
{
	static const struct {
		const char *name;
		int op;
		int rt;
	} requests[] = {
		{ "hello",            LD_OP_HELLO,          0 },
		{ "quit",             LD_OP_QUIT,           0 },
		{ "info",             LD_OP_DUMP_INFO,      0 },
		{ "dump",             LD_OP_DUMP_LOG,       0 },
		{ "init_vg",          LD_OP_INIT,           LD_RT_VG },
		{ "init_lv",          LD_OP_INIT,           LD_RT_LV },
		{ "free_vg",          LD_OP_FREE,           LD_RT_VG },
		{ "free_lv",          LD_OP_FREE,           LD_RT_LV },
		{ "start_vg",         LD_OP_START,          LD_RT_VG },
		{ "stop_vg",          LD_OP_STOP,           LD_RT_VG },
		{ "start_wait",       LD_OP_START_WAIT,     0 },
		{ "stop_all",         LD_OP_STOP_ALL,       0 },
		{ "lock_gl",          LD_OP_LOCK,           LD_RT_GL },
		{ "lock_vg",          LD_OP_LOCK,           LD_RT_VG },
		{ "lock_lv",          LD_OP_LOCK,           LD_RT_LV },
		{ "vg_update",        LD_OP_UPDATE,         LD_RT_VG },
		{ "enable_gl",        LD_OP_ENABLE,         LD_RT_GL },
		{ "disable_gl",       LD_OP_DISABLE,        LD_RT_GL },
		{ "rename_vg_before", LD_OP_RENAME_BEFORE,  LD_RT_VG },
		{ "rename_vg_final",  LD_OP_RENAME_FINAL,   LD_RT_VG },
		{ "running_lm",       LD_OP_RUNNING_LM,     0 },
		{ "find_free_lock",   LD_OP_FIND_FREE_LOCK, LD_RT_VG },
		{ "forget_vg_name",   LD_OP_FORGET_VG_NAME, LD_RT_VG },
		{ "kill_vg",          LD_OP_KILL_VG,        LD_RT_VG },
		{ "drop_vg",          LD_OP_DROP_VG,        LD_RT_VG },
	};
	size_t i;

	if (!req_name)
		return -1;

	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
		if (strcmp(req_name, requests[i].name) == 0) {
			*op = requests[i].op;
			*rt = requests[i].rt;
			return 0;
		}
	}

	return -1;
}
/*
 * Translate a lock mode string ("un", "nl", "sh", "ex") into its
 * LD_LK_* value.  NULL or any other string yields LD_LK_IV.
 */
static int str_to_mode(const char *str)
{
	static const struct {
		const char *name;
		int mode;
	} modes[] = {
		{ "un", LD_LK_UN },
		{ "nl", LD_LK_NL },
		{ "sh", LD_LK_SH },
		{ "ex", LD_LK_EX },
	};
	size_t i;

	if (!str)
		return LD_LK_IV;

	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
		if (strcmp(str, modes[i].name) == 0)
			return modes[i].mode;
	}

	return LD_LK_IV;
}
/*
 * Translate a lock manager name into its LD_LM_* value.
 * NULL and "none" both give LD_LM_NONE; an unrecognized name gives -2.
 */
static int str_to_lm(const char *str)
{
	if (str == NULL)
		return LD_LM_NONE;

	if (strcmp(str, "none") == 0)
		return LD_LM_NONE;

	if (strcmp(str, "sanlock") == 0)
		return LD_LM_SANLOCK;

	if (strcmp(str, "dlm") == 0)
		return LD_LM_DLM;

	return -2;
}
/*
 * Build the LD_AF_* flag mask for an "opts" string.  A flag is set when
 * its option name occurs anywhere in the string; NULL gives 0.
 *
 * NOTE(review): matching is by substring, so a string containing
 * "ex_disable" also sets LD_AF_DISABLE -- confirm this is intended.
 */
static uint32_t str_to_opts(const char *str)
{
	static const struct {
		const char *name;
		uint32_t flag;
	} opts[] = {
		{ "persistent",    LD_AF_PERSISTENT },
		{ "unlock_cancel", LD_AF_UNLOCK_CANCEL },
		{ "next_version",  LD_AF_NEXT_VERSION },
		{ "wait",          LD_AF_WAIT },
		{ "force",         LD_AF_FORCE },
		{ "ex_disable",    LD_AF_EX_DISABLE },
		{ "enable",        LD_AF_ENABLE },
		{ "disable",       LD_AF_DISABLE },
	};
	uint32_t flags = 0;
	size_t i;

	if (!str)
		return flags;

	for (i = 0; i < sizeof(opts) / sizeof(opts[0]); i++) {
		if (strstr(str, opts[i].name))
			flags |= opts[i].flag;
	}

	return flags;
}
/*
* dump info
* client_list: each client struct
* lockspaces: each lockspace struct
* lockspace actions: each action struct
* lockspace resources: each resource struct
* lockspace resource actions: each action struct
* lockspace resource locks: each lock struct
*/
/*
 * Create the datagram socket used to send dump data and fill in
 * dump_addr/dump_addrlen.  DUMP_SOCKET_NAME is stored starting at
 * sun_path[1], so sun_path[0] stays zero.
 *
 * Returns the socket fd, or the negative value returned by socket().
 */
static int setup_dump_socket(void)
{
	int sock = socket(AF_LOCAL, SOCK_DGRAM, 0);

	if (sock < 0)
		return sock;

	memset(&dump_addr, 0, sizeof(dump_addr));
	dump_addr.sun_family = AF_LOCAL;
	strcpy(dump_addr.sun_path + 1, DUMP_SOCKET_NAME);
	dump_addrlen = sizeof(sa_family_t) + strlen(&dump_addr.sun_path[1]) + 1;

	return sock;
}
/*
 * Send the first dump_len bytes of dump_buf to dump_addr, issuing
 * further sendto() calls for whatever a previous call left unsent.
 *
 * Returns 0 once everything was sent, otherwise the zero or negative
 * value returned by the sendto() call that stopped the transfer.
 */
static int send_dump_buf(int fd, int dump_len)
{
	int sent = 0;
	int rv;

	do {
		rv = sendto(fd, dump_buf + sent, dump_len - sent, MSG_DONTWAIT | MSG_NOSIGNAL,
			    (struct sockaddr *)&dump_addr, dump_addrlen);
		if (rv <= 0)
			return rv;
		sent += rv;
	} while (sent < dump_len);

	return 0;
}
/*
 * Format the unused-struct counters as one line into dump_buf at
 * offset pos, where len is the total buffer size.
 * Returns the snprintf() result.
 */
static int print_structs(const char *prefix, int pos, int len)
{
	char *out = dump_buf + pos;
	size_t room = len - pos;

	return snprintf(out, room,
			"info=%s "
			"unused_action_count=%d "
			"unused_client_count=%d "
			"unused_resource_count=%d "
			"unused_lock_count=%d\n",
			prefix,
			unused_action_count,
			unused_client_count,
			unused_resource_count,
			unused_lock_count);
}
/*
 * Format one client as a line into dump_buf at offset pos, where len
 * is the total buffer size.  An empty client name is printed as ".".
 * Returns the snprintf() result.
 */
static int print_client(struct client *cl, const char *prefix, int pos, int len)
{
	char *out = dump_buf + pos;
	size_t room = len - pos;
	const char *name = cl->name[0] ? cl->name : ".";

	return snprintf(out, room,
			"info=%s "
			"pid=%d "
			"fd=%d "
			"pi=%d "
			"id=%u "
			"name=%s\n",
			prefix,
			cl->pid,
			cl->fd,
			cl->pi,
			cl->id,
			name);
}
/*
 * Format one lockspace as a line into dump_buf at offset pos, where
 * len is the total buffer size.  An empty vg_sysid is printed as ".";
 * the create_*, thread_* and sanlock_gl_enabled fields are printed as
 * 0 or 1.
 * Returns the snprintf() result.
 */
static int print_lockspace(struct lockspace *ls, const char *prefix, int pos, int len)
{
	char *out = dump_buf + pos;
	size_t room = len - pos;
	const char *sysid = ls->vg_sysid[0] ? ls->vg_sysid : ".";

	return snprintf(out, room,
			"info=%s "
			"ls_name=%s "
			"vg_name=%s "
			"vg_uuid=%s "
			"vg_sysid=%s "
			"vg_args=%s "
			"lm_type=%s "
			"host_id=%llu "
			"create_fail=%d "
			"create_done=%d "
			"thread_work=%d "
			"thread_stop=%d "
			"thread_done=%d "
			"kill_vg=%d "
			"drop_vg=%d "
			"sanlock_gl_enabled=%d\n",
			prefix,
			ls->name,
			ls->vg_name,
			ls->vg_uuid,
			sysid,
			ls->vg_args,
			lm_str(ls->lm_type),
			(unsigned long long)ls->host_id,
			ls->create_fail ? 1 : 0,
			ls->create_done ? 1 : 0,
			ls->thread_work ? 1 : 0,
			ls->thread_stop ? 1 : 0,
			ls->thread_done ? 1 : 0,
			ls->kill_vg,
			ls->drop_vg,
			ls->sanlock_gl_enabled ? 1 : 0);
}
/*
 * Format one action as a line into dump_buf at offset pos, where len
 * is the total buffer size.
 * Returns the snprintf() result.
 */
static int print_action(struct action *act, const char *prefix, int pos, int len)
{
	char *out = dump_buf + pos;
	size_t room = len - pos;

	return snprintf(out, room,
			"info=%s "
			"client_id=%u "
			"flags=0x%x "
			"version=%u "
			"op=%s "
			"rt=%s "
			"mode=%s "
			"lm_type=%s "
			"result=%d "
			"lm_rv=%d\n",
			prefix,
			act->client_id,
			act->flags,
			act->version,
			op_str(act->op),
			rt_str(act->rt),
			mode_str(act->mode),
			lm_str(act->lm_type),
			act->result,
			act->lm_rv);
}
/*
 * Format one resource as a line into dump_buf at offset pos, where len
 * is the total buffer size.
 * Returns the snprintf() result.
 */
static int print_resource(struct resource *r, const char *prefix, int pos, int len)
{
	char *out = dump_buf + pos;
	size_t room = len - pos;

	return snprintf(out, room,
			"info=%s "
			"name=%s "
			"type=%s "
			"mode=%s "
			"sh_count=%d "
			"version=%u\n",
			prefix,
			r->name,
			rt_str(r->type),
			mode_str(r->mode),
			r->sh_count,
			r->version);
}
/*
 * Format one lock as a line into dump_buf at offset pos, where len is
 * the total buffer size.
 * Returns the snprintf() result.
 */
static int print_lock(struct lock *lk, const char *prefix, int pos, int len)
{
	char *out = dump_buf + pos;
	size_t room = len - pos;

	return snprintf(out, room,
			"info=%s "
			"mode=%s "
			"version=%u "
			"flags=0x%x "
			"client_id=%u\n",
			prefix,
			mode_str(lk->mode),
			lk->version,
			lk->flags,
			lk->client_id);
}
/*
 * Account for one formatted line: ret is the value returned by a
 * print_*() call made at offset *pos of a buffer of size len.
 * Returns -ENOSPC when the line did not fit, otherwise advances *pos
 * past the line and returns 0.
 */
static int dump_advance(int ret, int len, int *pos)
{
	if (ret >= len - *pos)
		return -ENOSPC;

	*pos += ret;
	return 0;
}

/*
 * Fill dump_buf with one line per struct: the unused-struct counters,
 * every client, and every lockspace followed by its actions and its
 * resources (each resource followed by its locks and actions).
 *
 * Returns 0, or -ENOSPC when dump_buf is too small.  *dump_len is set
 * to the number of bytes written, except when -ENOSPC is hit in the
 * counters or clients sections, where it is left untouched.
 */
static int dump_info(int *dump_len)
{
	struct client *cl;
	struct lockspace *ls;
	struct resource *r;
	struct lock *lk;
	struct action *act;
	int len = sizeof(dump_buf);
	int pos = 0;
	int rv;

	memset(dump_buf, 0, sizeof(dump_buf));

	/*
	 * memory
	 */

	pthread_mutex_lock(&unused_struct_mutex);
	rv = dump_advance(print_structs("structs", pos, len), len, &pos);
	pthread_mutex_unlock(&unused_struct_mutex);
	if (rv < 0)
		return rv;

	/*
	 * clients
	 */

	pthread_mutex_lock(&client_mutex);
	list_for_each_entry(cl, &client_list, list) {
		rv = dump_advance(print_client(cl, "client", pos, len), len, &pos);
		if (rv < 0)
			break;
	}
	pthread_mutex_unlock(&client_mutex);
	if (rv < 0)
		return rv;

	/*
	 * lockspaces with their action/resource/lock info
	 */

	pthread_mutex_lock(&lockspaces_mutex);
	list_for_each_entry(ls, &lockspaces, list) {
		rv = dump_advance(print_lockspace(ls, "ls", pos, len), len, &pos);
		if (rv < 0)
			goto out;

		list_for_each_entry(act, &ls->actions, list) {
			rv = dump_advance(print_action(act, "ls_action", pos, len), len, &pos);
			if (rv < 0)
				goto out;
		}

		list_for_each_entry(r, &ls->resources, list) {
			rv = dump_advance(print_resource(r, "r", pos, len), len, &pos);
			if (rv < 0)
				goto out;

			list_for_each_entry(lk, &r->locks, list) {
				rv = dump_advance(print_lock(lk, "lk", pos, len), len, &pos);
				if (rv < 0)
					goto out;
			}

			list_for_each_entry(act, &r->actions, list) {
				rv = dump_advance(print_action(act, "r_action", pos, len), len, &pos);
				if (rv < 0)
					goto out;
			}
		}
	}
out:
	pthread_mutex_unlock(&lockspaces_mutex);

	*dump_len = pos;

	return rv;
}
/*
 * Read and handle one request from a client connection.
 *
 * Called from client_thread, cl->mutex is held.
 *
 * The request is read from cl->fd and parsed as a config tree.
 * HELLO, QUIT, DUMP_INFO and DUMP_LOG are answered directly on cl->fd
 * in this function.  Every other op is copied into a newly allocated
 * struct action and handed on through add_lockspace(), rem_lockspace(),
 * add_work_action() or add_lock_action().  If handing it on fails (rv < 0),
 * the error is returned to the client through add_client_result().
 *
 * Every path that returns before an action has been created calls
 * client_resume(cl).
 */
static void client_recv_action(struct client *cl)
{
request req;
response res;
struct action *act;
const char *cl_name;
const char *vg_name;
const char *vg_uuid;
const char *vg_sysid;
const char *str;
int64_t val;
uint32_t opts = 0;
int result = 0;
int cl_pid;
int op, rt, lm, mode;
int rv;
buffer_init(&req.buffer);
rv = buffer_read(cl->fd, &req.buffer);
/* A zero return from buffer_read is treated as failure here. */
if (!rv) {
if (errno == ECONNRESET) {
/* Peer went away: mark the client dead so client_thread_main removes it. */
log_debug("client recv %d ECONNRESET", cl->id);
cl->dead = 1;
} else {
log_error("client recv %d buffer_read error %d", cl->id, errno);
}
buffer_destroy(&req.buffer);
client_resume(cl);
return;
}
req.cft = dm_config_from_string(req.buffer.mem);
if (!req.cft) {
log_error("client recv %d config_from_string error", cl->id);
buffer_destroy(&req.buffer);
client_resume(cl);
return;
}
/* The "request" field is translated into an op and a resource type. */
str = daemon_request_str(req, "request", NULL);
rv = str_to_op_rt(str, &op, &rt);
if (rv < 0) {
log_error("client recv %d bad request name \"%s\"", cl->id, str ? str : "");
dm_config_destroy(req.cft);
buffer_destroy(&req.buffer);
client_resume(cl);
return;
}
/* These ops are answered immediately, without creating an action. */
if (op == LD_OP_HELLO || op == LD_OP_QUIT ||
op == LD_OP_DUMP_INFO || op == LD_OP_DUMP_LOG) {
/*
* FIXME: add the client command name to the hello messages
* so it can be saved in cl->name here.
*/
result = 0;
if (op == LD_OP_QUIT) {
/* Quit is only accepted while no lockspaces exist, otherwise -EBUSY. */
log_debug("op quit");
pthread_mutex_lock(&lockspaces_mutex);
if (list_empty(&lockspaces))
daemon_quit = 1;
else
result = -EBUSY;
pthread_mutex_unlock(&lockspaces_mutex);
}
buffer_init(&res.buffer);
if (op == LD_OP_DUMP_INFO || op == LD_OP_DUMP_LOG) {
int dump_len = 0;
int fd;
/*
* The reply on cl->fd carries only result and dump_len; the dump
* data itself is sent with send_dump_buf() on the separate fd
* returned by setup_dump_socket().
*/
fd = setup_dump_socket();
if (fd < 0)
result = fd;
else if (op == LD_OP_DUMP_INFO)
result = dump_info(&dump_len);
else if (op == LD_OP_DUMP_LOG)
result = dump_log(&dump_len);
else
result = -EINVAL;
res = daemon_reply_simple("OK",
"result = %d", result,
"dump_len = %d", dump_len,
NULL);
if (fd >= 0) {
send_dump_buf(fd, dump_len);
close(fd);
}
} else {
res = daemon_reply_simple("OK",
"result = %d", result,
"protocol = %s", lvmlockd_protocol,
"version = %d", lvmlockd_protocol_version,
NULL);
}
buffer_write(cl->fd, &res.buffer);
buffer_destroy(&res.buffer);
dm_config_destroy(req.cft);
buffer_destroy(&req.buffer);
client_resume(cl);
return;
}
/* Fields common to all remaining requests. */
cl_name = daemon_request_str(req, "cmd", NULL);
cl_pid = daemon_request_int(req, "pid", 0);
vg_name = daemon_request_str(req, "vg_name", NULL);
vg_uuid = daemon_request_str(req, "vg_uuid", NULL);
vg_sysid = daemon_request_str(req, "vg_sysid", NULL);
str = daemon_request_str(req, "mode", NULL);
mode = str_to_mode(str);
str = daemon_request_str(req, "opts", NULL);
opts = str_to_opts(str);
str = daemon_request_str(req, "vg_lock_type", NULL);
lm = str_to_lm(str);
/* A pid mismatch is only logged; the request is still processed. */
if (cl_pid && cl_pid != cl->pid)
log_error("client recv bad message pid %d client %d", cl_pid, cl->pid);
/* FIXME: do this in hello message instead */
if (!cl->name[0] && cl_name)
strncpy(cl->name, cl_name, MAX_NAME);
/*
* The first request naming a lock manager type selects the global lock
* type, provided neither gl_use_dlm nor gl_use_sanlock is set yet.
* NOTE(review): the debug message below is logged even when neither
* flag was set because the lock manager is not supported.
*/
if (!gl_use_dlm && !gl_use_sanlock && (lm > 0)) {
if (lm == LD_LM_DLM && lm_support_dlm())
gl_use_dlm = 1;
else if (lm == LD_LM_SANLOCK && lm_support_sanlock())
gl_use_sanlock = 1;
log_debug("set gl_use_%s", lm_str(lm));
}
if (!(act = alloc_action())) {
log_error("No memory for action");
dm_config_destroy(req.cft);
buffer_destroy(&req.buffer);
client_resume(cl);
return;
}
/* Copy the request into the action; the value "none" means "not set". */
act->client_id = cl->id;
act->op = op;
act->rt = rt;
act->mode = mode;
act->flags = opts;
act->lm_type = lm;
if (vg_name && strcmp(vg_name, "none"))
strncpy(act->vg_name, vg_name, MAX_NAME);
if (vg_uuid && strcmp(vg_uuid, "none"))
strncpy(act->vg_uuid, vg_uuid, 64);
if (vg_sysid && strcmp(vg_sysid, "none"))
strncpy(act->vg_sysid, vg_sysid, MAX_NAME);
str = daemon_request_str(req, "lv_name", NULL);
if (str && strcmp(str, "none"))
strncpy(act->lv_name, str, MAX_NAME);
str = daemon_request_str(req, "lv_uuid", NULL);
if (str && strcmp(str, "none"))
strncpy(act->lv_uuid, str, MAX_NAME);
val = daemon_request_int(req, "version", 0);
if (val)
act->version = (uint32_t)val;
str = daemon_request_str(req, "vg_lock_args", NULL);
if (str && strcmp(str, "none"))
strncpy(act->vg_args, str, MAX_ARGS);
str = daemon_request_str(req, "lv_lock_args", NULL);
if (str && strcmp(str, "none"))
strncpy(act->lv_args, str, MAX_ARGS);
/* start_vg will include lvmlocal.conf local/host_id here */
val = daemon_request_int(req, "host_id", 0);
if (val)
act->host_id = val;
act->max_retries = daemon_request_int(req, "max_retries", DEFAULT_MAX_RETRIES);
/* Everything needed has been copied out of the request. */
dm_config_destroy(req.cft);
buffer_destroy(&req.buffer);
log_debug("recv %s[%d.%u] %s %s \"%s\" mode %s flags %x",
cl->name[0] ? cl->name : "client", cl->pid, cl->id,
op_str(act->op), rt_str(act->rt), act->vg_name, mode_str(act->mode), opts);
if (lm == LD_LM_DLM && !lm_support_dlm()) {
log_debug("dlm not supported");
rv = -EPROTONOSUPPORT;
goto out;
}
if (lm == LD_LM_SANLOCK && !lm_support_sanlock()) {
log_debug("sanlock not supported");
rv = -EPROTONOSUPPORT;
goto out;
}
switch (act->op) {
case LD_OP_START:
rv = add_lockspace(act);
break;
case LD_OP_STOP:
rv = rem_lockspace(act);
break;
case LD_OP_INIT:
case LD_OP_START_WAIT:
case LD_OP_STOP_ALL:
case LD_OP_RENAME_FINAL:
case LD_OP_RUNNING_LM:
add_work_action(act);
rv = 0;
break;
case LD_OP_LOCK:
case LD_OP_UPDATE:
case LD_OP_ENABLE:
case LD_OP_DISABLE:
case LD_OP_FREE:
case LD_OP_RENAME_BEFORE:
case LD_OP_FIND_FREE_LOCK:
case LD_OP_KILL_VG:
case LD_OP_DROP_VG:
rv = add_lock_action(act);
break;
case LD_OP_FORGET_VG_NAME:
/*
* Handled synchronously.  rv is not assigned here; it still holds
* the non-negative value from str_to_op_rt(), so the error path
* below does not queue the result a second time.
*/
act->result = forget_lockspace_inactive(act->vg_name);
add_client_result(act);
break;
default:
rv = -EINVAL;
};
out:
/* On failure the action goes back to the client with the error code. */
if (rv < 0) {
act->result = rv;
add_client_result(act);
}
}
/*
 * Main function of the client thread.
 *
 * Loops until client_stop is set, sleeping on client_cond while there is
 * nothing to do.  Each iteration does one of two things:
 *
 *  - takes one completed action off client_results and sends its result
 *    to the owning client (if that client still exists), or
 *  - picks one client flagged by main_loop (find_client_work) and either
 *    receives its pending request or tears the client down if it is dead.
 *
 * client_mutex protects client_work, client_results and the client list.
 * It is released exactly once per iteration, before any per-client work
 * that takes cl->mutex.
 *
 * arg_in is unused.
 */
static void *client_thread_main(void *arg_in)
{
	struct client *cl;
	struct action *act;

	while (1) {
		pthread_mutex_lock(&client_mutex);
		while (!client_work && list_empty(&client_results)) {
			if (client_stop) {
				pthread_mutex_unlock(&client_mutex);
				goto out;
			}
			pthread_cond_wait(&client_cond, &client_mutex);
		}

		/*
		 * Send outgoing results back to clients
		 */
		if (!list_empty(&client_results)) {
			act = list_first_entry(&client_results, struct action, list);
			list_del(&act->list);
			cl = find_client_id(act->client_id);
			pthread_mutex_unlock(&client_mutex);

			if (cl) {
				pthread_mutex_lock(&cl->mutex);
				client_send_result(cl, act);
				pthread_mutex_unlock(&cl->mutex);
			} else {
				log_debug("no client for result");
			}
			free_action(act);
			continue;
		}

		/*
		 * Queue incoming actions for lockspace threads
		 *
		 * The wait loop above only exits with results pending (handled
		 * above) or with client_work set, so this check is defensive.
		 * client_mutex must be released exactly once on every path:
		 * unlocking it again after the block below, as was done
		 * previously, unlocks a mutex that is not held.
		 */
		if (!client_work) {
			pthread_mutex_unlock(&client_mutex);
			continue;
		}

		cl = find_client_work();
		if (!cl)
			client_work = 0;
		pthread_mutex_unlock(&client_mutex);

		if (!cl)
			continue;

		pthread_mutex_lock(&cl->mutex);

		if (cl->recv) {
			cl->recv = 0;
			client_recv_action(cl);
		}

		if (!cl->dead) {
			pthread_mutex_unlock(&cl->mutex);
			continue;
		}

		/*
		 * If cl->dead was set in main_loop, then the
		 * fd has already been closed and the pollfd
		 * entry is already unused.
		 * main_loop set dead=1, ignore=0, pi=-1, fd=-1
		 *
		 * if cl->dead was not set in main_loop, but
		 * set in client_recv_action, then the main_loop
		 * should be ignoring this client fd.
		 * main_loop set ignore=1
		 */
		if (cl->poll_ignore) {
			log_debug("client close %d pi %d fd %d",
				  cl->id, cl->pi, cl->fd);
			/* assert cl->pi != -1 */
			/* assert pollfd[pi].fd == FD_IGNORE */
			if (close(cl->fd))
				log_error("client close %d pi %d fd %d failed",
					  cl->id, cl->pi, cl->fd);
			rem_pollfd(cl->pi);
			cl->pi = -1;
			cl->fd = -1;
			cl->poll_ignore = 0;
		} else {
			/* main thread should have closed */
			if (cl->pi != -1 || cl->fd != -1) {
				log_error("client %d bad state pi %d fd %d",
					  cl->id, cl->pi, cl->fd);
			}
		}
		pthread_mutex_unlock(&cl->mutex);

		/* Unlink the client under client_mutex, then release it. */
		pthread_mutex_lock(&client_mutex);
		list_del(&cl->list);
		pthread_mutex_unlock(&client_mutex);

		client_purge(cl);
		free_client(cl);
	}
out:
	return NULL;
}
/*
 * Initialize the client list, the result list and their mutex/condition
 * variable, then start the client thread running client_thread_main().
 *
 * Returns 0 on success, -1 if the thread could not be created.
 */
static int setup_client_thread(void)
{
	INIT_LIST_HEAD(&client_list);
	INIT_LIST_HEAD(&client_results);

	pthread_mutex_init(&client_mutex, NULL);
	pthread_cond_init(&client_cond, NULL);

	if (pthread_create(&client_thread, NULL, client_thread_main, NULL) != 0)
		return -1;

	return 0;
}
/*
 * Ask the client thread to exit and wait for it.
 *
 * client_stop is set under client_mutex and client_cond is signalled so
 * that a thread sleeping in client_thread_main wakes up and sees the flag.
 */
static void close_client_thread(void)
{
pthread_mutex_lock(&client_mutex);
client_stop = 1;
pthread_cond_signal(&client_cond);
pthread_mutex_unlock(&client_mutex);
/* Blocks until client_thread_main returns. */
pthread_join(client_thread, NULL);
}
/*
 * Get a list of all VGs with a lockd type (sanlock|dlm) from lvmetad.
 * We'll match this list against a list of existing lockspaces that are
 * found in the lock manager.
 *
 * For each of these VGs, also create a struct resource on ls->resources to
 * represent each LV in the VG that uses a lock.  For each of these LVs
 * that are active, we'll attempt to adopt a lock.
 *
 * vg_lockd: list that receives one struct lockspace per lockd VG.
 *
 * Returns 0 on success or a negative errno value (-EINVAL for lvmetad
 * errors, -ENOMEM for allocation failures).  Lockd VGs collected before an
 * error are still moved onto vg_lockd.
 *
 * lvmetad_mutex is held for the duration of the lvmetad requests.
 */
static int get_lockd_vgs(struct list_head *vg_lockd)
{
	struct list_head update_vgs;
	daemon_reply reply;
	struct dm_config_node *cn;
	struct dm_config_node *metadata;
	struct dm_config_node *md_cn;
	struct dm_config_node *lv_cn;
	struct lockspace *ls, *safe;
	struct resource *r;
	const char *vg_name;
	const char *vg_uuid;
	const char *lv_uuid;
	const char *lock_type;
	const char *lock_args;
	char find_str_path[PATH_MAX];
	int mutex_unlocked = 0;
	int rv = 0;

	INIT_LIST_HEAD(&update_vgs);

	pthread_mutex_lock(&lvmetad_mutex);

	reply = daemon_send_simple(lvmetad_handle, "vg_list",
				   "token = %s", "skip",
				   NULL);

	if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
		log_error("vg_list from lvmetad failed %d", reply.error);
		rv = -EINVAL;
		goto destroy;
	}

	if (!(cn = dm_config_find_node(reply.cft->root, "volume_groups"))) {
		log_error("get_lockd_vgs no vgs");
		rv = -EINVAL;
		goto destroy;
	}

	/* create an update_vgs list of all vg uuids */

	for (cn = cn->child; cn; cn = cn->sib) {
		vg_uuid = cn->key;

		if (!(ls = alloc_lockspace())) {
			rv = -ENOMEM;
			break;
		}

		strncpy(ls->vg_uuid, vg_uuid, 64);
		list_add_tail(&ls->list, &update_vgs);
		log_debug("get_lockd_vgs %s", vg_uuid);
	}
destroy:
	daemon_reply_destroy(reply);

	if (rv < 0)
		goto out;

	/* get vg_name and lock_type for each vg uuid entry in update_vgs */

	list_for_each_entry(ls, &update_vgs, list) {
		reply = daemon_send_simple(lvmetad_handle, "vg_lookup",
					   "token = %s", "skip",
					   "uuid = %s", ls->vg_uuid,
					   NULL);

		if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
			log_error("vg_lookup from lvmetad failed %d", reply.error);
			rv = -EINVAL;
			goto next;
		}

		vg_name = daemon_reply_str(reply, "name", NULL);
		if (!vg_name) {
			log_error("get_lockd_vgs %s no name", ls->vg_uuid);
			rv = -EINVAL;
			goto next;
		}

		strncpy(ls->vg_name, vg_name, MAX_NAME);

		metadata = dm_config_find_node(reply.cft->root, "metadata");
		if (!metadata) {
			log_error("get_lockd_vgs %s name %s no metadata",
				  ls->vg_uuid, ls->vg_name);
			rv = -EINVAL;
			goto next;
		}

		lock_type = dm_config_find_str(metadata, "metadata/lock_type", NULL);
		ls->lm_type = str_to_lm(lock_type);

		if ((ls->lm_type != LD_LM_SANLOCK) && (ls->lm_type != LD_LM_DLM)) {
			log_debug("get_lockd_vgs %s not lockd type", ls->vg_name);
			/*
			 * Not an error (rv is still 0), but the reply must be
			 * destroyed before moving to the next VG; a plain
			 * "continue" here would skip the cleanup at "next"
			 * and leak the reply.
			 */
			goto next;
		}

		lock_args = dm_config_find_str(metadata, "metadata/lock_args", NULL);
		if (lock_args)
			strncpy(ls->vg_args, lock_args, MAX_ARGS);

		log_debug("get_lockd_vgs %s lock_type %s lock_args %s",
			  ls->vg_name, lock_type, lock_args ?: "none");

		/*
		 * Make a record (struct resource) of each lv that uses a lock.
		 * For any lv that uses a lock, we'll check if the lv is active
		 * and if so try to adopt a lock for it.
		 */

		for (md_cn = metadata->child; md_cn; md_cn = md_cn->sib) {
			if (strcmp(md_cn->key, "logical_volumes"))
				continue;

			for (lv_cn = md_cn->child; lv_cn; lv_cn = lv_cn->sib) {
				/* LVs without a lock_type do not use a lock. */
				snprintf(find_str_path, PATH_MAX, "%s/lock_type", lv_cn->key);
				lock_type = dm_config_find_str(lv_cn, find_str_path, NULL);

				if (!lock_type)
					continue;

				snprintf(find_str_path, PATH_MAX, "%s/lock_args", lv_cn->key);
				lock_args = dm_config_find_str(lv_cn, find_str_path, NULL);

				snprintf(find_str_path, PATH_MAX, "%s/id", lv_cn->key);
				lv_uuid = dm_config_find_str(lv_cn, find_str_path, NULL);

				if (!lv_uuid) {
					log_error("get_lock_vgs no lv id for name %s", lv_cn->key);
					continue;
				}

				if (!(r = alloc_resource())) {
					rv = -ENOMEM;
					goto next;
				}

				/* The LV uuid is used as the resource (lock) name. */
				r->type = LD_RT_LV;
				strncpy(r->name, lv_uuid, MAX_NAME);
				if (lock_args)
					strncpy(r->lv_args, lock_args, MAX_ARGS);
				list_add_tail(&r->list, &ls->resources);
				log_debug("get_lockd_vgs %s lv %s %s (name %s)",
					  ls->vg_name, r->name, lock_args ? lock_args : "", lv_cn->key);
			}
		}
next:
		daemon_reply_destroy(reply);

		if (rv < 0)
			break;
	}
	pthread_mutex_unlock(&lvmetad_mutex);
	mutex_unlocked = 1;
out:
	/* Return lockd VG's on the vg_lockd list. */

	list_for_each_entry_safe(ls, safe, &update_vgs, list) {
		list_del(&ls->list);

		if ((ls->lm_type == LD_LM_SANLOCK) || (ls->lm_type == LD_LM_DLM))
			list_add_tail(&ls->list, vg_lockd);
		else
			free(ls);
	}

	/* Reached with the mutex still held when the vg_list step failed. */
	if (!mutex_unlocked)
		pthread_mutex_unlock(&lvmetad_mutex);

	return rv;
}
/*
 * Buffer for the uuid returned by get_dm_uuid().
 *
 * An LVM dm uuid is "LVM-" followed by the 32 character VG uuid and the
 * 32 character LV uuid (68 characters, more with a layer suffix), so a 64
 * byte buffer truncates it and match_dm_uuid(), which reads offsets
 * 36..68, can never match.  129 is the value of DM_UUID_LEN in the
 * kernel's dm-ioctl.h, i.e. the longest uuid plus the terminating NUL.
 */
static char _dm_uuid[129];

/*
 * Look up the dm uuid of the device named dm_name.
 *
 * Returns a pointer to a static buffer holding the uuid, or NULL if the
 * device does not exist, the query fails, or the uuid does not begin with
 * "LVM".  The buffer is overwritten by the next call.
 */
static char *get_dm_uuid(char *dm_name)
{
	struct dm_info info;
	struct dm_task *dmt;
	const char *uuid;

	if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
		goto fail_out;

	if (!dm_task_set_name(dmt, dm_name))
		goto fail;

	if (!dm_task_run(dmt))
		goto fail;

	if (!dm_task_get_info(dmt, &info))
		goto fail;

	if (!info.exists)
		goto fail;

	uuid = dm_task_get_uuid(dmt);
	if (!uuid) {
		log_error("Failed to get uuid for device %s", dm_name);
		goto fail;
	}

	if (strncmp(uuid, "LVM", 3)) {
		log_debug("dm device %s is not from LVM", dm_name);
		goto fail;
	}

	/* Zero first so the copy below is always NUL terminated. */
	memset(_dm_uuid, 0, sizeof(_dm_uuid));
	strncpy(_dm_uuid, uuid, sizeof(_dm_uuid)-1);
	dm_task_destroy(dmt);
	return _dm_uuid;

fail:
	dm_task_destroy(dmt);
fail_out:
	return NULL;
}
/*
 * dm reports the LV uuid as:
 * LVM-ydpRIdDWBDX25upmj2k0D4deat6oxH8er03T0f4xM8rPIV8XqIhwv3h8Y7xRWjMr
 *
 * the lock name for the LV is:
 * r03T0f-4xM8-rPIV-8XqI-hwv3-h8Y7-xRWjMr
 *
 * This function formats both as:
 * r03T0f4xM8rPIV8XqIhwv3h8Y7xRWjMr
 *
 * and returns 1 if they match, 0 otherwise.
 *
 * The LV part of the dm uuid starts at offset 36 ("LVM-" plus the 32
 * character VG uuid).  Up to 33 characters are taken from there, so a dm
 * uuid that carries anything after the LV uuid (e.g. a layer suffix) does
 * not match.  Neither string is read past its terminating NUL, and a lock
 * name too long to be an LV uuid is rejected instead of overflowing the
 * local buffer.
 */
static int match_dm_uuid(char *dm_uuid, char *lv_lock_uuid)
{
	enum {
		DM_LV_UUID_START = 36,	/* strlen("LVM-") + 32 char VG uuid */
		DM_LV_UUID_END = 69	/* one past the last offset compared */
	};
	char buf1[64];
	char buf2[64];
	size_t dm_len;
	size_t i, j;

	memset(buf1, 0, sizeof(buf1));
	memset(buf2, 0, sizeof(buf2));

	/* Lock name with the dashes removed. */
	for (i = 0, j = 0; lv_lock_uuid[i]; i++) {
		if (lv_lock_uuid[i] == '-')
			continue;
		/* Longer than anything buf2 can hold: cannot match. */
		if (j >= sizeof(buf1) - 1)
			return 0;
		buf1[j] = lv_lock_uuid[i];
		j++;
	}

	/* LV part of the dm uuid, bounded by the actual string length. */
	dm_len = strlen(dm_uuid);
	for (i = DM_LV_UUID_START, j = 0; i < DM_LV_UUID_END && i < dm_len; i++) {
		buf2[j] = dm_uuid[i];
		j++;
	}

	if (!strcmp(buf1, buf2))
		return 1;
	return 0;
}
/*
 * All LVs with a lock_type are on ls->resources.
 * Remove any that are not active.  The remaining
 * will have locks adopted.
 *
 * "Active" means that a dm device exists whose name splits into this VG's
 * name and whose uuid matches the LV's lock name (match_dm_uuid).
 *
 * vg_lockd: list of lockspaces (one per lockd VG) whose resources lists
 *           are pruned in place.
 *
 * Returns 0 on success, -1 if the list of dm devices could not be read
 * (in which case no resources are removed).
 */
static int remove_inactive_lvs(struct list_head *vg_lockd)
{
	struct lockspace *ls;
	struct resource *r, *rsafe;
	struct dm_names *names;
	struct dm_task *dmt;
	char *dm_uuid;
	char *vgname, *lvname, *layer;
	char namebuf[MAX_NAME+1];
	unsigned next = 0;
	int rv = 0;

	if (!(dmt = dm_task_create(DM_DEVICE_LIST)))
		return -1;

	if (!dm_task_run(dmt)) {
		log_error("Failed to get dm devices");
		rv = -1;
		goto ret;
	}

	if (!(names = dm_task_get_names(dmt))) {
		log_error("Failed to get dm names");
		rv = -1;
		goto ret;
	}

	if (!names->dev) {
		/* No dm devices at all: every LV is inactive. */
		log_debug("dm names none found");
		goto out;
	}

	/*
	 * For each dm name, compare it to each lv in each lockd vg.
	 */

	do {
		/* Entries are chained by byte offset from the current one. */
		names = (struct dm_names *)((char *) names + next);

		dm_uuid = get_dm_uuid(names->name);
		if (!dm_uuid)
			goto next_dmname;

		vgname = NULL;
		lvname = NULL;
		layer = NULL;

		/*
		 * dm_split_lvm_name() is called without a pool, so vgname is
		 * pointed at a private, writable copy of the name first.
		 */
		memset(namebuf, 0, sizeof(namebuf));
		strncpy(namebuf, names->name, MAX_NAME);
		vgname = namebuf;

		if (!dm_split_lvm_name(NULL, namebuf, &vgname, &lvname, &layer)) {
			log_error("failed to split dm name %s", namebuf);
			goto next_dmname;
		}

		/*
		 * Validate the split before the names are passed to a "%s"
		 * format below; a NULL string argument there is undefined.
		 */
		if (!vgname || !lvname) {
			log_debug("dm name %s invalid split vg %s lv %s layer %s",
				  names->name, vgname ? vgname : "", lvname ? lvname : "", layer ? layer : "");
			goto next_dmname;
		}

		log_debug("adopt remove_inactive dm name %s dm uuid %s vgname %s lvname %s",
			  names->name, dm_uuid, vgname, lvname);

		list_for_each_entry(ls, vg_lockd, list) {
			if (strcmp(vgname, ls->vg_name))
				continue;

			/* A device named "lvmlock" is never adopted as an LV. */
			if (!strcmp(lvname, "lvmlock"))
				continue;

			list_for_each_entry(r, &ls->resources, list) {
				if (!match_dm_uuid(dm_uuid, r->name))
					continue;

				/* Found an active LV in a lockd VG. */
				log_debug("dm device %s adopt in vg %s lv %s",
					  names->name, ls->vg_name, r->name);
				r->adopt = 1;
				goto next_dmname;
			}
		}
next_dmname:
		next = names->next;
	} while (next);

out:
	/* Remove any struct resources that do not need locks adopted. */
	list_for_each_entry(ls, vg_lockd, list) {
		list_for_each_entry_safe(r, rsafe, &ls->resources, list) {
			if (r->adopt) {
				/* The flag was only a marker for this pass. */
				r->adopt = 0;
			} else {
				log_debug("lockd vg %s remove inactive lv %s", ls->vg_name, r->name);
				list_del(&r->list);
				free_resource(r);
			}
		}
	}
ret:
	dm_task_destroy(dmt);
	return rv;
}
/*
 * Rejoin lockspaces and adopt orphaned locks.  Called from main_loop when
 * adopt_opt is set.
 *
 * Steps, in order:
 *  1. collect the lockspaces that exist in dlm and sanlock (ls_found)
 *  2. collect lockd VGs and their lock-using LVs from lvmetad (vg_lockd)
 *  3. drop the LVs that are not active
 *  4. merge vg_lockd into ls_found and discard entries without a match
 *  5. queue start actions for the lockspaces and wait for all of them
 *  6. queue lock-adopt actions for LV, VG and GL locks and wait for them,
 *     retrying once in the other mode when the result is -EUCLEAN
 *  7. unlock the GL/VG locks that were adopted and wait for the unlocks
 *
 * Completed actions are collected from the adopt_results list, which is
 * read under client_mutex.  The waits poll that list once per second.
 *
 * There is no return value; any failure ends in the "fail" label, which
 * only logs an error.
 * NOTE(review): the lists built here are not freed on the fail paths.
 */
static void adopt_locks(void)
{
struct list_head ls_found;
struct list_head vg_lockd;
struct list_head to_unlock;
struct lockspace *ls, *lsafe;
struct lockspace *ls1, *l1safe;
struct lockspace *ls2, *l2safe;
struct resource *r, *rsafe;
struct action *act, *asafe;
int count_start = 0, count_start_done = 0, count_start_fail = 0;
int count_adopt = 0, count_adopt_done = 0, count_adopt_fail = 0;
int found, rv;
INIT_LIST_HEAD(&adopt_results);
INIT_LIST_HEAD(&ls_found);
INIT_LIST_HEAD(&vg_lockd);
INIT_LIST_HEAD(&to_unlock);
/*
* Get list of lockspaces from lock managers.
* Get list of VGs from lvmetad with a lockd type.
* Get list of active lockd type LVs from /dev.
*
* ECONNREFUSED means the lock manager is not running.
* This is expected for at least one of them.
*/
rv = lm_get_lockspaces_dlm(&ls_found);
if ((rv < 0) && (rv != -ECONNREFUSED))
goto fail;
rv = lm_get_lockspaces_sanlock(&ls_found);
if ((rv < 0) && (rv != -ECONNREFUSED))
goto fail;
if (list_empty(&ls_found)) {
log_debug("No lockspaces found to adopt");
return;
}
/*
* Adds a struct lockspace to vg_lockd for each lockd VG.
* Adds a struct resource to ls->resources for each LV.
*/
rv = get_lockd_vgs(&vg_lockd);
if (rv < 0) {
log_error("adopt_locks get_lockd_vgs failed");
goto fail;
}
/*
* For each resource on each lockspace, check if the
* corresponding LV is active. If so, leave the
* resource struct, if not free the resource struct.
* The remaining entries need to have locks adopted.
*/
rv = remove_inactive_lvs(&vg_lockd);
if (rv < 0) {
log_error("adopt_locks remove_inactive_lvs failed");
goto fail;
}
/*
* Choose the global lock type: dlm if any dlm lockspace was found,
* otherwise sanlock.
*/
list_for_each_entry(ls, &ls_found, list) {
if (ls->lm_type == LD_LM_DLM)
gl_use_dlm = 1;
log_debug("adopt %s lockspace %s vg %s",
lm_str(ls->lm_type), ls->name, ls->vg_name);
}
if (!gl_use_dlm)
gl_use_sanlock = 1;
list_for_each_entry(ls, &vg_lockd, list) {
log_debug("adopt lvmetad vg %s lock_type %s lock_args %s",
ls->vg_name, lm_str(ls->lm_type), ls->vg_args);
list_for_each_entry(r, &ls->resources, list)
log_debug("adopt lv %s %s", ls->vg_name, r->name);
}
/*
* Compare and merge the list of lockspaces in ls_found
* and the list of lockd VGs in vg_lockd.
*
* An ls from ls_found may not have had any active lvs when
* previous lvmlockd died, but the ls should still be joined,
* and checked for GL/VG locks.
*
* An ls from vg_lockd with active lvs should be in ls_found.
* If it's not then we might want to join the ls and acquire locks
* for the active lvs (as opposed to adopting orphans for them.)
* The orphan lock in the ls should have prevented the ls in
* the lock manager from going away.
*
* If an ls in vg_lockd has no active lvs and does not have
* a matching entry in ls_found, then skip it.
*
* An ls in ls_found should always have a matching ls in
* vg_lockd. If it doesn't, then maybe the vg has been
* removed even though the lockspace for the vg is still
* in the lock manager. Just leave the ls in the lm
* alone, and skip the ls_found entry.
*/
list_for_each_entry_safe(ls1, l1safe, &ls_found, list) {
/* The dlm global lockspace is special and doesn't match a VG. */
if (!strcmp(ls1->name, gl_lsname_dlm)) {
list_del(&ls1->list);
free(ls1);
continue;
}
found = 0;
list_for_each_entry_safe(ls2, l2safe, &vg_lockd, list) {
if (strcmp(ls1->vg_name, ls2->vg_name))
continue;
/*
* LS in both ls_found and vg_lockd.
*/
log_debug("ls %s matches vg %s", ls1->name, ls2->vg_name);
memcpy(ls1->vg_uuid, ls2->vg_uuid, 64);
memcpy(ls1->vg_args, ls2->vg_args, MAX_ARGS);
/* Move the active LV resources over to the ls_found entry. */
list_for_each_entry_safe(r, rsafe, &ls2->resources, list) {
list_del(&r->list);
list_add(&r->list, &ls1->resources);
}
list_del(&ls2->list);
free(ls2);
found = 1;
break;
}
/*
* LS in ls_found, not in vg_lockd.
* An lvm lockspace found in the lock manager has no
* corresponding VG in lvmetad. This shouldn't usually
* happen, but it's possible the VG could have been removed
* while the orphaned lockspace from it was still around.
* Report an error and leave the ls in the lm alone.
*/
if (!found) {
log_error("No VG %s found for lockspace %s %s",
ls1->vg_name, ls1->name, lm_str(ls1->lm_type));
list_del(&ls1->list);
free(ls1);
}
}
/*
* LS in vg_lockd, not in ls_found.
* lockd vgs from lvmetad that do not have an existing lockspace.
* This wouldn't be unusual; we just skip the vg.
* But, if the vg has active lvs, then it should have had locks
* and a lockspace. Should we attempt to join the lockspace and
* acquire (not adopt) locks for these LVs?
*/
list_for_each_entry_safe(ls, lsafe, &vg_lockd, list) {
if (!list_empty(&ls->resources)) {
/* We should have found a lockspace. */
/* add this ls and acquire locks for ls->resources? */
log_error("No lockspace %s %s found for VG %s with active LVs",
ls->name, lm_str(ls->lm_type), ls->vg_name);
} else {
/* The VG wasn't started in the previous lvmlockd. */
log_debug("No ls found for vg %s", ls->vg_name);
}
/* NOTE(review): resources still on ls->resources are not freed here. */
list_del(&ls->list);
free(ls);
}
/*
* Create and queue start actions to add lockspaces.
*/
if (gl_use_dlm) {
if (!(act = alloc_action()))
goto fail;
log_debug("adopt add dlm global lockspace");
act->op = LD_OP_START;
act->flags = (LD_AF_ADOPT | LD_AF_WAIT);
act->rt = LD_RT_GL;
act->lm_type = LD_LM_DLM;
act->client_id = ADOPT_CLIENT_ID;
add_dlm_global_lockspace(act);
count_start++;
}
list_for_each_entry_safe(ls, lsafe, &ls_found, list) {
if (!(act = alloc_action()))
goto fail;
act->op = LD_OP_START;
act->flags = (LD_AF_ADOPT | LD_AF_WAIT);
act->rt = LD_RT_VG;
act->lm_type = ls->lm_type;
act->client_id = ADOPT_CLIENT_ID;
strncpy(act->vg_name, ls->vg_name, MAX_NAME);
memcpy(act->vg_uuid, ls->vg_uuid, 64);
memcpy(act->vg_args, ls->vg_args, MAX_ARGS);
act->host_id = ls->host_id;
/* set act->version from lvmetad data? */
log_debug("adopt add %s vg lockspace %s", lm_str(act->lm_type), act->vg_name);
rv = add_lockspace_thread(ls->name, act->vg_name, act->vg_uuid,
act->lm_type, act->vg_args, act);
if (rv < 0) {
/* Drop this lockspace from ls_found so no locks are adopted in it. */
log_error("Failed to create lockspace thread for VG %s", ls->vg_name);
list_del(&ls->list);
free(ls);
free_action(act);
count_start_fail++;
continue;
}
/*
* When the lockspace_thread is done with the start act,
* it will see the act ADOPT flag and move the act onto
* the adopt_results list for us to collect below.
*/
count_start++;
}
log_debug("adopt starting %d lockspaces", count_start);
/*
* Wait for all start/rejoin actions to complete. Each start action
* queued above will appear on the adopt_results list when finished.
*/
while (count_start_done < count_start) {
sleep(1);
act = NULL;
pthread_mutex_lock(&client_mutex);
if (!list_empty(&adopt_results)) {
act = list_first_entry(&adopt_results, struct action, list);
list_del(&act->list);
}
pthread_mutex_unlock(&client_mutex);
if (!act)
continue;
if (act->result < 0) {
log_error("adopt add lockspace failed vg %s %d", act->vg_name, act->result);
count_start_fail++;
}
free_action(act);
count_start_done++;
}
log_debug("adopt started %d lockspaces done %d fail %d",
count_start, count_start_done, count_start_fail);
/*
* Create lock-adopt actions for active LVs (ls->resources),
* and GL/VG locks (we don't know if these locks were held
* and orphaned by the last lvmlockd, so try to adopt them
* to see.)
*
* A proper struct lockspace now exists on the lockspaces list
* for each ls in ls_found. Lock ops for one of those
* lockspaces can be done as OP_LOCK actions queued using
* add_lock_action();
*
* Start by attempting to adopt the lock in the most likely
* mode it was left in (ex for lvs, sh for vg/gl). If
* the mode is wrong, the lm will return an error and we
* try again with the other mode.
*/
list_for_each_entry(ls, &ls_found, list) {
/*
* Adopt orphan LV locks.
*/
list_for_each_entry(r, &ls->resources, list) {
if (!(act = alloc_action()))
goto fail;
act->op = LD_OP_LOCK;
act->rt = LD_RT_LV;
act->mode = LD_LK_EX;
act->flags = (LD_AF_ADOPT | LD_AF_PERSISTENT);
act->client_id = ADOPT_CLIENT_ID;
act->lm_type = ls->lm_type;
strncpy(act->vg_name, ls->vg_name, MAX_NAME);
strncpy(act->lv_uuid, r->name, MAX_NAME);
strncpy(act->lv_args, r->lv_args, MAX_ARGS);
log_debug("adopt lock for lv %s %s", act->vg_name, act->lv_uuid);
rv = add_lock_action(act);
if (rv < 0) {
log_error("adopt add_lock_action lv %s %s error %d", act->vg_name, act->lv_uuid, rv);
count_adopt_fail++;
free_action(act);
} else {
count_adopt++;
}
}
/*
* Adopt orphan VG lock.
*/
if (!(act = alloc_action()))
goto fail;
act->op = LD_OP_LOCK;
act->rt = LD_RT_VG;
act->mode = LD_LK_SH;
act->flags = LD_AF_ADOPT;
act->client_id = ADOPT_CLIENT_ID;
act->lm_type = ls->lm_type;
strncpy(act->vg_name, ls->vg_name, MAX_NAME);
log_debug("adopt lock for vg %s", act->vg_name);
rv = add_lock_action(act);
if (rv < 0) {
log_error("adopt add_lock_action vg %s error %d", act->vg_name, rv);
count_adopt_fail++;
free_action(act);
} else {
count_adopt++;
}
}
/*
* Adopt orphan GL lock.
*/
if (!(act = alloc_action()))
goto fail;
act->op = LD_OP_LOCK;
act->rt = LD_RT_GL;
act->mode = LD_LK_SH;
act->flags = LD_AF_ADOPT;
act->client_id = ADOPT_CLIENT_ID;
act->lm_type = (gl_use_sanlock ? LD_LM_SANLOCK : LD_LM_DLM);
log_debug("adopt lock for gl");
rv = add_lock_action(act);
if (rv < 0) {
/* act->vg_name is never set for the GL action. */
log_error("adopt add_lock_action gl %s error %d", act->vg_name, rv);
count_adopt_fail++;
free_action(act);
} else {
count_adopt++;
}
/*
* Wait for lock-adopt actions to complete. The completed
* actions are passed back here via the adopt_results list.
*/
while (count_adopt_done < count_adopt) {
sleep(1);
act = NULL;
pthread_mutex_lock(&client_mutex);
if (!list_empty(&adopt_results)) {
act = list_first_entry(&adopt_results, struct action, list);
list_del(&act->list);
}
pthread_mutex_unlock(&client_mutex);
if (!act)
continue;
/*
* lock adopt results
*/
if (act->result == -EUCLEAN) {
/*
* Adopt failed because the orphan has a different mode
* than initially requested. Repeat the lock-adopt operation
* with the other mode. N.B. this logic depends on first
* trying sh then ex for GL/VG locks, and ex then sh for
* LV locks.
*/
if ((act->rt != LD_RT_LV) && (act->mode == LD_LK_SH)) {
/* GL/VG locks: attempt to adopt ex after sh failed. */
act->mode = LD_LK_EX;
rv = add_lock_action(act);
} else if ((act->rt == LD_RT_LV) && (act->mode == LD_LK_EX)) {
/* LV locks: attempt to adopt sh after ex failed. */
act->mode = LD_LK_SH;
rv = add_lock_action(act);
} else {
/* Both modes have been tried; give up on this lock. */
log_error("Failed to adopt %s lock in vg %s error %d",
rt_str(act->rt), act->vg_name, act->result);
count_adopt_fail++;
count_adopt_done++;
free_action(act);
/* act was freed above; rv = 0 keeps the check below from touching it. */
rv = 0;
}
if (rv < 0) {
log_error("adopt add_lock_action again %s", act->vg_name);
count_adopt_fail++;
count_adopt_done++;
free_action(act);
}
} else if (act->result == -ENOENT) {
/*
* No orphan lock exists. This is common for GL/VG locks
* because they may not have been held when lvmlockd exited.
* It's also expected for LV types that do not use a lock.
*/
if (act->rt == LD_RT_LV) {
/* Unexpected, we should have found an orphan. */
log_error("Failed to adopt LV lock for %s %s error %d",
act->vg_name, act->lv_uuid, act->result);
count_adopt_fail++;
} else {
/* Normal, no GL/VG lock was orphaned. */
log_debug("Did not adopt %s lock in vg %s error %d",
rt_str(act->rt), act->vg_name, act->result);
}
count_adopt_done++;
free_action(act);
} else if (act->result < 0) {
/*
* Some unexpected error.
*/
log_error("adopt lock rt %s vg %s lv %s error %d",
rt_str(act->rt), act->vg_name, act->lv_uuid, act->result);
count_adopt_fail++;
count_adopt_done++;
free_action(act);
} else {
/*
* Adopt success.
* LV actions are freed; VG and GL actions are kept on
* to_unlock and reused below for the unlock.
* NOTE(review): an action with any other rt would be neither
* freed nor queued here.
*/
if (act->rt == LD_RT_LV) {
log_debug("adopt success lv %s %s %s", act->vg_name, act->lv_uuid, mode_str(act->mode));
free_action(act);
} else if (act->rt == LD_RT_VG) {
log_debug("adopt success vg %s %s", act->vg_name, mode_str(act->mode));
list_add_tail(&act->list, &to_unlock);
} else if (act->rt == LD_RT_GL) {
log_debug("adopt success gl %s %s", act->vg_name, mode_str(act->mode));
list_add_tail(&act->list, &to_unlock);
}
count_adopt_done++;
}
}
/*
* Release adopted GL/VG locks.
* The to_unlock actions were the ones used to lock-adopt the GL/VG locks;
* now use them to do the unlocks. These actions will again be placed
* on adopt_results for us to collect because they have the ADOPT flag set.
*/
/* The adopt counters are reused to count the unlock actions. */
count_adopt = 0;
count_adopt_done = 0;
list_for_each_entry_safe(act, asafe, &to_unlock, list) {
list_del(&act->list);
if (act->mode == LD_LK_EX) {
/*
* FIXME: we probably want to check somehow that
* there's no lvm command still running that's
* using this ex lock and changing things.
*/
log_warn("adopt releasing ex %s lock %s",
rt_str(act->rt), act->vg_name);
}
act->mode = LD_LK_UN;
log_debug("adopt unlock for %s %s", rt_str(act->rt), act->vg_name);
rv = add_lock_action(act);
if (rv < 0) {
log_error("adopt unlock add_lock_action error %d", rv);
free_action(act);
} else {
count_adopt++;
}
}
/* Wait for the unlocks to complete. */
while (count_adopt_done < count_adopt) {
sleep(1);
act = NULL;
pthread_mutex_lock(&client_mutex);
if (!list_empty(&adopt_results)) {
act = list_first_entry(&adopt_results, struct action, list);
list_del(&act->list);
}
pthread_mutex_unlock(&client_mutex);
if (!act)
continue;
if (act->result < 0)
log_error("adopt unlock error %d", act->result);
count_adopt_done++;
free_action(act);
}
/* FIXME: purge any remaining orphan locks in each rejoined ls? */
/* count_adopt_fail was not reset above, so it still holds the adopt failures. */
if (count_start_fail || count_adopt_fail)
goto fail;
log_debug("adopt_locks done");
return;
fail:
log_error("adopt_locks failed, reset host");
}
/*
 * Return the pid of the process connected to the other end of the unix
 * socket fd, taken from the socket's SO_PEERCRED credentials.
 *
 * Returns -1 if the credentials cannot be read.
 */
static int get_peer_pid(int fd)
{
	struct ucred cred;
	/* getsockopt() takes a socklen_t *, which need not be unsigned int. */
	socklen_t len = sizeof(cred);

	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &len) != 0)
		return -1;

	return cred.pid;
}
/*
 * Accept a new connection on the listening socket and register it as a
 * client: allocate a struct client, add the new fd to the poll set, assign
 * a client id and append the client to client_list.
 *
 * If the client cannot be allocated or the fd cannot be added to the poll
 * set, the accepted fd is closed and the connection is dropped.
 *
 * poll_fd is the fd that poll reported; the accept is done on listen_fd.
 */
static void process_listener(int poll_fd)
{
	struct client *cl;
	int fd, pi;

	/* assert poll_fd == listen_fd */

	fd = accept(listen_fd, NULL, NULL);
	if (fd < 0)
		return;

	if (!(cl = alloc_client())) {
		/* close() returns non-zero on failure. */
		if (close(fd))
			log_error("failed to close lockd poll fd");
		return;
	}

	pi = add_pollfd(fd);
	if (pi < 0) {
		log_error("process_listener add_pollfd error %d", pi);
		/* Nothing else refers to fd yet, so close it here. */
		if (close(fd))
			log_error("failed to close lockd poll fd");
		free_client(cl);
		return;
	}

	cl->pi = pi;
	cl->fd = fd;
	cl->pid = get_peer_pid(fd);

	pthread_mutex_init(&cl->mutex, NULL);

	pthread_mutex_lock(&client_mutex);
	/* Client ids skip ADOPT_CLIENT_ID and 0. */
	client_ids++;

	if (client_ids == ADOPT_CLIENT_ID)
		client_ids++;
	if (!client_ids)
		client_ids++;

	cl->id = client_ids;
	list_add_tail(&cl->list, &client_list);
	pthread_mutex_unlock(&client_mutex);

	log_debug("client add id %d pi %d fd %d", cl->id, cl->pi, cl->fd);
}
/*
 * Create the restart pipe.  The main loop polls on restart_fds[0], so a
 * thread can make poll() return by writing to restart_fds[1].
 *
 * Returns 0 on success, -1 if the pipe cannot be created, or the negative
 * value from add_pollfd() if the read end cannot be added to the poll set.
 */
static int setup_restart(void)
{
	if (pipe(restart_fds) != 0) {
		log_error("setup_restart pipe error %d", errno);
		return -1;
	}

	restart_pi = add_pollfd(restart_fds[0]);

	return (restart_pi < 0) ? restart_pi : 0;
}
/*
 * Consume the wake-up byte from the restart pipe.  A thread writes 'w' to
 * restart_fds[1] to restart poll() after adding an fd back into pollfd.
 *
 * fd is the descriptor poll reported; the read is done on restart_fds[0].
 */
static void process_restart(int fd)
{
	char wake[1];
	int nread;

	/* assert fd == restart_fds[0] */

	nread = read(restart_fds[0], wake, 1);
	if (nread <= 0)
		log_debug("process_restart error %d", errno);
}
/*
 * SIGTERM handler (installed in main_loop).  It only sets daemon_quit;
 * main_loop checks that flag after poll() returns.
 */
static void sigterm_handler(int sig __attribute__((unused)))
{
daemon_quit = 1;
}
/*
 * Daemon main function (set as daemon_state.daemon_main in main()).
 *
 * Initializes the global lists and mutexes, adds the listening socket
 * (ds_arg->socket_fd) to the poll set, calls setup_client_thread(),
 * setup_worker_thread() and setup_restart(), connects to lvmetad,
 * optionally adopts locks (adopt_opt), and then polls until shutdown.
 *
 * In the poll loop, events on the listening socket create new clients,
 * events on the restart pipe are drained, and events on client fds are
 * recorded in the struct client (recv or dead) for client_thread_main to
 * act on.
 *
 * Returns a negative value if setup_structs() fails, otherwise 0 once the
 * loop has ended and the threads have been stopped.
 */
static int main_loop(daemon_state *ds_arg)
{
struct client *cl;
int i, rv, is_recv, is_dead;
signal(SIGTERM, &sigterm_handler);
rv = setup_structs();
if (rv < 0) {
log_error("Can't allocate memory");
return rv;
}
strcpy(gl_lsname_dlm, S_NAME_GL_DLM);
INIT_LIST_HEAD(&lockspaces);
INIT_LIST_HEAD(&lockspaces_inactive);
pthread_mutex_init(&lockspaces_mutex, NULL);
pthread_mutex_init(&pollfd_mutex, NULL);
pthread_mutex_init(&log_mutex, NULL);
openlog("lvmlockd", LOG_CONS | LOG_PID, LOG_DAEMON);
log_warn("lvmlockd started");
listen_fd = ds_arg->socket_fd;
listen_pi = add_pollfd(listen_fd);
/* NOTE(review): the return values of add_pollfd and the three setup calls are not checked. */
setup_client_thread();
setup_worker_thread();
setup_restart();
pthread_mutex_init(&lvmetad_mutex, NULL);
lvmetad_handle = lvmetad_open(NULL);
/* A failed lvmetad connection is logged but does not stop the daemon. */
if (lvmetad_handle.error || lvmetad_handle.socket_fd < 0)
log_error("lvmetad_open error %d", lvmetad_handle.error);
else
lvmetad_connected = 1;
/*
* Attempt to rejoin lockspaces and adopt locks from a previous
* instance of lvmlockd that left behind lockspaces/locks.
*/
if (adopt_opt)
adopt_locks();
while (1) {
rv = poll(pollfd, pollfd_maxi + 1, -1);
if ((rv == -1 && errno == EINTR) || daemon_quit) {
if (daemon_quit) {
int count;
/* first sigterm would trigger stops, and
second sigterm may finish the joins. */
count = for_each_lockspace(DO_STOP, DO_FREE, NO_FORCE);
if (!count)
break;
/* Lockspaces remain: cancel the quit request and keep running. */
log_debug("ignore shutdown for %d lockspaces", count);
daemon_quit = 0;
}
continue;
}
if (rv < 0) {
log_error("poll errno %d", errno);
break;
}
for (i = 0; i <= pollfd_maxi; i++) {
/* Negative fds mark slots that are not being polled. */
if (pollfd[i].fd < 0)
continue;
is_recv = 0;
is_dead = 0;
if (pollfd[i].revents & POLLIN)
is_recv = 1;
if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL))
is_dead = 1;
if (!is_recv && !is_dead)
continue;
if (i == listen_pi) {
process_listener(pollfd[i].fd);
continue;
}
if (i == restart_pi) {
process_restart(pollfd[i].fd);
continue;
}
/*
log_debug("poll pi %d fd %d revents %x",
i, pollfd[i].fd, pollfd[i].revents);
*/
pthread_mutex_lock(&client_mutex);
cl = find_client_pi(i);
if (cl) {
pthread_mutex_lock(&cl->mutex);
if (cl->recv) {
/* should not happen */
log_error("main client %d already recv", cl->id);
} else if (cl->dead) {
/* should not happen */
log_error("main client %d already dead", cl->id);
} else if (is_dead) {
/*
* The connection is gone: close the fd and free the
* pollfd slot here, and leave the struct client for
* client_thread_main to remove.
*/
log_debug("close %s[%d.%u] fd %d",
cl->name[0] ? cl->name : "client",
cl->pid, cl->id, cl->fd);
cl->dead = 1;
cl->pi = -1;
cl->fd = -1;
cl->poll_ignore = 0;
if (close(pollfd[i].fd))
log_error("close fd %d failed", pollfd[i].fd);
pollfd[i].fd = POLL_FD_UNUSED;
pollfd[i].events = 0;
pollfd[i].revents = 0;
} else if (is_recv) {
/*
* Data is waiting: stop polling this fd (the slot is kept
* but marked POLL_FD_IGNORE) and flag the client so that
* client_thread_main reads the request.
*/
cl->recv = 1;
cl->poll_ignore = 1;
pollfd[i].fd = POLL_FD_IGNORE;
pollfd[i].events = 0;
pollfd[i].revents = 0;
}
pthread_mutex_unlock(&cl->mutex);
client_work = 1;
pthread_cond_signal(&client_cond);
/* client_thread will pick up and work on any
client with cl->recv or cl->dead set */
} else {
/* don't think this can happen */
log_error("no client for index %d fd %d",
i, pollfd[i].fd);
if (close(pollfd[i].fd))
log_error("close fd %d failed", pollfd[i].fd);
pollfd[i].fd = POLL_FD_UNUSED;
pollfd[i].events = 0;
pollfd[i].revents = 0;
}
pthread_mutex_unlock(&client_mutex);
/* After set_dead, should we scan pollfd for
last unused slot and reduce pollfd_maxi? */
}
}
/* Shutdown: stop the lockspaces with force, then stop the threads. */
for_each_lockspace_retry(DO_STOP, DO_FREE, DO_FORCE);
free_lockspaces_inactive();
close_worker_thread();
close_client_thread();
closelog();
daemon_close(lvmetad_handle);
return 0;
}
/*
 * Write the lvmlockd command-line help to 'file'.
 * 'prog' is the program name shown on the synopsis line.
 *
 * Fixed text is emitted with fputs(); fprintf() is used only for the
 * lines that interpolate a value (program name, compiled-in defaults).
 */
static void usage(char *prog, FILE *file)
{
	fputs("Usage:\n", file);
	fprintf(file, "%s [options]\n\n", prog);

	fputs(" --help | -h\n"
	      " Show this help information.\n"
	      " --version | -V\n"
	      " Show version of lvmlockd.\n"
	      " --test | -T\n"
	      " Test mode, do not call lock manager.\n"
	      " --foreground | -f\n"
	      " Don't fork.\n"
	      " --daemon-debug | -D\n"
	      " Don't fork and print debugging to stdout.\n",
	      file);

	/* Options whose help text shows a compiled-in default value. */
	fputs(" --pid-file | -p <path>\n", file);
	fprintf(file, " Set path to the pid file. [%s]\n", LVMLOCKD_PIDFILE);

	fputs(" --socket-path | -s <path>\n", file);
	fprintf(file, " Set path to the socket to listen on. [%s]\n", LVMLOCKD_SOCKET);

	fputs(" --syslog-priority | -S err|warning|debug\n", file);
	fprintf(file, " Write log messages from this level up to syslog. [%s]\n",
		_syslog_num_to_name(LOG_SYSLOG_PRIO));

	fputs(" --gl-type | -g <str>\n"
	      " Set global lock type to be dlm|sanlock.\n"
	      " --host-id | -i <num>\n"
	      " Set the local sanlock host id.\n"
	      " --host-id-file | -F <path>\n"
	      " A file containing the local sanlock host_id.\n"
	      " --sanlock-timeout | -o <seconds>\n"
	      " Set the sanlock lockspace I/O timeout.\n"
	      " --adopt | -A 0|1\n"
	      " Adopt locks from a previous instance of lvmlockd.\n",
	      file);
}
/*
 * lvmlockd entry point.
 *
 * Parses the command line into the daemon_state and the lvmlockd option
 * globals, then calls daemon_start(), which runs main_loop() through
 * ds.daemon_main.
 *
 * The pid file and socket path are taken from the environment
 * (LVM_LVMLOCKD_PIDFILE, LVM_LVMLOCKD_SOCKET), may be overridden with
 * -p / -s, and fall back to the compiled-in defaults when still unset.
 *
 * Exits with EXIT_SUCCESS after -h / -V, and with EXIT_FAILURE on an
 * unknown option or an unsupported --gl-type value.
 */
int main(int argc, char *argv[])
{
	daemon_state ds = {
		.daemon_main = main_loop,
		.daemon_init = NULL,
		.daemon_fini = NULL,
		.pidfile = getenv("LVM_LVMLOCKD_PIDFILE"),
		.socket_path = getenv("LVM_LVMLOCKD_SOCKET"),
		.protocol = lvmlockd_protocol,
		.protocol_version = lvmlockd_protocol_version,
		.name = "lvmlockd",
	};

	static struct option long_options[] = {
		{"help",            no_argument,       0, 'h' },
		{"version",         no_argument,       0, 'V' },
		{"test",            no_argument,       0, 'T' },
		{"foreground",      no_argument,       0, 'f' },
		{"daemon-debug",    no_argument,       0, 'D' },
		{"pid-file",        required_argument, 0, 'p' },
		{"socket-path",     required_argument, 0, 's' },
		{"gl-type",         required_argument, 0, 'g' },
		{"host-id",         required_argument, 0, 'i' },
		{"host-id-file",    required_argument, 0, 'F' },
		{"adopt",           required_argument, 0, 'A' },
		{"syslog-priority", required_argument, 0, 'S' },
		{"sanlock-timeout", required_argument, 0, 'o' },
		{0, 0, 0, 0 }
	};

	while (1) {
		int c;
		int lm;
		int option_index = 0;

		/*
		 * The short-option string must list every short option that
		 * usage() advertises and the switch below handles; -i and -F
		 * take an argument just like their long forms.
		 */
		c = getopt_long(argc, argv, "hVTfDp:s:g:i:F:S:A:o:",
				long_options, &option_index);
		if (c == -1)
			break;

		switch (c) {
		case 0:
			/* getopt_long() returns 0 for flag-style long options;
			   none are defined, so there is nothing to do. */
			break;
		case 'h':
			usage(argv[0], stdout);
			exit(EXIT_SUCCESS);
		case 'V':
			printf("lvmlockd version: " LVM_VERSION "\n");
			exit(EXIT_SUCCESS);
		case 'T':
			daemon_test = 1;
			break;
		case 'f':
			ds.foreground = 1;
			break;
		case 'D':
			/* debug mode implies running in the foreground */
			ds.foreground = 1;
			daemon_debug = 1;
			break;
		case 'p':
			ds.pidfile = strdup(optarg);
			break;
		case 's':
			ds.socket_path = strdup(optarg);
			break;
		case 'g':
			/* accept a lock manager only if support is available */
			lm = str_to_lm(optarg);
			if (lm == LD_LM_DLM && lm_support_dlm())
				gl_use_dlm = 1;
			else if (lm == LD_LM_SANLOCK && lm_support_sanlock())
				gl_use_sanlock = 1;
			else {
				fprintf(stderr, "invalid gl-type option\n");
				exit(EXIT_FAILURE);
			}
			break;
		case 'i':
			daemon_host_id = atoi(optarg);
			break;
		case 'F':
			daemon_host_id_file = strdup(optarg);
			break;
		case 'o':
			sanlock_io_timeout = atoi(optarg);
			break;
		case 'A':
			adopt_opt = atoi(optarg);
			break;
		case 'S':
			syslog_priority = _syslog_name_to_num(optarg);
			break;
		case '?':
		default:
			/* invalid usage: report on stderr and fail */
			usage(argv[0], stderr);
			exit(EXIT_FAILURE);
		}
	}

	/* Neither environment nor command line set these: use defaults. */
	if (!ds.pidfile)
		ds.pidfile = LVMLOCKD_PIDFILE;

	if (!ds.socket_path)
		ds.socket_path = LVMLOCKD_SOCKET;

	/* runs daemon_main/main_loop */
	daemon_start(ds);

	return 0;
}