mirror of
git://sourceware.org/git/lvm2.git
synced 2024-12-21 13:34:40 +03:00
54a8562804
If lvmlockd is killed and restarted, let it adopt the existing global lockspace.
6494 lines
160 KiB
C
6494 lines
160 KiB
C
/*
|
|
* Copyright (C) 2014-2015 Red Hat, Inc.
|
|
*
|
|
* This file is part of LVM2.
|
|
*
|
|
* This copyrighted material is made available to anyone wishing to use,
|
|
* modify, copy, or redistribute it subject to the terms and conditions
|
|
* of the GNU Lesser General Public License v.2.1.
|
|
*/
|
|
|
|
#include "tools/tool.h"
|
|
|
|
#include "libdaemon/client/daemon-io.h"
|
|
#include "daemon-server.h"
|
|
#include "lvm-version.h"
|
|
#include "daemons/lvmlockd/lvmlockd-client.h"
|
|
#include "device_mapper/misc/dm-ioctl.h"
|
|
|
|
/* #include <assert.h> */
|
|
#include <errno.h>
|
|
#include <pthread.h>
|
|
#include <stddef.h>
|
|
#include <poll.h>
|
|
#include <signal.h>
|
|
#include <getopt.h>
|
|
#include <syslog.h>
|
|
#include <dirent.h>
|
|
#include <time.h>
|
|
#include <sys/types.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/utsname.h>
|
|
#include <sys/un.h>
|
|
|
|
#ifdef USE_SD_NOTIFY
|
|
#include <systemd/sd-daemon.h>
|
|
#endif
|
|
|
|
#define EXTERN
|
|
#include "lvmlockd-internal.h"
|
|
|
|
static int str_to_mode(const char *str);
|
|
|
|
/*
|
|
* Basic operation of lvmlockd
|
|
*
|
|
* lvmlockd main process runs main_loop() which uses poll().
|
|
* poll listens for new connections from lvm commands and for
|
|
* messages from existing connected lvm commands.
|
|
*
|
|
* lvm command starts and connects to lvmlockd.
|
|
*
|
|
* lvmlockd receives a connection request from command and adds a
|
|
* 'struct client' to keep track of the connection to the command.
|
|
* The client's fd is added to the set of fd's in poll().
|
|
*
|
|
* lvm command sends a lock request to lvmlockd. The lock request
|
|
* can be for the global lock, a vg lock, or an lv lock.
|
|
*
|
|
* lvmlockd main_loop/poll sees a message from an existing client.
|
|
* It sets client.recv = 1, then wakes up client_thread_main.
|
|
*
|
|
* client_thread_main iterates through client structs (cl), looking
|
|
* for any that need processing, finds the one with cl->recv set,
|
|
* and calls client_recv_action(cl).
|
|
*
|
|
* client_recv_action(cl) reads the message/request from the client,
|
|
* allocates a new 'struct action' (act) to represent the request,
|
|
* sets the act with what is found in the request, then looks at
|
|
* the specific operation in act->op (LD_OP_FOO) to decide what to
|
|
* do with the action:
|
|
*
|
|
* . If the action is to start a lockspace, create a new thread
|
|
* to manage that lockspace: add_lockspace(act).
|
|
*
|
|
* . If the action is a lock request, pass the act to the thread
|
|
* that is managing that lockspace: add_lock_action(act).
|
|
*
|
|
* . Other misc actions are passed to the worker_thread:
|
|
* add_work_action(act).
|
|
*
|
|
* Once the client_thread has passed the action off to another
|
|
* thread to process, it goes back to waiting for more client
|
|
* handling work to do.
|
|
*
|
|
* The thread that was given the action by the client_thread
|
|
* now processes that action according to the operation, act->op.
|
|
* This is either a lockspace_thread (for lock ops or ops that
|
|
* add/rem a lockspace), or the worker_thread. See below for
|
|
* how these ops are processed by these threads. When the
|
|
* given thread is done processing the action, the result is
|
|
* set in act->result, and the act struct for the completed action
|
|
* is passed back to the client_thread (client_results list).
|
|
*
|
|
* The client_thread takes completed actions (from client_results
|
|
* list), and sends the result back to the client that sent the
|
|
* request represented by the action. The act struct is then freed.
|
|
*
|
|
* This completes the cycle of work between lvm commands (clients)
|
|
* and lvmlockd. In summary:
|
|
*
|
|
* - main process polls for new client connections and new requests
|
|
* from lvm commands
|
|
* - client_thread reads requests from clients
|
|
* - client_thread creates an action struct for each request
|
|
* - client_thread passes the act to another thread for processing
|
|
* - other threads pass completed act structs back to client_thread
|
|
* - client_thread sends the act result back to the client and frees the act
|
|
*
|
|
*
|
|
* Lockspace threads:
|
|
* Each lockd VG has its own lockspace that contains locks for that VG.
|
|
* Each 'struct lockspace' is managed by a separate lockspace_thread.
|
|
* When the lockspace_thread is first created, the first thing it does
|
|
* is join the lockspace in the lock manager. This can take a long time.
|
|
* If the join fails, the thread exits. After the join, the thread
|
|
* enters a loop waiting for lock actions to perform in the lockspace.
|
|
*
|
|
* The request to remove/leave a lockspace causes a flag to be set in
|
|
* the lockspace struct. When the lockspace_thread sees this flag
|
|
* set, it leaves the lockspace, and exits.
|
|
*
|
|
* When the client_thread passes a new action to a lockspace_thread,
|
|
* i.e. a new lock request, the lockspace_thread identifies which resource
|
|
* is being locked (GL, VG, LV), and gets the 'struct resource' (r) for it.
|
|
* r->type will be LD_RT_GL, LD_RT_VG, or LD_RT_LV. r->name is the
|
|
* resource name, and is fixed for GL and VG resources, but is based on
|
|
* the LV name for LV resources. The act is added to the resource's
|
|
* list of actions: r->actions, i.e. outstanding lock requests on the
|
|
* resource.
|
|
*
|
|
* The lockspace thread then iterates through each resource in the
|
|
* lockspace, processing any outstanding actions on each: res_process(ls, r).
|
|
*
|
|
* res_process() compares the outstanding actions/requests in r->actions
|
|
* against any existing locks on the resource in r->locks. If the
|
|
* action is blocked by existing locks, it's left on r->actions. If not,
|
|
* the action/request is passed to the lock manager. If the result from
|
|
* the lock manager is success, a new 'struct lock' is created for the
|
|
* action and saved on r->locks. The result is set in act->result and
|
|
* the act is passed back to the client_thread to be returned to the client.
|
|
*/
|
|
|
|
static const char lvmlockd_protocol[] = "lvmlockd";
|
|
static const int lvmlockd_protocol_version = 1;
|
|
static int daemon_quit;
|
|
static int adopt_opt;
|
|
static uint32_t adopt_update_count;
|
|
static const char *adopt_file;
|
|
|
|
/*
|
|
* We use a separate socket for dumping daemon info.
|
|
* This will not interfere with normal operations, and allows
|
|
* free-form debug data to be dumped instead of the libdaemon
|
|
* protocol that wants all data in the cft format.
|
|
* 1MB should fit all the info we need to dump.
|
|
*/
|
|
#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
|
|
#define DUMP_BUF_SIZE (1024 * 1024)
|
|
static char dump_buf[DUMP_BUF_SIZE];
|
|
static struct sockaddr_un dump_addr;
|
|
static socklen_t dump_addrlen;
|
|
|
|
/*
|
|
* Main program polls client connections, adds new clients,
|
|
* adds work for client thread.
|
|
*
|
|
* pollfd_mutex is used for adding vs removing entries,
|
|
* and for resume vs realloc.
|
|
*/
|
|
#define POLL_FD_UNUSED -1 /* slot is free */
|
|
#define POLL_FD_IGNORE -2 /* slot is used but ignore in poll */
|
|
#define ADD_POLL_SIZE 16 /* increment slots by this amount */
|
|
|
|
static pthread_mutex_t pollfd_mutex;
|
|
static struct pollfd *pollfd;
|
|
static int pollfd_size;
|
|
static int pollfd_maxi;
|
|
static int listen_pi;
|
|
static int listen_fd;
|
|
static int restart_pi;
|
|
static int restart_fds[2];
|
|
|
|
/*
|
|
* Each lockspace has its own thread to do locking.
|
|
* The lockspace thread makes synchronous lock requests to dlm/sanlock.
|
|
* Every vg with a lockd type, i.e. "dlm", "sanlock", should be on this list.
|
|
*/
|
|
static pthread_mutex_t lockspaces_mutex;
|
|
static struct list_head lockspaces;
|
|
|
|
/*
|
|
* Client thread reads client requests and writes client results.
|
|
*/
|
|
static pthread_t client_thread;
|
|
static pthread_mutex_t client_mutex;
|
|
static pthread_cond_t client_cond;
|
|
static struct list_head client_list; /* connected clients */
|
|
static struct list_head client_results; /* actions to send back to clients */
|
|
static uint32_t client_ids; /* 0 and INTERNAL_CLIENT_ID are skipped */
|
|
static int client_stop; /* stop the thread */
|
|
static int client_work; /* a client on client_list has work to do */
|
|
|
|
#define INTERNAL_CLIENT_ID 0xFFFFFFFF /* special client_id for internal actions */
|
|
static struct list_head adopt_results; /* special start actions from adopt_locks() */
|
|
|
|
/*
|
|
* Worker thread performs misc non-locking actions, e.g. init/free.
|
|
*/
|
|
static pthread_t worker_thread;
|
|
static pthread_mutex_t worker_mutex;
|
|
static pthread_cond_t worker_cond;
|
|
static struct list_head worker_list; /* actions for worker_thread */
|
|
static int worker_stop; /* stop the thread */
|
|
static int worker_wake; /* wake the thread without adding work */
|
|
|
|
/*
|
|
* The content of every log_foo() statement is saved in the
|
|
* circular buffer, which can be dumped to a client and printed.
|
|
*/
|
|
#define LOG_LINE_SIZE 256
|
|
#define LOG_DUMP_SIZE DUMP_BUF_SIZE
|
|
#define LOG_SYSLOG_PRIO LOG_WARNING
|
|
static char log_dump[LOG_DUMP_SIZE];
|
|
static unsigned int log_point;
|
|
static unsigned int log_wrap;
|
|
static pthread_mutex_t log_mutex;
|
|
static int syslog_priority = LOG_SYSLOG_PRIO;
|
|
|
|
/*
|
|
* Structure pools to avoid repeated malloc/free.
|
|
*/
|
|
#define MAX_UNUSED_ACTION 64
|
|
#define MAX_UNUSED_CLIENT 64
|
|
#define MAX_UNUSED_RESOURCE 64
|
|
#define MAX_UNUSED_LOCK 64
|
|
static pthread_mutex_t unused_struct_mutex;
|
|
static struct list_head unused_action;
|
|
static struct list_head unused_client;
|
|
static struct list_head unused_resource;
|
|
static struct list_head unused_lock;
|
|
static int unused_action_count;
|
|
static int unused_client_count;
|
|
static int unused_resource_count;
|
|
static int unused_lock_count;
|
|
static int resource_lm_data_size; /* max size of lm_data from sanlock|dlm */
|
|
static int alloc_new_structs; /* used for initializing in setup_structs */
|
|
|
|
#define DO_STOP 1
|
|
#define NO_STOP 0
|
|
#define DO_FREE 1
|
|
#define NO_FREE 0
|
|
#define DO_FORCE 1
|
|
#define NO_FORCE 0
|
|
|
|
static int add_lock_action(struct action *act);
|
|
static int str_to_lm(const char *str);
|
|
static int setup_dump_socket(void);
|
|
static void send_dump_buf(int fd, int dump_len);
|
|
static int dump_info(int *dump_len);
|
|
static int dump_log(int *dump_len);
|
|
|
|
/*
 * Map a syslog priority name to its LOG_* value.
 *
 * "err"/"error" and "warning"/"warn" are accepted as synonyms.
 * A name that matches nothing yields LOG_WARNING.
 */
static int _syslog_name_to_num(const char *name)
{
	static const struct {
		const char *label;
		int prio;
	} prio_names[] = {
		{ "emerg",   LOG_EMERG },
		{ "alert",   LOG_ALERT },
		{ "crit",    LOG_CRIT },
		{ "err",     LOG_ERR },
		{ "error",   LOG_ERR },
		{ "warning", LOG_WARNING },
		{ "warn",    LOG_WARNING },
		{ "notice",  LOG_NOTICE },
		{ "info",    LOG_INFO },
		{ "debug",   LOG_DEBUG },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(prio_names) / sizeof(prio_names[0]); idx++) {
		if (strcmp(name, prio_names[idx].label) == 0)
			return prio_names[idx].prio;
	}

	/* unrecognised name */
	return LOG_WARNING;
}
|
|
|
|
/*
 * Map a syslog LOG_* priority value to its canonical name.
 * Returns "unknown" for a value that is not one of the eight priorities.
 */
static const char *_syslog_num_to_name(int num)
{
	static const struct {
		int prio;
		const char *label;
	} prio_names[] = {
		{ LOG_EMERG,   "emerg" },
		{ LOG_ALERT,   "alert" },
		{ LOG_CRIT,    "crit" },
		{ LOG_ERR,     "err" },
		{ LOG_WARNING, "warning" },
		{ LOG_NOTICE,  "notice" },
		{ LOG_INFO,    "info" },
		{ LOG_DEBUG,   "debug" },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(prio_names) / sizeof(prio_names[0]); idx++) {
		if (prio_names[idx].prio == num)
			return prio_names[idx].label;
	}

	return "unknown";
}
|
|
|
|
static uint64_t monotime(void)
|
|
{
|
|
struct timespec ts;
|
|
|
|
if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
|
|
log_error("clock_gettime failed to get timestamp %s.",
|
|
strerror(errno));
|
|
ts.tv_sec = 0;
|
|
}
|
|
|
|
return ts.tv_sec;
|
|
}
|
|
|
|
static void log_save_line(int len, char *line,
|
|
char *log_buf, unsigned int *point, unsigned int *wrap)
|
|
{
|
|
unsigned int p = *point;
|
|
unsigned int w = *wrap;
|
|
int i;
|
|
|
|
if (len < (int) (LOG_DUMP_SIZE - p)) {
|
|
memcpy(log_buf + p, line, len);
|
|
p += len;
|
|
|
|
if (p == LOG_DUMP_SIZE) {
|
|
p = 0;
|
|
w = 1;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
for (i = 0; i < len; i++) {
|
|
log_buf[p++] = line[i];
|
|
|
|
if (p == LOG_DUMP_SIZE) {
|
|
p = 0;
|
|
w = 1;
|
|
}
|
|
}
|
|
out:
|
|
*point = p;
|
|
*wrap = w;
|
|
}
|
|
|
|
void log_level(int level, const char *fmt, ...)
|
|
{
|
|
char line[LOG_LINE_SIZE];
|
|
va_list ap;
|
|
int len = LOG_LINE_SIZE - 1;
|
|
int ret, pos = 0;
|
|
|
|
memset(line, 0, sizeof(line));
|
|
|
|
ret = snprintf(line, len, "%llu ", (unsigned long long)time(NULL));
|
|
pos += ret;
|
|
|
|
va_start(ap, fmt);
|
|
ret = vsnprintf(line + pos, len - pos, fmt, ap);
|
|
va_end(ap);
|
|
|
|
if (ret >= len - pos)
|
|
pos = len - 1;
|
|
else
|
|
pos += ret;
|
|
|
|
line[pos++] = '\n';
|
|
line[pos++] = '\0';
|
|
|
|
pthread_mutex_lock(&log_mutex);
|
|
log_save_line(pos - 1, line, log_dump, &log_point, &log_wrap);
|
|
pthread_mutex_unlock(&log_mutex);
|
|
|
|
if (level <= syslog_priority)
|
|
syslog(level, "%s", line);
|
|
|
|
if (daemon_debug)
|
|
fprintf(stderr, "%s", line);
|
|
}
|
|
|
|
static int dump_log(int *dump_len)
|
|
{
|
|
int tail_len;
|
|
|
|
pthread_mutex_lock(&log_mutex);
|
|
|
|
if (!log_wrap && !log_point) {
|
|
*dump_len = 0;
|
|
} else if (log_wrap) {
|
|
tail_len = LOG_DUMP_SIZE - log_point;
|
|
memcpy(dump_buf, log_dump+log_point, tail_len);
|
|
if (log_point)
|
|
memcpy(dump_buf+tail_len, log_dump, log_point);
|
|
*dump_len = LOG_DUMP_SIZE;
|
|
} else {
|
|
memcpy(dump_buf, log_dump, log_point-1);
|
|
*dump_len = log_point-1;
|
|
}
|
|
pthread_mutex_unlock(&log_mutex);
|
|
|
|
return 0;
|
|
}
|
|
|
|
struct lockspace *alloc_lockspace(void)
|
|
{
|
|
struct lockspace *ls;
|
|
|
|
if (!(ls = zalloc(sizeof(struct lockspace)))) {
|
|
log_error("out of memory for lockspace");
|
|
return NULL;
|
|
}
|
|
|
|
INIT_LIST_HEAD(&ls->actions);
|
|
INIT_LIST_HEAD(&ls->resources);
|
|
pthread_mutex_init(&ls->mutex, NULL);
|
|
pthread_cond_init(&ls->cond, NULL);
|
|
return ls;
|
|
}
|
|
|
|
static char **alloc_pvs_path(struct pvs *pvs, int num)
|
|
{
|
|
if (!num)
|
|
return NULL;
|
|
|
|
pvs->path = malloc(sizeof(char *) * num);
|
|
if (!pvs->path)
|
|
return NULL;
|
|
|
|
memset(pvs->path, 0x0, sizeof(char *) * num);
|
|
return pvs->path;
|
|
}
|
|
|
|
static void free_pvs_path(struct pvs *pvs)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < pvs->num; i++)
|
|
free((char *)pvs->path[i]);
|
|
|
|
free(pvs->path);
|
|
pvs->path = NULL;
|
|
}
|
|
|
|
static char **alloc_and_copy_pvs_path(struct pvs *dst, struct pvs *src)
|
|
{
|
|
int i;
|
|
|
|
if (!alloc_pvs_path(dst, src->num))
|
|
return NULL;
|
|
|
|
dst->num = 0;
|
|
for (i = 0; i < src->num; i++) {
|
|
if (!src->path[i] || !strcmp(src->path[i], "none"))
|
|
continue;
|
|
|
|
dst->path[dst->num] = strdup(src->path[i]);
|
|
if (!dst->path[dst->num]) {
|
|
log_error("out of memory for copying pvs path");
|
|
goto failed;
|
|
}
|
|
dst->num++;
|
|
}
|
|
return dst->path;
|
|
|
|
failed:
|
|
free_pvs_path(dst);
|
|
return NULL;
|
|
}
|
|
|
|
static struct action *alloc_action(void)
|
|
{
|
|
struct action *act;
|
|
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
if (!unused_action_count || alloc_new_structs) {
|
|
act = malloc(sizeof(struct action));
|
|
} else {
|
|
act = list_first_entry(&unused_action, struct action, list);
|
|
list_del(&act->list);
|
|
unused_action_count--;
|
|
}
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
if (act)
|
|
memset(act, 0, sizeof(struct action));
|
|
else
|
|
log_error("out of memory for action");
|
|
return act;
|
|
}
|
|
|
|
static struct client *alloc_client(void)
|
|
{
|
|
struct client *cl;
|
|
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
if (!unused_client_count || alloc_new_structs) {
|
|
cl = malloc(sizeof(struct client));
|
|
} else {
|
|
cl = list_first_entry(&unused_client, struct client, list);
|
|
list_del(&cl->list);
|
|
unused_client_count--;
|
|
}
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
if (cl)
|
|
memset(cl, 0, sizeof(struct client));
|
|
else
|
|
log_error("out of memory for client");
|
|
return cl;
|
|
}
|
|
|
|
static struct resource *alloc_resource(void)
|
|
{
|
|
struct resource *r;
|
|
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
if (!unused_resource_count || alloc_new_structs) {
|
|
r = malloc(sizeof(struct resource) + resource_lm_data_size);
|
|
} else {
|
|
r = list_first_entry(&unused_resource, struct resource, list);
|
|
list_del(&r->list);
|
|
unused_resource_count--;
|
|
}
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
if (r) {
|
|
memset(r, 0, sizeof(struct resource) + resource_lm_data_size);
|
|
INIT_LIST_HEAD(&r->locks);
|
|
INIT_LIST_HEAD(&r->actions);
|
|
} else {
|
|
log_error("out of memory for resource");
|
|
}
|
|
return r;
|
|
}
|
|
|
|
static struct lock *alloc_lock(void)
|
|
{
|
|
struct lock *lk;
|
|
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
if (!unused_lock_count || alloc_new_structs) {
|
|
lk = malloc(sizeof(struct lock));
|
|
} else {
|
|
lk = list_first_entry(&unused_lock, struct lock, list);
|
|
list_del(&lk->list);
|
|
unused_lock_count--;
|
|
}
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
if (lk)
|
|
memset(lk, 0, sizeof(struct lock));
|
|
else
|
|
log_error("out of memory for lock");
|
|
return lk;
|
|
}
|
|
|
|
static void free_action(struct action *act)
|
|
{
|
|
free(act->path);
|
|
act->path = NULL;
|
|
|
|
free_pvs_path(&act->pvs);
|
|
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
if (unused_action_count >= MAX_UNUSED_ACTION) {
|
|
free(act);
|
|
} else {
|
|
list_add_tail(&act->list, &unused_action);
|
|
unused_action_count++;
|
|
}
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
}
|
|
|
|
static void free_client(struct client *cl)
|
|
{
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
if (unused_client_count >= MAX_UNUSED_CLIENT) {
|
|
free(cl);
|
|
} else {
|
|
list_add_tail(&cl->list, &unused_client);
|
|
unused_client_count++;
|
|
}
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
}
|
|
|
|
static void free_resource(struct resource *r)
|
|
{
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
if (unused_resource_count >= MAX_UNUSED_RESOURCE) {
|
|
free(r);
|
|
} else {
|
|
list_add_tail(&r->list, &unused_resource);
|
|
unused_resource_count++;
|
|
}
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
}
|
|
|
|
static void free_lock(struct lock *lk)
|
|
{
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
if (unused_lock_count >= MAX_UNUSED_LOCK) {
|
|
free(lk);
|
|
} else {
|
|
list_add_tail(&lk->list, &unused_lock);
|
|
unused_lock_count++;
|
|
}
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
}
|
|
|
|
static int setup_structs(void)
|
|
{
|
|
struct action *act;
|
|
struct client *cl;
|
|
struct resource *r;
|
|
struct lock *lk;
|
|
int data_san = lm_data_size_sanlock();
|
|
int data_dlm = lm_data_size_dlm();
|
|
int data_idm = lm_data_size_idm();
|
|
int i;
|
|
|
|
resource_lm_data_size = data_san > data_dlm ? data_san : data_dlm;
|
|
resource_lm_data_size = resource_lm_data_size > data_idm ?
|
|
resource_lm_data_size : data_idm;
|
|
|
|
pthread_mutex_init(&unused_struct_mutex, NULL);
|
|
INIT_LIST_HEAD(&unused_action);
|
|
INIT_LIST_HEAD(&unused_client);
|
|
INIT_LIST_HEAD(&unused_resource);
|
|
INIT_LIST_HEAD(&unused_lock);
|
|
|
|
/*
|
|
* For setup, force the alloc_ functions to alloc new structs instead
|
|
* of taking them unused. This allows alloc_struct/free_struct loop to
|
|
* populate the unused lists.
|
|
*/
|
|
alloc_new_structs = 1;
|
|
|
|
for (i = 0; i < MAX_UNUSED_ACTION/2; i++) {
|
|
if (!(act = alloc_action()))
|
|
goto fail;
|
|
free_action(act);
|
|
}
|
|
|
|
for (i = 0; i < MAX_UNUSED_CLIENT/2; i++) {
|
|
if (!(cl = alloc_client()))
|
|
goto fail;
|
|
free_client(cl);
|
|
}
|
|
|
|
for (i = 0; i < MAX_UNUSED_RESOURCE/2; i++) {
|
|
if (!(r = alloc_resource()))
|
|
goto fail;
|
|
free_resource(r);
|
|
}
|
|
|
|
for (i = 0; i < MAX_UNUSED_LOCK/2; i++) {
|
|
if (!(lk = alloc_lock()))
|
|
goto fail;
|
|
free_lock(lk);
|
|
}
|
|
|
|
alloc_new_structs = 0;
|
|
return 0;
|
|
fail:
|
|
alloc_new_structs = 0;
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static int add_pollfd(int fd)
|
|
{
|
|
int i, new_size;
|
|
struct pollfd *tmp_pollfd;
|
|
|
|
pthread_mutex_lock(&pollfd_mutex);
|
|
for (i = 0; i < pollfd_size; i++) {
|
|
if (pollfd[i].fd != POLL_FD_UNUSED)
|
|
continue;
|
|
|
|
pollfd[i].fd = fd;
|
|
pollfd[i].events = POLLIN;
|
|
pollfd[i].revents = 0;
|
|
|
|
if (i > pollfd_maxi)
|
|
pollfd_maxi = i;
|
|
|
|
pthread_mutex_unlock(&pollfd_mutex);
|
|
return i;
|
|
}
|
|
|
|
new_size = pollfd_size + ADD_POLL_SIZE;
|
|
|
|
tmp_pollfd = realloc(pollfd, new_size * sizeof(struct pollfd));
|
|
if (!tmp_pollfd) {
|
|
log_error("can't alloc new size %d for pollfd", new_size);
|
|
pthread_mutex_unlock(&pollfd_mutex);
|
|
return -ENOMEM;
|
|
}
|
|
pollfd = tmp_pollfd;
|
|
|
|
for (i = pollfd_size; i < new_size; i++) {
|
|
pollfd[i].fd = POLL_FD_UNUSED;
|
|
pollfd[i].events = 0;
|
|
pollfd[i].revents = 0;
|
|
}
|
|
|
|
i = pollfd_size;
|
|
pollfd[i].fd = fd;
|
|
pollfd[i].events = POLLIN;
|
|
pollfd[i].revents = 0;
|
|
pollfd_maxi = i;
|
|
|
|
pollfd_size = new_size;
|
|
|
|
pthread_mutex_unlock(&pollfd_mutex);
|
|
return i;
|
|
}
|
|
|
|
static void rem_pollfd(int pi)
|
|
{
|
|
if (pi < 0) {
|
|
log_error("rem_pollfd %d", pi);
|
|
return;
|
|
}
|
|
pthread_mutex_lock(&pollfd_mutex);
|
|
pollfd[pi].fd = POLL_FD_UNUSED;
|
|
pollfd[pi].events = 0;
|
|
pollfd[pi].revents = 0;
|
|
pthread_mutex_unlock(&pollfd_mutex);
|
|
}
|
|
|
|
static const char *lm_str(int x)
|
|
{
|
|
switch (x) {
|
|
case LD_LM_NONE:
|
|
return "none";
|
|
case LD_LM_DLM:
|
|
return "dlm";
|
|
case LD_LM_SANLOCK:
|
|
return "sanlock";
|
|
case LD_LM_IDM:
|
|
return "idm";
|
|
default:
|
|
return "lm_unknown";
|
|
}
|
|
}
|
|
|
|
static const char *rt_str(int x)
|
|
{
|
|
switch (x) {
|
|
case LD_RT_GL:
|
|
return "gl";
|
|
case LD_RT_VG:
|
|
return "vg";
|
|
case LD_RT_LV:
|
|
return "lv";
|
|
default:
|
|
return ".";
|
|
};
|
|
}
|
|
|
|
static const char *op_str(int x)
|
|
{
|
|
switch (x) {
|
|
case LD_OP_INIT:
|
|
return "init";
|
|
case LD_OP_FREE:
|
|
return "free";
|
|
case LD_OP_START:
|
|
return "start";
|
|
case LD_OP_STOP:
|
|
return "stop";
|
|
case LD_OP_LOCK:
|
|
return "lock";
|
|
case LD_OP_UPDATE:
|
|
return "update";
|
|
case LD_OP_CLOSE:
|
|
return "close";
|
|
case LD_OP_ENABLE:
|
|
return "enable";
|
|
case LD_OP_DISABLE:
|
|
return "disable";
|
|
case LD_OP_START_WAIT:
|
|
return "start_wait";
|
|
case LD_OP_STOP_ALL:
|
|
return "stop_all";
|
|
case LD_OP_RENAME_BEFORE:
|
|
return "rename_before";
|
|
case LD_OP_RENAME_FINAL:
|
|
return "rename_final";
|
|
case LD_OP_RUNNING_LM:
|
|
return "running_lm";
|
|
case LD_OP_QUERY_LOCK:
|
|
return "query_lock";
|
|
case LD_OP_FIND_FREE_LOCK:
|
|
return "find_free_lock";
|
|
case LD_OP_KILL_VG:
|
|
return "kill_vg";
|
|
case LD_OP_DROP_VG:
|
|
return "drop_vg";
|
|
case LD_OP_DUMP_LOG:
|
|
return "dump_log";
|
|
case LD_OP_DUMP_INFO:
|
|
return "dump_info";
|
|
case LD_OP_BUSY:
|
|
return "busy";
|
|
case LD_OP_REFRESH_LV:
|
|
return "refresh_lv";
|
|
default:
|
|
return "op_unknown";
|
|
};
|
|
}
|
|
|
|
static const char *op_mode_str(int op, int mode)
|
|
{
|
|
if (op != LD_OP_LOCK)
|
|
return op_str(op);
|
|
|
|
switch (mode) {
|
|
case LD_LK_IV:
|
|
return "lock_iv";
|
|
case LD_LK_UN:
|
|
return "unlock";
|
|
case LD_LK_NL:
|
|
return "lock_nl";
|
|
case LD_LK_SH:
|
|
return "lock_sh";
|
|
case LD_LK_EX:
|
|
return "lock_ex";
|
|
default:
|
|
return "lock_bad";
|
|
}
|
|
}
|
|
|
|
int last_string_from_args(char *args_in, char *last)
|
|
{
|
|
const char *args = args_in;
|
|
const char *colon, *str = NULL;
|
|
|
|
while (1) {
|
|
if (!args || (*args == '\0'))
|
|
break;
|
|
colon = strstr(args, ":");
|
|
if (!colon)
|
|
break;
|
|
str = colon;
|
|
args = colon + 1;
|
|
}
|
|
|
|
if (str) {
|
|
snprintf(last, MAX_ARGS, "%s", str + 1);
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch)
|
|
{
|
|
char version[MAX_ARGS+1];
|
|
char *major_str, *minor_str, *patch_str;
|
|
char *n, *d1, *d2;
|
|
|
|
memset(version, 0, sizeof(version));
|
|
strncpy(version, args, MAX_ARGS);
|
|
version[MAX_ARGS] = '\0';
|
|
|
|
n = strstr(version, ":");
|
|
if (n)
|
|
*n = '\0';
|
|
|
|
d1 = strstr(version, ".");
|
|
if (!d1)
|
|
return -1;
|
|
|
|
d2 = strstr(d1 + 1, ".");
|
|
if (!d2)
|
|
return -1;
|
|
|
|
major_str = version;
|
|
minor_str = d1 + 1;
|
|
patch_str = d2 + 1;
|
|
|
|
*d1 = '\0';
|
|
*d2 = '\0';
|
|
|
|
if (major)
|
|
*major = atoi(major_str);
|
|
if (minor)
|
|
*minor = atoi(minor_str);
|
|
if (patch)
|
|
*patch = atoi(patch_str);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Write new info when a command exits if that command has acquired a new LV
|
|
* lock. If the command has released an LV lock we don't bother updating the
|
|
* info. When adopting, we eliminate any LV lock adoptions if there is no dm
|
|
* device for that LV. If lvmlockd is terminated after acquiring but before
|
|
* writing this file, those LV locks would not be adopted on restart.
|
|
*/
|
|
|
|
#define ADOPT_VERSION_MAJOR 1
|
|
#define ADOPT_VERSION_MINOR 0
|
|
|
|
static void write_adopt_file(void)
|
|
{
|
|
struct lockspace *ls;
|
|
struct resource *r;
|
|
struct lock *lk;
|
|
time_t t;
|
|
FILE *fp;
|
|
|
|
if (!(fp = fopen(adopt_file, "w")))
|
|
return;
|
|
|
|
adopt_update_count++;
|
|
|
|
t = time(NULL);
|
|
fprintf(fp, "lvmlockd adopt_version %u.%u pid %d updates %u %s",
|
|
ADOPT_VERSION_MAJOR, ADOPT_VERSION_MINOR, getpid(), adopt_update_count, ctime(&t));
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
if (ls->lm_type == LD_LM_DLM && !strcmp(ls->name, gl_lsname_dlm))
|
|
continue;
|
|
fprintf(fp, "VG: %38s %s %s %s\n",
|
|
ls->vg_uuid, ls->vg_name, lm_str(ls->lm_type), ls->vg_args);
|
|
list_for_each_entry(r, &ls->resources, list) {
|
|
if (r->type != LD_RT_LV)
|
|
continue;
|
|
if ((r->mode != LD_LK_EX) && (r->mode != LD_LK_SH))
|
|
continue;
|
|
list_for_each_entry(lk, &r->locks, list) {
|
|
fprintf(fp, "LV: %38s %s %s %s %u\n",
|
|
ls->vg_uuid, r->name, r->lv_args, mode_str(r->mode), r->version);
|
|
}
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
fflush(fp);
|
|
(void) fclose(fp);
|
|
}
|
|
|
|
static int read_adopt_file(struct list_head *vg_lockd)
|
|
{
|
|
char adopt_line[512];
|
|
char vg_uuid[72];
|
|
char lm_type_str[16];
|
|
char mode[8];
|
|
struct lockspace *ls = NULL, *ls2;
|
|
struct resource *r;
|
|
FILE *fp;
|
|
|
|
if (MAX_ARGS != 64 || MAX_NAME != 64)
|
|
return -1;
|
|
|
|
if (!(fp = fopen(adopt_file, "r")))
|
|
return 0;
|
|
|
|
while (fgets(adopt_line, sizeof(adopt_line), fp)) {
|
|
if (adopt_line[0] == '#')
|
|
continue;
|
|
else if (!strncmp(adopt_line, "lvmlockd", 8)) {
|
|
unsigned int v_major = 0, v_minor = 0;
|
|
if ((sscanf(adopt_line, "lvmlockd adopt_version %u.%u", &v_major, &v_minor) != 2) ||
|
|
(v_major != ADOPT_VERSION_MAJOR))
|
|
goto fail;
|
|
|
|
} else if (!strncmp(adopt_line, "VG:", 3)) {
|
|
if (!(ls = alloc_lockspace()))
|
|
goto fail;
|
|
|
|
memset(vg_uuid, 0, sizeof(vg_uuid));
|
|
memset(lm_type_str, 0, sizeof(lm_type_str));
|
|
|
|
if (sscanf(adopt_line, "VG: %63s %64s %15s %64s",
|
|
vg_uuid, ls->vg_name, lm_type_str, ls->vg_args) != 4) {
|
|
free(ls);
|
|
goto fail;
|
|
}
|
|
|
|
memcpy(ls->vg_uuid, vg_uuid, 64);
|
|
|
|
if ((ls->lm_type = str_to_lm(lm_type_str)) < 0) {
|
|
free(ls);
|
|
goto fail;
|
|
}
|
|
|
|
list_add(&ls->list, vg_lockd);
|
|
|
|
} else if (!strncmp(adopt_line, "LV:", 3)) {
|
|
if (!(r = alloc_resource()))
|
|
goto fail;
|
|
|
|
r->type = LD_RT_LV;
|
|
|
|
memset(vg_uuid, 0, sizeof(vg_uuid));
|
|
memset(mode, 0, sizeof(mode));
|
|
|
|
if (sscanf(adopt_line, "LV: %64s %64s %s %7s %u",
|
|
vg_uuid, r->name, r->lv_args, mode, &r->version) != 5) {
|
|
free_resource(r);
|
|
goto fail;
|
|
}
|
|
|
|
if ((r->adopt_mode = str_to_mode(mode)) == LD_LK_IV) {
|
|
free_resource(r);
|
|
goto fail;
|
|
}
|
|
|
|
if (ls && !memcmp(ls->vg_uuid, vg_uuid, 64)) {
|
|
list_add(&r->list, &ls->resources);
|
|
r = NULL;
|
|
} else {
|
|
list_for_each_entry(ls2, vg_lockd, list) {
|
|
if (memcmp(ls2->vg_uuid, vg_uuid, 64))
|
|
continue;
|
|
list_add(&r->list, &ls2->resources);
|
|
r = NULL;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (r) {
|
|
log_error("No lockspace found for resource %s vg_uuid %s", r->name, vg_uuid);
|
|
free_resource(r);
|
|
goto fail;
|
|
}
|
|
}
|
|
}
|
|
|
|
(void) fclose(fp);
|
|
return 0;
|
|
|
|
fail:
|
|
(void) fclose(fp);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* These are few enough that arrays of function pointers can
|
|
* be avoided.
|
|
*/
|
|
|
|
static int lm_prepare_lockspace(struct lockspace *ls, struct action *act)
|
|
{
|
|
int rv;
|
|
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
rv = lm_prepare_lockspace_dlm(ls);
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
rv = lm_prepare_lockspace_sanlock(ls);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
rv = lm_prepare_lockspace_idm(ls);
|
|
else
|
|
return -1;
|
|
|
|
if (act)
|
|
act->lm_rv = rv;
|
|
return rv;
|
|
}
|
|
|
|
static int lm_add_lockspace(struct lockspace *ls, struct action *act, int adopt_only, int adopt_ok)
|
|
{
|
|
int rv;
|
|
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
rv = lm_add_lockspace_dlm(ls, adopt_only, adopt_ok);
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
rv = lm_add_lockspace_sanlock(ls, adopt_only, adopt_ok);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
rv = lm_add_lockspace_idm(ls, adopt_only, adopt_ok);
|
|
else
|
|
return -1;
|
|
|
|
if (act)
|
|
act->lm_rv = rv;
|
|
return rv;
|
|
}
|
|
|
|
static int lm_rem_lockspace(struct lockspace *ls, struct action *act, int free_vg)
|
|
{
|
|
int rv;
|
|
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
rv = lm_rem_lockspace_dlm(ls, free_vg);
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
rv = lm_rem_lockspace_sanlock(ls, free_vg);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
rv = lm_rem_lockspace_idm(ls, free_vg);
|
|
else
|
|
return -1;
|
|
|
|
if (act)
|
|
act->lm_rv = rv;
|
|
return rv;
|
|
}
|
|
|
|
static int lm_lock(struct lockspace *ls, struct resource *r, int mode, struct action *act,
|
|
struct val_blk *vb_out, int *retry, int adopt_only, int adopt_ok)
|
|
{
|
|
int rv;
|
|
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
rv = lm_lock_dlm(ls, r, mode, vb_out, adopt_only, adopt_ok);
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
rv = lm_lock_sanlock(ls, r, mode, vb_out, retry, adopt_only, adopt_ok);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
rv = lm_lock_idm(ls, r, mode, vb_out, act->lv_uuid,
|
|
&act->pvs, adopt_only, adopt_ok);
|
|
else
|
|
return -1;
|
|
|
|
if (act)
|
|
act->lm_rv = rv;
|
|
return rv;
|
|
}
|
|
|
|
static int lm_convert(struct lockspace *ls, struct resource *r,
|
|
int mode, struct action *act, uint32_t r_version)
|
|
{
|
|
int rv;
|
|
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
rv = lm_convert_dlm(ls, r, mode, r_version);
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
rv = lm_convert_sanlock(ls, r, mode, r_version);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
rv = lm_convert_idm(ls, r, mode, r_version);
|
|
else
|
|
return -1;
|
|
|
|
if (act)
|
|
act->lm_rv = rv;
|
|
return rv;
|
|
}
|
|
|
|
static int lm_unlock(struct lockspace *ls, struct resource *r, struct action *act,
|
|
uint32_t r_version, uint32_t lmu_flags)
|
|
{
|
|
int rv;
|
|
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
rv = lm_unlock_dlm(ls, r, r_version, lmu_flags);
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
rv = lm_unlock_sanlock(ls, r, r_version, lmu_flags);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
rv = lm_unlock_idm(ls, r, r_version, lmu_flags);
|
|
else
|
|
return -1;
|
|
|
|
if (act)
|
|
act->lm_rv = rv;
|
|
return rv;
|
|
}
|
|
|
|
static int lm_hosts(struct lockspace *ls, int notify)
|
|
{
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
return lm_hosts_dlm(ls, notify);
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
return lm_hosts_sanlock(ls, notify);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
return lm_hosts_idm(ls, notify);
|
|
return -1;
|
|
}
|
|
|
|
static void lm_rem_resource(struct lockspace *ls, struct resource *r)
|
|
{
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
lm_rem_resource_dlm(ls, r);
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
lm_rem_resource_sanlock(ls, r);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
lm_rem_resource_idm(ls, r);
|
|
}
|
|
|
|
static int lm_find_free_lock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size)
|
|
{
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
return 0;
|
|
else if (ls->lm_type == LD_LM_SANLOCK)
|
|
return lm_find_free_lock_sanlock(ls, free_offset, sector_size, align_size);
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
return 0;
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* While adopting locks, actions originate from the adopt_locks()
|
|
* function, not from a client. So, these actions (flagged ADOPT_ONLY),
|
|
* should be passed back to the adopt_locks() function through the
|
|
* adopt_results list, and not be sent back to a client via the
|
|
* client_list/client_thread. INTERNAL_CLIENT_ID indicates the
|
|
* act was generated internally and not from a client, and
|
|
* distinguishes internal adopt request from those received from
|
|
* a client.
|
|
*/
|
|
|
|
static void add_client_result(struct action *act)
|
|
{
|
|
if (act->flags & LD_AF_NO_CLIENT) {
|
|
log_debug("internal action done op %s mode %s result %d vg %s",
|
|
op_str(act->op), mode_str(act->mode), act->result, act->vg_name);
|
|
free_action(act);
|
|
return;
|
|
}
|
|
|
|
pthread_mutex_lock(&client_mutex);
|
|
if ((act->flags & LD_AF_ADOPT_ONLY) && (act->client_id == INTERNAL_CLIENT_ID))
|
|
list_add_tail(&act->list, &adopt_results);
|
|
else
|
|
list_add_tail(&act->list, &client_results);
|
|
pthread_cond_signal(&client_cond);
|
|
pthread_mutex_unlock(&client_mutex);
|
|
}
|
|
|
|
static struct lock *find_lock_client(struct resource *r, uint32_t client_id)
|
|
{
|
|
struct lock *lk;
|
|
|
|
list_for_each_entry(lk, &r->locks, list) {
|
|
if (lk->client_id == client_id)
|
|
return lk;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static struct lock *find_lock_persistent(struct resource *r)
|
|
{
|
|
struct lock *lk;
|
|
|
|
list_for_each_entry(lk, &r->locks, list) {
|
|
if (lk->flags & LD_LF_PERSISTENT)
|
|
return lk;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static struct action *find_action_client(struct resource *r, uint32_t client_id)
|
|
{
|
|
struct action *act;
|
|
|
|
list_for_each_entry(act, &r->actions, list) {
|
|
if (act->client_id != client_id)
|
|
continue;
|
|
return act;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void add_work_action(struct action *act)
|
|
{
|
|
pthread_mutex_lock(&worker_mutex);
|
|
if (!worker_stop) {
|
|
list_add_tail(&act->list, &worker_list);
|
|
pthread_cond_signal(&worker_cond);
|
|
}
|
|
pthread_mutex_unlock(&worker_mutex);
|
|
}
|
|
|
|
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry)
|
|
{
|
|
struct lock *lk;
|
|
struct val_blk vb;
|
|
uint32_t new_version = 0;
|
|
int inval_meta;
|
|
int rv = 0;
|
|
|
|
memset(&vb, 0, sizeof(vb));
|
|
|
|
r->last_client_id = act->client_id;
|
|
|
|
if (r->type == LD_RT_LV)
|
|
log_debug("%s:%s res_lock %s cl %u (%s)", ls->name, r->name,
|
|
mode_str(act->mode), act->client_id, act->lv_name);
|
|
else
|
|
log_debug("%s:%s res_lock %s cl %u", ls->name, r->name,
|
|
mode_str(act->mode), act->client_id);
|
|
|
|
if (r->mode == LD_LK_SH && act->mode == LD_LK_SH)
|
|
goto add_lk;
|
|
|
|
if (r->type == LD_RT_LV && act->lv_args[0])
|
|
memcpy(r->lv_args, act->lv_args, MAX_ARGS);
|
|
|
|
rv = lm_lock(ls, r, act->mode, act, &vb, retry,
|
|
act->flags & LD_AF_ADOPT_ONLY ? 1 : 0,
|
|
act->flags & LD_AF_ADOPT ? 1 : 0);
|
|
|
|
if (rv && r->use_vb)
|
|
log_debug("%s:%s res_lock rv %d read vb %x %x %u",
|
|
ls->name, r->name, rv, vb.version, vb.flags, vb.r_version);
|
|
else if (rv)
|
|
log_debug("%s:%s res_lock rv %d", ls->name, r->name, rv);
|
|
|
|
if (rv < 0)
|
|
return rv;
|
|
|
|
if (sanlock_gl_dup && ls->sanlock_gl_enabled)
|
|
act->flags |= LD_AF_DUP_GL_LS;
|
|
|
|
/*
|
|
* Check new lvb values to decide if lvmetad cache should
|
|
* be invalidated. When we need to invalidate the lvmetad
|
|
* cache, but don't have a usable r_version from the lvb,
|
|
* send lvmetad new_version 0 which causes it to invalidate
|
|
* the VG metadata without comparing against the currently
|
|
* cached VG seqno.
|
|
*/
|
|
|
|
inval_meta = 0;
|
|
|
|
if (!r->use_vb) {
|
|
/* LV locks don't use an lvb. */
|
|
|
|
} else if (vb.version && ((vb.version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
|
|
log_error("%s:%s res_lock invalid val_blk version %x flags %x r_version %u",
|
|
ls->name, r->name, vb.version, vb.flags, vb.r_version);
|
|
inval_meta = 1;
|
|
new_version = 0;
|
|
rv = -EINVAL;
|
|
|
|
} else if (vb.r_version && (vb.r_version == r->version)) {
|
|
/*
|
|
* Common case when the version hasn't changed.
|
|
* Do nothing.
|
|
*/
|
|
} else if (r->version && vb.r_version && (vb.r_version > r->version)) {
|
|
/*
|
|
* Common case when the version has changed. Another host
|
|
* has changed the data protected by the lock since we last
|
|
* acquired it, and increased r_version so we know that our
|
|
* cache is invalid.
|
|
*/
|
|
log_debug("%s:%s res_lock got version %u our %u",
|
|
ls->name, r->name, vb.r_version, r->version);
|
|
r->version = vb.r_version;
|
|
new_version = vb.r_version;
|
|
r->version_zero_valid = 0;
|
|
inval_meta = 1;
|
|
|
|
} else if (r->version_zero_valid && !vb.r_version) {
|
|
/*
|
|
* The lvb is in a persistent zero state, which will end
|
|
* once someone uses the lock and writes a new lvb value.
|
|
* Do nothing.
|
|
*/
|
|
/*
|
|
log_debug("%s:%s res_lock version_zero_valid still zero", ls->name, r->name);
|
|
*/
|
|
|
|
} else if (r->version_zero_valid && vb.r_version) {
|
|
/*
|
|
* Someone has written to the lvb after it was in a
|
|
* persistent zero state. Begin tracking normal
|
|
* non-zero changes. We may or may not have known
|
|
* about a previous non-zero version (in r->version).
|
|
* If we did, it means the lvb content was lost and
|
|
* has now been reinitialized.
|
|
*
|
|
* If the new reinitialized value is less than the
|
|
* previous non-zero value in r->version, then something
|
|
* unusual has happened. For a VG lock, it probably
|
|
* means the VG was removed and recreated. Invalidate
|
|
* our cache and begin using the new VG version. For
|
|
* a GL lock, another host may have reinitialized a
|
|
* lost/zero lvb with a value less than we'd seen
|
|
* before. Invalidate the cache, and begin using
|
|
* the lower version (or continue using our old
|
|
* larger version?)
|
|
*/
|
|
if (r->version && (r->version >= vb.r_version)) {
|
|
log_debug("%s:%s res_lock version_zero_valid got version %u less than our %u",
|
|
ls->name, r->name, vb.r_version, r->version);
|
|
new_version = 0;
|
|
} else {
|
|
log_debug("%s:%s res_lock version_zero_valid got version %u our %u",
|
|
ls->name, r->name, vb.r_version, r->version);
|
|
new_version = vb.r_version;
|
|
}
|
|
r->version = vb.r_version;
|
|
r->version_zero_valid = 0;
|
|
inval_meta = 1;
|
|
|
|
} else if (!r->version && vb.r_version) {
|
|
/*
|
|
* The first time we've acquired the lock and seen the lvb.
|
|
*/
|
|
log_debug("%s:%s res_lock initial version %u", ls->name, r->name, vb.r_version);
|
|
r->version = vb.r_version;
|
|
inval_meta = 1;
|
|
new_version = vb.r_version;
|
|
r->version_zero_valid = 0;
|
|
|
|
} else if (!r->version && !vb.r_version) {
|
|
/*
|
|
* The lock may have never been used to change something.
|
|
* (e.g. a new sanlock GL?)
|
|
*/
|
|
log_debug("%s:%s res_lock all versions zero", ls->name, r->name);
|
|
if (!r->version_zero_valid) {
|
|
inval_meta = 1;
|
|
new_version = 0;
|
|
}
|
|
r->version_zero_valid = 1;
|
|
|
|
} else if (r->version && !vb.r_version) {
|
|
/*
|
|
* The lvb content has been lost or never been initialized.
|
|
* It can be lost during dlm recovery when the master node
|
|
* is removed.
|
|
*
|
|
* If we're the next to write the lvb, reinitialze it to the
|
|
* new VG seqno, or a new GL counter larger than was seen by
|
|
* any hosts before (how to estimate that?)
|
|
*
|
|
* If we see non-zero values before we next write to it, use
|
|
* those values.
|
|
*
|
|
* While the lvb values remain zero, the data for the lock
|
|
* is unchanged and we don't need to invalidate metadata.
|
|
*/
|
|
if ((ls->lm_type == LD_LM_DLM) && !vb.version && !vb.flags)
|
|
log_debug("%s:%s res_lock all lvb content is blank",
|
|
ls->name, r->name);
|
|
log_debug("%s:%s res_lock our version %u got vb %x %x %u",
|
|
ls->name, r->name, r->version, vb.version, vb.flags, vb.r_version);
|
|
r->version_zero_valid = 1;
|
|
inval_meta = 1;
|
|
new_version = 0;
|
|
|
|
} else if (r->version && vb.r_version && (vb.r_version < r->version)) {
|
|
/*
|
|
* The lvb value has gone backwards, which shouldn't generally happen,
|
|
* but could when the dlm lvb is lost and reinitialized, or the VG
|
|
* is removed and recreated.
|
|
*
|
|
* If this is a VG lock, it probably means the VG has been removed
|
|
* and recreated while we had the dlm lockspace running.
|
|
* FIXME: how does the cache validation and replacement in lvmetad
|
|
* work in this case?
|
|
*/
|
|
log_debug("%s:%s res_lock got version %u less than our version %u",
|
|
ls->name, r->name, vb.r_version, r->version);
|
|
r->version = vb.r_version;
|
|
inval_meta = 1;
|
|
new_version = 0;
|
|
r->version_zero_valid = 0;
|
|
} else {
|
|
log_debug("%s:%s res_lock undefined vb condition vzv %d our version %u vb %x %x %u",
|
|
ls->name, r->name, r->version_zero_valid, r->version,
|
|
vb.version, vb.flags, vb.r_version);
|
|
}
|
|
|
|
if (vb.version && vb.r_version && (vb.flags & VBF_REMOVED)) {
|
|
/* Should we set ls->thread_stop = 1 ? */
|
|
log_debug("%s:%s res_lock vb flag REMOVED",
|
|
ls->name, r->name);
|
|
rv = -EREMOVED;
|
|
}
|
|
|
|
/*
|
|
* lvmetad is no longer used, but the infrastructure for
|
|
* distributed cache validation remains. The points
|
|
* where vg or global cache state would be invalidated
|
|
* remain below and log_debug messages point out where
|
|
* they would occur.
|
|
*
|
|
* The comments related to "lvmetad" remain because they
|
|
* describe how some other local cache like lvmetad would
|
|
* be invalidated here.
|
|
*/
|
|
|
|
/*
|
|
* r is vglk: tell lvmetad to set the vg invalid
|
|
* flag, and provide the new r_version. If lvmetad finds
|
|
* that its cached vg has seqno less than the value
|
|
* we send here, it will set the vg invalid flag.
|
|
* lvm commands that read the vg from lvmetad, will
|
|
* see the invalid flag returned, will reread the
|
|
* vg from disk, update the lvmetad copy, and go on.
|
|
*
|
|
* r is global: tell lvmetad to set the global invalid
|
|
* flag. When commands see this flag returned from lvmetad,
|
|
* they will reread metadata from disk, update the lvmetad
|
|
* caches, and tell lvmetad to set global invalid to 0.
|
|
*/
|
|
|
|
/*
|
|
* lvmetad not running:
|
|
* Even if we have not previously found lvmetad running,
|
|
* we attempt to connect and invalidate in case it has
|
|
* been started while lvmlockd is running. We don't
|
|
* want to allow lvmetad to be used with invalid data if
|
|
* it happens to be enabled and started after lvmlockd.
|
|
*/
|
|
|
|
if (inval_meta && (r->type == LD_RT_VG)) {
|
|
log_debug("%s:%s res_lock invalidate vg state version %u",
|
|
ls->name, r->name, new_version);
|
|
}
|
|
|
|
if (inval_meta && (r->type == LD_RT_GL)) {
|
|
log_debug("%s:%s res_lock invalidate global state", ls->name, r->name);
|
|
}
|
|
|
|
/*
|
|
* Record the new lock state.
|
|
*/
|
|
|
|
r->mode = act->mode;
|
|
|
|
add_lk:
|
|
if (r->mode == LD_LK_SH)
|
|
r->sh_count++;
|
|
|
|
if (!(lk = alloc_lock()))
|
|
return -ENOMEM;
|
|
|
|
lk->client_id = act->client_id;
|
|
lk->mode = act->mode;
|
|
|
|
if (act->flags & LD_AF_PERSISTENT) {
|
|
lk->flags |= LD_LF_PERSISTENT;
|
|
lk->client_id = 0;
|
|
}
|
|
|
|
/*
|
|
* LV_LOCK means the action acquired the lv lock in the lock manager
|
|
* (as opposed to finding that the lv lock was already held). If
|
|
* the client for this LV_LOCK action fails before we send the result,
|
|
* then we automatically unlock the lv since the lv wasn't activated.
|
|
* (There will always be an odd chance the lv lock is held while the
|
|
* lv is not active, but this helps.) The most common case where this
|
|
* is helpful is when the lv lock operation is slow/delayed and the
|
|
* command is canceled by the user.
|
|
*
|
|
* LV_UNLOCK means the lv unlock action was generated by lvmlockd when
|
|
* it tried to send the reply for an lv lock action (with LV_LOCK set),
|
|
* and failed to send the reply to the client/command. The
|
|
* last_client_id saved on the resource is compared to this LV_UNLOCK
|
|
* action before the auto unlock is done in case another action locked
|
|
* the lv between the failed client lock action and the auto unlock.
|
|
*/
|
|
if (r->type == LD_RT_LV)
|
|
act->flags |= LD_AF_LV_LOCK;
|
|
|
|
list_add_tail(&lk->list, &r->locks);
|
|
|
|
return rv;
|
|
}
|
|
|
|
static int res_convert(struct lockspace *ls, struct resource *r,
|
|
struct lock *lk, struct action *act)
|
|
{
|
|
uint32_t r_version;
|
|
int rv;
|
|
|
|
r->last_client_id = act->client_id;
|
|
|
|
log_debug("%s:%s res_convert %s cl %u", ls->name, r->name,
|
|
mode_str(act->mode), act->client_id);
|
|
|
|
if (act->mode == LD_LK_EX && lk->mode == LD_LK_SH && r->sh_count > 1)
|
|
return -EAGAIN;
|
|
|
|
/*
|
|
* lm_convert() writes new version (from ex)
|
|
* Same as lm_unlock()
|
|
*/
|
|
|
|
if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
|
|
r->version++;
|
|
lk->version = r->version;
|
|
r_version = r->version;
|
|
r->version_zero_valid = 0;
|
|
|
|
log_debug("%s:%s res_convert r_version inc %u",
|
|
ls->name, r->name, r_version);
|
|
|
|
} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
|
|
r->version = lk->version;
|
|
r_version = r->version;
|
|
r->version_zero_valid = 0;
|
|
|
|
log_debug("%s:%s res_convert r_version new %u", ls->name, r->name, r_version);
|
|
} else {
|
|
r_version = 0;
|
|
}
|
|
|
|
rv = lm_convert(ls, r, act->mode, act, r_version);
|
|
|
|
log_debug("%s:%s res_convert rv %d", ls->name, r->name, rv);
|
|
|
|
if (rv < 0)
|
|
return rv;
|
|
|
|
if (lk->mode == LD_LK_EX && act->mode == LD_LK_SH) {
|
|
r->sh_count = 1;
|
|
} else if (lk->mode == LD_LK_SH && act->mode == LD_LK_EX) {
|
|
r->sh_count = 0;
|
|
} else {
|
|
/* should not be possible */
|
|
log_error("%s:%s res_convert invalid modes %d %d",
|
|
ls->name, r->name, lk->mode, act->mode);
|
|
return -1;
|
|
}
|
|
|
|
r->mode = act->mode;
|
|
lk->mode = act->mode;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int res_cancel(struct lockspace *ls, struct resource *r,
|
|
struct action *act)
|
|
{
|
|
struct action *cact;
|
|
|
|
/*
|
|
* a client can cancel its own non-persistent lock requests,
|
|
* when could this happen?
|
|
*
|
|
* a client can cancel other client's persistent lock requests,
|
|
* when could this happen?
|
|
*/
|
|
|
|
if (act->flags & LD_AF_PERSISTENT) {
|
|
list_for_each_entry(cact, &r->actions, list) {
|
|
if (!(cact->flags & LD_AF_PERSISTENT))
|
|
continue;
|
|
goto do_cancel;
|
|
}
|
|
} else {
|
|
cact = find_action_client(r, act->client_id);
|
|
if (cact)
|
|
goto do_cancel;
|
|
}
|
|
|
|
return -ENOENT;
|
|
|
|
do_cancel:
|
|
log_debug("%s:%s res_cancel cl %u", ls->name, r->name, cact->client_id);
|
|
cact->result = -ECANCELED;
|
|
list_del(&cact->list);
|
|
add_client_result(cact);
|
|
|
|
return -ECANCELED;
|
|
}
|
|
|
|
/*
|
|
* lm_unlock() writes a new r_version (from ex)
|
|
*
|
|
* The r_version of the vg resource is incremented if
|
|
* an "update" was received for the vg lock. The update
|
|
* contains the new vg seqno from the vg metadata which is
|
|
* used as the r_version.
|
|
*
|
|
* The r_version of the global resource is automatically
|
|
* incremented when it is unlocked from ex mode.
|
|
*
|
|
* r_version is incremented every time a command releases
|
|
* the global lock from ex.
|
|
*/
|
|
|
|
/*
|
|
* persistent locks will not be unlocked for OP_CLOSE/act_close
|
|
* because act_close->flags does not have the PERSISTENT flag
|
|
* set, and a persistent lk->client_id is zero, which will not
|
|
* match the client in act_close->client_id.
|
|
*/
|
|
|
|
static int res_unlock(struct lockspace *ls, struct resource *r,
|
|
struct action *act)
|
|
{
|
|
struct lock *lk;
|
|
uint32_t r_version;
|
|
int rv;
|
|
|
|
if (act->flags & LD_AF_PERSISTENT) {
|
|
lk = find_lock_persistent(r);
|
|
if (lk)
|
|
goto do_unlock;
|
|
} else {
|
|
lk = find_lock_client(r, act->client_id);
|
|
if (lk)
|
|
goto do_unlock;
|
|
}
|
|
|
|
if (act->op != LD_OP_CLOSE)
|
|
log_debug("%s:%s res_unlock cl %u no locks", ls->name, r->name, act->client_id);
|
|
return -ENOENT;
|
|
|
|
do_unlock:
|
|
if ((act->flags & LD_AF_LV_UNLOCK) && (r->last_client_id != act->client_id)) {
|
|
log_debug("%s:%s res_unlock cl %u for failed client ignored, last client %u",
|
|
ls->name, r->name, act->client_id, r->last_client_id);
|
|
return -ENOENT;
|
|
}
|
|
|
|
r->last_client_id = act->client_id;
|
|
|
|
if (act->op == LD_OP_CLOSE)
|
|
log_debug("%s:%s res_unlock cl %u from close", ls->name, r->name, act->client_id);
|
|
else if (r->type == LD_RT_LV)
|
|
log_debug("%s:%s res_unlock cl %u (%s)", ls->name, r->name, act->client_id, act->lv_name);
|
|
else
|
|
log_debug("%s:%s res_unlock cl %u", ls->name, r->name, act->client_id);
|
|
|
|
/* send unlock to lm when last sh lock is unlocked */
|
|
if (lk->mode == LD_LK_SH) {
|
|
r->sh_count--;
|
|
if (r->sh_count > 0) {
|
|
log_debug("%s:%s res_unlock sh_count %u", ls->name, r->name, r->sh_count);
|
|
goto rem_lk;
|
|
}
|
|
}
|
|
|
|
if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
|
|
r->version++;
|
|
lk->version = r->version;
|
|
r_version = r->version;
|
|
r->version_zero_valid = 0;
|
|
|
|
log_debug("%s:%s res_unlock r_version inc %u", ls->name, r->name, r_version);
|
|
|
|
} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
|
|
r->version = lk->version;
|
|
r_version = r->version;
|
|
r->version_zero_valid = 0;
|
|
|
|
log_debug("%s:%s res_unlock r_version new %u",
|
|
ls->name, r->name, r_version);
|
|
} else {
|
|
r_version = 0;
|
|
}
|
|
|
|
rv = lm_unlock(ls, r, act, r_version, 0);
|
|
if (rv < 0) {
|
|
/* should never happen, retry? */
|
|
log_error("%s:%s res_unlock lm error %d", ls->name, r->name, rv);
|
|
return rv;
|
|
}
|
|
|
|
/* log_debug("%s:%s res_unlock lm done", ls->name, r->name); */
|
|
|
|
rem_lk:
|
|
list_del(&lk->list);
|
|
free_lock(lk);
|
|
|
|
if (list_empty(&r->locks))
|
|
r->mode = LD_LK_UN;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int res_update(struct lockspace *ls, struct resource *r,
|
|
struct action *act)
|
|
{
|
|
struct lock *lk;
|
|
|
|
lk = find_lock_client(r, act->client_id);
|
|
if (!lk) {
|
|
log_error("%s:%s res_update cl %u lock not found",
|
|
ls->name, r->name, act->client_id);
|
|
return -ENOENT;
|
|
}
|
|
|
|
if (r->mode != LD_LK_EX) {
|
|
log_error("%s:%s res_update cl %u version on non-ex lock",
|
|
ls->name, r->name, act->client_id);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* lk version will be written to lm by unlock */
|
|
|
|
if (act->flags & LD_AF_NEXT_VERSION)
|
|
lk->version = r->version + 1;
|
|
else {
|
|
if (r->version >= act->version) {
|
|
/*
|
|
* This update is done from vg_write. If the metadata with
|
|
* this seqno is not committed by vg_commit, then next
|
|
* vg_write can use the same seqno, causing us to see no
|
|
* increase in seqno here as expected.
|
|
* FIXME: In this case, do something like setting the lvb
|
|
* version to 0 to instead of the same seqno which will
|
|
* force an invalidation on other hosts. The next change
|
|
* will return to using the seqno again.
|
|
*/
|
|
log_error("%s:%s res_update cl %u old version %u new version %u too small",
|
|
ls->name, r->name, act->client_id, r->version, act->version);
|
|
}
|
|
lk->version = act->version;
|
|
}
|
|
|
|
log_debug("%s:%s res_update cl %u lk version to %u", ls->name, r->name, act->client_id, lk->version);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* For DLM and IDM locking schemes, there is nothing to deallocate when freeing a
|
|
* LV, the LV will simply be unlocked by rem_resource.
|
|
*/
|
|
|
|
static int free_lv(struct lockspace *ls, struct resource *r)
|
|
{
|
|
if (ls->lm_type == LD_LM_SANLOCK)
|
|
return lm_free_lv_sanlock(ls, r);
|
|
else if (ls->lm_type == LD_LM_DLM)
|
|
return 0;
|
|
else if (ls->lm_type == LD_LM_IDM)
|
|
return 0;
|
|
else
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* NB. we can't do this if sanlock is holding any locks on
|
|
* the resource; we'd be rewriting the resource from under
|
|
* sanlock and would confuse or break it badly. We don't
|
|
* know what another host is doing, so these must be used
|
|
* very carefully.
|
|
*/
|
|
|
|
static int res_able(struct lockspace *ls, struct resource *r,
|
|
struct action *act)
|
|
{
|
|
int rv;
|
|
|
|
if (ls->lm_type != LD_LM_SANLOCK) {
|
|
log_error("enable/disable only applies to sanlock");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (r->type != LD_RT_GL) {
|
|
log_error("enable/disable only applies to global lock");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (r->mode != LD_LK_UN) {
|
|
log_error("enable/disable only allowed on unlocked resource");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (act->op == LD_OP_ENABLE && gl_lsname_sanlock[0]) {
|
|
log_error("disable global lock in %s before enable in %s",
|
|
gl_lsname_sanlock, ls->name);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if ((act->op == LD_OP_DISABLE) && (act->flags & LD_AF_EX_DISABLE)) {
|
|
rv = lm_ex_disable_gl_sanlock(ls);
|
|
goto out;
|
|
}
|
|
|
|
rv = lm_able_gl_sanlock(ls, act->op == LD_OP_ENABLE);
|
|
|
|
if (!rv && (act->op == LD_OP_ENABLE))
|
|
gl_vg_removed = 0;
|
|
out:
|
|
return rv;
|
|
}
|
|
|
|
/*
|
|
* Go through queued actions, and make lock/unlock calls on the resource
|
|
* based on the actions and the existing lock state.
|
|
*
|
|
* All lock operations sent to the lock manager are non-blocking.
|
|
* This is because sanlock does not support lock queueing.
|
|
* Eventually we could enhance this to take advantage of lock
|
|
* queueing when available (i.e. for the dlm).
|
|
*
|
|
* act_close_list: list of CLOSE actions, identifying clients that have
|
|
* closed/terminated their lvmlockd connection, and whose locks should
|
|
* be released. Do not remove these actions from act_close_list.
|
|
*
|
|
* retry_out: set to 1 if the lock manager said we should retry,
|
|
* meaning we should call res_process() again in a short while to retry.
|
|
*/
|
|
|
|
static void res_process(struct lockspace *ls, struct resource *r,
|
|
struct list_head *act_close_list, int *retry_out)
|
|
{
|
|
struct action *act, *safe, *act_close;
|
|
struct lock *lk;
|
|
int lm_retry;
|
|
int rv;
|
|
|
|
/*
|
|
* handle version updates for ex locks
|
|
* (new version will be written by unlock)
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (act->op == LD_OP_UPDATE) {
|
|
rv = res_update(ls, r, act);
|
|
act->result = rv;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* handle explicit unlock actions
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if ((act->op == LD_OP_LOCK) &&
|
|
(act->mode == LD_LK_IV || act->mode == LD_LK_NL)) {
|
|
act->result = -EINVAL;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
} else if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
|
|
rv = res_unlock(ls, r, act);
|
|
|
|
if (rv == -ENOENT && (act->flags & LD_AF_UNLOCK_CANCEL))
|
|
rv = res_cancel(ls, r, act);
|
|
|
|
/*
|
|
* possible unlock results:
|
|
* 0: unlock succeeded
|
|
* -ECANCELED: cancel succeeded
|
|
* -ENOENT: nothing to unlock or cancel
|
|
*/
|
|
|
|
act->result = rv;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* handle implicit unlocks due to client exit,
|
|
* also clear any outstanding actions for the client
|
|
*/
|
|
|
|
list_for_each_entry(act_close, act_close_list, list) {
|
|
res_unlock(ls, r, act_close);
|
|
res_cancel(ls, r, act_close);
|
|
}
|
|
|
|
/*
|
|
* handle freeing a lock for an lv that has been removed
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (act->op == LD_OP_FREE && act->rt == LD_RT_LV) {
|
|
log_debug("%s:%s free_lv", ls->name, r->name);
|
|
rv = free_lv(ls, r);
|
|
act->result = rv;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
goto r_free;
|
|
|
|
}
|
|
}
|
|
|
|
/*
|
|
* handle enable/disable
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE) {
|
|
rv = res_able(ls, r, act);
|
|
act->result = rv;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
|
|
if (!rv && act->op == LD_OP_DISABLE) {
|
|
log_debug("%s:%s free disabled", ls->name, r->name);
|
|
goto r_free;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* transient requests on existing transient locks
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (act->flags & LD_AF_PERSISTENT)
|
|
continue;
|
|
|
|
lk = find_lock_client(r, act->client_id);
|
|
if (!lk)
|
|
continue;
|
|
|
|
if (lk->mode != act->mode) {
|
|
/* convert below */
|
|
/*
|
|
act->result = -EEXIST;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
*/
|
|
continue;
|
|
} else {
|
|
/* success */
|
|
r->last_client_id = act->client_id;
|
|
act->result = -EALREADY;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* persistent requests on existing persistent locks
|
|
*
|
|
* persistent locks are not owned by a client, so any
|
|
* existing with matching mode satisfies a request.
|
|
* only one persistent lock is kept on a resource.
|
|
* a single "unowned" persistent lock satisfies
|
|
* any/multiple client requests for a persistent lock.
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (!(act->flags & LD_AF_PERSISTENT))
|
|
continue;
|
|
|
|
lk = find_lock_persistent(r);
|
|
if (!lk)
|
|
continue;
|
|
|
|
if (lk->mode != act->mode) {
|
|
/* convert below */
|
|
/*
|
|
act->result = -EEXIST;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
*/
|
|
continue;
|
|
} else {
|
|
/* success */
|
|
r->last_client_id = act->client_id;
|
|
act->result = -EALREADY;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* transient requests with existing persistent locks
|
|
*
|
|
* Just grant the transient request and do not
|
|
* keep a record of it. Assume that the persistent
|
|
* lock will not go away while the transient lock
|
|
* is needed.
|
|
*
|
|
* This would be used when an ex, persistent lv lock
|
|
* exists from activation, and then something like
|
|
* lvextend asks for a transient ex lock to change
|
|
* the lv. The lv could not be unlocked by deactivation
|
|
* while the lvextend was running.
|
|
*
|
|
* The logic here for mixing T/P locks is not general
|
|
* support; there are a number of cases where it will
|
|
* not work: updating version number (lv locks have
|
|
* none), ex locks from multiple clients will not
|
|
* conflict, explicit un of the transient lock will fail.
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (act->flags & LD_AF_PERSISTENT)
|
|
continue;
|
|
|
|
lk = find_lock_persistent(r);
|
|
if (!lk)
|
|
continue;
|
|
|
|
if ((lk->mode == LD_LK_EX) ||
|
|
(lk->mode == LD_LK_SH && act->mode == LD_LK_SH)) {
|
|
r->last_client_id = act->client_id;
|
|
act->result = 0;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
} else {
|
|
/* persistent lock is sh, transient request is ex */
|
|
log_debug("res_process %s existing persistent lock new transient", r->name);
|
|
r->last_client_id = act->client_id;
|
|
act->flags |= LD_AF_SH_EXISTS;
|
|
act->result = -EEXIST;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* persistent requests with existing transient locks
|
|
*
|
|
* If a client requests a P (persistent) lock for a T (transient)
|
|
* lock it already holds, we can just change T to P. Fail if the
|
|
* same happens for locks from different clients. Changing
|
|
* another client's lock from T to P may cause problems
|
|
* if that client tries to unlock or update version.
|
|
*
|
|
* I don't think this P/T combination will be used.
|
|
* It might be used if a command was able to take a P
|
|
* vg lock, in which case the T vg lock would already
|
|
* be held for reading. If the T lock was sh, it would
|
|
* be converted to P ex. If the T/P modes matched, the
|
|
* lock could just be changed from T to P.
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (!(act->flags & LD_AF_PERSISTENT))
|
|
continue;
|
|
|
|
lk = find_lock_client(r, act->client_id);
|
|
if (!lk)
|
|
continue;
|
|
|
|
if (lk->mode != act->mode) {
|
|
/* FIXME: convert and change to persistent? */
|
|
log_debug("res_process %s existing transient lock new persistent", r->name);
|
|
r->last_client_id = act->client_id;
|
|
act->result = -EEXIST;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
} else {
|
|
r->last_client_id = act->client_id;
|
|
lk->flags |= LD_LF_PERSISTENT;
|
|
lk->client_id = 0;
|
|
act->result = 0;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* convert mode of existing locks
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (act->flags & LD_AF_PERSISTENT)
|
|
lk = find_lock_persistent(r);
|
|
else
|
|
lk = find_lock_client(r, act->client_id);
|
|
if (!lk)
|
|
continue;
|
|
|
|
if (lk->mode == act->mode) {
|
|
/* should never happen, should be found above */
|
|
log_error("convert same mode");
|
|
continue;
|
|
}
|
|
|
|
/* convert fails immediately, no EAGAIN retry */
|
|
rv = res_convert(ls, r, lk, act);
|
|
act->result = rv;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
|
|
/*
|
|
* Cases above are all requests addressed by existing locks.
|
|
* Below handles the rest. Transient and persistent are
|
|
* handled the same, except
|
|
* - if mode of existing lock is incompat with requested,
|
|
* leave the act on r->actions
|
|
* - if r mode is EX, any lock action is blocked, just quit
|
|
*
|
|
* Retry a lock request that fails due to a lock conflict (-EAGAIN):
|
|
* if we have not exceeded max retries and lm sets lm_retry (sanlock
|
|
* transient conflicts from shared lock implementation), or r type
|
|
* is gl or vg (transient real conflicts we want to hide from command).
|
|
* lv lock conflicts won't be transient so don't retry them.
|
|
*/
|
|
|
|
if (r->mode == LD_LK_EX)
|
|
return;
|
|
|
|
/*
|
|
* r mode is SH or UN, pass lock-sh actions to lm
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
/* grant in order, so break here */
|
|
if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX)
|
|
break;
|
|
|
|
if (act->op == LD_OP_LOCK && act->mode == LD_LK_SH) {
|
|
lm_retry = 0;
|
|
|
|
rv = res_lock(ls, r, act, &lm_retry);
|
|
if ((rv == -EAGAIN) &&
|
|
(act->retries <= act->max_retries) &&
|
|
(lm_retry || (r->type != LD_RT_LV))) {
|
|
/* leave act on list */
|
|
log_debug("%s:%s res_lock EAGAIN retry", ls->name, r->name);
|
|
act->retries++;
|
|
*retry_out = 1;
|
|
} else {
|
|
act->result = rv;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
if (rv == -EUNATCH)
|
|
goto r_free;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* r mode is SH, any ex lock action is blocked, just quit
|
|
*/
|
|
|
|
if (r->mode == LD_LK_SH)
|
|
return;
|
|
|
|
/*
|
|
* r mode is UN, pass lock-ex action to lm
|
|
*/
|
|
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX) {
|
|
lm_retry = 0;
|
|
|
|
rv = res_lock(ls, r, act, &lm_retry);
|
|
if ((rv == -EAGAIN) &&
|
|
(act->retries <= act->max_retries) &&
|
|
(lm_retry || (r->type != LD_RT_LV))) {
|
|
/* leave act on list */
|
|
log_debug("%s:%s res_lock EAGAIN retry", ls->name, r->name);
|
|
act->retries++;
|
|
*retry_out = 1;
|
|
} else {
|
|
act->result = rv;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
if (rv == -EUNATCH)
|
|
goto r_free;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return;
|
|
|
|
r_free:
|
|
/* For the EUNATCH case it may be possible there are queued actions? */
|
|
list_for_each_entry_safe(act, safe, &r->actions, list) {
|
|
log_error("%s:%s res_process r_free cancel %s client %d",
|
|
ls->name, r->name, op_str(act->op), act->client_id);
|
|
act->result = -ECANCELED;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
log_debug("%s:%s res_process free", ls->name, r->name);
|
|
lm_rem_resource(ls, r);
|
|
list_del(&r->list);
|
|
free_resource(r);
|
|
}
|
|
|
|
#define LOCKS_EXIST_ANY 1
|
|
#define LOCKS_EXIST_GL 2
|
|
#define LOCKS_EXIST_VG 3
|
|
#define LOCKS_EXIST_LV 4
|
|
|
|
static int for_each_lock(struct lockspace *ls, int locks_do)
|
|
{
|
|
struct resource *r;
|
|
struct lock *lk;
|
|
|
|
list_for_each_entry(r, &ls->resources, list) {
|
|
list_for_each_entry(lk, &r->locks, list) {
|
|
if (locks_do == LOCKS_EXIST_ANY)
|
|
return 1;
|
|
|
|
if (locks_do == LOCKS_EXIST_GL && r->type == LD_RT_GL)
|
|
return 1;
|
|
|
|
if (locks_do == LOCKS_EXIST_VG && r->type == LD_RT_VG)
|
|
return 1;
|
|
|
|
if (locks_do == LOCKS_EXIST_LV && r->type == LD_RT_LV)
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int clear_locks(struct lockspace *ls, int free_vg, int drop_vg)
|
|
{
|
|
struct resource *r, *r_safe;
|
|
struct lock *lk, *lk_safe;
|
|
struct action *act, *act_safe;
|
|
uint32_t lk_version;
|
|
uint32_t r_version;
|
|
int lk_count = 0;
|
|
int rv;
|
|
|
|
list_for_each_entry_safe(r, r_safe, &ls->resources, list) {
|
|
lk_version = 0;
|
|
|
|
list_for_each_entry_safe(lk, lk_safe, &r->locks, list) {
|
|
lk_count++;
|
|
|
|
/*
|
|
* Stopping a lockspace shouldn't happen with LV locks
|
|
* still held, but it will be stopped with GL and VG
|
|
* locks held. The drop_vg case may see LV locks.
|
|
*/
|
|
|
|
if (lk->flags & LD_LF_PERSISTENT && !drop_vg)
|
|
log_error("%s:%s clear lock persistent", ls->name, r->name);
|
|
else
|
|
log_debug("%s:%s clear lock mode %s client %d", ls->name, r->name, mode_str(lk->mode), lk->client_id);
|
|
|
|
if (lk->version > lk_version)
|
|
lk_version = lk->version;
|
|
|
|
list_del(&lk->list);
|
|
free_lock(lk);
|
|
}
|
|
|
|
if (r->mode == LD_LK_UN)
|
|
goto r_free;
|
|
|
|
if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
|
|
r->version++;
|
|
r_version = r->version;
|
|
log_debug("%s:%s clear_locks r_version inc %u",
|
|
ls->name, r->name, r_version);
|
|
|
|
} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk_version > r->version)) {
|
|
r->version = lk_version;
|
|
r_version = r->version;
|
|
log_debug("%s:%s clear_locks r_version new %u",
|
|
ls->name, r->name, r_version);
|
|
|
|
} else {
|
|
r_version = 0;
|
|
}
|
|
|
|
rv = lm_unlock(ls, r, NULL, r_version, free_vg ? LMUF_FREE_VG : 0);
|
|
if (rv < 0) {
|
|
/* should never happen */
|
|
log_error("%s:%s clear_locks free %d drop %d lm unlock error %d",
|
|
ls->name, r->name, free_vg, drop_vg, rv);
|
|
}
|
|
|
|
list_for_each_entry_safe(act, act_safe, &r->actions, list) {
|
|
log_error("%s:%s clear_locks cancel %s client %d",
|
|
ls->name, r->name, op_str(act->op), act->client_id);
|
|
act->result = -ECANCELED;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
r_free:
|
|
log_debug("%s:%s free", ls->name, r->name);
|
|
lm_rem_resource(ls, r);
|
|
list_del(&r->list);
|
|
free_resource(r);
|
|
}
|
|
|
|
return lk_count;
|
|
}
|
|
|
|
/*
|
|
* find and return the resource that is referenced by the action
|
|
* - there is a single gl resource per lockspace
|
|
* - there is a single vg resource per lockspace
|
|
* - there can be many lv resources per lockspace, compare names
|
|
*/
|
|
|
|
static struct resource *find_resource_act(struct lockspace *ls,
|
|
struct action *act,
|
|
int nocreate)
|
|
{
|
|
struct resource *r;
|
|
|
|
list_for_each_entry(r, &ls->resources, list) {
|
|
if (r->type != act->rt)
|
|
continue;
|
|
|
|
if (r->type == LD_RT_GL && act->rt == LD_RT_GL)
|
|
return r;
|
|
|
|
if (r->type == LD_RT_VG && act->rt == LD_RT_VG)
|
|
return r;
|
|
|
|
if (r->type == LD_RT_LV && act->rt == LD_RT_LV &&
|
|
!strcmp(r->name, act->lv_uuid))
|
|
return r;
|
|
}
|
|
|
|
if (nocreate)
|
|
return NULL;
|
|
|
|
if (!(r = alloc_resource()))
|
|
return NULL;
|
|
|
|
r->type = act->rt;
|
|
r->mode = LD_LK_UN;
|
|
|
|
if (r->type == LD_RT_GL) {
|
|
dm_strncpy(r->name, R_NAME_GL, sizeof(r->name));
|
|
r->use_vb = 1;
|
|
} else if (r->type == LD_RT_VG) {
|
|
dm_strncpy(r->name, R_NAME_VG, sizeof(r->name));
|
|
r->use_vb = 1;
|
|
} else if (r->type == LD_RT_LV) {
|
|
dm_strncpy(r->name, act->lv_uuid, sizeof(r->name));
|
|
r->use_vb = 0;
|
|
}
|
|
|
|
list_add_tail(&r->list, &ls->resources);
|
|
|
|
return r;
|
|
}
|
|
|
|
static void free_ls_resources(struct lockspace *ls)
|
|
{
|
|
struct resource *r, *r_safe;
|
|
|
|
list_for_each_entry_safe(r, r_safe, &ls->resources, list) {
|
|
lm_rem_resource(ls, r);
|
|
list_del(&r->list);
|
|
free_resource(r);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* ls is the vg being removed that holds the global lock.
|
|
* check if any other vgs will be left without a global lock.
|
|
*/
|
|
|
|
static int other_sanlock_vgs_exist(struct lockspace *ls_rem)
|
|
{
|
|
struct lockspace *ls;
|
|
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
if (ls->lm_type != LD_LM_SANLOCK)
|
|
continue;
|
|
if (!strcmp(ls->name, ls_rem->name))
|
|
continue;
|
|
log_debug("other sanlock vg exists %s", ls->name);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* LOCK is the main thing we're interested in; the others are unlikely.
|
|
*/
|
|
|
|
static int process_op_during_kill(struct action *act)
|
|
{
|
|
if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN)
|
|
return 1;
|
|
|
|
switch (act->op) {
|
|
case LD_OP_LOCK:
|
|
case LD_OP_ENABLE:
|
|
case LD_OP_DISABLE:
|
|
case LD_OP_UPDATE:
|
|
case LD_OP_RENAME_BEFORE:
|
|
case LD_OP_RENAME_FINAL:
|
|
case LD_OP_QUERY_LOCK:
|
|
case LD_OP_FIND_FREE_LOCK:
|
|
return 0;
|
|
};
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Process actions queued for this lockspace by
|
|
* client_recv_action / add_lock_action.
|
|
*
|
|
* The lockspace_thread can touch its own ls struct without holding
|
|
* lockspaces_mutex until it sets ls->thread_done, after which it
|
|
* cannot touch ls without holding lockspaces_mutex.
|
|
*/
|
|
|
|
#define LOCK_RETRY_MS 1000 /* milliseconds to delay between retry */
|
|
|
|
static void *lockspace_thread_main(void *arg_in)
|
|
{
|
|
struct lockspace *ls = arg_in;
|
|
struct resource *r, *r2;
|
|
struct action *add_act, *act, *safe;
|
|
struct action *act_op_free = NULL;
|
|
struct list_head tmp_act;
|
|
struct list_head act_close;
|
|
char tmp_name[MAX_NAME+5];
|
|
int fail_stop_busy;
|
|
int free_vg = 0;
|
|
int drop_vg = 0;
|
|
int error = 0;
|
|
int adopt_only = 0;
|
|
int adopt_ok = 0;
|
|
int wait_flag = 0;
|
|
int retry;
|
|
int rv;
|
|
|
|
INIT_LIST_HEAD(&act_close);
|
|
INIT_LIST_HEAD(&tmp_act);
|
|
|
|
/* first action may be client add */
|
|
pthread_mutex_lock(&ls->mutex);
|
|
act = NULL;
|
|
add_act = NULL;
|
|
if (!list_empty(&ls->actions)) {
|
|
act = list_first_entry(&ls->actions, struct action, list);
|
|
if (act->op == LD_OP_START) {
|
|
add_act = act;
|
|
list_del(&add_act->list);
|
|
|
|
if (add_act->flags & LD_AF_WAIT)
|
|
wait_flag = 1;
|
|
if (add_act->flags & LD_AF_ADOPT_ONLY)
|
|
adopt_only = 1;
|
|
if (add_act->flags & LD_AF_ADOPT)
|
|
adopt_ok = 1;
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
|
|
if (ls->lm_type == LD_LM_DLM && !strcmp(gl_lsname_dlm, ls->name)) {
|
|
log_debug("dlm global lockspace adopt_ok");
|
|
adopt_ok = 1;
|
|
}
|
|
|
|
log_debug("S %s lm_add_lockspace %s wait %d adopt_only %d adopt_ok %d",
|
|
ls->name, lm_str(ls->lm_type), wait_flag, adopt_only, adopt_ok);
|
|
|
|
/*
|
|
* The prepare step does not wait for anything and is quick;
|
|
* it tells us if the parameters are valid and the lm is running.
|
|
*/
|
|
error = lm_prepare_lockspace(ls, add_act);
|
|
|
|
if (add_act && (!wait_flag || error)) {
|
|
/* send initial join result back to client */
|
|
add_act->result = error;
|
|
add_client_result(add_act);
|
|
add_act = NULL;
|
|
}
|
|
|
|
/*
|
|
* The actual lockspace join can take a while.
|
|
*/
|
|
if (!error) {
|
|
error = lm_add_lockspace(ls, add_act, adopt_only, adopt_ok);
|
|
|
|
log_debug("S %s lm_add_lockspace done %d", ls->name, error);
|
|
|
|
if (ls->sanlock_gl_enabled && gl_lsname_sanlock[0] &&
|
|
strcmp(ls->name, gl_lsname_sanlock))
|
|
sanlock_gl_dup = 1;
|
|
|
|
if (add_act) {
|
|
/* send final join result back to client */
|
|
add_act->result = error;
|
|
add_client_result(add_act);
|
|
}
|
|
}
|
|
|
|
pthread_mutex_lock(&ls->mutex);
|
|
if (error) {
|
|
ls->thread_stop = 1;
|
|
ls->create_fail = 1;
|
|
} else {
|
|
ls->create_done = 1;
|
|
}
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
|
|
if (error)
|
|
goto out_act;
|
|
|
|
restart:
|
|
|
|
while (1) {
|
|
pthread_mutex_lock(&ls->mutex);
|
|
while (!ls->thread_work) {
|
|
if (ls->thread_stop) {
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
goto out_rem;
|
|
}
|
|
pthread_cond_wait(&ls->cond, &ls->mutex);
|
|
}
|
|
|
|
/*
|
|
* Process all the actions queued for this lockspace.
|
|
* The client thread queues actions on ls->actions.
|
|
*
|
|
* Here, take all the actions off of ls->actions, and:
|
|
*
|
|
* - For lock operations, move the act to r->actions.
|
|
* These lock actions/operations processed by res_process().
|
|
*
|
|
* - For non-lock operations, e.g. related to managing
|
|
* the lockspace, process them in this loop.
|
|
*/
|
|
|
|
while (1) {
|
|
if (list_empty(&ls->actions)) {
|
|
ls->thread_work = 0;
|
|
break;
|
|
}
|
|
|
|
act = list_first_entry(&ls->actions, struct action, list);
|
|
|
|
if (act->op == LD_OP_KILL_VG && act->rt == LD_RT_VG) {
|
|
/* Continue processing until DROP_VG arrives. */
|
|
log_debug("S %s kill_vg", ls->name);
|
|
ls->kill_vg = 1;
|
|
list_del(&act->list);
|
|
act->result = 0;
|
|
add_client_result(act);
|
|
continue;
|
|
}
|
|
|
|
if (ls->kill_vg && !process_op_during_kill(act)) {
|
|
log_debug("S %s disallow op %s after kill_vg", ls->name, op_str(act->op));
|
|
list_del(&act->list);
|
|
act->result = -EVGKILLED;
|
|
add_client_result(act);
|
|
continue;
|
|
}
|
|
|
|
if (act->op == LD_OP_DROP_VG && act->rt == LD_RT_VG) {
|
|
/*
|
|
* If leases are released after i/o errors begin
|
|
* but before lvmlockctl --kill, then the VG is not
|
|
* killed, but drop is still needed to clean up the
|
|
* VG, so in that case there would be a drop op without
|
|
* a preceding kill op.
|
|
*/
|
|
if (!ls->kill_vg)
|
|
log_debug("S %s received drop without kill", ls->name);
|
|
log_debug("S %s drop_vg", ls->name);
|
|
ls->thread_work = 0;
|
|
ls->thread_stop = 1;
|
|
drop_vg = 1;
|
|
break;
|
|
}
|
|
|
|
if (act->op == LD_OP_STOP) {
|
|
/* thread_stop is already set */
|
|
ls->thread_work = 0;
|
|
break;
|
|
}
|
|
|
|
if (act->op == LD_OP_FREE && act->rt == LD_RT_VG) {
|
|
/* vgremove */
|
|
log_debug("S %s checking for lockspace hosts", ls->name);
|
|
rv = lm_hosts(ls, 1);
|
|
if (rv) {
|
|
/*
|
|
* Checking for hosts here in addition to after the
|
|
* main loop allows vgremove to fail and be rerun
|
|
* after the ls is stopped on other hosts.
|
|
*/
|
|
log_error("S %s lockspace hosts %d", ls->name, rv);
|
|
list_del(&act->list);
|
|
act->result = -EBUSY;
|
|
add_client_result(act);
|
|
continue;
|
|
}
|
|
ls->thread_work = 0;
|
|
ls->thread_stop = 1;
|
|
free_vg = 1;
|
|
break;
|
|
}
|
|
|
|
if (act->op == LD_OP_BUSY && act->rt == LD_RT_VG) {
|
|
log_debug("S %s checking if lockspace is busy", ls->name);
|
|
rv = lm_hosts(ls, 0);
|
|
if (rv)
|
|
act->result = -EBUSY;
|
|
else
|
|
act->result = 0;
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
continue;
|
|
}
|
|
|
|
if (act->op == LD_OP_RENAME_BEFORE && act->rt == LD_RT_VG) {
|
|
/* vgrename */
|
|
log_debug("S %s checking for lockspace hosts", ls->name);
|
|
rv = lm_hosts(ls, 1);
|
|
if (rv) {
|
|
log_error("S %s lockspace hosts %d", ls->name, rv);
|
|
list_del(&act->list);
|
|
act->result = -EBUSY;
|
|
add_client_result(act);
|
|
continue;
|
|
}
|
|
ls->thread_work = 0;
|
|
ls->thread_stop = 1;
|
|
/* Do we want to check hosts again below like vgremove? */
|
|
break;
|
|
}
|
|
|
|
if (act->op == LD_OP_QUERY_LOCK) {
|
|
r = find_resource_act(ls, act, 0);
|
|
if (!r)
|
|
act->result = -ENOENT;
|
|
else {
|
|
act->result = 0;
|
|
act->mode = r->mode;
|
|
}
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
continue;
|
|
}
|
|
|
|
if (act->op == LD_OP_FIND_FREE_LOCK && act->rt == LD_RT_VG) {
|
|
uint64_t free_offset = 0;
|
|
int sector_size = 0;
|
|
int align_size = 0;
|
|
|
|
log_debug("S %s find free lock", ls->name);
|
|
rv = lm_find_free_lock(ls, &free_offset, §or_size, &align_size);
|
|
log_debug("S %s find free lock %d offset %llu sector_size %d align_size %d",
|
|
ls->name, rv, (unsigned long long)free_offset, sector_size, align_size);
|
|
ls->free_lock_offset = free_offset;
|
|
ls->free_lock_sector_size = sector_size;
|
|
ls->free_lock_align_size = align_size;
|
|
list_del(&act->list);
|
|
act->result = rv;
|
|
add_client_result(act);
|
|
continue;
|
|
}
|
|
|
|
list_del(&act->list);
|
|
|
|
/* applies to all resources */
|
|
if (act->op == LD_OP_CLOSE) {
|
|
list_add(&act->list, &act_close);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* All the other op's are for locking.
|
|
* Find the specific resource that the lock op is for,
|
|
* and add the act to the resource's list of lock ops.
|
|
*
|
|
* (This creates a new resource if the one named in
|
|
* the act is not found.)
|
|
*/
|
|
|
|
r = find_resource_act(ls, act, (act->op == LD_OP_FREE) ? 1 : 0);
|
|
if (!r) {
|
|
act->result = (act->op == LD_OP_FREE) ? -ENOENT : -ENOMEM;
|
|
add_client_result(act);
|
|
continue;
|
|
}
|
|
|
|
list_add_tail(&act->list, &r->actions);
|
|
|
|
log_debug("%s:%s action %s %s", ls->name, r->name,
|
|
op_str(act->op), mode_str(act->mode));
|
|
}
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
|
|
/*
|
|
* Process the lock operations that have been queued for each
|
|
* resource.
|
|
*/
|
|
|
|
retry = 0;
|
|
|
|
list_for_each_entry_safe(r, r2, &ls->resources, list)
|
|
res_process(ls, r, &act_close, &retry);
|
|
|
|
list_for_each_entry_safe(act, safe, &act_close, list) {
|
|
list_del(&act->list);
|
|
free_action(act);
|
|
}
|
|
|
|
if (retry) {
|
|
ls->thread_work = 1;
|
|
usleep(LOCK_RETRY_MS * 1000);
|
|
}
|
|
}
|
|
|
|
out_rem:
|
|
log_debug("S %s stopping", ls->name);
|
|
|
|
/*
|
|
* For sanlock, we need to unlock any existing locks
|
|
* before removing the lockspace, otherwise the sanlock
|
|
* daemon will kill us when the lockspace goes away.
|
|
* For dlm, we leave with force, so all locks will
|
|
* automatically be dropped when we leave the lockspace,
|
|
* so unlocking all before leaving could be skipped.
|
|
*
|
|
* Blindly dropping all existing locks must only be
|
|
* allowed in emergency/force situations, otherwise it's
|
|
* obviously dangerous, since the lock holders are still
|
|
* operating under the assumption that they hold the lock.
|
|
* drop_vg drops all existing locks, but should only
|
|
* happen when the VG access has been forcibly and
|
|
* succesfully terminated.
|
|
*
|
|
* For vgremove of a sanlock vg, the vg lock will be held,
|
|
* and possibly the gl lock if this vg holds the gl.
|
|
* sanlock vgremove wants to unlock-rename these locks.
|
|
*/
|
|
|
|
log_debug("S %s clearing locks", ls->name);
|
|
|
|
(void) clear_locks(ls, free_vg, drop_vg);
|
|
|
|
/*
|
|
* Tell any other hosts in the lockspace to leave it
|
|
* before we remove it (for vgremove). We do this
|
|
* before leaving the lockspace ourself because we
|
|
* need to be in the lockspace to see others.
|
|
*/
|
|
|
|
if (free_vg) {
|
|
log_debug("S %s checking for lockspace hosts", ls->name);
|
|
rv = lm_hosts(ls, 1);
|
|
if (rv)
|
|
log_error("S %s other lockspace hosts %d", ls->name, rv);
|
|
}
|
|
|
|
/*
|
|
* Leave the lockspace.
|
|
*/
|
|
|
|
fail_stop_busy = 0;
|
|
|
|
rv = lm_rem_lockspace(ls, NULL, free_vg);
|
|
if (rv < 0) {
|
|
pthread_mutex_lock(&ls->mutex);
|
|
list_for_each_entry_safe(act, safe, &ls->actions, list) {
|
|
/*
|
|
* If there's a stop action then there's a path to return an error,
|
|
* and in the case of EBUSY presumably there's a chance to redo it.
|
|
*/
|
|
if ((act->op == LD_OP_STOP) && (rv == -EBUSY)) {
|
|
log_debug("S %s rem_lockspace for stop error %d", ls->name, rv);
|
|
act->result = -EBUSY;
|
|
list_del(&act->list);
|
|
list_add_tail(&act->list, &tmp_act);
|
|
ls->thread_stop = 0;
|
|
fail_stop_busy = 1;
|
|
break;
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
|
|
if (fail_stop_busy) {
|
|
pthread_mutex_lock(&client_mutex);
|
|
list_del(&act->list);
|
|
list_add_tail(&act->list, &client_results);
|
|
pthread_cond_signal(&client_cond);
|
|
pthread_mutex_unlock(&client_mutex);
|
|
goto restart;
|
|
}
|
|
}
|
|
|
|
if (rv < 0)
|
|
log_debug("S %s rem_lockspace error %d", ls->name, rv);
|
|
else
|
|
log_debug("S %s rem_lockspace done", ls->name);
|
|
|
|
out_act:
|
|
/*
|
|
* Move remaining actions to results; this will usually (always?)
|
|
* be only the stop action.
|
|
*/
|
|
pthread_mutex_lock(&ls->mutex);
|
|
list_for_each_entry_safe(act, safe, &ls->actions, list) {
|
|
if (act->op == LD_OP_FREE) {
|
|
act_op_free = act;
|
|
act->result = 0;
|
|
} else if (act->op == LD_OP_STOP)
|
|
act->result = 0;
|
|
else if (act->op == LD_OP_DROP_VG)
|
|
act->result = 0;
|
|
else if (act->op == LD_OP_RENAME_BEFORE)
|
|
act->result = 0;
|
|
else
|
|
act->result = -ENOLS;
|
|
list_del(&act->list);
|
|
list_add_tail(&act->list, &tmp_act);
|
|
}
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
|
|
/*
|
|
* If this freed a sanlock vg that had gl enabled, and other sanlock
|
|
* vgs exist, return a flag so the command can warn that the gl has
|
|
* been removed and may need to be enabled in another sanlock vg.
|
|
*/
|
|
|
|
if (free_vg && ls->sanlock_gl_enabled && act_op_free) {
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
if (other_sanlock_vgs_exist(ls)) {
|
|
act_op_free->flags |= LD_AF_WARN_GL_REMOVED;
|
|
gl_vg_removed = 1;
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
}
|
|
|
|
pthread_mutex_lock(&client_mutex);
|
|
list_for_each_entry_safe(act, safe, &tmp_act, list) {
|
|
list_del(&act->list);
|
|
list_add_tail(&act->list, &client_results);
|
|
}
|
|
pthread_cond_signal(&client_cond);
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
ls->thread_done = 1;
|
|
ls->free_vg = free_vg;
|
|
ls->drop_vg = drop_vg;
|
|
if (ls->lm_type == LD_LM_DLM && !strcmp(ls->name, gl_lsname_dlm))
|
|
global_dlm_lockspace_exists = 0;
|
|
if (ls->lm_type == LD_LM_IDM && !strcmp(ls->name, gl_lsname_idm))
|
|
global_idm_lockspace_exists = 0;
|
|
|
|
/*
|
|
* Avoid a name collision of the same lockspace is added again before
|
|
* this thread is cleaned up. We just set ls->name to a "junk" value
|
|
* for the short period until the struct is freed. We could make it
|
|
* blank or fill it with garbage, but instead set it to REM:<name>
|
|
* to make it easier to follow progress of freeing is via log_debug.
|
|
*/
|
|
memset(tmp_name, 0, sizeof(tmp_name));
|
|
memcpy(tmp_name, "REM:", 4);
|
|
strncpy(tmp_name+4, ls->name, sizeof(tmp_name)-4);
|
|
memcpy(ls->name, tmp_name, sizeof(ls->name));
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
/* worker_thread will join this thread, and free the ls */
|
|
pthread_mutex_lock(&worker_mutex);
|
|
worker_wake = 1;
|
|
pthread_cond_signal(&worker_cond);
|
|
pthread_mutex_unlock(&worker_mutex);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
int lockspaces_empty(void)
|
|
{
|
|
int rv;
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
rv = list_empty(&lockspaces);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
return rv;
|
|
}
|
|
|
|
/*
|
|
* lockspaces_mutex is locked
|
|
*
|
|
* When duplicate sanlock global locks have been seen,
|
|
* this function has a secondary job of counting the
|
|
* number of lockspaces that exist with the gl enabled,
|
|
* with the side effect of setting sanlock_gl_dup back to
|
|
* zero when the duplicates have been removed/disabled.
|
|
*/
|
|
|
|
static struct lockspace *find_lockspace_name(char *ls_name)
|
|
{
|
|
struct lockspace *ls_found = NULL;
|
|
struct lockspace *ls;
|
|
int gl_count = 0;
|
|
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
if (!strcmp(ls->name, ls_name))
|
|
ls_found = ls;
|
|
|
|
if (!sanlock_gl_dup && ls_found)
|
|
return ls_found;
|
|
|
|
if (sanlock_gl_dup && ls->sanlock_gl_enabled)
|
|
gl_count++;
|
|
}
|
|
|
|
/* this is the side effect we want from this function */
|
|
if (sanlock_gl_dup && gl_count < 2)
|
|
sanlock_gl_dup = 0;
|
|
|
|
return ls_found;
|
|
}
|
|
|
|
/*
|
|
* If lvm_<vg_name> is longer than max lockspace name (64) we just ignore the
|
|
* extra characters. For sanlock vgs, the name is shortened further to 48 in
|
|
* the sanlock code.
|
|
*/
|
|
|
|
static int vg_ls_name(const char *vg_name, char *ls_name)
|
|
{
|
|
if (snprintf(ls_name, MAX_NAME, "%s%s", LVM_LS_PREFIX, vg_name) >= MAX_NAME) {
|
|
log_error("vg name too long %s", vg_name);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* FIXME: add mutex for gl_lsname_ ? */
|
|
|
|
static void gl_ls_name(char *ls_name)
|
|
{
|
|
if (gl_use_dlm)
|
|
memcpy(ls_name, gl_lsname_dlm, MAX_NAME);
|
|
else if (gl_use_sanlock)
|
|
memcpy(ls_name, gl_lsname_sanlock, MAX_NAME);
|
|
else if (gl_use_idm)
|
|
memcpy(ls_name, gl_lsname_idm, MAX_NAME);
|
|
else
|
|
memset(ls_name, 0, MAX_NAME);
|
|
}
|
|
|
|
/*
|
|
* When this function returns an error, the caller needs to deal
|
|
* with act (in the cases where act exists).
|
|
*/
|
|
|
|
static int add_lockspace_thread(const char *ls_name,
|
|
const char *vg_name,
|
|
const char *vg_uuid,
|
|
int lm_type, const char *vg_args,
|
|
struct action *act)
|
|
{
|
|
struct lockspace *ls, *ls2;
|
|
struct resource *r;
|
|
int rv;
|
|
|
|
log_debug("add_lockspace_thread %s %s version %u",
|
|
lm_str(lm_type), ls_name, act ? act->version : 0);
|
|
|
|
if (!(ls = alloc_lockspace()))
|
|
return -ENOMEM;
|
|
|
|
strncpy(ls->name, ls_name, MAX_NAME);
|
|
ls->lm_type = lm_type;
|
|
|
|
if (act) {
|
|
ls->start_client_id = act->client_id;
|
|
|
|
/*
|
|
* Copy PV list to lockspact structure, so this is
|
|
* used for VG locking for idm scheme.
|
|
*/
|
|
if (lm_type == LD_LM_IDM &&
|
|
!alloc_and_copy_pvs_path(&ls->pvs, &act->pvs)) {
|
|
free(ls);
|
|
return -ENOMEM;
|
|
}
|
|
}
|
|
|
|
if (vg_uuid)
|
|
/* coverity[buffer_size_warning] */
|
|
memccpy(ls->vg_uuid, vg_uuid, 0, 64);
|
|
|
|
if (vg_name)
|
|
dm_strncpy(ls->vg_name, vg_name, sizeof(ls->vg_name));
|
|
|
|
if (vg_args)
|
|
dm_strncpy(ls->vg_args, vg_args, sizeof(ls->vg_args));
|
|
|
|
if (act)
|
|
ls->host_id = act->host_id;
|
|
|
|
if (!(r = alloc_resource())) {
|
|
free(ls);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
r->type = LD_RT_VG;
|
|
r->mode = LD_LK_UN;
|
|
r->use_vb = 1;
|
|
strncpy(r->name, R_NAME_VG, MAX_NAME);
|
|
list_add_tail(&r->list, &ls->resources);
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
ls2 = find_lockspace_name(ls->name);
|
|
if (ls2) {
|
|
/*
|
|
* If find an existed lockspace, we need to update the PV list
|
|
* based on the latest information, and release for the old
|
|
* PV list in case it keeps stale information.
|
|
*/
|
|
free_pvs_path(&ls2->pvs);
|
|
if (lm_type == LD_LM_IDM &&
|
|
!alloc_and_copy_pvs_path(&ls2->pvs, &ls->pvs)) {
|
|
log_debug("add_lockspace_thread %s fails to allocate pvs", ls->name);
|
|
rv = -ENOMEM;
|
|
} else if (ls2->thread_stop) {
|
|
log_debug("add_lockspace_thread %s exists and stopping", ls->name);
|
|
rv = -EAGAIN;
|
|
} else if (!ls2->create_fail && !ls2->create_done) {
|
|
log_debug("add_lockspace_thread %s exists and starting", ls->name);
|
|
rv = -ESTARTING;
|
|
} else {
|
|
log_debug("add_lockspace_thread %s exists", ls->name);
|
|
rv = -EEXIST;
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
free_resource(r);
|
|
free_pvs_path(&ls->pvs);
|
|
free(ls);
|
|
return rv;
|
|
}
|
|
|
|
/*
|
|
* act will be null when this lockspace is added automatically/internally
|
|
* and not by an explicit client action that wants a result.
|
|
*/
|
|
if (act)
|
|
list_add(&act->list, &ls->actions);
|
|
|
|
if (ls->lm_type == LD_LM_DLM && !strcmp(ls->name, gl_lsname_dlm))
|
|
global_dlm_lockspace_exists = 1;
|
|
if (ls->lm_type == LD_LM_IDM && !strcmp(ls->name, gl_lsname_idm))
|
|
global_idm_lockspace_exists = 1;
|
|
list_add_tail(&ls->list, &lockspaces);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
rv = pthread_create(&ls->thread, NULL, lockspace_thread_main, ls);
|
|
if (rv < 0) {
|
|
log_error("add_lockspace_thread %s pthread error %d %d", ls->name, rv, errno);
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
list_del(&ls->list);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
free_resource(r);
|
|
free_pvs_path(&ls->pvs);
|
|
free(ls);
|
|
return rv;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* There is no variant for sanlock because, with sanlock, the global
|
|
* lockspace is one of the vg lockspaces.
|
|
*/
|
|
static int add_global_lockspace(char *ls_name, int lm_type,
|
|
struct action *act)
|
|
{
|
|
int rv;
|
|
|
|
if (global_dlm_lockspace_exists || global_idm_lockspace_exists)
|
|
return 0;
|
|
|
|
/*
|
|
* FIXME: if the dlm global lockspace is started without a global
|
|
* lock request, insert an internal gl sh lock request?
|
|
*/
|
|
|
|
rv = add_lockspace_thread(ls_name, NULL, NULL, lm_type, NULL, act);
|
|
if (rv < 0)
|
|
log_debug("add_global_lockspace add_lockspace_thread %d", rv);
|
|
|
|
/*
|
|
* EAGAIN may be returned for a short period because
|
|
* global_dlm_lockspace_exists is set to 0 before the
|
|
* ls is removed from the lockspaces list by the
|
|
* worker_thread.
|
|
*/
|
|
|
|
return rv;
|
|
}
|
|
|
|
/*
|
|
* When DLM or IDM locking scheme is used for global lock, if the global
|
|
* lockspace is the only one left, then stop it. This is not used for
|
|
* an explicit rem_lockspace action from the client, only for auto
|
|
* remove.
|
|
*/
|
|
static int rem_global_lockspace(char *ls_name)
|
|
{
|
|
struct lockspace *ls, *ls_gl = NULL;
|
|
int others = 0;
|
|
int rv = 0;
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
if (!strcmp(ls->name, ls_name)) {
|
|
ls_gl = ls;
|
|
continue;
|
|
}
|
|
if (ls->thread_stop)
|
|
continue;
|
|
others++;
|
|
break;
|
|
}
|
|
|
|
if (others) {
|
|
rv = -EAGAIN;
|
|
goto out;
|
|
}
|
|
|
|
if (!ls_gl) {
|
|
rv = -ENOENT;
|
|
goto out;
|
|
}
|
|
|
|
ls = ls_gl;
|
|
pthread_mutex_lock(&ls->mutex);
|
|
ls->thread_stop = 1;
|
|
ls->thread_work = 1;
|
|
pthread_cond_signal(&ls->cond);
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
rv = 0;
|
|
out:
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
return rv;
|
|
}
|
|
|
|
static int add_dlm_global_lockspace(struct action *act)
|
|
{
|
|
return add_global_lockspace(gl_lsname_dlm, LD_LM_DLM, act);
|
|
}
|
|
|
|
static int rem_dlm_global_lockspace(void)
|
|
{
|
|
return rem_global_lockspace(gl_lsname_dlm);
|
|
}
|
|
|
|
static int add_idm_global_lockspace(struct action *act)
|
|
{
|
|
return add_global_lockspace(gl_lsname_idm, LD_LM_IDM, act);
|
|
}
|
|
|
|
static int rem_idm_global_lockspace(void)
|
|
{
|
|
return rem_global_lockspace(gl_lsname_idm);
|
|
}
|
|
|
|
/*
|
|
* When the first dlm lockspace is added for a vg, automatically add a separate
|
|
* dlm lockspace for the global lock.
|
|
*
|
|
* For sanlock, a separate lockspace is not used for the global lock, but the
|
|
* gl lock lives in a vg lockspace, (although it's recommended to create a
|
|
* special vg dedicated to holding the gl).
|
|
*/
|
|
|
|
static int add_lockspace(struct action *act)
|
|
{
|
|
char ls_name[MAX_NAME+1];
|
|
int rv;
|
|
|
|
memset(ls_name, 0, sizeof(ls_name));
|
|
|
|
/*
|
|
* FIXME: I don't think this is used any more.
|
|
* Remove it, or add the ability to start the global
|
|
* dlm lockspace using lvmlockctl?
|
|
*/
|
|
if (act->rt == LD_RT_GL) {
|
|
if (gl_use_dlm) {
|
|
rv = add_dlm_global_lockspace(act);
|
|
return rv;
|
|
} else if (gl_use_idm) {
|
|
rv = add_idm_global_lockspace(act);
|
|
return rv;
|
|
} else {
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
if (act->rt == LD_RT_VG) {
|
|
if (gl_use_dlm)
|
|
add_dlm_global_lockspace(NULL);
|
|
else if (gl_use_idm)
|
|
add_idm_global_lockspace(NULL);
|
|
|
|
vg_ls_name(act->vg_name, ls_name);
|
|
|
|
rv = add_lockspace_thread(ls_name, act->vg_name, act->vg_uuid,
|
|
act->lm_type, act->vg_args,
|
|
act);
|
|
if (rv)
|
|
log_debug("add_lockspace %s add_lockspace_thread %d", ls_name, rv);
|
|
return rv;
|
|
}
|
|
|
|
log_error("add_lockspace bad type %d", act->rt);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* vgchange --lock-stop vgname will lock the vg ex, then send a stop,
|
|
* so we exect to find the ex vg lock held here, and will automatically
|
|
* unlock it when stopping.
|
|
*
|
|
* Should we attempt to stop the lockspace containing the gl last?
|
|
*/
|
|
|
|
static int rem_lockspace(struct action *act)
|
|
{
|
|
struct lockspace *ls;
|
|
char ls_name[MAX_NAME+1];
|
|
int force = act->flags & LD_AF_FORCE;
|
|
int rt = act->rt;
|
|
|
|
if (act->rt == LD_RT_GL && act->lm_type != LD_LM_DLM)
|
|
return -EINVAL;
|
|
|
|
memset(ls_name, 0, sizeof(ls_name));
|
|
|
|
if (act->rt == LD_RT_GL)
|
|
gl_ls_name(ls_name);
|
|
else
|
|
vg_ls_name(act->vg_name, ls_name);
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
ls = find_lockspace_name(ls_name);
|
|
if (!ls) {
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
return -ENOLS;
|
|
}
|
|
|
|
pthread_mutex_lock(&ls->mutex);
|
|
if (ls->thread_stop) {
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
return -ESTALE;
|
|
}
|
|
|
|
if (!force && for_each_lock(ls, LOCKS_EXIST_LV)) {
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
return -EBUSY;
|
|
}
|
|
ls->thread_work = 1;
|
|
ls->thread_stop = 1;
|
|
list_add_tail(&act->list, &ls->actions);
|
|
pthread_cond_signal(&ls->cond);
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
/*
|
|
* For DLM and IDM locking scheme, the global lockspace was
|
|
* automatically added when the first vg lockspace was added,
|
|
* now reverse that by automatically removing the dlm global
|
|
* lockspace when the last vg lockspace is removed.
|
|
*/
|
|
if (rt == LD_RT_VG && gl_use_dlm)
|
|
rem_dlm_global_lockspace();
|
|
else if (rt == LD_RT_VG && gl_use_idm)
|
|
rem_idm_global_lockspace();
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* count how many lockspaces started by this client are still starting;
|
|
* the client will use this to wait for all its start operations to finish
|
|
* (START_WAIT).
|
|
*/
|
|
|
|
static int count_lockspace_starting(uint32_t client_id)
|
|
{
|
|
struct lockspace *ls;
|
|
int count = 0;
|
|
int done = 0;
|
|
int fail = 0;
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
if (client_id && (ls->start_client_id != client_id))
|
|
continue;
|
|
|
|
if (!ls->create_done && !ls->create_fail) {
|
|
count++;
|
|
continue;
|
|
}
|
|
|
|
if (ls->create_done)
|
|
done++;
|
|
if (ls->create_fail)
|
|
fail++;
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
log_debug("count_lockspace_starting client %u count %d done %d fail %d",
|
|
client_id, count, done, fail);
|
|
|
|
return count;
|
|
}
|
|
|
|
/*
|
|
* Loop through all lockspaces, and:
|
|
* - if do_stop is set, stop any that are not stopped
|
|
* - if do_free is set, join any that are done stopping (and free ls)
|
|
*
|
|
* do_stop will not stop an ls with lv locks unless force is set.
|
|
*
|
|
* This function does not block or wait for anything.
|
|
*
|
|
* do_stop (no do_free):
|
|
* returns count of lockspaces that need stop (have locks and no force)
|
|
*
|
|
* do_free (no do_stop):
|
|
* returns count of lockspaces that are stopped and need freeing
|
|
*
|
|
* do_stop and do_free:
|
|
* returns sum of the previous two
|
|
*/
|
|
|
|
static int for_each_lockspace(int do_stop, int do_free, int do_force)
|
|
{
|
|
struct lockspace *ls, *safe;
|
|
int need_stop = 0;
|
|
int need_free = 0;
|
|
int stop_count = 0;
|
|
int free_count = 0;
|
|
int done;
|
|
int stop;
|
|
int perrno;
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
|
|
if (do_stop) {
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
|
|
pthread_mutex_lock(&ls->mutex);
|
|
if (ls->thread_stop) {
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
continue;
|
|
}
|
|
|
|
if (!do_force && for_each_lock(ls, LOCKS_EXIST_ANY)) {
|
|
need_stop++;
|
|
} else {
|
|
ls->thread_work = 1;
|
|
ls->thread_stop = 1;
|
|
pthread_cond_signal(&ls->cond);
|
|
stop_count++;
|
|
}
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
}
|
|
}
|
|
|
|
if (do_free) {
|
|
list_for_each_entry_safe(ls, safe, &lockspaces, list) {
|
|
|
|
pthread_mutex_lock(&ls->mutex);
|
|
done = ls->thread_done;
|
|
stop = ls->thread_stop;
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
|
|
/* This ls has locks and force is not set. */
|
|
if (!stop)
|
|
continue;
|
|
|
|
/*
|
|
* Once thread_done is set, we know that the lockspace_thread
|
|
* will not be using/touching the ls struct. Any other
|
|
* thread touches the ls struct under lockspaces_mutex.
|
|
*/
|
|
if (done) {
|
|
if ((perrno = pthread_join(ls->thread, NULL)))
|
|
log_error("pthread_join error %d", perrno);
|
|
|
|
list_del(&ls->list);
|
|
|
|
/* FIXME: will free_vg ever not be set? */
|
|
|
|
log_debug("free ls %s", ls->name);
|
|
|
|
if (ls->free_vg) {
|
|
/* In future we may need to free ls->actions here */
|
|
free_ls_resources(ls);
|
|
free_pvs_path(&ls->pvs);
|
|
free(ls);
|
|
free_count++;
|
|
}
|
|
} else {
|
|
need_free++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (list_empty(&lockspaces)) {
|
|
if (!gl_type_static) {
|
|
gl_use_dlm = 0;
|
|
gl_use_sanlock = 0;
|
|
gl_use_idm = 0;
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
if (stop_count || free_count || need_stop || need_free) {
|
|
log_debug("for_each_lockspace do_stop %d do_free %d "
|
|
"stop_count %d free_count %d need_stop %d need_free %d",
|
|
do_stop, do_free, stop_count, free_count, need_stop, need_free);
|
|
}
|
|
|
|
return need_stop + need_free;
|
|
}
|
|
|
|
/*
 * Repeat for_each_lockspace() once a second until it reports nothing
 * left to stop or free.  Only used while the daemon is exiting, so the
 * sleep/retry loop has no adverse impact.
 */

static void for_each_lockspace_retry(int do_stop, int do_free, int do_force)
{
	int remaining;

	while ((remaining = for_each_lockspace(do_stop, do_free, do_force))) {
		log_debug("for_each_lockspace_retry remaining %d", remaining);
		sleep(1);
	}
}
|
|
|
|
static int work_init_vg(struct action *act)
|
|
{
|
|
struct lockspace *ls;
|
|
char ls_name[MAX_NAME+1];
|
|
int rv = 0;
|
|
|
|
memset(ls_name, 0, sizeof(ls_name));
|
|
|
|
vg_ls_name(act->vg_name, ls_name);
|
|
|
|
/*
|
|
* The max dlm ls name is 64 and the max sanlock ls name is 48. So,
|
|
* after the "lvm_" prefix, only the first 60/44 characters of the VG
|
|
* name are used for the lockspace name. This will cause a collision
|
|
* in the lock manager if two different VG names have the first 60/44
|
|
* chars in common. At the time of vgcreate (here), check if any other
|
|
* VG's are known that would collide. If the collision is not detected
|
|
* at vgcreate time, it will be detected at start time and add_lockspace
|
|
* will fail for the second of the two matching ls names.
|
|
*/
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
if ((ls->lm_type == LD_LM_SANLOCK) && !strncmp(ls->name, ls_name, 48)) {
|
|
rv = -EEXIST;
|
|
break;
|
|
}
|
|
if ((ls->lm_type == LD_LM_DLM) && !strcmp(ls->name, ls_name)) {
|
|
rv = -EEXIST;
|
|
break;
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
if (rv == -EEXIST) {
|
|
log_error("Existing lockspace name %s matches new %s VG names %s %s",
|
|
ls->name, ls_name, ls->vg_name, act->vg_name);
|
|
return rv;
|
|
}
|
|
|
|
if (act->lm_type == LD_LM_SANLOCK)
|
|
rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args);
|
|
else if (act->lm_type == LD_LM_DLM)
|
|
rv = lm_init_vg_dlm(ls_name, act->vg_name, act->flags, act->vg_args);
|
|
else if (act->lm_type == LD_LM_IDM)
|
|
/* Non't do anything for IDM when initialize VG */
|
|
rv = 0;
|
|
else
|
|
rv = -EINVAL;
|
|
|
|
return rv;
|
|
}
|
|
|
|
static int work_rename_vg(struct action *act)
|
|
{
|
|
char ls_name[MAX_NAME+1];
|
|
int rv = 0;
|
|
|
|
memset(ls_name, 0, sizeof(ls_name));
|
|
|
|
vg_ls_name(act->vg_name, ls_name);
|
|
|
|
if (act->lm_type == LD_LM_SANLOCK)
|
|
rv = lm_rename_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args);
|
|
else if (act->lm_type == LD_LM_DLM)
|
|
return 0;
|
|
else
|
|
rv = -EINVAL;
|
|
|
|
return rv;
|
|
}
|
|
|
|
static void work_test_gl(void)
|
|
{
|
|
struct lockspace *ls;
|
|
int is_enabled = 0;
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
if (ls->lm_type != LD_LM_SANLOCK)
|
|
continue;
|
|
|
|
pthread_mutex_lock(&ls->mutex);
|
|
if (ls->create_done && !ls->thread_stop) {
|
|
is_enabled = lm_gl_is_enabled(ls);
|
|
if (is_enabled) {
|
|
log_debug("S %s worker found gl_is_enabled", ls->name);
|
|
dm_strncpy(gl_lsname_sanlock, ls->name, sizeof(gl_lsname_sanlock));
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
|
|
if (is_enabled)
|
|
break;
|
|
}
|
|
|
|
if (!is_enabled)
|
|
log_debug("worker found no gl_is_enabled");
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
}
|
|
|
|
static int work_init_lv(struct action *act)
|
|
{
|
|
struct lockspace *ls;
|
|
char ls_name[MAX_NAME+1];
|
|
char vg_args[MAX_ARGS+1];
|
|
char lv_args[MAX_ARGS+1];
|
|
uint64_t free_offset = 0;
|
|
int sector_size = 0;
|
|
int align_size = 0;
|
|
int lm_type = 0;
|
|
int rv = 0;
|
|
|
|
memset(ls_name, 0, sizeof(ls_name));
|
|
memset(vg_args, 0, sizeof(vg_args));
|
|
memset(lv_args, 0, sizeof(lv_args));
|
|
|
|
vg_ls_name(act->vg_name, ls_name);
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
ls = find_lockspace_name(ls_name);
|
|
if (ls) {
|
|
lm_type = ls->lm_type;
|
|
memcpy(vg_args, ls->vg_args, MAX_ARGS);
|
|
free_offset = ls->free_lock_offset;
|
|
sector_size = ls->free_lock_sector_size;
|
|
align_size = ls->free_lock_align_size;
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
if (!ls) {
|
|
lm_type = act->lm_type;
|
|
memcpy(vg_args, act->vg_args, MAX_ARGS);
|
|
}
|
|
|
|
if (act->lm_type != lm_type) {
|
|
log_error("init_lv ls_name %s wrong lm_type %d %d",
|
|
ls_name, act->lm_type, lm_type);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (lm_type == LD_LM_SANLOCK) {
|
|
rv = lm_init_lv_sanlock(ls_name, act->vg_name, act->lv_uuid,
|
|
vg_args, lv_args, sector_size, align_size, free_offset);
|
|
|
|
memcpy(act->lv_args, lv_args, MAX_ARGS);
|
|
return rv;
|
|
|
|
} else if (act->lm_type == LD_LM_DLM) {
|
|
return 0;
|
|
} else if (act->lm_type == LD_LM_IDM) {
|
|
return 0;
|
|
} else {
|
|
log_error("init_lv ls_name %s bad lm_type %d", ls_name, act->lm_type);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* When an action is queued for the worker_thread, it is processed right away.
|
|
* After processing, some actions need to be retried again in a short while.
|
|
* These actions are put on the delayed_list, and the worker_thread will
|
|
* process these delayed actions again in SHORT_DELAY_PERIOD.
|
|
*/
|
|
|
|
#define SHORT_DELAY_PERIOD 2
|
|
#define LONG_DELAY_PERIOD 60
|
|
|
|
static void *worker_thread_main(void *arg_in)
|
|
{
|
|
struct list_head delayed_list;
|
|
struct timespec ts;
|
|
struct action *act, *safe;
|
|
uint64_t last_delayed_time = 0;
|
|
int delay_sec = LONG_DELAY_PERIOD;
|
|
int rv;
|
|
|
|
INIT_LIST_HEAD(&delayed_list);
|
|
|
|
while (1) {
|
|
pthread_mutex_lock(&worker_mutex);
|
|
if (clock_gettime(CLOCK_REALTIME, &ts)) {
|
|
log_error("clock_gettime failed.");
|
|
ts.tv_sec = ts.tv_nsec = 0;
|
|
}
|
|
ts.tv_sec += delay_sec;
|
|
rv = 0;
|
|
act = NULL;
|
|
|
|
while (list_empty(&worker_list) && !worker_stop && !worker_wake && !rv) {
|
|
rv = pthread_cond_timedwait(&worker_cond, &worker_mutex, &ts);
|
|
}
|
|
worker_wake = 0;
|
|
|
|
if (worker_stop) {
|
|
pthread_mutex_unlock(&worker_mutex);
|
|
goto out;
|
|
}
|
|
|
|
if (!list_empty(&worker_list)) {
|
|
act = list_first_entry(&worker_list, struct action, list);
|
|
list_del(&act->list);
|
|
}
|
|
pthread_mutex_unlock(&worker_mutex);
|
|
|
|
/*
|
|
* Do new work actions before processing delayed work actions.
|
|
*/
|
|
|
|
if (!act)
|
|
goto delayed_work;
|
|
|
|
if (act->op == LD_OP_RUNNING_LM) {
|
|
int run_sanlock = lm_is_running_sanlock();
|
|
int run_dlm = lm_is_running_dlm();
|
|
int run_idm = lm_is_running_idm();
|
|
|
|
if (daemon_test) {
|
|
run_sanlock = gl_use_sanlock;
|
|
run_dlm = gl_use_dlm;
|
|
run_idm = gl_use_idm;
|
|
}
|
|
|
|
/*
|
|
* It's not possible to enable multiple locking schemes
|
|
* for global lock, otherwise, it must be conflict and
|
|
* reports it!
|
|
*/
|
|
if ((run_sanlock + run_dlm + run_idm) >= 2)
|
|
act->result = -EXFULL;
|
|
else if (!run_sanlock && !run_dlm && !run_idm)
|
|
act->result = -ENOLCK;
|
|
else if (run_sanlock)
|
|
act->result = LD_LM_SANLOCK;
|
|
else if (run_dlm)
|
|
act->result = LD_LM_DLM;
|
|
else if (run_idm)
|
|
act->result = LD_LM_IDM;
|
|
add_client_result(act);
|
|
|
|
} else if ((act->op == LD_OP_LOCK) && (act->flags & LD_AF_SEARCH_LS)) {
|
|
/*
|
|
* worker_thread used as a helper to search existing
|
|
* sanlock vgs for an enabled gl.
|
|
*/
|
|
log_debug("work search for gl");
|
|
work_test_gl();
|
|
|
|
/* try again to find a gl lockspace for this act */
|
|
rv = add_lock_action(act);
|
|
if (rv < 0) {
|
|
act->result = rv;
|
|
add_client_result(act);
|
|
}
|
|
|
|
} else if ((act->op == LD_OP_INIT) && (act->rt == LD_RT_VG)) {
|
|
log_debug("work init_vg %s", act->vg_name);
|
|
act->result = work_init_vg(act);
|
|
add_client_result(act);
|
|
|
|
} else if ((act->op == LD_OP_INIT) && (act->rt == LD_RT_LV)) {
|
|
log_debug("work init_lv %s/%s uuid %s", act->vg_name, act->lv_name, act->lv_uuid);
|
|
act->result = work_init_lv(act);
|
|
add_client_result(act);
|
|
|
|
} else if ((act->op == LD_OP_RENAME_FINAL) && (act->rt == LD_RT_VG)) {
|
|
log_debug("work rename_vg %s", act->vg_name);
|
|
act->result = work_rename_vg(act);
|
|
add_client_result(act);
|
|
|
|
} else if (act->op == LD_OP_START_WAIT) {
|
|
act->result = count_lockspace_starting(0);
|
|
if (!act->result)
|
|
add_client_result(act);
|
|
else
|
|
list_add(&act->list, &delayed_list);
|
|
|
|
} else if (act->op == LD_OP_STOP_ALL) {
|
|
act->result = for_each_lockspace(DO_STOP, DO_FREE, (act->flags & LD_AF_FORCE) ? DO_FORCE : NO_FORCE);
|
|
if (!act->result || !(act->flags & LD_AF_WAIT))
|
|
add_client_result(act);
|
|
else
|
|
list_add(&act->list, &delayed_list);
|
|
|
|
} else if (act->op == LD_OP_REFRESH_LV) {
|
|
log_debug("work refresh_lv %s %s", act->lv_uuid, act->path);
|
|
rv = lm_refresh_lv_start_dlm(act);
|
|
if (rv < 0) {
|
|
act->result = rv;
|
|
add_client_result(act);
|
|
} else
|
|
list_add(&act->list, &delayed_list);
|
|
|
|
} else {
|
|
log_error("work unknown op %d", act->op);
|
|
act->result = -EINVAL;
|
|
add_client_result(act);
|
|
}
|
|
|
|
delayed_work:
|
|
/*
|
|
* We may want to track retry times per action so that
|
|
* we can delay different actions by different amounts.
|
|
*/
|
|
|
|
if (monotime() - last_delayed_time < SHORT_DELAY_PERIOD) {
|
|
delay_sec = 1;
|
|
continue;
|
|
}
|
|
last_delayed_time = monotime();
|
|
|
|
list_for_each_entry_safe(act, safe, &delayed_list, list) {
|
|
if (act->op == LD_OP_START_WAIT) {
|
|
log_debug("work delayed start_wait for client %u", act->client_id);
|
|
act->result = count_lockspace_starting(0);
|
|
if (!act->result) {
|
|
list_del(&act->list);
|
|
add_client_result(act);
|
|
}
|
|
|
|
} else if (act->op == LD_OP_STOP_ALL) {
|
|
log_debug("work delayed stop_all");
|
|
act->result = for_each_lockspace(DO_STOP, DO_FREE, (act->flags & LD_AF_FORCE) ? DO_FORCE : NO_FORCE);
|
|
if (!act->result) {
|
|
list_del(&act->list);
|
|
act->result = 0;
|
|
add_client_result(act);
|
|
}
|
|
|
|
} else if (act->op == LD_OP_REFRESH_LV) {
|
|
log_debug("work delayed refresh_lv");
|
|
rv = lm_refresh_lv_check_dlm(act);
|
|
if (!rv) {
|
|
list_del(&act->list);
|
|
act->result = 0;
|
|
add_client_result(act);
|
|
} else if ((rv < 0) && (rv != -EAGAIN)) {
|
|
list_del(&act->list);
|
|
act->result = rv;
|
|
add_client_result(act);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This is not explicitly queued work, and not delayed work,
|
|
* but lockspace thread cleanup that's needed when a
|
|
* lockspace has been stopped/removed or failed to start.
|
|
*/
|
|
|
|
for_each_lockspace(NO_STOP, DO_FREE, NO_FORCE);
|
|
|
|
if (list_empty(&delayed_list))
|
|
delay_sec = LONG_DELAY_PERIOD;
|
|
else
|
|
delay_sec = 1;
|
|
}
|
|
out:
|
|
list_for_each_entry_safe(act, safe, &delayed_list, list) {
|
|
list_del(&act->list);
|
|
free_action(act);
|
|
}
|
|
|
|
pthread_mutex_lock(&worker_mutex);
|
|
list_for_each_entry_safe(act, safe, &worker_list, list) {
|
|
list_del(&act->list);
|
|
free_action(act);
|
|
}
|
|
pthread_mutex_unlock(&worker_mutex);
|
|
return NULL;
|
|
}
|
|
|
|
static int setup_worker_thread(void)
|
|
{
|
|
int rv;
|
|
|
|
INIT_LIST_HEAD(&worker_list);
|
|
|
|
pthread_mutex_init(&worker_mutex, NULL);
|
|
pthread_cond_init(&worker_cond, NULL);
|
|
|
|
rv = pthread_create(&worker_thread, NULL, worker_thread_main, NULL);
|
|
if (rv)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
static void close_worker_thread(void)
|
|
{
|
|
int perrno;
|
|
|
|
pthread_mutex_lock(&worker_mutex);
|
|
worker_stop = 1;
|
|
pthread_cond_signal(&worker_cond);
|
|
pthread_mutex_unlock(&worker_mutex);
|
|
|
|
if ((perrno = pthread_join(worker_thread, NULL)))
|
|
log_error("pthread_join worker_thread error %d", perrno);
|
|
}
|
|
|
|
/* client_mutex is locked */
|
|
static struct client *find_client_work(void)
|
|
{
|
|
struct client *cl;
|
|
|
|
list_for_each_entry(cl, &client_list, list) {
|
|
if (cl->recv || cl->dead)
|
|
return cl;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* client_mutex is locked */
|
|
static struct client *find_client_id(uint32_t id)
|
|
{
|
|
struct client *cl;
|
|
|
|
list_for_each_entry(cl, &client_list, list) {
|
|
if (cl->id == id)
|
|
return cl;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* client_mutex is locked */
|
|
static struct client *find_client_pi(int pi)
|
|
{
|
|
struct client *cl;
|
|
|
|
list_for_each_entry(cl, &client_list, list) {
|
|
if (cl->pi == pi)
|
|
return cl;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* wake up poll() because we have added an fd
|
|
* back into pollfd and poll() needs to be restarted
|
|
* to recognize it.
|
|
*/
|
|
static void restart_poll(void)
|
|
{
|
|
int rv;
|
|
rv = write(restart_fds[1], "w", 1);
|
|
if (!rv || rv < 0)
|
|
log_debug("restart_poll write %d", errno);
|
|
}
|
|
|
|
/* poll will take requests from client again, cl->mutex must be held */
|
|
static void client_resume(struct client *cl)
|
|
{
|
|
if (cl->dead)
|
|
return;
|
|
|
|
if (!cl->poll_ignore || cl->fd == -1 || cl->pi == -1) {
|
|
/* shouldn't happen */
|
|
log_error("client_resume %u bad state ig %d fd %d pi %d",
|
|
cl->id, cl->poll_ignore, cl->fd, cl->pi);
|
|
return;
|
|
}
|
|
|
|
pthread_mutex_lock(&pollfd_mutex);
|
|
if (pollfd[cl->pi].fd != POLL_FD_IGNORE) {
|
|
log_error("client_resume %u pi %d fd %d not IGNORE",
|
|
cl->id, cl->pi, cl->fd);
|
|
}
|
|
pollfd[cl->pi].fd = cl->fd;
|
|
pollfd[cl->pi].events = POLLIN;
|
|
pthread_mutex_unlock(&pollfd_mutex);
|
|
|
|
restart_poll();
|
|
}
|
|
|
|
/* called from client_thread, cl->mutex is held */
|
|
static int client_send_result(struct client *cl, struct action *act)
|
|
{
|
|
response res;
|
|
char result_flags[128];
|
|
int dump_len = 0;
|
|
int dump_fd = -1;
|
|
int rv = 0;
|
|
|
|
if (cl->dead) {
|
|
log_debug("send cl %u skip dead", cl->id);
|
|
return -1;
|
|
}
|
|
|
|
memset(result_flags, 0, sizeof(result_flags));
|
|
|
|
buffer_init(&res.buffer);
|
|
|
|
/*
|
|
* EUNATCH is returned when the global lock existed,
|
|
* but had been disabled when we tried to lock it,
|
|
* so we removed it, and no longer have a gl to lock.
|
|
*/
|
|
|
|
if (act->result == -EUNATCH)
|
|
act->result = -ENOLS;
|
|
|
|
/*
|
|
* init_vg with dlm|sanlock returns vg_args
|
|
* init_lv with sanlock returns lv_args
|
|
*/
|
|
|
|
if (act->result == -ENOLS) {
|
|
/*
|
|
* The lockspace could not be found, in which case
|
|
* the caller may want to know if any lockspaces exist
|
|
* or if lockspaces exist, but not one with the global lock.
|
|
* Given this detail, it may be able to procede without
|
|
* the lock.
|
|
*/
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
if (list_empty(&lockspaces))
|
|
strcat(result_flags, "NO_LOCKSPACES,");
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
if (gl_use_sanlock) {
|
|
if (!gl_lsname_sanlock[0])
|
|
strcat(result_flags, "NO_GL_LS,");
|
|
} else if (gl_use_dlm) {
|
|
if (!gl_lsname_dlm[0])
|
|
strcat(result_flags, "NO_GL_LS,");
|
|
} else if (gl_use_idm) {
|
|
if (!gl_lsname_idm[0])
|
|
strcat(result_flags, "NO_GL_LS,");
|
|
} else {
|
|
int found_lm = 0;
|
|
|
|
if (lm_support_dlm() && lm_is_running_dlm())
|
|
found_lm++;
|
|
if (lm_support_sanlock() && lm_is_running_sanlock())
|
|
found_lm++;
|
|
if (lm_support_idm() && lm_is_running_idm())
|
|
found_lm++;
|
|
|
|
if (!found_lm)
|
|
strcat(result_flags, "NO_GL_LS,NO_LM");
|
|
else
|
|
strcat(result_flags, "NO_GL_LS");
|
|
}
|
|
}
|
|
|
|
if (act->flags & LD_AF_DUP_GL_LS)
|
|
strcat(result_flags, "DUP_GL_LS,");
|
|
|
|
if ((act->flags & LD_AF_WARN_GL_REMOVED) || gl_vg_removed)
|
|
strcat(result_flags, "WARN_GL_REMOVED,");
|
|
|
|
if (act->flags & LD_AF_SH_EXISTS)
|
|
strcat(result_flags, "SH_EXISTS,");
|
|
|
|
if (act->op == LD_OP_INIT) {
|
|
/*
|
|
* init is a special case where lock args need
|
|
* to be passed back to the client.
|
|
*/
|
|
const char *vg_args = "none";
|
|
const char *lv_args = "none";
|
|
|
|
if (act->vg_args[0])
|
|
vg_args = act->vg_args;
|
|
|
|
if (act->lv_args[0])
|
|
lv_args = act->lv_args;
|
|
|
|
log_debug("send %s[%d] cl %u %s %s rv %d vg_args %s lv_args %s",
|
|
cl->name[0] ? cl->name : "client", cl->pid, cl->id,
|
|
op_str(act->op), rt_str(act->rt),
|
|
act->result, vg_args ? vg_args : "", lv_args ? lv_args : "");
|
|
|
|
res = daemon_reply_simple("OK",
|
|
"op = " FMTd64, (int64_t)act->op,
|
|
"op_result = " FMTd64, (int64_t) act->result,
|
|
"lm_result = " FMTd64, (int64_t) act->lm_rv,
|
|
"vg_lock_args = %s", vg_args,
|
|
"lv_lock_args = %s", lv_args,
|
|
"result_flags = %s", result_flags[0] ? result_flags : "none",
|
|
NULL);
|
|
|
|
} else if (act->op == LD_OP_QUERY_LOCK) {
|
|
|
|
log_debug("send %s[%d] cl %u %s %s rv %d mode %d",
|
|
cl->name[0] ? cl->name : "client", cl->pid, cl->id,
|
|
op_str(act->op), rt_str(act->rt),
|
|
act->result, act->mode);
|
|
|
|
res = daemon_reply_simple("OK",
|
|
"op = " FMTd64, (int64_t)act->op,
|
|
"op_result = " FMTd64, (int64_t) act->result,
|
|
"lock_type = %s", lm_str(act->lm_type),
|
|
"mode = %s", mode_str(act->mode),
|
|
NULL);
|
|
|
|
} else if (act->op == LD_OP_DUMP_LOG || act->op == LD_OP_DUMP_INFO) {
|
|
/*
|
|
* lvmlockctl creates the unix socket then asks us to write to it.
|
|
* FIXME: move processing this to a new dedicated query thread to
|
|
* avoid having a large data dump interfere with normal operation
|
|
* of the client thread?
|
|
*/
|
|
|
|
dump_fd = setup_dump_socket();
|
|
if (dump_fd < 0)
|
|
act->result = dump_fd;
|
|
else if (act->op == LD_OP_DUMP_LOG)
|
|
act->result = dump_log(&dump_len);
|
|
else if (act->op == LD_OP_DUMP_INFO)
|
|
act->result = dump_info(&dump_len);
|
|
else
|
|
act->result = -EINVAL;
|
|
|
|
log_debug("send %s[%d] cl %u dump result %d dump_len %d",
|
|
cl->name[0] ? cl->name : "client", cl->pid, cl->id,
|
|
act->result, dump_len);
|
|
|
|
res = daemon_reply_simple("OK",
|
|
"result = " FMTd64, (int64_t) act->result,
|
|
"dump_len = " FMTd64, (int64_t) dump_len,
|
|
NULL);
|
|
} else {
|
|
/*
|
|
* A normal reply.
|
|
*/
|
|
|
|
log_debug("send %s[%d] cl %u %s %s rv %d %s %s",
|
|
cl->name[0] ? cl->name : "client", cl->pid, cl->id,
|
|
op_mode_str(act->op, act->mode), rt_str(act->rt),
|
|
act->result, (act->result == -ENOLS) ? "ENOLS" : "", result_flags);
|
|
|
|
res = daemon_reply_simple("OK",
|
|
"op = " FMTd64, (int64_t) act->op,
|
|
"lock_type = %s", lm_str(act->lm_type),
|
|
"op_result = " FMTd64, (int64_t) act->result,
|
|
"lm_result = " FMTd64, (int64_t) act->lm_rv,
|
|
"result_flags = %s", result_flags[0] ? result_flags : "none",
|
|
NULL);
|
|
}
|
|
|
|
if (!buffer_write(cl->fd, &res.buffer)) {
|
|
rv = -errno;
|
|
if (rv >= 0)
|
|
rv = -1;
|
|
log_debug("send cl %u fd %d error %d", cl->id, cl->fd, rv);
|
|
}
|
|
|
|
buffer_destroy(&res.buffer);
|
|
|
|
client_resume(cl);
|
|
|
|
if (dump_fd >= 0) {
|
|
/* To avoid deadlock, send data here after the reply. */
|
|
send_dump_buf(dump_fd, dump_len);
|
|
if (close(dump_fd))
|
|
log_error("failed to close dump socket %d", dump_fd);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
/* called from client_thread */
|
|
static void client_purge(struct client *cl)
|
|
{
|
|
struct lockspace *ls;
|
|
struct action *act;
|
|
|
|
/*
|
|
* If the client made no lock requests, there can be
|
|
* no locks to release for it.
|
|
*/
|
|
if (!cl->lock_ops)
|
|
return;
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
if (!(act = alloc_action()))
|
|
continue;
|
|
|
|
act->op = LD_OP_CLOSE;
|
|
act->client_id = cl->id;
|
|
|
|
pthread_mutex_lock(&ls->mutex);
|
|
if (!ls->thread_stop) {
|
|
list_add_tail(&act->list, &ls->actions);
|
|
ls->thread_work = 1;
|
|
pthread_cond_signal(&ls->cond);
|
|
} else {
|
|
free_action(act);
|
|
}
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
}
|
|
|
|
static int add_lock_action(struct action *act)
|
|
{
|
|
struct lockspace *ls = NULL;
|
|
char ls_name[MAX_NAME+1];
|
|
|
|
memset(ls_name, 0, sizeof(ls_name));
|
|
|
|
/*
|
|
* Determine which lockspace this action is for, and set ls_name.
|
|
*/
|
|
|
|
if (act->rt == LD_RT_GL) {
|
|
/* Global lock is requested */
|
|
if (gl_use_sanlock && (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE)) {
|
|
vg_ls_name(act->vg_name, ls_name);
|
|
} else {
|
|
if (!gl_use_dlm && !gl_use_sanlock && !gl_use_idm) {
|
|
int run_sanlock = lm_is_running_sanlock();
|
|
int run_dlm = lm_is_running_dlm();
|
|
int run_idm = lm_is_running_idm();
|
|
|
|
if (run_sanlock + run_dlm + run_idm >= 2) {
|
|
log_error("global lock op %s mode %s: multiple lock managers running sanlock=%d dlm=%d idm=%d",
|
|
op_str(act->op), mode_str(act->mode), run_sanlock, run_dlm, run_idm);
|
|
} else if (!run_sanlock && !run_dlm && !run_idm) {
|
|
log_debug("global lock op %s mode %s: no lock manager running",
|
|
op_str(act->op), mode_str(act->mode));
|
|
} else {
|
|
if (run_dlm)
|
|
gl_use_dlm = 1;
|
|
else if (run_sanlock)
|
|
gl_use_sanlock = 1;
|
|
else if (run_idm)
|
|
gl_use_idm = 1;
|
|
log_debug("global lock op %s mode %s: gl_use_sanlock %d gl_use_dlm %d gl_use_idm %d",
|
|
op_str(act->op), mode_str(act->mode), gl_use_sanlock, gl_use_dlm, gl_use_idm);
|
|
}
|
|
}
|
|
gl_ls_name(ls_name);
|
|
}
|
|
} else {
|
|
/* VG lock is requested */
|
|
vg_ls_name(act->vg_name, ls_name);
|
|
}
|
|
|
|
retry:
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
if (ls_name[0])
|
|
ls = find_lockspace_name(ls_name);
|
|
if (!ls) {
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
if (act->op == LD_OP_UPDATE && act->rt == LD_RT_VG) {
|
|
log_debug("lockspace \"%s\" not found ignored for vg update", ls_name);
|
|
return -ENOLS;
|
|
|
|
} else if (act->flags & LD_AF_SEARCH_LS) {
|
|
/*
|
|
* Fail if we've already tried searching for the lockspace.
|
|
*/
|
|
log_debug("lockspace \"%s\" not found after search", ls_name);
|
|
return -ENOLS;
|
|
|
|
} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && gl_use_sanlock) {
|
|
/*
|
|
* The sanlock global lock may have been enabled in an existing VG,
|
|
* so search existing VGs for an enabled global lock.
|
|
*/
|
|
log_debug("lockspace \"%s\" not found for sanlock gl, searching...", ls_name);
|
|
act->flags |= LD_AF_SEARCH_LS;
|
|
add_work_action(act);
|
|
return 0;
|
|
|
|
} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && act->mode != LD_LK_UN && gl_use_dlm) {
|
|
/*
|
|
* Automatically start the dlm global lockspace when
|
|
* a command tries to acquire the global lock.
|
|
*/
|
|
log_debug("lockspace \"%s\" not found for dlm gl, adding...", ls_name);
|
|
act->flags |= LD_AF_SEARCH_LS;
|
|
act->flags |= LD_AF_WAIT_STARTING;
|
|
add_dlm_global_lockspace(NULL);
|
|
goto retry;
|
|
|
|
} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && act->mode != LD_LK_UN && gl_use_idm) {
|
|
/*
|
|
* Automatically start the idm global lockspace when
|
|
* a command tries to acquire the global lock.
|
|
*/
|
|
log_debug("lockspace \"%s\" not found for idm gl, adding...", ls_name);
|
|
act->flags |= LD_AF_SEARCH_LS;
|
|
act->flags |= LD_AF_WAIT_STARTING;
|
|
add_idm_global_lockspace(NULL);
|
|
goto retry;
|
|
|
|
} else if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
|
|
log_debug("lockspace \"%s\" not found for unlock ignored", ls_name);
|
|
return -ENOLS;
|
|
|
|
} else {
|
|
log_debug("lockspace \"%s\" not found", ls_name);
|
|
return -ENOLS;
|
|
}
|
|
}
|
|
|
|
if (act->lm_type == LD_LM_NONE) {
|
|
/* return to the command the type we are using */
|
|
act->lm_type = ls->lm_type;
|
|
} else if (act->lm_type != ls->lm_type) {
|
|
/* should not happen */
|
|
log_error("S %s add_lock_action bad lm_type %d ls %d",
|
|
ls_name, act->lm_type, ls->lm_type);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
return -EINVAL;
|
|
}
|
|
|
|
pthread_mutex_lock(&ls->mutex);
|
|
if (ls->thread_stop) {
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
log_error("lockspace is stopping %s", ls_name);
|
|
return -ESTALE;
|
|
}
|
|
|
|
if (!ls->create_fail && !ls->create_done && !(act->flags & LD_AF_WAIT_STARTING)) {
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
log_debug("lockspace is starting %s", ls_name);
|
|
return -ESTARTING;
|
|
}
|
|
|
|
list_add_tail(&act->list, &ls->actions);
|
|
ls->thread_work = 1;
|
|
pthread_cond_signal(&ls->cond);
|
|
pthread_mutex_unlock(&ls->mutex);
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
/* lockspace_thread_main / res_process take it from here */
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int str_to_op_rt(const char *req_name, int *op, int *rt)
|
|
{
|
|
if (!req_name)
|
|
goto out;
|
|
|
|
if (!strcmp(req_name, "hello")) {
|
|
*op = LD_OP_HELLO;
|
|
*rt = 0;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "quit")) {
|
|
*op = LD_OP_QUIT;
|
|
*rt = 0;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "info")) {
|
|
*op = LD_OP_DUMP_INFO;
|
|
*rt = 0;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "dump")) {
|
|
*op = LD_OP_DUMP_LOG;
|
|
*rt = 0;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "init_vg")) {
|
|
*op = LD_OP_INIT;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "init_lv")) {
|
|
*op = LD_OP_INIT;
|
|
*rt = LD_RT_LV;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "free_vg")) {
|
|
*op = LD_OP_FREE;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "busy_vg")) {
|
|
*op = LD_OP_BUSY;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "free_lv")) {
|
|
*op = LD_OP_FREE;
|
|
*rt = LD_RT_LV;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "start_vg")) {
|
|
*op = LD_OP_START;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "stop_vg")) {
|
|
*op = LD_OP_STOP;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "start_wait")) {
|
|
*op = LD_OP_START_WAIT;
|
|
*rt = 0;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "stop_all")) {
|
|
*op = LD_OP_STOP_ALL;
|
|
*rt = 0;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "lock_gl")) {
|
|
*op = LD_OP_LOCK;
|
|
*rt = LD_RT_GL;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "lock_vg")) {
|
|
*op = LD_OP_LOCK;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "lock_lv")) {
|
|
*op = LD_OP_LOCK;
|
|
*rt = LD_RT_LV;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "vg_update")) {
|
|
*op = LD_OP_UPDATE;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "enable_gl")) {
|
|
*op = LD_OP_ENABLE;
|
|
*rt = LD_RT_GL;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "disable_gl")) {
|
|
*op = LD_OP_DISABLE;
|
|
*rt = LD_RT_GL;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "rename_vg_before")) {
|
|
*op = LD_OP_RENAME_BEFORE;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "rename_vg_final")) {
|
|
*op = LD_OP_RENAME_FINAL;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "running_lm")) {
|
|
*op = LD_OP_RUNNING_LM;
|
|
*rt = 0;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "query_lock_vg")) {
|
|
*op = LD_OP_QUERY_LOCK;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "query_lock_lv")) {
|
|
*op = LD_OP_QUERY_LOCK;
|
|
*rt = LD_RT_LV;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "find_free_lock")) {
|
|
*op = LD_OP_FIND_FREE_LOCK;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "kill_vg")) {
|
|
*op = LD_OP_KILL_VG;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "drop_vg")) {
|
|
*op = LD_OP_DROP_VG;
|
|
*rt = LD_RT_VG;
|
|
return 0;
|
|
}
|
|
if (!strcmp(req_name, "refresh_lv")) {
|
|
*op = LD_OP_REFRESH_LV;
|
|
*rt = 0;
|
|
return 0;
|
|
}
|
|
out:
|
|
return -1;
|
|
}
|
|
|
|
static int str_to_mode(const char *str)
|
|
{
|
|
if (!str)
|
|
goto out;
|
|
if (!strcmp(str, "un"))
|
|
return LD_LK_UN;
|
|
if (!strcmp(str, "nl"))
|
|
return LD_LK_NL;
|
|
if (!strcmp(str, "sh"))
|
|
return LD_LK_SH;
|
|
if (!strcmp(str, "ex"))
|
|
return LD_LK_EX;
|
|
out:
|
|
return LD_LK_IV;
|
|
}
|
|
|
|
static int str_to_lm(const char *str)
|
|
{
|
|
if (!str || !strcmp(str, "none"))
|
|
return LD_LM_NONE;
|
|
if (!strcmp(str, "sanlock"))
|
|
return LD_LM_SANLOCK;
|
|
if (!strcmp(str, "dlm"))
|
|
return LD_LM_DLM;
|
|
if (!strcmp(str, "idm"))
|
|
return LD_LM_IDM;
|
|
return -2;
|
|
}
|
|
|
|
static uint32_t str_to_opts(const char *str)
|
|
{
|
|
uint32_t flags = 0;
|
|
|
|
if (!str)
|
|
goto out;
|
|
if (strstr(str, "persistent"))
|
|
flags |= LD_AF_PERSISTENT;
|
|
if (strstr(str, "unlock_cancel"))
|
|
flags |= LD_AF_UNLOCK_CANCEL;
|
|
if (strstr(str, "next_version"))
|
|
flags |= LD_AF_NEXT_VERSION;
|
|
if (strstr(str, "wait"))
|
|
flags |= LD_AF_WAIT;
|
|
if (strstr(str, "force"))
|
|
flags |= LD_AF_FORCE;
|
|
if (strstr(str, "ex_disable"))
|
|
flags |= LD_AF_EX_DISABLE;
|
|
if (strstr(str, "enable"))
|
|
flags |= LD_AF_ENABLE;
|
|
if (strstr(str, "disable"))
|
|
flags |= LD_AF_DISABLE;
|
|
|
|
/* FIXME: parse the flag values properly */
|
|
if (strstr(str, "adopt_only"))
|
|
flags |= LD_AF_ADOPT_ONLY;
|
|
else if (strstr(str, "adopt"))
|
|
flags |= LD_AF_ADOPT;
|
|
out:
|
|
return flags;
|
|
}
|
|
|
|
/*
|
|
* dump info
|
|
* client_list: each client struct
|
|
* lockspaces: each lockspace struct
|
|
* lockspace actions: each action struct
|
|
* lockspace resources: each resource struct
|
|
* lockspace resource actions: each action struct
|
|
* lockspace resource locks: each lock struct
|
|
*/
|
|
|
|
static int setup_dump_socket(void)
|
|
{
|
|
int s;
|
|
|
|
s = socket(AF_LOCAL, SOCK_DGRAM, 0);
|
|
if (s < 0)
|
|
return s;
|
|
|
|
memset(&dump_addr, 0, sizeof(dump_addr));
|
|
dump_addr.sun_family = AF_LOCAL;
|
|
strcpy(&dump_addr.sun_path[1], DUMP_SOCKET_NAME);
|
|
dump_addrlen = sizeof(sa_family_t) + strlen(dump_addr.sun_path+1) + 1;
|
|
|
|
return s;
|
|
}
|
|
|
|
#define MAX_SEND_LEN 65536
|
|
#define RESEND_DELAY_US 1000
|
|
#define RESEND_DELAY_US_MAX 500000
|
|
|
|
static void send_dump_buf(int fd, int dump_len)
|
|
{
|
|
int pos = 0;
|
|
int ret;
|
|
int send_len;
|
|
int delay = 0;
|
|
|
|
if (!dump_len)
|
|
return;
|
|
repeat:
|
|
if (dump_len - pos < MAX_SEND_LEN)
|
|
send_len = dump_len - pos;
|
|
else
|
|
send_len = MAX_SEND_LEN;
|
|
|
|
ret = sendto(fd, dump_buf + pos, send_len, MSG_NOSIGNAL | MSG_DONTWAIT,
|
|
(struct sockaddr *)&dump_addr, dump_addrlen);
|
|
if (ret < 0) {
|
|
if ((errno == EAGAIN || errno == EINTR) && (delay < RESEND_DELAY_US_MAX)) {
|
|
usleep(RESEND_DELAY_US);
|
|
delay += RESEND_DELAY_US;
|
|
goto repeat;
|
|
}
|
|
log_error("send_dump_buf delay %d errno %d", delay, errno);
|
|
return;
|
|
}
|
|
|
|
pos += ret;
|
|
|
|
if (pos < dump_len)
|
|
goto repeat;
|
|
|
|
log_debug("send_dump_buf delay %d total %d", delay, pos);
|
|
}
|
|
|
|
static int print_structs(const char *prefix, int pos, int len)
|
|
{
|
|
return snprintf(dump_buf + pos, len - pos,
|
|
"info=%s "
|
|
"unused_action_count=%d "
|
|
"unused_client_count=%d "
|
|
"unused_resource_count=%d "
|
|
"unused_lock_count=%d\n",
|
|
prefix,
|
|
unused_action_count,
|
|
unused_client_count,
|
|
unused_resource_count,
|
|
unused_lock_count);
|
|
}
|
|
|
|
static int print_client(struct client *cl, const char *prefix, int pos, int len)
|
|
{
|
|
return snprintf(dump_buf + pos, len - pos,
|
|
"info=%s "
|
|
"pid=%d "
|
|
"fd=%d "
|
|
"pi=%d "
|
|
"id=%u "
|
|
"name=%s\n",
|
|
prefix,
|
|
cl->pid,
|
|
cl->fd,
|
|
cl->pi,
|
|
cl->id,
|
|
cl->name[0] ? cl->name : ".");
|
|
}
|
|
|
|
static int print_lockspace(struct lockspace *ls, const char *prefix, int pos, int len)
|
|
{
|
|
return snprintf(dump_buf + pos, len - pos,
|
|
"info=%s "
|
|
"ls_name=%s "
|
|
"vg_name=%s "
|
|
"vg_uuid=%s "
|
|
"vg_sysid=%s "
|
|
"vg_args=%s "
|
|
"lm_type=%s "
|
|
"host_id=%llu "
|
|
"create_fail=%d "
|
|
"create_done=%d "
|
|
"thread_work=%d "
|
|
"thread_stop=%d "
|
|
"thread_done=%d "
|
|
"kill_vg=%d "
|
|
"drop_vg=%d "
|
|
"sanlock_gl_enabled=%d\n",
|
|
prefix,
|
|
ls->name,
|
|
ls->vg_name,
|
|
ls->vg_uuid,
|
|
ls->vg_sysid[0] ? ls->vg_sysid : ".",
|
|
ls->vg_args,
|
|
lm_str(ls->lm_type),
|
|
(unsigned long long)ls->host_id,
|
|
ls->create_fail ? 1 : 0,
|
|
ls->create_done ? 1 : 0,
|
|
ls->thread_work ? 1 : 0,
|
|
ls->thread_stop ? 1 : 0,
|
|
ls->thread_done ? 1 : 0,
|
|
ls->kill_vg,
|
|
ls->drop_vg,
|
|
ls->sanlock_gl_enabled ? 1 : 0);
|
|
}
|
|
|
|
static int print_action(struct action *act, const char *prefix, int pos, int len)
|
|
{
|
|
return snprintf(dump_buf + pos, len - pos,
|
|
"info=%s "
|
|
"client_id=%u "
|
|
"flags=0x%x "
|
|
"version=%u "
|
|
"op=%s "
|
|
"rt=%s "
|
|
"mode=%s "
|
|
"lm_type=%s "
|
|
"result=%d "
|
|
"lm_rv=%d\n",
|
|
prefix,
|
|
act->client_id,
|
|
act->flags,
|
|
act->version,
|
|
op_str(act->op),
|
|
rt_str(act->rt),
|
|
mode_str(act->mode),
|
|
lm_str(act->lm_type),
|
|
act->result,
|
|
act->lm_rv);
|
|
}
|
|
|
|
static int print_resource(struct resource *r, const char *prefix, int pos, int len)
|
|
{
|
|
return snprintf(dump_buf + pos, len - pos,
|
|
"info=%s "
|
|
"name=%s "
|
|
"type=%s "
|
|
"mode=%s "
|
|
"sh_count=%d "
|
|
"version=%u\n",
|
|
prefix,
|
|
r->name,
|
|
rt_str(r->type),
|
|
mode_str(r->mode),
|
|
r->sh_count,
|
|
r->version);
|
|
}
|
|
|
|
static int print_lock(struct lock *lk, const char *prefix, int pos, int len)
|
|
{
|
|
return snprintf(dump_buf + pos, len - pos,
|
|
"info=%s "
|
|
"mode=%s "
|
|
"version=%u "
|
|
"flags=0x%x "
|
|
"client_id=%u\n",
|
|
prefix,
|
|
mode_str(lk->mode),
|
|
lk->version,
|
|
lk->flags,
|
|
lk->client_id);
|
|
}
|
|
|
|
static int dump_info(int *dump_len)
|
|
{
|
|
struct client *cl;
|
|
struct lockspace *ls;
|
|
struct resource *r;
|
|
struct lock *lk;
|
|
struct action *act;
|
|
int len, pos, ret;
|
|
int rv = 0;
|
|
|
|
memset(dump_buf, 0, sizeof(dump_buf));
|
|
len = sizeof(dump_buf);
|
|
pos = 0;
|
|
|
|
/*
|
|
* memory
|
|
*/
|
|
|
|
pthread_mutex_lock(&unused_struct_mutex);
|
|
ret = print_structs("structs", pos, len);
|
|
if (ret >= len - pos) {
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
return -ENOSPC;
|
|
}
|
|
pos += ret;
|
|
pthread_mutex_unlock(&unused_struct_mutex);
|
|
|
|
/*
|
|
* clients
|
|
* Proper lock order is client_mutex then cl->mutex,
|
|
* but cl->mutex is already held so skip client info
|
|
* if it would block.
|
|
*/
|
|
if (pthread_mutex_trylock(&client_mutex))
|
|
goto print_ls;
|
|
list_for_each_entry(cl, &client_list, list) {
|
|
ret = print_client(cl, "client", pos, len);
|
|
if (ret >= len - pos) {
|
|
rv = -ENOSPC;
|
|
break;
|
|
}
|
|
pos += ret;
|
|
}
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
if (rv < 0)
|
|
return rv;
|
|
|
|
print_ls:
|
|
/*
|
|
* lockspaces with their action/resource/lock info
|
|
*/
|
|
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
list_for_each_entry(ls, &lockspaces, list) {
|
|
|
|
ret = print_lockspace(ls, "ls", pos, len);
|
|
if (ret >= len - pos) {
|
|
rv = -ENOSPC;
|
|
goto out;
|
|
}
|
|
pos += ret;
|
|
|
|
list_for_each_entry(act, &ls->actions, list) {
|
|
ret = print_action(act, "ls_action", pos, len);
|
|
if (ret >= len - pos) {
|
|
rv = -ENOSPC;
|
|
goto out;
|
|
}
|
|
pos += ret;
|
|
}
|
|
|
|
list_for_each_entry(r, &ls->resources, list) {
|
|
ret = print_resource(r, "r", pos, len);
|
|
if (ret >= len - pos) {
|
|
rv = -ENOSPC;
|
|
goto out;
|
|
}
|
|
pos += ret;
|
|
|
|
list_for_each_entry(lk, &r->locks, list) {
|
|
ret = print_lock(lk, "lk", pos, len);
|
|
if (ret >= len - pos) {
|
|
rv = -ENOSPC;
|
|
goto out;
|
|
}
|
|
pos += ret;
|
|
}
|
|
|
|
list_for_each_entry(act, &r->actions, list) {
|
|
ret = print_action(act, "r_action", pos, len);
|
|
if (ret >= len - pos) {
|
|
rv = -ENOSPC;
|
|
goto out;
|
|
}
|
|
pos += ret;
|
|
}
|
|
}
|
|
}
|
|
out:
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
|
|
*dump_len = pos;
|
|
|
|
return rv;
|
|
}
|
|
|
|
/* called from client_thread, cl->mutex is held */
|
|
static void client_recv_action(struct client *cl)
|
|
{
|
|
request req;
|
|
response res;
|
|
struct action *act;
|
|
const char *cl_name;
|
|
const char *vg_name;
|
|
const char *vg_uuid;
|
|
const char *vg_sysid;
|
|
const char *path;
|
|
const char *str;
|
|
struct pvs pvs;
|
|
char buf[18]; /* "path[%d]\0", %d outputs signed integer so max to 10 bytes */
|
|
int64_t val;
|
|
uint32_t opts = 0;
|
|
int result = 0;
|
|
int cl_pid;
|
|
int op, rt, lm, mode;
|
|
int rv, i;
|
|
|
|
buffer_init(&req.buffer);
|
|
|
|
rv = buffer_read(cl->fd, &req.buffer);
|
|
if (!rv) {
|
|
if (errno == ECONNRESET) {
|
|
log_debug("client recv %u ECONNRESET", cl->id);
|
|
cl->dead = 1;
|
|
} else {
|
|
log_error("client recv %u buffer_read error %d", cl->id, errno);
|
|
}
|
|
buffer_destroy(&req.buffer);
|
|
client_resume(cl);
|
|
return;
|
|
}
|
|
|
|
req.cft = config_tree_from_string_without_dup_node_check(req.buffer.mem);
|
|
if (!req.cft) {
|
|
log_error("client recv %u config_from_string error", cl->id);
|
|
buffer_destroy(&req.buffer);
|
|
client_resume(cl);
|
|
return;
|
|
}
|
|
|
|
str = daemon_request_str(req, "request", NULL);
|
|
rv = str_to_op_rt(str, &op, &rt);
|
|
if (rv < 0) {
|
|
log_error("client recv %u bad request name \"%s\"", cl->id, str ? str : "");
|
|
dm_config_destroy(req.cft);
|
|
buffer_destroy(&req.buffer);
|
|
client_resume(cl);
|
|
return;
|
|
}
|
|
|
|
if (op == LD_OP_HELLO || op == LD_OP_QUIT) {
|
|
|
|
/*
|
|
* FIXME: add the client command name to the hello messages
|
|
* so it can be saved in cl->name here.
|
|
*/
|
|
|
|
result = 0;
|
|
|
|
if (op == LD_OP_QUIT) {
|
|
log_debug("op quit");
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
if (list_empty(&lockspaces))
|
|
daemon_quit = 1;
|
|
else
|
|
result = -EBUSY;
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
}
|
|
|
|
buffer_init(&res.buffer);
|
|
|
|
res = daemon_reply_simple("OK",
|
|
"result = " FMTd64, (int64_t) result,
|
|
"protocol = %s", lvmlockd_protocol,
|
|
"version = " FMTd64, (int64_t) lvmlockd_protocol_version,
|
|
NULL);
|
|
buffer_write(cl->fd, &res.buffer);
|
|
buffer_destroy(&res.buffer);
|
|
dm_config_destroy(req.cft);
|
|
buffer_destroy(&req.buffer);
|
|
client_resume(cl);
|
|
return;
|
|
}
|
|
|
|
cl_name = daemon_request_str(req, "cmd", NULL);
|
|
cl_pid = daemon_request_int(req, "pid", 0);
|
|
vg_name = daemon_request_str(req, "vg_name", NULL);
|
|
vg_uuid = daemon_request_str(req, "vg_uuid", NULL);
|
|
vg_sysid = daemon_request_str(req, "vg_sysid", NULL);
|
|
str = daemon_request_str(req, "mode", NULL);
|
|
mode = str_to_mode(str);
|
|
str = daemon_request_str(req, "opts", NULL);
|
|
opts = str_to_opts(str);
|
|
str = daemon_request_str(req, "vg_lock_type", NULL);
|
|
lm = str_to_lm(str);
|
|
path = daemon_request_str(req, "path", NULL);
|
|
|
|
if (cl_pid && cl_pid != cl->pid)
|
|
log_error("client recv bad message pid %d client %d", cl_pid, cl->pid);
|
|
|
|
/* FIXME: do this in hello message instead */
|
|
if (!cl->name[0] && cl_name)
|
|
strncpy(cl->name, cl_name, MAX_NAME);
|
|
|
|
if (!gl_use_dlm && !gl_use_sanlock && !gl_use_idm && (lm > 0)) {
|
|
if (lm == LD_LM_DLM && lm_support_dlm())
|
|
gl_use_dlm = 1;
|
|
else if (lm == LD_LM_SANLOCK && lm_support_sanlock())
|
|
gl_use_sanlock = 1;
|
|
else if (lm == LD_LM_IDM && lm_support_idm())
|
|
gl_use_idm = 1;
|
|
|
|
log_debug("set gl_use_%s", lm_str(lm));
|
|
}
|
|
|
|
if (!(act = alloc_action())) {
|
|
log_error("No memory for action");
|
|
dm_config_destroy(req.cft);
|
|
buffer_destroy(&req.buffer);
|
|
client_resume(cl);
|
|
return;
|
|
}
|
|
|
|
act->client_id = cl->id;
|
|
act->op = op;
|
|
act->rt = rt;
|
|
act->mode = mode;
|
|
act->flags = opts;
|
|
act->lm_type = lm;
|
|
|
|
if (path)
|
|
act->path = strdup(path);
|
|
|
|
if (vg_name && strcmp(vg_name, "none"))
|
|
dm_strncpy(act->vg_name, vg_name, sizeof(act->vg_name));
|
|
|
|
if (vg_uuid && strcmp(vg_uuid, "none"))
|
|
memccpy(act->vg_uuid, vg_uuid, 0, 64);
|
|
|
|
if (vg_sysid && strcmp(vg_sysid, "none"))
|
|
dm_strncpy(act->vg_sysid, vg_sysid, sizeof(act->vg_sysid));
|
|
|
|
str = daemon_request_str(req, "lv_name", NULL);
|
|
if (str && strcmp(str, "none"))
|
|
strncpy(act->lv_name, str, MAX_NAME);
|
|
|
|
str = daemon_request_str(req, "lv_uuid", NULL);
|
|
if (str && strcmp(str, "none"))
|
|
strncpy(act->lv_uuid, str, MAX_NAME);
|
|
|
|
val = daemon_request_int(req, "version", 0);
|
|
if (val)
|
|
act->version = (uint32_t)val;
|
|
|
|
str = daemon_request_str(req, "vg_lock_args", NULL);
|
|
if (str && strcmp(str, "none"))
|
|
strncpy(act->vg_args, str, MAX_ARGS);
|
|
|
|
str = daemon_request_str(req, "lv_lock_args", NULL);
|
|
if (str && strcmp(str, "none"))
|
|
strncpy(act->lv_args, str, MAX_ARGS);
|
|
|
|
/* start_vg will include lvmlocal.conf local/host_id here */
|
|
val = daemon_request_int(req, "host_id", 0);
|
|
if (val)
|
|
act->host_id = val;
|
|
|
|
/* Create PV list for idm */
|
|
if (lm == LD_LM_IDM) {
|
|
memset(&pvs, 0x0, sizeof(pvs));
|
|
|
|
pvs.num = daemon_request_int(req, "path_num", 0);
|
|
log_error("pvs_num = %d", pvs.num);
|
|
|
|
if (!pvs.num)
|
|
goto skip_pvs_path;
|
|
|
|
/* Receive the pv list which is transferred from LVM command */
|
|
if (!alloc_pvs_path(&pvs, pvs.num)) {
|
|
log_error("fail to allocate pvs path");
|
|
rv = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
for (i = 0; i < pvs.num; i++) {
|
|
snprintf(buf, sizeof(buf), "path[%d]", i);
|
|
pvs.path[i] = (char *)daemon_request_str(req, buf, NULL);
|
|
}
|
|
|
|
if (!alloc_and_copy_pvs_path(&act->pvs, &pvs)) {
|
|
log_error("fail to allocate pvs path");
|
|
rv = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
if (pvs.path)
|
|
free(pvs.path);
|
|
pvs.path = NULL;
|
|
}
|
|
|
|
skip_pvs_path:
|
|
act->max_retries = daemon_request_int(req, "max_retries", DEFAULT_MAX_RETRIES);
|
|
|
|
dm_config_destroy(req.cft);
|
|
buffer_destroy(&req.buffer);
|
|
|
|
log_debug("recv %s[%d] cl %u %s %s \"%s\" flags %x",
|
|
cl->name[0] ? cl->name : "client", cl->pid, cl->id,
|
|
op_mode_str(act->op, act->mode), rt_str(act->rt), act->vg_name, opts);
|
|
|
|
if (lm == LD_LM_DLM && !lm_support_dlm()) {
|
|
log_debug("dlm not supported");
|
|
rv = -EPROTONOSUPPORT;
|
|
goto out;
|
|
}
|
|
|
|
if (lm == LD_LM_SANLOCK && !lm_support_sanlock()) {
|
|
log_debug("sanlock not supported");
|
|
rv = -EPROTONOSUPPORT;
|
|
goto out;
|
|
}
|
|
|
|
if (lm == LD_LM_IDM && !lm_support_idm()) {
|
|
log_debug("idm not supported");
|
|
rv = -EPROTONOSUPPORT;
|
|
goto out;
|
|
}
|
|
|
|
if (act->op == LD_OP_LOCK && act->mode != LD_LK_UN)
|
|
cl->lock_ops = 1;
|
|
|
|
switch (act->op) {
|
|
case LD_OP_START:
|
|
rv = add_lockspace(act);
|
|
break;
|
|
case LD_OP_STOP:
|
|
rv = rem_lockspace(act);
|
|
break;
|
|
case LD_OP_DUMP_LOG:
|
|
case LD_OP_DUMP_INFO:
|
|
/* The client thread reply will copy and send the dump. */
|
|
add_client_result(act);
|
|
rv = 0;
|
|
break;
|
|
case LD_OP_INIT:
|
|
case LD_OP_START_WAIT:
|
|
case LD_OP_STOP_ALL:
|
|
case LD_OP_RENAME_FINAL:
|
|
case LD_OP_RUNNING_LM:
|
|
case LD_OP_REFRESH_LV:
|
|
add_work_action(act);
|
|
rv = 0;
|
|
break;
|
|
case LD_OP_LOCK:
|
|
case LD_OP_UPDATE:
|
|
case LD_OP_ENABLE:
|
|
case LD_OP_DISABLE:
|
|
case LD_OP_FREE:
|
|
case LD_OP_RENAME_BEFORE:
|
|
case LD_OP_QUERY_LOCK:
|
|
case LD_OP_FIND_FREE_LOCK:
|
|
case LD_OP_KILL_VG:
|
|
case LD_OP_DROP_VG:
|
|
case LD_OP_BUSY:
|
|
rv = add_lock_action(act);
|
|
break;
|
|
default:
|
|
rv = -EINVAL;
|
|
};
|
|
|
|
out:
|
|
if (rv < 0) {
|
|
act->result = rv;
|
|
add_client_result(act);
|
|
}
|
|
}
|
|
|
|
static void *client_thread_main(void *arg_in)
|
|
{
|
|
struct client *cl;
|
|
struct action *act;
|
|
struct action *act_un;
|
|
uint32_t lock_acquire_count = 0, lock_acquire_written = 0;
|
|
int rv;
|
|
|
|
while (1) {
|
|
pthread_mutex_lock(&client_mutex);
|
|
while (!client_work && list_empty(&client_results)) {
|
|
if (client_stop) {
|
|
pthread_mutex_unlock(&client_mutex);
|
|
goto out;
|
|
}
|
|
pthread_cond_wait(&client_cond, &client_mutex);
|
|
}
|
|
|
|
/*
|
|
* Send outgoing results back to clients
|
|
*/
|
|
|
|
if (!list_empty(&client_results)) {
|
|
act = list_first_entry(&client_results, struct action, list);
|
|
list_del(&act->list);
|
|
cl = find_client_id(act->client_id);
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
if (cl) {
|
|
pthread_mutex_lock(&cl->mutex);
|
|
rv = client_send_result(cl, act);
|
|
pthread_mutex_unlock(&cl->mutex);
|
|
} else {
|
|
log_debug("no client %u for result", act->client_id);
|
|
rv = -1;
|
|
}
|
|
|
|
if (act->flags & LD_AF_LV_LOCK)
|
|
lock_acquire_count++;
|
|
|
|
/*
|
|
* The client failed after we acquired an LV lock for
|
|
* it, but before getting this reply saying it's done.
|
|
* So the lv will not be active and we should release
|
|
* the lv lock it requested.
|
|
*/
|
|
if ((rv < 0) && (act->flags & LD_AF_LV_LOCK)) {
|
|
log_debug("auto unlock lv for failed client %u", act->client_id);
|
|
if ((act_un = alloc_action())) {
|
|
memcpy(act_un, act, sizeof(struct action));
|
|
act_un->mode = LD_LK_UN;
|
|
act_un->flags |= LD_AF_LV_UNLOCK;
|
|
act_un->flags &= ~LD_AF_LV_LOCK;
|
|
add_lock_action(act_un);
|
|
}
|
|
}
|
|
|
|
free_action(act);
|
|
continue;
|
|
}
|
|
|
|
if (adopt_opt && (lock_acquire_count > lock_acquire_written)) {
|
|
lock_acquire_written = lock_acquire_count;
|
|
write_adopt_file();
|
|
}
|
|
|
|
/*
|
|
* Queue incoming actions for lockspace threads
|
|
*/
|
|
|
|
if (client_work) {
|
|
cl = find_client_work();
|
|
if (!cl)
|
|
client_work = 0;
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
if (!cl)
|
|
continue;
|
|
|
|
pthread_mutex_lock(&cl->mutex);
|
|
|
|
if (cl->recv) {
|
|
cl->recv = 0;
|
|
client_recv_action(cl);
|
|
}
|
|
|
|
if (cl->dead) {
|
|
/*
|
|
log_debug("client rem %d pi %d fd %d ig %d",
|
|
cl->id, cl->pi, cl->fd, cl->poll_ignore);
|
|
*/
|
|
|
|
/*
|
|
* If cl->dead was set in main_loop, then the
|
|
* fd has already been closed and the pollfd
|
|
* entry is already unused.
|
|
* main_loop set dead=1, ignore=0, pi=-1, fd=-1
|
|
*
|
|
* if cl->dead was not set in main_loop, but
|
|
* set in client_recv_action, then the main_loop
|
|
* should be ignoring this client fd.
|
|
* main_loop set ignore=1
|
|
*/
|
|
|
|
if (cl->poll_ignore) {
|
|
log_debug("client close %d pi %d fd %d",
|
|
cl->id, cl->pi, cl->fd);
|
|
/* assert cl->pi != -1 */
|
|
/* assert pollfd[pi].fd == FD_IGNORE */
|
|
if (close(cl->fd))
|
|
log_error("client close %d pi %d fd %d failed",
|
|
cl->id, cl->pi, cl->fd);
|
|
rem_pollfd(cl->pi);
|
|
cl->pi = -1;
|
|
cl->fd = -1;
|
|
cl->poll_ignore = 0;
|
|
} else {
|
|
/* main thread should have closed */
|
|
if (cl->pi != -1 || cl->fd != -1) {
|
|
log_error("client %d bad state pi %d fd %d",
|
|
cl->id, cl->pi, cl->fd);
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&cl->mutex);
|
|
|
|
pthread_mutex_lock(&client_mutex);
|
|
list_del(&cl->list);
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
client_purge(cl);
|
|
|
|
free_client(cl);
|
|
} else {
|
|
pthread_mutex_unlock(&cl->mutex);
|
|
}
|
|
} else
|
|
pthread_mutex_unlock(&client_mutex);
|
|
}
|
|
out:
|
|
if (adopt_opt && lock_acquire_written)
|
|
(void) unlink(adopt_file);
|
|
return NULL;
|
|
}
|
|
|
|
static int setup_client_thread(void)
|
|
{
|
|
int rv;
|
|
|
|
INIT_LIST_HEAD(&client_list);
|
|
INIT_LIST_HEAD(&client_results);
|
|
|
|
pthread_mutex_init(&client_mutex, NULL);
|
|
pthread_cond_init(&client_cond, NULL);
|
|
|
|
rv = pthread_create(&client_thread, NULL, client_thread_main, NULL);
|
|
if (rv)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
static void close_client_thread(void)
|
|
{
|
|
int perrno;
|
|
|
|
pthread_mutex_lock(&client_mutex);
|
|
client_stop = 1;
|
|
pthread_cond_signal(&client_cond);
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
if ((perrno = pthread_join(client_thread, NULL)))
|
|
log_error("pthread_join client_thread error %d", perrno);
|
|
}
|
|
|
|
static char _dm_uuid[DM_UUID_LEN];
|
|
|
|
static char *get_dm_uuid(char *dm_name)
|
|
{
|
|
struct dm_info info;
|
|
struct dm_task *dmt;
|
|
const char *uuid;
|
|
|
|
if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
|
|
goto fail_out;
|
|
|
|
if (!dm_task_set_name(dmt, dm_name))
|
|
goto fail;
|
|
|
|
if (!dm_task_run(dmt))
|
|
goto fail;
|
|
|
|
if (!dm_task_get_info(dmt, &info))
|
|
goto fail;
|
|
|
|
if (!info.exists)
|
|
goto fail;
|
|
|
|
uuid = dm_task_get_uuid(dmt);
|
|
if (!uuid) {
|
|
log_error("Failed to get uuid for device %s", dm_name);
|
|
goto fail;
|
|
}
|
|
|
|
if (strncmp(uuid, "LVM", 3)) {
|
|
log_debug("dm device %s is not from LVM", dm_name);
|
|
goto fail;
|
|
}
|
|
|
|
memset(_dm_uuid, 0, sizeof(_dm_uuid));
|
|
strncpy(_dm_uuid, uuid, sizeof(_dm_uuid)-1);
|
|
dm_task_destroy(dmt);
|
|
return _dm_uuid;
|
|
|
|
fail:
|
|
dm_task_destroy(dmt);
|
|
fail_out:
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* dm reports the LV uuid as:
|
|
* LVM-ydpRIdDWBDX25upmj2k0D4deat6oxH8er03T0f4xM8rPIV8XqIhwv3h8Y7xRWjMr
|
|
*
|
|
* the lock name for the LV is:
|
|
* r03T0f-4xM8-rPIV-8XqI-hwv3-h8Y7-xRWjMr
|
|
*
|
|
* This function formats both as:
|
|
* r03T0f4xM8rPIV8XqIhwv3h8Y7xRWjMr
|
|
*
|
|
* and returns 1 if they match.
|
|
*/
|
|
|
|
/*
 * Compare the LV part of a dm uuid with an LV lock name.
 *
 * dm_uuid:       uuid as reported by dm, "LVM-" + 32 char VG uuid +
 *                32 char LV uuid (the LV uuid starts at offset 36).
 * lv_lock_uuid:  lock name for the LV, the LV uuid with '-' separators.
 *
 * The lock name is compared, with its dashes removed, against characters
 * 36..68 of dm_uuid.  Position 68 is included, so a dm uuid that carries
 * anything after the LV uuid (for example a "-suffix") does not match.
 *
 * Returns 1 on a match, 0 otherwise.  NULL arguments, a dm uuid too short
 * to contain an LV uuid, and a lock name too long for the comparison buffer
 * all return 0 rather than reading or writing out of bounds.
 */
static int match_dm_uuid(char *dm_uuid, char *lv_lock_uuid)
{
	enum {
		LV_UUID_OFFSET = 36,	/* strlen("LVM-") + 32 char VG uuid */
		LV_UUID_END = 69	/* one past the last dm_uuid index compared */
	};
	char lock_id[64];
	char dm_id[64];
	size_t dm_len;
	size_t i, j;

	if (!dm_uuid || !lv_lock_uuid)
		return 0;

	memset(lock_id, 0, sizeof(lock_id));
	memset(dm_id, 0, sizeof(dm_id));

	/* Strip the dashes from the lock name, keeping room for the NUL. */
	for (i = 0, j = 0; lv_lock_uuid[i]; i++) {
		if (lv_lock_uuid[i] == '-')
			continue;
		if (j >= sizeof(lock_id) - 1)
			return 0;
		lock_id[j++] = lv_lock_uuid[i];
	}

	/* Without an LV uuid part there is nothing to compare against. */
	dm_len = strlen(dm_uuid);
	if (dm_len <= LV_UUID_OFFSET)
		return 0;

	/* Never read past the end of dm_uuid. */
	for (i = LV_UUID_OFFSET, j = 0; i < LV_UUID_END && i < dm_len; i++)
		dm_id[j++] = dm_uuid[i];

	return !strcmp(lock_id, dm_id);
}
|
|
|
|
/*
|
|
* All LVs with a lock_type are on ls->resources.
|
|
* Remove any that are not active. The remaining
|
|
* will have locks adopted.
|
|
*/
|
|
|
|
static int remove_inactive_lvs(struct list_head *vg_lockd)
|
|
{
|
|
struct lockspace *ls;
|
|
struct resource *r, *rsafe;
|
|
struct dm_names *names;
|
|
struct dm_task *dmt;
|
|
char *dm_uuid;
|
|
char *vgname, *lvname, *layer;
|
|
char namebuf[MAX_NAME+1];
|
|
unsigned next = 0;
|
|
int rv = 0;
|
|
|
|
if (!(dmt = dm_task_create(DM_DEVICE_LIST)))
|
|
return -1;
|
|
|
|
if (!dm_task_run(dmt)) {
|
|
log_error("Failed to get dm devices");
|
|
rv = -1;
|
|
goto ret;
|
|
}
|
|
|
|
if (!(names = dm_task_get_names(dmt))) {
|
|
log_error("Failed to get dm names");
|
|
rv = -1;
|
|
goto ret;
|
|
}
|
|
|
|
if (!names->dev) {
|
|
log_debug("dm names none found");
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* For each dm name, compare it to each lv in each lockd vg.
|
|
*/
|
|
|
|
do {
|
|
names = (struct dm_names *)((char *) names + next);
|
|
|
|
dm_uuid = get_dm_uuid(names->name);
|
|
if (!dm_uuid)
|
|
goto next_dmname;
|
|
|
|
vgname = NULL;
|
|
lvname = NULL;
|
|
layer = NULL;
|
|
|
|
memset(namebuf, 0, sizeof(namebuf));
|
|
strncpy(namebuf, names->name, MAX_NAME);
|
|
vgname = namebuf;
|
|
|
|
if (!dm_split_lvm_name(NULL, namebuf, &vgname, &lvname, &layer)) {
|
|
log_error("failed to split dm name %s", namebuf);
|
|
goto next_dmname;
|
|
}
|
|
|
|
log_debug("adopt found active dm %s %s lv %s/%s",
|
|
names->name, dm_uuid, vgname, lvname);
|
|
|
|
if (!vgname || !lvname) {
|
|
log_debug("dm name %s invalid split vg %s lv %s layer %s",
|
|
names->name, vgname ? vgname : "", lvname ? lvname : "", layer ? layer : "");
|
|
goto next_dmname;
|
|
}
|
|
|
|
list_for_each_entry(ls, vg_lockd, list) {
|
|
if (strcmp(vgname, ls->vg_name))
|
|
continue;
|
|
|
|
if (!strcmp(lvname, "lvmlock"))
|
|
continue;
|
|
|
|
list_for_each_entry(r, &ls->resources, list) {
|
|
if (!match_dm_uuid(dm_uuid, r->name))
|
|
continue;
|
|
|
|
/* Found an active LV in a lockd VG. */
|
|
log_debug("adopting %s", names->name);
|
|
r->adopt = 1;
|
|
goto next_dmname;
|
|
}
|
|
}
|
|
next_dmname:
|
|
next = names->next;
|
|
} while (next);
|
|
|
|
out:
|
|
/* Remove any struct resources that do not need locks adopted. */
|
|
list_for_each_entry(ls, vg_lockd, list) {
|
|
list_for_each_entry_safe(r, rsafe, &ls->resources, list) {
|
|
if (r->adopt) {
|
|
r->adopt = 0;
|
|
} else {
|
|
log_debug("lockd vg %s remove inactive lv %s", ls->vg_name, r->name);
|
|
list_del(&r->list);
|
|
free_resource(r);
|
|
}
|
|
}
|
|
}
|
|
ret:
|
|
dm_task_destroy(dmt);
|
|
return rv;
|
|
}
|
|
|
|
static void adopt_locks(void)
|
|
{
|
|
struct list_head ls_found;
|
|
struct list_head vg_lockd;
|
|
struct list_head to_unlock;
|
|
struct lockspace *ls, *lsafe;
|
|
struct lockspace *ls1, *l1safe;
|
|
struct lockspace *ls2, *l2safe;
|
|
struct resource *r, *rsafe;
|
|
struct action *act, *asafe;
|
|
int count_start = 0, count_start_done = 0, count_start_fail = 0;
|
|
int count_adopt = 0, count_adopt_done = 0, count_adopt_fail = 0;
|
|
int found, rv;
|
|
|
|
INIT_LIST_HEAD(&adopt_results);
|
|
|
|
INIT_LIST_HEAD(&ls_found);
|
|
INIT_LIST_HEAD(&vg_lockd);
|
|
INIT_LIST_HEAD(&to_unlock);
|
|
|
|
/*
|
|
* Get list of lockspaces from currently running lock managers.
|
|
* Get list of shared VGs from file written by prior lvmlockd.
|
|
* Get list of active LVs (in the shared VGs) from the file.
|
|
*/
|
|
|
|
if (lm_support_dlm() && lm_is_running_dlm()) {
|
|
rv = lm_get_lockspaces_dlm(&ls_found);
|
|
if (rv < 0)
|
|
goto fail;
|
|
}
|
|
|
|
if (lm_support_sanlock() && lm_is_running_sanlock()) {
|
|
rv = lm_get_lockspaces_sanlock(&ls_found);
|
|
if (rv < 0)
|
|
goto fail;
|
|
}
|
|
|
|
if (list_empty(&ls_found)) {
|
|
log_debug("No lockspaces found to adopt");
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Adds a struct lockspace to vg_lockd for each lockd VG.
|
|
* Adds a struct resource to ls->resources for each LV.
|
|
*/
|
|
rv = read_adopt_file(&vg_lockd);
|
|
if (rv < 0) {
|
|
log_error("adopt_locks read_adopt_file failed");
|
|
goto fail;
|
|
}
|
|
|
|
if (list_empty(&vg_lockd)) {
|
|
log_debug("No lockspaces in adopt file");
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* For each resource on each lockspace, check if the
|
|
* corresponding LV is active. If so, leave the
|
|
* resource struct, if not free the resource struct.
|
|
* The remain entries need to have locks adopted.
|
|
*/
|
|
rv = remove_inactive_lvs(&vg_lockd);
|
|
if (rv < 0) {
|
|
log_error("adopt_locks remove_inactive_lvs failed");
|
|
goto fail;
|
|
}
|
|
|
|
list_for_each_entry(ls, &ls_found, list) {
|
|
if (ls->lm_type == LD_LM_DLM)
|
|
gl_use_dlm = 1;
|
|
|
|
log_debug("adopt %s lockspace %s vg %s",
|
|
lm_str(ls->lm_type), ls->name, ls->vg_name);
|
|
}
|
|
|
|
if (!gl_use_dlm)
|
|
gl_use_sanlock = 1;
|
|
|
|
list_for_each_entry(ls, &vg_lockd, list) {
|
|
log_debug("adopt vg %s lock_type %s lock_args %s",
|
|
ls->vg_name, lm_str(ls->lm_type), ls->vg_args);
|
|
|
|
list_for_each_entry(r, &ls->resources, list)
|
|
log_debug("adopt lv %s %s", ls->vg_name, r->name);
|
|
}
|
|
|
|
/*
|
|
* Compare and merge the list of lockspaces in ls_found
|
|
* and the list of lockd VGs in vg_lockd.
|
|
*
|
|
* An ls from ls_found may not have had any active lvs when
|
|
* previous lvmlockd died, but the ls should still be joined,
|
|
* and checked for GL/VG locks.
|
|
*
|
|
* An ls from vg_lockd with active lvs should be in ls_found.
|
|
* If it's not then we might want to join the ls and acquire locks
|
|
* for the active lvs (as opposed to adopting orphans for them.)
|
|
* The orphan lock in the ls should have prevented the ls in
|
|
* the lock manager from going away.
|
|
*
|
|
* If an ls in vg_lockd has no active lvs and does not have
|
|
* a matching entry in ls_found, then skip it.
|
|
*
|
|
* An ls in ls_found should always have a matching ls in
|
|
* vg_lockd. If it doesn't, then maybe the vg has been
|
|
* removed even though the lockspace for the vg is still
|
|
* in the lock manager. Just leave the ls in the lm
|
|
* alone, and skip the ls_found entry.
|
|
*/
|
|
|
|
list_for_each_entry_safe(ls1, l1safe, &ls_found, list) {
|
|
|
|
/* The dlm global lockspace is special and doesn't match a VG. */
|
|
if ((ls1->lm_type == LD_LM_DLM) && !strcmp(ls1->name, gl_lsname_dlm)) {
|
|
list_del(&ls1->list);
|
|
free(ls1);
|
|
continue;
|
|
}
|
|
|
|
found = 0;
|
|
|
|
list_for_each_entry_safe(ls2, l2safe, &vg_lockd, list) {
|
|
if (strcmp(ls1->vg_name, ls2->vg_name))
|
|
continue;
|
|
|
|
/*
|
|
* LS in both ls_found and vg_lockd.
|
|
*/
|
|
log_debug("ls %s matches vg %s", ls1->name, ls2->vg_name);
|
|
memcpy(ls1->vg_uuid, ls2->vg_uuid, 64);
|
|
memcpy(ls1->vg_args, ls2->vg_args, MAX_ARGS);
|
|
list_for_each_entry_safe(r, rsafe, &ls2->resources, list) {
|
|
list_del(&r->list);
|
|
list_add(&r->list, &ls1->resources);
|
|
}
|
|
list_del(&ls2->list);
|
|
free(ls2);
|
|
found = 1;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* LS in ls_found, not in vg_lockd.
|
|
* An lvm lockspace found in the lock manager has no
|
|
* corresponding VG. This shouldn't usually
|
|
* happen, but it's possible the VG could have been removed
|
|
* while the orphaned lockspace from it was still around.
|
|
* Report an error and leave the ls in the lm alone.
|
|
*/
|
|
if (!found) {
|
|
log_error("No VG %s found for lockspace %s %s",
|
|
ls1->vg_name, ls1->name, lm_str(ls1->lm_type));
|
|
list_del(&ls1->list);
|
|
free(ls1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* LS in vg_lockd, not in ls_found.
|
|
* lockd vgs that do not have an existing lockspace.
|
|
* This wouldn't be unusual; we just skip the vg.
|
|
* But, if the vg has active lvs, then it should have had locks
|
|
* and a lockspace. Should we attempt to join the lockspace and
|
|
* acquire (not adopt) locks for these LVs?
|
|
*/
|
|
|
|
list_for_each_entry_safe(ls, lsafe, &vg_lockd, list) {
|
|
if (!list_empty(&ls->resources)) {
|
|
/* We should have found a lockspace. */
|
|
/* add this ls and acquire locks for ls->resources? */
|
|
log_error("No lockspace %s %s found for VG %s with active LVs",
|
|
ls->name, lm_str(ls->lm_type), ls->vg_name);
|
|
} else {
|
|
/* The VG wasn't started in the previous lvmlockd. */
|
|
log_debug("No ls found for vg %s", ls->vg_name);
|
|
}
|
|
|
|
list_del(&ls->list);
|
|
free_pvs_path(&ls->pvs);
|
|
free(ls);
|
|
}
|
|
|
|
/*
|
|
* Create and queue start actions to add lockspaces.
|
|
*/
|
|
|
|
if (gl_use_dlm) {
|
|
if (!(act = alloc_action()))
|
|
goto fail;
|
|
log_debug("adopt add dlm global lockspace");
|
|
act->op = LD_OP_START;
|
|
act->flags = (LD_AF_ADOPT | LD_AF_WAIT);
|
|
act->rt = LD_RT_GL;
|
|
act->lm_type = LD_LM_DLM;
|
|
act->client_id = INTERNAL_CLIENT_ID;
|
|
add_dlm_global_lockspace(act);
|
|
count_start++;
|
|
}
|
|
|
|
list_for_each_entry_safe(ls, lsafe, &ls_found, list) {
|
|
if (!(act = alloc_action()))
|
|
goto fail;
|
|
act->op = LD_OP_START;
|
|
act->flags = (LD_AF_ADOPT | LD_AF_WAIT);
|
|
act->rt = LD_RT_VG;
|
|
act->lm_type = ls->lm_type;
|
|
act->client_id = INTERNAL_CLIENT_ID;
|
|
dm_strncpy(act->vg_name, ls->vg_name, sizeof(act->vg_name));
|
|
memcpy(act->vg_uuid, ls->vg_uuid, 64);
|
|
memcpy(act->vg_args, ls->vg_args, MAX_ARGS);
|
|
act->host_id = ls->host_id;
|
|
|
|
log_debug("adopt add %s vg lockspace %s", lm_str(act->lm_type), act->vg_name);
|
|
|
|
rv = add_lockspace_thread(ls->name, act->vg_name, act->vg_uuid,
|
|
act->lm_type, act->vg_args, act);
|
|
if (rv < 0) {
|
|
log_error("Failed to create lockspace thread for VG %s", ls->vg_name);
|
|
list_del(&ls->list);
|
|
free_pvs_path(&ls->pvs);
|
|
free(ls);
|
|
free_action(act);
|
|
count_start_fail++;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* When the lockspace_thread is done with the start act,
|
|
* it will see the act ADOPT flag and move the act onto
|
|
* the adopt_results list for us to collect below.
|
|
*/
|
|
count_start++;
|
|
}
|
|
|
|
log_debug("adopt starting %d lockspaces", count_start);
|
|
|
|
/*
|
|
* Wait for all start/rejoin actions to complete. Each start action
|
|
* queued above will appear on the adopt_results list when finished.
|
|
*/
|
|
|
|
while (count_start_done < count_start) {
|
|
act = NULL;
|
|
|
|
pthread_mutex_lock(&client_mutex);
|
|
if (!list_empty(&adopt_results)) {
|
|
act = list_first_entry(&adopt_results, struct action, list);
|
|
list_del(&act->list);
|
|
}
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
if (!act) {
|
|
usleep(500000);
|
|
continue;
|
|
}
|
|
|
|
if (act->result < 0) {
|
|
log_error("adopt add lockspace failed vg %s %d", act->vg_name, act->result);
|
|
count_start_fail++;
|
|
}
|
|
|
|
free_action(act);
|
|
count_start_done++;
|
|
}
|
|
|
|
log_debug("adopt started %d lockspaces done %d fail %d",
|
|
count_start, count_start_done, count_start_fail);
|
|
|
|
/*
|
|
* Create lock-adopt actions for active LVs (ls->resources),
|
|
* and GL/VG locks (we don't know if these locks were held
|
|
* and orphaned by the last lvmlockd, so try to adopt them
|
|
* to see.)
|
|
*
|
|
* A proper struct lockspace now exists on the lockspaces list
|
|
* for each ls in ls_found. Lock ops for one of those
|
|
* lockspaces can be done as OP_LOCK actions queued using
|
|
* add_lock_action();
|
|
*
|
|
* Start by attempting to adopt the lock in the most likely
|
|
* mode it was left in (ex for lvs, sh for vg/gl). If
|
|
* the mode is wrong, the lm will return an error and we
|
|
* try again with the other mode.
|
|
*/
|
|
|
|
list_for_each_entry(ls, &ls_found, list) {
|
|
|
|
/*
|
|
* Adopt orphan LV locks.
|
|
*/
|
|
|
|
list_for_each_entry(r, &ls->resources, list) {
|
|
if (!(act = alloc_action()))
|
|
goto fail;
|
|
act->op = LD_OP_LOCK;
|
|
act->rt = LD_RT_LV;
|
|
act->mode = r->adopt_mode;
|
|
act->flags = (LD_AF_ADOPT_ONLY | LD_AF_PERSISTENT);
|
|
act->client_id = INTERNAL_CLIENT_ID;
|
|
act->lm_type = ls->lm_type;
|
|
dm_strncpy(act->vg_name, ls->vg_name, sizeof(act->vg_name));
|
|
dm_strncpy(act->lv_uuid, r->name, sizeof(act->lv_uuid));
|
|
dm_strncpy(act->lv_args, r->lv_args, sizeof(act->lv_args));
|
|
|
|
log_debug("adopt lock for lv %s %s", act->vg_name, act->lv_uuid);
|
|
|
|
rv = add_lock_action(act);
|
|
if (rv < 0) {
|
|
log_error("adopt add_lock_action lv %s %s error %d", act->vg_name, act->lv_uuid, rv);
|
|
count_adopt_fail++;
|
|
free_action(act);
|
|
} else {
|
|
count_adopt++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Adopt orphan VG lock.
|
|
*/
|
|
|
|
if (!(act = alloc_action()))
|
|
goto fail;
|
|
act->op = LD_OP_LOCK;
|
|
act->rt = LD_RT_VG;
|
|
act->mode = LD_LK_SH;
|
|
act->flags = LD_AF_ADOPT_ONLY;
|
|
act->client_id = INTERNAL_CLIENT_ID;
|
|
act->lm_type = ls->lm_type;
|
|
dm_strncpy(act->vg_name, ls->vg_name, sizeof(act->vg_name));
|
|
|
|
log_debug("adopt lock for vg %s", act->vg_name);
|
|
|
|
rv = add_lock_action(act);
|
|
if (rv < 0) {
|
|
log_error("adopt add_lock_action vg %s error %d", act->vg_name, rv);
|
|
count_adopt_fail++;
|
|
free_action(act);
|
|
} else {
|
|
count_adopt++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Adopt orphan GL lock.
|
|
*/
|
|
|
|
if (!(act = alloc_action()))
|
|
goto fail;
|
|
act->op = LD_OP_LOCK;
|
|
act->rt = LD_RT_GL;
|
|
act->mode = LD_LK_SH;
|
|
act->flags = LD_AF_ADOPT_ONLY;
|
|
act->client_id = INTERNAL_CLIENT_ID;
|
|
act->lm_type = (gl_use_sanlock ? LD_LM_SANLOCK : LD_LM_DLM);
|
|
|
|
log_debug("adopt lock for gl");
|
|
|
|
rv = add_lock_action(act);
|
|
if (rv < 0) {
|
|
log_error("adopt add_lock_action gl %s error %d", act->vg_name, rv);
|
|
count_adopt_fail++;
|
|
free_action(act);
|
|
} else {
|
|
count_adopt++;
|
|
}
|
|
|
|
/*
|
|
* Wait for lock-adopt actions to complete. The completed
|
|
* actions are passed back here via the adopt_results list.
|
|
*/
|
|
|
|
while (count_adopt_done < count_adopt) {
|
|
act = NULL;
|
|
|
|
pthread_mutex_lock(&client_mutex);
|
|
if (!list_empty(&adopt_results)) {
|
|
act = list_first_entry(&adopt_results, struct action, list);
|
|
list_del(&act->list);
|
|
}
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
if (!act) {
|
|
usleep(200000);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* lock adopt results
|
|
*/
|
|
|
|
if (act->result == -EADOPT_RETRY) {
|
|
/*
|
|
* Adopt failed because the orphan has a different mode
|
|
* than initially requested. Repeat the lock-adopt operation
|
|
* with the other mode. N.B. this logic depends on first
|
|
* trying sh then ex for GL/VG locks; for LV locks the mode
|
|
* from the adopt file is tried first, the alternate
|
|
* (if the mode in adopt file was wrong somehow.)
|
|
*/
|
|
|
|
if ((act->rt != LD_RT_LV) && (act->mode == LD_LK_SH)) {
|
|
/* GL/VG locks: attempt to adopt ex after sh failed. */
|
|
act->mode = LD_LK_EX;
|
|
rv = add_lock_action(act);
|
|
|
|
} else if (act->rt == LD_RT_LV) {
|
|
/* LV locks: attempt to adopt the other mode. */
|
|
if (act->mode == LD_LK_EX)
|
|
act->mode = LD_LK_SH;
|
|
else if (act->mode == LD_LK_SH)
|
|
act->mode = LD_LK_EX;
|
|
rv = add_lock_action(act);
|
|
|
|
} else {
|
|
log_error("Failed to adopt %s lock in vg %s error %d",
|
|
rt_str(act->rt), act->vg_name, act->result);
|
|
count_adopt_fail++;
|
|
count_adopt_done++;
|
|
free_action(act);
|
|
rv = 0;
|
|
}
|
|
|
|
if (rv < 0) {
|
|
log_error("adopt add_lock_action again %s", act->vg_name);
|
|
count_adopt_fail++;
|
|
count_adopt_done++;
|
|
free_action(act);
|
|
}
|
|
|
|
} else if (act->result == -EADOPT_NONE) {
|
|
/*
|
|
* No orphan lock exists. This is common for GL/VG locks
|
|
* because they may not have been held when lvmlockd exited.
|
|
* It's also expected for LV types that do not use a lock.
|
|
*/
|
|
|
|
if (act->rt == LD_RT_LV) {
|
|
/* Unexpected, we should have found an orphan. */
|
|
log_error("Failed to adopt LV lock for %s %s error %d",
|
|
act->vg_name, act->lv_uuid, act->result);
|
|
count_adopt_fail++;
|
|
} else {
|
|
/* Normal, no GL/VG lock was orphaned. */
|
|
log_debug("Did not adopt %s lock in vg %s error %d",
|
|
rt_str(act->rt), act->vg_name, act->result);
|
|
}
|
|
|
|
count_adopt_done++;
|
|
free_action(act);
|
|
|
|
} else if (act->result < 0) {
|
|
/*
|
|
* Some unexpected error.
|
|
*/
|
|
|
|
log_error("adopt lock rt %s vg %s lv %s error %d",
|
|
rt_str(act->rt), act->vg_name, act->lv_uuid, act->result);
|
|
count_adopt_fail++;
|
|
count_adopt_done++;
|
|
free_action(act);
|
|
|
|
} else {
|
|
/*
|
|
* Adopt success.
|
|
*/
|
|
|
|
if (act->rt == LD_RT_LV) {
|
|
log_debug("adopt success lv %s %s %s", act->vg_name, act->lv_uuid, mode_str(act->mode));
|
|
free_action(act);
|
|
} else if (act->rt == LD_RT_VG) {
|
|
log_debug("adopt success vg %s %s", act->vg_name, mode_str(act->mode));
|
|
list_add_tail(&act->list, &to_unlock);
|
|
} else if (act->rt == LD_RT_GL) {
|
|
log_debug("adopt success gl %s %s", act->vg_name, mode_str(act->mode));
|
|
list_add_tail(&act->list, &to_unlock);
|
|
}
|
|
count_adopt_done++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Release adopted GL/VG locks.
|
|
* The to_unlock actions were the ones used to lock-adopt the GL/VG locks;
|
|
* now use them to do the unlocks. These actions will again be placed
|
|
* on adopt_results for us to collect because they have the ADOPT flag set.
|
|
*/
|
|
|
|
count_adopt = 0;
|
|
count_adopt_done = 0;
|
|
|
|
list_for_each_entry_safe(act, asafe, &to_unlock, list) {
|
|
list_del(&act->list);
|
|
|
|
if (act->mode == LD_LK_EX) {
|
|
/*
|
|
* FIXME: we probably want to check somehow that
|
|
* there's no lvm command still running that's
|
|
* using this ex lock and changing things.
|
|
*/
|
|
log_warn("adopt releasing ex %s lock %s",
|
|
rt_str(act->rt), act->vg_name);
|
|
}
|
|
|
|
act->mode = LD_LK_UN;
|
|
|
|
log_debug("adopt unlock for %s %s", rt_str(act->rt), act->vg_name);
|
|
|
|
rv = add_lock_action(act);
|
|
if (rv < 0) {
|
|
log_error("adopt unlock add_lock_action error %d", rv);
|
|
free_action(act);
|
|
} else {
|
|
count_adopt++;
|
|
}
|
|
}
|
|
|
|
/* Wait for the unlocks to complete. */
|
|
|
|
while (count_adopt_done < count_adopt) {
|
|
act = NULL;
|
|
|
|
pthread_mutex_lock(&client_mutex);
|
|
if (!list_empty(&adopt_results)) {
|
|
act = list_first_entry(&adopt_results, struct action, list);
|
|
list_del(&act->list);
|
|
}
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
if (!act) {
|
|
usleep(200000);
|
|
continue;
|
|
}
|
|
|
|
if (act->result < 0)
|
|
log_error("adopt unlock error %d", act->result);
|
|
|
|
count_adopt_done++;
|
|
free_action(act);
|
|
}
|
|
|
|
|
|
/* Try to purge the orphan locks when lock manager is dlm */
|
|
if (lm_support_dlm() && lm_is_running_dlm()) {
|
|
list_for_each_entry(ls, &ls_found, list) {
|
|
pthread_mutex_lock(&lockspaces_mutex);
|
|
ls1 = find_lockspace_name(ls->name);
|
|
if (ls1) {
|
|
log_debug("ls: %s purge locks", ls->name);
|
|
lm_purge_locks_dlm(ls1);
|
|
}
|
|
pthread_mutex_unlock(&lockspaces_mutex);
|
|
}
|
|
}
|
|
|
|
if (count_start_fail || count_adopt_fail)
|
|
goto fail;
|
|
|
|
(void) unlink(adopt_file);
|
|
write_adopt_file();
|
|
log_debug("adopt_locks done");
|
|
return;
|
|
|
|
fail:
|
|
(void) unlink(adopt_file);
|
|
log_error("adopt_locks failed, reset host");
|
|
}
|
|
|
|
static int get_peer_pid(int fd)
|
|
{
|
|
struct ucred cred;
|
|
unsigned int len = sizeof(cred);
|
|
|
|
if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &len) != 0)
|
|
return -1;
|
|
|
|
return cred.pid;
|
|
}
|
|
|
|
static void process_listener(int poll_fd)
|
|
{
|
|
struct client *cl;
|
|
int fd, pi;
|
|
|
|
/* assert poll_fd == listen_fd */
|
|
|
|
fd = accept(listen_fd, NULL, NULL);
|
|
if (fd < 0)
|
|
return;
|
|
|
|
if (!(cl = alloc_client())) {
|
|
if (close(fd))
|
|
log_error("failed to close lockd poll fd");
|
|
return;
|
|
}
|
|
|
|
pi = add_pollfd(fd);
|
|
if (pi < 0) {
|
|
log_error("process_listener add_pollfd error %d", pi);
|
|
free_client(cl);
|
|
return;
|
|
}
|
|
|
|
cl->pi = pi;
|
|
cl->fd = fd;
|
|
cl->pid = get_peer_pid(fd);
|
|
|
|
pthread_mutex_init(&cl->mutex, NULL);
|
|
|
|
pthread_mutex_lock(&client_mutex);
|
|
client_ids++;
|
|
|
|
if (client_ids == INTERNAL_CLIENT_ID)
|
|
client_ids++;
|
|
if (!client_ids)
|
|
client_ids++;
|
|
|
|
cl->id = client_ids;
|
|
list_add_tail(&cl->list, &client_list);
|
|
pthread_mutex_unlock(&client_mutex);
|
|
|
|
log_debug("new cl %u pi %d fd %d", cl->id, cl->pi, cl->fd);
|
|
}
|
|
|
|
/*
|
|
* main loop polls on pipe[0] so that a thread can
|
|
* restart the poll by writing to pipe[1].
|
|
*/
|
|
static int setup_restart(void)
|
|
{
|
|
if (pipe(restart_fds)) {
|
|
log_error("setup_restart pipe error %d", errno);
|
|
return -1;
|
|
}
|
|
|
|
restart_pi = add_pollfd(restart_fds[0]);
|
|
if (restart_pi < 0)
|
|
return restart_pi;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* thread wrote 'w' to restart_fds[1] to restart poll()
|
|
* after adding an fd back into pollfd.
|
|
*/
|
|
static void process_restart(int fd)
|
|
{
|
|
char wake[1];
|
|
int rv;
|
|
|
|
/* assert fd == restart_fds[0] */
|
|
|
|
rv = read(restart_fds[0], wake, 1);
|
|
if (!rv || rv < 0)
|
|
log_debug("process_restart error %d", errno);
|
|
}
|
|
|
|
static void sigterm_handler(int sig __attribute__((unused)))
|
|
{
|
|
daemon_quit = 1;
|
|
}
|
|
|
|
/*
 * Main daemon function, installed as daemon_main in main().
 *
 * Startup: installs the SIGTERM handler, initializes global structures
 * and mutexes, opens syslog, adds the listening socket to the pollfd
 * set, starts the client and worker threads, creates the restart pipe,
 * and (when adopt_opt is set) attempts to adopt lockspaces/locks left
 * by a previous lvmlockd.
 *
 * Loop: poll()s on all registered fds.  Events on the listening socket
 * create new clients, events on the restart pipe just drain the wakeup
 * byte, and events on client fds mark the client as having data
 * (cl->recv) or as closed (cl->dead) and wake the client thread.
 *
 * Shutdown: when daemon_quit has been set by SIGTERM the loop exits
 * once for_each_lockspace() reports no remaining lockspaces; otherwise
 * the request is ignored and daemon_quit is cleared.
 *
 * Returns 1 on normal exit (libdaemon uses 1 for success), or a negative
 * value if startup fails.
 */
static int main_loop(daemon_state *ds_arg)
{
	struct client *cl;
	int i, rv, is_recv, is_dead;

	signal(SIGTERM, &sigterm_handler);

	rv = setup_structs();
	if (rv < 0) {
		log_error("Can't allocate memory");
		return rv;
	}

	strcpy(gl_lsname_dlm, S_NAME_GL_DLM);
	strcpy(gl_lsname_idm, S_NAME_GL_IDM);

	INIT_LIST_HEAD(&lockspaces);
	pthread_mutex_init(&lockspaces_mutex, NULL);
	pthread_mutex_init(&pollfd_mutex, NULL);
	pthread_mutex_init(&log_mutex, NULL);

	openlog("lvmlockd", LOG_CONS | LOG_PID, LOG_DAEMON);
	log_warn("lvmlockd started");

	listen_fd = ds_arg->socket_fd;
	listen_pi = add_pollfd(listen_fd);
	if (listen_pi < 0) {
		/*
		 * Without a pollfd slot for the listening socket no
		 * client could ever connect, so do not keep running.
		 */
		log_error("main_loop add_pollfd listen fd error %d", listen_pi);
		closelog();
		return listen_pi;
	}

	setup_client_thread();
	setup_worker_thread();

	rv = setup_restart();
	if (rv < 0) {
		/*
		 * Without the restart pipe other threads cannot wake
		 * poll() after re-enabling a client fd.  Undo the thread
		 * setup the same way the normal exit path does.
		 */
		log_error("main_loop setup_restart error %d", rv);
		close_worker_thread();
		close_client_thread();
		closelog();
		return rv;
	}

#ifdef USE_SD_NOTIFY
	sd_notify(0, "READY=1");
#endif

	/*
	 * Attempt to rejoin lockspaces and adopt locks from a previous
	 * instance of lvmlockd that left behind lockspaces/locks.
	 */
	if (adopt_opt)
		adopt_locks();

	while (1) {
		rv = poll(pollfd, pollfd_maxi + 1, -1);
		if ((rv == -1 && errno == EINTR) || daemon_quit) {
			if (daemon_quit) {
				int count;
				/* first sigterm would trigger stops, and
				   second sigterm may finish the joins. */
				count = for_each_lockspace(DO_STOP, DO_FREE, NO_FORCE);
				if (!count)
					break;
				log_debug("ignore shutdown for %d lockspaces", count);
				daemon_quit = 0;
			}
			continue;
		}
		if (rv < 0) {
			log_error("poll errno %d", errno);
			break;
		}

		for (i = 0; i <= pollfd_maxi; i++) {
			/* Negative fds mark unused or temporarily ignored slots. */
			if (pollfd[i].fd < 0)
				continue;

			is_recv = 0;
			is_dead = 0;

			if (pollfd[i].revents & POLLIN)
				is_recv = 1;
			if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL))
				is_dead = 1;

			if (!is_recv && !is_dead)
				continue;

			if (i == listen_pi) {
				process_listener(pollfd[i].fd);
				continue;
			}

			if (i == restart_pi) {
				process_restart(pollfd[i].fd);
				continue;
			}

			/*
			log_debug("poll pi %d fd %d revents %x",
				  i, pollfd[i].fd, pollfd[i].revents);
			*/

			pthread_mutex_lock(&client_mutex);
			cl = find_client_pi(i);
			if (cl) {
				pthread_mutex_lock(&cl->mutex);

				if (cl->recv) {
					/* should not happen */
					log_error("main client %u already recv", cl->id);

				} else if (cl->dead) {
					/* should not happen */
					log_error("main client %u already dead", cl->id);

				} else if (is_dead) {
					log_debug("close %s[%d] cl %u fd %d",
						  cl->name[0] ? cl->name : "client",
						  cl->pid, cl->id, cl->fd);
					cl->dead = 1;
					cl->pi = -1;
					cl->fd = -1;
					cl->poll_ignore = 0;
					if (close(pollfd[i].fd))
						log_error("close fd %d failed", pollfd[i].fd);
					pollfd[i].fd = POLL_FD_UNUSED;
					pollfd[i].events = 0;
					pollfd[i].revents = 0;

				} else if (is_recv) {
					/*
					 * Stop polling this fd until the
					 * pending message has been handled.
					 */
					cl->recv = 1;
					cl->poll_ignore = 1;
					pollfd[i].fd = POLL_FD_IGNORE;
					pollfd[i].events = 0;
					pollfd[i].revents = 0;
				}

				pthread_mutex_unlock(&cl->mutex);

				client_work = 1;
				pthread_cond_signal(&client_cond);

				/* client_thread will pick up and work on any
				   client with cl->recv or cl->dead set */

			} else {
				/* don't think this can happen */
				log_error("no client for index %d fd %d",
					  i, pollfd[i].fd);
				if (close(pollfd[i].fd))
					log_error("close fd %d failed", pollfd[i].fd);
				pollfd[i].fd = POLL_FD_UNUSED;
				pollfd[i].events = 0;
				pollfd[i].revents = 0;
			}
			pthread_mutex_unlock(&client_mutex);

			/* After set_dead, should we scan pollfd for
			   last unused slot and reduce pollfd_maxi? */
		}
	}

	for_each_lockspace_retry(DO_STOP, DO_FREE, DO_FORCE);
	close_worker_thread();
	close_client_thread();
	closelog();
	return 1; /* libdaemon uses 1 for success */
}
|
|
|
|
static void usage(char *prog, FILE *file)
|
|
{
|
|
fprintf(file, "Usage:\n");
|
|
fprintf(file, "%s [options]\n\n", prog);
|
|
fprintf(file, " --help | -h\n");
|
|
fprintf(file, " Show this help information.\n");
|
|
fprintf(file, " --version | -V\n");
|
|
fprintf(file, " Show version of lvmlockd.\n");
|
|
fprintf(file, " --test | -T\n");
|
|
fprintf(file, " Test mode, do not call lock manager.\n");
|
|
fprintf(file, " --foreground | -f\n");
|
|
fprintf(file, " Don't fork.\n");
|
|
fprintf(file, " --daemon-debug | -D\n");
|
|
fprintf(file, " Don't fork and print debugging to stdout.\n");
|
|
fprintf(file, " --pid-file | -p <path>\n");
|
|
fprintf(file, " Set path to the pid file. [%s]\n", LVMLOCKD_PIDFILE);
|
|
fprintf(file, " --socket-path | -s <path>\n");
|
|
fprintf(file, " Set path to the socket to listen on. [%s]\n", LVMLOCKD_SOCKET);
|
|
fprintf(file, " --adopt-file <path>\n");
|
|
fprintf(file, " Set path to the adopt file. [%s]\n", LVMLOCKD_ADOPT_FILE);
|
|
fprintf(file, " --syslog-priority | -S err|warning|debug\n");
|
|
fprintf(file, " Write log messages from this level up to syslog. [%s]\n", _syslog_num_to_name(LOG_SYSLOG_PRIO));
|
|
fprintf(file, " --gl-type | -g <str>\n");
|
|
fprintf(file, " Set global lock type to be dlm|sanlock.\n");
|
|
fprintf(file, " --host-id | -i <num>\n");
|
|
fprintf(file, " Set the local sanlock host id.\n");
|
|
fprintf(file, " --host-id-file | -F <path>\n");
|
|
fprintf(file, " A file containing the local sanlock host_id.\n");
|
|
fprintf(file, " --sanlock-timeout | -o <seconds>\n");
|
|
fprintf(file, " Set the sanlock lockspace I/O timeout.\n");
|
|
fprintf(file, " --adopt | -A 0|1\n");
|
|
fprintf(file, " Adopt locks from a previous instance of lvmlockd.\n");
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
daemon_state ds = {
|
|
.name = "lvmlockd",
|
|
.pidfile = getenv("LVM_LVMLOCKD_PIDFILE"),
|
|
.socket_path = getenv("LVM_LVMLOCKD_SOCKET"),
|
|
.protocol = lvmlockd_protocol,
|
|
.protocol_version = lvmlockd_protocol_version,
|
|
.daemon_init = NULL,
|
|
.daemon_fini = NULL,
|
|
.daemon_main = main_loop,
|
|
};
|
|
|
|
static const struct option long_options[] = {
|
|
{"help", no_argument, 0, 'h' },
|
|
{"version", no_argument, 0, 'V' },
|
|
{"test", no_argument, 0, 'T' },
|
|
{"foreground", no_argument, 0, 'f' },
|
|
{"daemon-debug", no_argument, 0, 'D' },
|
|
{"pid-file", required_argument, 0, 'p' },
|
|
{"socket-path", required_argument, 0, 's' },
|
|
{"adopt-file", required_argument, 0, 128 },
|
|
{"gl-type", required_argument, 0, 'g' },
|
|
{"host-id", required_argument, 0, 'i' },
|
|
{"host-id-file", required_argument, 0, 'F' },
|
|
{"adopt", required_argument, 0, 'A' },
|
|
{"syslog-priority", required_argument, 0, 'S' },
|
|
{"sanlock-timeout", required_argument, 0, 'o' },
|
|
{0, 0, 0, 0 }
|
|
};
|
|
|
|
daemon_host_id_file = NULL;
|
|
|
|
while (1) {
|
|
int c;
|
|
int lm;
|
|
int option_index = 0;
|
|
|
|
c = getopt_long(argc, argv, "hVTfDp:s:l:g:S:I:A:o:",
|
|
long_options, &option_index);
|
|
if (c == -1)
|
|
break;
|
|
|
|
switch (c) {
|
|
case '0':
|
|
break;
|
|
case 128:
|
|
free((void *) adopt_file);
|
|
adopt_file = strdup(optarg);
|
|
break;
|
|
case 'h':
|
|
usage(argv[0], stdout);
|
|
exit(EXIT_SUCCESS);
|
|
case 'V':
|
|
printf("lvmlockd version: " LVM_VERSION "\n");
|
|
exit(EXIT_SUCCESS);
|
|
case 'T':
|
|
daemon_test = 1;
|
|
break;
|
|
case 'f':
|
|
ds.foreground = 1;
|
|
break;
|
|
case 'D':
|
|
ds.foreground = 1;
|
|
daemon_debug = 1;
|
|
break;
|
|
case 'p':
|
|
free((void*)ds.pidfile);
|
|
ds.pidfile = strdup(optarg);
|
|
break;
|
|
case 's':
|
|
free((void*)ds.socket_path);
|
|
ds.socket_path = strdup(optarg);
|
|
break;
|
|
case 'g':
|
|
lm = str_to_lm(optarg);
|
|
if (lm == LD_LM_DLM && lm_support_dlm())
|
|
gl_use_dlm = 1;
|
|
else if (lm == LD_LM_SANLOCK && lm_support_sanlock())
|
|
gl_use_sanlock = 1;
|
|
else if (lm == LD_LM_IDM && lm_support_idm())
|
|
gl_use_idm = 1;
|
|
else {
|
|
fprintf(stderr, "invalid gl-type option\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
break;
|
|
case 'i':
|
|
daemon_host_id = atoi(optarg);
|
|
break;
|
|
case 'F':
|
|
free((void*)daemon_host_id_file);
|
|
daemon_host_id_file = strdup(optarg);
|
|
break;
|
|
case 'o':
|
|
sanlock_io_timeout = atoi(optarg);
|
|
break;
|
|
case 'A':
|
|
adopt_opt = atoi(optarg);
|
|
break;
|
|
case 'S':
|
|
syslog_priority = _syslog_name_to_num(optarg);
|
|
break;
|
|
case '?':
|
|
default:
|
|
usage(argv[0], stdout);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
if (!ds.pidfile)
|
|
ds.pidfile = LVMLOCKD_PIDFILE;
|
|
|
|
if (!ds.socket_path)
|
|
ds.socket_path = LVMLOCKD_SOCKET;
|
|
|
|
if (!adopt_file)
|
|
adopt_file = LVMLOCKD_ADOPT_FILE;
|
|
|
|
/* runs daemon_main/main_loop */
|
|
daemon_start(ds);
|
|
|
|
return 0;
|
|
}
|