mirror of
git://sourceware.org/git/lvm2.git
synced 2025-01-09 01:18:39 +03:00
cf9927697b
Today, we use "suppress_messages" flag (set internally in init_locking fn based on 'ignorelockingfailure() && getenv("LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES")'. This way, we can suppress high level messages like "File-based locking initialisation failed" or "Internal cluster locking initialisation failed". However, each locking has its own sequence of initialization steps and these could log some errors as well. It's quite misleading for the user to see such errors and warnings if the "--sysinit" is used (and so the ignorelockingfailure && LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES environment variable). Errors and warnings from these intermediary steps should be suppressed as well if requested. This patch propagates the "suppress_messages" flag deeper into locking init functions. I've also added these flags for other locking types for consistency, though it's not actually used for no_locking and readonly_locking.
622 lines
15 KiB
C
622 lines
15 KiB
C
/*
|
|
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
|
|
* Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
|
|
*
|
|
* This file is part of LVM2.
|
|
*
|
|
* This copyrighted material is made available to anyone wishing to use,
|
|
* modify, copy, or redistribute it subject to the terms and conditions
|
|
* of the GNU Lesser General Public License v.2.1.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
/*
|
|
* Locking functions for LVM.
|
|
* The main purpose of this part of the library is to serialise LVM
|
|
* management operations across a cluster.
|
|
*/
|
|
|
|
#include "lib.h"
|
|
#include "clvm.h"
|
|
#include "lvm-string.h"
|
|
#include "locking.h"
|
|
#include "locking_types.h"
|
|
#include "toolcontext.h"
|
|
|
|
#include <assert.h>
|
|
#include <stddef.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/un.h>
|
|
#include <unistd.h>
|
|
|
|
#ifndef CLUSTER_LOCKING_INTERNAL
|
|
int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags);
|
|
int query_resource(const char *resource, int *mode);
|
|
void locking_end(void);
|
|
int locking_init(int type, struct config_tree *cf, uint32_t *flags);
|
|
#endif
|
|
|
|
typedef struct lvm_response {
|
|
char node[255];
|
|
char *response;
|
|
int status;
|
|
int len;
|
|
} lvm_response_t;
|
|
|
|
/*
|
|
* This gets stuck at the start of memory we allocate so we
|
|
* can sanity-check it at deallocation time
|
|
*/
|
|
#define LVM_SIGNATURE 0x434C564D
|
|
|
|
/*
|
|
* NOTE: the LVMD uses the socket FD as the client ID, this means
|
|
* that any client that calls fork() will inherit the context of
|
|
* it's parent.
|
|
*/
|
|
static int _clvmd_sock = -1;
|
|
|
|
/* FIXME Install SIGPIPE handler? */
|
|
|
|
/* Open connection to the Cluster Manager daemon */
|
|
static int _open_local_sock(int suppress_messages)
|
|
{
|
|
int local_socket;
|
|
struct sockaddr_un sockaddr;
|
|
|
|
/* Open local socket */
|
|
if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
|
|
log_error_suppress(suppress_messages, "Local socket "
|
|
"creation failed: %s", strerror(errno));
|
|
return -1;
|
|
}
|
|
|
|
memset(&sockaddr, 0, sizeof(sockaddr));
|
|
memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
|
|
|
|
sockaddr.sun_family = AF_UNIX;
|
|
|
|
if (connect(local_socket,(struct sockaddr *) &sockaddr,
|
|
sizeof(sockaddr))) {
|
|
int saved_errno = errno;
|
|
|
|
log_error_suppress(suppress_messages, "connect() failed "
|
|
"on local socket: %s", strerror(errno));
|
|
if (close(local_socket))
|
|
stack;
|
|
|
|
errno = saved_errno;
|
|
return -1;
|
|
}
|
|
|
|
return local_socket;
|
|
}
|
|
|
|
/* Send a request and return the status */
|
|
static int _send_request(char *inbuf, int inlen, char **retbuf)
|
|
{
|
|
char outbuf[PIPE_BUF] __attribute__((aligned(8)));
|
|
struct clvm_header *outheader = (struct clvm_header *) outbuf;
|
|
int len;
|
|
unsigned off;
|
|
int buflen;
|
|
int err;
|
|
|
|
/* Send it to CLVMD */
|
|
rewrite:
|
|
if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) {
|
|
if (err == -1 && errno == EINTR)
|
|
goto rewrite;
|
|
log_error("Error writing data to clvmd: %s", strerror(errno));
|
|
return 0;
|
|
}
|
|
|
|
/* Get the response */
|
|
reread:
|
|
if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
|
|
if (errno == EINTR)
|
|
goto reread;
|
|
log_error("Error reading data from clvmd: %s", strerror(errno));
|
|
return 0;
|
|
}
|
|
|
|
if (len == 0) {
|
|
log_error("EOF reading CLVMD");
|
|
errno = ENOTCONN;
|
|
return 0;
|
|
}
|
|
|
|
/* Allocate buffer */
|
|
buflen = len + outheader->arglen;
|
|
*retbuf = dm_malloc(buflen);
|
|
if (!*retbuf) {
|
|
errno = ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
/* Copy the header */
|
|
memcpy(*retbuf, outbuf, len);
|
|
outheader = (struct clvm_header *) *retbuf;
|
|
|
|
/* Read the returned values */
|
|
off = 1; /* we've already read the first byte */
|
|
while (off <= outheader->arglen && len > 0) {
|
|
len = read(_clvmd_sock, outheader->args + off,
|
|
buflen - off - offsetof(struct clvm_header, args));
|
|
if (len > 0)
|
|
off += len;
|
|
}
|
|
|
|
/* Was it an error ? */
|
|
if (outheader->status != 0) {
|
|
errno = outheader->status;
|
|
|
|
/* Only return an error here if there are no node-specific
|
|
errors present in the message that might have more detail */
|
|
if (!(outheader->flags & CLVMD_FLAG_NODEERRS)) {
|
|
log_error("cluster request failed: %s", strerror(errno));
|
|
return 0;
|
|
}
|
|
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Build the structure header and parse-out wildcard node names */
|
|
/* FIXME: Cleanup implicit casts of clvmd_cmd (int, char, uint8_t, etc). */
|
|
static void _build_header(struct clvm_header *head, int clvmd_cmd, const char *node,
|
|
int len)
|
|
{
|
|
head->cmd = clvmd_cmd;
|
|
head->status = 0;
|
|
head->flags = 0;
|
|
head->xid = 0;
|
|
head->clientid = 0;
|
|
head->arglen = len;
|
|
|
|
if (node) {
|
|
/*
|
|
* Allow a couple of special node names:
|
|
* "*" for all nodes,
|
|
* "." for the local node only
|
|
*/
|
|
if (strcmp(node, "*") == 0) {
|
|
head->node[0] = '\0';
|
|
} else if (strcmp(node, ".") == 0) {
|
|
head->node[0] = '\0';
|
|
head->flags = CLVMD_FLAG_LOCAL;
|
|
} else
|
|
strcpy(head->node, node);
|
|
} else
|
|
head->node[0] = '\0';
|
|
}
|
|
|
|
/*
|
|
* Send a message to a(or all) node(s) in the cluster and wait for replies
|
|
*/
|
|
static int _cluster_request(char clvmd_cmd, const char *node, void *data, int len,
|
|
lvm_response_t ** response, int *num)
|
|
{
|
|
char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1] __attribute__((aligned(8)));
|
|
char *inptr;
|
|
char *retbuf = NULL;
|
|
int status;
|
|
int i;
|
|
int num_responses = 0;
|
|
struct clvm_header *head = (struct clvm_header *) outbuf;
|
|
lvm_response_t *rarray;
|
|
|
|
*num = 0;
|
|
|
|
if (_clvmd_sock == -1)
|
|
_clvmd_sock = _open_local_sock(0);
|
|
|
|
if (_clvmd_sock == -1)
|
|
return 0;
|
|
|
|
/* 1 byte is used from struct clvm_header.args[1], so -> len - 1 */
|
|
_build_header(head, clvmd_cmd, node, len - 1);
|
|
memcpy(head->node + strlen(head->node) + 1, data, len);
|
|
|
|
status = _send_request(outbuf, sizeof(struct clvm_header) +
|
|
strlen(head->node) + len - 1, &retbuf);
|
|
if (!status)
|
|
goto out;
|
|
|
|
/* Count the number of responses we got */
|
|
head = (struct clvm_header *) retbuf;
|
|
inptr = head->args;
|
|
while (inptr[0]) {
|
|
num_responses++;
|
|
inptr += strlen(inptr) + 1;
|
|
inptr += sizeof(int);
|
|
inptr += strlen(inptr) + 1;
|
|
}
|
|
|
|
/*
|
|
* Allocate response array.
|
|
* With an extra pair of INTs on the front to sanity
|
|
* check the pointer when we are given it back to free
|
|
*/
|
|
*response = dm_malloc(sizeof(lvm_response_t) * num_responses);
|
|
if (!*response) {
|
|
errno = ENOMEM;
|
|
status = 0;
|
|
goto out;
|
|
}
|
|
|
|
rarray = *response;
|
|
|
|
/* Unpack the response into an lvm_response_t array */
|
|
inptr = head->args;
|
|
i = 0;
|
|
while (inptr[0]) {
|
|
strcpy(rarray[i].node, inptr);
|
|
inptr += strlen(inptr) + 1;
|
|
|
|
memcpy(&rarray[i].status, inptr, sizeof(int));
|
|
inptr += sizeof(int);
|
|
|
|
rarray[i].response = dm_malloc(strlen(inptr) + 1);
|
|
if (rarray[i].response == NULL) {
|
|
/* Free up everything else and return error */
|
|
int j;
|
|
for (j = 0; j < i; j++)
|
|
dm_free(rarray[i].response);
|
|
free(*response);
|
|
errno = ENOMEM;
|
|
status = -1;
|
|
goto out;
|
|
}
|
|
|
|
strcpy(rarray[i].response, inptr);
|
|
rarray[i].len = strlen(inptr);
|
|
inptr += strlen(inptr) + 1;
|
|
i++;
|
|
}
|
|
*num = num_responses;
|
|
*response = rarray;
|
|
|
|
out:
|
|
dm_free(retbuf);
|
|
|
|
return status;
|
|
}
|
|
|
|
/* Free reply array */
|
|
static int _cluster_free_request(lvm_response_t * response, int num)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < num; i++) {
|
|
dm_free(response[i].response);
|
|
}
|
|
|
|
dm_free(response);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int _lock_for_cluster(struct cmd_context *cmd, unsigned char clvmd_cmd,
|
|
uint32_t flags, const char *name)
|
|
{
|
|
int status;
|
|
int i;
|
|
char *args;
|
|
const char *node = "";
|
|
int len;
|
|
int dmeventd_mode;
|
|
int saved_errno;
|
|
lvm_response_t *response = NULL;
|
|
int num_responses;
|
|
|
|
assert(name);
|
|
|
|
len = strlen(name) + 3;
|
|
args = alloca(len);
|
|
strcpy(args + 2, name);
|
|
|
|
/* Mask off lock flags */
|
|
args[0] = flags & (LCK_SCOPE_MASK | LCK_TYPE_MASK | LCK_NONBLOCK | LCK_HOLD);
|
|
args[1] = flags & (LCK_LOCAL | LCK_CLUSTER_VG);
|
|
|
|
if (flags & LCK_ORIGIN_ONLY)
|
|
args[1] |= LCK_ORIGIN_ONLY_MODE;
|
|
|
|
if (mirror_in_sync())
|
|
args[1] |= LCK_MIRROR_NOSYNC_MODE;
|
|
|
|
if (test_mode())
|
|
args[1] |= LCK_TEST_MODE;
|
|
|
|
/*
|
|
* Must handle tri-state return from dmeventd_monitor_mode.
|
|
* But DMEVENTD_MONITOR_IGNORE is not propagated across the cluster.
|
|
*/
|
|
dmeventd_mode = dmeventd_monitor_mode();
|
|
if (dmeventd_mode != DMEVENTD_MONITOR_IGNORE && dmeventd_mode)
|
|
args[1] |= LCK_DMEVENTD_MONITOR_MODE;
|
|
|
|
if (cmd->partial_activation)
|
|
args[1] |= LCK_PARTIAL_MODE;
|
|
|
|
/*
|
|
* VG locks are just that: locks, and have no side effects
|
|
* so we only need to do them on the local node because all
|
|
* locks are cluster-wide.
|
|
*
|
|
* Also, if the lock is exclusive it makes no sense to try to
|
|
* acquire it on all nodes, so just do that on the local node too.
|
|
*
|
|
* P_ locks /do/ get distributed across the cluster because they might
|
|
* have side-effects.
|
|
*
|
|
* SYNC_NAMES and VG_BACKUP use the VG name directly without prefix.
|
|
*/
|
|
if (clvmd_cmd == CLVMD_CMD_SYNC_NAMES) {
|
|
if (flags & LCK_LOCAL)
|
|
node = ".";
|
|
} else if (clvmd_cmd != CLVMD_CMD_VG_BACKUP) {
|
|
if (strncmp(name, "P_", 2) &&
|
|
(clvmd_cmd == CLVMD_CMD_LOCK_VG ||
|
|
(flags & LCK_TYPE_MASK) == LCK_EXCL ||
|
|
(flags & LCK_LOCAL) ||
|
|
!(flags & LCK_CLUSTER_VG)))
|
|
node = ".";
|
|
}
|
|
|
|
status = _cluster_request(clvmd_cmd, node, args, len,
|
|
&response, &num_responses);
|
|
|
|
/* If any nodes were down then display them and return an error */
|
|
for (i = 0; i < num_responses; i++) {
|
|
if (response[i].status == EHOSTDOWN) {
|
|
log_error("clvmd not running on node %s",
|
|
response[i].node);
|
|
status = 0;
|
|
errno = response[i].status;
|
|
} else if (response[i].status) {
|
|
log_error("Error locking on node %s: %s",
|
|
response[i].node,
|
|
response[i].response[0] ?
|
|
response[i].response :
|
|
strerror(response[i].status));
|
|
status = 0;
|
|
errno = response[i].status;
|
|
}
|
|
}
|
|
|
|
saved_errno = errno;
|
|
_cluster_free_request(response, num_responses);
|
|
errno = saved_errno;
|
|
|
|
return status;
|
|
}
|
|
|
|
/* API entry point for LVM */
|
|
#ifdef CLUSTER_LOCKING_INTERNAL
|
|
static int _lock_resource(struct cmd_context *cmd, const char *resource,
|
|
uint32_t flags)
|
|
#else
|
|
int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags)
|
|
#endif
|
|
{
|
|
char lockname[PATH_MAX];
|
|
int clvmd_cmd = 0;
|
|
const char *lock_scope;
|
|
const char *lock_type = "";
|
|
|
|
assert(strlen(resource) < sizeof(lockname));
|
|
assert(resource);
|
|
|
|
switch (flags & LCK_SCOPE_MASK) {
|
|
case LCK_VG:
|
|
if (!strcmp(resource, VG_SYNC_NAMES)) {
|
|
log_very_verbose("Requesting sync names.");
|
|
return _lock_for_cluster(cmd, CLVMD_CMD_SYNC_NAMES,
|
|
flags & ~LCK_HOLD, resource);
|
|
}
|
|
if (flags == LCK_VG_BACKUP) {
|
|
log_very_verbose("Requesting backup of VG metadata for %s",
|
|
resource);
|
|
return _lock_for_cluster(cmd, CLVMD_CMD_VG_BACKUP,
|
|
LCK_CLUSTER_VG, resource);
|
|
}
|
|
|
|
/* If the VG name is empty then lock the unused PVs */
|
|
if (dm_snprintf(lockname, sizeof(lockname), "%c_%s",
|
|
(is_orphan_vg(resource) ||
|
|
is_global_vg(resource) ||
|
|
(flags & LCK_CACHE)) ? 'P' : 'V',
|
|
resource) < 0) {
|
|
log_error("Locking resource %s too long.", resource);
|
|
return 0;
|
|
}
|
|
|
|
lock_scope = "VG";
|
|
clvmd_cmd = CLVMD_CMD_LOCK_VG;
|
|
/*
|
|
* Old clvmd does not expect LCK_HOLD which was already processed
|
|
* in lock_vol, mask it for compatibility reasons.
|
|
*/
|
|
if (flags != LCK_VG_COMMIT && flags != LCK_VG_REVERT)
|
|
flags &= ~LCK_HOLD;
|
|
|
|
break;
|
|
|
|
case LCK_LV:
|
|
clvmd_cmd = CLVMD_CMD_LOCK_LV;
|
|
strcpy(lockname, resource);
|
|
lock_scope = "LV";
|
|
flags &= ~LCK_HOLD; /* Mask off HOLD flag */
|
|
break;
|
|
|
|
default:
|
|
log_error("Unrecognised lock scope: %d",
|
|
flags & LCK_SCOPE_MASK);
|
|
return 0;
|
|
}
|
|
|
|
switch(flags & LCK_TYPE_MASK) {
|
|
case LCK_UNLOCK:
|
|
lock_type = "UN";
|
|
break;
|
|
case LCK_NULL:
|
|
lock_type = "NL";
|
|
break;
|
|
case LCK_READ:
|
|
lock_type = "CR";
|
|
break;
|
|
case LCK_PREAD:
|
|
lock_type = "PR";
|
|
break;
|
|
case LCK_WRITE:
|
|
lock_type = "PW";
|
|
break;
|
|
case LCK_EXCL:
|
|
lock_type = "EX";
|
|
break;
|
|
default:
|
|
log_error("Unrecognised lock type: %u",
|
|
flags & LCK_TYPE_MASK);
|
|
return 0;
|
|
}
|
|
|
|
log_very_verbose("Locking %s %s %s (%s%s%s%s%s%s%s) (0x%x)", lock_scope, lockname,
|
|
lock_type, lock_scope,
|
|
flags & LCK_NONBLOCK ? "|NONBLOCK" : "",
|
|
flags & LCK_HOLD ? "|HOLD" : "",
|
|
flags & LCK_LOCAL ? "|LOCAL" : "",
|
|
flags & LCK_CLUSTER_VG ? "|CLUSTER" : "",
|
|
flags & LCK_CACHE ? "|CACHE" : "",
|
|
flags & LCK_ORIGIN_ONLY ? "|ORIGIN_ONLY" : "",
|
|
flags);
|
|
|
|
/* Send a message to the cluster manager */
|
|
return _lock_for_cluster(cmd, clvmd_cmd, flags, lockname);
|
|
}
|
|
|
|
static int decode_lock_type(const char *response)
|
|
{
|
|
if (!response)
|
|
return LCK_NULL;
|
|
else if (!strcmp(response, "EX"))
|
|
return LCK_EXCL;
|
|
else if (!strcmp(response, "CR"))
|
|
return LCK_READ;
|
|
else if (!strcmp(response, "PR"))
|
|
return LCK_PREAD;
|
|
|
|
stack;
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CLUSTER_LOCKING_INTERNAL
|
|
static int _query_resource(const char *resource, int *mode)
|
|
#else
|
|
int query_resource(const char *resource, int *mode)
|
|
#endif
|
|
{
|
|
int i, status, len, num_responses, saved_errno;
|
|
const char *node = "";
|
|
char *args;
|
|
lvm_response_t *response = NULL;
|
|
|
|
saved_errno = errno;
|
|
len = strlen(resource) + 3;
|
|
args = alloca(len);
|
|
strcpy(args + 2, resource);
|
|
|
|
args[0] = 0;
|
|
args[1] = LCK_CLUSTER_VG;
|
|
|
|
status = _cluster_request(CLVMD_CMD_LOCK_QUERY, node, args, len,
|
|
&response, &num_responses);
|
|
*mode = LCK_NULL;
|
|
for (i = 0; i < num_responses; i++) {
|
|
if (response[i].status == EHOSTDOWN)
|
|
continue;
|
|
|
|
if (!response[i].response[0])
|
|
continue;
|
|
|
|
/*
|
|
* All nodes should use CR, or exactly one node
|
|
* should hold EX. (PR is obsolete)
|
|
* If two nodes report different locks,
|
|
* something is broken - just return more important mode.
|
|
*/
|
|
if (decode_lock_type(response[i].response) > *mode)
|
|
*mode = decode_lock_type(response[i].response);
|
|
|
|
log_debug("Lock held for %s, node %s : %s", resource,
|
|
response[i].node, response[i].response);
|
|
}
|
|
|
|
_cluster_free_request(response, num_responses);
|
|
errno = saved_errno;
|
|
|
|
return status;
|
|
}
|
|
|
|
#ifdef CLUSTER_LOCKING_INTERNAL
|
|
static void _locking_end(void)
|
|
#else
|
|
void locking_end(void)
|
|
#endif
|
|
{
|
|
if (_clvmd_sock != -1 && close(_clvmd_sock))
|
|
stack;
|
|
|
|
_clvmd_sock = -1;
|
|
}
|
|
|
|
#ifdef CLUSTER_LOCKING_INTERNAL
|
|
static void _reset_locking(void)
|
|
#else
|
|
void reset_locking(void)
|
|
#endif
|
|
{
|
|
if (close(_clvmd_sock))
|
|
stack;
|
|
|
|
_clvmd_sock = _open_local_sock(0);
|
|
if (_clvmd_sock == -1)
|
|
stack;
|
|
}
|
|
|
|
#ifdef CLUSTER_LOCKING_INTERNAL
|
|
int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd,
|
|
int suppress_messages)
|
|
{
|
|
locking->lock_resource = _lock_resource;
|
|
locking->query_resource = _query_resource;
|
|
locking->fin_locking = _locking_end;
|
|
locking->reset_locking = _reset_locking;
|
|
locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED;
|
|
|
|
_clvmd_sock = _open_local_sock(suppress_messages);
|
|
if (_clvmd_sock == -1)
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
#else
|
|
int locking_init(int type, struct config_tree *cf, uint32_t *flags)
|
|
{
|
|
_clvmd_sock = _open_local_sock(0);
|
|
if (_clvmd_sock == -1)
|
|
return 0;
|
|
|
|
/* Ask LVM to lock memory before calling us */
|
|
*flags |= LCK_PRE_MEMLOCK;
|
|
*flags |= LCK_CLUSTERED;
|
|
|
|
return 1;
|
|
}
|
|
#endif
|