1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-03-25 18:50:51 +03:00

Add cluster support.

This commit is contained in:
Alasdair Kergon 2004-06-24 08:02:38 +00:00
parent 244a32b3d5
commit d3c8211fef
32 changed files with 6534 additions and 9 deletions

@ -22,11 +22,13 @@ ifeq ("@INTL@", "yes")
SUBDIRS += po
endif
SUBDIRS += lib tools
SUBDIRS += lib tools daemons
ifeq ($(MAKECMDGOALS),distclean)
SUBDIRS += lib/format1 \
SUBDIRS += daemons/clvmd \
lib/format1 \
lib/format_pool \
lib/locking \
lib/mirror \
lib/snapshot \
po \
@ -35,14 +37,16 @@ endif
include make.tmpl
daemons: lib
lib: include
tools: lib
po: lib tools
po: tools daemons
ifeq ("@INTL@", "yes")
lib.pofile: include.pofile
tools.pofile: lib.pofile
po.pofile: lib.pofile tools.pofile
daemons.pofile: lib.pofile
po.pofile: tools.pofile daemons.pofile
pofile: po.pofile
endif

@ -1 +1 @@
2.00.17-cvs (2004-06-20)
2.00.18-cvs (2004-06-24)

@ -1,3 +1,7 @@
Version 2.00.18 - 24 June 2004
==============================
Add cluster support.
Version 2.00.17 - 20 June 2004
==============================
configure --enable-fsadm to try out fsadm. fsadm is not tested yet.

51
configure vendored

@ -309,7 +309,7 @@ ac_includes_default="\
#endif"
ac_default_prefix=/usr
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os MSGFMT JOBS STATIC_LINK LVM1 POOL SNAPSHOTS MIRRORS OWNER GROUP CLDFLAGS CLDWHOLEARCHIVE CLDNOWHOLEARCHIVE LD_DEPS LD_FLAGS SOFLAG LVM_VERSION LVM1_FALLBACK DEBUG DEVMAPPER HAVE_LIBDL HAVE_SELINUX CMDLIB LOCALEDIR CONFDIR STATICDIR INTL_PACKAGE INTL FSADM LIBOBJS LTLIBOBJS'
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os MSGFMT JOBS STATIC_LINK LVM1 POOL SNAPSHOTS MIRRORS OWNER GROUP CLDFLAGS CLDWHOLEARCHIVE CLDNOWHOLEARCHIVE LD_DEPS LD_FLAGS SOFLAG LVM_VERSION LVM1_FALLBACK DEBUG DEVMAPPER HAVE_LIBDL HAVE_SELINUX CMDLIB LOCALEDIR CONFDIR STATICDIR INTL_PACKAGE INTL CLVMD CLUSTER FSADM LIBOBJS LTLIBOBJS'
ac_subst_files=''
# Initialize some variables set by options.
@ -867,10 +867,13 @@ Optional Packages:
TYPE=internal
--with-pool=TYPE GFS pool read-only support: internal/shared/none
TYPE=internal
--with-cluster=TYPE Cluster LVM locking support: internal/shared/none
TYPE=internal
--with-snapshots=TYPE Snapshot support: internal/shared/none
TYPE=internal
--with-mirrors=TYPE Mirror support: internal/shared/none
TYPE=internal
--with-clvmd Build cluster LVM Daemon
--with-localedir=DIR Translation files in DIR PREFIX/share/locale
--with-confdir=DIR Configuration files in DIR /etc
--with-staticdir=DIR Static binary in DIR EXEC_PREFIX/sbin
@ -3900,6 +3903,7 @@ case "$host_os" in
SOFLAG="-shared"
DEVMAPPER=yes
ODIRECT=yes
CLUSTER=internal
FSADM=no ;;
darwin*)
CFLAGS="-no-cpp-precomp -fno-common"
@ -3911,6 +3915,7 @@ case "$host_os" in
SOFLAG="-dynamiclib"
DEVMAPPER=no
ODIRECT=no
CLUSTER=none
FSADM=no ;;
esac
@ -3998,6 +4003,25 @@ if test x$POOL = xinternal; then
fi
# Check whether --with-cluster or --without-cluster was given.
if test "${with_cluster+set}" = set; then
withval="$with_cluster"
CLUSTER="$withval"
fi;
if [ "x$CLUSTER" != xnone -a "x$CLUSTER" != xinternal -a "x$CLUSTER" != xshared ];
then { { echo "$as_me:$LINENO: error: --with-cluster parameter invalid
" >&5
echo "$as_me: error: --with-cluster parameter invalid
" >&2;}
{ (exit 1); exit 1; }; }
exit
fi;
if test x$CLUSTER = xinternal; then
CFLAGS="$CFLAGS -DCLUSTER_LOCKING_INTERNAL"
fi
# Check whether --enable-jobs or --disable-jobs was given.
if test "${enable_jobs+set}" = set; then
enableval="$enable_jobs"
@ -4071,6 +4095,20 @@ if test x$READLINE = xyes; then
CFLAGS="$CFLAGS -DREADLINE_SUPPORT"
fi
# Check whether --with-clvmd or --without-clvmd was given.
if test "${with_clvmd+set}" = set; then
withval="$with_clvmd"
\
CLVMD=$withval
else
CLVMD=no
fi;
if test x$CLVMD = xyes && test x$CLUSTER = xnone; then
CLUSTER=internal
fi
echo "$ac_t""$CLVMD" 1>&6
echo $ac_n "checking whether to enable debugging""... $ac_c" 1>&6
# Check whether --enable-debug or --disable-debug was given.
if test "${enable_debug+set}" = set; then
@ -4698,7 +4736,7 @@ else
HAVE_LIBDL=no
fi
if [ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o \
if [ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o "x$CLUSTER" = xshared -o \
"x$SNAPSHOTS" = xshared -o "x$MIRRORS" = xshared \
\) -a "x$STATIC_LINK" = xyes ];
then { { echo "$as_me:$LINENO: error: Features cannot be 'shared' when building statically
@ -5207,7 +5245,9 @@ fi
ac_config_files="$ac_config_files Makefile make.tmpl doc/Makefile include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/mirror/Makefile lib/snapshot/Makefile man/Makefile po/Makefile tools/Makefile tools/version.h tools/fsadm/Makefile test/mm/Makefile test/device/Makefile test/format1/Makefile test/regex/Makefile test/filters/Makefile"
ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile doc/Makefile include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile lib/snapshot/Makefile man/Makefile po/Makefile tools/Makefile tools/version.h tools/fsadm/Makefile test/mm/Makefile test/device/Makefile test/format1/Makefile test/regex/Makefile test/filters/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
# tests run on this system so they can be shared between configure
@ -5760,11 +5800,14 @@ do
# Handling of arguments.
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"make.tmpl" ) CONFIG_FILES="$CONFIG_FILES make.tmpl" ;;
"daemons/Makefile" ) CONFIG_FILES="$CONFIG_FILES daemons/Makefile" ;;
"daemons/clvmd/Makefile" ) CONFIG_FILES="$CONFIG_FILES daemons/clvmd/Makefile" ;;
"doc/Makefile" ) CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
"include/Makefile" ) CONFIG_FILES="$CONFIG_FILES include/Makefile" ;;
"lib/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/Makefile" ;;
"lib/format1/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/format1/Makefile" ;;
"lib/format_pool/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/format_pool/Makefile" ;;
"lib/locking/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/locking/Makefile" ;;
"lib/mirror/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/mirror/Makefile" ;;
"lib/snapshot/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/snapshot/Makefile" ;;
"man/Makefile" ) CONFIG_FILES="$CONFIG_FILES man/Makefile" ;;
@ -5916,6 +5959,8 @@ s,@CONFDIR@,$CONFDIR,;t t
s,@STATICDIR@,$STATICDIR,;t t
s,@INTL_PACKAGE@,$INTL_PACKAGE,;t t
s,@INTL@,$INTL,;t t
s,@CLVMD@,$CLVMD,;t t
s,@CLUSTER@,$CLUSTER,;t t
s,@FSADM@,$FSADM,;t t
s,@LIBOBJS@,$LIBOBJS,;t t
s,@LTLIBOBJS@,$LTLIBOBJS,;t t

@ -59,6 +59,7 @@ case "$host_os" in
SOFLAG="-shared"
DEVMAPPER=yes
ODIRECT=yes
CLUSTER=internal
FSADM=no ;;
darwin*)
CFLAGS="-no-cpp-precomp -fno-common"
@ -70,6 +71,7 @@ case "$host_os" in
SOFLAG="-dynamiclib"
DEVMAPPER=no
ODIRECT=no
CLUSTER=none
FSADM=no ;;
esac
@ -141,6 +143,22 @@ if test x$POOL = xinternal; then
CFLAGS="$CFLAGS -DPOOL_INTERNAL"
fi
dnl -- cluster_locking inclusion type
AC_ARG_WITH(cluster,
[ --with-cluster=TYPE Cluster LVM locking support: internal/shared/none
[TYPE=internal] ],
[ CLUSTER="$withval" ])
if [[ "x$CLUSTER" != xnone -a "x$CLUSTER" != xinternal -a "x$CLUSTER" != xshared ]];
then AC_MSG_ERROR(
--with-cluster parameter invalid
)
exit
fi;
if test x$CLUSTER = xinternal; then
CFLAGS="$CFLAGS -DCLUSTER_LOCKING_INTERNAL"
fi
AC_ARG_ENABLE(jobs, [ --enable-jobs=NUM Number of jobs to run simultaneously], JOBS=-j$enableval, JOBS=-j2)
@ -192,6 +210,15 @@ if test x$READLINE = xyes; then
CFLAGS="$CFLAGS -DREADLINE_SUPPORT"
fi
dnl Build cluster LVM daemon
AC_ARG_WITH(clvmd, [ --with-clvmd Build cluster LVM Daemon], \
CLVMD=$withval, CLVMD=no)
dnl If clvmd is enabled but cluster locking is not, automatically include the locking.
if test x$CLVMD = xyes && test x$CLUSTER = xnone; then
CLUSTER=internal
fi
echo "$ac_t""$CLVMD" 1>&6
echo $ac_n "checking whether to enable debugging""... $ac_c" 1>&6
dnl Enable Debugging
AC_ARG_ENABLE(debug, [ --enable-debug Enable debugging], \
@ -272,7 +299,7 @@ else
fi
dnl Check for shared/static conflicts
if [[ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o \
if [[ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o "x$CLUSTER" = xshared -o \
"x$SNAPSHOTS" = xshared -o "x$MIRRORS" = xshared \
\) -a "x$STATIC_LINK" = xyes ]];
then AC_MSG_ERROR(
@ -377,6 +404,8 @@ AC_SUBST(CONFDIR)
AC_SUBST(STATICDIR)
AC_SUBST(INTL_PACKAGE)
AC_SUBST(INTL)
AC_SUBST(CLVMD)
AC_SUBST(CLUSTER)
AC_SUBST(FSADM)
dnl First and last lines should not contain files to generate in order to
@ -384,11 +413,14 @@ dnl keep utility scripts running properly
AC_OUTPUT( \
Makefile \
make.tmpl \
daemons/Makefile \
daemons/clvmd/Makefile \
doc/Makefile \
include/Makefile \
lib/Makefile \
lib/format1/Makefile \
lib/format_pool/Makefile \
lib/locking/Makefile \
lib/mirror/Makefile \
lib/snapshot/Makefile \
man/Makefile \

23
daemons/Makefile.in Normal file

@ -0,0 +1,23 @@
#
# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
#
# This file is part of the LVM2.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU General Public License v.2.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Source locations; the @...@ placeholders are filled in by configure.
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
# clvmd is only added to SUBDIRS when configure substituted CLVMD=yes
# (i.e. --with-clvmd was given). SUBDIRS is presumably consumed by
# make.tmpl -- confirm there.
ifeq ("@CLVMD@", "yes")
SUBDIRS = clvmd
endif
# Pull in the project-wide template.
include $(top_srcdir)/make.tmpl

47
daemons/clvmd/Makefile.in Normal file

@ -0,0 +1,47 @@
#
# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
#
# This file is part of the LVM2.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU General Public License v.2.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Source locations; the @...@ placeholders are filled in by configure.
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
# C sources that make up the clvmd daemon (cman flavour; clvmd-gulm.c is
# not listed here).
SOURCES = \
clvmd-cman.c \
clvmd-command.c \
clvmd.c \
libclvm.c \
lvm-functions.c \
system-lv.c
# The single program built in this directory.
TARGETS = \
clvmd
include $(top_srcdir)/make.tmpl
# Appended after the include so they add to whatever make.tmpl defined.
CFLAGS += -D_REENTRANT -fno-strict-aliasing
LIBS += -ldevmapper -ldlm -llvm -lpthread
INSTALL_TARGETS = \
install_clvmd
# Link the daemon from its objects; relinks when the static LVM library changes.
# NOTE(review): OBJECTS, LD_FLAGS and LVMLIBS are not defined in this file;
# presumably they come from make.tmpl -- confirm.
clvmd: $(OBJECTS) $(top_srcdir)/lib/liblvm.a
$(CC) -o clvmd $(OBJECTS) $(LD_FLAGS) $(LVMLIBS) $(LIBS)
# Install the daemon binary into $(sbindir) with mode 555.
.PHONY: install_clvmd
install_clvmd: $(TARGETS)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) clvmd \
$(sbindir)/clvmd
# NOTE(review): "install" is not declared .PHONY in this file; check whether
# make.tmpl already does so.
install: $(INSTALL_TARGETS)

65
daemons/clvmd/clvm.h Normal file

@ -0,0 +1,65 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Definitions for CLVMD server and clients */
/*
* The protocol spoken over the cluster and across the local socket.
*/
#ifndef _CLVM_H
#define _CLVM_H
/*
 * Fixed header that starts every clvmd message, followed by two
 * variable-length parts: a NUL-terminated node name and then the command
 * arguments. The struct is packed, so there is no padding between members.
 *
 * NOTE(review): this header uses the <stdint.h> fixed-width types but does
 * not include <stdint.h> itself; includers must do so first.
 */
struct clvm_header {
uint8_t cmd; /* Command code: one of the CLVMD_CMD_* values below */
uint8_t flags; /* Bitmask of CLVMD_FLAG_* values below */
uint16_t xid; /* Transaction ID */
uint32_t clientid; /* Only used in Daemon->Daemon comms */
int32_t status; /* For replies, whether request succeeded */
uint32_t arglen; /* Length of argument below.
If >1500 then it will be passed
around the cluster in the system LV */
char node[1]; /* Actually a NUL-terminated string, node name.
If this is empty then the command is
forwarded to all cluster nodes unless
CLVMD_FLAG_LOCAL is also set. */
char args[1]; /* Arguments for the command follow the
node name. This member is only
valid if the node name is empty; otherwise
locate the arguments at
node + strlen(node) + 1. */
} __attribute__ ((packed));
/* Values for clvm_header.flags */
#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */
#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */
/* Name of the local socket to communicate between libclvm and clvmd.
The name starts with a NUL byte, so sizeof() rather than strlen() must be
used to measure it. NOTE(review): presumably an abstract-namespace
AF_UNIX address -- confirm where it is bound. */
//static const char CLVMD_SOCKNAME[]="/var/run/clvmd";
static const char CLVMD_SOCKNAME[] = "\0clvmd";
/* Internal commands & replies */
#define CLVMD_CMD_REPLY 1
#define CLVMD_CMD_VERSION 2 /* Send version around cluster when we start */
#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running
an incompatible version */
#define CLVMD_CMD_TEST 4 /* Just for mucking about */
#define CLVMD_CMD_LOCK 30
#define CLVMD_CMD_UNLOCK 31
/* Lock/Unlock commands */
#define CLVMD_CMD_LOCK_LV 50
#define CLVMD_CMD_LOCK_VG 51
#endif

499
daemons/clvmd/clvmd-cman.c Normal file

@ -0,0 +1,499 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* CMAN communication layer for clvmd.
*/
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <syslog.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include <getopt.h>
#include <errno.h>
#include "clvmd-comms.h"
#include "clvm.h"
#include "libdlm.h"
#include "log.h"
#include "clvmd.h"
#include "lvm-functions.h"
/* Name of the DLM lockspace created in init_cluster() */
#define LOCKSPACE_NAME "clvmd"
/* Cluster socket opened by init_cluster() */
static int cluster_sock;
/* Member count returned by the last SIOCCLUSTER_GETMEMBERS ioctl
(-1 if that ioctl failed) */
static int num_nodes;
/* Membership array filled in by get_members() */
static struct cl_cluster_node *nodes = NULL;
static int count_nodes; /* size of allocated nodes array */
static int max_updown_nodes = 50; /* Current size of the allocated node_updown array */
/* Node up/down status, indexed by nodeid */
static int *node_updown = NULL;
/* Handle returned by dlm_create_lockspace() */
static dlm_lshandle_t *lockspace;
/* NOTE(review): no definition of sigusr1_handler is visible in this part
of the file -- confirm it is still needed. */
static void sigusr1_handler(int sig);
static void count_clvmds_running(void);
static void get_members(void);
static int nodeid_from_csid(char *csid);
static int name_from_nodeid(int nodeid, char *name);
/*
 * Per-request wait state used by sync_lock()/sync_unlock(): the caller
 * blocks on cond (under mutex) until sync_ast_routine() signals it, then
 * reads the result from lksb.
 */
struct lock_wait {
pthread_cond_t cond;
pthread_mutex_t mutex;
struct dlm_lksb lksb;
};
/*
 * Set up cluster communications for clvmd: open and bind the cluster
 * socket, read the membership list, record which members already have a
 * clvmd listening, and create the DLM lockspace used for LV & VG locks.
 *
 * Returns 0 on success, -1 on failure.  If a step fails after the socket
 * has been opened, the socket is closed and cluster_sock is reset to -1 so
 * that the descriptor is not leaked.
 */
int init_cluster()
{
	struct sockaddr_cl saddr;
	int port = CLUSTER_PORT_CLVMD;

	/* Open the cluster communication socket */
	cluster_sock = socket(AF_CLUSTER, SOCK_DGRAM, CLPROTO_CLIENT);
	if (cluster_sock == -1) {
		perror("Can't open cluster socket");
		return -1;
	}

	/* Bind to our port number on the cluster.
	   Writes to this will block if the cluster loses quorum.
	   Zero the address first so members we do not set explicitly are
	   not passed to bind() as stack garbage. */
	memset(&saddr, 0, sizeof(saddr));
	saddr.scl_family = AF_CLUSTER;
	saddr.scl_port = port;

	if (bind(cluster_sock, (struct sockaddr *) &saddr,
		 sizeof(struct sockaddr_cl))) {
		log_error("Can't bind cluster socket: %m");
		goto fail;
	}

	/* Get the cluster members list */
	get_members();
	count_clvmds_running();

	/* Create a lockspace for LV & VG locks to live in */
	lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
	if (!lockspace) {
		log_error("Unable to create lockspace for CLVM\n");
		goto fail;
	}
	dlm_ls_pthread_init(lockspace);

	return 0;

fail:
	/* Do not leave a half-initialised socket behind */
	close(cluster_sock);
	cluster_sock = -1;
	return -1;
}
/* Return the cluster socket descriptor held in cluster_sock. */
int get_main_cluster_fd()
{
return cluster_sock;
}
/* Return the member count recorded by the last get_members() call
(this is -1 if the membership ioctl failed). */
int get_num_nodes()
{
return num_nodes;
}
/*
 * send_message with the fd check removed.
 *
 * Send msglen bytes from buf over the cluster socket.  If csid is non-NULL
 * the message is addressed to that node (csid is copied into the node id of
 * the destination address); if it is NULL the message is broadcast.
 * The send is retried for as long as sendmsg() fails with EAGAIN.
 *
 * errtext is logged verbatim if the send fails for any other reason.
 *
 * Returns the sendmsg() result: bytes sent, or -1 with errno set.
 */
int cluster_send_message(void *buf, int msglen, char *csid, const char *errtext)
{
	struct iovec iov[2];
	struct msghdr msg;
	struct sockaddr_cl saddr;
	int len = 0;

	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_iovlen = 1;
	msg.msg_iov = iov;
	msg.msg_flags = 0;
	iov[0].iov_len = msglen;
	iov[0].iov_base = buf;

	saddr.scl_family = AF_CLUSTER;
	saddr.scl_port = CLUSTER_PORT_CLVMD;
	if (csid) {
		msg.msg_name = &saddr;
		msg.msg_namelen = sizeof(saddr);
		memcpy(&saddr.scl_nodeid, csid, MAX_CSID_LEN);
	} else {		/* Cluster broadcast */
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
	}

	for (;;) {
		int saved_errno;

		len = sendmsg(cluster_sock, &msg, 0);
		if (len >= 0)
			break;
		if (errno == EAGAIN)
			continue;

		/* errtext is caller-supplied data, never a format string.
		   Keep errno intact across the logging call so the caller
		   sees the sendmsg() failure and we cannot be tricked into
		   retrying by a clobbered errno. */
		saved_errno = errno;
		log_error("%s", errtext);
		errno = saved_errno;
		break;
	}

	return len;
}
/*
 * Write this node's CSID into csid (MAX_CSID_LEN bytes).  The buffer is
 * zeroed first and stays zeroed when no member entry is flagged as "us".
 * Every entry is examined, so if several were flagged the last one wins.
 */
void get_our_csid(char *csid)
{
	int idx = 0;

	memset(csid, 0, MAX_CSID_LEN);

	while (idx < num_nodes) {
		if (nodes[idx].us)
			memcpy(csid, &nodes[idx].node_id, MAX_CSID_LEN);
		idx++;
	}
}
/*
 * Invoke callback once for every known cluster member, passing the client,
 * a pointer to the member's node id (used as its CSID) and that member's
 * current up/down flag ("down" means it is not running a clvmd).
 *
 * Returns 0 if every member was up, or -1 if at least one was down.
 */
int cluster_do_node_callback(struct local_client *client,
			     void (*callback) (struct local_client *, char *,
					       int))
{
	int any_down = 0;
	int idx = 0;

	while (idx < get_num_nodes()) {
		callback(client, (char *) &nodes[idx].node_id,
			 node_updown[nodes[idx].node_id]);

		/* The flag is looked up again after the callback has run */
		if (node_updown[nodes[idx].node_id] == 0)
			any_down = -1;

		idx++;
	}

	return any_down;
}
/*
 * Handle an out-of-band message read from the cluster socket.  The first
 * byte of buf selects the action:
 *   - port closed: the clvmd on node 'nodeid' has stopped listening, so it
 *     is marked down;
 *   - state change: the membership list is re-read;
 *   - anything else is only reported via DEBUGLOG.
 */
static void process_oob_msg(char *buf, int len, int nodeid)
{
	char nodename[256];

	if (buf[0] == CLUSTER_OOB_MSG_PORTCLOSED) {
		name_from_nodeid(nodeid, nodename);
		log_notice("clvmd on node %s has died\n", nodename);
		DEBUGLOG("Got OOB message, removing node %s\n", nodename);

		node_updown[nodeid] = 0;
	} else if (buf[0] == CLUSTER_OOB_MSG_STATECHANGE) {
		DEBUGLOG("Got OOB message, Cluster state change\n");
		get_members();
	} else {
		/* ERROR */
		DEBUGLOG("Got unknown OOB message: %d\n", buf[0]);
	}
}
int cluster_fd_callback(struct local_client *fd, char *buf, int len, char *csid,
struct local_client **new_client)
{
struct iovec iov[2];
struct msghdr msg;
struct sockaddr_cl saddr;
/* We never return a new client */
*new_client = NULL;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_iovlen = 1;
msg.msg_iov = iov;
msg.msg_name = &saddr;
msg.msg_flags = 0;
msg.msg_namelen = sizeof(saddr);
iov[0].iov_len = len;
iov[0].iov_base = buf;
len = recvmsg(cluster_sock, &msg, MSG_OOB | O_NONBLOCK);
if (len < 0 && errno == EAGAIN)
return len;
DEBUGLOG("Read on cluster socket, len = %d\n", len);
/* A real error */
if (len < 0) {
log_error("read error on cluster socket: %m");
return 0;
}
/* EOF - we have left the cluster */
if (len == 0)
return 0;
/* Is it OOB? probably a node gone down */
if (msg.msg_flags & MSG_OOB) {
process_oob_msg(iov[0].iov_base, len, saddr.scl_nodeid);
/* Tell the upper layer to ignore this message */
len = -1;
errno = EAGAIN;
}
memcpy(csid, &saddr.scl_nodeid, sizeof(saddr.scl_nodeid));
return len;
}
void add_up_node(char *csid)
{
/* It's up ! */
int nodeid = nodeid_from_csid(csid);
if (nodeid >= max_updown_nodes) {
int *new_updown = realloc(node_updown, max_updown_nodes + 10);
if (new_updown) {
node_updown = new_updown;
max_updown_nodes += 10;
DEBUGLOG("realloced more space for nodes. now %d\n",
max_updown_nodes);
} else {
log_error
("Realloc failed. Node status for clvmd will be wrong\n");
return;
}
}
node_updown[nodeid] = 1;
DEBUGLOG("Added new node %d to updown list\n", nodeid);
}
/* Shut down cluster communications: call unlock_all(), release the clvmd
DLM lockspace (third argument 1), then close the cluster socket. */
void cluster_closedown()
{
unlock_all();
dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
close(cluster_sock);
}
static int is_listening(int nodeid)
{
struct cl_listen_request rq;
int status;
rq.port = CLUSTER_PORT_CLVMD;
rq.nodeid = nodeid;
do {
status = ioctl(cluster_sock, SIOCCLUSTER_ISLISTENING, &rq);
if (status < 0 && errno == EBUSY) { /* Don't busywait */
sleep(1);
errno = EBUSY; /* In case sleep trashes it */
}
}
while (status < 0 && errno == EBUSY);
return status;
}
/*
 * Fill in the up/down table for every current cluster member by asking
 * whether it is listening on the clvmd port.
 * Called only at startup time.
 */
void count_clvmds_running(void)
{
	int idx = 0;

	while (idx < num_nodes) {
		int listening = is_listening(nodes[idx].node_id);

		node_updown[nodes[idx].node_id] = listening;
		idx++;
	}
}
/*
 * Get a list of active cluster members.
 *
 * Asks the cluster socket for the member count, (re)allocates the nodes
 * array if it is missing or too small, then fetches the member details into
 * it and updates num_nodes.  On first use it also allocates the zeroed
 * node_updown table and records its size in max_updown_nodes.
 *
 * If the initial count query fails, the error is reported with perror() and
 * num_nodes is left at -1.  Allocation failures, a failing detail query and
 * a structure-size mismatch with the kernel are fatal (exit()).
 */
static void get_members()
{
	struct cl_cluster_nodelist nodelist;

	num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, 0);
	if (num_nodes == -1) {
		perror("get nodes");
		return;
	}

	/* Not enough room for new nodes list ? */
	if (num_nodes > count_nodes && nodes) {
		free(nodes);
		nodes = NULL;
	}

	if (nodes == NULL) {
		count_nodes = num_nodes + 10;	/* Overallocate a little */
		nodes = malloc(count_nodes * sizeof(struct cl_cluster_node));
		if (!nodes) {
			perror("Unable to allocate nodes array\n");
			exit(5);
		}
	}

	nodelist.max_members = count_nodes;
	nodelist.nodes = nodes;

	num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, &nodelist);
	if (num_nodes <= 0) {
		perror("get node details");
		exit(6);
	}

	/* Sanity check struct */
	if (nodes[0].size != sizeof(struct cl_cluster_node)) {
		log_error
		    ("sizeof(cl_cluster_node) does not match size returned from the kernel: aborting\n");
		exit(10);
	}

	if (node_updown == NULL) {
		int entries = max(num_nodes, max_updown_nodes);

		/* calloc() gives a zeroed table: every node starts "down" */
		node_updown = calloc(entries, sizeof(int));
		if (!node_updown) {
			perror("Unable to allocate node status array");
			exit(5);
		}

		/* Keep the recorded size in step with what was allocated so
		   later growth checks use the real table size */
		max_updown_nodes = entries;
	}
}
/*
 * Convert a node name to a CSID.
 * Copies the node id of the first member whose name equals 'name' into
 * csid and returns 0; returns -1 (csid untouched) if no member matches.
 */
int csid_from_name(char *csid, char *name)
{
	int idx = 0;

	while (idx < num_nodes && strcmp(name, nodes[idx].name) != 0)
		idx++;

	if (idx >= num_nodes)
		return -1;

	memcpy(csid, &nodes[idx].node_id, MAX_CSID_LEN);
	return 0;
}
/*
 * Convert a CSID to a node name.
 * Copies the name of the first member whose node id matches csid into
 * 'name' and returns 0.  If there is no match, 'name' is set to "Unknown"
 * and -1 is returned.  The caller must supply a buffer large enough for
 * the name; no length check is made here.
 */
int name_from_csid(char *csid, char *name)
{
	int idx = 0;

	while (idx < num_nodes &&
	       memcmp(csid, &nodes[idx].node_id, MAX_CSID_LEN) != 0)
		idx++;

	if (idx >= num_nodes) {
		/* Who?? */
		strcpy(name, "Unknown");
		return -1;
	}

	strcpy(name, nodes[idx].name);
	return 0;
}
/*
 * Convert a node ID to a node name.
 * Copies the name of the first member whose node id equals nodeid into
 * 'name' and returns 0.  If there is no match, 'name' is set to "Unknown"
 * and -1 is returned.  The caller must supply a buffer large enough for
 * the name; no length check is made here.
 */
int name_from_nodeid(int nodeid, char *name)
{
	int idx = 0;

	while (idx < num_nodes && nodes[idx].node_id != nodeid)
		idx++;

	if (idx >= num_nodes) {
		/* Who?? */
		strcpy(name, "Unknown");
		return -1;
	}

	strcpy(name, nodes[idx].name);
	return 0;
}
/* Convert a CSID to a node ID by copying its first MAX_CSID_LEN bytes into
an int. NOTE(review): this relies on MAX_CSID_LEN being equal to
sizeof(int); otherwise nodeid is partly uninitialised or overrun. */
static int nodeid_from_csid(char *csid)
{
int nodeid;
memcpy(&nodeid, csid, MAX_CSID_LEN);
return nodeid;
}
/* Return the result of the SIOCCLUSTER_ISQUORATE ioctl on the cluster
socket. */
int is_quorate()
{
return ioctl(cluster_sock, SIOCCLUSTER_ISQUORATE, 0);
}
/* Completion callback handed to dlm_ls_lock() by sync_lock(). arg is the
struct lock_wait of the waiting caller; signal its condition variable
while holding its mutex so the waiter wakes up. */
static void sync_ast_routine(void *arg)
{
struct lock_wait *lwait = arg;
pthread_mutex_lock(&lwait->mutex);
pthread_cond_signal(&lwait->cond);
pthread_mutex_unlock(&lwait->mutex);
}
/*
 * Take (or, with LKF_CONVERT in flags, convert) a DLM lock on 'resource'
 * in the clvmd lockspace and block until the request has completed.
 *
 *   resource  lock name
 *   mode      requested lock mode
 *   flags     DLM flags; with LKF_CONVERT, *lockid must hold the existing
 *             lock id on entry
 *   lockid    receives the lock id on completion; must not be NULL
 *
 * Returns 0 on success.  Returns -1 with errno == EINVAL if lockid is NULL,
 * the dlm_ls_lock() status if the request could not be queued, or -1 with
 * errno set to the lock status block's status if the request completed
 * with an error.
 *
 * NOTE(review): the wait has no predicate, so a spurious condition-variable
 * wakeup would be taken as completion.
 */
int sync_lock(const char *resource, int mode, int flags, int *lockid)
{
	int status;
	int saved_errno;
	struct lock_wait lwait;

	if (!lockid) {
		errno = EINVAL;
		return -1;
	}

	/* Conversions need the lockid in the LKSB */
	if (flags & LKF_CONVERT)
		lwait.lksb.sb_lkid = *lockid;

	pthread_cond_init(&lwait.cond, NULL);
	pthread_mutex_init(&lwait.mutex, NULL);

	/* Hold the mutex across the request so the completion signal from
	   sync_ast_routine() cannot arrive before we start waiting */
	pthread_mutex_lock(&lwait.mutex);

	status = dlm_ls_lock(lockspace,
			     mode,
			     &lwait.lksb,
			     flags,
			     resource,
			     strlen(resource),
			     0, sync_ast_routine, &lwait, NULL, NULL);

	/* Remember the failure reason before any cleanup call can touch it */
	saved_errno = errno;

	/* Wait for it to complete (only if it was actually queued) */
	if (!status)
		pthread_cond_wait(&lwait.cond, &lwait.mutex);

	/* Always release and tear down the wait objects, including on the
	   error path, which previously returned with the mutex held */
	pthread_mutex_unlock(&lwait.mutex);
	pthread_mutex_destroy(&lwait.mutex);
	pthread_cond_destroy(&lwait.cond);

	if (status) {
		errno = saved_errno;
		return status;
	}

	*lockid = lwait.lksb.sb_lkid;

	errno = lwait.lksb.sb_status;
	if (lwait.lksb.sb_status)
		return -1;

	return 0;
}
/*
 * Release the DLM lock 'lockid' in the clvmd lockspace and block until the
 * unlock has completed.  'resource' is unused.
 *
 * Returns 0 when the lock status block reports EUNLOCK.  Returns the
 * dlm_ls_unlock() status if the request could not be queued, or -1 with
 * errno set to the lock status block's status otherwise.
 *
 * NOTE(review): no completion callback is passed here; presumably the one
 * registered when the lock was taken is invoked with &lwait -- confirm
 * against libdlm.  The wait also has no predicate, so a spurious wakeup
 * would be taken as completion.
 */
int sync_unlock(const char *resource /* UNUSED */, int lockid)
{
	int status;
	int saved_errno;
	struct lock_wait lwait;

	pthread_cond_init(&lwait.cond, NULL);
	pthread_mutex_init(&lwait.mutex, NULL);

	/* Hold the mutex across the request so the completion signal cannot
	   arrive before we start waiting */
	pthread_mutex_lock(&lwait.mutex);

	status = dlm_ls_unlock(lockspace, lockid, 0, &lwait.lksb, &lwait);

	/* Remember the failure reason before any cleanup call can touch it */
	saved_errno = errno;

	/* Wait for it to complete (only if it was actually queued) */
	if (!status)
		pthread_cond_wait(&lwait.cond, &lwait.mutex);

	/* Always release and tear down the wait objects, including on the
	   error path, which previously returned with the mutex held */
	pthread_mutex_unlock(&lwait.mutex);
	pthread_mutex_destroy(&lwait.mutex);
	pthread_cond_destroy(&lwait.cond);

	if (status) {
		errno = saved_errno;
		return status;
	}

	errno = lwait.lksb.sb_status;
	if (lwait.lksb.sb_status != EUNLOCK)
		return -1;

	return 0;
}

@ -0,0 +1,219 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
CLVMD Cluster LVM daemon command processor.
To add commands to the daemon simply add a processor in do_command and return
any messages back in buf and the length in *retlen. The initial value of
buflen is the maximum size of the buffer. if buf is not large enough then it
may be reallocated by the functions in here to a suitable size bearing in
mind that anything larger than the passed-in size will have to be returned
using the system LV and so performance will suffer.
The status return will be negated and passed back to the originating node.
pre- and post- command routines are called only on the local node. The
purpose is primarily to get and release locks, though the pre- routine should
also do any other local setups required by the command (if any) and can
return a failure code that prevents the command from being distributed around
the cluster
The pre- and post- routines are run in their own thread so can block as long
as they like, do_command is run in the main clvmd thread so should not block for
too long. If the pre-command returns an error code (!=0) then the command
will not be propagated around the cluster but the post-command WILL be called
Also note that the pre and post routine are *always* called on the local
node, even if the command to be executed was only requested to run on a
remote node. It may peek inside the client structure to check the status of
the command.
The clients of the daemon must, naturally, understand the return messages and
codes.
Routines in here may only READ the values in the client structure passed in
apart from client->private which they are free to do what they like with.
*/
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include "list.h"
#include "locking.h"
#include "log.h"
#include "lvm-functions.h"
#include "clvmd-comms.h"
#include "clvm.h"
#include "clvmd.h"
#include "libdlm.h"
/*
 * Turn an snprintf() result into the reply length to report, counting the
 * terminating NUL.  snprintf() returns the length the text would have had,
 * so on truncation the result is clamped to what actually fits in buflen.
 */
static int _reply_len(int written, int buflen)
{
	if (buflen <= 0 || written < 0)
		return 0;
	if (written >= buflen)
		written = buflen - 1;
	return written + 1;
}

/*
 * This is where all the real work happens.
 * NOTE: client will be NULL when this is executed on a remote node.
 *
 *   msg, msglen  the received message; the arguments start after the
 *                NUL-terminated node name that follows the header
 *   buf, buflen  reply buffer and its size; *buf may be replaced by a
 *                larger allocation
 *   retlen       receives the reply length (including the NUL) whenever
 *                reply text is written
 *
 * Returns 0 on success or an errno-style status.  For a non-zero status the
 * matching strerror() text is placed in *buf, except that EIO from an LV
 * lock is reported with a less alarming fixed message.  ENOMEM is returned
 * if the reply buffer cannot be enlarged.
 */
int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
	       char **buf, int buflen, int *retlen)
{
	char *args = msg->node + strlen(msg->node) + 1;
	int arglen = msglen - sizeof(struct clvm_header) - strlen(msg->node);
	int status = 0;
	char *lockname;
	struct utsname nodeinfo;
	unsigned char lock_cmd;
	unsigned char lock_flags;

	/* Do the command */
	switch (msg->cmd) {
		/* Just a test message */
	case CLVMD_CMD_TEST:
		if (arglen > buflen) {
			int newlen = arglen + 200;
			char *newbuf = realloc(*buf, newlen);

			if (!newbuf) {
				/* Keep the old buffer so the error text
				   below still has somewhere to go */
				status = ENOMEM;
				break;
			}
			*buf = newbuf;
			buflen = newlen;
		}
		uname(&nodeinfo);
		*retlen = _reply_len(snprintf(*buf, buflen,
					      "TEST from %s: %s v%s",
					      nodeinfo.nodename, args,
					      nodeinfo.release), buflen);
		break;

	case CLVMD_CMD_LOCK_VG:
		/* Check to see if the VG is in use by LVM1 */
		status = do_check_lvm1(&args[2]);
		break;

	case CLVMD_CMD_LOCK_LV:
		/* This is the biggie */
		lock_cmd = args[0];
		lock_flags = args[1];
		lockname = &args[2];
		status = do_lock_lv(lock_cmd, lock_flags, lockname);
		/* Replace EIO with something less scary */
		if (status == EIO) {
			*retlen = _reply_len(snprintf(*buf, buflen,
						      "Internal lvm error, check syslog"),
					     buflen);
			return EIO;
		}
		break;

	default:
		/* Won't get here because command is validated in pre_command */
		break;
	}

	/* Check the status of the command and return the error text.
	   The text is data, so it must not be used as the format string. */
	if (status) {
		*retlen = _reply_len(snprintf(*buf, buflen, "%s",
					      strerror(status)), buflen);
	}

	return status;
}
/*
 * Pre-command is a good place to get locks that are needed only for the
 * duration of the commands around the cluster (don't forget to free them in
 * post-command), and to sanity check the command arguments.
 *
 * The command is read from client->bits.localsock.cmd.  For the TEST
 * command the lock id obtained here is stashed in
 * client->bits.localsock.private for do_post_command() to release.
 *
 * Returns 0 on success or a non-zero status; EINVAL for an unknown command.
 */
int do_pre_command(struct local_client *client)
{
	struct clvm_header *header =
	    (struct clvm_header *) client->bits.localsock.cmd;
	unsigned char lock_cmd;
	unsigned char lock_flags;
	char *args = header->node + strlen(header->node) + 1;
	/* Start from a defined value: sync_lock() leaves it untouched when
	   the request cannot be queued, and it is stored regardless */
	int lockid = 0;
	int status = 0;
	char *lockname;

	switch (header->cmd) {
	case CLVMD_CMD_TEST:
		status = sync_lock("CLVMD_TEST", LKM_EXMODE, 0, &lockid);
		/* Widen through long so the int <-> pointer conversion
		   mirrors the (int) (long) cast used in do_post_command() */
		client->bits.localsock.private = (void *) (long) lockid;
		break;

	case CLVMD_CMD_LOCK_VG:
		lock_cmd = args[0];
		lock_flags = args[1];
		lockname = &args[2];
		DEBUGLOG("doing PRE command LOCK_VG %s at %x\n", lockname,
			 lock_cmd);
		if (lock_cmd == LCK_UNLOCK) {
			hold_unlock(lockname);
		} else {
			status =
			    hold_lock(lockname, (int) lock_cmd,
				      (int) lock_flags);
			/* Report the reason rather than the raw result */
			if (status)
				status = errno;
		}
		break;

	case CLVMD_CMD_LOCK_LV:
		lock_cmd = args[0];
		lock_flags = args[1];
		lockname = &args[2];
		status = pre_lock_lv(lock_cmd, lock_flags, lockname);
		break;

	default:
		log_error("Unknown command %d received\n", header->cmd);
		status = EINVAL;
	}
	return status;
}
/*
 * Post-command hook.  Note that it is called even if the pre-command or
 * the real command failed.
 *
 * TEST releases the lock whose id was stashed in
 * client->bits.localsock.private; LOCK_LV hands its command byte, flag byte
 * and lock name to post_lock_lv(); every other command needs nothing done.
 *
 * Returns the status of whichever routine was called, or 0.
 */
int do_post_command(struct local_client *client)
{
	struct clvm_header *hdr =
	    (struct clvm_header *) client->bits.localsock.cmd;
	/* Arguments start after the NUL-terminated node name */
	char *payload = hdr->node + strlen(hdr->node) + 1;

	if (hdr->cmd == CLVMD_CMD_TEST)
		return sync_unlock("CLVMD_TEST",
				   (int) (long) client->bits.localsock.private);

	if (hdr->cmd == CLVMD_CMD_LOCK_LV) {
		unsigned char cmd_byte = payload[0];
		unsigned char flag_byte = payload[1];

		return post_lock_lv(cmd_byte, flag_byte, &payload[2]);
	}

	/* CLVMD_CMD_LOCK_VG and anything else: nothing to do here */
	return 0;
}

@ -0,0 +1,55 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Abstraction layer for clvmd cluster communications
*/
#ifndef _CLVMD_COMMS_H
#define _CLVMD_COMMS_H
/* Interface every cluster back-end must provide to clvmd. The behaviour
notes below describe the cman implementation (clvmd-cman.c). */
struct local_client;
/* Send msglen bytes to the node identified by csid, or broadcast when csid
is NULL; errtext is logged if the send fails. Returns the send result. */
extern int cluster_send_message(void *buf, int msglen, char *csid,
const char *errtext);
/* Look up a node name from its CSID: 0 on success, -1 (name set to
"Unknown") if not found. */
extern int name_from_csid(char *csid, char *name);
/* Look up a CSID from a node name: 0 on success, -1 if not found. */
extern int csid_from_name(char *csid, char *name);
/* Number of members in the current membership list. */
extern int get_num_nodes(void);
/* Read one message from the cluster into buf; fills in the sender's csid
and always sets *new_client to NULL. */
extern int cluster_fd_callback(struct local_client *fd, char *buf, int len,
char *csid, struct local_client **new_client);
/* Initialise cluster communications: 0 on success, -1 on failure. */
extern int init_cluster(void);
extern int get_main_cluster_fd(void); /* gets accept FD or cman cluster socket */
/* Call callback once per known node; returns -1 if any node is down,
otherwise 0. */
extern int cluster_do_node_callback(struct local_client *client,
void (*callback) (struct local_client *,
char *csid, int node_up));
/* Query the cluster for its quorum state. */
extern int is_quorate(void);
/* Write the local node's CSID (MAX_CSID_LEN bytes) into csid. */
extern void get_our_csid(char *csid);
/* Mark the node with this CSID as running a clvmd. */
extern void add_up_node(char *csid);
/* Release locks and the lockspace, then close the cluster socket. */
extern void cluster_closedown(void);
/* Blocking lock request; *lockid receives the lock id. 0 on success. */
extern int sync_lock(const char *resource, int mode, int flags, int *lockid);
/* Blocking unlock of lockid; resource is unused by the cman back-end.
0 on success. */
extern int sync_unlock(const char *resource, int lockid);
/* Back-end selection: each branch is expected to supply MAX_CSID_LEN.
NOTE(review): the gulm definition is not visible here; presumably it
comes from tcp-comms.h -- confirm. */
#ifdef USE_GULM
#include "tcp-comms.h"
#else
/* cman */
#include "cnxman-socket.h"
#define MAX_CSID_LEN 4
#endif
#endif

880
daemons/clvmd/clvmd-gulm.c Normal file

@ -0,0 +1,880 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 2002-2003 All rights reserved.
**
*******************************************************************************
******************************************************************************/
/* This provides the interface between clvmd and gulm as the cluster
* and lock manager.
*
* It also provides the "liblm" functions, as it's hard (and pointless)
* to separate them out when using gulm.
*
* What it does /not/ provide is the communications between clvmd daemons
* on the cluster nodes. That is done in tcp-comms.c
*/
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <utmpx.h>
#include <syslog.h>
#include <assert.h>
#include "ccs.h"
#include "list.h"
#include "locking.h"
#include "log.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "clvmd.h"
#include "hash.h"
#include "clvmd-gulm.h"
#include "libgulm.h"
#include "hash.h"
/* Hash list of nodes in the cluster */
static struct hash_table *node_hash;
/* hash list of outstanding lock requests */
static struct hash_table *lock_hash;
/* Copy of the current core state */
static uint8_t current_corestate;
/* Number of active nodes */
static int num_nodes;
static char *cluster_name;
static pthread_mutex_t lock_start_mutex;
static volatile int lock_start_flag;
/* Per-node record stored in node_hash, keyed by the node's csid
   (see hash_insert_binary() in get_all_cluster_nodes()). */
struct node_info
{
/* Entries are created as NODE_DOWN; set_node_state() moves them between
   NODE_UP and NODE_DOWN, add_up_node() marks them NODE_CLVMD and
   add_down_node() demotes NODE_CLVMD back to NODE_UP. */
enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state;
/* Node name as returned by CCS */
char name[MAX_CLUSTER_MEMBER_NAME_LEN];
};
/* Rendezvous between a thread blocked in _lock_resource()/_unlock_resource()
   and the lock_lock_state() callback that delivers the result.  Instances
   live on the waiter's stack and are published through lock_hash. */
struct lock_wait
{
/* Signalled by lock_lock_state() */
pthread_cond_t cond;
/* Held by the waiter from before the request is sent until it waits */
pthread_mutex_t mutex;
/* Error code stored by lock_lock_state() before it signals */
int status;
};
/* Forward */
static int read_from_core_sock(struct local_client *client, char *buf, int len, char *csid,
struct local_client **new_client);
static int read_from_lock_sock(struct local_client *client, char *buf, int len, char *csid,
struct local_client **new_client);
static int get_all_cluster_nodes(void);
/* In tcp-comms.c */
extern struct hash_table *sock_hash;
/*
 * Wrap a GULM file descriptor in a zeroed local_client record of type
 * CLUSTER_INTERNAL and hand it to the main loop via add_client().
 *
 * fd       - descriptor to register
 * callback - handler stored in the record
 *
 * Returns 0 on success, -1 if the record could not be allocated.
 */
static int add_internal_client(int fd, fd_callback_t callback)
{
	struct local_client *entry;

	DEBUGLOG("Add_internal_client, fd = %d\n", fd);

	/* calloc() hands back the record already cleared */
	entry = calloc(1, sizeof(*entry));
	if (entry == NULL) {
		DEBUGLOG("malloc failed\n");
		return -1;
	}

	entry->fd = fd;
	entry->type = CLUSTER_INTERNAL;
	entry->callback = callback;
	add_client(entry);

	return 0;
}
/* Gulm library handle */
static gulm_interface_p gulm_if;
static lg_core_callbacks_t core_callbacks;
static lg_lockspace_callbacks_t lock_callbacks;
/* Handler installed by init_cluster() for SIGINT and SIGTERM: log out of
   GULM via cluster_closedown() and terminate with exit status 0.
   NOTE(review): exit() and (in DEBUG builds) the fprintf() behind DEBUGLOG
   are not async-signal-safe; whether the lg_* calls made by
   cluster_closedown() are is not visible here. */
static void badsig_handler(int sig)
{
DEBUGLOG("got sig %d\n", sig);
cluster_closedown();
exit(0);
}
/* Handler installed by init_cluster() for SIGHUP: refresh the node table.
   NOTE(review): get_all_cluster_nodes() calls malloc(), talks to CCS and
   modifies node_hash, none of which is async-signal-safe; consider setting
   a flag here and doing the re-read from the main loop. */
static void sighup_handler(int sig)
{
DEBUGLOG("got SIGHUP\n");
/* Re-read CCS node list */
get_all_cluster_nodes();
}
/*
 * Bring up the GULM cluster interface: fetch the cluster name and node list
 * from CCS, log in to the GULM core and lock servers, register both GULM
 * descriptors with the main loop and request the initial node list.
 *
 * Locking is held off (lock_start_mutex stays locked, lock_start_flag set)
 * until lock_login_reply() reports a successful lock-server login.
 *
 * Returns 0 on success and non-zero on failure.  A failed lock-server login
 * or lockspace registration terminates the process, as before.
 */
int init_cluster()
{
	int status;
	int ccs_h;

	/* Get cluster name from CCS */
	/* TODO: is this right? */
	ccs_h = ccs_connect();
	if (ccs_h < 0) {
		/* Without CCS there is neither a cluster name nor a node list */
		syslog(LOG_ERR, "Cannot connect to CCS\n");
		DEBUGLOG("ccs_connect failed: %d\n", ccs_h);
		return -1;
	}
	ccs_get(ccs_h, "//cluster/@name", &cluster_name);
	ccs_disconnect(ccs_h);

	/* Block locking until we are logged in */
	pthread_mutex_init(&lock_start_mutex, NULL);
	pthread_mutex_lock(&lock_start_mutex);
	lock_start_flag = 1;

	node_hash = hash_create(100);
	lock_hash = hash_create(10);
	if (!node_hash || !lock_hash) {
		/* Every later lookup would dereference a NULL table */
		syslog(LOG_ERR, "Cannot allocate node/lock hash tables\n");
		DEBUGLOG("hash_create failed\n");
		return -1;
	}

	/* Get all nodes from CCS */
	get_all_cluster_nodes();

	/* Initialise GULM library */
	status = lg_initialize(&gulm_if, cluster_name, "clvmd");
	if (status) {
		DEBUGLOG("lg_initialize failed: %d\n", status);
		return status;
	}

	/* Connect to core - we are not "important" :-) */
	status = lg_core_login(gulm_if, 0);
	if (status) {
		DEBUGLOG("lg_core_login failed: %d\n", status);
		return status;
	}

	/* Initialise the inter-node comms */
	status = init_comms();
	if (status)
		return status;

	/* Add core FD to the list */
	status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock);
	if (status) {
		DEBUGLOG("can't allocate client space\n");
		return status;
	}

	/* Connect to the lock server */
	if (lg_lock_login(gulm_if, "CLVM")) {
		syslog(LOG_ERR, "Cannot login in to LOCK server\n");
		DEBUGLOG("Cannot login in to LOCK server\n");
		exit(88);
	}

	/* Add lockspace FD to the list */
	status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock);
	if (status) {
		DEBUGLOG("can't allocate client space\n");
		exit(status);
	}

	/* Request a list of nodes, we can't really do anything until
	   this comes back */
	status = lg_core_nodelist(gulm_if);
	if (status) {
		DEBUGLOG("lg_core_nodelist failed: %d\n", status);
		return status;
	}

	/* So I can kill it without taking GULM down too */
	signal(SIGINT, badsig_handler);
	signal(SIGTERM, badsig_handler);

	/* Re-read the node list on SIGHUP */
	signal(SIGHUP, sighup_handler);

	return 0;
}
/* Tear down the GULM session: log out of the lock server and the core,
   shut the core connection down and release the library handle.  The calls
   are issued in this fixed order; their return values are ignored. */
void cluster_closedown()
{
DEBUGLOG("cluster_closedown\n");
lg_lock_logout(gulm_if);
lg_core_logout(gulm_if);
lg_core_shutdown(gulm_if);
lg_release(gulm_if);
}
/*
 * Expire locks for a named node, or for this node when nodename is NULL
 * (the local name is then taken from uname()).
 *
 * An all-ones mask of GIO_KEY_SIZE bytes is passed to lg_lock_drop_exp().
 * Failures are only logged.
 */
#define GIO_KEY_SIZE 46
static void drop_expired_locks(char *nodename)
{
	struct utsname nodeinfo;
	uint8_t mask[GIO_KEY_SIZE];

	memset(mask, 0xff, GIO_KEY_SIZE);

	if (!nodename) {
		/* On failure nodeinfo is not filled in, so don't pass it on */
		if (uname(&nodeinfo) < 0) {
			DEBUGLOG("uname() failed: %s\n", strerror(errno));
			return;
		}
		nodename = nodeinfo.nodename;
	}

	if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE)) {
		DEBUGLOG("Error calling lg_lock_drop_exp()\n");
	}
}
/*
 * Main-loop callback for the GULM core descriptor.  Lets libgulm process
 * pending core messages, dispatching them through core_callbacks.
 * Never creates a new client (*new_client is set to NULL).
 *
 * Returns the negative libgulm status on error, otherwise 1.
 */
static int read_from_core_sock(struct local_client *client, char *buf, int len, char *csid,
			       struct local_client **new_client)
{
	int rc;

	*new_client = NULL;

	rc = lg_core_handle_messages(gulm_if, &core_callbacks, NULL);
	if (rc < 0)
		return rc;

	return 1;
}
/*
 * Main-loop callback for the GULM lockspace descriptor.  Lets libgulm
 * process pending lock messages, dispatching them through lock_callbacks.
 * Never creates a new client (*new_client is set to NULL).
 *
 * Returns the negative libgulm status on error, otherwise 1.
 */
static int read_from_lock_sock(struct local_client *client, char *buf, int len, char *csid,
			       struct local_client **new_client)
{
	int rc;

	*new_client = NULL;

	rc = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL);
	if (rc < 0)
		return rc;

	return 1;
}
/* CORE callback routines */
/*
 * GULM core callback: reply to lg_core_login().
 * A non-zero error terminates the process with that value as exit status;
 * otherwise the reported core state is remembered for is_quorate().
 * Always returns 0.
 */
static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate)
{
	/* Cast so the varargs match the conversion specifiers on every ABI */
	DEBUGLOG("CORE Got a Login reply. gen:%lld err:%d rank:%d corestate:%d\n",
		 (long long) gen, (int) error, (int) rank, (int) corestate);

	if (error)
		exit(error);

	current_corestate = corestate;
	return 0;
}
/*
 * Fold a GULM core node state into our own node_info state and keep the
 * num_nodes counter in step.
 *
 *  - lg_core_Logged_in: node becomes NODE_UP (counted if it was UNKNOWN or
 *    DOWN), unless it is already NODE_CLVMD, which is left alone.
 *  - lg_core_Expired / lg_core_Fenced / lg_core_Logged_out: node becomes
 *    NODE_DOWN (uncounted if it was not already DOWN) and its TCP
 *    connection is dropped.
 *  - any other state: no change.
 */
static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
{
	const int arrived = (nodestate == lg_core_Logged_in);
	const int departed = (nodestate == lg_core_Expired ||
			      nodestate == lg_core_Fenced ||
			      nodestate == lg_core_Logged_out);

	if (arrived) {
		/* Don't clobber NODE_CLVMD state */
		if (ninfo->state != NODE_CLVMD) {
			if (ninfo->state == NODE_UNKNOWN || ninfo->state == NODE_DOWN)
				num_nodes++;
			ninfo->state = NODE_UP;
		}
	} else if (departed) {
		if (ninfo->state != NODE_DOWN)
			num_nodes--;
		ninfo->state = NODE_DOWN;
		tcp_remove_client(csid);
	}

	DEBUGLOG("set_node_state, '%s' state = %d, num_nodes=%d\n",
		 ninfo->name, ninfo->state, num_nodes);
}
/*
 * Apply a GULM node state to the node whose csid is the given IP.
 * If the node is not in node_hash the CCS node list is re-read once, in
 * case the node was added after we started.
 *
 * Returns the node's record, or NULL if it is still unknown afterwards.
 */
static struct node_info *add_or_set_node(char *name, uint32_t ip, uint8_t state)
{
	char *key = (char *) &ip;
	struct node_info *found;

	found = hash_lookup_binary(node_hash, key, MAX_CSID_LEN);
	if (!found) {
		DEBUGLOG("Node %s not found, re-reading config file\n", name);
		get_all_cluster_nodes();

		/* Second and last attempt */
		found = hash_lookup_binary(node_hash, key, MAX_CSID_LEN);
	}

	if (!found) {
		DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name);
		return NULL;
	}

	set_node_state(found, key, state);
	return found;
}
/*
 * GULM core callback: one step of the node list requested with
 * lg_core_nodelist().  Called with lglcb_start, then lglcb_item for each
 * node, then lglcb_stop.
 *
 * Each item updates our node table; the stop marker tells clvmd that
 * cluster initialisation is complete and marks this node as running clvmd.
 * Always returns 0.
 */
static int core_nodelist(void *misc, lglcb_t type, char *name, uint32_t ip, uint8_t state)
{
	DEBUGLOG("CORE nodelist\n");

	if (type == lglcb_start) {
		DEBUGLOG("Got Nodelist, start\n");
	} else if (type == lglcb_item) {
		DEBUGLOG("Got nodelist, item: %s, %#x, %#x\n", name, ip, state);
		add_or_set_node(name, ip, state);
	} else if (type == lglcb_stop) {
		char ourcsid[MAX_CSID_LEN];

		DEBUGLOG("Got Nodelist, stop\n");
		clvmd_cluster_init_completed();

		/* Mark ourself as up */
		get_our_csid(ourcsid);
		add_up_node(ourcsid);
	} else {
		DEBUGLOG("Unknown lglcb_t %#x\n", type);
	}

	return 0;
}
/* GULM core callback: the core state changed.  Only the new state is kept
   (it is what is_quorate() inspects); the master's IP and name are logged
   in DEBUG builds and otherwise unused.  Always returns 0. */
static int core_statechange(void *misc, uint8_t corestate, uint32_t masterip, char *mastername)
{
DEBUGLOG("CORE Got statechange corestate:%#x masterip:%#x mastername:%s\n",
corestate, masterip, mastername);
current_corestate = corestate;
return 0;
}
/*
 * GULM core callback: a single node changed state.
 * When GULM does not supply the node's IP it is looked up by name in our
 * own table; nodes we cannot identify are ignored.  If the node ends up
 * NODE_DOWN its expired locks are dropped.
 * Always returns 0.
 */
static int core_nodechange(void *misc, char *nodename, uint32_t nodeip, uint8_t nodestate)
{
	struct node_info *node;

	DEBUGLOG("CORE node change, name=%s, ip=%x, state = %d\n", nodename, nodeip, nodestate);

	/* If we don't get nodeip here, try a lookup by name */
	if (nodeip == 0)
		csid_from_name((char *)&nodeip, nodename);
	if (nodeip == 0)
		return 0;

	node = add_or_set_node(nodename, nodeip, nodestate);

	/* Check if we need to drop any expired locks */
	if (node != NULL && node->state == NODE_DOWN)
		drop_expired_locks(nodename);

	return 0;
}
/* GULM core callback: error notification.  The error is only logged (DEBUG
   builds); no recovery is attempted.  Always returns 0. */
static int core_error(void *misc, uint32_t err)
{
DEBUGLOG("CORE error: %d\n", err);
// Not sure what happens here
return 0;
}
/* LOCK callback routines */
/*
 * GULM lock callback: reply to lg_lock_login().
 * A non-zero error terminates the process with that value as exit status.
 * On success any of our own expired locks are dropped and, the first time
 * through, lock_start_mutex is released so that threads held up in
 * _lock_resource() can proceed.
 * Always returns 0.
 */
static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
{
	DEBUGLOG("LOCK Got a Login reply. err:%d which:%d\n",
		 error, which);

	if (error)
		exit(error);

	/* Drop any expired locks for us that might be hanging around */
	drop_expired_locks(NULL);

	/* Locking already enabled by an earlier reply: nothing more to do */
	if (!lock_start_flag)
		return 0;

	/* Enable locking operations in other threads */
	lock_start_flag = 0;
	pthread_mutex_unlock(&lock_start_mutex);

	return 0;
}
static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen, uint8_t state, uint32_t flags, uint32_t error,
uint8_t *LVB, uint16_t LVBlen)
{
struct lock_wait *lwait;
DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error);
lwait = hash_lookup(lock_hash, key);
if (!lwait)
{
DEBUGLOG("Can't find hash entry for resource %s\n", key);
return 0;
}
lwait->status = error;
pthread_mutex_lock(&lwait->mutex);
pthread_cond_signal(&lwait->cond);
pthread_mutex_unlock(&lwait->mutex);
return 0;
}
/* GULM lock callback: error notification.  The error is only logged (DEBUG
   builds); no waiter is woken and no recovery is attempted.
   Always returns 0. */
static int lock_error(void *misc, uint32_t err)
{
DEBUGLOG("LOCK error: %d\n", err);
// Not sure what happens here
return 0;
}
/* CORE callbacks */
/* Handlers passed to lg_core_handle_messages() by read_from_core_sock().
   Members not named here are left zero-initialised. */
static lg_core_callbacks_t core_callbacks = {
.login_reply = core_login_reply,
.nodelist = core_nodelist,
.statechange = core_statechange,
.nodechange = core_nodechange,
.error = core_error,
};
/* LOCK callbacks */
/* Handlers passed to lg_lock_handle_messages() by read_from_lock_sock().
   Members not named here are left zero-initialised. */
static lg_lockspace_callbacks_t lock_callbacks = {
.login_reply = lock_login_reply,
.lock_state = lock_lock_state,
.error = lock_error,
};
/*
 * Iterator over the nodes in node_hash that are not NODE_DOWN, for use by
 * tcp-comms.
 *
 * context - iteration cursor; *context must be NULL on the first call and
 *           is updated on every call
 * csid    - receives MAX_CSID_LEN bytes of the node's csid
 *
 * Returns 1 with csid filled in, or 0 when there are no more nodes.
 */
int get_next_node_csid(void **context, char *csid)
{
	struct node_info *node;
	void *cursor;

	/* Step past the previous position, or start from the beginning */
	if (*context)
		cursor = hash_get_next(node_hash, *context);
	else
		cursor = hash_get_first(node_hash);

	/* Skip over nodes that are down */
	while (cursor) {
		node = hash_get_data(node_hash, cursor);
		if (node->state != NODE_DOWN)
			break;
		cursor = hash_get_next(node_hash, cursor);
	}

	*context = cursor;
	if (!cursor)
		return 0;

	memcpy(csid, hash_get_key(node_hash, cursor), MAX_CSID_LEN);
	return 1;
}
/*
 * Translate a csid (MAX_CSID_LEN raw address bytes) into a node name.
 *
 * csid - key to look up in node_hash
 * name - output buffer supplied by the caller; it must be large enough for
 *        a node name or for the "UNKNOWN [a.b.c.d]" placeholder
 *
 * Returns 0 on success, or -1 (with the placeholder written to name) if the
 * csid is not in the node table.
 */
int name_from_csid(char *csid, char *name)
{
	struct node_info *ninfo;

	ninfo = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN);
	if (!ninfo) {
		/* Go through unsigned char so that octets >= 128 are not
		   printed as negative numbers where plain char is signed */
		sprintf(name, "UNKNOWN [%d.%d.%d.%d]",
			(unsigned char) csid[0], (unsigned char) csid[1],
			(unsigned char) csid[2], (unsigned char) csid[3]);
		return -1;
	}

	strcpy(name, ninfo->name);
	return 0;
}
/*
 * Reverse lookup: find the csid of the node with the given name by walking
 * node_hash.
 *
 * csid - receives MAX_CSID_LEN bytes on success, untouched otherwise
 * name - node name, compared exactly
 *
 * Returns 0 if found, -1 if no node has that name.
 */
int csid_from_name(char *csid, char *name)
{
	struct hash_node *pos;
	struct node_info *candidate;

	hash_iterate(pos, node_hash) {
		candidate = hash_get_data(node_hash, pos);
		if (!strcmp(candidate->name, name)) {
			memcpy(csid, hash_get_key(node_hash, pos), MAX_CSID_LEN);
			return 0;
		}
	}

	return -1;
}
/* Return the num_nodes counter maintained by set_node_state(): incremented
   when a node goes from UNKNOWN/DOWN to UP, decremented when a node that
   was not already DOWN goes DOWN. */
int get_num_nodes()
{
DEBUGLOG("num_nodes = %d\n", num_nodes);
return num_nodes;
}
/*
 * Record that the node with this csid is running a clvmd by setting its
 * state to NODE_CLVMD.  Unknown csids are silently ignored.
 */
void add_up_node(char *csid)
{
	struct node_info *node = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN);

	if (node)
		node->state = NODE_CLVMD;
}
/*
 * Record that the node with this csid is no longer running a clvmd.
 * Only a node currently marked NODE_CLVMD is demoted to NODE_UP; any other
 * state is left alone, since gulm may mark the node DOWN quite soon anyway.
 * Unknown csids are silently ignored.
 */
void add_down_node(char *csid)
{
	struct node_info *node = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN);

	if (node && node->state == NODE_CLVMD)
		node->state = NODE_UP;
}
/*
 * Walk the node table and, for every node that has an entry in sock_hash
 * (tcp-comms), invoke the callback so the caller learns whether that node
 * is up.
 *
 * master_client - passed through to the callback unchanged
 * callback      - called as callback(master_client, csid, node_up), where
 *                 node_up is non-zero only for nodes in state NODE_CLVMD
 *
 * Always returns 0.
 */
int cluster_do_node_callback(struct local_client *master_client,
			     void (*callback)(struct local_client *, char *csid, int node_up))
{
	struct hash_node *pos;
	struct node_info *node;

	hash_iterate(pos, node_hash) {
		char csid[MAX_CSID_LEN];

		node = hash_get_data(node_hash, pos);
		memcpy(csid, hash_get_key(node_hash, pos), MAX_CSID_LEN);

		DEBUGLOG("down_callback. node %s, state = %d\n", node->name, node->state);

		/* Only report nodes that tcp-comms has a socket entry for */
		if (hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN))
			callback(master_client, csid, node->state == NODE_CLVMD);
	}

	return 0;
}
/*
 * Convert a gulm error code to the unix convention.
 *
 * Returns 0 (errno untouched) when gulm_ret is 0, otherwise -1 with errno
 * set: lg_err_TryFailed -> EAGAIN, lg_err_AlreadyPend -> EBUSY, anything
 * else -> EINVAL.
 */
static int gulm_to_errno(int gulm_ret)
{
	/* Success must not disturb errno */
	if (!gulm_ret)
		return 0;

	switch (gulm_ret) {
	case lg_err_TryFailed:
		errno = EAGAIN;
		break;

	case lg_err_AlreadyPend:
		errno = EBUSY;
		break;	/* this break was missing: EBUSY fell through to EINVAL */

	/* More?? */
	default:
		errno = EINVAL;
		break;
	}

	return -1;
}
/*
 * Real locking: request a GULM lock state for one resource and block until
 * lock_lock_state() reports the outcome.
 *
 * resource - lock name; also the key under which the wait record is
 *            published in lock_hash
 * mode     - GULM lock state to request
 * flags    - LCK_NONBLOCK is translated to lg_lock_flag_Try, anything else
 *            is passed through
 * lockid   - unused here
 *
 * Returns the non-zero lg_lock_state_req() status if the request could not
 * be sent; otherwise 0 or -1 with errno set (see gulm_to_errno()).
 *
 * Must not be called from the thread that runs the GULM callbacks, as it
 * waits for that thread to wake it.
 *
 * NOTE(review): the wait has no predicate, so a spurious condvar wakeup
 * would return early; fixing that needs a "done" flag in struct lock_wait.
 */
static int _lock_resource(char *resource, int mode, int flags, int *lockid)
{
	int status;
	struct lock_wait lwait;

	/* Wait until the lock module is ready */
	if (lock_start_flag) {
		pthread_mutex_lock(&lock_start_mutex);
		pthread_mutex_unlock(&lock_start_mutex);
	}

	pthread_cond_init(&lwait.cond, NULL);
	pthread_mutex_init(&lwait.mutex, NULL);
	/* Taken before the request goes out so the reply cannot be signalled
	   before we are waiting for it */
	pthread_mutex_lock(&lwait.mutex);

	/* This needs to be converted from DLM/LVM2 value for GULM */
	if (flags == LCK_NONBLOCK)
		flags = lg_lock_flag_Try;

	hash_insert(lock_hash, resource, &lwait);
	DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);

	status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
				   mode, flags, NULL, 0);
	if (status) {
		DEBUGLOG("lg_lock_state returned %d\n", status);
	} else {
		/* Wait for it to complete */
		pthread_cond_wait(&lwait.cond, &lwait.mutex);
	}

	/* Always undo the setup: lwait lives on our stack, so leaving it in
	   the hash (or leaving the mutex locked) on the failure path would
	   hand the callback a dangling pointer */
	pthread_mutex_unlock(&lwait.mutex);
	hash_remove(lock_hash, resource);
	pthread_mutex_destroy(&lwait.mutex);
	pthread_cond_destroy(&lwait.cond);

	if (status)
		return status;

	DEBUGLOG("lock-resource returning %d\n", lwait.status);
	return gulm_to_errno(lwait.status);
}
/*
 * Real unlocking: request lg_lock_state_Unlock for one resource and block
 * until lock_lock_state() reports the outcome.
 *
 * resource - lock name; also the key under which the wait record is
 *            published in lock_hash
 * lockid   - unused here
 *
 * Returns the non-zero lg_lock_state_req() status if the request could not
 * be sent; otherwise 0 or -1 with errno set (see gulm_to_errno()).
 *
 * Must not be called from the thread that runs the GULM callbacks, as it
 * waits for that thread to wake it.
 *
 * NOTE(review): the wait has no predicate, so a spurious condvar wakeup
 * would return early; fixing that needs a "done" flag in struct lock_wait.
 */
static int _unlock_resource(char *resource, int lockid)
{
	int status;
	struct lock_wait lwait;

	pthread_cond_init(&lwait.cond, NULL);
	pthread_mutex_init(&lwait.mutex, NULL);
	/* Taken before the request goes out so the reply cannot be signalled
	   before we are waiting for it */
	pthread_mutex_lock(&lwait.mutex);

	hash_insert(lock_hash, resource, &lwait);
	DEBUGLOG("unlock_resource %s\n", resource);

	status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
				   lg_lock_state_Unlock, 0, NULL, 0);
	if (status) {
		DEBUGLOG("lg_lock_state(unlock) returned %d\n", status);
	} else {
		/* Wait for it to complete */
		pthread_cond_wait(&lwait.cond, &lwait.mutex);
	}

	/* Always undo the setup: lwait lives on our stack, so leaving it in
	   the hash (or leaving the mutex locked) on the failure path would
	   hand the callback a dangling pointer */
	pthread_mutex_unlock(&lwait.mutex);
	hash_remove(lock_hash, resource);
	pthread_mutex_destroy(&lwait.mutex);
	pthread_cond_destroy(&lwait.cond);

	if (status)
		return status;

	return gulm_to_errno(lwait.status);
}
/* These two locking functions MUST be called in a separate thread from
the clvmd main loop because they expect to be woken up by it.
These are abstractions around the real locking functions (above)
as we need to emulate the DLM's EX/PW/CW interaction with GULM using
two locks.
To aid unlocking, we store the lock mode in the lockid (as GULM
doesn't use this).
*/
/*
 * Take a cluster lock, emulating the DLM's EX/PW/CW modes with two GULM
 * locks named "<resource>-1" and "<resource>-2":
 *
 *   LCK_EXCL  - lock 1 exclusive, then lock 2 exclusive without blocking
 *   LCK_READ  - lock 1 shared
 *   LCK_WRITE - lock 2 exclusive
 *
 * resource - base lock name
 * mode     - one of the LCK_* modes above; anything else fails with EINVAL
 * flags    - passed to the first (or only) GULM request
 * lockid   - always set to mode on return, so that sync_unlock() knows
 *            which GULM locks to release
 *
 * Returns 0 on success, non-zero on failure (-1 with errno set when the
 * failure was reported by the lock server).
 */
int sync_lock(const char *resource, int mode, int flags, int *lockid)
{
	int status;
	int saved_errno;
	char lock1[strlen(resource)+3];
	char lock2[strlen(resource)+3];

	snprintf(lock1, sizeof(lock1), "%s-1", resource);
	snprintf(lock2, sizeof(lock2), "%s-2", resource);

	switch (mode) {
	case LCK_EXCL:
		status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid);
		if (status)
			break;

		/* If we can't get this lock then bail out.  _lock_resource()
		   reports failure as a non-zero status with errno set, never
		   as a raw lg_err_* code, so test for any failure and give
		   lock 1 back rather than leaving it held. */
		status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid);
		if (status) {
			saved_errno = errno;
			_unlock_resource(lock1, LCK_EXCL);
			/* Report why lock 2 failed, not how the unlock went */
			errno = saved_errno;
		}
		break;

	case LCK_READ:
		status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid);
		break;

	case LCK_WRITE:
		status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid);
		break;

	default:
		status = -1;
		errno = EINVAL;
		break;
	}

	*lockid = mode;
	return status;
}
/*
 * Release a lock taken with sync_lock().
 *
 * resource - base lock name, as given to sync_lock()
 * lockid   - value sync_lock() stored in *lockid, i.e. the mode that was
 *            taken; it selects which of "<resource>-1" / "<resource>-2"
 *            is released (asserted to be LCK_EXCL, LCK_READ or LCK_WRITE)
 *
 * For LCK_EXCL both GULM locks are released, stopping at the first failure.
 * Returns the status of the last unlock attempted, 0 on success.
 */
int sync_unlock(const char *resource, int lockid)
{
	const size_t namelen = strlen(resource) + 3;
	char lock1[namelen];
	char lock2[namelen];
	int rc;

	snprintf(lock1, namelen, "%s-1", resource);
	snprintf(lock2, namelen, "%s-2", resource);

	/* The held lock mode is in the lock id */
	assert(lockid == LCK_EXCL ||
	       lockid == LCK_READ ||
	       lockid == LCK_WRITE);

	if (lockid == LCK_READ)
		return _unlock_resource(lock1, lockid);

	if (lockid == LCK_WRITE)
		return _unlock_resource(lock2, lockid);

	if (lockid == LCK_EXCL) {
		rc = _unlock_resource(lock1, lockid);
		if (rc)
			return rc;
		return _unlock_resource(lock2, lockid);
	}

	/* Only reachable when assertions are compiled out */
	return 0;
}
/*
 * Report whether the cluster is usable, judged from the last core state
 * recorded by core_login_reply()/core_statechange().
 *
 * Returns 1 if that state is Slave, Master or Client, 0 otherwise.
 */
int is_quorate()
{
	return current_corestate == lg_core_Slave ||
	       current_corestate == lg_core_Master ||
	       current_corestate == lg_core_Client;
}
/*
 * Get all the cluster node names & IPs from CCS and add them to our node
 * list so we know who to talk to.  Called when we start up, if we get sent
 * SIGHUP, and when GULM reports a node we have not heard of.
 *
 * A node is added (in state NODE_DOWN) only if its name resolves to an
 * address and its "clvm" CCS setting is a non-zero number.  Nodes already
 * in the table are left untouched.
 *
 * Returns 0 on success, -1 if CCS cannot be reached or memory runs out.
 *
 * NOTE(review): this relies on repeated ccs_get() calls with the same query
 * stepping through successive matches even though a different query is
 * issued in between - confirm against the CCS library.
 */
static int get_all_cluster_nodes()
{
	int ctree;
	char *nodename = NULL;

	/* Open the config file */
	ctree = ccs_connect();
	if (ctree <= 0) {
		log_error("Error connecting to CCS");
		return -1;
	}

	ccs_get(ctree, "//nodes/node/@name", &nodename);
	while (nodename) {
		char nodeip[MAX_CSID_LEN];
		char *clvmflag = NULL;
		char key[256];
		int keylen;

		/* Bounded: node names can be nearly as long as the buffer.
		   A truncated query is not sent; the node then counts as
		   having clvm disabled. */
		keylen = snprintf(key, sizeof(key),
				  "//nodes/node[@name=\"%s\"]/clvm", nodename);
		if (keylen > 0 && (size_t) keylen < sizeof(key))
			ccs_get(ctree, key, &clvmflag);

		/* clvmflag stays NULL when the node has no clvm setting */
		if ((get_ip_address(nodename, nodeip) == 0) &&
		    clvmflag && atoi(clvmflag)) {
			struct node_info *ninfo;

			/* If it's not in the list, then add it */
			ninfo = hash_lookup_binary(node_hash, nodeip, MAX_CSID_LEN);
			if (!ninfo) {
				ninfo = malloc(sizeof(struct node_info));
				if (!ninfo) {
					syslog(LOG_ERR, "Cannot alloc memory for node info\n");
					free(clvmflag);
					free(nodename);
					ccs_disconnect(ctree);
					return -1;
				}
				/* Bounded copy, always NUL-terminated */
				strncpy(ninfo->name, nodename, sizeof(ninfo->name) - 1);
				ninfo->name[sizeof(ninfo->name) - 1] = '\0';
				ninfo->state = NODE_DOWN;
				hash_insert_binary(node_hash, nodeip, MAX_CSID_LEN, ninfo);
			}
		} else {
			DEBUGLOG("node %s has clvm disabled\n", nodename);
		}

		free(clvmflag);		/* free(NULL) is a no-op */
		free(nodename);

		/* Reset first so a failing ccs_get() cannot leave us looping
		   on the pointer we have just freed */
		nodename = NULL;
		ccs_get(ctree, "//nodes/node/@name", &nodename);
	}

	/* Finished with config file */
	ccs_disconnect(ctree);
	return 0;
}
/* Return the GULM core descriptor, i.e. the same lg_core_selector() value
   that init_cluster() registers with the main loop. */
int gulm_fd(void)
{
return lg_core_selector(gulm_if);
}

@ -0,0 +1,9 @@
extern int get_next_node_csid(void **context, char *csid);
extern void add_down_node(char *csid);
extern int gulm_fd(void);
extern int get_ip_address(char *node, char *addr);
extern void tcp_remove_client(char *csid);
extern int alloc_client(int fd, char *csid, struct local_client **new_client);

1693
daemons/clvmd/clvmd.c Normal file

File diff suppressed because it is too large Load Diff

119
daemons/clvmd/clvmd.h Normal file

@ -0,0 +1,119 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _CLVMD_H
#define _CLVMD_H
#define CLVMD_MAJOR_VERSION 0
#define CLVMD_MINOR_VERSION 2
#define CLVMD_PATCH_VERSION 1
/* Name of the cluster LVM admin lock */
#define ADMIN_LOCK_NAME "CLVMD_ADMIN"
/* Default time (in seconds) we will wait for all remote commands to execute
before declaring them dead */
#define DEFAULT_CMD_TIMEOUT 60
/* One of these for each reply we get from command execution on a node */
struct node_reply {
/* Presumably the name of the node this reply came from - confirm in clvmd.c */
char node[MAX_CLUSTER_MEMBER_NAME_LEN];
/* Reply payload; ownership/lifetime is not visible from this header */
char *replymsg;
int status;
/* Link to the next reply (see localsock_bits.replies) */
struct node_reply *next;
};
/*
* These exist for the use of local sockets only when we are
* collecting responses from all cluster nodes
*/
struct localsock_bits {
/* Replies collected so far, chained through node_reply.next */
struct node_reply *replies;
int num_replies;
int expected_replies;
time_t sent_time; /* So we can check for timeouts */
int in_progress; /* Only execute one cmd at a time per client */
int sent_out; /* Flag to indicate that a command was sent
to remote nodes */
/* The CLVMD_CMD_TEST post-command path casts this back to an int lock id
   before calling sync_unlock() */
void *private; /* Private area for command processor use */
void *cmd; /* Whole command as passed down local socket */
int cmd_len; /* Length of above */
int pipe; /* Pipe to send PRE completion status down */
int finished; /* Flag to tell subthread to exit */
int all_success; /* Set to 0 if any node (or the pre_command)
failed */
/* Presumably the THREAD_PIPE client paired with this socket - confirm */
struct local_client *pipe_client;
pthread_t threadid;
enum { PRE_COMMAND, POST_COMMAND, QUIT } state;
pthread_mutex_t mutex; /* Main thread and worker synchronisation */
pthread_cond_t cond;
pthread_mutex_t reply_mutex; /* Protect reply structure */
};
/* Entries for PIPE clients */
struct pipe_bits {
struct local_client *client; /* Actual (localsock) client */
pthread_t threadid; /* Our own copy of the thread id */
};
/* Entries for Network socket clients */
struct netsock_bits {
void *private;
int flags;
};
/* Signature of the per-descriptor handler stored in local_client.callback.
   The GULM handlers in clvmd-gulm.c return a negative status on error and 1
   otherwise, and set *new_client to NULL when they create no new client. */
typedef int (*fd_callback_t) (struct local_client * fd, char *buf, int len,
char *csid, struct local_client ** new_client);
/* One of these for each fd we are listening on */
struct local_client {
int fd;
/* Kind of descriptor; presumably selects which member of 'bits' is valid
   (confirm against clvmd.c) */
enum { CLUSTER_MAIN_SOCK, CLUSTER_DATA_SOCK, LOCAL_RENDEZVOUS,
LOCAL_SOCK, THREAD_PIPE, CLUSTER_INTERNAL } type;
/* Link to the next client record */
struct local_client *next;
unsigned short xid;
/* Handler for this descriptor */
fd_callback_t callback;
/* Per-type state */
union {
struct localsock_bits localsock;
struct pipe_bits pipe;
struct netsock_bits net;
} bits;
};
/*
 * DEBUGLOG(fmt, ...) - printf-style debug trace to stderr, prefixed with
 * "CLVMD[<pid>]: <unix time> ".  Compiles to nothing unless DEBUG is
 * defined.
 *
 * The debug variant is wrapped in do { } while (0) so that it is a single
 * statement: with two bare fprintf() calls, "if (x) DEBUGLOG(...);" would
 * only guard the prefix.  pid_t and time_t are cast to match %d and %ld.
 */
#ifdef DEBUG
#define DEBUGLOG(fmt, args...) \
	do { \
		fprintf(stderr, "CLVMD[%d]: %ld ", (int) getpid(), (long) time(NULL)); \
		fprintf(stderr, fmt, ## args); \
	} while (0)
#else
#define DEBUGLOG(fmt, args...)
#endif
#ifndef max
#define max(a,b) ((a)>(b)?(a):(b))
#endif
/* The real command processor is in clvmd-command.c */
extern int do_command(struct local_client *client, struct clvm_header *msg,
int msglen, char **buf, int buflen, int *retlen);
/* Pre and post command routines are called only on the local node */
extern int do_pre_command(struct local_client *client);
extern int do_post_command(struct local_client *client);
extern int add_client(struct local_client *new_client);
extern void clvmd_cluster_init_completed(void);
#endif

@ -0,0 +1,226 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
/* CMAN socket interface header,
may be included by user or kernel code */
#ifndef __CNXMAN_SOCKET_H
#define __CNXMAN_SOCKET_H
/* Just made these up but the address family must be less than 32 (NPROTO) */
#define AF_CLUSTER 31
#define PF_CLUSTER AF_CLUSTER
/* Protocol(socket) types */
#define CLPROTO_MASTER 2
#define CLPROTO_CLIENT 3
/* Setsockopt -- maybe should be ioctls?? */
#define CLU_SET_MULTICAST 100
#define CLU_JOIN_CLUSTER 101
#define CLU_LEAVE_CLUSTER 102
#define CLU_SET_RCVONLY 103
#define CLU_SET_UNICAST 104
#define KCL_SET_MULTICAST 105
#define KCL_SET_RCVONLY 106
#define KCL_SET_UNICAST 107
#define KCL_SET_NODENAME 108
#define CLU_SET_NODENAME 109
/* ioctls -- should register these properly */
#define SIOCCLUSTER_NOTIFY _IOW('x', 0x01, int)
#define SIOCCLUSTER_REMOVENOTIFY _IO( 'x', 0x02)
#define SIOCCLUSTER_GETMEMBERS _IOR('x', 0x03, struct cl_cluster_nodelist)
#define SIOCCLUSTER_SETEXPECTED_VOTES _IOW('x', 0x04, int)
#define SIOCCLUSTER_ISQUORATE _IO( 'x', 0x05)
#define SIOCCLUSTER_ISLISTENING _IOW('x', 0x06, struct cl_listen_request)
#define SIOCCLUSTER_GETALLMEMBERS _IOR('x', 0x07, struct cl_cluster_nodelist)
#define SIOCCLUSTER_SET_VOTES _IOW('x', 0x08, int)
#define SIOCCLUSTER_GET_VERSION _IOR('x', 0x09, struct cl_version)
#define SIOCCLUSTER_SET_VERSION _IOW('x', 0x0a, struct cl_version)
#define SIOCCLUSTER_ISACTIVE _IO( 'x', 0x0b)
#define SIOCCLUSTER_KILLNODE _IOW('x', 0x0c, int)
#define SIOCCLUSTER_GET_JOINCOUNT _IO( 'x', 0x0d)
#define SIOCCLUSTER_SERVICE_REGISTER _IOW('x', 0x0e, char)
#define SIOCCLUSTER_SERVICE_UNREGISTER _IO('x', 0x0f)
#define SIOCCLUSTER_SERVICE_JOIN _IO( 'x', 0x10)
#define SIOCCLUSTER_SERVICE_LEAVE _IO( 'x', 0x20)
#define SIOCCLUSTER_SERVICE_SETSIGNAL _IOW('x', 0x30, int)
#define SIOCCLUSTER_SERVICE_STARTDONE _IOW('x', 0x40, unsigned int)
#define SIOCCLUSTER_SERVICE_GETEVENT _IOR('x', 0x50, struct cl_service_event)
#define SIOCCLUSTER_SERVICE_GETMEMBERS _IOR('x', 0x60, struct cl_cluster_node)
#define SIOCCLUSTER_SERVICE_GLOBALID _IOR('x', 0x70, uint32_t)
#define SIOCCLUSTER_SERVICE_SETLEVEL _IOR('x', 0x80, int)
#define SIOCCLUSTER_GETNODE _IOWR('x', 0x90, struct cl_cluster_node)
#define SIOCCLUSTER_BARRIER _IOW('x', 0x0a0, struct cl_barrier_info)
/* Maximum size of a cluster message */
#define MAX_CLUSTER_MESSAGE 1500
#define MAX_CLUSTER_MEMBER_NAME_LEN 255
#define MAX_BARRIER_NAME_LEN 33
#define MAX_SA_ADDR_LEN 12
#define MAX_CLUSTER_NAME_LEN 16
/* Well-known cluster port numbers */
#define CLUSTER_PORT_MEMBERSHIP 1 /* Mustn't block during cluster
* transitions! */
#define CLUSTER_PORT_SERVICES 2
#define CLUSTER_PORT_SYSMAN 10 /* Remote execution daemon */
#define CLUSTER_PORT_CLVMD 11 /* Cluster LVM daemon */
#define CLUSTER_PORT_SLM 12 /* LVM SLM (simple lock manager) */
/* Port numbers above this will be blocked when the cluster is inquorate or in
* transition */
#define HIGH_PROTECTED_PORT 9
/* Reasons for leaving the cluster */
#define CLUSTER_LEAVEFLAG_DOWN 0 /* Normal shutdown */
#define CLUSTER_LEAVEFLAG_KILLED 1
#define CLUSTER_LEAVEFLAG_PANIC 2
#define CLUSTER_LEAVEFLAG_REMOVED 3 /* This one can reduce quorum */
#define CLUSTER_LEAVEFLAG_REJECTED 4 /* Not allowed into the cluster in the
* first place */
#define CLUSTER_LEAVEFLAG_INCONSISTENT 5 /* Our view of the cluster is
* in a minority */
#define CLUSTER_LEAVEFLAG_DEAD 6 /* Discovered to be dead */
#define CLUSTER_LEAVEFLAG_FORCE 0x10 /* Forced by command-line */
/* OOB messages sent to a local socket */
#define CLUSTER_OOB_MSG_PORTCLOSED 1
#define CLUSTER_OOB_MSG_STATECHANGE 2
#define CLUSTER_OOB_MSG_SERVICEEVENT 3
/* Sendmsg flags, these are above the normal sendmsg flags so they don't
* interfere */
#define MSG_NOACK 0x010000 /* Don't need an ACK for this message */
#define MSG_QUEUE 0x020000 /* Queue the message for sending later */
#define MSG_MULTICAST 0x080000 /* Message was sent to all nodes in the cluster
*/
#define MSG_ALLINT 0x100000 /* Send out of all interfaces */
typedef enum { NODESTATE_REMOTEMEMBER, NODESTATE_JOINING, NODESTATE_MEMBER,
NODESTATE_DEAD } nodestate_t;
struct sockaddr_cl {
unsigned short scl_family;
unsigned char scl_flags;
unsigned char scl_port;
int scl_nodeid;
};
/* This is how we pass the multicast socket into kernel space. addr is the
* multicast address to use in the address family of the socket (eg for UDP it
* might be 255.255.255.0) */
struct cl_multicast_sock {
int fd; /* FD of master socket to do multicast on */
int number; /* Socket number, to match up recvonly & bcast
* sockets */
};
/* Cluster configuration info passed when we join the cluster */
struct cl_join_cluster_info {
unsigned char votes;
unsigned int expected_votes;
unsigned int two_node;
unsigned int config_version;
/* 17 == MAX_CLUSTER_NAME_LEN (16) + 1, presumably room for a terminating
   NUL - confirm against the kernel side before changing either value */
char cluster_name[17];
};
/* This is the structure, per node, returned from the membership ioctl */
struct cl_cluster_node {
unsigned int size;
unsigned int node_id;
unsigned int us;
unsigned int leave_reason;
unsigned int incarnation;
nodestate_t state;
char name[MAX_CLUSTER_MEMBER_NAME_LEN];
unsigned char votes;
};
/* The struct passed to the membership ioctls */
struct cl_cluster_nodelist {
uint32_t max_members;
struct cl_cluster_node *nodes;
};
/* Structure passed to SIOCCLUSTER_ISLISTENING */
struct cl_listen_request {
unsigned char port;
int nodeid;
};
/* A Cluster PORTCLOSED message - received by a local user as an OOB message */
struct cl_portclosed_oob {
unsigned char cmd; /* CLUSTER_OOB_MSG_PORTCLOSED */
unsigned char port;
};
/* Get all version numbers or set the config version */
struct cl_version {
unsigned int major;
unsigned int minor;
unsigned int patch;
unsigned int config;
};
/* structure passed to barrier ioctls */
struct cl_barrier_info {
char cmd;
char name[MAX_BARRIER_NAME_LEN];
unsigned int flags;
unsigned long arg;
};
typedef enum { SERVICE_EVENT_STOP, SERVICE_EVENT_START, SERVICE_EVENT_FINISH,
SERVICE_EVENT_LEAVEDONE } service_event_t;
typedef enum { SERVICE_START_FAILED, SERVICE_START_JOIN, SERVICE_START_LEAVE }
service_start_t;
struct cl_service_event {
service_event_t type;
service_start_t start_type;
unsigned int event_id;
unsigned int last_stop;
unsigned int last_start;
unsigned int last_finish;
unsigned int node_count;
};
/* Commands to the barrier ioctl */
#define BARRIER_IOCTL_REGISTER 1
#define BARRIER_IOCTL_CHANGE 2
#define BARRIER_IOCTL_DELETE 3
#define BARRIER_IOCTL_WAIT 4
/* Attributes of a barrier - bitmask */
#define BARRIER_ATTR_AUTODELETE 1
#define BARRIER_ATTR_MULTISTEP 2
#define BARRIER_ATTR_MANUAL 4
#define BARRIER_ATTR_ENABLED 8
#define BARRIER_ATTR_CALLBACK 16
/* Attribute setting commands */
#define BARRIER_SETATTR_AUTODELETE 1
#define BARRIER_SETATTR_MULTISTEP 2
#define BARRIER_SETATTR_ENABLED 3
#define BARRIER_SETATTR_NODES 4
#define BARRIER_SETATTR_CALLBACK 5
#define BARRIER_SETATTR_TIMEOUT 6
#endif

446
daemons/clvmd/libclvm.c Normal file

@ -0,0 +1,446 @@
/*
* Copyright (C) 1997-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* library functions for Cluster LVM Daemon */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <syslog.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include <search.h>
#include <errno.h>
#include "clvm.h"
#include "libclvm.h"
/* CLVM in hex! */
#define LVM_SIGNATURE 0x434C564D
#define MAX_CLUSTER_MEMBER_NAME_LEN 255
/* NOTE: the LVMD uses the socket FD as the client ID; this means
that any client that calls fork() will inherit the context of
its parent. */
static int clvmd_sock = -1;
/*
 * Create a UNIX stream socket and connect it to the clvmd daemon at
 * CLVMD_SOCKNAME.
 *
 * Returns the connected descriptor, or -1 on failure.  When connect()
 * fails, errno is preserved across the close().
 */
static int open_local_sock(void)
{
int local_socket;
struct sockaddr_un sockaddr;
/* Open local socket */
local_socket = socket(PF_UNIX, SOCK_STREAM, 0);
if (local_socket < 0) {
perror("Can't create local socket");
return -1;
}
/* NOTE(review): !FD_CLOEXEC evaluates to 0, so this sets the descriptor
   flags to 0 (close-on-exec off); confirm that is the intent */
fcntl(local_socket, F_SETFD, !FD_CLOEXEC);
/* NOTE(review): sockaddr is not zeroed first and the whole structure is
   passed to connect(); whether that matters depends on the form of
   CLVMD_SOCKNAME, which is defined elsewhere */
strcpy(sockaddr.sun_path, CLVMD_SOCKNAME);
sockaddr.sun_family = AF_UNIX;
if (connect
(local_socket, (struct sockaddr *) &sockaddr, sizeof(sockaddr))) {
/* close() may overwrite errno; keep connect()'s value for the caller */
int saved_errno = errno;
close(local_socket);
errno = saved_errno;
return -1;
}
return local_socket;
}
/*
 * Send a request to clvmd over clvmd_sock and read back the reply.
 *
 * inbuf/inlen - complete request (header plus arguments) to write
 * retbuf      - receives a malloc()ed buffer holding the reply header
 *               followed by arglen bytes of arguments; the caller frees it
 *
 * Returns 0 on success, -1 on a local I/O or allocation failure (errno
 * set), or -2 if the daemon reported an error, in which case errno is set
 * to the negated reply status.
 */
static int send_request(char *inbuf, int inlen, char **retbuf)
{
	char outbuf[PIPE_BUF];
	struct clvm_header *outheader = (struct clvm_header *) outbuf;
	int len;
	int off;

	/* Send it to CLVMD */
	if (write(clvmd_sock, inbuf, inlen) != inlen) {
		perror("Error writing to CLVMD");
		return -1;
	}

	/* Get the response.  A stream socket may deliver the header in
	   pieces, so keep reading until all of it has arrived; arglen must
	   not be looked at before then. */
	len = 0;
	while (len < (int) sizeof(struct clvm_header)) {
		int got = read(clvmd_sock, outbuf + len,
			       sizeof(struct clvm_header) - len);

		if (got < 0) {
			perror("Error reading CLVMD");
			return -1;
		}
		if (got == 0) {
			fprintf(stderr, "EOF reading CLVMD");
			errno = ENOTCONN;
			return -1;
		}
		len += got;
	}

	/* Allocate buffer */
	*retbuf = malloc(len + outheader->arglen);
	if (!*retbuf) {
		errno = ENOMEM;
		return -1;
	}

	/* Copy the header */
	memcpy(*retbuf, outbuf, len);
	outheader = (struct clvm_header *) *retbuf;

	/* Read the returned values */
	off = 1;		/* we've already read the first byte */
	while (off < outheader->arglen && len > 0) {
		/* Never ask for more than is still expected: the buffer has
		   room for arglen bytes only */
		size_t want = outheader->arglen - off;

		if (want > PIPE_BUF)
			want = PIPE_BUF;

		len = read(clvmd_sock, outheader->args + off, want);
		if (len > 0)
			off += len;
	}

	/* Was it an error ? */
	if (outheader->status < 0) {
		errno = -outheader->status;
		return -2;
	}

	return 0;
}
/*
 * Fill in a request header and translate the special node names:
 *   NULL or "*"  -> empty node name (all nodes)
 *   "."          -> empty node name plus CLVMD_FLAG_LOCAL (local node only)
 * Any other name is copied verbatim into head->node, so the caller must
 * have sized the buffer behind head to hold it.  The data argument is
 * accepted but not used here.
 */
static void build_header(struct clvm_header *head, int cmd, const char *node,
			 void *data, int len)
{
	int local_only = node && strcmp(node, ".") == 0;
	int everyone = !node || strcmp(node, "*") == 0;

	head->cmd = cmd;
	head->status = 0;
	head->clientid = 0;
	head->arglen = len;
	head->flags = local_only ? CLVMD_FLAG_LOCAL : 0;

	if (everyone || local_only)
		head->node[0] = '\0';
	else
		strcpy(head->node, node);
}
/*
 * Send a message to a (or all) node(s) in the cluster and discard any reply
 * payload.
 *
 * node may be NULL, "*" (all nodes), "." (local node only) or a node name;
 * see build_header().  data/len are the command arguments.
 *
 * Returns the status from send_request(): 0 on success, -1 on a local
 * failure, -2 if the daemon reported an error (errno set).
 */
int lvm_cluster_write(char cmd, char *node, void *data, int len)
{
	/* node may legitimately be NULL, so don't strlen() it blindly */
	char outbuf[sizeof(struct clvm_header) + len +
		    (node ? strlen(node) : 0) + 1];
	char *retbuf = NULL;
	int status;
	struct clvm_header *head = (struct clvm_header *) outbuf;

	if (clvmd_sock == -1)
		clvmd_sock = open_local_sock();
	if (clvmd_sock == -1)
		return -1;

	build_header(head, cmd, node, data, len);

	/* The arguments follow the NUL that terminates the node name */
	memcpy(head->node + strlen(head->node) + 1, data, len);

	status = send_request(outbuf,
			      sizeof(struct clvm_header) +
			      strlen(head->node) + len, &retbuf);

	if (retbuf)
		free(retbuf);

	return status;
}
/*
 * API: Send a message to a (or all) node(s) in the cluster and wait for
 * replies.
 *
 * node may be NULL, "*" (all nodes), "." (local node only) or a node name.
 * On return *response points to an array of *num entries which must be
 * released with lvm_cluster_free_request(); when no array is produced,
 * *response is NULL and *num is 0.
 *
 * The reply payload is parsed as a sequence of records, each consisting of
 * a NUL-terminated node name, an int status and a NUL-terminated response
 * string; an empty node name ends the sequence.
 *
 * Returns 0 on success, -1 on a local failure (errno set), or -2 if the
 * daemon reported an error (errno set; per-node replies still returned).
 */
int lvm_cluster_request(char cmd, const char *node, void *data, int len,
			lvm_response_t ** response, int *num)
{
	/* node may legitimately be NULL, so don't strlen() it blindly */
	char outbuf[sizeof(struct clvm_header) + len +
		    (node ? strlen(node) : 0) + 1];
	int *outptr;
	char *inptr;
	char *retbuf = NULL;
	int status;
	int saved_errno;
	int i;
	int count = 0;
	struct clvm_header *head = (struct clvm_header *) outbuf;
	lvm_response_t *rarray;

	*num = 0;
	/* Never leave the caller holding a stale or freed pointer */
	*response = NULL;

	if (clvmd_sock == -1)
		clvmd_sock = open_local_sock();
	if (clvmd_sock == -1)
		return -1;

	build_header(head, cmd, node, data, len);
	memcpy(head->node + strlen(head->node) + 1, data, len);

	status = send_request(outbuf,
			      sizeof(struct clvm_header) +
			      strlen(head->node) + len, &retbuf);
	/* malloc()/free() below may disturb the errno set by send_request() */
	saved_errno = errno;

	if ((status == 0 || status == -2) && retbuf) {
		/* Count the number of responses we got */
		head = (struct clvm_header *) retbuf;
		inptr = head->args;
		while (inptr[0]) {
			count++;
			inptr += strlen(inptr) + 1;
			inptr += sizeof(int);
			inptr += strlen(inptr) + 1;
		}

		/* Allocate response array. With an extra pair of INTs on the
		   front to sanity check the pointer when we are given it back
		   to free */
		outptr = malloc(sizeof(lvm_response_t) * count +
				sizeof(int) * 2);
		if (!outptr) {
			free(retbuf);
			errno = ENOMEM;
			return -1;
		}

		outptr[0] = LVM_SIGNATURE;
		outptr[1] = count;
		rarray = (lvm_response_t *) (outptr + 2);

		/* Unpack the response into an lvm_response_t array */
		inptr = head->args;
		for (i = 0; i < count; i++) {
			/* Bounded copy: node is a fixed-size array */
			strncpy(rarray[i].node, inptr,
				sizeof(rarray[i].node) - 1);
			rarray[i].node[sizeof(rarray[i].node) - 1] = '\0';
			inptr += strlen(inptr) + 1;

			/* The status follows a string, so it may be unaligned */
			memcpy(&rarray[i].status, inptr, sizeof(int));
			inptr += sizeof(int);

			rarray[i].response = malloc(strlen(inptr) + 1);
			if (rarray[i].response == NULL) {
				/* Free up everything else and return error */
				int j;

				for (j = 0; j < i; j++)
					free(rarray[j].response);
				free(outptr);
				free(retbuf);
				errno = ENOMEM;
				return -1;
			}

			strcpy(rarray[i].response, inptr);
			rarray[i].len = strlen(inptr);
			inptr += strlen(inptr) + 1;
		}

		*num = count;
		*response = rarray;
	}

	if (retbuf)
		free(retbuf);

	errno = saved_errno;
	return status;
}
/*
 * API: Free a reply array returned by lvm_cluster_request().
 *
 * The array is preceded by two ints written by lvm_cluster_request(): a
 * signature and the number of entries.  The signature is checked before
 * anything is freed.
 *
 * Returns 0 on success, or -1 with errno = EINVAL if response is NULL or
 * does not carry the expected signature.
 */
int lvm_cluster_free_request(lvm_response_t * response)
{
	int *ptr;
	int i;
	int num;

	/* Check for NULL before doing any pointer arithmetic on it */
	if (response == NULL) {
		errno = EINVAL;
		return -1;
	}

	/* Check it's ours to free */
	ptr = (int *) response - 2;
	if (*ptr != LVM_SIGNATURE) {
		errno = EINVAL;
		return -1;
	}

	num = ptr[1];

	for (i = 0; i < num; i++) {
		free(response[i].response);
	}
	free(ptr);

	return 0;
}
/* These are a "higher-level" API providing black-box lock/unlock
functions for cluster LVM...maybe */
/* Set by lock(), used by unlock() */
static int num_responses;
static lvm_response_t *response;
/*
 * Take a cluster-wide lock by sending CLVMD_CMD_LOCK to all nodes.
 *
 * The argument block is the scope byte followed by the NUL-terminated name
 * (an empty string when name is NULL).  The per-node replies are kept in
 * the file-level response/num_responses variables for use by
 * lvm_unlock_for_cluster().
 *
 * Returns 0 on success and non-zero on failure; a reply with status
 * -EHOSTDOWN from any node is reported (if verbosity is set) and forces a
 * return of -1.  On failure the saved replies are released here.
 */
int lvm_lock_for_cluster(char scope, char *name, int verbosity)
{
	int status;
	int i;
	char *args;
	int len;

	if (name) {
		len = strlen(name) + 2;
		args = alloca(len);
		strcpy(args + 1, name);
	} else {
		len = 2;
		args = alloca(len);
		args[1] = '\0';
	}
	args[0] = scope;

	/*
	 * Forget any previous result first, so that a request which fails
	 * before producing replies cannot make the cleanup below free a
	 * stale pointer.
	 */
	response = NULL;
	num_responses = 0;

	status = lvm_cluster_request(CLVMD_CMD_LOCK,
				     "", args, len, &response, &num_responses);

	/* If any nodes were down then display them and return an error */
	for (i = 0; i < num_responses; i++) {
		if (response[i].status == -EHOSTDOWN) {
			if (verbosity)
				fprintf(stderr,
					"clvmd not running on node %s\n",
					response[i].node);
			status = -1;
		}
	}

	/* If there was an error then free the memory now as the caller won't
	   want to do the unlock */
	if (status) {
		int saved_errno = errno;

		lvm_cluster_free_request(response);
		response = NULL;
		num_responses = 0;
		errno = saved_errno;
	}

	return status;
}
int lvm_unlock_for_cluster(char scope, char *name, int verbosity)
{
int status;
int i;
int len;
int failed;
int num_unlock_responses;
char *args;
lvm_response_t *unlock_response;
/* We failed - this should not have been called */
if (num_responses == 0)
return 0;
if (name) {
len = strlen(name) + 2;
args = alloca(len);
strcpy(args + 1, name);
} else {
len = 2;
args = alloca(len);
args[1] = '\0';
}
args[0] = scope;
/* See if it failed anywhere */
failed = 0;
for (i = 0; i < num_responses; i++) {
if (response[i].status != 0)
failed++;
}
/* If it failed on any nodes then we only unlock on
the nodes that succeeded */
if (failed) {
for (i = 0; i < num_responses; i++) {
/* Unlock the ones that succeeded */
if (response[i].status == 0) {
status = lvm_cluster_request(CLVMD_CMD_UNLOCK,
response[i].node,
args, len,
&unlock_response,
&num_unlock_responses);
if (status) {
if (verbosity)
fprintf(stderr,
"cluster command to node %s failed: %s\n",
response[i].node,
strerror(errno));
} else if (unlock_response[0].status != 0) {
if (verbosity > 1)
fprintf(stderr,
"unlock on node %s failed: %s\n",
response[i].node,
strerror(unlock_response
[0].status));
}
lvm_cluster_free_request(unlock_response);
} else {
if (verbosity)
fprintf(stderr,
"command on node %s failed: '%s' - will be left locked\n",
response[i].node,
strerror(response[i].status));
}
}
} else {
/* All OK, we can do a full cluster unlock */
status = lvm_cluster_request(CLVMD_CMD_UNLOCK,
"",
args, len,
&unlock_response,
&num_unlock_responses);
if (status) {
if (verbosity > 1)
fprintf(stderr, "cluster command failed: %s\n",
strerror(errno));
} else {
for (i = 0; i < num_unlock_responses; i++) {
if (unlock_response[i].status != 0) {
if (verbosity > 1)
fprintf(stderr,
"unlock on node %s failed: %s\n",
response[i].node,
strerror(unlock_response
[0].status));
}
}
}
lvm_cluster_free_request(unlock_response);
}
lvm_cluster_free_request(response);
return 0;
}

36
daemons/clvmd/libclvm.h Normal file

@ -0,0 +1,36 @@
/*
* Copyright (C) 1997-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _LIBCLVM_H
#define _LIBCLVM_H
/* One node's reply, as unpacked by lvm_cluster_request(). */
typedef struct lvm_response {
char node[255];	/* name of the node that sent this reply */
char *response;	/* malloc()ed, NUL-terminated reply text; released by
		   lvm_cluster_free_request() */
int status;	/* status value taken from the node's reply */
int len;	/* strlen() of response */
} lvm_response_t;
extern int lvm_cluster_request(char cmd, const char *node, void *data, int len,
lvm_response_t ** response, int *num);
extern int lvm_cluster_write(char cmd, char *node, void *data, int len);
extern int lvm_cluster_free_request(lvm_response_t * response);
/* The "high-level" API */
extern int lvm_lock_for_cluster(char scope, char *name, int verbosity);
extern int lvm_unlock_for_cluster(char scope, char *name, int verbosity);
#endif

@ -0,0 +1,446 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <syslog.h>
#include <assert.h>
#include "libdlm.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "clvmd.h"
#include "lvm-functions.h"
/* LVM2 headers */
#include "toolcontext.h"
#include "log.h"
#include "activate.h"
#include "hash.h"
#include "locking.h"
static struct cmd_context *cmd = NULL;
static struct hash_table *lv_hash = NULL;
/* Per-resource record stored in lv_hash for every lock held by hold_lock() */
struct lv_info {
int lock_id;	/* lock ID filled in by sync_lock(), passed to sync_unlock() */
int lock_mode;	/* mode the lock is currently held at */
};
/* Look the resource up in lv_hash and report the mode its lock is held
   at, or -1 when there is no entry for it */
static int get_current_lock(char *resource)
{
	struct lv_info *held = hash_lookup(lv_hash, resource);

	return held ? held->lock_mode : -1;
}
/* Shutdown helper: walk every entry in lv_hash and release its lock.
   The hash entries themselves are left in place. */
void unlock_all()
{
	struct hash_node *node;
	struct lv_info *held;

	hash_iterate(node, lv_hash) {
		held = hash_get_data(lv_hash, node);
		sync_unlock(hash_get_key(lv_hash, node), held->lock_id);
	}
}
/*
 * Acquire a lock on resource at the given mode and remember it in lv_hash.
 * If the resource already has an entry the existing lock is converted to
 * the new mode instead.  Of the caller's flags only LKF_NOQUEUE is honoured.
 *
 * Returns the sync_lock() status (or -1 if the record cannot be
 * allocated); errno is left as sync_lock() set it.
 */
int hold_lock(char *resource, int mode, int flags)
{
	struct lv_info *entry;
	int rc;
	int err;

	/* Only LKF_NOQUEUE is valid here */
	flags &= LKF_NOQUEUE;

	entry = hash_lookup(lv_hash, resource);
	if (!entry) {
		/* Not held yet: take a fresh lock and record it */
		entry = malloc(sizeof(struct lv_info));
		if (!entry)
			return -1;

		entry->lock_mode = mode;
		rc = sync_lock(resource, mode, flags, &entry->lock_id);
		err = errno;
		if (!rc) {
			hash_insert(lv_hash, resource, entry);
		} else {
			free(entry);
			DEBUGLOG("hold_lock. lock at %d failed: %s\n", mode,
				 strerror(errno));
		}
		errno = err;
		return rc;
	}

	/* Already held: convert the existing lock to the requested mode */
	rc = sync_lock(resource, mode, LKF_CONVERT | flags, &entry->lock_id);
	err = errno;
	if (!rc) {
		entry->lock_mode = mode;
	} else {
		DEBUGLOG("hold_lock. convert to %d failed: %s\n", mode,
			 strerror(errno));
	}
	errno = err;

	return rc;
}
/*
 * Release the lock held on resource and drop its lv_hash record.
 * A resource with no record is not an error (returns 0).  Otherwise the
 * sync_unlock() status is returned with errno as sync_unlock() left it;
 * the record is only removed when the unlock succeeded.
 */
int hold_unlock(char *resource)
{
	struct lv_info *entry;
	int rc;
	int err;

	entry = hash_lookup(lv_hash, resource);
	if (!entry) {
		DEBUGLOG("hold_unlock, lock not already held\n");
		return 0;
	}

	rc = sync_unlock(resource, entry->lock_id);
	err = errno;
	if (rc) {
		DEBUGLOG("hold_unlock. unlock failed(%d): %s\n", rc,
			 strerror(errno));
	} else {
		hash_remove(lv_hash, resource);
		free(entry);
	}
	errno = err;

	return rc;
}
/* Watch the return codes here.
liblvm API functions return 1(true) for success, 0(false) for failure and don't set errno.
libdlm API functions return 0 for success, -1 for failure and do set errno.
These functions here return 0 for success or >0 for failure (where the retcode is errno)
*/
/* Activate LV exclusive or non-exclusive */
static int do_activate_lv(char *resource, int mode)
{
int oldmode;
int status;
int activate_lv;
struct lvinfo lvi;
/* Is it already open ? */
oldmode = get_current_lock(resource);
if (oldmode == mode) {
return 0; /* Nothing to do */
}
/* Does the config file want us to activate this LV ? */
if (!lv_activation_filter(cmd, resource, &activate_lv))
return EIO;
if (!activate_lv)
return 0; /* Success, we did nothing! */
/* Do we need to activate exclusively? */
if (activate_lv == 2)
mode = LKM_EXMODE;
/* OK, try to get the lock */
status = hold_lock(resource, mode, LKF_NOQUEUE);
if (status)
return errno;
/* If it's suspended then resume it */
if (!lv_info_by_lvid(cmd, resource, &lvi))
return EIO;
if (lvi.suspended)
if (!lv_resume(cmd, resource))
return EIO;
/* Now activate it */
if (!lv_activate(cmd, resource))
return EIO;
return 0;
}
/*
 * Resume the LV if it was active.
 * Does nothing (and succeeds) when no lock is held on the resource.
 * Returns 0 on success or EIO if lv_resume_if_active() fails.
 */
static int do_resume_lv(char *resource)
{
	int oldmode;

	/* Is it open ? */
	oldmode = get_current_lock(resource);
	if (oldmode == -1) {
		/* The message previously named do_deactivate_lock here */
		DEBUGLOG("do_resume_lv, lock not already held\n");
		return 0;	/* We don't need to do anything */
	}

	if (!lv_resume_if_active(cmd, resource))
		return EIO;

	return 0;
}
/* Suspend the device if active */
static int do_suspend_lv(char *resource)
{
int oldmode;
struct lvinfo lvi;
/* Is it open ? */
oldmode = get_current_lock(resource);
if (oldmode == -1) {
DEBUGLOG("do_suspend_lv, lock held at %d\n", oldmode);
return 0; /* Not active, so it's OK */
}
/* Only suspend it if it exists */
if (!lv_info_by_lvid(cmd, resource, &lvi))
return EIO;
if (lvi.exists) {
if (!lv_suspend_if_active(cmd, resource)) {
return EIO;
}
}
return 0;
}
/*
 * Deactivate the LV and give up the lock held on it.
 * Succeeds without doing anything when no lock is held.  Returns EIO if
 * lv_deactivate() fails, or errno if the unlock fails.
 */
static int do_deactivate_lv(char *resource)
{
	/* Is it open ? */
	if (get_current_lock(resource) == -1) {
		DEBUGLOG("do_deactivate_lock, lock not already held\n");
		return 0;	/* We don't need to do anything */
	}

	if (!lv_deactivate(cmd, resource))
		return EIO;

	if (hold_unlock(resource))
		return errno;

	return 0;
}
/*
 * The LOCK_LV step that runs on every node in the cluster: it dispatches
 * the lock command to the routine that drives device-mapper and LVM.
 *
 * The tool context is refreshed first if its configuration is invalid or
 * the config files have changed.  Returns 0 on success or an errno-style
 * value; EINVAL is returned for an unknown command or an invalid updated
 * config (in the latter case without emptying the memory pool).
 */
int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
{
	int rc = 0;

	DEBUGLOG("do_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n",
		 resource, command, lock_flags);

	/* Reinitialise various settings inc. logging, filters */
	if ((!cmd->config_valid || config_files_changed(cmd)) &&
	    !refresh_toolcontext(cmd)) {
		log_error("Updated config file invalid. Aborting.");
		return EINVAL;
	}

	switch (command) {
	case LCK_LV_ACTIVATE:
		rc = do_activate_lv(resource, LKM_CRMODE);
		break;

	case LCK_LV_EXCLUSIVE:
		rc = do_activate_lv(resource, LKM_EXMODE);
		break;

	case LCK_LV_DEACTIVATE:
		rc = do_deactivate_lv(resource);
		break;

	case LCK_LV_SUSPEND:
		rc = do_suspend_lv(resource);
		break;

	case LCK_UNLOCK:
	case LCK_LV_RESUME:	/* if active */
		rc = do_resume_lv(resource);
		break;

	default:
		DEBUGLOG("Invalid LV command 0x%x\n", command);
		rc = EINVAL;
		break;
	}

	/* clean the pool for another command */
	pool_empty(cmd->mem);

	DEBUGLOG("Command return is %d\n", rc);
	return rc;
}
/*
 * Local-node work done BEFORE the cluster-wide step.
 *
 * Only LCK_LV_SUSPEND needs anything: this node (the one modifying the
 * metadata) takes the lock in PW mode before the suspend goes cluster-wide.
 * Returns 0 on success or errno if the lock cannot be taken.
 */
int pre_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
{
	if (command != LCK_LV_SUSPEND)
		return 0;

	DEBUGLOG("pre_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n",
		 resource, command, lock_flags);

	return hold_lock(resource, LKM_PWMODE, LKF_NOQUEUE) ? errno : 0;
}
/* Functions to do on the local node only AFTER the cluster-wide stuff above happens */
int post_lock_lv(unsigned char command, unsigned char lock_flags,
char *resource)
{
/* Opposite of above, done on resume after a metadata update */
if (command == LCK_LV_RESUME) {
int oldmode;
DEBUGLOG
("post_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n",
resource, command, lock_flags);
/* If the lock state is PW then restore it to what it was */
oldmode = get_current_lock(resource);
if (oldmode == LKM_PWMODE) {
struct lvinfo lvi;
if (!lv_info_by_lvid(cmd, resource, &lvi))
return EIO;
if (lvi.exists) {
if (hold_lock(resource, LKM_CRMODE, 0))
return errno;
} else {
if (hold_unlock(resource))
return errno;
}
}
}
return 0;
}
/* Check if a VG is in use by LVM1 so we don't stomp on it.
   Returns 0 when check_lvm1_vg_inactive() reports 1, EBUSY otherwise. */
int do_check_lvm1(char *vgname)
{
	if (check_lvm1_vg_inactive(cmd, vgname) == 1)
		return 0;

	return EBUSY;
}
/*
 * Ideally, clvmd should be started before any LVs are active
 * but this may not be the case...
 * I suppose this also comes in handy if clvmd crashes, not that it would!
 *
 * Runs "lvm lvs" and takes a CR-mode lock for every LV whose attribute
 * string marks it active ('a') or suspended ('s').  The lock name is the
 * VG UUID followed by the LV UUID, both with their hyphens removed.
 * Always returns NULL.
 */
static void *get_initial_state()
{
	/* 6-4-4-4-4-4-6 characters plus six hyphens */
	const size_t uuid_text_len = 38;
	char lv[64], vg[64], flags[25];
	char uuid[65];
	char line[255];
	FILE *lvs =
	    popen
	    ("/sbin/lvm lvs --nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr",
	     "r");

	if (!lvs)
		return NULL;

	while (fgets(line, sizeof(line), lvs)) {
		/* Field widths keep sscanf inside the buffers above */
		if (sscanf(line, "%63s %63s %24s\n", vg, lv, flags) != 3)
			continue;

		/* Skip anything too short for the fixed offsets used below */
		if (strlen(vg) < uuid_text_len || strlen(lv) < uuid_text_len ||
		    strlen(flags) < 5)
			continue;

		/* States: s:suspended a:active S:dropped snapshot I:invalid snapshot */
		if (flags[4] != 'a' && flags[4] != 's')
			continue;	/* neither active nor suspended */

		/* Convert hyphen-separated UUIDs into one */
		memcpy(&uuid[0], &vg[0], 6);
		memcpy(&uuid[6], &vg[7], 4);
		memcpy(&uuid[10], &vg[12], 4);
		memcpy(&uuid[14], &vg[17], 4);
		memcpy(&uuid[18], &vg[22], 4);
		memcpy(&uuid[22], &vg[27], 4);
		memcpy(&uuid[26], &vg[32], 6);
		memcpy(&uuid[32], &lv[0], 6);
		memcpy(&uuid[38], &lv[7], 4);
		memcpy(&uuid[42], &lv[12], 4);
		memcpy(&uuid[46], &lv[17], 4);
		memcpy(&uuid[50], &lv[22], 4);
		memcpy(&uuid[54], &lv[27], 4);
		memcpy(&uuid[58], &lv[32], 6);
		uuid[64] = '\0';

		DEBUGLOG("getting initial lock for %s\n", uuid);
		hold_lock(uuid, LKM_CRMODE, LKF_NOQUEUE);
	}

	/* A popen() stream must be closed with pclose(), which also reaps
	   the child process */
	pclose(lvs);

	return NULL;
}
/* Set up lv_hash, the table that tracks LV locks & status
   (created with a size hint of 100) */
void init_lvhash()
{
	struct hash_table *table = hash_create(100);

	lv_hash = table;
}
/*
 * Initialise the daemon's LVM context: create the tool context, direct
 * syslog output to LOG_DAEMON and pick up locks for LVs that are already
 * active.  Returns 1 on success, 0 if the context cannot be created.
 */
int init_lvm(void)
{
	cmd = create_toolcontext(NULL);
	if (!cmd) {
		log_error("Failed to allocate command context");
		return 0;
	}

	/* Use LOG_DAEMON for syslog messages instead of LOG_USER */
	init_syslog(LOG_DAEMON);

	get_initial_state();

	return 1;
}

@ -0,0 +1,35 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Functions in lvm-functions.c */
#ifndef _LVM_FUNCTIONS_H
#define _LVM_FUNCTIONS_H
extern int pre_lock_lv(unsigned char lock_cmd, unsigned char lock_flags,
char *resource);
extern int do_lock_lv(unsigned char lock_cmd, unsigned char lock_flags,
char *resource);
extern int post_lock_lv(unsigned char lock_cmd, unsigned char lock_flags,
char *resource);
extern int do_check_lvm1(char *vgname);
extern int init_lvm(void);
extern void init_lvhash(void);
extern int hold_unlock(char *resource);
extern int hold_lock(char *resource, int mode, int flags);
extern void unlock_all(void);
#endif

369
daemons/clvmd/system-lv.c Normal file

@ -0,0 +1,369 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Routines dealing with the System LV */
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/utsname.h>
#include <syslog.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <errno.h>
#include <mntent.h>
#include "libdlm.h"
#include "log.h"
#include "list.h"
#include "locking.h"
#include "system-lv.h"
#include "clvmd-comms.h"
#ifdef HAVE_CCS
#include "ccs.h"
#endif
#define SYSTEM_LV_FILESYSTEM "ext2"
#define SYSTEM_LV_MOUNTPOINT "/tmp/.clvmd-XXXXXX"
extern char *config_filename(void);
static char system_lv_name[PATH_MAX] = { '\0' };
static char mount_point[PATH_MAX] = { '\0' };
static int mounted = 0;
static int mounted_rw = 0;
static int lockid;
static const char *lock_name = "CLVM_SYSTEM_LV";
/* Scan /proc/mounts or, failing that, /etc/mtab for an entry whose
   device name equals system_lv_name.
   Returns 1 for mounted and 0 for not mounted, so the result can be
   compared directly with the "mounted" static variable above; returns -1
   if neither file can be opened (which never matches "mounted").
 */
static int is_really_mounted(void)
{
	FILE *table;
	struct mntent *entry;
	int found = 0;

	table = setmntent("/proc/mounts", "r");
	if (!table)
		table = setmntent("/etc/mtab", "r");
	if (!table) {
		log_error("Unable to open /proc/mounts or /etc/mtab");
		return -1;
	}

	/* Look for system LV name in the file */
	while ((entry = getmntent(table))) {
		if (strcmp(entry->mnt_fsname, system_lv_name) == 0) {
			found = 1;
			break;
		}
	}

	endmntent(table);

	return found;
}
/*
 * Work out the system LV name (once) and check that its mount state still
 * agrees with what we think it is.
 *
 * With HAVE_CCS the name comes from the "clvm.ccs" config file (key
 * "cluster/systemlv", default "/dev/vg/system_lv"); otherwise it comes
 * from the CLVMD_SYSTEM_LV environment variable.
 *
 * Returns 0 on success, -1 if the name cannot be determined, is too long
 * for system_lv_name, or the LV has been mounted/unmounted behind our back.
 */
static int find_system_lv(void)
{
	if (system_lv_name[0] == '\0') {
		int n;
#ifdef HAVE_CCS
		int error;
		ccs_node_t *ctree;

		/* Read the cluster config file */
		/* Open the config file */
		error = open_ccs_file(&ctree, "clvm.ccs");
		if (error) {
			perror("reading config file");
			return -1;
		}
		/* Bounded copy: the configured value is not under our control */
		n = snprintf(system_lv_name, sizeof(system_lv_name), "%s",
			     find_ccs_str(ctree, "cluster/systemlv", '/',
					  "/dev/vg/system_lv"));

		/* Finished with config file */
		close_ccs_file(ctree);
#else
		const char *env = getenv("CLVMD_SYSTEM_LV");

		if (!env)
			return -1;
		/* Bounded copy: the environment is not under our control */
		n = snprintf(system_lv_name, sizeof(system_lv_name), "%s",
			     env);
#endif
		if (n < 0 || (size_t) n >= sizeof(system_lv_name)) {
			log_error("System LV name is too long\n");
			/* Leave the name unset so a later call tries again */
			system_lv_name[0] = '\0';
			return -1;
		}
	}

	/* See if it has been mounted outside our control */
	if (is_really_mounted() != mounted) {
		log_error
		    ("The system LV state has been mounted/umounted outside the control of clvmd\n"
		     "it cannot be used for cluster communications until this is fixed.\n");
		return -1;
	}
	return 0;
}
/*
 * Unmount the system LV if we have it mounted, then release the
 * system-LV lock and remove the temporary mount point.
 * Returns 0 on success (or if nothing was mounted), -1 if umount fails.
 */
int system_lv_umount(void)
{
	if (mounted) {
		if (umount(mount_point) < 0) {
			log_error("umount of system LV (%s) failed: %m\n",
				  system_lv_name);
			return -1;
		}

		sync_unlock(lock_name, lockid);
		mounted = 0;

		/* Remove the mount point */
		rmdir(mount_point);
	}

	return 0;
}
/*
 * Mount the system LV on a freshly created private directory.
 *
 * readwrite: non-zero to mount read-write (takes the system-LV lock in EX
 *            mode), zero for read-only (CR mode).
 *
 * If the LV is already mounted suitably nothing is done; if it is mounted
 * read-only and read-write is wanted it is unmounted and remounted.  When
 * a read-write mount fails with EINVAL, mkfs is run on the LV and the
 * mount is retried once.
 *
 * Returns 0 on success, -1 on failure (errno = EBUSY if the system LV
 * cannot be found or is in an unexpected mount state).  On failure the
 * lock is released and the mount point directory is removed.
 */
int system_lv_mount(int readwrite)
{
	int status;
	int saved_errno;
	unsigned long mountflags;

	if (find_system_lv()) {
		errno = EBUSY;
		return -1;
	}

	/* Is it already mounted suitably? */
	if (mounted) {
		if (!readwrite || mounted_rw)
			return 0;

		/* Mounted RO and we need RW */
		if (system_lv_umount() < 0)
			return -1;
	}

	/*
	 * Randomize the mount point.  mkdtemp() creates the directory
	 * atomically (mode 0700), replacing the racy mkstemp/unlink/mkdir
	 * sequence whose mkdir result was never checked.
	 */
	strcpy(mount_point, SYSTEM_LV_MOUNTPOINT);
	if (!mkdtemp(mount_point)) {
		log_error("mkdtemp for system LV mount point failed: %m\n");
		return -1;
	}

	/* Make sure we have a system-lv lock */
	status =
	    sync_lock(lock_name, (readwrite) ? LKM_EXMODE : LKM_CRMODE, 0,
		      &lockid);
	if (status < 0) {
		saved_errno = errno;
		rmdir(mount_point);	/* don't leave the directory behind */
		errno = saved_errno;
		return -1;
	}

	mountflags = MS_MGC_VAL | MS_NOSUID | MS_NODEV | MS_NOEXEC |
	    MS_SYNCHRONOUS | (readwrite ? 0 : MS_RDONLY);

	/* Mount it */
	status = mount(system_lv_name, mount_point, SYSTEM_LV_FILESYSTEM,
		       mountflags, NULL);

	/* mount(2) returns EINVAL if the volume has no FS on it. So, if we
	   want to write to it we try to make a filesystem in it and retry
	   the mount */
	if (status < 0 && errno == EINVAL && readwrite) {
		char cmd[256];

		log_error("Attempting mkfs on system LV device %s\n",
			  system_lv_name);
		/* NOTE(review): system_lv_name is passed to a shell unquoted */
		snprintf(cmd, sizeof(cmd), "/sbin/mkfs -t %s %s",
			 SYSTEM_LV_FILESYSTEM, system_lv_name);
		system(cmd);

		status = mount(system_lv_name, mount_point,
			       SYSTEM_LV_FILESYSTEM, mountflags, NULL);
	}

	if (status < 0) {
		saved_errno = errno;
		log_error("mount of system LV (%s, %s, %s) failed: %m\n",
			  system_lv_name, mount_point, SYSTEM_LV_FILESYSTEM);
		sync_unlock(lock_name, lockid);
		rmdir(mount_point);	/* don't leave the directory behind */
		errno = saved_errno;
		return -1;
	}

	/* Set the internal flags */
	mounted = 1;
	mounted_rw = readwrite;

	return 0;
}
/* Erase *all* regular files in the root directory of the system LV.
   This *MUST* be called with an appropriate lock held!
   The LV is left mounted RW because it is assumed that the
   caller wants to write something here after clearing some space.
   Returns 0 on success, -1 if the LV cannot be mounted read-write or the
   mount point cannot be opened. */
int system_lv_eraseall(void)
{
	DIR *dir;
	struct dirent *ent;
	char fname[PATH_MAX];

	/* Must be mounted R/W; don't go deleting files if it isn't */
	if (system_lv_mount(1))
		return -1;

	dir = opendir(mount_point);
	if (!dir)
		return -1;

	while ((ent = readdir(dir))) {
		struct stat st;

		snprintf(fname, sizeof(fname), "%s/%s", mount_point,
			 ent->d_name);

		/* stat() returns 0 on success: only then is st valid (the
		   test was previously inverted, so nothing was ever removed) */
		if (stat(fname, &st) == 0 && S_ISREG(st.st_mode))
			unlink(fname);
	}
	closedir(dir);
	return 0;
}
/* "High-level" routine: mount the system LV read-write, write len bytes
   of data into a file named after this node (its uname nodename), then
   unmount the LV again.
   Returns 0 on success, -1 if the mount, the open or a write fails. */
int system_lv_write_data(char *data, ssize_t len)
{
	struct utsname self;
	char path[PATH_MAX];
	int fd;
	ssize_t chunk;
	ssize_t done = 0;

	if (system_lv_mount(1))
		return -1;

	/* Build the file name we are going to use. */
	uname(&self);
	snprintf(path, sizeof(path), "%s/%s", mount_point, self.nodename);

	/* Open the file for output */
	fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0) {
		int saved_errno = errno;

		system_lv_umount();
		errno = saved_errno;
		return -1;
	}

	/* Keep writing until everything is out or a write makes no progress */
	for (;;) {
		chunk = write(fd, data + done, len - done);
		if (chunk <= 0)
			break;
		done += chunk;
		if (done >= len)
			break;
	}

	close(fd);
	system_lv_umount();

	return (chunk < 0) ? -1 : 0;
}
/* "High-level" routine: mount the system LV read-only, read the whole of
   the file fname_base from it into data, then unmount the LV again.
   *len receives the number of bytes read.
   NOTE(review): the file's full size is read into data; nothing here
   checks the buffer's capacity, so the caller must size it - confirm.
   Returns 0 on success, -1 if the mount, stat, open or a read fails. */
int system_lv_read_data(char *fname_base, char *data, ssize_t *len)
{
	char path[PATH_MAX];
	struct stat info;
	int fd;
	ssize_t total;
	ssize_t chunk;
	ssize_t done = 0;

	if (system_lv_mount(0))
		return -1;

	/* Build the file name we are going to use. */
	snprintf(path, sizeof(path), "%s/%s", mount_point, fname_base);

	/* Get the file size and stuff. Actually we only need the file size
	   but this will also check that the file exists */
	if (stat(path, &info) < 0) {
		int saved_errno = errno;

		log_error("stat of file %s on system LV failed: %m\n", path);
		system_lv_umount();
		errno = saved_errno;
		return -1;
	}
	total = info.st_size;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		int saved_errno = errno;

		log_error("open of file %s on system LV failed: %m\n", path);
		system_lv_umount();
		errno = saved_errno;
		return -1;
	}

	/* Keep reading until the whole file is in or a read makes no progress */
	for (;;) {
		chunk = read(fd, data + done, total - done);
		if (chunk <= 0)
			break;
		done += chunk;
		if (done >= total)
			break;
	}

	close(fd);
	system_lv_umount();

	*len = done;
	return (chunk < 0) ? -1 : 0;
}

30
daemons/clvmd/system-lv.h Normal file

@ -0,0 +1,30 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _CLVM_SYSTEM_LV_H
#define _CLVM_SYSTEM_LV_H
/* Prototypes for System-LV functions */
/* "low-level" functions */
extern int system_lv_umount(void);
extern int system_lv_mount(int readwrite);
extern int system_lv_eraseall(void);
/* "high-level" functions */
extern int system_lv_write_data(char *data, ssize_t len);
extern int system_lv_read_data(char *fname_base, char *data, ssize_t *len);
#endif

480
daemons/clvmd/tcp-comms.c Normal file

@ -0,0 +1,480 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 2002-2003 All rights reserved.
**
*******************************************************************************
******************************************************************************/
/* This provides the inter-clvmd communications for a system without CMAN.
There is a listening TCP socket which accepts new connections in the
normal way.
It can also make outgoing connections to the other clvmd nodes.
*/
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <syslog.h>
#include <netdb.h>
#include <assert.h>
#include "ccs.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "clvmd.h"
#include "clvmd-gulm.h"
#include "hash.h"
#define DEFAULT_TCP_PORT 21064
static int listen_fd = -1;
static int tcp_port;
struct hash_table *sock_hash;
static int get_tcp_port(int default_port);
static int get_our_ip_address(char *addr, int *family);
static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid,
struct local_client **new_client);
/*
 * Called by init_cluster() to open up the listening socket.
 *
 * Creates the socket hash, works out the TCP port and our own address,
 * then binds and listens on that address.  The listening descriptor is
 * kept in listen_fd.
 *
 * Returns 0 on success, -1 on failure.
 */
// TODO: IPv6 compat.
int init_comms()
{
	struct sockaddr *addr = NULL;
	struct sockaddr_in addr4;
	struct sockaddr_in6 addr6;
	int addr_len;
	int family = AF_UNSPEC;
	int one = 1;
	char address[MAX_CSID_LEN];

	sock_hash = hash_create(100);
	if (!sock_hash)
		return -1;

	tcp_port = get_tcp_port(DEFAULT_TCP_PORT);

	/* Get IP address and IP type */
	memset(address, 0, sizeof(address));
	get_our_ip_address(address, &family);

	/*
	 * Build the bind address from the bytes get_our_ip_address() gave us.
	 * The copy previously read from the still-NULL "addr" pointer.
	 * The copy length is capped at sizeof(address) so a small
	 * MAX_CSID_LEN can never cause an over-read.
	 */
	if (family == AF_INET) {
		memset(&addr4, 0, sizeof(addr4));
		memcpy(&addr4.sin_addr, address,
		       sizeof(address) < sizeof(struct in_addr) ?
		       sizeof(address) : sizeof(struct in_addr));
		addr4.sin_port = htons(tcp_port);
		addr = (struct sockaddr *) &addr4;
		addr_len = sizeof(addr4);
	} else if (family == AF_INET6) {
		memset(&addr6, 0, sizeof(addr6));
		memcpy(&addr6.sin6_addr, address,
		       sizeof(address) < sizeof(struct in6_addr) ?
		       sizeof(address) : sizeof(struct in6_addr));
		addr6.sin6_port = htons(tcp_port);
		addr = (struct sockaddr *) &addr6;
		addr_len = sizeof(addr6);
	} else {
		/* Address lookup failed or gave a family we can't bind */
		syslog(LOG_ERR, "Can't determine local IP address for clvmd");
		return -1;
	}
	addr->sa_family = family;

	listen_fd = socket(family, SOCK_STREAM, 0);
	if (listen_fd < 0)
		return -1;

	setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int));

	if (bind(listen_fd, addr, addr_len) < 0) {
		DEBUGLOG("Can't bind to port\n");
		syslog(LOG_ERR,
		       "Can't bind to port %d, is clvmd already running ?",
		       tcp_port);
		close(listen_fd);
		listen_fd = -1;
		return -1;
	}

	if (listen(listen_fd, 5) < 0) {
		syslog(LOG_ERR, "Can't listen on port %d: %m", tcp_port);
		close(listen_fd);
		listen_fd = -1;
		return -1;
	}

	return 0;
}
/*
 * Forget the client(s) registered in sock_hash for this CSID, both under
 * the plain CSID and under the "mangled" one (top bit of the first byte
 * flipped) that alloc_client() uses for a second connection.
 *
 * The socket itself is not closed here - clvmd.c does that when it
 * notices the other end has gone.  Removing the entries just stops us
 * trying to send on them.  csid is returned unchanged.
 */
void tcp_remove_client(char *csid)
{
	int pass;

	DEBUGLOG("tcp_remove_client\n");

	/* Pass 0 handles the plain CSID, pass 1 the mangled one; flipping
	   the bit twice puts csid back as we found it */
	for (pass = 0; pass < 2; pass++) {
		if (hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN))
			hash_remove_binary(sock_hash, csid, MAX_CSID_LEN);
		csid[0] ^= 0x80;
	}
}
/*
 * Create a local_client for a cluster data socket and register it in the
 * csid hash table.
 *
 * fd         - connected socket
 * csid       - address of the peer; if an entry for it already exists the
 *              first byte is "mangled" (top bit flipped) and the client is
 *              stored under that key instead.  On success with mangling the
 *              caller's csid stays mangled; on failure it is restored.
 * new_client - if non-NULL, receives the new client on success only.
 *
 * Returns 0 on success, -1 on failure (errno is ECONNREFUSED when the node
 * already has both a plain and a mangled entry).
 */
int alloc_client(int fd, char *csid, struct local_client **new_client)
{
    struct local_client *client;
    /* Print address bytes as unsigned so values >= 128 don't show negative */
    unsigned char *ip = (unsigned char *)csid;

    DEBUGLOG("alloc_client %d csid = [%d.%d.%d.%d]\n", fd, ip[0], ip[1], ip[2], ip[3]);

    /* Create a local_client and return it */
    client = malloc(sizeof(struct local_client));
    if (!client)
    {
	DEBUGLOG("malloc failed\n");
	return -1;
    }

    memset(client, 0, sizeof(struct local_client));
    client->fd = fd;
    client->type = CLUSTER_DATA_SOCK;
    client->callback = read_from_tcpsock;

    /* Add to our list of node sockets */
    if (hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN))
    {
	DEBUGLOG("alloc_client mangling CSID for second connection\n");
	/* This is a duplicate connection but we can't close it because
	   the other end may already have started sending.
	   So, we mangle the IP address and keep it, all sending will
	   go out of the main FD
	*/
	csid[0] ^= 0x80;
	client->bits.net.flags = 1; /* indicate mangled CSID */

	/* If it still exists then kill the connection as we should only
	   ever have one incoming connection from each node */
	if (hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN))
	{
	    /* Put the csid back as we found it before reporting/failing */
	    csid[0] ^= 0x80;
	    DEBUGLOG("Multiple incoming connections from node\n");
	    syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", ip[0], ip[1], ip[2], ip[3]);

	    free(client);
	    errno = ECONNREFUSED;
	    return -1;
	}
    }
    hash_insert_binary(sock_hash, csid, MAX_CSID_LEN, client);

    /* Only hand the client back once it can no longer be freed here */
    if (new_client)
	*new_client = client;

    return 0;
}
/* Return the listening socket descriptor (listen_fd) to the caller */
int get_main_cluster_fd()
{
    int main_fd = listen_fd;

    return main_fd;
}
/* Read on main comms (listen) socket, accept it */
int cluster_fd_callback(struct local_client *fd, char *buf, int len, char *csid,
struct local_client **new_client)
{
int newfd;
struct sockaddr_in addr;
socklen_t addrlen = sizeof(addr);
int status;
char name[MAX_CLUSTER_MEMBER_NAME_LEN];
DEBUGLOG("cluster_fd_callback\n");
*new_client = NULL;
newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno);
if (!newfd)
{
syslog(LOG_ERR, "error in accept: %m");
errno = EAGAIN;
return -1; /* Don't return an error or clvmd will close the listening FD */
}
/* Check that the client is a member of the cluster
and reject if not.
// FIXME: IPv4 specific
*/
if (name_from_csid((char *)&addr.sin_addr.s_addr, name) < 0)
{
char *ip = (char *)&addr.sin_addr.s_addr;
syslog(LOG_ERR, "Got connect from non-cluster node %d.%d.%d.%d\n",
ip[0], ip[1], ip[2], ip[3]);
DEBUGLOG("Got connect from non-cluster node %d.%d.%d.%d\n",
ip[0], ip[1], ip[2], ip[3]);
close(newfd);
errno = EAGAIN;
return -1;
}
status = alloc_client(newfd, (char *)&addr.sin_addr.s_addr, new_client);
if (status)
{
DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status);
close(newfd);
/* See above... */
errno = EAGAIN;
return -1;
}
DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client);
return newfd;
}
/*
 * Callback for a cluster data socket: read up to len bytes into buf and
 * report which node they came from via csid (MAX_CSID_LEN bytes, IPv4
 * address in the first four, remainder zero).
 *
 * Returns the read() result.  On EOF or a hard error the socket is closed,
 * the client is removed from the hash table and the cluster manager layer
 * is told the node is down.
 */
static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
			     struct local_client **new_client)
{
    struct sockaddr_in addr;
    socklen_t slen = sizeof(addr);
    int status;

    DEBUGLOG("read_from_tcpsock fd %d\n", client->fd);
    *new_client = NULL;

    /* Get "csid".  Only the address itself is copied; copying MAX_CSID_LEN
       bytes out of the 4-byte s_addr would run off the end of addr. */
    memset(csid, 0, MAX_CSID_LEN);
    if (getpeername(client->fd, (struct sockaddr *)&addr, &slen) == 0)
	memcpy(csid, &addr.sin_addr.s_addr, sizeof(addr.sin_addr.s_addr));
    else
	DEBUGLOG("read_from_tcpsock, getpeername failed (errno = %d)\n", errno);

    status = read(client->fd, buf, len);

    DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno);

    /* Remove it from the hash table if there's an error, clvmd will
       remove the socket from its lists and free the client struct */
    if (status == 0 ||
	(status < 0 && errno != EAGAIN && errno != EINTR))
    {
	char remcsid[MAX_CSID_LEN];

	memcpy(remcsid, csid, MAX_CSID_LEN);
	close(client->fd);

	/* If the csid was mangled, then make sure we remove the right entry */
	if (client->bits.net.flags)
	    remcsid[0] ^= 0x80;
	hash_remove_binary(sock_hash, remcsid, MAX_CSID_LEN);

	/* Tell cluster manager layer */
	add_down_node(remcsid);
    }
    return status;
}
/*
 * Open a TCP connection to the node whose IPv4 address is held in the first
 * bytes of csid, and register a client for it.
 *
 * Returns 0 on success (client stored in *newclient and passed to
 * add_client()), non-zero on failure.
 */
static int connect_csid(char *csid, struct local_client **newclient)
{
    int fd;
    struct sockaddr_in addr;
    int status;

    DEBUGLOG("Connecting socket\n");
    fd = socket(PF_INET, SOCK_STREAM, 0);

    if (fd < 0)
    {
	syslog(LOG_ERR, "Unable to create new socket: %m");
	return -1;
    }

    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    /* A csid is MAX_CSID_LEN bytes wide but sin_addr only holds an IPv4
       address; copy just that much so we don't overrun addr. */
    memcpy(&addr.sin_addr.s_addr, csid, sizeof(addr.sin_addr.s_addr));
    addr.sin_port = htons(tcp_port);

    DEBUGLOG("Connecting socket %d\n", fd);
    if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in)) < 0)
    {
	syslog(LOG_ERR, "Unable to connect to remote node: %m");
	DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno));
	close(fd);
	return -1;
    }

    status = alloc_client(fd, csid, newclient);
    if (status)
	close(fd);
    else
	add_client(*newclient);

    /* If we can connect to it, it must be running a clvmd */
    add_up_node(csid);
    return status;
}
/*
 * Send msglen bytes from buf to the node identified by csid.
 *
 * A message addressed to ourselves is not sent; msglen is returned as if it
 * had been.  If no connection to the node exists yet, one is opened first.
 * Returns the write() result, or -1 if the connection could not be made.
 * errtext is currently unused.
 */
static int tcp_send_message(void *buf, int msglen, unsigned char *csid, const char *errtext)
{
    struct local_client *peer;
    char local_csid[MAX_CSID_LEN];

    assert(csid);

    DEBUGLOG("tcp_send_message, csid = [%d.%d.%d.%d], msglen = %d\n", csid[0],csid[1],csid[2],csid[3], msglen);

    /* Don't connect to ourself */
    get_our_csid(local_csid);
    if (!memcmp(csid, local_csid, MAX_CSID_LEN))
	return msglen;

    /* Reuse an existing socket, otherwise connect on demand */
    peer = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN);
    if (!peer && connect_csid(csid, &peer))
	return -1;

    DEBUGLOG("tcp_send_message, fd = %d\n", peer->fd);

    return write(peer->fd, buf, msglen);
}
/*
 * Send a message to one node, or to every known node when csid is NULL.
 *
 * Returns the status of the last tcp_send_message() call (0 if there were
 * no nodes to iterate over).
 */
int cluster_send_message(void *buf, int msglen, char *csid, const char *errtext)
{
    int result = 0;
    void *iter = NULL;
    char node_csid[MAX_CSID_LEN];

    DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen);

    /* A specific destination: just send to it */
    if (csid)
	return tcp_send_message(buf, msglen, csid, errtext);

    /* If csid is NULL then send to all known (not just connected) nodes.
       Loop round all gulm-known nodes */
    while (get_next_node_csid(&iter, node_csid))
    {
	result = tcp_send_message(buf, msglen, node_csid, errtext);

	/* Stop on a zero-length write ... */
	if (result == 0)
	    break;
	/* ... or on a send that failed with EAGAIN/EINTR */
	if (result < 0 && (errno == EAGAIN || errno == EINTR))
	    break;
    }

    return result;
}
/*
 * Work out which TCP port to use.
 *
 * Looks up "//clvm/@port" via CCS; if the lookup is unavailable or the
 * value is not a valid port number (1..65535), default_port is returned.
 */
static int get_tcp_port(int default_port)
{
    int ccs_handle;
    int port = default_port;
    char *portstr;

    ccs_handle = ccs_connect();
    /* NOTE(review): any non-zero handle is treated as "could not connect"
       here - confirm this matches the ccs_connect() return convention. */
    if (ccs_handle)
    {
	return port;
    }

    if (!ccs_get(ccs_handle, "//clvm/@port", &portstr))
    {
	port = atoi(portstr);
	free(portstr);

	/* Reject anything outside the valid port range.  (This used to be
	   "&&", which can never be true, so bad values slipped through.) */
	if (port <= 0 || port >= 65536)
	    port = default_port;
    }
    ccs_disconnect(ccs_handle);

    DEBUGLOG("Using port %d for communications\n", port);
    return port;
}
/* To get our own IP address we get the locally bound address of the
   socket that's talking to GULM in the assumption(eek) that it will
   be on the "right" network in a multi-homed system.

   addr   - receives the raw address bytes (4 for IPv4, 16 for IPv6), so it
            must be at least sizeof(struct in6_addr) bytes long
   family - receives the address family of that socket (AF_INET/AF_INET6)

   Returns 0 on success, -1 if the socket name could not be obtained (in
   which case neither output is written). */
static int get_our_ip_address(char *addr, int *family)
{
    /* Use a sockaddr_in6 to make sure it's big enough */
    struct sockaddr_in6 saddr;
    socklen_t socklen = sizeof(saddr);

    memset(&saddr, 0, sizeof(saddr));
    if (getsockname(gulm_fd(), (struct sockaddr *)&saddr, &socklen))
	return -1;

    /* Callers use this to pick the socket type, so it has to be filled in */
    *family = saddr.sin6_family;

    if (saddr.sin6_family == AF_INET6)
    {
	memcpy(addr, &saddr.sin6_addr, sizeof(saddr.sin6_addr));
    }
    else
    {
	struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
	memcpy(addr, &sin4->sin_addr, sizeof(sin4->sin_addr));
    }
    return 0;
}
/* Public version of above for those that don't care what protocol
   we're using.

   Copies our own csid (MAX_CSID_LEN bytes, zero padded) into csid.  The
   address is looked up once and cached; if the lookup fails the result is
   all zeroes and the lookup is retried on the next call. */
void get_our_csid(char *csid)
{
    static char our_csid[MAX_CSID_LEN];
    static int got_csid = 0;

    if (!got_csid)
    {
	int family;

	memset(our_csid, 0, sizeof(our_csid));
	/* get_our_ip_address() returns 0 on success: only cache then */
	if (get_our_ip_address(our_csid, &family) == 0)
	{
	    got_csid = 1;
	}
    }
    memcpy(csid, our_csid, MAX_CSID_LEN);
}
/* Get someone else's IP address from DNS.

   node - host name to resolve
   addr - MAX_CSID_LEN byte buffer; receives the address in its leading
          bytes with the remainder zeroed

   Returns 0 on success, -1 if the name cannot be resolved. */
int get_ip_address(char *node, char *addr)
{
    struct hostent *he;
    size_t copy_len;

    memset(addr, 0, MAX_CSID_LEN);

    // TODO: what do we do about multi-homed hosts ???
    // CCSs ip_interfaces solved this but some bugger removed it.

    /* Try IPv6 first. The man page for gethostbyname implies that
       it will lookup ip6 & ip4 names, but it seems not to */
    he = gethostbyname2(node, AF_INET6);
    if (!he)
	he = gethostbyname2(node, AF_INET);
    if (!he || !he->h_addr_list[0] || he->h_length <= 0)
	return -1;

    /* Never write more than the csid buffer can hold */
    copy_len = he->h_length;
    if (copy_len > MAX_CSID_LEN)
	copy_len = MAX_CSID_LEN;

    /* For IPv4 address just use the lower 4 bytes.
       Copy into the buffer addr points at, not over the pointer itself. */
    memcpy(addr, he->h_addr_list[0], copy_len);

    return 0;
}

@ -0,0 +1,7 @@
/* Needed for struct in6_addr, which sizes a CSID below */
#include <netinet/in.h>
/* NOTE(review): presumably the largest cluster message in bytes - confirm against users */
#define MAX_CLUSTER_MESSAGE 1600
/* A CSID is as wide as an IPv6 address; the TCP comms code stores an IPv4
   address in the leading bytes and zero-fills the rest */
#define MAX_CSID_LEN sizeof(struct in6_addr)
/* Size of the buffers used to hold a cluster member's name */
#define MAX_CLUSTER_MEMBER_NAME_LEN 128
/* Opens the listening socket used for inter-node communication */
extern int init_comms(void);

@ -1,3 +1,4 @@
../daemons/clvmd/clvm.h
../lib/activate/activate.h
../lib/activate/targets.h
../lib/cache/lvmcache.h

@ -104,6 +104,14 @@ ifeq ("@POOL@", "internal")
format_pool/pool_label.c
endif
# Cluster locking, as selected by configure (@CLUSTER@):
#   "shared"   - build it as a separate library from the locking subdirectory
#   "internal" - compile it straight into this library
ifeq ("@CLUSTER@", "shared")
  SUBDIRS += locking
endif
ifeq ("@CLUSTER@", "internal")
  SOURCES += locking/cluster_locking.c
endif
ifeq ("@SNAPSHOTS@", "internal")
SOURCES += snapshot/snapshot.c
endif

32
lib/locking/Makefile.in Normal file

@ -0,0 +1,32 @@
#
# Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
#
# This file is part of the LVM2.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU General Public License v.2.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Locations substituted by configure.
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@

# Single source file, built into the shared object named by LIB_SHARED.
SOURCES = cluster_locking.c

LIB_SHARED = liblvm2clusterlock.so

include $(top_srcdir)/make.tmpl

.PHONY: install

# Install the library under its versioned name, then point the unversioned
# name at it with a symlink.
install: $(LIB_SHARED)
	$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
		$(libdir)/$(LIB_SHARED).$(LIB_VERSION)
	$(LN_S) -f $(LIB_SHARED).$(LIB_VERSION) \
		$(libdir)/$(LIB_SHARED)

@ -0,0 +1,462 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Locking functions for LVM.
* The main purpose of this part of the library is to serialise LVM
* management operations across a cluster.
*/
#include "lib.h"
#include "clvm.h"
#include "lvm-string.h"
#include "locking.h"
#include "locking_types.h"
#include <stddef.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#ifndef CLUSTER_LOCKING_INTERNAL
int lock_resource(struct cmd_context *cmd, const char *resource, int flags);
void locking_end(void);
int locking_init(int type, struct config_tree *cf, uint32_t *flags);
#endif
/*
 * One node's reply to a cluster request, as unpacked by _cluster_request()
 * from the buffer clvmd sends back.
 */
typedef struct lvm_response {
/* Name of the node the reply came from (NUL-terminated) */
char node[255];
/* dbg_malloc'd copy of the reply text; released by _cluster_free_request() */
char *response;
/* Status value the node returned; non-zero is reported as an error */
int status;
/* strlen() of response */
int len;
} lvm_response_t;
/*
* This gets stuck at the start of memory we allocate so we
* can sanity-check it at deallocation time
*/
#define LVM_SIGNATURE 0x434C564D
/*
* NOTE: the LVMD uses the socket FD as the client ID, this means
* that any client that calls fork() will inherit the context of
* its parent.
*/
static int _clvmd_sock = -1;
/* FIXME Install SIGPIPE handler? */
/*
 * Open connection to the Cluster Manager daemon.
 *
 * Returns the connected socket descriptor, or -1 on failure with errno
 * describing the socket()/connect() error.
 */
static int _open_local_sock(void)
{
	struct sockaddr_un sa;
	int fd;
	int connect_errno;

	/* Open local socket */
	fd = socket(PF_UNIX, SOCK_STREAM, 0);
	if (fd < 0) {
		log_error("Local socket creation failed: %s", strerror(errno));
		return -1;
	}

	/* Copy sizeof(), not strlen(): every byte of the name is wanted */
	memset(&sa, 0, sizeof(sa));
	sa.sun_family = AF_UNIX;
	memcpy(sa.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));

	if (!connect(fd, (struct sockaddr *) &sa, sizeof(sa)))
		return fd;

	/* Preserve connect()'s errno across the logging and close() */
	connect_errno = errno;
	log_error("connect() failed on local socket: %s",
		  strerror(errno));
	if (close(fd))
		stack;
	errno = connect_errno;

	return -1;
}
/*
 * Send a request to clvmd and read back the reply.
 *
 * inbuf/inlen - the complete request (header plus arguments)
 * retbuf      - receives a dbg_malloc'd buffer holding the reply header
 *               followed by its arguments; the caller frees it (it may be
 *               set even when 0 is returned)
 *
 * Returns 1 on success, 0 on failure with errno set.
 */
static int _send_request(char *inbuf, int inlen, char **retbuf)
{
	char outbuf[PIPE_BUF];
	struct clvm_header *outheader = (struct clvm_header *) outbuf;
	int len;
	int off;
	int buflen;
	int err;

	/* Send it to CLVMD */
 rewrite:
	if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) {
		if (err == -1 && errno == EINTR)
			goto rewrite;
		log_error("Error writing data to clvmd: %s", strerror(errno));
		return 0;
	}

	/* Get the response */
 reread:
	if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
		if (errno == EINTR)
			goto reread;
		log_error("Error reading data from clvmd: %s", strerror(errno));
		return 0;
	}

	if (len == 0) {
		log_error("EOF reading CLVMD");
		errno = ENOTCONN;
		return 0;
	}

	/* Allocate buffer */
	buflen = len + outheader->arglen;
	*retbuf = dbg_malloc(buflen);
	if (!*retbuf) {
		errno = ENOMEM;
		return 0;
	}

	/* Copy the header */
	memcpy(*retbuf, outbuf, len);
	outheader = (struct clvm_header *) *retbuf;

	/* Read the returned values */
	off = 1;		/* we've already read the first byte */
	while (off < outheader->arglen) {
		len = read(_clvmd_sock, outheader->args + off,
			   buflen - off - offsetof(struct clvm_header, args));
		if (len > 0) {
			off += len;
			continue;
		}
		/* A signal must not truncate the reply: just try again */
		if (len < 0 && errno == EINTR)
			continue;
		/* EOF or hard error: stop and use what we have, as before */
		break;
	}

	/* Was it an error ? */
	if (outheader->status < 0) {
		errno = -outheader->status;
		log_error("cluster send request failed: %s", strerror(errno));
		return 0;
	}

	return 1;
}
/*
 * Fill in a request header and translate the wildcard node names.
 *
 * node may be NULL or one of two special names:
 *   "*" - all nodes      (empty node name in the header)
 *   "." - local node only (empty node name plus CLVMD_FLAG_LOCAL)
 * Any other name is copied into the header verbatim, so the caller must
 * have left room for it after the header.
 */
static void _build_header(struct clvm_header *head, int cmd, const char *node,
			  int len)
{
	head->cmd = cmd;
	head->status = 0;
	head->flags = 0;
	head->clientid = 0;
	head->arglen = len;

	/* Start from "no specific node"; only a real name overrides this */
	head->node[0] = '\0';

	if (!node || !strcmp(node, "*"))
		return;

	if (!strcmp(node, ".")) {
		head->flags = CLVMD_FLAG_LOCAL;
		return;
	}

	strcpy(head->node, node);
}
/*
 * Send a message to a(or all) node(s) in the cluster and wait for replies
 *
 * cmd      - command byte for the header
 * node     - target node name ("*" and "." are handled by _build_header());
 *            must not be NULL
 * data/len - argument bytes appended after the node name
 * response - receives an array of replies, to be released with
 *            _cluster_free_request(); set to NULL if unpacking fails
 * num      - receives the number of entries in *response
 *
 * Returns non-zero on success, 0 on failure with errno set.
 */
static int _cluster_request(char cmd, const char *node, void *data, int len,
			    lvm_response_t ** response, int *num)
{
	char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1];
	int *outptr;
	char *inptr;
	char *retbuf = NULL;
	int status;
	int i;
	int num_responses = 0;
	struct clvm_header *head = (struct clvm_header *) outbuf;
	lvm_response_t *rarray;

	*num = 0;

	if (_clvmd_sock == -1)
		_clvmd_sock = _open_local_sock();

	if (_clvmd_sock == -1)
		return 0;

	_build_header(head, cmd, node, len);
	memcpy(head->node + strlen(head->node) + 1, data, len);

	status = _send_request(outbuf, sizeof(struct clvm_header) +
			       strlen(head->node) + len, &retbuf);
	if (!status)
		goto out;

	/* Count the number of responses we got */
	head = (struct clvm_header *) retbuf;
	inptr = head->args;
	while (inptr[0]) {
		num_responses++;
		inptr += strlen(inptr) + 1;
		inptr += sizeof(int);
		inptr += strlen(inptr) + 1;
	}

	/*
	 * Allocate response array.
	 * With an extra pair of INTs on the front to sanity
	 * check the pointer when we are given it back to free
	 */
	outptr = dbg_malloc(sizeof(lvm_response_t) * num_responses +
			    sizeof(int) * 2);
	if (!outptr) {
		errno = ENOMEM;
		status = 0;
		goto out;
	}

	*response = (lvm_response_t *) (outptr + 2);
	outptr[0] = LVM_SIGNATURE;
	outptr[1] = num_responses;
	rarray = *response;

	/* Unpack the response into an lvm_response_t array */
	inptr = head->args;
	i = 0;
	while (inptr[0]) {
		/* Node name: never write past the fixed-size field */
		strncpy(rarray[i].node, inptr, sizeof(rarray[i].node) - 1);
		rarray[i].node[sizeof(rarray[i].node) - 1] = '\0';
		inptr += strlen(inptr) + 1;

		/* Status follows at an arbitrary offset, so copy it out
		   rather than dereferencing a possibly unaligned int */
		memcpy(&rarray[i].status, inptr, sizeof(int));
		inptr += sizeof(int);

		rarray[i].response = dbg_malloc(strlen(inptr) + 1);
		if (rarray[i].response == NULL) {
			/* Free up everything else and return error */
			int j;
			for (j = 0; j < i; j++)
				dbg_free(rarray[j].response);
			dbg_free(outptr);
			/* Don't leave the caller holding freed memory */
			*response = NULL;
			errno = ENOMEM;
			status = 0;
			goto out;
		}

		strcpy(rarray[i].response, inptr);
		rarray[i].len = strlen(inptr);
		inptr += strlen(inptr) + 1;
		i++;
	}
	*num = num_responses;
	*response = rarray;

      out:
	if (retbuf)
		dbg_free(retbuf);

	return status;
}
/*
 * Free reply array
 *
 * response must be a pointer handed out by _cluster_request().  Returns 1
 * once everything has been released, or 0 with errno = EINVAL if response
 * is NULL or does not carry our signature.
 */
static int _cluster_free_request(lvm_response_t * response)
{
	int *ptr;
	int i;
	int num;

	/* Check it's ours to free */
	if (response == NULL) {
		errno = EINVAL;
		return 0;
	}

	/* The signature and the entry count sit just in front of the array */
	ptr = (int *) response - 2;
	if (ptr[0] != LVM_SIGNATURE) {
		errno = EINVAL;
		return 0;
	}

	num = ptr[1];

	for (i = 0; i < num; i++) {
		dbg_free(response[i].response);
	}

	/* Wipe the signature so an accidental second free is rejected above */
	ptr[0] = 0;
	dbg_free(ptr);

	return 1;
}
/*
 * Build a lock request for clvmd, send it and report any per-node errors.
 *
 * The argument block is: one byte of flags (with bit 0x40 cleared), one
 * unused byte, then the NUL-terminated lock name.
 *
 * Returns the _cluster_request() status, forced to 0 if any node replied
 * with a non-zero status.
 */
static int _lock_for_cluster(unsigned char cmd, unsigned int flags, char *name)
{
	lvm_response_t *response = NULL;
	const char *node = "";
	char *args;
	int num_responses;
	int saved_errno;
	int status;
	int len;
	int i;

	assert(name);

	/* flags byte + spare byte + name + NUL */
	len = strlen(name) + 3;
	args = alloca(len);
	args[0] = flags & 0xBF;	/* Maskoff LOCAL flag */
	args[1] = 0;		/* Not used now */
	strcpy(args + 2, name);

	/*
	 * VG locks are just that: locks, and have no side effects
	 * so we only need to do them on the local node because all
	 * locks are cluster-wide.
	 * Also, if the lock is exclusive it makes no sense to try to
	 * acquire it on all nodes, so just do that on the local node too.
	 */
	if (cmd == CLVMD_CMD_LOCK_VG ||
	    (flags & LCK_TYPE_MASK) == LCK_EXCL ||
	    (flags & LCK_LOCAL))
		node = ".";

	status = _cluster_request(cmd, node, args, len,
				  &response, &num_responses);

	/* If any nodes were down then display them and return an error */
	for (i = 0; i < num_responses; i++) {
		if (!response[i].status)
			continue;

		if (response[i].status == -EHOSTDOWN)
			log_error("clvmd not running on node %s",
				  response[i].node);
		else
			log_error("Error locking on node %s: %s",
				  response[i].node,
				  response[i].response[0] ?
				  	response[i].response :
				  	strerror(response[i].status));
		status = 0;
	}

	/* Freeing the replies must not disturb the errno we report */
	saved_errno = errno;
	_cluster_free_request(response);
	errno = saved_errno;

	return status;
}
/*
 * API entry point for LVM
 *
 * Translates a resource name and lock flags into a clvmd lock request.
 *   LCK_VG scope: lock name is "V_<resource>", or "P_orphans" when the
 *                 resource is NULL or empty; only the lock type is sent.
 *   LCK_LV scope: lock name is the resource itself; bit 0x20 of the flags
 *                 is cleared before sending.
 *
 * Returns 0 on failure (unknown scope, missing or over-long name, or the
 * cluster request failed), non-zero on success.
 */
#ifdef CLUSTER_LOCKING_INTERNAL
static int _lock_resource(struct cmd_context *cmd, const char *resource,
			  int flags)
#else
int lock_resource(struct cmd_context *cmd, const char *resource, int flags)
#endif
{
	char lockname[PATH_MAX];
	int cluster_cmd = 0;

	switch (flags & LCK_SCOPE_MASK) {
	case LCK_VG:
		/* If the VG name is empty then lock the unused PVs */
		if (!resource || !*resource)
			lvm_snprintf(lockname, sizeof(lockname), "P_orphans");
		else {
			/* "V_" + name + NUL must fit */
			if (strlen(resource) + sizeof("V_") > sizeof(lockname)) {
				log_error("Lock resource name too long: %s",
					  resource);
				return 0;
			}
			lvm_snprintf(lockname, sizeof(lockname), "V_%s",
				     resource);
		}

		cluster_cmd = CLVMD_CMD_LOCK_VG;
		flags &= LCK_TYPE_MASK;
		break;

	case LCK_LV:
		/* Checked at run time: an assert() vanishes under NDEBUG */
		if (!resource) {
			log_error("No resource name given for LV lock");
			return 0;
		}
		if (strlen(resource) >= sizeof(lockname)) {
			log_error("Lock resource name too long: %s",
				  resource);
			return 0;
		}

		cluster_cmd = CLVMD_CMD_LOCK_LV;
		strcpy(lockname, resource);
		flags &= 0xffdf;	/* Mask off HOLD flag */
		break;

	default:
		log_error("Unrecognised lock scope: %d",
			  flags & LCK_SCOPE_MASK);
		return 0;
	}

	/* Send a message to the cluster manager */
	log_very_verbose("Locking %s at 0x%x", lockname, flags);

	return _lock_for_cluster(cluster_cmd, flags, lockname);
}
/* Close the connection to clvmd, if one is open, and mark it closed */
#ifdef CLUSTER_LOCKING_INTERNAL
static void _locking_end(void)
#else
void locking_end(void)
#endif
{
	if (_clvmd_sock != -1) {
		if (close(_clvmd_sock))
			stack;
	}

	_clvmd_sock = -1;
}
/*
 * Drop the current connection to clvmd (if there is one) and open a fresh
 * one.  _clvmd_sock is left at -1 if the reconnect fails.
 */
#ifdef CLUSTER_LOCKING_INTERNAL
static void _reset_locking(void)
#else
void reset_locking(void)
#endif
{
	/* Only close a socket we actually have, as locking_end() does */
	if (_clvmd_sock != -1 && close(_clvmd_sock))
		stack;

	_clvmd_sock = _open_local_sock();
	if (_clvmd_sock == -1)
		stack;
}
#ifdef CLUSTER_LOCKING_INTERNAL
/*
 * Built-in variant: install the cluster locking callbacks into *locking and
 * connect to clvmd.  Returns 1 on success, 0 if the connection fails.
 */
int init_cluster_locking(struct locking_type *locking, struct config_tree *cft)
{
	locking->lock_resource = _lock_resource;
	locking->fin_locking = _locking_end;
	locking->reset_locking = _reset_locking;
	locking->flags = LCK_PRE_MEMLOCK;

	_clvmd_sock = _open_local_sock();

	return (_clvmd_sock == -1) ? 0 : 1;
}
#else
/*
 * Shared-library variant: connect to clvmd and, on success, add
 * LCK_PRE_MEMLOCK to *flags.  Returns 1 on success, 0 if the connection
 * fails (in which case *flags is left untouched).
 */
int locking_init(int type, struct config_tree *cf, uint32_t *flags)
{
	_clvmd_sock = _open_local_sock();
	if (_clvmd_sock != -1) {
		/* Ask LVM to lock memory before calling us */
		*flags |= LCK_PRE_MEMLOCK;
		return 1;
	}

	return 0;
}
#endif

@ -145,6 +145,14 @@ int init_locking(int type, struct config_tree *cft)
return 1;
#endif
#ifdef CLUSTER_LOCKING_INTERNAL
case 3:
if (!init_cluster_locking(&_locking, cft))
break;
log_very_verbose("Cluster locking enabled.");
return 1;
#endif
default:
log_error("Unknown locking type requested.");
return 0;

@ -40,3 +40,4 @@ int init_no_locking(struct locking_type *locking, struct config_tree *cf);
int init_file_locking(struct locking_type *locking, struct config_tree *cf);
int init_external_locking(struct locking_type *locking, struct config_tree *cf);
int init_cluster_locking(struct locking_type *locking, struct config_tree *cf);

154
scripts/clvmd_fix_conf.sh Normal file

@ -0,0 +1,154 @@
#!/bin/sh
#
# Edit an lvm.conf file to enable cluster locking.
#
# $1 is the directory where the locking library is installed.
# $2 (optional) is the config file
# $3 (optional) is the locking library name
#
#
# Positional arguments: library directory, config file, library name.
PREFIX=$1
LVMCONF=$2
LIB=$3

# The library directory is mandatory; without it just print the usage text.
if [ -z "$PREFIX" ]
then
  echo "usage: $0 <prefix> [<config file>] [<library>]"
  echo ""
  echo "<prefix> location of the cluster locking shared library. (no default)"
  echo "<config file> name of the LVM config file (default: /etc/lvm/lvm.conf)"
  echo "<library> name of the shared library (default: liblvm2clusterlock.so)"
  echo ""
  exit 0
fi

# Defaults for the optional arguments.
[ -z "$LVMCONF" ] && LVMCONF="/etc/lvm/lvm.conf"
[ -z "$LIB" ] && LIB="liblvm2clusterlock.so"

# The prefix must be absolute.  A case pattern is used rather than the
# ${PREFIX:0:1} substring expansion, which is a bashism and fails when
# /bin/sh is a strictly POSIX shell.
case "$PREFIX" in
  /*)
    ;;
  *)
    echo "Prefix must be an absolute path name (starting with a /)"
    exit 12
    ;;
esac

# Both the config file and the locking library have to exist already.
if [ ! -f "$LVMCONF" ]
then
  echo "$LVMCONF does not exist"
  exit 10
fi

if [ ! -f "$PREFIX/$LIB" ]
then
  echo "$PREFIX/$LIB does not exist, did you do a \"make install\" ?"
  exit 11
fi
# Scratch files: one for the generated sed script, one for the edited config.
# Stop straight away if either cannot be created, otherwise the redirections
# further down would write to an empty file name.
SCRIPTFILE=`mktemp -t lvmscript.XXXXXXXXXX`
if [ $? != 0 ] || [ -z "$SCRIPTFILE" ]
then
  echo "failed to create temporary file, $LVMCONF not updated"
  exit 1
fi
TMPFILE=`mktemp -t lvmtmp.XXXXXXXXXX`
if [ $? != 0 ] || [ -z "$TMPFILE" ]
then
  echo "failed to create temporary file, $LVMCONF not updated"
  rm -f "$SCRIPTFILE"
  exit 1
fi

# Flags so we know which parts of the file we can replace and which need
# adding. These are return codes from grep, so zero means it IS present!
have_type=1
have_dir=1
have_library=1
have_global=1

# The config file name is quoted so a path containing spaces still works.
grep -q '^[[:blank:]]*locking_type[[:blank:]]*=' "$LVMCONF"
have_type=$?

grep -q '^[[:blank:]]*library_dir[[:blank:]]*=' "$LVMCONF"
have_dir=$?

grep -q '^[[:blank:]]*locking_library[[:blank:]]*=' "$LVMCONF"
have_library=$?

# Those options are in section "global {" so we must have one if any are present.
if [ "$have_type" = "0" -o "$have_dir" = "0" -o "$have_library" = "0" ]
then

  # See if we can find it...
  grep -q '^[[:blank:]]*global[[:blank:]]*{' "$LVMCONF"
  have_global=$?

  if [ "$have_global" = "1" ]
  then
    echo "global keys but no 'global {' found, can't edit file"
    rm -f "$SCRIPTFILE" "$TMPFILE"
    exit 12
  fi
fi
# So if we don't have "global {" we need to create one and
# populate it
# (have_global is still "1" here when none of the three keys was found,
# because the search for "global {" is only made when a key is present.)
if [ "$have_global" = "1" ]
then
# New config = the existing file followed by a freshly written global section.
cat $LVMCONF - <<EOF > $TMPFILE
global {
# Enable locking for cluster LVM
locking_type = 2
library_dir = "$PREFIX"
locking_library = "$LIB"
}
EOF
if [ $? != 0 ]
then
echo "failed to create temporary config file, $LVMCONF not updated"
exit 1
fi
else
#
# We have a "global {" section, so add or replace the
# locking entries as appropriate
#
# SEDCMD accumulates one sed command per key, separated by a literal
# backslash-n that is turned into a newline when the script file is written.
# A key that is already present is rewritten with s///; a missing one is
# appended after the "global {" line.
if [ "$have_type" = "0" ]
then
SEDCMD=" s/^[[:blank:]]*locking_type[[:blank:]]*=.*/\ \ \ \ locking_type = 2/g"
else
SEDCMD=" /global[[:blank:]]*{/a\ \ \ \ locking_type = 2"
fi
# The library_dir substitution uses ' as the s delimiter because the
# prefix is a path and so contains slashes.
if [ "$have_dir" = "0" ]
then
SEDCMD="${SEDCMD}\ns'^[[:blank:]]*library_dir[[:blank:]]*=.*'\ \ \ \ library_dir = \"$PREFIX\"'g"
else
SEDCMD="${SEDCMD}\n/global[[:blank:]]*{/a\ \ \ \ library_dir = \"$PREFIX\""
fi
if [ "$have_library" = "0" ]
then
SEDCMD="${SEDCMD}\ns/^[[:blank:]]*locking_library[[:blank:]]*=.*/\ \ \ \ locking_library = \"$LIB\"/g"
else
SEDCMD="${SEDCMD}\n/global[[:blank:]]*{/a\ \ \ \ locking_library = \"$LIB\""
fi
# NOTE(review): "echo -e" is not a POSIX sh feature; this relies on /bin/sh
# providing it (bash does) - confirm on the target systems.
echo -e $SEDCMD > $SCRIPTFILE
# Apply the generated script to the config, writing the result to the temp file.
sed <$LVMCONF >$TMPFILE -f $SCRIPTFILE
if [ $? != 0 ]
then
echo "sed failed, $LVMCONF not updated"
exit 1
fi
fi
# Now we have a suitably edited config file in a temp place,
# backup the original and copy our new one into place.
# File names are quoted so paths containing spaces work, and the temporary
# files are removed on the failure paths as well as on success.
cp "$LVMCONF" "$LVMCONF.nocluster"
if [ $? != 0 ]
then
  echo "failed to backup old config file, $LVMCONF not updated"
  rm -f "$SCRIPTFILE" "$TMPFILE"
  exit 2
fi

cp "$TMPFILE" "$LVMCONF"
if [ $? != 0 ]
then
  echo "failed to copy new config file into place, check $LVMCONF is still OK"
  rm -f "$SCRIPTFILE" "$TMPFILE"
  exit 3
fi

rm -f "$SCRIPTFILE" "$TMPFILE"

90
scripts/clvmd_init Executable file

@ -0,0 +1,90 @@
#!/bin/bash
#
# /etc/rc.d/init.d/clvmd
#
# Starts the clvm daemon
# NOTE: These startup levels may not be right yet - it depends on where
# the rest of the cluster startup goes.
#
# chkconfig: 345 72 5
# description: distributes LVM commands in a clustered environment. \
# a clvmd must be run on all nodes in a cluster for clustered LVM \
# operations to work.
# processname: clvmd
# Pull in the distribution's init-script helper functions.
. /etc/init.d/functions

BINARY=/usr/sbin/clvmd
LOCKFILE=/var/lock/subsys/clvmd

# Quietly do nothing if the daemon is not installed.
[ -x "$BINARY" ] || exit 0

# Result of the most recent start/stop attempt.
RETVAL=0

#
# See how we were called.
#
prog="clvmd"
# Start clvmd unless the subsys lock file says it is already running.
# Returns the daemon's start status (or the current RETVAL if skipped).
start() {
	# Check if clvmd is already running
	if [ -f "$LOCKFILE" ]; then
		return $RETVAL
	fi

	echo -n $"Starting $prog: "
	daemon $BINARY
	RETVAL=$?
	# Record a successful start so later calls can see it
	if [ $RETVAL -eq 0 ]; then
		touch $LOCKFILE
	fi
	echo
	return $RETVAL
}
# Stop clvmd; the lock file is removed only if killproc succeeded.
stop() {
	echo -n $"Stopping $prog: "
	killproc $BINARY
	RETVAL=$?
	if [ $RETVAL -eq 0 ]; then
		rm -f $LOCKFILE
	fi
	echo
	return $RETVAL
}
# Stop then start; the result is whatever start returns.
restart() {
	stop
	start
	return $?
}
# There is no separate reload action: a reload is a full restart.
reload() {
	restart
	return $?
}
# Report on the daemon using the function library's status helper.
status_clvm() {
	status "$BINARY"
	return $?
}
# Dispatch on the requested action.
case "$1" in
  start)
	start
	;;
  stop)
	stop
	;;
  reload|restart)
	restart
	;;
  condrestart)
	# Only restart a daemon that was started through this script
	if [ -f $LOCKFILE ]; then
		restart
	fi
	;;
  status)
	status_clvm
	;;
  *)
	echo $"Usage: $0 {start|stop|restart|condrestart|status}"
	exit 1
	;;
esac

# Exit with the status of the action that ran.  (A second "exit $RETVAL"
# used to follow this line; it could never be reached and would have hidden
# the result of "status", which does not set RETVAL.)
exit $?