1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-19 14:04:17 +03:00

Add cluster support.

This commit is contained in:
Alasdair Kergon 2004-06-24 08:02:38 +00:00
parent 244a32b3d5
commit d3c8211fef
32 changed files with 6534 additions and 9 deletions

View File

@ -22,11 +22,13 @@ ifeq ("@INTL@", "yes")
SUBDIRS += po
endif
SUBDIRS += lib tools
SUBDIRS += lib tools daemons
ifeq ($(MAKECMDGOALS),distclean)
SUBDIRS += lib/format1 \
SUBDIRS += daemons/clvmd \
lib/format1 \
lib/format_pool \
lib/locking \
lib/mirror \
lib/snapshot \
po \
@ -35,14 +37,16 @@ endif
include make.tmpl
daemons: lib
lib: include
tools: lib
po: lib tools
po: tools daemons
ifeq ("@INTL@", "yes")
lib.pofile: include.pofile
tools.pofile: lib.pofile
po.pofile: lib.pofile tools.pofile
daemons.pofile: lib.pofile
po.pofile: tools.pofile daemons.pofile
pofile: po.pofile
endif

View File

@ -1 +1 @@
2.00.17-cvs (2004-06-20)
2.00.18-cvs (2004-06-24)

View File

@ -1,3 +1,7 @@
Version 2.00.18 - 24 June 2004
==============================
Add cluster support.
Version 2.00.17 - 20 June 2004
==============================
configure --enable-fsadm to try out fsadm. fsadm is not tested yet.

51
configure vendored
View File

@ -309,7 +309,7 @@ ac_includes_default="\
#endif"
ac_default_prefix=/usr
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os MSGFMT JOBS STATIC_LINK LVM1 POOL SNAPSHOTS MIRRORS OWNER GROUP CLDFLAGS CLDWHOLEARCHIVE CLDNOWHOLEARCHIVE LD_DEPS LD_FLAGS SOFLAG LVM_VERSION LVM1_FALLBACK DEBUG DEVMAPPER HAVE_LIBDL HAVE_SELINUX CMDLIB LOCALEDIR CONFDIR STATICDIR INTL_PACKAGE INTL FSADM LIBOBJS LTLIBOBJS'
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os MSGFMT JOBS STATIC_LINK LVM1 POOL SNAPSHOTS MIRRORS OWNER GROUP CLDFLAGS CLDWHOLEARCHIVE CLDNOWHOLEARCHIVE LD_DEPS LD_FLAGS SOFLAG LVM_VERSION LVM1_FALLBACK DEBUG DEVMAPPER HAVE_LIBDL HAVE_SELINUX CMDLIB LOCALEDIR CONFDIR STATICDIR INTL_PACKAGE INTL CLVMD CLUSTER FSADM LIBOBJS LTLIBOBJS'
ac_subst_files=''
# Initialize some variables set by options.
@ -867,10 +867,13 @@ Optional Packages:
TYPE=internal
--with-pool=TYPE GFS pool read-only support: internal/shared/none
TYPE=internal
--with-cluster=TYPE Cluster LVM locking support: internal/shared/none
TYPE=internal
--with-snapshots=TYPE Snapshot support: internal/shared/none
TYPE=internal
--with-mirrors=TYPE Mirror support: internal/shared/none
TYPE=internal
--with-clvmd Build cluster LVM Daemon
--with-localedir=DIR Translation files in DIR PREFIX/share/locale
--with-confdir=DIR Configuration files in DIR /etc
--with-staticdir=DIR Static binary in DIR EXEC_PREFIX/sbin
@ -3900,6 +3903,7 @@ case "$host_os" in
SOFLAG="-shared"
DEVMAPPER=yes
ODIRECT=yes
CLUSTER=internal
FSADM=no ;;
darwin*)
CFLAGS="-no-cpp-precomp -fno-common"
@ -3911,6 +3915,7 @@ case "$host_os" in
SOFLAG="-dynamiclib"
DEVMAPPER=no
ODIRECT=no
CLUSTER=none
FSADM=no ;;
esac
@ -3998,6 +4003,25 @@ if test x$POOL = xinternal; then
fi
# Check whether --with-cluster or --without-cluster was given.
if test "${with_cluster+set}" = set; then
withval="$with_cluster"
CLUSTER="$withval"
fi;
if [ "x$CLUSTER" != xnone -a "x$CLUSTER" != xinternal -a "x$CLUSTER" != xshared ];
then { { echo "$as_me:$LINENO: error: --with-cluster parameter invalid
" >&5
echo "$as_me: error: --with-cluster parameter invalid
" >&2;}
{ (exit 1); exit 1; }; }
exit
fi;
if test x$CLUSTER = xinternal; then
CFLAGS="$CFLAGS -DCLUSTER_LOCKING_INTERNAL"
fi
# Check whether --enable-jobs or --disable-jobs was given.
if test "${enable_jobs+set}" = set; then
enableval="$enable_jobs"
@ -4071,6 +4095,20 @@ if test x$READLINE = xyes; then
CFLAGS="$CFLAGS -DREADLINE_SUPPORT"
fi
# Check whether --with-clvmd or --without-clvmd was given.
if test "${with_clvmd+set}" = set; then
withval="$with_clvmd"
\
CLVMD=$withval
else
CLVMD=no
fi;
if test x$CLVMD = xyes && test x$CLUSTER = xnone; then
CLUSTER=internal
fi
echo "$ac_t""$CLVMD" 1>&6
echo $ac_n "checking whether to enable debugging""... $ac_c" 1>&6
# Check whether --enable-debug or --disable-debug was given.
if test "${enable_debug+set}" = set; then
@ -4698,7 +4736,7 @@ else
HAVE_LIBDL=no
fi
if [ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o \
if [ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o "x$CLUSTER" = xshared -o \
"x$SNAPSHOTS" = xshared -o "x$MIRRORS" = xshared \
\) -a "x$STATIC_LINK" = xyes ];
then { { echo "$as_me:$LINENO: error: Features cannot be 'shared' when building statically
@ -5207,7 +5245,9 @@ fi
ac_config_files="$ac_config_files Makefile make.tmpl doc/Makefile include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/mirror/Makefile lib/snapshot/Makefile man/Makefile po/Makefile tools/Makefile tools/version.h tools/fsadm/Makefile test/mm/Makefile test/device/Makefile test/format1/Makefile test/regex/Makefile test/filters/Makefile"
ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile doc/Makefile include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile lib/snapshot/Makefile man/Makefile po/Makefile tools/Makefile tools/version.h tools/fsadm/Makefile test/mm/Makefile test/device/Makefile test/format1/Makefile test/regex/Makefile test/filters/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
# tests run on this system so they can be shared between configure
@ -5760,11 +5800,14 @@ do
# Handling of arguments.
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"make.tmpl" ) CONFIG_FILES="$CONFIG_FILES make.tmpl" ;;
"daemons/Makefile" ) CONFIG_FILES="$CONFIG_FILES daemons/Makefile" ;;
"daemons/clvmd/Makefile" ) CONFIG_FILES="$CONFIG_FILES daemons/clvmd/Makefile" ;;
"doc/Makefile" ) CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
"include/Makefile" ) CONFIG_FILES="$CONFIG_FILES include/Makefile" ;;
"lib/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/Makefile" ;;
"lib/format1/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/format1/Makefile" ;;
"lib/format_pool/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/format_pool/Makefile" ;;
"lib/locking/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/locking/Makefile" ;;
"lib/mirror/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/mirror/Makefile" ;;
"lib/snapshot/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/snapshot/Makefile" ;;
"man/Makefile" ) CONFIG_FILES="$CONFIG_FILES man/Makefile" ;;
@ -5916,6 +5959,8 @@ s,@CONFDIR@,$CONFDIR,;t t
s,@STATICDIR@,$STATICDIR,;t t
s,@INTL_PACKAGE@,$INTL_PACKAGE,;t t
s,@INTL@,$INTL,;t t
s,@CLVMD@,$CLVMD,;t t
s,@CLUSTER@,$CLUSTER,;t t
s,@FSADM@,$FSADM,;t t
s,@LIBOBJS@,$LIBOBJS,;t t
s,@LTLIBOBJS@,$LTLIBOBJS,;t t

View File

@ -59,6 +59,7 @@ case "$host_os" in
SOFLAG="-shared"
DEVMAPPER=yes
ODIRECT=yes
CLUSTER=internal
FSADM=no ;;
darwin*)
CFLAGS="-no-cpp-precomp -fno-common"
@ -70,6 +71,7 @@ case "$host_os" in
SOFLAG="-dynamiclib"
DEVMAPPER=no
ODIRECT=no
CLUSTER=none
FSADM=no ;;
esac
@ -141,6 +143,22 @@ if test x$POOL = xinternal; then
CFLAGS="$CFLAGS -DPOOL_INTERNAL"
fi
dnl -- cluster_locking inclusion type
AC_ARG_WITH(cluster,
[ --with-cluster=TYPE Cluster LVM locking support: internal/shared/none
[TYPE=internal] ],
[ CLUSTER="$withval" ])
if [[ "x$CLUSTER" != xnone -a "x$CLUSTER" != xinternal -a "x$CLUSTER" != xshared ]];
then AC_MSG_ERROR(
--with-cluster parameter invalid
)
exit
fi;
if test x$CLUSTER = xinternal; then
CFLAGS="$CFLAGS -DCLUSTER_LOCKING_INTERNAL"
fi
AC_ARG_ENABLE(jobs, [ --enable-jobs=NUM Number of jobs to run simultaneously], JOBS=-j$enableval, JOBS=-j2)
@ -192,6 +210,15 @@ if test x$READLINE = xyes; then
CFLAGS="$CFLAGS -DREADLINE_SUPPORT"
fi
dnl Build cluster LVM daemon
AC_ARG_WITH(clvmd, [ --with-clvmd Build cluster LVM Daemon], \
CLVMD=$withval, CLVMD=no)
dnl If clvmd enabled and not cluster locking, automgically include the locking.
if test x$CLVMD = xyes && test x$CLUSTER = xnone; then
CLUSTER=internal
fi
echo "$ac_t""$CLVMD" 1>&6
echo $ac_n "checking whether to enable debugging""... $ac_c" 1>&6
dnl Enable Debugging
AC_ARG_ENABLE(debug, [ --enable-debug Enable debugging], \
@ -272,7 +299,7 @@ else
fi
dnl Check for shared/static conflicts
if [[ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o \
if [[ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o "x$CLUSTER" = xshared -o \
"x$SNAPSHOTS" = xshared -o "x$MIRRORS" = xshared \
\) -a "x$STATIC_LINK" = xyes ]];
then AC_MSG_ERROR(
@ -377,6 +404,8 @@ AC_SUBST(CONFDIR)
AC_SUBST(STATICDIR)
AC_SUBST(INTL_PACKAGE)
AC_SUBST(INTL)
AC_SUBST(CLVMD)
AC_SUBST(CLUSTER)
AC_SUBST(FSADM)
dnl First and last lines should not contain files to generate in order to
@ -384,11 +413,14 @@ dnl keep utility scripts running properly
AC_OUTPUT( \
Makefile \
make.tmpl \
daemons/Makefile \
daemons/clvmd/Makefile \
doc/Makefile \
include/Makefile \
lib/Makefile \
lib/format1/Makefile \
lib/format_pool/Makefile \
lib/locking/Makefile \
lib/mirror/Makefile \
lib/snapshot/Makefile \
man/Makefile \

23
daemons/Makefile.in Normal file
View File

@ -0,0 +1,23 @@
#
# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
#
# This file is part of the LVM2.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU General Public License v.2.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
ifeq ("@CLVMD@", "yes")
SUBDIRS = clvmd
endif
include $(top_srcdir)/make.tmpl

47
daemons/clvmd/Makefile.in Normal file
View File

@ -0,0 +1,47 @@
#
# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
#
# This file is part of the LVM2.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU General Public License v.2.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
SOURCES = \
clvmd-cman.c \
clvmd-command.c \
clvmd.c \
libclvm.c \
lvm-functions.c \
system-lv.c
TARGETS = \
clvmd
include $(top_srcdir)/make.tmpl
CFLAGS += -D_REENTRANT -fno-strict-aliasing
LIBS += -ldevmapper -ldlm -llvm -lpthread
INSTALL_TARGETS = \
install_clvmd
clvmd: $(OBJECTS) $(top_srcdir)/lib/liblvm.a
$(CC) -o clvmd $(OBJECTS) $(LD_FLAGS) $(LVMLIBS) $(LIBS)
.PHONY: install_clvmd
install_clvmd: $(TARGETS)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) clvmd \
$(sbindir)/clvmd
install: $(INSTALL_TARGETS)

65
daemons/clvmd/clvm.h Normal file
View File

@ -0,0 +1,65 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Definitions for CLVMD server and clients */
/*
* The protocol spoken over the cluster and across the local socket.
*/
#ifndef _CLVM_H
#define _CLVM_H
struct clvm_header {
uint8_t cmd; /* See below */
uint8_t flags; /* See below */
uint16_t xid; /* Transaction ID */
uint32_t clientid; /* Only used in Daemon->Daemon comms */
int32_t status; /* For replies, whether request succeeded */
uint32_t arglen; /* Length of argument below.
If >1500 then it will be passed
around the cluster in the system LV */
char node[1]; /* Actually a NUL-terminated string, node name.
If this is empty then the command is
forwarded to all cluster nodes unless
FLAG_LOCAL is also set. */
char args[1]; /* Arguments for the command follow the
node name, This member is only
valid if the node name is empty */
} __attribute__ ((packed));
/* Flags */
#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */
#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */
/* Name of the local socket to communicate between libclvm and clvmd */
//static const char CLVMD_SOCKNAME[]="/var/run/clvmd";
static const char CLVMD_SOCKNAME[] = "\0clvmd";
/* Internal commands & replies */
#define CLVMD_CMD_REPLY 1
#define CLVMD_CMD_VERSION 2 /* Send version around cluster when we start */
#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running
an incompatible version */
#define CLVMD_CMD_TEST 4 /* Just for mucking about */
#define CLVMD_CMD_LOCK 30
#define CLVMD_CMD_UNLOCK 31
/* Lock/Unlock commands */
#define CLVMD_CMD_LOCK_LV 50
#define CLVMD_CMD_LOCK_VG 51
#endif

499
daemons/clvmd/clvmd-cman.c Normal file
View File

@ -0,0 +1,499 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* CMAN communication layer for clvmd.
*/
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <syslog.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include <getopt.h>
#include <errno.h>
#include "clvmd-comms.h"
#include "clvm.h"
#include "libdlm.h"
#include "log.h"
#include "clvmd.h"
#include "lvm-functions.h"
#define LOCKSPACE_NAME "clvmd"
static int cluster_sock;
static int num_nodes;
static struct cl_cluster_node *nodes = NULL;
static int count_nodes; /* size of allocated nodes array */
static int max_updown_nodes = 50; /* Current size of the allocated array */
/* Node up/down status, indexed by nodeid */
static int *node_updown = NULL;
static dlm_lshandle_t *lockspace;
static void sigusr1_handler(int sig);
static void count_clvmds_running(void);
static void get_members(void);
static int nodeid_from_csid(char *csid);
static int name_from_nodeid(int nodeid, char *name);
struct lock_wait {
pthread_cond_t cond;
pthread_mutex_t mutex;
struct dlm_lksb lksb;
};
int init_cluster()
{
struct sockaddr_cl saddr;
int port = CLUSTER_PORT_CLVMD;
/* Open the cluster communication socket */
cluster_sock = socket(AF_CLUSTER, SOCK_DGRAM, CLPROTO_CLIENT);
if (cluster_sock == -1) {
perror("Can't open cluster socket");
return -1;
}
/* Bind to our port number on the cluster.
Writes to this will block if the cluster loses quorum */
saddr.scl_family = AF_CLUSTER;
saddr.scl_port = port;
if (bind
(cluster_sock, (struct sockaddr *) &saddr,
sizeof(struct sockaddr_cl))) {
log_error("Can't bind cluster socket: %m");
return -1;
}
/* Get the cluster members list */
get_members();
count_clvmds_running();
/* Create a lockspace for LV & VG locks to live in */
lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
if (!lockspace) {
log_error("Unable to create lockspace for CLVM\n");
return -1;
}
dlm_ls_pthread_init(lockspace);
return 0;
}
int get_main_cluster_fd()
{
return cluster_sock;
}
int get_num_nodes()
{
return num_nodes;
}
/* send_message with the fd check removed */
int cluster_send_message(void *buf, int msglen, char *csid, const char *errtext)
{
struct iovec iov[2];
struct msghdr msg;
struct sockaddr_cl saddr;
int len = 0;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_iovlen = 1;
msg.msg_iov = iov;
msg.msg_flags = 0;
iov[0].iov_len = msglen;
iov[0].iov_base = buf;
saddr.scl_family = AF_CLUSTER;
saddr.scl_port = CLUSTER_PORT_CLVMD;
if (csid) {
msg.msg_name = &saddr;
msg.msg_namelen = sizeof(saddr);
memcpy(&saddr.scl_nodeid, csid, MAX_CSID_LEN);
} else { /* Cluster broadcast */
msg.msg_name = NULL;
msg.msg_namelen = 0;
}
do {
len = sendmsg(cluster_sock, &msg, 0);
if (len < 0 && errno != EAGAIN)
log_error(errtext);
} while (len == -1 && errno == EAGAIN);
return len;
}
void get_our_csid(char *csid)
{
int i;
memset(csid, 0, MAX_CSID_LEN);
for (i = 0; i < num_nodes; i++) {
if (nodes[i].us)
memcpy(csid, &nodes[i].node_id, MAX_CSID_LEN);
}
}
/* Call a callback routine for each node that known (down mean not running a clvmd) */
int cluster_do_node_callback(struct local_client *client,
void (*callback) (struct local_client *, char *,
int))
{
int i;
int somedown = 0;
for (i = 0; i < get_num_nodes(); i++) {
callback(client, (char *)&nodes[i].node_id, node_updown[nodes[i].node_id]);
if (!node_updown[nodes[i].node_id])
somedown = -1;
}
return somedown;
}
/* Process OOB message from the cluster socket,
this currently just means that a node has stopped listening on our port */
static void process_oob_msg(char *buf, int len, int nodeid)
{
char namebuf[256];
switch (buf[0]) {
case CLUSTER_OOB_MSG_PORTCLOSED:
name_from_nodeid(nodeid, namebuf);
log_notice("clvmd on node %s has died\n", namebuf);
DEBUGLOG("Got OOB message, removing node %s\n", namebuf);
node_updown[nodeid] = 0;
break;
case CLUSTER_OOB_MSG_STATECHANGE:
DEBUGLOG("Got OOB message, Cluster state change\n");
get_members();
break;
default:
/* ERROR */
DEBUGLOG("Got unknown OOB message: %d\n", buf[0]);
}
}
int cluster_fd_callback(struct local_client *fd, char *buf, int len, char *csid,
struct local_client **new_client)
{
struct iovec iov[2];
struct msghdr msg;
struct sockaddr_cl saddr;
/* We never return a new client */
*new_client = NULL;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_iovlen = 1;
msg.msg_iov = iov;
msg.msg_name = &saddr;
msg.msg_flags = 0;
msg.msg_namelen = sizeof(saddr);
iov[0].iov_len = len;
iov[0].iov_base = buf;
len = recvmsg(cluster_sock, &msg, MSG_OOB | O_NONBLOCK);
if (len < 0 && errno == EAGAIN)
return len;
DEBUGLOG("Read on cluster socket, len = %d\n", len);
/* A real error */
if (len < 0) {
log_error("read error on cluster socket: %m");
return 0;
}
/* EOF - we have left the cluster */
if (len == 0)
return 0;
/* Is it OOB? probably a node gone down */
if (msg.msg_flags & MSG_OOB) {
process_oob_msg(iov[0].iov_base, len, saddr.scl_nodeid);
/* Tell the upper layer to ignore this message */
len = -1;
errno = EAGAIN;
}
memcpy(csid, &saddr.scl_nodeid, sizeof(saddr.scl_nodeid));
return len;
}
void add_up_node(char *csid)
{
/* It's up ! */
int nodeid = nodeid_from_csid(csid);
if (nodeid >= max_updown_nodes) {
int *new_updown = realloc(node_updown, max_updown_nodes + 10);
if (new_updown) {
node_updown = new_updown;
max_updown_nodes += 10;
DEBUGLOG("realloced more space for nodes. now %d\n",
max_updown_nodes);
} else {
log_error
("Realloc failed. Node status for clvmd will be wrong\n");
return;
}
}
node_updown[nodeid] = 1;
DEBUGLOG("Added new node %d to updown list\n", nodeid);
}
void cluster_closedown()
{
unlock_all();
dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
close(cluster_sock);
}
static int is_listening(int nodeid)
{
struct cl_listen_request rq;
int status;
rq.port = CLUSTER_PORT_CLVMD;
rq.nodeid = nodeid;
do {
status = ioctl(cluster_sock, SIOCCLUSTER_ISLISTENING, &rq);
if (status < 0 && errno == EBUSY) { /* Don't busywait */
sleep(1);
errno = EBUSY; /* In case sleep trashes it */
}
}
while (status < 0 && errno == EBUSY);
return status;
}
/* Populate the list of CLVMDs running.
called only at startup time */
void count_clvmds_running(void)
{
int i;
for (i = 0; i < num_nodes; i++) {
node_updown[nodes[i].node_id] = is_listening(nodes[i].node_id);
}
}
/* Get a list of active cluster members */
static void get_members()
{
struct cl_cluster_nodelist nodelist;
num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, 0);
if (num_nodes == -1) {
perror("get nodes");
} else {
/* Not enough room for new nodes list ? */
if (num_nodes > count_nodes && nodes) {
free(nodes);
nodes = NULL;
}
if (nodes == NULL) {
count_nodes = num_nodes + 10; /* Overallocate a little */
nodes = malloc(count_nodes * sizeof(struct cl_cluster_node));
if (!nodes) {
perror("Unable to allocate nodes array\n");
exit(5);
}
}
nodelist.max_members = count_nodes;
nodelist.nodes = nodes;
num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, &nodelist);
if (num_nodes <= 0) {
perror("get node details");
exit(6);
}
/* Sanity check struct */
if (nodes[0].size != sizeof(struct cl_cluster_node)) {
log_error
("sizeof(cl_cluster_node) does not match size returned from the kernel: aborting\n");
exit(10);
}
if (node_updown == NULL) {
node_updown =
(int *) malloc(sizeof(int) *
max(num_nodes, max_updown_nodes));
memset(node_updown, 0,
sizeof(int) * max(num_nodes, max_updown_nodes));
}
}
}
/* Convert a node name to a CSID */
int csid_from_name(char *csid, char *name)
{
int i;
for (i = 0; i < num_nodes; i++) {
if (strcmp(name, nodes[i].name) == 0) {
memcpy(csid, &nodes[i].node_id, MAX_CSID_LEN);
return 0;
}
}
return -1;
}
/* Convert a CSID to a node name */
int name_from_csid(char *csid, char *name)
{
int i;
for (i = 0; i < num_nodes; i++) {
if (memcmp(csid, &nodes[i].node_id, MAX_CSID_LEN) == 0) {
strcpy(name, nodes[i].name);
return 0;
}
}
/* Who?? */
strcpy(name, "Unknown");
return -1;
}
/* Convert a node ID to a node name */
int name_from_nodeid(int nodeid, char *name)
{
int i;
for (i = 0; i < num_nodes; i++) {
if (nodeid == nodes[i].node_id) {
strcpy(name, nodes[i].name);
return 0;
}
}
/* Who?? */
strcpy(name, "Unknown");
return -1;
}
/* Convert a CSID to a node ID */
static int nodeid_from_csid(char *csid)
{
int nodeid;
memcpy(&nodeid, csid, MAX_CSID_LEN);
return nodeid;
}
int is_quorate()
{
return ioctl(cluster_sock, SIOCCLUSTER_ISQUORATE, 0);
}
static void sync_ast_routine(void *arg)
{
struct lock_wait *lwait = arg;
pthread_mutex_lock(&lwait->mutex);
pthread_cond_signal(&lwait->cond);
pthread_mutex_unlock(&lwait->mutex);
}
int sync_lock(const char *resource, int mode, int flags, int *lockid)
{
int status;
struct lock_wait lwait;
if (!lockid) {
errno = EINVAL;
return -1;
}
/* Conversions need the lockid in the LKSB */
if (flags & LKF_CONVERT)
lwait.lksb.sb_lkid = *lockid;
pthread_cond_init(&lwait.cond, NULL);
pthread_mutex_init(&lwait.mutex, NULL);
pthread_mutex_lock(&lwait.mutex);
status = dlm_ls_lock(lockspace,
mode,
&lwait.lksb,
flags,
resource,
strlen(resource),
0, sync_ast_routine, &lwait, NULL, NULL);
if (status)
return status;
/* Wait for it to complete */
pthread_cond_wait(&lwait.cond, &lwait.mutex);
pthread_mutex_unlock(&lwait.mutex);
*lockid = lwait.lksb.sb_lkid;
errno = lwait.lksb.sb_status;
if (lwait.lksb.sb_status)
return -1;
else
return 0;
}
int sync_unlock(const char *resource /* UNUSED */, int lockid)
{
int status;
struct lock_wait lwait;
pthread_cond_init(&lwait.cond, NULL);
pthread_mutex_init(&lwait.mutex, NULL);
pthread_mutex_lock(&lwait.mutex);
status = dlm_ls_unlock(lockspace, lockid, 0, &lwait.lksb, &lwait);
if (status)
return status;
/* Wait for it to complete */
pthread_cond_wait(&lwait.cond, &lwait.mutex);
pthread_mutex_unlock(&lwait.mutex);
errno = lwait.lksb.sb_status;
if (lwait.lksb.sb_status != EUNLOCK)
return -1;
else
return 0;
}

View File

@ -0,0 +1,219 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
CLVMD Cluster LVM daemon command processor.
To add commands to the daemon simply add a processor in do_command and return
and messages back in buf and the length in *retlen. The initial value of
buflen is the maximum size of the buffer. if buf is not large enough then it
may be reallocated by the functions in here to a suitable size bearing in
mind that anything larger than the passed-in size will have to be returned
using the system LV and so performance will suffer.
The status return will be negated and passed back to the originating node.
pre- and post- command routines are called only on the local node. The
purpose is primarily to get and release locks, though the pre- routine should
also do any other local setups required by the command (if any) and can
return a failure code that prevents the command from being distributed around
the cluster
The pre- and post- routines are run in their own thread so can block as long
they like, do_command is run in the main clvmd thread so should not block for
too long. If the pre-command returns an error code (!=0) then the command
will not be propogated around the cluster but the post-command WILL be called
Also note that the pre and post routine are *always* called on the local
node, even if the command to be executed was only requested to run on a
remote node. It may peek inside the client structure to check the status of
the command.
The clients of the daemon must, naturally, understand the return messages and
codes.
Routines in here may only READ the values in the client structure passed in
apart from client->private which they are free to do what they like with.
*/
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include "list.h"
#include "locking.h"
#include "log.h"
#include "lvm-functions.h"
#include "clvmd-comms.h"
#include "clvm.h"
#include "clvmd.h"
#include "libdlm.h"
/* This is where all the real work happens:
NOTE: client will be NULL when this is executed on a remote node */
int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
char **buf, int buflen, int *retlen)
{
char *args = msg->node + strlen(msg->node) + 1;
int arglen = msglen - sizeof(struct clvm_header) - strlen(msg->node);
int status = 0;
char *lockname;
struct utsname nodeinfo;
unsigned char lock_cmd;
unsigned char lock_flags;
/* Do the command */
switch (msg->cmd) {
/* Just a test message */
case CLVMD_CMD_TEST:
if (arglen > buflen) {
buflen = arglen + 200;
*buf = realloc(*buf, buflen);
}
uname(&nodeinfo);
*retlen = 1 + snprintf(*buf, buflen, "TEST from %s: %s v%s",
nodeinfo.nodename, args,
nodeinfo.release);
break;
case CLVMD_CMD_LOCK_VG:
/* Check to see if the VG is in use by LVM1 */
status = do_check_lvm1(&args[2]);
break;
case CLVMD_CMD_LOCK_LV:
/* This is the biggie */
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
status = do_lock_lv(lock_cmd, lock_flags, lockname);
/* Replace EIO with something less scary */
if (status == EIO) {
*retlen =
1 + snprintf(*buf, buflen,
"Internal lvm error, check syslog");
return EIO;
}
break;
default:
/* Won't get here because command is validated in pre_command */
break;
}
/* Check the status of the command and return the error text */
if (status) {
*retlen = 1 + snprintf(*buf, buflen, strerror(status));
}
return status;
}
/* Pre-command is a good place to get locks that are needed only for the duration
of the commands around the cluster (don't forget to free them in post-command),
and to sanity check the command arguments */
int do_pre_command(struct local_client *client)
{
struct clvm_header *header =
(struct clvm_header *) client->bits.localsock.cmd;
unsigned char lock_cmd;
unsigned char lock_flags;
char *args = header->node + strlen(header->node) + 1;
int lockid;
int status = 0;
char *lockname;
switch (header->cmd) {
case CLVMD_CMD_TEST:
status = sync_lock("CLVMD_TEST", LKM_EXMODE, 0, &lockid);
client->bits.localsock.private = (void *) lockid;
break;
case CLVMD_CMD_LOCK_VG:
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
DEBUGLOG("doing PRE command LOCK_VG %s at %x\n", lockname,
lock_cmd);
if (lock_cmd == LCK_UNLOCK) {
hold_unlock(lockname);
} else {
status =
hold_lock(lockname, (int) lock_cmd,
(int) lock_flags);
if (status)
status = errno;
}
break;
case CLVMD_CMD_LOCK_LV:
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
status = pre_lock_lv(lock_cmd, lock_flags, lockname);
break;
default:
log_error("Unknown command %d received\n", header->cmd);
status = EINVAL;
}
return status;
}
/* Note that the post-command routine is called even if the pre-command or the real command
failed */
int do_post_command(struct local_client *client)
{
struct clvm_header *header =
(struct clvm_header *) client->bits.localsock.cmd;
int status = 0;
unsigned char lock_cmd;
unsigned char lock_flags;
char *args = header->node + strlen(header->node) + 1;
char *lockname;
switch (header->cmd) {
case CLVMD_CMD_TEST:
status =
sync_unlock("CLVMD_TEST", (int) (long) client->bits.localsock.private);
break;
case CLVMD_CMD_LOCK_VG:
/* Nothing to do here */
break;
case CLVMD_CMD_LOCK_LV:
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
status = post_lock_lv(lock_cmd, lock_flags, lockname);
break;
}
return status;
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Abstraction layer for clvmd cluster communications
*/
#ifndef _CLVMD_COMMS_H
#define _CLVMD_COMMS_H
struct local_client;
extern int cluster_send_message(void *buf, int msglen, char *csid,
const char *errtext);
extern int name_from_csid(char *csid, char *name);
extern int csid_from_name(char *csid, char *name);
extern int get_num_nodes(void);
extern int cluster_fd_callback(struct local_client *fd, char *buf, int len,
char *csid, struct local_client **new_client);
extern int init_cluster(void);
extern int get_main_cluster_fd(void); /* gets accept FD or cman cluster socket */
extern int cluster_do_node_callback(struct local_client *client,
void (*callback) (struct local_client *,
char *csid, int node_up));
extern int is_quorate(void);
extern void get_our_csid(char *csid);
extern void add_up_node(char *csid);
extern void cluster_closedown(void);
extern int sync_lock(const char *resource, int mode, int flags, int *lockid);
extern int sync_unlock(const char *resource, int lockid);
#ifdef USE_GULM
#include "tcp-comms.h"
#else
/* cman */
#include "cnxman-socket.h"
#define MAX_CSID_LEN 4
#endif
#endif

880
daemons/clvmd/clvmd-gulm.c Normal file
View File

@ -0,0 +1,880 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 2002-2003 All rights reserved.
**
*******************************************************************************
******************************************************************************/
/* This provides the interface between clvmd and gulm as the cluster
* and lock manager.
*
* It also provides the "liblm" functions too as it's hard (and pointless)
* to seperate them out when using gulm.
*
* What it does /not/ provide is the communications between clvmd daemons
* on the cluster nodes. That is done in tcp-comms.c
*/
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <utmpx.h>
#include <syslog.h>
#include <assert.h>
#include "ccs.h"
#include "list.h"
#include "locking.h"
#include "log.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "clvmd.h"
#include "hash.h"
#include "clvmd-gulm.h"
#include "libgulm.h"
#include "hash.h"
/* Hash list of nodes in the cluster */
static struct hash_table *node_hash;
/* hash list of outstanding lock requests */
static struct hash_table *lock_hash;
/* Copy of the current core state */
static uint8_t current_corestate;
/* Number of active nodes */
static int num_nodes;
static char *cluster_name;
static pthread_mutex_t lock_start_mutex;
static volatile int lock_start_flag;
struct node_info
{
enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state;
char name[MAX_CLUSTER_MEMBER_NAME_LEN];
};
struct lock_wait
{
pthread_cond_t cond;
pthread_mutex_t mutex;
int status;
};
/* Forward */
static int read_from_core_sock(struct local_client *client, char *buf, int len, char *csid,
struct local_client **new_client);
static int read_from_lock_sock(struct local_client *client, char *buf, int len, char *csid,
struct local_client **new_client);
static int get_all_cluster_nodes(void);
/* In tcp-comms.c */
extern struct hash_table *sock_hash;
static int add_internal_client(int fd, fd_callback_t callback)
{
struct local_client *client;
DEBUGLOG("Add_internal_client, fd = %d\n", fd);
/* Add a GULM file descriptor it to the main loop */
client = malloc(sizeof(struct local_client));
if (!client)
{
DEBUGLOG("malloc failed\n");
return -1;
}
memset(client, 0, sizeof(struct local_client));
client->fd = fd;
client->type = CLUSTER_INTERNAL;
client->callback = callback;
add_client(client);
return 0;
}
/* Gulm library handle */
static gulm_interface_p gulm_if;
static lg_core_callbacks_t core_callbacks;
static lg_lockspace_callbacks_t lock_callbacks;
static void badsig_handler(int sig)
{
DEBUGLOG("got sig %d\n", sig);
cluster_closedown();
exit(0);
}
static void sighup_handler(int sig)
{
DEBUGLOG("got SIGHUP\n");
/* Re-read CCS node list */
get_all_cluster_nodes();
}
int init_cluster()
{
int status;
int ccs_h;
/* Get cluster name from CCS */
/* TODO: is this right? */
ccs_h = ccs_connect();
ccs_get(ccs_h, "//cluster/@name", &cluster_name);
ccs_disconnect(ccs_h);
/* Block locking until we are logged in */
pthread_mutex_init(&lock_start_mutex, NULL);
pthread_mutex_lock(&lock_start_mutex);
lock_start_flag = 1;
node_hash = hash_create(100);
lock_hash = hash_create(10);
/* Get all nodes from CCS */
get_all_cluster_nodes();
/* Initialise GULM library */
status = lg_initialize(&gulm_if, cluster_name, "clvmd");
if (status)
{
DEBUGLOG("lg_initialize failed: %d\n", status);
return status;
}
/* Connect to core - we are not "important" :-) */
status = lg_core_login(gulm_if, 0);
if (status)
{
DEBUGLOG("lg_core_login failed: %d\n", status);
return status;
}
/* Initialise the inter-node comms */
status = init_comms();
if (status)
return status;
/* Add core FD to the list */
status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock);
if (status)
{
DEBUGLOG("can't allocate client space\n");
return status;
}
/* Connect to the lock server */
if (lg_lock_login(gulm_if, "CLVM"))
{
syslog(LOG_ERR, "Cannot login in to LOCK server\n");
DEBUGLOG("Cannot login in to LOCK server\n");
exit(88);
}
/* Add lockspace FD to the list */
status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock);
if (status)
{
DEBUGLOG("can't allocate client space\n");
exit(status);
}
/* Request a list of nodes, we can;t really do anything until
this comes back */
status = lg_core_nodelist(gulm_if);
if (status)
{
DEBUGLOG("lg_core_nodelist failed: %d\n", status);
return status;
}
/* So I can kill it without taking GULM down too */
signal(SIGINT, badsig_handler);
signal(SIGTERM, badsig_handler);
/* Re-read the node list on SIGHUP */
signal(SIGHUP, sighup_handler);
return 0;
}
void cluster_closedown()
{
DEBUGLOG("cluster_closedown\n");
lg_lock_logout(gulm_if);
lg_core_logout(gulm_if);
lg_core_shutdown(gulm_if);
lg_release(gulm_if);
}
/* Expire locks for a named node, or us */
#define GIO_KEY_SIZE 46
static void drop_expired_locks(char *nodename)
{
struct utsname nodeinfo;
uint8_t mask[GIO_KEY_SIZE];
memset(mask, 0xff, GIO_KEY_SIZE);
if (!nodename)
{
uname(&nodeinfo);
nodename = nodeinfo.nodename;
}
if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE))
{
DEBUGLOG("Error calling lg_lock_drop_exp()\n");
}
}
static int read_from_core_sock(struct local_client *client, char *buf, int len, char *csid,
struct local_client **new_client)
{
int status;
*new_client = NULL;
status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL);
return status<0 ? status : 1;
}
static int read_from_lock_sock(struct local_client *client, char *buf, int len, char *csid,
struct local_client **new_client)
{
int status;
*new_client = NULL;
status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL);
return status<0 ? status : 1;
}
/* CORE callback routines */
static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate)
{
DEBUGLOG("CORE Got a Login reply. gen:%lld err:%d rank:%d corestate:%d\n",
gen, error, rank, corestate);
if (error)
exit(error);
current_corestate = corestate;
return 0;
}
static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
{
if (nodestate == lg_core_Logged_in)
{
/* Don't clobber NODE_CLVMD state */
if (ninfo->state != NODE_CLVMD)
{
if (ninfo->state == NODE_UNKNOWN ||
ninfo->state == NODE_DOWN)
num_nodes++;
ninfo->state = NODE_UP;
}
}
else
</