From 858a2b1b88b28e95880350a5622053a02b9e472d Mon Sep 17 00:00:00 2001 From: Alasdair Kergon Date: Thu, 24 Jun 2004 08:02:38 +0000 Subject: [PATCH] Add cluster support. --- Makefile.in | 12 +- VERSION | 2 +- WHATS_NEW | 4 + configure | 51 +- configure.in | 34 +- daemons/Makefile.in | 23 + daemons/clvmd/Makefile.in | 47 + daemons/clvmd/clvm.h | 65 ++ daemons/clvmd/clvmd-cman.c | 499 ++++++++++ daemons/clvmd/clvmd-command.c | 219 +++++ daemons/clvmd/clvmd-comms.h | 55 ++ daemons/clvmd/clvmd-gulm.c | 880 +++++++++++++++++ daemons/clvmd/clvmd-gulm.h | 9 + daemons/clvmd/clvmd.c | 1693 +++++++++++++++++++++++++++++++++ daemons/clvmd/clvmd.h | 119 +++ daemons/clvmd/cnxman-socket.h | 226 +++++ daemons/clvmd/libclvm.c | 446 +++++++++ daemons/clvmd/libclvm.h | 36 + daemons/clvmd/lvm-functions.c | 446 +++++++++ daemons/clvmd/lvm-functions.h | 35 + daemons/clvmd/system-lv.c | 369 +++++++ daemons/clvmd/system-lv.h | 30 + daemons/clvmd/tcp-comms.c | 480 ++++++++++ daemons/clvmd/tcp-comms.h | 7 + include/.symlinks | 1 + lib/Makefile.in | 8 + lib/locking/Makefile.in | 32 + lib/locking/cluster_locking.c | 462 +++++++++ lib/locking/locking.c | 8 + lib/locking/locking_types.h | 1 + scripts/clvmd_fix_conf.sh | 154 +++ scripts/clvmd_init | 90 ++ 32 files changed, 6534 insertions(+), 9 deletions(-) create mode 100644 daemons/Makefile.in create mode 100644 daemons/clvmd/Makefile.in create mode 100644 daemons/clvmd/clvm.h create mode 100644 daemons/clvmd/clvmd-cman.c create mode 100644 daemons/clvmd/clvmd-command.c create mode 100644 daemons/clvmd/clvmd-comms.h create mode 100644 daemons/clvmd/clvmd-gulm.c create mode 100644 daemons/clvmd/clvmd-gulm.h create mode 100644 daemons/clvmd/clvmd.c create mode 100644 daemons/clvmd/clvmd.h create mode 100644 daemons/clvmd/cnxman-socket.h create mode 100644 daemons/clvmd/libclvm.c create mode 100644 daemons/clvmd/libclvm.h create mode 100644 daemons/clvmd/lvm-functions.c create mode 100644 daemons/clvmd/lvm-functions.h create mode 100644 
daemons/clvmd/system-lv.c create mode 100644 daemons/clvmd/system-lv.h create mode 100644 daemons/clvmd/tcp-comms.c create mode 100644 daemons/clvmd/tcp-comms.h create mode 100644 lib/locking/Makefile.in create mode 100644 lib/locking/cluster_locking.c create mode 100644 scripts/clvmd_fix_conf.sh create mode 100755 scripts/clvmd_init diff --git a/Makefile.in b/Makefile.in index f7b4d5238..af63fc5d9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -22,11 +22,13 @@ ifeq ("@INTL@", "yes") SUBDIRS += po endif -SUBDIRS += lib tools +SUBDIRS += lib tools daemons ifeq ($(MAKECMDGOALS),distclean) - SUBDIRS += lib/format1 \ + SUBDIRS += daemons/clvmd \ + lib/format1 \ lib/format_pool \ + lib/locking \ lib/mirror \ lib/snapshot \ po \ @@ -35,14 +37,16 @@ endif include make.tmpl +daemons: lib lib: include tools: lib -po: lib tools +po: tools daemons ifeq ("@INTL@", "yes") lib.pofile: include.pofile tools.pofile: lib.pofile -po.pofile: lib.pofile tools.pofile +daemons.pofile: lib.pofile +po.pofile: tools.pofile daemons.pofile pofile: po.pofile endif diff --git a/VERSION b/VERSION index f2ad88206..a9f466bfb 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.00.17-cvs (2004-06-20) +2.00.18-cvs (2004-06-24) diff --git a/WHATS_NEW b/WHATS_NEW index b582f61bf..94aadf13c 100644 --- a/WHATS_NEW +++ b/WHATS_NEW @@ -1,3 +1,7 @@ +Version 2.00.18 - 24 June 2004 +============================== + Add cluster support. + Version 2.00.17 - 20 June 2004 ============================== configure --enable-fsadm to try out fsadm. fsadm is not tested yet. 
diff --git a/configure b/configure index 783dbddad..d5efca06d 100755 --- a/configure +++ b/configure @@ -309,7 +309,7 @@ ac_includes_default="\ #endif" ac_default_prefix=/usr -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os MSGFMT JOBS STATIC_LINK LVM1 POOL SNAPSHOTS MIRRORS OWNER GROUP CLDFLAGS CLDWHOLEARCHIVE CLDNOWHOLEARCHIVE LD_DEPS LD_FLAGS SOFLAG LVM_VERSION LVM1_FALLBACK DEBUG DEVMAPPER HAVE_LIBDL HAVE_SELINUX CMDLIB LOCALEDIR CONFDIR STATICDIR INTL_PACKAGE INTL FSADM LIBOBJS LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os MSGFMT JOBS STATIC_LINK LVM1 POOL SNAPSHOTS MIRRORS OWNER GROUP CLDFLAGS CLDWHOLEARCHIVE CLDNOWHOLEARCHIVE LD_DEPS LD_FLAGS SOFLAG LVM_VERSION LVM1_FALLBACK DEBUG DEVMAPPER HAVE_LIBDL HAVE_SELINUX CMDLIB LOCALEDIR CONFDIR STATICDIR INTL_PACKAGE INTL CLVMD CLUSTER FSADM LIBOBJS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by 
options. @@ -867,10 +867,13 @@ Optional Packages: TYPE=internal --with-pool=TYPE GFS pool read-only support: internal/shared/none TYPE=internal + --with-cluster=TYPE Cluster LVM locking support: internal/shared/none + TYPE=internal --with-snapshots=TYPE Snapshot support: internal/shared/none TYPE=internal --with-mirrors=TYPE Mirror support: internal/shared/none TYPE=internal + --with-clvmd Build cluster LVM Daemon --with-localedir=DIR Translation files in DIR PREFIX/share/locale --with-confdir=DIR Configuration files in DIR /etc --with-staticdir=DIR Static binary in DIR EXEC_PREFIX/sbin @@ -3900,6 +3903,7 @@ case "$host_os" in SOFLAG="-shared" DEVMAPPER=yes ODIRECT=yes + CLUSTER=internal FSADM=no ;; darwin*) CFLAGS="-no-cpp-precomp -fno-common" @@ -3911,6 +3915,7 @@ case "$host_os" in SOFLAG="-dynamiclib" DEVMAPPER=no ODIRECT=no + CLUSTER=none FSADM=no ;; esac @@ -3998,6 +4003,25 @@ if test x$POOL = xinternal; then fi +# Check whether --with-cluster or --without-cluster was given. +if test "${with_cluster+set}" = set; then + withval="$with_cluster" + CLUSTER="$withval" +fi; + +if [ "x$CLUSTER" != xnone -a "x$CLUSTER" != xinternal -a "x$CLUSTER" != xshared ]; + then { { echo "$as_me:$LINENO: error: --with-cluster parameter invalid +" >&5 +echo "$as_me: error: --with-cluster parameter invalid +" >&2;} + { (exit 1); exit 1; }; } + exit +fi; + +if test x$CLUSTER = xinternal; then + CFLAGS="$CFLAGS -DCLUSTER_LOCKING_INTERNAL" +fi + # Check whether --enable-jobs or --disable-jobs was given. if test "${enable_jobs+set}" = set; then enableval="$enable_jobs" @@ -4071,6 +4095,20 @@ if test x$READLINE = xyes; then CFLAGS="$CFLAGS -DREADLINE_SUPPORT" fi + +# Check whether --with-clvmd or --without-clvmd was given. 
+if test "${with_clvmd+set}" = set; then + withval="$with_clvmd" + \ +CLVMD=$withval +else + CLVMD=no +fi; +if test x$CLVMD = xyes && test x$CLUSTER = xnone; then + CLUSTER=internal +fi +echo "$ac_t""$CLVMD" 1>&6 + echo $ac_n "checking whether to enable debugging""... $ac_c" 1>&6 # Check whether --enable-debug or --disable-debug was given. if test "${enable_debug+set}" = set; then @@ -4698,7 +4736,7 @@ else HAVE_LIBDL=no fi -if [ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o \ +if [ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o "x$CLUSTER" = xshared -o \ "x$SNAPSHOTS" = xshared -o "x$MIRRORS" = xshared \ \) -a "x$STATIC_LINK" = xyes ]; then { { echo "$as_me:$LINENO: error: Features cannot be 'shared' when building statically @@ -5207,7 +5245,9 @@ fi - ac_config_files="$ac_config_files Makefile make.tmpl doc/Makefile include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/mirror/Makefile lib/snapshot/Makefile man/Makefile po/Makefile tools/Makefile tools/version.h tools/fsadm/Makefile test/mm/Makefile test/device/Makefile test/format1/Makefile test/regex/Makefile test/filters/Makefile" + + + ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile doc/Makefile include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile lib/snapshot/Makefile man/Makefile po/Makefile tools/Makefile tools/version.h tools/fsadm/Makefile test/mm/Makefile test/device/Makefile test/format1/Makefile test/regex/Makefile test/filters/Makefile" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure @@ -5760,11 +5800,14 @@ do # Handling of arguments. 
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;; "make.tmpl" ) CONFIG_FILES="$CONFIG_FILES make.tmpl" ;; + "daemons/Makefile" ) CONFIG_FILES="$CONFIG_FILES daemons/Makefile" ;; + "daemons/clvmd/Makefile" ) CONFIG_FILES="$CONFIG_FILES daemons/clvmd/Makefile" ;; "doc/Makefile" ) CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;; "include/Makefile" ) CONFIG_FILES="$CONFIG_FILES include/Makefile" ;; "lib/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/Makefile" ;; "lib/format1/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/format1/Makefile" ;; "lib/format_pool/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/format_pool/Makefile" ;; + "lib/locking/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/locking/Makefile" ;; "lib/mirror/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/mirror/Makefile" ;; "lib/snapshot/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/snapshot/Makefile" ;; "man/Makefile" ) CONFIG_FILES="$CONFIG_FILES man/Makefile" ;; @@ -5916,6 +5959,8 @@ s,@CONFDIR@,$CONFDIR,;t t s,@STATICDIR@,$STATICDIR,;t t s,@INTL_PACKAGE@,$INTL_PACKAGE,;t t s,@INTL@,$INTL,;t t +s,@CLVMD@,$CLVMD,;t t +s,@CLUSTER@,$CLUSTER,;t t s,@FSADM@,$FSADM,;t t s,@LIBOBJS@,$LIBOBJS,;t t s,@LTLIBOBJS@,$LTLIBOBJS,;t t diff --git a/configure.in b/configure.in index c07b020a8..95d3a9726 100644 --- a/configure.in +++ b/configure.in @@ -59,6 +59,7 @@ case "$host_os" in SOFLAG="-shared" DEVMAPPER=yes ODIRECT=yes + CLUSTER=internal FSADM=no ;; darwin*) CFLAGS="-no-cpp-precomp -fno-common" @@ -70,6 +71,7 @@ case "$host_os" in SOFLAG="-dynamiclib" DEVMAPPER=no ODIRECT=no + CLUSTER=none FSADM=no ;; esac @@ -141,6 +143,22 @@ if test x$POOL = xinternal; then CFLAGS="$CFLAGS -DPOOL_INTERNAL" fi +dnl -- cluster_locking inclusion type +AC_ARG_WITH(cluster, + [ --with-cluster=TYPE Cluster LVM locking support: internal/shared/none + [TYPE=internal] ], + [ CLUSTER="$withval" ]) + +if [[ "x$CLUSTER" != xnone -a "x$CLUSTER" != xinternal -a "x$CLUSTER" != xshared ]]; + then AC_MSG_ERROR( +--with-cluster parameter invalid +) + exit +fi; + 
+if test x$CLUSTER = xinternal; then + CFLAGS="$CFLAGS -DCLUSTER_LOCKING_INTERNAL" +fi AC_ARG_ENABLE(jobs, [ --enable-jobs=NUM Number of jobs to run simultaneously], JOBS=-j$enableval, JOBS=-j2) @@ -192,6 +210,15 @@ if test x$READLINE = xyes; then CFLAGS="$CFLAGS -DREADLINE_SUPPORT" fi +dnl Build cluster LVM daemon +AC_ARG_WITH(clvmd, [ --with-clvmd Build cluster LVM Daemon], \ +CLVMD=$withval, CLVMD=no) +dnl If clvmd enabled and not cluster locking, automgically include the locking. +if test x$CLVMD = xyes && test x$CLUSTER = xnone; then + CLUSTER=internal +fi +echo "$ac_t""$CLVMD" 1>&6 + echo $ac_n "checking whether to enable debugging""... $ac_c" 1>&6 dnl Enable Debugging AC_ARG_ENABLE(debug, [ --enable-debug Enable debugging], \ @@ -272,7 +299,7 @@ else fi dnl Check for shared/static conflicts -if [[ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o \ +if [[ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o "x$CLUSTER" = xshared -o \ "x$SNAPSHOTS" = xshared -o "x$MIRRORS" = xshared \ \) -a "x$STATIC_LINK" = xyes ]]; then AC_MSG_ERROR( @@ -377,6 +404,8 @@ AC_SUBST(CONFDIR) AC_SUBST(STATICDIR) AC_SUBST(INTL_PACKAGE) AC_SUBST(INTL) +AC_SUBST(CLVMD) +AC_SUBST(CLUSTER) AC_SUBST(FSADM) dnl First and last lines should not contain files to generate in order to @@ -384,11 +413,14 @@ dnl keep utility scripts running properly AC_OUTPUT( \ Makefile \ make.tmpl \ +daemons/Makefile \ +daemons/clvmd/Makefile \ doc/Makefile \ include/Makefile \ lib/Makefile \ lib/format1/Makefile \ lib/format_pool/Makefile \ +lib/locking/Makefile \ lib/mirror/Makefile \ lib/snapshot/Makefile \ man/Makefile \ diff --git a/daemons/Makefile.in b/daemons/Makefile.in new file mode 100644 index 000000000..a951632ee --- /dev/null +++ b/daemons/Makefile.in @@ -0,0 +1,23 @@ +# +# Copyright (C) 2004 Red Hat, Inc. All rights reserved. +# +# This file is part of the LVM2. 
+# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ + +ifeq ("@CLVMD@", "yes") + SUBDIRS = clvmd +endif + +include $(top_srcdir)/make.tmpl + diff --git a/daemons/clvmd/Makefile.in b/daemons/clvmd/Makefile.in new file mode 100644 index 000000000..54563e7eb --- /dev/null +++ b/daemons/clvmd/Makefile.in @@ -0,0 +1,47 @@ +# +# Copyright (C) 2004 Red Hat, Inc. All rights reserved. +# +# This file is part of the LVM2. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ + +SOURCES = \ + clvmd-cman.c \ + clvmd-command.c \ + clvmd.c \ + libclvm.c \ + lvm-functions.c \ + system-lv.c + +TARGETS = \ + clvmd + +include $(top_srcdir)/make.tmpl + +CFLAGS += -D_REENTRANT -fno-strict-aliasing +LIBS += -ldevmapper -ldlm -llvm -lpthread + +INSTALL_TARGETS = \ + install_clvmd + +clvmd: $(OBJECTS) $(top_srcdir)/lib/liblvm.a + $(CC) -o clvmd $(OBJECTS) $(LD_FLAGS) $(LVMLIBS) $(LIBS) + +.PHONY: install_clvmd + +install_clvmd: $(TARGETS) + $(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) clvmd \ + $(sbindir)/clvmd + +install: $(INSTALL_TARGETS) + diff --git a/daemons/clvmd/clvm.h b/daemons/clvmd/clvm.h new file mode 100644 index 000000000..dd20bfd33 --- /dev/null +++ b/daemons/clvmd/clvm.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Definitions for CLVMD server and clients */ + +/* + * The protocol spoken over the cluster and across the local socket. 
+ */ + +#ifndef _CLVM_H +#define _CLVM_H + +struct clvm_header { + uint8_t cmd; /* See below */ + uint8_t flags; /* See below */ + uint16_t xid; /* Transaction ID */ + uint32_t clientid; /* Only used in Daemon->Daemon comms */ + int32_t status; /* For replies, whether request succeeded */ + uint32_t arglen; /* Length of argument below. + If >1500 then it will be passed + around the cluster in the system LV */ + char node[1]; /* Actually a NUL-terminated string, node name. + If this is empty then the command is + forwarded to all cluster nodes unless + FLAG_LOCAL is also set. */ + char args[1]; /* Arguments for the command follow the + node name, This member is only + valid if the node name is empty */ +} __attribute__ ((packed)); + +/* Flags */ +#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */ +#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */ + +/* Name of the local socket to communicate between libclvm and clvmd */ +//static const char CLVMD_SOCKNAME[]="/var/run/clvmd"; +static const char CLVMD_SOCKNAME[] = "\0clvmd"; + +/* Internal commands & replies */ +#define CLVMD_CMD_REPLY 1 +#define CLVMD_CMD_VERSION 2 /* Send version around cluster when we start */ +#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running + an incompatible version */ +#define CLVMD_CMD_TEST 4 /* Just for mucking about */ + +#define CLVMD_CMD_LOCK 30 +#define CLVMD_CMD_UNLOCK 31 + +/* Lock/Unlock commands */ +#define CLVMD_CMD_LOCK_LV 50 +#define CLVMD_CMD_LOCK_VG 51 + +#endif diff --git a/daemons/clvmd/clvmd-cman.c b/daemons/clvmd/clvmd-cman.c new file mode 100644 index 000000000..751f4ddf4 --- /dev/null +++ b/daemons/clvmd/clvmd-cman.c @@ -0,0 +1,499 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. 
+ * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * CMAN communication layer for clvmd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clvmd-comms.h" +#include "clvm.h" +#include "libdlm.h" +#include "log.h" +#include "clvmd.h" +#include "lvm-functions.h" + +#define LOCKSPACE_NAME "clvmd" + +static int cluster_sock; +static int num_nodes; +static struct cl_cluster_node *nodes = NULL; +static int count_nodes; /* size of allocated nodes array */ +static int max_updown_nodes = 50; /* Current size of the allocated array */ +/* Node up/down status, indexed by nodeid */ +static int *node_updown = NULL; +static dlm_lshandle_t *lockspace; + +static void sigusr1_handler(int sig); +static void count_clvmds_running(void); +static void get_members(void); +static int nodeid_from_csid(char *csid); +static int name_from_nodeid(int nodeid, char *name); + +struct lock_wait { + pthread_cond_t cond; + pthread_mutex_t mutex; + struct dlm_lksb lksb; +}; + +int init_cluster() +{ + struct sockaddr_cl saddr; + int port = CLUSTER_PORT_CLVMD; + + /* Open the cluster communication socket */ + cluster_sock = socket(AF_CLUSTER, SOCK_DGRAM, CLPROTO_CLIENT); + if (cluster_sock == -1) { + perror("Can't open cluster socket"); + return -1; + } + + /* Bind to our port number on the cluster. 
+ Writes to this will block if the cluster loses quorum */ + saddr.scl_family = AF_CLUSTER; + saddr.scl_port = port; + + if (bind + (cluster_sock, (struct sockaddr *) &saddr, + sizeof(struct sockaddr_cl))) { + log_error("Can't bind cluster socket: %m"); + return -1; + } + + /* Get the cluster members list */ + get_members(); + count_clvmds_running(); + + /* Create a lockspace for LV & VG locks to live in */ + lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600); + if (!lockspace) { + log_error("Unable to create lockspace for CLVM\n"); + return -1; + } + dlm_ls_pthread_init(lockspace); + return 0; +} + +int get_main_cluster_fd() +{ + return cluster_sock; +} + +int get_num_nodes() +{ + return num_nodes; +} + +/* send_message with the fd check removed */ +int cluster_send_message(void *buf, int msglen, char *csid, const char *errtext) +{ + struct iovec iov[2]; + struct msghdr msg; + struct sockaddr_cl saddr; + int len = 0; + + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = iov; + msg.msg_flags = 0; + iov[0].iov_len = msglen; + iov[0].iov_base = buf; + + saddr.scl_family = AF_CLUSTER; + saddr.scl_port = CLUSTER_PORT_CLVMD; + if (csid) { + msg.msg_name = &saddr; + msg.msg_namelen = sizeof(saddr); + memcpy(&saddr.scl_nodeid, csid, MAX_CSID_LEN); + } else { /* Cluster broadcast */ + + msg.msg_name = NULL; + msg.msg_namelen = 0; + } + + do { + len = sendmsg(cluster_sock, &msg, 0); + if (len < 0 && errno != EAGAIN) + log_error(errtext); + + } while (len == -1 && errno == EAGAIN); + return len; +} + +void get_our_csid(char *csid) +{ + int i; + memset(csid, 0, MAX_CSID_LEN); + + for (i = 0; i < num_nodes; i++) { + if (nodes[i].us) + memcpy(csid, &nodes[i].node_id, MAX_CSID_LEN); + } +} + +/* Call a callback routine for each node that known (down mean not running a clvmd) */ +int cluster_do_node_callback(struct local_client *client, + void (*callback) (struct local_client *, char *, + int)) +{ + int i; + int somedown = 0; + + for (i = 
0; i < get_num_nodes(); i++) { + callback(client, (char *)&nodes[i].node_id, node_updown[nodes[i].node_id]); + if (!node_updown[nodes[i].node_id]) + somedown = -1; + } + return somedown; +} + +/* Process OOB message from the cluster socket, + this currently just means that a node has stopped listening on our port */ +static void process_oob_msg(char *buf, int len, int nodeid) +{ + char namebuf[256]; + switch (buf[0]) { + case CLUSTER_OOB_MSG_PORTCLOSED: + name_from_nodeid(nodeid, namebuf); + log_notice("clvmd on node %s has died\n", namebuf); + DEBUGLOG("Got OOB message, removing node %s\n", namebuf); + + node_updown[nodeid] = 0; + break; + + case CLUSTER_OOB_MSG_STATECHANGE: + DEBUGLOG("Got OOB message, Cluster state change\n"); + get_members(); + break; + default: + /* ERROR */ + DEBUGLOG("Got unknown OOB message: %d\n", buf[0]); + } +} + +int cluster_fd_callback(struct local_client *fd, char *buf, int len, char *csid, + struct local_client **new_client) +{ + struct iovec iov[2]; + struct msghdr msg; + struct sockaddr_cl saddr; + + /* We never return a new client */ + *new_client = NULL; + + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = iov; + msg.msg_name = &saddr; + msg.msg_flags = 0; + msg.msg_namelen = sizeof(saddr); + iov[0].iov_len = len; + iov[0].iov_base = buf; + + len = recvmsg(cluster_sock, &msg, MSG_OOB | O_NONBLOCK); + if (len < 0 && errno == EAGAIN) + return len; + + DEBUGLOG("Read on cluster socket, len = %d\n", len); + + /* A real error */ + if (len < 0) { + log_error("read error on cluster socket: %m"); + return 0; + } + + /* EOF - we have left the cluster */ + if (len == 0) + return 0; + + /* Is it OOB? 
probably a node gone down */ + if (msg.msg_flags & MSG_OOB) { + process_oob_msg(iov[0].iov_base, len, saddr.scl_nodeid); + + /* Tell the upper layer to ignore this message */ + len = -1; + errno = EAGAIN; + } + memcpy(csid, &saddr.scl_nodeid, sizeof(saddr.scl_nodeid)); + return len; +} + +void add_up_node(char *csid) +{ + /* It's up ! */ + int nodeid = nodeid_from_csid(csid); + + if (nodeid >= max_updown_nodes) { + int *new_updown = realloc(node_updown, max_updown_nodes + 10); + + if (new_updown) { + node_updown = new_updown; + max_updown_nodes += 10; + DEBUGLOG("realloced more space for nodes. now %d\n", + max_updown_nodes); + } else { + log_error + ("Realloc failed. Node status for clvmd will be wrong\n"); + return; + } + } + node_updown[nodeid] = 1; + DEBUGLOG("Added new node %d to updown list\n", nodeid); +} + +void cluster_closedown() +{ + unlock_all(); + dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1); + close(cluster_sock); +} + +static int is_listening(int nodeid) +{ + struct cl_listen_request rq; + int status; + + rq.port = CLUSTER_PORT_CLVMD; + rq.nodeid = nodeid; + + do { + status = ioctl(cluster_sock, SIOCCLUSTER_ISLISTENING, &rq); + if (status < 0 && errno == EBUSY) { /* Don't busywait */ + sleep(1); + errno = EBUSY; /* In case sleep trashes it */ + } + } + while (status < 0 && errno == EBUSY); + + return status; +} + +/* Populate the list of CLVMDs running. + called only at startup time */ +void count_clvmds_running(void) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + node_updown[nodes[i].node_id] = is_listening(nodes[i].node_id); + } +} + +/* Get a list of active cluster members */ +static void get_members() +{ + struct cl_cluster_nodelist nodelist; + + num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, 0); + if (num_nodes == -1) { + perror("get nodes"); + } else { + /* Not enough room for new nodes list ? 
*/ + if (num_nodes > count_nodes && nodes) { + free(nodes); + nodes = NULL; + } + + if (nodes == NULL) { + count_nodes = num_nodes + 10; /* Overallocate a little */ + nodes = malloc(count_nodes * sizeof(struct cl_cluster_node)); + if (!nodes) { + perror("Unable to allocate nodes array\n"); + exit(5); + } + } + nodelist.max_members = count_nodes; + nodelist.nodes = nodes; + + num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, &nodelist); + if (num_nodes <= 0) { + perror("get node details"); + exit(6); + } + + /* Sanity check struct */ + if (nodes[0].size != sizeof(struct cl_cluster_node)) { + log_error + ("sizeof(cl_cluster_node) does not match size returned from the kernel: aborting\n"); + exit(10); + } + + if (node_updown == NULL) { + node_updown = + (int *) malloc(sizeof(int) * + max(num_nodes, max_updown_nodes)); + memset(node_updown, 0, + sizeof(int) * max(num_nodes, max_updown_nodes)); + } + } +} + +/* Convert a node name to a CSID */ +int csid_from_name(char *csid, char *name) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + if (strcmp(name, nodes[i].name) == 0) { + memcpy(csid, &nodes[i].node_id, MAX_CSID_LEN); + return 0; + } + } + return -1; +} + +/* Convert a CSID to a node name */ +int name_from_csid(char *csid, char *name) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + if (memcmp(csid, &nodes[i].node_id, MAX_CSID_LEN) == 0) { + strcpy(name, nodes[i].name); + return 0; + } + } + /* Who?? */ + strcpy(name, "Unknown"); + return -1; +} + +/* Convert a node ID to a node name */ +int name_from_nodeid(int nodeid, char *name) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + if (nodeid == nodes[i].node_id) { + strcpy(name, nodes[i].name); + return 0; + } + } + /* Who?? 
*/ + strcpy(name, "Unknown"); + return -1; +} + +/* Convert a CSID to a node ID */ +static int nodeid_from_csid(char *csid) +{ + int nodeid; + + memcpy(&nodeid, csid, MAX_CSID_LEN); + + return nodeid; +} + +int is_quorate() +{ + return ioctl(cluster_sock, SIOCCLUSTER_ISQUORATE, 0); +} + +static void sync_ast_routine(void *arg) +{ + struct lock_wait *lwait = arg; + + pthread_mutex_lock(&lwait->mutex); + pthread_cond_signal(&lwait->cond); + pthread_mutex_unlock(&lwait->mutex); +} + +int sync_lock(const char *resource, int mode, int flags, int *lockid) +{ + int status; + struct lock_wait lwait; + + if (!lockid) { + errno = EINVAL; + return -1; + } + + /* Conversions need the lockid in the LKSB */ + if (flags & LKF_CONVERT) + lwait.lksb.sb_lkid = *lockid; + + pthread_cond_init(&lwait.cond, NULL); + pthread_mutex_init(&lwait.mutex, NULL); + pthread_mutex_lock(&lwait.mutex); + + status = dlm_ls_lock(lockspace, + mode, + &lwait.lksb, + flags, + resource, + strlen(resource), + 0, sync_ast_routine, &lwait, NULL, NULL); + if (status) + return status; + + /* Wait for it to complete */ + pthread_cond_wait(&lwait.cond, &lwait.mutex); + pthread_mutex_unlock(&lwait.mutex); + + *lockid = lwait.lksb.sb_lkid; + + errno = lwait.lksb.sb_status; + if (lwait.lksb.sb_status) + return -1; + else + return 0; +} + +int sync_unlock(const char *resource /* UNUSED */, int lockid) +{ + int status; + struct lock_wait lwait; + + pthread_cond_init(&lwait.cond, NULL); + pthread_mutex_init(&lwait.mutex, NULL); + pthread_mutex_lock(&lwait.mutex); + + status = dlm_ls_unlock(lockspace, lockid, 0, &lwait.lksb, &lwait); + + if (status) + return status; + + /* Wait for it to complete */ + pthread_cond_wait(&lwait.cond, &lwait.mutex); + pthread_mutex_unlock(&lwait.mutex); + + errno = lwait.lksb.sb_status; + if (lwait.lksb.sb_status != EUNLOCK) + return -1; + else + return 0; + +} diff --git a/daemons/clvmd/clvmd-command.c b/daemons/clvmd/clvmd-command.c new file mode 100644 index 000000000..517c1346f --- 
/dev/null +++ b/daemons/clvmd/clvmd-command.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + + CLVMD Cluster LVM daemon command processor. + + To add commands to the daemon simply add a processor in do_command and return + and messages back in buf and the length in *retlen. The initial value of + buflen is the maximum size of the buffer. if buf is not large enough then it + may be reallocated by the functions in here to a suitable size bearing in + mind that anything larger than the passed-in size will have to be returned + using the system LV and so performance will suffer. + + The status return will be negated and passed back to the originating node. + + pre- and post- command routines are called only on the local node. The + purpose is primarily to get and release locks, though the pre- routine should + also do any other local setups required by the command (if any) and can + return a failure code that prevents the command from being distributed around + the cluster + + The pre- and post- routines are run in their own thread so can block as long + they like, do_command is run in the main clvmd thread so should not block for + too long. 
If the pre-command returns an error code (!=0) then the command + will not be propogated around the cluster but the post-command WILL be called + + Also note that the pre and post routine are *always* called on the local + node, even if the command to be executed was only requested to run on a + remote node. It may peek inside the client structure to check the status of + the command. + + The clients of the daemon must, naturally, understand the return messages and + codes. + + Routines in here may only READ the values in the client structure passed in + apart from client->private which they are free to do what they like with. + +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "list.h" +#include "locking.h" +#include "log.h" +#include "lvm-functions.h" +#include "clvmd-comms.h" +#include "clvm.h" +#include "clvmd.h" +#include "libdlm.h" + +/* This is where all the real work happens: + NOTE: client will be NULL when this is executed on a remote node */ +int do_command(struct local_client *client, struct clvm_header *msg, int msglen, + char **buf, int buflen, int *retlen) +{ + char *args = msg->node + strlen(msg->node) + 1; + int arglen = msglen - sizeof(struct clvm_header) - strlen(msg->node); + int status = 0; + char *lockname; + struct utsname nodeinfo; + unsigned char lock_cmd; + unsigned char lock_flags; + + /* Do the command */ + switch (msg->cmd) { + /* Just a test message */ + case CLVMD_CMD_TEST: + if (arglen > buflen) { + buflen = arglen + 200; + *buf = realloc(*buf, buflen); + } + uname(&nodeinfo); + *retlen = 1 + snprintf(*buf, buflen, "TEST from %s: %s v%s", + nodeinfo.nodename, args, + nodeinfo.release); + break; + + case CLVMD_CMD_LOCK_VG: + /* Check to see if the VG is in use by LVM1 */ + status = do_check_lvm1(&args[2]); + break; + + case CLVMD_CMD_LOCK_LV: + /* This is the biggie */ + lock_cmd = args[0]; + lock_flags = args[1]; + lockname = 
&args[2]; + status = do_lock_lv(lock_cmd, lock_flags, lockname); + /* Replace EIO with something less scary */ + if (status == EIO) { + *retlen = + 1 + snprintf(*buf, buflen, + "Internal lvm error, check syslog"); + return EIO; + } + break; + + default: + /* Won't get here because command is validated in pre_command */ + break; + } + + /* Check the status of the command and return the error text */ + if (status) { + *retlen = 1 + snprintf(*buf, buflen, strerror(status)); + } + + return status; + +} + +/* Pre-command is a good place to get locks that are needed only for the duration + of the commands around the cluster (don't forget to free them in post-command), + and to sanity check the command arguments */ +int do_pre_command(struct local_client *client) +{ + struct clvm_header *header = + (struct clvm_header *) client->bits.localsock.cmd; + unsigned char lock_cmd; + unsigned char lock_flags; + char *args = header->node + strlen(header->node) + 1; + int lockid; + int status = 0; + char *lockname; + + switch (header->cmd) { + case CLVMD_CMD_TEST: + status = sync_lock("CLVMD_TEST", LKM_EXMODE, 0, &lockid); + client->bits.localsock.private = (void *) lockid; + break; + + case CLVMD_CMD_LOCK_VG: + lock_cmd = args[0]; + lock_flags = args[1]; + lockname = &args[2]; + DEBUGLOG("doing PRE command LOCK_VG %s at %x\n", lockname, + lock_cmd); + if (lock_cmd == LCK_UNLOCK) { + hold_unlock(lockname); + } else { + status = + hold_lock(lockname, (int) lock_cmd, + (int) lock_flags); + if (status) + status = errno; + } + break; + + case CLVMD_CMD_LOCK_LV: + lock_cmd = args[0]; + lock_flags = args[1]; + lockname = &args[2]; + status = pre_lock_lv(lock_cmd, lock_flags, lockname); + break; + + default: + log_error("Unknown command %d received\n", header->cmd); + status = EINVAL; + } + return status; +} + +/* Note that the post-command routine is called even if the pre-command or the real command + failed */ +int do_post_command(struct local_client *client) +{ + struct clvm_header 
*header = + (struct clvm_header *) client->bits.localsock.cmd; + int status = 0; + unsigned char lock_cmd; + unsigned char lock_flags; + char *args = header->node + strlen(header->node) + 1; + char *lockname; + + switch (header->cmd) { + case CLVMD_CMD_TEST: + status = + sync_unlock("CLVMD_TEST", (int) (long) client->bits.localsock.private); + break; + + case CLVMD_CMD_LOCK_VG: + /* Nothing to do here */ + break; + + case CLVMD_CMD_LOCK_LV: + lock_cmd = args[0]; + lock_flags = args[1]; + lockname = &args[2]; + status = post_lock_lv(lock_cmd, lock_flags, lockname); + break; + } + return status; +} diff --git a/daemons/clvmd/clvmd-comms.h b/daemons/clvmd/clvmd-comms.h new file mode 100644 index 000000000..54017b33f --- /dev/null +++ b/daemons/clvmd/clvmd-comms.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Abstraction layer for clvmd cluster communications + */ + +#ifndef _CLVMD_COMMS_H +#define _CLVMD_COMMS_H + +struct local_client; + +extern int cluster_send_message(void *buf, int msglen, char *csid, + const char *errtext); +extern int name_from_csid(char *csid, char *name); +extern int csid_from_name(char *csid, char *name); +extern int get_num_nodes(void); +extern int cluster_fd_callback(struct local_client *fd, char *buf, int len, + char *csid, struct local_client **new_client); +extern int init_cluster(void); +extern int get_main_cluster_fd(void); /* gets accept FD or cman cluster socket */ +extern int cluster_do_node_callback(struct local_client *client, + void (*callback) (struct local_client *, + char *csid, int node_up)); +extern int is_quorate(void); + +extern void get_our_csid(char *csid); +extern void add_up_node(char *csid); +extern void cluster_closedown(void); + +extern int sync_lock(const char *resource, int mode, int flags, int *lockid); +extern int sync_unlock(const char *resource, int lockid); + +#ifdef USE_GULM +#include "tcp-comms.h" +#else +/* cman */ +#include "cnxman-socket.h" +#define MAX_CSID_LEN 4 +#endif + + +#endif diff --git a/daemons/clvmd/clvmd-gulm.c b/daemons/clvmd/clvmd-gulm.c new file mode 100644 index 000000000..bef4cbe78 --- /dev/null +++ b/daemons/clvmd/clvmd-gulm.c @@ -0,0 +1,880 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 2002-2003 All rights reserved. 
+** +******************************************************************************* +******************************************************************************/ + +/* This provides the interface between clvmd and gulm as the cluster + * and lock manager. + * + * It also provides the "liblm" functions too as it's hard (and pointless) + * to seperate them out when using gulm. + * + * What it does /not/ provide is the communications between clvmd daemons + * on the cluster nodes. That is done in tcp-comms.c + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ccs.h" +#include "list.h" +#include "locking.h" +#include "log.h" +#include "clvm.h" +#include "clvmd-comms.h" +#include "clvmd.h" +#include "hash.h" +#include "clvmd-gulm.h" +#include "libgulm.h" +#include "hash.h" + +/* Hash list of nodes in the cluster */ +static struct hash_table *node_hash; + +/* hash list of outstanding lock requests */ +static struct hash_table *lock_hash; + +/* Copy of the current core state */ +static uint8_t current_corestate; + +/* Number of active nodes */ +static int num_nodes; + +static char *cluster_name; + +static pthread_mutex_t lock_start_mutex; +static volatile int lock_start_flag; + +struct node_info +{ + enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state; + char name[MAX_CLUSTER_MEMBER_NAME_LEN]; +}; + +struct lock_wait +{ + pthread_cond_t cond; + pthread_mutex_t mutex; + int status; +}; + +/* Forward */ +static int read_from_core_sock(struct local_client *client, char *buf, int len, char *csid, + struct local_client **new_client); +static int read_from_lock_sock(struct local_client *client, char *buf, int len, char *csid, + struct local_client **new_client); +static int get_all_cluster_nodes(void); + +/* In tcp-comms.c */ +extern struct hash_table *sock_hash; + +static int 
add_internal_client(int fd, fd_callback_t callback) +{ + struct local_client *client; + + DEBUGLOG("Add_internal_client, fd = %d\n", fd); + + /* Add a GULM file descriptor it to the main loop */ + client = malloc(sizeof(struct local_client)); + if (!client) + { + DEBUGLOG("malloc failed\n"); + return -1; + } + + memset(client, 0, sizeof(struct local_client)); + client->fd = fd; + client->type = CLUSTER_INTERNAL; + client->callback = callback; + add_client(client); + + return 0; +} + +/* Gulm library handle */ +static gulm_interface_p gulm_if; +static lg_core_callbacks_t core_callbacks; +static lg_lockspace_callbacks_t lock_callbacks; + +static void badsig_handler(int sig) +{ + DEBUGLOG("got sig %d\n", sig); + cluster_closedown(); + exit(0); +} + +static void sighup_handler(int sig) +{ + DEBUGLOG("got SIGHUP\n"); + + /* Re-read CCS node list */ + get_all_cluster_nodes(); +} + +int init_cluster() +{ + int status; + int ccs_h; + + /* Get cluster name from CCS */ + /* TODO: is this right? */ + ccs_h = ccs_connect(); + ccs_get(ccs_h, "//cluster/@name", &cluster_name); + ccs_disconnect(ccs_h); + + /* Block locking until we are logged in */ + pthread_mutex_init(&lock_start_mutex, NULL); + pthread_mutex_lock(&lock_start_mutex); + lock_start_flag = 1; + + node_hash = hash_create(100); + lock_hash = hash_create(10); + + /* Get all nodes from CCS */ + get_all_cluster_nodes(); + + /* Initialise GULM library */ + status = lg_initialize(&gulm_if, cluster_name, "clvmd"); + if (status) + { + DEBUGLOG("lg_initialize failed: %d\n", status); + return status; + } + + /* Connect to core - we are not "important" :-) */ + status = lg_core_login(gulm_if, 0); + if (status) + { + DEBUGLOG("lg_core_login failed: %d\n", status); + return status; + } + + /* Initialise the inter-node comms */ + status = init_comms(); + if (status) + return status; + + /* Add core FD to the list */ + status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock); + if (status) + { + DEBUGLOG("can't 
allocate client space\n"); + return status; + } + + /* Connect to the lock server */ + if (lg_lock_login(gulm_if, "CLVM")) + { + syslog(LOG_ERR, "Cannot login in to LOCK server\n"); + DEBUGLOG("Cannot login in to LOCK server\n"); + exit(88); + } + + /* Add lockspace FD to the list */ + status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock); + if (status) + { + DEBUGLOG("can't allocate client space\n"); + exit(status); + } + + /* Request a list of nodes, we can't really do anything until + this comes back */ + status = lg_core_nodelist(gulm_if); + if (status) + { + DEBUGLOG("lg_core_nodelist failed: %d\n", status); + return status; + } + + /* So I can kill it without taking GULM down too */ + signal(SIGINT, badsig_handler); + signal(SIGTERM, badsig_handler); + + /* Re-read the node list on SIGHUP */ + signal(SIGHUP, sighup_handler); + + return 0; +} + +void cluster_closedown() +{ + DEBUGLOG("cluster_closedown\n"); + lg_lock_logout(gulm_if); + lg_core_logout(gulm_if); + lg_core_shutdown(gulm_if); + lg_release(gulm_if); +} + +/* Expire locks for a named node, or us */ +#define GIO_KEY_SIZE 46 +static void drop_expired_locks(char *nodename) +{ + struct utsname nodeinfo; + uint8_t mask[GIO_KEY_SIZE]; + + memset(mask, 0xff, GIO_KEY_SIZE); + + if (!nodename) + { + uname(&nodeinfo); + nodename = nodeinfo.nodename; + } + + if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE)) + { + DEBUGLOG("Error calling lg_lock_drop_exp()\n"); + } +} + + +static int read_from_core_sock(struct local_client *client, char *buf, int len, char *csid, + struct local_client **new_client) +{ + int status; + + *new_client = NULL; + status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL); + return status<0 ? 
status : 1; +} + +static int read_from_lock_sock(struct local_client *client, char *buf, int len, char *csid, + struct local_client **new_client) +{ + int status; + + *new_client = NULL; + status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL); + return status<0 ? status : 1; +} + + +/* CORE callback routines */ +static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate) +{ + DEBUGLOG("CORE Got a Login reply. gen:%lld err:%d rank:%d corestate:%d\n", + gen, error, rank, corestate); + + if (error) + exit(error); + + current_corestate = corestate; + return 0; +} + +static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate) +{ + if (nodestate == lg_core_Logged_in) + { + /* Don't clobber NODE_CLVMD state */ + if (ninfo->state != NODE_CLVMD) + { + if (ninfo->state == NODE_UNKNOWN || + ninfo->state == NODE_DOWN) + num_nodes++; + + ninfo->state = NODE_UP; + } + } + else + { + if (nodestate == lg_core_Expired || + nodestate == lg_core_Fenced || + nodestate == lg_core_Logged_out) + { + if (ninfo->state != NODE_DOWN) + num_nodes--; + ninfo->state = NODE_DOWN; + tcp_remove_client(csid); + } + } + DEBUGLOG("set_node_state, '%s' state = %d, num_nodes=%d\n", + ninfo->name, ninfo->state, num_nodes); +} + +static struct node_info *add_or_set_node(char *name, uint32_t ip, uint8_t state) +{ + struct node_info *ninfo; + + ninfo = hash_lookup_binary(node_hash, (char *)&ip, MAX_CSID_LEN); + if (!ninfo) + { + /* If we can't find that node then re-read the config file in case it + was added after we were started */ + DEBUGLOG("Node %s not found, re-reading config file\n", name); + get_all_cluster_nodes(); + + /* Now try again */ + ninfo = hash_lookup_binary(node_hash, (char *)&ip, MAX_CSID_LEN); + if (!ninfo) + { + DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name); + return NULL; + } + } + + set_node_state(ninfo, (char *)&ip, state); + + return ninfo; +} + +static int core_nodelist(void *misc, 
lglcb_t type, char *name, uint32_t ip, uint8_t state) +{ + DEBUGLOG("CORE nodelist\n"); + + if (type == lglcb_start) + { + DEBUGLOG("Got Nodelist, start\n"); + } + else + { + if (type == lglcb_item) + { + DEBUGLOG("Got nodelist, item: %s, %#x, %#x\n", name, ip, state); + + add_or_set_node(name, ip, state); + } + else + { + if (type == lglcb_stop) + { + char ourcsid[MAX_CSID_LEN]; + + DEBUGLOG("Got Nodelist, stop\n"); + clvmd_cluster_init_completed(); + + /* Mark ourself as up */ + get_our_csid(ourcsid); + add_up_node(ourcsid); + } + else + { + DEBUGLOG("Unknown lglcb_t %#x\n", type); + } + } + } + + return 0; +} + +static int core_statechange(void *misc, uint8_t corestate, uint32_t masterip, char *mastername) +{ + DEBUGLOG("CORE Got statechange corestate:%#x masterip:%#x mastername:%s\n", + corestate, masterip, mastername); + + current_corestate = corestate; + return 0; +} + +static int core_nodechange(void *misc, char *nodename, uint32_t nodeip, uint8_t nodestate) +{ + struct node_info *ninfo; + + DEBUGLOG("CORE node change, name=%s, ip=%x, state = %d\n", nodename, nodeip, nodestate); + + /* If we don't get nodeip here, try a lookup by name */ + if (!nodeip) + csid_from_name((char *)&nodeip, nodename); + if (!nodeip) + return 0; + + ninfo = add_or_set_node(nodename, nodeip, nodestate); + if (!ninfo) + return 0; + + /* Check if we need to drop any expired locks */ + if (ninfo->state == NODE_DOWN) + { + drop_expired_locks(nodename); + } + + return 0; +} +static int core_error(void *misc, uint32_t err) +{ + DEBUGLOG("CORE error: %d\n", err); + // Not sure what happens here + return 0; +} + +/* LOCK callback routines */ +static int lock_login_reply(void *misc, uint32_t error, uint8_t which) +{ + DEBUGLOG("LOCK Got a Login reply. 
err:%d which:%d\n", + error, which); + + if (error) + exit(error); + + /* Drop any expired locks for us that might be hanging around */ + drop_expired_locks(NULL); + + /* Enable locking operations in other threads */ + if (lock_start_flag) + { + lock_start_flag = 0; + pthread_mutex_unlock(&lock_start_mutex); + } + + return 0; +} + +static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen, uint8_t state, uint32_t flags, uint32_t error, + uint8_t *LVB, uint16_t LVBlen) +{ + struct lock_wait *lwait; + + DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error); + + lwait = hash_lookup(lock_hash, key); + if (!lwait) + { + DEBUGLOG("Can't find hash entry for resource %s\n", key); + return 0; + } + lwait->status = error; + pthread_mutex_lock(&lwait->mutex); + pthread_cond_signal(&lwait->cond); + pthread_mutex_unlock(&lwait->mutex); + + return 0; +} +static int lock_error(void *misc, uint32_t err) +{ + DEBUGLOG("LOCK error: %d\n", err); + // Not sure what happens here + return 0; +} + + +/* CORE callbacks */ +static lg_core_callbacks_t core_callbacks = { + .login_reply = core_login_reply, + .nodelist = core_nodelist, + .statechange = core_statechange, + .nodechange = core_nodechange, + .error = core_error, +}; + +/* LOCK callbacks */ +static lg_lockspace_callbacks_t lock_callbacks = { + .login_reply = lock_login_reply, + .lock_state = lock_lock_state, + .error = lock_error, +}; + +/* Allow tcp-comms to loop round the list of active nodes */ +int get_next_node_csid(void **context, char *csid) +{ + struct node_info *ninfo = NULL; + + /* First node */ + if (!*context) + { + *context = hash_get_first(node_hash); + } + else + { + *context = hash_get_next(node_hash, *context); + } + if (*context) + ninfo = hash_get_data(node_hash, *context); + + /* Find a node that is UP */ + while (*context && ninfo->state == NODE_DOWN) + { + *context = hash_get_next(node_hash, *context); + if (*context) + { + ninfo = hash_get_data(node_hash, *context); + } + } + + if (!*context || 
ninfo->state == NODE_DOWN) + { + return 0; + } + + memcpy(csid, hash_get_key(node_hash, *context), MAX_CSID_LEN); + return 1; +} + +int name_from_csid(char *csid, char *name) +{ + struct node_info *ninfo; + + ninfo = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN); + if (!ninfo) + { + sprintf(name, "UNKNOWN [%d.%d.%d.%d]", + csid[0], csid[1], csid[2], csid[3]); + return -1; + } + + strcpy(name, ninfo->name); + return 0; +} + + +int csid_from_name(char *csid, char *name) +{ + struct hash_node *hn; + struct node_info *ninfo; + + hash_iterate(hn, node_hash) + { + ninfo = hash_get_data(node_hash, hn); + if (strcmp(ninfo->name, name) == 0) + { + memcpy(csid, hash_get_key(node_hash, hn), MAX_CSID_LEN); + return 0; + } + } + return -1; +} + +int get_num_nodes() +{ + DEBUGLOG("num_nodes = %d\n", num_nodes); + return num_nodes; +} + +/* Node is now known to be running a clvmd */ +void add_up_node(char *csid) +{ + struct node_info *ninfo; + + ninfo = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN); + if (!ninfo) + return; + + ninfo->state = NODE_CLVMD; + return; + +} +/* Node is now known to be NOT running a clvmd */ +void add_down_node(char *csid) +{ + struct node_info *ninfo; + + ninfo = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN); + if (!ninfo) + return; + + /* Only set it to UP if it was previously known to be + running clvmd - gulm may set it DOWN quite soon */ + if (ninfo->state == NODE_CLVMD) + ninfo->state = NODE_UP; + return; + +} + +/* Call a callback for each node, so the caller knows whether it's up or down */ +int cluster_do_node_callback(struct local_client *master_client, + void (*callback)(struct local_client *, char *csid, int node_up)) +{ + struct hash_node *hn; + struct node_info *ninfo; + + hash_iterate(hn, node_hash) + { + char csid[MAX_CSID_LEN]; + struct local_client *client; + + ninfo = hash_get_data(node_hash, hn); + memcpy(csid, hash_get_key(node_hash, hn), MAX_CSID_LEN); + + DEBUGLOG("down_callback. 
node %s, state = %d\n", ninfo->name, ninfo->state); + + client = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN); + if (client) + callback(master_client, csid, ninfo->state == NODE_CLVMD); + } + return 0; +} + +/* Convert gulm error codes to unix errno numbers */ +static int gulm_to_errno(int gulm_ret) +{ + switch (gulm_ret) + { + case lg_err_TryFailed: + errno = EAGAIN; + break; + + case lg_err_AlreadyPend: + errno = EBUSY; + + /* More?? */ + default: + errno = EINVAL; + } + + return gulm_ret ? -1 : 0; +} + +/* Real locking */ +static int _lock_resource(char *resource, int mode, int flags, int *lockid) +{ + int status; + struct lock_wait lwait; + + /* Wait until the lock module is ready */ + if (lock_start_flag) + { + pthread_mutex_lock(&lock_start_mutex); + pthread_mutex_unlock(&lock_start_mutex); + } + + pthread_cond_init(&lwait.cond, NULL); + pthread_mutex_init(&lwait.mutex, NULL); + pthread_mutex_lock(&lwait.mutex); + + /* This needs to be converted from DLM/LVM2 value for GULM */ + if (flags == LCK_NONBLOCK) flags = lg_lock_flag_Try; + + hash_insert(lock_hash, resource, &lwait); + DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode); + + status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1, + mode, flags, NULL, 0); + if (status) + { + DEBUGLOG("lg_lock_state returned %d\n", status); + return status; + } + + /* Wait for it to complete */ + pthread_cond_wait(&lwait.cond, &lwait.mutex); + pthread_mutex_unlock(&lwait.mutex); + + hash_remove(lock_hash, resource); + DEBUGLOG("lock-resource returning %d\n", lwait.status); + + return gulm_to_errno(lwait.status); +} + + +static int _unlock_resource(char *resource, int lockid) +{ + int status; + struct lock_wait lwait; + + pthread_cond_init(&lwait.cond, NULL); + pthread_mutex_init(&lwait.mutex, NULL); + pthread_mutex_lock(&lwait.mutex); + + hash_insert(lock_hash, resource, &lwait); + + DEBUGLOG("unlock_resource %s\n", resource); + status = lg_lock_state_req(gulm_if, resource, 
strlen(resource)+1, + lg_lock_state_Unlock, 0, NULL, 0); + + if (status) + { + DEBUGLOG("lg_lock_state(unlock) returned %d\n", status); + return status; + } + + /* Wait for it to complete */ + + pthread_cond_wait(&lwait.cond, &lwait.mutex); + pthread_mutex_unlock(&lwait.mutex); + + hash_remove(lock_hash, resource); + + return gulm_to_errno(lwait.status); +} + + +/* These two locking functions MUST be called in a separate thread from + the clvmd main loop because they expect to be woken up by it. + + These are abstractions around the real locking functions (above) + as we need to emulate the DLM's EX/PW/CW interaction with GULM using + two locks. + To aid unlocking, we store the lock mode in the lockid (as GULM + doesn't use this). +*/ +int sync_lock(const char *resource, int mode, int flags, int *lockid) +{ + int status; + char lock1[strlen(resource)+3]; + char lock2[strlen(resource)+3]; + + snprintf(lock1, sizeof(lock1), "%s-1", resource); + snprintf(lock2, sizeof(lock2), "%s-2", resource); + + switch (mode) + { + case LCK_EXCL: + status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid); + if (status) + goto out; + + /* If we can't get this lock then bail out */ + status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid); + if (status == lg_err_TryFailed) + { + _unlock_resource(lock1, *lockid); + status = -1; + errno = EAGAIN; + } + break; + + case LCK_READ: + status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid); + break; + + case LCK_WRITE: + status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid); + break; + + default: + status = -1; + errno = EINVAL; + break; + } + out: + *lockid = mode; + return status; +} + +int sync_unlock(const char *resource, int lockid) +{ + int status = 0; + char lock1[strlen(resource)+3]; + char lock2[strlen(resource)+3]; + + snprintf(lock1, sizeof(lock1), "%s-1", resource); + snprintf(lock2, sizeof(lock2), "%s-2", resource); + + /* The held lock mode is in the lock id */ + 
assert(lockid == LCK_EXCL || + lockid == LCK_READ || + lockid == LCK_WRITE); + + switch (lockid) + { + case LCK_EXCL: + status = _unlock_resource(lock1, lockid); + if (status) + goto out; + status = _unlock_resource(lock2, lockid); + break; + + case LCK_READ: + status = _unlock_resource(lock1, lockid); + break; + + case LCK_WRITE: + status = _unlock_resource(lock2, lockid); + break; + } + + out: + return status; +} + +int is_quorate() +{ + if (current_corestate == lg_core_Slave || + current_corestate == lg_core_Master || + current_corestate == lg_core_Client) + return 1; + else + return 0; +} + +/* Get all the cluster node names & IPs from CCS and + add them to our node list so we know who to talk to. + Called when we start up and if we get sent SIGHUP. +*/ +static int get_all_cluster_nodes() +{ + int ctree; + char *nodename; + int error; + + /* Open the config file */ + ctree = ccs_connect(); + if (ctree <= 0) + { + log_error("Error connecting to CCS"); + return -1; + } + + error = ccs_get(ctree, "//nodes/node/@name", &nodename); + while (nodename) + { + char nodeip[MAX_CSID_LEN]; + char *clvmflag; + char key[256]; + + sprintf(key, "//nodes/node[@name=\"%s\"]/clvm", nodename); + ccs_get(ctree, key, &clvmflag); + + if ((get_ip_address(nodename, nodeip) == 0) && atoi(clvmflag)) + { + struct node_info *ninfo; + + /* If it's not in the list, then add it */ + ninfo = hash_lookup_binary(node_hash, nodeip, MAX_CSID_LEN); + if (!ninfo) + { + ninfo = malloc(sizeof(struct node_info)); + if (!ninfo) + { + syslog(LOG_ERR, "Cannot alloc memory for node info\n"); + ccs_disconnect(ctree); + return -1; + } + strcpy(ninfo->name, nodename); + + ninfo->state = NODE_DOWN; + hash_insert_binary(node_hash, nodeip, MAX_CSID_LEN, ninfo); + } + } + else + { + DEBUGLOG("node %s has clvm disabled\n", nodename); + } + if (clvmflag) free(clvmflag); + free(nodename); + error = ccs_get(ctree, "//nodes/node/@name", &nodename); + } + + /* Finished with config file */ + ccs_disconnect(ctree); + + 
return 0; +} + +int gulm_fd(void) +{ + return lg_core_selector(gulm_if); +} diff --git a/daemons/clvmd/clvmd-gulm.h b/daemons/clvmd/clvmd-gulm.h new file mode 100644 index 000000000..07726faa3 --- /dev/null +++ b/daemons/clvmd/clvmd-gulm.h @@ -0,0 +1,9 @@ + + + +extern int get_next_node_csid(void **context, char *csid); +extern void add_down_node(char *csid); +extern int gulm_fd(void); +extern int get_ip_address(char *node, char *addr); +extern void tcp_remove_client(char *csid); +extern int alloc_client(int fd, char *csid, struct local_client **new_client); diff --git a/daemons/clvmd/clvmd.c b/daemons/clvmd/clvmd.c new file mode 100644 index 000000000..216eb1ea1 --- /dev/null +++ b/daemons/clvmd/clvmd.c @@ -0,0 +1,1693 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * CLVMD: Cluster LVM daemon + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clvmd-comms.h" +#include "lvm-functions.h" +#include "clvm.h" +#include "clvmd.h" +#include "libdlm.h" +#include "system-lv.h" +#include "list.h" +#include "log.h" + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* The maximum size of a message that will fit into a packet. 
Anything bigger + than this is sent via the system LV */ +#define MAX_INLINE_MESSAGE (MAX_CLUSTER_MESSAGE-sizeof(struct clvm_header)) + +#define ISLOCAL_CSID(c) (memcmp(c, our_csid, MAX_CSID_LEN) == 0) + +/* Head of the fd list. Also contains + the cluster_socket details */ +static struct local_client local_client_head; + +static unsigned short global_xid = 0; /* Last transaction ID issued */ + +static char our_csid[MAX_CSID_LEN]; + +/* Structure of items on the LVM thread list */ +struct lvm_thread_cmd { + struct list list; + + struct local_client *client; + struct clvm_header *msg; + char csid[MAX_CSID_LEN]; + int remote; /* Flag */ + int msglen; + unsigned short xid; +}; +static pthread_t lvm_thread; +static pthread_mutex_t lvm_thread_mutex; +static pthread_cond_t lvm_thread_cond; +static struct list lvm_cmd_head; +static int quit = 0; + +/* Prototypes for code further down */ +static void sigusr2_handler(int sig); +static void sigterm_handler(int sig); +static void send_local_reply(struct local_client *client, int status, + int clientid); +static void free_reply(struct local_client *client); +static void send_version_message(void); +static void *pre_and_post_thread(void *arg); +static int send_message(void *buf, int msglen, char *csid, int fd, + const char *errtext); +static int read_from_local_sock(struct local_client *thisfd); +static int process_local_command(struct clvm_header *msg, int msglen, + struct local_client *client, + unsigned short xid); +static void process_remote_command(struct clvm_header *msg, int msglen, int fd, + char *csid); +static int process_reply(struct clvm_header *msg, int msglen, char *csid); +static int open_local_sock(void); +static struct local_client *find_client(int clientid); +static void main_loop(int local_sock, int cmd_timeout); +static void be_daemon(void); +static int check_all_clvmds_running(struct local_client *client); +static int local_rendezvous_callback(struct local_client *thisfd, char *buf, + int len, char *csid, + 
struct local_client **new_client); +static void *lvm_thread_fn(void *); +static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg, + int msglen, char *csid); +static int distribute_command(struct local_client *thisfd); +static void hton_clvm(struct clvm_header *hdr); +static void ntoh_clvm(struct clvm_header *hdr); +static void add_reply_to_list(struct local_client *client, int status, + char *csid, const char *buf, int len); + +static void usage(char *prog, FILE *file) +{ + fprintf(file, "Usage:\n"); + fprintf(file, "%s [Vhd]\n", prog); + fprintf(file, "\n"); + fprintf(file, " -V Show version of clvmd\n"); + fprintf(file, " -h Show this help information\n"); + fprintf(file, " -d Don't fork, run in the foreground\n"); + fprintf(file, " -t Command timeout (default 60 seconds)\n"); + fprintf(file, "\n"); +} + +int main(int argc, char *argv[]) +{ + int local_sock; + struct local_client *newfd; + struct utsname nodeinfo; + signed char opt; + int debug = 0; + int cmd_timeout = DEFAULT_CMD_TIMEOUT; + sigset_t ss; + + /* Deal with command-line arguments */ + opterr = 0; + optind = 0; + while ((opt = getopt(argc, argv, "?vVhdt:")) != EOF) { + switch (opt) { + case 'h': + usage(argv[0], stdout); + exit(0); + + case '?': + usage(argv[0], stderr); + exit(0); + + case 'd': + debug++; + break; + + case 't': + cmd_timeout = atoi(optarg); + if (!cmd_timeout) { + fprintf(stderr, "command timeout is invalid\n"); + usage(argv[0], stderr); + exit(1); + } + break; + + case 'V': + printf("\nCluster LVM Daemon version %d.%d.%d\n\n", + CLVMD_MAJOR_VERSION, CLVMD_MINOR_VERSION, + CLVMD_PATCH_VERSION); + exit(1); + break; + + } + } + + /* Fork into the background (unless requested not to) */ + if (!debug) { + be_daemon(); + } + + DEBUGLOG("CLVMD started\n"); + + /* Open the Unix socket we listen for commands on. 
+ We do this before opening the cluster socket so that + potential clients will block rather than error if we are running + but the cluster is not ready yet */ + local_sock = open_local_sock(); + if (local_sock < 0) + exit(2); + + /* Set up signal handlers, USR1 is for cluster change notifications (in cman) + USR2 causes child threads to exit. + PIPE should be ignored */ + signal(SIGUSR2, sigusr2_handler); + signal(SIGTERM, sigterm_handler); + signal(SIGINT, sigterm_handler); + signal(SIGPIPE, SIG_IGN); + + /* Block SIGUSR2 in the main process */ + sigemptyset(&ss); + sigaddset(&ss, SIGUSR2); + sigprocmask(SIG_BLOCK, &ss, NULL); + + /* Initialise the LVM thread variables */ + list_init(&lvm_cmd_head); + pthread_mutex_init(&lvm_thread_mutex, NULL); + pthread_cond_init(&lvm_thread_cond, NULL); + init_lvhash(); + + /* Start the cluster interface */ + if (init_cluster()) { + DEBUGLOG("Can't initialise cluster interface\n"); + log_error("Can't initialise cluster interface\n"); + exit(5); + } + DEBUGLOG("Cluster ready, doing some more initialisation\n"); + + /* Save our CSID */ + uname(&nodeinfo); + get_our_csid(our_csid); + + /* Initialise the FD list head */ + local_client_head.fd = get_main_cluster_fd(); + local_client_head.type = CLUSTER_MAIN_SOCK; + local_client_head.callback = cluster_fd_callback; + + /* Add the local socket to the list */ + newfd = malloc(sizeof(struct local_client)); + if (!newfd) + exit(2); + + newfd->fd = local_sock; + newfd->type = LOCAL_RENDEZVOUS; + newfd->callback = local_rendezvous_callback; + newfd->next = local_client_head.next; + local_client_head.next = newfd; + + /* This needs to be started after cluster initialisation + as it may need to take out locks */ + DEBUGLOG("starting LVM thread\n"); + pthread_create(&lvm_thread, NULL, lvm_thread_fn, nodeinfo.nodename); + +#ifndef USE_GULM + /* Tell the rest of the cluster our version number */ + /* CMAN can do this immediately, gulm needs to wait until + the core initialisation has finished 
and the node list + has been gathered */ + send_version_message(); +#endif + + DEBUGLOG("clvmd ready for work\n"); + + /* Do some work */ + main_loop(local_sock, cmd_timeout); + + return 0; +} + +/* Called when the GuLM cluster layer has completed initialisation. + We send the version message */ +void clvmd_cluster_init_completed() +{ + send_version_message(); +} + +/* Data on a connected socket */ +static int local_sock_callback(struct local_client *thisfd, char *buf, int len, + char *csid, struct local_client **new_client) +{ + *new_client = NULL; + return read_from_local_sock(thisfd); +} + +/* Data on a connected socket */ +static int local_rendezvous_callback(struct local_client *thisfd, char *buf, + int len, char *csid, + struct local_client **new_client) +{ + /* Someone connected to our local socket, accept it. */ + + struct sockaddr_un socka; + struct local_client *newfd; + socklen_t sl = sizeof(socka); + int client_fd = accept(thisfd->fd, (struct sockaddr *) &socka, &sl); + + if (client_fd >= 0) { + newfd = malloc(sizeof(struct local_client)); + if (!newfd) { + close(client_fd); + return 1; + } + newfd->fd = client_fd; + newfd->type = LOCAL_SOCK; + newfd->xid = 0; + newfd->callback = local_sock_callback; + newfd->bits.localsock.replies = NULL; + newfd->bits.localsock.expected_replies = 0; + newfd->bits.localsock.cmd = NULL; + newfd->bits.localsock.in_progress = FALSE; + newfd->bits.localsock.sent_out = FALSE; + newfd->bits.localsock.threadid = 0; + newfd->bits.localsock.finished = 0; + newfd->bits.localsock.pipe_client = NULL; + newfd->bits.localsock.all_success = 1; + DEBUGLOG("Got new connection on fd %d\n", newfd->fd); + *new_client = newfd; + } + return 1; +} + +static int local_pipe_callback(struct local_client *thisfd, char *buf, + int maxlen, char *csid, + struct local_client **new_client) +{ + int len; + char buffer[PIPE_BUF]; + struct local_client *sock_client = thisfd->bits.pipe.client; + int status = -1; /* in error by default */ + + len = 
read(thisfd->fd, buffer, sizeof(int)); + + DEBUGLOG("read on PIPE %d: %d bytes: status: %d\n", + thisfd->fd, len, *(int *) buffer); + + if (len == sizeof(int)) { + status = *(int *) buffer; + } + + /* EOF on pipe or an error, close it */ + if (len <= 0) { + int jstat; + close(thisfd->fd); + + /* Clear out the cross-link */ + if (thisfd->bits.pipe.client != NULL) + thisfd->bits.pipe.client->bits.localsock.pipe_client = + NULL; + + /* Reap child thread */ + if (thisfd->bits.pipe.threadid) { + jstat = + pthread_join(thisfd->bits.pipe.threadid, + (void **) &status); + thisfd->bits.pipe.threadid = 0; + if (thisfd->bits.pipe.client != NULL) + thisfd->bits.pipe.client->bits.localsock. + threadid = 0; + } + return -1; + } else { + DEBUGLOG("background routine status was %d, sock_client=%p\n", + status, sock_client); + /* But has the client gone away ?? */ + if (sock_client == NULL) { + DEBUGLOG + ("Got PIPE response for dead client, ignoring it\n"); + } else { + /* If error then just return that code */ + if (status) + send_local_reply(sock_client, status, + sock_client->fd); + else { + if (sock_client->bits.localsock.state == + POST_COMMAND) { + send_local_reply(sock_client, 0, + sock_client->fd); + } else // PRE_COMMAND finished. + { + if ( + (status = + distribute_command(sock_client)) != + 0) send_local_reply(sock_client, + EFBIG, + sock_client-> + fd); + } + } + } + } + return len; +} + +/* If a noed is up, look for it in the reply array, if it's not there then + add one with "ETIMEDOUT". + NOTE: This won't race with real replies because they happen in the same thread. 
+*/ +static void timedout_callback(struct local_client *client, char *csid, + int node_up) +{ + if (node_up) { + struct node_reply *reply; + char nodename[MAX_CLUSTER_MEMBER_NAME_LEN]; + + name_from_csid(csid, nodename); + DEBUGLOG("PJC: checking for a reply from %s\n", nodename); + pthread_mutex_lock(&client->bits.localsock.reply_mutex); + + reply = client->bits.localsock.replies; + while (reply && strcmp(reply->node, nodename) != 0) { + reply = reply->next; + } + + pthread_mutex_unlock(&client->bits.localsock.reply_mutex); + + if (!reply) { + DEBUGLOG("PJC: node %s timed-out\n", nodename); + add_reply_to_list(client, ETIMEDOUT, csid, + "Command timed out", 18); + } + } +} + +/* Called when the request has timed out on at least one node. We fill in + the remaining node entries with ETIMEDOUT and return. + + By the time we get here the node that caused + the timeout could have gone down, in which case we will never get the expected + number of replies that triggers the post command so we need to do it here +*/ +static void request_timed_out(struct local_client *client) +{ + DEBUGLOG("Request timed-out. 
padding\n"); + cluster_do_node_callback(client, timedout_callback); + + if (client->bits.localsock.num_replies != + client->bits.localsock.expected_replies) { + /* Post-process the command */ + if (client->bits.localsock.threadid) { + pthread_mutex_lock(&client->bits.localsock.mutex); + client->bits.localsock.state = POST_COMMAND; + pthread_cond_signal(&client->bits.localsock.cond); + pthread_mutex_unlock(&client->bits.localsock.mutex); + } + } +} + +/* This is where the real work happens */ +static void main_loop(int local_sock, int cmd_timeout) +{ + DEBUGLOG("Using timeout of %d seconds\n", cmd_timeout); + + /* Main loop */ + while (!quit) { + fd_set in; + int select_status; + struct local_client *thisfd; + struct timeval tv = { cmd_timeout, 0 }; + int quorate = is_quorate(); + + /* Wait on the cluster FD and all local sockets/pipes */ + FD_ZERO(&in); + for (thisfd = &local_client_head; thisfd != NULL; + thisfd = thisfd->next) { + /* if the cluster is not quorate then don't listen for new requests */ + if ((thisfd->type != LOCAL_RENDEZVOUS && + thisfd->type != LOCAL_SOCK) || quorate) + FD_SET(thisfd->fd, &in); + } + + if ((select_status = select(FD_SETSIZE, &in, NULL, NULL, &tv)) > 0) { + struct local_client *lastfd = NULL; + struct clvm_header *inheader; + char csid[MAX_CSID_LEN]; + char buf[MAX_CLUSTER_MESSAGE]; + + for (thisfd = &local_client_head; thisfd != NULL; + thisfd = thisfd->next) { + if (FD_ISSET(thisfd->fd, &in)) { + struct local_client *newfd; + int ret; + + /* Do callback */ + ret = + thisfd->callback(thisfd, buf, + sizeof(buf), csid, + &newfd); + /* Ignore EAGAIN */ + if (ret < 0 && (errno == EAGAIN || + errno == EINTR)) continue; + + /* Got error or EOF: Remove it from the list safely */ + if (ret <= 0) { + struct local_client *free_fd; + int type = thisfd->type; + + /* If the cluster socket shuts down, so do we */ + if (type == CLUSTER_MAIN_SOCK || + type == CLUSTER_INTERNAL) + goto closedown; + + DEBUGLOG + ("ret == %d, errno = %d. 
removing client\n", + ret, errno); + lastfd->next = thisfd->next; + free_fd = thisfd; + thisfd = lastfd; + free(free_fd); + break; + } + + /* New client...simply add it to the list */ + if (newfd) { + newfd->next = thisfd->next; + thisfd->next = newfd; + break; + } + + switch (thisfd->type) { + case CLUSTER_MAIN_SOCK: + case CLUSTER_DATA_SOCK: + inheader = + (struct clvm_header *) buf; + ntoh_clvm(inheader); /* Byteswap fields */ + if (inheader->cmd == + CLVMD_CMD_REPLY) + process_reply + (inheader, ret, + csid); + else + add_to_lvmqueue(thisfd, + inheader, + ret, + csid); + break; + + /* All the work for these is done in the callback + rightly or wrongly... */ + case LOCAL_RENDEZVOUS: + case LOCAL_SOCK: + case THREAD_PIPE: + case CLUSTER_INTERNAL: + break; + } + } + lastfd = thisfd; + } + } + + /* Select timed out. Check for clients that have been waiting too long for a response */ + if (select_status == 0) { + time_t the_time = time(NULL); + + for (thisfd = &local_client_head; thisfd != NULL; + thisfd = thisfd->next) { + if (thisfd->type == LOCAL_SOCK + && thisfd->bits.localsock.sent_out + && thisfd->bits.localsock.sent_time + + cmd_timeout < the_time + && thisfd->bits.localsock. 
+ expected_replies != + thisfd->bits.localsock.num_replies) { + /* Send timed out message + replies we already have */ + DEBUGLOG + ("Request timed-out (send: %ld, now: %ld)\n", + thisfd->bits.localsock.sent_time, + the_time); + + thisfd->bits.localsock.all_success = 0; + + request_timed_out(thisfd); + } + } + } + if (select_status < 0) { + if (errno == EINTR) + continue; + +#ifdef DEBUG + perror("select error"); + exit(-1); +#endif + } + } + + closedown: + cluster_closedown(); + close(local_sock); +} + +/* Fork into the background and detach from our parent process */ +static void be_daemon() +{ + pid_t pid; + int devnull = open("/dev/null", O_RDWR); + if (devnull == -1) { + perror("Can't open /dev/null"); + exit(3); + } + + switch (pid = fork()) { + case -1: + perror("clvmd: can't fork"); + exit(2); + + case 0: /* child */ + break; + + default: /* Parent */ + exit(0); + } + + /* Detach ourself from the calling environment */ + if (close(0) || close(1) || close(2)) { + perror("Error closing terminal FDs"); + exit(4); + } + setsid(); + + if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0 + || dup2(devnull, 2) < 0) { + perror("Error setting terminal FDs to /dev/null"); + log_error("Error setting terminal FDs to /dev/null: %m"); + exit(5); + } + if (chdir("/")) { + log_error("Error setting current directory to /: %m"); + exit(6); + } + +} + +/* Called when we have a read from the local socket. 
+ was in the main loop but it's grown up and is a big girl now */ +static int read_from_local_sock(struct local_client *thisfd) +{ + int len; + int argslen; + int missing_len; + char buffer[PIPE_BUF]; + + len = read(thisfd->fd, buffer, sizeof(buffer)); + + DEBUGLOG("Read on local socket %d, len = %d\n", thisfd->fd, len); + + /* EOF or error on socket */ + if (len <= 0) { + int *status; + int jstat; + + DEBUGLOG("EOF on local socket: inprogress=%d\n", + thisfd->bits.localsock.in_progress); + + thisfd->bits.localsock.finished = 1; + + /* If the client went away in mid command then tidy up */ + if (thisfd->bits.localsock.in_progress) { + pthread_mutex_lock(&thisfd->bits.localsock.mutex); + thisfd->bits.localsock.state = POST_COMMAND; + pthread_cond_signal(&thisfd->bits.localsock.cond); + pthread_mutex_unlock(&thisfd->bits.localsock.mutex); + + /* Free any unsent buffers */ + free_reply(thisfd); + } + + /* Kill the subthread & free resources */ + if (thisfd->bits.localsock.threadid) { + DEBUGLOG("Waiting for child thread\n"); + pthread_mutex_lock(&thisfd->bits.localsock.mutex); + thisfd->bits.localsock.state = POST_COMMAND; + pthread_cond_signal(&thisfd->bits.localsock.cond); + pthread_mutex_unlock(&thisfd->bits.localsock.mutex); + pthread_kill(thisfd->bits.localsock.threadid, SIGUSR2); + + jstat = + pthread_join(thisfd->bits.localsock.threadid, + (void **) &status); + DEBUGLOG("Joined child thread\n"); + + thisfd->bits.localsock.threadid = 0; + pthread_cond_destroy(&thisfd->bits.localsock.cond); + pthread_mutex_destroy(&thisfd->bits.localsock.mutex); + + /* Remove the pipe client */ + if (thisfd->bits.localsock.pipe_client != NULL) { + struct local_client *newfd; + struct local_client *lastfd = NULL; + struct local_client *free_fd = NULL; + + close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */ + close(thisfd->bits.localsock.pipe); + + /* Remove pipe client */ + for (newfd = &local_client_head; newfd != NULL; + newfd = newfd->next) { + if 
(thisfd->bits.localsock. + pipe_client == newfd) { + thisfd->bits.localsock. + pipe_client = NULL; + + lastfd->next = newfd->next; + free_fd = newfd; + newfd->next = lastfd; + free(free_fd); + break; + } + lastfd = newfd; + } + } + } + + /* Free the command buffer */ + if (thisfd->bits.localsock.cmd) + free(thisfd->bits.localsock.cmd); + + /* Clear out the cross-link */ + if (thisfd->bits.localsock.pipe_client != NULL) + thisfd->bits.localsock.pipe_client->bits.pipe.client = + NULL; + + close(thisfd->fd); + return 0; + } else { + int comms_pipe[2]; + struct local_client *newfd; + char csid[MAX_CSID_LEN]; + struct clvm_header *inheader; + + inheader = (struct clvm_header *) buffer; + + /* Fill in the client ID */ + inheader->clientid = htonl(thisfd->fd); + + /* If we are already busy then return an error */ + if (thisfd->bits.localsock.in_progress) { + struct clvm_header reply; + reply.cmd = CLVMD_CMD_REPLY; + reply.status = -EBUSY; + reply.arglen = 0; + reply.flags = 0; + send_message(&reply, sizeof(reply), our_csid, + thisfd->fd, + "Error sending EBUSY reply to local user"); + return len; + } + + /* Free any old buffer space */ + if (thisfd->bits.localsock.cmd) + free(thisfd->bits.localsock.cmd); + + /* See if we have the whole message */ + argslen = + len - strlen(inheader->node) - sizeof(struct clvm_header); + missing_len = inheader->arglen - argslen; + + /* Save the message */ + thisfd->bits.localsock.cmd = malloc(len + missing_len); + if (!thisfd->bits.localsock.cmd) { + struct clvm_header reply; + reply.cmd = CLVMD_CMD_REPLY; + reply.status = -ENOMEM; + reply.arglen = 0; + reply.flags = 0; + send_message(&reply, sizeof(reply), our_csid, + thisfd->fd, + "Error sending ENOMEM reply to local user"); + return 0; + } + memcpy(thisfd->bits.localsock.cmd, buffer, len); + thisfd->bits.localsock.cmd_len = len + missing_len; + inheader = (struct clvm_header *) thisfd->bits.localsock.cmd; + + /* If we don't have the full message then read the rest now */ + if 
(missing_len) { + char *argptr = + inheader->node + strlen(inheader->node) + 1; + + while (missing_len > 0 && len >= 0) { + DEBUGLOG + ("got %d bytes, need another %d (total %d)\n", + argslen, missing_len, inheader->arglen); + len = + read(thisfd->fd, argptr + argslen, + missing_len); + if (len >= 0) { + missing_len -= len; + argslen += len; + } + } + } + + /* Only run the command if all the cluster nodes are running CLVMD */ + if (((inheader->flags & CLVMD_FLAG_LOCAL) == 0) && + (check_all_clvmds_running(thisfd) == -1)) { + thisfd->bits.localsock.expected_replies = 0; + thisfd->bits.localsock.num_replies = 0; + send_local_reply(thisfd, EHOSTDOWN, thisfd->fd); + return len; + } + + /* Check the node name for validity */ + if (inheader->node[0] && csid_from_name(csid, inheader->node)) { + /* Error, node is not in the cluster */ + struct clvm_header reply; + DEBUGLOG("Unknown node: '%s'\n", inheader->node); + + reply.cmd = CLVMD_CMD_REPLY; + reply.status = -ENOENT; + reply.flags = 0; + reply.arglen = 0; + send_message(&reply, sizeof(reply), our_csid, + thisfd->fd, + "Error sending ENOENT reply to local user"); + thisfd->bits.localsock.expected_replies = 0; + thisfd->bits.localsock.num_replies = 0; + thisfd->bits.localsock.in_progress = FALSE; + thisfd->bits.localsock.sent_out = FALSE; + return len; + } + + /* If we already have a subthread then just signal it to start */ + if (thisfd->bits.localsock.threadid) { + pthread_mutex_lock(&thisfd->bits.localsock.mutex); + thisfd->bits.localsock.state = PRE_COMMAND; + pthread_cond_signal(&thisfd->bits.localsock.cond); + pthread_mutex_unlock(&thisfd->bits.localsock.mutex); + return len; + } + + /* Create a pipe and add the reading end to our FD list */ + pipe(comms_pipe); + newfd = malloc(sizeof(struct local_client)); + if (!newfd) { + struct clvm_header reply; + close(comms_pipe[0]); + close(comms_pipe[1]); + + reply.cmd = CLVMD_CMD_REPLY; + reply.status = -ENOMEM; + reply.arglen = 0; + reply.flags = 0; + 
send_message(&reply, sizeof(reply), our_csid, + thisfd->fd, + "Error sending ENOMEM reply to local user"); + return len; + } + DEBUGLOG("creating pipe, [%d, %d]\n", comms_pipe[0], + comms_pipe[1]); + newfd->fd = comms_pipe[0]; + newfd->type = THREAD_PIPE; + newfd->callback = local_pipe_callback; + newfd->next = thisfd->next; + newfd->bits.pipe.client = thisfd; + newfd->bits.pipe.threadid = 0; + thisfd->next = newfd; + + /* Store a cross link to the pipe */ + thisfd->bits.localsock.pipe_client = newfd; + + thisfd->bits.localsock.pipe = comms_pipe[1]; + + /* Initialise and lock the mutex so the subthread will wait after + finishing the PRE routine */ + pthread_mutex_init(&thisfd->bits.localsock.mutex, NULL); + pthread_cond_init(&thisfd->bits.localsock.cond, NULL); + pthread_mutex_init(&thisfd->bits.localsock.reply_mutex, NULL); + + /* Make sure the thread has a copy of it's own ID */ + newfd->bits.pipe.threadid = thisfd->bits.localsock.threadid; + + /* Run the pre routine */ + thisfd->bits.localsock.in_progress = TRUE; + thisfd->bits.localsock.state = PRE_COMMAND; + pthread_create(&thisfd->bits.localsock.threadid, NULL, + pre_and_post_thread, thisfd); + } + return len; +} + +/* Add a file descriptor from the cluster or comms interface to + our list of FDs for select +*/ +int add_client(struct local_client *new_client) +{ + new_client->next = local_client_head.next; + local_client_head.next = new_client; + + return 0; +} + + +/* + * Send a long message using the System LV + */ +static int send_long_message(struct local_client *thisfd, struct clvm_header *inheader, int len) +{ + struct clvm_header new_header; + int status; + + DEBUGLOG("Long message: being sent via system LV:\n"); + + /* Use System LV */ + status = system_lv_write_data((char *)inheader, len); + if (status < 0) + return errno; + + /* Send message indicating System-LV is being used */ + memcpy(&new_header, inheader, sizeof(new_header)); + new_header.flags |= CLVMD_FLAG_SYSTEMLV; + new_header.xid = 
thisfd->xid; + + return send_message(&new_header, sizeof(new_header), NULL, -1, + "Error forwarding long message to cluster"); +} + +/* Called when the pre-command has completed successfully - we + now execute the real command on all the requested nodes */ +static int distribute_command(struct local_client *thisfd) +{ + struct clvm_header *inheader = + (struct clvm_header *) thisfd->bits.localsock.cmd; + int len = thisfd->bits.localsock.cmd_len; + + thisfd->xid = global_xid++; + DEBUGLOG("distribute command: XID = %d\n", thisfd->xid); + + /* Forward it to other nodes in the cluster if needed */ + if (!(inheader->flags & CLVMD_FLAG_LOCAL)) { + /* if node is empty then do it on the whole cluster */ + if (inheader->node[0] == '\0') { + thisfd->bits.localsock.expected_replies = + get_num_nodes(); + thisfd->bits.localsock.num_replies = 0; + thisfd->bits.localsock.sent_time = time(NULL); + thisfd->bits.localsock.in_progress = TRUE; + thisfd->bits.localsock.sent_out = TRUE; + + /* Do it here first */ + add_to_lvmqueue(thisfd, inheader, len, NULL); + + DEBUGLOG("Sending message to all cluster nodes\n"); + if (len > MAX_INLINE_MESSAGE) { + send_long_message(thisfd, inheader, len ); + } else { + inheader->xid = thisfd->xid; + send_message(inheader, len, NULL, -1, + "Error forwarding message to cluster"); + } + } else { + /* Do it on a single node */ + char csid[MAX_CSID_LEN]; + + if (csid_from_name(csid, inheader->node)) { + /* This has already been checked so should not happen */ + return 0; + } else { + /* OK, found a node... */ + thisfd->bits.localsock.expected_replies = 1; + thisfd->bits.localsock.num_replies = 0; + thisfd->bits.localsock.in_progress = TRUE; + + /* Are we the requested node ?? 
*/ + if (memcmp(csid, our_csid, MAX_CSID_LEN) == 0) { + DEBUGLOG("Doing command on local node only\n"); + add_to_lvmqueue(thisfd, inheader, len, NULL); + } else { + DEBUGLOG("Sending message to single node: %s\n", + inheader->node); + if (len > MAX_INLINE_MESSAGE) { + send_long_message(thisfd, inheader, len ); + } else { + inheader->xid = thisfd->xid; + send_message(inheader, len, + csid, -1, + "Error forwarding message to cluster node"); + } + } + } + } + } else { + /* Local explicitly requested, ignore nodes */ + thisfd->bits.localsock.in_progress = TRUE; + thisfd->bits.localsock.expected_replies = 1; + thisfd->bits.localsock.num_replies = 0; + add_to_lvmqueue(thisfd, inheader, len, NULL); + } + return 0; +} + +/* Process a command from a remote node and return the result */ +void process_remote_command(struct clvm_header *msg, int msglen, int fd, + char *csid) +{ + char *replyargs; + char nodename[MAX_CLUSTER_MEMBER_NAME_LEN]; + int replylen = 0; + int buflen = MAX_CLUSTER_MESSAGE - sizeof(struct clvm_header) - 1; + int status; + int msg_malloced = 0; + + /* Get the node name as we /may/ need it later */ + name_from_csid(csid, nodename); + + DEBUGLOG("process_remote_command %d for clientid 0x%x on node %s\n", + msg->cmd, msg->clientid, nodename); + + /* Is the data to be found in the system LV ? 
*/ + if (msg->flags & CLVMD_FLAG_SYSTEMLV) { + struct clvm_header *newmsg; + + DEBUGLOG("Reading message from system LV\n"); + newmsg = + (struct clvm_header *) malloc(msg->arglen + + sizeof(struct clvm_header)); + if (newmsg) { + if (system_lv_read_data + (nodename, (char *) newmsg, + (size_t *) &msglen) == 0) { + msg = newmsg; + msg_malloced = 1; + } else { + struct clvm_header head; + DEBUGLOG("System LV read failed\n"); + + /* Return a failure response */ + head.cmd = CLVMD_CMD_REPLY; + head.status = -EFBIG; + head.flags = 0; + head.clientid = msg->clientid; + head.arglen = 0; + head.node[0] = '\0'; + send_message(&head, sizeof(struct clvm_header), + csid, fd, + "Error sending ENOMEM command reply"); + return; + } + } else { + struct clvm_header head; + DEBUGLOG + ("Error attempting to malloc %d bytes for system LV read\n", + msg->arglen); + /* Return a failure response */ + head.cmd = CLVMD_CMD_REPLY; + head.status = -ENOMEM; + head.flags = 0; + head.clientid = msg->clientid; + head.arglen = 0; + head.node[0] = '\0'; + send_message(&head, sizeof(struct clvm_header), csid, + fd, "Error sending ENOMEM command reply"); + return; + } + } + + /* Check for GOAWAY and sulk */ + if (msg->cmd == CLVMD_CMD_GOAWAY) { + + DEBUGLOG("Told to go away by %s\n", nodename); + log_error("Told to go away by %s\n", nodename); + exit(99); + } + + /* Version check is internal - don't bother exposing it in + clvmd-command.c */ + if (msg->cmd == CLVMD_CMD_VERSION) { + int *version_nums = (int *) msg->args; + char node[256]; + name_from_csid(csid, node); + DEBUGLOG("Remote node %s is version %d.%d.%d\n", + node, + ntohl(version_nums[0]), + ntohl(version_nums[1]), ntohl(version_nums[2])); + + if (ntohl(version_nums[0]) != CLVMD_MAJOR_VERSION) { + struct clvm_header byebyemsg; + DEBUGLOG + ("Telling node %s to go away because of incompatible version number\n", + node); + log_notice + ("Telling node %s to go away because of incompatible version number %d.%d.%d\n", + node, 
ntohl(version_nums[0]), + ntohl(version_nums[1]), ntohl(version_nums[2])); + + byebyemsg.cmd = CLVMD_CMD_GOAWAY; + byebyemsg.status = 0; + byebyemsg.flags = 0; + byebyemsg.arglen = 0; + byebyemsg.clientid = 0; + cluster_send_message(&byebyemsg, sizeof(byebyemsg), + our_csid, + "Error Sending GOAWAY message"); + } else { + add_up_node(csid); + } + return; + } + + /* Allocate a default reply buffer */ + replyargs = malloc(MAX_CLUSTER_MESSAGE - sizeof(struct clvm_header)); + + if (replyargs != NULL) { + /* Run the command */ + status = + do_command(NULL, msg, msglen, &replyargs, buflen, + &replylen); + } else { + status = -ENOMEM; + } + + /* If it wasn't a reply, then reply */ + if (msg->cmd != CLVMD_CMD_REPLY) { + char *aggreply; + + aggreply = + realloc(replyargs, replylen + sizeof(struct clvm_header)); + if (aggreply) { + struct clvm_header *agghead = + (struct clvm_header *) aggreply; + + replyargs = aggreply; + /* Move it up so there's room for a header in front of the data */ + memmove(aggreply + offsetof(struct clvm_header, args), + replyargs, replylen); + + agghead->xid = msg->xid; + + /* Use the system LV ? */ + if (replylen > MAX_INLINE_MESSAGE) { + agghead->cmd = CLVMD_CMD_REPLY; + agghead->status = status; + agghead->flags = CLVMD_FLAG_SYSTEMLV; + agghead->clientid = msg->clientid; + agghead->arglen = replylen; + agghead->node[0] = '\0'; + + /* If System LV operation failed then report it as EFBIG but only do it + if the data buffer has something in it. 
*/ + if (system_lv_write_data + (aggreply, + replylen + sizeof(struct clvm_header)) < 0 + && replylen > 0) + agghead->status = -EFBIG; + + send_message(agghead, + sizeof(struct clvm_header), csid, + fd, + "Error sending long command reply"); + + } else { + agghead->cmd = CLVMD_CMD_REPLY; + agghead->status = status; + agghead->flags = 0; + agghead->clientid = msg->clientid; + agghead->arglen = replylen; + agghead->node[0] = '\0'; + send_message(aggreply, + sizeof(struct clvm_header) + + replylen + 2, csid, fd, + "Error sending command reply"); + } + } else { + struct clvm_header head; + + DEBUGLOG("Error attempting to realloc return buffer\n"); + /* Return a failure response */ + head.cmd = CLVMD_CMD_REPLY; + head.status = -ENOMEM; + head.flags = 0; + head.clientid = msg->clientid; + head.arglen = 0; + head.node[0] = '\0'; + send_message(&head, sizeof(struct clvm_header), csid, + fd, "Error sending ENOMEM command reply"); + return; + } + } + + /* Free buffer if it was malloced */ + if (msg_malloced) { + free(msg); + } + free(replyargs); +} + +/* Add a reply to a command to the list of replies for this client. 
+ If we have got a full set then send them to the waiting client down the local + socket */ +static void add_reply_to_list(struct local_client *client, int status, + char *csid, const char *buf, int len) +{ + struct node_reply *reply; + + pthread_mutex_lock(&client->bits.localsock.reply_mutex); + + /* Add it to the list of replies */ + reply = malloc(sizeof(struct node_reply)); + if (reply) { + reply->status = status; + name_from_csid(csid, reply->node); + DEBUGLOG("Reply from node %s: %d bytes\n", reply->node, len); + + if (len > 0) { + reply->replymsg = malloc(len); + if (!reply->replymsg) { + reply->status = -ENOMEM; + } else { + memcpy(reply->replymsg, buf, len); + } + } else { + reply->replymsg = NULL; + } + /* Hook it onto the reply chain */ + reply->next = client->bits.localsock.replies; + client->bits.localsock.replies = reply; + } else { + /* It's all gone horribly wrong... */ + pthread_mutex_unlock(&client->bits.localsock.reply_mutex); + send_local_reply(client, ENOMEM, client->fd); + return; + } + DEBUGLOG("Got %d replies, expecting: %d\n", + client->bits.localsock.num_replies + 1, + client->bits.localsock.expected_replies); + + /* If we have the whole lot then do the post-process */ + if (++client->bits.localsock.num_replies == + client->bits.localsock.expected_replies) { + /* Post-process the command */ + if (client->bits.localsock.threadid) { + pthread_mutex_lock(&client->bits.localsock.mutex); + client->bits.localsock.state = POST_COMMAND; + pthread_cond_signal(&client->bits.localsock.cond); + pthread_mutex_unlock(&client->bits.localsock.mutex); + } + } + pthread_mutex_unlock(&client->bits.localsock.reply_mutex); +} + +/* This is the thread that runs the PRE and post commands for a particular connection */ +static void *pre_and_post_thread(void *arg) +{ + struct local_client *client = (struct local_client *) arg; + int status; + sigset_t ss; + int pipe_fd = client->bits.localsock.pipe; + + DEBUGLOG("in sub thread: client = %p\n", client); + + /* 
Ignore SIGUSR1 (handled by master process) but enable + SIGUSR2 (kills subthreads) */ + sigemptyset(&ss); + sigaddset(&ss, SIGUSR1); + pthread_sigmask(SIG_BLOCK, &ss, NULL); + + sigdelset(&ss, SIGUSR1); + sigaddset(&ss, SIGUSR2); + pthread_sigmask(SIG_UNBLOCK, &ss, NULL); + + /* Loop around doing PRE and POST functions until the client goes away */ + while (!client->bits.localsock.finished) { + /* Execute the code */ + status = do_pre_command(client); + + if (status) + client->bits.localsock.all_success = 0; + + DEBUGLOG("Writing status %d down pipe %d\n", status, pipe_fd); + /* Tell the parent process we have finished this bit */ + write(pipe_fd, &status, sizeof(int)); + + /* We may need to wait for the condition variable before running the post command */ + pthread_mutex_lock(&client->bits.localsock.mutex); + DEBUGLOG("Waiting to do post command - state = %d\n", + client->bits.localsock.state); + + if (client->bits.localsock.state != POST_COMMAND) { + pthread_cond_wait(&client->bits.localsock.cond, + &client->bits.localsock.mutex); + } + pthread_mutex_unlock(&client->bits.localsock.mutex); + + DEBUGLOG("Got post command condition...\n"); + + do_post_command(client); + + write(pipe_fd, &status, sizeof(int)); + + if (client->bits.localsock.finished) + break; + + DEBUGLOG("Waiting for next pre command\n"); + + pthread_mutex_lock(&client->bits.localsock.mutex); + if (client->bits.localsock.state != PRE_COMMAND) { + pthread_cond_wait(&client->bits.localsock.cond, + &client->bits.localsock.mutex); + } + pthread_mutex_unlock(&client->bits.localsock.mutex); + + DEBUGLOG("Got pre command condition...\n"); + } + DEBUGLOG("Subthread finished\n"); + return (void *) 0; +} + +/* Process a command on the local node and store the result */ +static int process_local_command(struct clvm_header *msg, int msglen, + struct local_client *client, + unsigned short xid) +{ + char *replybuf = malloc(MAX_CLUSTER_MESSAGE); + int buflen = MAX_CLUSTER_MESSAGE - sizeof(struct clvm_header) - 1; 
+ int replylen = 0; + int status; + + DEBUGLOG("process_local_command: msg=%p, msglen =%d, client=%p\n", msg, + msglen, client); + if (replybuf == NULL) + return -1; + + status = do_command(client, msg, msglen, &replybuf, buflen, &replylen); + + if (status) + client->bits.localsock.all_success = 0; + + /* If we took too long then discard the reply */ + if (xid == client->xid) { + add_reply_to_list(client, status, our_csid, replybuf, replylen); + } else { + DEBUGLOG + ("Local command took too long, discarding xid %d, current is %d\n", + xid, client->xid); + } + + free(replybuf); + return status; +} + +static int process_reply(struct clvm_header *msg, int msglen, char *csid) +{ + struct local_client *client = NULL; + + client = find_client(msg->clientid); + if (!client) { + DEBUGLOG("Got message for unknown client 0x%x\n", + msg->clientid); + log_error("Got message for unknown client 0x%x\n", + msg->clientid); + return -1; + } + + if (msg->status) + client->bits.localsock.all_success = 0; + + /* Gather replies together for this client id */ + if (msg->xid == client->xid) { + add_reply_to_list(client, msg->status, csid, msg->args, + msg->arglen); + } else { + DEBUGLOG("Discarding reply with old XID %d, current = %d\n", + msg->xid, client->xid); + } + return 0; +} + +/* Send an aggregated reply back to the client */ +static void send_local_reply(struct local_client *client, int status, int fd) +{ + struct clvm_header *clientreply; + struct node_reply *thisreply = client->bits.localsock.replies; + char *replybuf; + char *ptr; + int message_len = 0; + + DEBUGLOG("Send local reply\n"); + + /* Work out the total size of the reply */ + while (thisreply) { + if (thisreply->replymsg) + message_len += strlen(thisreply->replymsg) + 1; + else + message_len++; + + message_len += strlen(thisreply->node) + 1 + sizeof(int); + + thisreply = thisreply->next; + } + + /* Add in the size of our header */ + message_len = message_len + sizeof(struct clvm_header) + 1; + replybuf = 
malloc(message_len); + + clientreply = (struct clvm_header *) replybuf; + clientreply->status = -status; + clientreply->cmd = CLVMD_CMD_REPLY; + clientreply->node[0] = '\0'; + + ptr = clientreply->args; + + /* Add in all the replies, and free them as we go */ + thisreply = client->bits.localsock.replies; + while (thisreply) { + struct node_reply *tempreply = thisreply; + + strcpy(ptr, thisreply->node); + ptr += strlen(thisreply->node) + 1; + + *(int *) ptr = thisreply->status; + ptr += sizeof(int); + + if (thisreply->replymsg) { + strcpy(ptr, thisreply->replymsg); + ptr += strlen(thisreply->replymsg) + 1; + } else { + ptr[0] = '\0'; + ptr++; + } + thisreply = thisreply->next; + + if (tempreply->replymsg) + free(tempreply->replymsg); + free(tempreply); + } + + /* Terminate with an empty node name */ + *ptr = '\0'; + + clientreply->arglen = ptr - clientreply->args + 1; + + /* And send it */ + send_message(replybuf, message_len, our_csid, fd, + "Error sending REPLY to client"); + free(replybuf); + + /* Reset comms variables */ + client->bits.localsock.replies = NULL; + client->bits.localsock.expected_replies = 0; + client->bits.localsock.in_progress = FALSE; + client->bits.localsock.sent_out = FALSE; +} + +/* Just free a reply chain baceuse it wasn't used. 
*/ +static void free_reply(struct local_client *client) +{ + /* Add in all the replies, and free them as we go */ + struct node_reply *thisreply = client->bits.localsock.replies; + while (thisreply) { + struct node_reply *tempreply = thisreply; + + thisreply = thisreply->next; + + if (tempreply->replymsg) + free(tempreply->replymsg); + free(tempreply); + } + client->bits.localsock.replies = NULL; +} + +/* Send our version number to the cluster */ +static void send_version_message() +{ + char message[sizeof(struct clvm_header) + sizeof(int) * 3]; + struct clvm_header *msg = (struct clvm_header *) message; + int *version_nums = (int *) msg->args; + + msg->cmd = CLVMD_CMD_VERSION; + msg->status = 0; + msg->flags = 0; + msg->clientid = 0; + msg->arglen = sizeof(int) * 3; + + version_nums[0] = htonl(CLVMD_MAJOR_VERSION); + version_nums[1] = htonl(CLVMD_MINOR_VERSION); + version_nums[2] = htonl(CLVMD_PATCH_VERSION); + + cluster_send_message(message, sizeof(message), NULL, + "Error Sending version number"); +} + +/* Send a message to either a local client or another server */ +static int send_message(void *buf, int msglen, char *csid, int fd, + const char *errtext) +{ + int len; + + /* Send remote messages down the cluster socket */ + if (csid == NULL || !ISLOCAL_CSID(csid)) { + hton_clvm((struct clvm_header *) buf); /* Byte swap if necessary */ + return cluster_send_message(buf, msglen, csid, errtext); + } else { + int ptr = 0; + + /* Make sure it all goes */ + do { + len = write(fd, buf + ptr, msglen - ptr); + + if (len <= 0) { + log_error(errtext); + break; + } + ptr += len; + } while (len < msglen); + } + return len; +} + +static int process_work_item(struct lvm_thread_cmd *cmd) +{ + if (!cmd->remote) { + DEBUGLOG("process_work_item: local\n"); + process_local_command(cmd->msg, cmd->msglen, cmd->client, + cmd->xid); + } else { + DEBUGLOG("process_work_item: remote\n"); + process_remote_command(cmd->msg, cmd->msglen, cmd->client->fd, + cmd->csid); + } + return 0; +} + 
+/* + * Routine that runs in the "LVM thread". + */ +static void *lvm_thread_fn(void *arg) +{ + struct list *cmdl, *tmp; + sigset_t ss; + + DEBUGLOG("LVM thread function started\n"); + pthread_mutex_lock(&lvm_thread_mutex); + + /* Ignore SIGUSR1 & 2 */ + sigemptyset(&ss); + sigaddset(&ss, SIGUSR1); + sigaddset(&ss, SIGUSR2); + pthread_sigmask(SIG_BLOCK, &ss, NULL); + + /* Initialise the interface to liblvm */ + init_lvm(); + pthread_mutex_unlock(&lvm_thread_mutex); + + /* Now wait for some actual work */ + for (;;) { + DEBUGLOG("LVM thread waiting for work\n"); + + pthread_mutex_lock(&lvm_thread_mutex); + if (list_empty(&lvm_cmd_head)) + pthread_cond_wait(&lvm_thread_cond, &lvm_thread_mutex); + + list_iterate_safe(cmdl, tmp, &lvm_cmd_head) { + struct lvm_thread_cmd *cmd; + + cmd = + list_struct_base(cmdl, struct lvm_thread_cmd, list); + list_del(&cmd->list); + pthread_mutex_unlock(&lvm_thread_mutex); + + process_work_item(cmd); + free(cmd->msg); + free(cmd); + + pthread_mutex_lock(&lvm_thread_mutex); + } + pthread_mutex_unlock(&lvm_thread_mutex); + } +} + +/* Pass down some work to the LVM thread */ +static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg, + int msglen, char *csid) +{ + struct lvm_thread_cmd *cmd; + + cmd = malloc(sizeof(struct lvm_thread_cmd)); + if (!cmd) + return -ENOMEM; + + cmd->msg = malloc(msglen); + if (!cmd->msg) { + log_error("Unable to allocate buffer space\n"); + free(cmd); + return -1; + } + + cmd->client = client; + cmd->msglen = msglen; + cmd->xid = client->xid; + memcpy(cmd->msg, msg, msglen); + if (csid) { + memcpy(cmd->csid, csid, MAX_CSID_LEN); + cmd->remote = 1; + } else { + cmd->remote = 0; + } + + DEBUGLOG + ("add_to_lvmqueue: cmd=%p. 
client=%p, msg=%p, len=%d, csid=%p, xid=%d\n", + cmd, client, msg, msglen, csid, cmd->xid); + pthread_mutex_lock(&lvm_thread_mutex); + list_add(&lvm_cmd_head, &cmd->list); + pthread_cond_signal(&lvm_thread_cond); + pthread_mutex_unlock(&lvm_thread_mutex); + + return 0; +} + +/* Open the local socket, that's the one we talk to libclvm down */ +static int open_local_sock() +{ + int local_socket; + struct sockaddr_un sockaddr; + + /* Open local socket */ + if (CLVMD_SOCKNAME[0] != '\0') + unlink(CLVMD_SOCKNAME); + local_socket = socket(PF_UNIX, SOCK_STREAM, 0); + if (local_socket < 0) { + log_error("Can't create local socket: %m"); + return -1; + } + + memset(&sockaddr, 0, sizeof(sockaddr)); + memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME)); + sockaddr.sun_family = AF_UNIX; + if (bind(local_socket, (struct sockaddr *) &sockaddr, sizeof(sockaddr))) { + log_error("can't bind local socket: %m"); + close(local_socket); + return -1; + } + if (listen(local_socket, 1) != 0) { + log_error("listen local: %m"); + close(local_socket); + return -1; + } + if (CLVMD_SOCKNAME[0] != '\0') + chmod(CLVMD_SOCKNAME, 0600); + + return local_socket; +} + +static void check_all_callback(struct local_client *client, char *csid, + int node_up) +{ + if (!node_up) + add_reply_to_list(client, -EHOSTDOWN, csid, "CLVMD not running", + 18); +} + +/* Check to see if all CLVMDs are running (ie one on + every node in the cluster). + If not, returns -1 and prints out a list of errant nodes */ +static int check_all_clvmds_running(struct local_client *client) +{ + DEBUGLOG("check_all_clvmds_running\n"); + return cluster_do_node_callback(client, check_all_callback); +} + +/* Return a local_client struct given a client ID. 
+ client IDs are in network byte order */ +static struct local_client *find_client(int clientid) +{ + struct local_client *thisfd; + for (thisfd = &local_client_head; thisfd != NULL; thisfd = thisfd->next) { + if (thisfd->fd == ntohl(clientid)) + return thisfd; + } + return NULL; +} + +/* Byte-swapping routines for the header so we + work in a heterogeneous environment */ +static void hton_clvm(struct clvm_header *hdr) +{ + hdr->status = htonl(hdr->status); + hdr->arglen = htonl(hdr->arglen); + hdr->xid = htons(hdr->xid); + /* Don't swap clientid as it's only a token as far as + remote nodes are concerned */ +} + +static void ntoh_clvm(struct clvm_header *hdr) +{ + hdr->status = ntohl(hdr->status); + hdr->arglen = ntohl(hdr->arglen); + hdr->xid = ntohs(hdr->xid); +} + +/* Handler for SIGUSR2 - sent to kill subthreads */ +static void sigusr2_handler(int sig) +{ + DEBUGLOG("SIGUSR2 received\n"); + pthread_exit((void *) -1); + return; +} + +static void sigterm_handler(int sig) +{ + DEBUGLOG("SIGTERM received\n"); + quit = 1; + return; +} diff --git a/daemons/clvmd/clvmd.h b/daemons/clvmd/clvmd.h new file mode 100644 index 000000000..46e53c4b0 --- /dev/null +++ b/daemons/clvmd/clvmd.h @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _CLVMD_H +#define _CLVMD_H + +#define CLVMD_MAJOR_VERSION 0 +#define CLVMD_MINOR_VERSION 2 +#define CLVMD_PATCH_VERSION 1 + +/* Name of the cluster LVM admin lock */ +#define ADMIN_LOCK_NAME "CLVMD_ADMIN" + +/* Default time (in seconds) we will wait for all remote commands to execute + before declaring them dead */ +#define DEFAULT_CMD_TIMEOUT 60 + +/* One of these for each reply we get from command execution on a node */ +struct node_reply { + char node[MAX_CLUSTER_MEMBER_NAME_LEN]; + char *replymsg; + int status; + struct node_reply *next; +}; + +/* + * These exist for the use of local sockets only when we are + * collecting responses from all cluster nodes + */ +struct localsock_bits { + struct node_reply *replies; + int num_replies; + int expected_replies; + time_t sent_time; /* So we can check for timeouts */ + int in_progress; /* Only execute one cmd at a time per client */ + int sent_out; /* Flag to indicate that a command was sent + to remote nodes */ + void *private; /* Private area for command processor use */ + void *cmd; /* Whole command as passed down local socket */ + int cmd_len; /* Length of above */ + int pipe; /* Pipe to send PRE completion status down */ + int finished; /* Flag to tell subthread to exit */ + int all_success; /* Set to 0 if any node (or the pre_command) + failed */ + struct local_client *pipe_client; + pthread_t threadid; + enum { PRE_COMMAND, POST_COMMAND, QUIT } state; + pthread_mutex_t mutex; /* Main thread and worker synchronisation */ + pthread_cond_t cond; + + pthread_mutex_t reply_mutex; /* Protect reply structure */ +}; + +/* Entries for PIPE clients */ +struct pipe_bits { + struct local_client *client; /* Actual (localsock) client */ + pthread_t threadid; /* Our own copy of the thread id */ +}; + 
+/* Entries for Network socket clients */ +struct netsock_bits { + void *private; + int flags; +}; + +typedef int (*fd_callback_t) (struct local_client * fd, char *buf, int len, + char *csid, struct local_client ** new_client); + +/* One of these for each fd we are listening on */ +struct local_client { + int fd; + enum { CLUSTER_MAIN_SOCK, CLUSTER_DATA_SOCK, LOCAL_RENDEZVOUS, + LOCAL_SOCK, THREAD_PIPE, CLUSTER_INTERNAL } type; + struct local_client *next; + unsigned short xid; + fd_callback_t callback; + + union { + struct localsock_bits localsock; + struct pipe_bits pipe; + struct netsock_bits net; + } bits; +}; + +#ifdef DEBUG +#define DEBUGLOG(fmt, args...) fprintf(stderr, "CLVMD[%d]: %ld ", getpid(), time(NULL) ); fprintf(stderr, fmt, ## args) +#else +#define DEBUGLOG(fmt, args...) +#endif + +#ifndef max +#define max(a,b) ((a)>(b)?(a):(b)) +#endif + +/* The real command processor is in clvmd-command.c */ +extern int do_command(struct local_client *client, struct clvm_header *msg, + int msglen, char **buf, int buflen, int *retlen); + +/* Pre and post command routines are called only on the local node */ +extern int do_pre_command(struct local_client *client); +extern int do_post_command(struct local_client *client); + +extern int add_client(struct local_client *new_client); + +extern void clvmd_cluster_init_completed(void); + +#endif diff --git a/daemons/clvmd/cnxman-socket.h b/daemons/clvmd/cnxman-socket.h new file mode 100644 index 000000000..8ae44d85c --- /dev/null +++ b/daemons/clvmd/cnxman-socket.h @@ -0,0 +1,226 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. 
+** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* CMAN socket interface header, + may be include by user or kernel code */ + +#ifndef __CNXMAN_SOCKET_H +#define __CNXMAN_SOCKET_H + +/* Just made these up but the address family must be less than 32 (NPROTO) */ +#define AF_CLUSTER 31 +#define PF_CLUSTER AF_CLUSTER + +/* Protocol(socket) types */ +#define CLPROTO_MASTER 2 +#define CLPROTO_CLIENT 3 + +/* Setsockopt -- maybe should be ioctls?? */ +#define CLU_SET_MULTICAST 100 +#define CLU_JOIN_CLUSTER 101 +#define CLU_LEAVE_CLUSTER 102 +#define CLU_SET_RCVONLY 103 +#define CLU_SET_UNICAST 104 +#define KCL_SET_MULTICAST 105 +#define KCL_SET_RCVONLY 106 +#define KCL_SET_UNICAST 107 +#define KCL_SET_NODENAME 108 +#define CLU_SET_NODENAME 109 + +/* ioctls -- should register these properly */ +#define SIOCCLUSTER_NOTIFY _IOW('x', 0x01, int) +#define SIOCCLUSTER_REMOVENOTIFY _IO( 'x', 0x02) +#define SIOCCLUSTER_GETMEMBERS _IOR('x', 0x03, struct cl_cluster_nodelist) +#define SIOCCLUSTER_SETEXPECTED_VOTES _IOW('x', 0x04, int) +#define SIOCCLUSTER_ISQUORATE _IO( 'x', 0x05) +#define SIOCCLUSTER_ISLISTENING _IOW('x', 0x06, struct cl_listen_request) +#define SIOCCLUSTER_GETALLMEMBERS _IOR('x', 0x07, struct cl_cluster_nodelist) +#define SIOCCLUSTER_SET_VOTES _IOW('x', 0x08, int) +#define SIOCCLUSTER_GET_VERSION _IOR('x', 0x09, struct cl_version) +#define SIOCCLUSTER_SET_VERSION _IOW('x', 0x0a, struct cl_version) +#define SIOCCLUSTER_ISACTIVE _IO( 'x', 0x0b) +#define SIOCCLUSTER_KILLNODE _IOW('x', 0x0c, int) +#define SIOCCLUSTER_GET_JOINCOUNT _IO( 'x', 0x0d) +#define SIOCCLUSTER_SERVICE_REGISTER _IOW('x', 0x0e, char) +#define SIOCCLUSTER_SERVICE_UNREGISTER _IO('x', 0x0f) 
+#define SIOCCLUSTER_SERVICE_JOIN _IO( 'x', 0x10) +#define SIOCCLUSTER_SERVICE_LEAVE _IO( 'x', 0x20) +#define SIOCCLUSTER_SERVICE_SETSIGNAL _IOW('x', 0x30, int) +#define SIOCCLUSTER_SERVICE_STARTDONE _IOW('x', 0x40, unsigned int) +#define SIOCCLUSTER_SERVICE_GETEVENT _IOR('x', 0x50, struct cl_service_event) +#define SIOCCLUSTER_SERVICE_GETMEMBERS _IOR('x', 0x60, struct cl_cluster_node) +#define SIOCCLUSTER_SERVICE_GLOBALID _IOR('x', 0x70, uint32_t) +#define SIOCCLUSTER_SERVICE_SETLEVEL _IOR('x', 0x80, int) +#define SIOCCLUSTER_GETNODE _IOWR('x', 0x90, struct cl_cluster_node) +#define SIOCCLUSTER_BARRIER _IOW('x', 0x0a0, struct cl_barrier_info) + +/* Maximum size of a cluster message */ +#define MAX_CLUSTER_MESSAGE 1500 +#define MAX_CLUSTER_MEMBER_NAME_LEN 255 +#define MAX_BARRIER_NAME_LEN 33 +#define MAX_SA_ADDR_LEN 12 +#define MAX_CLUSTER_NAME_LEN 16 + +/* Well-known cluster port numbers */ +#define CLUSTER_PORT_MEMBERSHIP 1 /* Mustn't block during cluster + * transitions! */ +#define CLUSTER_PORT_SERVICES 2 +#define CLUSTER_PORT_SYSMAN 10 /* Remote execution daemon */ +#define CLUSTER_PORT_CLVMD 11 /* Cluster LVM daemon */ +#define CLUSTER_PORT_SLM 12 /* LVM SLM (simple lock manager) */ + +/* Port numbers above this will be blocked when the cluster is inquorate or in + * transition */ +#define HIGH_PROTECTED_PORT 9 + +/* Reasons for leaving the cluster */ +#define CLUSTER_LEAVEFLAG_DOWN 0 /* Normal shutdown */ +#define CLUSTER_LEAVEFLAG_KILLED 1 +#define CLUSTER_LEAVEFLAG_PANIC 2 +#define CLUSTER_LEAVEFLAG_REMOVED 3 /* This one can reduce quorum */ +#define CLUSTER_LEAVEFLAG_REJECTED 4 /* Not allowed into the cluster in the + * first place */ +#define CLUSTER_LEAVEFLAG_INCONSISTENT 5 /* Our view of the cluster is + * in a minority */ +#define CLUSTER_LEAVEFLAG_DEAD 6 /* Discovered to be dead */ +#define CLUSTER_LEAVEFLAG_FORCE 0x10 /* Forced by command-line */ + +/* OOB messages sent to a local socket */ +#define CLUSTER_OOB_MSG_PORTCLOSED 1 +#define 
CLUSTER_OOB_MSG_STATECHANGE 2 +#define CLUSTER_OOB_MSG_SERVICEEVENT 3 + +/* Sendmsg flags, these are above the normal sendmsg flags so they don't + * interfere */ +#define MSG_NOACK 0x010000 /* Don't need an ACK for this message */ +#define MSG_QUEUE 0x020000 /* Queue the message for sending later */ +#define MSG_MULTICAST 0x080000 /* Message was sent to all nodes in the cluster + */ +#define MSG_ALLINT 0x100000 /* Send out of all interfaces */ + +typedef enum { NODESTATE_REMOTEMEMBER, NODESTATE_JOINING, NODESTATE_MEMBER, + NODESTATE_DEAD } nodestate_t; + + +struct sockaddr_cl { + unsigned short scl_family; + unsigned char scl_flags; + unsigned char scl_port; + int scl_nodeid; +}; + +/* This is how we pass the multicast socket into kernel space. addr is the + * multicast address to use in the address family of the socket (eg for UDP it + * might be 255.255.255.0) */ +struct cl_multicast_sock { + int fd; /* FD of master socket to do multicast on */ + int number; /* Socket number, to match up recvonly & bcast + * sockets */ +}; + +/* Cluster configuration info passed when we join the cluster */ +struct cl_join_cluster_info { + unsigned char votes; + unsigned int expected_votes; + unsigned int two_node; + unsigned int config_version; + + char cluster_name[17]; +}; + + +/* This is the structure, per node, returned from the membership ioctl */ +struct cl_cluster_node { + unsigned int size; + unsigned int node_id; + unsigned int us; + unsigned int leave_reason; + unsigned int incarnation; + nodestate_t state; + char name[MAX_CLUSTER_MEMBER_NAME_LEN]; + unsigned char votes; +}; + +/* The struct passed to the membership ioctls */ +struct cl_cluster_nodelist { + uint32_t max_members; + struct cl_cluster_node *nodes; +}; + +/* Structure passed to SIOCCLUSTER_ISLISTENING */ +struct cl_listen_request { + unsigned char port; + int nodeid; +}; + +/* A Cluster PORTCLOSED message - received by a local user as an OOB message */ +struct cl_portclosed_oob { + unsigned char cmd; /* 
CLUSTER_OOB_MSG_PORTCLOSED */ + unsigned char port; +}; + +/* Get all version numbers or set the config version */ +struct cl_version { + unsigned int major; + unsigned int minor; + unsigned int patch; + unsigned int config; +}; + +/* structure passed to barrier ioctls */ +struct cl_barrier_info { + char cmd; + char name[MAX_BARRIER_NAME_LEN]; + unsigned int flags; + unsigned long arg; +}; + +typedef enum { SERVICE_EVENT_STOP, SERVICE_EVENT_START, SERVICE_EVENT_FINISH, + SERVICE_EVENT_LEAVEDONE } service_event_t; + +typedef enum { SERVICE_START_FAILED, SERVICE_START_JOIN, SERVICE_START_LEAVE } + service_start_t; + +struct cl_service_event { + service_event_t type; + service_start_t start_type; + unsigned int event_id; + unsigned int last_stop; + unsigned int last_start; + unsigned int last_finish; + unsigned int node_count; +}; + + +/* Commands to the barrier ioctl */ +#define BARRIER_IOCTL_REGISTER 1 +#define BARRIER_IOCTL_CHANGE 2 +#define BARRIER_IOCTL_DELETE 3 +#define BARRIER_IOCTL_WAIT 4 + +/* Attributes of a barrier - bitmask */ +#define BARRIER_ATTR_AUTODELETE 1 +#define BARRIER_ATTR_MULTISTEP 2 +#define BARRIER_ATTR_MANUAL 4 +#define BARRIER_ATTR_ENABLED 8 +#define BARRIER_ATTR_CALLBACK 16 + +/* Attribute setting commands */ +#define BARRIER_SETATTR_AUTODELETE 1 +#define BARRIER_SETATTR_MULTISTEP 2 +#define BARRIER_SETATTR_ENABLED 3 +#define BARRIER_SETATTR_NODES 4 +#define BARRIER_SETATTR_CALLBACK 5 +#define BARRIER_SETATTR_TIMEOUT 6 + +#endif diff --git a/daemons/clvmd/libclvm.c b/daemons/clvmd/libclvm.c new file mode 100644 index 000000000..085e57ee3 --- /dev/null +++ b/daemons/clvmd/libclvm.c @@ -0,0 +1,446 @@ +/* + * Copyright (C) 1997-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. 
+ * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* library functions for Cluster LVM Daemon */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clvm.h" +#include "libclvm.h" + +/* CLVM in hex! */ +#define LVM_SIGNATURE 0x434C564D + +#define MAX_CLUSTER_MEMBER_NAME_LEN 255 + +/* NOTE: the LVMD uses the socket FD as the client ID, this means + that any client that calls fork() will inherit the context of + it's parent. */ +static int clvmd_sock = -1; + +static int open_local_sock(void) +{ + int local_socket; + struct sockaddr_un sockaddr; + + /* Open local socket */ + local_socket = socket(PF_UNIX, SOCK_STREAM, 0); + if (local_socket < 0) { + perror("Can't create local socket"); + return -1; + } + + fcntl(local_socket, F_SETFD, !FD_CLOEXEC); + + strcpy(sockaddr.sun_path, CLVMD_SOCKNAME); + sockaddr.sun_family = AF_UNIX; + if (connect + (local_socket, (struct sockaddr *) &sockaddr, sizeof(sockaddr))) { + int saved_errno = errno; + + close(local_socket); + + errno = saved_errno; + return -1; + } + return local_socket; +} + +/* Send a request and return the status */ +static int send_request(char *inbuf, int inlen, char **retbuf) +{ + char outbuf[PIPE_BUF]; + struct clvm_header *outheader = (struct clvm_header *) outbuf; + int len; + int off; + fd_set fds; + + FD_ZERO(&fds); + FD_SET(clvmd_sock, &fds); + + /* Send it to CLVMD */ + if (write(clvmd_sock, inbuf, inlen) != inlen) { + perror("Error writing to CLVMD"); + return -1; + } + + /* Get the response */ + if 
((len = read(clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) { + perror("Error reading CLVMD"); + return -1; + } + if (len == 0) { + fprintf(stderr, "EOF reading CLVMD"); + errno = ENOTCONN; + return -1; + } + + /* Allocate buffer */ + *retbuf = malloc(len + outheader->arglen); + if (!*retbuf) { + errno = ENOMEM; + return -1; + } + + /* Copy the header */ + memcpy(*retbuf, outbuf, len); + outheader = (struct clvm_header *) *retbuf; + + /* Read the returned values */ + off = 1; /* we've already read the first byte */ + + while (off < outheader->arglen && len > 0) { + len = read(clvmd_sock, outheader->args + off, PIPE_BUF); + if (len > 0) + off += len; + } + + /* Was it an error ? */ + if (outheader->status < 0) { + errno = -outheader->status; + return -2; + } + return 0; +} + +/* Build the structure header and parse-out wildcard node names */ +static void build_header(struct clvm_header *head, int cmd, const char *node, + void *data, int len) +{ + head->cmd = cmd; + head->status = 0; + head->flags = 0; + head->clientid = 0; + head->arglen = len; + if (node) { + /* Allow a couple of special node names: + "*" for all nodes, + "." 
for the local node only + */ + if (strcmp(node, "*") == 0) { + head->node[0] = '\0'; + } else if (strcmp(node, ".") == 0) { + head->node[0] = '\0'; + head->flags = CLVMD_FLAG_LOCAL; + } else { + strcpy(head->node, node); + } + } else { + head->node[0] = '\0'; + } +} + +/* Send a message to a(or all) node(s) in the cluster */ +int lvm_cluster_write(char cmd, char *node, void *data, int len) +{ + char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1]; + char *retbuf = NULL; + int status; + struct clvm_header *head = (struct clvm_header *) outbuf; + + if (clvmd_sock == -1) + clvmd_sock = open_local_sock(); + if (clvmd_sock == -1) + return -1; + + build_header(head, cmd, node, data, len); + memcpy(head->node + strlen(head->node) + 1, data, len); + + status = + send_request(outbuf, + sizeof(struct clvm_header) + strlen(head->node) + len, + &retbuf); + if (retbuf) + free(retbuf); + + return status; +} + +/* API: Send a message to a(or all) node(s) in the cluster + and wait for replies */ +int lvm_cluster_request(char cmd, const char *node, void *data, int len, + lvm_response_t ** response, int *num) +{ + char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1]; + int *outptr; + char *inptr; + char *retbuf = NULL; + int status; + int i; + int num_responses = 0; + struct clvm_header *head = (struct clvm_header *) outbuf; + lvm_response_t *rarray; + + *num = 0; + + if (clvmd_sock == -1) + clvmd_sock = open_local_sock(); + if (clvmd_sock == -1) + return -1; + + build_header(head, cmd, node, data, len); + memcpy(head->node + strlen(head->node) + 1, data, len); + + status = + send_request(outbuf, + sizeof(struct clvm_header) + strlen(head->node) + len, + &retbuf); + if (status == 0 || status == -2) { + /* Count the number of responses we got */ + head = (struct clvm_header *) retbuf; + inptr = head->args; + while (inptr[0]) { + num_responses++; + inptr += strlen(inptr) + 1; + inptr += sizeof(int); + inptr += strlen(inptr) + 1; + } + + /* Allocate response 
array. With an extra pair of INTs on the front to sanity + check the pointer when we are given it back to free */ + outptr = + malloc(sizeof(lvm_response_t) * num_responses + + sizeof(int) * 2); + if (!outptr) { + if (retbuf) + free(retbuf); + errno = ENOMEM; + return -1; + } + + *response = (lvm_response_t *) (outptr + 2); + outptr[0] = LVM_SIGNATURE; + outptr[1] = num_responses; + rarray = *response; + + /* Unpack the response into an lvm_response_t array */ + inptr = head->args; + i = 0; + while (inptr[0]) { + strcpy(rarray[i].node, inptr); + inptr += strlen(inptr) + 1; + + rarray[i].status = *(int *) inptr; + inptr += sizeof(int); + + rarray[i].response = malloc(strlen(inptr) + 1); + if (rarray[i].response == NULL) { + /* Free up everything else and return error */ + int j; + for (j = 0; j < i; j++) + free(rarray[i].response); + free(outptr); + errno = ENOMEM; + return -1; + } + + strcpy(rarray[i].response, inptr); + rarray[i].len = strlen(inptr); + inptr += strlen(inptr) + 1; + i++; + } + *num = num_responses; + *response = rarray; + } + + if (retbuf) + free(retbuf); + return status; +} + +/* API: Free reply array */ +int lvm_cluster_free_request(lvm_response_t * response) +{ + int *ptr = (int *) response - 2; + int i; + int num; + + /* Check it's ours to free */ + if (response == NULL || *ptr != LVM_SIGNATURE) { + errno = EINVAL; + return -1; + } + + num = ptr[1]; + for (i = 0; i < num; i++) { + free(response[i].response); + } + free(ptr); + + return 0; +} + +/* These are a "higher-level" API providing black-box lock/unlock + functions for cluster LVM...maybe */ + +/* Set by lock(), used by unlock() */ +static int num_responses; +static lvm_response_t *response; + +int lvm_lock_for_cluster(char scope, char *name, int verbosity) +{ + int status; + int i; + char *args; + int len; + + if (name) { + len = strlen(name) + 2; + args = alloca(len); + strcpy(args + 1, name); + } else { + len = 2; + args = alloca(len); + args[1] = '\0'; + } + args[0] = scope; + + 
status = lvm_cluster_request(CLVMD_CMD_LOCK, + "", args, len, &response, &num_responses); + + /* If any nodes were down then display them and return an error */ + for (i = 0; i < num_responses; i++) { + if (response[i].status == -EHOSTDOWN) { + if (verbosity) + fprintf(stderr, + "clvmd not running on node %s\n", + response[i].node); + status = -1; + } + } + + /* If there was an error then free the memory now as the caller won't + want to do the unlock */ + if (status) { + int saved_errno = errno; + lvm_cluster_free_request(response); + num_responses = 0; + errno = saved_errno; + } + return status; +} + +int lvm_unlock_for_cluster(char scope, char *name, int verbosity) +{ + int status; + int i; + int len; + int failed; + int num_unlock_responses; + char *args; + lvm_response_t *unlock_response; + + /* We failed - this should not have been called */ + if (num_responses == 0) + return 0; + + if (name) { + len = strlen(name) + 2; + args = alloca(len); + strcpy(args + 1, name); + } else { + len = 2; + args = alloca(len); + args[1] = '\0'; + } + args[0] = scope; + + /* See if it failed anywhere */ + failed = 0; + for (i = 0; i < num_responses; i++) { + if (response[i].status != 0) + failed++; + } + + /* If it failed on any nodes then we only unlock on + the nodes that succeeded */ + if (failed) { + for (i = 0; i < num_responses; i++) { + /* Unlock the ones that succeeded */ + if (response[i].status == 0) { + status = lvm_cluster_request(CLVMD_CMD_UNLOCK, + response[i].node, + args, len, + &unlock_response, + &num_unlock_responses); + if (status) { + if (verbosity) + fprintf(stderr, + "cluster command to node %s failed: %s\n", + response[i].node, + strerror(errno)); + } else if (unlock_response[0].status != 0) { + if (verbosity > 1) + fprintf(stderr, + "unlock on node %s failed: %s\n", + response[i].node, + strerror(unlock_response + [0].status)); + } + lvm_cluster_free_request(unlock_response); + } else { + if (verbosity) + fprintf(stderr, + "command on node %s failed: 
'%s' - will be left locked\n", + response[i].node, + strerror(response[i].status)); + } + } + } else { + /* All OK, we can do a full cluster unlock */ + status = lvm_cluster_request(CLVMD_CMD_UNLOCK, + "", + args, len, + &unlock_response, + &num_unlock_responses); + if (status) { + if (verbosity > 1) + fprintf(stderr, "cluster command failed: %s\n", + strerror(errno)); + } else { + for (i = 0; i < num_unlock_responses; i++) { + if (unlock_response[i].status != 0) { + if (verbosity > 1) + fprintf(stderr, + "unlock on node %s failed: %s\n", + response[i].node, + strerror(unlock_response + [0].status)); + } + } + } + lvm_cluster_free_request(unlock_response); + } + lvm_cluster_free_request(response); + + return 0; +} diff --git a/daemons/clvmd/libclvm.h b/daemons/clvmd/libclvm.h new file mode 100644 index 000000000..bd735ce46 --- /dev/null +++ b/daemons/clvmd/libclvm.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 1997-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LIBCLVM_H +#define _LIBCLVM_H + +typedef struct lvm_response { + char node[255]; + char *response; + int status; + int len; + +} lvm_response_t; + +extern int lvm_cluster_request(char cmd, const char *node, void *data, int len, + lvm_response_t ** response, int *num); +extern int lvm_cluster_write(char cmd, char *node, void *data, int len); +extern int lvm_cluster_free_request(lvm_response_t * response); + +/* The "high-level" API */ +extern int lvm_lock_for_cluster(char scope, char *name, int verbosity); +extern int lvm_unlock_for_cluster(char scope, char *name, int verbosity); + +#endif diff --git a/daemons/clvmd/lvm-functions.c b/daemons/clvmd/lvm-functions.c new file mode 100644 index 000000000..400d33ff5 --- /dev/null +++ b/daemons/clvmd/lvm-functions.c @@ -0,0 +1,446 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libdlm.h" +#include "clvm.h" +#include "clvmd-comms.h" +#include "clvmd.h" +#include "lvm-functions.h" + +/* LVM2 headers */ +#include "toolcontext.h" +#include "log.h" +#include "activate.h" +#include "hash.h" +#include "locking.h" + +static struct cmd_context *cmd = NULL; +static struct hash_table *lv_hash = NULL; + +struct lv_info { + int lock_id; + int lock_mode; +}; + +/* Return the mode a lock is currently held at (or -1 if not held) */ +static int get_current_lock(char *resource) +{ + struct lv_info *lvi; + + lvi = hash_lookup(lv_hash, resource); + if (lvi) { + return lvi->lock_mode; + } else { + return -1; + } +} + +/* Called at shutdown to tidy the lockspace */ +void unlock_all() +{ + struct hash_node *v; + hash_iterate(v, lv_hash) { + struct lv_info *lvi = hash_get_data(lv_hash, v); + + sync_unlock(hash_get_key(lv_hash, v), lvi->lock_id); + } +} + +/* Gets a real lock and keeps the info in the hash table */ +int hold_lock(char *resource, int mode, int flags) +{ + int status; + int saved_errno; + struct lv_info *lvi; + + flags &= LKF_NOQUEUE; /* Only LKF_NOQUEUE is valid here */ + + lvi = hash_lookup(lv_hash, resource); + if (lvi) { + /* Already exists - convert it */ + status = + sync_lock(resource, mode, LKF_CONVERT | flags, + &lvi->lock_id); + saved_errno = errno; + if (!status) + lvi->lock_mode = mode; + + if (status) { + DEBUGLOG("hold_lock. 
convert to %d failed: %s\n", mode, + strerror(errno)); + } + errno = saved_errno; + } else { + lvi = malloc(sizeof(struct lv_info)); + if (!lvi) + return -1; + + lvi->lock_mode = mode; + status = sync_lock(resource, mode, flags, &lvi->lock_id); + saved_errno = errno; + if (status) { + free(lvi); + DEBUGLOG("hold_lock. lock at %d failed: %s\n", mode, + strerror(errno)); + } else { + hash_insert(lv_hash, resource, lvi); + } + errno = saved_errno; + } + return status; +} + +/* Unlock and remove it from the hash table */ +int hold_unlock(char *resource) +{ + struct lv_info *lvi; + int status; + int saved_errno; + + lvi = hash_lookup(lv_hash, resource); + + if (!lvi) { + DEBUGLOG("hold_unlock, lock not already held\n"); + return 0; + } + + status = sync_unlock(resource, lvi->lock_id); + saved_errno = errno; + if (!status) { + hash_remove(lv_hash, resource); + free(lvi); + } else { + DEBUGLOG("hold_unlock. unlock failed(%d): %s\n", status, + strerror(errno)); + } + + errno = saved_errno; + return status; +} + +/* Watch the return codes here. + liblvm API functions return 1(true) for success, 0(false) for failure and don't set errno. + libdlm API functions return 0 for success, -1 for failure and do set errno. + These functions here return 0 for success or >0 for failure (where the retcode is errno) +*/ + +/* Activate LV exclusive or non-exclusive */ +static int do_activate_lv(char *resource, int mode) +{ + int oldmode; + int status; + int activate_lv; + struct lvinfo lvi; + + /* Is it already open ? */ + oldmode = get_current_lock(resource); + if (oldmode == mode) { + return 0; /* Nothing to do */ + } + + /* Does the config file want us to activate this LV ? */ + if (!lv_activation_filter(cmd, resource, &activate_lv)) + return EIO; + + if (!activate_lv) + return 0; /* Success, we did nothing! */ + + /* Do we need to activate exclusively? 
*/ + if (activate_lv == 2) + mode = LKM_EXMODE; + + /* OK, try to get the lock */ + status = hold_lock(resource, mode, LKF_NOQUEUE); + if (status) + return errno; + + /* If it's suspended then resume it */ + if (!lv_info_by_lvid(cmd, resource, &lvi)) + return EIO; + + if (lvi.suspended) + if (!lv_resume(cmd, resource)) + return EIO; + + /* Now activate it */ + if (!lv_activate(cmd, resource)) + return EIO; + + return 0; +} + +/* Resume the LV if it was active */ +static int do_resume_lv(char *resource) +{ + int oldmode; + + /* Is it open ? */ + oldmode = get_current_lock(resource); + if (oldmode == -1) { + DEBUGLOG("do_deactivate_lock, lock not already held\n"); + return 0; /* We don't need to do anything */ + } + + if (!lv_resume_if_active(cmd, resource)) + return EIO; + + return 0; +} + +/* Suspend the device if active */ +static int do_suspend_lv(char *resource) +{ + int oldmode; + struct lvinfo lvi; + + /* Is it open ? */ + oldmode = get_current_lock(resource); + if (oldmode == -1) { + DEBUGLOG("do_suspend_lv, lock held at %d\n", oldmode); + return 0; /* Not active, so it's OK */ + } + + /* Only suspend it if it exists */ + if (!lv_info_by_lvid(cmd, resource, &lvi)) + return EIO; + + if (lvi.exists) { + if (!lv_suspend_if_active(cmd, resource)) { + return EIO; + } + } + return 0; +} + +static int do_deactivate_lv(char *resource) +{ + int oldmode; + int status; + + /* Is it open ? 
*/ + oldmode = get_current_lock(resource); + if (oldmode == -1) { + DEBUGLOG("do_deactivate_lock, lock not already held\n"); + return 0; /* We don't need to do anything */ + } + + if (!lv_deactivate(cmd, resource)) + return EIO; + + status = hold_unlock(resource); + if (status) + return errno; + + return 0; +} + +/* This is the LOCK_LV part that happens on all nodes in the cluster - + it is responsible for the interaction with device-mapper and LVM */ +int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource) +{ + int status = 0; + + DEBUGLOG("do_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n", + resource, command, lock_flags); + + if (!cmd->config_valid || config_files_changed(cmd)) { + /* Reinitialise various settings inc. logging, filters */ + if (!refresh_toolcontext(cmd)) { + log_error("Updated config file invalid. Aborting."); + return EINVAL; + } + } + + switch (command) { + case LCK_LV_EXCLUSIVE: + status = do_activate_lv(resource, LKM_EXMODE); + break; + + case LCK_LV_SUSPEND: + status = do_suspend_lv(resource); + break; + + case LCK_UNLOCK: + case LCK_LV_RESUME: /* if active */ + status = do_resume_lv(resource); + break; + + case LCK_LV_ACTIVATE: + status = do_activate_lv(resource, LKM_CRMODE); + break; + + case LCK_LV_DEACTIVATE: + status = do_deactivate_lv(resource); + break; + + default: + DEBUGLOG("Invalid LV command 0x%x\n", command); + status = EINVAL; + break; + } + + /* clean the pool for another command */ + pool_empty(cmd->mem); + + DEBUGLOG("Command return is %d\n", status); + return status; +} + +/* Functions to do on the local node only BEFORE the cluster-wide stuff above happens */ +int pre_lock_lv(unsigned char command, unsigned char lock_flags, char *resource) +{ + /* Nearly all the stuff happens cluster-wide. Apart from SUSPEND. Here we get the + lock out on this node (because we are the node modifying the metadata) + before suspending cluster-wide. 
+ */ + if (command == LCK_LV_SUSPEND) { + DEBUGLOG("pre_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n", + resource, command, lock_flags); + + if (hold_lock(resource, LKM_PWMODE, LKF_NOQUEUE)) + return errno; + } + return 0; +} + +/* Functions to do on the local node only AFTER the cluster-wide stuff above happens */ +int post_lock_lv(unsigned char command, unsigned char lock_flags, + char *resource) +{ + /* Opposite of above, done on resume after a metadata update */ + if (command == LCK_LV_RESUME) { + int oldmode; + + DEBUGLOG + ("post_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n", + resource, command, lock_flags); + + /* If the lock state is PW then restore it to what it was */ + oldmode = get_current_lock(resource); + if (oldmode == LKM_PWMODE) { + struct lvinfo lvi; + + if (!lv_info_by_lvid(cmd, resource, &lvi)) + return EIO; + + if (lvi.exists) { + if (hold_lock(resource, LKM_CRMODE, 0)) + return errno; + } else { + if (hold_unlock(resource)) + return errno; + } + } + } + return 0; +} + +/* Check if a VG is un use by LVM1 so we don't stomp on it */ +int do_check_lvm1(char *vgname) +{ + int status; + + status = check_lvm1_vg_inactive(cmd, vgname); + + return status == 1 ? 0 : EBUSY; +} + +/* + * Ideally, clvmd should be started before any LVs are active + * but this may not be the case... + * I suppose this also comes in handy if clvmd crashes, not that it would! + */ +static void *get_initial_state() +{ + char lv[64], vg[64], flags[25]; + char uuid[65]; + char line[255]; + FILE *lvs = + popen + ("/sbin/lvm lvs --nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr", + "r"); + + if (!lvs) + return NULL; + + while (fgets(line, sizeof(line), lvs)) { + if (sscanf(line, "%s %s %s\n", vg, lv, flags) == 3) { + /* States: s:suspended a:active S:dropped snapshot I:invalid snapshot */ + if (flags[4] == 'a' || flags[4] == 's') { /* is it active or suspended? 
*/ + /* Convert hyphen-separated UUIDs into one */ + memcpy(&uuid[0], &vg[0], 6); + memcpy(&uuid[6], &vg[7], 4); + memcpy(&uuid[10], &vg[12], 4); + memcpy(&uuid[14], &vg[17], 4); + memcpy(&uuid[18], &vg[22], 4); + memcpy(&uuid[22], &vg[27], 4); + memcpy(&uuid[26], &vg[32], 6); + memcpy(&uuid[32], &lv[0], 6); + memcpy(&uuid[38], &lv[7], 4); + memcpy(&uuid[42], &lv[12], 4); + memcpy(&uuid[46], &lv[17], 4); + memcpy(&uuid[50], &lv[22], 4); + memcpy(&uuid[54], &lv[27], 4); + memcpy(&uuid[58], &lv[32], 6); + uuid[64] = '\0'; + + DEBUGLOG("getting initial lock for %s\n", uuid); + hold_lock(uuid, LKM_CRMODE, LKF_NOQUEUE); + } + } + } + fclose(lvs); + return NULL; +} + +void init_lvhash() +{ + /* Create hash table for keeping LV locks & status */ + lv_hash = hash_create(100); +} + +/* Called to initialise the LVM context of the daemon */ +int init_lvm(void) +{ + if (!(cmd = create_toolcontext(NULL))) { + log_error("Failed to allocate command context"); + return 0; + } + + /* Use LOG_DAEMON for syslog messages instead of LOG_USER */ + init_syslog(LOG_DAEMON); + + get_initial_state(); + + return 1; +} diff --git a/daemons/clvmd/lvm-functions.h b/daemons/clvmd/lvm-functions.h new file mode 100644 index 000000000..750eba999 --- /dev/null +++ b/daemons/clvmd/lvm-functions.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Functions in lvm-functions.c */ + +#ifndef _LVM_FUNCTIONS_H +#define _LVM_FUNCTIONS_H + +extern int pre_lock_lv(unsigned char lock_cmd, unsigned char lock_flags, + char *resource); +extern int do_lock_lv(unsigned char lock_cmd, unsigned char lock_flags, + char *resource); +extern int post_lock_lv(unsigned char lock_cmd, unsigned char lock_flags, + char *resource); +extern int do_check_lvm1(char *vgname); +extern int init_lvm(void); +extern void init_lvhash(void); + +extern int hold_unlock(char *resource); +extern int hold_lock(char *resource, int mode, int flags); +extern void unlock_all(void); + +#endif diff --git a/daemons/clvmd/system-lv.c b/daemons/clvmd/system-lv.c new file mode 100644 index 000000000..5b359cdf1 --- /dev/null +++ b/daemons/clvmd/system-lv.c @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Routines dealing with the System LV */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libdlm.h" +#include "log.h" +#include "list.h" +#include "locking.h" +#include "system-lv.h" +#include "clvmd-comms.h" +#ifdef HAVE_CCS +#include "ccs.h" +#endif + +#define SYSTEM_LV_FILESYSTEM "ext2" +#define SYSTEM_LV_MOUNTPOINT "/tmp/.clvmd-XXXXXX" + +extern char *config_filename(void); + +static char system_lv_name[PATH_MAX] = { '\0' }; +static char mount_point[PATH_MAX] = { '\0' }; +static int mounted = 0; +static int mounted_rw = 0; +static int lockid; +static const char *lock_name = "CLVM_SYSTEM_LV"; + +/* Look in /proc/mounts or (as a last resort) /etc/mtab to + see if the system-lv is mounted. If it is mounted and we + think it's not then abort because we don't have the right + lock status and we don't know what other processes are doing with it. + + Returns 1 for mounted, 0 for not mounted so it matches the condition + of the "mounted" static variable above. 
+*/ +static int is_really_mounted(void) +{ + FILE *mountfile; + struct mntent *ment; + + mountfile = setmntent("/proc/mounts", "r"); + if (!mountfile) { + mountfile = setmntent("/etc/mtab", "r"); + if (!mountfile) { + log_error("Unable to open /proc/mounts or /etc/mtab"); + return -1; + } + } + + /* Look for system LV name in the file */ + do { + ment = getmntent(mountfile); + if (ment) { + if (strcmp(ment->mnt_fsname, system_lv_name) == 0) { + endmntent(mountfile); + return 1; + } + } + } + while (ment); + + endmntent(mountfile); + return 0; +} + +/* Get the system LV name from the config file */ +static int find_system_lv(void) +{ + if (system_lv_name[0] == '\0') { +#ifdef HAVE_CCS + int error; + ccs_node_t *ctree; + + /* Read the cluster config file */ + /* Open the config file */ + error = open_ccs_file(&ctree, "clvm.ccs"); + if (error) { + perror("reading config file"); + return -1; + } + + strcpy(system_lv_name, find_ccs_str(ctree, + "cluster/systemlv", '/', + "/dev/vg/system_lv")); + + /* Finished with config file */ + close_ccs_file(ctree); +#else + if (getenv("CLVMD_SYSTEM_LV")) + strcpy(system_lv_name, getenv("CLVMD_SYSTEM_LV")); + else + return -1; +#endif + } + + /* See if it has been mounted outside our control */ + if (is_really_mounted() != mounted) { + log_error + ("The system LV state has been mounted/umounted outside the control of clvmd\n" + "it cannot not be used for cluster communications until this is fixed.\n"); + return -1; + } + return 0; +} + +/* No prizes */ +int system_lv_umount(void) +{ + if (!mounted) + return 0; + + if (umount(mount_point) < 0) { + log_error("umount of system LV (%s) failed: %m\n", + system_lv_name); + return -1; + } + + sync_unlock(lock_name, lockid); + mounted = 0; + + /* Remove the mount point */ + rmdir(mount_point); + + return 0; +} + +int system_lv_mount(int readwrite) +{ + int status; + int saved_errno; + int fd; + + if (find_system_lv()) { + errno = EBUSY; + return -1; + } + + /* Is it already mounted 
suitably? */ + if (mounted) { + if (!readwrite || (readwrite && mounted_rw)) { + return 0; + } else { + /* Mounted RO and we need RW */ + if (system_lv_umount() < 0) + return -1; + } + } + + /* Randomize the mount point */ + strcpy(mount_point, SYSTEM_LV_MOUNTPOINT); + fd = mkstemp(mount_point); + if (fd < 0) { + log_error("mkstemp for system LV mount point failed: %m\n"); + return -1; + } + + /* Race condition here but there's no mkstemp for directories */ + close(fd); + unlink(mount_point); + mkdir(mount_point, 0600); + + /* Make sure we have a system-lv lock */ + status = + sync_lock(lock_name, (readwrite) ? LKM_EXMODE : LKM_CRMODE, 0, + &lockid); + if (status < 0) + return -1; + + /* Mount it */ + if (mount(system_lv_name, mount_point, SYSTEM_LV_FILESYSTEM, + MS_MGC_VAL | MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_SYNCHRONOUS + | (readwrite ? 0 : MS_RDONLY), NULL) < 0) { + /* mount(2) returns EINVAL if the volume has no FS on it. So, if we want to + write to it we try to make a filesystem in it and retry the mount */ + if (errno == EINVAL && readwrite) { + char cmd[256]; + + log_error("Attempting mkfs on system LV device %s\n", + system_lv_name); + snprintf(cmd, sizeof(cmd), "/sbin/mkfs -t %s %s", + SYSTEM_LV_FILESYSTEM, system_lv_name); + system(cmd); + + if (mount + (system_lv_name, mount_point, SYSTEM_LV_FILESYSTEM, + MS_MGC_VAL | MS_NOSUID | MS_NODEV | MS_NOEXEC | + MS_SYNCHRONOUS | (readwrite ? 0 : MS_RDONLY), + NULL) == 0) + goto mounted; + } + + saved_errno = errno; + log_error("mount of system LV (%s, %s, %s) failed: %m\n", + system_lv_name, mount_point, SYSTEM_LV_FILESYSTEM); + sync_unlock(lock_name, lockid); + errno = saved_errno; + return -1; + } + + mounted: +/* Set the internal flags */ + mounted = 1; + mounted_rw = readwrite; + + return 0; +} + +/* Erase *all* files in the root directory of the system LV. + This *MUST* be called with an appropriate lock held! 
+ The LV is left mounted RW because it is assumed that the + caller wants to write something here after clearing some space */ +int system_lv_eraseall(void) +{ + DIR *dir; + struct dirent *ent; + char fname[PATH_MAX]; + + /* Must be mounted R/W */ + system_lv_mount(1); + + dir = opendir(mount_point); + if (!dir) + return -1; + + while ((ent = readdir(dir))) { + struct stat st; + snprintf(fname, sizeof(fname), "%s/%s", mount_point, + ent->d_name); + + if (stat(fname, &st)) { + if (S_ISREG(st.st_mode)) + unlink(fname); + } + } + closedir(dir); + return 0; +} + +/* This is a "high-level" routine - it mounts the system LV, writes + the data into a file named after this node and then umounts the LV + again */ +int system_lv_write_data(char *data, ssize_t len) +{ + struct utsname nodeinfo; + char fname[PATH_MAX]; + int outfile; + ssize_t thiswrite; + ssize_t written; + + if (system_lv_mount(1)) + return -1; + + /* Build the file name we are goingto use. */ + uname(&nodeinfo); + snprintf(fname, sizeof(fname), "%s/%s", mount_point, nodeinfo.nodename); + + /* Open the file for output */ + outfile = open(fname, O_RDWR | O_CREAT | O_TRUNC, 0600); + if (outfile < 0) { + int saved_errno = errno; + system_lv_umount(); + errno = saved_errno; + return -1; + } + + written = 0; + do { + thiswrite = write(outfile, data + written, len - written); + if (thiswrite > 0) + written += thiswrite; + + } while (written < len && thiswrite > 0); + + close(outfile); + + system_lv_umount(); + return (thiswrite < 0) ? -1 : 0; +} + +/* This is a "high-level" routine - it mounts the system LV, reads + the data from a named file and then umounts the LV + again */ +int system_lv_read_data(char *fname_base, char *data, ssize_t *len) +{ + char fname[PATH_MAX]; + int outfile; + struct stat st; + ssize_t filesize; + ssize_t thisread; + ssize_t readbytes; + + if (system_lv_mount(0)) + return -1; + + /* Build the file name we are going to use. 
*/ + snprintf(fname, sizeof(fname), "%s/%s", mount_point, fname_base); + + /* Get the file size and stuff. Actually we only need the file size but + this will also check that the file exists */ + if (stat(fname, &st) < 0) { + int saved_errno = errno; + + log_error("stat of file %s on system LV failed: %m\n", fname); + system_lv_umount(); + errno = saved_errno; + return -1; + } + filesize = st.st_size; + + outfile = open(fname, O_RDONLY); + if (outfile < 0) { + int saved_errno = errno; + + log_error("open of file %s on system LV failed: %m\n", fname); + system_lv_umount(); + errno = saved_errno; + return -1; + } + + readbytes = 0; + do { + thisread = + read(outfile, data + readbytes, filesize - readbytes); + if (thisread > 0) + readbytes += thisread; + + } while (readbytes < filesize && thisread > 0); + + close(outfile); + + system_lv_umount(); + + *len = readbytes; + return (thisread < 0) ? -1 : 0; +} diff --git a/daemons/clvmd/system-lv.h b/daemons/clvmd/system-lv.h new file mode 100644 index 000000000..b90ca4423 --- /dev/null +++ b/daemons/clvmd/system-lv.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _CLVM_SYSTEM_LV_H +#define _CLVM_SYSTEM_LV_H + +/* Prototypes for System-LV functions */ + +/* "low-level" functions */ +extern int system_lv_umount(void); +extern int system_lv_mount(int readwrite); +extern int system_lv_eraseall(void); + +/* "high-level" functions */ +extern int system_lv_write_data(char *data, ssize_t len); +extern int system_lv_read_data(char *fname_base, char *data, ssize_t *len); + +#endif diff --git a/daemons/clvmd/tcp-comms.c b/daemons/clvmd/tcp-comms.c new file mode 100644 index 000000000..2e0406b3a --- /dev/null +++ b/daemons/clvmd/tcp-comms.c @@ -0,0 +1,480 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 2002-2003 All rights reserved. +** +******************************************************************************* +******************************************************************************/ + +/* This provides the inter-clvmd communications for a system without CMAN. + There is a listening TCP socket which accepts new connections in the + normal way. + It can also make outgoing connnections to the other clvmd nodes. 
+*/ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ccs.h" +#include "clvm.h" +#include "clvmd-comms.h" +#include "clvmd.h" +#include "clvmd-gulm.h" +#include "hash.h" + +#define DEFAULT_TCP_PORT 21064 + +static int listen_fd = -1; +static int tcp_port; +struct hash_table *sock_hash; + +static int get_tcp_port(int default_port); +static int get_our_ip_address(char *addr, int *family); +static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid, + struct local_client **new_client); + +/* Called by init_cluster() to open up the listening socket */ +// TODO: IPv6 compat. +int init_comms() +{ + struct sockaddr *addr = NULL; + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + int addr_len; + int family; + char address[MAX_CSID_LEN]; + + sock_hash = hash_create(100); + tcp_port = get_tcp_port(DEFAULT_TCP_PORT); + + /* Get IP address and IP type */ + get_our_ip_address(address, &family); + if (family == AF_INET) + { + memcpy(&addr4.sin_addr, addr, sizeof(struct in_addr)); + addr = (struct sockaddr *)&addr4; + addr4.sin_port = htons(tcp_port); + addr_len = sizeof(addr4); + } + else + { + memcpy(&addr6.sin6_addr, addr, sizeof(struct in6_addr)); + addr = (struct sockaddr *)&addr6; + addr6.sin6_port = htons(tcp_port); + addr_len = sizeof(addr6); + } + + listen_fd = socket(family, SOCK_STREAM, 0); + + if (listen_fd < 0) + { + return -1; + } + else + { + int one = 1; + setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int)); + } + + addr->sa_family = family; + + if (bind(listen_fd, addr, addr_len) < 0) + { + DEBUGLOG("Can't bind to port\n"); + syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port); + close(listen_fd); + return -1; + } + + listen(listen_fd, 5); + + return 0; +} + +void tcp_remove_client(char *csid) + { + struct local_client 
*client; + DEBUGLOG("tcp_remove_client\n"); + + /* Don't actually close the socket here - that's the + job of clvmd.c whch will do the job when it notices the + other end has gone. We just need to remove the client(s) from + the hash table so we don't try to use it for sending any more */ + client = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN); + if (client) + { + hash_remove_binary(sock_hash, csid, MAX_CSID_LEN); + } + + /* Look for a mangled one too */ + csid[0] ^= 0x80; + + client = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN); + if (client) + { + hash_remove_binary(sock_hash, csid, MAX_CSID_LEN); + } + + /* Put it back as we found it */ + csid[0] ^= 0x80; +} + +int alloc_client(int fd, char *csid, struct local_client **new_client) +{ + struct local_client *client; + + DEBUGLOG("alloc_client %d csid = [%d.%d.%d.%d]\n", fd,csid[0],csid[1],csid[2],csid[3]); + + /* Create a local_client and return it */ + client = malloc(sizeof(struct local_client)); + if (!client) + { + DEBUGLOG("malloc failed\n"); + return -1; + } + + memset(client, 0, sizeof(struct local_client)); + client->fd = fd; + client->type = CLUSTER_DATA_SOCK; + client->callback = read_from_tcpsock; + if (new_client) + *new_client = client; + + /* Add to our list of node sockets */ + if (hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN)) + { + DEBUGLOG("alloc_client mangling CSID for second connection\n"); + /* This is a duplicate connection but we can't close it because + the other end may already have started sending. 
+ So, we mangle the IP address and keep it, all sending will + go out of the main FD + */ + csid[0] ^= 0x80; + client->bits.net.flags = 1; /* indicate mangled CSID */ + + /* If it still exists then kill the connection as we should only + ever have one incoming connection from each node */ + if (hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN)) + { + DEBUGLOG("Multiple incoming connections from node\n"); + syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]); + + free(client); + errno = ECONNREFUSED; + return -1; + } + } + hash_insert_binary(sock_hash, csid, MAX_CSID_LEN, client); + + return 0; +} + +int get_main_cluster_fd() +{ + return listen_fd; +} + + +/* Read on main comms (listen) socket, accept it */ +int cluster_fd_callback(struct local_client *fd, char *buf, int len, char *csid, + struct local_client **new_client) +{ + int newfd; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + int status; + char name[MAX_CLUSTER_MEMBER_NAME_LEN]; + + DEBUGLOG("cluster_fd_callback\n"); + *new_client = NULL; + newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); + + DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno); + if (!newfd) + { + syslog(LOG_ERR, "error in accept: %m"); + errno = EAGAIN; + return -1; /* Don't return an error or clvmd will close the listening FD */ + } + + /* Check that the client is a member of the cluster + and reject if not. 
+ // FIXME: IPv4 specific + */ + if (name_from_csid((char *)&addr.sin_addr.s_addr, name) < 0) + { + char *ip = (char *)&addr.sin_addr.s_addr; + syslog(LOG_ERR, "Got connect from non-cluster node %d.%d.%d.%d\n", + ip[0], ip[1], ip[2], ip[3]); + DEBUGLOG("Got connect from non-cluster node %d.%d.%d.%d\n", + ip[0], ip[1], ip[2], ip[3]); + close(newfd); + + errno = EAGAIN; + return -1; + } + + status = alloc_client(newfd, (char *)&addr.sin_addr.s_addr, new_client); + if (status) + { + DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status); + close(newfd); + /* See above... */ + errno = EAGAIN; + return -1; + } + DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client); + return newfd; +} + + +static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid, + struct local_client **new_client) +{ + struct sockaddr_in addr; + socklen_t slen = sizeof(addr); + int status; + + DEBUGLOG("read_from_tcpsock fd %d\n", client->fd); + *new_client = NULL; + + /* Get "csid" */ + getpeername(client->fd, (struct sockaddr *)&addr, &slen); + memcpy(csid, &addr.sin_addr.s_addr, MAX_CSID_LEN); + + status = read(client->fd, buf, len); + + DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno); + + /* Remove it from the hash table if there's an error, clvmd will + remove the socket from its lists and free the client struct */ + if (status == 0 || + (status < 0 && errno != EAGAIN && errno != EINTR)) + { + char remcsid[MAX_CSID_LEN]; + + memcpy(remcsid, csid, MAX_CSID_LEN); + close(client->fd); + + /* If the csid was mangled, then make sure we remove the right entry */ + if (client->bits.net.flags) + remcsid[0] ^= 0x80; + hash_remove_binary(sock_hash, remcsid, MAX_CSID_LEN); + + /* Tell cluster manager layer */ + add_down_node(remcsid); + } + return status; +} + +static int connect_csid(char *csid, struct local_client **newclient) +{ + int fd; + struct sockaddr_in addr; + int status; + + DEBUGLOG("Connecting 
socket\n"); + fd = socket(PF_INET, SOCK_STREAM, 0); + + if (fd < 0) + { + syslog(LOG_ERR, "Unable to create new socket: %m"); + return -1; + } + + addr.sin_family = AF_INET; + memcpy(&addr.sin_addr.s_addr, csid, MAX_CSID_LEN); + addr.sin_port = htons(tcp_port); + + DEBUGLOG("Connecting socket %d\n", fd); + if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in)) < 0) + { + syslog(LOG_ERR, "Unable to connect to remote node: %m"); + DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno)); + close(fd); + return -1; + } + + status = alloc_client(fd, csid, newclient); + if (status) + close(fd); + else + add_client(*newclient); + + /* If we can connect to it, it must be running a clvmd */ + add_up_node(csid); + return status; +} + +/* Send a message to a known CSID */ +static int tcp_send_message(void *buf, int msglen, unsigned char *csid, const char *errtext) +{ + int status; + struct local_client *client; + char ourcsid[MAX_CSID_LEN]; + + assert(csid); + + DEBUGLOG("tcp_send_message, csid = [%d.%d.%d.%d], msglen = %d\n", csid[0],csid[1],csid[2],csid[3], msglen); + + /* Don't connect to ourself */ + get_our_csid(ourcsid); + if (memcmp(csid, ourcsid, MAX_CSID_LEN) == 0) + return msglen; + + client = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN); + if (!client) + { + status = connect_csid(csid, &client); + if (status) + return -1; + } + DEBUGLOG("tcp_send_message, fd = %d\n", client->fd); + + return write(client->fd, buf, msglen); +} + + +int cluster_send_message(void *buf, int msglen, char *csid, const char *errtext) +{ + int status=0; + + DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen); + + /* If csid is NULL then send to all known (not just connected) nodes */ + if (!csid) + { + void *context = NULL; + char loop_csid[MAX_CSID_LEN]; + + /* Loop round all gulm-known nodes */ + while (get_next_node_csid(&context, loop_csid)) + { + status = tcp_send_message(buf, msglen, loop_csid, errtext); + if (status == 0 || + 
(status < 0 && (errno == EAGAIN || errno == EINTR))) + break; + } + } + else + { + + status = tcp_send_message(buf, msglen, csid, errtext); + } + return status; +} + +static int get_tcp_port(int default_port) +{ + int ccs_handle; + int port = default_port; + char *portstr; + + ccs_handle = ccs_connect(); + if (ccs_handle) + { + return port; + } + + if (!ccs_get(ccs_handle, "//clvm/@port", &portstr)) + { + port = atoi(portstr); + free(portstr); + + if (port <= 0 && port >= 65536) + port = default_port; + } + ccs_disconnect(ccs_handle); + + DEBUGLOG("Using port %d for communications\n", port); + return port; +} + +/* To get our own IP address we get the locally bound address of the + socket that's talking to GULM in the assumption(eek) that it will + be on the "right" network in a multi-homed system */ +static int get_our_ip_address(char *addr, int *family) +{ + /* Use a sockaddr_in6 to make sure it's big enough */ + struct sockaddr_in6 saddr; + int socklen = sizeof(saddr); + + if (!getsockname(gulm_fd(), (struct sockaddr *)&saddr, &socklen)) + { + if (saddr.sin6_family == AF_INET6) + { + memcpy(addr, &saddr.sin6_addr, sizeof(saddr.sin6_addr)); + } + else + { + struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr; + memcpy(addr, &sin4->sin_addr, sizeof(sin4->sin_addr)); + } + return 0; + } + return -1; +} + +/* Public version of above for those that don't care what protocol + we're using */ +void get_our_csid(char *csid) +{ + static char our_csid[MAX_CSID_LEN]; + static int got_csid = 0; + + if (!got_csid) + { + int family; + + memset(our_csid, 0, sizeof(our_csid)); + if (get_our_ip_address(our_csid, &family)) + { + got_csid = 1; + } + } + memcpy(csid, our_csid, MAX_CSID_LEN); +} + +/* Get someone else's IP address from DNS */ +int get_ip_address(char *node, char *addr) +{ + struct hostent *he; + + memset(addr, 0, MAX_CSID_LEN); + + // TODO: what do we do about multi-homed hosts ??? + // CCSs ip_interfaces solved this but some bugger removed it. 
+ + /* Try IPv6 first. The man page for gethostbyname implies that + it will lookup ip6 & ip4 names, but it seems not to */ + he = gethostbyname2(node, AF_INET6); + if (!he) + he = gethostbyname2(node, AF_INET); + if (!he) + return -1; + + /* For IPv4 address just use the lower 4 bytes */ + memcpy(&addr, he->h_addr_list[0], + he->h_length); + + return 0; +} diff --git a/daemons/clvmd/tcp-comms.h b/daemons/clvmd/tcp-comms.h new file mode 100644 index 000000000..8dafd441c --- /dev/null +++ b/daemons/clvmd/tcp-comms.h @@ -0,0 +1,7 @@ +#include + +#define MAX_CLUSTER_MESSAGE 1600 +#define MAX_CSID_LEN sizeof(struct in6_addr) +#define MAX_CLUSTER_MEMBER_NAME_LEN 128 + +extern int init_comms(void); diff --git a/include/.symlinks b/include/.symlinks index 74987de7c..54d27bc6a 100644 --- a/include/.symlinks +++ b/include/.symlinks @@ -1,3 +1,4 @@ +../daemons/clvmd/clvm.h ../lib/activate/activate.h ../lib/activate/targets.h ../lib/cache/lvmcache.h diff --git a/lib/Makefile.in b/lib/Makefile.in index f0da66e7a..75ff735b9 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -104,6 +104,14 @@ ifeq ("@POOL@", "internal") format_pool/pool_label.c endif +ifeq ("@CLUSTER@", "internal") + SOURCES += locking/cluster_locking.c +endif + +ifeq ("@CLUSTER@", "shared") + SUBDIRS += locking +endif + ifeq ("@SNAPSHOTS@", "internal") SOURCES += snapshot/snapshot.c endif diff --git a/lib/locking/Makefile.in b/lib/locking/Makefile.in new file mode 100644 index 000000000..d28cfbfe5 --- /dev/null +++ b/lib/locking/Makefile.in @@ -0,0 +1,32 @@ +# +# Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004 Red Hat, Inc. All rights reserved. +# +# This file is part of the LVM2. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ + +SOURCES = cluster_locking.c + +LIB_SHARED = liblvm2clusterlock.so + +include $(top_srcdir)/make.tmpl + +.PHONY: install + +install: liblvm2clusterlock.so + $(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \ + $(libdir)/liblvm2clusterlock.so.$(LIB_VERSION) + $(LN_S) -f liblvm2clusterlock.so.$(LIB_VERSION) \ + $(libdir)/liblvm2clusterlock.so + diff --git a/lib/locking/cluster_locking.c b/lib/locking/cluster_locking.c new file mode 100644 index 000000000..d9cab2d7e --- /dev/null +++ b/lib/locking/cluster_locking.c @@ -0,0 +1,462 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Locking functions for LVM. + * The main purpose of this part of the library is to serialise LVM + * management operations across a cluster. 
+ */ + +#include "lib.h" +#include "clvm.h" +#include "lvm-string.h" +#include "locking.h" +#include "locking_types.h" + +#include +#include +#include +#include + +#ifndef CLUSTER_LOCKING_INTERNAL +int lock_resource(struct cmd_context *cmd, const char *resource, int flags); +void locking_end(void); +int locking_init(int type, struct config_tree *cf, uint32_t *flags); +#endif + +typedef struct lvm_response { + char node[255]; + char *response; + int status; + int len; +} lvm_response_t; + +/* + * This gets stuck at the start of memory we allocate so we + * can sanity-check it at deallocation time + */ +#define LVM_SIGNATURE 0x434C564D + +/* + * NOTE: the LVMD uses the socket FD as the client ID, this means + * that any client that calls fork() will inherit the context of + * it's parent. + */ +static int _clvmd_sock = -1; + +/* FIXME Install SIGPIPE handler? */ + +/* Open connection to the Cluster Manager daemon */ +static int _open_local_sock(void) +{ + int local_socket; + struct sockaddr_un sockaddr; + + /* Open local socket */ + if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { + log_error("Local socket creation failed: %s", strerror(errno)); + return -1; + } + + memset(&sockaddr, 0, sizeof(sockaddr)); + memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME)); + + sockaddr.sun_family = AF_UNIX; + + if (connect(local_socket,(struct sockaddr *) &sockaddr, + sizeof(sockaddr))) { + int saved_errno = errno; + + log_error("connect() failed on local socket: %s", + strerror(errno)); + if (close(local_socket)) + stack; + + errno = saved_errno; + return -1; + } + + return local_socket; +} + +/* Send a request and return the status */ +static int _send_request(char *inbuf, int inlen, char **retbuf) +{ + char outbuf[PIPE_BUF]; + struct clvm_header *outheader = (struct clvm_header *) outbuf; + int len; + int off; + int buflen; + int err; + + /* Send it to CLVMD */ + rewrite: + if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) { + if (err == -1 && 
errno == EINTR) + goto rewrite; + log_error("Error writing data to clvmd: %s", strerror(errno)); + return 0; + } + + /* Get the response */ + reread: + if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) { + if (errno == EINTR) + goto reread; + log_error("Error reading data from clvmd: %s", strerror(errno)); + return 0; + } + + if (len == 0) { + log_error("EOF reading CLVMD"); + errno = ENOTCONN; + return 0; + } + + /* Allocate buffer */ + buflen = len + outheader->arglen; + *retbuf = dbg_malloc(buflen); + if (!*retbuf) { + errno = ENOMEM; + return 0; + } + + /* Copy the header */ + memcpy(*retbuf, outbuf, len); + outheader = (struct clvm_header *) *retbuf; + + /* Read the returned values */ + off = 1; /* we've already read the first byte */ + + while (off < outheader->arglen && len > 0) { + len = read(_clvmd_sock, outheader->args + off, + buflen - off - offsetof(struct clvm_header, args)); + if (len > 0) + off += len; + } + + /* Was it an error ? */ + if (outheader->status < 0) { + errno = -outheader->status; + log_error("cluster send request failed: %s", strerror(errno)); + return 0; + } + + return 1; +} + +/* Build the structure header and parse-out wildcard node names */ +static void _build_header(struct clvm_header *head, int cmd, const char *node, + int len) +{ + head->cmd = cmd; + head->status = 0; + head->flags = 0; + head->clientid = 0; + head->arglen = len; + + if (node) { + /* + * Allow a couple of special node names: + * "*" for all nodes, + * "." 
for the local node only + */ + if (strcmp(node, "*") == 0) { + head->node[0] = '\0'; + } else if (strcmp(node, ".") == 0) { + head->node[0] = '\0'; + head->flags = CLVMD_FLAG_LOCAL; + } else + strcpy(head->node, node); + } else + head->node[0] = '\0'; +} + +/* + * Send a message to a(or all) node(s) in the cluster and wait for replies + */ +static int _cluster_request(char cmd, const char *node, void *data, int len, + lvm_response_t ** response, int *num) +{ + char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1]; + int *outptr; + char *inptr; + char *retbuf = NULL; + int status; + int i; + int num_responses = 0; + struct clvm_header *head = (struct clvm_header *) outbuf; + lvm_response_t *rarray; + + *num = 0; + + if (_clvmd_sock == -1) + _clvmd_sock = _open_local_sock(); + + if (_clvmd_sock == -1) + return 0; + + _build_header(head, cmd, node, len); + memcpy(head->node + strlen(head->node) + 1, data, len); + + status = _send_request(outbuf, sizeof(struct clvm_header) + + strlen(head->node) + len, &retbuf); + if (!status) + goto out; + + /* Count the number of responses we got */ + head = (struct clvm_header *) retbuf; + inptr = head->args; + while (inptr[0]) { + num_responses++; + inptr += strlen(inptr) + 1; + inptr += sizeof(int); + inptr += strlen(inptr) + 1; + } + + /* + * Allocate response array. 
+ * With an extra pair of INTs on the front to sanity + * check the pointer when we are given it back to free + */ + outptr = dbg_malloc(sizeof(lvm_response_t) * num_responses + + sizeof(int) * 2); + if (!outptr) { + errno = ENOMEM; + status = 0; + goto out; + } + + *response = (lvm_response_t *) (outptr + 2); + outptr[0] = LVM_SIGNATURE; + outptr[1] = num_responses; + rarray = *response; + + /* Unpack the response into an lvm_response_t array */ + inptr = head->args; + i = 0; + while (inptr[0]) { + strcpy(rarray[i].node, inptr); + inptr += strlen(inptr) + 1; + + rarray[i].status = *(int *) inptr; + inptr += sizeof(int); + + rarray[i].response = dbg_malloc(strlen(inptr) + 1); + if (rarray[i].response == NULL) { + /* Free up everything else and return error */ + int j; + for (j = 0; j < i; j++) + dbg_free(rarray[i].response); + free(outptr); + errno = ENOMEM; + status = -1; + goto out; + } + + strcpy(rarray[i].response, inptr); + rarray[i].len = strlen(inptr); + inptr += strlen(inptr) + 1; + i++; + } + *num = num_responses; + *response = rarray; + + out: + if (retbuf) + dbg_free(retbuf); + + return status; +} + +/* Free reply array */ +static int _cluster_free_request(lvm_response_t * response) +{ + int *ptr = (int *) response - 2; + int i; + int num; + + /* Check it's ours to free */ + if (response == NULL || *ptr != LVM_SIGNATURE) { + errno = EINVAL; + return 0; + } + + num = ptr[1]; + + for (i = 0; i < num; i++) { + dbg_free(response[i].response); + } + + dbg_free(ptr); + + return 1; +} + +static int _lock_for_cluster(unsigned char cmd, unsigned int flags, char *name) +{ + int status; + int i; + char *args; + const char *node = ""; + int len; + int saved_errno = errno; + lvm_response_t *response = NULL; + int num_responses; + + assert(name); + + len = strlen(name) + 3; + args = alloca(len); + strcpy(args + 2, name); + + args[0] = flags & 0xBF; /* Maskoff LOCAL flag */ + args[1] = 0; /* Not used now */ + + /* + * VG locks are just that: locks, and have no side 
effects + * so we only need to do them on the local node because all + * locks are cluster-wide. + * Also, if the lock is exclusive it makes no sense to try to + * acquire it on all nodes, so just do that on the local node too. + */ + if (cmd == CLVMD_CMD_LOCK_VG || + (flags & LCK_TYPE_MASK) == LCK_EXCL || + (flags & LCK_LOCAL)) + node = "."; + + status = _cluster_request(cmd, node, args, len, + &response, &num_responses); + + /* If any nodes were down then display them and return an error */ + for (i = 0; i < num_responses; i++) { + if (response[i].status == -EHOSTDOWN) { + log_error("clvmd not running on node %s", + response[i].node); + status = 0; + } else if (response[i].status) { + log_error("Error locking on node %s: %s", + response[i].node, + response[i].response[0] ? + response[i].response : + strerror(response[i].status)); + status = 0; + } + } + + saved_errno = errno; + _cluster_free_request(response); + errno = saved_errno; + + return status; +} + +/* API entry point for LVM */ +#ifdef CLUSTER_LOCKING_INTERNAL +static int _lock_resource(struct cmd_context *cmd, const char *resource, + int flags) +#else +int lock_resource(struct cmd_context *cmd, const char *resource, int flags) +#endif +{ + char lockname[PATH_MAX]; + int cluster_cmd = 0; + + assert(strlen(resource) < sizeof(lockname)); + + switch (flags & LCK_SCOPE_MASK) { + case LCK_VG: + /* If the VG name is empty then lock the unused PVs */ + if (!resource || !*resource) + lvm_snprintf(lockname, sizeof(lockname), "P_orphans"); + else + lvm_snprintf(lockname, sizeof(lockname), "V_%s", + resource); + + cluster_cmd = CLVMD_CMD_LOCK_VG; + flags &= LCK_TYPE_MASK; + break; + + case LCK_LV: + cluster_cmd = CLVMD_CMD_LOCK_LV; + strcpy(lockname, resource); + flags &= 0xffdf; /* Mask off HOLD flag */ + break; + + default: + log_error("Unrecognised lock scope: %d", + flags & LCK_SCOPE_MASK); + return 0; + } + + /* Send a message to the cluster manager */ + log_very_verbose("Locking %s at 0x%x", lockname, flags); 
+ + return _lock_for_cluster(cluster_cmd, flags, lockname); +} + +#ifdef CLUSTER_LOCKING_INTERNAL +static void _locking_end(void) +#else +void locking_end(void) +#endif +{ + if (_clvmd_sock != -1 && close(_clvmd_sock)) + stack; + + _clvmd_sock = -1; +} + +#ifdef CLUSTER_LOCKING_INTERNAL +static void _reset_locking(void) +#else +void reset_locking(void) +#endif +{ + if (close(_clvmd_sock)) + stack; + + _clvmd_sock = _open_local_sock(); + if (_clvmd_sock == -1) + stack; +} + +#ifdef CLUSTER_LOCKING_INTERNAL +int init_cluster_locking(struct locking_type *locking, struct config_tree *cft) +{ + locking->lock_resource = _lock_resource; + locking->fin_locking = _locking_end; + locking->reset_locking = _reset_locking; + locking->flags = LCK_PRE_MEMLOCK; + + _clvmd_sock = _open_local_sock(); + if (_clvmd_sock == -1) + return 0; + + return 1; +} +#else +int locking_init(int type, struct config_tree *cf, uint32_t *flags) +{ + _clvmd_sock = _open_local_sock(); + if (_clvmd_sock == -1) + return 0; + + /* Ask LVM to lock memory before calling us */ + *flags |= LCK_PRE_MEMLOCK; + + return 1; +} +#endif diff --git a/lib/locking/locking.c b/lib/locking/locking.c index f4fa45eb3..2c5ab1b9e 100644 --- a/lib/locking/locking.c +++ b/lib/locking/locking.c @@ -145,6 +145,14 @@ int init_locking(int type, struct config_tree *cft) return 1; #endif +#ifdef CLUSTER_LOCKING_INTERNAL + case 3: + if (!init_cluster_locking(&_locking, cft)) + break; + log_very_verbose("Cluster locking enabled."); + return 1; +#endif + default: log_error("Unknown locking type requested."); return 0; diff --git a/lib/locking/locking_types.h b/lib/locking/locking_types.h index de8d94490..441e2c309 100644 --- a/lib/locking/locking_types.h +++ b/lib/locking/locking_types.h @@ -40,3 +40,4 @@ int init_no_locking(struct locking_type *locking, struct config_tree *cf); int init_file_locking(struct locking_type *locking, struct config_tree *cf); int init_external_locking(struct locking_type *locking, struct config_tree *cf); 
+int init_cluster_locking(struct locking_type *locking, struct config_tree *cf); diff --git a/scripts/clvmd_fix_conf.sh b/scripts/clvmd_fix_conf.sh new file mode 100644 index 000000000..9e363d521 --- /dev/null +++ b/scripts/clvmd_fix_conf.sh @@ -0,0 +1,154 @@ +#!/bin/sh +# +# Edit an lvm.conf file to enable cluster locking. +# +# $1 is the directory where the locking library is installed. +# $2 (optional) is the config file +# $3 (optional) is the locking library name +# +# +PREFIX=$1 +LVMCONF=$2 +LIB=$3 + +if [ -z "$PREFIX" ] +then + echo "usage: $0 [] []" + echo "" + echo " location of the cluster locking shared library. (no default)" + echo " name of the LVM config file (default: /etc/lvm/lvm.conf)" + echo " name of the shared library (default: liblvm2clusterlock.so)" + echo "" + exit 0 +fi + +[ -z "$LVMCONF" ] && LVMCONF="/etc/lvm/lvm.conf" +[ -z "$LIB" ] && LIB="liblvm2clusterlock.so" + +if [ "${PREFIX:0:1}" != "/" ] +then + echo "Prefix must be an absolute path name (starting with a /)" + exit 12 +fi + +if [ ! -f "$LVMCONF" ] +then + echo "$LVMCONF does not exist" + exit 10 +fi + +if [ ! -f "$PREFIX/$LIB" ] +then + echo "$PREFIX/$LIB does not exist, did you do a \"make install\" ?" + exit 11 +fi + + +SCRIPTFILE=`mktemp -t lvmscript.XXXXXXXXXX` +TMPFILE=`mktemp -t lvmtmp.XXXXXXXXXX` + + +# Flags so we know which parts of the file we can replace and which need +# adding. These are return codes from grep, so zero means it IS present! +have_type=1 +have_dir=1 +have_library=1 +have_global=1 + +grep -q '^[[:blank:]]*locking_type[[:blank:]]*=' $LVMCONF +have_type=$? + +grep -q '^[[:blank:]]*library_dir[[:blank:]]*=' $LVMCONF +have_dir=$? + +grep -q '^[[:blank:]]*locking_library[[:blank:]]*=' $LVMCONF +have_library=$? + +# Those options are in section "global {" so we must have one if any are present. +if [ "$have_type" = "0" -o "$have_dir" = "0" -o "$have_library" = "0" ] +then + + # See if we can find it... 
+ grep -q '^[[:blank:]]*global[[:blank:]]*{' $LVMCONF + have_global=$? + + if [ "$have_global" = "1" ] + then + echo "global keys but no 'global {' found, can't edit file" + exit 12 + fi +fi + +# So if we don't have "global {" we need to create one and +# populate it + +if [ "$have_global" = "1" ] +then + cat $LVMCONF - < $TMPFILE +global { + # Enable locking for cluster LVM + locking_type = 2 + library_dir = "$PREFIX" + locking_library = "$LIB" +} +EOF + if [ $? != 0 ] + then + echo "failed to create temporary config file, $LVMCONF not updated" + exit 1 + fi +else + # + # We have a "global {" section, so add or replace the + # locking entries as appropriate + # + + if [ "$have_type" = "0" ] + then + SEDCMD=" s/^[[:blank:]]*locking_type[[:blank:]]*=.*/\ \ \ \ locking_type = 2/g" + else + SEDCMD=" /global[[:blank:]]*{/a\ \ \ \ locking_type = 2" + fi + + if [ "$have_dir" = "0" ] + then + SEDCMD="${SEDCMD}\ns'^[[:blank:]]*library_dir[[:blank:]]*=.*'\ \ \ \ library_dir = \"$PREFIX\"'g" + else + SEDCMD="${SEDCMD}\n/global[[:blank:]]*{/a\ \ \ \ library_dir = \"$PREFIX\"" + fi + + if [ "$have_library" = "0" ] + then + SEDCMD="${SEDCMD}\ns/^[[:blank:]]*locking_library[[:blank:]]*=.*/\ \ \ \ locking_library = \"$LIB\"/g" + else + SEDCMD="${SEDCMD}\n/global[[:blank:]]*{/a\ \ \ \ locking_library = \"$LIB\"" + fi + + echo -e $SEDCMD > $SCRIPTFILE + sed <$LVMCONF >$TMPFILE -f $SCRIPTFILE + if [ $? != 0 ] + then + echo "sed failed, $LVMCONF not updated" + exit 1 + fi +fi + +# Now we have a suitably editted config file in a temp place, +# backup the original and copy our new one into place. + +cp $LVMCONF $LVMCONF.nocluster +if [ $? != 0 ] + then + echo "failed to backup old config file, $LVMCONF not updated" + exit 2 +fi + +cp $TMPFILE $LVMCONF +if [ $? 
!= 0 ] + then + echo "failed to copy new config file into place, check $LVMCONF is still OK" + exit 3 +fi + +rm -f $SCRIPTFILE $TMPFILE + diff --git a/scripts/clvmd_init b/scripts/clvmd_init new file mode 100755 index 000000000..31eb8cebc --- /dev/null +++ b/scripts/clvmd_init @@ -0,0 +1,90 @@ +#!/bin/bash +# +# /etc/rc.d/init.d/clvmd +# +# Starts the clvm daemon +# NOTE: These startup levels may not be right yet - it depends on where +# the rest of the cluster startup goes. +# +# chkconfig: 345 72 5 +# description: distributes LVM commands in a clustered environment. \ +# a clvmd must be run on all nodes in a cluster for clustered LVM \ +# operations to work. +# processname: clvmd + +# Source function library. +. /etc/init.d/functions + +BINARY=/usr/sbin/clvmd +LOCKFILE=/var/lock/subsys/clvmd + +test -x "$BINARY" || exit 0 + +RETVAL=0 + +# +# See how we were called. +# + +prog="clvmd" + +start() { + # Check if clvmd is already running + if [ ! -f "$LOCKFILE" ]; then + echo -n $"Starting $prog: " + daemon $BINARY + RETVAL=$? + [ $RETVAL -eq 0 ] && touch $LOCKFILE + echo + fi + return $RETVAL +} + +stop() { + echo -n $"Stopping $prog: " + killproc $BINARY + RETVAL=$? + [ $RETVAL -eq 0 ] && rm -f $LOCKFILE + echo + return $RETVAL +} + + +restart() { + stop + start +} + +reload() { + restart +} + +status_clvm() { + status $BINARY +} + +case "$1" in +start) + start + ;; +stop) + stop + ;; +reload|restart) + restart + ;; +condrestart) + if [ -f $LOCKFILE ]; then + restart + fi + ;; +status) + status_clvm + ;; +*) + echo $"Usage: $0 {start|stop|restart|condrestart|status}" + exit 1 +esac + +exit $? +exit $RETVAL