From 83dbfecad7f5a8990c1c2e56a87d6ec36a76c46e Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <sahlberg@ronnie>
Date: Wed, 15 Aug 2007 11:36:01 +1000
Subject: [PATCH] add a description on how the event scripts works to the
 README and make sure it is installed in /etc/ctdb/events.d

(This used to be ctdb commit adec62a924af5bb023f346e705515b09dbe64f21)
---
 ctdb/Makefile.in             |   1 +
 ctdb/config/events.d/README  | 125 +++++++++++++++++++++++++++++++++++
 ctdb/packaging/RPM/ctdb.spec |   1 +
 3 files changed, 127 insertions(+)

diff --git a/ctdb/Makefile.in b/ctdb/Makefile.in
index 3634a4c2a45..2ba7845626d 100644
--- a/ctdb/Makefile.in
+++ b/ctdb/Makefile.in
@@ -157,6 +157,7 @@ install: all
 	${INSTALLCMD} -m 755 config/events $(DESTDIR)$(etcdir)/ctdb
 	${INSTALLCMD} -m 755 config/functions $(DESTDIR)$(etcdir)/ctdb
 	${INSTALLCMD} -m 755 config/statd-callout $(DESTDIR)$(etcdir)/ctdb
+	${INSTALLCMD} -m 755 config/events.d/README $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/10.interface $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/40.vsftpd $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/50.samba $(DESTDIR)$(etcdir)/ctdb/events.d
diff --git a/ctdb/config/events.d/README b/ctdb/config/events.d/README
index 531e6453db9..741d0eb032d 100644
--- a/ctdb/config/events.d/README
+++ b/ctdb/config/events.d/README
@@ -1,4 +1,129 @@
 This directory is where you should put any local or application
 specific event scripts for ctdb to call.
 
+All event scripts start with the prefix 'NN.' where N is a digit.
+The event scripts are run in sequence based on NN.
+Thus 10.interface will be run before 60.nfs.
+
+Each NN must be unique and duplicates will cause undefined behaviour.
+I.e. having both 10.interface and 10.otherstuff is not allowed.
+
+
+As a special case, any eventscript that ends with a '~' character will be 
+ignored since this is a common postfix that some editors will append to 
+older versions of a file.
+
+
+The eventscripts are called with a varying number of arguments.
+The first argument is the "event" and the rest of the arguments depend
+on which event was triggered.
+
+The events currently implemented are
+startup
+	This event does not take any additional arguments.
+	This event is only invoked once, when ctdb is starting up.
+	This event is used to wait for the service to start and for all
+	resources for the service to become available.
+
+	This is used to prevent ctdb from starting up and advertising its
+	services until all dependent services have become available.
+
+	All services that are managed by ctdb should implement this
+	event and use it to start the service.
+
+	Example: 50.samba uses this event to start the samba daemon
+	and then wait until samba and all its associated services have
+	become available. It then also proceeds to wait until all
+	shares have become available.
+
+shutdown
+	This event is called when the ctdb service is shutting down.
+	
+	All services that are managed by ctdb should implement this event
+	and use it to perform a controlled shutdown of the service.
+
+	Example: 60.nfs uses this event to shut down nfs and all associated
+	services and stop exporting any shares when this event is invoked.
+
+monitor
+	This event is invoked every X number of seconds.
+	The interval can be configured using the MonitorInterval tunable
+	but defaults to 15 seconds.
+
+	This event is triggered by ctdb to continuously monitor that all
+	managed services are healthy.
+	When invoked, the event script will check that the service is healthy
+	and return 0 if so. If the service is not healthy the event script
+	should return non zero.
+
+	If a service returns nonzero from this script this will cause ctdb
+	to consider the node status as UNHEALTHY and will cause the public
+	address and all associated services to be failed over to a different
+	node in the cluster.
+
+	All managed services should implement this event.
+
+	Example: 10.interface which checks that the public interface (if used)
+	is healthy, i.e. it has a physical link established.
+
+takeip
+	This event is triggered every time the node takes over a public ip
+	address during recovery.
+	This event takes three additional arguments:
+	'interface' 'ipaddress' and 'netmask'
+
+	This event will always be followed by a 'recovered' event once
+	all ipaddresses have been reassigned to new nodes and the ctdb database
+	has been recovered.
+	If multiple ip addresses are reassigned during recovery it is
+	possible to get several 'takeip' events followed by a single 
+	'recovered' event.
+
+	Since there might be substantial work involved for the service when an ip
+	address is taken over and since multiple ip addresses might be taken
+	over in a single recovery it is often best to only mark which addresses
+	are being taken over in this event and defer the actual work to
+	reconfigure or restart the services until the 'recovered' event.
+
+	Example: 60.nfs which just records which ip addresses are being taken
+	over into a local state directory and which defers the actual
+	restart of the services until the 'recovered' event.
+
+
+releaseip
+	This event is triggered every time the node releases a public ip
+	address during recovery.
+	This event takes three additional arguments:
+	'interface' 'ipaddress' and 'netmask'
+
+	In all other regards this event is analogous to the 'takeip' event above.
+
+	Example: 60.nfs
+
+recovered
+	This event is triggered every time a full ctdb recovery has completed
+	and all public ip addresses have been reassigned among the nodes.
+
+	Example: 60.nfs which if the ip address configuration has changed
+	during the recovery (i.e. if addresses have been taken over or
+	released) will kill off any tcp connections that exist for that
+	service and also send out statd notifications to all registered 
+	clients.
+	
+
+Additional note for takeip, releaseip, recovered:
+
+ALL services that depend on the ip address configuration of the node must 
+implement all three of these events.
+
+ALL services that use TCP should also implement these events and at least
+kill off any tcp connections to the service if the ip address config has 
+changed in a similar fashion to how 60.nfs does it.
+The reason one must do this is that ESTABLISHED tcp connections may survive
+when an ip address is released and removed from the host until the ip address
+is taken over again.
+Any tcp connections that survive a release/takeip sequence can potentially
+cause the client/server tcp connection to get out of sync with sequence and 
+ack numbers and cause a disruptive ack storm.
+
 
diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec
index 68cad76da1d..1c6b3dfa33c 100644
--- a/ctdb/packaging/RPM/ctdb.spec
+++ b/ctdb/packaging/RPM/ctdb.spec
@@ -94,6 +94,7 @@ fi
 
 %{_sysconfdir}/ctdb/events
 %{_sysconfdir}/ctdb/functions
+%{_sysconfdir}/ctdb/events.d/README
 %{_sysconfdir}/ctdb/events.d/10.interface
 %{_sysconfdir}/ctdb/events.d/40.vsftpd
 %{_sysconfdir}/ctdb/events.d/50.samba