From 76b7361c7e12bd12f5af7d6b5a08b98cac7482a0 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 6 Jun 2007 12:08:42 +1000 Subject: [PATCH] - added monitoring of rpc ports for nfs, and of Samba ports and directories - added monitoring of the ethernet link state When monitoring detects an error, the node loses its public IP address (This used to be ctdb commit 0af57aead8c983511d25774b4ffe09fa5ff26501) --- ctdb/common/ctdb_tunables.c | 2 +- ctdb/config/events.d/10.interface | 12 +++++++ ctdb/config/events.d/50.samba | 9 +++++ ctdb/config/events.d/59.nfslock | 8 +++++ ctdb/config/events.d/60.nfs | 12 +++++++ ctdb/config/functions | 60 +++++++++++++++++++++++++++++-- 6 files changed, 100 insertions(+), 3 deletions(-) diff --git a/ctdb/common/ctdb_tunables.c b/ctdb/common/ctdb_tunables.c index d417d6c2763..c799ce4ec4c 100644 --- a/ctdb/common/ctdb_tunables.c +++ b/ctdb/common/ctdb_tunables.c @@ -36,7 +36,7 @@ static const struct { { "RecoverInterval", 1, offsetof(struct ctdb_tunable, recover_interval) }, { "ElectionTimeout", 3, offsetof(struct ctdb_tunable, election_timeout) }, { "TakeoverTimeout", 5, offsetof(struct ctdb_tunable, takeover_timeout) }, - { "MonitorInterval", 60, offsetof(struct ctdb_tunable, monitor_interval) }, + { "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) }, }; /* diff --git a/ctdb/config/events.d/10.interface b/ctdb/config/events.d/10.interface index ea28eb70451..a6fcbba9b4e 100755 --- a/ctdb/config/events.d/10.interface +++ b/ctdb/config/events.d/10.interface @@ -6,6 +6,9 @@ # public interface . 
/etc/ctdb/functions +loadconfig ctdb + +[ -z "$CTDB_PUBLIC_INTERFACE" ] && exit 0 cmd="$1" shift @@ -73,6 +76,15 @@ case $cmd in shutdown) ;; + monitor) + [ -x /usr/sbin/ethtool ] && { + /usr/sbin/ethtool $CTDB_PUBLIC_INTERFACE | grep 'Link detected: yes' > /dev/null || { + echo "`date` ERROR: No link on network interface $CTDB_PUBLIC_INTERFACE" + exit 1 + } + } + ;; + esac exit 0 diff --git a/ctdb/config/events.d/50.samba b/ctdb/config/events.d/50.samba index affd964c7de..75342f5f0db 100755 --- a/ctdb/config/events.d/50.samba +++ b/ctdb/config/events.d/50.samba @@ -50,6 +50,15 @@ case $cmd in service smb stop service winbind stop ;; + + monitor) + smb_dirs=`testparm -st 2> /dev/null | egrep '^\s*path = ' | cut -d= -f2` + ctdb_check_directories "Samba" $smb_dirs + + smb_ports=`testparm -stv 2> /dev/null | egrep '\s*smb ports =' | cut -d= -f2` + ctdb_check_tcp_ports "Samba" $smb_ports + ;; + esac # ignore unknown commands diff --git a/ctdb/config/events.d/59.nfslock b/ctdb/config/events.d/59.nfslock index 4bdf51f7785..1dba335824d 100755 --- a/ctdb/config/events.d/59.nfslock +++ b/ctdb/config/events.d/59.nfslock @@ -51,6 +51,14 @@ case $cmd in /bin/rm -f /etc/ctdb/state/statd/restart ;; + + monitor) + # check that statd and lockd respond to rpc requests + ctdb_check_rpc "statd" 100024 1 + ctdb_check_rpc "lockd" 100021 1 + ctdb_check_directories "statd" $STATD_SHARED_DIRECTORY + ;; + esac exit 0 diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs index 258a2309d0c..549d87cc233 100755 --- a/ctdb/config/events.d/60.nfs +++ b/ctdb/config/events.d/60.nfs @@ -9,6 +9,8 @@ loadconfig nfs cmd="$1" shift +PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH + case $cmd in startup) mkdir -p /etc/ctdb/state/nfs @@ -45,6 +47,16 @@ case $cmd in /bin/rm -f /etc/ctdb/state/nfs/restart ;; + monitor) + # check that NFS responds to rpc requests + ctdb_check_rpc "NFS" 100003 3 + ctdb_check_rpc "mount" 100005 1 + + # and that its directories are available + nfs_dirs=`grep -v '^#' <
/etc/exports | cut -d' ' -f1` + ctdb_check_directories "nfs" $nfs_dirs + ;; + esac exit 0 diff --git a/ctdb/config/functions b/ctdb/config/functions index f557d629107..4219f223838 100644 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -28,7 +28,7 @@ service() { ###################################################### # wait for a set of tcp ports -# usage: ctdb_wait_tcp_ports SERICE_NAME +# usage: ctdb_wait_tcp_ports SERVICE_NAME ###################################################### ctdb_wait_tcp_ports() { service_name="$1" @@ -59,9 +59,10 @@ ctdb_wait_tcp_ports() { } + ###################################################### # wait for a set of directories -# usage: ctdb_wait_directories SERICE_NAME +# usage: ctdb_wait_directories SERVICE_NAME ###################################################### ctdb_wait_directories() { service_name="$1" @@ -84,3 +85,58 @@ ctdb_wait_directories() { echo "`/bin/date` Local directories for $service_name are available" } + +###################################################### +# check that a rpc server is registered with portmap +# and responding to requests +# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION +###################################################### +ctdb_check_rpc() { + service_name="$1" + prognum="$2" + version="$3" + rpcinfo -u localhost $prognum $version > /dev/null || { + echo "`date` ERROR: $service_name not responding to rpc requests" + exit 1 + } +} + +###################################################### +# check a set of directories is available +# usage: ctdb_check_directories SERVICE_NAME DIRECTORY... +###################################################### +ctdb_check_directories() { + service_name="$1" + shift + wait_dirs="$*" + [ -z "$wait_dirs" ] && return; + for d in $wait_dirs; do + [ -d $d ] || { + echo "`date` ERROR: $service_name directory $d not available" + exit 1 + } + done +} + +###################################################### +# check a set of tcp ports +# usage: ctdb_check_tcp_ports
SERVICE_NAME PORT... +###################################################### +ctdb_check_tcp_ports() { + service_name="$1" + shift + wait_ports="$*" + [ -z "$wait_ports" ] && return; + for p in $wait_ports; do + all_ok=1 + if [ -x /usr/bin/netcat ]; then + /usr/bin/netcat -z 127.0.0.1 $p || all_ok=0 + elif [ -x /usr/bin/nc ]; then + /usr/bin/nc -z 127.0.0.1 $p || all_ok=0 + fi + [ $all_ok -eq 1 ] || { + echo "`date` ERROR: $service_name tcp port $p is not responding" + exit 1 + } + done +}