samba-mirror/onnode at 021892346cd61452ddae0ccdb5f631c666a28e16

mirror of https://github.com/samba-team/samba.git synced 2024-12-23 17:34:34 +03:00

Martin Schwenke 021892346c onnode: add "any" nodespec to select any node with running CTDB.

In testing and other situations (e.g. eventscripts) it is necessary to
select a node where a ctdb command can be run.  The whole idea here is
to avoid nodes where ctdbd is not running and where most ctdb commands
would fail.  This implements a standard way of doing this involving a
recursive onnode command.

There is still a small window for a race, where the selected node is
suddenly shut down, but this is unavoidable.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit fb47cce86c0edae5caaf485f13ae7a151b6cb00d)

2009-09-08 15:10:20 +10:00

351 lines

8.0 KiB

Bash

Executable File

Raw Blame History

 #!/bin/bash
 # Run commands on CTDB nodes.
 # See http://ctdb.samba.org/ for more information about CTDB.
 # Copyright (C) Martin Schwenke  2008
 # Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.
 # Copyright (C) Andrew Tridgell  2007
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, see <http://www.gnu.org/licenses/>.
 # Base name of this script, used to prefix diagnostics.  Taken with a
 # parameter expansion so that a path containing whitespace is handled.
 prog="${0##*/}"
 # Print the command-line synopsis on standard error and terminate the
 # (sub)shell with status 1.
 usage ()
 {
     printf '%s\n' \
         'Usage: onnode [OPTION] ... <NODES> <COMMAND> ...' \
         '  options:' \
         '    -c          Run in current working directory on specified nodes.' \
         '    -o <prefix> Save standard output from each node to file <prefix>.<ip>' \
         '    -p          Run command in parallel on specified nodes.' \
         '    -q          Do not print node addresses (overrides -v).' \
         '    -v          Print node address even for a single node.' \
         '  <NODES>       "all", "any", "ok" (or "healthy"), "con" (or "connected"),' \
         '                "rm" (or "recmaster"), "lvs" (or "lvsmaster"),' \
         '                "natgw" (or "natgwlist");' \
         '                or a node number (0 base); or' \
         '                list (comma separated) of <NODES>; or' \
         '                range (hyphen separated) of node numbers.' \
         >&2
     exit 1
 }
 # Complain on standard error about a malformed <NODES> argument (the
 # message is followed by a blank line), then hand over to usage.
 invalid_nodespec ()
 {
     printf '%s\n\n' "Invalid <nodespec>" >&2
     usage
 }
 # Option state, overridden by parse_options.  The four flags hold the
 # command names "true"/"false" so that they can be run directly as
 # conditions (e.g. "if $parallel").
 prefix=""
 quiet=false
 verbose=false
 parallel=false
 current=false
 # Parse onnode's own options and split off the node specification and
 # the command to run.
 #   Arguments: the script's command line ("$@").
 #   Globals written: current, parallel, verbose, quiet, prefix,
 #                    nodespec, command.
 # Calls usage (which exits) on an invalid option, on -h/--help, or when
 # fewer than two operands remain after the options.
 parse_options ()
 {
     # $POSIXLY_CORRECT means that the command passed to onnode can
     # take options and getopt won't reorder things to make them
     # options to onnode.
     #
     # The declaration is kept separate from the assignment: with
     # "local temp=$(...)" the status seen afterwards is that of
     # "local", so a getopt failure would go unnoticed.
     local temp
     temp=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cho:pqv" -l help -- "$@") || usage

     eval set -- "$temp"

     while true ; do
         case "$1" in
             -c) current=true ; shift ;;
             -o) prefix="$2" ; shift 2 ;;
             -p) parallel=true ; shift ;;
             -q) quiet=true ; shift ;;
             -v) verbose=true ; shift ;;
             --) shift ; break ;;
             -h|--help|*) usage ;; # Shouldn't happen, so this is reasonable.
         esac
     done

     # Need at least a nodespec and one word of command.
     [ $# -ge 2 ] || usage

     nodespec="$1" ; shift
     # Remaining words joined by spaces into a single command string.
     command="$*"
 }
 # Print the entry at zero-based position N of a list of node addresses.
 #   $1   - node number N
 #   $2.. - the node list, in order; "#DEAD" marks a deleted node
 # Outputs the selected address on stdout.  Prints a message on stderr
 # and exits with status 1 if N is not a number inside the list or the
 # entry is "#DEAD".
 echo_nth ()
 {
     local n="$1" ; shift
     local node=""

     # Only shift when N is a plain number within the list.  A failing
     # "shift" leaves the arguments untouched, which would silently
     # select the first node instead of reporting the error.
     case "$n" in
         ''|*[!0-9]*)
             ;;
         *)
             if [ "$n" -lt $# ] 2>/dev/null ; then
                 shift "$n"
                 node="$1"
             fi
             ;;
     esac

     if [ -n "$node" ] && [ "$node" != "#DEAD" ] ; then
         printf '%s\n' "$node"
     else
         echo "${prog}: \"node ${n}\" does not exist" >&2
         exit 1
     fi
 }
 # Expand a <NODES> specification into one item per line on stdout.
 #   $1 - comma separated list of node numbers, ranges ("M-N") and
 #        symbolic names
 # Ranges are expanded with seq; symbolic names are passed through
 # unchanged for get_nodes to resolve.  Anything else must be a
 # non-negative integer, otherwise invalid_nodespec is called (which
 # ends the subshell with a failure status).
 parse_nodespec ()
 {
     # Subshell avoids hacks to restore $IFS (and the noglob setting).
     (
         IFS=","
         # The specification is split unquoted on purpose; switch off
         # pathname expansion so that "*" or "?" cannot turn into file
         # names from the current directory.
         set -f
         local i first last
         for i in $1 ; do
             case "$i" in
                 *-*)
                     first="${i%-*}"
                     last="${i#*-}"
                     # Both ends must be plain integers; seq on its own
                     # would also accept values such as 1.5.
                     case "$first" in ''|*[!0-9]*) invalid_nodespec ;; esac
                     case "$last" in ''|*[!0-9]*) invalid_nodespec ;; esac
                     seq "$first" "$last" 2>/dev/null || invalid_nodespec
                     ;;
                 # Separate lines for readability.
                 all|any|ok|healthy|con|connected) echo "$i" ;;
                 rm|recmaster|lvs|lvsmaster|natgw|natgwlist) echo "$i" ;;
                 *)
                     case "$i" in ''|*[!0-9]*) invalid_nodespec ;; esac
                     echo "$i"
                     ;;
             esac
         done
     )
 }
 # Cache for the output of "ctdb -Y status" (with everything up to the
 # first space removed); filled on first use.
 ctdb_status_output=""

 # Print, space separated on a single line, the addresses of the nodes
 # whose trailing flag fields in "ctdb -Y status" match a given state.
 #   $1 - list of all nodes (accepted but not consulted here)
 #   $2 - "healthy" or "connected"; anything else goes to invalid_nodespec
 # Exits with status 1 if the ctdb status query fails.
 get_nodes_with_status ()
 {
     local all_nodes="$1"
     local wanted="$2"

     local flag_pattern
     if [ "$wanted" = "healthy" ] ; then
         flag_pattern="0:0:0:0:0"
     elif [ "$wanted" = "connected" ] ; then
         flag_pattern="0:[0-1]:[0-1]:[0-1]:[0-1]"
     else
         invalid_nodespec
     fi

     if [ -z "$ctdb_status_output" ] ; then
         # FIXME: need to do something if $CTDB_NODES_SOCKETS is set.
         if ! ctdb_status_output=$(ctdb -Y status 2>/dev/null) ; then
             echo "${prog}: unable to get status of CTDB nodes" >&2
             exit 1
         fi
         ctdb_status_output="${ctdb_status_output#* }"
     fi

     local matched=""
     local record stripped
     for record in $ctdb_status_output ; do
         # Try to strip the wanted flag fields off the end of the record;
         # if nothing was removed the node is not in the requested state.
         stripped="${record%:${flag_pattern}:}"
         [ "$stripped" = "$record" ] && continue

         # What is now the last field is the address.  NOTE: this is an
         # optimisation.  It might be better to get the node number and
         # then look up the nth node, which would be more consistent if
         # /etc/ctdb/nodes actually contained hostnames.
         matched="${matched} ${stripped##*:}"
     done

     # Deliberately unquoted: drops the leading separator.
     echo $matched
 }
 # Cache of property lookups: space separated ":<property>:<value>" items.
 ctdb_props=""

 # Print the address of the node that "ctdb <property> -Y" names.
 #   $1 - list of all nodes (word-split and handed to echo_nth)
 #   $2 - ctdb subcommand to query (callers in this file pass
 #        "recmaster", "lvsmaster" or "natgwlist")
 # Only the first line of the ctdb output is used, as the node number
 # for echo_nth.  Successful lookups are cached in ctdb_props.  Exits
 # with status 1 if ctdb fails or reports nothing.
 get_node_with_property ()
 {
     local all_nodes="$1"
     local prop="$2"

     local prop_node=""
     local nl=$'\n'

     case " ${ctdb_props}" in
         *" :${prop}:"*)
             # Cached, possibly after other properties: take the text
             # following the marker, up to the next separator.
             prop_node="${ctdb_props##*:${prop}:}"
             prop_node="${prop_node%% *}"
             ;;
         *)
             # Test the status of ctdb itself; checking $? after a later
             # assignment would always see success.
             if prop_node=$(ctdb "$prop" -Y 2>/dev/null) ; then
                 # We only want the first line.
                 prop_node="${prop_node%%"$nl"*}"
                 ctdb_props="${ctdb_props}${ctdb_props:+ }:${prop}:${prop_node}"
             else
                 prop_node=""
             fi
             ;;
     esac

     if [ -n "$prop_node" ] ; then
         echo_nth "$prop_node" $all_nodes
     else
         echo "${prog}: No ${prop} available" >&2
         exit 1
     fi
 }
 # Print the address of one node on which "ctdb pnn" can be run.
 #   $1 - list of all nodes (word-split and handed to echo_nth)
 # Returns 0 after printing the node for the first "PNN:<n>" line seen,
 # or 1 if no node produced such a line.
 get_any_available_node ()
 {
     local all_nodes="$1"

     # We do a recursive onnode to find which nodes are up and running.
     # "$0" is quoted so that an installation path containing whitespace
     # still works.  The exit status is ignored on purpose: only the
     # output lines are examined below.
     local out
     out=$("$0" -pq all ctdb pnn 2>&1)

     local line pnn
     while read -r line ; do
         pnn="${line#PNN:}"
         if [ "$pnn" != "$line" ] ; then
             echo_nth "$pnn" $all_nodes
             return 0
         fi
         # Else must be an error message from a down node.
     done <<<"$out"

     return 1
 }
 # Resolve a <NODES> specification to node addresses, printed on stdout.
 #   $1 - node specification as accepted by parse_nodespec
 # The full node list is $CTDB_NODES_SOCKETS if set, otherwise the
 # contents of $CTDB_NODES_FILE (default /etc/ctdb/nodes) with comments
 # and spaces removed and empty lines replaced by "#DEAD" so that node
 # numbering is preserved.
 # Exits with status 1 if the specification is invalid or any item
 # cannot be resolved.
 get_nodes ()
 {
     local all_nodes

     if [ -n "$CTDB_NODES_SOCKETS" ] ; then
         all_nodes="$CTDB_NODES_SOCKETS"
     else
         [ -f "$CTDB_NODES_FILE" ] || CTDB_NODES_FILE=/etc/ctdb/nodes
         all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$CTDB_NODES_FILE")
     fi

     # Expand the specification first and check its status directly.
     # Testing $? inside the loop misses a failure that produced no
     # output, because the loop body then never runs and the function
     # would return success.
     local specs
     specs=$(parse_nodespec "$1") || exit 1

     local n
     for n in $specs ; do
         case "$n" in
             all)
                 echo "${all_nodes//#DEAD/}"
                 ;;
             any)
                 get_any_available_node "$all_nodes" || exit 1
                 ;;
             ok|healthy)
                 get_nodes_with_status "$all_nodes" "healthy" || exit 1
                 ;;
             con|connected)
                 get_nodes_with_status "$all_nodes" "connected" || exit 1
                 ;;
             rm|recmaster)
                 get_node_with_property "$all_nodes" "recmaster" || exit 1
                 ;;
             lvs|lvsmaster)
                 get_node_with_property "$all_nodes" "lvsmaster" || exit 1
                 ;;
             natgw|natgwlist)
                 get_node_with_property "$all_nodes" "natgwlist" || exit 1
                 ;;
             *)
                 echo_nth "$n" $all_nodes
                 ;;
         esac
     done
 }
 # Stand-in for ssh, selected when $CTDB_NODES_SOCKETS is set: run the
 # command string locally instead of on a remote host.
 #   $1 - value placed in CTDB_SOCKET in the command's environment
 #   $2 - command string handed to "sh -c"
 # File descriptor 3 is redirected to /dev/null for the command.
 fakessh ()
 {
     local socket="$1"
     local cmdline="$2"

     3>/dev/null CTDB_SOCKET="$socket" sh -c "$cmdline"
 }
 # Filter for a node's standard output; reads stdin.  Uses the globals
 # prefix, n, verbose and parallel.
 #   - prefix set:            save to "<prefix>.<n>", "/" in n becoming "_"
 #   - verbose and parallel:  tag every line with "[<n>] "
 #   - otherwise:             pass through unchanged
 stdout_filter ()
 {
     if [ -z "$prefix" ] ; then
         if $verbose && $parallel ; then
             sed -e "s@^@[$n] @"
         else
             cat
         fi
         return
     fi

     cat >"${prefix}.${n//\//_}"
 }
 # Filter for a node's standard error; reads stdin.  Lines are tagged
 # with "[<n>] " only when both verbose and parallel are on, otherwise
 # they pass through unchanged.  Uses the globals n, verbose, parallel.
 stderr_filter ()
 {
     if ! $verbose || ! $parallel ; then
         cat
         return
     fi

     sed -e "s@^@[$n] @"
 }
 ######################################################################

 # Parse the command line: sets nodespec, command and the option flags.
 parse_options "$@"

 # With -c, make the command start in the local working directory.  The
 # path is wrapped in single quotes (embedded single quotes escaped) so
 # that a directory name containing spaces or shell metacharacters
 # reaches the remote shell intact.
 if $current ; then
     sq="'"
     command="cd '${PWD//$sq/$sq\\$sq$sq}' && $command"
 fi

 # Choose the transport: fakessh for local sockets, otherwise $SSH as
 # optionally set by /etc/ctdb/onnode.conf, defaulting to ssh.
 ssh_opts=
 if [ -n "$CTDB_NODES_SOCKETS" ] ; then
     SSH=fakessh
 else
     # Could "2>/dev/null || true" but want to see errors from typos in file.
     [ -r /etc/ctdb/onnode.conf ] && . /etc/ctdb/onnode.conf
     [ -n "$SSH" ] || SSH=ssh
     if [ "$SSH" = "ssh" ] ; then
         # Plain ssh is given -n.
         ssh_opts="-n"
     else
         : # rsh? All bets are off!
     fi
 fi
 ######################################################################

 # Resolve the nodespec.  get_nodes runs in a command substitution, so
 # an "exit 1" inside it has to be caught through the status here.
 nodes=$(get_nodes "$nodespec") || exit 1

 if $quiet ; then
     verbose=false
 else
     # If $nodes contains a space or a newline then assume multiple nodes.
     nl=$'\n'
     [ "$nodes" != "${nodes%[ ${nl}]*}" ] && verbose=true
 fi

 pids=""
 trap 'kill -TERM $pids 2>/dev/null' INT TERM
 # There's a small race here where the kill can fail if no processes
 # have been added to $pids and the script is interrupted.  However,
 # the part of the window where it matters is very small.

 # Let a failure in any stage of the per-node pipelines show up in the
 # pipeline status (ignored quietly if the shell lacks the option).
 set -o pipefail 2>/dev/null

 retcode=0
 for n in $nodes ; do
     # In both modes fd 3 carries the command's filtered stdout around
     # the stderr filter, so the two streams are filtered separately.
     # $SSH, $ssh_opts and $EXTRA_SSH_OPTS are left unquoted on purpose
     # so that they can hold several words.
     if $parallel ; then
         { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; } &
         pids="${pids} $!"
     else
         if $verbose ; then
             echo >&2 ; echo ">> NODE: $n <<" >&2
         fi

         { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; }
         # Save the status before testing it: after a failing "[" the
         # value of $? is that of the test, not of the command.
         rc=$?
         [ "$rc" = 0 ] || retcode=$rc
     fi
 done

 # In parallel mode collect the status of every background job; the
 # last non-zero status becomes the exit code.
 if $parallel ; then
     for p in $pids ; do
         wait "$p"
         rc=$?
         [ "$rc" = 0 ] || retcode=$rc
     done
 fi

 exit $retcode

351 lines 8.0 KiB Bash Executable File Raw Blame History

351 lines

8.0 KiB

Bash

Executable File

Raw Blame History