mirror of
https://github.com/samba-team/samba.git
synced 2024-12-23 17:34:34 +03:00
021892346c
In testing and other situations (e.g. eventscripts) it is necessary to select a node where a ctdb command can be run. The whole idea here is to avoid nodes where ctdbd is not running and where most ctdb commands would fail. This implements a standard way of doing this involving a recursive onnode command. There is still a small window for a race, where the selected node is suddenly shut down, but this is unavoidable. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit fb47cce86c0edae5caaf485f13ae7a151b6cb00d)
351 lines
8.0 KiB
Bash
Executable File
351 lines
8.0 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
# Run commands on CTDB nodes.
|
|
|
|
# See http://ctdb.samba.org/ for more information about CTDB.
|
|
|
|
# Copyright (C) Martin Schwenke 2008
|
|
|
|
# Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.
|
|
|
|
# Copyright (C) Andrew Tridgell 2007
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
# Name used to prefix diagnostics.  $0 is quoted so an install path
# containing whitespace is not split, and "--" stops basename from
# treating a leading "-" in $0 as an option.
prog=$(basename -- "$0")
|
|
|
|
# Print the help text on stderr and terminate with status 1.
usage ()
{
    # The text contains nothing to expand, so a quoted delimiter
    # produces the same output.
    cat <<'EOF' >&2
Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
options:
-c Run in current working directory on specified nodes.
-o <prefix> Save standard output from each node to file <prefix>.<ip>
-p Run command in parallel on specified nodes.
-q Do not print node addresses (overrides -v).
-v Print node address even for a single node.
<NODES> "all", "any", "ok" (or "healthy"), "con" (or "connected"),
"rm" (or "recmaster"), "lvs" (or "lvsmaster"),
"natgw" (or "natgwlist");
or a node number (0 base); or
list (comma separated) of <NODES>; or
range (hyphen separated) of node numbers.
EOF
    exit 1
}
|
|
|
|
# Complain on stderr about a malformed node specification, leave a
# blank line, then hand over to usage (which prints help and exits).
invalid_nodespec ()
{
    printf '%s\n\n' "Invalid <nodespec>" >&2
    usage
}
|
|
|
|
# Option defaults; parse_options overrides these.
# The boolean flags hold the command names "true"/"false" because the
# rest of the script executes them directly (e.g. "if $parallel").
prefix=""          # -o: output file prefix (empty means no files)
quiet=false        # -q
verbose=false      # -v
parallel=false     # -p
current=false      # -c
|
|
|
|
# Parse the command line.
#
# Arguments: the script's own arguments ("$@").
# Sets the globals current, parallel, verbose, quiet and prefix from
# the options, nodespec from the first non-option argument and command
# from the remaining arguments joined by spaces.
# Calls usage (which exits) on an option error, on -h/--help, or when
# fewer than two non-option arguments remain.
parse_options ()
{
    # $POSIXLY_CORRECT means that the command passed to onnode can
    # take options and getopt won't reorder things to make them
    # options to onnode.
    #
    # The declaration is deliberately separate from the assignment:
    # "local temp=$(...)" would make $? the status of "local" (always
    # 0) and hide a getopt failure such as an unknown option.
    local temp
    temp=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cho:pqv" -l help -- "$@") || usage

    eval set -- "$temp"

    while true ; do
        case "$1" in
            -c) current=true ; shift ;;
            -o) prefix="$2" ; shift 2 ;;
            -p) parallel=true ; shift ;;
            -q) quiet=true ; shift ;;
            -v) verbose=true ; shift ;;
            --) shift ; break ;;
            -h|--help|*) usage ;; # Shouldn't happen, so this is reasonable.
        esac
    done

    [ $# -lt 2 ] && usage

    nodespec="$1" ; shift
    # "$*" makes the join into a single string explicit.
    command="$*"
}
|
|
|
|
# Print the n-th (0-based) entry of a node list.
#
# Arguments: $1 - index n; remaining arguments - the node list, one
#            node per argument ("#DEAD" marks a deleted node).
# Outputs:   the selected node on stdout.
# Exits with status 1 and a message on stderr if the index is not a
# valid position in the list or the entry is empty or "#DEAD".
echo_nth ()
{
    local n="$1" ; shift

    # "shift N" with N greater than $# (or a negative or non-numeric
    # N) fails and leaves the positional parameters untouched, which
    # would make an out-of-range index silently select node 0.  Only
    # shift when the index is inside the list.
    local node=""
    if [ "$n" -ge 0 ] 2>/dev/null && [ "$n" -lt $# ] ; then
        shift "$n"
        node="$1"
    fi

    if [ -n "$node" ] && [ "$node" != "#DEAD" ] ; then
        echo "$node"
    else
        echo "${prog}: \"node ${n}\" does not exist" >&2
        exit 1
    fi
}
|
|
|
|
# Expand a node specification into one item per line.
#
# Arguments: $1 - comma separated list whose items are a keyword
#            (all, any, ok, healthy, con, connected, rm, recmaster,
#            lvs, lvsmaster, natgw, natgwlist), a non-negative node
#            number, or a hyphen separated range of node numbers.
# Outputs:   keywords and numbers unchanged, ranges expanded by seq.
# Calls invalid_nodespec for anything else.
parse_nodespec ()
{
    # Subshell avoids hacks to restore $IFS (and the noglob setting).
    (
        IFS=","
        # The list is split by leaving $1 unquoted; disable pathname
        # expansion so an item such as "?" or "*" is rejected rather
        # than being replaced by file names from the current directory.
        set -f
        for i in $1 ; do
            case "$i" in
                *-*) seq "${i%-*}" "${i#*-}" 2>/dev/null || invalid_nodespec ;;
                # Separate lines for readability.
                all|any|ok|healthy|con|connected) echo "$i" ;;
                rm|recmaster|lvs|lvsmaster|natgw|natgwlist) echo "$i" ;;
                *)
                    # Anything else has to be a non-negative integer.
                    [ "$i" -gt -1 ] 2>/dev/null || invalid_nodespec
                    echo "$i"
            esac
        done
    )
}
|
|
|
|
# Output of "ctdb -Y status", fetched at most once per shell.
ctdb_status_output="" # cache

# Print the addresses of all nodes whose status flags match.
#
# Arguments: $1 - node list (accepted but not consulted here);
#            $2 - "healthy" or "connected".
# Outputs:   matching addresses on one line, separated by spaces.
# Calls invalid_nodespec for any other status; exits with status 1 if
# the ctdb command fails.
get_nodes_with_status ()
{
    local wanted="$2"

    # Pattern for the five trailing colon-separated fields of a
    # status entry.
    local flag_pattern
    if [ "$wanted" = "healthy" ] ; then
        flag_pattern="0:0:0:0:0"
    elif [ "$wanted" = "connected" ] ; then
        flag_pattern="0:[0-1]:[0-1]:[0-1]:[0-1]"
    else
        invalid_nodespec
    fi

    if [ -z "$ctdb_status_output" ] ; then
        # FIXME: need to do something if $CTDB_NODES_SOCKETS is set.
        if ! ctdb_status_output=$(ctdb -Y status 2>/dev/null) ; then
            echo "${prog}: unable to get status of CTDB nodes" >&2
            exit 1
        fi
        # Drop everything up to and including the first space.
        ctdb_status_output="${ctdb_status_output#* }"
    fi

    local matched=""
    local entry
    local head
    for entry in $ctdb_status_output ; do
        # Try removing the flags from the end; an unchanged result
        # means the entry did not match.
        head="${entry%:${flag_pattern}:}"
        [ "$head" = "$entry" ] && continue

        # Matched.  The address is the last remaining field.  NOTE:
        # this is an optimisation.  It might be better to get the
        # node number and then get the nth node to get the address.
        # This would make things more consistent if /etc/ctdb/nodes
        # actually contained hostnames.
        matched="${matched} ${head##*:}"
    done

    echo $matched
}
|
|
|
|
# Space separated ":<property>:<node number>" entries already looked up.
ctdb_props="" # cache

# Print the address of the node that holds a cluster property.
#
# Arguments: $1 - node list as a single whitespace separated string;
#            $2 - property name, passed to ctdb as its sub-command
#                 (callers use recmaster, lvsmaster and natgwlist).
# Outputs:   the node selected by echo_nth.
# Exits with status 1 and a message on stderr if ctdb fails or
# reports nothing.
get_node_with_property ()
{
    local all_nodes="$1"
    local prop="$2"

    local prop_node=""
    # The leading "*" lets the lookup find an entry anywhere in the
    # cache, not only when it happens to be the first one.
    if [ "${ctdb_props##*:${prop}:}" = "$ctdb_props" ] ; then
        # Not cached.  Test the status of ctdb itself: checking $?
        # after a later assignment would always see 0 and treat the
        # output of a failed command as a node number.
        if prop_node=$(ctdb "$prop" -Y 2>/dev/null) ; then
            # We only want the first line.
            local nl=$'\n'
            prop_node="${prop_node%%${nl}*}"
            ctdb_props="${ctdb_props}${ctdb_props:+ }:${prop}:${prop_node}"
        else
            prop_node=""
        fi
    else
        # Cached: take what follows ":<prop>:" up to the next space.
        prop_node="${ctdb_props##*:${prop}:}"
        prop_node="${prop_node%% *}"
    fi

    if [ -n "$prop_node" ] ; then
        # $all_nodes is deliberately unquoted: one argument per node.
        echo_nth "$prop_node" $all_nodes
    else
        echo "${prog}: No ${prop} available" >&2
        exit 1
    fi
}
|
|
|
|
# Print the address of one node on which "ctdb pnn" currently works.
#
# Arguments: $1 - node list as a single whitespace separated string.
# Outputs:   the node chosen by echo_nth for the first "PNN:<n>" line
#            in the output of the recursive onnode run.
# Returns:   0 once a node has been printed, 1 if no line matched.
get_any_available_node ()
{
    local all_nodes="$1"

    # We do a recursive onnode to find which nodes are up and running.
    # $0 is quoted so that a script path containing whitespace still
    # works.
    local out
    out=$("$0" -pq all ctdb pnn 2>&1)

    local line
    # -r keeps backslashes in error messages literal.
    while read -r line ; do
        local pnn="${line#PNN:}"
        if [ "$pnn" != "$line" ] ; then
            # $all_nodes is deliberately unquoted: one argument per node.
            echo_nth "$pnn" $all_nodes
            return 0
        fi
        # Else must be an error message from a down node.
    done <<<"$out"
    return 1
}
|
|
|
|
# Translate a node specification into node addresses.
#
# Arguments: $1 - node specification (see usage).
# Globals:   CTDB_NODES_SOCKETS (read), CTDB_NODES_FILE (read; set to
#            /etc/ctdb/nodes when it does not name a regular file).
# Outputs:   the selected nodes on stdout.
# Exits with status 1 if the specification is invalid or a selector
# fails.
get_nodes ()
{
    local all_nodes

    if [ -n "$CTDB_NODES_SOCKETS" ] ; then
        all_nodes="$CTDB_NODES_SOCKETS"
    else
        [ -f "$CTDB_NODES_FILE" ] || CTDB_NODES_FILE=/etc/ctdb/nodes
        # Strip comments and spaces; a line left empty becomes "#DEAD"
        # so that later nodes keep their position in the list.
        all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$CTDB_NODES_FILE")
    fi

    # Expand the specification before looping.  If the expansion is
    # written directly in the "for" word list, a failure that prints
    # nothing runs the loop zero times and is never noticed.
    local specs
    specs=$(parse_nodespec "$1") || exit 1

    local n
    for n in $specs ; do
        case "$n" in
            all)
                echo "${all_nodes//#DEAD/}"
                ;;
            any)
                get_any_available_node "$all_nodes" || exit 1
                ;;
            ok|healthy)
                get_nodes_with_status "$all_nodes" "healthy" || exit 1
                ;;
            con|connected)
                get_nodes_with_status "$all_nodes" "connected" || exit 1
                ;;
            rm|recmaster)
                get_node_with_property "$all_nodes" "recmaster" || exit 1
                ;;
            lvs|lvsmaster)
                get_node_with_property "$all_nodes" "lvsmaster" || exit 1
                ;;
            natgw|natgwlist)
                get_node_with_property "$all_nodes" "natgwlist" || exit 1
                ;;
            *)
                # $all_nodes is deliberately unquoted: one argument
                # per node.
                echo_nth "$n" $all_nodes
        esac
    done
}
|
|
|
|
# Stand-in for ssh: run a command locally instead of on a remote node.
#
# Arguments: $1 - value exported to the command as CTDB_SOCKET;
#            $2 - command line, run by "sh -c".
# File descriptor 3 of the command is pointed at /dev/null.
fakessh ()
{
    local socket="$1"
    local cmdline="$2"

    CTDB_SOCKET="$socket" sh -c "$cmdline" 3>/dev/null
}
|
|
|
|
# Filter for a node's standard output (stdin to stdout).
#
# Globals read: prefix, verbose, parallel, n (the current node).
# With a prefix set, input is saved to "<prefix>.<node>" with every
# "/" in the node name replaced by "_".  Otherwise, in verbose
# parallel mode each line is tagged with "[<node>] ", and in any other
# mode the input is passed through unchanged.
stdout_filter ()
{
    if [ -z "$prefix" ] ; then
        if $verbose && $parallel ; then
            sed -e "s@^@[$n] @"
        else
            cat
        fi
    else
        cat >"${prefix}.${n//\//_}"
    fi
}
|
|
|
|
# Filter for a node's standard error (stdin to stdout).
#
# Globals read: verbose, parallel, n (the current node).
# In verbose parallel mode each line is tagged with "[<node>] ";
# otherwise the input is passed through unchanged.
stderr_filter ()
{
    if ! { $verbose && $parallel ; } ; then
        cat
        return
    fi

    sed -e "s@^@[$n] @"
}
|
|
|
|
######################################################################
|
|
|
|
# Read the options, the node specification and the command.
parse_options "$@"

# With -c, make the command change to the caller's working directory
# first.  NOTE(review): $PWD is inserted into the command line as-is,
# so a directory name containing whitespace or shell metacharacters is
# not protected.
if $current ; then
    command="cd $PWD && $command"
fi
|
|
|
|
# Choose the program used to reach each node and its options.
ssh_opts=
if [ -z "$CTDB_NODES_SOCKETS" ] ; then
    # Could "2>/dev/null || true" but want to see errors from typos in file.
    if [ -r /etc/ctdb/onnode.conf ] ; then
        . /etc/ctdb/onnode.conf
    fi

    # The configuration file (or the environment) may have chosen a
    # different program; fall back to ssh.
    SSH="${SSH:-ssh}"

    case "$SSH" in
        ssh) ssh_opts="-n" ;;
        *) : ;; # rsh? All bets are off!
    esac
else
    # Nodes are local sockets: run commands locally via fakessh.
    SSH=fakessh
fi
|
|
|
|
######################################################################
|
|
|
|
# Resolve the node specification.  get_nodes runs in a subshell, so
# an "exit" inside it only shows up here as a non-zero status.
nodes=$(get_nodes "$nodespec") || exit 1

if $quiet ; then
    verbose=false
else
    # If $nodes contains a space or a newline then assume multiple
    # nodes and print node addresses.
    nl=$'\n'
    case "$nodes" in
        *" "*|*"${nl}"*) verbose=true ;;
    esac
fi
|
|
|
|
# Run the command on every selected node, one after another or (with
# -p) in parallel, then exit with the status of the last failure seen,
# or 0 if every node succeeded.
pids=""
trap 'kill -TERM $pids 2>/dev/null' INT TERM
# There's a small race here where the kill can fail if no processes
# have been added to $pids and the script is interrupted.  However,
# the part of the window where it matters is very small.
retcode=0

# Let a failure of the remote command show through the filter
# pipelines.  Any error from setting the option is discarded.
set -o pipefail 2>/dev/null

for n in $nodes ; do
    if $parallel ; then
        # fd 3 carries the filtered stdout past the stderr pipeline.
        # $SSH, $ssh_opts and $EXTRA_SSH_OPTS are deliberately
        # unquoted so that they split into separate words.
        { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; } &
        pids="${pids} $!"
    else
        if $verbose ; then
            echo >&2 ; echo ">> NODE: $n <<" >&2
        fi

        { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; }
        # Save the status before testing it: after a failed "[ ... ]"
        # the value of $? is the status of the test (1), not of the
        # command.
        rc=$?
        [ "$rc" -eq 0 ] || retcode=$rc
    fi
done

if $parallel ; then
    for p in $pids ; do
        wait "$p"
        rc=$?
        [ "$rc" -eq 0 ] || retcode=$rc
    done
fi

exit $retcode
|