mirror of
https://github.com/samba-team/samba.git
synced 2024-12-23 17:34:34 +03:00
4697829e7c
Commit a0f5148ac749758e2dfbd6099e829c5bf1d900e6 caused a subtle regression. Due to the subtlety, this description is much longer than the 1 line patch that fixes it! The regression, where a process that invokes onnode is unexpectedly blocked, is only apparent if the following conditions are met: 1. $CTDB_NODES_SOCKETS is set; 2. The command passed to onnode attempts to background a process; and 3. onnode is run in certain types of subshell (e.g. foo=$(onnode ...)). In particular, when testing against local daemons (i.e. condition (1) is met), tests/simple/07_ctdb_process_exists.sh would fail (because it does both (2), (3)). The problem is caused by the use of file descriptor 3 in the code that allows separate filtering of stdout and stderr. A backgrounded process will have this descriptor open and the $(...) construct appears to wait for all file descriptors to be closed. This only happens with local daemons because SSH is replaced by a shell and file descriptor 3 leaks into that shell. It does not occur when SSH is used because the file descriptor does not leak into the remote shell where the process is backgrounded. The fix is simply to redirect file descriptor 3 to /dev/null in the fakessh function, which is used when $CTDB_NODES_SOCKETS is set. Also fixed is another minor bug when the -o option and $CTDB_NODES_SOCKETS are used in combination. The code uses the node name as a suffix for the output filename(s). Usually this is an IP address. However, when $CTDB_NODES_SOCKETS is in use the node name is the socket name, which might be a path several directories deep. Each output file is created via a simple redirection and this would fail if unexpected directories appear in the filename. 3 possible fixes were considered: 1. Replace all '/'s in the node name by '_'s. Nice and simple. 2. Use the basename of the node name. However, sockets may be in different directories but have the same basename. 3. Create all required directories before redirecting. 
This is a little more complex and probably doesn't meet the user's expectations. Option (1) is implemented here. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit c97d56d93d9c1007a4e85affb19ed0c2d0e11b6d)
330 lines
7.6 KiB
Bash
Executable File
330 lines
7.6 KiB
Bash
Executable File
#!/bin/bash

# Run commands on CTDB nodes.

# See http://ctdb.samba.org/ for more information about CTDB.

# Copyright (C) Martin Schwenke 2008

# Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.

# Copyright (C) Andrew Tridgell 2007

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

# Name this script was invoked as, without any directory part.  Used as
# the prefix of diagnostics and as the program name given to getopt.
# "$0" is quoted so that an invocation path containing whitespace or
# glob characters is passed to basename as a single argument.
prog=$(basename "$0")

# Print the command-line synopsis to standard error and terminate the
# script with exit status 1.  This function never returns.
usage ()
{
    cat >&2 <<EOF
Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
  options:
    -c          Run in current working directory on specified nodes.
    -o <prefix> Save standard output from each node to file <prefix>.<ip>
    -p          Run command in parallel on specified nodes.
    -q          Do not print node addresses (overrides -v).
    -v          Print node address even for a single node.
  <NODES>       "all", "ok" (or "healthy"), "con" (or "connected"),
                "rm" (or "recmaster"), "lvs" (or "lvsmaster"),
                "natgw" (or "natgwlist");
                or a node number (0 base); or
                list (comma separated) of <NODES>; or
                range (hyphen separated) of node numbers.
EOF
    exit 1
}

# Report a malformed <NODES> argument on standard error (message plus a
# blank line) and then hand over to usage, which prints the synopsis
# and exits the current (sub)shell with status 1.
invalid_nodespec ()
{
    echo "Invalid <nodespec>" >&2
    echo >&2
    usage
}

# Defaults.  Each flag holds the literal command name "true" or "false"
# so that it can be executed directly as a condition (e.g. "if $quiet").
current=false   # -c: prefix the command with a cd to $PWD
parallel=false  # -p: run on all selected nodes concurrently
verbose=false   # -v: print node address even for a single node
quiet=false     # -q: never print node addresses (overrides -v)
prefix=""       # -o <prefix>: save each node's stdout to <prefix>.<node>

# Parse onnode's own options and positional arguments.
#
# Arguments: the script's complete argument list ("$@").
# Globals read:    prog (program name given to getopt for diagnostics).
# Globals written: current, parallel, verbose, quiet, prefix (from -c,
#                  -p, -v, -q and -o <prefix>); nodespec (first
#                  non-option argument); command (all remaining
#                  arguments joined by single spaces).
# Calls usage (which exits with status 1) for -h/--help, for any option
# rejected by getopt, and when fewer than 2 non-option arguments remain.
parse_options ()
{
    # $POSIXLY_CORRECT means that the command passed to onnode can
    # take options and getopt won't reorder things to make them
    # options to onnode.
    #
    # The declaration is kept separate from the assignment: with
    # "local temp=$(...)" the status seen afterwards is that of
    # "local" (always 0), so a getopt failure would go unnoticed.
    local temp
    temp=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cho:pqv" -l help -- "$@") || usage

    eval set -- "$temp"

    while true ; do
        case "$1" in
            -c) current=true ; shift ;;
            -o) prefix="$2" ; shift 2 ;;
            -p) parallel=true ; shift ;;
            -q) quiet=true ; shift ;;
            -v) verbose=true ; shift ;;
            --) shift ; break ;;
            -h|--help|*) usage ;; # Shouldn't happen, so this is reasonable.
        esac
    done

    [ $# -lt 2 ] && usage

    nodespec="$1" ; shift
    # "$*" makes the join-into-one-string explicit.
    command="$*"
}

# Print the n-th (0-based) entry of a list of nodes.
#
# Arguments: $1     - index n
#            $2...  - the node list, one node per argument
# Globals read: prog (prefix for the error message).
# Output: the selected node on standard output.
# Exits the current (sub)shell with status 1, after a message on
# standard error, if n is not a non-negative integer, if there is no
# n-th entry, or if that entry is the placeholder "#DEAD".
echo_nth ()
{
    local n="$1" ; shift

    local node=""
    case "$n" in
        ''|*[!0-9]*)
            # Not a plain non-negative integer: no such node.
            ;;
        *)
            # Only shift when the index is in range.  "shift N" with N
            # greater than $# fails WITHOUT shifting, which would leave
            # the first node in $1 and silently select node 0.
            if [ "$n" -lt $# ] 2>/dev/null ; then
                shift "$n"
                node="$1"
            fi
            ;;
    esac

    if [ -n "$node" ] && [ "$node" != "#DEAD" ] ; then
        echo "$node"
    else
        echo "${prog}: \"node ${n}\" does not exist" >&2
        exit 1
    fi
}

# Expand a <NODES> specification into one item per output line.
#
# Arguments: $1 - comma separated list whose items are a node number,
#                 a hyphen separated range of node numbers, or one of
#                 the keywords listed in the case statement below.
# Output: ranges are expanded with seq (one number per line); numbers
#         and keywords are echoed unchanged.
# On a malformed item invalid_nodespec is called; it exits the
# subshell, so this function then returns a non-zero status.  Items
# before the malformed one have already been printed by then.
parse_nodespec ()
{
    # Subshell avoids hacks to restore $IFS (and the noglob setting).
    (
        IFS=","
        # Without this, the unquoted $1 below undergoes pathname
        # expansion and an item such as "?" could turn into the name
        # of a file in the current directory.
        set -f
        for i in $1 ; do
            case "$i" in
                *-*) seq "${i%-*}" "${i#*-}" 2>/dev/null || invalid_nodespec ;;
                # Separate lines for readability.
                all|ok|healthy|con|connected) echo "$i" ;;
                rm|recmaster|lvs|lvsmaster|natgw|natgwlist) echo "$i" ;;
                *)
                    # Anything else must be an integer >= 0; a
                    # non-integer makes [ fail, which is also caught.
                    [ "$i" -gt -1 ] 2>/dev/null || invalid_nodespec
                    echo "$i"
                    ;;
            esac
        done
    )
}

ctdb_status_output="" # cache

# Print the addresses of all nodes whose status flags match a class.
#
# Arguments: $1 - list of all nodes (accepted for symmetry with
#                 get_node_with_property; not used here)
#            $2 - "healthy" or "connected"
# Globals read:    prog
# Globals written: ctdb_status_output (output of "ctdb -Y status",
#                  fetched once and reused by later calls made in the
#                  same shell process).
# Output: matching addresses on one line, separated by single spaces.
# Exits the current (sub)shell with status 1 if "ctdb -Y status"
# fails; any other $2 is handed to invalid_nodespec.
get_nodes_with_status ()
{
    local status="$2"

    # Glob pattern for the trailing colon-separated flag fields of a
    # status line.
    local bits
    case "$status" in
        healthy)
            bits="0:0:0:0"
            ;;
        connected)
            bits="0:[0-1]:[0-1]:[0-1]"
            ;;
        *)
            invalid_nodespec
    esac

    if [ -z "$ctdb_status_output" ] ; then
        # FIXME: need to do something if $CTDB_NODES_SOCKETS is set.
        if ! ctdb_status_output=$(ctdb -Y status 2>/dev/null) ; then
            echo "${prog}: unable to get status of CTDB nodes" >&2
            exit 1
        fi
        # Drops everything up to and including the first space, if
        # there is one.  NOTE(review): presumably meant to discard a
        # leading header - confirm against real "ctdb -Y status" output.
        ctdb_status_output="${ctdb_status_output#* }"
    fi

    local nodes=""
    local i t
    for i in $ctdb_status_output ; do
        # Try removing bits from end.
        t="${i%:${bits}:}"
        if [ "$t" != "$i" ] ; then
            # Succeeded.  Get address.  NOTE: this is an optimisation.
            # It might be better to get the node number and then get
            # the nth node to get the address.  This would make things
            # more consistent if /etc/ctdb/nodes actually contained
            # hostnames.
            nodes="${nodes} ${t##*:}"
        fi
    done

    # Deliberately unquoted: word splitting drops the leading space.
    echo $nodes
}

ctdb_props="" # cache

# Print the node that currently holds a given cluster property.
#
# Arguments: $1 - list of all nodes (whitespace separated)
#            $2 - property; used verbatim as the ctdb sub-command
#                 ("ctdb <property> -Y")
# Globals read:    prog
# Globals written: ctdb_props - space separated ":<prop>:<node number>"
#                  entries remembering earlier successful answers made
#                  in the same shell process.
# Output: via echo_nth, the entry of $1 selected by the node number.
# Exits the current (sub)shell with status 1, after a message on
# standard error, if no node number can be determined.
get_node_with_property ()
{
    local all_nodes="$1"
    local prop="$2"

    local nl="
"
    local prop_node=""

    # Strip everything up to and including ":<prop>:" wherever it
    # occurs.  Anchoring at the start would only ever find the first
    # property that was cached.
    local cached="${ctdb_props##*:${prop}:}"

    if [ "$cached" = "$ctdb_props" ] ; then
        # Not in cache.  Test the status of ctdb itself: checking $?
        # after a later assignment would always see 0.
        if prop_node=$(ctdb "$prop" -Y 2>/dev/null) ; then
            # We only want the first line...
            prop_node="${prop_node%%${nl}*}"
            # ...and only its first word, which is also all that the
            # cached branch below returns.
            prop_node="${prop_node%% *}"
            ctdb_props="${ctdb_props}${ctdb_props:+ }:${prop}:${prop_node}"
        else
            prop_node=""
        fi
    else
        prop_node="${cached%% *}"
    fi

    if [ -n "$prop_node" ] ; then
        # $all_nodes is deliberately unquoted: one argument per node.
        echo_nth "$prop_node" $all_nodes
    else
        echo "${prog}: No ${prop} available" >&2
        exit 1
    fi
}

# Resolve a <NODES> specification to node addresses.
#
# Arguments: $1 - node specification (see usage).
# Environment read: CTDB_NODES_SOCKETS - if non-empty, used as the list
#                     of all nodes;
#                   CTDB_NODES_FILE - otherwise the nodes file to read;
#                     replaced by /etc/ctdb/nodes when it does not name
#                     an existing regular file.
# Output: the selected nodes on standard output.
# Exits the current (sub)shell with status 1 if the specification is
# invalid or one of the lookups fails.  Intended to be run inside a
# command substitution.
get_nodes ()
{
    local all_nodes

    if [ -n "$CTDB_NODES_SOCKETS" ] ; then
        all_nodes="$CTDB_NODES_SOCKETS"
    else
        [ -f "$CTDB_NODES_FILE" ] || CTDB_NODES_FILE=/etc/ctdb/nodes
        # Strip comments and spaces; a line left empty becomes the
        # placeholder #DEAD so that the remaining lines keep their
        # position (node number) in the list.
        all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$CTDB_NODES_FILE")
    fi

    # Expand the specification before looping.  Inside "for n in $(...)"
    # a failure that produced no output would mean zero iterations and a
    # return status of 0, hiding the error from the caller.
    local items
    items=$(parse_nodespec "$1") || exit 1

    local n
    for n in $items ; do
        case "$n" in
            all)
                echo "${all_nodes//#DEAD/}"
                ;;
            ok|healthy)
                get_nodes_with_status "$all_nodes" "healthy" || exit 1
                ;;
            con|connected)
                get_nodes_with_status "$all_nodes" "connected" || exit 1
                ;;
            rm|recmaster)
                get_node_with_property "$all_nodes" "recmaster" || exit 1
                ;;
            lvs|lvsmaster)
                get_node_with_property "$all_nodes" "lvsmaster" || exit 1
                ;;
            natgw|natgwlist)
                get_node_with_property "$all_nodes" "natgwlist" || exit 1
                ;;
            *)
                # $all_nodes is deliberately unquoted: one argument
                # per node.
                echo_nth "$n" $all_nodes
                ;;
        esac
    done
}

# Stand-in for ssh used when $CTDB_NODES_SOCKETS is set: runs the
# command in a local "sh -c" instead of on a remote host.
#
# Arguments: $1 - "node" name; exported to the command as CTDB_SOCKET
#            $2 - command string to run
# Returns: the exit status of the command.
#
# File descriptor 3 is pointed at /dev/null for the child so that
# anything the command leaves running in the background does not
# inherit the descriptor the caller uses to route standard output.
fakessh ()
{
    CTDB_SOCKET="$1" sh -c "$2" 3>/dev/null
}

# Filter for the standard output of the command run on one node.
# Reads standard input.
#
# Globals read: prefix, verbose, parallel, n (name of the current node).
#   - prefix set: input is saved to "<prefix>.<node>", with every "/" in
#     the node name replaced by "_" so that a node name that is a path
#     still yields a single file name; nothing is printed.
#   - otherwise, verbose and parallel both true: each line is printed
#     prefixed with "[<node>] ".
#   - otherwise: input is copied through unchanged.
stdout_filter ()
{
    if [ -n "$prefix" ] ; then
        cat >"${prefix}.${n//\//_}"
    elif $verbose && $parallel ; then
        sed -e "s@^@[$n] @"
    else
        cat
    fi
}

# Filter for the standard error of the command run on one node.
# Reads standard input, writes standard output.
#
# Globals read: verbose, parallel, n (name of the current node).
# When verbose and parallel are both true each line is prefixed with
# "[<node>] "; otherwise the input is copied through unchanged.
stderr_filter ()
{
    if $verbose && $parallel ; then
        sed -e "s@^@[$n] @"
    else
        cat
    fi
}

######################################################################

parse_options "$@"

# With -c, change to the caller's working directory on each node first.
# NOTE(review): $PWD is inserted into the command string unquoted, so a
# directory name containing whitespace or shell metacharacters will not
# survive re-parsing by the shell on the node.
$current && command="cd $PWD && $command"

ssh_opts=
if [ -n "$CTDB_NODES_SOCKETS" ] ; then
    SSH=fakessh
else
    # Could "2>/dev/null || true" but want to see errors from typos in file.
    [ -r /etc/ctdb/onnode.conf ] && . /etc/ctdb/onnode.conf
    [ -n "$SSH" ] || SSH=ssh
    if [ "$SSH" = "ssh" ] ; then
        ssh_opts="-n"
    else
        : # rsh?  All bets are off!
    fi
fi

######################################################################

# get_nodes runs in a subshell, so an "exit 1" inside it only shows up
# here as the status of the command substitution.
nodes=$(get_nodes "$nodespec") || exit 1

if $quiet ; then
    verbose=false
else
    # If $nodes contains a space or a newline then assume multiple nodes.
    nl="
"
    [ "$nodes" != "${nodes%[ ${nl}]*}" ] && verbose=true
fi

pids=""
trap 'kill -TERM $pids 2>/dev/null' INT TERM
# There's a small race here where the kill can fail if no processes
# have been added to $pids and the script is interrupted.  However,
# the part of the window where it matters is very small.

# Make a pipeline report failure if any of its stages fails, so that
# the status of the remote command is not hidden by the filters.
# Errors are ignored in case the shell lacks the option.
set -o pipefail 2>/dev/null

retcode=0
for n in $nodes ; do
    # In both branches below: fd 3 is a copy of the current stdout.
    # The command's stdout goes through stdout_filter to fd 3; its
    # stderr is piped through stderr_filter, whose output is written to
    # this script's stdout.  That lets the two streams be filtered
    # separately.
    if $parallel ; then
        { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; } &
        pids="${pids} $!"
    else
        if $verbose ; then
            echo >&2 ; echo ">> NODE: $n <<" >&2
        fi

        { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; }
        # Save the status first: after a failed "[ $? = 0 ]" test, $?
        # is the status of "[" (1), not that of the command.
        rc=$?
        [ $rc -eq 0 ] || retcode=$rc
    fi
done

# In parallel mode collect the status of every background job; the
# last non-zero status seen becomes the script's exit status.
$parallel && {
    for p in $pids; do
        wait $p
        rc=$?
        [ $rc -eq 0 ] || retcode=$rc
    done
}

exit $retcode