1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-05 21:57:51 +03:00

Merge remote-tracking branch 'origin/master' into eventscripts_relative

(This used to be ctdb commit b723f23fc9c38e75b91d43306d606be26c55d31d)
This commit is contained in:
Martin Schwenke 2011-08-08 13:25:40 +10:00
commit 46c908d542
38 changed files with 1668 additions and 287 deletions

View File

@ -81,12 +81,17 @@ static void dump_packet(unsigned char *data, size_t len)
/*
called when an incoming connection is readable
This function MUST be safe for reentry via the queue callback!
*/
static void queue_io_read(struct ctdb_queue *queue)
{
int num_ready = 0;
ssize_t nread, totread, partlen;
uint8_t *data, *data_base;
uint32_t sz_bytes_req;
uint32_t pkt_size;
uint32_t pkt_bytes_remaining;
uint32_t to_read;
ssize_t nread;
uint8_t *data;
if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
return;
@ -96,93 +101,77 @@ static void queue_io_read(struct ctdb_queue *queue)
goto failed;
}
queue->partial.data = talloc_realloc_size(queue, queue->partial.data,
num_ready + queue->partial.length);
if (queue->partial.data == NULL) {
DEBUG(DEBUG_ERR,("%s: read error alloc failed for %u\n",
queue->name, num_ready + queue->partial.length));
goto failed;
/* starting fresh, allocate buf for size bytes */
sz_bytes_req = sizeof(pkt_size);
queue->partial.data = talloc_size(queue, sz_bytes_req);
if (queue->partial.data == NULL) {
DEBUG(DEBUG_ERR,("read error alloc failed for %u\n",
sz_bytes_req));
goto failed;
}
} else if (queue->partial.length < sizeof(pkt_size)) {
/* yet to find out the packet length */
sz_bytes_req = sizeof(pkt_size) - queue->partial.length;
} else {
/* partial packet, length known, full buf allocated */
sz_bytes_req = 0;
}
nread = read(queue->fd, queue->partial.data + queue->partial.length, num_ready);
if (nread <= 0) {
DEBUG(DEBUG_ERR,("%s: read error nread=%d\n",
queue->name, (int)nread));
goto failed;
}
totread = nread;
partlen = queue->partial.length;
data = queue->partial.data;
nread += queue->partial.length;
if (sz_bytes_req > 0) {
to_read = MIN(sz_bytes_req, num_ready);
nread = read(queue->fd, data + queue->partial.length,
to_read);
if (nread <= 0) {
DEBUG(DEBUG_ERR,("read error nread=%d\n", (int)nread));
goto failed;
}
queue->partial.length += nread;
if (nread < sz_bytes_req) {
/* not enough to know the length */
DEBUG(DEBUG_DEBUG,("Partial packet length read\n"));
return;
}
/* size now known, allocate buffer for the full packet */
queue->partial.data = talloc_realloc_size(queue, data,
*(uint32_t *)data);
if (queue->partial.data == NULL) {
DEBUG(DEBUG_ERR,("read error alloc failed for %u\n",
*(uint32_t *)data));
goto failed;
}
data = queue->partial.data;
num_ready -= nread;
}
pkt_size = *(uint32_t *)data;
if (pkt_size == 0) {
DEBUG(DEBUG_CRIT,("Invalid packet of length 0\n"));
goto failed;
}
pkt_bytes_remaining = pkt_size - queue->partial.length;
to_read = MIN(pkt_bytes_remaining, num_ready);
nread = read(queue->fd, data + queue->partial.length,
to_read);
if (nread <= 0) {
DEBUG(DEBUG_ERR,("read error nread=%d\n",
(int)nread));
goto failed;
}
queue->partial.length += nread;
if (queue->partial.length < pkt_size) {
DEBUG(DEBUG_DEBUG,("Partial packet data read\n"));
return;
}
queue->partial.data = NULL;
queue->partial.length = 0;
if (nread >= 4 && *(uint32_t *)data == nread) {
/* it is the responsibility of the incoming packet
function to free 'data' */
queue->callback(data, nread, queue->private_data);
return;
}
data_base = data;
while (nread >= 4 && *(uint32_t *)data <= nread) {
/* we have at least one packet */
uint8_t *d2;
uint32_t len;
bool destroyed = false;
len = *(uint32_t *)data;
if (len == 0) {
/* bad packet! treat as EOF */
DEBUG(DEBUG_CRIT,("%s: Invalid packet of length 0 (nread = %zu, totread = %zu, partlen = %zu)\n",
queue->name, nread, totread, partlen));
dump_packet(data_base, totread + partlen);
goto failed;
}
d2 = talloc_memdup(queue, data, len);
if (d2 == NULL) {
DEBUG(DEBUG_ERR,("%s: read error memdup failed for %u\n",
queue->name, len));
/* sigh */
goto failed;
}
queue->destroyed = &destroyed;
queue->callback(d2, len, queue->private_data);
/* If callback freed us, don't do anything else. */
if (destroyed) {
return;
}
queue->destroyed = NULL;
data += len;
nread -= len;
}
if (nread > 0) {
/* we have only part of a packet */
if (data_base == data) {
queue->partial.data = data;
queue->partial.length = nread;
} else {
queue->partial.data = talloc_memdup(queue, data, nread);
if (queue->partial.data == NULL) {
DEBUG(DEBUG_ERR,("%s: read error memdup partial failed for %u\n",
queue->name, (unsigned)nread));
goto failed;
}
queue->partial.length = nread;
talloc_free(data_base);
}
return;
}
talloc_free(data_base);
/* it is the responsibility of the callback to free 'data' */
queue->callback(data, pkt_size, queue->private_data);
return;
failed:

View File

@ -302,11 +302,8 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
exit(11);
}
ctdb->db_persistent_check_errors = 0;
DEBUG(DEBUG_NOTICE,(__location__
"ctdb_start_monitoring: ctdb_recheck_persistent_health() OK\n"));
DEBUG(DEBUG_NOTICE,(__location__ " Recoveries finished. Running the \"startup\" event.\n"));
DEBUG(DEBUG_ERR,(__location__ " Allow clients to attach to databases.\n"));
event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
timeval_current(),
ctdb_check_health, ctdb);

View File

@ -1638,7 +1638,6 @@ static int do_recovery(struct ctdb_recoverd *rec,
DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
rec->need_takeover_run = true;
}
DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n"));
/* execute the "recovered" event script on all nodes */
ret = run_recovered_eventscript(ctdb, nodemap, "do_recovery");

View File

@ -2633,8 +2633,6 @@ int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
iface = ctdb_find_iface(ctdb, info->name);
if (iface == NULL) {
DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
info->name));
return -1;
}

View File

@ -196,7 +196,7 @@ int main(int argc, const char *argv[])
exit(1);
}
DEBUG(DEBUG_NOTICE,("Starting CTDB daemon\n"));
DEBUG(DEBUG_NOTICE,("CTDB starting on node\n"));
gettimeofday(&ctdb->ctdbd_start_time, NULL);
gettimeofday(&ctdb->last_recovery_started, NULL);

View File

@ -103,15 +103,9 @@ try_command_on_node $test_node "echo \"function exportfs () { echo $foo_dir 127.
n="$rc_local_d/nfs-skip-share-check"
n_contents='loadconfig() {
name="$1"
if [ -f /etc/sysconfig/$name ]; then
. /etc/sysconfig/$name
elif [ -f /etc/default/$name ]; then
. /etc/default/$name
elif [ -f $CTDB_BASE/sysconfig/$name ]; then
. $CTDB_BASE/sysconfig/$name
fi
if [ "$name" = "ctdb" ] ; then
_loadconfig "$@"
if [ "$1" = "ctdb" -o "$1" = "nfs" ] ; then
CTDB_NFS_SKIP_SHARE_CHECK=no
fi
}

View File

@ -108,15 +108,9 @@ try_command_on_node $test_node "echo 'function testparm () { tp=\$(which testpar
n="$rc_local_d/samba-skip-share-check"
n_contents='loadconfig() {
name="$1"
if [ -f /etc/sysconfig/$name ]; then
. /etc/sysconfig/$name
elif [ -f /etc/default/$name ]; then
. /etc/default/$name
elif [ -f $CTDB_BASE/sysconfig/$name ]; then
. $CTDB_BASE/sysconfig/$name
fi
if [ "$name" = "ctdb" ] ; then
_loadconfig "$@"
if [ "$1" = "ctdb" ] ; then
CTDB_SAMBA_SKIP_SHARE_CHECK=no
fi
}

24
ctdb/tests/onnode/0001.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE all hostname"
define_test "$cmd" "all nodes OK"
required_result <<EOF
>> NODE: 192.168.1.101 <<
-n 192.168.1.101 hostname
>> NODE: 192.168.1.102 <<
-n 192.168.1.102 hostname
>> NODE: 192.168.1.103 <<
-n 192.168.1.103 hostname
>> NODE: 192.168.1.104 <<
-n 192.168.1.104 hostname
EOF
simple_test $cmd

16
ctdb/tests/onnode/0002.sh Executable file
View File

@ -0,0 +1,16 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE -q all hostname"
define_test "$cmd" "all nodes OK"
required_result <<EOF
-n 192.168.1.101 hostname
-n 192.168.1.102 hostname
-n 192.168.1.103 hostname
-n 192.168.1.104 hostname
EOF
simple_test $cmd

16
ctdb/tests/onnode/0003.sh Executable file
View File

@ -0,0 +1,16 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE -p all hostname"
define_test "$cmd" "all nodes OK"
required_result <<EOF
[192.168.1.101] -n 192.168.1.101 hostname
[192.168.1.102] -n 192.168.1.102 hostname
[192.168.1.103] -n 192.168.1.103 hostname
[192.168.1.104] -n 192.168.1.104 hostname
EOF
simple_test -s $cmd

16
ctdb/tests/onnode/0004.sh Executable file
View File

@ -0,0 +1,16 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE -pq all hostname"
define_test "$cmd" "all nodes OK"
required_result <<EOF
-n 192.168.1.101 hostname
-n 192.168.1.102 hostname
-n 192.168.1.103 hostname
-n 192.168.1.104 hostname
EOF
simple_test -s $cmd

13
ctdb/tests/onnode/0005.sh Executable file
View File

@ -0,0 +1,13 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE 3 hostname"
define_test "$cmd" "all nodes OK"
required_result <<EOF
-n 192.168.1.104 hostname
EOF
simple_test $cmd

15
ctdb/tests/onnode/0006.sh Executable file
View File

@ -0,0 +1,15 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE -v 3 hostname"
define_test "$cmd" "all nodes OK"
required_result <<EOF
>> NODE: 192.168.1.104 <<
-n 192.168.1.104 hostname
EOF
simple_test $cmd

32
ctdb/tests/onnode/0070.sh Executable file
View File

@ -0,0 +1,32 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE ok hostname"
define_test "$cmd" "all nodes OK"
ctdb_set_output <<EOF
:Node:IP:Disconnected:Banned:Disabled:Unhealthy:Stopped:Inactive:
:0:192.168.1.101:0:0:0:0:0:0:
:1:192.168.1.102:0:0:0:0:0:0:
:2:192.168.1.103:0:0:0:0:0:0:
:3:192.168.1.104:0:0:0:0:0:0:
EOF
required_result <<EOF
>> NODE: 192.168.1.101 <<
-n 192.168.1.101 hostname
>> NODE: 192.168.1.102 <<
-n 192.168.1.102 hostname
>> NODE: 192.168.1.103 <<
-n 192.168.1.103 hostname
>> NODE: 192.168.1.104 <<
-n 192.168.1.104 hostname
EOF
simple_test $cmd

30
ctdb/tests/onnode/0071.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/sh
# onnode test: the "ok" nodespec must leave out a node whose
# Disconnected flag is set in the ctdb status output.
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE ok hostname"
define_test "$cmd" "2nd node disconnected"
# Canned ctdb output: node 1 (192.168.1.102) has Disconnected=1.
# The here-document is opened exactly once so that the header row is
# the first line of the fake output, as in the sibling status tests.
ctdb_set_output <<EOF
:Node:IP:Disconnected:Banned:Disabled:Unhealthy:Stopped:Inactive:
:0:192.168.1.101:0:0:0:0:0:0:
:1:192.168.1.102:1:0:0:0:0:0:
:2:192.168.1.103:0:0:0:0:0:0:
:3:192.168.1.104:0:0:0:0:0:0:
EOF
# Only the three remaining nodes should be contacted.
required_result <<EOF
>> NODE: 192.168.1.101 <<
-n 192.168.1.101 hostname
>> NODE: 192.168.1.103 <<
-n 192.168.1.103 hostname
>> NODE: 192.168.1.104 <<
-n 192.168.1.104 hostname
EOF
simple_test $cmd

29
ctdb/tests/onnode/0072.sh Executable file
View File

@ -0,0 +1,29 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE ok hostname"
define_test "$cmd" "2nd node disconnected, extra status columns"
ctdb_set_output <<EOF
:Node:IP:Disconnected:Banned:Disabled:Unhealthy:Stopped:Inactive:X1:X2:X3:X4:
:0:192.168.1.101:0:0:0:0:0:0:0:0:0:0:
:1:192.168.1.102:1:0:0:0:0:0:0:0:0:0:
:2:192.168.1.103:0:0:0:0:0:0:0:0:0:0:
:3:192.168.1.104:0:0:0:0:0:0:0:0:0:0:
EOF
required_result <<EOF
>> NODE: 192.168.1.101 <<
-n 192.168.1.101 hostname
>> NODE: 192.168.1.103 <<
-n 192.168.1.103 hostname
>> NODE: 192.168.1.104 <<
-n 192.168.1.104 hostname
EOF
simple_test $cmd

29
ctdb/tests/onnode/0075.sh Executable file
View File

@ -0,0 +1,29 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE con hostname"
define_test "$cmd" "1st node disconnected"
ctdb_set_output <<EOF
:Node:IP:Disconnected:Banned:Disabled:Unhealthy:Stopped:Inactive:
:0:192.168.1.101:1:0:0:0:0:0:
:1:192.168.1.102:0:0:0:0:0:0:
:2:192.168.1.103:0:0:0:0:0:0:
:3:192.168.1.104:0:0:0:0:0:0:
EOF
required_result <<EOF
>> NODE: 192.168.1.102 <<
-n 192.168.1.102 hostname
>> NODE: 192.168.1.103 <<
-n 192.168.1.103 hostname
>> NODE: 192.168.1.104 <<
-n 192.168.1.104 hostname
EOF
simple_test $cmd

17
ctdb/tests/onnode/0080.sh Executable file
View File

@ -0,0 +1,17 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE recmaster hostname"
define_test "$cmd" "node 1 (192.168.1.102) is recmaster"
ctdb_set_output <<EOF
1
EOF
required_result <<EOF
-n 192.168.1.102 hostname
EOF
simple_test $cmd

17
ctdb/tests/onnode/0081.sh Executable file
View File

@ -0,0 +1,17 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE lvsmaster hostname"
define_test "$cmd" "no lvsmaster"
ctdb_set_output 255 <<EOF
There is no LVS master
EOF
required_result 1 <<EOF
onnode: No lvsmaster available
EOF
simple_test $cmd

21
ctdb/tests/onnode/0090.sh Executable file
View File

@ -0,0 +1,21 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE natgw hostname"
define_test "$cmd" "no natgw"
ctdb_set_output <<EOF
-1 0.0.0.0
:0:192.168.1.101:0:0:0:0:0:
:1:192.168.1.102:0:0:0:0:0:
:2:192.168.1.103:0:0:0:0:0:
:3:192.168.1.104:0:0:0:0:0:
EOF
required_result 1 <<EOF
onnode: No natgwlist available
EOF
simple_test $cmd

21
ctdb/tests/onnode/0091.sh Executable file
View File

@ -0,0 +1,21 @@
#!/bin/sh
. "${ONNODE_TESTS_DIR}/common.sh"
cmd="$ONNODE natgw hostname"
define_test "$cmd" "node 2 (192.168.1.103) is natgw"
ctdb_set_output <<EOF
2 192.168.1.103
:0:192.168.1.101:0:0:0:0:0:
:1:192.168.1.102:0:0:0:0:0:
:2:192.168.1.103:0:0:0:0:0:
:3:192.168.1.104:0:0:0:0:0:
EOF
required_result <<EOF
-n 192.168.1.103 hostname
EOF
simple_test $cmd

38
ctdb/tests/onnode/README Normal file
View File

@ -0,0 +1,38 @@
onnode unit tests
=================
Examples:
* ./run_tests.sh
Run all tests, displaying output.
* ./run_tests.sh -s
Run all tests, displaying output and a summary.
* ./run_tests.sh -sq
Run all tests, displaying only a summary.
* ONNODE=onnode-buggy-001 ./run_tests.sh -s
Run against stubs/onnode-buggy-001 instead of default onnode version.
Add more buggy versions of onnode to this directory as bugs are
fixed to enable test validation using this feature.
* ./run_tests.sh ./009*.sh
Run only the specified tests.
* ONNODE="bash -x stubs/onnode-buggy-001" ./run_tests.sh ./0090.sh
ONNODE="bash -x ../../tools/onnode" ./run_tests.sh ./0090.sh
Debug the specified test or test failure. The test will fail
because the bash trace output will be included in the test output.
However, this at least makes it easy to trace onnode while running
the test...
To see if the test passes, the -x can be dropped... so command-line
editing can be kept to a minimum.

103
ctdb/tests/onnode/common.sh Normal file
View File

@ -0,0 +1,103 @@
# Hey Emacs, this is a -*- shell-script -*- !!! :-)

# Environment setup for the onnode tests.  ONNODE_TESTS_DIR is read
# here but not set here; it must already be in the environment.

# Set indirectly by run_tests at top level.
unset CTDB_NODES_SOCKETS

# Default to just "onnode".
: "${ONNODE:=onnode}"

# Augment PATH with relevant stubs/ directories.
if [ -d "${ONNODE_TESTS_DIR}/stubs" ] ; then
    PATH="${ONNODE_TESTS_DIR}/stubs:$PATH"
fi

# Assign first, export second: some /bin/sh implementations word-split
# the argument of "export VAR=$(...)", which breaks on paths that
# contain whitespace.
ONNODE_TESTCASE_DIR=$(dirname "$0")
export ONNODE_TESTCASE_DIR
if [ "$(basename "$ONNODE_TESTCASE_DIR")" = "onnode" ] ; then
    # Just a test script, no testcase subdirectory.
    ONNODE_TESTCASE_DIR="$ONNODE_TESTS_DIR"
else
    if [ -d "${ONNODE_TESTCASE_DIR}/stubs" ] ; then
        PATH="${ONNODE_TESTCASE_DIR}/stubs:$PATH"
    fi
fi

# Find CTDB nodes file.
if [ -z "$CTDB_NODES_FILE" ] ; then
    if [ -r "${ONNODE_TESTCASE_DIR}/nodes" ] ; then
        CTDB_NODES_FILE="${ONNODE_TESTCASE_DIR}/nodes"
    elif [ -r "${ONNODE_TESTS_DIR}/nodes" ] ; then
        CTDB_NODES_FILE="${ONNODE_TESTS_DIR}/nodes"
    else
        CTDB_NODES_FILE="${CTDB_BASE:-/etc/ctdb}/nodes"
    fi
fi
export CTDB_NODES_FILE

# Scratch directory used for the canned ctdb output and return code.
ONNODE_TESTS_VAR_DIR="${ONNODE_TESTS_DIR}/var"
export ONNODE_TESTS_VAR_DIR
mkdir -p "$ONNODE_TESTS_VAR_DIR"

if [ -z "$CTDB_BASE" ] ; then
    CTDB_BASE=$(dirname "$CTDB_NODES_FILE")
    export CTDB_BASE
fi
define_test ()
{
    # Print the one-line test heading: "<test id> <command> - <description>".
    # $1 is the command under test, $2 a short description.
    # The test id is derived from $0: drop a leading "./", keep only the
    # first path component, then drop a trailing ".sh".
    _f="${0#./}"
    _f="${_f%%/*}"
    _f="${_f%.sh}"

    echo "${_f} ${1} - ${2}"
}
# Set output for ctdb command.  The output is read from stdin.
# Optional 1st argument is the return code (default 0).
ctdb_set_output ()
{
    _out="$ONNODE_TESTS_VAR_DIR/ctdb.out"
    cat >"$_out"

    _rc="$ONNODE_TESTS_VAR_DIR/ctdb.rc"
    echo "${1:-0}" >"$_rc"

    # The paths are expanded now, not when the trap fires, because
    # simple_test reuses $_out for the command output.  Single quotes
    # inside the trap string keep paths with whitespace intact.
    trap "rm -f '$_out' '$_rc'" 0
}
required_result ()
{
    # Record what simple_test should expect: the exit status comes from
    # $1 (0 when missing or empty), the output text is read from stdin.
    if [ -n "$1" ] ; then
        required_rc="$1"
    else
        required_rc=0
    fi
    required_output=$(cat)
}
simple_test ()
{
    # Run the given command, capture stdout+stderr and the exit status,
    # and compare both against $required_output / $required_rc.
    # A leading "-s" argument sorts the output lines before comparing.
    # Prints "PASSED" on a match; otherwise prints a report and returns 1.
    _sort="cat"
    if [ "$1" = "-s" ] ; then
        shift
        _sort="sort"
    fi

    _out=$("$@" 2>&1)
    _rc=$?
    # printf rather than echo: some /bin/sh echo builtins interpret
    # backslash sequences, which would alter the captured output.
    _out=$(printf '%s\n' "$_out" | $_sort )

    # Two separate, fully quoted tests: "[ a -a b ]" is ambiguous when
    # the output looks like an operator, and fails on an empty status.
    if [ "$_out" = "$required_output" ] && [ "$_rc" = "$required_rc" ] ; then
        echo "PASSED"
    else
        cat <<EOF
CTDB_NODES_FILE="${CTDB_NODES_FILE}"
CTDB_BASE="$CTDB_BASE"
$(which ctdb)
##################################################
Required output (Exit status: ${required_rc}):
##################################################
$required_output
##################################################
Actual output (Exit status: ${_rc}):
##################################################
$_out
EOF
        return 1
    fi
}

4
ctdb/tests/onnode/nodes Normal file
View File

@ -0,0 +1,4 @@
192.168.1.101
192.168.1.102
192.168.1.103
192.168.1.104

31
ctdb/tests/onnode/run_tests.sh Executable file
View File

@ -0,0 +1,31 @@
#!/bin/sh

# Run some onnode unit tests.
#
# Leading arguments that start with "-" are passed through as options
# to scripts/run_tests; remaining arguments are the tests to run.  With
# no test arguments, every NNNN.sh and NNNN/run_test.sh here is run.

# Quote the path and stop if the directory cannot be entered; otherwise
# the tests would be looked up in whatever the current directory is.
cd "$(dirname "$0")" || exit 1
ONNODE_TESTS_DIR=$(pwd)
export ONNODE_TESTS_DIR

test_dir=$(dirname "$ONNODE_TESTS_DIR")

opts="-d"

# "for i" iterates over a snapshot of the arguments, so shifting inside
# the loop only trims the leading options from "$@".
for i ; do
    case "$i" in
        -*)
            opts="$opts $i"
            shift
            ;;
        *)
            break
    esac
done

tests=""
if [ -z "$*" ] ; then
    tests=$(ls ./[0-9][0-9][0-9][0-9].sh ./[0-9][0-9][0-9][0-9]/run_test.sh 2>/dev/null)
fi

"$test_dir/scripts/run_tests" $opts "$@" $tests || exit 1

echo "All OK"
exit 0

33
ctdb/tests/onnode/stubs/ctdb Executable file
View File

@ -0,0 +1,33 @@
#!/bin/sh

# Stand-in for the ctdb client, used by the onnode tests.
#
# Lookup order:
#   1. $ONNODE_TESTS_VAR_DIR/ctdb.out, with the exit status taken from
#      ctdb.rc in the same directory when that file is readable;
#   2. an executable $ONNODE_TESTCASE_DIR/ctdb.d/<args>.sh;
#   3. a readable $ONNODE_TESTCASE_DIR/ctdb.d/<args>.out;
# where <args> is the argument list with each run of whitespace replaced
# by "_".  If nothing matches, a message is printed and the exit status
# is 1.

canned_out="${ONNODE_TESTS_VAR_DIR}/ctdb.out"
canned_rc="${ONNODE_TESTS_VAR_DIR}/ctdb.rc"

cmd_key=$(echo "$*" | sed -r -e 's@[[:space:]]+@_@g')
handler="${ONNODE_TESTCASE_DIR}/ctdb.d/${cmd_key}"

if [ -r "$canned_out" ] ; then
    cat "$canned_out"
    [ -r "$canned_rc" ] || exit 0
    exit $(cat "$canned_rc")
fi

if [ -x "${handler}.sh" ] ; then
    "${handler}.sh"
    exit $?
fi

if [ -r "${handler}.out" ] ; then
    cat "${handler}.out"
    exit 0
fi

echo "fake ctdb: no implementation for \"$*\""
exit 1

View File

@ -0,0 +1,376 @@
#!/bin/bash
# Run commands on CTDB nodes.
# See http://ctdb.samba.org/ for more information about CTDB.
# Copyright (C) Martin Schwenke 2008
# Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.
# Copyright (C) Andrew Tridgell 2007
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
prog=$(basename $0)
# Print the usage message on stderr and exit with status 1.
# This function never returns to its caller.
usage ()
{
cat >&2 <<EOF
Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
options:
-c Run in current working directory on specified nodes.
-o <prefix> Save standard output from each node to file <prefix>.<ip>
-p Run command in parallel on specified nodes.
-q Do not print node addresses (overrides -v).
-n Allow nodes to be specified by name.
-f Specify nodes file, overrides CTDB_NODES_FILE.
-v Print node address even for a single node.
<NODES> "all", "any", "ok" (or "healthy"), "con" (or "connected"),
"rm" (or "recmaster"), "lvs" (or "lvsmaster"),
"natgw" (or "natgwlist"); or
a node number (0 base); or
a hostname (if -n is specified); or
list (comma separated) of <NODES>; or
range (hyphen separated) of node numbers.
EOF
exit 1
}
invalid_nodespec ()
{
    # Complain about a bad <NODES> argument on stderr, followed by a
    # blank line, then hand over to usage (which exits).
    {
        echo "Invalid <nodespec>"
        echo
    } >&2
    usage
}
# Defaults.
current=false
parallel=false
verbose=false
quiet=false
prefix=""
names_ok=false
ctdb_base="${CTDB_BASE:-/etc/ctdb}"
# Parse the command line.  Sets the option globals (current,
# CTDB_NODES_FILE, names_ok, prefix, parallel, quiet, verbose), then
# stores the first non-option argument in $nodespec and the remaining
# arguments in $command.  Calls usage (which exits) on a getopt error,
# on -h/--help, or when fewer than two non-option arguments remain.
parse_options ()
{
# $POSIXLY_CORRECT means that the command passed to onnode can
# take options and getopt won't reorder things to make them
# options to onnode.
local temp
# Not on the previous line - local returns 0!
temp=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cf:hno:pqv" -l help -- "$@")
[ $? != 0 ] && usage
# Replace the positional parameters with getopt's normalised list.
eval set -- "$temp"
while true ; do
case "$1" in
-c) current=true ; shift ;;
-f) CTDB_NODES_FILE="$2" ; shift 2 ;;
-n) names_ok=true ; shift ;;
-o) prefix="$2" ; shift 2 ;;
-p) parallel=true ; shift ;;
-q) quiet=true ; shift ;;
-v) verbose=true ; shift ;;
--) shift ; break ;;
-h|--help|*) usage ;; # Shouldn't happen, so this is reasonable.
esac
done
# Need at least a nodespec and one word of command.
[ $# -lt 2 ] && usage
nodespec="$1" ; shift
# The remaining words are joined into a single string.
command="$@"
}
echo_nth ()
{
    # Print the n-th (0-based) of the words that follow the index $1.
    # A missing entry or the "#DEAD" placeholder is reported on stderr
    # and ends the current (sub)shell with status 1.
    local n="$1" ; shift
    shift $n
    local node="$1"

    if [ -z "$node" -o "$node" = "#DEAD" ] ; then
        echo "${prog}: \"node ${n}\" does not exist" >&2
        exit 1
    fi

    echo $node
}
# Expand the comma-separated nodespec in $1 into one item per line on
# stdout: "A-B" ranges are expanded with seq, the keyword specs are
# passed through unchanged, and anything else must be a non-negative
# integer unless names are allowed ($names_ok).
parse_nodespec ()
{
# Subshell avoids hacks to restore $IFS.
(
IFS=","
for i in $1 ; do
case "$i" in
# Range: the text before the last "-" and the text after the first "-".
*-*) seq "${i%-*}" "${i#*-}" 2>/dev/null || invalid_nodespec ;;
# Separate lines for readability.
all|any|ok|healthy|con|connected) echo "$i" ;;
rm|recmaster|lvs|lvsmaster|natgw|natgwlist) echo "$i" ;;
*)
# The numeric test fails, with its error message discarded, for
# non-numbers; such items are accepted only when $names_ok is true.
[ $i -gt -1 ] 2>/dev/null || $names_ok || invalid_nodespec
echo $i
esac
done
)
}
ctdb_status_output="" # cache
# Print the addresses of the nodes whose "ctdb -Y status" line ends in
# the flag pattern selected by $2 ("healthy" or "connected").
# $1 is stored in all_nodes but is not referenced in this function.
# Exits with status 1 if the ctdb command fails.
get_nodes_with_status ()
{
local all_nodes="$1"
local status="$2"
local bits
# Pattern for the six colon-separated flag fields at the end of a line.
case "$status" in
healthy)
# All six flags zero.
bits="0:0:0:0:0:0"
;;
connected)
# First flag zero, the other five may be 0 or 1.
bits="0:[0-1]:[0-1]:[0-1]:[0-1]:[0-1]"
;;
*)
invalid_nodespec
esac
if [ -z "$ctdb_status_output" ] ; then
# FIXME: need to do something if $CTDB_NODES_SOCKETS is set.
ctdb_status_output=$(ctdb -Y status 2>/dev/null)
if [ $? -ne 0 ] ; then
echo "${prog}: unable to get status of CTDB nodes" >&2
exit 1
fi
# Drops the shortest prefix that ends at the first space, if any.
# NOTE(review): presumably meant to drop the header line - confirm.
ctdb_status_output="${ctdb_status_output#* }"
fi
local nodes=""
local i
# Unquoted on purpose: iterate over whitespace-separated lines.
for i in $ctdb_status_output ; do
# Try removing bits from end.
local t="${i%:${bits}:}"
if [ "$t" != "$i" ] ; then
# Succeeded. Get address. NOTE: this is an optimisation.
# It might be better to get the node number and then get
# the nth node to get the address. This would make things
# more consistent if $ctdb_base/nodes actually contained
# hostnames.
# ${t#:*:} drops the leading ":<first field>:" from the line.
nodes="${nodes} ${t#:*:}"
fi
done
echo $nodes
}
ctdb_props="" # cache
# Print the address of the node that "ctdb <prop> -Y" names, where the
# first line of that output is used as an index into the node list $1.
# $2 is the property name.  Prints a message on stderr and exits with
# status 1 when no node is found.
get_node_with_property ()
{
local all_nodes="$1"
local prop="$2"
local prop_node=""
# Cache miss when removing a leading ":<prop>:" leaves $ctdb_props
# unchanged.  NOTE(review): the pattern has no wildcard, so this only
# detects <prop> when it is the first cached entry - confirm intent.
if [ "${ctdb_props##:${prop}:}" = "$ctdb_props" ] ; then
prop_node=$(ctdb "$prop" -Y 2>/dev/null)
# We only want the first line.
local nl="
"
prop_node="${prop_node%%${nl}*}"
# NOTE(review): $? here is the status of the assignment on the previous
# line, not of the ctdb command above - confirm whether this is intended.
if [ $? -eq 0 ] ; then
ctdb_props="${ctdb_props}${ctdb_props:+ }:${prop}:${prop_node}"
else
prop_node=""
fi
else
# Cache hit: strip the ":<prop>:" prefix, then anything after a space.
prop_node="${ctdb_props##:${prop}:}"
prop_node="${prop_node%% *}"
fi
if [ -n "$prop_node" ] ; then
# all_nodes is deliberately unquoted so each node is a separate word.
echo_nth "$prop_node" $all_nodes
else
echo "${prog}: No ${prop} available" >&2
exit 1
fi
}
# Print the address of the first node that answers "ctdb pnn".
# $1 is the list of all nodes.  Returns 0 after printing a node and
# returns 1 when no output line starts with "PNN:".
get_any_available_node ()
{
local all_nodes="$1"
# We do a recursive onnode to find which nodes are up and running.
local out=$($0 -pq all ctdb pnn 2>&1)
local line
while read line ; do
# Stripping the "PNN:" prefix changes the line only if it was present.
local pnn="${line#PNN:}"
if [ "$pnn" != "$line" ] ; then
echo_nth "$pnn" $all_nodes
return 0
fi
# Else must be an error message from a down node.
done <<<"$out"
return 1
}
# Resolve the nodespec in $1 to node addresses, printed on stdout.
# The node list comes from $CTDB_NODES_SOCKETS when that is set,
# otherwise from the nodes file ($CTDB_NODES_FILE, falling back to
# $ctdb_base/nodes).  Exits with status 1 on an unreadable nodes file
# or when a helper fails.
get_nodes ()
{
local all_nodes
if [ -n "$CTDB_NODES_SOCKETS" ] ; then
all_nodes="$CTDB_NODES_SOCKETS"
else
local f="${ctdb_base}/nodes"
if [ -n "$CTDB_NODES_FILE" ] ; then
f="$CTDB_NODES_FILE"
if [ ! -e "$f" -a "${f#/}" = "$f" ] ; then
# $f is relative, try in $ctdb_base
f="${ctdb_base}/${f}"
fi
fi
if [ ! -r "$f" ] ; then
echo "${prog}: unable to open nodes file \"${f}\"" >&2
exit 1
fi
# Strip comments and spaces; lines left empty become "#DEAD" so that
# the remaining entries keep their positions in the list.
all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$f")
fi
local nodes=""
local n
for n in $(parse_nodespec "$1") ; do
[ $? != 0 ] && exit 1 # Required to catch exit in above subshell.
case "$n" in
all)
# Every node, with the "#DEAD" placeholders removed.
echo "${all_nodes//#DEAD/}"
;;
any)
get_any_available_node "$all_nodes" || exit 1
;;
ok|healthy)
get_nodes_with_status "$all_nodes" "healthy" || exit 1
;;
con|connected)
get_nodes_with_status "$all_nodes" "connected" || exit 1
;;
rm|recmaster)
get_node_with_property "$all_nodes" "recmaster" || exit 1
;;
lvs|lvsmaster)
get_node_with_property "$all_nodes" "lvsmaster" || exit 1
;;
natgw|natgwlist)
get_node_with_property "$all_nodes" "natgwlist" || exit 1
;;
# A node number of one to three digits.
[0-9]|[0-9][0-9]|[0-9][0-9][0-9])
echo_nth $n $all_nodes
;;
*)
# Anything else is taken as a host name, if names are allowed.
$names_ok || invalid_nodespec
echo $n
esac
done
}
# Local replacement for ssh: runs the command string $2 with "sh -c",
# with CTDB_SOCKET set to $1 in that command's environment and file
# descriptor 3 redirected to /dev/null.
fakessh ()
{
CTDB_SOCKET="$1" sh -c "$2" 3>/dev/null
}
stdout_filter ()
{
    # Filter for a node's stdout, read from stdin.  Uses the globals
    # $prefix, $verbose, $parallel and the current node $n.

    # -o <prefix>: save to "<prefix>.<node>", with "/" in the node
    # replaced by "_".
    if [ -n "$prefix" ] ; then
        cat >"${prefix}.${n//\//_}"
        return
    fi

    # Verbose parallel runs: tag every line with "[<node>] ".
    if $verbose && $parallel ; then
        sed -e "s@^@[$n] @"
        return
    fi

    # Otherwise pass the stream through unchanged.
    cat
}
stderr_filter ()
{
    # Filter for a node's stderr, read from stdin: tag every line with
    # "[<node>] " for verbose parallel runs, else pass it through.
    if ! { $verbose && $parallel ; } ; then
        cat
        return
    fi

    sed -e "s@^@[$n] @"
}
######################################################################
# Main script: parse options, pick the remote shell, resolve the
# nodespec, then run $command on each node (in sequence, or in the
# background with -p) and exit with $retcode.
parse_options "$@"
# -c: run the command from the current working directory on each node.
$current && command="cd $PWD && $command"
ssh_opts=
if [ -n "$CTDB_NODES_SOCKETS" ] ; then
SSH=fakessh
else
# Could "2>/dev/null || true" but want to see errors from typos in file.
[ -r "${ctdb_base}/onnode.conf" ] && . "${ctdb_base}/onnode.conf"
[ -n "$SSH" ] || SSH=ssh
if [ "$SSH" = "ssh" ] ; then
ssh_opts="-n"
else
: # rsh? All bets are off!
fi
fi
######################################################################
nodes=$(get_nodes "$nodespec")
[ $? != 0 ] && exit 1 # Required to catch exit in above subshell.
if $quiet ; then
verbose=false
else
# If $nodes contains a space or a newline then assume multiple nodes.
nl="
"
[ "$nodes" != "${nodes%[ ${nl}]*}" ] && verbose=true
fi
pids=""
trap 'kill -TERM $pids 2>/dev/null' INT TERM
# There's a small race here where the kill can fail if no processes
# have been added to $pids and the script is interrupted. However,
# the part of the window where it matter is very small.
retcode=0
for n in $nodes ; do
set -o pipefail 2>/dev/null
if $parallel ; then
# fd 3 carries the filtered stdout past the stderr filter: the inner
# group's stdout goes through stdout_filter to fd 3, while its stderr
# is redirected into the pipe that feeds stderr_filter.
{ exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; } &
pids="${pids} $!"
else
if $verbose ; then
echo >&2 ; echo ">> NODE: $n <<" >&2
fi
{ exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; }
# NOTE(review): when the test fails, the $? on the right-hand side is
# the status of "[", not of the command group above - confirm intent.
[ $? = 0 ] || retcode=$?
fi
done
# -p: collect the exit status of each background job.
$parallel && {
for p in $pids; do
wait $p
# NOTE(review): same $? pattern as in the sequential branch above.
[ $? = 0 ] || retcode=$?
done
}
exit $retcode

2
ctdb/tests/onnode/stubs/ssh Executable file
View File

@ -0,0 +1,2 @@
#!/bin/sh
# Fake ssh for the onnode tests: instead of connecting anywhere, print
# all of the arguments on one line.
echo "$*"

View File

@ -53,6 +53,54 @@ test_exit ()
exit $(($testfailures+0))
}
# Look for jumps in the per-node time logs and, if one is found, dump
# information that may help explain it.
#
# The last 20 lines of /var/log/ctdb.test.time.log are fetched from all
# nodes via onnode.  Each data line is expected to hold two numbers,
# "<seconds> <nanoseconds>"; they are reduced to deciseconds by
# appending the first nanosecond digit to the seconds.  A gap of at
# least $threshold deciseconds between consecutive lines of one node is
# reported as a time jump.
ctdb_check_time_logs ()
{
    local threshold=20
    local jump=false
    local ds_prev=""
    local ds_curr=""
    local node=""
    # Keep these out of the caller's namespace.  Declared separately
    # from the assignment because "local" would mask the exit status.
    local out

    out=$(onnode all tail -n 20 /var/log/ctdb.test.time.log 2>&1)

    if [ $? -eq 0 ] ; then
        local line
        while read -r line ; do
            case "$line" in
                \>\>\ NODE:\ *\ \<\<)
                    # Per-node header from onnode: start a new series.
                    node="${line#>> NODE: }"
                    node=${node% <<*}
                    ds_prev=""
                    ;;
                [0-9]*\ [0-9]*)
                    # Only lines that start with two numeric-looking
                    # fields are used, so that stray messages do not
                    # end up inside the arithmetic below.
                    set -- $line
                    ds_curr="$1${2:0:1}"
                    if [ -n "$ds_prev" ] && \
                        [ $(($ds_curr - $ds_prev)) -ge $threshold ] ; then
                        echo "Node $node had time jump of $(($ds_curr - $ds_prev))ds between $(date +'%T' -d @${ds_prev%?}) and $(date +'%T' -d @${ds_curr%?})"
                        jump=true
                    fi
                    ds_prev="$ds_curr"
                    ;;
            esac
        done <<<"$out"
    else
        echo Error getting time logs
    fi

    if $jump ; then
        echo "Check time sync (test client first):"
        date
        onnode -p all date
        echo "Information from test client:"
        hostname
        top -b -n 1
        echo "Information from cluster nodes:"
        onnode all "top -b -n 1 ; echo '/proc/slabinfo' ; cat /proc/slabinfo"
    fi
}
ctdb_test_exit ()
{
local status=$?
@ -68,6 +116,10 @@ ctdb_test_exit ()
echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."
if [ -n "$CTDB_TEST_REAL_CLUSTER" -a $status -ne 0 ] ; then
ctdb_check_time_logs
fi
eval "$ctdb_test_exit_hook" || true
unset ctdb_test_exit_hook
@ -856,6 +908,8 @@ restart_ctdb ()
onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || return 1
local debug_out=$(onnode -p all ctdb status -Y 2>&1; onnode -p all ctdb scriptstatus 2>&1)
echo "Setting RerecoveryTimeout to 1"
onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
@ -869,6 +923,13 @@ restart_ctdb ()
onnode -q 0 $CTDB recover
echo "ctdb is ready"
if ! onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
echo "OUCH! Cluster is UNHEALTHY again..."
echo "$debug_out"
# Try to make the calling test fail
status=1
fi
}
ctdb_restart_when_done ()

View File

@ -18,8 +18,10 @@ EOF
######################################################################
with_summary=false
with_desc=false
quiet=false
temp=$(getopt -n "$prog" -o "xhs" -l help -- "$@")
temp=$(getopt -n "$prog" -o "xdhqs" -l help -- "$@")
[ $? != 0 ] && usage
@ -28,12 +30,20 @@ eval set -- "$temp"
while true ; do
case "$1" in
-x) set -x; shift ;;
-d) with_desc=true ; shift ;; # 4th line of output is description
-q) quiet=true ; shift ;;
-s) with_summary=true ; shift ;;
--) shift ; break ;;
*) usage ;;
esac
done
if $quiet ; then
show_progress() { cat >/dev/null ; }
else
show_progress() { cat ; }
fi
######################################################################
tests_total=0
@ -43,22 +53,40 @@ summary=""
rows=$(if tty -s ; then stty size ; else echo x 80 ; fi | sed -e 's@.* @@' -e 's@^0$@80@')
ww=$((rows - 7))
tf=$(mktemp)
sf=$(mktemp)
set -o pipefail
for f; do
[ -x $f ] || fail "test \"$f\" is not executable"
tests_total=$(($tests_total + 1))
if ctdb_test_run "$f" ; then
tests_passed=$(($tests_passed + 1))
t="PASSED"
else
t="FAILED"
ctdb_test_run "$f" | tee "$tf" | show_progress
status=$?
if $with_summary ; then
if [ $status -eq 0 ] ; then
tests_passed=$(($tests_passed + 1))
t=" PASSED "
else
t="*FAILED*"
fi
if $with_desc ; then
desc=$(tail -n +4 $tf | head -n 1)
f="$desc"
fi
echo "$t $f" >>"$sf"
fi
summary=$(printf "%s\n%-${ww}s%s" "$summary" "$f" "$t")
done
rm -f "$tf"
if $with_summary ; then
echo "$summary"
echo
cat "$sf"
echo
echo "${tests_passed}/${tests_total} tests passed"
fi
rm -f "$sf"
test_exit

View File

@ -27,3 +27,12 @@ ctdb_test_init "$@"
echo "Checking connectivity between nodes..."
onnode all onnode all true
# We're seeing some weirdness with CTDB controls timing out. We're
# wondering if time is jumping forward, so this creates a time log on
# each node that we can examine later if tests fail weirdly.
if [ -n "$CTDB_TEST_REAL_CLUSTER" ] ; then
echo "Starting time logging on each node..."
f="/var/log/ctdb.test.time.log"
onnode -p all "[ -f $f ] || while : ; do date '+%s %N' ; sleep 1 ; done >$f 2>&1 </dev/null &" &
fi

View File

@ -2,7 +2,7 @@
# ctdb ip takeover code
# Copyright (C) Martin Schwenke 2010
# Copyright (C) Martin Schwenke, Ronnie Sahlberg 2010, 2011
# Based on original CTDB C code:
#
@ -29,6 +29,11 @@ import sys
from optparse import OptionParser
import copy
import random
import itertools
# For parsing IP addresses
import socket
import struct
options = None
@ -44,6 +49,9 @@ def process_args(extra_options=[]):
parser.add_option("--ni",
action="store_true", dest="no_ip_failback", default=False,
help="turn on no_ip_failback")
parser.add_option("-L", "--lcp2",
action="store_true", dest="lcp2", default=False,
help="use LCP2 IP rebalancing algorithm [default: %default]")
parser.add_option("-b", "--balance",
action="store_true", dest="balance", default=False,
help="show (im)balance information after each event")
@ -54,14 +62,11 @@ def process_args(extra_options=[]):
action="store_false", dest="show", default=True,
help="don't show IP address layout after each event")
parser.add_option("-v", "--verbose",
action="store_true", dest="verbose", default=False,
action="count", dest="verbose", default=0,
help="print information and actions taken to stdout")
parser.add_option("--hack",
action="store", type="int", dest="hack", default=0,
help="apply a hack (see the code!!!)")
parser.add_option("-r", "--retries",
action="store", type="int", dest="retries", default=5,
help="number of retry loops for rebalancing [default: %default]")
help="number of retry loops for rebalancing non-deterministic failback [default: %default]")
parser.add_option("-i", "--iterations",
action="store", type="int", dest="iterations",
default=1000,
@ -69,6 +74,9 @@ def process_args(extra_options=[]):
parser.add_option("-o", "--odds",
action="store", type="int", dest="odds", default=4,
help="make the chances of a failover 1 in ODDS [default: %default]")
parser.add_option("-A", "--aggressive",
action="store_true", dest="aggressive", default=False,
help="apply ODDS to try to flip each node [default: %default]")
def seed_callback(option, opt, value, parser):
random.seed(value)
@ -78,47 +86,165 @@ def process_args(extra_options=[]):
parser.add_option("-x", "--exit",
action="store_true", dest="exit", default=False,
help="exit on the 1st gratuitous IP move")
help="exit on the 1st gratuitous IP move or IP imbalance")
parser.add_option("-H", "--hard-imbalance-limit",
action="store", type="int", dest="hard_limit", default=1,
help="exceeding this limit causes termination [default: %default]")
parser.add_option("-S", "--soft-imbalance-limit",
action="store", type="int", dest="soft_limit", default=1,
help="exceeding this limit increments a counter [default: %default]")
(options, args) = parser.parse_args()
if len(args) != 0:
parser.error("too many argumentss")
def print_begin(t):
print "=" * 40
def print_begin(t, delim='='):
print delim * 40
print "%s:" % (t)
def print_end():
print "-" * 40
def verbose_begin(t):
if options.verbose:
if options.verbose > 0:
print_begin(t)
def verbose_end():
if options.verbose:
if options.verbose > 0:
print_end()
def verbose_print(t):
if options.verbose:
if options.verbose > 0:
if not type(t) == list:
t = [t]
if t != []:
print "\n".join([str(i) for i in t])
# more than this and we switch to the logging module... :-)
def debug_begin(t):
    """Print a '-' delimited section header for t when verbosity exceeds 1."""
    if options.verbose <= 1:
        return
    print_begin(t, '-')
def debug_end():
    """Print the section footer when verbosity exceeds 1."""
    if options.verbose <= 1:
        return
    print_end()
def debug_print(t):
    """Print t, or each item of list t on its own line, when verbosity
    exceeds 1.  An empty list prints nothing."""
    if options.verbose <= 1:
        return
    # Anything that is not exactly a list is treated as a single item.
    items = t if type(t) == list else [t]
    if items != []:
        print("\n".join(map(str, items)))
def ip_to_list_of_ints(ip):
    """Return the 16 octets of an IP address as a list of ints.

    The address is first parsed as IPv6.  If that fails it is parsed
    as IPv4 and padded with 12 leading zero octets.  This makes IPv4
    addresses comparable with IPv6 but reduces the overall
    effectiveness of the algorithm.  The alternative would be to
    treat these addresses separately while trying to keep all the IPs
    in overall balance.

    Raises socket.error if the string is not a valid IPv4 address
    either, so only IPv4 parse errors are exposed."""
    try:
        packed = socket.inet_pton(socket.AF_INET6, ip)
    except socket.error:
        # Only a parse failure selects the IPv4 path; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        packed = b"\0" * 12 + socket.inet_pton(socket.AF_INET, ip)

    # bytearray yields integer octets regardless of whether the packed
    # address iterates as 1-character strings or as ints.
    return list(bytearray(packed))
def ip_distance(ip1, ip2):
    """Calculate the distance between 2 IPs.

    This is the length, in bits, of the longest common prefix between
    the IPs.  It is calculated by XOR-ing the 2 IPs together octet by
    octet and counting the number of leading zero bits."""
    octets1 = ip_to_list_of_ints(ip1)
    octets2 = ip_to_list_of_ints(ip2)

    prefix_len = 0
    for (a, b) in zip(octets1, octets2):
        differing = a ^ b
        if differing != 0:
            # bin() gives a minimal length '0bNNN' string, so
            # len(bin(x)) - 2 is the number of significant bits and
            # 8 minus that is the number of leading zeroes.
            return prefix_len + 10 - len(bin(differing))
        # Identical octets: all 8 bits belong to the common prefix.
        prefix_len += 8

    return prefix_len
def ip_distance_2_sum(ip, ips):
    """Calculate the IP distance for the given IP relative to IPs.

    Returns the sum of the squares of ip_distance() between ip and
    each address in ips; 0 when ips is empty."""
    return sum(ip_distance(ip, other) ** 2 for other in ips)
def imbalance_metric(ips):
    """Return the imbalance metric for a group of IPs.

    This is the sum of squares of the IP distances between each pair
    of IPs.  Fewer than 2 IPs give 0.

    Each IP is paired only with the IPs that follow it, so every
    unordered pair is counted exactly once.  The calculation is
    iterative so that large groups do not run into the interpreter's
    recursion limit."""
    ips = list(ips)
    return sum(ip_distance_2_sum(ip, ips[pos + 1:])
               for (pos, ip) in enumerate(ips))
def mean(l):
    """Return the arithmetic mean of the numbers in l as a float."""
    total = float(sum(l))
    return total / len(l)
class Node(object):
def __init__(self, public_addresses):
self.public_addresses = set(public_addresses)
# List of list allows groups of IPs to be passed in. They're
# not actually used in the algorithm but are just used by
# calculate_imbalance() for checking the simulation. Note
# that people can pass in garbage and make this code
# fail... but we're all friends here in simulation world...
# :-)
if type(public_addresses[0]) is str:
self.public_addresses = set(public_addresses)
self.ip_groups = []
else:
# flatten
self.public_addresses = set([i for s in public_addresses for i in s])
self.ip_groups = public_addresses
self.current_addresses = set()
self.healthy = True
self.imbalance = -1
def __str__(self):
    """Return a one-line summary: a status flag, the sorted current
    addresses and, when LCP2 is enabled, the imbalance metric.

    The flag is "*" for a node with no public addresses, " " for a
    healthy node and "#" for an unhealthy one."""
    if len(self.public_addresses) == 0:
        flag = "*"
    elif self.healthy:
        flag = " "
    else:
        flag = "#"

    addresses = sorted(list(self.current_addresses))

    suffix = ""
    if options.lcp2:
        suffix = " %d" % self.imbalance

    return "%s %s%s" % (flag, addresses, suffix)
def can_node_serve_ip(self, ip):
return ip in self.public_addresses
def node_ip_coverage(self):
return len(self.current_addresses)
def node_ip_coverage(self, ips=None):
return len([a for a in self.current_addresses if ips == None or a in ips])
def set_imbalance(self, imbalance=-1):
"""Set the imbalance metric to the given value. If none given
then calculate it."""
if imbalance != -1:
self.imbalance = imbalance
else:
self.imbalance = imbalance_metric(list(self.current_addresses))
def get_imbalance(self):
return self.imbalance
class Cluster(object):
def __init__(self):
@ -131,27 +257,46 @@ class Cluster(object):
self.ip_moves = []
self.grat_ip_moves = []
self.imbalance = []
self.imbalance_groups = []
self.imbalance_count = 0
self.imbalance_groups_count = itertools.repeat(0)
self.imbalance_metric = []
self.events = -1
self.num_unhealthy = []
self.prev = None
def __str__(self):
return "\n".join(["%2d %s %s" %
(i,
"*" if len(n.public_addresses) == 0 else \
(" " if n.healthy else "#"),
sorted(list(n.current_addresses)))
for (i, n) in enumerate(self.nodes)])
return "\n".join(["%2d %s" % (i, n) \
for (i, n) in enumerate(self.nodes)])
# This is naive. It assumes that IP groups are indicated by the
# 1st node having IP groups.
def have_ip_groups(self):
return (len(self.nodes[0].ip_groups) > 0)
def print_statistics(self):
print_begin("STATISTICS")
print "Events: %6d" % self.events
print "Total IP moves: %6d" % sum(self.ip_moves)
print "Gratuitous IP moves: %6d" % sum(self.grat_ip_moves)
print "Max imbalance: %6d" % max(self.imbalance)
print "Final imbalance: %6d" % self.imbalance[-1]
print "Maximum unhealthy: %6d" % max(self.num_unhealthy)
print "Events: %6d" % self.events
print "Total IP moves: %6d" % sum(self.ip_moves)
print "Gratuitous IP moves: %6d" % sum(self.grat_ip_moves)
print "Max imbalance: %6d" % max(self.imbalance)
if self.have_ip_groups():
print "Max group imbalance counts: ", map(max, zip(*self.imbalance_groups))
print "Mean imbalance: %f" % mean(self.imbalance)
if self.have_ip_groups():
print "Mean group imbalances counts: ", map(mean, zip(*self.imbalance_groups))
print "Final imbalance: %6d" % self.imbalance[-1]
if self.have_ip_groups():
print "Final group imbalances: ", self.imbalance_groups[-1]
if options.lcp2:
print "Max LCP2 imbalance : %6d" % max(self.imbalance_metric)
print "Soft imbalance count: %6d" % self.imbalance_count
if self.have_ip_groups():
print "Soft imbalance group counts: ", self.imbalance_groups_count
if options.lcp2:
print "Final LCP2 imbalance : %6d" % self.imbalance_metric[-1]
print "Maximum unhealthy: %6d" % max(self.num_unhealthy)
print_end()
def find_pnn_with_ip(self, ip):
@ -178,7 +323,7 @@ class Cluster(object):
verbose_print(pnn)
verbose_end()
def unhealthy(self, *pnns):
verbose_begin("UNHEALTHY")
@ -191,27 +336,42 @@ class Cluster(object):
def do_something_random(self):
"""Make random node(s) healthy or unhealthy.
"""Make a random node healthy or unhealthy.
If options.aggressive is False then: If all nodes are healthy
or unhealthy, then invert one of them; otherwise, there's a 1
in options.odds chance of making another node unhealthy.
If all nodes are healthy or unhealthy, then invert one of
them. Otherwise, there's a 1 in options.odds chance of making
another node unhealthy."""
If options.aggressive is True then: For each node there is a 1
in options.odds chance of flipping the state of that node
between healthy and unhealthy."""
num_nodes = len(self.nodes)
healthy_pnns = [i for (i,n) in enumerate(self.nodes) if n.healthy]
num_healthy = len(healthy_pnns)
if not options.aggressive:
num_nodes = len(self.nodes)
healthy_pnns = [i for (i,n) in enumerate(self.nodes) if n.healthy]
num_healthy = len(healthy_pnns)
if num_nodes == num_healthy:
self.unhealthy(random.randint(0, num_nodes-1))
elif num_healthy == 0:
self.healthy(random.randint(0, num_nodes-1))
elif random.randint(1, options.odds) == 1:
self.unhealthy(random.choice(healthy_pnns))
if num_nodes == num_healthy:
self.unhealthy(random.randint(0, num_nodes-1))
elif num_healthy == 0:
self.healthy(random.randint(0, num_nodes-1))
elif random.randint(1, options.odds) == 1:
self.unhealthy(random.choice(healthy_pnns))
else:
all_pnns = range(num_nodes)
unhealthy_pnns = sorted(list(set(all_pnns) - set(healthy_pnns)))
self.healthy(random.choice(unhealthy_pnns))
else:
all_pnns = range(num_nodes)
unhealthy_pnns = sorted(list(set(all_pnns) - set(healthy_pnns)))
self.healthy(random.choice(unhealthy_pnns))
# We need to make at least one change or we retry...
changed = False
while not changed:
for (pnn, n) in enumerate(self.nodes):
if random.randint(1, options.odds) == 1:
changed = True
if n.healthy:
self.unhealthy(pnn)
else:
self.healthy(pnn)
def random_iterations(self):
i = 1
@ -219,35 +379,26 @@ class Cluster(object):
verbose_begin("EVENT %d" % i)
verbose_end()
self.do_something_random()
if self.recover() and options.exit > 0:
if self.recover() and options.exit:
break
i += 1
self.print_statistics()
def calculate_imbalance(self):
def imbalance_for_ips(self, ips):
imbalance = 0
assigned = sorted([ip
for n in self.nodes
for ip in n.current_addresses])
maxnode = -1
minnode = -1
for ip in assigned:
num_capable = 0
maxnode = -1
minnode = -1
for ip in ips:
for (i, n) in enumerate(self.nodes):
if not n.healthy:
if not n.healthy or not n.can_node_serve_ip(ip):
continue
if not n.can_node_serve_ip(ip):
continue
num_capable += 1
num = n.node_ip_coverage()
num = n.node_ip_coverage(ips)
if maxnode == -1 or num > maxnum:
maxnode = i
@ -256,24 +407,42 @@ class Cluster(object):
if minnode == -1 or num < minnum:
minnode = i
minnum = num
if maxnode == -1:
if maxnode == -1 or minnode == -1:
continue
i = maxnum - minnum
if maxnum - minnum < 2:
i = 0
#if i < 2:
# i = 0
imbalance = max([imbalance, i])
return imbalance
def calculate_imbalance(self):
# First, do all the assigned IPs.
assigned = sorted([ip
for n in self.nodes
for ip in n.current_addresses])
i = self.imbalance_for_ips(assigned)
ig = []
# FIXME? If dealing with IP groups, assume the nodes are all
# the same.
for ips in self.nodes[0].ip_groups:
gi = self.imbalance_for_ips(ips)
ig.append(gi)
return (i, ig)
def diff(self):
"""Calculate differences in IP assignments between self and prev.
Gratuitous IP moves (from a healthy node to a healthy node)
are prefix by !!. Any gratuitous IP moves cause this function
to return False. If there are no gratuitous moves then it
will return True."""
are prefixed by !!."""
ip_moves = 0
grat_ip_moves = 0
@ -297,33 +466,6 @@ class Cluster(object):
(prefix, ip, old, new))
return (ip_moves, grat_ip_moves, details)
def find_least_loaded_node(self, ip):
"""Just like find_takeover_node but doesn't care about health."""
pnn = -1
min = 0
for (i, n) in enumerate(self.nodes):
if not n.can_node_serve_ip(ip):
continue
num = n.node_ip_coverage()
if (pnn == -1):
pnn = i
min = num
else:
if num < min:
pnn = i
min = num
if pnn == -1:
verbose_print("Could not find node to take over public address %s" % ip)
return False
self.nodes[pnn].current_addresses.add(ip)
verbose_print("%s -> %d" % (ip, pnn))
return True
def find_takeover_node(self, ip):
@ -355,6 +497,190 @@ class Cluster(object):
verbose_print("%s -> %d" % (ip, pnn))
return True
def basic_allocate_unassigned(self):
assigned = set([ip for n in self.nodes for ip in n.current_addresses])
unassigned = sorted(list(self.all_public_ips - assigned))
for ip in unassigned:
self.find_takeover_node(ip)
def basic_failback(self, retries_l):
assigned = sorted([ip
for n in self.nodes
for ip in n.current_addresses])
for ip in assigned:
maxnode = -1
minnode = -1
for (i, n) in enumerate(self.nodes):
if not n.healthy:
continue
if not n.can_node_serve_ip(ip):
continue
num = n.node_ip_coverage()
if maxnode == -1:
maxnode = i
maxnum = num
else:
if num > maxnum:
maxnode = i
maxnum = num
if minnode == -1:
minnode = i
minnum = num
else:
if num < minnum:
minnode = i
minnum = num
if maxnode == -1:
print "Could not find maxnode. May not be able to serve ip", ip
continue
#if self.deterministic_public_ips:
# continue
if maxnum > minnum + 1 and retries_l[0] < options.retries:
# Remove the 1st ip from maxnode
t = sorted(list(self.nodes[maxnode].current_addresses))
realloc = t[0]
verbose_print("%s <- %d" % (realloc, maxnode))
self.nodes[maxnode].current_addresses.remove(realloc)
# Redo the outer loop.
retries_l[0] += 1
return True
return False
def lcp2_allocate_unassigned(self):
    """Assign as many unassigned addresses as possible using LCP2.

    Each pass considers every (unassigned IP, healthy capable node)
    pair and performs the assignment with the smallest sum of
    squared IP distances to the node's current addresses.  Passes
    repeat until nothing is left or no assignment is possible.  A
    message is logged for each IP that remains unassigned."""
    assigned = set([ip for n in self.nodes for ip in n.current_addresses])
    unassigned = sorted(list(self.all_public_ips - assigned))

    while len(unassigned) > 0:
        debug_begin(" CONSIDERING MOVES (UNASSIGNED)")

        # Best candidate so far as (ip, pnn, distance sum); the 1st
        # candidate seen wins ties.
        best = None

        for ip in unassigned:
            for (pnn, node) in enumerate(self.nodes):
                if not (node.can_node_serve_ip(ip) and node.healthy):
                    continue

                dsum = ip_distance_2_sum(ip, node.current_addresses)
                # The increase in this node's imbalance is exactly dsum.
                debug_print(" %s -> %d [+%d]" % (ip, pnn, dsum))

                if best is None or dsum < best[2]:
                    best = (ip, pnn, dsum)

        debug_end()

        if best is None:
            # Nothing could be assigned in this pass, so give up.
            break

        (minip, minnode, mindsum) = best
        target = self.nodes[minnode]
        target.current_addresses.add(minip)
        target.set_imbalance(target.get_imbalance() + mindsum)
        verbose_print("%s -> %d [+%d]" % (minip, minnode, mindsum))
        unassigned.remove(minip)

    for ip in unassigned:
        verbose_print("Could not find node to take over public address %s" % ip)
def lcp2_failback(self, targets):
    """Try to move one IP from the most imbalanced node to a target.

    targets is a list of pnns that are acceptable destinations.

    Returns True if an IP was moved, False if no move improves
    things."""
    # Get the node with the highest imbalance metric; the lowest
    # pnn wins ties.
    imbalances = [n.get_imbalance() for n in self.nodes]
    maximbl = max(imbalances) if imbalances else 0

    # This means that all nodes had 0 or 1 addresses, so can't
    # be imbalanced.
    if maximbl == 0:
        return False

    srcnode = imbalances.index(maximbl)
    src = self.nodes[srcnode]

    # We'll need this a few times...
    ips = src.current_addresses

    # Find an IP and destination node that best reduces imbalance.
    optimum = None
    debug_begin(" CONSIDERING MOVES FROM %d [%d]" % (srcnode, maximbl))
    for ip in ips:
        # What is this IP address costing the source node?
        srcdsum = ip_distance_2_sum(ip, ips - set([ip]))
        srcimbl = maximbl - srcdsum

        # Consider what this IP address would cost each potential
        # destination node.  Destination nodes are limited to
        # those that are newly healthy, since we don't want to
        # do gratuitous failover of IPs just to make minor
        # balance improvements.
        for dstnode in targets:
            dst = self.nodes[dstnode]
            if not (dst.can_node_serve_ip(ip) and dst.healthy):
                continue

            dstdsum = ip_distance_2_sum(ip, dst.current_addresses)
            dstimbl = dst.get_imbalance() + dstdsum
            debug_print(" %d [%d] -> %s -> %d [+%d]" % \
                            (srcnode,
                             srcimbl - src.get_imbalance(),
                             ip,
                             dstnode,
                             dstimbl - dst.get_imbalance()))

            # Only moves that leave the destination better off than
            # the source was, and cheaper there than here, count.
            if dstimbl >= maximbl or dstdsum >= srcdsum:
                continue

            # Keep the move with the lowest combined imbalance.
            if optimum is None or \
                    (srcimbl + dstimbl) < (optimum[2] + optimum[4]):
                optimum = (ip, srcnode, srcimbl, dstnode, dstimbl)
    debug_end()

    if optimum is None:
        return False

    # We found a move that makes things better...
    (ip, srcnode, srcimbl, dstnode, dstimbl) = optimum
    src = self.nodes[srcnode]
    dst = self.nodes[dstnode]
    ini_srcimbl = src.get_imbalance()
    ini_dstimbl = dst.get_imbalance()

    src.current_addresses.remove(ip)
    src.set_imbalance(srcimbl)

    dst.current_addresses.add(ip)
    dst.set_imbalance(dstimbl)

    verbose_print("%d [%d] -> %s -> %d [+%d]" % \
                      (srcnode,
                       srcimbl - ini_srcimbl,
                       ip,
                       dstnode,
                       dstimbl - ini_dstimbl))

    return True
def ctdb_takeover_run(self):
self.events += 1
@ -369,22 +695,11 @@ class Cluster(object):
# Remap everything.
addr_list = sorted(list(self.all_public_ips))
for (i, ip) in enumerate(addr_list):
if options.hack == 1:
self.quietly_remove_ip(ip)
self.find_least_loaded_node(ip)
elif options.hack == 2:
pnn = i % len(self.nodes)
if ip in self.nodes[pnn].public_addresses:
self.quietly_remove_ip(ip)
# Add addresses to new node.
self.nodes[pnn].current_addresses.add(ip)
verbose_print("%s -> %d" % (ip, pnn))
else:
self.quietly_remove_ip(ip)
# Add addresses to new node.
pnn = i % len(self.nodes)
self.nodes[pnn].current_addresses.add(ip)
verbose_print("%s -> %d" % (ip, pnn))
self.quietly_remove_ip(ip)
# Add addresses to new node.
pnn = i % len(self.nodes)
self.nodes[pnn].current_addresses.add(ip)
verbose_print("%s -> %d" % (ip, pnn))
# Remove public addresses from unhealthy nodes.
for (pnn, n) in enumerate(self.nodes):
@ -399,69 +714,39 @@ class Cluster(object):
for ip in n.current_addresses - n.public_addresses])
n.current_addresses &= n.public_addresses
# We'll only retry the balancing act up to 5 times.
retries = 0
if options.lcp2:
newly_healthy = [pnn for (pnn, n) in enumerate(self.nodes)
if len(n.current_addresses) == 0 and n.healthy]
for n in self.nodes:
n.set_imbalance()
# We'll only retry the balancing act up to options.retries
# times (for the basic non-deterministic algorithm). This
# nonsense gives us a reference on the retries count in
# Python. It will be easier in C. :-)
# For LCP2 we reassign as many IPs from heavily "loaded" nodes
# to nodes that are newly healthy, looping until we fail to
# reassign an IP.
retries_l = [0]
should_loop = True
while should_loop:
should_loop = False
assigned = set([ip for n in self.nodes for ip in n.current_addresses])
unassigned = sorted(list(self.all_public_ips - assigned))
if options.lcp2:
self.lcp2_allocate_unassigned()
else:
self.basic_allocate_unassigned()
for ip in unassigned:
self.find_takeover_node(ip)
if self.no_ip_failback:
if self.no_ip_failback or self.deterministic_public_ips:
break
assigned = sorted([ip
for n in self.nodes
for ip in n.current_addresses])
for ip in assigned:
maxnode = -1
minnode = -1
for (i, n) in enumerate(self.nodes):
if not n.healthy:
continue
if not n.can_node_serve_ip(ip):
continue
num = n.node_ip_coverage()
if maxnode == -1:
maxnode = i
maxnum = num
else:
if num > maxnum:
maxnode = i
maxnum = num
if minnode == -1:
minnode = i
minnum = num
else:
if num < minnum:
minnode = i
minnum = num
if maxnode == -1:
print "Could not maxnode. May not be able to serve ip", ip
continue
if self.deterministic_public_ips:
continue
if maxnum > minnum + 1 and retries < options.retries:
# Remove the 1st ip from maxnode
t = sorted(list(self.nodes[maxnode].current_addresses))
realloc = t[0]
verbose_print("%s <- %d" % (realloc, maxnode))
self.nodes[maxnode].current_addresses.remove(realloc)
retries += 1
# Redo the outer loop.
should_loop = True
if options.lcp2:
if len(newly_healthy) == 0:
break
should_loop = self.lcp2_failback(newly_healthy)
else:
should_loop = self.basic_failback(retries_l)
def recover(self):
verbose_begin("TAKEOVER")
@ -482,11 +767,31 @@ class Cluster(object):
print "\n".join(details)
print_end()
imbalance = self.calculate_imbalance()
(imbalance, imbalance_groups) = self.calculate_imbalance()
self.imbalance.append(imbalance)
self.imbalance_groups.append(imbalance_groups)
if imbalance > options.soft_limit:
self.imbalance_count += 1
# There must be a cleaner way...
t = []
for (c, i) in zip(self.imbalance_groups_count, imbalance_groups):
if i > options.soft_limit:
t.append(c + i)
else:
t.append(c)
self.imbalance_groups_count = t
imbalance_metric = max([n.get_imbalance() for n in self.nodes])
self.imbalance_metric.append(imbalance_metric)
if options.balance:
print_begin("IMBALANCE")
print imbalance
print "ALL IPS:", imbalance
if self.have_ip_groups():
print "IP GROUPS:", imbalance_groups
if options.lcp2:
print "LCP2 IMBALANCE:", imbalance_metric
print_end()
num_unhealthy = len(self.nodes) - \
@ -501,4 +806,7 @@ class Cluster(object):
self.prev = None
self.prev = copy.deepcopy(self)
return grat_ip_moves
# True is bad!
return (grat_ip_moves > 0) or \
(not self.have_ip_groups() and imbalance > options.hard_limit) or \
(self.have_ip_groups() and (max(imbalance_groups) > options.hard_limit))

View File

@ -0,0 +1,25 @@
#!/usr/bin/env python
# 2 IP groups, both on the same 5 nodes, with each group on different
# interfaces/VLANs. One group has many more addresses to test how
# well an "imbalanced" configuration will balance...
from ctdb_takeover import Cluster, Node, process_args
process_args()

# 12 addresses in one group, 4 in the other.
addresses20 = ['192.168.20.%d' % host for host in range(1, 13)]
addresses128 = ['192.168.128.%d' % host for host in range(1, 5)]

c = Cluster()

# 5 nodes, each able to host both groups.
for _ in range(5):
    c.add_node(Node([addresses20, addresses128]))

#for i in range(3):
#    c.add_node(Node([addresses20]))

c.recover()

c.random_iterations()

View File

@ -0,0 +1,20 @@
#!/usr/bin/env python
# 2 groups of addresses, combined into 1 pool so the checking
# algorithm doesn't know about the groups, across 2 nodes.
from ctdb_takeover import Cluster, Node, process_args
process_args()

# 12 + 4 addresses, handed to each node as one flat pool.
addresses20 = ['192.168.20.%d' % host for host in range(1, 13)]
addresses21 = ['192.168.21.%d' % host for host in range(1, 5)]

c = Cluster()

for _ in range(2):
    c.add_node(Node(addresses20 + addresses21))

c.recover()

c.random_iterations()

View File

@ -0,0 +1,27 @@
#!/usr/bin/env python
# 4 IP groups, across 10 nodes, with each group on different
# interfaces/VLANs. 80 addresses in total but not evenly balanced, to
# help check some of the more extreme behaviour.
from ctdb_takeover import Cluster, Node, process_args
process_args()

# Group sizes: 40, 20, 10 and 10 addresses.
addresses1 = ['192.168.1.%d' % host for host in range(1, 41)]
addresses2 = ['192.168.2.%d' % host for host in range(1, 21)]
addresses3 = ['192.168.3.%d' % host for host in range(1, 11)]
addresses4 = ['192.168.4.%d' % host for host in range(1, 11)]

# Try detecting imbalance with square root of number of nodes?  Or
# just with a parameter indicating how unbalanced you're willing to
# accept...

c = Cluster()

for _ in range(10):
    c.add_node(Node([addresses1, addresses2, addresses3, addresses4]))

c.recover()

c.random_iterations()

View File

@ -0,0 +1,25 @@
#!/usr/bin/env python
# 2 IP groups, across 2 nodes, with each group on different
# interfaces. 4 addresses per group. A nice little canonical 2 node
# configuration.
from ctdb_takeover import Cluster, Node, process_args
process_args()

# 4 addresses per group.
addresses1 = ['192.168.1.%d' % host for host in range(1, 5)]
addresses2 = ['192.168.2.%d' % host for host in range(1, 5)]

# Try detecting imbalance with square root of number of nodes?  Or
# just with a parameter indicating how unbalanced you're willing to
# accept...

c = Cluster()

for _ in range(2):
    c.add_node(Node([addresses1, addresses2]))

c.recover()

c.random_iterations()

View File

@ -0,0 +1,23 @@
#!/usr/bin/env python
# 1 IP group, to test backward compatibility of LCP2 algorithm. 16
# addresses across 4 nodes.
from ctdb_takeover import Cluster, Node, process_args
process_args()

# A single flat pool of 16 addresses.
addresses1 = ['192.168.1.%d' % host for host in range(1, 17)]

# Try detecting imbalance with square root of number of nodes?  Or
# just with a parameter indicating how unbalanced you're willing to
# accept...

c = Cluster()

for _ in range(4):
    c.add_node(Node(addresses1))

c.recover()

c.random_iterations()

View File

@ -893,6 +893,7 @@ static int control_natgwlist(struct ctdb_context *ctdb, int argc, const char **a
/* or if we still can not find any */
if (i == nodemap->num) {
printf("-1 0.0.0.0\n");
ret = 2; /* matches ENOENT */
}
}
@ -910,7 +911,7 @@ static int control_natgwlist(struct ctdb_context *ctdb, int argc, const char **a
!!(nodemap->nodes[i].flags&NODE_FLAGS_STOPPED));
}
return 0;
return ret;
}
/*