1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-12 09:18:10 +03:00

Test suite: Fixes for node state parsing plus new stop/continue tests.

The parsing of "ctdb status -Y" output to determine various node
states was implemented very strictly.  Therefore, the parsing broke
due to the addition of the new "stopped" state to the output of "ctdb
status -Y".  This relaxes the parsing so that it should work for
versions prior to the introduction of the "stopped" state, as well as
future versions that add new states to the end of the list of bits in
output of "ctdb status -Y".

Similarly, the unhealthy-cluster check in _cluster_is_healthy() now
just looks for a 1 in any status bit of the "ctdb status -Y" output,
rather than checking for a particular number of 0s.

New tests
tests/simple/{41_ctdb_stop.sh,42_ctdb_continue.sh,43_stop_recmaster_yield.sh}
do rudimentary testing of the stop and continue functions.

Remove tests tests/simple/41_ctdb_ban.sh and
tests/simple/42_ctdb_unban.sh.  They were both unreliable.

tests/simple/21_ctdb_disablemonitor.sh now schedules a restart, since
one will be required.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit 67c5bfb5f02c9d45a32d976021ede4fb2174dfe9)
This commit is contained in:
Martin Schwenke 2009-07-29 18:01:07 +10:00
parent e50a067cb5
commit 48078dd24f
7 changed files with 195 additions and 203 deletions

View File

@ -326,7 +326,7 @@ _cluster_is_healthy ()
count=0
while read line ; do
count=$(($count + 1))
[ "${line#:*:*:}" != "0:0:0:0:" ] && return 1
[ "${line##:*:*:*1:}" != "$line" ] && return 1
done
[ $count -gt 0 ] && return $?
} <<<"$out" # Yay bash!
@ -368,14 +368,16 @@ node_has_status ()
local bits fpat mpat
case "$status" in
(unhealthy) bits="?:?:?:1" ;;
(healthy) bits="?:?:?:0" ;;
(disconnected) bits="1:?:?:?" ;;
(connected) bits="0:?:?:?" ;;
(banned) bits="?:1:?:?" ;;
(unbanned) bits="?:0:?:?" ;;
(disabled) bits="?:?:1:?" ;;
(enabled) bits="?:?:0:?" ;;
(unhealthy) bits="?:?:?:1:*" ;;
(healthy) bits="?:?:?:0:*" ;;
(disconnected) bits="1:*" ;;
(connected) bits="0:*" ;;
(banned) bits="?:1:*" ;;
(unbanned) bits="?:0:*" ;;
(disabled) bits="?:?:1:*" ;;
(enabled) bits="?:?:0:*" ;;
(stopped) bits="?:?:?:?:1:*" ;;
(notstopped) bits="?:?:?:?:0:*" ;;
(frozen) fpat='^[[:space:]]+frozen[[:space:]]+1$' ;;
(unfrozen) fpat='^[[:space:]]+frozen[[:space:]]+0$' ;;
(monon) mpat='^Monitoring mode:ACTIVE \(0\)$' ;;
@ -393,7 +395,7 @@ node_has_status ()
{
read x
while read line ; do
[ "${line#:${pnn}:*:${bits}:}" = "" ] && return 0
[ "${line#:${pnn}:*:${bits}}" != "$line" ] && return 0
done
return 1
} <<<"$out" # Yay bash!

View File

@ -44,6 +44,9 @@ set -e
cluster_is_healthy
# Reset configuration
ctdb_restart_when_done
test_node=1
# We need this for later, so we know how long to sleep.

View File

@ -1,98 +0,0 @@
#!/bin/bash
# Print the human-readable description of this test (purpose,
# prerequisites, steps and expected results) on stdout.
test_info() {
    # The delimiter is quoted so the text below is emitted verbatim,
    # without any parameter or command expansion.
    cat <<'EOF'
Verify the operation of the 'ctdb ban' command.
This is a superficial test of the 'ctdb ban' command. It trusts
information from CTDB that indicates that the IP failover has
happened correctly. Another test should check that the failover
has actually happened at the networking level.
Prerequisites:
* An active CTDB cluster with at least 2 active nodes.
Steps:
1. Verify that the status on all of the ctdb nodes is 'OK'.
2. Ban one of the nodes using the 'ctdb ban <timeout>' command.
3. Before the ban timeout expires, verify that the status of the
node changes to 'banned'.
4. Verify that the public IP addresses that were being served by
the node are failed over to one of the other nodes.
5. When the ban expires ensure that the status of the node changes
back to 'OK' and that the public IP addresses move back to the
node.
Expected results:
* The status of the banned nodes changes as expected and IP addresses
failover as expected.
EOF
}
# Main body of the 'ctdb ban' test.
#
# Flow: check the cluster, pick a non-recmaster node that hosts public
# IPs, ban it for a fixed time, check that its IPs move away, wait for
# the ban to expire and check that the IPs move back.  Failures of the
# IP checks are recorded in $testfailures rather than aborting.
. ctdb_test_functions.bash

ctdb_test_init "$@"

# Abort on the first unguarded command failure from here on.
set -e

cluster_is_healthy

echo "Finding out which node is the recovery master..."
# The helper's output is picked up from $out on the next line.
try_command_on_node -v 0 "$CTDB recmaster"
recmaster=$out

echo "Getting list of public IPs..."
# sed drops the first line of the 'ctdb ip' output before it is parsed.
try_command_on_node 0 "$CTDB ip -n all | sed -e '1d'"

# When selecting test_node we want a node that has public IPs and that
# is not the recmaster.  We pick the first one that satisfies both
# conditions.  We avoid the recmaster because banning the recmaster
# (obviously) causes the recmaster to change... and changing the
# recmaster causes all nodes to become unbanned!
test_node=""
ips=""
while read -r ip pnn ; do
    # A blank or truncated line leaves $pnn empty; skip it so that the
    # numeric comparison below never sees a missing operand.
    [ -n "$pnn" ] || continue

    # Two separate, fully quoted tests replace the obsolescent
    # "[ ... -a ... ]" form with unquoted operands.
    if [ -z "$test_node" ] && [ "$recmaster" -ne "$pnn" ] ; then
        test_node=$pnn
    fi

    # Collect every IP hosted by the chosen node, space-separated.
    if [ "$pnn" = "$test_node" ] ; then
        ips="${ips}${ips:+ }${ip}"
    fi
done <<<"$out" # bashism to avoid problem setting variable in pipeline.

if [ -z "$test_node" ] ; then
    echo "BAD: unable to select a suitable node for banning."
    exit 1
fi

echo "Selected node ${test_node} with IPs: $ips"

ban_time=15

echo "Banning node $test_node for $ban_time seconds"
# $CTDB is intentionally unquoted: it is passed on as command words.
try_command_on_node 1 $CTDB ban "$ban_time" -n "$test_node"

# Avoid a potential race condition...
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status "$test_node" banned

# "[!N]" is a glob for "any node but N"; $ips is intentionally unquoted
# so that each address becomes its own argument.
if wait_until_ips_are_on_nodeglob "[!${test_node}]" $ips ; then
    echo "All IPs moved."
else
    echo "Some IPs didn't move."
    testfailures=1
fi

echo "Sleeping until ban expires..."
sleep_for "$ban_time"

onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status "$test_node" unbanned

# BUG: this is only guaranteed if DeterministicIPs is 1 and
#      NoIPFailback is 0.
if wait_until_ips_are_on_nodeglob "$test_node" $ips ; then
    echo "All IPs moved."
else
    echo "Some IPs didn't move."
    testfailures=1
fi

View File

@ -0,0 +1,55 @@
#!/bin/bash
# Print the human-readable description of this test (purpose,
# prerequisites, steps and expected results) on stdout.
test_info() {
    # The delimiter is quoted so the text below is emitted verbatim,
    # without any parameter or command expansion.
    cat <<'EOF'
Verify the operation of the 'ctdb stop' command.
This is a superficial test of the 'ctdb stop' command. It trusts
information from CTDB that indicates that the IP failover has
happened correctly. Another test should check that the failover
has actually happened at the networking level.
Prerequisites:
* An active CTDB cluster with at least 2 active nodes.
Steps:
1. Verify that the status on all of the ctdb nodes is 'OK'.
2. Stop one of the nodes using the 'ctdb stop' command.
3. Verify that the status of the node changes to 'stopped'.
4. Verify that the public IP addresses that were being served by
the node are failed over to one of the other nodes.
Expected results:
* The status of the stopped nodes changes as expected and IP addresses
failover as expected.
EOF
}
# Main body of the 'ctdb stop' test: stop one node, wait for it to be
# reported as stopped, then check that its public IPs moved elsewhere.
# All helpers used below come from the sourced test library.
. ctdb_test_functions.bash
# Hand the script's own arguments to the test framework initialiser.
ctdb_test_init "$@"
# Abort on the first unguarded command failure from here on.
set -e
# Step 1 of test_info: the cluster must be healthy before we start.
cluster_is_healthy
# Reset configuration
# NOTE(review): presumably schedules a CTDB restart for when the test
# finishes, since a stopped node is left behind - confirm in the library.
ctdb_restart_when_done
# NOTE(review): expected to set $test_node and $ips, which are used below
# but never assigned in this script - confirm in the library.
select_test_node_and_ips
echo "Stopping node ${test_node}..."
# $CTDB is left unquoted so that it is passed on as command words.
try_command_on_node 1 $CTDB stop -n $test_node
# Run the wait on node 0; 'stopped' is the status name being waited for.
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node stopped
# "[!N]" is a glob for "any single character other than N", i.e. any
# node but the stopped one.  $ips is unquoted on purpose so that each
# address becomes a separate argument.
if wait_until_ips_are_on_nodeglob "[!${test_node}]" $ips ; then
echo "All IPs moved."
else
echo "Some IPs didn't move."
# Record the failure instead of exiting here.
# NOTE(review): presumably read by the test framework at exit - confirm.
testfailures=1
fi

View File

@ -0,0 +1,71 @@
#!/bin/bash
# Print the human-readable description of this test (purpose,
# prerequisites, steps and expected results) on stdout.
test_info() {
    # The delimiter is quoted so the text below is emitted verbatim,
    # without any parameter or command expansion.
    cat <<'EOF'
Verify the operation of the 'ctdb continue' command.
This is a superficial test of the 'ctdb continue' command. It trusts
information from CTDB that indicates that the IP failover and failback
has happened correctly. Another test should check that the failover
and failback has actually happened at the networking level.
Prerequisites:
* An active CTDB cluster with at least 2 active nodes.
Steps:
1. Verify that the status on all of the ctdb nodes is 'OK'.
2. Stop one of the nodes using the 'ctdb stop' command.
3. Verify that the status of the node changes to 'stopped'.
4. Verify that the public IP addresses that were being served by
the node are failed over to one of the other nodes.
5. Use 'ctdb continue' to bring the node back online.
6. Verify that the status of the node changes back to 'OK' and that
the public IP addresses move back to the node.
Expected results:
* The 'ctdb continue' command successfully brings a stopped node online.
EOF
}
# Main body of the 'ctdb continue' test: stop one node and check that
# its public IPs move away, then continue the node and check that the
# IPs move back.  All helpers used below come from the sourced test
# library.
. ctdb_test_functions.bash
# Hand the script's own arguments to the test framework initialiser.
ctdb_test_init "$@"
# Abort on the first unguarded command failure from here on.
set -e
# Step 1 of test_info: the cluster must be healthy before we start.
cluster_is_healthy
# Reset configuration
# NOTE(review): presumably schedules a CTDB restart for when the test
# finishes - confirm in the library.
ctdb_restart_when_done
# NOTE(review): expected to set $test_node and $ips, which are used below
# but never assigned in this script - confirm in the library.
select_test_node_and_ips
echo "Stopping node ${test_node}..."
# $CTDB is left unquoted so that it is passed on as command words.
try_command_on_node 1 $CTDB stop -n $test_node
# Run the wait on node 0; 'stopped' is the status name being waited for.
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node stopped
# "[!N]" is a glob for "any single character other than N", i.e. any
# node but the stopped one.  $ips is unquoted on purpose so that each
# address becomes a separate argument.
if wait_until_ips_are_on_nodeglob "[!${test_node}]" $ips ; then
echo "All IPs moved."
else
echo "Some IPs didn't move."
# Record the failure and carry on with the 'continue' half of the test.
testfailures=1
fi
echo "Continuing node $test_node"
try_command_on_node 1 $CTDB continue -n $test_node
# 'notstopped' is the status name for a node whose stopped bit is clear.
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node notstopped
# BUG: this is only guaranteed if DeterministicIPs is 1 and
# NoIPFailback is 0.
# This time the glob is the node itself: the IPs must be back on it.
if wait_until_ips_are_on_nodeglob "$test_node" $ips ; then
echo "All IPs moved."
else
echo "Some IPs didn't move."
testfailures=1
fi

View File

@ -1,95 +0,0 @@
#!/bin/bash
# Print the human-readable description of this test (purpose,
# prerequisites, steps and expected results) on stdout.
test_info() {
    # The delimiter is quoted so the text below is emitted verbatim,
    # without any parameter or command expansion.
    cat <<'EOF'
Verify the operation of the 'ctdb unban' command.
This is a superficial test of the 'ctdb unban' command. It trusts
information from CTDB that indicates that the IP failover and failback
has happened correctly. Another test should check that the failover
and failback has actually happened at the networking level.
Prerequisites:
* An active CTDB cluster with at least 2 active nodes.
Steps:
1. Verify that the status on all of the ctdb nodes is 'OK'.
2. Ban one of the nodes using the 'ctdb ban <timeout>' command.
3. Before the ban timeout expires, verify that the status of the
node changes to 'banned'.
4. Verify that the public IP addresses that were being served by
the node are failed over to one of the other nodes.
5. Before the ban timeout expires, use 'ctdb unban' to unban the
node.
6. Verify that the status of the node changes back to 'OK' and that
the public IP addresses move back to the node.
Expected results:
* The 'ctdb unban' command successfully unbans a banned node.
EOF
}
# Main body of the 'ctdb unban' test.
#
# Flow: check the cluster, pick a non-recmaster node that hosts public
# IPs, ban it, check that its IPs move away, unban it before the ban
# expires and check that the IPs move back.  Failures of the IP checks
# are recorded in $testfailures rather than aborting.
. ctdb_test_functions.bash

ctdb_test_init "$@"

# Abort on the first unguarded command failure from here on.
set -e

cluster_is_healthy

echo "Finding out which node is the recovery master..."
# The helper's output is picked up from $out on the next line.
try_command_on_node -v 0 "$CTDB recmaster"
recmaster=$out

echo "Getting list of public IPs..."
# sed drops the first line of the 'ctdb ip' output before it is parsed.
try_command_on_node 0 "$CTDB ip -n all | sed -e '1d'"

# See 41_ctdb_ban.sh for an explanation of why test_node is chosen
# like this.
test_node=""
ips=""
while read -r ip pnn ; do
    # A blank or truncated line leaves $pnn empty; skip it so that the
    # numeric comparison below never sees a missing operand.
    [ -n "$pnn" ] || continue

    # Two separate, fully quoted tests replace the obsolescent
    # "[ ... -a ... ]" form with unquoted operands.
    if [ -z "$test_node" ] && [ "$recmaster" -ne "$pnn" ] ; then
        test_node=$pnn
    fi

    # Collect every IP hosted by the chosen node, space-separated.
    if [ "$pnn" = "$test_node" ] ; then
        ips="${ips}${ips:+ }${ip}"
    fi
done <<<"$out" # bashism to avoid problem setting variable in pipeline.

if [ -z "$test_node" ] ; then
    echo "BAD: unable to select a suitable node for banning."
    exit 1
fi

echo "Selected node ${test_node} with IPs: $ips"

# Long enough that the explicit unban below happens before expiry.
ban_time=60

echo "Banning node $test_node for $ban_time seconds"
# $CTDB is intentionally unquoted: it is passed on as command words.
try_command_on_node 1 $CTDB ban "$ban_time" -n "$test_node"

# Avoid a potential race condition...
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status "$test_node" banned

# "[!N]" is a glob for "any node but N"; $ips is intentionally unquoted
# so that each address becomes its own argument.
if wait_until_ips_are_on_nodeglob "[!${test_node}]" $ips ; then
    echo "All IPs moved."
else
    echo "Some IPs didn't move."
    testfailures=1
fi

echo "Unbanning node $test_node"
try_command_on_node 1 $CTDB unban -n "$test_node"

onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status "$test_node" unbanned

# BUG: this is only guaranteed if DeterministicIPs is 1 and
#      NoIPFailback is 0.
if wait_until_ips_are_on_nodeglob "$test_node" $ips ; then
    echo "All IPs moved."
else
    echo "Some IPs didn't move."
    testfailures=1
fi

View File

@ -0,0 +1,54 @@
#!/bin/bash
# Print the human-readable description of this test (purpose,
# prerequisites, steps and expected results) on stdout.
test_info() {
    # The delimiter is quoted so the text below is emitted verbatim,
    # without any parameter or command expansion.
    cat <<'EOF'
Verify that 'ctdb stop' causes a node to yield the recovery master role.
Prerequisites:
* An active CTDB cluster with at least 2 active nodes.
Steps:
1. Determine which node is the recmaster.
2. Stop this node using the 'ctdb stop' command.
3. Verify that the status of the node changes to 'stopped'.
4. Verify that this node no longer has the recovery master role.
Expected results:
* The 'ctdb stop' command causes a node to yield the recmaster role.
EOF
}
# Main body of the recmaster-yield test: find the current recovery
# master, stop that node, then ask again and require that a different
# node now holds the role.  All helpers used below come from the sourced
# test library.
. ctdb_test_functions.bash
# Hand the script's own arguments to the test framework initialiser.
ctdb_test_init "$@"
# Abort on the first unguarded command failure from here on.
set -e
cluster_is_healthy
# Reset configuration
# NOTE(review): presumably schedules a CTDB restart for when the test
# finishes, since a stopped node is left behind - confirm in the library.
ctdb_restart_when_done
echo "Finding out which node is the recovery master..."
# The helper's output is picked up from $out on the next line.
try_command_on_node -v 0 "$CTDB recmaster"
# The node under test is whichever node is recmaster right now.
test_node=$out
echo "Stopping node ${test_node} - it is the current recmaster..."
# $CTDB is left unquoted so that it is passed on as command words.
try_command_on_node 1 $CTDB stop -n $test_node
# Run the wait on node 0; 'stopped' is the status name being waited for.
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node stopped
echo "Checking which node is the recovery master now..."
# NOTE(review): the role is queried once, straight after the node is
# seen as stopped; confirm a new recmaster is guaranteed by this point.
try_command_on_node -v 0 "$CTDB recmaster"
recmaster=$out
# String comparison of the two recmaster answers: any change passes.
if [ "$recmaster" != "$test_node" ] ; then
echo "OK: recmaster moved to node $recmaster"
else
echo "BAD: recmaster did not move"
# Unlike the IP-movement checks in the sibling tests, which set
# testfailures=1, this failure ends the script immediately.
exit 1
fi