mirror of
https://github.com/samba-team/samba.git
synced 2025-01-12 09:18:10 +03:00
Test suite: Fixes for node state parsing plus new stop/continue tests.
The parsing of "ctdb status -Y" output to determine various node states was implemented very strictly. Therefore, the parsing broke due to the addition of the new "stopped" state to the output of "ctdb status -Y". This commit relaxes the parsing so that it should work for versions prior to the introduction of the "stopped" state, as well as future versions that add new states to the end of the list of bits in the output of "ctdb status -Y". Similarly, the check for an unhealthy cluster (in _cluster_is_healthy()) now just checks for a single 1 in any bit of the "ctdb status -Y" output, rather than checking for a particular number of 0s. New tests tests/simple/{41_ctdb_stop.sh,42_ctdb_continue.sh,43_stop_recmaster_yield.sh} do rudimentary testing of the stop and continue functions. Remove tests tests/simple/41_ctdb_ban.sh and tests/simple/42_ctdb_unban.sh. They were both unreliable. tests/simple/21_ctdb_disablemonitor.sh now schedules a restart, since one will be required. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 67c5bfb5f02c9d45a32d976021ede4fb2174dfe9)
This commit is contained in:
parent
e50a067cb5
commit
48078dd24f
@ -326,7 +326,7 @@ _cluster_is_healthy ()
|
||||
count=0
|
||||
while read line ; do
|
||||
count=$(($count + 1))
|
||||
[ "${line#:*:*:}" != "0:0:0:0:" ] && return 1
|
||||
[ "${line##:*:*:*1:}" != "$line" ] && return 1
|
||||
done
|
||||
[ $count -gt 0 ] && return $?
|
||||
} <<<"$out" # Yay bash!
|
||||
@ -368,14 +368,16 @@ node_has_status ()
|
||||
|
||||
local bits fpat mpat
|
||||
case "$status" in
|
||||
(unhealthy) bits="?:?:?:1" ;;
|
||||
(healthy) bits="?:?:?:0" ;;
|
||||
(disconnected) bits="1:?:?:?" ;;
|
||||
(connected) bits="0:?:?:?" ;;
|
||||
(banned) bits="?:1:?:?" ;;
|
||||
(unbanned) bits="?:0:?:?" ;;
|
||||
(disabled) bits="?:?:1:?" ;;
|
||||
(enabled) bits="?:?:0:?" ;;
|
||||
(unhealthy) bits="?:?:?:1:*" ;;
|
||||
(healthy) bits="?:?:?:0:*" ;;
|
||||
(disconnected) bits="1:*" ;;
|
||||
(connected) bits="0:*" ;;
|
||||
(banned) bits="?:1:*" ;;
|
||||
(unbanned) bits="?:0:*" ;;
|
||||
(disabled) bits="?:?:1:*" ;;
|
||||
(enabled) bits="?:?:0:*" ;;
|
||||
(stopped) bits="?:?:?:?:1:*" ;;
|
||||
(notstopped) bits="?:?:?:?:0:*" ;;
|
||||
(frozen) fpat='^[[:space:]]+frozen[[:space:]]+1$' ;;
|
||||
(unfrozen) fpat='^[[:space:]]+frozen[[:space:]]+0$' ;;
|
||||
(monon) mpat='^Monitoring mode:ACTIVE \(0\)$' ;;
|
||||
@ -393,7 +395,7 @@ node_has_status ()
|
||||
{
|
||||
read x
|
||||
while read line ; do
|
||||
[ "${line#:${pnn}:*:${bits}:}" = "" ] && return 0
|
||||
[ "${line#:${pnn}:*:${bits}}" != "$line" ] && return 0
|
||||
done
|
||||
return 1
|
||||
} <<<"$out" # Yay bash!
|
||||
|
@ -44,6 +44,9 @@ set -e
|
||||
|
||||
cluster_is_healthy
|
||||
|
||||
# Reset configuration
|
||||
ctdb_restart_when_done
|
||||
|
||||
test_node=1
|
||||
|
||||
# We need this for later, so we know how long to sleep.
|
||||
|
@ -1,98 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
test_info()
|
||||
{
|
||||
cat <<EOF
|
||||
Verify the operation of the 'ctdb ban' command.
|
||||
|
||||
This is a superficial test of the 'ctdb ban' command. It trusts
|
||||
information from CTDB that indicates that the IP failover has
|
||||
happened correctly. Another test should check that the failover
|
||||
has actually happened at the networking level.
|
||||
|
||||
Prerequisites:
|
||||
|
||||
* An active CTDB cluster with at least 2 active nodes.
|
||||
|
||||
Steps:
|
||||
|
||||
1. Verify that the status on all of the ctdb nodes is 'OK'.
|
||||
2. Ban one of the nodes using the 'ctdb ban <timeout>' command.
|
||||
3. Before the ban timeout expires, verify that the status of the
|
||||
node changes to 'banned'.
|
||||
4. Verify that the public IP addresses that were being served by
|
||||
the node are failed over to one of the other nodes.
|
||||
5. When the ban expires ensure that the status of the node changes
|
||||
back to 'OK' and that the public IP addresses move back to the
|
||||
node.
|
||||
|
||||
Expected results:
|
||||
|
||||
* The status of the banned nodes changes as expected and IP addresses
|
||||
failover as expected.
|
||||
EOF
|
||||
}
|
||||
|
||||
. ctdb_test_functions.bash
|
||||
|
||||
ctdb_test_init "$@"
|
||||
|
||||
set -e
|
||||
|
||||
cluster_is_healthy
|
||||
|
||||
echo "Finding out which node is the recovery master..."
|
||||
try_command_on_node -v 0 "$CTDB recmaster"
|
||||
recmaster=$out
|
||||
|
||||
echo "Getting list of public IPs..."
|
||||
try_command_on_node 0 "$CTDB ip -n all | sed -e '1d'"
|
||||
|
||||
# When selecting test_node we want a node that has public IPs and that
|
||||
# is not the recmaster. We pick the first one that satisfies both
|
||||
# conditions. We avoid the recmaster because banning the recmaster
|
||||
# (obviously) causes the recmaster to change... and changing the
|
||||
# recmaster causes all nodes to become unbanned!
|
||||
test_node=""
|
||||
|
||||
ips=""
|
||||
while read ip pnn ; do
|
||||
[ -z "$test_node" -a $recmaster -ne $pnn ] && test_node=$pnn
|
||||
[ "$pnn" = "$test_node" ] && ips="${ips}${ips:+ }${ip}"
|
||||
done <<<"$out" # bashism to avoid problem setting variable in pipeline.
|
||||
|
||||
if [ -z "$test_node" ] ; then
|
||||
echo "BAD: unable to select a suitable node for banning."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Selected node ${test_node} with IPs: $ips"
|
||||
|
||||
ban_time=15
|
||||
|
||||
echo "Banning node $test_node for $ban_time seconds"
|
||||
try_command_on_node 1 $CTDB ban $ban_time -n $test_node
|
||||
|
||||
# Avoid a potential race condition...
|
||||
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node banned
|
||||
|
||||
if wait_until_ips_are_on_nodeglob "[!${test_node}]" $ips ; then
|
||||
echo "All IPs moved."
|
||||
else
|
||||
echo "Some IPs didn't move."
|
||||
testfailures=1
|
||||
fi
|
||||
|
||||
echo "Sleeping until ban expires..."
|
||||
sleep_for $ban_time
|
||||
|
||||
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node unbanned
|
||||
|
||||
# BUG: this is only guaranteed if DeterministicIPs is 1 and
|
||||
# NoIPFailback is 0.
|
||||
if wait_until_ips_are_on_nodeglob "$test_node" $ips ; then
|
||||
echo "All IPs moved."
|
||||
else
|
||||
echo "Some IPs didn't move."
|
||||
testfailures=1
|
||||
fi
|
55
ctdb/tests/simple/41_ctdb_stop.sh
Executable file
55
ctdb/tests/simple/41_ctdb_stop.sh
Executable file
@ -0,0 +1,55 @@
|
||||
#!/bin/bash
|
||||
|
||||
test_info()
|
||||
{
|
||||
cat <<EOF
|
||||
Verify the operation of the 'ctdb stop' command.
|
||||
|
||||
This is a superficial test of the 'ctdb stop' command. It trusts
|
||||
information from CTDB that indicates that the IP failover has
|
||||
happened correctly. Another test should check that the failover
|
||||
has actually happened at the networking level.
|
||||
|
||||
Prerequisites:
|
||||
|
||||
* An active CTDB cluster with at least 2 active nodes.
|
||||
|
||||
Steps:
|
||||
|
||||
1. Verify that the status on all of the ctdb nodes is 'OK'.
|
||||
2. Stop one of the nodes using the 'ctdb stop' command.
|
||||
3. Verify that the status of the node changes to 'stopped'.
|
||||
4. Verify that the public IP addresses that were being served by
|
||||
the node are failed over to one of the other nodes.
|
||||
|
||||
Expected results:
|
||||
|
||||
* The status of the stopped nodes changes as expected and IP addresses
|
||||
failover as expected.
|
||||
EOF
|
||||
}
|
||||
|
||||
. ctdb_test_functions.bash
|
||||
|
||||
ctdb_test_init "$@"
|
||||
|
||||
set -e
|
||||
|
||||
cluster_is_healthy
|
||||
|
||||
# Reset configuration
|
||||
ctdb_restart_when_done
|
||||
|
||||
select_test_node_and_ips
|
||||
|
||||
echo "Stopping node ${test_node}..."
|
||||
try_command_on_node 1 $CTDB stop -n $test_node
|
||||
|
||||
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node stopped
|
||||
|
||||
if wait_until_ips_are_on_nodeglob "[!${test_node}]" $ips ; then
|
||||
echo "All IPs moved."
|
||||
else
|
||||
echo "Some IPs didn't move."
|
||||
testfailures=1
|
||||
fi
|
71
ctdb/tests/simple/42_ctdb_continue.sh
Executable file
71
ctdb/tests/simple/42_ctdb_continue.sh
Executable file
@ -0,0 +1,71 @@
|
||||
#!/bin/bash
|
||||
|
||||
test_info()
|
||||
{
|
||||
cat <<EOF
|
||||
Verify the operation of the 'ctdb continue' command.
|
||||
|
||||
This is a superficial test of the 'ctdb continue' command. It trusts
|
||||
information from CTDB that indicates that the IP failover and failback
|
||||
has happened correctly. Another test should check that the failover
|
||||
and failback has actually happened at the networking level.
|
||||
|
||||
Prerequisites:
|
||||
|
||||
* An active CTDB cluster with at least 2 active nodes.
|
||||
|
||||
Steps:
|
||||
|
||||
1. Verify that the status on all of the ctdb nodes is 'OK'.
|
||||
2. Stop one of the nodes using the 'ctdb stop' command.
|
||||
3. Verify that the status of the node changes to 'stopped'.
|
||||
4. Verify that the public IP addresses that were being served by
|
||||
the node are failed over to one of the other nodes.
|
||||
5. Use 'ctdb continue' to bring the node back online.
|
||||
6. Verify that the status of the node changes back to 'OK' and that
|
||||
the public IP addresses move back to the node.
|
||||
|
||||
Expected results:
|
||||
|
||||
* The 'ctdb continue' command successfully brings a stopped node online.
|
||||
EOF
|
||||
}
|
||||
|
||||
. ctdb_test_functions.bash
|
||||
|
||||
ctdb_test_init "$@"
|
||||
|
||||
set -e
|
||||
|
||||
cluster_is_healthy
|
||||
|
||||
# Reset configuration
|
||||
ctdb_restart_when_done
|
||||
|
||||
select_test_node_and_ips
|
||||
|
||||
echo "Stopping node ${test_node}..."
|
||||
try_command_on_node 1 $CTDB stop -n $test_node
|
||||
|
||||
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node stopped
|
||||
|
||||
if wait_until_ips_are_on_nodeglob "[!${test_node}]" $ips ; then
|
||||
echo "All IPs moved."
|
||||
else
|
||||
echo "Some IPs didn't move."
|
||||
testfailures=1
|
||||
fi
|
||||
|
||||
echo "Continuing node $test_node"
|
||||
try_command_on_node 1 $CTDB continue -n $test_node
|
||||
|
||||
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node notstopped
|
||||
|
||||
# BUG: this is only guaranteed if DeterministicIPs is 1 and
|
||||
# NoIPFailback is 0.
|
||||
if wait_until_ips_are_on_nodeglob "$test_node" $ips ; then
|
||||
echo "All IPs moved."
|
||||
else
|
||||
echo "Some IPs didn't move."
|
||||
testfailures=1
|
||||
fi
|
@ -1,95 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
test_info()
|
||||
{
|
||||
cat <<EOF
|
||||
Verify the operation of the 'ctdb unban' command.
|
||||
|
||||
This is a superficial test of the 'ctdb uban' command. It trusts
|
||||
information from CTDB that indicates that the IP failover and failback
|
||||
has happened correctly. Another test should check that the failover
|
||||
and failback has actually happened at the networking level.
|
||||
|
||||
Prerequisites:
|
||||
|
||||
* An active CTDB cluster with at least 2 active nodes.
|
||||
|
||||
Steps:
|
||||
|
||||
1. Verify that the status on all of the ctdb nodes is 'OK'.
|
||||
2. Ban one of the nodes using the 'ctdb ban <timeout>' command.
|
||||
3. Before the ban timeout expires, verify that the status of the
|
||||
node changes to 'banned'.
|
||||
4. Verify that the public IP addresses that were being served by
|
||||
the node are failed over to one of the other nodes.
|
||||
5. Before the ban timeout expires, use 'ctdb unban' to unban the
|
||||
node.
|
||||
6. Verify that the status of the node changes back to 'OK' and that
|
||||
the public IP addresses move back to the node.
|
||||
|
||||
Expected results:
|
||||
|
||||
* The 'ctdb unban' command successfully unbans a banned node.
|
||||
EOF
|
||||
}
|
||||
|
||||
. ctdb_test_functions.bash
|
||||
|
||||
ctdb_test_init "$@"
|
||||
|
||||
set -e
|
||||
|
||||
cluster_is_healthy
|
||||
|
||||
echo "Finding out which node is the recovery master..."
|
||||
try_command_on_node -v 0 "$CTDB recmaster"
|
||||
recmaster=$out
|
||||
|
||||
echo "Getting list of public IPs..."
|
||||
try_command_on_node 0 "$CTDB ip -n all | sed -e '1d'"
|
||||
|
||||
# See 41_ctdb_ban.sh for an explanation of why test_node is chosen
|
||||
# like this.
|
||||
test_node=""
|
||||
|
||||
ips=""
|
||||
while read ip pnn ; do
|
||||
[ -z "$test_node" -a $recmaster -ne $pnn ] && test_node=$pnn
|
||||
[ "$pnn" = "$test_node" ] && ips="${ips}${ips:+ }${ip}"
|
||||
done <<<"$out" # bashism to avoid problem setting variable in pipeline.
|
||||
|
||||
if [ -z "$test_node" ] ; then
|
||||
echo "BAD: unable to select a suitable node for banning."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Selected node ${test_node} with IPs: $ips"
|
||||
|
||||
ban_time=60
|
||||
|
||||
echo "Banning node $test_node for $ban_time seconds"
|
||||
try_command_on_node 1 $CTDB ban $ban_time -n $test_node
|
||||
|
||||
# Avoid a potential race condition...
|
||||
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node banned
|
||||
|
||||
if wait_until_ips_are_on_nodeglob "[!${test_node}]" $ips ; then
|
||||
echo "All IPs moved."
|
||||
else
|
||||
echo "Some IPs didn't move."
|
||||
testfailures=1
|
||||
fi
|
||||
|
||||
echo "Unbanning node $test_node"
|
||||
try_command_on_node 1 $CTDB unban -n $test_node
|
||||
|
||||
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node unbanned
|
||||
|
||||
# BUG: this is only guaranteed if DeterministicIPs is 1 and
|
||||
# NoIPFailback is 0.
|
||||
if wait_until_ips_are_on_nodeglob "$test_node" $ips ; then
|
||||
echo "All IPs moved."
|
||||
else
|
||||
echo "Some IPs didn't move."
|
||||
testfailures=1
|
||||
fi
|
54
ctdb/tests/simple/43_stop_recmaster_yield.sh
Executable file
54
ctdb/tests/simple/43_stop_recmaster_yield.sh
Executable file
@ -0,0 +1,54 @@
|
||||
#!/bin/bash
|
||||
|
||||
test_info()
|
||||
{
|
||||
cat <<EOF
|
||||
Verify that 'ctdb stop' causes a node to yield the recovery master role.
|
||||
|
||||
Prerequisites:
|
||||
|
||||
* An active CTDB cluster with at least 2 active nodes.
|
||||
|
||||
Steps:
|
||||
|
||||
1. Determine which node is the recmaster.
|
||||
2. Stop this node using the 'ctdb stop' command.
|
||||
3. Verify that the status of the node changes to 'stopped'.
|
||||
4. Verify that this node no longer has the recovery master role.
|
||||
|
||||
Expected results:
|
||||
|
||||
* The 'ctdb stop' command causes a node to yield the recmaster role.
|
||||
EOF
|
||||
}
|
||||
|
||||
. ctdb_test_functions.bash
|
||||
|
||||
ctdb_test_init "$@"
|
||||
|
||||
set -e
|
||||
|
||||
cluster_is_healthy
|
||||
|
||||
# Reset configuration
|
||||
ctdb_restart_when_done
|
||||
|
||||
echo "Finding out which node is the recovery master..."
|
||||
try_command_on_node -v 0 "$CTDB recmaster"
|
||||
test_node=$out
|
||||
|
||||
echo "Stopping node ${test_node} - it is the current recmaster..."
|
||||
try_command_on_node 1 $CTDB stop -n $test_node
|
||||
|
||||
onnode 0 $CTDB_TEST_WRAPPER wait_until_node_has_status $test_node stopped
|
||||
|
||||
echo "Checking which node is the recovery master now..."
|
||||
try_command_on_node -v 0 "$CTDB recmaster"
|
||||
recmaster=$out
|
||||
|
||||
if [ "$recmaster" != "$test_node" ] ; then
|
||||
echo "OK: recmaster moved to node $recmaster"
|
||||
else
|
||||
echo "BAD: recmaster did not move"
|
||||
exit 1
|
||||
fi
|
Loading…
Reference in New Issue
Block a user