1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-23 17:34:34 +03:00
samba-mirror/ctdb/tests/complex/18_ctdb_reloadips.sh
Martin Schwenke fdc0dbee29 ctdb-tests: Add synchronisation points in reload IPs tests
"ctdb reloadips" use of ipreallocate() can result in a spurious
takeover run.  This can cause a subsequent "ctdb reloadips" to fail
to disable takeover runs (due to there being one already in progress).

There are various possible improvements but a proper fix probably
requires a protocol change.  That would mean receiving an ACK for a
takeover run request to indicate that the request will be processed
and then a broadcast to indicate a completed takeover run.

There are various other partial fixes (e.g. de-duping queued takeover
run requests against those in the in-progress queue) and workarounds
(e.g. always do a double ipreallocate() in the tool, which should
absorb the spurious takeover run).

However, this is unlikely to be a real-world problem.  Real use cases
should not involve repeatedly reloading the IP configuration.

Instead, work around the problem of flaky tests by manually adding
"ctdb sync" commands to cause extra no-op takeover runs.  These should
not add spurious takeover runs and will create synchronisation points
to help avoid the issue.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
2017-02-24 07:47:11 +01:00

242 lines
6.0 KiB
Bash
Executable File

#!/bin/bash
# Print the human-readable description of this test: what it verifies,
# what it needs and what outcome is expected.
test_info ()
{
    # One argument per output line; printf repeats the format for each.
    printf '%s\n' \
        "Verify that adding/deleting IPs using 'ctdb reloadips' works" \
        'Checks that when IPs are added to and deleted from a single node then' \
        'those IPs are actually assigned and unassigned from the specified' \
        'interface.' \
        'Prerequisites:' \
        '* An active CTDB cluster with public IP addresses configured' \
        'Expected results:' \
        '* When IPs are added to a single node then they are assigned to an' \
        'interface.' \
        '* When IPs are deleted from a single node then they disappear from an' \
        'interface.'
}
# Load the shared integration-test helper library; the helper functions
# called below and throughout this script are not defined in this file.
. "${TEST_SCRIPTS_DIR}/integration.bash"
# From here on, any unchecked command failure aborts the test.
set -e
# Hand this script's command-line arguments to the test framework.
ctdb_test_init "$@"
# NOTE(review): presumably refuses to run unless the test targets a real
# cluster (this test edits interfaces and config files) - confirm in
# integration.bash.
ctdb_test_check_real_cluster
cluster_is_healthy
# Reset configuration
ctdb_restart_when_done
# NOTE(review): presumably sets $test_node, which the rest of this script
# reads - confirm in integration.bash.
select_test_node_and_ips
####################

# Search for an unused 10.B.1.0/24 network on which to add public IP
# addresses.
#
# The initial search is for a 10.B.0.0/16 network since some
# configurations may use a whole class B for the private network.
# Check that there are no public IP addresses (as reported by "ctdb ip
# all") or other IP addresses (as reported by "ip addr show") with
# the provided prefix.  Note that this is an IPv4-specific test.

echo "Getting public IP information from CTDB..."
try_command_on_node any "$CTDB ip -X -v all"
# Keep fields 2, 3 and 5 of the '|'-separated output, skipping the
# header line.
ctdb_ip_info=$(echo "$out" | awk -F'|' 'NR > 1 { print $2, $3, $5 }')

echo "Getting IP information from interfaces..."
try_command_on_node all "ip addr show"
# One bare IPv4 address per line, with the /mask suffix removed.
ip_addr_info=$(echo "$out" |
    awk '$1 == "inet" { ip = $2; sub(/\/.*/, "", ip); print ip }')

# Succeed if the first field of any line in $2 is an address that
# starts with "$1." (i.e. lies inside that network prefix).
# Arguments: $1 - prefix such as 10.B, $2 - newline-separated IP list
prefix_in_use ()
{
    local _prefix="$1"
    local _ip_list="$2"
    local _ip _rest

    # Only the first field is the address; any further fields on the
    # line are collected in _rest and ignored.
    while read -r _ip _rest ; do
        case "$_ip" in
        "${_prefix}."*) return 0 ;;
        esac
    done <<<"$_ip_list"

    return 1
}

prefix=""
for b in $(seq 0 255) ; do
    candidate="10.${b}"

    # Skip the candidate if it matches any IP address returned by
    # "ip addr show" or any public IP address from "ctdb ip all".
    if prefix_in_use "$candidate" "$ip_addr_info" ||
       prefix_in_use "$candidate" "$ctdb_ip_info" ; then
        continue
    fi

    # Got through the IPs without matching prefix - done!
    prefix="$candidate"
    break
done

[ -n "$prefix" ] || die "Unable to find a usable IP address prefix"

# We really want a class C: 10.B.1.0/24
prefix="${prefix}.1"
####################

# Interface of the first public IP entry whose node number (2nd column
# of $ctdb_ip_info) is the test node; the interface is the 3rd column.
iface=$(echo "$ctdb_ip_info" |
    awk -v pnn="$test_node" '$2 == pnn { print $3 ; exit }')

####################

# This needs to be set only on the recmaster.  All nodes should do the trick.
new_takeover_timeout=90
echo "Setting TakeoverTimeout=${new_takeover_timeout} to avoid potential bans"
try_command_on_node all "$CTDB setvar TakeoverTimeout ${new_takeover_timeout}"

####################

addresses=$(get_ctdbd_command_line_option "$test_node" "public-addresses")
echo "Public addresses file on node $test_node is \"$addresses\""

# The backup lives next to the original, made unique by this shell's PID.
backup="${addresses}.$$"
# Copy the public addresses file to $backup on the test node.
# Globals: test_node, addresses, backup (all read)
backup_public_addresses ()
{
    local copy_cmd="cp -a $addresses $backup"

    try_command_on_node $test_node "$copy_cmd"
}
# Move $backup back over the public addresses file on the test node.
# Best effort: the remote command hides its output and always succeeds,
# so calling this when no backup exists is harmless.
# Globals: test_node, addresses, backup (all read)
restore_public_addresses ()
{
    local move_cmd="mv $backup $addresses >/dev/null 2>&1 || true"

    try_command_on_node $test_node "$move_cmd"
}
# Register the restore as a test exit hook before the backup is made, so
# the original file is put back once the test is over.
ctdb_test_exit_hook_add restore_public_addresses
# Now create that backup
backup_public_addresses
####################
# Rewrite a node's public addresses file as the original contents plus
# the addresses <prefix>.<first>/24 .. <prefix>.<last>/24.
#
# Arguments:
#   $1 - node whose configuration is changed
#   $2 - path of the public addresses file on that node
#   $3 - interface to put on each new line
#   $4 - first 3 octets of the new addresses
#   $5 - last octet of the first new address
#   $6 - last octet of the last new address
add_ips_to_original_config ()
{
    # test_node and addresses must keep these names: the backup/restore
    # helpers called below read variables with these names.
    local test_node="$1" addresses="$2"
    local iface="$3" prefix="$4"
    local first="$5" last="$6"

    echo "Adding new public IPs to original config on node ${test_node}..."
    echo "IPs will be ${prefix}.${first}/24..${prefix}.${last}/24"

    # Start again from the original file every time: rebuilding it is
    # easier than deleting individual entries on a remote node.
    restore_public_addresses
    backup_public_addresses

    # Stream the new entries to tee on the remote node.  tee is a safe
    # way of appending to a remote file, avoiding fragile quoting or
    # redirection.
    seq $first $last |
        while read -r octet ; do
            echo "${prefix}.${octet}/24 ${iface}"
        done |
        try_command_on_node -i $test_node "tee -a $addresses"
}
# Check that the addresses within <prefix>.0/24 configured on an
# interface are exactly the expected range.
#
# Arguments:
#   $1 - node to query
#   $2 - interface to inspect
#   $3 - first 3 octets of the network being checked
#   $4 - last octet of the first expected address, or 0 with $5 omitted
#        to expect no addresses at all
#   $5 - last octet of the last expected address
# Globals: out (read; set by try_command_on_node)
# Outputs: progress and a GOOD/BAD verdict on stdout
# Exits:   status 1 if the addresses differ from those expected
check_ips ()
{
    local test_node="$1"
    local iface="$2"
    local prefix="$3"
    local first="$4"
    local last="$5"

    # Declared separately from the assignments below: "local v=$(cmd)"
    # would hide a failure of cmd behind the exit status of "local".
    local public_ips_file ip_addrs_file diffs i

    public_ips_file=$(mktemp) || exit 1

    # If just 0 specified then this is an empty range, so the expected
    # file is left empty.
    if [ "$first" = 0 ] && [ -z "$last" ] ; then
        echo "Checking that there are no IPs in ${prefix}.0/24"
    else
        echo "Checking IPs in range ${prefix}.${first}/24..${prefix}.${last}/24"
        for i in $(seq "$first" "$last") ; do
            echo "${prefix}.${i}"
        done | sort >"$public_ips_file"
    fi

    try_command_on_node "$test_node" "ip addr show dev ${iface}"

    ip_addrs_file=$(mktemp) || { rm -f "$public_ips_file" ; exit 1 ; }

    # Keep only the "inet" addresses inside the prefix, without the
    # /mask.  Both lists go through the same sort so they can be
    # compared line by line.
    echo "$out" |
        sed -n -e "s@.*inet * \(${prefix//./\.}\.[0-9]*\)/.*@\1@p" |
        sort >"$ip_addrs_file"

    # diff exits non-zero when the files differ; that case is reported
    # below instead of tripping "set -e" here.
    diffs=$(diff "$public_ips_file" "$ip_addrs_file") || true
    rm -f "$ip_addrs_file" "$public_ips_file"

    if [ -z "$diffs" ] ; then
        echo "GOOD: IP addresses are as expected"
    else
        echo "BAD: IP addresses are incorrect:"
        echo "$diffs"
        exit 1
    fi
}
####################

# Last octet of the highest address added by this test.
new_ip_max=100

####################

add_ips_to_original_config \
    "$test_node" "$addresses" "$iface" "$prefix" 1 "$new_ip_max"

try_command_on_node "$test_node" "$CTDB reloadips"

check_ips "$test_node" "$iface" "$prefix" 1 "$new_ip_max"

# Synchronisation point: an extra no-op takeover run, so that a spurious
# takeover run left over from "ctdb reloadips" cannot cause the next
# "ctdb reloadips" to fail to disable takeover runs.
try_command_on_node any "$CTDB sync"

####################

# This should be the primary.  Ensure that no other IPs are lost
echo "Using 'ctdb reloadips' to remove the 1st address just added..."

add_ips_to_original_config \
    "$test_node" "$addresses" "$iface" "$prefix" 2 "$new_ip_max"

try_command_on_node "$test_node" "$CTDB reloadips"

check_ips "$test_node" "$iface" "$prefix" 2 "$new_ip_max"

# Synchronisation point, as above.
try_command_on_node any "$CTDB sync"

####################

# Get rid of about 1/2 the IPs
start=$((new_ip_max / 2 + 1))
echo "Updating to include only about 1/2 of the new IPs..."

add_ips_to_original_config \
    "$test_node" "$addresses" "$iface" "$prefix" "$start" "$new_ip_max"

try_command_on_node "$test_node" "$CTDB reloadips"

check_ips "$test_node" "$iface" "$prefix" "$start" "$new_ip_max"

# Synchronisation point, as above.
try_command_on_node any "$CTDB sync"

####################

# Delete the rest
echo "Restoring original IP configuration..."
restore_public_addresses

try_command_on_node "$test_node" "$CTDB reloadips"

# A lone 0 means "expect no addresses left in the test network".
check_ips "$test_node" "$iface" "$prefix" 0