#!/usr/bin/env bash

# Test that recovery correctly handles RSNs
#
# Recovery can under certain circumstances lead to old record copies
# resurrecting: Recovery selects the newest record copy purely by RSN. At
# the end of the recovery, the leader is the dmaster for all
# records in all (non-persistent) databases. And the other nodes locally
# hold the complete copy of the databases. The bug is that the recovery
# process does not increment the RSN on the leader at the end of
# the recovery. Now clients acting directly on the leader will
# directly change a record's content on the leader without migration
# and hence without RSN bump. So a subsequent recovery can not tell that
# the leader's copy is newer than the copies on the other nodes, since
# their RSN is the same. Hence, if the leader is not node 0 (or more
# precisely not the active node with the lowest node number), the recovery
# will choose copies from nodes with lower number and stick to these.
#
# Test steps:
# 1. Create a test database
# 2. Add a record with value value1 on leader
# 3. Force a recovery
# 4. Update the record with value value2 on leader
# 5. Force a recovery
# 6. Confirm that the value is value2

# Load the integration test helpers from the directory named by
# TEST_SCRIPTS_DIR (presumably the source of the helper functions used
# throughout this script -- confirm against integration.bash).
. "${TEST_SCRIPTS_DIR}/integration.bash"

# Abort the test as soon as any unguarded command fails.
set -e

ctdb_test_init
#
# Main test
#

# Name of the scratch database the test attaches and manipulates.
TESTDB="rec_test.tdb"

# Exit status of the test; set to 1 if the final check fails.
status=0
# Make sure node 0 is not the leader.
#
# The scenario under test needs a leader that is not the lowest-numbered
# active node. The leader number is read back from $out after each
# "ctdb_onnode 0 leader" call.
echo "find out which node is leader"
ctdb_onnode 0 leader
leader="$out"

if [ "$leader" = "0" ]; then
	echo "node 0 is leader, disable leader role on node 0"
	#
	# Note:
	# It should be sufficient to run "ctdb setleaderrole off"
	# on node 0 and wait for election and recovery to finish.
	# But there were problems related to this in this automatic
	# test, so for now use "ctdb stop" and "ctdb continue".
	#
	echo "stop node 0"
	# $CTDB is deliberately left unquoted, as in the original; it may
	# expand to a command plus options (TODO confirm against harness).
	# shellcheck disable=SC2086
	try_command_on_node 0 $CTDB stop
	wait_until_node_has_status 0 stopped

	echo "continue node 0"
	# shellcheck disable=SC2086
	try_command_on_node 0 $CTDB continue
	wait_until_node_has_status 0 notstopped

	# Re-query the leader; give up if node 0 still holds the role.
	ctdb_onnode 0 leader
	leader="$out"
	if [ "$leader" = "0" ]; then
		echo "failed to move leader to different node"
		exit 1
	fi
fi

echo "Leader: ${leader}"
# Create a temporary non-persistent database to test with
echo "create test database $TESTDB"
ctdb_onnode "$leader" attach "$TESTDB"

# Wipe the test database so the test starts from a known-empty state
echo "wipe test database"
ctdb_onnode "$leader" wipedb "$TESTDB"
# Add a record key=test1 data=value1 (written directly on the leader)
echo "store key(test1) data(value1)"
ctdb_onnode "$leader" writekey "$TESTDB" test1 value1

# Fetch the record key=test1 and show what was read
echo "read key(test1)"
ctdb_onnode "$leader" readkey "$TESTDB" test1
cat "$outfile"

# Do a recovery and wait until the leader reports it has finished
echo "force recovery"
ctdb_onnode "$leader" recover

wait_until_node_has_status "$leader" recovered
# Update the record key=test1 to data=value2 (again directly on the leader)
echo "store key(test1) data(value2)"
ctdb_onnode "$leader" writekey "$TESTDB" test1 value2

# Fetch the record key=test1 and show what was read
echo "read key(test1)"
ctdb_onnode "$leader" readkey "$TESTDB" test1
cat "$outfile"

# Do a second recovery; this is the one that must not bring back value1
echo "force recovery"
ctdb_onnode "$leader" recover

wait_until_node_has_status "$leader" recovered
# Verify record key=test1: after the second recovery it must still hold
# value2, i.e. the older copy must not have been resurrected.
echo "read key(test1)"
ctdb_onnode "$leader" readkey "$TESTDB" test1
cat "$outfile"

# Compare the exact readkey output (6 bytes of data: "value2").
if [ "$out" = "Data: size:6 ptr:[value2]" ]; then
	echo "GOOD: Recovery did not corrupt database"
else
	echo "BAD: Recovery corrupted database"
	status=1
fi

exit "$status"