cluster/tier: stop tier migration after graph switch

On a graph switch, a new xlator and private structures are
created. The tier migration daemon must stop using the
old xlator and private structures and begin using the
new ones. Otherwise, when RPCs arrive (such as counter
queries from glusterd), the new xlator will be consulted,
but it will not have up-to-date information. The fix
detects a graph switch and exits the daemon in that
case. A typical graph switch in the tier case is
turning off a performance translator.

Change-Id: Ibfbd4720dc82ea179b77c81b8f534abced21e3c8
BUG: 1226005
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: http://review.gluster.org/11372
This commit is contained in:
Dan Lambright 2015-06-23 16:35:03 -04:00
parent 26ef697318
commit 875aa01ec8
2 changed files with 34 additions and 3 deletions

View File

@ -52,6 +52,15 @@ function confirm_vol_stopped {
fi
}
# Report whether the tier status output contains a standalone zero field.
#
# Runs `$CLI volume rebalance $V0 tier status` and searches its output for
# the string " 0 " (a zero with a space on each side) -- presumably a
# migration counter that is still zero; confirm against the status layout.
#
# Prints exactly one token on stdout:
#   "1"  a " 0 " field was found
#   "0"  no " 0 " field was found
# Note the inverted sense: callers wanting "all counters are non-zero"
# should expect "0".
function check_counters_nonzero {
    # Discard grep's matched lines so the caller sees only the "0"/"1"
    # verdict, and test the pipeline's exit status directly rather than
    # comparing $? with the non-POSIX `==` operator.
    if $CLI volume rebalance $V0 tier status | grep ' 0 ' > /dev/null; then
        echo "1"
    else
        echo "0"
    fi
}
DEMOTE_TIMEOUT=12
PROMOTE_TIMEOUT=5
MIGRATION_TIMEOUT=10
@ -62,12 +71,17 @@ TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
# testing bug 1215122, ie should fail if replica count and bricks are not compatible.
TEST ! $CLI volume attach-tier $V0 replica 5 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
TEST $CLI volume start $V0
# The following two commands instigate a graph switch. Do them
# before attaching the tier. If done on a tiered volume the rebalance
# daemon will terminate and must be restarted manually.
TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 features.ctr-enabled on
#Not a tier volume
@ -78,6 +92,8 @@ TEST ! $CLI volume detach-tier $V0 commit force
TEST $CLI volume attach-tier $V0 replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
$CLI volume rebalance $V0 tier status
#Tier options expect non-negative value
TEST ! $CLI volume set $V0 cluster.tier-promote-frequency -1
@ -128,13 +144,12 @@ sleep 5
EXPECT_WITHIN $PROMOTE_TIMEOUT "0" file_on_fast_tier d1/data2.txt
EXPECT_WITHIN $PROMOTE_TIMEOUT "0" file_on_fast_tier d1/data3.txt
EXPECT "0" check_counters_nonzero
# stop gluster, when it comes back info file should have tiered volume
killall glusterd
TEST glusterd
# Test rebalance commands
TEST $CLI volume rebalance $V0 tier status
TEST $CLI volume detach-tier $V0 start
TEST $CLI volume detach-tier $V0 commit force

View File

@ -776,6 +776,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
pthread_t demote_thread;
gf_boolean_t is_promotion_triggered = _gf_false;
gf_boolean_t is_demotion_triggered = _gf_false;
xlator_t *any = NULL;
xlator_t *xlator = NULL;
conf = this->private;
@ -798,6 +800,20 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
while (1) {
/*
* Check if a graph switch occurred. If so, stop migration
* thread. It will need to be restarted manually.
*/
any = THIS->ctx->active->first;
xlator = xlator_search_by_name(any, this->name);
if (xlator != this) {
gf_msg (this->name, GF_LOG_INFO, 0,
DHT_MSG_LOG_TIER_STATUS,
"Detected graph switch. Exiting migration daemon.");
goto out;
}
sleep(1);
if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {