tests: fix spurious failures in bug-1190734.t (remove-brick)

This test is the second leading cause of spurious failures, including those
in tests for other spurious-regression-failure fixes (creating a bit of
a "catch-22" situation).  While these failures have been hard to
reproduce except during full regression-test runs, two changes have been
made that might make this test more resilient to certain types of
failures.

 * Use a specific "ls" instead of a general "find" to list/count only
   the files we're interested in, without (possibly) including transient
   artifacts from the "remove-brick" command.

 * Retry the file count up to five times (EXPECT_WITHIN bounded by
   FILE_COUNT_TIME=5), just in case there are other transient conditions
   causing it to yield the wrong result.

Also, "inlining" some of the functions for removing the brick might help
to highlight exactly which command within those functions was failing.

Change-Id: I5a462b91fb4e04d9e9a53cc60f9db11b89101107
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/10013
Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Jeff Darcy 2015-03-26 08:38:11 -04:00 committed by Vijay Bellur
parent d331c31963
commit 764ce1e2f6

View File

@ -6,6 +6,7 @@
BRICK_COUNT=3
FILE_COUNT=100
FILE_COUNT_TIME=5
function create_files {
rm -rf $2
@ -15,9 +16,8 @@ function create_files {
done
}
function check_file_count {
ORIG_FILE_COUNT=`find $2 | tail -n +2 |wc -l`
[ $ORIG_FILE_COUNT -eq $1 ]
# Print the number of lines that "ls" emits for paths matching
# $1/file_[0-9]*, i.e. a count restricted to names that start with
# "file_" followed by a digit under directory $1.
#   $1 - directory to look in
# The count is written to stdout; nothing is compared here, so the caller
# decides what value is expected.
function get_file_count {
ls $1/file_[0-9]* | wc -l
}
function reset {
@ -53,24 +53,6 @@ function start_mount_nfs {
return 0
}
# Run check_file_count on directory $1 expecting $FILE_COUNT entries, then
# issue "remove-brick ... start" for bricks ${V0}2 and ${V0}3 of volume $V0.
#   $1 - directory whose file count is checked before the removal starts
# Returns 1 if either the count check or the CLI command fails, 0 otherwise.
function start_removing_bricks {
check_file_count $FILE_COUNT $1
# Bail out early: do not start the removal if the count is already wrong.
[ $? -ne 0 ] && return 1
$CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start
[ $? -ne 0 ] && return 1
return 0
}
# Issue "remove-brick ... commit" for bricks ${V0}2 and ${V0}3 of volume
# $V0, then run check_file_count on directory $1 expecting $FILE_COUNT
# entries.
#   $1 - directory whose file count is checked after the commit
# Returns 1 if the commit command fails; otherwise returns the exit status
# of check_file_count.
function finish_removing_bricks {
$CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 commit
[ $? -ne 0 ] && return 1
check_file_count $FILE_COUNT $1
# Propagate the count check's status as this function's result.
return $?
}
cleanup
TEST glusterd
@ -84,10 +66,11 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
# ------- test 1: AFR, fuse + remove bricks
TEST start_mount_fuse test1
TEST start_removing_bricks $M0/test1
EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $M0/test1
TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} start
EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
$CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 status > /tmp/out
TEST finish_removing_bricks $M0/test1
TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} commit
EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $M0/test1
reset $M0
# ------- test 2: AFR, nfs + remove bricks
@ -96,9 +79,11 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
$H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
TEST start_mount_nfs test2
TEST start_removing_bricks $N0/test2
EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $N0/test2
TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start
EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
TEST finish_removing_bricks $N0/test2
TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} commit
EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $N0/test2
reset $N0
cleanup