cluster/ec: Non-disruptive upgrade on EC volume fails
Problem:
Enabling optimistic changelog on an EC volume did not handle node-down
scenarios appropriately, resulting in volume data inaccessibility.

Solution:
Update the dirty xattr appropriately on the good bricks whenever nodes are
down. This fixes the metadata information as part of heal and thus ensures
data accessibility.

BUG: 1468261
Change-Id: I08b0d28df386d9b2b49c3de84b4aac1c729ac057
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://review.gluster.org/17703
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
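For context, the trusted.ec.dirty markers that this fix keeps consistent can be inspected directly on a brick's backend filesystem. A minimal sketch, assuming a hypothetical brick path /bricks/brick0 and a directory test_dir created from the mount (the paths are illustrative, not part of this change):

    # Read the EC dirty counters for a directory on one brick; the hex value
    # holds the data and metadata dirty counts, so a non-zero value means a
    # pending heal while all-zero (or absent) means the entry is clean.
    getfattr -n trusted.ec.dirty -e hex /bricks/brick0/test_dir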
parent 77c1ed5fd2
commit d2650feb4b

tests/basic/ec/ec-1468261.t (new file, 96 lines)
@@ -0,0 +1,96 @@
#!/bin/bash
#
# This test case verifies handling node down scenario with optimistic
# changelog enabled on EC volume.
###

. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc

cleanup

#create and start volume
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
TEST $CLI volume set $V0 disperse.optimistic-change-log on
TEST $CLI volume start $V0

#Mount the volume
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0

#Verify that all is good
TEST mkdir $M0/test_dir
TEST touch $M0/test_dir/file
sleep 2
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}0/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}1/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir

#Touch a file and kill two bricks
pid0=`get_brick_pid $V0 $H0 $B0/${V0}0`
pid1=`get_brick_pid $V0 $H0 $B0/${V0}1`
TEST touch $M0/test_dir/new_file
kill $pid0
kill $pid1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0

#Dirty should be set on up bricks
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}0/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}1/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir

#Bring up the down bricks
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0

#remove mount point contents
TEST rm -rf $M0"/*" 2>/dev/null

# unmount and remount the volume
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST glusterfs -s $H0 --volfile-id $V0 $M0;

#Create a tar file
TEST mkdir $M0/test_dir
for i in {1..3000};do
dd if=/dev/urandom of=$M0/test_dir/file-$i bs=1k count=10;
done
tar -cf $M0/test_dir.tar $M0/test_dir/ 2>/dev/null
rm -rf $M0/test_dir/

#Untar the tar file
tar -C $M0 -xf $M0/test_dir.tar 2>/dev/null&

#Kill 1st and 2nd brick
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST kill_brick $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0

#Stop untarring
TEST kill %1

#Bring up the down bricks
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0

#Wait for heal to complete
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0

#Kill 3rd and 4th brick
TEST kill_brick $V0 $H0 $B0/${V0}3
TEST kill_brick $V0 $H0 $B0/${V0}4
EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0

#remove mount point contents
#this will fail if things are wrong
TEST rm -rf $M0"/*" 2>/dev/null

cleanup
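The new .t file above is a standard TAP-style regression test; a quick sketch of how it is typically run from a GlusterFS source tree (requires root and the scratch environment the harness sets up — exact invocation may vary by branch):

    # Run just this test through the project's harness ...
    ./run-tests.sh tests/basic/ec/ec-1468261.t
    # ... or drive the script directly with prove, since the TEST helpers
    # in include.rc emit TAP output.
    prove -vf tests/basic/ec/ec-1468261.t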
@@ -16,6 +16,7 @@ TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.read-ahead off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 disperse.background-heals 0
TEST $CLI volume set $V0 disperse.eager-lock off
TEST $CLI volume start $V0

TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
@@ -28,6 +28,7 @@ ln $SSL_CERT $SSL_CA
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
TEST $CLI volume set $V0 disperse.eager-lock off
TEST $CLI volume start $V0
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "^6$" ec_child_up_count $V0 0
@@ -85,6 +85,7 @@ AUTH_REFRESH_INTERVAL=10
GRAPH_SWITCH_TIMEOUT=10
UNLINK_TIMEOUT=5
MDC_TIMEOUT=5
IO_WAIT_TIMEOUT=5

LOGDIR=$(gluster --print-logdir)
@@ -345,7 +346,6 @@ function _EXPECT_WITHIN()
        if [ $? -ne 0 ]; then
            break;
        fi

        ## Check match success
        if [[ "$a" =~ $e ]]; then
            break;
@@ -2080,7 +2080,10 @@ ec_update_info(ec_lock_link_t *link)
     /* If we set the dirty flag for update fop, we have to unset it.
      * If fop has failed on some bricks, leave the dirty as marked. */
     if (lock->unlock_now) {
-        if (!(ec->node_mask & ~lock->good_mask)) {
+        /* Ensure that nodes are up while doing final
+         * metadata update.*/
+        if (!(ec->node_mask & ~lock->good_mask) &&
+            !(ec->node_mask & ~ec->xl_up)) {
             if (ctx->dirty[0] != 0) {
                 dirty[0] = -1;
             }
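The extra condition added above is the core of the fix: the dirty marker is only cleared during the final metadata update when every brick of the subvolume is currently up; otherwise it stays set so heal can repair the lagging bricks later. A small standalone sketch of the same bitmask test (illustrative only, not GlusterFS code; node_mask and xl_up mirror the fields used above):

    # node_mask has one bit per brick; xl_up has a bit set for each brick up.
    # "(node_mask & ~xl_up) == 0" holds only when all bricks in node_mask are up.
    node_mask=$((0x3F))   # 6 bricks, bits 0..5
    xl_up=$((0x3C))       # bricks 0 and 1 are down
    if (( (node_mask & ~xl_up) == 0 )); then
        echo "all bricks up: safe to clear the dirty xattr"
    else
        echo "some bricks down: leave dirty set so heal can fix them"
    fi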