afr: mark non sources as sinks in metadata heal
Problem: In a 3-way replica, when the source brick does not have pending
xattrs for the sinks, but the two sinks blame each other, metadata heal
was not happening because we were not setting all non-sources as sinks.

Fix: Mark all non-sources as sinks, as is done in data and entry heal.

Change-Id: I534978940f5087302e307fcc810a48ffe898ce08
BUG: 1468279
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://review.gluster.org/17717
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
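The gist of the fix, as a minimal compilable sketch: every brick that participates in the heal lock but is not chosen as a source gets marked as a sink, whether or not the source's pending xattrs blame it. The helper name mark_active_sinks and the toy arrays below are illustrative only; in the real patch this is done by afr_mark_active_sinks(), which the metadata-heal hunk below now calls from its out: label.

#include <stdio.h>

/* Toy model of the sink-marking rule this patch applies to metadata heal:
 * any locked-on brick that is not a source becomes a sink. */
static void
mark_active_sinks (const unsigned char *sources,
                   const unsigned char *locked_on,
                   unsigned char *sinks, int child_count)
{
        int i;

        for (i = 0; i < child_count; i++)
                sinks[i] = (!sources[i] && locked_on[i]) ? 1 : 0;
}

int
main (void)
{
        /* The bug's 3-way replica: B3 is the source; B1 and B2 blame each
         * other, so both must come out marked as sinks even though B3's
         * pending xattrs blame neither. */
        unsigned char sources[3]   = { 0, 0, 1 };
        unsigned char locked_on[3] = { 1, 1, 1 };
        unsigned char sinks[3]     = { 0, 0, 0 };
        int           i;

        mark_active_sinks (sources, locked_on, sinks, 3);
        for (i = 0; i < 3; i++)
                printf ("brick %d: %s\n", i, sinks[i] ? "sink" : "not a sink");
        return 0;
}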
commit 77c1ed5fd2 (parent 61db7125a5)
tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t (new file, 64 lines)
@@ -0,0 +1,64 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
cleanup;

TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
TEST $CLI volume start $V0
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
TEST touch $M0/file

# Kill B1, create a pending metadata heal.
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST setfattr -n user.xattr -v value1 $M0/file
EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file

# Kill B2, heal from B3 to B1.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
$CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata"
TEST $CLI volume set $V0 cluster.self-heal-daemon off

# Create another pending metadata heal.
TEST setfattr -n user.xattr -v value2 $M0/file
EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file

# Kill B1, heal from B3 to B2.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
$CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata"
TEST $CLI volume set $V0 cluster.self-heal-daemon off

# ALL bricks up again.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
# B1 and B2 blame each other, B3 doesn't blame anyone.
EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0

cleanup;
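For reference, the 24-hex-digit values asserted above are AFR's on-brick changelog xattrs: three big-endian 32-bit counters of pending data, metadata, and entry operations, in that order. They can be inspected directly on a brick with getfattr; a hypothetical session (the brick path and the volume name "patchy" are illustrative):

# getfattr -d -m . -e hex /bricks/brick2/file
# file: bricks/brick2/file
trusted.afr.patchy-client-0=0x000000000000000100000000

Here the middle eight digits read 00000001: one pending metadata operation blaming the brick backing client-0, with the data and entry counters both at zero.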
xlators/cluster/afr/src/afr-self-heal-data.c
@@ -565,7 +565,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
                                              healed_sinks, undid_pending,
                                              AFR_DATA_TRANSACTION,
                                              locked_on, replies);
-                return source;
+                goto out;
        }

        /* No split brain at this point. If we were called from
xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -231,7 +231,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
                                               undid_pending,
                                               AFR_METADATA_TRANSACTION,
                                               locked_on, replies);
-                return source;
+                goto out;
        }

        /* If this is a directory mtime/ctime only split brain
@@ -245,7 +245,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
                        uuid_utoa (replies[source].poststat.ia_gfid));
                sources[source] = 1;
                healed_sinks[source] = 0;
-                return source;
+                goto out;
        }

        if (!priv->metadata_splitbrain_forced_heal) {
@@ -307,6 +307,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
                }
        }

+out:
+        afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
        return source;
}
|
Loading…
x
Reference in New Issue
Block a user