afr: mark non sources as sinks in metadata heal

Problem:
In a 3-way replica, when the source brick does not have pending xattrs
for the sinks, but the two sinks blame each other, metadata heal was not
happening because we were not setting all non-sources as sinks.

Fix: Mark all non-sources as sinks, as is already done in data and
entry heal.
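
The marking is done by afr_mark_active_sinks() (see the diffs below). As
a rough standalone sketch of its effect — the function in the tree takes
AFR's xlator and private types, so the toy signature here is an
assumption for illustration only:

/* Sketch: every locked brick that is not a source becomes a sink,
 * even when no source holds pending xattrs blaming it. */
static void
mark_active_sinks (int child_count, const unsigned char *sources,
                   const unsigned char *locked_on, unsigned char *sinks)
{
        for (int i = 0; i < child_count; i++)
                sinks[i] = (!sources[i] && locked_on[i]) ? 1 : 0;
}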

Change-Id: I534978940f5087302e307fcc810a48ffe898ce08
BUG: 1468279
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://review.gluster.org/17717
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>

tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t (new file)

@@ -0,0 +1,64 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
cleanup;
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
TEST $CLI volume start $V0
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
TEST touch $M0/file
# Kill B1, create a pending metadata heal.
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST setfattr -n user.xattr -v value1 $M0/file
EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
# Kill B2, heal from B3 to B1.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
$CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata"
TEST $CLI volume set $V0 cluster.self-heal-daemon off
# Create another pending metadata heal.
TEST setfattr -n user.xattr -v value2 $M0/file
EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
# Kill B1, heal from B3 to B2
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
$CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata"
TEST $CLI volume set $V0 cluster.self-heal-daemon off
# ALL bricks up again.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
# B1 and B2 blame each other, B3 doesn't blame anyone.
EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
cleanup;
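
For reference, each trusted.afr.<volname>-client-N value asserted above
is 12 bytes: three big-endian 32-bit counters of pending data, metadata
and entry operations, so 000000000000000100000000 reads as "one pending
metadata operation". A small illustrative decoder (not part of the
patch):

#include <stdint.h>
#include <stdio.h>

/* Decode an AFR changelog xattr value into its three counters. */
static void
decode_afr_changelog (const uint8_t v[12])
{
        uint32_t data     = ((uint32_t)v[0] << 24) | ((uint32_t)v[1] << 16) |
                            ((uint32_t)v[2] << 8)  | v[3];
        uint32_t metadata = ((uint32_t)v[4] << 24) | ((uint32_t)v[5] << 16) |
                            ((uint32_t)v[6] << 8)  | v[7];
        uint32_t entry    = ((uint32_t)v[8] << 24) | ((uint32_t)v[9] << 16) |
                            ((uint32_t)v[10] << 8) | v[11];
        printf ("data=%u metadata=%u entry=%u\n", data, metadata, entry);
}

int
main (void)
{
        /* The value the test expects on the bricks that blame a sink. */
        const uint8_t pending[12] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
        decode_afr_changelog (pending); /* prints data=0 metadata=1 entry=0 */
        return 0;
}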

xlators/cluster/afr/src/afr-self-heal-data.c

@@ -565,7 +565,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
healed_sinks, undid_pending,
AFR_DATA_TRANSACTION,
locked_on, replies);
-        return source;
+        goto out;
}
/* No split brain at this point. If we were called from

xlators/cluster/afr/src/afr-self-heal-metadata.c

@@ -231,7 +231,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
undid_pending,
AFR_METADATA_TRANSACTION,
locked_on, replies);
-        return source;
+        goto out;
}
/* If this is a directory mtime/ctime only split brain
@@ -245,7 +245,7 @@
uuid_utoa (replies[source].poststat.ia_gfid));
sources[source] = 1;
healed_sinks[source] = 0;
-        return source;
+        goto out;
}
if (!priv->metadata_splitbrain_forced_heal) {
@@ -307,6 +307,8 @@
}
}
+out:
+        afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
return source;
}
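
Both files follow the same pattern: each early "return source;" becomes
"goto out;" so the sink-marking epilogue runs on every path, including
the ones where the source blames no one. A toy illustration of that
control-flow change (not GlusterFS code):

#include <stdio.h>

static void
mark_active_sinks (void)
{
        puts ("non-sources marked as sinks");
}

static int
finalize_source (int source_found)
{
        int source = -1;

        if (source_found) {
                source = 0;
                goto out; /* was "return source;", which skipped the epilogue */
        }

out:
        mark_active_sinks (); /* now runs on every path */
        return source;
}

int
main (void)
{
        printf ("source=%d\n", finalize_source (1));
        return 0;
}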