cluster/afr: Mark dirty for entry transactions for quorum failures

Problem:
If an entry creation transaction fails on quprum number of bricks
it might end up setting the pending changelogs on the file itself
on the brick where it got created. But the parent does not have
any entry pending marker set. This will lead to the entry not
getting healed by the self heal daemon automatically.

Fix:
For entry transactions mark dirty on the parent if it fails on
quorum number of bricks, so that the heal can do conservative
merge and entry gets healed by shd.

Change-Id: I56448932dd409b3ddb095e2ae32e037b6157a607
fixes: bz#1586020
Signed-off-by: karthik-us <ksubrahm@redhat.com>
This commit is contained in:
karthik-us 2018-06-05 15:57:16 +05:30
parent 7dd08a4dd0
commit 762dff25c4
2 changed files with 124 additions and 11 deletions

View File

@ -0,0 +1,73 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
cleanup;
function create_files {
local i=1
while (true)
do
dd if=/dev/zero of=$M0/file$i bs=1M count=10
if [ -e $B0/${V0}0/file$i ] && [ -e $B0/${V0}1/file$i ]; then
((i++))
else
break
fi
done
echo $i
}
TEST glusterd
#Create brick partitions
TEST truncate -s 100M $B0/brick0
TEST truncate -s 100M $B0/brick1
#Have the 3rd brick of a higher size to test the scenario of entry transaction
#passing on only one brick and not on other bricks.
TEST truncate -s 110M $B0/brick2
LO1=`SETUP_LOOP $B0/brick0`
TEST [ $? -eq 0 ]
TEST MKFS_LOOP $LO1
LO2=`SETUP_LOOP $B0/brick1`
TEST [ $? -eq 0 ]
TEST MKFS_LOOP $LO2
LO3=`SETUP_LOOP $B0/brick2`
TEST [ $? -eq 0 ]
TEST MKFS_LOOP $LO3
TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
TEST MOUNT_LOOP $LO1 $B0/${V0}0
TEST MOUNT_LOOP $LO2 $B0/${V0}1
TEST MOUNT_LOOP $LO3 $B0/${V0}2
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
TEST $CLI volume start $V0
TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 self-heal-daemon off
TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
i=$(create_files)
TEST ! ls $B0/${V0}0/file$i
TEST ! ls $B0/${V0}1/file$i
TEST ls $B0/${V0}2/file$i
EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i
EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i
TEST $CLI volume set $V0 self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
TEST rm -f $M0/file1
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
TEST force_umount $M0
TEST $CLI volume stop $V0
EXPECT 'Stopped' volinfo_field $V0 'Status';
TEST $CLI volume delete $V0;
UMOUNT_LOOP ${B0}/${V0}{0,1,2}
rm -f ${B0}/brick{0,1,2}
cleanup;

View File

@ -782,8 +782,38 @@ afr_has_fop_cbk_quorum (call_frame_t *frame)
return afr_has_quorum (success, this);
}
gf_boolean_t
afr_need_dirty_marking (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = this->private;
afr_local_t *local = NULL;
gf_boolean_t need_dirty = _gf_false;
local = frame->local;
if (!priv->quorum_count || !local->optimistic_change_log)
return _gf_false;
if (local->transaction.type == AFR_DATA_TRANSACTION ||
local->transaction.type == AFR_METADATA_TRANSACTION)
return _gf_false;
if (AFR_COUNT (local->transaction.failed_subvols, priv->child_count) ==
priv->child_count)
return _gf_false;
if (priv->arbiter_count) {
if (!afr_has_arbiter_fop_cbk_quorum (frame))
need_dirty = _gf_true;
} else if (!afr_has_fop_cbk_quorum (frame)) {
need_dirty = _gf_true;
}
return need_dirty;
}
void
afr_handle_quorum (call_frame_t *frame)
afr_handle_quorum (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@ -834,11 +864,15 @@ afr_handle_quorum (call_frame_t *frame)
return;
}
if (afr_need_dirty_marking (frame, this))
goto set_response;
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i])
afr_transaction_fop_failed (frame, frame->this, i);
}
set_response:
local->op_ret = -1;
local->op_errno = afr_final_errno (local, priv);
if (local->op_errno == 0)
@ -973,9 +1007,17 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
int nothing_failed = 1;
gf_boolean_t need_undirty = _gf_false;
afr_handle_quorum (frame);
afr_handle_quorum (frame, this);
local = frame->local;
idx = afr_index_for_transaction_type (local->transaction.type);
idx = afr_index_for_transaction_type (local->transaction.type);
xattr = dict_new ();
if (!xattr) {
local->op_ret = -1;
local->op_errno = ENOMEM;
afr_changelog_post_op_done (frame, this);
goto out;
}
nothing_failed = afr_txn_nothing_failed (frame, this);
@ -985,6 +1027,11 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
need_undirty = _gf_true;
if (local->op_ret < 0 && !nothing_failed) {
if (afr_need_dirty_marking (frame, this)) {
local->dirty[idx] = hton32(1);
goto set_dirty;
}
afr_changelog_post_op_done (frame, this);
goto out;
}
@ -1001,14 +1048,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
goto out;
}
xattr = dict_new ();
if (!xattr) {
local->op_ret = -1;
local->op_errno = ENOMEM;
afr_changelog_post_op_done (frame, this);
goto out;
}
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.failed_subvols[i])
local->pending[i][idx] = hton32(1);
@ -1035,6 +1074,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
else
local->dirty[idx] = hton32(0);
set_dirty:
ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
sizeof(int) * AFR_NUM_CHANGE_LOGS);
if (ret) {