cluster/afr: Delegate metadata heal with pending xattrs to SHD
Problem: When metadata-self-heal is triggered on the mount, it blocks
lookup until the metadata-self-heal completes. That can lead to hangs
when a lot of clients access a directory that needs metadata heal and
all of them trigger heals, each waiting for the others to complete.

Fix: Trigger metadata heal from lookup, where it can block the lookup,
only when heal is needed but the pending xattrs are not set. This is
the only case where the bricks may serve different metadata to
different clients unless a client heals, which should be avoided. In
every other case the pending xattrs are set, so the heal can be
delegated to the self-heal daemon (SHD) without blocking lookup.

Updates bz#1622821
Change-Id: I6089e9fda0770a83fb287941b229c882711f4e66
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
parent 37f77b1242
commit ccaad48f51
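The change is easiest to read as a small policy table: if metadata differs across bricks and the pending xattrs are set, the self-heal daemon (SHD) is guaranteed to pick the file up, so lookup must not block on a heal; only when metadata differs with no pending xattrs (for example, a brick modified directly on the backend) must the client heal it. A minimal sketch of that policy in C; heal_actor_t and choose_metadata_heal_actor are hypothetical illustration names, not part of the afr code:

typedef enum { HEAL_NONE, HEAL_BY_CLIENT, HEAL_BY_SHD } heal_actor_t;

static heal_actor_t
choose_metadata_heal_actor (int metadata_differs, int pending_xattrs_set)
{
        if (!metadata_differs)
                return HEAL_NONE;       /* replicas agree: nothing to heal */
        if (pending_xattrs_set)
                return HEAL_BY_SHD;     /* shd will heal; don't block lookup */
        /* no pending xattrs, e.g. a brick changed directly on the backend:
         * only a client-side heal can restore consistent metadata */
        return HEAL_BY_CLIENT;
}

The two test scripts below exercise both branches: mdatafile accumulates pending xattrs while a brick is down, so lookup must no longer heal it, while mdatafile-backend-direct-modify is changed behind GlusterFS's back and must still be healed from the client.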
@@ -17,6 +17,7 @@ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
 echo "some data" > $M0/datafile
 EXPECT 0 echo $?
 TEST touch $M0/mdatafile
+TEST touch $M0/mdatafile-backend-direct-modify
 TEST mkdir $M0/dir
 
 #Kill a brick and perform I/O to have pending heals.
@@ -29,6 +30,7 @@ EXPECT 0 echo $?
 
 #pending metadata heal
 TEST chmod +x $M0/mdatafile
+TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify
 
 #pending entry heal. Also causes pending metadata/data heals on file{1..5}
 TEST touch $M0/dir/file{1..5}
@@ -40,9 +42,12 @@ TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
 
 #Medatada heal via explicit lookup must not happen
 TEST ls $M0/mdatafile
-
-#Inode refresh must not trigger data and entry heals.
+TEST getfattr -d -m. -e hex $M0/mdatafile
+TEST ls $M0/mdatafile-backend-direct-modify
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
+
+#Inode refresh must not trigger data metadata and entry heals.
 #To trigger inode refresh for sure, the volume is unmounted and mounted each time.
 #Check that data heal does not happen.
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
@@ -52,7 +57,6 @@ TEST cat $M0/datafile
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
 TEST ls $M0/dir
 
 #No heal must have happened
 EXPECT 8 get_pending_heal_count $V0
-
@@ -61,21 +65,25 @@ TEST $CLI volume set $V0 cluster.data-self-heal on
 TEST $CLI volume set $V0 cluster.metadata-self-heal on
 TEST $CLI volume set $V0 cluster.entry-self-heal on
 
-#Metadata heal is triggered by lookup without need for inode refresh.
-TEST ls $M0/mdatafile
-EXPECT 7 get_pending_heal_count $V0
-
-#Inode refresh must trigger data and entry heals.
+#Inode refresh must trigger data metadata and entry heals.
 #To trigger inode refresh for sure, the volume is unmounted and mounted each time.
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ls $M0/mdatafile-backend-direct-modify
+
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
+
+
+TEST getfattr -d -m. -e hex $M0/mdatafile
+EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0
+
 TEST cat $M0/datafile
 EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0
 
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
 TEST ls $M0/dir
-EXPECT 5 get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0
 
 TEST cat $M0/dir/file1
 TEST cat $M0/dir/file2
@@ -83,5 +91,5 @@ TEST cat $M0/dir/file3
 TEST cat $M0/dir/file4
 TEST cat $M0/dir/file5
 
-EXPECT 0 get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
 cleanup;
@@ -13,7 +13,6 @@ TEST pidof glusterd
 TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
 TEST $CLI volume start $V0
 
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
 TEST $CLI volume set $V0 cluster.background-self-heal-count 0
 
 ## Mount FUSE with caching disabled
@@ -82,10 +81,15 @@ EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
 # restart the brick process
 TEST $CLI volume start $V0 force
 
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 `expr $brick_id - 1`
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
 
 cat $pth >/dev/null
+TEST $CLI volume heal $V0
 
 EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
 # check backends - xattr should not be present anywhere
 EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"
 EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
@@ -2692,6 +2692,42 @@ out:
         return 0;
 }
 
+gf_boolean_t
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
+{
+        int idx = -1;
+        afr_private_t *priv = NULL;
+        void *pending_raw = NULL;
+        int *pending_int = NULL;
+        int i = 0;
+
+        priv = this->private;
+        idx = afr_index_for_transaction_type (type);
+
+        if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
+                if (pending_raw) {
+                        pending_int = pending_raw;
+
+                        if (ntoh32 (pending_int[idx]))
+                                return _gf_true;
+                }
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (dict_get_ptr (xdata, priv->pending_key[i],
+                                  &pending_raw))
+                        continue;
+                if (!pending_raw)
+                        continue;
+                pending_int = pending_raw;
+
+                if (ntoh32 (pending_int[idx]))
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
 static gf_boolean_t
 afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
 {
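afr_is_pending_set depends on the on-disk layout of the AFR pending xattrs: trusted.afr.dirty and each per-brick trusted.afr.<volname>-client-<i> key hold an array of 32-bit network-order counters indexed by transaction type, which afr_index_for_transaction_type maps to a slot. A standalone sketch of that decoding, assuming the usual data/metadata/entry slot order; the example value is made up:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

enum { IDX_DATA = 0, IDX_METADATA = 1, IDX_ENTRY = 2 };

int
main (void)
{
        /* assumed example: trusted.afr.<vol>-client-0 with one pending
         * metadata operation and clean data/entry counters */
        uint32_t pending[3] = { htonl (0), htonl (1), htonl (0) };

        if (ntohl (pending[IDX_METADATA]))
                printf ("metadata heal pending: delegate to shd\n");
        return 0;
}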
@@ -2718,6 +2754,14 @@ afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
                         continue;
                 }
 
+                if (afr_is_pending_set (this, replies[i].xdata,
+                                        AFR_METADATA_TRANSACTION)) {
+                        /* Let shd do the heal so that lookup is not blocked
+                         * on getting metadata lock/doing the heal */
+                        start = _gf_false;
+                        break;
+                }
+
                 if (gf_uuid_compare (stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
                         start = _gf_false;
                         break;
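The new check sits before the existing gfid comparison, so lookup gives up on client-side metadata heal as soon as any brick's reply carries a pending metadata count. The helper itself is not metadata-specific; a hypothetical debug helper (not part of this patch) illustrates its contract for all three transaction types:

static void
log_pending_types (xlator_t *this, dict_t *xdata)
{
        /* hypothetical helper: afr_is_pending_set can be queried for any
         * transaction type recorded in the pending xattrs */
        if (afr_is_pending_set (this, xdata, AFR_DATA_TRANSACTION))
                gf_msg_debug (this->name, 0, "data heal pending");
        if (afr_is_pending_set (this, xdata, AFR_METADATA_TRANSACTION))
                gf_msg_debug (this->name, 0, "metadata heal pending");
        if (afr_is_pending_set (this, xdata, AFR_ENTRY_TRANSACTION))
                gf_msg_debug (this->name, 0, "entry heal pending");
}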
@@ -2180,44 +2180,6 @@ afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
         return 0;
 }
 
-
-gf_boolean_t
-afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
-{
-        int idx = -1;
-        afr_private_t *priv = NULL;
-        void *pending_raw = NULL;
-        int *pending_int = NULL;
-        int i = 0;
-
-        priv = this->private;
-        idx = afr_index_for_transaction_type (type);
-
-        if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
-                if (pending_raw) {
-                        pending_int = pending_raw;
-
-                        if (ntoh32 (pending_int[idx]))
-                                return _gf_true;
-                }
-        }
-
-        for (i = 0; i < priv->child_count; i++) {
-                if (dict_get_ptr (xdata, priv->pending_key[i],
-                                  &pending_raw))
-                        continue;
-                if (!pending_raw)
-                        continue;
-                pending_int = pending_raw;
-
-                if (ntoh32 (pending_int[idx]))
-                        return _gf_true;
-        }
-
-        return _gf_false;
-}
-
-
 gf_boolean_t
 afr_is_data_set (xlator_t *this, dict_t *xdata)
 {
@@ -1249,4 +1249,7 @@ afr_ta_post_op_lock (xlator_t *this, loc_t *loc);
 
 int
 afr_ta_post_op_unlock (xlator_t *this, loc_t *loc);
+
+gf_boolean_t
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type);
 #endif /* __AFR_H__ */