cluster/afr : Check for UP bricks before starting heal

Problem: Currently, for a replica volume, SHD keeps crawling index entries even when only one brick is UP and it cannot heal anything. In a thin-arbiter volume, which is also a replica 2 volume, this causes inode lock contention, which in turn sends upcalls to all the clients to release notify locks, even though nothing can be healed. This slows down client performance and defeats the purpose of keeping in-memory information about the bad brick. Solution: Before starting heal, or even crawling, check whether a sufficient number of children are UP and available to check and heal entries. Change-Id: I011c9da3b37cae275f791affd56b8f1c1ac9255d updates: bz#1640581 Signed-off-by: Ashish Pandey <aspandey@redhat.com>
2018-10-18 17:15:58 +05:30 · 2018-10-18 17:15:58 +05:30 · f73b4476b1
commit f73b4476b1
parent bd4d8b1826
3 changed files with 19 additions and 1 deletions
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@ -4919,7 +4919,7 @@ find_child_index(xlator_t *this, xlator_t *child)
    return i;
 }

-static int
+int
 __afr_get_up_children_count(afr_private_t *priv)
 {
    int up_children = 0;
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@ -787,6 +787,18 @@ unref:
        afr_ta_post_op_unlock(this, loc);
 }

+gf_boolean_t
+afr_bricks_available_for_heal(afr_private_t *priv)
+{
+    int up_children = 0;
+
+    up_children = __afr_get_up_children_count(priv);
+    if (up_children < 2) {
+        return _gf_false;
+    }
+    return _gf_true;
+}
+
 void *
 afr_shd_index_healer(void *data)
 {
@ -806,6 +818,9 @@ afr_shd_index_healer(void *data)
    for (;;) {
        afr_shd_healer_wait(healer);

+        if (!afr_bricks_available_for_heal(priv))
+            continue;
+
        ASSERT_LOCAL(this, healer);
        priv->local[healer->subvol] = healer->local;

--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@ -1284,6 +1284,9 @@ afr_ta_post_op_unlock(xlator_t *this, loc_t *loc);
 gf_boolean_t
 afr_is_pending_set(xlator_t *this, dict_t *xdata, int type);

+int
+__afr_get_up_children_count(afr_private_t *priv);
+
 call_frame_t *
 afr_ta_frame_create(xlator_t *this);
 #endif /* __AFR_H__ */