Cluster/afr: Fix output for gluster volume heal vn info healed

Problem:
Whenever the gluster volume heal vol full command is executed, the entries
stored in the circular buffer for sh->healed are added to the dictionary
in the _crawl_post_sh_action function irrespective of whether an actual self
heal (due to non-zero values in the change log) takes place or not.

Fix:
The value of the key (actual-sh-done) will be set to 1 whenever a self heal
takes place due to non-zero change log values. If, for some FOP, the self heal
daemon finds that no self heal is required after examining the pending matrix,
the value will be 0.

Change-Id: I11fd0b9ee76759af17c5bca6bfafbaf66bcaacbc
BUG: 863068
Signed-off-by: Venkatesh Somyajula <vsomyaju@redhat.com>
Reviewed-on: http://review.gluster.org/4181
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Venkatesh Somyajula 2012-11-23 18:12:52 +05:30 committed by Vijay Bellur
parent b6bf52bdcc
commit 2b1bf891f5
8 changed files with 129 additions and 17 deletions

76
tests/bugs/bug-863068.t Normal file
View File

@ -0,0 +1,76 @@
#!/bin/bash
# Regression test for bug 863068: a second "volume heal <vol> full" that
# finds nothing to heal must not change the entry counts reported by
# "gluster volume heal <vol> info healed".

# Load the shared test helpers from the parent directory. Both the command
# substitution and $0 are quoted so the script still works when it is
# invoked through a path that contains whitespace.
. "$(dirname "$0")/../include.rc"

# Run the cleanup helper before the test starts (presumably provided by
# include.rc -- it is not defined in this script).
cleanup;
## getdiff FIRST_BEFORE SECOND_BEFORE FIRST_AFTER SECOND_AFTER
##
## Compares the entry counts reported for brick1 and brick2 by
## "gluster volume heal <volname> info healed" before and after a
## "gluster volume heal <volname> full" run, and prints one of:
##    0  - both bricks report the same count before and after
##   20  - the first brick is unchanged but the second brick differs
##   10  - the first brick differs (the second brick is not examined)
## The result is also left in the global variable "val".
function getdiff()
{
        if [ "$1" != "$3" ]
        then
                # First brick changed; the second pair is irrelevant.
                val=10
        elif [ "$2" != "$4" ]
        then
                # First brick matched, second brick did not.
                val=20
        else
                # Neither brick changed.
                val=0
        fi
        echo $val
}
## Bring up the management daemon and make sure it is running.
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info;

## Create and start a 2-way replicated volume, then mount it.
TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
TEST $CLI volume start $V0;
mount -t glusterfs $H0:/$V0 $M0;

## Kill the brick1 process (pid taken from its pid file) and then write
## through the mount, presumably so that the new entries need healing
## once the brick is back.
B0_hiphenated=$(echo $B0 | tr '/' '-')
kill -9 $(cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid) ;
mkdir $M0/{a,b,c};
echo "GLUSTERFS" >> $M0/a/file;

## Restart the killed brick and run a first full heal.
TEST $CLI volume start $V0 force;
sleep 5
TEST $CLI volume heal $V0 full;
sleep 5

## First Brick Initial value: number of entries reported by
## <gluster volume heal volname info healed> before the second full heal.
FBI=$(gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | head -n 1)
## Second Brick Initial value
SBI=$(gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | tail -n 1)

## Run a second full heal; nothing should be left to heal at this point.
TEST $CLI volume heal $V0 full;
sleep 5

## First Brick Final value
## Number of entries from output of <gluster volume heal volname info healed>
FBF=$(gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | head -n 1)
## Second Brick Final value
SBF=$(gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | tail -n 1)

## The counts must be identical before and after the second full heal.
EXPECT "0" getdiff $FBI $SBI $FBF $SBF;

## Tests after this comment check the background self heal.
TEST mkdir $M0/d
kill -9 $(cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid) ;
TEST $CLI volume set $V0 self-heal-daemon off
## Read from /dev/urandom rather than /dev/random: the latter can block
## waiting for entropy and hands back only a few bytes per read, which
## would stall the test or leave a near-empty file. With count=1 dd still
## issues a single read, so the file may be smaller than 100M.
dd if=/dev/urandom of=$M0/d/file1 bs=100M count=1 2>/dev/null;
TEST $CLI volume start $V0 force
sleep 3
## Listing the directory through the mount is the step that is expected
## to trigger the heal now that the self-heal daemon is off.
TEST ls -l $M0/d
cleanup;

View File

@ -1624,6 +1624,17 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
if (ret)
gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
"sh-failed to %d", local->loc.path, sh_failed);
if (local->self_heal.actual_sh_started == _gf_true &&
sh_failed == 0) {
ret = dict_set_int32 (xattr, "actual-sh-done", 1);
if (ret)
gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
" set actual-sh-done to %d",
local->loc.path,
local->self_heal.actual_sh_started);
}
}
out:
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,

View File

@ -2156,6 +2156,8 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
afr_self_heal_t * sh = NULL;
afr_local_t * orig_frame_local = NULL;
afr_self_heal_t * orig_frame_sh = NULL;
char sh_type_str[256] = {0,};
gf_boolean_t split_brain = _gf_false;
@ -2189,6 +2191,9 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
FRAME_SU_UNDO (bgsh_frame, afr_local_t);
if (!sh->unwound && sh->unwind) {
orig_frame_local = sh->orig_frame->local;
orig_frame_sh = &orig_frame_local->self_heal;
orig_frame_sh->actual_sh_started = _gf_true;
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
sh->op_failed);
}

View File

@ -666,6 +666,8 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
"self-healing file %s from subvolume %s to %d other",
local->loc.path, priv->children[sh->source]->name,
sh->active_sinks);
sh->actual_sh_started = _gf_true;
afr_sh_data_trim_sinks (frame, this);
}

View File

@ -2234,6 +2234,7 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
"merging all entries as a conservative decision",
local->loc.path);
sh->actual_sh_started = _gf_true;
afr_sh_entry_open (frame, this);
return 0;

View File

@ -341,6 +341,7 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
local->loc.path, priv->children[source]->name,
sh->active_sinks);
sh->actual_sh_started = _gf_true;
STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
priv->children[source],
priv->children[source]->fops->getxattr,

View File

@ -307,7 +307,7 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
shd_event_t *event = NULL;
int32_t sh_failed = 0;
gf_boolean_t split_brain = 0;
int32_t actual_sh_done = 0;
priv = this->private;
shd = &priv->shd;
if (crawl_data->crawl == INDEX) {
@ -328,26 +328,42 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
}
}
if (xattr_rsp)
if (xattr_rsp) {
ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done);
}
split_brain = afr_is_split_brain (this, child->inode);
if ((op_ret < 0 && op_errno == EIO) || split_brain)
if ((op_ret < 0 && op_errno == EIO) || split_brain) {
eh = shd->split_brain;
else if ((op_ret < 0) || sh_failed)
} else if ((op_ret < 0) || sh_failed) {
eh = shd->heal_failed;
else
eh = shd->healed;
} else if (actual_sh_done == 1) {
eh = shd->healed;
}
ret = -1;
event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
if (!event)
goto out;
event->child = crawl_data->child;
event->path = path;
ret = eh_save_history (eh, event);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save to "
"event history, (%d, %s)", path, op_ret, strerror (op_errno));
goto out;
if (eh != NULL) {
event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
if (!event)
goto out;
event->child = crawl_data->child;
event->path = path;
ret = eh_save_history (eh, event);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save "
"to event history, (%d, %s)", path, op_ret,
strerror (op_errno));
goto out;
}
} else {
gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ",
path);
}
ret = 0;
out:

View File

@ -241,7 +241,7 @@ typedef struct {
gf_boolean_t entries_skipped;
int op_failed;
gf_boolean_t actual_sh_started;
gf_boolean_t sync_done;
gf_boolean_t data_lock_held;
gf_boolean_t eof_reached;