cluster/afr: Allow data/entry self heal for metadata split-brain

Problem:
Currently, whenever there is a metadata split-brain, the variable
sh->op_failed is set to 1 to denote that self-heal failed.
But if we proceed to data self-heal, the data self-heal code path
also relies on the sh->op_failed variable. So it will
check sh->op_failed and will eventually fail to
do data self-heal. We therefore need a mechanism to allow data
self-heal even if metadata is in split-brain.

Fix:
Some data structure revamp was done in
http://review.gluster.com/#/c/5106/ and this patch is
based on that fix. Now we can store which particular self-heal
failed, i.e. GFID_OR_MISSING_ENTRY_SELF_HEAL, METADATA, DATA,
ENTRY. And we can do two types of self-heal failure check.
1. Individual type check: We can check which of the four
   (metadata, data, gfid or missing entry, entry self-heal)
   failed.

2. In afr_self_heal_completion_cbk, the check treats the complete
   self-heal as a failure if any specific self-heal failed,
   so that the corresponding circular buffer of the event history
   is populated accordingly.

Change-Id: Icb91e513bcc752386fc8a78812405cfabe5cac2d
BUG: 977797
Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com>
Reviewed-on: http://review.gluster.org/5253
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Venkatesh Somyajulu 2013-06-28 19:11:47 +05:30 committed by Vijay Bellur
parent 7062eda157
commit ef8092fab7
9 changed files with 285 additions and 134 deletions

114
tests/bugs/bug-977797.t Executable file
View File

@ -0,0 +1,114 @@
#!/bin/bash
# Regression test for bug 977797: on a replica-2 volume, modify a directory
# and a file while each brick in turn is down, then check the data and entry
# changelog xattrs on both bricks after the file is accessed from the mount.
. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc
cleanup;
## Start and create a volume
TEST glusterd;
TEST pidof glusterd;
TEST $CLI volume info;
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
## Verify volume is created
EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
## Start volume and verify
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
## Turn off the self-heal daemon and the listed performance options, and set
## background-self-heal-count to 0.
## NOTE(review): presumably so that healing is driven only by the mount and
## runs in the foreground - confirm.
TEST $CLI volume set $V0 self-heal-daemon off
TEST $CLI volume set $V0 open-behind off
TEST $CLI volume set $V0 quick-read off
TEST $CLI volume set $V0 read-ahead off
TEST $CLI volume set $V0 write-behind off
TEST $CLI volume set $V0 io-cache off
TEST $CLI volume set $V0 background-self-heal-count 0
## Mount the volume with attribute and entry caching timeouts set to 0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
## Create the directory and file while both bricks are up
TEST mkdir -p $M0/a
# NOTE(review): the backticks run the echo inside a command substitution, so
# the command word after it receives the (empty) output, not the echo command
# itself - verify this is intended.
TEST `echo "GLUSTERFS" > $M0/a/file`
## With brick 1 down: chown the directory and file, rewrite the file contents
## and create a subdirectory
TEST kill_brick $V0 $H0 $B0/$V0"1"
TEST chown root $M0/a
TEST chown root $M0/a/file
# NOTE(review): same backtick idiom as the first write above.
TEST `echo "GLUSTER-FILE-SYSTEM" > $M0/a/file`
TEST mkdir $M0/a/b
## Restart the bricks and wait for child 0 to be reported up
TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0;
## With brick 2 down: chmod the directory and file
TEST kill_brick $V0 $H0 $B0/$V0"2"
TEST chmod 757 $M0/a
TEST chmod 757 $M0/a/file
## Restart the bricks and wait for child 1 to be reported up
TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1;
## Access the file from the mount with both bricks up
## NOTE(review): presumably this lookup is what triggers self-heal - confirm.
TEST ls -l $M0/a/file
## Read the "entry" changelog of directory a from both bricks (b1/b2),
## for both client xattr keys (c0/c1)
b1c0dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
trusted.afr.$V0-client-0 "entry")
b1c1dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
trusted.afr.$V0-client-1 "entry")
b2c0dir=$(afr_get_specific_changelog_xattr \
$B0/$V0"2"/a trusted.afr.$V0-client-0 "entry")
b2c1dir=$(afr_get_specific_changelog_xattr \
$B0/$V0"2"/a trusted.afr.$V0-client-1 "entry")
## Read the "data" changelog of a/file from both bricks, for both client keys
b1c0f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \
trusted.afr.$V0-client-0 "data")
b1c1f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \
trusted.afr.$V0-client-1 "data")
b2c0f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
trusted.afr.$V0-client-0 "data")
b2c1f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
trusted.afr.$V0-client-1 "data")
## All data and entry changelogs are expected to be zero
EXPECT "00000000" echo $b1c0f
EXPECT "00000000" echo $b1c1f
EXPECT "00000000" echo $b2c0f
EXPECT "00000000" echo $b2c1f
EXPECT "00000000" echo $b1c0dir
EXPECT "00000000" echo $b1c1dir
EXPECT "00000000" echo $b2c0dir
EXPECT "00000000" echo $b2c1dir
# Print "0" if $2 occurs as a literal substring of $1, otherwise print "1".
#   $1 - string to search in
#   $2 - substring to look for
# An empty $2 is reported as not found ("1"), because stripping it leaves $1
# unchanged.
contains() {
    local string="$1"
    local substring="$2"
    local var="1" # assume $substring is not in $string

    # Removing the shortest prefix that ends in $substring changes $string
    # only when $substring occurs in it. The inner quotes make $substring
    # match literally instead of being interpreted as a glob pattern.
    if test "${string#*"$substring"}" != "$string"
    then
        var="0" # $substring is in $string
    fi
    echo "$var"
}
## Reading the file through the mount is expected to produce an
## "Input/output error" message; contains prints "0" when the message is found.
var1=$(cat $M0/a/file 2>&1)
var2="Input/output error"
EXPECT "0" contains "$var1" "$var2"
## Finish up
TEST $CLI volume stop $V0;
EXPECT 'Stopped' volinfo_field $V0 'Status';
TEST $CLI volume delete $V0;
## After deletion, querying the volume must fail
TEST ! $CLI volume info $V0;
cleanup;

View File

@ -237,6 +237,26 @@ function dht_get_layout {
getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2
}
##
# Print one 8-character counter taken from an AFR changelog xattr value.
#   $1 - path to query
#   $2 - xattr key (the test passes trusted.afr.<volume>-client-<n>)
#   $3 - which counter to print: "data", "metadata" or "entry"
# The value is obtained from afr_get_changelog_xattr. The first two characters
# are skipped (presumably the "0x" prefix of a hex dump - confirm against that
# helper); the data, metadata and entry counters are then read as three
# consecutive 8-character fields. Prints "error" for any other $3.
##
function afr_get_specific_changelog_xattr ()
{
    local path=$1
    local key=$2
    local type=$3
    local specific_changelog=""
    local changelog_xattr=""

    changelog_xattr=$(afr_get_changelog_xattr "$path" "$key")

    case "$type" in
    data)
        specific_changelog=${changelog_xattr:2:8}
        ;;
    metadata)
        specific_changelog=${changelog_xattr:10:8}
        ;;
    entry)
        specific_changelog=${changelog_xattr:18:8}
        ;;
    *)
        # Unknown counter type: report it instead of printing an empty line.
        specific_changelog="error"
        ;;
    esac

    echo "$specific_changelog"
}
##
# query pathinfo xattr and extract POSIX pathname(s)
##

View File

@ -100,7 +100,7 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
}
sh_private_cleanup (sh_frame, this);
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
GF_ASSERT (!last_loop_frame);
//loop_finish should have happened and the old_loop should be NULL
gf_log (this->name, GF_LOG_DEBUG,
@ -276,7 +276,7 @@ sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
_gf_true, sh_loop_lock_success, sh_loop_lock_failure);
return 0;
out:
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
if (old_loop_frame)
sh_loop_finish (old_loop_frame, this);
sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
@ -307,8 +307,9 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
sh_priv->loops_running--;
offset = sh_priv->offset;
block_size = sh->block_size;
while ((!sh->eof_reached) && (!is_self_heal_failed (sh)) &&
(sh_priv->loops_running < priv->data_self_heal_window_size)
while ((!sh->eof_reached) &&
(!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
(sh_priv->loops_running < priv->data_self_heal_window_size)
&& (sh_priv->offset < sh->file_size)) {
loop++;
@ -327,7 +328,8 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
if (0 == loop) {
//loop finish does unlock, but the erasing of the pending
//xattrs needs to happen before that so do not finish the loop
if (is_driver_done && !is_self_heal_failed (sh))
if (is_driver_done &&
!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
goto driver_done;
if (old_loop_frame) {
sh_loop_finish (old_loop_frame, this);
@ -338,7 +340,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
//If we have more loops to form we should finish previous loop after
//the next loop lock
while (loop--) {
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
// op failed in other loop, stop spawning more loops
if (old_loop_frame) {
sh_loop_finish (old_loop_frame, this);
@ -384,7 +386,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame
}
if (op_ret == -1) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
if (loop_frame) {
sh_loop_finish (loop_frame, this);
@ -432,7 +434,7 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
priv->children[child_index]->name,
strerror (op_errno));
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (loop_sh, op_errno);
} else if (op_ret < loop_local->cont.writev.vector->iov_len) {
gf_log (this->name, GF_LOG_ERROR,
@ -440,7 +442,7 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
"(expected %lu, returned %d)", sh_local->loc.path,
priv->children[child_index]->name,
loop_local->cont.writev.vector->iov_len, op_ret);
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
call_count = afr_frame_return (loop_frame);
@ -514,7 +516,7 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
if (op_ret <= 0) {
if (op_ret < 0) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
"for %s reason :%s", sh->source,
sh_local->loc.path, strerror (errno));
@ -624,7 +626,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
"checksum on %s failed on subvolume %s (%s)",
sh_local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
strong_checksum, MD5_DIGEST_LENGTH);
@ -662,7 +664,8 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
}
UNLOCK (&sh_priv->lock);
if (write_needed && !is_self_heal_failed (sh)) {
if (write_needed &&
!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh_loop_read (loop_frame, this);
} else {
sh_loop_return (sh_frame, this, loop_frame,
@ -800,7 +803,7 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
ret = 0;
out:
if (ret) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
sh_loop_driver_done (sh_frame, this, NULL);
}
return 0;

View File

@ -1018,7 +1018,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
local->loc.path);
}
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh->completion_cbk (frame, this);
} else {
gf_log (this->name, GF_LOG_TRACE,
@ -1250,7 +1250,7 @@ out:
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
"reason: %s", local->loc.path, strerror (-ret));
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
afr_sh_missing_entries_finish (frame, this);
}
@ -1265,7 +1265,7 @@ afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
local = frame->local;
sh = &local->self_heal;
if (op_ret < 0)
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_finish (frame, this);
return 0;
}
@ -1386,7 +1386,7 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
}
return;
out:
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@ -1470,7 +1470,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
LOCK (&frame->lock);
{
afr_sh_set_error (sh, EIO);
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
}
@ -1552,7 +1552,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_missing_entries_finish (frame, this);
} else {
if (afr_gfid_missing_count (this->name, sh->fresh_children,
@ -1766,7 +1766,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
priv->child_count, ENOENT);
if (fresh_child_enoents == fresh_parent_count) {
afr_sh_set_error (sh, ENOENT);
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_purge_entry (frame, this);
} else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
priv->child_count, local->loc.path,
@ -1787,7 +1787,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
return;
fail:
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@ -1858,7 +1858,7 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
out:
afr_sh_set_error (sh, op_errno);
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_finish (frame, this);
return;
}
@ -1962,7 +1962,7 @@ afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_INFO,
"Non blocking entrylks failed.");
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_done (frame, this);
} else {
@ -2047,8 +2047,9 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
sh->afr_set_self_heal_status = afr_set_gfid_or_missing_entry_sh_status;
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_self_heal_parent_entrylk (frame, this,
afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
@ -2176,7 +2177,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_self_heal_type_str_get (sh, sh_type_str,
sizeof(sh_type_str));
if (is_self_heal_failed (sh) && !priv->shd.iamshd) {
if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
loglevel = GF_LOG_ERROR;
} else {
loglevel = GF_LOG_DEBUG;
@ -2191,7 +2192,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
orig_frame_sh = &orig_frame_local->self_heal;
orig_frame_sh->actual_sh_started = _gf_true;
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
is_self_heal_failed (sh));
is_self_heal_failed (sh, AFR_CHECK_ALL));
}
if (sh->background) {
@ -2305,6 +2306,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
sh->do_gfid_self_heal = _gf_false;
}
sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
FRAME_SU_DO (sh_frame, afr_local_t);
if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
afr_self_heal_missing_entries (sh_frame, this);
@ -2514,7 +2517,7 @@ out:
GF_FREE (erase_xattr);
if (ret < 0) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
finish (frame, this);
}
@ -2522,59 +2525,39 @@ out:
}
void
afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
{
xlator_t *this = NULL;
xlator_t *this = NULL;
afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status);
afr_self_heal_type sh_type_in_action = sh->sh_type_in_action;
this = THIS;
if (sh)
sh->afr_all_sh_status.data_self_heal = status;
else
gf_log_callingfn (this->name, GF_LOG_ERROR,
"Null self heal struct");
}
if (!sh) {
gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal"
"Structure");
goto out;
}
void
afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
{
xlator_t *this = NULL;
this = THIS;
if (sh)
sh->afr_all_sh_status.metadata_self_heal = status;
else
gf_log_callingfn (this->name, GF_LOG_ERROR,
"Null self heal struct");
}
void
afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
{
xlator_t *this = NULL;
this = THIS;
if (sh)
sh->afr_all_sh_status.entry_self_heal = status;
else
gf_log_callingfn (this->name, GF_LOG_ERROR,
"Null self heal struct");
}
void
afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh,
afr_self_heal_status status)
{
xlator_t *this = NULL;
this = THIS;
if (sh)
sh->afr_all_sh_status.gfid_or_missing_entry_self_heal = status;
else
gf_log_callingfn (this->name, GF_LOG_ERROR,
"Null self heal struct");
switch (sh_type_in_action) {
case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
sh_status->gfid_or_missing_entry_self_heal = status;
break;
case AFR_SELF_HEAL_METADATA:
sh_status->metadata_self_heal = status;
break;
case AFR_SELF_HEAL_DATA:
sh_status->data_self_heal = status;
break;
case AFR_SELF_HEAL_ENTRY:
sh_status->entry_self_heal = status;
break;
case AFR_SELF_HEAL_INVALID:
gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid"
"self heal type in action");
break;
}
out:
return;
}
void
@ -2585,22 +2568,58 @@ afr_set_local_for_unhealable (afr_local_t *local)
sh = &local->self_heal;
local->unhealable = 1;
if (sh->afr_set_self_heal_status)
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
int
is_self_heal_failed (afr_self_heal_t *sh)
is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
{
afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID;
afr_self_heal_status status = AFR_SELF_HEAL_FAILED;
xlator_t *this = NULL;
int sh_failed = 0;
int sh_failed = 0;
if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
|| (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
|| (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
|| (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
sh_failed = 1;
this = THIS;
if (!sh) {
gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
"structure");
sh_failed = 1;
goto out;
}
if (type == AFR_CHECK_ALL) {
if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
|| (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
|| (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
|| (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
sh_failed = 1;
} else if (type == AFR_CHECK_SPECIFIC) {
sh_type_in_action = sh->sh_type_in_action;
switch (sh_type_in_action) {
case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
status = sh_status.gfid_or_missing_entry_self_heal;
break;
case AFR_SELF_HEAL_METADATA:
status = sh_status.metadata_self_heal;
break;
case AFR_SELF_HEAL_ENTRY:
status = sh_status.entry_self_heal;
break;
case AFR_SELF_HEAL_DATA:
status = sh_status.data_self_heal;
break;
case AFR_SELF_HEAL_INVALID:
status = AFR_SELF_HEAL_NOT_ATTEMPTED;
break;
}
if (status == AFR_SELF_HEAL_FAILED)
sh_failed = 1;
}
out:
return sh_failed;
}

View File

@ -14,13 +14,6 @@
#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
#define AFR_SH_MIN_PARTICIPANTS 2
typedef enum {
AFR_SELF_HEAL_ENTRY,
AFR_SELF_HEAL_METADATA,
AFR_SELF_HEAL_DATA,
AFR_SELF_HEAL_INVALID = -1,
} afr_self_heal_type;
typedef enum {
AFR_LOOKUP_FAIL_CONFLICTS = 1,
AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
@ -138,20 +131,10 @@ void
afr_set_local_for_unhealable (afr_local_t *local);
int
is_self_heal_failed (afr_self_heal_t *sh);
is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type);
void
afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status);
void
afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status staus);
void
afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status);
void
afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh,
afr_self_heal_status status);
afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status);
void
afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl);

View File

@ -335,7 +335,7 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG,
"finishing failed data selfheal of %s", local->loc.path);
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
if (sh->data_lock_held)
afr_sh_data_unlock (frame, this, afr_sh_data_close);
else
@ -362,13 +362,13 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
"log failed on %s for subvol %s, reason: %s",
local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
call_count = afr_frame_return (frame);
if (call_count == 0) {
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
if (sh->old_loop_frame)
sh_loop_finish (sh->old_loop_frame, this);
sh->old_loop_frame = NULL;
@ -418,7 +418,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index]->name, strerror (op_errno));
LOCK (&frame->lock);
{
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
if (sh->old_loop_frame)
@ -428,7 +428,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
if (is_self_heal_failed (sh))
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
afr_sh_data_fail (frame, this);
else
afr_sh_data_erase_pending (frame, this);
@ -604,7 +604,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"ftruncate of %s on subvolume %s completed",
@ -617,7 +617,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
if (is_self_heal_failed (sh))
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
afr_sh_data_fail (frame, this);
else
afr_sh_data_sync_prepare (frame, this);
@ -718,7 +718,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
if (sh->background && sh->unwind && !sh->unwound) {
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
is_self_heal_failed (sh));
is_self_heal_failed (sh, AFR_CHECK_SPECIFIC));
sh->unwound = _gf_true;
}
@ -1342,7 +1342,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"open of %s succeeded on child %s",
@ -1355,7 +1355,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_data_fail (frame, this);
return 0;
}
@ -1485,10 +1485,10 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
sh->afr_set_self_heal_status = afr_set_data_sh_status;
sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
if (afr_can_start_data_self_heal (sh, priv)) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
if (IA_ISREG (sh->type)) {
afr_sh_data_open (frame, this);
} else {

View File

@ -162,7 +162,7 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
if (sh->entries_skipped) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
goto out;
}
afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION,
@ -799,7 +799,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_sink (frame, this, sh->active_source);
sh->active_source = active_src;
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
goto out;
}
@ -1946,7 +1946,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
local->loc.path,
priv->children[active_src]->name,
strerror (op_errno));
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"readdir of %s on subvolume %s complete",
@ -2019,7 +2019,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_source (frame, this, sh->active_source);
sh->active_source = active_src;
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@ -2068,7 +2068,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
}
UNLOCK (&frame->lock);
@ -2076,7 +2076,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
if (is_self_heal_failed (sh)) {
if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@ -2231,7 +2231,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
priv = this->private;
if (op_ret < 0) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_entry_finish (frame, this);
goto out;
@ -2294,7 +2294,7 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
"failed for %s.", local->loc.path);
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_entry_done (frame, this);
} else {
@ -2321,9 +2321,10 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
sh->afr_set_self_heal_status = afr_set_entry_sh_status;
sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_entrylk (frame, this, &local->loc, NULL,
afr_sh_post_nonblocking_entry_cbk);
} else {

View File

@ -97,7 +97,7 @@ afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_metadata_finish (frame, this);
return 0;
}
@ -461,7 +461,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
priv = this->private;
if (op_ret < 0) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_metadata_finish (frame, this);
goto out;
@ -618,10 +618,10 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
sh->afr_set_self_heal_status = afr_set_metadata_sh_status;
sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
if (afr_can_start_metadata_self_heal (sh, priv)) {
sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_metadata_lock (frame, this);
} else {
afr_sh_metadata_done (frame, this);

View File

@ -185,6 +185,18 @@ typedef struct {
afr_self_heal_status entry_self_heal;
} afr_sh_status_for_all_type;
/* Kind of self-heal a heal can be working on. Stored in the
 * sh_type_in_action field of struct afr_self_heal_ so that status updates
 * and failure checks can be applied to the matching per-type status. */
typedef enum {
AFR_SELF_HEAL_ENTRY,
AFR_SELF_HEAL_METADATA,
AFR_SELF_HEAL_DATA,
AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY,
/* no self-heal type is in action */
AFR_SELF_HEAL_INVALID = -1,
} afr_self_heal_type;
/* Scope of the failure query passed to is_self_heal_failed(). */
typedef enum {
/* failed if any of the per-type self-heal statuses is AFR_SELF_HEAL_FAILED */
AFR_CHECK_ALL,
/* failed only if the status of the self-heal type in action is
 * AFR_SELF_HEAL_FAILED */
AFR_CHECK_SPECIFIC,
} afr_sh_fail_check_type;
struct afr_self_heal_ {
/* External interface: These are variables (some optional) that
@ -283,9 +295,8 @@ struct afr_self_heal_ {
afr_sh_algo_private_t *private;
afr_sh_status_for_all_type afr_all_sh_status;
afr_self_heal_type sh_type_in_action;
void (*afr_set_self_heal_status) (struct afr_self_heal_ *sh,
afr_self_heal_status status);
struct afr_sh_algorithm *algo;
afr_lock_cbk_t data_lock_success_handler;
afr_lock_cbk_t data_lock_failure_handler;