afr: fix incorrect reporting of directory split-brain
Problem: When a directory has dirty xattrs due to failed post-ops or when replace/reset brick is performed, AFR does a conservative merge as expected, but heal-info reports it as split-brain because there are no clear sources. Fix: Modify pending flag to contain information about pending heals and split-brains. For directories, if spit-brain flag is not set,just show them as needing heal and not being in split-brain. Fixes: bz#1626994 Change-Id: I09ef821f6887c87d315ae99e6b1de05103cd9383 Signed-off-by: Ravishankar N <ravishankar@redhat.com>
This commit is contained in:
parent
d901308a06
commit
9d8f5a34c1
@ -2,7 +2,7 @@
|
||||
|
||||
function create_brick_xattrop_entry {
|
||||
local xattrop_dir=$(afr_get_index_path $1)
|
||||
local base_entry=`ls $xattrop_dir`
|
||||
local base_entry=`ls $xattrop_dir|grep xattrop`
|
||||
local gfid_str
|
||||
local params=`echo "$@" | cut -d' ' -f2-`
|
||||
echo $params
|
||||
|
62
tests/bugs/replicate/bug-1626994-info-split-brain.t
Normal file
62
tests/bugs/replicate/bug-1626994-info-split-brain.t
Normal file
@ -0,0 +1,62 @@
|
||||
#!/bin/bash
|
||||
|
||||
. $(dirname $0)/../../include.rc
|
||||
. $(dirname $0)/../../volume.rc
|
||||
. $(dirname $0)/../../afr.rc
|
||||
|
||||
cleanup;
|
||||
|
||||
# Test to check dirs having dirty xattr do not show up in info split-brain.
|
||||
|
||||
TEST glusterd;
|
||||
TEST pidof glusterd;
|
||||
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
|
||||
TEST $CLI volume set $V0 self-heal-daemon off
|
||||
TEST $CLI volume start $V0;
|
||||
EXPECT 'Started' volinfo_field $V0 'Status';
|
||||
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
|
||||
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
|
||||
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
|
||||
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
|
||||
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
||||
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
||||
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
|
||||
|
||||
# Create base entry in indices/xattrop
|
||||
echo "Data" > $M0/FILE
|
||||
rm -f $M0/FILE
|
||||
EXPECT "1" count_index_entries $B0/${V0}0
|
||||
EXPECT "1" count_index_entries $B0/${V0}1
|
||||
EXPECT "1" count_index_entries $B0/${V0}2
|
||||
|
||||
TEST mkdir $M0/dirty_dir
|
||||
TEST mkdir $M0/pending_dir
|
||||
|
||||
# Set dirty xattrs on all bricks to simulate the case where entry transaction
|
||||
# succeeded only the pre-op phase.
|
||||
TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir
|
||||
TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir
|
||||
TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir
|
||||
create_brick_xattrop_entry $B0/${V0}0 dirty_dir
|
||||
# Should not show up as split-brain.
|
||||
EXPECT "0" afr_get_split_brain_count $V0
|
||||
|
||||
# replace/reset brick case where the new brick has dirty and the other 2 bricks
|
||||
# blame it should not be reported as split-brain.
|
||||
TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0
|
||||
TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1
|
||||
TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2
|
||||
create_brick_xattrop_entry $B0/${V0}0 "/"
|
||||
# Should not show up as split-brain.
|
||||
EXPECT "0" afr_get_split_brain_count $V0
|
||||
|
||||
# Set pending xattrs on all bricks blaming each other to simulate the case of
|
||||
# entry split-brain.
|
||||
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir
|
||||
TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir
|
||||
TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir
|
||||
create_brick_xattrop_entry $B0/${V0}0 pending_dir
|
||||
# Should show up as split-brain.
|
||||
EXPECT "1" afr_get_split_brain_count $V0
|
||||
|
||||
cleanup;
|
@ -5814,7 +5814,7 @@ out:
|
||||
int
|
||||
afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this,
|
||||
inode_t *inode, gf_boolean_t *msh,
|
||||
gf_boolean_t *pending)
|
||||
unsigned char *pending)
|
||||
{
|
||||
int ret = -1;
|
||||
unsigned char *locked_on = NULL;
|
||||
@ -5859,7 +5859,7 @@ out:
|
||||
|
||||
int
|
||||
afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd,
|
||||
gf_boolean_t *dsh, gf_boolean_t *pflag)
|
||||
gf_boolean_t *dsh, unsigned char *pflag)
|
||||
{
|
||||
int ret = -1;
|
||||
unsigned char *data_lock = NULL;
|
||||
@ -5903,7 +5903,7 @@ out:
|
||||
int
|
||||
afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this,
|
||||
inode_t *inode, gf_boolean_t *esh,
|
||||
gf_boolean_t *pflag)
|
||||
unsigned char *pflag)
|
||||
{
|
||||
int ret = -1;
|
||||
int source = -1;
|
||||
@ -5952,7 +5952,7 @@ afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this,
|
||||
ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock,
|
||||
sources, sinks, healed_sinks,
|
||||
locked_replies, &source, pflag);
|
||||
if ((ret == 0) && source < 0)
|
||||
if ((ret == 0) && (*pflag & PFLAG_SBRAIN))
|
||||
ret = -EIO;
|
||||
*esh = afr_decide_heal_info(priv, sources, ret);
|
||||
}
|
||||
@ -5974,7 +5974,7 @@ afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
|
||||
inode_t **inode, gf_boolean_t *entry_selfheal,
|
||||
gf_boolean_t *data_selfheal,
|
||||
gf_boolean_t *metadata_selfheal,
|
||||
gf_boolean_t *pending)
|
||||
unsigned char *pending)
|
||||
|
||||
{
|
||||
int ret = -1;
|
||||
@ -6059,7 +6059,7 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
|
||||
gf_boolean_t data_selfheal = _gf_false;
|
||||
gf_boolean_t metadata_selfheal = _gf_false;
|
||||
gf_boolean_t entry_selfheal = _gf_false;
|
||||
gf_boolean_t pending = _gf_false;
|
||||
unsigned char pending = 0;
|
||||
dict_t *dict = NULL;
|
||||
int ret = -1;
|
||||
int op_errno = 0;
|
||||
@ -6077,7 +6077,7 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (pending) {
|
||||
if (pending & PFLAG_PENDING) {
|
||||
gf_asprintf(&substr, "-pending");
|
||||
if (!substr)
|
||||
goto out;
|
||||
|
@ -1513,7 +1513,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
|
||||
struct afr_reply *replies,
|
||||
afr_transaction_type type, unsigned char *locked_on,
|
||||
unsigned char *sources, unsigned char *sinks,
|
||||
uint64_t *witness, gf_boolean_t *pflag)
|
||||
uint64_t *witness, unsigned char *pflag)
|
||||
{
|
||||
afr_private_t *priv = NULL;
|
||||
int i = 0;
|
||||
@ -1541,7 +1541,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
|
||||
for (i = 0; i < priv->child_count; i++) {
|
||||
for (j = 0; j < priv->child_count; j++)
|
||||
if (matrix[i][j])
|
||||
*pflag = _gf_true;
|
||||
*pflag |= PFLAG_PENDING;
|
||||
if (*pflag)
|
||||
break;
|
||||
}
|
||||
@ -1623,6 +1623,8 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
|
||||
if (locked_on[i])
|
||||
sinks[i] = 1;
|
||||
}
|
||||
if (pflag)
|
||||
*pflag |= PFLAG_SBRAIN;
|
||||
}
|
||||
|
||||
/* One more class of witness similar to dirty in v2 is where no pending
|
||||
|
@ -597,7 +597,7 @@ __afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,
|
||||
unsigned char *locked_on, unsigned char *sources,
|
||||
unsigned char *sinks, unsigned char *healed_sinks,
|
||||
unsigned char *undid_pending,
|
||||
struct afr_reply *replies, gf_boolean_t *pflag)
|
||||
struct afr_reply *replies, unsigned char *pflag)
|
||||
{
|
||||
int ret = -1;
|
||||
int source = -1;
|
||||
|
@ -487,7 +487,7 @@ __afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,
|
||||
unsigned char *sources, unsigned char *sinks,
|
||||
unsigned char *healed_sinks,
|
||||
struct afr_reply *replies, int *source_p,
|
||||
gf_boolean_t *pflag)
|
||||
unsigned char *pflag)
|
||||
{
|
||||
int ret = -1;
|
||||
int source = -1;
|
||||
|
@ -310,7 +310,7 @@ __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
|
||||
unsigned char *sources, unsigned char *sinks,
|
||||
unsigned char *healed_sinks,
|
||||
unsigned char *undid_pending,
|
||||
struct afr_reply *replies, gf_boolean_t *pflag)
|
||||
struct afr_reply *replies, unsigned char *pflag)
|
||||
{
|
||||
int ret = -1;
|
||||
int source = -1;
|
||||
|
@ -177,7 +177,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
|
||||
struct afr_reply *replies,
|
||||
afr_transaction_type type, unsigned char *locked_on,
|
||||
unsigned char *sources, unsigned char *sinks,
|
||||
uint64_t *witness, gf_boolean_t *flag);
|
||||
uint64_t *witness, unsigned char *flag);
|
||||
int
|
||||
afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx,
|
||||
dict_t *xdata);
|
||||
@ -284,7 +284,7 @@ __afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,
|
||||
unsigned char *locked_on, unsigned char *sources,
|
||||
unsigned char *sinks, unsigned char *healed_sinks,
|
||||
unsigned char *undid_pending,
|
||||
struct afr_reply *replies, gf_boolean_t *flag);
|
||||
struct afr_reply *replies, unsigned char *flag);
|
||||
|
||||
int
|
||||
__afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
|
||||
@ -292,14 +292,14 @@ __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
|
||||
unsigned char *sources, unsigned char *sinks,
|
||||
unsigned char *healed_sinks,
|
||||
unsigned char *undid_pending,
|
||||
struct afr_reply *replies, gf_boolean_t *flag);
|
||||
struct afr_reply *replies, unsigned char *flag);
|
||||
int
|
||||
__afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,
|
||||
inode_t *inode, unsigned char *locked_on,
|
||||
unsigned char *sources, unsigned char *sinks,
|
||||
unsigned char *healed_sinks,
|
||||
struct afr_reply *replies, int *source_p,
|
||||
gf_boolean_t *flag);
|
||||
unsigned char *flag);
|
||||
|
||||
int
|
||||
afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
|
||||
|
@ -39,6 +39,10 @@
|
||||
#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
|
||||
|
||||
#define AFR_HALO_MAX_LATENCY 99999
|
||||
|
||||
#define PFLAG_PENDING (1 << 0)
|
||||
#define PFLAG_SBRAIN (1 << 1)
|
||||
|
||||
typedef int (*afr_lock_cbk_t)(call_frame_t *frame, xlator_t *this);
|
||||
|
||||
typedef int (*afr_read_txn_wind_t)(call_frame_t *frame, xlator_t *this,
|
||||
|
Loading…
Reference in New Issue
Block a user