cluster/afr: When parent and entry read subvols are different, set entry->inode to NULL
That way a lookup would be forced on the entry, and its attributes will always be selected from its read subvol. Change-Id: Iaba25e2cd5f83e983fc8b1a1f48da3850808e6b8 BUG: 1179169 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> Reviewed-on: http://review.gluster.org/9477 Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
parent
e9276230e1
commit
c78998c39f
@ -24,7 +24,7 @@ TEST kill_brick $V0 $H0 $B0/${V0}0
|
||||
rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a
|
||||
|
||||
TEST $CLI volume start $V0 force
|
||||
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
||||
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
|
||||
#Test that the lookup returns ENOENT instead of ESTALE
|
||||
#If lookup returns ESTALE this command will fail with ESTALE
|
||||
TEST touch f
|
||||
@ -44,8 +44,8 @@ echo jkl > $M1/b
|
||||
#gfid-mismatch happened. This should result in EIO
|
||||
TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
|
||||
TEST $CLI volume start $V0 force
|
||||
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
||||
# The kernel knows nothing about the tricks done to the volume, and the file
|
||||
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
|
||||
# The kernel knows nothing about the tricks done to the volume, and the file
|
||||
# may still be in page cache. Wait for timeout.
|
||||
EXPECT_WITHIN 10 "" cat $M0/b
|
||||
EXPECT_WITHIN 10 "^$" cat $M0/b
|
||||
cleanup
|
||||
|
@ -103,6 +103,13 @@ function _afr_child_up_status {
|
||||
echo "$up"
|
||||
}
|
||||
|
||||
function afr_child_up_status_meta {
|
||||
local mnt=$1
|
||||
local repl=$2
|
||||
local child=$3
|
||||
grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
|
||||
}
|
||||
|
||||
function afr_child_up_status {
|
||||
local vol=$1
|
||||
#brick_id is (brick-num in volume info - 1)
|
||||
|
@ -1219,6 +1219,50 @@ afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2)
|
||||
return _gf_true;
|
||||
}
|
||||
|
||||
static int
|
||||
afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
|
||||
struct afr_reply *replies, unsigned char *readable)
|
||||
{
|
||||
int i = 0;
|
||||
int par_read_subvol = -1;
|
||||
int par_read_subvol_iter = -1;
|
||||
afr_private_t *priv = NULL;
|
||||
|
||||
priv = this->private;
|
||||
|
||||
if (parent)
|
||||
par_read_subvol = afr_data_subvol_get (parent, this, 0, 0);
|
||||
|
||||
for (i = 0; i < priv->child_count; i++) {
|
||||
if (!replies[i].valid)
|
||||
continue;
|
||||
|
||||
if (replies[i].op_ret < 0)
|
||||
continue;
|
||||
|
||||
if (par_read_subvol_iter == -1) {
|
||||
par_read_subvol_iter = i;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((i != par_read_subvol) && readable[i])
|
||||
par_read_subvol_iter = i;
|
||||
|
||||
if (i == par_read_subvol)
|
||||
par_read_subvol_iter = i;
|
||||
}
|
||||
/* At the end of the for-loop, the only reason why @par_read_subvol_iter
|
||||
* could be -1 is when this LOOKUP has failed on all sub-volumes.
|
||||
* So it is okay to send an arbitrary subvolume (0 in this case)
|
||||
* as parent read subvol.
|
||||
*/
|
||||
if (par_read_subvol_iter == -1)
|
||||
par_read_subvol_iter = 0;
|
||||
|
||||
return par_read_subvol_iter;
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
||||
{
|
||||
@ -1227,23 +1271,25 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
||||
int i = -1;
|
||||
int op_errno = 0;
|
||||
int read_subvol = 0;
|
||||
int par_read_subvol = 0;
|
||||
unsigned char *readable = NULL;
|
||||
int event = 0;
|
||||
struct afr_reply *replies = NULL;
|
||||
uuid_t read_gfid = {0, };
|
||||
gf_boolean_t locked_entry = _gf_false;
|
||||
gf_boolean_t can_interpret = _gf_true;
|
||||
inode_t *parent = NULL;
|
||||
|
||||
priv = this->private;
|
||||
local = frame->local;
|
||||
replies = local->replies;
|
||||
parent = local->loc.parent;
|
||||
|
||||
locked_entry = afr_is_entry_possibly_under_txn (local, this);
|
||||
|
||||
readable = alloca0 (priv->child_count);
|
||||
|
||||
afr_inode_read_subvol_get (local->loc.parent, this, readable,
|
||||
NULL, &event);
|
||||
afr_inode_read_subvol_get (parent, this, readable, NULL, &event);
|
||||
|
||||
/* First, check if we have a gfid-change from somewhere,
|
||||
If so, propagate that so that a fresh lookup can be
|
||||
@ -1269,7 +1315,6 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
||||
"underway" in creation */
|
||||
local->op_ret = -1;
|
||||
local->op_errno = ENOENT;
|
||||
read_subvol = i;
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
@ -1347,10 +1392,13 @@ unwind:
|
||||
if (read_subvol == -1)
|
||||
read_subvol = 0;
|
||||
|
||||
par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
|
||||
readable);
|
||||
|
||||
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
|
||||
local->inode, &local->replies[read_subvol].poststat,
|
||||
local->replies[read_subvol].xdata,
|
||||
&local->replies[read_subvol].postparent);
|
||||
&local->replies[par_read_subvol].postparent);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -123,22 +123,50 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)
|
||||
{
|
||||
int gen = 0;
|
||||
int entry_read_subvol = 0;
|
||||
unsigned char *data_readable = NULL;
|
||||
unsigned char *metadata_readable = NULL;
|
||||
afr_private_t *priv = NULL;
|
||||
|
||||
priv = this->private;
|
||||
data_readable = alloca0 (priv->child_count);
|
||||
metadata_readable = alloca0 (priv->child_count);
|
||||
|
||||
afr_inode_read_subvol_get (inode, this, data_readable,
|
||||
metadata_readable, &gen);
|
||||
|
||||
if (gen != priv->event_generation ||
|
||||
!data_readable[par_read_subvol] ||
|
||||
!metadata_readable[par_read_subvol])
|
||||
return -1;
|
||||
|
||||
/* Once the control reaches the following statement, it means that the
|
||||
* parent's read subvol is perfectly readable. So calling
|
||||
* either afr_data_subvol_get() or afr_metadata_subvol_get() would
|
||||
* yield the same result. Hence, choosing afr_data_subvol_get() below.
|
||||
*/
|
||||
entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0);
|
||||
if (entry_read_subvol != par_read_subvol)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
|
||||
gf_dirent_t *entries, fd_t *fd)
|
||||
{
|
||||
afr_private_t *priv = NULL;
|
||||
gf_dirent_t *entry = NULL;
|
||||
gf_dirent_t *tmp = NULL;
|
||||
unsigned char *data_readable = NULL;
|
||||
unsigned char *metadata_readable = NULL;
|
||||
int gen = 0;
|
||||
int ret = -1;
|
||||
gf_dirent_t *entry = NULL;
|
||||
gf_dirent_t *tmp = NULL;
|
||||
xlator_t *this = NULL;
|
||||
|
||||
priv = THIS->private;
|
||||
|
||||
data_readable = alloca0 (priv->child_count);
|
||||
metadata_readable = alloca0 (priv->child_count);
|
||||
this = THIS;
|
||||
|
||||
list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {
|
||||
if (__is_root_gfid (fd->inode->gfid) &&
|
||||
@ -150,17 +178,12 @@ afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
|
||||
list_add_tail (&entry->list, &entries->list);
|
||||
|
||||
if (entry->inode) {
|
||||
gen = 0;
|
||||
afr_inode_read_subvol_get (entry->inode, THIS,
|
||||
data_readable,
|
||||
metadata_readable, &gen);
|
||||
|
||||
if (gen != priv->event_generation ||
|
||||
!data_readable[subvol] ||
|
||||
!metadata_readable[subvol]) {
|
||||
|
||||
ret = afr_validate_read_subvol (entry->inode, this,
|
||||
subvol);
|
||||
if (ret == -1) {
|
||||
inode_unref (entry->inode);
|
||||
entry->inode = NULL;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user