cluster/afr: When parent and entry read subvols are different, set entry->inode to NULL

That way a lookup would be forced on the entry, and its attributes will
always be selected from its read subvol.

Change-Id: Iaba25e2cd5f83e983fc8b1a1f48da3850808e6b8
BUG: 1179169
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/9477
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Krutika Dhananjay 2015-01-22 13:53:47 +05:30 committed by Vijay Bellur
parent e9276230e1
commit c78998c39f
4 changed files with 105 additions and 27 deletions

View File

@ -24,7 +24,7 @@ TEST kill_brick $V0 $H0 $B0/${V0}0
rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
#Test that the lookup returns ENOENT instead of ESTALE
#If lookup returns ESTALE this command will fail with ESTALE
TEST touch f
@ -44,8 +44,8 @@ echo jkl > $M1/b
#gfid-mismatch happened. This should result in EIO
TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
# The kernel knows nothing about the tricks done to the volume, and the file
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
# The kernel knows nothing about the tricks done to the volume, and the file
# may still be in page cache. Wait for timeout.
EXPECT_WITHIN 10 "" cat $M0/b
EXPECT_WITHIN 10 "^$" cat $M0/b
cleanup

View File

@ -103,6 +103,13 @@ function _afr_child_up_status {
echo "$up"
}
function afr_child_up_status_meta {
local mnt=$1
local repl=$2
local child=$3
grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
}
function afr_child_up_status {
local vol=$1
#brick_id is (brick-num in volume info - 1)

View File

@ -1219,6 +1219,50 @@ afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2)
return _gf_true;
}
static int
afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
struct afr_reply *replies, unsigned char *readable)
{
int i = 0;
int par_read_subvol = -1;
int par_read_subvol_iter = -1;
afr_private_t *priv = NULL;
priv = this->private;
if (parent)
par_read_subvol = afr_data_subvol_get (parent, this, 0, 0);
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
if (replies[i].op_ret < 0)
continue;
if (par_read_subvol_iter == -1) {
par_read_subvol_iter = i;
continue;
}
if ((i != par_read_subvol) && readable[i])
par_read_subvol_iter = i;
if (i == par_read_subvol)
par_read_subvol_iter = i;
}
/* At the end of the for-loop, the only reason why @par_read_subvol_iter
* could be -1 is when this LOOKUP has failed on all sub-volumes.
* So it is okay to send an arbitrary subvolume (0 in this case)
* as parent read subvol.
*/
if (par_read_subvol_iter == -1)
par_read_subvol_iter = 0;
return par_read_subvol_iter;
}
static void
afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
@ -1227,23 +1271,25 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
int i = -1;
int op_errno = 0;
int read_subvol = 0;
int par_read_subvol = 0;
unsigned char *readable = NULL;
int event = 0;
struct afr_reply *replies = NULL;
uuid_t read_gfid = {0, };
gf_boolean_t locked_entry = _gf_false;
gf_boolean_t can_interpret = _gf_true;
inode_t *parent = NULL;
priv = this->private;
local = frame->local;
replies = local->replies;
parent = local->loc.parent;
locked_entry = afr_is_entry_possibly_under_txn (local, this);
readable = alloca0 (priv->child_count);
afr_inode_read_subvol_get (local->loc.parent, this, readable,
NULL, &event);
afr_inode_read_subvol_get (parent, this, readable, NULL, &event);
/* First, check if we have a gfid-change from somewhere,
If so, propagate that so that a fresh lookup can be
@ -1269,7 +1315,6 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
"underway" in creation */
local->op_ret = -1;
local->op_errno = ENOENT;
read_subvol = i;
goto unwind;
}
@ -1347,10 +1392,13 @@ unwind:
if (read_subvol == -1)
read_subvol = 0;
par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
readable);
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->replies[read_subvol].poststat,
local->replies[read_subvol].xdata,
&local->replies[read_subvol].postparent);
&local->replies[par_read_subvol].postparent);
}
/*

View File

@ -123,22 +123,50 @@ out:
return 0;
}
static int
afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)
{
int gen = 0;
int entry_read_subvol = 0;
unsigned char *data_readable = NULL;
unsigned char *metadata_readable = NULL;
afr_private_t *priv = NULL;
priv = this->private;
data_readable = alloca0 (priv->child_count);
metadata_readable = alloca0 (priv->child_count);
afr_inode_read_subvol_get (inode, this, data_readable,
metadata_readable, &gen);
if (gen != priv->event_generation ||
!data_readable[par_read_subvol] ||
!metadata_readable[par_read_subvol])
return -1;
/* Once the control reaches the following statement, it means that the
* parent's read subvol is perfectly readable. So calling
* either afr_data_subvol_get() or afr_metadata_subvol_get() would
* yield the same result. Hence, choosing afr_data_subvol_get() below.
*/
entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0);
if (entry_read_subvol != par_read_subvol)
return -1;
return 0;
}
static void
afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
gf_dirent_t *entries, fd_t *fd)
{
afr_private_t *priv = NULL;
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
unsigned char *data_readable = NULL;
unsigned char *metadata_readable = NULL;
int gen = 0;
int ret = -1;
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
xlator_t *this = NULL;
priv = THIS->private;
data_readable = alloca0 (priv->child_count);
metadata_readable = alloca0 (priv->child_count);
this = THIS;
list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {
if (__is_root_gfid (fd->inode->gfid) &&
@ -150,17 +178,12 @@ afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
list_add_tail (&entry->list, &entries->list);
if (entry->inode) {
gen = 0;
afr_inode_read_subvol_get (entry->inode, THIS,
data_readable,
metadata_readable, &gen);
if (gen != priv->event_generation ||
!data_readable[subvol] ||
!metadata_readable[subvol]) {
ret = afr_validate_read_subvol (entry->inode, this,
subvol);
if (ret == -1) {
inode_unref (entry->inode);
entry->inode = NULL;
continue;
}
}
}