afr: misc performance improvements

1. In afr_getxattr_cbk, consider the errno value before blindly
launching an inode refresh and a subsequent retry on other children.

2. We want to accuse small files only when we know for sure that there is no
IO happening on that inode. Otherwise, the ia_sizes obtained in the
post-inode-refresh replies may mismatch due to a race between
inode-refresh and ongoing writes, causing spurious heal launches.

Change-Id: Ife180f4fa5e584808c1077aacdc2423897675d33
BUG: 1309462
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/13595
Smoke: Gluster Build System <jenkins@build.gluster.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
This commit is contained in:
Ravishankar N 2016-03-03 23:17:17 +05:30 committed by Pranith Kumar Karampuri
parent eea4175add
commit d1d364634d
3 changed files with 74 additions and 29 deletions

View File

@ -62,6 +62,37 @@ afr_copy_frame (call_frame_t *base)
return frame;
}
/*
 * Check if an entry or inode could be undergoing a transaction.
 *
 * Looks through the xdata of every child's reply in local->replies for a
 * lock-count key selected by @type:
 *   AFR_ENTRY_TRANSACTION -> GLUSTERFS_PARENT_ENTRYLK
 *   AFR_DATA_TRANSACTION  -> GLUSTERFS_INODELK_COUNT
 *
 * Returns _gf_true as soon as one reply carries a non-zero value for that
 * key. Returns _gf_false when no reply does, and also when @type maps to no
 * key at all.
 */
gf_boolean_t
afr_is_possibly_under_txn (afr_transaction_type type, afr_local_t *local,
                           xlator_t *this)
{
        int             i    = 0;
        int             tmp  = 0;
        afr_private_t  *priv = NULL;
        GF_UNUSED char *key  = NULL;

        priv = this->private;

        if (type == AFR_ENTRY_TRANSACTION) {
                key = GLUSTERFS_PARENT_ENTRYLK;
        } else if (type == AFR_DATA_TRANSACTION) {
                /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once
                 * pl_inodelk_xattr_fill supports separate keys for different
                 * domains.*/
                key = GLUSTERFS_INODELK_COUNT;
        }

        /* Any other transaction type has no key to look up, so there is
         * nothing in the replies that could indicate a transaction. Bail out
         * here instead of handing a NULL key to dict_get_int32 for every
         * child. */
        if (!key)
                return _gf_false;

        for (i = 0; i < priv->child_count; i++) {
                /* Children that sent no xdata cannot tell us anything. */
                if (!local->replies[i].xdata)
                        continue;

                /* tmp is only trusted when the lookup itself succeeded. */
                if (dict_get_int32 (local->replies[i].xdata, key,
                                    &tmp) == 0) {
                        if (tmp)
                                return _gf_true;
                }
        }

        return _gf_false;
}
int
__afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
{
@ -628,7 +659,6 @@ afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused,
return 0;
}
int
afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
unsigned char *data_accused)
@ -661,7 +691,6 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
return 0;
}
int
afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
gf_boolean_t *start_heal)
@ -725,7 +754,12 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
}
if (inode->ia_type != IA_IFDIR)
if ((inode->ia_type != IA_IFDIR) &&
/* We want to accuse small files only when we know for sure that
* there is no IO happening. Otherwise, the ia_sizes obtained in
* post-refresh replies may mismatch due to a race between inode-
* refresh and ongoing writes, causing spurious heal launches*/
!afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this))
afr_accuse_smallfiles (this, replies, data_accused);
for (i = 0; i < priv->child_count; i++) {
@ -979,6 +1013,13 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
"Unable to set link-count in dict ");
}
ret = dict_set_str (xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
if (ret) {
gf_msg_debug (this->name, -ret,
"Unable to set inodelk-dom-count in dict ");
}
if (local->fd) {
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i] &&
@ -1492,30 +1533,6 @@ afr_frame_return (call_frame_t *frame)
return call_count;
}
/*
 * Tell whether the entry might currently be part of a transaction.
 *
 * Returns _gf_true if the xdata of any child's reply holds a non-zero
 * GLUSTERFS_PARENT_ENTRYLK value, _gf_false otherwise. Replies without
 * xdata, or without that key, are skipped.
 */
gf_boolean_t
afr_is_entry_possibly_under_txn (afr_local_t *local, xlator_t *this)
{
        afr_private_t *priv       = this->private;
        int            lock_count = 0;
        int            child      = 0;

        for (child = 0; child < priv->child_count; child++) {
                if (local->replies[child].xdata == NULL)
                        continue;

                if (dict_get_int32 (local->replies[child].xdata,
                                    GLUSTERFS_PARENT_ENTRYLK,
                                    &lock_count) != 0)
                        continue;

                if (lock_count != 0)
                        return _gf_true;
        }

        return _gf_false;
}
static char *afr_ignore_xattrs[] = {
GLUSTERFS_OPEN_FD_COUNT,
GLUSTERFS_PARENT_ENTRYLK,
@ -1659,7 +1676,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
replies = local->replies;
parent = local->loc.parent;
locked_entry = afr_is_entry_possibly_under_txn (local, this);
locked_entry = afr_is_possibly_under_txn (AFR_ENTRY_TRANSACTION, local,
this);
readable = alloca0 (priv->child_count);

View File

@ -472,7 +472,16 @@ afr_filter_xattrs (dict_t *dict)
}
}
/*
 * Classify a getxattr errno.
 *
 * Returns _gf_true for ENODATA, ENOTSUP, ERANGE and ENAMETOOLONG, and
 * _gf_false for every other value.
 */
static
gf_boolean_t
afr_getxattr_ignorable_errnos (int32_t op_errno)
{
        switch (op_errno) {
        case ENODATA:
        case ENOTSUP:
        case ERANGE:
        case ENAMETOOLONG:
                return _gf_true;
        default:
                return _gf_false;
        }
}
int
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
@ -482,7 +491,7 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,
local = frame->local;
if (op_ret < 0) {
if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) {
local->op_ret = op_ret;
local->op_errno = op_errno;

View File

@ -2184,6 +2184,23 @@ pl_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
return 0;
}
/*
 * Completion callback that pl_fstat registers for the fstat it winds to the
 * first child. It hands op_ret, op_errno, buf and xdata unchanged to
 * PL_STACK_UNWIND and always returns 0.
 *
 * NOTE(review): xdata is also passed as the macro's second argument,
 * presumably so the locks translator can fill in the lock counts that were
 * requested on the way down before unwinding - confirm against the
 * PL_STACK_UNWIND definition.
 */
int32_t
pl_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
PL_STACK_UNWIND (fstat, xdata, frame, op_ret, op_errno, buf, xdata);
return 0;
}
/*
 * fstat entry point: runs PL_LOCAL_GET_REQUESTS on (frame, this, xdata, fd)
 * with NULL as the last argument, then winds the fstat to the first child
 * with pl_fstat_cbk as the callback. Always returns 0.
 *
 * NOTE(review): PL_LOCAL_GET_REQUESTS presumably records which lock-count
 * keys the caller placed in xdata so the callback can answer them - confirm
 * against the macro's definition.
 */
int32_t
pl_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL);
/* Forward the request unchanged; the reply comes back via pl_fstat_cbk. */
STACK_WIND (frame, pl_fstat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
}
int
pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
@ -2771,6 +2788,7 @@ pl_fentrylk (call_frame_t *frame, xlator_t *this,
struct xlator_fops fops = {
.lookup = pl_lookup,
.create = pl_create,
.fstat = pl_fstat,
.truncate = pl_truncate,
.ftruncate = pl_ftruncate,
.open = pl_open,