performance/readdir-ahead: limit cache size
This patch introduces a new option called "rda-cache-limit", which is the maximum value the entire readdir-ahead cache can grow into. Since, readdir-ahead holds a reference to inode through dentries, this patch also accounts memory stored by various xlators in inode contexts. Change-Id: I84cc0ca812f35e0f9041f8cc71effae53a9e7f99 BUG: 1356960 Signed-off-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-on: http://review.gluster.org/16137 NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Poornima G <pgurusid@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
This commit is contained in:
parent
7ee998b904
commit
96fb356240
@ -2504,3 +2504,35 @@ out:
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
size_t
|
||||
inode_ctx_size (inode_t *inode)
|
||||
{
|
||||
int i = 0;
|
||||
size_t size = 0;
|
||||
xlator_t *xl = NULL, *old_THIS = NULL;
|
||||
|
||||
if (!inode)
|
||||
goto out;
|
||||
|
||||
LOCK (&inode->lock);
|
||||
{
|
||||
for (i = 0; i < inode->table->ctxcount; i++) {
|
||||
if (!inode->_ctx[i].xl_key)
|
||||
continue;
|
||||
|
||||
xl = (xlator_t *)(long)inode->_ctx[i].xl_key;
|
||||
old_THIS = THIS;
|
||||
THIS = xl;
|
||||
|
||||
if (xl->cbks->ictxsize)
|
||||
size += xl->cbks->ictxsize (xl, inode);
|
||||
|
||||
THIS = old_THIS;
|
||||
}
|
||||
}
|
||||
UNLOCK (&inode->lock);
|
||||
|
||||
out:
|
||||
return size;
|
||||
}
|
||||
|
@ -279,4 +279,7 @@ inode_needs_lookup (inode_t *inode, xlator_t *this);
|
||||
int
|
||||
inode_has_dentry (inode_t *inode);
|
||||
|
||||
size_t
|
||||
inode_ctx_size (inode_t *inode);
|
||||
|
||||
#endif /* _INODE_H */
|
||||
|
@ -847,6 +847,10 @@ typedef int32_t (*cbk_client_t)(xlator_t *this, client_t *client);
|
||||
typedef void (*cbk_ictxmerge_t) (xlator_t *this, fd_t *fd,
|
||||
inode_t *inode, inode_t *linked_inode);
|
||||
|
||||
typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode);
|
||||
|
||||
typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd);
|
||||
|
||||
struct xlator_cbks {
|
||||
cbk_forget_t forget;
|
||||
cbk_release_t release;
|
||||
@ -855,6 +859,8 @@ struct xlator_cbks {
|
||||
cbk_client_t client_destroy;
|
||||
cbk_client_t client_disconnect;
|
||||
cbk_ictxmerge_t ictxmerge;
|
||||
cbk_inodectx_size_t ictxsize;
|
||||
cbk_fdctx_size_t fdctxsize;
|
||||
};
|
||||
|
||||
typedef int32_t (*dumpop_priv_t) (xlator_t *this);
|
||||
|
@ -2625,7 +2625,6 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
||||
"option. Unmount and delete the shared storage volume "
|
||||
" on disabling this option."
|
||||
},
|
||||
|
||||
#if USE_GFDB /* no GFDB means tiering is disabled */
|
||||
/* tier translator - global tunables */
|
||||
{ .key = "cluster.write-freq-threshold",
|
||||
@ -3089,6 +3088,32 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
||||
.op_version = GD_OP_VERSION_3_8_4,
|
||||
.flags = OPT_FLAG_CLIENT_OPT
|
||||
},
|
||||
{ .key = "performance.rda-request-size",
|
||||
.voltype = "performance/readdir-ahead",
|
||||
.option = "rda-request-size",
|
||||
.flags = OPT_FLAG_CLIENT_OPT,
|
||||
.type = DOC,
|
||||
.op_version = GD_OP_VERSION_3_9_1,
|
||||
},
|
||||
{ .key = "performance.rda-low-wmark",
|
||||
.voltype = "performance/readdir-ahead",
|
||||
.option = "rda-low-wmark",
|
||||
.type = DOC,
|
||||
.flags = OPT_FLAG_CLIENT_OPT,
|
||||
.op_version = GD_OP_VERSION_3_9_1,
|
||||
},
|
||||
{ .key = "performance.rda-high-wmark",
|
||||
.voltype = "performance/readdir-ahead",
|
||||
.type = DOC,
|
||||
.flags = OPT_FLAG_CLIENT_OPT,
|
||||
.op_version = GD_OP_VERSION_3_9_1,
|
||||
},
|
||||
{ .key = "performance.rda-cache-limit",
|
||||
.voltype = "performance/readdir-ahead",
|
||||
.type = DOC,
|
||||
.flags = OPT_FLAG_CLIENT_OPT,
|
||||
.op_version = GD_OP_VERSION_3_9_1,
|
||||
},
|
||||
{ .key = NULL
|
||||
}
|
||||
};
|
||||
|
@ -97,7 +97,8 @@ rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
|
||||
{
|
||||
if ((ctx->state & RDA_FD_EOD) ||
|
||||
(ctx->state & RDA_FD_ERROR) ||
|
||||
(!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)))
|
||||
(!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)) ||
|
||||
(request_size && ctx->cur_size >= request_size))
|
||||
return _gf_true;
|
||||
|
||||
return _gf_false;
|
||||
@ -111,20 +112,28 @@ static int32_t
|
||||
__rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
|
||||
struct rda_fd_ctx *ctx)
|
||||
{
|
||||
gf_dirent_t *dirent, *tmp;
|
||||
size_t dirent_size, size = 0;
|
||||
int32_t count = 0;
|
||||
struct rda_priv *priv = this->private;
|
||||
gf_dirent_t *dirent, *tmp;
|
||||
size_t dirent_size, size = 0, inodectx_size = 0;
|
||||
int32_t count = 0;
|
||||
struct rda_priv *priv = NULL;
|
||||
|
||||
priv = this->private;
|
||||
|
||||
list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) {
|
||||
dirent_size = gf_dirent_size(dirent->d_name);
|
||||
if (size + dirent_size > request_size)
|
||||
break;
|
||||
|
||||
inodectx_size = 0;
|
||||
|
||||
inode_ctx_del (dirent->inode, this, (void *)&inodectx_size);
|
||||
|
||||
size += dirent_size;
|
||||
list_del_init(&dirent->list);
|
||||
ctx->cur_size -= dirent_size;
|
||||
|
||||
priv->rda_cache_size -= (dirent_size + inodectx_size);
|
||||
|
||||
list_add_tail(&dirent->list, &entries->list);
|
||||
ctx->cur_offset = dirent->d_off;
|
||||
count++;
|
||||
@ -234,11 +243,17 @@ rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
|
||||
* the request out of the preload or the request that enables us to do
|
||||
* so is in flight...
|
||||
*/
|
||||
if (rda_can_serve_readdirp(ctx, size))
|
||||
if (rda_can_serve_readdirp(ctx, size)) {
|
||||
call_resume(stub);
|
||||
else
|
||||
} else {
|
||||
ctx->stub = stub;
|
||||
|
||||
if (!(ctx->state & RDA_FD_RUNNING)) {
|
||||
fill = 1;
|
||||
ctx->state |= RDA_FD_RUNNING;
|
||||
}
|
||||
}
|
||||
|
||||
UNLOCK(&ctx->lock);
|
||||
|
||||
if (fill)
|
||||
@ -266,6 +281,7 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
struct rda_fd_ctx *ctx = local->ctx;
|
||||
struct rda_priv *priv = this->private;
|
||||
int fill = 1;
|
||||
size_t inodectx_size = 0, dirent_size = 0;
|
||||
|
||||
LOCK(&ctx->lock);
|
||||
|
||||
@ -286,7 +302,19 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
/* must preserve entry order */
|
||||
list_add_tail(&dirent->list, &ctx->entries.list);
|
||||
|
||||
ctx->cur_size += gf_dirent_size(dirent->d_name);
|
||||
dirent_size = gf_dirent_size (dirent->d_name);
|
||||
inodectx_size = 0;
|
||||
|
||||
if (dirent->inode) {
|
||||
inodectx_size = inode_ctx_size (dirent->inode);
|
||||
inode_ctx_set (dirent->inode, this,
|
||||
(void *)inodectx_size);
|
||||
}
|
||||
|
||||
ctx->cur_size += dirent_size;
|
||||
|
||||
priv->rda_cache_size += (dirent_size + inodectx_size);
|
||||
|
||||
ctx->next_offset = dirent->d_off;
|
||||
}
|
||||
}
|
||||
@ -321,19 +349,21 @@ out:
|
||||
* If we have been marked for bypass and have no pending stub, clear the
|
||||
* run state so we stop preloading the context with entries.
|
||||
*/
|
||||
if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub)
|
||||
if (!ctx->stub && ((ctx->state & RDA_FD_BYPASS)
|
||||
|| (priv->rda_cache_size > priv->rda_cache_limit)))
|
||||
ctx->state &= ~RDA_FD_RUNNING;
|
||||
|
||||
if (!(ctx->state & RDA_FD_RUNNING)) {
|
||||
fill = 0;
|
||||
if (ctx->xattrs) {
|
||||
/*
|
||||
* fill = 0 and hence rda_fill_fd() won't be invoked.
|
||||
* unref for ref taken in rda_fill_fd()
|
||||
*/
|
||||
dict_unref (ctx->xattrs);
|
||||
ctx->xattrs = NULL;
|
||||
}
|
||||
if (ctx->xattrs) {
|
||||
/*
|
||||
* fill = 0 and hence rda_fill_fd() won't be invoked.
|
||||
* unref for ref taken in rda_fill_fd()
|
||||
*/
|
||||
dict_unref (ctx->xattrs);
|
||||
ctx->xattrs = NULL;
|
||||
}
|
||||
|
||||
STACK_DESTROY(ctx->fill_frame->root);
|
||||
ctx->fill_frame = NULL;
|
||||
}
|
||||
@ -393,10 +423,10 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
|
||||
|
||||
ctx->fill_frame = nframe;
|
||||
|
||||
if (!ctx->xattrs && orig_local && orig_local->xattrs) {
|
||||
/* when this function is invoked by rda_opendir_cbk */
|
||||
ctx->xattrs = dict_ref(orig_local->xattrs);
|
||||
}
|
||||
if (!ctx->xattrs && orig_local && orig_local->xattrs) {
|
||||
/* when this function is invoked by rda_opendir_cbk */
|
||||
ctx->xattrs = dict_ref(orig_local->xattrs);
|
||||
}
|
||||
} else {
|
||||
nframe = ctx->fill_frame;
|
||||
local = nframe->local;
|
||||
@ -578,11 +608,13 @@ reconfigure(xlator_t *this, dict_t *options)
|
||||
struct rda_priv *priv = this->private;
|
||||
|
||||
GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
|
||||
uint32, err);
|
||||
GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64,
|
||||
err);
|
||||
GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size_uint64,
|
||||
err);
|
||||
size_uint64, err);
|
||||
GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options,
|
||||
size_uint64, err);
|
||||
GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options,
|
||||
size_uint64, err);
|
||||
GF_OPTION_RECONF("rda-cache-limit", priv->rda_cache_limit, options,
|
||||
size_uint64, err);
|
||||
|
||||
return 0;
|
||||
err:
|
||||
@ -619,9 +651,13 @@ init(xlator_t *this)
|
||||
if (!this->local_pool)
|
||||
goto err;
|
||||
|
||||
GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err);
|
||||
GF_OPTION_INIT("rda-request-size", priv->rda_req_size, size_uint64,
|
||||
err);
|
||||
GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err);
|
||||
GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err);
|
||||
GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64,
|
||||
err);
|
||||
GF_OPTION_INIT("rda-cache-limit", priv->rda_cache_limit, size_uint64,
|
||||
err);
|
||||
|
||||
return 0;
|
||||
|
||||
@ -657,26 +693,38 @@ struct xlator_cbks cbks = {
|
||||
|
||||
struct volume_options options[] = {
|
||||
{ .key = {"rda-request-size"},
|
||||
.type = GF_OPTION_TYPE_INT,
|
||||
.type = GF_OPTION_TYPE_SIZET,
|
||||
.min = 4096,
|
||||
.max = 131072,
|
||||
.default_value = "131072",
|
||||
.description = "readdir-ahead request size",
|
||||
.default_value = "128KB",
|
||||
.description = "size of buffer in readdirp calls initiated by "
|
||||
"readdir-ahead ",
|
||||
},
|
||||
{ .key = {"rda-low-wmark"},
|
||||
.type = GF_OPTION_TYPE_SIZET,
|
||||
.min = 0,
|
||||
.max = 10 * GF_UNIT_MB,
|
||||
.default_value = "4096",
|
||||
.description = "the value under which we plug",
|
||||
.description = "the value under which readdir-ahead plugs",
|
||||
},
|
||||
{ .key = {"rda-high-wmark"},
|
||||
.type = GF_OPTION_TYPE_SIZET,
|
||||
.min = 0,
|
||||
.max = 100 * GF_UNIT_MB,
|
||||
.default_value = "131072",
|
||||
.description = "the value over which we unplug",
|
||||
.default_value = "128KB",
|
||||
.description = "the value over which readdir-ahead unplugs",
|
||||
},
|
||||
{ .key = {"rda-cache-limit"},
|
||||
.type = GF_OPTION_TYPE_SIZET,
|
||||
.min = 0,
|
||||
.max = 1 * GF_UNIT_GB,
|
||||
.default_value = "10MB",
|
||||
.description = "maximum size of cache consumed by readdir-ahead "
|
||||
"xlator. This value is global and total memory "
|
||||
"consumption by readdir-ahead is capped by this "
|
||||
"value, irrespective of the number/size of "
|
||||
"directories cached",
|
||||
},
|
||||
{ .key = {NULL} },
|
||||
};
|
||||
|
||||
|
@ -40,9 +40,11 @@ struct rda_local {
|
||||
};
|
||||
|
||||
struct rda_priv {
|
||||
uint32_t rda_req_size;
|
||||
uint64_t rda_req_size;
|
||||
uint64_t rda_low_wmark;
|
||||
uint64_t rda_high_wmark;
|
||||
uint64_t rda_cache_limit;
|
||||
uint64_t rda_cache_size;
|
||||
};
|
||||
|
||||
#endif /* __READDIR_AHEAD_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user