Quota/marker : Support for inode quota

Currently, the only way to retrieve the number of files/objects in a
directory or volume is to do a crawl of the entire directory/volume.
This is expensive and is not scalable.

The new mechanism proposes to store the count of objects/files as part
of an extended attribute of a directory. Each directory's extended
attribute value will indicate the number of files/objects present
in the tree rooted at that directory.

Currently, file usage is accounted in marker by doing multiple FOPs
like setting and getting xattrs. Doing this with STACK_WIND and
UNWIND can be harder to debug, as it involves multiple callbacks.
In this change we replace the current mechanism with the syncop
approach, as syncop code is much simpler to follow and helps us
implement inode quota in an organized way.

Change-Id: Ibf366fbe07037284e89a241ddaff7750fc8771b4
BUG: 1188636
Signed-off-by: vmallika <vmallika@redhat.com>
Signed-off-by: Sachin Pandit <spandit@redhat.com>
Reviewed-on: http://review.gluster.org/9567
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
vmallika 2015-03-17 20:05:19 +05:30 committed by Vijay Bellur
parent 33bb32ce58
commit 7970183f4c
12 changed files with 1793 additions and 274 deletions

View File

@ -2413,23 +2413,29 @@ static int
print_quota_list_output (cli_local_t *local, char *mountdir,
char *default_sl, char *path)
{
int64_t used_space = 0;
int64_t avail = 0;
char *used_str = NULL;
char *avail_str = NULL;
int ret = -1;
char *sl_final = NULL;
char *hl_str = NULL;
double sl_num = 0;
gf_boolean_t sl = _gf_false;
gf_boolean_t hl = _gf_false;
char percent_str[20] = {0};
int64_t avail = 0;
char *used_str = NULL;
char *avail_str = NULL;
int ret = -1;
char *sl_final = NULL;
char *hl_str = NULL;
double sl_num = 0;
gf_boolean_t sl = _gf_false;
gf_boolean_t hl = _gf_false;
char percent_str[20] = {0};
ssize_t xattr_size = 0;
struct quota_limit {
int64_t hl;
int64_t sl;
} __attribute__ ((__packed__)) existing_limits;
struct quota_meta {
int64_t size;
int64_t file_count;
int64_t dir_count;
} __attribute__ ((__packed__)) used_space;
ret = sys_lgetxattr (mountdir, "trusted.glusterfs.quota.limit-set",
(void *)&existing_limits,
sizeof (existing_limits));
@ -2490,10 +2496,26 @@ print_quota_list_output (cli_local_t *local, char *mountdir,
sl_final = percent_str;
}
ret = sys_lgetxattr (mountdir, "trusted.glusterfs.quota.size",
&used_space, sizeof (used_space));
used_space.size = used_space.file_count = used_space.dir_count = 0;
xattr_size = sys_lgetxattr (mountdir, "trusted.glusterfs.quota.size",
NULL, 0);
if (xattr_size > sizeof (int64_t)) {
ret = sys_lgetxattr (mountdir, "trusted.glusterfs.quota.size",
&used_space, sizeof (used_space));
} else if (xattr_size > 0) {
/* This is for compatibility.
* Older version had only file usage
*/
ret = sys_lgetxattr (mountdir, "trusted.glusterfs.quota.size",
&(used_space.size), sizeof (used_space.size));
} else {
ret = -1;
}
if (ret < 0) {
gf_log ("cli", GF_LOG_ERROR, "Failed to get quota size "
"on path %s: %s", mountdir, strerror (errno));
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_quota_xml_output (local, path, hl_str,
sl_final, "N/A",
@ -2510,14 +2532,16 @@ print_quota_list_output (cli_local_t *local, char *mountdir,
"N/A", "N/A", "N/A", "N/A");
}
} else {
used_space = ntoh64 (used_space);
used_space.size = ntoh64 (used_space.size);
used_space.file_count = ntoh64 (used_space.file_count);
used_space.dir_count = ntoh64 (used_space.dir_count);
used_str = gf_uint64_2human_readable (used_space);
used_str = gf_uint64_2human_readable (used_space.size);
if (existing_limits.hl > used_space) {
avail = existing_limits.hl - used_space;
if (existing_limits.hl > used_space.size) {
avail = existing_limits.hl - used_space.size;
hl = _gf_false;
if (used_space > sl_num)
if (used_space.size > sl_num)
sl = _gf_true;
else
sl = _gf_false;
@ -2544,8 +2568,9 @@ print_quota_list_output (cli_local_t *local, char *mountdir,
if (used_str == NULL) {
cli_out ("%-40s %7s %9s %11"PRIu64
"%9"PRIu64" %15s %18s", path, hl_str,
sl_final, used_space, avail, sl? "Yes" : "No",
hl? "Yes" : "No");
sl_final, used_space.size, avail,
sl ? "Yes" : "No",
hl ? "Yes" : "No");
} else {
cli_out ("%-40s %7s %9s %11s %7s %15s %20s", path, hl_str,
sl_final, used_str, avail_str, sl? "Yes" : "No",

View File

@ -128,11 +128,12 @@
#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
#define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
#define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count"
#define QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
#define GFID_TO_PATH_KEY "glusterfs.gfid2path"
#define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime"
#define GF_XATTR_TRIGGER_SYNC "glusterfs.geo-rep.trigger-sync"
#define QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
/* Index xlator related */
#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
#define GF_XATTROP_INDEX_COUNT "glusterfs.xattrop_index_count"

View File

@ -2537,3 +2537,39 @@ syncop_inodelk (xlator_t *subvol, const char *volume, loc_t *loc, int32_t cmd,
return args.op_ret;
}
/*
 * Completion callback used by syncop_xattrop().
 *
 * The cookie is the caller's struct syncargs. The fop result and errno
 * are recorded in it, a reference is taken on the reply xdata when one
 * is present, and the caller is then signalled through __wake().
 * The reply dict itself is not retained.
 *
 * Always returns 0.
 */
int32_t
syncop_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                    int32_t op_ret, int32_t op_errno, dict_t *dict,
                    dict_t *xdata)
{
        struct syncargs *sargs = cookie;

        sargs->op_ret   = op_ret;
        sargs->op_errno = op_errno;

        if (xdata != NULL) {
                sargs->xdata = dict_ref (xdata);
        }

        __wake (sargs);

        return 0;
}
/*
 * Synchronous wrapper around the xattrop fop of @subvol.
 *
 * Winds subvol->fops->xattrop with @loc, @flags, @dict and @xdata through
 * the SYNCOP machinery, using syncop_xattrop_cbk() as the callback.
 *
 * Returns the fop's op_ret when it is non-negative, otherwise the
 * negated op_errno.
 */
int
syncop_xattrop (xlator_t *subvol, loc_t *loc, gf_xattrop_flags_t flags,
                dict_t *dict, dict_t *xdata)
{
        struct syncargs args = {0, };

        SYNCOP (subvol, (&args), syncop_xattrop_cbk, subvol->fops->xattrop,
                loc, flags, dict, xdata);

        /* syncop_xattrop_cbk() takes a reference on the reply xdata and
         * stores it in args.xdata. This function has no way to hand it
         * to the caller, so drop the reference here instead of leaking
         * it. */
        if (args.xdata)
                dict_unref (args.xdata);

        if (args.op_ret < 0)
                return -args.op_errno;

        return args.op_ret;
}

View File

@ -439,4 +439,8 @@ syncop_inodelk (xlator_t *subvol, const char *volume, loc_t *loc, int32_t cmd,
int
syncop_ipc (xlator_t *subvol, int op, dict_t *xdata_in, dict_t **xdata_out);
int
syncop_xattrop (xlator_t *subvol, loc_t *loc, gf_xattrop_flags_t flags,
dict_t *dict, dict_t *xdata);
#endif /* _SYNCOP_H */

View File

@ -860,9 +860,51 @@ loc_is_root (loc_t *loc)
} else if (loc && loc->inode && __is_root_gfid (loc->inode->gfid)) {
return _gf_true;
}
return _gf_false;
}
/*
 * Populate @child as the loc of the entry called @name under @parent.
 *
 * - child->pargfid is filled through loc_gfid() on the parent.
 * - child->path is allocated as "/<name>" when the parent path is "/",
 *   otherwise as "<parent path>/<name>".
 * - child->name points at the component after the last '/' in that path.
 * - child->parent holds a new reference on parent->inode.
 * - child->inode is a fresh inode created from the parent inode's table.
 *
 * @child, @parent, @name, parent->path and parent->inode must all be
 * non-NULL.
 *
 * Returns 0 on success. On any failure returns -1 and, if @child is
 * non-NULL, releases whatever it holds via loc_wipe().
 */
int32_t
loc_build_child (loc_t *child, loc_t *parent, char *name)
{
        int32_t  ret = -1;

        GF_VALIDATE_OR_GOTO ("xlator", child, out);
        GF_VALIDATE_OR_GOTO ("xlator", parent, out);
        GF_VALIDATE_OR_GOTO ("xlator", name, out);
        /* Both are dereferenced below (strcmp on the path, ->table on
         * the inode); reject them up front instead of crashing. */
        GF_VALIDATE_OR_GOTO ("xlator", parent->path, out);
        GF_VALIDATE_OR_GOTO ("xlator", parent->inode, out);

        loc_gfid (parent, child->pargfid);

        /* Avoid a doubled slash when the parent is the root path. */
        if (strcmp (parent->path, "/") == 0)
                ret = gf_asprintf ((char **)&child->path, "/%s", name);
        else
                ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
                                   name);

        if (ret < 0 || !child->path) {
                ret = -1;
                goto out;
        }

        /* The path built above always contains a '/', so strrchr()
         * cannot return NULL here. */
        child->name = strrchr (child->path, '/') + 1;

        child->parent = inode_ref (parent->inode);
        child->inode = inode_new (parent->inode->table);

        if (!child->inode) {
                ret = -1;
                goto out;
        }

        ret = 0;

out:
        if ((ret < 0) && child)
                loc_wipe (child);

        return ret;
}
int
xlator_destroy (xlator_t *xl)
{

View File

@ -960,6 +960,7 @@ int loc_path (loc_t *loc, const char *bname);
void loc_gfid (loc_t *loc, uuid_t gfid);
char* loc_gfid_utoa (loc_t *loc);
gf_boolean_t loc_is_root (loc_t *loc);
int32_t loc_build_child (loc_t *child, loc_t *parent, char *name);
int xlator_mem_acct_init (xlator_t *xl, int num_types);
int is_gf_log_command (xlator_t *trans, const char *name, char *value);
int glusterd_check_log_level (const char *value);

View File

@ -20,6 +20,8 @@ enum gf_marker_mem_types_ {
gf_marker_mt_quota_inode_ctx_t,
gf_marker_mt_marker_inode_ctx_t,
gf_marker_mt_inode_contribution_t,
gf_marker_mt_quota_meta_t,
gf_marker_mt_quota_synctask_t,
gf_marker_mt_end
};
#endif

View File

@ -37,23 +37,27 @@ mq_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
if (parent)
loc->parent = inode_ref (parent);
if (!uuid_is_null (inode->gfid))
uuid_copy (loc->gfid, inode->gfid);
loc->path = gf_strdup (path);
if (!loc->path) {
gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
goto loc_wipe;
goto out;
}
loc->name = strrchr (loc->path, '/');
if (loc->name)
loc->name++;
else
goto loc_wipe;
goto out;
ret = 0;
loc_wipe:
out:
if (ret < 0)
loc_wipe (loc);
out:
return ret;
}
@ -222,37 +226,39 @@ mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx,
int32_t
mq_dict_set_contribution (xlator_t *this, dict_t *dict,
loc_t *loc)
mq_dict_set_contribution (xlator_t *this, dict_t *dict, loc_t *loc,
uuid_t gfid, char *contri_key)
{
int32_t ret = -1;
char contri_key [512] = {0, };
int32_t ret = -1;
char key[CONTRI_KEY_MAX] = {0, };
GF_VALIDATE_OR_GOTO ("marker", this, out);
GF_VALIDATE_OR_GOTO ("marker", dict, out);
GF_VALIDATE_OR_GOTO ("marker", loc, out);
if (loc->parent) {
GET_CONTRI_KEY (contri_key, loc->parent->gfid, ret);
if (ret < 0) {
ret = -1;
goto out;
}
if (gfid && !uuid_is_null(gfid)) {
GET_CONTRI_KEY (key, gfid, ret);
} else if (loc->parent) {
GET_CONTRI_KEY (key, loc->parent->gfid, ret);
} else {
/* nameless lookup, fetch contributions to all parents */
GET_CONTRI_KEY (contri_key, NULL, ret);
GET_CONTRI_KEY (key, NULL, ret);
}
ret = dict_set_int64 (dict, contri_key, 0);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"unable to set dict value on %s.",
loc->path);
if (ret < 0)
goto out;
}
ret = 0;
ret = dict_set_int64 (dict, key, 0);
if (ret < 0)
goto out;
if (contri_key)
strncpy (contri_key, key, CONTRI_KEY_MAX);
out:
if (ret < 0)
gf_log_callingfn (this->name, GF_LOG_ERROR, "dict set failed");
return ret;
}

View File

@ -44,7 +44,7 @@ inode_contribution_t *
mq_add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *);
int32_t
mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *);
mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *, uuid_t, char *);
quota_inode_ctx_t *
mq_inode_ctx_new (inode_t *, xlator_t *);

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,7 @@
#define QUOTA_XATTR_PREFIX "trusted.glusterfs"
#define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty"
#define CONTRIBUTION "contri"
#define CONTRIBUTION "contri"
#define CONTRI_KEY_MAX 512
#define READDIR_BUF 4096
@ -59,21 +59,21 @@
ret = 0; \
} while (0);
#define GET_CONTRI_KEY(var, _gfid, _ret) \
do { \
if (_gfid != NULL) { \
char _gfid_unparsed[40]; \
uuid_unparse (_gfid, _gfid_unparsed); \
_ret = snprintf (var, CONTRI_KEY_MAX, \
QUOTA_XATTR_PREFIX \
#define GET_CONTRI_KEY(var, _gfid, _ret) \
do { \
if (_gfid != NULL) { \
char _gfid_unparsed[40]; \
uuid_unparse (_gfid, _gfid_unparsed); \
_ret = snprintf (var, CONTRI_KEY_MAX, \
QUOTA_XATTR_PREFIX \
".%s.%s." CONTRIBUTION, "quota", \
_gfid_unparsed); \
} else { \
_ret = snprintf (var, CONTRI_KEY_MAX, \
QUOTA_XATTR_PREFIX \
".%s.." CONTRIBUTION, "quota"); \
} \
} while (0);
_gfid_unparsed); \
} else { \
_ret = snprintf (var, CONTRI_KEY_MAX, \
QUOTA_XATTR_PREFIX \
".%s.." CONTRIBUTION, "quota"); \
} \
} while (0)
#define QUOTA_SAFE_INCREMENT(lock, var) \
do { \
@ -84,6 +84,8 @@
struct quota_inode_ctx {
int64_t size;
int64_t file_count;
int64_t dir_count;
int8_t dirty;
gf_boolean_t updation_status;
gf_lock_t lock;
@ -91,9 +93,28 @@ struct quota_inode_ctx {
};
typedef struct quota_inode_ctx quota_inode_ctx_t;
/*
 * Quota accounting values kept together: a size and separate counts
 * for files and directories.
 * NOTE(review): the CLI reads a packed struct with these same three
 * fields from the "trusted.glusterfs.quota.size" xattr; presumably this
 * is the in-memory counterpart of that value -- confirm.
 */
struct quota_meta {
/* presumably space usage in bytes -- TODO confirm */
int64_t size;
/* number of files accounted */
int64_t file_count;
/* number of directories accounted */
int64_t dir_count;
};
typedef struct quota_meta quota_meta_t;
/*
 * Bundle of values carried by a quota sync task.
 * NOTE(review): the code that fills and consumes this struct is not
 * visible here; the field notes below are inferred from names and types
 * and should be confirmed against that code.
 */
struct quota_synctask {
/* xlator the task belongs to */
xlator_t *this;
/* location the task operates on */
loc_t loc;
/* presumably xattr data associated with the task -- TODO confirm */
dict_t *dict;
/* presumably attributes of loc -- TODO confirm */
struct iatt buf;
/* presumably a contribution value passed to the task -- TODO confirm */
int64_t contri;
/* presumably tells statically allocated args from heap ones -- TODO confirm */
gf_boolean_t is_static;
};
typedef struct quota_synctask quota_synctask_t;
/*
 * One contribution record of an inode, holding a size contribution
 * together with file and directory counts.
 */
struct inode_contribution {
/* list linkage for chaining contribution records */
struct list_head contri_list;
/* presumably the size contributed, in bytes -- TODO confirm */
int64_t contribution;
/* file count for this contribution */
int64_t file_count;
/* directory count for this contribution */
int64_t dir_count;
/* presumably the gfid of the parent contributed to -- TODO confirm */
uuid_t gfid;
/* NOTE(review): presumably guards the fields above -- confirm */
gf_lock_t lock;
};
@ -103,7 +124,7 @@ int32_t
mq_get_lock_on_parent (call_frame_t *, xlator_t *);
int32_t
mq_req_xattr (xlator_t *, loc_t *, dict_t *);
mq_req_xattr (xlator_t *, loc_t *, dict_t *, char *);
int32_t
init_quota_priv (xlator_t *);
@ -117,6 +138,12 @@ mq_set_inode_xattr (xlator_t *, loc_t *);
int
mq_initiate_quota_txn (xlator_t *, loc_t *);
int
mq_initiate_quota_blocking_txn (xlator_t *, loc_t *);
int
mq_create_xattrs_txn (xlator_t *this, loc_t *loc);
int32_t
mq_dirty_inode_readdir (call_frame_t *, void *, xlator_t *,
int32_t, int32_t, fd_t *, dict_t *);
@ -124,6 +151,9 @@ mq_dirty_inode_readdir (call_frame_t *, void *, xlator_t *,
int32_t
mq_reduce_parent_size (xlator_t *, loc_t *, int64_t);
int32_t
mq_reduce_parent_size_txn (xlator_t *, loc_t *, int64_t);
int32_t
mq_rename_update_newpath (xlator_t *, loc_t *);

View File

@ -607,7 +607,7 @@ marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
mq_set_inode_xattr (this, &local->loc);
mq_create_xattrs_txn (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@ -681,7 +681,7 @@ marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
mq_set_inode_xattr (this, &local->loc);
mq_create_xattrs_txn (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@ -827,7 +827,7 @@ marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
mq_reduce_parent_size (this, &local->loc, -1);
mq_reduce_parent_size_txn (this, &local->loc, -1);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@ -896,7 +896,7 @@ marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (priv->feature_enabled & GF_QUOTA) {
if (!local->skip_txn)
mq_reduce_parent_size (this, &local->loc, -1);
mq_reduce_parent_size_txn (this, &local->loc, -1);
}
if (priv->feature_enabled & GF_XTIME)
@ -977,7 +977,7 @@ marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (priv->feature_enabled & GF_QUOTA) {
if (!local->skip_txn)
mq_set_inode_xattr (this, &local->loc);
mq_create_xattrs_txn (this, &local->loc);
}
@ -1065,10 +1065,11 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
frame->root->unique);
}
mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution);
mq_reduce_parent_size_txn (this, &oplocal->loc, oplocal->contribution);
if (local->loc.inode != NULL) {
mq_reduce_parent_size (this, &local->loc, local->contribution);
mq_reduce_parent_size_txn (this, &local->loc,
local->contribution);
}
newloc.inode = inode_ref (oplocal->loc.inode);
@ -1078,7 +1079,7 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
newloc.name++;
newloc.parent = inode_ref (local->loc.parent);
mq_set_inode_xattr (this, &newloc);
mq_create_xattrs_txn (this, &newloc);
loc_wipe (&newloc);
@ -1181,13 +1182,13 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *prenewparent, struct iatt *postnewparent,
dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
marker_local_t *oplocal = NULL;
call_stub_t *stub = NULL;
int32_t ret = 0;
char contri_key [512] = {0, };
loc_t newloc = {0, };
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
marker_local_t *oplocal = NULL;
call_stub_t *stub = NULL;
int32_t ret = 0;
char contri_key[CONTRI_KEY_MAX] = {0, };
loc_t newloc = {0, };
local = (marker_local_t *) frame->local;
@ -1284,10 +1285,11 @@ int32_t
marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
int32_t ret = 0;
int64_t *contribution = 0;
marker_local_t *local = NULL;
marker_local_t *oplocal = NULL;
char contri_key[CONTRI_KEY_MAX] = {0, };
int32_t ret = 0;
int64_t *contribution = 0;
local = frame->local;
oplocal = local->oplocal;
@ -1336,10 +1338,11 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *dict, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
int32_t ret = 0;
int64_t *contribution = 0;
marker_local_t *local = NULL;
marker_local_t *oplocal = NULL;
char contri_key[CONTRI_KEY_MAX] = {0, };
int32_t ret = 0;
int64_t *contribution = 0;
local = frame->local;
oplocal = local->oplocal;
@ -1403,9 +1406,10 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
int32_t ret = 0;
marker_local_t *local = NULL;
marker_local_t *oplocal = NULL;
char contri_key[CONTRI_KEY_MAX] = {0, };
int32_t ret = 0;
local = frame->local;
oplocal = local->oplocal;
@ -1764,8 +1768,9 @@ marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
mq_set_inode_xattr (this, &local->loc);
if (priv->feature_enabled & GF_QUOTA) {
mq_create_xattrs_txn (this, &local->loc);
}
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@ -1838,7 +1843,7 @@ marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG (local->mode))) {
mq_set_inode_xattr (this, &local->loc);
mq_create_xattrs_txn (this, &local->loc);
}
if (priv->feature_enabled & GF_XTIME)
@ -2706,7 +2711,7 @@ marker_lookup (call_frame_t *frame, xlator_t *this,
goto err;
if ((priv->feature_enabled & GF_QUOTA) && xattr_req)
mq_req_xattr (this, loc, xattr_req);
mq_req_xattr (this, loc, xattr_req, NULL);
wind:
STACK_WIND (frame, marker_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@ -2854,7 +2859,7 @@ marker_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
loc.parent = local->loc.inode = inode_ref (fd->inode);
mq_req_xattr (this, &loc, dict);
mq_req_xattr (this, &loc, dict, NULL);
}
STACK_WIND (frame, marker_readdirp_cbk,