features/shard: Introducing sharding translator

Based on the high-level design by Anand V. Avati, which can be found at:
https://gist.github.com/avati/af04f1030dcf52e16535#sharding-xlator-stripe-20

Still to-do:
        * complete implementation of inode write fops - [f]truncate,
          zerofill, fallocate, discard
        * introduce transaction mechanism in inode write fops
        * complete readv
        * Handle open with O_TRUNC
        * Handle unlinking of all shards during unlink/rename
        * Compute total ia_size and ia_blocks in lookup, readdirp, etc
        * wind fsync/flush on all shards

        Note: Most of the items above are related. Once we come up
        with a clean way to determine the last shard (or the shard
        count) of a file, the file size, and the management of sparse
        regions of the file, implementing them becomes trivial.

Change-Id: Id871379b53a4a916e4baa2e06f197dd8c0043b0f
BUG: 1200082
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/9841
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Krutika Dhananjay 2014-11-21 11:47:23 +05:30 committed by Vijay Bellur
parent 32ed7aa5ad
commit 6f389fbb81
9 changed files with 1976 additions and 2 deletions

View File

@ -160,6 +160,8 @@ AC_CONFIG_FILES([Makefile
xlators/features/snapview-client/src/Makefile
xlators/features/upcall/Makefile
xlators/features/upcall/src/Makefile
xlators/features/shard/Makefile
xlators/features/shard/src/Makefile
xlators/playground/Makefile
xlators/playground/template/Makefile
xlators/playground/template/src/Makefile

View File

@ -1,5 +1,5 @@
SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier arbiter\
protect compress changelog changetimerecorder ganesha gfid-access $(GLUPY_SUBDIR) qemu-block \
upcall snapview-client snapview-server trash #path-converter # filter
upcall snapview-client snapview-server trash shard #path-converter # filter
CLEANFILES =

View File

@ -0,0 +1,3 @@
# The shard translator keeps its sources in the src subdirectory.
SUBDIRS = src
# No extra files to remove on "make clean" at this level.
CLEANFILES =

View File

@ -0,0 +1,16 @@
# Build the shard translator as the libtool library shard.la.
xlator_LTLIBRARIES = shard.la
# Install location used for the xlator_ prefixed targets above.
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
# Libtool link flags for shard.la.
shard_la_LDFLAGS = -module -avoid-version
# The translator is built from a single source file.
shard_la_SOURCES = shard.c
# Link against the in-tree libglusterfs.
shard_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
# Headers that are part of the sources but are not installed.
noinst_HEADERS = shard.h shard-mem-types.h
# Add the libglusterfs sources to the include path.
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
# No extra files to remove on "make clean".
CLEANFILES =

View File

@ -0,0 +1,22 @@
/*
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#ifndef __SHARD_MEM_TYPES_H__
#define __SHARD_MEM_TYPES_H__
#include "mem-types.h"
/*
 * Memory type identifiers used by the shard translator. The first value
 * is gf_common_mt_end + 1, i.e. numbering continues after the common
 * types pulled in from mem-types.h. Do not reorder: each enumerator's
 * value depends on its position.
 */
enum gf_shard_mem_types_ {
gf_shard_mt_priv_t = gf_common_mt_end + 1,
gf_shard_mt_inode_list,
gf_shard_mt_inode_ctx_t,
gf_shard_mt_iovec,
gf_shard_mt_end /* last enumerator; NOTE(review): presumably a count/sentinel - confirm */
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,118 @@
/*
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#ifndef __SHARD_H__
#define __SHARD_H__
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include "xlator.h"
#include "compat-errno.h"
/* Entry name that SHARD_ENTRY_FOP_CHECK refuses to operate on under the root. */
#define GF_SHARD_DIR ".shard"
/* NOTE(review): presumably the smallest accepted shard block size - confirm in shard.c. */
#define SHARD_MIN_BLOCK_SIZE (128*GF_UNIT_KB)
/* Extended attribute key naming a shard block size. */
#define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size"
/* NOTE(review): presumably the fixed gfid (string form) of GF_SHARD_DIR - confirm. */
#define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806"
/* NOTE(review): presumably the LRU limit of the translator's inode table - confirm. */
#define SHARD_INODE_LRU_LIMIT 4096
/*
 * Index of the block that contains byte offset `off` when the file is cut
 * into blocks of `shard_size` bytes.
 *
 * Every argument is parenthesised so that compound expressions such as
 * `offset + delta` expand with the intended precedence. `shard_size`
 * must be non-zero; each argument is evaluated exactly once.
 */
#define get_lowest_block(off, shard_size) ((off) / (shard_size))
/*
 * Index of the block that contains the last byte of the range
 * [off, off + len). With len == 0 this is the block holding byte
 * off - 1, exactly as the expression is written.
 */
#define get_highest_block(off, len, shard_size) \
        (((off) + (len) - 1) / (shard_size))
/*
 * Refuse entry operations that target the shard directory or anything
 * directly underneath it.
 *
 *   loc      - pointer to the location being operated on; its name,
 *              parent and pargfid members are inspected
 *   op_errno - lvalue that is set to EPERM when the operation is refused
 *   label    - label jumped to when the operation is refused
 *
 * Two cases are refused:
 *   1. the entry is named GF_SHARD_DIR and its parent is the root, as
 *      reported by either loc->parent->gfid or loc->pargfid;
 *   2. the parent is the shard directory, again judged from either
 *      loc->parent->gfid or loc->pargfid.
 *
 * `loc` is parenthesised at every use so that callers may pass an
 * expression such as `&local->loc` and not only a plain pointer variable.
 */
#define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) do {               \
        if (((loc)->name && !strcmp (GF_SHARD_DIR, (loc)->name)) &&    \
            ((((loc)->parent) &&                                       \
              __is_root_gfid ((loc)->parent->gfid)) ||                 \
             __is_root_gfid ((loc)->pargfid))) {                       \
                (op_errno) = EPERM;                                    \
                goto label;                                            \
        }                                                              \
                                                                       \
        if (((loc)->parent &&                                          \
             __is_shard_dir ((loc)->parent->gfid)) ||                  \
            __is_shard_dir ((loc)->pargfid)) {                         \
                (op_errno) = EPERM;                                    \
                goto label;                                            \
        }                                                              \
} while (0)
/*
 * Refuse inode operations on the shard directory.
 *
 *   gfid  - gfid of the inode being operated on
 *   err   - lvalue that is set to EPERM when the operation is refused
 *   label - label jumped to when the operation is refused
 */
#define SHARD_INODE_OP_CHECK(gfid, err, label) do {    \
        if (__is_shard_dir (gfid)) {                   \
                (err) = EPERM;                         \
                goto label;                            \
        }                                              \
} while (0)
/*
 * Unwind `frame` for `fop` and release the shard_local_t attached to it.
 *
 *   fop    - name of the file operation being unwound
 *   frame  - call frame; may be NULL, in which case nothing is released
 *   params - arguments forwarded unchanged to STACK_UNWIND_STRICT
 *
 * frame->local is detached (and set to NULL) before STACK_UNWIND_STRICT
 * runs; the detached object is passed to shard_local_wipe and mem_put
 * only afterwards. NOTE(review): presumably the frame must not be touched
 * once it has been unwound - confirm against stack.h.
 *
 * The closing line deliberately carries no trailing backslash: a
 * continuation there would splice the next source line into this macro.
 */
#define SHARD_STACK_UNWIND(fop, frame, params ...) do {        \
        shard_local_t *__local = NULL;                         \
        if (frame) {                                           \
                __local = (frame)->local;                      \
                (frame)->local = NULL;                         \
        }                                                      \
        STACK_UNWIND_STRICT (fop, frame, params);              \
        if (__local) {                                         \
                shard_local_wipe (__local);                    \
                mem_put (__local);                             \
        }                                                      \
} while (0)
/*
 * State kept once per shard translator instance.
 * NOTE(review): the field roles below are inferred from their names and
 * types; confirm against shard.c.
 */
typedef struct shard_priv {
uint64_t block_size; /* presumably the configured shard block size */
uuid_t dot_shard_gfid; /* presumably the gfid of the GF_SHARD_DIR directory */
inode_table_t *inode_table;
inode_t *dot_shard_inode; /* presumably the inode of the GF_SHARD_DIR directory */
} shard_priv_t;
/*
 * Lock descriptor; shard_local_t points at these through lock.shard_lock.
 * NOTE(review): field roles are inferred from names; confirm in shard.c.
 */
typedef struct {
loc_t *loc; /* location the lock refers to */
short type; /* presumably the lock type (read/write) - confirm */
char *domain; /* presumably the lock domain string - confirm */
} shard_lock_t;
/*
 * Per-call state of the shard translator. An instance is attached to
 * frame->local and released by SHARD_STACK_UNWIND via shard_local_wipe
 * and mem_put.
 * NOTE(review): field roles below are inferred from names; confirm in
 * shard.c.
 */
typedef struct shard_local {
int op_ret;
int op_errno;
/*
 * NOTE(review): these block indices are plain int, while offsets are
 * off_t and block sizes uint64_t; check for truncation on very large
 * files.
 */
int first_block;
int last_block;
int num_blocks;
int call_count;
int eexist_count;
int xflag;
int count;
uint32_t flags;
uint64_t block_size;
off_t offset;
size_t total_size;
uuid_t shard_gfid;
loc_t loc;
loc_t dot_shard_loc;
fd_t *fd;
dict_t *xattr_req;
inode_t **inode_list;
struct iovec *vector;
struct iobref *iobref;
/* Locking state for this call. */
struct {
int lock_count;
fop_inodelk_cbk_t inodelk_cbk;
shard_lock_t *shard_lock;
} lock;
} shard_local_t;
/*
 * Shard-specific data kept per inode.
 * NOTE(review): presumably stored in the inode's translator context -
 * confirm in shard.c.
 */
typedef struct shard_inode_ctx {
uint32_t rdev;
uint64_t block_size; /* The block size with which this inode is
sharded */
mode_t mode;
} shard_inode_ctx_t;
#endif /* __SHARD_H__ */

View File

@ -3353,6 +3353,18 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
if (ret == -1)
goto out;
ret = dict_get_str_boolean (set_dict, "features.shard", _gf_false);
if (ret == -1)
goto out;
if (ret) {
xl = volgen_graph_add (graph, "features/shard", volname);
if (!xl) {
ret = -1;
goto out;
}
}
/* As of now snapshot volume is read-only. Read-only xlator is loaded
* in client graph so that AFR & DHT healing can be done in server.
*/
@ -3388,7 +3400,6 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
goto out;
if (ret) {
xl = volgen_graph_add (graph, "encryption/crypt", volname);
if (!xl) {
ret = -1;
goto out;

View File

@ -1763,6 +1763,17 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.type = NO_DOC,
.op_version = GD_OP_VERSION_3_7_0,
},
{ .key = "features.shard",
.voltype = "features/shard",
.value = "off",
.op_version = GD_OP_VERSION_3_7_0,
.flags = OPT_FLAG_CLIENT_OPT
},
{ .key = "features.shard-block-size",
.voltype = "features/shard",
.op_version = GD_OP_VERSION_3_7_0,
.flags = OPT_FLAG_CLIENT_OPT
},
{ .key = NULL
}
};