features/shard: Introducing sharding translator
Based on the high-level design by Anand V. Avati, which can be found at
https://gist.github.com/avati/af04f1030dcf52e16535#sharding-xlator-stripe-20

Still to do:
* complete the implementation of inode write fops - [f]truncate, zerofill, fallocate, discard
* introduce a transaction mechanism in inode write fops
* complete readv
* handle open with O_TRUNC
* handle unlinking of all shards during unlink/rename
* compute total ia_size and ia_blocks in lookup, readdirp, etc.
* wind fsync/flush on all shards

Note: Most of the items above are related. Once we come up with a clean way to determine the last shard/shard count for a file/file size and the management of sparse regions of the file, implementing them becomes trivial.

Change-Id: Id871379b53a4a916e4baa2e06f197dd8c0043b0f
BUG: 1200082
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/9841
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
parent
32ed7aa5ad
commit
6f389fbb81
@ -160,6 +160,8 @@ AC_CONFIG_FILES([Makefile
|
||||
xlators/features/snapview-client/src/Makefile
|
||||
xlators/features/upcall/Makefile
|
||||
xlators/features/upcall/src/Makefile
|
||||
xlators/features/shard/Makefile
|
||||
xlators/features/shard/src/Makefile
|
||||
xlators/playground/Makefile
|
||||
xlators/playground/template/Makefile
|
||||
xlators/playground/template/src/Makefile
|
||||
|
@ -1,5 +1,5 @@
|
||||
SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier arbiter\
|
||||
protect compress changelog changetimerecorder ganesha gfid-access $(GLUPY_SUBDIR) qemu-block \
|
||||
upcall snapview-client snapview-server trash #path-converter # filter
|
||||
upcall snapview-client snapview-server trash shard #path-converter # filter
|
||||
|
||||
CLEANFILES =
|
||||
|
3
xlators/features/shard/Makefile.am
Normal file
3
xlators/features/shard/Makefile.am
Normal file
@ -0,0 +1,3 @@
|
||||
# The shard translator keeps all of its sources in src/.
SUBDIRS = src

# Nothing beyond the automake defaults needs cleaning at this level.
CLEANFILES =
|
16
xlators/features/shard/src/Makefile.am
Normal file
16
xlators/features/shard/src/Makefile.am
Normal file
@ -0,0 +1,16 @@
|
||||
# Build shard.la as a libtool module and install it into the versioned
# "features" translator directory.
xlator_LTLIBRARIES = shard.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features

# -module: built to be loaded at run time rather than linked against;
# -avoid-version: no version suffix on the installed file.
shard_la_LDFLAGS = -module -avoid-version

shard_la_SOURCES = shard.c

shard_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la

# Private headers: shipped in the tarball, never installed.
noinst_HEADERS = shard.h shard-mem-types.h

AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src

AM_CFLAGS = -Wall $(GF_CFLAGS)

CLEANFILES =
|
22
xlators/features/shard/src/shard-mem-types.h
Normal file
22
xlators/features/shard/src/shard-mem-types.h
Normal file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
||||
This file is part of GlusterFS.
|
||||
|
||||
This file is licensed to you under your choice of the GNU Lesser
|
||||
General Public License, version 3 or any later version (LGPLv3 or
|
||||
later), or the GNU General Public License, version 2 (GPLv2), in all
|
||||
cases as published by the Free Software Foundation.
|
||||
*/
|
||||
#ifndef __SHARD_MEM_TYPES_H__
|
||||
#define __SHARD_MEM_TYPES_H__
|
||||
|
||||
#include "mem-types.h"
|
||||
|
||||
enum gf_shard_mem_types_ {
|
||||
gf_shard_mt_priv_t = gf_common_mt_end + 1,
|
||||
gf_shard_mt_inode_list,
|
||||
gf_shard_mt_inode_ctx_t,
|
||||
gf_shard_mt_iovec,
|
||||
gf_shard_mt_end
|
||||
};
|
||||
#endif
|
1791
xlators/features/shard/src/shard.c
Normal file
1791
xlators/features/shard/src/shard.c
Normal file
File diff suppressed because it is too large
Load Diff
118
xlators/features/shard/src/shard.h
Normal file
118
xlators/features/shard/src/shard.h
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
||||
This file is part of GlusterFS.
|
||||
|
||||
This file is licensed to you under your choice of the GNU Lesser
|
||||
General Public License, version 3 or any later version (LGPLv3 or
|
||||
later), or the GNU General Public License, version 2 (GPLv2), in all
|
||||
cases as published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __SHARD_H__
|
||||
#define __SHARD_H__
|
||||
|
||||
#ifndef _CONFIG_H
|
||||
#define _CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "xlator.h"
|
||||
#include "compat-errno.h"
|
||||
|
||||
/* Name that SHARD_ENTRY_FOP_CHECK refuses to operate on directly under the
 * root directory. */
#define GF_SHARD_DIR                    ".shard"

/* 128 KB, expressed in GF_UNIT_KB units.
 * NOTE(review): presumably the smallest accepted shard block size - confirm
 * against the option table in shard.c. */
#define SHARD_MIN_BLOCK_SIZE            (128 * GF_UNIT_KB)

/* Extended-attribute key.
 * NOTE(review): presumably stores the block size a file was sharded with -
 * confirm against shard.c. */
#define GF_XATTR_SHARD_BLOCK_SIZE       "trusted.glusterfs.shard.block-size"

/* Fixed gfid in textual form.
 * NOTE(review): presumably the gfid assigned to the GF_SHARD_DIR directory -
 * confirm against shard.c. */
#define SHARD_ROOT_GFID                 "be318638-e8a0-4c6d-977d-7a937aa84806"

/* NOTE(review): presumably the LRU limit of the translator's private inode
 * table - confirm against shard.c. */
#define SHARD_INODE_LRU_LIMIT           4096
|
||||
|
||||
/*
 * Index of the block that contains byte offset @off when a file is divided
 * into consecutive blocks of @shard_size bytes (integer division).
 *
 * Every argument is parenthesized so that expression arguments such as
 * `base + delta` keep their intended grouping. @shard_size must be non-zero.
 */
#define get_lowest_block(off, shard_size) ((off) / (shard_size))

/*
 * Index of the block that contains the last byte of the @len-byte range
 * starting at @off, i.e. byte (off + len - 1).
 *
 * @len is expected to be at least 1: with off == 0 and len == 0 the
 * intermediate value is -1 (or wraps for unsigned types). @shard_size must
 * be non-zero.
 */
#define get_highest_block(off, len, shard_size) \
        (((off) + (len) - 1) / (shard_size))
|
||||
|
||||
/*
 * Guard for entry operations (those addressed by parent + name).
 *
 * Sets @op_errno to EPERM and jumps to @label when either
 *   1. @loc names GF_SHARD_DIR and its parent is reported as the root by
 *      __is_root_gfid() (checked on loc->parent->gfid when a parent inode is
 *      set, otherwise/also on loc->pargfid), or
 *   2. the parent of @loc is reported as the shard directory by
 *      __is_shard_dir() (same two sources for the parent gfid).
 * Otherwise control simply falls through.
 *
 * @loc      pointer expression to a loc; evaluated several times, so it must
 *           be free of side effects. It is parenthesized at every use, which
 *           allows arguments such as `&local->loc`.
 * @op_errno lvalue that receives EPERM.
 * @label    label in the calling function to jump to on rejection.
 */
#define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) do {                    \
        if (((loc)->name && !strcmp (GF_SHARD_DIR, (loc)->name)) &&         \
            ((((loc)->parent) &&                                            \
              __is_root_gfid ((loc)->parent->gfid)) ||                      \
             __is_root_gfid ((loc)->pargfid))) {                            \
                (op_errno) = EPERM;                                         \
                goto label;                                                 \
        }                                                                   \
                                                                            \
        if (((loc)->parent &&                                               \
             __is_shard_dir ((loc)->parent->gfid)) ||                       \
            __is_shard_dir ((loc)->pargfid)) {                              \
                (op_errno) = EPERM;                                         \
                goto label;                                                 \
        }                                                                   \
} while (0)
|
||||
|
||||
/*
 * Guard for inode operations: when __is_shard_dir() reports @gfid as the
 * shard directory, store EPERM in @err and jump to @label; otherwise fall
 * through without touching @err.
 *
 * @gfid   gfid of the inode being operated on.
 * @err    lvalue that receives EPERM on rejection.
 * @label  label in the calling function to jump to on rejection.
 */
#define SHARD_INODE_OP_CHECK(gfid, err, label) do {                         \
        if (!__is_shard_dir (gfid))                                         \
                break; /* leaves only this do/while: nothing to reject */   \
        (err) = EPERM;                                                      \
        goto label;                                                         \
} while (0)
|
||||
|
||||
/*
 * Unwind @frame for @fop and release the shard_local_t attached to it.
 *
 * Order of operations:
 *   1. If @frame is non-NULL, remember frame->local and reset it to NULL.
 *   2. Call STACK_UNWIND_STRICT (fop, frame, params).
 *   3. If a local was remembered, shard_local_wipe() it and hand it back
 *      with mem_put().
 * The local is detached before the unwind and released only afterwards.
 * NOTE(review): presumably so that nothing reached through the unwind can
 * see or free the local a second time - confirm against STACK_UNWIND_STRICT.
 *
 * @frame is evaluated more than once and must be free of side effects.
 *
 * The macro deliberately ends at `while (0)` with no trailing line
 * continuation, so the line that follows the definition can never be
 * spliced into the macro body.
 */
#define SHARD_STACK_UNWIND(fop, frame, params ...) do {                     \
        shard_local_t *__local = NULL;                                      \
        if (frame) {                                                        \
                __local = (frame)->local;                                   \
                (frame)->local = NULL;                                      \
        }                                                                   \
        STACK_UNWIND_STRICT (fop, frame, params);                           \
        if (__local) {                                                      \
                shard_local_wipe (__local);                                 \
                mem_put (__local);                                          \
        }                                                                   \
} while (0)
|
||||
|
||||
typedef struct shard_priv {
|
||||
uint64_t block_size;
|
||||
uuid_t dot_shard_gfid;
|
||||
inode_table_t *inode_table;
|
||||
inode_t *dot_shard_inode;
|
||||
} shard_priv_t;
|
||||
|
||||
typedef struct {
|
||||
loc_t *loc;
|
||||
short type;
|
||||
char *domain;
|
||||
} shard_lock_t;
|
||||
|
||||
typedef struct shard_local {
|
||||
int op_ret;
|
||||
int op_errno;
|
||||
int first_block;
|
||||
int last_block;
|
||||
int num_blocks;
|
||||
int call_count;
|
||||
int eexist_count;
|
||||
int xflag;
|
||||
int count;
|
||||
uint32_t flags;
|
||||
uint64_t block_size;
|
||||
off_t offset;
|
||||
size_t total_size;
|
||||
uuid_t shard_gfid;
|
||||
loc_t loc;
|
||||
loc_t dot_shard_loc;
|
||||
fd_t *fd;
|
||||
dict_t *xattr_req;
|
||||
inode_t **inode_list;
|
||||
struct iovec *vector;
|
||||
struct iobref *iobref;
|
||||
struct {
|
||||
int lock_count;
|
||||
fop_inodelk_cbk_t inodelk_cbk;
|
||||
shard_lock_t *shard_lock;
|
||||
} lock;
|
||||
} shard_local_t;
|
||||
|
||||
/*
 * Shard-specific context kept for an inode.
 *
 * NOTE(review): rdev and mode presumably mirror the corresponding
 * attributes of the file - confirm against shard.c.
 */
typedef struct shard_inode_ctx {
        uint32_t rdev;

        /* The block size with which this inode is sharded */
        uint64_t block_size;

        mode_t   mode;
} shard_inode_ctx_t;
|
||||
|
||||
#endif /* __SHARD_H__ */
|
@ -3353,6 +3353,18 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
|
||||
if (ret == -1)
|
||||
goto out;
|
||||
|
||||
ret = dict_get_str_boolean (set_dict, "features.shard", _gf_false);
|
||||
if (ret == -1)
|
||||
goto out;
|
||||
|
||||
if (ret) {
|
||||
xl = volgen_graph_add (graph, "features/shard", volname);
|
||||
if (!xl) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* As of now snapshot volume is read-only. Read-only xlator is loaded
|
||||
* in client graph so that AFR & DHT healing can be done in server.
|
||||
*/
|
||||
@ -3388,7 +3400,6 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
|
||||
goto out;
|
||||
if (ret) {
|
||||
xl = volgen_graph_add (graph, "encryption/crypt", volname);
|
||||
|
||||
if (!xl) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
|
@ -1763,6 +1763,17 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
||||
.type = NO_DOC,
|
||||
.op_version = GD_OP_VERSION_3_7_0,
|
||||
},
|
||||
{ .key = "features.shard",
|
||||
.voltype = "features/shard",
|
||||
.value = "off",
|
||||
.op_version = GD_OP_VERSION_3_7_0,
|
||||
.flags = OPT_FLAG_CLIENT_OPT
|
||||
},
|
||||
{ .key = "features.shard-block-size",
|
||||
.voltype = "features/shard",
|
||||
.op_version = GD_OP_VERSION_3_7_0,
|
||||
.flags = OPT_FLAG_CLIENT_OPT
|
||||
},
|
||||
{ .key = NULL
|
||||
}
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user