cluster/ec: Added erasure code translator

Change-Id: I293917501d5c2ca4cdc6303df30cf0b568cea361
BUG: 1118629
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/7749
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Xavier Hernandez 2014-05-05 12:57:34 +02:00 committed by Vijay Bellur
parent 6b4702897b
commit ad112305a1
27 changed files with 26034 additions and 1 deletions

View File

@ -72,6 +72,8 @@ AC_CONFIG_FILES([Makefile
xlators/cluster/stripe/src/Makefile
xlators/cluster/dht/Makefile
xlators/cluster/dht/src/Makefile
xlators/cluster/ec/Makefile
xlators/cluster/ec/src/Makefile
xlators/performance/Makefile
xlators/performance/write-behind/Makefile
xlators/performance/write-behind/src/Makefile

View File

@ -1,3 +1,3 @@
SUBDIRS = stripe afr dht
SUBDIRS = stripe afr dht ec
CLEANFILES =

View File

@ -0,0 +1,3 @@
SUBDIRS = src
CLEANFILES =

View File

@ -0,0 +1,49 @@
# Automake rules for the cluster/ec translator. The translator is built as a
# libtool module (ec.la) and installed into the versioned xlator/cluster
# directory.
xlator_LTLIBRARIES = ec.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
# Translator sources, one file per line.
ec_sources := ec.c
ec_sources += ec-data.c
ec_sources += ec-helpers.c
ec_sources += ec-common.c
ec_sources += ec-generic.c
ec_sources += ec-locks.c
ec_sources += ec-dir-read.c
ec_sources += ec-dir-write.c
ec_sources += ec-inode-read.c
ec_sources += ec-inode-write.c
ec_sources += ec-combine.c
ec_sources += ec-gf.c
ec_sources += ec-method.c
ec_sources += ec-heal.c
# Translator headers (listed in _SOURCES below so they are distributed).
ec_headers := ec.h
ec_headers += ec-mem-types.h
ec_headers += ec-helpers.h
ec_headers += ec-data.h
ec_headers += ec-fops.h
ec_headers += ec-common.h
ec_headers += ec-combine.h
ec_headers += ec-gf.h
ec_headers += ec-method.h
# Shared helper code from xlators/lib that is compiled into this module.
ec_ext_sources = $(top_builddir)/xlators/lib/src/libxlator.c
ec_ext_headers = $(top_builddir)/xlators/lib/src/libxlator.h
# -module: build a dlopen()able plugin; -avoid-version: no version suffix.
ec_la_LDFLAGS = -module -avoid-version
ec_la_SOURCES = $(ec_sources) $(ec_headers) $(ec_ext_sources) $(ec_ext_headers)
ec_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
AM_CPPFLAGS = $(GF_CPPFLAGS)
AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
# After installation, expose the module under the alias "disperse.so" as
# well, through a symbolic link that points at ec.so.
install-data-hook:
ln -sf ec.so $(DESTDIR)$(xlatordir)/disperse.so
# Remove the alias created by install-data-hook.
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/disperse.so

View File

@ -0,0 +1,787 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#include <fnmatch.h>
#include "libxlator.h"
#include "ec-data.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
/* Private helper types used as the 'arg' of dict_foreach() callbacks. */
struct _ec_dict_info;
typedef struct _ec_dict_info ec_dict_info_t;
struct _ec_dict_combine;
typedef struct _ec_dict_combine ec_dict_combine_t;
/*
 * State of a dictionary comparison (see ec_dict_compare()): 'dict' is the
 * reference dictionary each visited key is looked up in and 'count' starts
 * as its number of entries and is decremented for every key found in it.
 */
struct _ec_dict_info
{
dict_t * dict;
int32_t count;
};
/*
 * State of a dictionary combination (see ec_dict_combine()): 'cbk' is the
 * group of answers being combined and 'which' selects the dictionary of each
 * answer to use (EC_COMBINE_XDATA or EC_COMBINE_DICT).
 */
struct _ec_dict_combine
{
ec_cbk_data_t * cbk;
int32_t which;
};
/*
 * Keeps in (*dst_sec, *dst_nsec) the most recent of two timestamps.
 *
 * The destination is overwritten with (src_sec, src_nsec) only when the
 * source is strictly newer; otherwise it is left untouched.
 */
void ec_iatt_time_merge(uint32_t * dst_sec, uint32_t * dst_nsec,
                        uint32_t src_sec, uint32_t src_nsec)
{
    /* Destination already has a later second: nothing to do. */
    if (*dst_sec > src_sec)
    {
        return;
    }

    /* Same second: only a strictly greater nanosecond part wins. */
    if ((*dst_sec == src_sec) && (*dst_nsec >= src_nsec))
    {
        return;
    }

    *dst_sec = src_sec;
    *dst_nsec = src_nsec;
}
/*
 * Combines two arrays of 'count' iatt structures, element by element.
 *
 * First every pair (dst[i], src[i]) is validated: inode number, uid, gid,
 * mode and gfid must match, as well as rdev for block/character devices and
 * size for regular files. If any pair differs a warning is logged and
 * nothing is modified.
 *
 * When all pairs are compatible, each dst[i] accumulates the block count of
 * src[i], keeps the largest block size and the most recent atime, mtime and
 * ctime.
 *
 * Returns 1 if the arrays have been combined, 0 if they are incompatible.
 */
int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count)
{
    int32_t i;

    /* Validate all the entries before touching any of them. */
    for (i = 0; i < count; i++)
    {
        if ((dst[i].ia_ino != src[i].ia_ino) ||
            (dst[i].ia_uid != src[i].ia_uid) ||
            (dst[i].ia_gid != src[i].ia_gid) ||
            (((dst[i].ia_type == IA_IFBLK) || (dst[i].ia_type == IA_IFCHR)) &&
             (dst[i].ia_rdev != src[i].ia_rdev)) ||
            ((dst[i].ia_type == IA_IFREG) &&
             (dst[i].ia_size != src[i].ia_size)) ||
            (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
             st_mode_from_ia(src[i].ia_prot, src[i].ia_type)) ||
            (uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0))
        {
            gf_log(THIS->name, GF_LOG_WARNING,
                   "Failed to combine iatt (inode: %lu-%lu, links: %u-%u, "
                   "uid: %u-%u, gid: %u-%u, rdev: %lu-%lu, size: %lu-%lu, "
                   "mode: %o-%o)",
                   dst[i].ia_ino, src[i].ia_ino,
                   dst[i].ia_nlink, src[i].ia_nlink,
                   dst[i].ia_uid, src[i].ia_uid,
                   dst[i].ia_gid, src[i].ia_gid,
                   dst[i].ia_rdev, src[i].ia_rdev,
                   dst[i].ia_size, src[i].ia_size,
                   st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
                   st_mode_from_ia(src[i].ia_prot, src[i].ia_type));

            return 0;
        }
    }

    /* Everything matches: merge each source entry into its destination. */
    for (i = 0; i < count; i++)
    {
        dst[i].ia_blocks += src[i].ia_blocks;
        if (dst[i].ia_blksize < src[i].ia_blksize)
        {
            dst[i].ia_blksize = src[i].ia_blksize;
        }

        ec_iatt_time_merge(&dst[i].ia_atime, &dst[i].ia_atime_nsec,
                           src[i].ia_atime, src[i].ia_atime_nsec);
        ec_iatt_time_merge(&dst[i].ia_mtime, &dst[i].ia_mtime_nsec,
                           src[i].ia_mtime, src[i].ia_mtime_nsec);
        ec_iatt_time_merge(&dst[i].ia_ctime, &dst[i].ia_ctime_nsec,
                           src[i].ia_ctime, src[i].ia_ctime_nsec);
    }

    return 1;
}
/*
 * Rescales the block count of each of the 'count' entries of 'iatt'.
 *
 * Every ia_blocks value is multiplied by ec->fragments and divided by
 * 'answers', rounding the result up.
 */
void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
                     int32_t answers)
{
    size_t blocks;
    int32_t idx;

    /* Walk the array from the last entry down to the first one. */
    for (idx = count; idx-- > 0; )
    {
        /* Adding 'answers - 1' before dividing rounds the quotient up. */
        blocks = iatt[idx].ia_blocks * ec->fragments + answers - 1;
        iatt[idx].ia_blocks = blocks / answers;
    }
}
/*
 * dict_foreach() callback used by ec_dict_compare().
 *
 * 'arg' is an ec_dict_info_t. The visited 'key' must also exist in the
 * reference dictionary (info->dict); each key found there decrements
 * info->count. Keys whose values are allowed to differ between answers are
 * accepted without comparing contents; any other key must have an identical
 * value in both dictionaries.
 *
 * Returns 0 if the key is acceptable, -1 otherwise.
 */
int32_t ec_dict_data_compare(dict_t * dict, char * key, data_t * value,
                             void * arg)
{
    ec_dict_info_t * ctx = arg;
    data_t * other = dict_get(ctx->dict, key);

    if (other == NULL)
    {
        gf_log("ec", GF_LOG_DEBUG, "key '%s' found only on one dict", key);

        return -1;
    }

    ctx->count--;

    /* These keys are combined later; their contents are not compared. */
    if ((strcmp(key, GF_CONTENT_KEY) == 0) ||
        (strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
        (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0) ||
        (strcmp(key, GF_XATTR_LOCKINFO_KEY) == 0) ||
        (strcmp(key, GF_XATTR_CLRLK_CMD) == 0) ||
        (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) ||
        (fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) ||
        (XATTR_IS_NODE_UUID(key)))
    {
        return 0;
    }

    if ((other->len == value->len) &&
        (memcmp(other->data, value->data, other->len) == 0))
    {
        return 0;
    }

    gf_log("ec", GF_LOG_DEBUG, "key '%s' is different (size: %u, %u)",
           key, other->len, value->len);

    return -1;
}
/*
 * dict_foreach() callback that logs every key that is missing from the
 * dictionary passed in 'arg'. It never stops the iteration (always 0).
 */
int32_t ec_dict_data_show(dict_t * dict, char * key, data_t * value,
                          void * arg)
{
    dict_t * other = arg;

    if (dict_get(other, key) != NULL)
    {
        return 0;
    }

    gf_log("ec", GF_LOG_DEBUG, "key '%s' found only on one dict", key);

    return 0;
}
/*
 * Compares two dictionaries, either of which may be NULL.
 *
 * Two NULL dictionaries are equal. Otherwise every key of one dictionary
 * must be present (and acceptable to ec_dict_data_compare()) in the other,
 * and no key of the reference dictionary may be left unmatched. Unmatched
 * keys are logged at debug level.
 *
 * Returns 1 if the dictionaries are considered equal, 0 otherwise.
 */
int32_t ec_dict_compare(dict_t * dict1, dict_t * dict2)
{
    ec_dict_info_t info;
    dict_t * other;

    if ((dict1 == NULL) && (dict2 == NULL))
    {
        return 1;
    }

    /* The first non-NULL dictionary is the reference to look keys up in. */
    if (dict1 != NULL)
    {
        info.dict = dict1;
        other = dict2;
    }
    else
    {
        info.dict = dict2;
        other = dict1;
    }
    info.count = info.dict->count;

    if ((other != NULL) &&
        (dict_foreach(other, ec_dict_data_compare, &info) != 0))
    {
        return 0;
    }

    if (info.count == 0)
    {
        return 1;
    }

    /* Some keys of the reference were not matched: report them. */
    dict_foreach(info.dict, ec_dict_data_show, other);

    return 0;
}
/*
 * Collects the value of 'key' from every answer chained from 'cbk'.
 *
 * On input *count is the capacity of 'list'; on success it receives the
 * number of values stored. 'which' selects the xdata (EC_COMBINE_XDATA) or
 * the dict member of each answer.
 *
 * Returns 1 on success, 0 if there are more answers than room in 'list' or
 * if some answer does not contain the key.
 */
int32_t ec_dict_list(data_t ** list, int32_t * count, ec_cbk_data_t * cbk,
                     int32_t which, char * key)
{
    ec_cbk_data_t * ans = cbk;
    dict_t * dict;
    int32_t limit = *count;
    int32_t found = 0;

    while (ans != NULL)
    {
        if (found >= limit)
        {
            gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Unexpected number of "
                                                     "dictionaries");

            return 0;
        }

        if (which == EC_COMBINE_XDATA)
        {
            dict = ans->xdata;
        }
        else
        {
            dict = ans->dict;
        }

        list[found] = dict_get(dict, key);
        if (list[found] == NULL)
        {
            gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Unexpected missing "
                                                     "dictionary entry");

            return 0;
        }

        found++;
        ans = ans->next;
    }

    *count = found;

    return 1;
}
/*
 * Expands a concat format and splits it into prefix, separator and suffix.
 *
 * The formatted string must have the shape "<pre>{<sep>}<post>". The braces
 * are replaced by string terminators so that the returned pointer is the
 * prefix, *sep points to the separator and *post to the suffix, all inside
 * the same allocation.
 *
 * Returns the allocated string (to be released with GF_FREE()), or NULL if
 * formatting fails or the braces are missing.
 */
char * ec_concat_prepare(xlator_t * xl, char ** sep, char ** post,
                         const char * fmt, va_list args)
{
    char * str, * brace;

    if (gf_vasprintf(&str, fmt, args) < 0)
    {
        return NULL;
    }

    brace = strchr(str, '{');
    if (brace != NULL)
    {
        /* Terminate the prefix; the separator starts right after '{'. */
        *brace = 0;
        brace++;
        *sep = brace;

        brace = strchr(brace, '}');
        if (brace != NULL)
        {
            /* Terminate the separator; the suffix starts after '}'. */
            *brace = 0;
            *post = brace + 1;

            return str;
        }
    }

    gf_log(xl->name, GF_LOG_ERROR, "Invalid concat format");

    GF_FREE(str);

    return NULL;
}
/*
 * Replaces the value of 'key' in the selected dictionary of 'cbk' by the
 * concatenation of the values of that key from all answers of the group.
 *
 * 'fmt' (with its variable arguments) is a printf-like format that must
 * expand to "<pre>{<sep>}<post>". The resulting value is
 * "<pre>v0<sep>v1<sep>...vN<post>", i.e. the separator is only written
 * between two consecutive values.
 *
 * Each value is assumed to be a NUL terminated string whose data_t length
 * includes the terminator (hence the 'len - 1' below).
 *
 * Returns 0 on success, -1 on error.
 */
int32_t ec_dict_data_concat(const char * fmt, ec_cbk_data_t * cbk,
                            int32_t which, char * key, ...)
{
    data_t * data[cbk->count];
    size_t len, tmp;
    char * str = NULL, * pre = NULL, * sep, * post;
    dict_t * dict;
    va_list args;
    int32_t i, num, prelen, postlen, seplen;
    int32_t ret = -1;

    num = cbk->count;
    if (!ec_dict_list(data, &num, cbk, which, key))
    {
        return -1;
    }

    va_start(args, key);
    pre = ec_concat_prepare(cbk->fop->xl, &sep, &post, fmt, args);
    va_end(args);

    if (pre == NULL)
    {
        return -1;
    }

    prelen = strlen(pre);
    seplen = strlen(sep);
    postlen = strlen(post);

    /* prefix + (num - 1) separators + suffix + terminator + all values. */
    len = prelen + (num - 1) * seplen + postlen + 1;
    for (i = 0; i < num; i++)
    {
        len += data[i]->len - 1;
    }

    str = GF_MALLOC(len, gf_common_mt_char);
    if (str == NULL)
    {
        goto out;
    }

    memcpy(str, pre, prelen);
    len = prelen;
    for (i = 0; i < num; i++)
    {
        /* The separator goes between values only. Writing it before the
         * first one would exceed the size computed above. */
        if (i > 0)
        {
            memcpy(str + len, sep, seplen);
            len += seplen;
        }
        tmp = data[i]->len - 1;
        memcpy(str + len, data[i]->data, tmp);
        len += tmp;
    }
    /* Copy the suffix together with its terminating NUL. */
    memcpy(str + len, post, postlen + 1);

    dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
    if (dict_set_dynstr(dict, key, str) != 0)
    {
        goto out;
    }

    /* Ownership of 'str' has been transferred to the dictionary. */
    str = NULL;

    ret = 0;

out:
    GF_FREE(str);
    GF_FREE(pre);

    return ret;
}
/*
 * Replaces the value of 'key' in the selected dictionary of 'cbk' by the
 * merge of the serialized dictionaries stored under that key in every
 * answer of the group.
 *
 * Each value is unserialized, all of them are copied into a single
 * dictionary and the result is serialized again.
 *
 * Returns 0 on success, -1 on error.
 */
int32_t ec_dict_data_merge(ec_cbk_data_t * cbk, int32_t which, char * key)
{
    data_t * data[cbk->count];
    dict_t * dict, * lockinfo, * tmp;
    char * ptr = NULL;
    int32_t i, num, len;
    int32_t ret = -1;

    num = cbk->count;
    if (!ec_dict_list(data, &num, cbk, which, key))
    {
        return -1;
    }

    /* dict_unserialize() fills an already existing dictionary, so the
     * destination must be created before calling it. */
    lockinfo = dict_new();
    if (lockinfo == NULL)
    {
        return -1;
    }

    if (dict_unserialize(data[0]->data, data[0]->len, &lockinfo) != 0)
    {
        goto out;
    }

    for (i = 1; i < num; i++)
    {
        tmp = dict_new();
        if (tmp == NULL)
        {
            goto out;
        }

        if ((dict_unserialize(data[i]->data, data[i]->len, &tmp) != 0) ||
            (dict_copy(tmp, lockinfo) == NULL))
        {
            dict_unref(tmp);

            goto out;
        }

        dict_unref(tmp);
    }

    len = dict_serialized_length(lockinfo);
    if (len < 0)
    {
        goto out;
    }
    ptr = GF_MALLOC(len, gf_common_mt_char);
    if (ptr == NULL)
    {
        goto out;
    }
    if (dict_serialize(lockinfo, ptr) != 0)
    {
        goto out;
    }

    dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
    if (dict_set_dynptr(dict, key, ptr, len) != 0)
    {
        goto out;
    }

    /* Ownership of 'ptr' has been transferred to the dictionary. */
    ptr = NULL;

    ret = 0;

out:
    GF_FREE(ptr);
    dict_unref(lockinfo);

    return ret;
}
/*
 * Makes the selected dictionary of 'cbk' carry, for 'key', the value
 * reported by the answer with the lowest index of the group.
 *
 * Nothing is done when 'cbk' itself already has the lowest index.
 *
 * Returns 0 on success, -1 if the value cannot be read or stored.
 */
int32_t ec_dict_data_uuid(ec_cbk_data_t * cbk, int32_t which, char * key)
{
    ec_cbk_data_t * cur, * lowest = cbk;
    dict_t * src, * dst;
    data_t * data;

    for (cur = cbk->next; cur != NULL; cur = cur->next)
    {
        if (cur->idx < lowest->idx)
        {
            lowest = cur;
        }
    }

    if (lowest == cbk)
    {
        return 0;
    }

    if (which == EC_COMBINE_XDATA)
    {
        src = lowest->xdata;
        dst = cbk->xdata;
    }
    else
    {
        src = lowest->dict;
        dst = cbk->dict;
    }

    data = dict_get(src, key);
    if (data == NULL)
    {
        return -1;
    }

    return (dict_set(dst, key, data) != 0) ? -1 : 0;
}
/*
 * Stores under 'key', in the selected dictionary of 'cbk', the largest
 * uint32 value found for that key among all answers of the group.
 *
 * With a single answer the dictionary is left unchanged.
 *
 * Returns 0 on success, -1 on error.
 */
int32_t ec_dict_data_max(ec_cbk_data_t * cbk, int32_t which, char * key)
{
    data_t * data[cbk->count];
    dict_t * dict;
    int32_t i, num = cbk->count;
    uint32_t highest, value;

    if (!ec_dict_list(data, &num, cbk, which, key))
    {
        return -1;
    }

    if (num <= 1)
    {
        return 0;
    }

    highest = data_to_uint32(data[0]);
    for (i = 1; i < num; i++)
    {
        value = data_to_uint32(data[i]);
        if (value > highest)
        {
            highest = value;
        }
    }

    if (which == EC_COMBINE_XDATA)
    {
        dict = cbk->xdata;
    }
    else
    {
        dict = cbk->dict;
    }

    return (dict_set_uint32(dict, key, highest) != 0) ? -1 : 0;
}
/*
 * Combines the stime value stored under 'key' across the answers of the
 * group by handing every value but the first to gf_get_max_stime(), which
 * operates on the selected dictionary of 'cbk'.
 *
 * Returns 0 on success, -1 on error.
 */
int32_t ec_dict_data_stime(ec_cbk_data_t * cbk, int32_t which, char * key)
{
    data_t * data[cbk->count];
    dict_t * dict;
    int32_t i, num = cbk->count;

    if (!ec_dict_list(data, &num, cbk, which, key))
    {
        return -1;
    }

    if (which == EC_COMBINE_XDATA)
    {
        dict = cbk->xdata;
    }
    else
    {
        dict = cbk->dict;
    }

    /* data[0] belongs to 'cbk' itself, so it is already in 'dict'. */
    for (i = 1; i < num; i++)
    {
        if (gf_get_max_stime(cbk->fop->xl, dict, key, data[i]) == 0)
        {
            continue;
        }

        gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "STIME combination "
                                                 "failed");

        return -1;
    }

    return 0;
}
/*
 * dict_foreach() callback used by ec_dict_combine().
 *
 * 'arg' is an ec_dict_combine_t. Depending on the key, the values found in
 * all the answers of the group are concatenated, merged, reduced to their
 * maximum or taken from the answer with the lowest index. Keys that need no
 * special treatment are left as they are.
 *
 * Returns the result of the selected combination helper, or 0 for keys
 * that are not combined.
 */
int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value,
                             void * arg)
{
    ec_dict_combine_t * ctx = arg;
    ec_cbk_data_t * cbk = ctx->cbk;
    int32_t which = ctx->which;

    /* Path information: "(<EC:name> path1 path2 ...)". */
    if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
        (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0))
    {
        return ec_dict_data_concat("(<EC:%s> { })", cbk, which, key,
                                   cbk->fop->xl->name);
    }

    /* Clear-locks results: one answer per line (prefix match on the key). */
    if (strncmp(key, GF_XATTR_CLRLK_CMD, strlen(GF_XATTR_CLRLK_CMD)) == 0)
    {
        return ec_dict_data_concat("{\n}", cbk, which, key);
    }

    /* Lock information: serialized dictionaries (prefix match on the key). */
    if (strncmp(key, GF_XATTR_LOCKINFO_KEY,
                strlen(GF_XATTR_LOCKINFO_KEY)) == 0)
    {
        return ec_dict_data_merge(cbk, which, key);
    }

    if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)
    {
        return ec_dict_data_max(cbk, which, key);
    }

    if (XATTR_IS_NODE_UUID(key))
    {
        return ec_dict_data_uuid(cbk, which, key);
    }

    if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
    {
        return ec_dict_data_stime(cbk, which, key);
    }

    return 0;
}
/*
 * Combines, key by key, the selected dictionary (xdata when 'which' is
 * EC_COMBINE_XDATA, dict otherwise) of all the answers grouped under 'cbk'.
 *
 * A missing dictionary is not an error.
 *
 * Returns 1 on success, 0 if the combination of some key fails.
 */
int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which)
{
    ec_dict_combine_t ctx;
    dict_t * dict;

    ctx.cbk = cbk;
    ctx.which = which;

    if (which == EC_COMBINE_XDATA)
    {
        dict = cbk->xdata;
    }
    else
    {
        dict = cbk->dict;
    }

    if (dict == NULL)
    {
        return 1;
    }

    if (dict_foreach(dict, ec_dict_data_combine, &ctx) != 0)
    {
        gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Dictionary combination "
                                                 "failed");

        return 0;
    }

    return 1;
}
/*
 * Compares two I/O vectors by total length only (contents are not looked
 * at). A vector with a non-positive count has length 0.
 *
 * Returns 1 if both lengths are equal, 0 otherwise.
 */
int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
                          struct iovec * src_vector, int32_t src_count)
{
    size_t dst_size, src_size;

    dst_size = (dst_count > 0) ? iov_length(dst_vector, dst_count) : 0;
    src_size = (src_count > 0) ? iov_length(src_vector, src_count) : 0;

    return (dst_size == src_size);
}
/*
 * Compares two lock descriptions field by field (type, whence, start,
 * length, pid and lock owner).
 *
 * Returns 1 if they are identical, 0 otherwise.
 */
int32_t ec_flock_compare(struct gf_flock * dst, struct gf_flock * src)
{
    return ((dst->l_type == src->l_type) &&
            (dst->l_whence == src->l_whence) &&
            (dst->l_start == src->l_start) &&
            (dst->l_len == src->l_len) &&
            (dst->l_pid == src->l_pid) &&
            is_same_lkowner(&dst->l_owner, &src->l_owner));
}
/*
 * Merges the file system statistics of 'src' into 'dst'.
 *
 * Block counts are first expressed in the larger of the two fragment sizes
 * (this may rescale the counters of 'src' in place). Then 'dst' keeps the
 * largest f_bsize and f_files, the smallest f_blocks, f_bfree, f_bavail,
 * f_ffree, f_favail and f_namemax, and only the flags set in both.
 */
void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src)
{
    if (src->f_bsize > dst->f_bsize)
    {
        dst->f_bsize = src->f_bsize;
    }

    if (src->f_frsize > dst->f_frsize)
    {
        /* Convert the counters of 'dst' to the fragment size of 'src'. */
        dst->f_blocks *= dst->f_frsize;
        dst->f_blocks /= src->f_frsize;

        dst->f_bfree *= dst->f_frsize;
        dst->f_bfree /= src->f_frsize;

        dst->f_bavail *= dst->f_frsize;
        dst->f_bavail /= src->f_frsize;

        dst->f_frsize = src->f_frsize;
    }
    else if (src->f_frsize < dst->f_frsize)
    {
        /* Convert the counters of 'src' to the fragment size of 'dst'. */
        src->f_blocks *= src->f_frsize;
        src->f_blocks /= dst->f_frsize;

        src->f_bfree *= src->f_frsize;
        src->f_bfree /= dst->f_frsize;

        src->f_bavail *= src->f_frsize;
        src->f_bavail /= dst->f_frsize;
    }

    if (src->f_blocks < dst->f_blocks)
    {
        dst->f_blocks = src->f_blocks;
    }
    if (src->f_bfree < dst->f_bfree)
    {
        dst->f_bfree = src->f_bfree;
    }
    if (src->f_bavail < dst->f_bavail)
    {
        dst->f_bavail = src->f_bavail;
    }

    if (src->f_files > dst->f_files)
    {
        dst->f_files = src->f_files;
    }
    if (src->f_ffree < dst->f_ffree)
    {
        dst->f_ffree = src->f_ffree;
    }
    if (src->f_favail < dst->f_favail)
    {
        dst->f_favail = src->f_favail;
    }
    if (src->f_namemax < dst->f_namemax)
    {
        dst->f_namemax = src->f_namemax;
    }

    if (dst->f_flag != src->f_flag)
    {
        gf_log(THIS->name, GF_LOG_DEBUG, "Mismatching file system flags "
                                         "(%lX, %lX)",
               dst->f_flag, src->f_flag);
    }
    dst->f_flag &= src->f_flag;
}
/*
 * Decides whether two answers can be placed in the same group.
 *
 * Both answers must have the same return code, the same errno when the
 * operation failed, and equivalent xdata. For successful answers the fop
 * specific 'combine' callback, if any, takes the final decision.
 *
 * Returns non-zero if 'src' can be combined with 'dst', 0 otherwise.
 */
int32_t ec_combine_check(ec_cbk_data_t * dst, ec_cbk_data_t * src,
                         ec_combine_f combine)
{
    ec_fop_data_t * fop = dst->fop;

    if (dst->op_ret != src->op_ret)
    {
        gf_log(fop->xl->name, GF_LOG_DEBUG, "Mismatching return code in "
                                            "answers of '%s': %d <-> %d",
               ec_fop_name(fop->id), dst->op_ret, src->op_ret);

        return 0;
    }

    /* The error code is only meaningful for failed operations. */
    if ((dst->op_ret < 0) && (dst->op_errno != src->op_errno))
    {
        gf_log(fop->xl->name, GF_LOG_DEBUG, "Mismatching errno code in "
                                            "answers of '%s': %d <-> %d",
               ec_fop_name(fop->id), dst->op_errno, src->op_errno);

        return 0;
    }

    if (!ec_dict_compare(dst->xdata, src->xdata))
    {
        gf_log(fop->xl->name, GF_LOG_WARNING, "Mismatching xdata in answers "
                                              "of '%s'",
               ec_fop_name(fop->id));

        return 0;
    }

    if ((dst->op_ret < 0) || (combine == NULL))
    {
        return 1;
    }

    return combine(fop, dst, src);
}
/*
 * Adds a new answer to the groups of answers of its fop.
 *
 * With fop->lock held, the first group in fop->cbk_list that
 * ec_combine_check() accepts as compatible is absorbed by 'cbk': its count
 * and mask are added to those of 'cbk', it is removed from the list and it
 * is chained through cbk->next. 'cbk' is then inserted into the list. If
 * the count of 'cbk' reaches fop->expected and no answer has been accepted
 * yet, 'cbk' becomes the accepted answer.
 *
 * After releasing the lock, either another subvolume is dispatched (when
 * more answers are still needed) or the accepted answer is reported.
 */
void ec_combine(ec_cbk_data_t * cbk, ec_combine_f combine)
{
ec_fop_data_t * fop = cbk->fop;
ec_cbk_data_t * ans = NULL, * tmp = NULL;
struct list_head * item = NULL;
int32_t needed = 0, report = 0;
char str[32];
LOCK(&fop->lock);
/* By default the new answer is inserted after the last entry. */
item = fop->cbk_list.prev;
list_for_each_entry(ans, &fop->cbk_list, list)
{
if (ec_combine_check(cbk, ans, combine))
{
cbk->count += ans->count;
cbk->mask |= ans->mask;
/* Walk backwards from the absorbed group until an entry whose count is
 * not smaller than the new one (or the list head) is found. 'cbk' is
 * inserted right after it. */
item = ans->list.prev;
while (item != &fop->cbk_list)
{
tmp = list_entry(item, ec_cbk_data_t, list);
if (tmp->count >= cbk->count)
{
break;
}
item = item->prev;
}
/* The absorbed group leaves the list and hangs from 'cbk'. */
list_del(&ans->list);
cbk->next = ans;
break;
}
}
list_add(&cbk->list, item);
ec_trace("ANSWER", fop, "combine=%s[%d]",
ec_bin(str, sizeof(str), cbk->mask, 0), cbk->count);
/* Only the first group that reaches the expected count is accepted. */
if ((cbk->count == fop->expected) && (fop->answer == NULL))
{
fop->answer = cbk;
ec_update_bad(fop, cbk->mask);
report = 1;
}
/* 'needed' is computed from the first group of the list while the lock
 * is still held. */
ans = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
needed = fop->minimum - ans->count - fop->winds + 1;
UNLOCK(&fop->lock);
if (needed > 0)
{
ec_dispatch_next(fop, cbk->idx);
}
else if (report)
{
ec_report(fop, 0);
}
}

View File

@ -0,0 +1,44 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __EC_COMBINE_H__
#define __EC_COMBINE_H__
/* Values for the 'which' argument of ec_dict_combine(): they select the
 * 'dict' or the 'xdata' member of each answer. */
#define EC_COMBINE_DICT 0
#define EC_COMBINE_XDATA 1
/* Fop specific check: returns non-zero if answer 'src' can be combined
 * with answer 'dst'. */
typedef int32_t (* ec_combine_f)(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src);
/* Rescales ia_blocks of 'count' iatt entries (see ec-combine.c). */
void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
int32_t answers);
/* Merges 'src' into 'dst' if compatible. Returns 1 on success, 0 if not. */
int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count);
/* Returns 1 if both dictionaries (either may be NULL) are equivalent. */
int32_t ec_dict_compare(dict_t * dict1, dict_t * dict2);
/* Returns 1 if both vectors have the same total length. */
int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
struct iovec * src_vector, int32_t src_count);
/* Returns 1 if both lock descriptions are identical. */
int32_t ec_flock_compare(struct gf_flock * dst, struct gf_flock * src);
/* Merges the file system statistics of 'src' into 'dst'. */
void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src);
/* Combines the selected dictionary of a group of answers. Returns 1 on
 * success, 0 on failure. */
int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which);
/* Adds a new answer to the groups of answers of its fop. */
void ec_combine(ec_cbk_data_t * cbk, ec_combine_f combine);
#endif /* __EC_COMBINE_H__ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,105 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __EC_COMMON_H__
#define __EC_COMMON_H__
#include "xlator.h"
#include "ec-data.h"
/* Bit flags. NOTE(review): presumably stored in ec_fop_data_t::flags;
 * confirm against the users in ec-common.c. */
#define EC_FLAG_UPDATE_LOC_PARENT 0x0001
#define EC_FLAG_UPDATE_LOC_INODE 0x0002
#define EC_FLAG_UPDATE_FD 0x0004
#define EC_FLAG_UPDATE_FD_INODE 0x0008
#define EC_FLAG_WAITING_WINDS 0x0010
/* Special (negative) values. NOTE(review): presumably accepted as the
 * 'minimum' argument of ec_fop_data_allocate(); confirm. */
#define EC_MINIMUM_ONE -1
#define EC_MINIMUM_MIN -2
#define EC_MINIMUM_ALL -3
/* Kinds of lock. NOTE(review): presumably the values of ec_lock_t::kind;
 * confirm. */
#define EC_LOCK_ENTRY 0
#define EC_LOCK_INODE 1
/* State identifiers. Note that EC_STATE_START and EC_STATE_END share the
 * value 0. */
#define EC_STATE_START 0
#define EC_STATE_END 0
#define EC_STATE_INIT 1
#define EC_STATE_LOCK 2
#define EC_STATE_GET_SIZE_AND_VERSION 3
#define EC_STATE_DISPATCH 4
#define EC_STATE_PREPARE_ANSWER 5
#define EC_STATE_REPORT 6
#define EC_STATE_UPDATE_SIZE_AND_VERSION 7
#define EC_STATE_UNLOCK 8
/* State identifier in its own range (100) for writes. */
#define EC_STATE_WRITE_START 100
/* State identifiers in their own range (200+) for heal operations. */
#define EC_STATE_HEAL_ENTRY_LOOKUP 200
#define EC_STATE_HEAL_ENTRY_PREPARE 201
#define EC_STATE_HEAL_PRE_INODELK_LOCK 202
#define EC_STATE_HEAL_PRE_INODE_LOOKUP 203
#define EC_STATE_HEAL_XATTRIBUTES_REMOVE 204
#define EC_STATE_HEAL_XATTRIBUTES_SET 205
#define EC_STATE_HEAL_ATTRIBUTES 206
#define EC_STATE_HEAL_OPEN 207
#define EC_STATE_HEAL_REOPEN_FD 208
#define EC_STATE_HEAL_UNLOCK 209
#define EC_STATE_HEAL_DATA_LOCK 210
#define EC_STATE_HEAL_DATA_COPY 211
#define EC_STATE_HEAL_DATA_UNLOCK 212
#define EC_STATE_HEAL_POST_INODELK_LOCK 213
#define EC_STATE_HEAL_POST_INODE_LOOKUP 214
#define EC_STATE_HEAL_SETATTR 215
#define EC_STATE_HEAL_POST_INODELK_UNLOCK 216
#define EC_STATE_HEAL_DISPATCH 217
/*
 * Entry points of the common fop machinery. Their implementation is not in
 * this header; the grouping below is based on the names only.
 */
/* Dispatching helpers that return a value to the caller. */
int32_t ec_dispatch_one_retry(ec_fop_data_t * fop, int32_t idx, int32_t op_ret,
int32_t op_errno);
int32_t ec_dispatch_next(ec_fop_data_t * fop, int32_t idx);
/* Completion, bad-subvolume tracking and error recording. */
void ec_complete(ec_fop_data_t * fop);
void ec_update_bad(ec_fop_data_t * fop, uintptr_t good);
void ec_fop_set_error(ec_fop_data_t * fop, int32_t error);
/* Locking helpers. */
void ec_lock_inode(ec_fop_data_t * fop, loc_t * loc);
void ec_lock_entry(ec_fop_data_t * fop, loc_t * loc);
void ec_lock_fd(ec_fop_data_t * fop, fd_t * fd);
void ec_unlock(ec_fop_data_t * fop);
/* Size and version handling. */
void ec_get_size_version(ec_fop_data_t * fop);
void ec_update_size_version(ec_fop_data_t * fop);
/* Dispatching of a fop to the subvolumes. */
void ec_dispatch_all(ec_fop_data_t * fop);
void ec_dispatch_inc(ec_fop_data_t * fop);
void ec_dispatch_min(ec_fop_data_t * fop);
void ec_dispatch_one(ec_fop_data_t * fop);
void ec_wait_winds(ec_fop_data_t * fop);
/* Parent notification, result reporting and state machine driver. */
void ec_resume_parent(ec_fop_data_t * fop, int32_t error);
void ec_report(ec_fop_data_t * fop, int32_t error);
void ec_manager(ec_fop_data_t * fop, int32_t error);
#endif /* __EC_COMMON_H__ */

View File

@ -0,0 +1,261 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#include "ec-mem-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-data.h"
/*
 * Allocates and initializes the structure that holds one answer to 'fop'.
 *
 * The answer is only accepted if the xlator, the frame and the fop id match
 * those of the request. On success the new answer is initialized with the
 * index of the subvolume that answered ('idx'), a mask with only that bit
 * set, a count of 1 and the received result, and it is appended to
 * fop->answer_list under fop->lock.
 *
 * Returns the new answer, or NULL if the answer does not belong to the
 * request or memory cannot be allocated (an error is logged in both cases).
 */
ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
                                     ec_fop_data_t * fop, int32_t id,
                                     int32_t idx, int32_t op_ret,
                                     int32_t op_errno)
{
    ec_cbk_data_t * cbk;
    ec_t * ec = this->private;

    if (fop->xl != this)
    {
        gf_log(this->name, GF_LOG_ERROR, "Mismatching xlators between request "
                                         "and answer (req=%s, ans=%s).",
               fop->xl->name, this->name);

        return NULL;
    }
    if (fop->frame != frame)
    {
        gf_log(this->name, GF_LOG_ERROR, "Mismatching frames between request "
                                         "and answer (req=%p, ans=%p).",
               fop->frame, frame);

        return NULL;
    }
    if (fop->id != id)
    {
        gf_log(this->name, GF_LOG_ERROR, "Mismatching fops between request "
                                         "and answer (req=%d, ans=%d).",
               fop->id, id);

        return NULL;
    }

    cbk = mem_get0(ec->cbk_pool);
    if (cbk == NULL)
    {
        gf_log(this->name, GF_LOG_ERROR, "Failed to allocate memory for an "
                                         "answer.");

        /* Without this the NULL pointer would be dereferenced below. */
        return NULL;
    }

    cbk->fop = fop;
    cbk->idx = idx;
    cbk->mask = 1ULL << idx;
    cbk->count = 1;
    cbk->op_ret = op_ret;
    cbk->op_errno = op_errno;

    LOCK(&fop->lock);

    list_add_tail(&cbk->answer_list, &fop->answer_list);

    UNLOCK(&fop->lock);

    return cbk;
}
/*
 * Releases every resource referenced by an answer (dictionaries, inode,
 * fd, buffers and vector) and returns the answer to its memory pool.
 */
void ec_cbk_data_destroy(ec_cbk_data_t * cbk)
{
    /* References are optional: drop only those that were taken. */
    if (cbk->xdata)
    {
        dict_unref(cbk->xdata);
    }
    if (cbk->dict)
    {
        dict_unref(cbk->dict);
    }
    if (cbk->inode)
    {
        inode_unref(cbk->inode);
    }
    if (cbk->fd)
    {
        fd_unref(cbk->fd);
    }
    if (cbk->buffers)
    {
        iobref_unref(cbk->buffers);
    }

    GF_FREE(cbk->vector);

    mem_put(cbk);
}
/*
 * Allocates and initializes the structure that tracks one request (fop).
 *
 * The fop gets a private frame (a copy of 'frame', or a brand new one when
 * 'frame' is NULL) whose 'local' member points back to the fop. The new
 * fop starts with one reference.
 *
 * If 'frame' is not NULL and its 'local' member holds another fop, that
 * fop becomes the parent of the new one: its 'jobs' and 'refs' counters
 * are incremented under its lock.
 *
 * Returns the new fop, or NULL if memory or the private frame cannot be
 * allocated (an error is logged).
 */
ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
                                     int32_t id, uint32_t flags,
                                     uintptr_t target, int32_t minimum,
                                     ec_wind_f wind, ec_handler_f handler,
                                     ec_cbk_t cbks, void * data)
{
    ec_t * ec = this->private;
    ec_fop_data_t * fop, * parent;

    fop = mem_get0(ec->fop_pool);
    if (fop == NULL)
    {
        gf_log(this->name, GF_LOG_ERROR, "Failed to allocate memory for a "
                                         "request.");

        return NULL;
    }

    fop->xl = this;
    fop->req_frame = frame;

    /* fops need a private frame to be able to execute some postop operations
     * even if the original fop has completed and reported back to the upper
     * xlator and it has destroyed the base frame.
     *
     * TODO: minimize usage of private frames. Reuse req_frame as much as
     *       possible.
     */
    fop->frame = (frame != NULL) ? copy_frame(frame)
                                 : create_frame(this, this->ctx->pool);
    if (fop->frame == NULL)
    {
        gf_log(this->name, GF_LOG_ERROR, "Failed to create a private frame "
                                         "for a request");

        mem_put(fop);

        return NULL;
    }

    /* Request description. */
    fop->id = id;
    fop->refs = 1;
    fop->flags = flags;
    fop->minimum = minimum;
    fop->mask = target;

    /* Callbacks and opaque user data. */
    fop->wind = wind;
    fop->handler = handler;
    fop->cbks = cbks;
    fop->data = data;

    INIT_LIST_HEAD(&fop->lock_list);
    INIT_LIST_HEAD(&fop->cbk_list);
    INIT_LIST_HEAD(&fop->answer_list);

    LOCK_INIT(&fop->lock);

    fop->frame->local = fop;

    if (frame != NULL)
    {
        parent = frame->local;
        fop->parent = parent;

        if (parent != NULL)
        {
            LOCK(&parent->lock);

            parent->jobs++;
            parent->refs++;

            UNLOCK(&parent->lock);
        }
    }

    return fop;
}
/*
 * Takes one more reference on 'fop'. The counter is updated (and the
 * operation traced) with fop->lock held.
 */
void ec_fop_data_acquire(ec_fop_data_t * fop)
{
    LOCK(&fop->lock);

    ec_trace("ACQUIRE", fop, "");

    fop->refs += 1;

    UNLOCK(&fop->lock);
}
/*
 * Drops one reference on 'fop'.
 *
 * When the last reference goes away the private frame is destroyed, every
 * resource held by the fop is released, the parent (if any) is resumed
 * with the error recorded in the fop, all the answers received are
 * destroyed and the fop goes back to its memory pool.
 */
void ec_fop_data_release(ec_fop_data_t * fop)
{
    ec_cbk_data_t * cbk, * next;
    int32_t remaining;

    LOCK(&fop->lock);

    ec_trace("RELEASE", fop, "");

    fop->refs--;
    remaining = fop->refs;

    UNLOCK(&fop->lock);

    if (remaining != 0)
    {
        return;
    }

    /* Detach the fop from its private frame before destroying the stack. */
    fop->frame->local = NULL;
    STACK_DESTROY(fop->frame->root);

    LOCK_DESTROY(&fop->lock);

    if (fop->xdata)
    {
        dict_unref(fop->xdata);
    }
    if (fop->dict)
    {
        dict_unref(fop->dict);
    }
    if (fop->inode)
    {
        inode_unref(fop->inode);
    }
    if (fop->fd)
    {
        fd_unref(fop->fd);
    }
    if (fop->buffers)
    {
        iobref_unref(fop->buffers);
    }
    GF_FREE(fop->vector);
    GF_FREE(fop->str[0]);
    GF_FREE(fop->str[1]);
    loc_wipe(&fop->loc[0]);
    loc_wipe(&fop->loc[1]);

    ec_resume_parent(fop, fop->error);

    /* The safe iterator is required because entries are removed while the
     * list is being traversed. */
    list_for_each_entry_safe(cbk, next, &fop->answer_list, answer_list)
    {
        list_del_init(&cbk->answer_list);

        ec_cbk_data_destroy(cbk);
    }

    mem_put(fop);
}

View File

@ -0,0 +1,260 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __EC_DATA_H__
#define __EC_DATA_H__
#include "xlator.h"
#include "ec.h"
/* Forward declarations of the data types defined in this header. */
struct _ec_fd;
typedef struct _ec_fd ec_fd_t;
struct _ec_inode;
typedef struct _ec_inode ec_inode_t;
union _ec_cbk;
typedef union _ec_cbk ec_cbk_t;
struct _ec_lock;
typedef struct _ec_lock ec_lock_t;
struct _ec_fop_data;
typedef struct _ec_fop_data ec_fop_data_t;
struct _ec_cbk_data;
typedef struct _ec_cbk_data ec_cbk_data_t;
struct _ec_heal;
typedef struct _ec_heal ec_heal_t;
/* Function types stored in ec_fop_data_t (members 'wind', 'handler' and
 * 'resume'). NOTE(review): the meaning of the int32_t arguments is defined
 * by the callers, which are not visible here. */
typedef void (* ec_wind_f)(ec_t *, ec_fop_data_t *, int32_t);
typedef int32_t (* ec_handler_f)(ec_fop_data_t *, int32_t);
typedef void (* ec_resume_f)(ec_fop_data_t *, int32_t);
/* Per file descriptor data. NOTE(review): 'bad' and 'open' are presumably
 * bitmasks of subvolumes (one bit per index, like ec_cbk_data_t::mask);
 * confirm against their users. */
struct _ec_fd
{
uintptr_t bad;
loc_t loc;
uintptr_t open;
int32_t flags;
};
/* Per inode data. NOTE(review): 'bad' is presumably a bitmask of
 * subvolumes; 'heal' points to a heal operation descriptor. Confirm against
 * their users. */
struct _ec_inode
{
uintptr_t bad;
ec_heal_t * heal;
};
/* Callback signatures of the heal and fheal operations, which are specific
 * to this translator. Both have the same shape: frame, cookie, xlator, two
 * int32_t values, three uintptr_t values and a dictionary.
 * NOTE(review): by analogy with the other fop callbacks the int32_t pair is
 * presumably op_ret/op_errno and the dictionary xdata; confirm. */
typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
int32_t, int32_t, uintptr_t, uintptr_t,
uintptr_t, dict_t *);
typedef int32_t (* fop_fheal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
int32_t, int32_t, uintptr_t, uintptr_t,
uintptr_t, dict_t *);
/* Callback of a request. There is one member for each supported fop, each
 * with the callback type of that fop; being a union, only one of them is
 * held at a time. It is stored in ec_fop_data_t::cbks. */
union _ec_cbk
{
fop_access_cbk_t access;
fop_create_cbk_t create;
fop_discard_cbk_t discard;
fop_entrylk_cbk_t entrylk;
fop_fentrylk_cbk_t fentrylk;
fop_fallocate_cbk_t fallocate;
fop_flush_cbk_t flush;
fop_fsync_cbk_t fsync;
fop_fsyncdir_cbk_t fsyncdir;
fop_getxattr_cbk_t getxattr;
fop_fgetxattr_cbk_t fgetxattr;
fop_heal_cbk_t heal;
fop_fheal_cbk_t fheal;
fop_inodelk_cbk_t inodelk;
fop_finodelk_cbk_t finodelk;
fop_link_cbk_t link;
fop_lk_cbk_t lk;
fop_lookup_cbk_t lookup;
fop_mkdir_cbk_t mkdir;
fop_mknod_cbk_t mknod;
fop_open_cbk_t open;
fop_opendir_cbk_t opendir;
fop_readdir_cbk_t readdir;
fop_readdirp_cbk_t readdirp;
fop_readlink_cbk_t readlink;
fop_readv_cbk_t readv;
fop_removexattr_cbk_t removexattr;
fop_fremovexattr_cbk_t fremovexattr;
fop_rename_cbk_t rename;
fop_rmdir_cbk_t rmdir;
fop_setattr_cbk_t setattr;
fop_fsetattr_cbk_t fsetattr;
fop_setxattr_cbk_t setxattr;
fop_fsetxattr_cbk_t fsetxattr;
fop_stat_cbk_t stat;
fop_fstat_cbk_t fstat;
fop_statfs_cbk_t statfs;
fop_symlink_cbk_t symlink;
fop_truncate_cbk_t truncate;
fop_ftruncate_cbk_t ftruncate;
fop_unlink_cbk_t unlink;
fop_writev_cbk_t writev;
fop_xattrop_cbk_t xattrop;
fop_fxattrop_cbk_t fxattrop;
fop_zerofill_cbk_t zerofill;
};
/* Description of a lock; 'list' links it into a list of locks.
 * NOTE(review): 'kind' is presumably EC_LOCK_ENTRY or EC_LOCK_INODE and
 * selects which member of the anonymous union is valid (type/basename for
 * entry locks, flock for inode locks); confirm against ec-common.c. */
struct _ec_lock
{
struct list_head list;
uintptr_t mask;
int32_t kind;
loc_t loc;
union
{
struct
{
entrylk_type type;
char * basename;
};
struct gf_flock flock;
};
};
/*
 * State of one request (fop) handled by the translator. Instances are
 * created by ec_fop_data_allocate() and destroyed by ec_fop_data_release()
 * when the last reference is dropped.
 *
 * The members from 'xdata' to 'buffers' hold arguments of the request;
 * which ones are used depends on the fop.
 */
struct _ec_fop_data
{
int32_t id; /* fop identifier, checked against each answer */
int32_t refs; /* reference counter, protected by 'lock' */
int32_t state;
int32_t minimum;
int32_t expected; /* group size needed to accept an answer */
int32_t winds;
int32_t jobs; /* incremented when a child fop is allocated */
int32_t error; /* passed to ec_resume_parent() on release */
ec_fop_data_t * parent; /* fop that created this one, if any */
xlator_t * xl;
call_frame_t * req_frame; // frame of the calling xlator
call_frame_t * frame; // frame used by this fop
struct list_head lock_list; // list locks held by this fop
struct list_head cbk_list; // sorted list of groups of answers
struct list_head answer_list; // list of answers
ec_cbk_data_t * answer; // accepted answer
size_t pre_size;
size_t post_size;
gf_lock_t lock;
uint32_t flags;
uint32_t first;
uintptr_t mask; /* initialized from the 'target' argument */
uintptr_t remaining;
uintptr_t good;
uintptr_t bad;
ec_wind_f wind;
ec_handler_f handler;
ec_resume_f resume;
ec_cbk_t cbks;
void * data;
size_t user_size;
size_t head;
dict_t * xdata;
dict_t * dict;
int32_t int32;
uint32_t uint32;
size_t size;
off_t offset;
mode_t mode[2];
entrylk_cmd entrylk_cmd;
entrylk_type entrylk_type;
gf_xattrop_flags_t xattrop_flags;
dev_t dev;
inode_t * inode;
fd_t * fd;
struct iatt iatt;
char * str[2];
loc_t loc[2];
struct gf_flock flock;
struct iovec * vector;
struct iobref * buffers;
};
/*
 * One answer to a request. Instances are created by ec_cbk_data_allocate()
 * and destroyed by ec_cbk_data_destroy().
 *
 * The members from 'xdata' to 'buffers' hold the data returned with the
 * answer; which ones are used depends on the fop.
 */
struct _ec_cbk_data
{
struct list_head list; // item in the sorted list of groups
struct list_head answer_list; // item in the list of answers
ec_fop_data_t * fop;
ec_cbk_data_t * next; // next answer in the same group
int32_t idx; /* index of the subvolume that answered */
int32_t op_ret;
int32_t op_errno;
int32_t count; /* answers in the group (1 when allocated) */
uintptr_t mask; /* bit 'idx', ORed with absorbed groups */
dict_t * xdata;
dict_t * dict;
int32_t int32;
uintptr_t uintptr[3];
size_t size;
uint64_t version;
inode_t * inode;
fd_t * fd;
struct statvfs statvfs;
struct iatt iatt[5];
struct gf_flock flock;
struct iovec * vector;
struct iobref * buffers;
};
/* State of a heal operation, referenced from ec_inode_t::heal.
 * NOTE(review): the exact meaning of each member is defined by the heal
 * code (ec-heal.c), which is not visible here; 'available', 'good', 'bad'
 * and 'open' are presumably bitmasks of subvolumes. */
struct _ec_heal
{
gf_lock_t lock;
xlator_t * xl;
ec_fop_data_t * fop;
ec_fop_data_t * lookup;
loc_t loc;
struct iatt iatt;
char * symlink;
fd_t * fd;
int32_t done;
uintptr_t available;
uintptr_t good;
uintptr_t bad;
uintptr_t open;
off_t offset;
size_t size;
uint64_t version;
size_t raw_size;
};
/* Creates the answer of subvolume 'idx' to 'fop' and links it into
 * fop->answer_list. Returns NULL if xlator, frame or fop id do not match
 * those of the request. */
ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
ec_fop_data_t * fop, int32_t id,
int32_t idx, int32_t op_ret,
int32_t op_errno);
/* Creates a new request with one reference and a private frame. Returns
 * NULL if memory or the private frame cannot be allocated. */
ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
int32_t id, uint32_t flags,
uintptr_t target, int32_t minimum,
ec_wind_f wind, ec_handler_f handler,
ec_cbk_t cbks, void * data);
/* Takes one more reference on 'fop'. */
void ec_fop_data_acquire(ec_fop_data_t * fop);
/* Drops one reference on 'fop'; the last one destroys it. */
void ec_fop_data_release(ec_fop_data_t * fop);
#endif /* __EC_DATA_H__ */

View File

@ -0,0 +1,571 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#include "xlator.h"
#include "defaults.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
#include "ec-method.h"
#include "ec-fops.h"
/* FOP: opendir */
/*
 * Tells whether two opendir answers can be grouped together: they can when
 * both carry the same fd. Returns 1 if they match, 0 (after logging the
 * mismatch) otherwise.
 */
int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,
                           ec_cbk_data_t * src)
{
    if (dst->fd == src->fd)
    {
        return 1;
    }

    gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching fd in answers "
                                         "of 'GF_FOP_OPENDIR': %p <-> %p",
           dst->fd, src->fd);

    return 0;
}
/*
 * Callback of an opendir request sent to one subvolume. The cookie carries
 * the index of the subvolume. The answer is recorded, taking references on
 * the returned fd (only for successful answers) and xdata, and then handed
 * to ec_combine(). The fop is always notified through ec_complete() once it
 * has been identified.
 */
int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
                       int32_t op_ret, int32_t op_errno, fd_t * fd,
                       dict_t * xdata)
{
    ec_fop_data_t * fop = NULL;
    ec_cbk_data_t * answer = NULL;
    int32_t child = (int32_t)(uintptr_t)cookie;

    VALIDATE_OR_GOTO(this, out);
    GF_VALIDATE_OR_GOTO(this->name, frame, out);
    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
    GF_VALIDATE_OR_GOTO(this->name, this->private, out);

    fop = frame->local;

    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", child,
             frame, op_ret, op_errno);

    answer = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPENDIR, child,
                                  op_ret, op_errno);
    if (answer == NULL)
    {
        goto out;
    }

    /* The fd is only meaningful when the subvolume succeeded. */
    if ((op_ret >= 0) && (fd != NULL))
    {
        answer->fd = fd_ref(fd);
        if (answer->fd == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
                                             "file descriptor.");

            goto out;
        }
    }

    if (xdata != NULL)
    {
        answer->xdata = dict_ref(xdata);
        if (answer->xdata == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
                                             "dictionary.");

            goto out;
        }
    }

    ec_combine(answer, ec_combine_opendir);

out:
    if (fop != NULL)
    {
        ec_complete(fop);
    }

    return 0;
}
/*
 * Sends the opendir request of 'fop' to the translator ec->xl_list[idx].
 * The index travels as the cookie, which is where ec_opendir_cbk() reads it
 * back from.
 */
void ec_wind_opendir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
STACK_WIND_COOKIE(fop->frame, ec_opendir_cbk, (void *)(uintptr_t)idx,
ec->xl_list[idx], ec->xl_list[idx]->fops->opendir,
&fop->loc[0], fop->fd, fop->xdata);
}
/*
 * State machine of the opendir fop. Receives the current state and returns
 * the next one. Negative states are the error counterparts of the positive
 * ones.
 *
 * NOTE(review): -EC_STATE_INIT is not handled here and would reach the
 * 'default' branch; confirm whether the manager can call this handler with
 * that state when ec_opendir() starts the fop with a non-zero error.
 */
int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
{
ec_cbk_data_t * cbk;
switch (state)
{
/* Nothing to prepare: INIT shares the code of DISPATCH. */
case EC_STATE_INIT:
case EC_STATE_DISPATCH:
ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
cbk = fop->answer;
if (cbk != NULL)
{
/* A successful answer whose xdata cannot be combined becomes EIO. */
if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
{
if (cbk->op_ret >= 0)
{
cbk->op_ret = -1;
cbk->op_errno = EIO;
}
}
if (cbk->op_ret < 0)
{
ec_fop_set_error(fop, cbk->op_errno);
}
}
else
{
/* No answer was accepted. */
ec_fop_set_error(fop, EIO);
}
return EC_STATE_REPORT;
case EC_STATE_REPORT:
cbk = fop->answer;
GF_ASSERT(cbk != NULL);
/* The callback is optional. */
if (fop->cbks.opendir != NULL)
{
fop->cbks.opendir(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, cbk->fd, cbk->xdata);
}
return EC_STATE_END;
/* Error path: report fop->error without fd nor xdata. */
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
if (fop->cbks.opendir != NULL)
{
fop->cbks.opendir(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL);
}
return EC_STATE_END;
default:
gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
/*
 * Entry point of the opendir fop.
 *
 * frame, this : frame of the request and the ec translator
 * target      : forwarded unchanged to ec_fop_data_allocate()
 * minimum     : forwarded unchanged to ec_fop_data_allocate()
 * func, data  : optional callback that receives the result, and its data
 * loc, fd     : directory to open and the fd to associate with it
 * xdata       : optional extra data sent with the request
 *
 * When the fop cannot even be allocated the error (EIO) is reported
 * directly through 'func'. In any other case the fop is handed to
 * ec_manager(), with a non-zero error if its arguments could not be
 * copied.
 */
void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target,
                int32_t minimum, fop_opendir_cbk_t func, void * data,
                loc_t * loc, fd_t * fd, dict_t * xdata)
{
    ec_cbk_t callback = { .opendir = func };
    ec_fop_data_t * fop = NULL;
    int32_t error = EIO;

    gf_log("ec", GF_LOG_TRACE, "EC(OPENDIR) %p", frame);

    VALIDATE_OR_GOTO(this, out);
    GF_VALIDATE_OR_GOTO(this->name, frame, out);
    GF_VALIDATE_OR_GOTO(this->name, this->private, out);

    fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_UPDATE_FD,
                               target, minimum, ec_wind_opendir,
                               ec_manager_opendir, callback, data);
    if (fop == NULL)
    {
        goto out;
    }

    if (loc != NULL)
    {
        if (loc_copy(&fop->loc[0], loc) != 0)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");

            goto out;
        }
    }
    if (fd != NULL)
    {
        fop->fd = fd_ref(fd);
        if (fop->fd == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
                                             "file descriptor.");

            goto out;
        }
    }
    if (xdata != NULL)
    {
        fop->xdata = dict_ref(xdata);
        if (fop->xdata == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
                                             "dictionary.");

            goto out;
        }
    }

    error = 0;

out:
    if (fop != NULL)
    {
        ec_manager(fop, error);
    }
    else if (func != NULL)
    {
        /* The callback is optional everywhere else (see the checks on
         * fop->cbks.opendir), so it must not be called blindly here. */
        func(frame, NULL, this, -1, EIO, NULL, NULL);
    }
}
/* FOP: readdir */
/*
 * Post-processes the entries returned by subvolume 'idx': every offset is
 * re-encoded with ec_itransform() and, for regular files, the size is taken
 * from the EC_XATTR_SIZE item of the entry's dictionary (which is removed
 * from it). When that item is not available the size is estimated by
 * multiplying the reported size by the number of fragments.
 */
void ec_adjust_readdir(ec_t * ec, int32_t idx, gf_dirent_t * entries)
{
    gf_dirent_t * item;
    int32_t have_size;

    list_for_each_entry(item, &entries->list, list)
    {
        item->d_off = ec_itransform(ec, idx, item->d_off);

        if (item->d_stat.ia_type == IA_IFREG)
        {
            have_size = (item->dict != NULL) &&
                        (ec_dict_del_number(item->dict, EC_XATTR_SIZE,
                                            &item->d_stat.ia_size) == 0);
            if (!have_size)
            {
                gf_log(ec->xl->name, GF_LOG_WARNING, "Unable to get exact "
                                                     "file size.");

                item->d_stat.ia_size *= ec->fragments;
            }

            ec_iatt_rebuild(ec, &item->d_stat, 1, 1);
        }
    }
}
/*
 * Callback of a readdir request sent to one subvolume (its index comes in
 * the cookie). Returned entries are adjusted with ec_adjust_readdir() and,
 * unless ec_dispatch_one_retry() decides to retry the request, the answer
 * is passed to the optional readdir callback of the fop.
 */
int32_t ec_readdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
                       int32_t op_ret, int32_t op_errno, gf_dirent_t * entries,
                       dict_t * xdata)
{
    ec_fop_data_t * fop = NULL;
    int32_t child = (int32_t)(uintptr_t)cookie;

    VALIDATE_OR_GOTO(this, out);
    GF_VALIDATE_OR_GOTO(this->name, frame, out);
    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
    GF_VALIDATE_OR_GOTO(this->name, this->private, out);

    fop = frame->local;

    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", child,
             frame, op_ret, op_errno);

    if (op_ret > 0)
    {
        ec_adjust_readdir(fop->xl->private, child, entries);
    }

    if (!ec_dispatch_one_retry(fop, child, op_ret, op_errno) &&
        (fop->cbks.readdir != NULL))
    {
        fop->cbks.readdir(fop->req_frame, fop, this, op_ret, op_errno,
                          entries, xdata);
    }

out:
    if (fop != NULL)
    {
        ec_complete(fop);
    }

    return 0;
}
/*
 * Sends the readdir request of 'fop' (fd, size, offset and xdata) to the
 * translator ec->xl_list[idx]. The index travels as the cookie, which is
 * where ec_readdir_cbk() reads it back from.
 */
void ec_wind_readdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
STACK_WIND_COOKIE(fop->frame, ec_readdir_cbk, (void *)(uintptr_t)idx,
ec->xl_list[idx], ec->xl_list[idx]->fops->readdir,
fop->fd, fop->size, fop->offset, fop->xdata);
}
/*
 * State machine shared by the readdir and readdirp fops (fop->id tells them
 * apart when an error has to be reported). Receives the current state and
 * returns the next one.
 */
int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
{
switch (state)
{
case EC_STATE_INIT:
/* Make sure there is an xdata and request EC_XATTR_SIZE through it. */
if (fop->xdata == NULL)
{
fop->xdata = dict_new();
if (fop->xdata == NULL)
{
gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare "
"readdirp request");
fop->error = EIO;
return EC_STATE_REPORT;
}
}
if (dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0) != 0)
{
gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare "
"readdirp request");
fop->error = EIO;
return EC_STATE_REPORT;
}
/* A non-zero offset encodes the subvolume it came from: decode it and
 * restrict the request to that subvolume. */
if (fop->offset != 0)
{
int32_t idx;
fop->offset = ec_deitransform(fop->xl->private, &idx,
fop->offset);
fop->mask &= 1ULL << idx;
}
/* fall through */
case EC_STATE_DISPATCH:
ec_dispatch_one(fop);
return EC_STATE_REPORT;
case -EC_STATE_REPORT:
/* Error path: use the callback that matches the fop. */
if (fop->id == GF_FOP_READDIR)
{
if (fop->cbks.readdir != NULL)
{
fop->cbks.readdir(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL);
}
}
else
{
if (fop->cbks.readdirp != NULL)
{
fop->cbks.readdirp(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL);
}
}
/* fall through */
case EC_STATE_REPORT:
/* Successful answers are reported from the callbacks themselves. */
return EC_STATE_END;
default:
gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
/*
 * Entry point of the readdir fop.
 *
 * frame, this  : frame of the request and the ec translator
 * target       : forwarded unchanged to ec_fop_data_allocate()
 * minimum      : forwarded unchanged to ec_fop_data_allocate()
 * func, data   : optional callback that receives the result, and its data
 * fd           : open directory to read
 * size, offset : amount of data requested and position to start from
 * xdata        : optional extra data sent with the request
 *
 * When the fop cannot even be allocated the error (EIO) is reported
 * directly through 'func'. In any other case the fop is handed to
 * ec_manager(), with a non-zero error if its arguments could not be
 * referenced.
 */
void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
                int32_t minimum, fop_readdir_cbk_t func, void * data,
                fd_t * fd, size_t size, off_t offset, dict_t * xdata)
{
    ec_cbk_t callback = { .readdir = func };
    ec_fop_data_t * fop = NULL;
    int32_t error = EIO;

    gf_log("ec", GF_LOG_TRACE, "EC(READDIR) %p", frame);

    VALIDATE_OR_GOTO(this, out);
    GF_VALIDATE_OR_GOTO(this->name, frame, out);
    GF_VALIDATE_OR_GOTO(this->name, this->private, out);

    fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, 0, target, minimum,
                               ec_wind_readdir, ec_manager_readdir, callback,
                               data);
    if (fop == NULL)
    {
        goto out;
    }

    fop->size = size;
    fop->offset = offset;

    if (fd != NULL)
    {
        fop->fd = fd_ref(fd);
        if (fop->fd == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
                                             "file descriptor.");

            goto out;
        }
    }
    if (xdata != NULL)
    {
        fop->xdata = dict_ref(xdata);
        if (fop->xdata == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
                                             "dictionary.");

            goto out;
        }
    }

    error = 0;

out:
    if (fop != NULL)
    {
        ec_manager(fop, error);
    }
    else if (func != NULL)
    {
        /* The callback is optional everywhere else (see the checks on
         * fop->cbks.readdir), so it must not be called blindly here. */
        func(frame, NULL, this, -1, EIO, NULL, NULL);
    }
}
/* FOP: readdirp */
/*
 * Callback of a readdirp request sent to one subvolume (its index comes in
 * the cookie). Returned entries are adjusted with ec_adjust_readdir() and,
 * unless ec_dispatch_one_retry() decides to retry the request, the answer
 * is passed to the optional readdirp callback of the fop.
 */
int32_t ec_readdirp_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
                        int32_t op_ret, int32_t op_errno,
                        gf_dirent_t * entries, dict_t * xdata)
{
    ec_fop_data_t * fop = NULL;
    int32_t child = (int32_t)(uintptr_t)cookie;

    VALIDATE_OR_GOTO(this, out);
    GF_VALIDATE_OR_GOTO(this->name, frame, out);
    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
    GF_VALIDATE_OR_GOTO(this->name, this->private, out);

    fop = frame->local;

    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", child,
             frame, op_ret, op_errno);

    if (op_ret > 0)
    {
        ec_adjust_readdir(fop->xl->private, child, entries);
    }

    if (!ec_dispatch_one_retry(fop, child, op_ret, op_errno) &&
        (fop->cbks.readdirp != NULL))
    {
        fop->cbks.readdirp(fop->req_frame, fop, this, op_ret, op_errno,
                           entries, xdata);
    }

out:
    if (fop != NULL)
    {
        ec_complete(fop);
    }

    return 0;
}
/*
 * Sends the readdirp request of 'fop' (fd, size, offset and xdata) to the
 * translator ec->xl_list[idx]. The index travels as the cookie, which is
 * where ec_readdirp_cbk() reads it back from.
 */
void ec_wind_readdirp(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
STACK_WIND_COOKIE(fop->frame, ec_readdirp_cbk, (void *)(uintptr_t)idx,
ec->xl_list[idx], ec->xl_list[idx]->fops->readdirp,
fop->fd, fop->size, fop->offset, fop->xdata);
}
/*
 * Entry point of the readdirp fop. It shares its state machine
 * (ec_manager_readdir) with readdir.
 *
 * frame, this  : frame of the request and the ec translator
 * target       : forwarded unchanged to ec_fop_data_allocate()
 * minimum      : forwarded unchanged to ec_fop_data_allocate()
 * func, data   : optional callback that receives the result, and its data
 * fd           : open directory to read
 * size, offset : amount of data requested and position to start from
 * xdata        : optional extra data sent with the request
 *
 * When the fop cannot even be allocated the error (EIO) is reported
 * directly through 'func'. In any other case the fop is handed to
 * ec_manager(), with a non-zero error if its arguments could not be
 * referenced.
 */
void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
                 int32_t minimum, fop_readdirp_cbk_t func, void * data,
                 fd_t * fd, size_t size, off_t offset, dict_t * xdata)
{
    ec_cbk_t callback = { .readdirp = func };
    ec_fop_data_t * fop = NULL;
    int32_t error = EIO;

    gf_log("ec", GF_LOG_TRACE, "EC(READDIRP) %p", frame);

    VALIDATE_OR_GOTO(this, out);
    GF_VALIDATE_OR_GOTO(this->name, frame, out);
    GF_VALIDATE_OR_GOTO(this->name, this->private, out);

    fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIRP, 0, target,
                               minimum, ec_wind_readdirp, ec_manager_readdir,
                               callback, data);
    if (fop == NULL)
    {
        goto out;
    }

    fop->size = size;
    fop->offset = offset;

    if (fd != NULL)
    {
        fop->fd = fd_ref(fd);
        if (fop->fd == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
                                             "file descriptor.");

            goto out;
        }
    }
    if (xdata != NULL)
    {
        fop->xdata = dict_ref(xdata);
        if (fop->xdata == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
                                             "dictionary.");

            goto out;
        }
    }

    error = 0;

out:
    if (fop != NULL)
    {
        ec_manager(fop, error);
    }
    else if (func != NULL)
    {
        /* The callback is optional everywhere else (see the checks on
         * fop->cbks.readdirp), so it must not be called blindly here. */
        func(frame, NULL, this, -1, EIO, NULL, NULL);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,211 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __EC_FOPS_H__
#define __EC_FOPS_H__
#include "xlator.h"
#include "ec-data.h"
#include "ec-common.h"
/*
 * Identifiers of the fops that only exist inside this translator. They are
 * negative; ec_fop_name() treats non-negative ids as indexes into
 * gf_fop_list and negates the negative ones to index its own name table.
 */
#define EC_FOP_HEAL -1
#define EC_FOP_FHEAL -2
void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_access_cbk_t func, void *data, loc_t * loc,
int32_t mask, dict_t * xdata);
void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_create_cbk_t func, void *data, loc_t * loc,
int32_t flags, mode_t mode, mode_t umask, fd_t * fd,
dict_t * xdata);
void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_entrylk_cbk_t func, void *data,
const char * volume, loc_t * loc, const char * basename,
entrylk_cmd cmd, entrylk_type type, dict_t * xdata);
void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fentrylk_cbk_t func, void *data,
const char * volume, fd_t * fd, const char * basename,
entrylk_cmd cmd, entrylk_type type, dict_t * xdata);
void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_flush_cbk_t func, void *data, fd_t * fd,
dict_t * xdata);
void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fsync_cbk_t func, void *data, fd_t * fd,
int32_t datasync, dict_t * xdata);
void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fsyncdir_cbk_t func, void *data,
fd_t * fd, int32_t datasync, dict_t * xdata);
void ec_getxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_getxattr_cbk_t func, void *data,
loc_t * loc, const char * name, dict_t * xdata);
void ec_fgetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fgetxattr_cbk_t func, void *data,
fd_t * fd, const char * name, dict_t * xdata);
void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_heal_cbk_t func, void *data, loc_t * loc,
dict_t * xdata);
void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fheal_cbk_t func, void *data, fd_t * fd,
dict_t * xdata);
void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_inodelk_cbk_t func, void *data,
const char * volume, loc_t * loc, int32_t cmd,
struct gf_flock * flock, dict_t * xdata);
void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_finodelk_cbk_t func, void *data,
const char * volume, fd_t * fd, int32_t cmd,
struct gf_flock * flock, dict_t * xdata);
void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_link_cbk_t func, void *data, loc_t * oldloc,
loc_t * newloc, dict_t * xdata);
void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_lk_cbk_t func, void *data, fd_t * fd,
int32_t cmd, struct gf_flock * flock, dict_t * xdata);
void ec_lookup(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t * loc,
dict_t * xdata);
void ec_mkdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_mkdir_cbk_t func, void *data, loc_t * loc,
mode_t mode, mode_t umask, dict_t * xdata);
void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_mknod_cbk_t func, void *data, loc_t * loc,
mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata);
void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_open_cbk_t func, void *data, loc_t * loc,
int32_t flags, fd_t * fd, dict_t * xdata);
void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_opendir_cbk_t func, void *data,
loc_t * loc, fd_t * fd, dict_t * xdata);
void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t * fd,
size_t size, off_t offset, dict_t * xdata);
void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_readdirp_cbk_t func, void *data,
fd_t * fd, size_t size, off_t offset, dict_t * xdata);
void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_readlink_cbk_t func, void *data,
loc_t * loc, size_t size, dict_t * xdata);
void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_readv_cbk_t func, void *data, fd_t * fd,
size_t size, off_t offset, uint32_t flags, dict_t * xdata);
void ec_removexattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_removexattr_cbk_t func, void *data,
loc_t * loc, const char * name, dict_t * xdata);
void ec_fremovexattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
fd_t * fd, const char * name, dict_t * xdata);
void ec_rename(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_rename_cbk_t func, void *data,
loc_t * oldloc, loc_t * newloc, dict_t * xdata);
void ec_rmdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_rmdir_cbk_t func, void *data, loc_t * loc,
int xflags, dict_t * xdata);
void ec_setattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_setattr_cbk_t func, void *data,
loc_t * loc, struct iatt * stbuf, int32_t valid,
dict_t * xdata);
void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fsetattr_cbk_t func, void *data,
fd_t * fd, struct iatt * stbuf, int32_t valid,
dict_t * xdata);
void ec_setxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_setxattr_cbk_t func, void *data,
loc_t * loc, dict_t * dict, int32_t flags, dict_t * xdata);
void ec_fsetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fsetxattr_cbk_t func, void *data,
fd_t * fd, dict_t * dict, int32_t flags, dict_t * xdata);
void ec_stat(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_stat_cbk_t func, void *data, loc_t * loc,
dict_t * xdata);
void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fstat_cbk_t func, void *data, fd_t * fd,
dict_t * xdata);
void ec_statfs(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t * loc,
dict_t * xdata);
void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_symlink_cbk_t func, void *data,
const char * linkname, loc_t * loc, mode_t umask,
dict_t * xdata);
void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_truncate_cbk_t func, void *data,
loc_t * loc, off_t offset, dict_t * xdata);
void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_ftruncate_cbk_t func, void *data,
fd_t * fd, off_t offset, dict_t * xdata);
void ec_unlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t * loc,
int xflags, dict_t * xdata);
void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_writev_cbk_t func, void *data, fd_t * fd,
struct iovec * vector, int32_t count, off_t offset,
uint32_t flags, struct iobref * iobref, dict_t * xdata);
void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_xattrop_cbk_t func, void *data,
loc_t * loc, gf_xattrop_flags_t optype, dict_t * xattr,
dict_t * xdata);
void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fxattrop_cbk_t func, void *data,
fd_t * fd, gf_xattrop_flags_t optype, dict_t * xattr,
dict_t * xdata);
#endif /* __EC_FOPS_H__ */

File diff suppressed because it is too large Load Diff

10120
xlators/cluster/ec/src/ec-gf.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,114 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
/*
* File automatically generated on Thu Jan 26 12:08:19 2012
*
* DO NOT MODIFY
*
* Multiplications in a GF(2^8) with modulus 0x11D using XOR's
*
*/
#ifndef __EC_GF_H__
#define __EC_GF_H__
/* Parameters of the Galois field: GF(2^EC_GF_BITS) with modulus EC_GF_MOD. */
#define EC_GF_BITS 8
#define EC_GF_MOD 0x11D
/*
 * Loads 8 consecutive 16-byte words starting at 'addr' into xmm0..xmm7.
 * movdqa requires 'addr' to be aligned to 16 bytes.
 *
 * The asm reads memory that is not described by any operand, so a "memory"
 * clobber is needed to make the compiler complete pending stores to that
 * buffer before the asm is executed.
 */
#define ec_gf_load(addr) \
    do \
    { \
        __asm__ __volatile__ \
        ( \
            "\tmovdqa 0*16(%0), %%xmm0\n" \
            "\tmovdqa 1*16(%0), %%xmm1\n" \
            "\tmovdqa 2*16(%0), %%xmm2\n" \
            "\tmovdqa 3*16(%0), %%xmm3\n" \
            "\tmovdqa 4*16(%0), %%xmm4\n" \
            "\tmovdqa 5*16(%0), %%xmm5\n" \
            "\tmovdqa 6*16(%0), %%xmm6\n" \
            "\tmovdqa 7*16(%0), %%xmm7\n" \
            : \
            : "r" (addr) \
            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \
              "memory" \
        ); \
    } while (0)
/*
 * Stores xmm0..xmm7 into 8 consecutive 16-byte words starting at 'addr'.
 * movdqa requires 'addr' to be aligned to 16 bytes. The registers are only
 * read, so the only clobber is "memory".
 */
#define ec_gf_store(addr) \
do \
{ \
__asm__ __volatile__ \
( \
"\tmovdqa %%xmm0, 0*16(%0)\n" \
"\tmovdqa %%xmm1, 1*16(%0)\n" \
"\tmovdqa %%xmm2, 2*16(%0)\n" \
"\tmovdqa %%xmm3, 3*16(%0)\n" \
"\tmovdqa %%xmm4, 4*16(%0)\n" \
"\tmovdqa %%xmm5, 5*16(%0)\n" \
"\tmovdqa %%xmm6, 6*16(%0)\n" \
"\tmovdqa %%xmm7, 7*16(%0)\n" \
: \
: "r" (addr) \
: "memory" \
); \
} while (0)
/*
 * Sets xmm0..xmm7 to zero.
 *
 * This is an extended asm statement (it has a clobber list), so a literal
 * '%' in the template has to be written as '%%'. With a single '%' the
 * compiler tries to parse "%xmm0" as an operand reference and rejects the
 * statement as soon as the macro is expanded.
 */
#define ec_gf_clear() \
    do \
    { \
        __asm__ __volatile__ \
        ( \
            "\tpxor %%xmm0, %%xmm0\n" \
            "\tpxor %%xmm1, %%xmm1\n" \
            "\tpxor %%xmm2, %%xmm2\n" \
            "\tpxor %%xmm3, %%xmm3\n" \
            "\tpxor %%xmm4, %%xmm4\n" \
            "\tpxor %%xmm5, %%xmm5\n" \
            "\tpxor %%xmm6, %%xmm6\n" \
            "\tpxor %%xmm7, %%xmm7\n" \
            : \
            : \
            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" \
        ); \
    } while (0)
/*
 * XORs 8 consecutive 16-byte words starting at 'addr' into xmm0..xmm7.
 * pxor with a memory operand requires 'addr' to be aligned to 16 bytes.
 *
 * The asm reads memory that is not described by any operand, so a "memory"
 * clobber is needed to make the compiler complete pending stores to that
 * buffer before the asm is executed.
 */
#define ec_gf_xor(addr) \
    do \
    { \
        __asm__ __volatile__ \
        ( \
            "\tpxor 0*16(%0), %%xmm0\n" \
            "\tpxor 1*16(%0), %%xmm1\n" \
            "\tpxor 2*16(%0), %%xmm2\n" \
            "\tpxor 3*16(%0), %%xmm3\n" \
            "\tpxor 4*16(%0), %%xmm4\n" \
            "\tpxor 5*16(%0), %%xmm5\n" \
            "\tpxor 6*16(%0), %%xmm6\n" \
            "\tpxor 7*16(%0), %%xmm7\n" \
            : \
            : "r" (addr) \
            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \
              "memory" \
        ); \
    } while (0)
extern void (* ec_gf_mul_table[])(void);
#endif /* __EC_GF_H__ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,594 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#include <libgen.h>
#include "byte-order.h"
#include "ec-mem-types.h"
#include "ec-fops.h"
#include "ec-helpers.h"
/*
 * Constants used by ec_itransform() and ec_deitransform() to combine a
 * directory offset and a subvolume index into a single 64-bit value.
 * TOP_BIT flags the values in which the index replaced the low bits of the
 * offset instead of being added to a multiple of the number of nodes.
 */
#define BACKEND_D_OFF_BITS 63
#define PRESENT_D_OFF_BITS 63
#define ONE 1ULL
#define MASK (~0ULL)
#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
static const char * ec_fop_list[] =
{
[-EC_FOP_HEAL] = "HEAL"
};
/*
 * Writes the binary representation of 'value' at the end of the buffer
 * 'str' of 'size' bytes and returns a pointer to its first character. At
 * least 'digits' digits are written (zero padded); more are used if the
 * value needs them. If the buffer cannot hold the digits plus the
 * terminating null, the constant string "<buffer too small>" is returned
 * instead.
 */
const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits)
{
    char * cursor;
    size_t room;

    if (size == 0)
    {
        return "<buffer too small>";
    }

    /* The string is built backwards, starting with the terminator. */
    cursor = str + size - 1;
    *cursor = 0;
    room = size - 1;

    for (; (value != 0) || (digits > 0); value >>= 1, digits--)
    {
        if (room == 0)
        {
            return "<buffer too small>";
        }
        room--;

        *--cursor = '0' + (value & 1);
    }

    return cursor;
}
/*
 * Returns a printable name for a fop id. Non-negative ids are standard fops
 * and are looked up in gf_fop_list. Negative ids are private to this
 * translator and are looked up, negated, in ec_fop_list. A negative id
 * without a name in that table yields "<unknown>" instead of a NULL pointer
 * or a read past the end of the table.
 */
const char * ec_fop_name(int32_t id)
{
    size_t idx;

    if (id >= 0)
    {
        return gf_fop_list[id];
    }

    /* Negate in 64 bits so that INT32_MIN cannot overflow. */
    idx = (size_t)(-(int64_t)id);
    if ((idx >= sizeof(ec_fop_list) / sizeof(ec_fop_list[0])) ||
        (ec_fop_list[idx] == NULL))
    {
        return "<unknown>";
    }

    return ec_fop_list[idx];
}
/*
 * Logs, at trace level, the state of 'fop' followed by a caller supplied
 * printf-like message. 'event' identifies the point the trace comes from.
 * The mask, remaining and bad members are dumped in binary with one digit
 * per node. If the message cannot be formatted a fixed text replaces it.
 */
void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)
{
    char mask_bits[32], remaining_bits[32], bad_bits[32];
    ec_t * ec = fop->xl->private;
    char * text;
    va_list ap;
    int32_t rc;

    va_start(ap, fmt);
    rc = vasprintf(&text, fmt, ap);
    va_end(ap);

    if (rc < 0)
    {
        /* Nothing was allocated: use a constant that must not be freed. */
        text = "<memory allocation error>";
    }

    gf_log("ec", GF_LOG_TRACE, "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] "
                               "frame=%p/%p, min/exp=%d/%d, err=%d state=%d "
                               "{%s:%s:%s} %s",
           event, ec_fop_name(fop->id), fop, fop->parent, fop->refs,
           fop->winds, fop->jobs, fop->req_frame, fop->frame, fop->minimum,
           fop->expected, fop->error, fop->state,
           ec_bin(mask_bits, sizeof(mask_bits), fop->mask, ec->nodes),
           ec_bin(remaining_bits, sizeof(remaining_bits), fop->remaining,
                  ec->nodes),
           ec_bin(bad_bits, sizeof(bad_bits), fop->bad, ec->nodes), text);

    if (rc >= 0)
    {
        free(text);
    }
}
/*
 * Encodes the index of a subvolume into a directory offset returned by it.
 * The value -1 is returned unchanged. Offsets that are small enough are
 * encoded as offset * nodes + idx. Larger ones get TOP_BIT set and their
 * low 'bits_for_nodes' bits replaced by the index. ec_deitransform()
 * undoes the encoding.
 */
uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset)
{
    uint64_t small_mask;
    int32_t node_bits;

    if (offset == -1ULL)
    {
        return -1ULL;
    }

    node_bits = ec->bits_for_nodes;
    small_mask = PRESENT_MASK >> (node_bits + 1);

    if ((offset & ~small_mask) == 0)
    {
        return (offset * ec->nodes) + idx;
    }

    return TOP_BIT | ((offset >> SHIFT_BITS) & (MASK << node_bits)) | idx;
}
/*
 * Splits an offset built by ec_itransform() into the subvolume index
 * (stored in *idx) and the offset to use on that subvolume (returned).
 * TOP_BIT tells which of the two encodings was used.
 */
uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset)
{
    uint64_t idx_mask;

    if ((offset & TOP_BIT) == 0)
    {
        *idx = offset % ec->nodes;

        return offset / ec->nodes;
    }

    /* The low 'bits_for_nodes' bits hold the index. */
    idx_mask = ~(MASK << ec->bits_for_nodes);
    *idx = offset & idx_mask;

    return ((offset & ~TOP_BIT) & ~idx_mask) << SHIFT_BITS;
}
/*
 * Returns the number of bits set in 'n'.
 */
int32_t ec_bits_count(uint64_t n)
{
    /* Counts per pair of bits, then per nibble, then per byte. */
    n -= (n >> 1) & 0x5555555555555555ULL;
    n = (n & 0x3333333333333333ULL) + ((n >> 2) & 0x3333333333333333ULL);
    n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL;

    /* Each byte holds at most 8, so the sum of the eight bytes (at most
     * 64) can be accumulated into the top byte without overflowing. */
    return (n * 0x0101010101010101ULL) >> 56;
}
/*
 * Returns the zero-based position of the lowest bit set in 'n', or -1 if
 * 'n' is 0.
 */
int32_t ec_bits_index(uint64_t n)
{
    /* ffsll() numbers bits from 1 and returns 0 when there is none. */
    int32_t position = ffsll(n);

    return position - 1;
}
/*
 * Clears the lowest bit set in *n and returns its zero-based position. If
 * *n is 0 it is left untouched and -1 is returned.
 */
int32_t ec_bits_consume(uint64_t * n)
{
    uint64_t current = *n;
    /* Two's complement trick that isolates the lowest bit set. */
    uint64_t lowest = current & (~current + 1);

    *n = current ^ lowest;

    return ffsll(lowest) - 1;
}
/*
 * Copies up to 'size' bytes into 'dst' from the data described by the
 * 'count' elements of 'vector', starting 'offset' bytes after its
 * beginning. Returns the number of bytes copied, which is smaller than
 * 'size' when the vector does not contain enough data.
 */
size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
                      off_t offset, size_t size)
{
    int32_t pos = 0;
    size_t copied = 0, chunk = 0;

    /* Skip the elements that lie completely before the offset. */
    while ((pos < count) && !(offset < vector[pos].iov_len))
    {
        offset -= vector[pos].iov_len;
        pos++;
    }

    /* Only the first element copied can have a non-zero offset. */
    for (; (pos < count) && (size > 0); pos++)
    {
        chunk = size;
        if (chunk > vector[pos].iov_len - offset)
        {
            chunk = vector[pos].iov_len - offset;
        }
        memcpy(dst, vector[pos].iov_base + offset, chunk);
        offset = 0;
        dst += chunk;
        copied += chunk;
        size -= chunk;
    }

    return copied;
}
/*
 * Stores 'value', converted with hton64(), as a binary item named 'key' in
 * 'dict'. Returns 0 on success, -1 if memory cannot be allocated, or the
 * error returned by dict_set_bin().
 *
 * The dictionary only takes ownership of the buffer when dict_set_bin()
 * succeeds, so the buffer is released here when it fails.
 */
int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value)
{
    uint64_t * ptr;
    int32_t ret;

    ptr = GF_MALLOC(sizeof(value), gf_common_mt_char);
    if (ptr == NULL)
    {
        return -1;
    }

    *ptr = hton64(value);

    ret = dict_set_bin(dict, key, ptr, sizeof(value));
    if (ret != 0)
    {
        GF_FREE(ptr);
    }

    return ret;
}
/*
 * Reads the 64-bit number stored as a binary item named 'key' in 'dict',
 * converts it with ntoh64(), stores it in *value and removes the item from
 * the dictionary. Returns 0 on success. Returns -1, leaving *value and the
 * dictionary untouched, if 'dict' is NULL, the item cannot be read or its
 * length is not that of a uint64_t.
 */
int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value)
{
    void * raw;
    int32_t raw_len;

    if (dict == NULL)
    {
        return -1;
    }
    if (dict_get_ptr_and_len(dict, key, &raw, &raw_len) != 0)
    {
        return -1;
    }
    if (raw_len != sizeof(uint64_t))
    {
        return -1;
    }

    *value = ntoh64(*(uint64_t *)raw);

    dict_del(dict, key);

    return 0;
}
/*
 * Reconciles two GFIDs. A null 'src' is always accepted. A null 'dst' is
 * filled with 'src'. When both are set they have to be equal. Returns 1 if
 * they are compatible, 0 (after logging a warning) if they differ.
 */
int32_t ec_loc_gfid_check(xlator_t * xl, uuid_t dst, uuid_t src)
{
    int32_t compatible = 1;

    if (!uuid_is_null(src))
    {
        if (uuid_is_null(dst))
        {
            uuid_copy(dst, src);
        }
        else if (uuid_compare(dst, src) != 0)
        {
            gf_log(xl->name, GF_LOG_WARNING, "Mismatching GFID's in loc");

            compatible = 0;
        }
    }

    return compatible;
}
/*
 * Builds in 'parent' the location of the directory that contains 'loc'.
 *
 * xl     : translator, only used for logging
 * loc    : location whose parent is wanted; its path must be set
 * parent : receives the location of the parent. It is always overwritten
 *          and is left wiped when an error is returned at the end of the
 *          function
 * name   : if not NULL, receives a newly allocated copy of the last
 *          component of the path (NULL for the root directory). It is
 *          released and reset to NULL when an error is detected after it
 *          has been set
 *
 * If 'loc' has no parent inode it has to be the root directory, which is
 * returned as its own parent.
 *
 * Returns 0 on success, or EINVAL/ENOMEM on error.
 */
int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent, char ** name)
{
    char * str = NULL;
    int32_t error = 0;

    memset(parent, 0, sizeof(loc_t));

    if (loc->path == NULL)
    {
        gf_log(xl->name, GF_LOG_ERROR, "inode path missing in loc_t: %p", loc->parent);

        return EINVAL;
    }

    if (loc->parent == NULL)
    {
        if ((loc->inode == NULL) || !__is_root_gfid(loc->inode->gfid) ||
            (strcmp(loc->path, "/") != 0))
        {
            gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for "
                                           "loc_t (path=%s, name=%s)",
                   loc->path, loc->name);

            return EINVAL;
        }

        if (loc_copy(parent, loc) != 0)
        {
            return ENOMEM;
        }

        parent->name = NULL;

        if (name != NULL)
        {
            *name = NULL;
        }
    }
    else
    {
        if (uuid_is_null(loc->parent->gfid) && (uuid_is_null(loc->pargfid)))
        {
            gf_log(xl->name, GF_LOG_ERROR, "Invalid parent inode "
                                           "(path=%s, name=%s)",
                   loc->path, loc->name);

            return EINVAL;
        }
        uuid_copy(parent->gfid, loc->pargfid);

        str = gf_strdup(loc->path);
        if (str == NULL)
        {
            /* 'str' is NULL here: log the path that could not be copied. */
            gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path "
                                           "'%s'", loc->path);

            return ENOMEM;
        }
        if (name != NULL)
        {
            *name = gf_strdup(basename(str));
            if (*name == NULL)
            {
                gf_log(xl->name, GF_LOG_ERROR, "Unable to get basename "
                                               "of '%s'", str);

                error = ENOMEM;

                goto out;
            }
            /* basename() may have modified the buffer: restore it. */
            strcpy(str, loc->path);
        }
        parent->path = gf_strdup(dirname(str));
        if (parent->path == NULL)
        {
            gf_log(xl->name, GF_LOG_ERROR, "Unable to get dirname of "
                                           "'%s'", str);

            error = ENOMEM;

            goto out;
        }
        parent->name = strrchr(parent->path, '/');
        if (parent->name == NULL)
        {
            gf_log(xl->name, GF_LOG_ERROR, "Invalid path name (%s)",
                   parent->path);

            error = EINVAL;

            goto out;
        }
        parent->name++;
        parent->inode = inode_ref(loc->parent);
    }

    /* A GFID mismatch invalidates the result, so it has to be reported as
     * an error instead of returning success with a wiped 'parent'. */
    if ((loc->inode != NULL) &&
        !ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid))
    {
        error = EINVAL;
    }

out:
    GF_FREE(str);

    if (error != 0)
    {
        /* Every path that reaches this point has already set *name. */
        if (name != NULL)
        {
            GF_FREE(*name);
            *name = NULL;
        }

        loc_wipe(parent);
    }

    return error;
}
/*
 * Makes 'loc' consistent with the information available about its inode.
 *
 * If 'inode' is given and differs from the one in 'loc', it replaces it
 * (adjusting references) and its GFID is copied into the loc. Otherwise the
 * GFID of the loc is checked against the inode it already has. The GFID is
 * then checked against 'iatt' (if given) and the parent GFID against the
 * parent inode (if any).
 *
 * Returns 1 on success or 0 if any GFID check fails. A loc that ends
 * without GFID only generates a warning.
 */
int32_t ec_loc_prepare(xlator_t * xl, loc_t * loc, inode_t * inode,
                       struct iatt * iatt)
{
    if ((inode != NULL) && (loc->inode != inode))
    {
        if (loc->inode != NULL)
        {
            inode_unref(loc->inode);
        }
        loc->inode = inode_ref(inode);

        uuid_copy(loc->gfid, inode->gfid);
    }
    else if ((loc->inode != NULL) &&
             !ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid))
    {
        return 0;
    }

    if ((iatt != NULL) && !ec_loc_gfid_check(xl, loc->gfid, iatt->ia_gfid))
    {
        return 0;
    }

    if ((loc->parent != NULL) &&
        !ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid))
    {
        return 0;
    }

    if (uuid_is_null(loc->gfid))
    {
        gf_log(xl->name, GF_LOG_WARNING, "GFID not available for inode");
    }

    return 1;
}
/*
 * Initializes 'loc' for the file referenced by 'fd': the location stored in
 * the fd context (when there is one) is copied and then completed with the
 * inode of the fd through ec_loc_prepare(). Returns 1 on success. On
 * failure returns 0 and 'loc' holds nothing that needs to be released.
 */
int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd)
{
    ec_fd_t * fd_ctx;

    memset(loc, 0, sizeof(*loc));

    fd_ctx = ec_fd_get(fd, xl);
    if ((fd_ctx != NULL) && (loc_copy(loc, &fd_ctx->loc) != 0))
    {
        return 0;
    }

    if (!ec_loc_prepare(xl, loc, fd->inode, NULL))
    {
        loc_wipe(loc);

        return 0;
    }

    return 1;
}
/*
 * Initializes 'dst' as a copy of 'src' validated with ec_loc_prepare().
 * Returns 1 on success. On failure returns 0 and 'dst' holds nothing that
 * needs to be released.
 */
int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src)
{
    memset(dst, 0, sizeof(*dst));

    if (loc_copy(dst, src) != 0)
    {
        return 0;
    }

    if (!ec_loc_prepare(xl, dst, NULL, NULL))
    {
        loc_wipe(dst);

        return 0;
    }

    return 1;
}
/*
 * Sets the lock owner of the call stack 'frame' belongs to, deriving it
 * from the pointer 'owner' through set_lk_owner_from_ptr().
 */
void ec_owner_set(call_frame_t * frame, void * owner)
{
set_lk_owner_from_ptr(&frame->root->lk_owner, owner);
}
/*
 * Copies the lock owner 'owner' (its length and that many bytes of data)
 * into the call stack 'frame' belongs to.
 */
void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner)
{
    memcpy(frame->root->lk_owner.data, owner->data, owner->len);
    frame->root->lk_owner.len = owner->len;
}
/*
 * Returns the context this translator keeps in 'inode', creating a zeroed
 * one if there is none yet. Returns NULL if it cannot be allocated or
 * stored. The public wrapper ec_inode_get() calls this function with
 * inode->lock held.
 */
ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl)
{
    ec_inode_t * ctx = NULL;
    uint64_t value = 0;

    if ((__inode_ctx_get(inode, xl, &value) == 0) && (value != 0))
    {
        return (ec_inode_t *)(uintptr_t)value;
    }

    ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_inode_t);
    if (ctx == NULL)
    {
        return NULL;
    }
    memset(ctx, 0, sizeof(*ctx));

    value = (uint64_t)(uintptr_t)ctx;
    if (__inode_ctx_set(inode, xl, &value) != 0)
    {
        GF_FREE(ctx);

        return NULL;
    }

    return ctx;
}
/*
 * Returns the context this translator keeps in 'inode', creating it if
 * needed (see __ec_inode_get()). The lookup is done with inode->lock held.
 * Returns NULL on failure.
 */
ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
{
ec_inode_t * ctx = NULL;
LOCK(&inode->lock);
ctx = __ec_inode_get(inode, xl);
UNLOCK(&inode->lock);
return ctx;
}
/*
 * Returns the context this translator keeps in 'fd', creating a zeroed one
 * if there is none yet. Returns NULL if it cannot be allocated or stored.
 * The public wrapper ec_fd_get() calls this function with fd->lock held.
 */
ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
{
    ec_fd_t * ctx = NULL;
    uint64_t value = 0;

    if ((__fd_ctx_get(fd, xl, &value) == 0) && (value != 0))
    {
        return (ec_fd_t *)(uintptr_t)value;
    }

    ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t);
    if (ctx == NULL)
    {
        return NULL;
    }
    memset(ctx, 0, sizeof(*ctx));

    value = (uint64_t)(uintptr_t)ctx;
    if (__fd_ctx_set(fd, xl, value) != 0)
    {
        GF_FREE(ctx);

        return NULL;
    }

    return ctx;
}
/*
 * Returns the context this translator keeps in 'fd', creating it if needed
 * (see __ec_fd_get()). The lookup is done with fd->lock held. Returns NULL
 * on failure.
 */
ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl)
{
ec_fd_t * ctx = NULL;
LOCK(&fd->lock);
ctx = __ec_fd_get(fd, xl);
UNLOCK(&fd->lock);
return ctx;
}
/*
 * Rounds *offset down to a multiple of the stripe size and returns the
 * number of bytes that were removed. If 'scale' is not zero the rounded
 * offset is also divided by the number of fragments.
 */
size_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale)
{
    size_t position = *offset;
    size_t head = position % ec->stripe_size;

    position -= head;
    if (scale)
    {
        position /= ec->fragments;
    }
    *offset = position;

    return head;
}
/*
 * Rounds 'size' up to a multiple of the stripe size and returns it. If
 * 'scale' is not zero the rounded size is also divided by the number of
 * fragments.
 */
size_t ec_adjust_size(ec_t * ec, size_t size, int32_t scale)
{
    size_t rounded = size;

    rounded += ec->stripe_size - 1;
    rounded -= rounded % ec->stripe_size;

    if (!scale)
    {
        return rounded;
    }

    rounded /= ec->fragments;

    return rounded;
}

View File

@ -0,0 +1,59 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __EC_HELPERS_H__
#define __EC_HELPERS_H__
#include "ec-data.h"
/* Formatting / tracing helpers. */
const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits);
const char * ec_fop_name(int32_t id);
void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...);
/* Offset <-> (index, offset) transformations. */
uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset);
uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset);
/* Bit manipulation helpers on 64-bit masks. */
int32_t ec_bits_count(uint64_t n);
int32_t ec_bits_index(uint64_t n);
int32_t ec_bits_consume(uint64_t * n);
/* I/O vector helper. */
size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
off_t offset, size_t size);
/* Dictionary helpers for numeric values. */
int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value);
int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value);
/* loc_t helpers. */
int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent,
char ** name);
int32_t ec_loc_prepare(xlator_t * xl, loc_t * loc, inode_t * inode,
struct iatt * iatt);
int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd);
int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src);
/* Lock owner helpers: both write frame->root->lk_owner. */
void ec_owner_set(call_frame_t * frame, void * owner);
void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner);
/* Per-inode / per-fd context accessors. They return the existing context or
 * create a zero-filled one, and return NULL on failure. The __-prefixed
 * variants do no locking themselves; the unprefixed ones call them with
 * inode->lock / fd->lock held. */
ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl);
ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl);
ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl);
ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl);
/* Stripe alignment: ec_adjust_offset() aligns *offset down to a stripe
 * boundary and returns the remainder; ec_adjust_size() rounds size up to a
 * stripe multiple. With a non-zero 'scale' the result is divided by
 * ec->fragments. */
size_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale);
size_t ec_adjust_size(ec_t * ec, size_t size, int32_t scale);
#endif /* __EC_HELPERS_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,39 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __EC_MEM_TYPES_H__
#define __EC_MEM_TYPES_H__
#include "mem-types.h"
/* Memory accounting types of the ec translator. Numbering starts right after
 * the common types (gf_common_mt_end + 1). ec_mt_end is the end marker;
 * mem_acct_init() passes ec_mt_end + 1 to xlator_mem_acct_init(). */
enum gf_ec_mem_types_
{
ec_mt_ec_t = gf_common_mt_end + 1,
ec_mt_xlator_t,
ec_mt_ec_fop_data_t,
ec_mt_ec_cbk_data_t,
ec_mt_ec_inode_t,
ec_mt_ec_fd_t,
ec_mt_ec_lock_t,
ec_mt_ec_heal_t,
ec_mt_end
};
#endif /* __EC_MEM_TYPES_H__ */

View File

@ -0,0 +1,182 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#include <string.h>
#include <inttypes.h>
#include "ec-method.h"
/* NOTE(review): ec-method.h, included above, defines EC_METHOD_WORD_SIZE with
 * the same value; this repetition looks redundant. */
#define EC_METHOD_WORD_SIZE 16
/* Power (exponent -> element) and logarithm (element -> exponent) lookup
 * tables, filled by ec_method_initialize(). Both hold twice EC_METHOD_SIZE
 * entries so that sums of two logarithms can be used as an index directly. */
static uint32_t GfPow[EC_METHOD_SIZE << 1];
static uint32_t GfLog[EC_METHOD_SIZE << 1];
/* Fills the GfPow / GfLog lookup tables used by ec_method_mul() and
 * ec_method_div().
 *
 * Each power is obtained from the previous one by a left shift; when the
 * result reaches EC_METHOD_SIZE it is reduced by XOR-ing with EC_GF_MOD
 * (defined outside this file). Every entry is stored twice, the second copy
 * EC_METHOD_SIZE - 1 positions further. */
void ec_method_initialize(void)
{
    uint32_t exponent;
    uint32_t element = 1;

    GfPow[0] = 1;
    GfLog[0] = EC_METHOD_SIZE;

    for (exponent = 1; exponent < EC_METHOD_SIZE; exponent++)
    {
        element <<= 1;
        if (element >= EC_METHOD_SIZE)
        {
            element ^= EC_GF_MOD;
        }

        GfPow[exponent] = element;
        GfPow[exponent + EC_METHOD_SIZE - 1] = element;

        GfLog[element] = exponent;
        GfLog[element + EC_METHOD_SIZE - 1] = exponent;
    }
}
/* Multiplies two field elements through the logarithm tables. The product is
 * 0 when either operand is 0. */
static uint32_t ec_method_mul(uint32_t a, uint32_t b)
{
    if ((a == 0) || (b == 0))
    {
        return 0;
    }

    return GfPow[GfLog[a] + GfLog[b]];
}
/* Divides field element 'a' by 'b' through the logarithm tables.
 *
 * Returns EC_METHOD_SIZE (a value outside the valid element range) when 'b'
 * is 0, and 0 when only 'a' is 0. */
static uint32_t ec_method_div(uint32_t a, uint32_t b)
{
    if (b == 0)
    {
        return EC_METHOD_SIZE;
    }
    if (a == 0)
    {
        return 0;
    }

    return GfPow[EC_METHOD_SIZE - 1 + GfLog[a] - GfLog[b]];
}
/* Produces one encoded fragment from 'in'.
 *
 * size    - number of input bytes; only whole groups of
 *           EC_METHOD_CHUNK_SIZE * columns bytes are processed.
 * columns - number of input chunks combined into each output chunk.
 * row     - fragment index; row + 1 is the multiplier used between chunks.
 * in      - input buffer, consumed 'columns' chunks at a time.
 * out     - output buffer, receives one chunk per group.
 *
 * For each group the first chunk is loaded and then, for every further
 * chunk, the running value is multiplied by (row + 1) and the chunk is
 * XOR-ed in. The ec_gf_* primitives are defined in ec-gf.h.
 *
 * Returns the number of bytes written to 'out'. */
size_t ec_method_encode(size_t size, uint32_t columns, uint32_t row,
                        uint8_t * in, uint8_t * out)
{
    const size_t groups = size / (EC_METHOD_CHUNK_SIZE * columns);
    const uint32_t factor = row + 1;
    uint32_t group, column;

    for (group = 0; group < groups; group++)
    {
        ec_gf_load(in);
        in += EC_METHOD_CHUNK_SIZE;

        for (column = 1; column < columns; column++)
        {
            ec_gf_mul_table[factor]();
            ec_gf_xor(in);
            in += EC_METHOD_CHUNK_SIZE;
        }

        ec_gf_store(out);
        out += EC_METHOD_CHUNK_SIZE;
    }

    return groups * EC_METHOD_CHUNK_SIZE;
}
/* Rebuilds the original data from 'columns' encoded fragments.
 *
 * size    - number of bytes available in each input fragment; only whole
 *           chunks of EC_METHOD_CHUNK_SIZE bytes are processed.
 * columns - number of fragments supplied (and of chunks produced per group).
 * rows    - rows[i] is the fragment index that was used to encode in[i].
 * in      - in[i] points to the data of fragment rows[i]. The pointers stored
 *           in this array are advanced as chunks are processed.
 * out     - output buffer; receives 'columns' chunks per processed group.
 *
 * Returns the number of bytes written to 'out'.
 *
 * The function builds the coefficient matrix implied by 'rows', inverts it
 * with Gauss-Jordan elimination and then evaluates every row of the inverse
 * against the input chunks. */
size_t ec_method_decode(size_t size, uint32_t columns, uint32_t * rows,
uint8_t ** in, uint8_t * out)
{
uint32_t i, j, k;
uint32_t f, off, mask;
/* inv has one extra column used as a non-zero end marker (see below). */
uint8_t inv[EC_METHOD_MAX_FRAGMENTS][EC_METHOD_MAX_FRAGMENTS + 1];
uint8_t mtx[EC_METHOD_MAX_FRAGMENTS][EC_METHOD_MAX_FRAGMENTS];
uint8_t * p[EC_METHOD_MAX_FRAGMENTS];
/* From here on 'size' counts chunks per fragment. */
size /= EC_METHOD_CHUNK_SIZE;
memset(inv, 0, sizeof(inv));
memset(mtx, 0, sizeof(mtx));
mask = 0;
/* Start inv as the identity matrix and set the end marker of each row. */
for (i = 0; i < columns; i++)
{
inv[i][i] = 1;
inv[i][columns] = 1;
}
k = 0;
for (i = 0; i < columns; i++)
{
/* NOTE(review): mask is initialized to 0 and only ever shifted right, so
 * this loop never runs and k always equals i here. */
while ((mask & 1) != 0)
{
k++;
mask >>= 1;
}
/* Row k holds descending powers of (rows[i] + 1), ending in 1: the same
 * multipliers ec_method_encode() applies for that fragment. */
mtx[k][columns - 1] = 1;
for (j = columns - 1; j > 0; j--)
{
mtx[k][j - 1] = ec_method_mul(mtx[k][j], rows[i] + 1);
}
/* Remember the start of the fragment; reads below go through p[]. */
p[k] = in[i];
k++;
mask >>= 1;
}
/* Gauss-Jordan elimination: reduce mtx to the identity while applying the
 * same operations to inv. No row exchange is done.
 * NOTE(review): if a pivot f is 0, ec_method_div() returns EC_METHOD_SIZE,
 * which is then stored into a uint8_t element - confirm this cannot happen
 * for valid 'rows'. */
for (i = 0; i < columns; i++)
{
f = mtx[i][i];
/* Scale row i so that its pivot becomes 1. */
for (j = 0; j < columns; j++)
{
mtx[i][j] = ec_method_div(mtx[i][j], f);
inv[i][j] = ec_method_div(inv[i][j], f);
}
/* Cancel column i in every other row. */
for (j = 0; j < columns; j++)
{
if (i != j)
{
f = mtx[j][i];
for (k = 0; k < columns; k++)
{
mtx[j][k] ^= ec_method_mul(mtx[i][k], f);
inv[j][k] ^= ec_method_mul(inv[i][k], f);
}
}
}
}
off = 0;
/* f now counts processed chunks; off is the byte offset inside fragments. */
for (f = 0; f < size; f++)
{
for (i = 0; i < columns; i++)
{
ec_gf_load(p[0] + off);
j = 0;
/* Walk the non-zero coefficients of row i of inv. */
while (j < columns)
{
k = j + 1;
/* The marker at inv[i][columns] guarantees that this search stops. */
while (inv[i][k] == 0)
{
k++;
}
/* Multiply by the ratio between the current coefficient and the next
 * non-zero one, then add the chunk that belongs to the latter. */
ec_gf_mul_table[ec_method_div(inv[i][j], inv[i][k])]();
if (k < columns)
{
ec_gf_xor(p[k] + off);
}
j = k;
}
ec_gf_store(out);
out += EC_METHOD_CHUNK_SIZE;
/* This advances the caller's pointer array; the reads above use
 * p[] + off instead. */
in[i] += EC_METHOD_CHUNK_SIZE;
}
off += EC_METHOD_CHUNK_SIZE;
}
return size * EC_METHOD_CHUNK_SIZE * columns;
}

View File

@ -0,0 +1,42 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __EC_METHOD_H__
#define __EC_METHOD_H__
#include "ec-gf.h"
/* Upper bound for the number of fragments; it sizes the matrices used by
 * ec_method_decode(). */
#define EC_METHOD_MAX_FRAGMENTS 16
#define EC_METHOD_WORD_SIZE 16
#define EC_BUFFER_ALIGN_SIZE EC_METHOD_WORD_SIZE
/* Number of bits per field element and number of elements in the field. */
#define EC_METHOD_BITS EC_GF_BITS
#define EC_METHOD_SIZE (1 << (EC_METHOD_BITS))
/* Size in bytes of the unit processed by each load/xor/store step of
 * ec_method_encode() and ec_method_decode(). */
#define EC_METHOD_CHUNK_SIZE (EC_METHOD_WORD_SIZE * EC_METHOD_BITS)
/* Builds the lookup tables; init() calls it before reporting success. */
void ec_method_initialize(void);
/* Encodes 'size' bytes from 'in' into the fragment identified by 'row';
 * returns the number of bytes written to 'out'. */
size_t ec_method_encode(size_t size, uint32_t columns, uint32_t row,
uint8_t * in, uint8_t * out);
/* Decodes 'columns' fragments (in[i] was encoded with row rows[i]) of 'size'
 * bytes each; returns the number of bytes written to 'out'. */
size_t ec_method_decode(size_t size, uint32_t columns, uint32_t * rows,
uint8_t ** in, uint8_t * out);
#endif /* __EC_METHOD_H__ */

904
xlators/cluster/ec/src/ec.c Normal file
View File

@ -0,0 +1,904 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#include "defaults.h"
#include "ec-mem-types.h"
#include "ec-common.h"
#include "ec-fops.h"
#include "ec-method.h"
#include "ec.h"
#define EC_MAX_FRAGMENTS EC_METHOD_MAX_FRAGMENTS
/* The maximum number of nodes is derived from the maximum number of allowed
 * fragments, using the rule that the redundancy cannot be equal to or greater
 * than the number of fragments.
 *
 * NOTE(review): the expression below allows at most
 * (EC_MAX_FRAGMENTS - 1) / 2 redundancy nodes on top of EC_MAX_FRAGMENTS,
 * which is stricter than the rule stated above - confirm this is intended.
 */
#define EC_MAX_NODES (EC_MAX_FRAGMENTS + ((EC_MAX_FRAGMENTS - 1) / 2))
/* Reads the "redundancy" option and derives the remaining geometry of the
 * volume from it and from ec->nodes (which must already be set): fragments,
 * bits_for_nodes, node_mask, fragment_size and stripe_size.
 *
 * Returns 0 on success, or EINVAL if the option cannot be read or the
 * resulting configuration is not acceptable. */
int32_t ec_parse_options(xlator_t * this)
{
    ec_t * ec = this->private;
    int32_t error = EINVAL;
    uintptr_t mask;

    /* Jumps to 'out' on failure. */
    GF_OPTION_INIT("redundancy", ec->redundancy, int32, out);

    ec->fragments = ec->nodes - ec->redundancy;
    if ((ec->redundancy < 1) || (ec->redundancy >= ec->fragments) ||
        (ec->fragments > EC_MAX_FRAGMENTS))
    {
        gf_log(this->name, GF_LOG_ERROR, "Invalid redundancy (must be between "
                                         "1 and %d)", (ec->nodes - 1) / 2);

        goto out;
    }

    /* Smallest number of bits (at least 1) able to number all nodes. */
    ec->bits_for_nodes = 1;
    for (mask = 2; ec->nodes > mask; mask <<= 1)
    {
        ec->bits_for_nodes++;
    }

    ec->node_mask = (1ULL << ec->nodes) - 1ULL;
    ec->fragment_size = EC_METHOD_CHUNK_SIZE;
    ec->stripe_size = ec->fragment_size * ec->fragments;

    gf_log("ec", GF_LOG_DEBUG, "Initialized with: nodes=%u, fragments=%u, "
                               "stripe_size=%u, node_mask=%lX",
           ec->nodes, ec->fragments, ec->stripe_size, ec->node_mask);

    error = 0;

out:
    return error;
}
/* Counts the subvolumes of 'this', stores the count in ec->nodes and builds
 * ec->xl_list with one entry per subvolume, in list order. The up-state
 * bookkeeping (xl_up, xl_up_count) is reset.
 *
 * Returns 0 on success, EINVAL if there are more than EC_MAX_NODES
 * subvolumes, or ENOMEM if the list cannot be allocated. */
int32_t ec_prepare_childs(xlator_t * this)
{
    ec_t * ec = this->private;
    xlator_list_t * child;
    int32_t count = 0;

    child = this->children;
    while (child != NULL)
    {
        count++;
        child = child->next;
    }
    if (count > EC_MAX_NODES)
    {
        gf_log(this->name, GF_LOG_ERROR, "Too many subvolumes");

        return EINVAL;
    }
    ec->nodes = count;

    ec->xl_list = GF_CALLOC(count, sizeof(ec->xl_list[0]), ec_mt_xlator_t);
    if (ec->xl_list == NULL)
    {
        gf_log(this->name, GF_LOG_ERROR, "Allocation of xlator list failed");

        return ENOMEM;
    }
    ec->xl_up = 0;
    ec->xl_up_count = 0;

    count = 0;
    child = this->children;
    while (child != NULL)
    {
        ec->xl_list[count] = child->xlator;
        count++;
        child = child->next;
    }

    return 0;
}
/* Releases everything hanging from this->private: cancels a pending timer,
 * detaches the private data from the translator and frees the subvolume
 * list, the memory pools, the lock and the structure itself. Does nothing if
 * there is no private data. */
void __ec_destroy_private(xlator_t * this)
{
    ec_t * ec = this->private;

    if (ec == NULL)
    {
        return;
    }

    LOCK(&ec->lock);

    if (ec->timer != NULL)
    {
        gf_timer_call_cancel(this->ctx, ec->timer);
        ec->timer = NULL;
    }

    UNLOCK(&ec->lock);

    /* There is a race with the timer: there is no way to know whether the
     * timer callback has really been cancelled or has already been
     * scheduled for execution. If it has been scheduled, it will crash if
     * ec is destroyed too soon.
     *
     * It is not clear how to solve this without global variables or
     * support from gf_timer_call_cancel(), so simply wait for a while.
     */
    sleep(2);

    this->private = NULL;

    if (ec->xl_list != NULL)
    {
        GF_FREE(ec->xl_list);
        ec->xl_list = NULL;
    }

    if (ec->fop_pool != NULL)
    {
        mem_pool_destroy(ec->fop_pool);
    }

    if (ec->cbk_pool != NULL)
    {
        mem_pool_destroy(ec->cbk_pool);
    }

    LOCK_DESTROY(&ec->lock);

    GF_FREE(ec);
}
/* Initializes memory accounting for this translator with ec_mt_end + 1
 * types. Returns 0 on success and -1 on failure. */
int32_t mem_acct_init(xlator_t * this)
{
    if (xlator_mem_acct_init(this, ec_mt_end + 1) == 0)
    {
        return 0;
    }

    gf_log(this->name, GF_LOG_ERROR, "Memory accounting initialization "
                                     "failed.");

    return -1;
}
/* Online reconfiguration is not implemented: logs an error and always
 * returns -1. 'options' is ignored. */
int32_t reconfigure(xlator_t * this, dict_t * options)
{
    gf_log(this->name, GF_LOG_ERROR, "Online volume reconfiguration is not "
                                     "supported.");

    return -1;
}
/* Cancels any pending timer and, if the volume is not yet up and at least
 * ec->fragments subvolumes are up, marks it up and notifies
 * GF_EVENT_CHILD_UP through default_notify(). A warning is logged when not
 * all subvolumes are available. */
void ec_up(xlator_t * this, ec_t * ec)
{
    if (ec->timer != NULL)
    {
        gf_timer_call_cancel(this->ctx, ec->timer);
        ec->timer = NULL;
    }

    if (ec->up || (ec->xl_up_count < ec->fragments))
    {
        return;
    }

    if (ec->xl_up_count < ec->nodes)
    {
        gf_log("ec", GF_LOG_WARNING, "Starting volume with only %d bricks",
               ec->xl_up_count);
    }

    ec->up = 1;
    gf_log(this->name, GF_LOG_INFO, "Going UP");

    default_notify(this, GF_EVENT_CHILD_UP, NULL);
}
/* Cancels any pending timer and, if the volume is currently up, marks it
 * down and notifies GF_EVENT_CHILD_DOWN through default_notify(). */
void ec_down(xlator_t * this, ec_t * ec)
{
    if (ec->timer != NULL)
    {
        gf_timer_call_cancel(this->ctx, ec->timer);
        ec->timer = NULL;
    }

    if (!ec->up)
    {
        return;
    }

    ec->up = 0;
    gf_log(this->name, GF_LOG_INFO, "Going DOWN");

    default_notify(this, GF_EVENT_CHILD_DOWN, NULL);
}
/* Timer callback armed by ec_notify_up(). 'data' is the ec_t of the
 * translator. Under ec->lock, it brings the volume up unless the timer has
 * been cleared in the meantime. */
void ec_notify_up_cbk(void * data)
{
    ec_t * ec = data;

    LOCK(&ec->lock);

    /* A NULL timer means it was cancelled after this call was scheduled. */
    if (ec->timer != NULL)
    {
        ec_up(ec->xl, ec);
    }

    UNLOCK(&ec->lock);
}
/* Records that subvolume 'idx' is up. notify() calls this with ec->lock held.
 *
 * When the number of available subvolumes reaches ec->fragments, a 5 second
 * timer is started that will bring the volume up; when all subvolumes are
 * available the volume is brought up immediately.
 *
 * Returns ENOMEM if the timer cannot be created and EAGAIN otherwise. */
int32_t ec_notify_up(xlator_t * this, ec_t * ec, int32_t idx)
{
    struct timespec delay = {0, };

    /* Already known to be up: nothing changes. */
    if (((ec->xl_up >> idx) & 1) != 0)
    {
        return EAGAIN;
    }

    ec->xl_up |= 1ULL << idx;
    ec->xl_up_count++;

    gf_log("ec", GF_LOG_DEBUG, "Child %d is UP (%lX, %u)", idx, ec->xl_up,
           ec->xl_up_count);

    if (ec->xl_up_count == ec->fragments)
    {
        gf_log("ec", GF_LOG_DEBUG, "Initiating up timer");

        delay.tv_sec = 5;
        delay.tv_nsec = 0;
        ec->timer = gf_timer_call_after(this->ctx, delay, ec_notify_up_cbk,
                                        ec);
        if (ec->timer == NULL)
        {
            gf_log(this->name, GF_LOG_ERROR, "Cannot create timer for "
                                             "delayed initialization");

            return ENOMEM;
        }
    }
    else if (ec->xl_up_count == ec->nodes)
    {
        ec_up(this, ec);
    }

    return EAGAIN;
}
/* Records that subvolume 'idx' is down. notify() calls this with ec->lock
 * held. If the number of available subvolumes was exactly ec->fragments
 * before this event, the volume is brought down.
 *
 * Always returns EAGAIN. */
int32_t ec_notify_down(xlator_t * this, ec_t * ec, int32_t idx)
{
    uint32_t previous;

    /* Already known to be down: nothing changes. */
    if (((ec->xl_up >> idx) & 1) == 0)
    {
        return EAGAIN;
    }

    gf_log("ec", GF_LOG_DEBUG, "Child %d is DOWN", idx);

    ec->xl_up ^= 1ULL << idx;

    previous = ec->xl_up_count;
    ec->xl_up_count = previous - 1;
    if (previous == ec->fragments)
    {
        ec_down(this, ec);
    }

    return EAGAIN;
}
/* Translator notification entry point.
 *
 * If 'data' is one of the subvolumes, GF_EVENT_CHILD_UP and
 * GF_EVENT_CHILD_DOWN are handled by ec_notify_up() / ec_notify_down() under
 * ec->lock. The event is passed on to default_notify() only when the handler
 * result is 0, i.e. when neither handler was called; otherwise 0 is
 * returned. */
int32_t notify(xlator_t * this, int32_t event, void * data, ...)
{
    ec_t * ec = this->private;
    int32_t idx = 0;
    int32_t error = 0;

    LOCK(&ec->lock);

    /* Find which subvolume, if any, sent the event. */
    while ((idx < ec->nodes) && (ec->xl_list[idx] != data))
    {
        idx++;
    }

    gf_log("ec", GF_LOG_TRACE, "NOTIFY(%d): %p, %d", event, data, idx);

    if (idx < ec->nodes)
    {
        switch (event)
        {
            case GF_EVENT_CHILD_UP:
                error = ec_notify_up(this, ec, idx);
                break;

            case GF_EVENT_CHILD_DOWN:
                error = ec_notify_down(this, ec, idx);
                break;

            default:
                break;
        }
    }

    UNLOCK(&ec->lock);

    if (error != 0)
    {
        return 0;
    }

    return default_notify(this, event, data);
}
/* Translator initialization: allocates the zero-filled private structure,
 * creates the fop and callback memory pools, collects the subvolumes, parses
 * the options and builds the encoding tables.
 *
 * Returns 0 on success. On failure everything allocated so far is released
 * through __ec_destroy_private() and -1 is returned. */
int32_t init(xlator_t * this)
{
    ec_t * ec = NULL;

    if (this->parents == NULL)
    {
        gf_log(this->name, GF_LOG_WARNING, "Volume does not have parents.");
    }

    ec = GF_MALLOC(sizeof(*ec), ec_mt_ec_t);
    if (ec == NULL)
    {
        gf_log(this->name, GF_LOG_ERROR, "Failed to allocate private memory.");

        return -1;
    }
    memset(ec, 0, sizeof(*ec));

    this->private = ec;
    ec->xl = this;
    LOCK_INIT(&ec->lock);

    ec->fop_pool = mem_pool_new(ec_fop_data_t, 1024);
    ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096);
    if ((ec->fop_pool == NULL) || (ec->cbk_pool == NULL))
    {
        gf_log(this->name, GF_LOG_ERROR, "Failed to create memory pools.");

        goto failed;
    }

    /* The subvolume count must be known before the options are parsed. */
    if (ec_prepare_childs(this) != 0)
    {
        gf_log(this->name, GF_LOG_ERROR, "Failed to initialize xlator");

        goto failed;
    }

    if (ec_parse_options(this) != 0)
    {
        gf_log(this->name, GF_LOG_ERROR, "Failed to parse xlator options");

        goto failed;
    }

    ec_method_initialize();

    gf_log(this->name, GF_LOG_DEBUG, "Disperse translator initialized.");

    return 0;

failed:
    __ec_destroy_private(this);

    return -1;
}
/* Translator shutdown: releases the private data through
 * __ec_destroy_private(). */
void fini(xlator_t * this)
{
    __ec_destroy_private(this);
}
/* access fop entry point: forwards the request to ec_access() with
 * EC_MINIMUM_ONE and default_access_cbk. Always returns 0. */
int32_t ec_gf_access(call_frame_t * frame, xlator_t * this, loc_t * loc,
                     int32_t mask, dict_t * xdata)
{
    ec_access(frame, this, -1, EC_MINIMUM_ONE, default_access_cbk, NULL,
              loc, mask, xdata);

    return 0;
}
/* create fop entry point: forwards the request to ec_create() with
 * EC_MINIMUM_MIN and default_create_cbk. Always returns 0. */
int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc,
                     int32_t flags, mode_t mode, mode_t umask, fd_t * fd,
                     dict_t * xdata)
{
    ec_create(frame, this, -1, EC_MINIMUM_MIN, default_create_cbk, NULL,
              loc, flags, mode, umask, fd, xdata);

    return 0;
}
/* discard fop entry point: not implemented by this translator. The request
 * is answered through default_discard_failure_cbk() with ENOTSUP. Always
 * returns 0. */
int32_t ec_gf_discard(call_frame_t * frame, xlator_t * this, fd_t * fd,
                      off_t offset, size_t len, dict_t * xdata)
{
    default_discard_failure_cbk(frame, ENOTSUP);

    return 0;
}
/* entrylk fop entry point: forwards the request to ec_entrylk() with
 * EC_MINIMUM_ALL and default_entrylk_cbk. Always returns 0. */
int32_t ec_gf_entrylk(call_frame_t * frame, xlator_t * this,
                      const char * volume, loc_t * loc, const char * basename,
                      entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
{
    ec_entrylk(frame, this, -1, EC_MINIMUM_ALL, default_entrylk_cbk, NULL,
               volume, loc, basename, cmd, type, xdata);

    return 0;
}
/* fentrylk fop entry point: forwards the request to ec_fentrylk() with
 * EC_MINIMUM_ALL and default_fentrylk_cbk. Always returns 0. */
int32_t ec_gf_fentrylk(call_frame_t * frame, xlator_t * this,
                       const char * volume, fd_t * fd, const char * basename,
                       entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
{
    ec_fentrylk(frame, this, -1, EC_MINIMUM_ALL, default_fentrylk_cbk, NULL,
                volume, fd, basename, cmd, type, xdata);

    return 0;
}
/* fallocate fop entry point: not implemented by this translator. The request
 * is answered through default_fallocate_failure_cbk() with ENOTSUP. Always
 * returns 0. */
int32_t ec_gf_fallocate(call_frame_t * frame, xlator_t * this, fd_t * fd,
                        int32_t keep_size, off_t offset, size_t len,
                        dict_t * xdata)
{
    default_fallocate_failure_cbk(frame, ENOTSUP);

    return 0;
}
/* flush fop entry point: forwards the request to ec_flush() with
 * EC_MINIMUM_MIN and default_flush_cbk. Always returns 0. */
int32_t ec_gf_flush(call_frame_t * frame, xlator_t * this, fd_t * fd,
                    dict_t * xdata)
{
    ec_flush(frame, this, -1, EC_MINIMUM_MIN, default_flush_cbk, NULL,
             fd, xdata);

    return 0;
}
/* fsync fop entry point: forwards the request to ec_fsync() with
 * EC_MINIMUM_MIN and default_fsync_cbk. Always returns 0. */
int32_t ec_gf_fsync(call_frame_t * frame, xlator_t * this, fd_t * fd,
                    int32_t datasync, dict_t * xdata)
{
    ec_fsync(frame, this, -1, EC_MINIMUM_MIN, default_fsync_cbk, NULL,
             fd, datasync, xdata);

    return 0;
}
/* fsyncdir fop entry point: forwards the request to ec_fsyncdir() with
 * EC_MINIMUM_MIN and default_fsyncdir_cbk. Always returns 0. */
int32_t ec_gf_fsyncdir(call_frame_t * frame, xlator_t * this, fd_t * fd,
                       int32_t datasync, dict_t * xdata)
{
    ec_fsyncdir(frame, this, -1, EC_MINIMUM_MIN, default_fsyncdir_cbk, NULL,
                fd, datasync, xdata);

    return 0;
}
/* getxattr fop entry point: forwards the request to ec_getxattr() with
 * EC_MINIMUM_MIN and default_getxattr_cbk. Always returns 0. */
int32_t ec_gf_getxattr(call_frame_t * frame, xlator_t * this, loc_t * loc,
                       const char * name, dict_t * xdata)
{
    ec_getxattr(frame, this, -1, EC_MINIMUM_MIN, default_getxattr_cbk, NULL,
                loc, name, xdata);

    return 0;
}
/* fgetxattr fop entry point: forwards the request to ec_fgetxattr() with
 * EC_MINIMUM_MIN and default_fgetxattr_cbk. Always returns 0. */
int32_t ec_gf_fgetxattr(call_frame_t * frame, xlator_t * this, fd_t * fd,
                        const char * name, dict_t * xdata)
{
    ec_fgetxattr(frame, this, -1, EC_MINIMUM_MIN, default_fgetxattr_cbk, NULL,
                 fd, name, xdata);

    return 0;
}
/* inodelk fop entry point: forwards the request to ec_inodelk() with
 * EC_MINIMUM_ALL and default_inodelk_cbk. Always returns 0. */
int32_t ec_gf_inodelk(call_frame_t * frame, xlator_t * this,
                      const char * volume, loc_t * loc, int32_t cmd,
                      struct gf_flock * flock, dict_t * xdata)
{
    ec_inodelk(frame, this, -1, EC_MINIMUM_ALL, default_inodelk_cbk, NULL,
               volume, loc, cmd, flock, xdata);

    return 0;
}
/* finodelk fop entry point: forwards the request to ec_finodelk() with
 * EC_MINIMUM_ALL and default_finodelk_cbk. Always returns 0. */
int32_t ec_gf_finodelk(call_frame_t * frame, xlator_t * this,
                       const char * volume, fd_t * fd, int32_t cmd,
                       struct gf_flock * flock, dict_t * xdata)
{
    ec_finodelk(frame, this, -1, EC_MINIMUM_ALL, default_finodelk_cbk, NULL,
                volume, fd, cmd, flock, xdata);

    return 0;
}
/* link fop entry point: forwards the request to ec_link() with
 * EC_MINIMUM_MIN and default_link_cbk. Always returns 0. */
int32_t ec_gf_link(call_frame_t * frame, xlator_t * this, loc_t * oldloc,
                   loc_t * newloc, dict_t * xdata)
{
    ec_link(frame, this, -1, EC_MINIMUM_MIN, default_link_cbk, NULL,
            oldloc, newloc, xdata);

    return 0;
}
/* lk fop entry point: forwards the request to ec_lk() with EC_MINIMUM_ALL
 * and default_lk_cbk. Always returns 0. */
int32_t ec_gf_lk(call_frame_t * frame, xlator_t * this, fd_t * fd,
                 int32_t cmd, struct gf_flock * flock, dict_t * xdata)
{
    ec_lk(frame, this, -1, EC_MINIMUM_ALL, default_lk_cbk, NULL,
          fd, cmd, flock, xdata);

    return 0;
}
/* lookup fop entry point: forwards the request to ec_lookup() with
 * EC_MINIMUM_MIN and default_lookup_cbk. Always returns 0. */
int32_t ec_gf_lookup(call_frame_t * frame, xlator_t * this, loc_t * loc,
                     dict_t * xdata)
{
    ec_lookup(frame, this, -1, EC_MINIMUM_MIN, default_lookup_cbk, NULL,
              loc, xdata);

    return 0;
}
/* mkdir fop entry point: forwards the request to ec_mkdir() with
 * EC_MINIMUM_MIN and default_mkdir_cbk. Always returns 0. */
int32_t ec_gf_mkdir(call_frame_t * frame, xlator_t * this, loc_t * loc,
                    mode_t mode, mode_t umask, dict_t * xdata)
{
    ec_mkdir(frame, this, -1, EC_MINIMUM_MIN, default_mkdir_cbk, NULL,
             loc, mode, umask, xdata);

    return 0;
}
/* mknod fop entry point: forwards the request to ec_mknod() with
 * EC_MINIMUM_MIN and default_mknod_cbk. Always returns 0. */
int32_t ec_gf_mknod(call_frame_t * frame, xlator_t * this, loc_t * loc,
                    mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata)
{
    ec_mknod(frame, this, -1, EC_MINIMUM_MIN, default_mknod_cbk, NULL,
             loc, mode, rdev, umask, xdata);

    return 0;
}
/* open fop entry point: forwards the request to ec_open() with
 * EC_MINIMUM_MIN and default_open_cbk. Always returns 0. */
int32_t ec_gf_open(call_frame_t * frame, xlator_t * this, loc_t * loc,
                   int32_t flags, fd_t * fd, dict_t * xdata)
{
    ec_open(frame, this, -1, EC_MINIMUM_MIN, default_open_cbk, NULL,
            loc, flags, fd, xdata);

    return 0;
}
/* opendir fop entry point: forwards the request to ec_opendir() with
 * EC_MINIMUM_MIN and default_opendir_cbk. Always returns 0. */
int32_t ec_gf_opendir(call_frame_t * frame, xlator_t * this, loc_t * loc,
                      fd_t * fd, dict_t * xdata)
{
    ec_opendir(frame, this, -1, EC_MINIMUM_MIN, default_opendir_cbk, NULL,
               loc, fd, xdata);

    return 0;
}
/* readdir fop entry point: forwards the request to ec_readdir() with
 * EC_MINIMUM_ONE and default_readdir_cbk. Always returns 0. */
int32_t ec_gf_readdir(call_frame_t * frame, xlator_t * this, fd_t * fd,
                      size_t size, off_t offset, dict_t * xdata)
{
    ec_readdir(frame, this, -1, EC_MINIMUM_ONE, default_readdir_cbk, NULL,
               fd, size, offset, xdata);

    return 0;
}
/* readdirp fop entry point: forwards the request to ec_readdirp() with
 * EC_MINIMUM_ONE and default_readdirp_cbk. Always returns 0. */
int32_t ec_gf_readdirp(call_frame_t * frame, xlator_t * this, fd_t * fd,
                       size_t size, off_t offset, dict_t * xdata)
{
    ec_readdirp(frame, this, -1, EC_MINIMUM_ONE, default_readdirp_cbk, NULL,
                fd, size, offset, xdata);

    return 0;
}
/* readlink fop entry point: forwards the request to ec_readlink() with
 * EC_MINIMUM_ONE and default_readlink_cbk. Always returns 0. */
int32_t ec_gf_readlink(call_frame_t * frame, xlator_t * this, loc_t * loc,
                       size_t size, dict_t * xdata)
{
    ec_readlink(frame, this, -1, EC_MINIMUM_ONE, default_readlink_cbk, NULL,
                loc, size, xdata);

    return 0;
}
/* readv fop entry point: forwards the request to ec_readv() with
 * EC_MINIMUM_MIN and default_readv_cbk. Always returns 0. */
int32_t ec_gf_readv(call_frame_t * frame, xlator_t * this, fd_t * fd,
                    size_t size, off_t offset, uint32_t flags, dict_t * xdata)
{
    ec_readv(frame, this, -1, EC_MINIMUM_MIN, default_readv_cbk, NULL,
             fd, size, offset, flags, xdata);

    return 0;
}
/* removexattr fop entry point: forwards the request to ec_removexattr() with
 * EC_MINIMUM_MIN and default_removexattr_cbk. Always returns 0. */
int32_t ec_gf_removexattr(call_frame_t * frame, xlator_t * this, loc_t * loc,
                          const char * name, dict_t * xdata)
{
    ec_removexattr(frame, this, -1, EC_MINIMUM_MIN, default_removexattr_cbk,
                   NULL, loc, name, xdata);

    return 0;
}
/* fremovexattr fop entry point: forwards the request to ec_fremovexattr()
 * with EC_MINIMUM_MIN and default_fremovexattr_cbk. Always returns 0. */
int32_t ec_gf_fremovexattr(call_frame_t * frame, xlator_t * this, fd_t * fd,
                           const char * name, dict_t * xdata)
{
    ec_fremovexattr(frame, this, -1, EC_MINIMUM_MIN, default_fremovexattr_cbk,
                    NULL, fd, name, xdata);

    return 0;
}
/* rename fop entry point: forwards the request to ec_rename() with
 * EC_MINIMUM_MIN and default_rename_cbk. Always returns 0. */
int32_t ec_gf_rename(call_frame_t * frame, xlator_t * this, loc_t * oldloc,
                     loc_t * newloc, dict_t * xdata)
{
    ec_rename(frame, this, -1, EC_MINIMUM_MIN, default_rename_cbk, NULL,
              oldloc, newloc, xdata);

    return 0;
}
/* rmdir fop entry point: forwards the request to ec_rmdir() with
 * EC_MINIMUM_MIN and default_rmdir_cbk. Always returns 0. */
int32_t ec_gf_rmdir(call_frame_t * frame, xlator_t * this, loc_t * loc,
                    int xflags, dict_t * xdata)
{
    ec_rmdir(frame, this, -1, EC_MINIMUM_MIN, default_rmdir_cbk, NULL,
             loc, xflags, xdata);

    return 0;
}
/* setattr fop entry point: forwards the request to ec_setattr() with
 * EC_MINIMUM_MIN and default_setattr_cbk. Always returns 0. */
int32_t ec_gf_setattr(call_frame_t * frame, xlator_t * this, loc_t * loc,
                      struct iatt * stbuf, int32_t valid, dict_t * xdata)
{
    ec_setattr(frame, this, -1, EC_MINIMUM_MIN, default_setattr_cbk, NULL,
               loc, stbuf, valid, xdata);

    return 0;
}
/* fsetattr fop entry point: forwards the request to ec_fsetattr() with
 * EC_MINIMUM_MIN and default_fsetattr_cbk. Always returns 0. */
int32_t ec_gf_fsetattr(call_frame_t * frame, xlator_t * this, fd_t * fd,
                       struct iatt * stbuf, int32_t valid, dict_t * xdata)
{
    ec_fsetattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetattr_cbk, NULL,
                fd, stbuf, valid, xdata);

    return 0;
}
/* setxattr fop entry point: forwards the request to ec_setxattr() with
 * EC_MINIMUM_MIN and default_setxattr_cbk. Always returns 0. */
int32_t ec_gf_setxattr(call_frame_t * frame, xlator_t * this, loc_t * loc,
                       dict_t * dict, int32_t flags, dict_t * xdata)
{
    ec_setxattr(frame, this, -1, EC_MINIMUM_MIN, default_setxattr_cbk, NULL,
                loc, dict, flags, xdata);

    return 0;
}
/* fsetxattr fop entry point: forwards the request to ec_fsetxattr() with
 * EC_MINIMUM_MIN and default_fsetxattr_cbk. Always returns 0. */
int32_t ec_gf_fsetxattr(call_frame_t * frame, xlator_t * this, fd_t * fd,
                        dict_t * dict, int32_t flags, dict_t * xdata)
{
    ec_fsetxattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetxattr_cbk, NULL,
                 fd, dict, flags, xdata);

    return 0;
}
/* stat fop entry point: forwards the request to ec_stat() with
 * EC_MINIMUM_MIN and default_stat_cbk. Always returns 0. */
int32_t ec_gf_stat(call_frame_t * frame, xlator_t * this, loc_t * loc,
                   dict_t * xdata)
{
    ec_stat(frame, this, -1, EC_MINIMUM_MIN, default_stat_cbk, NULL,
            loc, xdata);

    return 0;
}
/* fstat fop entry point: forwards the request to ec_fstat() with
 * EC_MINIMUM_MIN and default_fstat_cbk. Always returns 0. */
int32_t ec_gf_fstat(call_frame_t * frame, xlator_t * this, fd_t * fd,
                    dict_t * xdata)
{
    ec_fstat(frame, this, -1, EC_MINIMUM_MIN, default_fstat_cbk, NULL,
             fd, xdata);

    return 0;
}
/* statfs fop entry point: forwards the request to ec_statfs() with
 * EC_MINIMUM_MIN and default_statfs_cbk. Always returns 0. */
int32_t ec_gf_statfs(call_frame_t * frame, xlator_t * this, loc_t * loc,
                     dict_t * xdata)
{
    ec_statfs(frame, this, -1, EC_MINIMUM_MIN, default_statfs_cbk, NULL,
              loc, xdata);

    return 0;
}
/* symlink fop entry point: forwards the request to ec_symlink() with
 * EC_MINIMUM_MIN and default_symlink_cbk. Always returns 0. */
int32_t ec_gf_symlink(call_frame_t * frame, xlator_t * this,
                      const char * linkname, loc_t * loc, mode_t umask,
                      dict_t * xdata)
{
    ec_symlink(frame, this, -1, EC_MINIMUM_MIN, default_symlink_cbk, NULL,
               linkname, loc, umask, xdata);

    return 0;
}
/* truncate fop entry point: forwards the request to ec_truncate() with
 * EC_MINIMUM_MIN and default_truncate_cbk. Always returns 0. */
int32_t ec_gf_truncate(call_frame_t * frame, xlator_t * this, loc_t * loc,
                       off_t offset, dict_t * xdata)
{
    ec_truncate(frame, this, -1, EC_MINIMUM_MIN, default_truncate_cbk, NULL,
                loc, offset, xdata);

    return 0;
}
/* ftruncate fop entry point: forwards the request to ec_ftruncate() with
 * EC_MINIMUM_MIN and default_ftruncate_cbk. Always returns 0. */
int32_t ec_gf_ftruncate(call_frame_t * frame, xlator_t * this, fd_t * fd,
                        off_t offset, dict_t * xdata)
{
    ec_ftruncate(frame, this, -1, EC_MINIMUM_MIN, default_ftruncate_cbk, NULL,
                 fd, offset, xdata);

    return 0;
}
/* unlink fop entry point: forwards the request to ec_unlink() with
 * EC_MINIMUM_MIN and default_unlink_cbk. Always returns 0. */
int32_t ec_gf_unlink(call_frame_t * frame, xlator_t * this, loc_t * loc,
                     int xflags, dict_t * xdata)
{
    ec_unlink(frame, this, -1, EC_MINIMUM_MIN, default_unlink_cbk, NULL,
              loc, xflags, xdata);

    return 0;
}
/* writev fop entry point: forwards the request to ec_writev() with
 * EC_MINIMUM_MIN and default_writev_cbk. Always returns 0. */
int32_t ec_gf_writev(call_frame_t * frame, xlator_t * this, fd_t * fd,
                     struct iovec * vector, int32_t count, off_t offset,
                     uint32_t flags, struct iobref * iobref, dict_t * xdata)
{
    ec_writev(frame, this, -1, EC_MINIMUM_MIN, default_writev_cbk, NULL,
              fd, vector, count, offset, flags, iobref, xdata);

    return 0;
}
/* xattrop fop entry point: forwards the request to ec_xattrop() with
 * EC_MINIMUM_MIN and default_xattrop_cbk. Always returns 0. */
int32_t ec_gf_xattrop(call_frame_t * frame, xlator_t * this, loc_t * loc,
                      gf_xattrop_flags_t optype, dict_t * xattr,
                      dict_t * xdata)
{
    ec_xattrop(frame, this, -1, EC_MINIMUM_MIN, default_xattrop_cbk, NULL,
               loc, optype, xattr, xdata);

    return 0;
}
/* fxattrop fop entry point: forwards the request to ec_fxattrop() with
 * EC_MINIMUM_MIN and default_fxattrop_cbk. Always returns 0. */
int32_t ec_gf_fxattrop(call_frame_t * frame, xlator_t * this, fd_t * fd,
                       gf_xattrop_flags_t optype, dict_t * xattr,
                       dict_t * xdata)
{
    ec_fxattrop(frame, this, -1, EC_MINIMUM_MIN, default_fxattrop_cbk, NULL,
                fd, optype, xattr, xdata);

    return 0;
}
/* zerofill fop entry point: not implemented by this translator. The request
 * is answered through default_zerofill_failure_cbk() with ENOTSUP. Always
 * returns 0. */
int32_t ec_gf_zerofill(call_frame_t * frame, xlator_t * this, fd_t * fd,
                       off_t offset, off_t len, dict_t * xdata)
{
    default_zerofill_failure_cbk(frame, ENOTSUP);

    return 0;
}
/* Detaches the ec context stored in 'fd' for this translator, if any, wipes
 * the loc_t it contains and frees it. */
void __ec_gf_release_fd(xlator_t * this, fd_t * fd)
{
    ec_fd_t * ctx = NULL;
    uint64_t raw = 0;

    if (fd_ctx_del(fd, this, &raw) != 0)
    {
        return;
    }
    if (raw == 0)
    {
        return;
    }

    ctx = (ec_fd_t *)(uintptr_t)raw;

    loc_wipe(&ctx->loc);
    GF_FREE(ctx);
}
/* Detaches the ec context stored in 'inode' for this translator, if any, and
 * frees it. */
void __ec_gf_release_inode(xlator_t * this, inode_t * inode)
{
    ec_inode_t * ctx = NULL;
    uint64_t raw = 0;

    if (inode_ctx_del(inode, this, &raw) != 0)
    {
        return;
    }
    if (raw == 0)
    {
        return;
    }

    ctx = (ec_inode_t *)(uintptr_t)raw;

    GF_FREE(ctx);
}
/* forget callback: drops the ec context attached to 'inode'. Always
 * returns 0. */
int32_t ec_gf_forget(xlator_t * this, inode_t * inode)
{
    __ec_gf_release_inode(this, inode);

    return 0;
}
/* invalidate callback: drops the ec context attached to 'inode', exactly as
 * ec_gf_forget() does. Always returns 0. */
int32_t ec_gf_invalidate(xlator_t * this, inode_t * inode)
{
    __ec_gf_release_inode(this, inode);

    return 0;
}
/* release callback: drops the ec context attached to 'fd'. Always
 * returns 0. */
int32_t ec_gf_release(xlator_t * this, fd_t * fd)
{
    __ec_gf_release_fd(this, fd);

    return 0;
}
/* releasedir callback: drops the ec context attached to the directory 'fd',
 * exactly as ec_gf_release() does. Always returns 0. */
int32_t ec_gf_releasedir(xlator_t * this, fd_t * fd)
{
    __ec_gf_release_fd(this, fd);

    return 0;
}
/* File operation table of the translator: every entry points to the
 * ec_gf_<fop>() wrapper defined above. fallocate, discard and zerofill are
 * wired to wrappers that answer ENOTSUP. */
struct xlator_fops fops =
{
.lookup = ec_gf_lookup,
.stat = ec_gf_stat,
.fstat = ec_gf_fstat,
.truncate = ec_gf_truncate,
.ftruncate = ec_gf_ftruncate,
.access = ec_gf_access,
.readlink = ec_gf_readlink,
.mknod = ec_gf_mknod,
.mkdir = ec_gf_mkdir,
.unlink = ec_gf_unlink,
.rmdir = ec_gf_rmdir,
.symlink = ec_gf_symlink,
.rename = ec_gf_rename,
.link = ec_gf_link,
.create = ec_gf_create,
.open = ec_gf_open,
.readv = ec_gf_readv,
.writev = ec_gf_writev,
.flush = ec_gf_flush,
.fsync = ec_gf_fsync,
.opendir = ec_gf_opendir,
.readdir = ec_gf_readdir,
.readdirp = ec_gf_readdirp,
.fsyncdir = ec_gf_fsyncdir,
.statfs = ec_gf_statfs,
.setxattr = ec_gf_setxattr,
.getxattr = ec_gf_getxattr,
.fsetxattr = ec_gf_fsetxattr,
.fgetxattr = ec_gf_fgetxattr,
.removexattr = ec_gf_removexattr,
.fremovexattr = ec_gf_fremovexattr,
.lk = ec_gf_lk,
.inodelk = ec_gf_inodelk,
.finodelk = ec_gf_finodelk,
.entrylk = ec_gf_entrylk,
.fentrylk = ec_gf_fentrylk,
.xattrop = ec_gf_xattrop,
.fxattrop = ec_gf_fxattrop,
.setattr = ec_gf_setattr,
.fsetattr = ec_gf_fsetattr,
.fallocate = ec_gf_fallocate,
.discard = ec_gf_discard,
.zerofill = ec_gf_zerofill
};
/* Callback table of the translator. forget/invalidate free the per-inode
 * context and release/releasedir free the per-fd context; no client
 * callbacks are registered. */
struct xlator_cbks cbks =
{
.forget = ec_gf_forget,
.release = ec_gf_release,
.releasedir = ec_gf_releasedir,
.invalidate = ec_gf_invalidate,
.client_destroy = NULL,
.client_disconnect = NULL
};
/* Options accepted by the translator. "redundancy" is read by
 * ec_parse_options(); the empty entry terminates the list. */
struct volume_options options[] =
{
{
.key = { "redundancy" },
.type = GF_OPTION_TYPE_INT,
.description = "Maximum number of bricks that can fail "
"simultaneously without losing data."
},
{ }
};

View File

@ -0,0 +1,54 @@
/*
Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
This file is part of the cluster/ec translator for GlusterFS.
The cluster/ec translator for GlusterFS is free software: you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
The cluster/ec translator for GlusterFS is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the cluster/ec translator for GlusterFS. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __EC_H__
#define __EC_H__
#include "xlator.h"
#include "timer.h"
/* Extended attribute names (their users are outside this header). */
#define EC_XATTR_SIZE "trusted.ec.size"
#define EC_XATTR_VERSION "trusted.ec.version"
struct _ec;
typedef struct _ec ec_t;
/* Private state of one instance of the ec translator. It is allocated
 * zero-filled by init() and released by __ec_destroy_private(). */
struct _ec
{
/* Translator that owns this structure. */
xlator_t * xl;
/* Number of subvolumes (set by ec_prepare_childs()). */
int32_t nodes;
/* Smallest bit count, at least 1, able to number all nodes. */
int32_t bits_for_nodes;
/* nodes - redundancy. */
int32_t fragments;
/* Value of the "redundancy" option. */
int32_t redundancy;
/* Set to EC_METHOD_CHUNK_SIZE by ec_parse_options(). */
uint32_t fragment_size;
/* fragment_size * fragments. */
uint32_t stripe_size;
/* Non-zero while the volume is considered up (see ec_up()/ec_down()). */
int32_t up;
/* Not referenced by the code in ec.c. */
uint32_t idx;
/* Number of bits set in xl_up. */
uint32_t xl_up_count;
/* Bitmask of subvolumes currently up; bit i corresponds to xl_list[i]. */
uintptr_t xl_up;
/* Bitmask with the lowest 'nodes' bits set. */
uintptr_t node_mask;
/* Array of 'nodes' subvolumes, in the order of this->children. */
xlator_t ** xl_list;
/* Protects the up-state bookkeeping and the timer (taken in notify(),
 * ec_notify_up_cbk() and __ec_destroy_private()). */
gf_lock_t lock;
/* Pending delayed-up timer, or NULL when none is armed. */
gf_timer_t * timer;
/* Memory pools for ec_fop_data_t and ec_cbk_data_t objects. */
struct mem_pool * fop_pool;
struct mem_pool * cbk_pool;
};
#endif /* __EC_H__ */