xlators/experimental: move template files to '.c.in' type

This is critical because we shouldn't be applying the automated
coding standard (clang-format) tool on these files.

This patchset is done by below steps:

* clang-format -i ${filename}

This creates syntax errors. Fix them using below two commands:
* sed -i -e 's/ @/@/g' ${filename}
* sed -i -e 's/,@/, @/g' ${filename}

With this, these files are having minimum changes requried to
compile, and is as close to the coding standard as possible.

* git rename ${filename} ${filename}.in

Updates: bz#1564149
Change-Id: Icf90f7f81d6fa4400be4826e094fdff8e64508d0
Signed-off-by: Amar Tumballi <amarts@redhat.com>
This commit is contained in:
Amar Tumballi 2018-09-17 17:17:54 +05:30 committed by Xavi Hernandez
parent c1f0409822
commit 76bd93c7b8
13 changed files with 1598 additions and 1691 deletions

View File

@ -33,16 +33,16 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
AM_CFLAGS = -Wall $(GF_CFLAGS)
noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py
EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c
EXTRA_DIST = fdl-tmpl.c.in dump-tmpl.c.in recon-tmpl.c.in
CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \
$(nodist_gf_recon_SOURCES)
fdl.c: fdl-tmpl.c gen_fdl.py
$(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@
fdl.c: fdl-tmpl.c.in gen_fdl.py
$(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c.in > $@
libfdl.c: dump-tmpl.c gen_dumper.py
$(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@
libfdl.c: dump-tmpl.c.in gen_dumper.py
$(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c.in > $@
librecon.c: recon-tmpl.c gen_recon.py
$(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@
librecon.c: recon-tmpl.c.in gen_recon.py
$(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c.in > $@

View File

@ -1,187 +0,0 @@
#pragma fragment PROLOG
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#include <ctype.h>
#endif
#include "glfs.h"
#include "iatt.h"
#include "xlator.h"
#include "fdl.h"
/*
* Returns 0 if the string is ASCII printable *
* and -1 if it's not ASCII printable *
*/
int str_isprint (char *s)
{
int ret = -1;
if (!s)
goto out;
while (s[0] != '\0') {
if (!isprint(s[0]))
goto out;
else
s++;
}
ret = 0;
out:
return ret;
}
#pragma fragment DICT
{
int key_len, data_len;
char *key_ptr;
char *key_val;
printf ("@ARGNAME@ = dict {\n");
for (;;) {
key_len = *((int *)new_meta);
new_meta += sizeof(int);
if (!key_len) {
break;
}
key_ptr = new_meta;
new_meta += key_len;
data_len = *((int *)new_meta);
key_val = new_meta + sizeof(int);
new_meta += sizeof(int) + data_len;
if (str_isprint(key_val))
printf (" %s = <%d bytes>\n",
key_ptr, data_len);
else
printf (" %s = %s <%d bytes>\n",
key_ptr, key_val, data_len);
}
printf ("}\n");
}
#pragma fragment DOUBLE
printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta),
*((uint64_t *)new_meta));
new_meta += sizeof(uint64_t);
#pragma fragment GFID
printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta)));
new_meta += 16;
#pragma fragment INTEGER
printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta),
*((uint32_t *)new_meta));
new_meta += sizeof(uint32_t);
#pragma fragment LOC
printf ("@ARGNAME@ = loc {\n");
printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
new_meta += 16;
printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
new_meta += 16;
if (*(new_meta++)) {
printf (" name = %s\n", new_meta);
new_meta += (strlen(new_meta) + 1);
}
printf ("}\n");
#pragma fragment STRING
if (*(new_meta++)) {
printf ("@ARGNAME@ = %s\n", new_meta);
new_meta += (strlen(new_meta) + 1);
}
#pragma fragment VECTOR
{
size_t len = *((size_t *)new_meta);
new_meta += sizeof(len);
printf ("@ARGNAME@ = <%zu bytes>\n", len);
new_data += len;
}
#pragma fragment IATT
{
ia_prot_t *myprot = ((ia_prot_t *)new_meta);
printf ("@ARGNAME@ = iatt {\n");
printf (" ia_prot = %c%c%c",
myprot->suid ? 'S' : '-',
myprot->sgid ? 'S' : '-',
myprot->sticky ? 'T' : '-');
printf ("%c%c%c",
myprot->owner.read ? 'r' : '-',
myprot->owner.write ? 'w' : '-',
myprot->owner.exec ? 'x' : '-');
printf ("%c%c%c",
myprot->group.read ? 'r' : '-',
myprot->group.write ? 'w' : '-',
myprot->group.exec ? 'x' : '-');
printf ("%c%c%c\n",
myprot->other.read ? 'r' : '-',
myprot->other.write ? 'w' : '-',
myprot->other.exec ? 'x' : '-');
new_meta += sizeof(ia_prot_t);
uint32_t *myints = (uint32_t *)new_meta;
printf (" ia_uid = %u\n", myints[0]);
printf (" ia_gid = %u\n", myints[1]);
printf (" ia_atime = %u.%09u\n", myints[2], myints[3]);
printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]);
new_meta += sizeof(*myints) * 6;
}
#pragma fragment FOP
void
fdl_dump_@NAME@ (char **old_meta, char **old_data)
{
char *new_meta = *old_meta;
char *new_data = *old_data;
/* TBD: word size/endianness */
@FUNCTION_BODY@
*old_meta = new_meta;
*old_data = new_data;
}
#pragma fragment CASE
case GF_FOP_@UPNAME@:
printf ("=== GF_FOP_@UPNAME@\n");
fdl_dump_@NAME@ (&new_meta, &new_data);
break;
#pragma fragment EPILOG
int
fdl_dump (char **old_meta, char **old_data)
{
char *new_meta = *old_meta;
char *new_data = *old_data;
static glfs_t *fs = NULL;
int recognized = 1;
event_header_t *eh;
/*
* We don't really call anything else in GFAPI, but this is the most
* convenient way to satisfy all of the spurious dependencies on how it
* or glusterfsd initialize (e.g. setting up THIS).
*/
if (!fs) {
fs = glfs_new ("dummy");
}
eh = (event_header_t *)new_meta;
new_meta += sizeof (*eh);
/* TBD: check event_type instead of assuming NEW_REQUEST */
switch (eh->fop_type) {
@SWITCH_BODY@
default:
printf ("unknown fop %u\n", eh->fop_type);
recognized = 0;
}
*old_meta = new_meta;
*old_data = new_data;
return recognized;
}

View File

@ -0,0 +1,177 @@
#pragma fragment PROLOG
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#include <ctype.h>
#endif
#include "glfs.h"
#include "iatt.h"
#include "xlator.h"
#include "fdl.h"
/*
* Returns 0 if the string is ASCII printable *
* and -1 if it's not ASCII printable *
*/
int
str_isprint(char *s)
{
int ret = -1;
if (!s)
goto out;
while (s[0] != '\0') {
if (!isprint(s[0]))
goto out;
else
s++;
}
ret = 0;
out:
return ret;
}
#pragma fragment DICT
{
int key_len, data_len;
char *key_ptr;
char *key_val;
printf("@ARGNAME@ = dict {\n");
for (;;) {
key_len = *((int *)new_meta);
new_meta += sizeof(int);
if (!key_len) {
break;
}
key_ptr = new_meta;
new_meta += key_len;
data_len = *((int *)new_meta);
key_val = new_meta + sizeof(int);
new_meta += sizeof(int) + data_len;
if (str_isprint(key_val))
printf(" %s = <%d bytes>\n", key_ptr, data_len);
else
printf(" %s = %s <%d bytes>\n", key_ptr, key_val, data_len);
}
printf("}\n");
}
#pragma fragment DOUBLE
printf("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta),
*((uint64_t *)new_meta));
new_meta += sizeof(uint64_t);
#pragma fragment GFID
printf("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta)));
new_meta += 16;
#pragma fragment INTEGER
printf("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta),
*((uint32_t *)new_meta));
new_meta += sizeof(uint32_t);
#pragma fragment LOC
printf("@ARGNAME@ = loc {\n");
printf(" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
new_meta += 16;
printf(" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
new_meta += 16;
if (*(new_meta++)) {
printf(" name = %s\n", new_meta);
new_meta += (strlen(new_meta) + 1);
}
printf("}\n");
#pragma fragment STRING
if (*(new_meta++)) {
printf("@ARGNAME@ = %s\n", new_meta);
new_meta += (strlen(new_meta) + 1);
}
#pragma fragment VECTOR
{
size_t len = *((size_t *)new_meta);
new_meta += sizeof(len);
printf("@ARGNAME@ = <%zu bytes>\n", len);
new_data += len;
}
#pragma fragment IATT
{
ia_prot_t *myprot = ((ia_prot_t *)new_meta);
printf("@ARGNAME@ = iatt {\n");
printf(" ia_prot = %c%c%c", myprot->suid ? 'S' : '-',
myprot->sgid ? 'S' : '-', myprot->sticky ? 'T' : '-');
printf("%c%c%c", myprot->owner.read ? 'r' : '-',
myprot->owner.write ? 'w' : '-', myprot->owner.exec ? 'x' : '-');
printf("%c%c%c", myprot->group.read ? 'r' : '-',
myprot->group.write ? 'w' : '-', myprot->group.exec ? 'x' : '-');
printf("%c%c%c\n", myprot->other.read ? 'r' : '-',
myprot->other.write ? 'w' : '-', myprot->other.exec ? 'x' : '-');
new_meta += sizeof(ia_prot_t);
uint32_t *myints = (uint32_t *)new_meta;
printf(" ia_uid = %u\n", myints[0]);
printf(" ia_gid = %u\n", myints[1]);
printf(" ia_atime = %u.%09u\n", myints[2], myints[3]);
printf(" ia_mtime = %u.%09u\n", myints[4], myints[5]);
new_meta += sizeof(*myints) * 6;
}
#pragma fragment FOP
void fdl_dump_@NAME@(char **old_meta, char **old_data)
{
char *new_meta = *old_meta;
char *new_data = *old_data;
/* TBD: word size/endianness */
@FUNCTION_BODY@
*old_meta = new_meta;
*old_data = new_data;
}
#pragma fragment CASE
case GF_FOP_@UPNAME@:
printf("=== GF_FOP_@UPNAME@\n");
fdl_dump_@NAME@(&new_meta, &new_data);
break;
#pragma fragment EPILOG
int
fdl_dump(char **old_meta, char **old_data)
{
char *new_meta = *old_meta;
char *new_data = *old_data;
static glfs_t *fs = NULL;
int recognized = 1;
event_header_t *eh;
/*
* We don't really call anything else in GFAPI, but this is the most
* convenient way to satisfy all of the spurious dependencies on how it
* or glusterfsd initialize (e.g. setting up THIS).
*/
if (!fs) {
fs = glfs_new("dummy");
}
eh = (event_header_t *)new_meta;
new_meta += sizeof(*eh);
/* TBD: check event_type instead of assuming NEW_REQUEST */
switch (eh->fop_type) {
@SWITCH_BODY@
default :
printf("unknown fop %u\n", eh->fop_type);
recognized = 0;
}
*old_meta = new_meta;
*old_data = new_data;
return recognized;
}

View File

@ -1,536 +0,0 @@
/*
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include "call-stub.h"
#include "iatt.h"
#include "defaults.h"
#include "syscall.h"
#include "xlator.h"
#include "fdl.h"
/* TBD: make tunable */
#define META_FILE_SIZE (1 << 20)
#define DATA_FILE_SIZE (1 << 24)
enum gf_fdl {
gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1,
gf_fdl_mt_end
};
typedef struct {
char *type;
off_t size;
char *path;
int fd;
void * ptr;
off_t max_offset;
} log_obj_t;
typedef struct {
struct list_head reqs;
pthread_mutex_t req_lock;
pthread_cond_t req_cond;
char *log_dir;
pthread_t worker;
gf_boolean_t should_stop;
gf_boolean_t change_term;
log_obj_t meta_log;
log_obj_t data_log;
int term;
int first_term;
} fdl_private_t;
int32_t
fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
void
fdl_enqueue (xlator_t *this, call_stub_t *stub)
{
fdl_private_t *priv = this->private;
pthread_mutex_lock (&priv->req_lock);
list_add_tail (&stub->list, &priv->reqs);
pthread_mutex_unlock (&priv->req_lock);
pthread_cond_signal (&priv->req_cond);
}
#pragma generate
char *
fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term)
{
fdl_private_t *priv = this->private;
int ret;
char * ptr = NULL;
/*
* Use .jnl instead of .log so that we don't get test info (mistakenly)
* appended to our journal files.
*/
if (this->ctx->cmd_args.log_ident) {
ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl",
priv->log_dir, this->ctx->cmd_args.log_ident,
obj->type, term);
}
else {
ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl",
priv->log_dir, obj->type, term);
}
if ((ret <= 0) || !obj->path) {
gf_log (this->name, GF_LOG_ERROR,
"failed to construct log-file path");
goto err;
}
gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)",
obj->path, obj->size);
obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666);
if (obj->fd < 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to open log file (%s)", strerror(errno));
goto err;
}
#if !defined(GF_BSD_HOST_OS)
/*
* NetBSD can just go die in a fire. Even though it claims to support
* fallocate/posix_fallocate they don't actually *do* anything so the
* file size remains zero. Then mmap succeeds anyway, but any access
* to the mmap'ed region will segfault. It would be acceptable for
* fallocate to do what it says, for mmap to fail, or for access to
* extend the file. NetBSD managed to hit the trifecta of Getting
* Everything Wrong, and debugging in that environment to get this far
* has already been painful enough (systems I worked on in 1990 were
* better that way). We'll fall through to the lseek/write method, and
* performance will be worse, and TOO BAD.
*/
if (sys_fallocate(obj->fd,0,0,obj->size) < 0)
#endif
{
gf_log (this->name, GF_LOG_WARNING,
"failed to fallocate space for log file");
/* Have to do this the ugly page-faulty way. */
(void) sys_lseek (obj->fd, obj->size-1, SEEK_SET);
(void) sys_write (obj->fd, "", 1);
}
ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0);
if (ptr == MAP_FAILED) {
gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)",
strerror(errno));
goto err;
}
obj->ptr = ptr;
obj->max_offset = 0;
return ptr;
err:
if (obj->fd >= 0) {
sys_close (obj->fd);
obj->fd = (-1);
}
if (obj->path) {
GF_FREE (obj->path);
obj->path = NULL;
}
return ptr;
}
void
fdl_close_term_log (xlator_t *this, log_obj_t *obj)
{
fdl_private_t *priv = this->private;
if (obj->ptr) {
(void) munmap (obj->ptr, obj->size);
obj->ptr = NULL;
}
if (obj->fd >= 0) {
gf_log (this->name, GF_LOG_INFO,
"truncating term %d %s journal to %ld",
priv->term, obj->type, obj->max_offset);
if (sys_ftruncate(obj->fd,obj->max_offset) < 0) {
gf_log (this->name, GF_LOG_WARNING,
"failed to truncate journal (%s)",
strerror(errno));
}
sys_close (obj->fd);
obj->fd = (-1);
}
if (obj->path) {
GF_FREE (obj->path);
obj->path = NULL;
}
}
gf_boolean_t
fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr)
{
fdl_private_t *priv = this->private;
fdl_close_term_log (this, &priv->meta_log);
fdl_close_term_log (this, &priv->data_log);
++(priv->term);
*meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term);
if (!*meta_ptr) {
return _gf_false;
}
*data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term);
if (!*data_ptr) {
return _gf_false;
}
return _gf_true;
}
void *
fdl_worker (void *arg)
{
xlator_t *this = arg;
fdl_private_t *priv = this->private;
call_stub_t *stub;
char * meta_ptr = NULL;
off_t *meta_offset = &priv->meta_log.max_offset;
char * data_ptr = NULL;
off_t *data_offset = &priv->data_log.max_offset;
unsigned long base_as_ul;
void * msync_ptr;
size_t msync_len;
gf_boolean_t recycle;
void *err_label = &&err_unlocked;
priv->meta_log.type = "meta";
priv->meta_log.size = META_FILE_SIZE;
priv->meta_log.path = NULL;
priv->meta_log.fd = (-1);
priv->meta_log.ptr = NULL;
priv->data_log.type = "data";
priv->data_log.size = DATA_FILE_SIZE;
priv->data_log.path = NULL;
priv->data_log.fd = (-1);
priv->data_log.ptr = NULL;
/* TBD: initial term should come from persistent storage (e.g. etcd) */
priv->first_term = ++(priv->term);
meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term);
if (!meta_ptr) {
goto *err_label;
}
data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term);
if (!data_ptr) {
fdl_close_term_log (this, &priv->meta_log);
goto *err_label;
}
for (;;) {
pthread_mutex_lock (&priv->req_lock);
err_label = &&err_locked;
while (list_empty(&priv->reqs)) {
pthread_cond_wait (&priv->req_cond, &priv->req_lock);
if (priv->should_stop) {
goto *err_label;
}
if (priv->change_term) {
if (!fdl_change_term(this, &meta_ptr,
&data_ptr)) {
goto *err_label;
}
priv->change_term = _gf_false;
continue;
}
}
stub = list_entry (priv->reqs.next, call_stub_t, list);
list_del_init (&stub->list);
pthread_mutex_unlock (&priv->req_lock);
err_label = &&err_unlocked;
/*
* TBD: batch requests
*
* What we should do here is gather up *all* of the requests
* that have accumulated since we were last at this point,
* blast them all out in one big writev, and then dispatch them
* all before coming back for more. That maximizes throughput,
* at some cost to latency (due to queuing effects at the log
* stage). Note that we're likely to be above io-threads, so
* the dispatch itself will be parallelized (at further cost to
* latency). For now, we just do the simplest thing and handle
* one request all the way through before fetching the next.
*
* So, why mmap/msync instead of writev/fdatasync? Because it's
* faster. Much faster. So much faster that I half-suspect
* cheating, but it's more convenient for now than having to
* ensure that everything's page-aligned for O_DIRECT (the only
* alternative that still might avoid ridiculous levels of
* local-FS overhead).
*
* TBD: check that msync really does get our data to disk.
*/
gf_log (this->name, GF_LOG_DEBUG,
"logging %u+%u bytes for op %d",
stub->jnl_meta_len, stub->jnl_data_len, stub->fop);
recycle = _gf_false;
if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) {
recycle = _gf_true;
}
if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) {
recycle = _gf_true;
}
if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) {
goto *err_label;
}
meta_ptr = priv->meta_log.ptr;
data_ptr = priv->data_log.ptr;
gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p",
meta_ptr + *meta_offset, data_ptr + *data_offset);
stub->serialize (stub, meta_ptr + *meta_offset,
data_ptr + *data_offset);
if (stub->jnl_meta_len > 0) {
base_as_ul = (unsigned long) (meta_ptr + *meta_offset);
msync_ptr = (void *) (base_as_ul & ~0x0fff);
msync_len = (size_t) (base_as_ul & 0x0fff);
if (msync (msync_ptr, msync_len+stub->jnl_meta_len,
MS_SYNC) < 0) {
gf_log (this->name, GF_LOG_WARNING,
"failed to log request meta (%s)",
strerror(errno));
}
*meta_offset += stub->jnl_meta_len;
}
if (stub->jnl_data_len > 0) {
base_as_ul = (unsigned long) (data_ptr + *data_offset);
msync_ptr = (void *) (base_as_ul & ~0x0fff);
msync_len = (size_t) (base_as_ul & 0x0fff);
if (msync (msync_ptr, msync_len+stub->jnl_data_len,
MS_SYNC) < 0) {
gf_log (this->name, GF_LOG_WARNING,
"failed to log request data (%s)",
strerror(errno));
}
*data_offset += stub->jnl_data_len;
}
call_resume (stub);
}
err_locked:
pthread_mutex_unlock (&priv->req_lock);
err_unlocked:
fdl_close_term_log (this, &priv->meta_log);
fdl_close_term_log (this, &priv->data_log);
return NULL;
}
int32_t
fdl_ipc_continue (call_frame_t *frame, xlator_t *this,
int32_t op, dict_t *xdata)
{
/*
* Nothing to be done here. Just Unwind. *
*/
STACK_UNWIND_STRICT (ipc, frame, 0, 0, xdata);
return 0;
}
int32_t
fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
call_stub_t *stub;
fdl_private_t *priv = this->private;
dict_t *tdict;
int32_t gt_err = EIO;
switch (op) {
case FDL_IPC_CHANGE_TERM:
gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op");
priv->change_term = _gf_true;
pthread_cond_signal (&priv->req_cond);
STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL);
break;
case FDL_IPC_GET_TERMS:
gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op");
tdict = dict_new ();
if (!tdict) {
gt_err = ENOMEM;
goto gt_done;
}
if (dict_set_int32(tdict,"first",priv->first_term) != 0) {
goto gt_done;
}
if (dict_set_int32(tdict,"last",priv->term) != 0) {
goto gt_done;
}
gt_err = 0;
gt_done:
if (gt_err) {
STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL);
} else {
STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict);
}
if (tdict) {
dict_unref (tdict);
}
break;
case FDL_IPC_JBR_SERVER_ROLLBACK:
/*
* In case of a rollback from jbr-server, dump *
* the term and index number in the journal, *
* which will later be used to rollback the fop *
*/
stub = fop_ipc_stub (frame, fdl_ipc_continue,
op, xdata);
fdl_len_ipc (stub);
stub->serialize = fdl_serialize_ipc;
fdl_enqueue (this, stub);
break;
default:
STACK_WIND_TAIL (frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ipc,
op, xdata);
}
return 0;
}
int
fdl_init (xlator_t *this)
{
fdl_private_t *priv = NULL;
priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t);
if (!priv) {
gf_log (this->name, GF_LOG_ERROR,
"failed to allocate fdl_private");
goto err;
}
INIT_LIST_HEAD (&priv->reqs);
if (pthread_mutex_init (&priv->req_lock, NULL) != 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to initialize req_lock");
goto err;
}
if (pthread_cond_init (&priv->req_cond, NULL) != 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to initialize req_cond");
goto err;
}
GF_OPTION_INIT ("log-path", priv->log_dir, path, err);
this->private = priv;
/*
* The rest of the fop table is automatically generated, so this is a
* bit cleaner than messing with the generation to add a hand-written
* exception.
*/
if (gf_thread_create (&priv->worker, NULL, fdl_worker, this,
"fdlwrker") != 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to start fdl_worker");
goto err;
}
return 0;
err:
if (priv) {
GF_FREE(priv);
}
return -1;
}
void
fdl_fini (xlator_t *this)
{
fdl_private_t *priv = this->private;
if (priv) {
priv->should_stop = _gf_true;
pthread_cond_signal (&priv->req_cond);
pthread_join (priv->worker, NULL);
GF_FREE(priv);
}
}
int
fdl_reconfigure (xlator_t *this, dict_t *options)
{
fdl_private_t *priv = this->private;
GF_OPTION_RECONF ("log_dir", priv->log_dir, options, path, out);
/* TBD: react if it changed */
out:
return 0;
}
int32_t
mem_acct_init (xlator_t *this)
{
int ret = -1;
GF_VALIDATE_OR_GOTO ("fdl", this, out);
ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1);
if (ret != 0) {
gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
"failed");
return ret;
}
out:
return ret;
}
class_methods_t class_methods = {
.init = fdl_init,
.fini = fdl_fini,
.reconfigure = fdl_reconfigure,
.notify = default_notify,
};
struct volume_options options[] = {
{ .key = {"log-path"},
.type = GF_OPTION_TYPE_PATH,
.default_value = DEFAULT_LOG_FILE_DIRECTORY,
.description = "Directory for FDL files."
},
{ .key = {NULL} },
};
struct xlator_cbks cbks = {
.release = default_release,
.releasedir = default_releasedir,
.forget = default_forget,
};

View File

@ -0,0 +1,512 @@
/*
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include "call-stub.h"
#include "iatt.h"
#include "defaults.h"
#include "syscall.h"
#include "xlator.h"
#include "fdl.h"
/* TBD: make tunable */
#define META_FILE_SIZE (1 << 20)
#define DATA_FILE_SIZE (1 << 24)
enum gf_fdl { gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, gf_fdl_mt_end };
typedef struct {
char *type;
off_t size;
char *path;
int fd;
void *ptr;
off_t max_offset;
} log_obj_t;
typedef struct {
struct list_head reqs;
pthread_mutex_t req_lock;
pthread_cond_t req_cond;
char *log_dir;
pthread_t worker;
gf_boolean_t should_stop;
gf_boolean_t change_term;
log_obj_t meta_log;
log_obj_t data_log;
int term;
int first_term;
} fdl_private_t;
int32_t
fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
void
fdl_enqueue(xlator_t *this, call_stub_t *stub)
{
fdl_private_t *priv = this->private;
pthread_mutex_lock(&priv->req_lock);
list_add_tail(&stub->list, &priv->reqs);
pthread_mutex_unlock(&priv->req_lock);
pthread_cond_signal(&priv->req_cond);
}
#pragma generate
char *
fdl_open_term_log(xlator_t *this, log_obj_t *obj, int term)
{
fdl_private_t *priv = this->private;
int ret;
char *ptr = NULL;
/*
* Use .jnl instead of .log so that we don't get test info (mistakenly)
* appended to our journal files.
*/
if (this->ctx->cmd_args.log_ident) {
ret = gf_asprintf(&obj->path, "%s/%s-%s-%d.jnl", priv->log_dir,
this->ctx->cmd_args.log_ident, obj->type, term);
} else {
ret = gf_asprintf(&obj->path, "%s/fubar-%s-%d.jnl", priv->log_dir,
obj->type, term);
}
if ((ret <= 0) || !obj->path) {
gf_log(this->name, GF_LOG_ERROR, "failed to construct log-file path");
goto err;
}
gf_log(this->name, GF_LOG_INFO, "opening %s (size %ld)", obj->path,
obj->size);
obj->fd = open(obj->path, O_RDWR | O_CREAT | O_TRUNC, 0666);
if (obj->fd < 0) {
gf_log(this->name, GF_LOG_ERROR, "failed to open log file (%s)",
strerror(errno));
goto err;
}
#if !defined(GF_BSD_HOST_OS)
/*
* NetBSD can just go die in a fire. Even though it claims to support
* fallocate/posix_fallocate they don't actually *do* anything so the
* file size remains zero. Then mmap succeeds anyway, but any access
* to the mmap'ed region will segfault. It would be acceptable for
* fallocate to do what it says, for mmap to fail, or for access to
* extend the file. NetBSD managed to hit the trifecta of Getting
* Everything Wrong, and debugging in that environment to get this far
* has already been painful enough (systems I worked on in 1990 were
* better that way). We'll fall through to the lseek/write method, and
* performance will be worse, and TOO BAD.
*/
if (sys_fallocate(obj->fd, 0, 0, obj->size) < 0)
#endif
{
gf_log(this->name, GF_LOG_WARNING,
"failed to fallocate space for log file");
/* Have to do this the ugly page-faulty way. */
(void)sys_lseek(obj->fd, obj->size - 1, SEEK_SET);
(void)sys_write(obj->fd, "", 1);
}
ptr = mmap(NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0);
if (ptr == MAP_FAILED) {
gf_log(this->name, GF_LOG_ERROR, "failed to mmap log (%s)",
strerror(errno));
goto err;
}
obj->ptr = ptr;
obj->max_offset = 0;
return ptr;
err:
if (obj->fd >= 0) {
sys_close(obj->fd);
obj->fd = (-1);
}
if (obj->path) {
GF_FREE(obj->path);
obj->path = NULL;
}
return ptr;
}
void
fdl_close_term_log(xlator_t *this, log_obj_t *obj)
{
fdl_private_t *priv = this->private;
if (obj->ptr) {
(void)munmap(obj->ptr, obj->size);
obj->ptr = NULL;
}
if (obj->fd >= 0) {
gf_log(this->name, GF_LOG_INFO, "truncating term %d %s journal to %ld",
priv->term, obj->type, obj->max_offset);
if (sys_ftruncate(obj->fd, obj->max_offset) < 0) {
gf_log(this->name, GF_LOG_WARNING,
"failed to truncate journal (%s)", strerror(errno));
}
sys_close(obj->fd);
obj->fd = (-1);
}
if (obj->path) {
GF_FREE(obj->path);
obj->path = NULL;
}
}
gf_boolean_t
fdl_change_term(xlator_t *this, char **meta_ptr, char **data_ptr)
{
fdl_private_t *priv = this->private;
fdl_close_term_log(this, &priv->meta_log);
fdl_close_term_log(this, &priv->data_log);
++(priv->term);
*meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term);
if (!*meta_ptr) {
return _gf_false;
}
*data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term);
if (!*data_ptr) {
return _gf_false;
}
return _gf_true;
}
void *
fdl_worker(void *arg)
{
xlator_t *this = arg;
fdl_private_t *priv = this->private;
call_stub_t *stub;
char *meta_ptr = NULL;
off_t *meta_offset = &priv->meta_log.max_offset;
char *data_ptr = NULL;
off_t *data_offset = &priv->data_log.max_offset;
unsigned long base_as_ul;
void *msync_ptr;
size_t msync_len;
gf_boolean_t recycle;
void *err_label = &&err_unlocked;
priv->meta_log.type = "meta";
priv->meta_log.size = META_FILE_SIZE;
priv->meta_log.path = NULL;
priv->meta_log.fd = (-1);
priv->meta_log.ptr = NULL;
priv->data_log.type = "data";
priv->data_log.size = DATA_FILE_SIZE;
priv->data_log.path = NULL;
priv->data_log.fd = (-1);
priv->data_log.ptr = NULL;
/* TBD: initial term should come from persistent storage (e.g. etcd) */
priv->first_term = ++(priv->term);
meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term);
if (!meta_ptr) {
goto *err_label;
}
data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term);
if (!data_ptr) {
fdl_close_term_log(this, &priv->meta_log);
goto *err_label;
}
for (;;) {
pthread_mutex_lock(&priv->req_lock);
err_label = &&err_locked;
while (list_empty(&priv->reqs)) {
pthread_cond_wait(&priv->req_cond, &priv->req_lock);
if (priv->should_stop) {
goto *err_label;
}
if (priv->change_term) {
if (!fdl_change_term(this, &meta_ptr, &data_ptr)) {
goto *err_label;
}
priv->change_term = _gf_false;
continue;
}
}
stub = list_entry(priv->reqs.next, call_stub_t, list);
list_del_init(&stub->list);
pthread_mutex_unlock(&priv->req_lock);
err_label = &&err_unlocked;
/*
* TBD: batch requests
*
* What we should do here is gather up *all* of the requests
* that have accumulated since we were last at this point,
* blast them all out in one big writev, and then dispatch them
* all before coming back for more. That maximizes throughput,
* at some cost to latency (due to queuing effects at the log
* stage). Note that we're likely to be above io-threads, so
* the dispatch itself will be parallelized (at further cost to
* latency). For now, we just do the simplest thing and handle
* one request all the way through before fetching the next.
*
* So, why mmap/msync instead of writev/fdatasync? Because it's
* faster. Much faster. So much faster that I half-suspect
* cheating, but it's more convenient for now than having to
* ensure that everything's page-aligned for O_DIRECT (the only
* alternative that still might avoid ridiculous levels of
* local-FS overhead).
*
* TBD: check that msync really does get our data to disk.
*/
gf_log(this->name, GF_LOG_DEBUG, "logging %u+%u bytes for op %d",
stub->jnl_meta_len, stub->jnl_data_len, stub->fop);
recycle = _gf_false;
if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) {
recycle = _gf_true;
}
if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) {
recycle = _gf_true;
}
if (recycle && !fdl_change_term(this, &meta_ptr, &data_ptr)) {
goto *err_label;
}
meta_ptr = priv->meta_log.ptr;
data_ptr = priv->data_log.ptr;
gf_log(this->name, GF_LOG_DEBUG, "serializing to %p/%p",
meta_ptr + *meta_offset, data_ptr + *data_offset);
stub->serialize(stub, meta_ptr + *meta_offset, data_ptr + *data_offset);
if (stub->jnl_meta_len > 0) {
base_as_ul = (unsigned long)(meta_ptr + *meta_offset);
msync_ptr = (void *)(base_as_ul & ~0x0fff);
msync_len = (size_t)(base_as_ul & 0x0fff);
if (msync(msync_ptr, msync_len + stub->jnl_meta_len, MS_SYNC) < 0) {
gf_log(this->name, GF_LOG_WARNING,
"failed to log request meta (%s)", strerror(errno));
}
*meta_offset += stub->jnl_meta_len;
}
if (stub->jnl_data_len > 0) {
base_as_ul = (unsigned long)(data_ptr + *data_offset);
msync_ptr = (void *)(base_as_ul & ~0x0fff);
msync_len = (size_t)(base_as_ul & 0x0fff);
if (msync(msync_ptr, msync_len + stub->jnl_data_len, MS_SYNC) < 0) {
gf_log(this->name, GF_LOG_WARNING,
"failed to log request data (%s)", strerror(errno));
}
*data_offset += stub->jnl_data_len;
}
call_resume(stub);
}
err_locked:
pthread_mutex_unlock(&priv->req_lock);
err_unlocked:
fdl_close_term_log(this, &priv->meta_log);
fdl_close_term_log(this, &priv->data_log);
return NULL;
}
int32_t
fdl_ipc_continue(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
/*
* Nothing to be done here. Just Unwind. *
*/
STACK_UNWIND_STRICT(ipc, frame, 0, 0, xdata);
return 0;
}
int32_t
fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
call_stub_t *stub;
fdl_private_t *priv = this->private;
dict_t *tdict;
int32_t gt_err = EIO;
switch (op) {
case FDL_IPC_CHANGE_TERM:
gf_log(this->name, GF_LOG_INFO, "got CHANGE_TERM op");
priv->change_term = _gf_true;
pthread_cond_signal(&priv->req_cond);
STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL);
break;
case FDL_IPC_GET_TERMS:
gf_log(this->name, GF_LOG_INFO, "got GET_TERMS op");
tdict = dict_new();
if (!tdict) {
gt_err = ENOMEM;
goto gt_done;
}
if (dict_set_int32(tdict, "first", priv->first_term) != 0) {
goto gt_done;
}
if (dict_set_int32(tdict, "last", priv->term) != 0) {
goto gt_done;
}
gt_err = 0;
gt_done:
if (gt_err) {
STACK_UNWIND_STRICT(ipc, frame, -1, gt_err, NULL);
} else {
STACK_UNWIND_STRICT(ipc, frame, 0, 0, tdict);
}
if (tdict) {
dict_unref(tdict);
}
break;
case FDL_IPC_JBR_SERVER_ROLLBACK:
/*
* In case of a rollback from jbr-server, dump *
* the term and index number in the journal, *
* which will later be used to rollback the fop *
*/
stub = fop_ipc_stub(frame, fdl_ipc_continue, op, xdata);
fdl_len_ipc(stub);
stub->serialize = fdl_serialize_ipc;
fdl_enqueue(this, stub);
break;
default:
STACK_WIND_TAIL(frame, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ipc, op, xdata);
}
return 0;
}
int
fdl_init(xlator_t *this)
{
fdl_private_t *priv = NULL;
priv = GF_CALLOC(1, sizeof(*priv), gf_fdl_mt_fdl_private_t);
if (!priv) {
gf_log(this->name, GF_LOG_ERROR, "failed to allocate fdl_private");
goto err;
}
INIT_LIST_HEAD(&priv->reqs);
if (pthread_mutex_init(&priv->req_lock, NULL) != 0) {
gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_lock");
goto err;
}
if (pthread_cond_init(&priv->req_cond, NULL) != 0) {
gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_cond");
goto err;
}
GF_OPTION_INIT("log-path", priv->log_dir, path, err);
this->private = priv;
/*
* The rest of the fop table is automatically generated, so this is a
* bit cleaner than messing with the generation to add a hand-written
* exception.
*/
if (gf_thread_create(&priv->worker, NULL, fdl_worker, this, "fdlwrker") !=
0) {
gf_log(this->name, GF_LOG_ERROR, "failed to start fdl_worker");
goto err;
}
return 0;
err:
if (priv) {
GF_FREE(priv);
}
return -1;
}
void
fdl_fini(xlator_t *this)
{
fdl_private_t *priv = this->private;
if (priv) {
priv->should_stop = _gf_true;
pthread_cond_signal(&priv->req_cond);
pthread_join(priv->worker, NULL);
GF_FREE(priv);
}
}
int
fdl_reconfigure(xlator_t *this, dict_t *options)
{
fdl_private_t *priv = this->private;
GF_OPTION_RECONF("log_dir", priv->log_dir, options, path, out);
/* TBD: react if it changed */
out:
return 0;
}
int32_t
mem_acct_init(xlator_t *this)
{
int ret = -1;
GF_VALIDATE_OR_GOTO("fdl", this, out);
ret = xlator_mem_acct_init(this, gf_fdl_mt_end + 1);
if (ret != 0) {
gf_log(this->name, GF_LOG_ERROR,
"Memory accounting init"
"failed");
return ret;
}
out:
return ret;
}
class_methods_t class_methods = {
.init = fdl_init,
.fini = fdl_fini,
.reconfigure = fdl_reconfigure,
.notify = default_notify,
};
struct volume_options options[] = {
{.key = {"log-path"},
.type = GF_OPTION_TYPE_PATH,
.default_value = DEFAULT_LOG_FILE_DIRECTORY,
.description = "Directory for FDL files."},
{.key = {NULL}},
};
struct xlator_cbks cbks = {
.release = default_release,
.releasedir = default_releasedir,
.forget = default_forget,
};

View File

@ -1,304 +0,0 @@
#pragma fragment PROLOG
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include "glusterfs.h"
#include "iatt.h"
#include "syncop.h"
#include "xlator.h"
#include "glfs-internal.h"
#include "fdl.h"
#define GFAPI_SUCCESS 0
inode_t *
recon_get_inode (glfs_t *fs, uuid_t gfid)
{
inode_t *inode;
loc_t loc = {NULL,};
struct iatt iatt;
int ret;
inode_t *newinode;
inode = inode_find (fs->active_subvol->itable, gfid);
if (inode) {
printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid));
return inode;
}
loc.inode = inode_new (fs->active_subvol->itable);
if (!loc.inode) {
return NULL;
}
gf_uuid_copy (loc.inode->gfid, gfid);
gf_uuid_copy (loc.gfid, gfid);
printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid));
ret = syncop_lookup (fs->active_subvol, &loc, &iatt,
NULL, NULL, NULL);
if (ret != GFAPI_SUCCESS) {
fprintf (stderr, "syncop_lookup failed (%d)\n", ret);
return NULL;
}
newinode = inode_link (loc.inode, NULL, NULL, &iatt);
if (newinode) {
inode_lookup (newinode);
}
return newinode;
}
#pragma fragment DICT
dict_t *@ARGNAME@;
@ARGNAME@ = dict_new();
if (!@ARGNAME@) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
{
int key_len, data_len;
char *key_ptr;
int garbage;
for (;;) {
key_len = *((int *)new_meta);
new_meta += sizeof(int);
if (!key_len) {
break;
}
key_ptr = new_meta;
new_meta += key_len;
data_len = *((int *)new_meta);
new_meta += sizeof(int);
garbage = dict_set_static_bin (@ARGNAME@, key_ptr,
new_meta, data_len);
/* TBD: check error from dict_set_static_bin */
(void)garbage;
new_meta += data_len;
}
}
#pragma fragment DICT_CLEANUP
cleanup_@ARGNAME@:
dict_unref (@ARGNAME@);
#pragma fragment DOUBLE
@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
new_meta += sizeof(uint64_t);
#pragma fragment FD
inode_t *@ARGNAME@_ino;
fd_t *@ARGNAME@;
@ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta));
new_meta += 16;
if (!@ARGNAME@_ino) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@_ino;
@ARGNAME@ = fd_anonymous (@ARGNAME@_ino);
if (!@ARGNAME@) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
#pragma fragment FD_CLEANUP
cleanup_@ARGNAME@:
fd_unref (@ARGNAME@);
cleanup_@ARGNAME@_ino:
inode_unref (@ARGNAME@_ino);
#pragma fragment NEW_FD
/*
* This pseudo-type is only used for create, and in that case we know
* we'll be using loc.inode, so it's not worth generalizing to take an
* extra argument.
*/
fd_t *@ARGNAME@ = fd_anonymous (loc.inode);
if (!fd) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
new_meta += 16;
#pragma fragment NEW_FD_CLEANUP
cleanup_@ARGNAME@:
fd_unref (@ARGNAME@);
#pragma fragment INTEGER
@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
new_meta += sizeof(@ARGTYPE@);
#pragma fragment LOC
loc_t @ARGNAME@ = { NULL, };
@ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta));
if (!@ARGNAME@.inode) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid);
new_meta += 16;
new_meta += 16; /* skip over pargfid */
if (*(new_meta++)) {
@ARGNAME@.name = new_meta;
new_meta += strlen(new_meta) + 1;
}
#pragma fragment LOC_CLEANUP
cleanup_@ARGNAME@:
loc_wipe (&@ARGNAME@);
#pragma fragment PARENT_LOC
loc_t @ARGNAME@ = { NULL, };
new_meta += 16; /* skip over gfid */
@ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta));
if (!@ARGNAME@.parent) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid);
new_meta += 16;
if (!*(new_meta++)) {
goto *err_label;
}
@ARGNAME@.name = new_meta;
new_meta += strlen(new_meta) + 1;
@ARGNAME@.inode = inode_new (fs->active_subvol->itable);
if (!@ARGNAME@.inode) {
goto *err_label;
}
#pragma fragment PARENT_LOC_CLEANUP
cleanup_@ARGNAME@:
loc_wipe (&@ARGNAME@);
#pragma fragment STRING
char *@ARGNAME@;
if (*(new_meta++)) {
@ARGNAME@ = new_meta;
new_meta += (strlen(new_meta) + 1);
}
else {
goto *err_label;
}
#pragma fragment VECTOR
struct iovec @ARGNAME@;
@ARGNAME@.iov_len = *((size_t *)new_meta);
new_meta += sizeof(@ARGNAME@.iov_len);
@ARGNAME@.iov_base = new_data;
new_data += @ARGNAME@.iov_len;
#pragma fragment IATT
struct iatt @ARGNAME@;
{
@ARGNAME@.ia_prot = *((ia_prot_t *)new_meta);
new_meta += sizeof(ia_prot_t);
uint32_t *myints = (uint32_t *)new_meta;
@ARGNAME@.ia_uid = myints[0];
@ARGNAME@.ia_gid = myints[1];
@ARGNAME@.ia_atime = myints[2];
@ARGNAME@.ia_atime_nsec = myints[3];
@ARGNAME@.ia_mtime = myints[4];
@ARGNAME@.ia_mtime_nsec = myints[5];
new_meta += sizeof(*myints) * 6;
}
#pragma fragment IOBREF
struct iobref *@ARGNAME@;
@ARGNAME@ = iobref_new();
if (!@ARGNAME@) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
#pragma fragment IOBREF_CLEANUP
cleanup_@ARGNAME@:
iobref_unref (@ARGNAME@);
#pragma fragment LINK
/* TBD: check error */
inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@);
if (new_inode) {
inode_lookup (new_inode);
}
#pragma fragment FOP
int
fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data)
{
char *new_meta = *old_meta;
char *new_data = *old_data;
int ret;
int status = 0xbad;
void *err_label = &&done;
@FUNCTION_BODY@
ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL);
if (ret != @SUCCESS_VALUE@) {
fprintf (stderr, "syncop_@NAME@ returned %d", ret);
goto *err_label;
}
@LINKS@
status = 0;
@CLEANUPS@
done:
*old_meta = new_meta;
*old_data = new_data;
return status;
}
#pragma fragment CASE
case GF_FOP_@UPNAME@:
printf ("=== GF_FOP_@UPNAME@\n");
if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) {
goto done;
}
recognized = 1;
break;
#pragma fragment EPILOG
int
recon_execute (glfs_t *fs, char **old_meta, char **old_data)
{
char *new_meta = *old_meta;
char *new_data = *old_data;
int recognized = 0;
event_header_t *eh;
eh = (event_header_t *)new_meta;
new_meta += sizeof (*eh);
/* TBD: check event_type instead of assuming NEW_REQUEST */
switch (eh->fop_type) {
@SWITCH_BODY@
default:
printf ("unknown fop %u\n", eh->fop_type);
}
done:
*old_meta = new_meta;
*old_data = new_data;
return recognized;
}

View File

@ -0,0 +1,297 @@
#pragma fragment PROLOG
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include "glusterfs.h"
#include "iatt.h"
#include "syncop.h"
#include "xlator.h"
#include "glfs-internal.h"
#include "fdl.h"
#define GFAPI_SUCCESS 0
inode_t *
recon_get_inode(glfs_t *fs, uuid_t gfid)
{
inode_t *inode;
loc_t loc = {
NULL,
};
struct iatt iatt;
int ret;
inode_t *newinode;
inode = inode_find(fs->active_subvol->itable, gfid);
if (inode) {
printf("=== FOUND %s IN TABLE\n", uuid_utoa(gfid));
return inode;
}
loc.inode = inode_new(fs->active_subvol->itable);
if (!loc.inode) {
return NULL;
}
gf_uuid_copy(loc.inode->gfid, gfid);
gf_uuid_copy(loc.gfid, gfid);
printf("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid));
ret = syncop_lookup(fs->active_subvol, &loc, &iatt, NULL, NULL, NULL);
if (ret != GFAPI_SUCCESS) {
fprintf(stderr, "syncop_lookup failed (%d)\n", ret);
return NULL;
}
newinode = inode_link(loc.inode, NULL, NULL, &iatt);
if (newinode) {
inode_lookup(newinode);
}
return newinode;
}
#pragma fragment DICT
dict_t *@ARGNAME@;
@ARGNAME@ = dict_new();
if (!@ARGNAME@) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
{
int key_len, data_len;
char *key_ptr;
int garbage;
for (;;) {
key_len = *((int *)new_meta);
new_meta += sizeof(int);
if (!key_len) {
break;
}
key_ptr = new_meta;
new_meta += key_len;
data_len = *((int *)new_meta);
new_meta += sizeof(int);
garbage = dict_set_static_bin(@ARGNAME@, key_ptr, new_meta, data_len);
/* TBD: check error from dict_set_static_bin */
(void)garbage;
new_meta += data_len;
}
}
#pragma fragment DICT_CLEANUP
cleanup_@ARGNAME@ : dict_unref(@ARGNAME@);
#pragma fragment DOUBLE
@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
new_meta += sizeof(uint64_t);
#pragma fragment FD
inode_t *@ARGNAME@_ino;
fd_t *@ARGNAME@;
@ARGNAME@_ino = recon_get_inode(fs, *((uuid_t *)new_meta));
new_meta += 16;
if (!@ARGNAME@_ino) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@_ino;
@ARGNAME@ = fd_anonymous(@ARGNAME@_ino);
if (!@ARGNAME@) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
#pragma fragment FD_CLEANUP
cleanup_@ARGNAME@ : fd_unref(@ARGNAME@);
cleanup_@ARGNAME@_ino : inode_unref(@ARGNAME@_ino);
#pragma fragment NEW_FD
/*
* This pseudo-type is only used for create, and in that case we know
* we'll be using loc.inode, so it's not worth generalizing to take an
* extra argument.
*/
fd_t *@ARGNAME@ = fd_anonymous(loc.inode);
if (!fd) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
new_meta += 16;
#pragma fragment NEW_FD_CLEANUP
cleanup_@ARGNAME@ : fd_unref(@ARGNAME@);
#pragma fragment INTEGER
@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
new_meta += sizeof(@ARGTYPE@);
#pragma fragment LOC
loc_t @ARGNAME@ = {
NULL,
};
@ARGNAME@.inode = recon_get_inode(fs, *((uuid_t *)new_meta));
if (!@ARGNAME@.inode) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
gf_uuid_copy(@ARGNAME@.gfid, @ARGNAME@.inode->gfid);
new_meta += 16;
new_meta += 16; /* skip over pargfid */
if (*(new_meta++)) {
@ARGNAME@.name = new_meta;
new_meta += strlen(new_meta) + 1;
}
#pragma fragment LOC_CLEANUP
cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@);
#pragma fragment PARENT_LOC
loc_t @ARGNAME@ = {
NULL,
};
new_meta += 16; /* skip over gfid */
@ARGNAME@.parent = recon_get_inode(fs, *((uuid_t *)new_meta));
if (!@ARGNAME@.parent) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
gf_uuid_copy(@ARGNAME@.pargfid, @ARGNAME@.parent->gfid);
new_meta += 16;
if (!*(new_meta++)) {
goto *err_label;
}
@ARGNAME@.name = new_meta;
new_meta += strlen(new_meta) + 1;
@ARGNAME@.inode = inode_new(fs->active_subvol->itable);
if (!@ARGNAME@.inode) {
goto *err_label;
}
#pragma fragment PARENT_LOC_CLEANUP
cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@);
#pragma fragment STRING
char *@ARGNAME@;
if (*(new_meta++)) {
@ARGNAME@ = new_meta;
new_meta += (strlen(new_meta) + 1);
} else {
goto *err_label;
}
#pragma fragment VECTOR
struct iovec @ARGNAME@;
@ARGNAME@.iov_len = *((size_t *)new_meta);
new_meta += sizeof(@ARGNAME@.iov_len);
@ARGNAME@.iov_base = new_data;
new_data += @ARGNAME@.iov_len;
#pragma fragment IATT
struct iatt @ARGNAME@;
{
@ARGNAME@.ia_prot = *((ia_prot_t *)new_meta);
new_meta += sizeof(ia_prot_t);
uint32_t *myints = (uint32_t *)new_meta;
@ARGNAME@.ia_uid = myints[0];
@ARGNAME@.ia_gid = myints[1];
@ARGNAME@.ia_atime = myints[2];
@ARGNAME@.ia_atime_nsec = myints[3];
@ARGNAME@.ia_mtime = myints[4];
@ARGNAME@.ia_mtime_nsec = myints[5];
new_meta += sizeof(*myints) * 6;
}
#pragma fragment IOBREF
struct iobref *@ARGNAME@;
@ARGNAME@ = iobref_new();
if (!@ARGNAME@) {
goto *err_label;
}
err_label = &&cleanup_@ARGNAME@;
#pragma fragment IOBREF_CLEANUP
cleanup_@ARGNAME@ : iobref_unref(@ARGNAME@);
#pragma fragment LINK
/* TBD: check error */
inode_t *new_inode = inode_link(@INODE_ARG@, NULL, NULL, @IATT_ARG@);
if (new_inode) {
inode_lookup(new_inode);
}
#pragma fragment FOP
int fdl_replay_@NAME@(glfs_t *fs, char **old_meta, char **old_data)
{
char *new_meta = *old_meta;
char *new_data = *old_data;
int ret;
int status = 0xbad;
void *err_label = &&done;
@FUNCTION_BODY@
ret = syncop_@NAME@(fs->active_subvol, @SYNCOP_ARGS@, NULL);
if (ret !=@SUCCESS_VALUE@) {
fprintf(stderr, "syncop_@NAME@ returned %d", ret);
goto *err_label;
}
@LINKS@
status = 0;
@CLEANUPS@
done : *old_meta = new_meta;
*old_data = new_data;
return status;
}
#pragma fragment CASE
case GF_FOP_@UPNAME@:
printf("=== GF_FOP_@UPNAME@\n");
if (fdl_replay_@NAME@(fs, &new_meta, &new_data) != 0) {
goto done;
}
recognized = 1;
break;
#pragma fragment EPILOG
int
recon_execute(glfs_t *fs, char **old_meta, char **old_data)
{
char *new_meta = *old_meta;
char *new_data = *old_data;
int recognized = 0;
event_header_t *eh;
eh = (event_header_t *)new_meta;
new_meta += sizeof(*eh);
/* TBD: check event_type instead of assuming NEW_REQUEST */
switch (eh->fop_type) {
@SWITCH_BODY@
default : printf("unknown fop %u\n", eh->fop_type);
}
done:
*old_meta = new_meta;
*old_data = new_data;
return recognized;
}

View File

@ -22,7 +22,7 @@ AM_CFLAGS = -Wall $(GF_CFLAGS)
JBRC_PREFIX = $(top_srcdir)/xlators/experimental/jbr-client/src
JBRC_GEN_FOPS = $(JBRC_PREFIX)/gen-fops.py
JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c
JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c.in
JBRC_WRAPPER = $(JBRC_PREFIX)/jbrc.c
noinst_PYTHON = $(JBRC_GEN_FOPS)
EXTRA_DIST = $(JBRC_TEMPLATES) $(JBRC_WRAPPER)

View File

@ -1,113 +0,0 @@
/* template-name fop */
int32_t
jbrc_@NAME@ (call_frame_t *frame, xlator_t *this,
@LONG_ARGS@)
{
jbrc_local_t *local = NULL;
xlator_t *target_xl = ACTIVE_CHILD(this);
local = mem_get(this->local_pool);
if (!local) {
goto err;
}
local->stub = fop_@NAME@_stub (frame, jbrc_@NAME@_continue,
@SHORT_ARGS@);
if (!local->stub) {
goto err;
}
local->curr_xl = target_xl;
local->scars = 0;
frame->local = local;
STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, target_xl,
target_xl, target_xl->fops->@NAME@,
@SHORT_ARGS@);
return 0;
err:
if (local) {
mem_put(local);
}
STACK_UNWIND_STRICT (@NAME@, frame, -1, ENOMEM,
@ERROR_ARGS@);
return 0;
}
/* template-name cbk */
int32_t
jbrc_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@LONG_ARGS@)
{
jbrc_local_t *local = frame->local;
xlator_t *last_xl = cookie;
xlator_t *next_xl;
jbrc_private_t *priv = this->private;
struct timespec spec;
if (op_ret != (-1)) {
if (local->scars) {
gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG,
HILITE("retried %p OK"), frame->local);
}
priv->active = last_xl;
goto unwind;
}
if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) {
goto unwind;
}
/* TBD: get leader ID from xdata? */
next_xl = next_xlator(this, last_xl);
/*
* We can't just give up after we've tried all bricks, because it's
* quite likely that a new leader election just hasn't finished yet.
* We also shouldn't retry endlessly, and especially not at a high
* rate, but that's good enough while we work on other things.
*
* TBD: implement slow/finite retry via a worker thread
*/
if (!next_xl || (local->scars >= SCAR_LIMIT)) {
gf_msg (this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG,
HILITE("ran out of retries for %p"), frame->local);
goto unwind;
}
local->curr_xl = next_xl;
local->scars += 1;
spec.tv_sec = 1;
spec.tv_nsec = 0;
/*
* WARNING
*
* Just calling gf_timer_call_after like this leaves open the
* possibility that writes will get reordered, if a first write is
* rescheduled and then a second comes along to find an updated
* priv->active before the first actually executes. We might need to
* implement a stricter (and more complicated) queuing mechanism to
* ensure absolute consistency in this case.
*/
if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) {
return 0;
}
unwind:
call_stub_destroy(local->stub);
STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
@SHORT_ARGS@);
return 0;
}
/* template-name cont-func */
int32_t
jbrc_@NAME@_continue (call_frame_t *frame, xlator_t *this,
@LONG_ARGS@)
{
jbrc_local_t *local = frame->local;
STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, local->curr_xl,
local->curr_xl, local->curr_xl->fops->@NAME@,
@SHORT_ARGS@);
return 0;
}

View File

@ -0,0 +1,102 @@
/* template-name fop */
int32_t jbrc_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
{
jbrc_local_t *local = NULL;
xlator_t *target_xl = ACTIVE_CHILD(this);
local = mem_get(this->local_pool);
if (!local) {
goto err;
}
local->stub = fop_@NAME@_stub(frame, jbrc_@NAME@_continue, @SHORT_ARGS@);
if (!local->stub) {
goto err;
}
local->curr_xl = target_xl;
local->scars = 0;
frame->local = local;
STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, target_xl, target_xl,
target_xl->fops->@NAME@, @SHORT_ARGS@);
return 0;
err:
if (local) {
mem_put(local);
}
STACK_UNWIND_STRICT(@NAME@, frame, -1, ENOMEM, @ERROR_ARGS@);
return 0;
}
/* template-name cbk */
int32_t jbrc_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
{
jbrc_local_t *local = frame->local;
xlator_t *last_xl = cookie;
xlator_t *next_xl;
jbrc_private_t *priv = this->private;
struct timespec spec;
if (op_ret != (-1)) {
if (local->scars) {
gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG,
HILITE("retried %p OK"), frame->local);
}
priv->active = last_xl;
goto unwind;
}
if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) {
goto unwind;
}
/* TBD: get leader ID from xdata? */
next_xl = next_xlator(this, last_xl);
/*
* We can't just give up after we've tried all bricks, because it's
* quite likely that a new leader election just hasn't finished yet.
* We also shouldn't retry endlessly, and especially not at a high
* rate, but that's good enough while we work on other things.
*
* TBD: implement slow/finite retry via a worker thread
*/
if (!next_xl || (local->scars >= SCAR_LIMIT)) {
gf_msg(this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG,
HILITE("ran out of retries for %p"), frame->local);
goto unwind;
}
local->curr_xl = next_xl;
local->scars += 1;
spec.tv_sec = 1;
spec.tv_nsec = 0;
/*
* WARNING
*
* Just calling gf_timer_call_after like this leaves open the
* possibility that writes will get reordered, if a first write is
* rescheduled and then a second comes along to find an updated
* priv->active before the first actually executes. We might need to
* implement a stricter (and more complicated) queuing mechanism to
* ensure absolute consistency in this case.
*/
if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) {
return 0;
}
unwind:
call_stub_destroy(local->stub);
STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
return 0;
}
/* template-name cont-func */
int32_t jbrc_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
{
jbrc_local_t *local = frame->local;
STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, local->curr_xl, local->curr_xl,
local->curr_xl->fops->@NAME@, @SHORT_ARGS@);
return 0;
}

View File

@ -27,7 +27,7 @@ AM_CFLAGS = -Wall $(GF_CFLAGS)
JBR_PREFIX = $(top_srcdir)/xlators/experimental/jbr-server/src
JBR_GEN_FOPS = $(JBR_PREFIX)/gen-fops.py
JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c
JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c.in
JBR_WRAPPER = $(JBR_PREFIX)/jbr.c
noinst_PYTHON = $(JBR_GEN_FOPS)
EXTRA_DIST = $(JBR_TEMPLATES) $(JBR_WRAPPER)

View File

@ -1,542 +0,0 @@
/*
* You can put anything here - it doesn't even have to be a comment - and it
* will be ignored until we reach the first template-name comment.
*/
/* template-name read-fop */
int32_t
jbr_@NAME@ (call_frame_t *frame, xlator_t *this,
@LONG_ARGS@)
{
jbr_private_t *priv = NULL;
gf_boolean_t in_recon = _gf_false;
int32_t op_errno = 0;
int32_t recon_term, recon_index;
GF_VALIDATE_OR_GOTO ("jbr", this, err);
priv = this->private;
GF_VALIDATE_OR_GOTO (this->name, priv, err);
GF_VALIDATE_OR_GOTO (this->name, frame, err);
op_errno = EREMOTE;
/* allow reads during reconciliation *
* TBD: allow "dirty" reads on non-leaders *
*/
if (xdata &&
(dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) &&
(dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) {
in_recon = _gf_true;
}
if ((!priv->leader) && (in_recon == _gf_false)) {
goto err;
}
STACK_WIND (frame, default_@NAME@_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
@SHORT_ARGS@);
return 0;
err:
STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno,
@ERROR_ARGS@);
return 0;
}
/* template-name read-perform_local_op */
/* No "perform_local_op" function needed for @NAME@ */
/* template-name read-dispatch */
/* No "dispatch" function needed for @NAME@ */
/* template-name read-call_dispatch */
/* No "call_dispatch" function needed for @NAME@ */
/* template-name read-fan-in */
/* No "fan-in" function needed for @NAME@ */
/* template-name read-continue */
/* No "continue" function needed for @NAME@ */
/* template-name read-complete */
/* No "complete" function needed for @NAME@ */
/* template-name write-fop */
int32_t
jbr_@NAME@ (call_frame_t *frame, xlator_t *this,
@LONG_ARGS@)
{
jbr_local_t *local = NULL;
jbr_private_t *priv = NULL;
int32_t ret = -1;
int op_errno = ENOMEM;
GF_VALIDATE_OR_GOTO ("jbr", this, err);
priv = this->private;
GF_VALIDATE_OR_GOTO (this->name, priv, err);
GF_VALIDATE_OR_GOTO (this->name, frame, err);
#if defined(JBR_CG_NEED_FD)
ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, fd);
#else
ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, NULL);
#endif
if (ret)
goto err;
local = frame->local;
/*
* If we let it through despite not being the leader, then we just want
* to pass it on down without all of the additional xattrs, queuing, and
* so on. However, jbr_*_complete does depend on the initialization
* immediately above this.
*/
if (!priv->leader) {
STACK_WIND (frame, jbr_@NAME@_complete,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
@SHORT_ARGS@);
return 0;
}
ret = jbr_initialize_xdata_set_attrs (this, &xdata);
if (ret)
goto err;
local->xdata = dict_ref(xdata);
local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue,
@SHORT_ARGS@);
if (!local->stub) {
goto err;
}
/*
* Can be used to just call_dispatch or be customised per fop to *
* perform ops specific to that particular fop. *
*/
ret = jbr_@NAME@_perform_local_op (frame, this, &op_errno,
@SHORT_ARGS@);
if (ret)
goto err;
return ret;
err:
if (local) {
if (local->stub) {
call_stub_destroy(local->stub);
}
if (local->qstub) {
call_stub_destroy(local->qstub);
}
if (local->fd) {
fd_unref(local->fd);
}
mem_put(local);
}
STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno,
@ERROR_ARGS@);
return 0;
}
/* template-name write-perform_local_op */
int32_t
jbr_@NAME@_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno,
@LONG_ARGS@)
{
int32_t ret = -1;
GF_VALIDATE_OR_GOTO ("jbr", this, out);
GF_VALIDATE_OR_GOTO (this->name, frame, out);
GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
ret = jbr_@NAME@_call_dispatch (frame, this, op_errno,
@SHORT_ARGS@);
out:
return ret;
}
/* template-name write-call_dispatch */
int32_t
jbr_@NAME@_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno,
@LONG_ARGS@)
{
jbr_local_t *local = NULL;
jbr_private_t *priv = NULL;
int32_t ret = -1;
GF_VALIDATE_OR_GOTO ("jbr", this, out);
priv = this->private;
GF_VALIDATE_OR_GOTO (this->name, priv, out);
GF_VALIDATE_OR_GOTO (this->name, frame, out);
local = frame->local;
GF_VALIDATE_OR_GOTO (this->name, local, out);
GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
#if defined(JBR_CG_QUEUE)
jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode);
if (!ictx) {
*op_errno = EIO;
goto out;
}
LOCK(&ictx->lock);
if (ictx->active) {
gf_msg_debug (this->name, 0,
"queuing request due to conflict");
/*
* TBD: enqueue only for real conflict
*
* Currently we just act like all writes are in
* conflict with one another. What we should really do
* is check the active/pending queues and defer only if
* there's a conflict there.
*
* It's important to check the pending queue because we
* might have an active request X which conflicts with
* a pending request Y, and this request Z might
* conflict with Y but not X. If we checked only the
* active queue then Z could jump ahead of Y, which
* would be incorrect.
*/
local->qstub = fop_@NAME@_stub (frame,
jbr_@NAME@_dispatch,
@SHORT_ARGS@);
if (!local->qstub) {
UNLOCK(&ictx->lock);
goto out;
}
list_add_tail(&local->qlinks, &ictx->pqueue);
++(ictx->pending);
UNLOCK(&ictx->lock);
ret = 0;
goto out;
} else {
list_add_tail(&local->qlinks, &ictx->aqueue);
++(ictx->active);
}
UNLOCK(&ictx->lock);
#endif
ret = jbr_@NAME@_dispatch (frame, this, @SHORT_ARGS@);
out:
return ret;
}
/* template-name write-dispatch */
int32_t
jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this,
@LONG_ARGS@)
{
jbr_local_t *local = NULL;
jbr_private_t *priv = NULL;
int32_t ret = -1;
xlator_list_t *trav;
GF_VALIDATE_OR_GOTO ("jbr", this, out);
priv = this->private;
GF_VALIDATE_OR_GOTO (this->name, priv, out);
GF_VALIDATE_OR_GOTO (this->name, frame, out);
local = frame->local;
GF_VALIDATE_OR_GOTO (this->name, local, out);
/*
* TBD: unblock pending request(s) if we fail after this point but
* before we get to jbr_@NAME@_complete (where that code currently
* resides).
*/
local->call_count = priv->n_children - 1;
for (trav = this->children->next; trav; trav = trav->next) {
STACK_WIND (frame, jbr_@NAME@_fan_in,
trav->xlator, trav->xlator->fops->@NAME@,
@SHORT_ARGS@);
}
/* TBD: variable Issue count */
ret = 0;
out:
return ret;
}
/* template-name write-fan-in */
int32_t
jbr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@LONG_ARGS@)
{
jbr_local_t *local = NULL;
int32_t ret = -1;
uint8_t call_count;
GF_VALIDATE_OR_GOTO ("jbr", this, out);
GF_VALIDATE_OR_GOTO (this->name, frame, out);
local = frame->local;
GF_VALIDATE_OR_GOTO (this->name, local, out);
gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n",
op_ret, op_errno);
LOCK(&frame->lock);
call_count = --(local->call_count);
if (op_ret != -1) {
/* Increment the number of successful acks *
* received for the operation. *
*/
(local->successful_acks)++;
local->successful_op_ret = op_ret;
}
gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
op_ret, op_errno, local->successful_acks);
UNLOCK(&frame->lock);
/* TBD: variable Completion count */
if (call_count == 0) {
call_resume(local->stub);
}
ret = 0;
out:
return ret;
}
/* template-name write-continue */
int32_t
jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this,
@LONG_ARGS@)
{
int32_t ret = -1;
gf_boolean_t result = _gf_false;
jbr_local_t *local = NULL;
jbr_local_t *new_local = NULL;
jbr_private_t *priv = NULL;
int32_t op_errno = 0;
GF_VALIDATE_OR_GOTO ("jbr", this, out);
GF_VALIDATE_OR_GOTO (this->name, frame, out);
priv = this->private;
local = frame->local;
GF_VALIDATE_OR_GOTO (this->name, priv, out);
GF_VALIDATE_OR_GOTO (this->name, local, out);
/* Perform quorum check to see if the leader needs *
* to perform the operation. If the operation will not *
* meet quorum irrespective of the leader's result *
* there is no point in the leader performing the fop *
*/
result = fop_quorum_check (this, (double)priv->n_children,
(double)local->successful_acks + 1);
if (result == _gf_false) {
gf_msg (this->name, GF_LOG_ERROR, EROFS,
J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks "
"to meet quorum. Failing the operation without trying "
"it on the leader.");
#if defined(JBR_CG_QUEUE)
/*
* In case of a fop failure, before unwinding need to *
* remove it from queue *
*/
ret = jbr_remove_from_queue (frame, this);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
J_MSG_GENERIC, "Failed to remove from queue.");
}
#endif
/*
* In this case, the quorum is not met on the followers *
* So the operation will not be performed on the leader *
* and a rollback will be sent via GF_FOP_IPC to all the *
* followers, where this particular fop's term and index *
* numbers will be journaled, and later used to rollback *
*/
call_frame_t *new_frame;
new_frame = copy_frame (frame);
if (new_frame) {
new_local = mem_get0(this->local_pool);
if (new_local) {
INIT_LIST_HEAD(&new_local->qlinks);
ret = dict_set_int32 (local->xdata,
"rollback-fop",
GF_FOP_@UPNAME@);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
J_MSG_DICT_FLR,
"failed to set rollback-fop");
} else {
new_local->xdata = dict_ref(local->xdata);
new_frame->local = new_local;
jbr_ipc_call_dispatch (new_frame,
this, &op_errno,
FDL_IPC_JBR_SERVER_ROLLBACK,
new_local->xdata);
}
} else {
gf_log (this->name, GF_LOG_WARNING,
"Could not create local for new_frame");
}
} else {
gf_log (this->name, GF_LOG_WARNING,
"Could not send rollback ipc");
}
STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS,
@ERROR_ARGS@);
} else {
STACK_WIND (frame, jbr_@NAME@_complete,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
@SHORT_ARGS@);
}
out:
return 0;
}
/* template-name write-complete */
int32_t
jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@LONG_ARGS@)
{
int32_t ret = -1;
gf_boolean_t result = _gf_false;
jbr_private_t *priv = NULL;
jbr_local_t *local = NULL;
jbr_local_t *new_local = NULL;
GF_VALIDATE_OR_GOTO ("jbr", this, err);
GF_VALIDATE_OR_GOTO (this->name, frame, err);
priv = this->private;
local = frame->local;
GF_VALIDATE_OR_GOTO (this->name, priv, err);
GF_VALIDATE_OR_GOTO (this->name, local, err);
/* If the fop failed on the leader, then reduce one successful ack
* before calculating the fop quorum
*/
LOCK(&frame->lock);
if (op_ret == -1)
(local->successful_acks)--;
UNLOCK(&frame->lock);
#if defined(JBR_CG_QUEUE)
ret = jbr_remove_from_queue (frame, this);
if (ret)
goto err;
#endif
#if defined(JBR_CG_FSYNC)
jbr_mark_fd_dirty(this, local);
#endif
#if defined(JBR_CG_NEED_FD)
fd_unref(local->fd);
#endif
/* After the leader completes the fop, a quorum check is *
* performed, taking into account the outcome of the fop *
* on the leader. Irrespective of the fop being successful *
* or failing on the leader, the result of the quorum will *
* determine if the overall fop is successful or not. For *
* example, a fop might have succeeded on every node except *
* the leader, in which case as quorum is being met, the fop *
* will be treated as a successful fop, even though it failed *
* on the leader. On follower nodes, no quorum check should *
* be done, and the result is returned to the leader as is. *
*/
if (priv->leader) {
result = fop_quorum_check (this, (double)priv->n_children,
(double)local->successful_acks + 1);
if (result == _gf_false) {
op_ret = -1;
op_errno = EROFS;
gf_msg (this->name, GF_LOG_ERROR, EROFS,
J_MSG_QUORUM_NOT_MET, "Quorum is not met. "
"The operation has failed.");
/*
* In this case, the quorum is not met after the *
* operation is performed on the leader. Hence a *
* rollback will be sent via GF_FOP_IPC to the leader *
* where this particular fop's term and index numbers *
* will be journaled, and later used to rollback. *
* The same will be done on all the followers *
*/
call_frame_t *new_frame;
new_frame = copy_frame (frame);
if (new_frame) {
new_local = mem_get0(this->local_pool);
if (new_local) {
INIT_LIST_HEAD(&new_local->qlinks);
gf_msg (this->name, GF_LOG_ERROR, 0,
J_MSG_DICT_FLR, "op = %d",
new_frame->op);
ret = dict_set_int32 (local->xdata,
"rollback-fop",
GF_FOP_@UPNAME@);
if (ret) {
gf_msg (this->name,
GF_LOG_ERROR, 0,
J_MSG_DICT_FLR,
"failed to set "
"rollback-fop");
} else {
new_local->xdata = dict_ref (local->xdata);
new_frame->local = new_local;
/*
* Calling STACK_WIND instead *
* of jbr_ipc as it will not *
* unwind to the previous *
* translators like it will *
* in case of jbr_ipc. *
*/
STACK_WIND (new_frame,
jbr_ipc_complete,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ipc,
FDL_IPC_JBR_SERVER_ROLLBACK,
new_local->xdata);
}
} else {
gf_log (this->name, GF_LOG_WARNING,
"Could not create local "
"for new_frame");
}
} else {
gf_log (this->name, GF_LOG_WARNING,
"Could not send rollback ipc");
}
} else {
#if defined(JBR_CG_NEED_FD)
op_ret = local->successful_op_ret;
#else
op_ret = 0;
#endif
op_errno = 0;
gf_msg_debug (this->name, 0,
"Quorum has met. The operation has succeeded.");
}
}
/*
* Unrefing the reference taken in jbr_@NAME@ () *
*/
dict_unref (local->xdata);
STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
@SHORT_ARGS@);
return 0;
err:
STACK_UNWIND_STRICT (@NAME@, frame, -1, 0,
@SHORT_ARGS@);
return 0;
}

View File

@ -0,0 +1,501 @@
/*
* You can put anything here - it doesn't even have to be a comment - and it
* will be ignored until we reach the first template-name comment.
*/
/* template-name read-fop */
int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
{
jbr_private_t *priv = NULL;
gf_boolean_t in_recon = _gf_false;
int32_t op_errno = 0;
int32_t recon_term, recon_index;
GF_VALIDATE_OR_GOTO("jbr", this, err);
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, priv, err);
GF_VALIDATE_OR_GOTO(this->name, frame, err);
op_errno = EREMOTE;
/* allow reads during reconciliation *
* TBD: allow "dirty" reads on non-leaders *
*/
if (xdata && (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) &&
(dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) {
in_recon = _gf_true;
}
if ((!priv->leader) && (in_recon == _gf_false)) {
goto err;
}
STACK_WIND(frame, default_@NAME@_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
return 0;
err:
STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@);
return 0;
}
/* template-name read-perform_local_op */
/* No "perform_local_op" function needed for @NAME@ */
/* template-name read-dispatch */
/* No "dispatch" function needed for @NAME@ */
/* template-name read-call_dispatch */
/* No "call_dispatch" function needed for @NAME@ */
/* template-name read-fan-in */
/* No "fan-in" function needed for @NAME@ */
/* template-name read-continue */
/* No "continue" function needed for @NAME@ */
/* template-name read-complete */
/* No "complete" function needed for @NAME@ */
/* template-name write-fop */
int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
{
jbr_local_t *local = NULL;
jbr_private_t *priv = NULL;
int32_t ret = -1;
int op_errno = ENOMEM;
GF_VALIDATE_OR_GOTO("jbr", this, err);
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, priv, err);
GF_VALIDATE_OR_GOTO(this->name, frame, err);
#if defined(JBR_CG_NEED_FD)
ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, fd);
#else
ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, NULL);
#endif
if (ret)
goto err;
local = frame->local;
/*
* If we let it through despite not being the leader, then we just want
* to pass it on down without all of the additional xattrs, queuing, and
* so on. However, jbr_*_complete does depend on the initialization
* immediately above this.
*/
if (!priv->leader) {
STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
return 0;
}
ret = jbr_initialize_xdata_set_attrs(this, &xdata);
if (ret)
goto err;
local->xdata = dict_ref(xdata);
local->stub = fop_@NAME@_stub(frame, jbr_@NAME@_continue, @SHORT_ARGS@);
if (!local->stub) {
goto err;
}
/*
* Can be used to just call_dispatch or be customised per fop to *
* perform ops specific to that particular fop. *
*/
ret = jbr_@NAME@_perform_local_op(frame, this, &op_errno, @SHORT_ARGS@);
if (ret)
goto err;
return ret;
err:
if (local) {
if (local->stub) {
call_stub_destroy(local->stub);
}
if (local->qstub) {
call_stub_destroy(local->qstub);
}
if (local->fd) {
fd_unref(local->fd);
}
mem_put(local);
}
STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@);
return 0;
}
/* template-name write-perform_local_op */
int32_t jbr_@NAME@_perform_local_op(call_frame_t *frame, xlator_t *this,
int *op_errno, @LONG_ARGS@)
{
int32_t ret = -1;
GF_VALIDATE_OR_GOTO("jbr", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
ret = jbr_@NAME@_call_dispatch(frame, this, op_errno, @SHORT_ARGS@);
out:
return ret;
}
/* template-name write-call_dispatch */
int32_t jbr_@NAME@_call_dispatch(call_frame_t *frame, xlator_t *this,
int *op_errno, @LONG_ARGS@)
{
jbr_local_t *local = NULL;
jbr_private_t *priv = NULL;
int32_t ret = -1;
GF_VALIDATE_OR_GOTO("jbr", this, out);
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, priv, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
local = frame->local;
GF_VALIDATE_OR_GOTO(this->name, local, out);
GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
#if defined(JBR_CG_QUEUE)
jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode);
if (!ictx) {
*op_errno = EIO;
goto out;
}
LOCK(&ictx->lock);
if (ictx->active) {
gf_msg_debug(this->name, 0, "queuing request due to conflict");
/*
* TBD: enqueue only for real conflict
*
* Currently we just act like all writes are in
* conflict with one another. What we should really do
* is check the active/pending queues and defer only if
* there's a conflict there.
*
* It's important to check the pending queue because we
* might have an active request X which conflicts with
* a pending request Y, and this request Z might
* conflict with Y but not X. If we checked only the
* active queue then Z could jump ahead of Y, which
* would be incorrect.
*/
local->qstub = fop_@NAME@_stub(frame, jbr_@NAME@_dispatch,
@SHORT_ARGS@);
if (!local->qstub) {
UNLOCK(&ictx->lock);
goto out;
}
list_add_tail(&local->qlinks, &ictx->pqueue);
++(ictx->pending);
UNLOCK(&ictx->lock);
ret = 0;
goto out;
} else {
list_add_tail(&local->qlinks, &ictx->aqueue);
++(ictx->active);
}
UNLOCK(&ictx->lock);
#endif
ret = jbr_@NAME@_dispatch(frame, this, @SHORT_ARGS@);
out:
return ret;
}
/* template-name write-dispatch */
int32_t jbr_@NAME@_dispatch(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
{
jbr_local_t *local = NULL;
jbr_private_t *priv = NULL;
int32_t ret = -1;
xlator_list_t *trav;
GF_VALIDATE_OR_GOTO("jbr", this, out);
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, priv, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
local = frame->local;
GF_VALIDATE_OR_GOTO(this->name, local, out);
/*
* TBD: unblock pending request(s) if we fail after this point but
* before we get to jbr_@NAME@_complete (where that code currently
* resides).
*/
local->call_count = priv->n_children - 1;
for (trav = this->children->next; trav; trav = trav->next) {
STACK_WIND(frame, jbr_@NAME@_fan_in, trav->xlator,
trav->xlator->fops->@NAME@, @SHORT_ARGS@);
}
/* TBD: variable Issue count */
ret = 0;
out:
return ret;
}
/* template-name write-fan-in */
int32_t jbr_@NAME@_fan_in(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
{
jbr_local_t *local = NULL;
int32_t ret = -1;
uint8_t call_count;
GF_VALIDATE_OR_GOTO("jbr", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
local = frame->local;
GF_VALIDATE_OR_GOTO(this->name, local, out);
gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret,
op_errno);
LOCK(&frame->lock);
call_count = --(local->call_count);
if (op_ret != -1) {
/* Increment the number of successful acks *
* received for the operation. *
*/
(local->successful_acks)++;
local->successful_op_ret = op_ret;
}
gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
op_ret, op_errno, local->successful_acks);
UNLOCK(&frame->lock);
/* TBD: variable Completion count */
if (call_count == 0) {
call_resume(local->stub);
}
ret = 0;
out:
return ret;
}
/* template-name write-continue */
int32_t jbr_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
{
int32_t ret = -1;
gf_boolean_t result = _gf_false;
jbr_local_t *local = NULL;
jbr_local_t *new_local = NULL;
jbr_private_t *priv = NULL;
int32_t op_errno = 0;
GF_VALIDATE_OR_GOTO("jbr", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
priv = this->private;
local = frame->local;
GF_VALIDATE_OR_GOTO(this->name, priv, out);
GF_VALIDATE_OR_GOTO(this->name, local, out);
/* Perform quorum check to see if the leader needs *
* to perform the operation. If the operation will not *
* meet quorum irrespective of the leader's result *
* there is no point in the leader performing the fop *
*/
result = fop_quorum_check(this, (double)priv->n_children,
(double)local->successful_acks + 1);
if (result == _gf_false) {
gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
"Didn't receive enough acks "
"to meet quorum. Failing the operation without trying "
"it on the leader.");
#if defined(JBR_CG_QUEUE)
/*
* In case of a fop failure, before unwinding need to *
* remove it from queue *
*/
ret = jbr_remove_from_queue(frame, this);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC,
"Failed to remove from queue.");
}
#endif
/*
* In this case, the quorum is not met on the followers *
* So the operation will not be performed on the leader *
* and a rollback will be sent via GF_FOP_IPC to all the *
* followers, where this particular fop's term and index *
* numbers will be journaled, and later used to rollback *
*/
call_frame_t *new_frame;
new_frame = copy_frame(frame);
if (new_frame) {
new_local = mem_get0(this->local_pool);
if (new_local) {
INIT_LIST_HEAD(&new_local->qlinks);
ret = dict_set_int32(local->xdata, "rollback-fop",
GF_FOP_@UPNAME@);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
"failed to set rollback-fop");
} else {
new_local->xdata = dict_ref(local->xdata);
new_frame->local = new_local;
jbr_ipc_call_dispatch(new_frame, this, &op_errno,
FDL_IPC_JBR_SERVER_ROLLBACK,
new_local->xdata);
}
} else {
gf_log(this->name, GF_LOG_WARNING,
"Could not create local for new_frame");
}
} else {
gf_log(this->name, GF_LOG_WARNING, "Could not send rollback ipc");
}
STACK_UNWIND_STRICT(@NAME@, frame, -1, EROFS, @ERROR_ARGS@);
} else {
STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
}
out:
return 0;
}
/* template-name write-complete */
int32_t jbr_@NAME@_complete(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
{
int32_t ret = -1;
gf_boolean_t result = _gf_false;
jbr_private_t *priv = NULL;
jbr_local_t *local = NULL;
jbr_local_t *new_local = NULL;
GF_VALIDATE_OR_GOTO("jbr", this, err);
GF_VALIDATE_OR_GOTO(this->name, frame, err);
priv = this->private;
local = frame->local;
GF_VALIDATE_OR_GOTO(this->name, priv, err);
GF_VALIDATE_OR_GOTO(this->name, local, err);
/* If the fop failed on the leader, then reduce one successful ack
* before calculating the fop quorum
*/
LOCK(&frame->lock);
if (op_ret == -1)
(local->successful_acks)--;
UNLOCK(&frame->lock);
#if defined(JBR_CG_QUEUE)
ret = jbr_remove_from_queue(frame, this);
if (ret)
goto err;
#endif
#if defined(JBR_CG_FSYNC)
jbr_mark_fd_dirty(this, local);
#endif
#if defined(JBR_CG_NEED_FD)
fd_unref(local->fd);
#endif
/* After the leader completes the fop, a quorum check is *
* performed, taking into account the outcome of the fop *
* on the leader. Irrespective of the fop being successful *
* or failing on the leader, the result of the quorum will *
* determine if the overall fop is successful or not. For *
* example, a fop might have succeeded on every node except *
* the leader, in which case as quorum is being met, the fop *
* will be treated as a successful fop, even though it failed *
* on the leader. On follower nodes, no quorum check should *
* be done, and the result is returned to the leader as is. *
*/
if (priv->leader) {
result = fop_quorum_check(this, (double)priv->n_children,
(double)local->successful_acks + 1);
if (result == _gf_false) {
op_ret = -1;
op_errno = EROFS;
gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
"Quorum is not met. "
"The operation has failed.");
/*
* In this case, the quorum is not met after the *
* operation is performed on the leader. Hence a *
* rollback will be sent via GF_FOP_IPC to the leader *
* where this particular fop's term and index numbers *
* will be journaled, and later used to rollback. *
* The same will be done on all the followers *
*/
call_frame_t *new_frame;
new_frame = copy_frame(frame);
if (new_frame) {
new_local = mem_get0(this->local_pool);
if (new_local) {
INIT_LIST_HEAD(&new_local->qlinks);
gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
"op = %d", new_frame->op);
ret = dict_set_int32(local->xdata, "rollback-fop",
GF_FOP_@UPNAME@);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
"failed to set "
"rollback-fop");
} else {
new_local->xdata = dict_ref(local->xdata);
new_frame->local = new_local;
/*
* Calling STACK_WIND instead *
* of jbr_ipc as it will not *
* unwind to the previous *
* translators like it will *
* in case of jbr_ipc. *
*/
STACK_WIND(
new_frame, jbr_ipc_complete, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ipc,
FDL_IPC_JBR_SERVER_ROLLBACK, new_local->xdata);
}
} else {
gf_log(this->name, GF_LOG_WARNING,
"Could not create local "
"for new_frame");
}
} else {
gf_log(this->name, GF_LOG_WARNING,
"Could not send rollback ipc");
}
} else {
#if defined(JBR_CG_NEED_FD)
op_ret = local->successful_op_ret;
#else
op_ret = 0;
#endif
op_errno = 0;
gf_msg_debug(this->name, 0,
"Quorum has met. The operation has succeeded.");
}
}
/*
* Unrefing the reference taken in jbr_@NAME@ () *
*/
dict_unref(local->xdata);
STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
return 0;
err:
STACK_UNWIND_STRICT(@NAME@, frame, -1, 0, @SHORT_ARGS@);
return 0;
}