NSR : nsr server code generation patch
With this patch, the NSR server appoints the first node on every replica subvolume as the leader for that subvolume. On receiving a 'write' fop, the leader first checks whether there is quorum in the replica subvolume to proceed. If there isn't, it fails with EROFS. If there is quorum, the leader forwards the fop to the followers. The followers, on receiving the fop, perform the operation and, based on the success or failure of the outcome, send a +ve or a -ve ack to the leader. The leader, after receiving acks from the followers, performs a quorum check of the acks to see if it should even try to perform the fop. If quorum is not being met, and the leader's outcome wouldn't affect quorum, then it sends a -ve ack to the client without even performing the fop. If quorum is being met, the leader will then try the fop on itself and, based on its outcome, perform a quorum check of all the acks received (this time including its own). Based on the result of the quorum check (irrespective of the outcome on the leader), a +ve or -ve ack is sent back to the client. Change-Id: I860654b74c53e9b139b37dba43848e5504df6dce Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/12705 Smoke: Gluster Build System <jenkins@build.gluster.com> Tested-by: Jeff Darcy <jdarcy@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
This commit is contained in:
parent
320779d53a
commit
55617ef037
@ -189,6 +189,8 @@ AC_CONFIG_FILES([Makefile
|
||||
xlators/experimental/Makefile
|
||||
xlators/experimental/nsr-client/Makefile
|
||||
xlators/experimental/nsr-client/src/Makefile
|
||||
xlators/experimental/nsr-server/Makefile
|
||||
xlators/experimental/nsr-server/src/Makefile
|
||||
cli/Makefile
|
||||
cli/src/Makefile
|
||||
doc/Makefile
|
||||
|
@ -1140,6 +1140,7 @@ fi
|
||||
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
|
||||
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
|
||||
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/nsrc.so
|
||||
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/nsr.so
|
||||
%if ( 0%{!?_without_tiering:1} )
|
||||
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so
|
||||
%endif
|
||||
|
@ -1,3 +1,3 @@
|
||||
SUBDIRS = nsr-client
|
||||
SUBDIRS = nsr-client nsr-server
|
||||
|
||||
CLEANFILES =
|
||||
|
@ -60,8 +60,46 @@
|
||||
*/
|
||||
#define N_MSG_MEM_ERR (NSR_COMP_BASE + 3)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*/
|
||||
#define N_MSG_DICT_FLR (NSR_COMP_BASE + 4)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*/
|
||||
#define N_MSG_GENERIC (NSR_COMP_BASE + 5)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*/
|
||||
#define N_MSG_INVALID (NSR_COMP_BASE + 6)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*/
|
||||
#define N_MSG_NO_DATA (NSR_COMP_BASE + 7)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*/
|
||||
#define N_MSG_SYS_CALL_FAILURE (NSR_COMP_BASE + 8)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*/
|
||||
#define N_MSG_QUORUM_NOT_MET (NSR_COMP_BASE + 9)
|
||||
|
||||
#endif /* _NSR_MESSAGES_H_ */
|
||||
|
3
xlators/experimental/nsr-server/Makefile.am
Normal file
3
xlators/experimental/nsr-server/Makefile.am
Normal file
@ -0,0 +1,3 @@
|
||||
SUBDIRS = src
|
||||
|
||||
CLEANFILES =
|
35
xlators/experimental/nsr-server/src/Makefile.am
Normal file
35
xlators/experimental/nsr-server/src/Makefile.am
Normal file
@ -0,0 +1,35 @@
|
||||
xlator_LTLIBRARIES = nsr.la
|
||||
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
|
||||
|
||||
nodist_nsr_la_SOURCES = nsr-cg.c
|
||||
CLEANFILES = $(nodist_nsr_la_SOURCES)
|
||||
|
||||
nsr_la_LDFLAGS = -module -avoid-version
|
||||
nsr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
|
||||
$(top_builddir)/api/src/libgfapi.la
|
||||
|
||||
noinst_HEADERS = nsr-internal.h \
|
||||
$(top_srcdir)/xlators/lib/src/libxlator.h \
|
||||
$(top_srcdir)/glusterfsd/src/glusterfsd.h
|
||||
|
||||
AM_CPPFLAGS = $(GF_CPPFLAGS) \
|
||||
-I$(top_srcdir)/libglusterfs/src \
|
||||
-I$(top_srcdir)/xlators/lib/src \
|
||||
-I$(top_srcdir)/rpc/rpc-lib/src -DSBIN_DIR=\"$(sbindir)\" \
|
||||
-I$(top_srcdir)/api/src -DNSR_SCRIPT_PREFIX=\"$(nsrdir)\" \
|
||||
-I$(top_srcdir)/xlators/experimental/nsr-client/src/
|
||||
|
||||
AM_CFLAGS = -Wall $(GF_CFLAGS)
|
||||
|
||||
NSR_PREFIX = $(top_srcdir)/xlators/experimental/nsr-server/src
|
||||
NSR_GEN_FOPS = $(NSR_PREFIX)/gen-fops.py
|
||||
NSR_TEMPLATES = $(NSR_PREFIX)/all-templates.c
|
||||
NSR_WRAPPER = $(NSR_PREFIX)/nsr.c
|
||||
noinst_PYTHON = $(NSR_GEN_FOPS)
|
||||
EXTRA_DIST = $(NSR_TEMPLATES) $(NSR_WRAPPER)
|
||||
|
||||
nsr-cg.c: $(NSR_GEN_FOPS) $(NSR_TEMPLATES) $(NSR_WRAPPER)
|
||||
$(PYTHON) $(NSR_GEN_FOPS) $(NSR_TEMPLATES) $(NSR_WRAPPER) > $@
|
||||
|
||||
uninstall-local:
|
||||
rm -f $(DESTDIR)$(xlatordir)/nsr.so
|
429
xlators/experimental/nsr-server/src/all-templates.c
Normal file
429
xlators/experimental/nsr-server/src/all-templates.c
Normal file
@ -0,0 +1,429 @@
|
||||
/*
|
||||
* You can put anything here - it doesn't even have to be a comment - and it
|
||||
* will be ignored until we reach the first template-name comment.
|
||||
*/
|
||||
|
||||
|
||||
/* template-name read-fop */
|
||||
int32_t
|
||||
nsr_@NAME@ (call_frame_t *frame, xlator_t *this,
|
||||
@LONG_ARGS@)
|
||||
{
|
||||
nsr_private_t *priv = this->private;
|
||||
gf_boolean_t in_recon = _gf_false;
|
||||
int32_t recon_term, recon_index;
|
||||
|
||||
/* allow reads during reconciliation *
|
||||
* TBD: allow "dirty" reads on non-leaders *
|
||||
*/
|
||||
if (xdata &&
|
||||
(dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) &&
|
||||
(dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) {
|
||||
in_recon = _gf_true;
|
||||
}
|
||||
|
||||
if ((!priv->leader) && (in_recon == _gf_false)) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
STACK_WIND (frame, default_@NAME@_cbk,
|
||||
FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
|
||||
@SHORT_ARGS@);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
STACK_UNWIND_STRICT (@NAME@, frame, -1, EREMOTE,
|
||||
@ERROR_ARGS@);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* template-name read-dispatch */
|
||||
/* No "dispatch" function needed for @NAME@ */
|
||||
|
||||
/* template-name read-fan-in */
|
||||
/* No "fan-in" function needed for @NAME@ */
|
||||
|
||||
/* template-name read-continue */
|
||||
/* No "continue" function needed for @NAME@ */
|
||||
|
||||
/* template-name read-complete */
|
||||
/* No "complete" function needed for @NAME@ */
|
||||
|
||||
/* template-name write-fop */
|
||||
int32_t
|
||||
nsr_@NAME@ (call_frame_t *frame, xlator_t *this,
|
||||
@LONG_ARGS@)
|
||||
{
|
||||
nsr_local_t *local = NULL;
|
||||
nsr_private_t *priv = this->private;
|
||||
gf_boolean_t result = _gf_false;
|
||||
int op_errno = ENOMEM;
|
||||
int from_leader;
|
||||
int from_recon;
|
||||
uint32_t ti = 0;
|
||||
|
||||
/*
|
||||
* Our first goal here is to avoid "split brain surprise" for users who
|
||||
* specify exactly 50% with two- or three-way replication. That means
|
||||
* either a more-than check against half the total replicas or an
|
||||
* at-least check against half of our peers (one less). Of the two,
|
||||
* only an at-least check supports the intuitive use of 100% to mean
|
||||
* all replicas must be present, because "more than 100%" will never
|
||||
* succeed regardless of which count we use. This leaves us with a
|
||||
* slightly non-traditional definition of quorum ("at least X% of peers
|
||||
* not including ourselves") but one that's useful enough to be worth
|
||||
* it.
|
||||
*
|
||||
* Note that n_children and up_children *do* include the local
|
||||
* subvolume, so we need to subtract one in each case.
|
||||
*/
|
||||
if (priv->leader) {
|
||||
result = fop_quorum_check (this, (double)(priv->n_children - 1),
|
||||
(double)(priv->up_children - 1));
|
||||
|
||||
if (result == _gf_false) {
|
||||
/* Emulate the AFR client-side-quorum behavior. */
|
||||
op_errno = EROFS;
|
||||
goto err;
|
||||
}
|
||||
} else {
|
||||
if (xdata) {
|
||||
from_leader = !!dict_get(xdata, NSR_TERM_XATTR);
|
||||
from_recon = !!dict_get(xdata, RECON_TERM_XATTR)
|
||||
&& !!dict_get(xdata, RECON_INDEX_XATTR);
|
||||
} else {
|
||||
from_leader = from_recon = _gf_false;
|
||||
}
|
||||
|
||||
/* follower/recon path *
|
||||
* just send it to local node *
|
||||
*/
|
||||
if (!from_leader && !from_recon) {
|
||||
op_errno = EREMOTE;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
local = mem_get0(this->local_pool);
|
||||
if (!local) {
|
||||
goto err;
|
||||
}
|
||||
#if defined(NSR_CG_NEED_FD)
|
||||
local->fd = fd_ref(fd);
|
||||
#else
|
||||
local->fd = NULL;
|
||||
#endif
|
||||
INIT_LIST_HEAD(&local->qlinks);
|
||||
frame->local = local;
|
||||
|
||||
/*
|
||||
* If we let it through despite not being the leader, then we just want
|
||||
* to pass it on down without all of the additional xattrs, queuing, and
|
||||
* so on. However, nsr_*_complete does depend on the initialization
|
||||
* immediately above this.
|
||||
*/
|
||||
if (!priv->leader) {
|
||||
STACK_WIND (frame, nsr_@NAME@_complete,
|
||||
FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
|
||||
@SHORT_ARGS@);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!xdata) {
|
||||
xdata = dict_new();
|
||||
if (!xdata) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
|
||||
N_MSG_MEM_ERR, "failed to allocate xdata");
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
if (dict_set_int32(xdata, NSR_TERM_XATTR, priv->current_term) != 0) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
N_MSG_DICT_FLR, "failed to set nsr-term");
|
||||
goto err;
|
||||
}
|
||||
|
||||
LOCK(&priv->index_lock);
|
||||
ti = ++(priv->index);
|
||||
UNLOCK(&priv->index_lock);
|
||||
if (dict_set_int32(xdata, NSR_INDEX_XATTR, ti) != 0) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
N_MSG_DICT_FLR, "failed to set index");
|
||||
goto err;
|
||||
}
|
||||
|
||||
local->stub = fop_@NAME@_stub (frame, nsr_@NAME@_continue,
|
||||
@SHORT_ARGS@);
|
||||
if (!local->stub) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
||||
#if defined(NSR_CG_QUEUE)
|
||||
nsr_inode_ctx_t *ictx = nsr_get_inode_ctx(this, fd->inode);
|
||||
|
||||
if (!ictx) {
|
||||
op_errno = EIO;
|
||||
goto err;
|
||||
}
|
||||
LOCK(&ictx->lock);
|
||||
if (ictx->active) {
|
||||
gf_msg_debug (this->name, 0,
|
||||
"queuing request due to conflict");
|
||||
/*
|
||||
* TBD: enqueue only for real conflict
|
||||
*
|
||||
* Currently we just act like all writes are in
|
||||
* conflict with one another. What we should really do
|
||||
* is check the active/pending queues and defer only if
|
||||
* there's a conflict there.
|
||||
*
|
||||
* It's important to check the pending queue because we
|
||||
* might have an active request X which conflicts with
|
||||
* a pending request Y, and this request Z might
|
||||
* conflict with Y but not X. If we checked only the
|
||||
* active queue then Z could jump ahead of Y, which
|
||||
* would be incorrect.
|
||||
*/
|
||||
local->qstub = fop_@NAME@_stub (frame,
|
||||
nsr_@NAME@_dispatch,
|
||||
@SHORT_ARGS@);
|
||||
if (!local->qstub) {
|
||||
UNLOCK(&ictx->lock);
|
||||
goto err;
|
||||
}
|
||||
list_add_tail(&local->qlinks, &ictx->pqueue);
|
||||
++(ictx->pending);
|
||||
UNLOCK(&ictx->lock);
|
||||
return 0;
|
||||
} else {
|
||||
list_add_tail(&local->qlinks, &ictx->aqueue);
|
||||
++(ictx->active);
|
||||
}
|
||||
UNLOCK(&ictx->lock);
|
||||
#endif
|
||||
|
||||
return nsr_@NAME@_dispatch (frame, this, @SHORT_ARGS@);
|
||||
|
||||
err:
|
||||
if (local) {
|
||||
if (local->stub) {
|
||||
call_stub_destroy(local->stub);
|
||||
}
|
||||
if (local->qstub) {
|
||||
call_stub_destroy(local->qstub);
|
||||
}
|
||||
if (local->fd) {
|
||||
fd_unref(local->fd);
|
||||
}
|
||||
mem_put(local);
|
||||
}
|
||||
STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno,
|
||||
@ERROR_ARGS@);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* template-name write-dispatch */
|
||||
int32_t
|
||||
nsr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this,
|
||||
@LONG_ARGS@)
|
||||
{
|
||||
nsr_local_t *local = frame->local;
|
||||
nsr_private_t *priv = this->private;
|
||||
xlator_list_t *trav;
|
||||
|
||||
/*
|
||||
* TBD: unblock pending request(s) if we fail after this point but
|
||||
* before we get to nsr_@NAME@_complete (where that code currently
|
||||
* resides).
|
||||
*/
|
||||
|
||||
local->call_count = priv->n_children - 1;
|
||||
local->successful_acks = 0;
|
||||
for (trav = this->children->next; trav; trav = trav->next) {
|
||||
STACK_WIND (frame, nsr_@NAME@_fan_in,
|
||||
trav->xlator, trav->xlator->fops->@NAME@,
|
||||
@SHORT_ARGS@);
|
||||
}
|
||||
|
||||
/* TBD: variable Issue count */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* template-name write-fan-in */
|
||||
int32_t
|
||||
nsr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
int32_t op_ret, int32_t op_errno,
|
||||
@LONG_ARGS@)
|
||||
{
|
||||
nsr_local_t *local = frame->local;
|
||||
uint8_t call_count;
|
||||
|
||||
gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n",
|
||||
op_ret, op_errno);
|
||||
|
||||
LOCK(&frame->lock);
|
||||
call_count = --(local->call_count);
|
||||
if (op_ret != -1) {
|
||||
/* Increment the number of successful acks *
|
||||
* received for the operation. *
|
||||
*/
|
||||
(local->successful_acks)++;
|
||||
local->successful_op_ret = op_ret;
|
||||
}
|
||||
gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
|
||||
op_ret, op_errno, local->successful_acks);
|
||||
UNLOCK(&frame->lock);
|
||||
|
||||
/* TBD: variable Completion count */
|
||||
if (call_count == 0) {
|
||||
call_resume(local->stub);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* template-name write-continue */
|
||||
int32_t
|
||||
nsr_@NAME@_continue (call_frame_t *frame, xlator_t *this,
|
||||
@LONG_ARGS@)
|
||||
{
|
||||
int32_t ret = -1;
|
||||
gf_boolean_t result = _gf_false;
|
||||
nsr_local_t *local = NULL;
|
||||
nsr_private_t *priv = NULL;
|
||||
|
||||
GF_VALIDATE_OR_GOTO ("nsr", this, out);
|
||||
GF_VALIDATE_OR_GOTO (this->name, frame, out);
|
||||
priv = this->private;
|
||||
local = frame->local;
|
||||
GF_VALIDATE_OR_GOTO (this->name, priv, out);
|
||||
GF_VALIDATE_OR_GOTO (this->name, local, out);
|
||||
|
||||
/* Perform quorum check to see if the leader needs *
|
||||
* to perform the operation. If the operation will not *
|
||||
* meet quorum irrespective of the leader's result *
|
||||
* there is no point in the leader performing the fop *
|
||||
*/
|
||||
result = fop_quorum_check (this, (double)priv->n_children,
|
||||
(double)local->successful_acks + 1);
|
||||
if (result == _gf_false) {
|
||||
STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS,
|
||||
@ERROR_ARGS@);
|
||||
} else {
|
||||
STACK_WIND (frame, nsr_@NAME@_complete,
|
||||
FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
|
||||
@SHORT_ARGS@);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* template-name write-complete */
|
||||
int32_t
|
||||
nsr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
int32_t op_ret, int32_t op_errno,
|
||||
@LONG_ARGS@)
|
||||
{
|
||||
gf_boolean_t result = _gf_false;
|
||||
nsr_private_t *priv = this->private;
|
||||
|
||||
nsr_local_t *local = frame->local;
|
||||
|
||||
/* If the fop failed on the leader, then reduce one succesful ack
|
||||
* before calculating the fop quorum
|
||||
*/
|
||||
LOCK(&frame->lock);
|
||||
if (op_ret == -1)
|
||||
(local->successful_acks)--;
|
||||
UNLOCK(&frame->lock);
|
||||
|
||||
#if defined(NSR_CG_QUEUE)
|
||||
nsr_inode_ctx_t *ictx;
|
||||
nsr_local_t *next;
|
||||
|
||||
if (local->qlinks.next != &local->qlinks) {
|
||||
list_del(&local->qlinks);
|
||||
ictx = nsr_get_inode_ctx(this, local->fd->inode);
|
||||
if (ictx) {
|
||||
LOCK(&ictx->lock);
|
||||
if (ictx->pending) {
|
||||
/*
|
||||
* TBD: dequeue *all* non-conflicting
|
||||
* reqs
|
||||
*
|
||||
* With the stub implementation there
|
||||
* can only be one request active at a
|
||||
* time (zero here) so it's not an
|
||||
* issue. In a real implementation
|
||||
* there might still be other active
|
||||
* requests to check against, and
|
||||
* multiple pending requests that could
|
||||
* continue.
|
||||
*/
|
||||
gf_msg_debug (this->name, 0,
|
||||
"unblocking next request");
|
||||
--(ictx->pending);
|
||||
next = list_entry (ictx->pqueue.next,
|
||||
nsr_local_t, qlinks);
|
||||
list_del(&next->qlinks);
|
||||
list_add_tail(&next->qlinks,
|
||||
&ictx->aqueue);
|
||||
call_resume(next->qstub);
|
||||
} else {
|
||||
--(ictx->active);
|
||||
}
|
||||
UNLOCK(&ictx->lock);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(NSR_CG_FSYNC)
|
||||
nsr_mark_fd_dirty(this, local);
|
||||
#endif
|
||||
|
||||
#if defined(NSR_CG_NEED_FD)
|
||||
fd_unref(local->fd);
|
||||
#endif
|
||||
|
||||
/* After the leader completes the fop, a quorum check is *
|
||||
* performed, taking into account the outcome of the fop *
|
||||
* on the leader. Irrespective of the fop being successful *
|
||||
* or failing on the leader, the result of the quorum will *
|
||||
* determine if the overall fop is successful or not. For *
|
||||
* example, a fop might have succeeded on every node except *
|
||||
* the leader, in which case as quorum is being met, the fop *
|
||||
* will be treated as a successful fop, even though it failed *
|
||||
* on the leader. On follower nodes, no quorum check should *
|
||||
* be done, and the result is returned to the leader as is. *
|
||||
*/
|
||||
if (priv->leader) {
|
||||
result = fop_quorum_check (this, (double)priv->n_children,
|
||||
(double)local->successful_acks + 1);
|
||||
if (result == _gf_false) {
|
||||
op_ret = -1;
|
||||
op_errno = EROFS;
|
||||
gf_msg_debug (this->name, 0,
|
||||
"Quorum is not met. The operation has failed.");
|
||||
} else {
|
||||
#if defined(NSR_CG_NEED_FD)
|
||||
op_ret = local->successful_op_ret;
|
||||
#else
|
||||
op_ret = 0;
|
||||
#endif
|
||||
op_errno = 0;
|
||||
gf_msg_debug (this->name, 0,
|
||||
"Quorum has met. The operation has succeeded.");
|
||||
}
|
||||
}
|
||||
|
||||
STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
|
||||
@SHORT_ARGS@);
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
138
xlators/experimental/nsr-server/src/gen-fops.py
Executable file
138
xlators/experimental/nsr-server/src/gen-fops.py
Executable file
@ -0,0 +1,138 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# This script generates the boilerplate versions of most fops and cbks in the
|
||||
# server. This allows the details of leadership-status checking, sequencing
|
||||
# between leader and followers (including fan-out), and basic error checking
|
||||
# to be centralized one place, with per-operation code kept to a minimum.
|
||||
|
||||
import os
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
curdir = os.path.dirname(sys.argv[0])
|
||||
gendir = os.path.join(curdir,'../../../../libglusterfs/src')
|
||||
sys.path.append(gendir)
|
||||
from generator import ops, fop_subs, cbk_subs, generate
|
||||
|
||||
# We really want the callback argument list, even when we're generating fop
|
||||
# code, so we propagate here.
|
||||
# TBD: this should probably be right in generate.py
|
||||
# Copy each fop's callback error-argument list into its fop substitutions.
# dict.items() is used because it exists on both Python 2 and Python 3.
for k, v in cbk_subs.items():
    fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@']
|
||||
|
||||
# Stolen from old codegen.py
|
||||
def load_templates (path):
    """Split a template file into named template bodies.

    A line starting with ``/* template-name NAME */`` opens a new template.
    Every following line, up to the next marker or end of file, becomes
    part of that template's text.  Lines before the first marker are
    ignored.

    Returns a dict mapping template name to template text, with line
    endings preserved.
    """
    templates = {}
    tmpl_re = re.compile(r"/\* template-name (.*) \*/")
    t_name = None
    t_contents = []
    # 'with' closes the file even if parsing raises.
    with open(path, "r") as tmpl_file:
        for line in tmpl_file:
            m = tmpl_re.match(line)
            if m:
                # A new marker ends the template collected so far.
                if t_name:
                    templates[t_name] = ''.join(t_contents)
                t_name = m.group(1).strip()
                t_contents = []
            elif t_name:
                t_contents.append(line)
    # Flush the last template; no marker follows it.
    if t_name:
        templates[t_name] = ''.join(t_contents)
    return templates
|
||||
|
||||
# We need two types of templates. The first, for pure read operations, just
|
||||
# needs to do a simple am-i-leader check (augmented to allow dirty reads).
|
||||
# The second, for pure writes, needs to do fan-out to followers between those
|
||||
# initial checks and local execution. There are other operations that don't
|
||||
# fit neatly into either category - e.g. lock ops or fsync - so we'll just have
|
||||
# to handle those manually. The table thus includes entries only for those we
|
||||
# can categorize. The special cases, plus any new operations we've never even
|
||||
# heard of, aren't in there.
|
||||
#
|
||||
# Various keywords can be used to define/undefine preprocessor symbols used
|
||||
# in the templates, on a per-function basis. For example, if the keyword here
|
||||
# is "fsync" (lowercase word or abbreviation) that will cause NSR_CG_FSYNC
|
||||
# (prefix plus uppercase version) to be defined above all of the generated code
|
||||
# for that fop.
|
||||
|
||||
fop_table = {
|
||||
"access": "read",
|
||||
"create": "write",
|
||||
"discard": "write",
|
||||
# "entrylk": "read",
|
||||
"fallocate": "write",
|
||||
# "fentrylk": "read",
|
||||
"fgetxattr": "read",
|
||||
# "finodelk": "read",
|
||||
# "flush": "read",
|
||||
"fremovexattr": "write",
|
||||
"fsetattr": "write",
|
||||
"fsetxattr": "write",
|
||||
"fstat": "read",
|
||||
# "fsync": "read",
|
||||
# "fsyncdir": "read",
|
||||
"ftruncate": "write",
|
||||
"fxattrop": "write",
|
||||
"getxattr": "read",
|
||||
# "inodelk": "read",
|
||||
"link": "write",
|
||||
# "lk": "read",
|
||||
# "lookup": "read",
|
||||
"mkdir": "write",
|
||||
"mknod": "write",
|
||||
"open": "write",
|
||||
"opendir": "read",
|
||||
"rchecksum": "read",
|
||||
"readdir": "read",
|
||||
"readdirp": "read",
|
||||
"readlink": "read",
|
||||
"readv": "read",
|
||||
"removexattr": "write",
|
||||
"rename": "write",
|
||||
"rmdir": "write",
|
||||
"setattr": "write",
|
||||
"setxattr": "write",
|
||||
"stat": "read",
|
||||
"statfs": "read",
|
||||
"symlink": "write",
|
||||
"truncate": "write",
|
||||
"unlink": "write",
|
||||
"writev": "write,fsync,queue",
|
||||
"xattrop": "write",
|
||||
}
|
||||
|
||||
# Stolen from gen_fdl.py
|
||||
def gen_server (templates):
    """Print the generated code for every fop in fop_table.

    For each fop this emits "#define NSR_CG_<FLAG>" lines for its flags,
    then the complete, continue, fan-in, dispatch and fop templates (in
    that order), then the matching "#undef" lines.  A "struct xlator_fops"
    table naming every generated fop is printed at the end.
    """
    fops_done = []
    # Sorted so the generated file has the same layout on every interpreter.
    for name in sorted(fop_table):
        info = fop_table[name].split(",")
        kind = info[0]
        flags = info[1:]
        # Templates for fsync/queue handling also use the fd.
        if ("fsync" in flags) or ("queue" in flags):
            flags.append("need_fd")
        for fname in flags:
            print("#define NSR_CG_%s" % fname.upper())
        print(generate(templates[kind+"-complete"],name,cbk_subs))
        print(generate(templates[kind+"-continue"],name,fop_subs))
        print(generate(templates[kind+"-fan-in"],name,cbk_subs))
        print(generate(templates[kind+"-dispatch"],name,fop_subs))
        print(generate(templates[kind+"-fop"],name,fop_subs))
        for fname in flags:
            print("#undef NSR_CG_%s" % fname.upper())
        fops_done.append(name)
    # Just for fun, emit the fops table too.
    print("struct xlator_fops fops = {")
    for x in fops_done:
        print(" .%s = nsr_%s,"%(x,x))
    print("};")
|
||||
|
||||
# argv[1] is the template file, argv[2] the hand-written wrapper source.
# The wrapper is copied to stdout line by line; any line containing
# "#pragma generate" is replaced by the generated code.
tmpl = load_templates(sys.argv[1])
with open(sys.argv[2],'r') as wrapper:
    for l in wrapper:
        if l.find('#pragma generate') != -1:
            print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
            gen_server(tmpl)
            print("/* END GENERATED CODE */")
        else:
            # Strip only the newline, so an unterminated last line
            # does not lose its final character.
            print(l.rstrip('\n'))
|
114
xlators/experimental/nsr-server/src/nsr-internal.h
Normal file
114
xlators/experimental/nsr-server/src/nsr-internal.h
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
|
||||
This file is part of GlusterFS.
|
||||
|
||||
This file is licensed to you under your choice of the GNU Lesser
|
||||
General Public License, version 3 or any later version (LGPLv3 or
|
||||
later), or the GNU General Public License, version 2 (GPLv2), in all
|
||||
cases as published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define LEADER_XATTR "user.nsr.leader"
|
||||
#define SECOND_CHILD(xl) (xl->children->next->xlator)
|
||||
#define RECONCILER_PATH NSR_SCRIPT_PREFIX"/reconciler.py"
|
||||
#define CHANGELOG_ENTRY_SIZE 128
|
||||
|
||||
enum {
|
||||
gf_mt_nsr_private_t = gf_common_mt_end + 1,
|
||||
gf_mt_nsr_fd_ctx_t,
|
||||
gf_mt_nsr_inode_ctx_t,
|
||||
gf_mt_nsr_dirty_t,
|
||||
gf_mt_nsr_end
|
||||
};
|
||||
|
||||
typedef enum nsr_recon_notify_ev_id_t {
|
||||
NSR_RECON_SET_LEADER = 1,
|
||||
NSR_RECON_ADD_CHILD = 2
|
||||
} nsr_recon_notify_ev_id_t;
|
||||
|
||||
typedef struct _nsr_recon_notify_ev_s {
|
||||
nsr_recon_notify_ev_id_t id;
|
||||
uint32_t index; /* in case of add */
|
||||
struct list_head list;
|
||||
} nsr_recon_notify_ev_t;
|
||||
|
||||
typedef struct {
|
||||
/*
|
||||
* This is a hack to allow a non-leader to accept requests while the
|
||||
* leader is down, and it only works for n=2. The way it works is that
|
||||
* "config_leader" indicates the state from our options (via init or
|
||||
* reconfigure) but "leader" is what the fop code actually looks at. If
|
||||
* config_leader is true, then leader will *always* be true as well,
|
||||
* giving that brick precedence. If config_leader is false, then
|
||||
* leader will only be true if there is no connection to the other
|
||||
* brick (tracked in nsr_notify).
|
||||
*
|
||||
* TBD: implement real leader election
|
||||
*/
|
||||
gf_boolean_t config_leader;
|
||||
gf_boolean_t leader;
|
||||
uint8_t up_children;
|
||||
uint8_t n_children;
|
||||
char *vol_file;
|
||||
uint32_t current_term;
|
||||
uint32_t kid_state;
|
||||
gf_lock_t dirty_lock;
|
||||
struct list_head dirty_fds;
|
||||
uint32_t index;
|
||||
gf_lock_t index_lock;
|
||||
double quorum_pct;
|
||||
int term_fd;
|
||||
long term_total;
|
||||
long term_read;
|
||||
/*
|
||||
* This is a super-duper hack, but it will do for now. The reason it's
|
||||
* a hack is that we pass this to dict_set_static_bin, so we don't have
|
||||
* to mess around with allocating and freeing it on every single IPC
|
||||
* request, but it's totally not thread-safe. On the other hand, there
|
||||
* should only be one reconciliation thread running and calling these
|
||||
* functions at a time, so maybe that doesn't matter.
|
||||
*
|
||||
* TBD: re-evaluate how to manage this
|
||||
*/
|
||||
char term_buf[CHANGELOG_ENTRY_SIZE];
|
||||
} nsr_private_t;
|
||||
|
||||
typedef struct {
|
||||
call_stub_t *stub;
|
||||
call_stub_t *qstub;
|
||||
uint32_t call_count;
|
||||
uint32_t successful_acks;
|
||||
uint32_t successful_op_ret;
|
||||
fd_t *fd;
|
||||
struct list_head qlinks;
|
||||
} nsr_local_t;
|
||||
|
||||
/*
|
||||
* This should match whatever changelog returns on the pre-op for us to pass
|
||||
* when we're ready for our post-op.
|
||||
*/
|
||||
typedef uint32_t log_id_t;
|
||||
|
||||
typedef struct {
|
||||
struct list_head links;
|
||||
log_id_t id;
|
||||
} nsr_dirty_list_t;
|
||||
|
||||
typedef struct {
|
||||
fd_t *fd;
|
||||
struct list_head dirty_list;
|
||||
struct list_head fd_list;
|
||||
} nsr_fd_ctx_t;
|
||||
|
||||
typedef struct {
|
||||
gf_lock_t lock;
|
||||
uint32_t active;
|
||||
struct list_head aqueue;
|
||||
uint32_t pending;
|
||||
struct list_head pqueue;
|
||||
} nsr_inode_ctx_t;
|
||||
|
||||
void nsr_start_reconciler (xlator_t *this);
|
1066
xlators/experimental/nsr-server/src/nsr.c
Normal file
1066
xlators/experimental/nsr-server/src/nsr.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -1017,7 +1017,7 @@ posix_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = lseek (pfd->fd, offset, whence);
|
||||
ret = sys_lseek (pfd->fd, offset, whence);
|
||||
if (ret == -1) {
|
||||
err = errno;
|
||||
gf_msg (this->name, GF_LOG_ERROR, err, P_MSG_SEEK_FAILED,
|
||||
|
Loading…
x
Reference in New Issue
Block a user