nsr: Introducing a happy path test case

Write infra for nsr_server to not send a
CHILD_UP before it gets a CHILD_UP from a
quorum of its children. Use the CHILD_UP
received in the nsr client translator from
the server to decide the right time for
starting the I/Os.

Change-Id: I9551638b306bdcbc6bae6aeda00316576ea832fe
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/13623
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
This commit is contained in:
Avra Sengupta 2016-02-12 14:57:47 +05:30 committed by Jeff Darcy
parent b2a5eed9b1
commit b4cbfdac0d
7 changed files with 248 additions and 20 deletions

33
tests/basic/nsr/nsr.t Executable file
View File

@ -0,0 +1,33 @@
#!/bin/bash
# Happy-path test for NSR: build a 3-node cluster, create a replica-3 volume
# with cluster.nsr enabled, mount it, write one file through the mount and
# check that the file is present on every brick.
# Pull in the shared test helpers (TEST/EXPECT_WITHIN, volume, cluster and
# snapshot/LVM helpers).
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../cluster.rc
. $(dirname $0)/../../snapshot.rc
# Start from a clean slate.
cleanup;
TEST verify_lvm_version;
#Create cluster with 3 nodes
TEST launch_cluster 3;
TEST setup_lvm 3
# Probe the other two nodes from node 1 and wait until both show up as peers.
TEST $CLI_1 peer probe $H2;
TEST $CLI_1 peer probe $H3;
EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
# One brick per node, with NSR switched on before the volume is started.
TEST $CLI_1 volume create $V0 replica 3 $H1:$L1 $H2:$L2 $H3:$L3
TEST $CLI_1 volume set $V0 cluster.nsr on
# Uncomment the next line to get DEBUG-level brick logs while debugging.
#TEST $CLI_1 volume set $V0 diagnostics.brick-log-level DEBUG
TEST $CLI_1 volume start $V0
TEST glusterfs --volfile-id=$V0 --volfile-server=$H1 --entry-timeout=0 $M0;
# Do not start I/O until the value reported by nsrc_child_up_status for
# brick 0 (read from the mount statedump) reaches 3.
EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" nsrc_child_up_status $V0 0
# Write through the mount; the result is verified by the stat checks below.
echo "file" > $M0/file1
# The file must exist on each of the three bricks.
TEST stat $L1/file1
TEST stat $L2/file1
TEST stat $L3/file1
cleanup;

View File

@ -110,6 +110,24 @@ function snap_client_connected_status {
echo "$up"
}
function _nsrc_child_up_status {
        # Print the up_children value found in a statedump for the given brick.
        #   $1 - volume name
        #   $2 - brick id (brick-num in volume info - 1)
        #   $3 - command that generates a statedump for the volume and prints
        #        the path of the resulting file
        # The statedump file is removed before returning.
        local vol=$1
        #brick_id is (brick-num in volume info - 1)
        local brick_id=$2
        local gen_state_dump=$3
        local fpath=$($gen_state_dump $vol)
        local up
        # The nsrc dump writes a single "up_children=N" line followed by
        # child_0, child_1, ... in order, so the up_children line sits
        # (brick_id + 1) lines above the child_<brick_id> entry. A fixed -B1
        # would only reach it for brick 0.
        up=$(grep -a -B$((brick_id + 1)) child_$brick_id=$vol-client-$brick_id "$fpath" | head -1 | cut -f2 -d'=')
        rm -f "$fpath"
        echo "$up"
}
function nsrc_child_up_status {
        # Report the nsrc child-up value for a brick, using the statedump
        # produced by generate_mount_statedump.
        #   $1 - volume name
        #   $2 - brick id (brick-num in volume info - 1)
        local volname=$1
        local brick_idx=$2
        _nsrc_child_up_status $volname $brick_idx generate_mount_statedump
}
function _afr_child_up_status {
local vol=$1
#brick_id is (brick-num in volume info - 1)

View File

@ -18,6 +18,7 @@
#include "xlator.h"
#include "nsr-messages.h"
#include "nsrc.h"
#include "statedump.h"
#define SCAR_LIMIT 20
#define HILITE(x) (""x"")
@ -168,6 +169,7 @@ int32_t
nsrc_init (xlator_t *this)
{
nsrc_private_t *priv = NULL;
xlator_list_t *trav = NULL;
this->local_pool = mem_pool_new (nsrc_local_t, 128);
if (!this->local_pool) {
@ -181,6 +183,10 @@ nsrc_init (xlator_t *this)
goto err;
}
for (trav = this->children; trav; trav = trav->next) {
++(priv->n_children);
}
priv->active = FIRST_CHILD(this);
this->private = priv;
return 0;
@ -198,33 +204,111 @@ nsrc_fini (xlator_t *this)
GF_FREE(this->private);
}
/*
 * Return the zero-based position of @kid in @this's list of children,
 * or -1 if @kid is not one of them.
 */
int
nsrc_get_child_index (xlator_t *this, xlator_t *kid)
{
        xlator_list_t   *entry = this->children;
        int              pos   = 0;

        while (entry) {
                if (entry->xlator == kid) {
                        return pos;
                }
                entry = entry->next;
                pos++;
        }

        /* Walked the whole list without finding the child. */
        return -1;
}
/*
 * Count how many of the first n_children bits are set in priv->kid_state.
 *
 * kid_state is a 32-bit mask, so at most 32 children can be represented.
 * The loop is clamped to the mask width and the shift is done on an unsigned
 * value, so no shift ever reaches or exceeds the width of its operand.
 */
uint8_t
nsrc_count_up_kids (nsrc_private_t *priv)
{
        const uint8_t   mask_bits = (uint8_t)(sizeof (priv->kid_state) * 8);
        uint8_t         limit     = priv->n_children;
        uint8_t         retval    = 0;
        uint8_t         i;

        if (limit > mask_bits) {
                limit = mask_bits;
        }

        for (i = 0; i < limit; ++i) {
                if (priv->kid_state & ((uint32_t)1 << i)) {
                        ++retval;
                }
        }

        return retval;
}
/*
 * Track per-child up/down state and pass events on to the parent.
 *
 * GF_EVENT_CHILD_UP:   if the child is known, set its bit in kid_state,
 *                      recount up_children and log; the event is then
 *                      forwarded through default_notify().
 * GF_EVENT_CHILD_DOWN: if the child is known, clear its bit, recount
 *                      up_children and log; the event is NOT forwarded.
 * anything else:       forwarded unchanged through default_notify().
 *
 * Returns the result of default_notify() when it was called, 0 otherwise
 * (including when 'this' or its private data is NULL).
 */
int32_t
nsrc_notify (xlator_t *this, int32_t event, void *data, ...)
{
        int32_t          ret   = 0;
        int32_t          index = 0;
        nsrc_private_t  *priv  = NULL;

        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);

        switch (event) {
        case GF_EVENT_CHILD_UP:
                index = nsrc_get_child_index(this, data);
                if (index >= 0) {
                        /* Unsigned shift: kid_state is a uint32_t mask. */
                        priv->kid_state |= ((uint32_t)1 << index);
                        priv->up_children = nsrc_count_up_kids(priv);
                        gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
                                "got CHILD_UP for %s, now %u kids",
                                ((xlator_t *)data)->name,
                                priv->up_children);
                }
                ret = default_notify (this, event, data);
                break;
        case GF_EVENT_CHILD_DOWN:
                /*
                 * TBD: handle this properly
                 *
                 * What we really should do is propagate this only if it caused
                 * us to lose quorum, and likewise for GF_EVENT_CHILD_UP only
                 * if it caused us to gain quorum.  The child state is tracked
                 * below, but for now the event itself is still swallowed
                 * unconditionally.  The consequence of failing to do
                 * this is that DHT sees the first GF_EVENT_CHILD_DOWN and gets
                 * confused, so it doesn't call us and doesn't get up-to-date
                 * directory listings etc.
                 */
                index = nsrc_get_child_index(this, data);
                if (index >= 0) {
                        priv->kid_state &= ~((uint32_t)1 << index);
                        priv->up_children = nsrc_count_up_kids(priv);
                        gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
                                "got CHILD_DOWN for %s, now %u kids",
                                ((xlator_t *)data)->name,
                                priv->up_children);
                }
                break;
        default:
                ret = default_notify (this, event, data);
        }

out:
        return ret;
}
/*
 * Statedump callback: emit a "<type>.<name>" section containing the
 * up_children counter followed by one child_<n>=<child name> entry per
 * child, in list order.  Always returns 0.
 */
int
nsrc_priv_dump (xlator_t *this)
{
        char             key_buf[GF_DUMP_MAX_BUF_LEN];
        nsrc_private_t  *priv = NULL;
        xlator_list_t   *kid  = NULL;
        int32_t          pos  = 0;

        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);

        /* Section header. */
        snprintf(key_buf, sizeof (key_buf), "%s.%s",
                 this->type, this->name);
        gf_proc_dump_add_section(key_buf);

        gf_proc_dump_write("up_children", "%u", priv->up_children);

        /* The same buffer is reused to build each child key. */
        kid = this->children;
        while (kid) {
                snprintf(key_buf, sizeof (key_buf), "child_%d", pos);
                gf_proc_dump_write(key_buf, "%s", kid->xlator->name);
                kid = kid->next;
                ++pos;
        }

out:
        return 0;
}
/* Statedump operations table: registers nsrc_priv_dump as the .priv callback. */
struct xlator_dumpops dumpops = {
.priv = nsrc_priv_dump,
};
class_methods_t class_methods = {
.init = nsrc_init,
.fini = nsrc_fini,

View File

@ -13,6 +13,9 @@
/* Private (per-translator-instance) state of the nsrc translator. */
typedef struct {
/* Initialised to FIRST_CHILD(this) by nsrc_init. */
xlator_t *active;
/* Number of set bits in kid_state, refreshed on CHILD_UP/CHILD_DOWN. */
uint8_t up_children;
/* Number of entries in this->children, counted in nsrc_init. */
uint8_t n_children;
/* Bitmask of up children: bit i is set while child index i is up. */
uint32_t kid_state;
} nsrc_private_t;
typedef struct {

View File

@ -83,6 +83,9 @@ nsr_@NAME@ (call_frame_t *frame, xlator_t *this,
if (result == _gf_false) {
/* Emulate the AFR client-side-quorum behavior. */
gf_msg (this->name, GF_LOG_ERROR, EROFS,
N_MSG_QUORUM_NOT_MET, "Sufficient number of "
"subvolumes are not up to meet quorum.");
op_errno = EROFS;
goto err;
}
@ -309,6 +312,10 @@ nsr_@NAME@_continue (call_frame_t *frame, xlator_t *this,
result = fop_quorum_check (this, (double)priv->n_children,
(double)local->successful_acks + 1);
if (result == _gf_false) {
gf_msg (this->name, GF_LOG_ERROR, EROFS,
N_MSG_QUORUM_NOT_MET, "Didn't receive enough acks "
"to meet quorum. Failing the operation without trying "
"it on the leader.");
STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS,
@ERROR_ARGS@);
} else {
@ -406,8 +413,9 @@ nsr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,
if (result == _gf_false) {
op_ret = -1;
op_errno = EROFS;
gf_msg_debug (this->name, 0,
"Quorum is not met. The operation has failed.");
gf_msg (this->name, GF_LOG_ERROR, EROFS,
N_MSG_QUORUM_NOT_MET, "Quorum is not met. "
"The operation has failed.");
} else {
#if defined(NSR_CG_NEED_FD)
op_ret = local->successful_op_ret;

View File

@ -74,6 +74,8 @@ typedef struct {
* TBD: re-evaluate how to manage this
*/
char term_buf[CHANGELOG_ENTRY_SIZE];
gf_boolean_t child_up; /* To maintain the state of *
* the translator */
} nsr_private_t;
typedef struct {

View File

@ -860,13 +860,23 @@ nsr_get_child_index (xlator_t *this, xlator_t *kid)
int
nsr_notify (xlator_t *this, int event, void *data, ...)
{
nsr_private_t *priv = this->private;
int index;
nsr_private_t *priv = this->private;
int index = -1;
int ret = -1;
gf_boolean_t result = _gf_false;
gf_boolean_t relevant = _gf_false;
switch (event) {
case GF_EVENT_CHILD_UP:
index = nsr_get_child_index(this, data);
if (index >= 0) {
/* Check if the child was previously down
* and it's not a false CHILD_UP
*/
if (!(priv->kid_state & (1 << index))) {
relevant = _gf_true;
}
priv->kid_state |= (1 << index);
priv->up_children = nsr_count_up_kids(priv);
gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
@ -876,27 +886,96 @@ nsr_notify (xlator_t *this, int event, void *data, ...)
if (!priv->config_leader && (priv->up_children > 1)) {
priv->leader = _gf_false;
}
/* If it's not relevant, or we have already *
* sent CHILD_UP just break */
if (!relevant || priv->child_up)
break;
/* If it's not a leader, just send the notify up */
if (!priv->leader) {
ret = default_notify(this, event, data);
if (!ret)
priv->child_up = _gf_true;
break;
}
result = fop_quorum_check (this,
(double)(priv->n_children - 1),
(double)(priv->up_children - 1));
if (result == _gf_false) {
gf_msg (this->name, GF_LOG_INFO, 0,
N_MSG_GENERIC, "Not enough children "
"are up to meet quorum. Waiting to "
"send CHILD_UP from leader");
} else {
gf_msg (this->name, GF_LOG_INFO, 0,
N_MSG_GENERIC, "Enough children are up "
"to meet quorum. Sending CHILD_UP "
"from leader");
ret = default_notify(this, event, data);
if (!ret)
priv->child_up = _gf_true;
}
}
break;
case GF_EVENT_CHILD_DOWN:
index = nsr_get_child_index(this, data);
if (index >= 0) {
/* Check if the child was previously up
* and it's not a false CHILD_DOWN
*/
if (priv->kid_state & (1 << index)) {
relevant = _gf_true;
}
priv->kid_state &= ~(1 << index);
priv->up_children = nsr_count_up_kids(priv);
gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
"got CHILD_DOWN for %s, now %u kids",
((xlator_t *)data)->name,
priv->up_children);
if (!priv->config_leader && (priv->up_children < 2)) {
if (!priv->config_leader && (priv->up_children < 2)
&& relevant) {
priv->leader = _gf_true;
}
/* If it's not relevant, or we have already *
* sent CHILD_DOWN just break */
if (!relevant || !priv->child_up)
break;
/* If it's not a leader, just break because we shouldn't *
* propagate the failure from the follower till it *
* itself goes down *
*/
if (!priv->leader) {
break;
}
result = fop_quorum_check (this,
(double)(priv->n_children - 1),
(double)(priv->up_children - 1));
if (result == _gf_false) {
gf_msg (this->name, GF_LOG_INFO, 0,
N_MSG_GENERIC, "Enough children are "
"to down to fail quorum. "
"Sending CHILD_DOWN from leader");
ret = default_notify(this, event, data);
if (!ret)
priv->child_up = _gf_false;
} else {
gf_msg (this->name, GF_LOG_INFO, 0,
N_MSG_GENERIC, "Not enough children "
"are down to fail quorum. Waiting to "
"send CHILD_DOWN from leader");
}
}
break;
default:
;
ret = default_notify(this, event, data);
}
return default_notify(this, event, data);
return ret;
}
@ -995,6 +1074,7 @@ nsr_init (xlator_t *this)
GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err);
priv->leader = priv->config_leader;
priv->child_up = _gf_false;
if (pthread_create(&kid, NULL, nsr_flush_thread,
this) != 0) {