cluster/afr: Implement quorum for lk fop

Problem:
At the moment, with a replica-3 or arbiter setup, lk is reported as
successful to the application even when it succeeded on only one brick,
which is wrong.

Fix:
Consider quorum-number of successes as success when quorum is enabled.

BUG: 1461792
Change-Id: I5789e6eb5defb68f8a0eb9cd594d316f5cdebaea
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://review.gluster.org/17524
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
This commit is contained in:
Pranith Kumar K 2017-06-12 22:06:18 +05:30 committed by Pranith Kumar Karampuri
parent b58a15948f
commit 45ebcf7009
3 changed files with 293 additions and 23 deletions

255
tests/basic/afr/lk-quorum.t Normal file
View File

@ -0,0 +1,255 @@
#!/bin/bash
#Regression test for BUG 1461792: the lk fop in AFR must honor client
#quorum. Previously, with replica 3/arbiter, lk returned success to the
#application even when it succeeded on only one brick.
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
#fileio.rc provides the fd_available/fd_open/fd_close helpers used below
. $(dirname $0)/../../fileio.rc
cleanup;
TEST glusterd;
TEST pidof glusterd
#Tests for quorum-type option for replica 2
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
#Turn off the client-side performance xlators so lock requests are not
#absorbed or delayed by caches and always reach the bricks
TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 performance.open-behind off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.read-ahead off
TEST $CLI volume start $V0
TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
TEST touch $M0/a
#Baseline (no quorum options set yet)
#When all bricks are up, lock and unlock should succeed
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST flock -x $fd1
TEST fd_close $fd1
#When all bricks are down, lock/unlock should fail
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST $CLI volume stop $V0
TEST ! flock -x $fd1
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#Check locking behavior with quorum 'fixed' and quorum-count 2
TEST $CLI volume set $V0 cluster.quorum-type fixed
TEST $CLI volume set $V0 cluster.quorum-count 2
#Wait until the mounted client graph has picked up the new options
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^fixed$" mount_get_option_value $M0 $V0-replicate-0 quorum-type
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^2$" mount_get_option_value $M0 $V0-replicate-0 quorum-count
#When all bricks are up, lock and unlock should succeed
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST flock -x $fd1
TEST fd_close $fd1
#When all bricks are down, lock/unlock should fail
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST $CLI volume stop $V0
TEST ! flock -x $fd1
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#quorum-count 2 on replica 2: both bricks are required, so losing
#either one must fail the lock
#When any of the bricks is down lock/unlock should fail
#kill first brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST ! flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST fd_close $fd1
#kill 2nd brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST ! flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#Check locking behavior with quorum 'fixed' and quorum-count 1
TEST $CLI volume set $V0 cluster.quorum-count 1
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" mount_get_option_value $M0 $V0-replicate-0 quorum-count
#When all bricks are up, lock and unlock should succeed
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST flock -x $fd1
TEST fd_close $fd1
#When all bricks are down, lock/unlock should fail
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST $CLI volume stop $V0
TEST ! flock -x $fd1
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#quorum-count 1: a single live brick satisfies quorum
#When any of the bricks is down lock/unlock should succeed
#kill first brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST fd_close $fd1
#kill 2nd brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#Check locking behavior with quorum 'auto'
TEST $CLI volume set $V0 cluster.quorum-type auto
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^auto$" mount_get_option_value $M0 $V0-replicate-0 quorum-type
#When all bricks are up, lock and unlock should succeed
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST flock -x $fd1
TEST fd_close $fd1
#When all bricks are down, lock/unlock should fail
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST $CLI volume stop $V0
TEST ! flock -x $fd1
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#quorum 'auto' on replica 2: only brick 0 going down breaks quorum,
#as the next two sub-tests demonstrate
#When first brick is down lock/unlock should fail
#kill first brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST ! flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST fd_close $fd1
#When second brick is down lock/unlock should succeed
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#Second half: same checks on a fresh replica 3 volume with default
#quorum settings (no quorum options are set explicitly here)
cleanup;
TEST glusterd;
TEST pidof glusterd
#Tests for replica 3
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 performance.open-behind off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.read-ahead off
TEST $CLI volume start $V0
TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
TEST touch $M0/a
#When all bricks are up, lock and unlock should succeed
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST flock -x $fd1
TEST fd_close $fd1
#When all bricks are down, lock/unlock should fail
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST $CLI volume stop $V0
TEST ! flock -x $fd1
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
TEST fd_close $fd1
#Two of three bricks up: quorum holds, so losing any single brick is
#tolerated
#When any of the bricks is down lock/unlock should succeed
#kill first brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST fd_close $fd1
#kill 2nd brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#kill 3rd brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}2
TEST flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
TEST fd_close $fd1
#Only one of three bricks up: quorum lost, lock must fail — this is
#the exact scenario of BUG 1461792 that used to wrongly succeed
#When any two of the bricks are down lock/unlock should fail
#kill first,second bricks
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST ! flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST fd_close $fd1
#kill 2nd,3rd bricks
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST kill_brick $V0 $H0 $B0/${V0}2
TEST ! flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
TEST fd_close $fd1
#kill 1st,3rd brick
TEST fd1=`fd_available`
TEST fd_open $fd1 'w' $M0/a
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST kill_brick $V0 $H0 $B0/${V0}2
TEST ! flock -x $fd1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST fd_close $fd1
cleanup

View File

@ -3835,7 +3835,7 @@ unwind:
static int
afr_common_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int child_index = (long)cookie;
@ -4215,15 +4215,27 @@ afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
dict_t *xdata)
{
afr_local_t * local = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = this->private;
int call_count = -1;
int child_index = (long)cookie;
local = frame->local;
call_count = afr_frame_return (frame);
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
gf_msg (this->name, GF_LOG_ERROR, op_errno,
AFR_MSG_UNLOCK_FAIL,
"gfid=%s: unlock failed on subvolume %s "
"with lock owner %s",
uuid_utoa (local->fd->inode->gfid),
priv->children[child_index]->name,
lkowner_utoa (&frame->root->lk_owner));
}
call_count = afr_frame_return (frame);
if (call_count == 0)
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
lock, xdata);
NULL, local->xdata_rsp);
return 0;
}
@ -4245,7 +4257,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
if (call_count == 0) {
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
&local->cont.lk.ret_flock, NULL);
NULL, local->xdata_rsp);
return 0;
}
@ -4255,8 +4267,8 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (local->cont.lk.locked_nodes[i]) {
STACK_WIND (frame, afr_lk_unlock_cbk,
priv->children[i],
STACK_WIND_COOKIE (frame, afr_lk_unlock_cbk,
(void *) (long) i, priv->children[i],
priv->children[i]->fops->lk,
local->fd, F_SETLK,
&local->cont.lk.user_flock, NULL);
@ -4272,12 +4284,12 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
int32_t
afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int child_index = -1;
/* int ret = 0; */
local = frame->local;
@ -4285,9 +4297,10 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
child_index = (long) cookie;
if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
if (op_ret < 0 && op_errno == EAGAIN) {
local->op_ret = -1;
local->op_errno = op_errno;
local->op_errno = EAGAIN;
afr_lk_unlock (frame, this);
return 0;
@ -4307,15 +4320,20 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index],
priv->children[child_index]->fops->lk,
local->fd, local->cont.lk.cmd,
&local->cont.lk.user_flock, xdata);
} else if (local->op_ret == -1) {
/* all nodes have gone down */
&local->cont.lk.user_flock,
local->xdata_req);
} else if (priv->quorum_count &&
!afr_has_quorum (local->cont.lk.locked_nodes, this)) {
local->op_ret = -1;
local->op_errno = afr_final_errno (local, priv);
AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
&local->cont.lk.ret_flock, NULL);
afr_lk_unlock (frame, this);
} else {
if (local->op_ret < 0)
local->op_errno = afr_final_errno (local, priv);
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
&local->cont.lk.ret_flock, NULL);
&local->cont.lk.ret_flock, local->xdata_rsp);
}
return 0;
@ -4354,11 +4372,13 @@ afr_lk (call_frame_t *frame, xlator_t *this,
local->cont.lk.cmd = cmd;
local->cont.lk.user_flock = *flock;
local->cont.lk.ret_flock = *flock;
if (xdata)
local->xdata_req = dict_ref (xdata);
STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
priv->children[i],
priv->children[i]->fops->lk,
fd, cmd, flock, xdata);
fd, cmd, flock, local->xdata_req);
return 0;
out:

View File

@ -875,11 +875,6 @@ typedef struct afr_granular_esh_args {
mismatch */
} afr_granular_esh_args_t;
/* did a call fail due to a child failing? */
#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
((op_errno == ENOTCONN) || \
(op_errno == EBADFD)))
int
afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
unsigned char *readable, int *event_p, int type);