cluster/afr: Implementation of quorum-reads
Provide a way of disabling reads when quorum is not met. Change-Id: Ic4f57c2b87a0b8514600759de3a7a47e217fe3b5 BUG: 1187885 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/9543 Reviewed-by: Ravishankar N <ravishankar@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
parent
8fa36bc7a1
commit
3797caabb9
@ -13,10 +13,16 @@ function test_write {
|
||||
|
||||
#Tests for quorum-type option for replica 2
|
||||
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
|
||||
TEST $CLI volume set $V0 performance.quick-read off
|
||||
TEST $CLI volume set $V0 performance.io-cache off
|
||||
TEST $CLI volume set $V0 performance.write-behind off
|
||||
TEST $CLI volume set $V0 performance.stat-prefetch off
|
||||
TEST $CLI volume set $V0 performance.read-ahead off
|
||||
TEST $CLI volume start $V0
|
||||
TEST $GFS -s $H0 --volfile-id=$V0 $M0;
|
||||
TEST $GFS -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable;
|
||||
|
||||
touch $M0/a
|
||||
echo abc > $M0/b
|
||||
|
||||
TEST ! $CLI volume set $V0 cluster.quorum-type ""
|
||||
TEST $CLI volume set $V0 cluster.quorum-type fixed
|
||||
@ -25,6 +31,11 @@ TEST $CLI volume set $V0 cluster.quorum-count 2
|
||||
TEST test_write
|
||||
TEST kill_brick $V0 $H0 $B0/${V0}1
|
||||
TEST ! test_write
|
||||
EXPECT "abc" cat $M0/b
|
||||
TEST $CLI volume set $V0 cluster.quorum-reads on
|
||||
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
||||
TEST ! cat $M0/b
|
||||
TEST $CLI volume reset $V0 cluster.quorum-reads
|
||||
|
||||
TEST $CLI volume set $V0 cluster.quorum-type auto
|
||||
EXPECT auto volume_option $V0 cluster.quorum-type
|
||||
@ -33,6 +44,11 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
||||
TEST test_write
|
||||
TEST kill_brick $V0 $H0 $B0/${V0}1
|
||||
TEST ! test_write
|
||||
EXPECT "abc" cat $M0/b
|
||||
TEST $CLI volume set $V0 cluster.quorum-reads on
|
||||
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
||||
TEST ! cat $M0/b
|
||||
TEST $CLI volume reset $V0 cluster.quorum-reads
|
||||
|
||||
TEST $CLI volume set $V0 cluster.quorum-type none
|
||||
EXPECT none volume_option $V0 cluster.quorum-type
|
||||
@ -40,6 +56,12 @@ TEST test_write
|
||||
#Default is 'none' for even number of bricks in replication
|
||||
TEST $CLI volume reset $V0 cluster.quorum-type
|
||||
TEST test_write
|
||||
EXPECT "abc" cat $M0/b
|
||||
TEST $CLI volume set $V0 cluster.quorum-reads on
|
||||
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
||||
EXPECT "abc" cat $M0/b
|
||||
TEST $CLI volume reset $V0 cluster.quorum-reads
|
||||
|
||||
|
||||
cleanup;
|
||||
TEST glusterd;
|
||||
@ -47,10 +69,16 @@ TEST pidof glusterd
|
||||
|
||||
#Tests for quorum-type option for replica 3
|
||||
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
|
||||
TEST $CLI volume set $V0 performance.quick-read off
|
||||
TEST $CLI volume set $V0 performance.io-cache off
|
||||
TEST $CLI volume set $V0 performance.write-behind off
|
||||
TEST $CLI volume set $V0 performance.stat-prefetch off
|
||||
TEST $CLI volume set $V0 performance.read-ahead off
|
||||
TEST $CLI volume start $V0
|
||||
TEST $GFS -s $H0 --volfile-id=$V0 $M0;
|
||||
TEST $GFS -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable;
|
||||
|
||||
touch $M0/a
|
||||
echo abc > $M0/b
|
||||
|
||||
TEST $CLI volume set $V0 cluster.quorum-type fixed
|
||||
EXPECT fixed volume_option $V0 cluster.quorum-type
|
||||
@ -58,12 +86,24 @@ TEST $CLI volume set $V0 cluster.quorum-count 3
|
||||
TEST test_write
|
||||
TEST kill_brick $V0 $H0 $B0/${V0}1
|
||||
TEST ! test_write
|
||||
EXPECT "abc" cat $M0/b
|
||||
TEST $CLI volume set $V0 cluster.quorum-reads on
|
||||
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
||||
TEST ! cat $M0/b
|
||||
TEST $CLI volume reset $V0 cluster.quorum-reads
|
||||
|
||||
|
||||
TEST $CLI volume set $V0 cluster.quorum-type auto
|
||||
EXPECT auto volume_option $V0 cluster.quorum-type
|
||||
TEST test_write
|
||||
TEST kill_brick $V0 $H0 $B0/${V0}3
|
||||
TEST ! test_write
|
||||
EXPECT "abc" cat $M0/b
|
||||
TEST $CLI volume set $V0 cluster.quorum-reads on
|
||||
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
||||
TEST ! cat $M0/b
|
||||
TEST $CLI volume reset $V0 cluster.quorum-reads
|
||||
|
||||
|
||||
TEST $CLI volume set $V0 cluster.quorum-type none
|
||||
EXPECT none volume_option $V0 cluster.quorum-type
|
||||
|
@ -61,6 +61,7 @@ HEAL_TIMEOUT=60
|
||||
MARKER_UPDATE_TIMEOUT=20
|
||||
JANITOR_TIMEOUT=60
|
||||
UMOUNT_TIMEOUT=5
|
||||
CONFIG_UPDATE_TIMEOUT=5
|
||||
|
||||
statedumpdir=`gluster --print-statedumpdir`; # Default directory for statedump
|
||||
|
||||
|
@ -457,3 +457,11 @@ function volgen_volume_option {
|
||||
local xl_option="$5"
|
||||
sed -e "/./{H;\$!d;}" -e "x;/volume $xl_vol/!d;/type $xl_type\/$xl_feature/!d;/option $xl_option/!d" $volfile | grep " $xl_option " | awk '{print $3}'
|
||||
}
|
||||
|
||||
# Print the value of an option from a mounted client's active graph.
#
# Usage: mount_get_option_value <mount-point> <subvol> <key>
#
# Reads <mount-point>/.meta/graphs/active/<subvol>/private, keeps the
# lines that match <key> (treated as a grep pattern), and prints the
# third whitespace-separated field of each matching line.
function mount_get_option_value {
        local m=$1
        local subvol=$2
        local key=$3

        # Quote the path so mount points or subvolume names containing
        # whitespace are not word-split; "--" keeps a key that starts with
        # "-" from being parsed as a grep option.
        grep -- "$key" "$m/.meta/graphs/active/$subvol/private" | awk '{print $3}'
}
|
||||
|
@ -3561,6 +3561,7 @@ afr_priv_dump (xlator_t *this)
|
||||
gf_proc_dump_write("read_child", "%d", priv->read_child);
|
||||
gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
|
||||
gf_proc_dump_write("wait_count", "%u", priv->wait_count);
|
||||
gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -195,6 +195,15 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
||||
local->readfn = readfn;
|
||||
local->inode = inode_ref (inode);
|
||||
|
||||
if (priv->quorum_reads &&
|
||||
priv->quorum_count && !afr_has_quorum (priv->child_up, this)) {
|
||||
local->op_ret = -1;
|
||||
local->op_errno = ENOTCONN;
|
||||
read_subvol = -1;
|
||||
goto read;
|
||||
}
|
||||
|
||||
|
||||
local->transaction.type = type;
|
||||
ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
|
||||
&event_generation, type);
|
||||
@ -232,6 +241,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
||||
|
||||
local->read_attempted[read_subvol] = 1;
|
||||
|
||||
read:
|
||||
local->readfn (frame, this, read_subvol);
|
||||
|
||||
return 0;
|
||||
|
@ -28,6 +28,13 @@ int
|
||||
afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
|
||||
afr_changelog_resume_t changelog_resume);
|
||||
|
||||
static int32_t
|
||||
afr_quorum_errno (afr_private_t *priv)
|
||||
{
|
||||
if (priv->quorum_reads)
|
||||
return ENOTCONN;
|
||||
return EROFS;
|
||||
}
|
||||
|
||||
int
|
||||
__afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
|
||||
@ -558,7 +565,7 @@ afr_handle_quorum (call_frame_t *frame)
|
||||
}
|
||||
|
||||
local->op_ret = -1;
|
||||
local->op_errno = EROFS;
|
||||
local->op_errno = afr_quorum_errno (priv);
|
||||
}
|
||||
|
||||
int
|
||||
@ -992,7 +999,7 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
|
||||
* quorum number of nodes.
|
||||
*/
|
||||
if (priv->quorum_count && !afr_has_fop_quorum (frame)) {
|
||||
op_errno = EROFS;
|
||||
op_errno = afr_quorum_errno (priv);
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -211,6 +211,9 @@ reconfigure (xlator_t *this, dict_t *options)
|
||||
GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
|
||||
int32, out);
|
||||
|
||||
GF_OPTION_RECONF ("quorum-reads", priv->quorum_reads, options,
|
||||
bool, out);
|
||||
|
||||
priv->did_discovery = _gf_false;
|
||||
|
||||
ret = 0;
|
||||
@ -359,6 +362,8 @@ init (xlator_t *this)
|
||||
GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
|
||||
GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
|
||||
|
||||
GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out);
|
||||
|
||||
priv->wait_count = 1;
|
||||
|
||||
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
|
||||
@ -724,6 +729,12 @@ struct volume_options options[] = {
|
||||
"this many bricks or present. Other quorum types "
|
||||
"will OVERWRITE this value.",
|
||||
},
|
||||
{ .key = {"quorum-reads"},
|
||||
.type = GF_OPTION_TYPE_BOOL,
|
||||
.default_value = "no",
|
||||
.description = "If quorum-reads is \"true\" only allow reads if "
|
||||
"quorum is met when quorum is enabled.",
|
||||
},
|
||||
{ .key = {"node-uuid"},
|
||||
.type = GF_OPTION_TYPE_STR,
|
||||
.description = "Local glusterd uuid string, used in starting "
|
||||
|
@ -97,6 +97,7 @@ typedef struct _afr_private {
|
||||
gf_boolean_t pre_op_compat; /* on/off */
|
||||
uint32_t post_op_delay_secs;
|
||||
unsigned int quorum_count;
|
||||
gf_boolean_t quorum_reads;
|
||||
|
||||
char vol_uuid[UUID_SIZE + 1];
|
||||
int32_t *last_event;
|
||||
|
@ -1655,6 +1655,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
||||
.op_version = GD_OP_VERSION_3_7_0,
|
||||
.validate_fn = validate_disperse_heal_enable_disable
|
||||
},
|
||||
{ .key = "cluster.quorum-reads",
|
||||
.voltype = "cluster/replicate",
|
||||
.op_version = GD_OP_VERSION_3_7_0,
|
||||
},
|
||||
{ .key = NULL
|
||||
}
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user