cluster/afr: Implementation of quorum-reads

Provide a way of disabling reads when quorum is not met.

Change-Id: Ic4f57c2b87a0b8514600759de3a7a47e217fe3b5
BUG: 1187885
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/9543
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
Pranith Kumar K 2015-02-02 18:49:01 +05:30 committed by Pranith Kumar Karampuri
parent 8fa36bc7a1
commit 3797caabb9
9 changed files with 87 additions and 4 deletions

View File

@ -13,10 +13,16 @@ function test_write {
#Tests for quorum-type option for replica 2
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.read-ahead off
TEST $CLI volume start $V0
TEST $GFS -s $H0 --volfile-id=$V0 $M0;
TEST $GFS -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable;
touch $M0/a
echo abc > $M0/b
TEST ! $CLI volume set $V0 cluster.quorum-type ""
TEST $CLI volume set $V0 cluster.quorum-type fixed
@ -25,6 +31,11 @@ TEST $CLI volume set $V0 cluster.quorum-count 2
TEST test_write
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST ! test_write
EXPECT "abc" cat $M0/b
TEST $CLI volume set $V0 cluster.quorum-reads on
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
TEST ! cat $M0/b
TEST $CLI volume reset $V0 cluster.quorum-reads
TEST $CLI volume set $V0 cluster.quorum-type auto
EXPECT auto volume_option $V0 cluster.quorum-type
@ -33,6 +44,11 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST test_write
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST ! test_write
EXPECT "abc" cat $M0/b
TEST $CLI volume set $V0 cluster.quorum-reads on
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
TEST ! cat $M0/b
TEST $CLI volume reset $V0 cluster.quorum-reads
TEST $CLI volume set $V0 cluster.quorum-type none
EXPECT none volume_option $V0 cluster.quorum-type
@ -40,6 +56,12 @@ TEST test_write
#Default is 'none' for even number of bricks in replication
TEST $CLI volume reset $V0 cluster.quorum-type
TEST test_write
EXPECT "abc" cat $M0/b
TEST $CLI volume set $V0 cluster.quorum-reads on
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
EXPECT "abc" cat $M0/b
TEST $CLI volume reset $V0 cluster.quorum-reads
cleanup;
TEST glusterd;
@ -47,10 +69,16 @@ TEST pidof glusterd
#Tests for quorum-type option for replica 3
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.read-ahead off
TEST $CLI volume start $V0
TEST $GFS -s $H0 --volfile-id=$V0 $M0;
TEST $GFS -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable;
touch $M0/a
echo abc > $M0/b
TEST $CLI volume set $V0 cluster.quorum-type fixed
EXPECT fixed volume_option $V0 cluster.quorum-type
@ -58,12 +86,24 @@ TEST $CLI volume set $V0 cluster.quorum-count 3
TEST test_write
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST ! test_write
EXPECT "abc" cat $M0/b
TEST $CLI volume set $V0 cluster.quorum-reads on
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
TEST ! cat $M0/b
TEST $CLI volume reset $V0 cluster.quorum-reads
TEST $CLI volume set $V0 cluster.quorum-type auto
EXPECT auto volume_option $V0 cluster.quorum-type
TEST test_write
TEST kill_brick $V0 $H0 $B0/${V0}3
TEST ! test_write
EXPECT "abc" cat $M0/b
TEST $CLI volume set $V0 cluster.quorum-reads on
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
TEST ! cat $M0/b
TEST $CLI volume reset $V0 cluster.quorum-reads
TEST $CLI volume set $V0 cluster.quorum-type none
EXPECT none volume_option $V0 cluster.quorum-type

View File

@ -61,6 +61,7 @@ HEAL_TIMEOUT=60
MARKER_UPDATE_TIMEOUT=20
JANITOR_TIMEOUT=60
UMOUNT_TIMEOUT=5
CONFIG_UPDATE_TIMEOUT=5
statedumpdir=`gluster --print-statedumpdir`; # Default directory for statedump

View File

@ -457,3 +457,11 @@ function volgen_volume_option {
local xl_option="$5"
sed -e "/./{H;\$!d;}" -e "x;/volume $xl_vol/!d;/type $xl_type\/$xl_feature/!d;/option $xl_option/!d" $volfile | grep " $xl_option " | awk '{print $3}'
}
# Print the value of an option from a mounted client's xlator private dump.
#
#   $1 - mount point whose .meta directory is inspected
#   $2 - name of the subvolume (directory under .meta/graphs/active)
#   $3 - key to look for; passed to grep, so it is matched as a pattern
#
# For every line of <mount>/.meta/graphs/active/<subvol>/private that matches
# the key, the third whitespace-separated field is written to stdout.
function mount_get_option_value {
        local m=$1
        local subvol=$2
        local key=$3

        # Use the named locals and quote the path so that mount points
        # containing whitespace are not split into several arguments.
        grep "$key" "$m/.meta/graphs/active/$subvol/private" | awk '{print $3}'
}

View File

@ -3561,6 +3561,7 @@ afr_priv_dump (xlator_t *this)
gf_proc_dump_write("read_child", "%d", priv->read_child);
gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
gf_proc_dump_write("wait_count", "%u", priv->wait_count);
gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads);
return 0;
}

View File

@ -195,6 +195,15 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
local->readfn = readfn;
local->inode = inode_ref (inode);
if (priv->quorum_reads &&
priv->quorum_count && !afr_has_quorum (priv->child_up, this)) {
local->op_ret = -1;
local->op_errno = ENOTCONN;
read_subvol = -1;
goto read;
}
local->transaction.type = type;
ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
&event_generation, type);
@ -232,6 +241,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
local->read_attempted[read_subvol] = 1;
read:
local->readfn (frame, this, read_subvol);
return 0;

View File

@ -28,6 +28,13 @@ int
afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
afr_changelog_resume_t changelog_resume);
/*
 * Select the errno for a quorum failure: ENOTCONN when the quorum-reads
 * setting is enabled in priv, EROFS otherwise.
 */
static int32_t
afr_quorum_errno (afr_private_t *priv)
{
        return priv->quorum_reads ? ENOTCONN : EROFS;
}
int
__afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
@ -558,7 +565,7 @@ afr_handle_quorum (call_frame_t *frame)
}
local->op_ret = -1;
local->op_errno = EROFS;
local->op_errno = afr_quorum_errno (priv);
}
int
@ -992,7 +999,7 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
* quorum number of nodes.
*/
if (priv->quorum_count && !afr_has_fop_quorum (frame)) {
op_errno = EROFS;
op_errno = afr_quorum_errno (priv);
goto err;
}

View File

@ -211,6 +211,9 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
int32, out);
GF_OPTION_RECONF ("quorum-reads", priv->quorum_reads, options,
bool, out);
priv->did_discovery = _gf_false;
ret = 0;
@ -359,6 +362,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out);
priv->wait_count = 1;
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
@ -724,6 +729,12 @@ struct volume_options options[] = {
"this many bricks or present. Other quorum types "
"will OVERWRITE this value.",
},
{ .key = {"quorum-reads"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "no",
.description = "If quorum-reads is \"true\" only allow reads if "
"quorum is met when quorum is enabled.",
},
{ .key = {"node-uuid"},
.type = GF_OPTION_TYPE_STR,
.description = "Local glusterd uuid string, used in starting "

View File

@ -97,6 +97,7 @@ typedef struct _afr_private {
gf_boolean_t pre_op_compat; /* on/off */
uint32_t post_op_delay_secs;
unsigned int quorum_count;
gf_boolean_t quorum_reads;
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;

View File

@ -1655,6 +1655,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_7_0,
.validate_fn = validate_disperse_heal_enable_disable
},
{ .key = "cluster.quorum-reads",
.voltype = "cluster/replicate",
.op_version = GD_OP_VERSION_3_7_0,
},
{ .key = NULL
}
};