mgmt/glusterd: Implementation of server-side quorum

Feature-page:
http://www.gluster.org/community/documentation/index.php/Features/Server-quorum

Change-Id: I747b222519e71022462343d2c1bcd3626e1f9c86
BUG: 839595
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.org/3811
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Pranith Kumar K 2012-11-06 12:18:24 +05:30 committed by Vijay Bellur
parent 76a4afec6e
commit 7c23a94516
13 changed files with 1254 additions and 205 deletions

View File

@ -83,6 +83,42 @@ rpc_clnt_prog_t cli_pmap_prog = {
.progver = GLUSTER_PMAP_VERSION,
};
/*
 * Render the op_errno carried in a probe response as a human readable
 * message.
 *
 * @rsp: probe response; its op_errno selects the message and its hostname
 *       is interpolated where the message mentions a host.
 * @msg: destination buffer.
 * @len: size of @msg in bytes; output is bounded by snprintf.
 *
 * Codes without a dedicated message fall through to a generic
 * "unknown errno" text that includes the numeric value.
 */
void
gf_cli_probe_strerror (gf1_cli_probe_rsp *rsp, char *msg, size_t len)
{
        if (rsp->op_errno == GF_PROBE_ANOTHER_CLUSTER) {
                snprintf (msg, len, "%s is already part of another cluster",
                          rsp->hostname);
                return;
        }

        if (rsp->op_errno == GF_PROBE_VOLUME_CONFLICT) {
                snprintf (msg, len, "Atleast one volume on %s conflicts with "
                          "existing volumes in the cluster", rsp->hostname);
                return;
        }

        if (rsp->op_errno == GF_PROBE_UNKNOWN_PEER) {
                snprintf (msg, len, "%s responded with 'unknown peer' error, "
                          "this could happen if %s doesn't have localhost in "
                          "its peer database", rsp->hostname, rsp->hostname);
                return;
        }

        if (rsp->op_errno == GF_PROBE_ADD_FAILED) {
                snprintf (msg, len, "Failed to add peer information on %s",
                          rsp->hostname);
                return;
        }

        if (rsp->op_errno == GF_PROBE_SAME_UUID) {
                snprintf (msg, len, "Peer uuid (host %s) is same as local uuid",
                          rsp->hostname);
                return;
        }

        if (rsp->op_errno == GF_PROBE_QUORUM_NOT_MET) {
                snprintf (msg, len, "Cluster quorum is not met. Changing "
                          "peers is not allowed in this state");
                return;
        }

        /* No dedicated text for this code: report the raw number. */
        snprintf (msg, len, "Probe returned with unknown "
                  "errno %d", rsp->op_errno);
}
int
gf_cli_probe_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
@ -133,47 +169,7 @@ gf_cli_probe_cbk (struct rpc_req *req, struct iovec *iov,
if (rsp.op_errstr && (strlen (rsp.op_errstr) > 0)) {
snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
} else {
switch (rsp.op_errno) {
case GF_PROBE_ANOTHER_CLUSTER:
snprintf (msg, sizeof (msg),
"%s is already part of "
"another cluster",
rsp.hostname);
break;
case GF_PROBE_VOLUME_CONFLICT:
snprintf (msg, sizeof (msg),
"Atleast one volume on %s "
"conflicts with existing "
"volumes in the cluster",
rsp.hostname);
break;
case GF_PROBE_UNKNOWN_PEER:
snprintf (msg, sizeof (msg),
"%s responded with 'unknown "
"peer' error, this could "
"happen if %s doesn't have "
"localhost in its peer "
"database", rsp.hostname,
rsp.hostname);
break;
case GF_PROBE_ADD_FAILED:
snprintf (msg, sizeof (msg),
"Failed to add peer "
"information on %s" ,
rsp.hostname);
break;
case GF_PROBE_SAME_UUID:
snprintf (msg, sizeof (msg),
"Peer uuid (host %s) is"
"same as local uuid",
rsp.hostname);
break;
default:
snprintf (msg, sizeof (msg),
"Probe returned with unknown "
"errno %d", rsp.op_errno);
break;
}
gf_cli_probe_strerror (&rsp, msg, sizeof (msg));
}
gf_log ("cli", GF_LOG_ERROR, "%s", msg);
}
@ -248,6 +244,12 @@ gf_cli_deprobe_cbk (struct rpc_req *req, struct iovec *iov,
" down. Check with 'peer "
"status'.");
break;
case GF_DEPROBE_QUORUM_NOT_MET:
snprintf (msg, sizeof (msg), "Cluster "
"quorum is not met. Changing "
"peers is not allowed in this"
" state");
break;
default:
snprintf (msg, sizeof (msg),
"Detach returned with unknown"

View File

@ -96,7 +96,8 @@ enum gf_probe_resp {
GF_PROBE_VOLUME_CONFLICT,
GF_PROBE_SAME_UUID,
GF_PROBE_UNKNOWN_PEER,
GF_PROBE_ADD_FAILED
GF_PROBE_ADD_FAILED,
GF_PROBE_QUORUM_NOT_MET
};
enum gf_deprobe_resp {
@ -104,7 +105,8 @@ enum gf_deprobe_resp {
GF_DEPROBE_LOCALHOST,
GF_DEPROBE_NOT_FRIEND,
GF_DEPROBE_BRICK_EXIST,
GF_DEPROBE_FRIEND_DOWN
GF_DEPROBE_FRIEND_DOWN,
GF_DEPROBE_QUORUM_NOT_MET,
};
enum gf_cbk_procnum {

View File

@ -280,6 +280,8 @@ _build_option_key (dict_t *d, char *k, data_t *v, void *tmp)
int ret = -1;
pack = tmp;
if (strcmp (k, GLUSTERD_GLOBAL_OPT_VERSION) == 0)
return 0;
snprintf (reconfig_key, 256, "volume%d.option.%s",
pack->vol_count, k);
ret = dict_set_str (pack->dict, reconfig_key, v->data);
@ -303,12 +305,14 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
glusterd_conf_t *priv = NULL;
char *volume_id_str = NULL;
struct args_pack pack = {0,};
xlator_t *this = NULL;
GF_ASSERT (volinfo);
GF_ASSERT (volumes);
priv = THIS->private;
this = THIS;
priv = this->private;
GF_ASSERT (priv);
@ -388,6 +392,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
pack.vol_count = count;
pack.opt_count = 0;
dict_foreach (dict, _build_option_key, (void *) &pack);
dict_foreach (priv->opts, _build_option_key, &pack);
snprintf (key, 256, "volume%d.opt_count", pack.vol_count);
ret = dict_set_int32 (volumes, key, pack.opt_count);
@ -445,7 +450,6 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx)
GF_ASSERT (priv);
ret = glusterd_lock (MY_UUID);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Unable to acquire local lock, ret: %d", ret);
@ -660,15 +664,30 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)
gf1_cli_probe_req cli_req = {0,};
glusterd_peerinfo_t *peerinfo = NULL;
gf_boolean_t run_fsm = _gf_true;
GF_ASSERT (req);
xlator_t *this = NULL;
if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf1_cli_probe_req)) {
GF_ASSERT (req);
this = THIS;
if (!xdr_to_generic (req->msg[0], &cli_req,
(xdrproc_t)xdr_gf1_cli_probe_req)) {
//failed to decode msg;
gf_log ("", GF_LOG_ERROR, "xdr decoding error");
req->rpc_err = GARBAGE_ARGS;
goto out;
}
if (glusterd_is_any_volume_in_server_quorum (this) &&
!does_gd_meet_server_quorum (this)) {
glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET,
NULL,
cli_req.hostname, cli_req.port);
gf_log (this->name, GF_LOG_ERROR, "Quorum does not meet, "
"rejecting operation");
ret = 0;
goto out;
}
gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname,
cli_req.port);
gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d",
@ -684,8 +703,9 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)
&peerinfo))) {
if (strcmp (peerinfo->hostname, cli_req.hostname) == 0) {
gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port %d"
" already a peer", cli_req.hostname, cli_req.port);
gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port "
"%d already a peer", cli_req.hostname,
cli_req.port);
glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND,
NULL, cli_req.hostname,
cli_req.port);
@ -694,8 +714,8 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)
}
ret = glusterd_probe_begin (req, cli_req.hostname, cli_req.port);
gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, cli_req.port,
(ret) ? "FAILED" : "SUCCESS");
gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname,
cli_req.port, (ret) ? "FAILED" : "SUCCESS");
if (ret == GLUSTERD_CONNECTION_AWAITED) {
//fsm should be run after connection establishes
@ -717,7 +737,7 @@ int
glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
{
int32_t ret = -1;
gf1_cli_deprobe_req cli_req = {0,};
gf1_cli_deprobe_req cli_req = {0,};
uuid_t uuid = {0};
int op_errno = 0;
xlator_t *this = NULL;
@ -750,18 +770,29 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
goto out;
}
if (!uuid_is_null (uuid) && !(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) {
/* Check if peers are connected, except peer being detached*/
if (!glusterd_chk_peers_connected_befriended (uuid)) {
ret = -1;
op_errno = GF_DEPROBE_FRIEND_DOWN;
goto out;
if (!(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) {
if (!uuid_is_null (uuid)) {
/* Check if peers are connected, except peer being detached*/
if (!glusterd_chk_peers_connected_befriended (uuid)) {
ret = -1;
op_errno = GF_DEPROBE_FRIEND_DOWN;
goto out;
}
ret = glusterd_all_volume_cond_check (
glusterd_friend_brick_belongs,
-1, &uuid);
if (ret) {
op_errno = GF_DEPROBE_BRICK_EXIST;
goto out;
}
}
ret = glusterd_all_volume_cond_check (
glusterd_friend_brick_belongs,
-1, &uuid);
if (ret) {
op_errno = GF_DEPROBE_BRICK_EXIST;
if (glusterd_is_any_volume_in_server_quorum (this) &&
!does_gd_meet_server_quorum (this)) {
gf_log (this->name, GF_LOG_ERROR, "Quorum does not "
"meet, rejecting operation");
ret = -1;
op_errno = GF_DEPROBE_QUORUM_NOT_MET;
goto out;
}
}
@ -2145,6 +2176,43 @@ out:
return ret;
}
/*
 * Create the rpc client object for a peer.
 *
 * A glusterd_peerctx_t is allocated, filled with @peerinfo and (when given)
 * a copy of @args, and passed to glusterd_rpc_create() as the notify data
 * for glusterd_peer_rpc_notify. Transport options are built from the peer's
 * hostname and port.
 *
 * Returns 0 on success, non-zero on failure. On failure the context
 * allocated here is released before returning.
 */
int
glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,
                            glusterd_peerctx_args_t *args)
{
        glusterd_peerctx_t      *ctx      = NULL;
        dict_t                  *rpc_opts = NULL;
        int                      ret      = -1;

        ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_peerctx_t);
        if (ctx == NULL)
                goto out;

        if (args != NULL)
                ctx->args = *args;
        ctx->peerinfo = peerinfo;

        ret = glusterd_transport_inet_options_build (&rpc_opts,
                                                     peerinfo->hostname,
                                                     peerinfo->port);
        if (ret != 0)
                goto out;

        ret = glusterd_rpc_create (&peerinfo->rpc, rpc_opts,
                                   glusterd_peer_rpc_notify, ctx);
        if (ret != 0) {
                gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for"
                        " peer %s", peerinfo->hostname);
                goto out;
        }

        /* The context was passed on as notify data; do not free it below. */
        ctx = NULL;

out:
        GF_FREE (ctx);
        return ret;
}
int
glusterd_friend_add (const char *hoststr, int port,
glusterd_friend_sm_state_t state,
@ -2156,8 +2224,6 @@ glusterd_friend_add (const char *hoststr, int port,
int ret = 0;
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
glusterd_peerctx_t *peerctx = NULL;
dict_t *options = NULL;
gf_boolean_t handover = _gf_false;
this = THIS;
@ -2165,49 +2231,35 @@ glusterd_friend_add (const char *hoststr, int port,
GF_ASSERT (conf);
GF_ASSERT (hoststr);
peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t);
if (!peerctx) {
ret = -1;
goto out;
}
if (args)
peerctx->args = *args;
ret = glusterd_peerinfo_new (friend, state, uuid, hoststr);
ret = glusterd_peerinfo_new (friend, state, uuid, hoststr, port);
if (ret)
goto out;
peerctx->peerinfo = *friend;
//restore needs to first create the list of peers, then create rpcs
//to keep track of quorum in race-free manner. In restore for each peer
//rpc-create calls rpc_notify when the friend-list is partially
//constructed, leading to wrong quorum calculations.
if (restore)
goto done;
ret = glusterd_transport_inet_options_build (&options, hoststr, port);
if (ret)
goto out;
if (!restore) {
ret = glusterd_store_peerinfo (*friend);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Failed to store "
"peerinfo");
goto out;
}
}
list_add_tail (&(*friend)->uuid_list, &conf->peers);
ret = glusterd_rpc_create (&(*friend)->rpc, options,
glusterd_peer_rpc_notify,
peerctx);
ret = glusterd_store_peerinfo (*friend);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for"
" peer %s", (char*)hoststr);
gf_log (this->name, GF_LOG_ERROR, "Failed to store "
"peerinfo");
goto out;
}
ret = glusterd_friend_rpc_create (this, *friend, args);
if (ret)
goto out;
done:
list_add_tail (&(*friend)->uuid_list, &conf->peers);
handover = _gf_true;
out:
if (ret && !handover) {
(void) glusterd_friend_cleanup (*friend);
*friend = NULL;
(void) glusterd_friend_cleanup (*friend);
*friend = NULL;
}
gf_log (this->name, GF_LOG_INFO, "connect returned %d", ret);
@ -2866,6 +2918,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
glusterd_peerctx_t *peerctx = NULL;
uuid_t owner = {0,};
uuid_t *peer_uuid = NULL;
gf_boolean_t quorum_action = _gf_false;
peerctx = mydata;
if (!peerctx)
@ -2880,6 +2933,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
{
gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
peerinfo->connected = 1;
peerinfo->quorum_action = _gf_true;
ret = glusterd_peer_dump_version (this, rpc, peerctx);
if (ret)
@ -2892,6 +2946,14 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d",
peerinfo->state.state);
if ((peerinfo->quorum_contrib != QUORUM_DOWN) &&
(peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) {
peerinfo->quorum_contrib = QUORUM_DOWN;
quorum_action = _gf_true;
peerinfo->quorum_action = _gf_false;
}
peerinfo->connected = 0;
/*
local glusterd (thinks that it) is the owner of the cluster
lock and 'fails' the operation on the first disconnect from
@ -2944,6 +3006,8 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
glusterd_friend_sm ();
glusterd_op_sm ();
if (quorum_action)
glusterd_do_quorum_action ();
return ret;
}

View File

@ -46,6 +46,24 @@
#include <signal.h>
#include <sys/wait.h>
/*
 * Reject option/volume combinations that do not make sense:
 *   - volume "all" accepts only GLUSTERD_QUORUM_RATIO_KEY;
 *   - a single volume must not be given GLUSTERD_QUORUM_RATIO_KEY.
 *
 * On rejection the macro sets ret to -1, stores a newly duplicated message
 * in *op_errstr and jumps to label.
 *
 * The volume name is compared case-insensitively so that the decision agrees
 * with the strcasecmp (volname, "all") tests used by the stage/commit
 * functions in this file.
 *
 * volname and key are each evaluated once; ret and op_errstr must be
 * lvalue / pointer expressions valid in the calling scope.
 */
#define ALL_VOLUME_OPTION_CHECK(volname, key, ret, op_errstr, label)           \
        do {                                                                   \
                gf_boolean_t    _all   = !strcasecmp ("all", (volname));       \
                gf_boolean_t    _ratio = !strcmp ((key),                       \
                                                  GLUSTERD_QUORUM_RATIO_KEY);  \
                if (_all && !_ratio) {                                         \
                        (ret) = -1;                                            \
                        *(op_errstr) = gf_strdup ("Not a valid option for all "\
                                                  "volumes");                  \
                        goto label;                                            \
                } else if (!_all && _ratio) {                                  \
                        (ret) = -1;                                            \
                        *(op_errstr) = gf_strdup ("Not a valid option for "    \
                                                  "single volume");            \
                        goto label;                                            \
                }                                                              \
        } while (0)
static struct list_head gd_op_sm_queue;
pthread_mutex_t gd_op_sm_lock;
glusterd_op_info_t opinfo = {{0},};
@ -290,6 +308,24 @@ out:
return ret;
}
/*
 * Validate the value supplied for a quorum option.
 *
 * @this:      xlator whose option table is consulted.
 * @fullkey:   option name as given by the user; the part after the first
 *             '.' is looked up in @this.
 * @value:     value to validate.
 * @op_errstr: passed through to xlator_option_validate().
 *
 * Keys for which glusterd_is_quorum_option() is false are not checked and
 * 0 is returned. Otherwise the result of xlator_option_validate() is
 * returned, or -1 when @fullkey contains no '.' separator.
 */
static int
glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value,
                                  char **op_errstr)
{
        int              ret = 0;
        char            *key = NULL;
        volume_option_t *opt = NULL;

        if (!glusterd_is_quorum_option (fullkey))
                goto out;

        key = strchr (fullkey, '.');
        if (key == NULL) {
                /* Incrementing and using a NULL pointer would be undefined
                 * behaviour; fail the validation instead. */
                gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", fullkey);
                ret = -1;
                goto out;
        }
        key++;

        /* NOTE(review): opt is handed to xlator_option_validate() unchecked;
         * whether that function tolerates a NULL option is not visible
         * here - confirm. */
        opt = xlator_volume_option_get (this, key);
        ret = xlator_option_validate (this, key, value, opt, op_errstr);
out:
        return ret;
}
static int
glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
{
@ -315,6 +351,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
uint32_t local_key_op_version = 0;
gf_boolean_t origin_glusterd = _gf_true;
gf_boolean_t check_op_version = _gf_true;
gf_boolean_t all_vol = _gf_false;
GF_ASSERT (dict);
this = THIS;
@ -399,26 +436,30 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
goto out;
}
exists = glusterd_check_volume_exists (volname);
if (!exists) {
snprintf (errstr, sizeof (errstr), "Volume %s does not exist",
volname);
gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
*op_errstr = gf_strdup (errstr);
ret = -1;
goto out;
}
if (strcasecmp (volname, "all") != 0) {
exists = glusterd_check_volume_exists (volname);
if (!exists) {
snprintf (errstr, sizeof (errstr), "Volume %s does "
"not exist", volname);
gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
*op_errstr = gf_strdup (errstr);
ret = -1;
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Unable to allocate memory");
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Unable to allocate memory");
goto out;
}
ret = glusterd_validate_volume_id (dict, volinfo);
if (ret)
goto out;
ret = glusterd_validate_volume_id (dict, volinfo);
if (ret)
goto out;
} else {
all_vol = _gf_true;
}
local_new_op_version = priv->op_version;
@ -473,6 +514,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
ret = -1;
goto out;
}
if (!exists) {
gf_log (this->name, GF_LOG_ERROR,
"Option with name: %s "
@ -490,6 +532,11 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
if (key_fixed)
key = key_fixed;
ALL_VOLUME_OPTION_CHECK (volname, key, ret, op_errstr, out);
ret = glusterd_validate_quorum_options (this, key, value,
op_errstr);
if (ret)
goto out;
local_key_op_version = glusterd_get_op_version_for_key (key);
if (local_key_op_version > local_new_op_version)
@ -539,9 +586,9 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
}
*op_errstr = NULL;
if (!global_opt)
if (!global_opt && !all_vol)
ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr);
else {
else if (!all_vol) {
voliter = NULL;
list_for_each_entry (voliter, &priv->volumes, vol_list) {
ret = glusterd_validate_globalopts (voliter, val_dict, op_errstr);
@ -626,23 +673,24 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr)
goto out;
}
exists = glusterd_check_volume_exists (volname);
if (strcasecmp (volname, "all") != 0) {
exists = glusterd_check_volume_exists (volname);
if (!exists) {
snprintf (msg, sizeof (msg), "Volume %s does not "
"exist", volname);
gf_log ("", GF_LOG_ERROR, "%s", msg);
*op_errstr = gf_strdup (msg);
ret = -1;
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret)
goto out;
if (!exists) {
snprintf (msg, sizeof (msg), "Volume %s does not "
"exist", volname);
gf_log ("", GF_LOG_ERROR, "%s", msg);
*op_errstr = gf_strdup (msg);
ret = -1;
goto out;
ret = glusterd_validate_volume_id (dict, volinfo);
if (ret)
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret)
goto out;
ret = glusterd_validate_volume_id (dict, volinfo);
if (ret)
goto out;
ret = dict_get_str (dict, "key", &key);
if (ret) {
@ -666,6 +714,11 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr)
*op_errstr = gf_strdup (msg);
ret = -1;
goto out;
} else if (exists > 0) {
if (key_fixed)
key = key_fixed;
ALL_VOLUME_OPTION_CHECK (volname, key, ret,
op_errstr, out);
}
}
@ -992,6 +1045,22 @@ out:
return 0;
}
/*
 * dict_foreach callback used when resetting the cluster-wide options.
 *
 * Every entry except GLUSTERD_GLOBAL_OPT_VERSION is forwarded, together
 * with @data unchanged, to _delete_reconfig_opt(). The version entry is
 * skipped so that it is not handed to the delete callback.
 *
 * @data must be non-NULL (asserted). Always returns 0.
 */
static int
_delete_reconfig_global_opt (dict_t *this, char *key, data_t *value, void *data)
{
        GF_ASSERT (data);

        if (strcmp (GLUSTERD_GLOBAL_OPT_VERSION, key) != 0)
                _delete_reconfig_opt (this, key, value, data);

        return 0;
}
static int
glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key,
int32_t *is_force)
@ -1008,15 +1077,6 @@ glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key,
if (!strncmp(key, "all", 3))
dict_foreach (volinfo->dict, _delete_reconfig_opt, is_force);
else {
if (glusterd_check_option_exists (key, &key_fixed) != 1) {
gf_log ("glusterd", GF_LOG_ERROR,
"volinfo dict inconsistency: option %s not found",
key);
ret = -1;
goto out;
}
if (key_fixed)
key = key_fixed;
value = dict_get (volinfo->dict, key);
if (!value) {
gf_log ("glusterd", GF_LOG_DEBUG,
@ -1053,6 +1113,91 @@ out:
return ret;
}
/*
 * Reset one cluster-wide option, or every one of them when the "key" entry
 * of @dict is "all".
 *
 * The new option set is first assembled in a scratch dict (dup_opt) with
 * the next global option version, written out via glusterd_store_options(),
 * and only then applied to conf->opts.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int
glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict)
{
char *key = NULL;
char *key_fixed = NULL;
int ret = -1;
int32_t is_force = 0;
glusterd_conf_t *conf = NULL;
dict_t *dup_opt = NULL;
gf_boolean_t all = _gf_false;
char *next_version = NULL;
gf_boolean_t quorum_action = _gf_false;
conf = this->private;
ret = dict_get_str (dict, "key", &key);
if (ret)
goto out;
/* "force" is optional in the request; absence is treated as 0. */
ret = dict_get_int32 (dict, "force", &is_force);
if (ret)
is_force = 0;
if (strcmp (key, "all")) {
ret = glusterd_check_option_exists (key, &key_fixed);
if (ret <= 0) {
gf_log (this->name, GF_LOG_ERROR, "Invalid key %s",
key);
ret = -1;
goto out;
}
} else {
all = _gf_true;
}
/* Prefer the name filled in by glusterd_check_option_exists(), if any. */
if (key_fixed)
key = key_fixed;
ret = -1;
dup_opt = dict_new ();
if (!dup_opt)
goto out;
/* For a single key the scratch dict is the current set minus that key;
 * for "all" it starts empty and only receives the version below. */
if (!all) {
dict_copy (conf->opts, dup_opt);
dict_del (dup_opt, key);
}
ret = glusterd_get_next_global_opt_version_str (conf->opts,
&next_version);
if (ret)
goto out;
ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
if (ret)
goto out;
/* Persist first; conf->opts is modified only after the store succeeded. */
ret = glusterd_store_options (this, dup_opt);
if (ret)
goto out;
/* Evaluated against the not-yet-modified conf->opts. */
if (glusterd_is_quorum_changed (conf->opts, key, NULL))
quorum_action = _gf_true;
/* NOTE(review): on failure next_version is freed at out; whether
 * dict_set_dynstr() may already have released it is not visible here -
 * confirm. On success the pointer is cleared so it is not freed below. */
ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
next_version);
if (ret)
goto out;
else
next_version = NULL;
if (!all) {
dict_del (conf->opts, key);
} else {
dict_foreach (conf->opts, _delete_reconfig_global_opt,
&is_force);
}
out:
GF_FREE (key_fixed);
if (dup_opt)
dict_unref (dup_opt);
gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
/* Runs whenever a quorum change was detected, including the case where a
 * later step set ret to a failure value. */
if (quorum_action)
glusterd_do_quorum_action ();
GF_FREE (next_version);
return ret;
}
static int
glusterd_op_reset_volume (dict_t *dict, char **op_errstr)
@ -1061,14 +1206,23 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr)
int ret = -1;
char *volname = NULL;
char *key = NULL;
char *key_fixed = NULL;
int32_t is_force = 0;
gf_boolean_t quorum_action = _gf_false;
xlator_t *this = NULL;
this = THIS;
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to get volume name " );
goto out;
}
if (strcasecmp (volname, "all") == 0) {
ret = glusterd_op_reset_all_volume_options (this, dict);
goto out;
}
ret = dict_get_int32 (dict, "force", &is_force);
if (ret)
is_force = 0;
@ -1085,6 +1239,20 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr)
goto out;
}
if (strcmp (key, "all") &&
glusterd_check_option_exists (key, &key_fixed) != 1) {
gf_log ("glusterd", GF_LOG_ERROR,
"volinfo dict inconsistency: option %s not found",
key);
ret = -1;
goto out;
}
if (key_fixed)
key = key_fixed;
if (glusterd_is_quorum_changed (volinfo->dict, key, NULL))
quorum_action = _gf_true;
ret = glusterd_options_reset (volinfo, key, &is_force);
if (is_force == -1) {
ret = -1;
@ -1093,12 +1261,14 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr)
}
out:
GF_FREE (key_fixed);
if (quorum_action)
glusterd_do_quorum_action ();
gf_log ("", GF_LOG_DEBUG, "'volume reset' returning %d", ret);
return ret;
}
int
glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
{
@ -1127,6 +1297,91 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
return 0;
}
/*
 * Set a cluster-wide option ("volume set all <key> <value>").
 *
 * "key1"/"value1" are read from @dict. The new option set is assembled in a
 * scratch dict together with the next global option version, written out
 * via glusterd_store_options(), and only then applied to conf->opts.
 *
 * Returns 0 on success and a non-zero value on failure (including failure
 * to duplicate the value string).
 */
static int
glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict)
{
        char            *key           = NULL;
        char            *key_fixed     = NULL;
        char            *value         = NULL;
        char            *dup_value     = NULL;
        int              ret           = -1;
        glusterd_conf_t *conf          = NULL;
        dict_t          *dup_opt       = NULL;
        char            *next_version  = NULL;
        gf_boolean_t     quorum_action = _gf_false;

        conf = this->private;
        ret = dict_get_str (dict, "key1", &key);
        if (ret)
                goto out;

        ret = dict_get_str (dict, "value1", &value);
        if (ret) {
                gf_log (this->name, GF_LOG_ERROR,
                        "invalid key,value pair in 'volume set'");
                goto out;
        }
        ret = glusterd_check_option_exists (key, &key_fixed);
        if (ret <= 0) {
                gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", key);
                ret = -1;
                goto out;
        }

        /* Prefer the name filled in by glusterd_check_option_exists(). */
        if (key_fixed)
                key = key_fixed;

        ret = -1;
        dup_opt = dict_new ();
        if (!dup_opt)
                goto out;
        dict_copy (conf->opts, dup_opt);
        ret = dict_set_str (dup_opt, key, value);
        if (ret)
                goto out;

        ret = glusterd_get_next_global_opt_version_str (conf->opts,
                                                        &next_version);
        if (ret)
                goto out;

        ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
        if (ret)
                goto out;

        dup_value = gf_strdup (value);
        if (!dup_value) {
                /* ret is 0 from the call above; without this the function
                 * would report success although nothing was set. */
                ret = -1;
                goto out;
        }

        /* Persist first; conf->opts is modified only after this succeeded. */
        ret = glusterd_store_options (this, dup_opt);
        if (ret)
                goto out;

        /* Evaluated against the not-yet-modified conf->opts. */
        if (glusterd_is_quorum_changed (conf->opts, key, value))
                quorum_action = _gf_true;

        ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
                               next_version);
        if (ret)
                goto out;
        else
                next_version = NULL;

        ret = dict_set_dynstr (conf->opts, key, dup_value);
        /* NOTE(review): who owns the string after a failed dict_set_dynstr()
         * is not visible here. The pointer is dropped after the call in
         * either case, so the cleanup below only releases it on the paths
         * where it was never handed to the dict - confirm. */
        dup_value = NULL;
        if (ret)
                goto out;
out:
        GF_FREE (key_fixed);
        if (dup_opt)
                dict_unref (dup_opt);

        gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
        if (quorum_action)
                glusterd_do_quorum_action ();
        /* Non-NULL only when a step between the duplication and the final
         * dict_set_dynstr() failed. */
        GF_FREE (dup_value);
        GF_FREE (next_version);
        return ret;
}
static int
glusterd_op_set_volume (dict_t *dict)
{
@ -1146,7 +1401,7 @@ glusterd_op_set_volume (dict_t *dict)
int32_t dict_count = 0;
gf_boolean_t check_op_version = _gf_false;
uint32_t new_op_version = 0;
gf_boolean_t quorum_action = _gf_false;
this = THIS;
GF_ASSERT (this);
@ -1176,6 +1431,11 @@ glusterd_op_set_volume (dict_t *dict)
goto out;
}
if (strcasecmp (volname, "all") == 0) {
ret = glusterd_op_set_all_volume_options (this, dict);
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory");
@ -1242,7 +1502,6 @@ glusterd_op_set_volume (dict_t *dict)
key_fixed = NULL;
goto out;
}
}
if (glusterd_check_globaloption (key))
@ -1261,6 +1520,9 @@ glusterd_op_set_volume (dict_t *dict)
if (key_fixed)
key = key_fixed;
if (glusterd_is_quorum_changed (volinfo->dict, key, value))
quorum_action = _gf_true;
if (global_opt) {
list_for_each_entry (voliter, &priv->volumes, vol_list) {
value = gf_strdup (value);
@ -1350,6 +1612,8 @@ glusterd_op_set_volume (dict_t *dict)
out:
GF_FREE (key_fixed);
gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
if (quorum_action)
glusterd_do_quorum_action ();
return ret;
}
@ -1875,20 +2139,26 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx)
static int
glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx)
{
int ret = 0;
int ret = 0;
glusterd_op_lock_ctx_t *lock_ctx = NULL;
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
GF_ASSERT (event);
GF_ASSERT (ctx);
this = THIS;
priv = this->private;
lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
ret = glusterd_unlock (lock_ctx->uuid);
gf_log ("", GF_LOG_DEBUG, "Unlock Returned %d", ret);
gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret);
glusterd_op_unlock_send_resp (lock_ctx->req, ret);
if (priv->pending_quorum_action)
glusterd_do_quorum_action ();
return ret;
}
@ -2042,7 +2312,8 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
goto out;
}
if (strcmp (volname, "help") &&
strcmp (volname, "help-xml")) {
strcmp (volname, "help-xml") &&
strcasecmp (volname, "all")) {
ret = glusterd_dict_set_volid
(dict, volname, op_errstr);
if (ret)
@ -2095,10 +2366,13 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
goto out;
}
ret = glusterd_dict_set_volid (dict, volname,
op_errstr);
if (ret)
goto out;
if (strcasecmp (volname, "all")) {
ret = glusterd_dict_set_volid (dict,
volname,
op_errstr);
if (ret)
goto out;
}
dict_copy (dict, req_dict);
}
break;
@ -2114,6 +2388,105 @@ out:
return ret;
}
/*
 * Tell whether an operation is treated as a "get" (query) operation.
 *
 * True for GD_OP_STATUS_VOLUME, and for GD_OP_SET_VOLUME when the "volname"
 * entry of @dict is "help" or "help-xml" and no "key1" entry can be read.
 * False otherwise. @this is not used.
 */
gf_boolean_t
glusterd_is_get_op (xlator_t *this, glusterd_op_t op, dict_t *dict)
{
        char    *first_key = NULL;
        char    *vol       = NULL;

        if (op == GD_OP_STATUS_VOLUME)
                return _gf_true;

        if (op != GD_OP_SET_VOLUME)
                return _gf_false;

        /* A failed lookup leaves vol NULL, which is handled just below. */
        (void) dict_get_str (dict, "volname", &vol);
        if (vol == NULL)
                return _gf_false;

        if ((strcmp (vol, "help") != 0) && (strcmp (vol, "help-xml") != 0))
                return _gf_false;

        /* 'volume set help' carries no key/value pair. */
        if (dict_get_str (dict, "key1", &first_key) < 0)
                return _gf_true;

        return _gf_false;
}
/*
 * Decide whether server-quorum has to be validated before running @op.
 *
 * Validation is skipped (returns _gf_false) when
 *   - glusterd_is_get_op() reports the operation as a query, or
 *   - the operation is a volume set/reset whose key is known to
 *     glusterd_check_option_exists() and is a quorum option.
 * In every other case, including lookup failures, _gf_true is returned.
 */
gf_boolean_t
glusterd_is_op_quorum_validation_required (xlator_t *this, glusterd_op_t op,
                                           dict_t *dict)
{
        gf_boolean_t     needed     = _gf_true;
        char            *opt_name   = NULL;
        char            *fixed_name = NULL;
        int              ret        = -1;

        if (glusterd_is_get_op (this, op, dict)) {
                needed = _gf_false;
                goto out;
        }

        /* Only set and reset carry an option key worth inspecting. */
        switch (op) {
        case GD_OP_SET_VOLUME:
                ret = dict_get_str (dict, "key1", &opt_name);
                break;
        case GD_OP_RESET_VOLUME:
                ret = dict_get_str (dict, "key", &opt_name);
                break;
        default:
                goto out;
        }
        if (ret != 0)
                goto out;

        if (glusterd_check_option_exists (opt_name, &fixed_name) <= 0)
                goto out;

        if (fixed_name != NULL)
                opt_name = fixed_name;

        needed = glusterd_is_quorum_option (opt_name) ? _gf_false : _gf_true;

out:
        GF_FREE (fixed_name);
        return needed;
}
/*
 * Refuse a volume operation when server quorum is not met.
 *
 * Returns -1 and stores a duplicated error message in *op_errstr only when
 * all of the following hold, checked in this order:
 *   - quorum validation is required for @op,
 *   - @dict has a "volname" that glusterd_volinfo_find() resolves,
 *   - does_gd_meet_server_quorum() is false,
 *   - glusterd_is_volume_in_server_quorum() is true for that volume.
 * Returns 0 in every other case.
 */
static int
glusterd_op_validate_quorum (xlator_t *this, glusterd_op_t op,
                             dict_t *dict, char **op_errstr)
{
        char                    *vol_name = NULL;
        glusterd_volinfo_t      *vol      = NULL;

        if (!glusterd_is_op_quorum_validation_required (this, op, dict))
                return 0;

        /* A request without a resolvable volume is not subject to the
         * check. */
        if (dict_get_str (dict, "volname", &vol_name) != 0)
                return 0;

        if (glusterd_volinfo_find (vol_name, &vol) != 0)
                return 0;

        if (does_gd_meet_server_quorum (this))
                return 0;

        if (!glusterd_is_volume_in_server_quorum (vol))
                return 0;

        *op_errstr = gf_strdup ("Quorum not met. Volume operation not allowed.");
        return -1;
}
static int
glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)
{
@ -2141,6 +2514,13 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)
goto out;
}
ret = glusterd_op_validate_quorum (this, op, dict, &op_errstr);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, op_errstr);
opinfo.op_errstr = op_errstr;
goto out;
}
/* rsp_dict NULL from source */
ret = glusterd_op_stage_validate (op, dict, &op_errstr, NULL);
if (ret) {
@ -2817,6 +3197,8 @@ glusterd_op_txn_complete ()
GF_FREE (op_errstr);
if (priv->pending_quorum_action)
glusterd_do_quorum_action ();
gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}

View File

@ -959,6 +959,27 @@ glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event)
}
}
/*
 * Tell whether a peer should be counted towards quorum after a friend
 * state-machine transition.
 *
 * @old_state:  state of the peer before the event was handled.
 * @event_type: event that was handled.
 * @peerinfo:   peer, carrying its state after the transition.
 *
 * Returns _gf_true only when the peer is now BEFRIENDED and connected and,
 * if it already was BEFRIENDED before, the event is one of the two ACC
 * events.
 */
gf_boolean_t
gd_does_peer_affect_quorum (glusterd_friend_sm_state_t old_state,
                            glusterd_friend_sm_event_type_t event_type,
                            glusterd_peerinfo_t *peerinfo)
{
        gf_boolean_t    acc_event = _gf_false;

        if ((event_type == GD_FRIEND_EVENT_RCVD_ACC) ||
            (event_type == GD_FRIEND_EVENT_LOCAL_ACC))
                acc_event = _gf_true;

        /* When glusterd comes up with friends already BEFRIENDED in the
         * store, hold off until compare-data has happened (signalled by one
         * of the ACC events). */
        if ((old_state == GD_FRIEND_STATE_BEFRIENDED) && !acc_event)
                return _gf_false;

        if (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
                return _gf_false;

        return peerinfo->connected ? _gf_true : _gf_false;
}
int
glusterd_friend_sm ()
{
@ -970,6 +991,8 @@ glusterd_friend_sm ()
glusterd_peerinfo_t *peerinfo = NULL;
glusterd_friend_sm_event_type_t event_type = 0;
gf_boolean_t is_await_conn = _gf_false;
gf_boolean_t quorum_action = _gf_false;
glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT;
while (!list_empty (&gd_friend_sm_queue)) {
list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) {
@ -989,6 +1012,7 @@ glusterd_friend_sm ()
glusterd_friend_sm_event_name_get (event_type));
old_state = peerinfo->state.state;
state = glusterd_friend_state_table[peerinfo->state.state];
GF_ASSERT (state);
@ -1029,6 +1053,15 @@ glusterd_friend_sm ()
goto out;
}
if (gd_does_peer_affect_quorum (old_state, event_type,
peerinfo)) {
peerinfo->quorum_contrib = QUORUM_UP;
if (peerinfo->quorum_action) {
peerinfo->quorum_action = _gf_false;
quorum_action = _gf_true;
}
}
ret = glusterd_store_peerinfo (peerinfo);
glusterd_destroy_friend_event_context (event);
@ -1042,6 +1075,8 @@ glusterd_friend_sm ()
ret = 0;
out:
if (quorum_action)
glusterd_do_quorum_action ();
return ret;
}

View File

@ -36,6 +36,20 @@ struct glusterd_store_handle_ {
typedef struct glusterd_store_handle_ glusterd_store_handle_t;
/*
 * Per-peer quorum contribution, kept in glusterd_peerinfo_.quorum_contrib.
 * NOTE(review): only QUORUM_DOWN and QUORUM_UP are assigned in the code
 * visible alongside this definition; the meaning of the other two values
 * should be confirmed against their users.
 */
typedef enum gd_quorum_contribution_ {
QUORUM_NONE,
QUORUM_WAITING,
/* assigned in glusterd_peer_rpc_notify when a befriended peer disconnects */
QUORUM_DOWN,
/* assigned in glusterd_friend_sm when gd_does_peer_affect_quorum is true */
QUORUM_UP
} gd_quorum_contrib_t;
/*
 * Quorum status values.
 * NOTE(review): no user of this type is visible next to the definition;
 * going by the names these presumably describe whether server quorum
 * applies and is currently satisfied - confirm against the users.
 */
typedef enum gd_quorum_status_ {
QUORUM_UNKNOWN,
QUORUM_NOT_APPLICABLE,
QUORUM_MEETS,
QUORUM_DOES_NOT_MEET
} gd_quorum_status_t;
typedef enum glusterd_friend_sm_state_ {
GD_FRIEND_STATE_DEFAULT = 0,
GD_FRIEND_STATE_REQ_SENT,
@ -91,6 +105,8 @@ struct glusterd_peerinfo_ {
int connected;
glusterd_store_handle_t *shandle;
glusterd_sm_tr_log_t sm_log;
gf_boolean_t quorum_action;
gd_quorum_contrib_t quorum_contrib;
};
typedef struct glusterd_peerinfo_ glusterd_peerinfo_t;

View File

@ -1908,10 +1908,13 @@ glusterd_store_iter_destroy (glusterd_store_iter_t *iter)
{
int32_t ret = -1;
GF_ASSERT (iter);
GF_ASSERT (iter->fd > 0);
if (!iter)
return 0;
ret = fclose (iter->file);
if (iter->file)
ret = fclose (iter->file);
else
ret = 0;
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to close fd: %d, ret: %d, "
@ -2203,7 +2206,6 @@ glusterd_store_retrieve_node_state (char *volname)
ret = glusterd_store_handle_retrieve (path,
&volinfo->node_state_shandle);
if (ret)
goto out;
@ -2215,6 +2217,7 @@ glusterd_store_retrieve_node_state (char *volname)
ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno);
if (ret)
goto out;
if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG,
strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) {
volinfo->defrag_cmd = atoi (value);
@ -2440,6 +2443,102 @@ out:
return ret;
}
/*
 * Write the path of the global options file, "<workdir>/options", into
 * @path. At most @len bytes are written (bounded by snprintf).
 */
inline void
glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len)
{
        const char      *base_dir = conf->workdir;

        snprintf (path, len, "%s/options", base_dir);
}
/*
 * dict_foreach callback: write one key/value pair to the descriptor held
 * by the store handle passed in @data.
 *
 * The result of glusterd_store_save_value() is not inspected; the callback
 * always returns 0.
 */
int
_store_global_opts (dict_t *this, char *key, data_t *value, void *data)
{
        glusterd_store_handle_t *handle  = (glusterd_store_handle_t *) data;
        char                    *val_str = (char *) value->data;

        glusterd_store_save_value (handle->fd, key, val_str);

        return 0;
}
/*
 * Persist the key/value pairs of @opts to the global options file
 * ("<workdir>/options", see glusterd_store_set_options_path).
 *
 * The pairs are written to a temporary file obtained from
 * glusterd_store_mkstemp() and the temporary file is then put in place with
 * glusterd_store_rename_tmppath().
 *
 * Returns 0 on success and a non-zero value on failure.
 */
int32_t
glusterd_store_options (xlator_t *this, dict_t *opts)
{
glusterd_store_handle_t *shandle = NULL;
glusterd_conf_t *conf = NULL;
char path[PATH_MAX] = {0};
int fd = -1;
int32_t ret = -1;
conf = this->private;
glusterd_store_set_options_path (conf, path, sizeof (path));
ret = glusterd_store_handle_new (path, &shandle);
if (ret)
goto out;
/* NOTE(review): a descriptor of 0 is treated as failure here, yet the
 * cleanup below closes any fd >= 0 - the two checks disagree for fd == 0. */
fd = glusterd_store_mkstemp (shandle);
if (fd <= 0) {
ret = -1;
goto out;
}
/* The handle carries the descriptor only while the callback writes to it. */
shandle->fd = fd;
dict_foreach (opts, _store_global_opts, shandle);
/* Presumably cleared so the handle no longer refers to the descriptor
 * that this function closes itself - confirm. */
shandle->fd = 0;
ret = glusterd_store_rename_tmppath (shandle);
if (ret)
goto out;
out:
/* NOTE(review): shandle may still be NULL here if
 * glusterd_store_handle_new() failed - confirm the destroy call tolerates
 * that. */
glusterd_store_handle_destroy (shandle);
if (fd >=0 )
close (fd);
return ret;
}
/*
 * Load the global options file ("<workdir>/options") into conf->opts.
 *
 * Each key/value pair produced by the store iterator is inserted with
 * dict_set_dynstr(); the key string is released after every insertion.
 * Iteration is considered successful only when it stops with
 * GD_STORE_EOF.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
int32_t
glusterd_store_retrieve_options (xlator_t *this)
{
        char                       path[PATH_MAX] = {0};
        glusterd_conf_t           *priv           = this->private;
        glusterd_store_handle_t   *handle         = NULL;
        glusterd_store_iter_t     *iter           = NULL;
        char                      *key            = NULL;
        char                      *value          = NULL;
        glusterd_store_op_errno_t  op_errno       = 0;
        int                        ret            = -1;

        glusterd_store_set_options_path (priv, path, sizeof (path));

        ret = glusterd_store_handle_retrieve (path, &handle);
        if (ret != 0)
                goto out;

        ret = glusterd_store_iter_new (handle, &iter);
        if (ret != 0)
                goto out;

        for (;;) {
                ret = glusterd_store_iter_get_next (iter, &key, &value,
                                                    &op_errno);
                if (ret != 0)
                        break;

                ret = dict_set_dynstr (priv->opts, key, value);
                if (ret != 0) {
                        GF_FREE (key);
                        GF_FREE (value);
                        goto out;
                }

                /* Only the key is released here; value was given to the
                 * dict via dict_set_dynstr. */
                GF_FREE (key);
                key = NULL;
                value = NULL;
        }

        /* The iterator stopped: that is fine only at end of file. */
        if (op_errno == GD_STORE_EOF)
                ret = 0;

out:
        glusterd_store_iter_destroy (iter);
        glusterd_store_handle_destroy (handle);
        return ret;
}
int32_t
glusterd_store_retrieve_volumes (xlator_t *this)
@ -2840,9 +2939,8 @@ glusterd_store_retrieve_peers (xlator_t *this)
(void) glusterd_store_iter_destroy (iter);
args.mode = GD_MODE_SWITCH_ON;
ret = glusterd_friend_add (hostname, 0, state, &uuid,
&peerinfo, 1, &args);
&peerinfo, 1, NULL);
GF_FREE (hostname);
if (ret)
@ -2852,6 +2950,13 @@ glusterd_store_retrieve_peers (xlator_t *this)
glusterd_for_each_entry (entry, dir);
}
args.mode = GD_MODE_ON;
list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
ret = glusterd_friend_rpc_create (this, peerinfo, &args);
if (ret)
goto out;
}
out:
if (dir)
closedir (dir);
@ -2905,7 +3010,6 @@ glusterd_restore ()
goto out;
}
ret = glusterd_store_retrieve_volumes (this);
if (ret)
goto out;

View File

@ -138,4 +138,9 @@ glusterd_retrieve_op_version (xlator_t *this, int *op_version);
int
glusterd_store_global_info (xlator_t *this);
int32_t
glusterd_store_retrieve_options (xlator_t *this);
int32_t
glusterd_store_options (xlator_t *this, dict_t *opts);
#endif

View File

@ -68,6 +68,8 @@
#define NLMV4_VERSION 4
#define NLMV1_VERSION 1
/* Round X up to the nearest integer and yield it as an int: when X has a
 * positive fractional part the result is (int)(X + 1), otherwise (int)X.
 * X is expanded up to three times, so pass an expression without side
 * effects; the result is only meaningful while X fits in an int. */
#define CEILING_POS(X) (((X)-(int)(X)) > 0 ? (int)((X)+1) : (int)(X))
char *glusterd_sock_dir = "/var/run";
static glusterd_lock_t lock;
@ -939,7 +941,10 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)
{
GF_ASSERT (peerinfo);
glusterd_peerctx_t *peerctx = NULL;
gf_boolean_t quorum_action = _gf_false;
if (peerinfo->quorum_contrib != QUORUM_NONE)
quorum_action = _gf_true;
if (peerinfo->rpc) {
/* cleanup the saved-frames before last unref */
rpc_clnt_connection_cleanup (&peerinfo->rpc->conn);
@ -955,6 +960,8 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)
}
glusterd_peer_destroy (peerinfo);
if (quorum_action)
glusterd_do_quorum_action ();
return 0;
}
@ -982,12 +989,10 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo)
}
}
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
glusterd_service_stop (const char *service, char *pidfile, int sig,
gf_boolean_t force_kill)
@ -1655,25 +1660,25 @@ out:
}
int
_add_volinfo_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data)
_add_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data)
{
glusterd_voldict_ctx_t *ctx = NULL;
glusterd_dict_ctx_t *ctx = NULL;
char optkey[512] = {0,};
int ret = -1;
ctx = data;
snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count,
snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix,
ctx->key_name, ctx->opt_count);
ret = dict_set_str (ctx->dict, optkey, key);
if (ret)
gf_log ("", GF_LOG_ERROR, "option add for %s%d %s",
ctx->key_name, ctx->count, key);
snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count,
ctx->key_name, ctx->opt_count, key);
snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix,
ctx->val_name, ctx->opt_count);
ret = dict_set_str (ctx->dict, optkey, value->data);
if (ret)
gf_log ("", GF_LOG_ERROR, "option add for %s%d %s",
ctx->val_name, ctx->count, value->data);
ctx->val_name, ctx->opt_count, value->data);
ctx->opt_count++;
return ret;
@ -1711,6 +1716,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
dict_t *dict, int32_t count)
{
int32_t ret = -1;
char prefix[512] = {0,};
char key[512] = {0,};
glusterd_brickinfo_t *brickinfo = NULL;
int32_t i = 1;
@ -1718,7 +1724,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
char *src_brick = NULL;
char *dst_brick = NULL;
char *str = NULL;
glusterd_voldict_ctx_t ctx = {0};
glusterd_dict_ctx_t ctx = {0};
GF_ASSERT (dict);
GF_ASSERT (volinfo);
@ -1850,14 +1856,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
goto out;
}
snprintf (prefix, sizeof (prefix), "volume%d", count);
ctx.dict = dict;
ctx.count = count;
ctx.prefix = prefix;
ctx.opt_count = 1;
ctx.key_name = "key";
ctx.val_name = "value";
GF_ASSERT (volinfo->dict);
dict_foreach (volinfo->dict, _add_volinfo_dict_to_prdict, &ctx);
dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx);
ctx.opt_count--;
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.opt-count", count);
@ -1866,13 +1873,13 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
goto out;
ctx.dict = dict;
ctx.count = count;
ctx.prefix = prefix;
ctx.opt_count = 1;
ctx.key_name = "slave-num";
ctx.val_name = "slave-val";
GF_ASSERT (volinfo->gsync_slaves);
dict_foreach (volinfo->gsync_slaves, _add_volinfo_dict_to_prdict, &ctx);
dict_foreach (volinfo->gsync_slaves, _add_dict_to_prdict, &ctx);
ctx.opt_count--;
memset (key, 0, sizeof (key));
@ -1914,6 +1921,7 @@ glusterd_build_volume_dict (dict_t **vols)
glusterd_conf_t *priv = NULL;
glusterd_volinfo_t *volinfo = NULL;
int32_t count = 0;
glusterd_dict_ctx_t ctx = {0};
priv = THIS->private;
@ -1934,6 +1942,17 @@ glusterd_build_volume_dict (dict_t **vols)
if (ret)
goto out;
ctx.dict = dict;
ctx.prefix = "global";
ctx.opt_count = 1;
ctx.key_name = "key";
ctx.val_name = "val";
dict_foreach (priv->opts, _add_dict_to_prdict, &ctx);
ctx.opt_count--;
ret = dict_set_int32 (dict, "global-opt-count", ctx.opt_count);
if (ret)
goto out;
*vols = dict;
out:
gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
@ -2015,8 +2034,8 @@ out:
}
static int32_t
import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix,
char *value_prefix, int opt_count, int count)
import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix,
char *value_prefix, int opt_count, char *prefix)
{
char key[512] = {0,};
int32_t ret = 0;
@ -2028,8 +2047,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix,
while (i <= opt_count) {
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.%s%d",
count, key_prefix, i);
snprintf (key, sizeof (key), "%s.%s%d",
prefix, key_prefix, i);
ret = dict_get_str (vols, key, &opt_key);
if (ret) {
snprintf (msg, sizeof (msg), "Volume dict key not "
@ -2038,8 +2057,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix,
}
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.%s%d",
count, value_prefix, i);
snprintf (key, sizeof (key), "%s.%s%d",
prefix, value_prefix, i);
ret = dict_get_str (vols, key, &opt_val);
if (ret) {
snprintf (msg, sizeof (msg), "Volume dict value not "
@ -2068,6 +2087,250 @@ out:
}
/* Return _gf_true when `option` is exactly one of the server-quorum keys
 * (GLUSTERD_QUORUM_TYPE_KEY or GLUSTERD_QUORUM_RATIO_KEY), _gf_false
 * otherwise. */
gf_boolean_t
glusterd_is_quorum_option (char *option)
{
        char  *quorum_keys[] = {GLUSTERD_QUORUM_TYPE_KEY,
                                GLUSTERD_QUORUM_RATIO_KEY, NULL};
        char **cursor        = NULL;

        for (cursor = quorum_keys; *cursor != NULL; cursor++) {
                if (!strcmp (option, *cursor))
                        return _gf_true;
        }

        return _gf_false;
}
/* Decide whether setting `option` to `value` changes the quorum
 * configuration currently held in `options`.
 *
 * Only the literal option "all" and the two quorum keys are considered;
 * any other option yields _gf_false.  For "all", `value` is compared
 * against both the stored quorum type and the stored quorum ratio.  The
 * answer is _gf_false when a stored value exists and equals the new one,
 * or when there is neither an old nor a new value at all; otherwise it is
 * _gf_true. */
gf_boolean_t
glusterd_is_quorum_changed (dict_t *options, char *option, char *value)
{
        gf_boolean_t  is_all    = _gf_false;
        char         *old_type  = NULL;
        char         *new_type  = NULL;
        char         *old_ratio = NULL;
        char         *new_ratio = NULL;

        if (strcmp ("all", option) == 0)
                is_all = _gf_true;
        else if (!glusterd_is_quorum_option (option))
                return _gf_false;

        /* A failed lookup simply leaves the "old" pointer NULL. */
        if (is_all || (strcmp (GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) {
                new_type = value;
                (void) dict_get_str (options, GLUSTERD_QUORUM_TYPE_KEY,
                                     &old_type);
        }

        if (is_all || (strcmp (GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) {
                new_ratio = value;
                (void) dict_get_str (options, GLUSTERD_QUORUM_RATIO_KEY,
                                     &old_ratio);
        }

        if (old_type && new_type && (strcmp (old_type, new_type) == 0))
                return _gf_false;

        if (old_ratio && new_ratio && (strcmp (old_ratio, new_ratio) == 0))
                return _gf_false;

        if (!old_ratio && !new_ratio && !old_type && !new_type)
                return _gf_false;

        return _gf_true;
}
/* A peer is part of the quorum membership when its contribution state is
 * either QUORUM_UP or QUORUM_DOWN. */
static inline gf_boolean_t
_is_contributing_to_quorum (gd_quorum_contrib_t contrib)
{
        return ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN))
                ? _gf_true : _gf_false;
}
/* Quorum is met when the number of active members reaches the required
 * quorum count. */
static inline gf_boolean_t
_does_quorum_meet (int active_count, int quorum_count)
{
        gf_boolean_t met = (quorum_count <= active_count);

        return met;
}
/* Compute the cluster's quorum figures.
 *
 * The local node always counts as one quorum member and one active
 * member.  Every peer whose quorum_contrib is QUORUM_UP or QUORUM_DOWN is
 * a quorum member; peers in QUORUM_UP are additionally active.  If any
 * peer is still QUORUM_WAITING the counts cannot be determined and -1 is
 * returned.
 *
 * The required quorum count is ceil(members * ratio / 100) when a valid
 * GLUSTERD_QUORUM_RATIO_KEY percentage is stored in conf->opts, and
 * (members * 50 / 100) + 1 in integer arithmetic otherwise.  A stored
 * ratio that cannot be parsed is logged and ignored, so the majority rule
 * applies instead of a zero percentage that would make every cluster
 * appear to meet quorum.
 *
 * @active_count: optional out-parameter, number of active members.
 * @quorum_count: optional out-parameter, members needed for quorum.
 *
 * Returns 0 on success, -1 if a peer is in QUORUM_WAITING. */
int
glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count,
                                    int *quorum_count)
{
        glusterd_peerinfo_t *peerinfo          = NULL;
        glusterd_conf_t     *conf              = NULL;
        int                  ret               = -1;
        int                  inquorum_count    = 0;
        char                *val               = NULL;
        double               quorum_percentage = 0.0;
        gf_boolean_t         ratio             = _gf_false;
        int                  count             = 0;

        conf = this->private;

        /* Start with counting self */
        inquorum_count = 1;
        if (active_count)
                *active_count = 1;

        list_for_each_entry (peerinfo, &conf->peers, uuid_list) {
                if (peerinfo->quorum_contrib == QUORUM_WAITING)
                        goto out;

                if (_is_contributing_to_quorum (peerinfo->quorum_contrib))
                        inquorum_count = inquorum_count + 1;

                if (active_count && (peerinfo->quorum_contrib == QUORUM_UP))
                        *active_count = *active_count + 1;
        }

        ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val);
        if (ret == 0) {
                /* Only trust the ratio once it has actually parsed. */
                ret = gf_string2percent (val, &quorum_percentage);
                if (!ret)
                        ratio = _gf_true;
                else
                        gf_log (this->name, GF_LOG_ERROR, "Invalid %s "
                                "value '%s', using majority quorum",
                                GLUSTERD_QUORUM_RATIO_KEY, val);
        }

        if (ratio)
                count = CEILING_POS (inquorum_count *
                                     quorum_percentage / 100.0);
        else
                count = (inquorum_count * 50 / 100) + 1;

        if (quorum_count)
                *quorum_count = count;
        ret = 0;
out:
        return ret;
}
/* Return _gf_true when the volume's option dict carries
 * GLUSTERD_QUORUM_TYPE_KEY with the value GLUSTERD_SERVER_QUORUM;
 * _gf_false when the key is absent or holds anything else. */
gf_boolean_t
glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo)
{
        char *quorum_type = NULL;

        if (dict_get_str (volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY,
                          &quorum_type))
                return _gf_false;

        if (strcmp (quorum_type, GLUSTERD_SERVER_QUORUM) != 0)
                return _gf_false;

        return _gf_true;
}
/* Scan the volume list of this glusterd instance and report whether at
 * least one volume has server quorum enabled. */
gf_boolean_t
glusterd_is_any_volume_in_server_quorum (xlator_t *this)
{
        glusterd_conf_t    *priv  = this->private;
        glusterd_volinfo_t *vol   = NULL;
        gf_boolean_t        found = _gf_false;

        list_for_each_entry (vol, &priv->volumes, vol_list) {
                if (glusterd_is_volume_in_server_quorum (vol)) {
                        found = _gf_true;
                        break;
                }
        }

        return found;
}
/* Report whether this glusterd currently sees enough active cluster
 * members to satisfy server quorum.  Yields _gf_false both when the
 * counts cannot be obtained and when the active count is below the
 * required quorum count. */
gf_boolean_t
does_gd_meet_server_quorum (xlator_t *this)
{
        int needed = 0;
        int active = 0;

        if (glusterd_get_quorum_cluster_counts (this, &active, &needed))
                return _gf_false;

        return _does_quorum_meet (active, needed) ? _gf_true : _gf_false;
}
/* Apply the quorum verdict to the local bricks of one volume.
 *
 * Volumes that are not in GLUSTERD_STATUS_STARTED are left untouched.  A
 * volume without server quorum enabled is treated as if quorum were met.
 * Every brick that glusterd_is_local_brick accepts is then started when
 * quorum is met and stopped when it is not. */
void
glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
                                  gf_boolean_t meets_quorum)
{
        glusterd_brickinfo_t *brick = NULL;

        if (volinfo->status != GLUSTERD_STATUS_STARTED)
                return;

        if (!glusterd_is_volume_in_server_quorum (volinfo))
                meets_quorum = _gf_true;

        list_for_each_entry (brick, &volinfo->bricks, brick_list) {
                if (glusterd_is_local_brick (this, volinfo, brick)) {
                        if (meets_quorum)
                                glusterd_brick_start (volinfo, brick,
                                                      _gf_false);
                        else
                                glusterd_brick_stop (volinfo, brick,
                                                     _gf_false);
                }
        }
}
/* Re-evaluate server quorum and start or stop local bricks of every
 * volume accordingly.
 *
 * conf->pending_quorum_action is raised first.  If the glusterd lock
 * cannot be taken the function returns the lock error with the flag still
 * raised (NOTE(review): presumably so the action is retried later --
 * confirm against the unlock path).  Once the lock is held, the quorum
 * counts are computed and each volume is processed; the lock is then
 * released and the flag cleared, whether or not the counts were
 * available.
 *
 * Returns 0 on success, otherwise the return value of the failing lock
 * or count helper. */
int
glusterd_do_quorum_action ()
{
        xlator_t           *this   = THIS;
        glusterd_conf_t    *priv   = this->private;
        glusterd_volinfo_t *vol    = NULL;
        gf_boolean_t        meets  = _gf_false;
        int                 active = 0;
        int                 needed = 0;
        int                 ret    = 0;

        priv->pending_quorum_action = _gf_true;

        ret = glusterd_lock (priv->uuid);
        if (ret)
                return ret;

        ret = glusterd_get_quorum_cluster_counts (this, &active, &needed);
        if (ret == 0) {
                if (_does_quorum_meet (active, needed))
                        meets = _gf_true;

                list_for_each_entry (vol, &priv->volumes, vol_list) {
                        glusterd_do_volume_quorum_action (this, vol, meets);
                }
        }

        (void)glusterd_unlock (priv->uuid);
        priv->pending_quorum_action = _gf_false;

        return ret;
}
int32_t
glusterd_import_friend_volume_opts (dict_t *vols, int count,
glusterd_volinfo_t *volinfo)
@ -2076,6 +2339,7 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count,
int32_t ret = -1;
int opt_count = 0;
char msg[2048] = {0};
char volume_prefix[1024] = {0};
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.opt-count", count);
@ -2086,8 +2350,9 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count,
goto out;
}
ret = import_prdict_volinfo_dict (vols, volinfo->dict, "key",
"value", opt_count, count);
snprintf (volume_prefix, sizeof (volume_prefix), "volume%d", count);
ret = import_prdict_dict (vols, volinfo->dict, "key", "value",
opt_count, volume_prefix);
if (ret) {
snprintf (msg, sizeof (msg), "Unable to import options dict "
"specified for %s", volinfo->volname);
@ -2103,9 +2368,8 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count,
goto out;
}
ret = import_prdict_volinfo_dict (vols, volinfo->gsync_slaves,
"slave-num", "slave-val", opt_count,
count);
ret = import_prdict_dict (vols, volinfo->gsync_slaves, "slave-num",
"slave-val", opt_count, volume_prefix);
if (ret) {
snprintf (msg, sizeof (msg), "Unable to import gsync sessions "
"specified for %s", volinfo->volname);
@ -2602,6 +2866,95 @@ out:
return ret;
}
/* Read GLUSTERD_GLOBAL_OPT_VERSION from `opts` and convert it to an
 * unsigned integer stored in `*version`.
 *
 * Returns 0 on success, or the non-zero result of the failing lookup or
 * conversion. */
int
glusterd_get_global_opt_version (dict_t *opts, uint32_t *version)
{
        char *version_str = NULL;
        int   ret         = -1;

        ret = dict_get_str (opts, GLUSTERD_GLOBAL_OPT_VERSION, &version_str);
        if (ret == 0)
                ret = gf_string2uint (version_str, version);

        return ret;
}
/* Produce the successor of the global option version stored in `opts` as
 * a newly allocated decimal string.
 *
 * @opts:        dict holding GLUSTERD_GLOBAL_OPT_VERSION.
 * @version_str: out-parameter; on success points at a gf_strdup'ed string
 *               that the caller owns.
 *
 * Returns 0 on success, the lookup/parse error if the current version
 * cannot be read, or -1 if the string could not be allocated. */
int
glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str)
{
        int      ret                = -1;
        char     version_string[64] = {0};
        uint32_t version            = 0;

        ret = glusterd_get_global_opt_version (opts, &version);
        if (ret)
                goto out;

        version++;
        snprintf (version_string, sizeof (version_string), "%"PRIu32, version);

        /* ret is 0 at this point, so an allocation failure has to be
         * turned into an error explicitly; otherwise the caller would be
         * told "success" while holding a NULL string. */
        *version_str = gf_strdup (version_string);
        ret = (*version_str) ? 0 : -1;
out:
        return ret;
}
/* Import the cluster-wide options a peer sent in `friend_data`.
 *
 * A peer that does not send "global-opt-count" is treated as an older
 * version and nothing is imported.  Otherwise the "global.key<N>" /
 * "global.val<N>" pairs are unpacked into a fresh dict; if the peer's
 * GLUSTERD_GLOBAL_OPT_VERSION is greater than the local one, the imported
 * options are written to the store and replace conf->opts.
 *
 * Returns 0 on success (including "nothing to import" and "peer is not
 * newer"), non-zero on allocation, import, version-parse or store
 * failure. */
int32_t
glusterd_import_global_opts (dict_t *friend_data)
{
        xlator_t        *this           = NULL;
        glusterd_conf_t *conf           = NULL;
        int              ret            = -1;
        dict_t          *import_options = NULL;
        int              count          = 0;
        uint32_t         local_version  = 0;
        uint32_t         remote_version = 0;

        this = THIS;
        conf = this->private;

        ret = dict_get_int32 (friend_data, "global-opt-count", &count);
        if (ret) {
                //old version peer
                ret = 0;
                goto out;
        }

        import_options = dict_new ();
        if (!import_options) {
                /* ret is 0 from the lookup above; without this an
                 * allocation failure would be reported as success. */
                ret = -1;
                goto out;
        }

        ret = import_prdict_dict (friend_data, import_options, "key", "val",
                                  count, "global");
        if (ret) {
                gf_log (this->name, GF_LOG_ERROR, "Failed to import"
                        " global options");
                goto out;
        }

        ret = glusterd_get_global_opt_version (conf->opts, &local_version);
        if (ret)
                goto out;

        ret = glusterd_get_global_opt_version (import_options, &remote_version);
        if (ret)
                goto out;

        if (remote_version > local_version) {
                ret = glusterd_store_options (this, import_options);
                if (ret)
                        goto out;

                /* Swap in the newer set; the extra reference taken here
                 * is the one conf->opts keeps after the unref at out. */
                dict_unref (conf->opts);
                conf->opts = dict_ref (import_options);
        }

        ret = 0;
out:
        if (import_options)
                dict_unref (import_options);
        return ret;
}
int32_t
glusterd_compare_friend_data (dict_t *vols, int32_t *status)
{
@ -2639,6 +2992,9 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status)
stale_nfs = _gf_true;
if (glusterd_is_nodesvc_running ("glustershd"))
stale_shd = _gf_true;
ret = glusterd_import_global_opts (vols);
if (ret)
goto out;
ret = glusterd_import_friend_volumes (vols);
if (ret)
goto out;
@ -3523,14 +3879,16 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
int ret = 0;
list_for_each_entry (volinfo, &conf->volumes, vol_list) {
/* If volume status is not started, do not proceed */
if (volinfo->status == GLUSTERD_STATUS_STARTED) {
list_for_each_entry (brickinfo, &volinfo->bricks,
brick_list) {
glusterd_brick_start (volinfo, brickinfo,
_gf_true);
}
start_nodesvcs = _gf_true;
if (volinfo->status != GLUSTERD_STATUS_STARTED)
continue;
start_nodesvcs = _gf_true;
if (glusterd_is_volume_in_server_quorum (volinfo)) {
//these bricks will be restarted once the quorum is met
continue;
}
list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
glusterd_brick_start (volinfo, brickinfo, _gf_true);
}
}
@ -4811,8 +5169,8 @@ out:
int
glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo,
glusterd_friend_sm_state_t state,
uuid_t *uuid, const char *hostname)
glusterd_friend_sm_state_t state, uuid_t *uuid,
const char *hostname, int port)
{
glusterd_peerinfo_t *new_peer = NULL;
int ret = -1;
@ -4842,6 +5200,9 @@ glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo,
if (ret)
goto out;
if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED)
new_peer->quorum_contrib = QUORUM_WAITING;
new_peer->port = port;
*peerinfo = new_peer;
out:
if (ret && new_peer)

View File

@ -33,13 +33,13 @@ struct glusterd_lock_ {
time_t timestamp;
};
typedef struct glusterd_voldict_ctx_ {
typedef struct glusterd_dict_ctx_ {
dict_t *dict;
int count;
int opt_count;
char *key_name;
char *val_name;
} glusterd_voldict_ctx_t;
char *prefix;
} glusterd_dict_ctx_t;
int
glusterd_compare_lines (const void *a, const void *b);
@ -323,8 +323,8 @@ glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log,
int event);
int
glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo,
glusterd_friend_sm_state_t state,
uuid_t *uuid, const char *hostname);
glusterd_friend_sm_state_t state, uuid_t *uuid,
const char *hostname, int port);
int
glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log,
char * (*state_name_get) (int),
@ -463,4 +463,25 @@ glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
*/
gf_boolean_t
is_origin_glusterd ();
gf_boolean_t
glusterd_is_quorum_changed (dict_t *options, char *option, char *value);
int
glusterd_do_quorum_action ();
int
glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count,
int *quorum_count);
int
glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str);
gf_boolean_t
glusterd_is_quorum_option (char *option);
gf_boolean_t
glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo);
gf_boolean_t
glusterd_is_any_volume_in_server_quorum (xlator_t *this);
gf_boolean_t
does_gd_meet_server_quorum (xlator_t *this);
#endif

View File

@ -184,6 +184,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{"network.inode-lru-limit", "protocol/server", NULL, NULL, NO_DOC, 0, 1},
{AUTH_ALLOW_MAP_KEY, "protocol/server", "!server-auth", "*", DOC, 0, 1},
{AUTH_REJECT_MAP_KEY, "protocol/server", "!server-auth", NULL, DOC, 0},
{"transport.keepalive", "protocol/server", "transport.socket.keepalive", NULL, NO_DOC, 0, 1},
{"server.allow-insecure", "protocol/server", "rpc-auth-allow-insecure", NULL, NO_DOC, 0, 1},
{"server.statedump-path", "protocol/server", "statedump-path", NULL, DOC, 0, 1},
@ -252,6 +253,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{"storage.owner-gid", "storage/posix", "brick-gid", NULL, DOC, 0, 2},
{"config.memory-accounting", "configuration", "!config", NULL, DOC, 0, 2},
{"config.transport", "configuration", "!config", NULL, DOC, 0, 2},
{GLUSTERD_QUORUM_TYPE_KEY, "mgmt/glusterd", NULL, "off", DOC, 0},
{GLUSTERD_QUORUM_RATIO_KEY, "mgmt/glusterd", NULL, "0", DOC, 0},
{NULL, }
};

View File

@ -117,6 +117,36 @@ glusterd_uuid_init ()
return 0;
}
/* Initialise priv->opts, the dict of cluster-wide options.
 *
 * A new dict is allocated and filled from the on-disk options store.  If
 * that load fails, the dict is seeded with GLUSTERD_GLOBAL_OPT_VERSION =
 * "0" and written to the store.
 *
 * Returns 0 on success; non-zero if the dict cannot be allocated, the
 * initial version cannot be set, or the store cannot be written. */
int
glusterd_options_init (xlator_t *this)
{
        int              ret             = -1;
        glusterd_conf_t *priv            = NULL;
        char            *initial_version = "0";

        priv = this->private;

        priv->opts = dict_new ();
        if (!priv->opts)
                goto out;

        ret = glusterd_store_retrieve_options (this);
        if (ret == 0)
                goto out;

        /* Nothing usable on disk: start from version "0" and persist it. */
        ret = dict_set_str (priv->opts, GLUSTERD_GLOBAL_OPT_VERSION,
                            initial_version);
        if (ret)
                goto out;

        ret = glusterd_store_options (this, priv->opts);
        if (ret)
                gf_log (this->name, GF_LOG_ERROR, "Unable to store version");
out:
        /* Propagate the real status so that init() notices failures
         * instead of continuing with a missing or unseeded dict. */
        return ret;
}
int
glusterd_fetchspec_notify (xlator_t *this)
{
@ -1028,6 +1058,10 @@ init (xlator_t *this)
if (ret < 0)
goto out;
ret = glusterd_options_init (this);
if (ret < 0)
goto out;
ret = glusterd_handle_upgrade_downgrade (this->options, conf);
if (ret)
goto out;
@ -1172,5 +1206,12 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
},
#endif
{ .key = {"server-quorum-type"},
.type = GF_OPTION_TYPE_STR,
.value = { "none", "server"},
},
{ .key = {"server-quorum-ratio"},
.type = GF_OPTION_TYPE_PERCENT,
},
{ .key = {NULL} },
};

View File

@ -43,7 +43,14 @@
#define GLUSTERD_TR_LOG_SIZE 50
#define GLUSTERD_NAME "glusterd"
#define GLUSTERD_SOCKET_LISTEN_BACKLOG 128
#define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type"
#define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio"
#define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version"
#define GLUSTERD_SERVER_QUORUM "server"
struct glusterd_volinfo_;
typedef struct glusterd_volinfo_ glusterd_volinfo_t;
typedef enum glusterd_op_ {
GD_OP_NONE = 0,
@ -74,7 +81,6 @@ typedef enum glusterd_op_ {
GD_OP_MAX,
} glusterd_op_t;
struct glusterd_store_iter_ {
int fd;
FILE *file;
@ -86,6 +92,7 @@ typedef struct glusterd_store_iter_ glusterd_store_iter_t;
struct glusterd_volgen {
dict_t *dict;
};
typedef struct {
struct rpc_clnt *rpc;
gf_boolean_t running;
@ -108,6 +115,12 @@ typedef struct {
should keep changing with introduction of newer
versions */
/* Container for cluster-wide option values.
 * NOTE(review): nothing in the visible code reads or writes this type;
 * the field descriptions below are inferred from the names only and
 * should be confirmed before relying on them. */
typedef struct {
/* presumably whether server quorum is enabled -- TODO confirm */
gf_boolean_t quorum;
/* presumably the configured quorum ratio -- TODO confirm units/range */
double quorum_ratio;
/* presumably the version counter of the global option set -- TODO confirm */
uint64_t gl_opt_version;
} gd_global_opts_t;
typedef struct {
struct _volfile_ctx *volfile;
pthread_mutex_t mutex;
@ -134,11 +147,11 @@ typedef struct {
#endif
pthread_t brick_thread;
void *hooks_priv;
xlator_t *xl; /* Should be set to 'THIS' before creating thread */
/* need for proper handshake_t */
int op_version; /* Starts with 1 for 3.3.0 */
xlator_t *xl; /* Should be set to 'THIS' before creating thread */
gf_boolean_t pending_quorum_action;
dict_t *opts;
} glusterd_conf_t;
@ -170,9 +183,6 @@ struct gf_defrag_brickinfo_ {
int size;
};
struct glusterd_volinfo_;
typedef struct glusterd_volinfo_ glusterd_volinfo_t;
typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo,
gf_defrag_status_t status);
@ -411,6 +421,9 @@ glusterd_friend_add (const char *hoststr, int port,
gf_boolean_t restore, glusterd_peerctx_args_t *args);
int
glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,
glusterd_peerctx_args_t *args);
int
glusterd_friend_remove (uuid_t uuid, char *hostname);
int