glusterd: Introduce option to limit no. of muxed bricks per process
This commit introduces a new global option that can be set to limit the number of multiplexed bricks in one process. Usage: `# gluster volume set all cluster.max-bricks-per-process <value>` If this option is not set, multiplexing will, for now, proceed with no limit; i.e. a brick process will have as many bricks multiplexed to it as possible. In other words, the current multiplexing behaviour won't change if this option isn't set to any value. This commit also introduces a brick process instance that contains information about a brick process, such as the number of bricks handled by the process (which is 1 in non-multiplexing cases), the list of bricks, and the port number, which also serves as a unique identifier for each brick process instance. The brick process list is maintained in 'glusterd_conf_t'. Updates: #151 Change-Id: Ib987d14ab0a4f6034dac01b73a4b2839f7b0b695 Signed-off-by: Samikshan Bairagya <samikshan@gmail.com> Reviewed-on: https://review.gluster.org/17469 Smoke: Gluster Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
This commit is contained in:
parent
e304f48fa2
commit
9e8ee31e64
@ -17,6 +17,8 @@ function count_brick_pids {
|
||||
| grep -v "N/A" | sort | uniq | wc -l
|
||||
}
|
||||
|
||||
cleanup
|
||||
|
||||
TEST glusterd
|
||||
TEST $CLI volume set all cluster.brick-multiplex on
|
||||
push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
|
||||
|
57
tests/bugs/core/multiplex-limit-issue-151.t
Normal file
57
tests/bugs/core/multiplex-limit-issue-151.t
Normal file
@ -0,0 +1,57 @@
|
||||
#!/bin/bash
|
||||
|
||||
. $(dirname $0)/../../include.rc
|
||||
. $(dirname $0)/../../traps.rc
|
||||
. $(dirname $0)/../../volume.rc
|
||||
|
||||
# Print how many lines of the XML volume status contain '<status>1'.
function count_up_bricks {
        $CLI --xml volume status all \
                | grep '<status>1' \
                | wc -l
}
|
||||
|
||||
# Print the number of processes that pgrep matches for 'glusterfsd'.
function count_brick_processes {
        pgrep glusterfsd \
                | wc -l
}
|
||||
|
||||
# Print the number of distinct <pid> values reported by the XML volume
# status, ignoring entries whose pid is "N/A".
function count_brick_pids {
        $CLI --xml volume status all \
                | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
                | grep -v "N/A" \
                | sort -u \
                | wc -l
}
|
||||
|
||||
cleanup;

TEST glusterd

# The limit can only be set once multiplexing is on; negative and
# non-numeric values must be rejected.
TEST $CLI volume set all cluster.brick-multiplex on
TEST ! $CLI volume set all cluster.max-bricks-per-process -1
TEST ! $CLI volume set all cluster.max-bricks-per-process foobar
TEST $CLI volume set all cluster.max-bricks-per-process 3

push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
push_trapfunc "cleanup"

TEST $CLI volume create $V0 $H0:$B0/brick{0..5}
TEST $CLI volume start $V0

# Six bricks with a limit of three per process: two brick processes are
# expected. These checks poll like every later check in this script, so a
# slow brick start-up does not fail the test spuriously.
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks

# Restart everything; the same distribution is expected afterwards.
pkill gluster
TEST glusterd

EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks

# A seventh brick is expected to end up in a third process.
TEST $CLI volume add-brick $V0 $H0:$B0/brick6

EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks

# Removing one brick leaves three processes serving six bricks.
TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start
TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 commit

EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
|
@ -71,7 +71,8 @@ typedef enum gf_gld_mem_types_ {
|
||||
gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55,
|
||||
gf_gld_mt_snap_create_args_t = gf_common_mt_end + 56,
|
||||
gf_gld_mt_local_peers_t = gf_common_mt_end + 57,
|
||||
gf_gld_mt_end = gf_common_mt_end + 58,
|
||||
gf_gld_mt_glusterd_brick_proc_t = gf_common_mt_end + 58,
|
||||
gf_gld_mt_end = gf_common_mt_end + 59,
|
||||
} gf_gld_mem_types_t;
|
||||
#endif
|
||||
|
||||
|
@ -41,7 +41,7 @@
|
||||
|
||||
#define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD
|
||||
|
||||
#define GLFS_NUM_MESSAGES 602
|
||||
#define GLFS_NUM_MESSAGES 606
|
||||
|
||||
#define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1)
|
||||
/* Messaged with message IDs */
|
||||
@ -4869,6 +4869,38 @@
|
||||
*/
|
||||
#define GD_MSG_VOL_SET_VALIDATION_INFO (GLUSTERD_COMP_BASE + 602)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*
|
||||
*/
|
||||
#define GD_MSG_NO_MUX_LIMIT (GLUSTERD_COMP_BASE + 603)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*
|
||||
*/
|
||||
#define GD_MSG_BRICKPROC_REM_BRICK_FAILED (GLUSTERD_COMP_BASE + 604)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*
|
||||
*/
|
||||
#define GD_MSG_BRICKPROC_ADD_BRICK_FAILED (GLUSTERD_COMP_BASE + 605)
|
||||
|
||||
/*!
|
||||
* @messageid
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*
|
||||
*/
|
||||
#define GD_MSG_BRICKPROC_NEW_FAILED (GLUSTERD_COMP_BASE + 606)
|
||||
|
||||
/*------------*/
|
||||
|
||||
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
|
||||
|
@ -79,6 +79,10 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {
|
||||
*/
|
||||
{ GLUSTERD_MAX_OP_VERSION_KEY, "BUG_NO_MAX_OP_VERSION"},
|
||||
{ GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"},
|
||||
/* Set this value to 1 by default implying non-multiplexed behaviour.
|
||||
* TBD: Discuss the default value for this. Maybe this should be a
|
||||
* dynamic value depending on the memory specifications per node */
|
||||
{ GLUSTERD_BRICKMUX_LIMIT_KEY, "1"},
|
||||
{ NULL },
|
||||
};
|
||||
|
||||
|
@ -488,8 +488,8 @@ __gluster_pmap_signin (rpcsvc_request_t *req)
|
||||
{
|
||||
pmap_signin_req args = {0,};
|
||||
pmap_signin_rsp rsp = {0,};
|
||||
glusterd_brickinfo_t *brickinfo = NULL;
|
||||
int ret = -1;
|
||||
glusterd_brickinfo_t *brickinfo = NULL;
|
||||
|
||||
ret = xdr_to_generic (req->msg[0], &args,
|
||||
(xdrproc_t)xdr_pmap_signin_req);
|
||||
@ -502,6 +502,7 @@ __gluster_pmap_signin (rpcsvc_request_t *req)
|
||||
GF_PMAP_PORT_BRICKSERVER, req->trans);
|
||||
|
||||
ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo);
|
||||
|
||||
fail:
|
||||
glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
|
||||
(xdrproc_t)xdr_pmap_signin_rsp);
|
||||
@ -569,6 +570,22 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
|
||||
* glusterd end when a brick is killed from the
|
||||
* backend */
|
||||
brickinfo->status = GF_BRICK_STOPPED;
|
||||
|
||||
/* Remove brick from brick process if not already
|
||||
* removed in the brick op phase. This situation would
|
||||
* arise when the brick is killed explicitly from the
|
||||
* backend */
|
||||
ret = glusterd_brick_process_remove_brick (brickinfo);
|
||||
if (ret) {
|
||||
gf_msg_debug (this->name, 0, "Couldn't remove "
|
||||
"brick %s:%s from brick process",
|
||||
brickinfo->hostname,
|
||||
brickinfo->path);
|
||||
/* Ignore 'ret' here since the brick might
|
||||
* have already been deleted in brick op phase
|
||||
*/
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -957,6 +957,7 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
|
||||
gd1_mgmt_brick_op_req *req = NULL;
|
||||
int ret = 0;
|
||||
xlator_t *this = NULL;
|
||||
glusterd_brickinfo_t *brickinfo = NULL;
|
||||
|
||||
this = THIS;
|
||||
args.op_ret = -1;
|
||||
@ -986,6 +987,23 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
|
||||
GF_FREE (args.errstr);
|
||||
}
|
||||
|
||||
if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) {
|
||||
if (args.op_ret == 0) {
|
||||
brickinfo = pnode->node;
|
||||
ret = glusterd_brick_process_remove_brick (brickinfo);
|
||||
if (ret) {
|
||||
gf_msg ("glusterd", GF_LOG_ERROR, 0,
|
||||
GD_MSG_BRICKPROC_REM_BRICK_FAILED,
|
||||
"Removing brick %s:%s from brick"
|
||||
" process failed",
|
||||
brickinfo->hostname,
|
||||
brickinfo->path);
|
||||
args.op_ret = ret;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (GD_OP_STATUS_VOLUME == op) {
|
||||
ret = dict_set_int32 (args.dict, "index", pnode->index);
|
||||
if (ret) {
|
||||
@ -1023,7 +1041,6 @@ out:
|
||||
dict_unref (args.dict);
|
||||
gd_brick_op_req_free (req);
|
||||
return args.op_ret;
|
||||
|
||||
}
|
||||
|
||||
int32_t
|
||||
|
@ -117,6 +117,46 @@ is_brick_mx_enabled (void)
|
||||
return ret ? _gf_false: enabled;
|
||||
}
|
||||
|
||||
int
|
||||
get_mux_limit_per_process (int *mux_limit)
|
||||
{
|
||||
char *value = NULL;
|
||||
int ret = -1;
|
||||
int max_bricks_per_proc = -1;
|
||||
xlator_t *this = NULL;
|
||||
glusterd_conf_t *priv = NULL;
|
||||
|
||||
this = THIS;
|
||||
GF_VALIDATE_OR_GOTO ("glusterd", this, out);
|
||||
|
||||
priv = this->private;
|
||||
GF_VALIDATE_OR_GOTO (this->name, priv, out);
|
||||
|
||||
if (!is_brick_mx_enabled()) {
|
||||
max_bricks_per_proc = 1;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = dict_get_str (priv->opts, GLUSTERD_BRICKMUX_LIMIT_KEY, &value);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
|
||||
"Can't get limit for number of bricks per brick "
|
||||
"process from dict");
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = gf_string2int (value, &max_bricks_per_proc);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
*mux_limit = max_bricks_per_proc;
|
||||
|
||||
gf_msg_debug ("glusterd", 0, "Mux limit set to %d bricks per process", *mux_limit);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern struct volopt_map_entry glusterd_volopt_map[];
|
||||
extern glusterd_all_vol_opts valid_all_vol_opts[];
|
||||
|
||||
@ -971,6 +1011,33 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t
|
||||
glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess)
|
||||
{
|
||||
glusterd_brick_proc_t *new_brickprocess = NULL;
|
||||
int32_t ret = -1;
|
||||
|
||||
GF_VALIDATE_OR_GOTO (THIS->name, brickprocess, out);
|
||||
|
||||
new_brickprocess = GF_CALLOC (1, sizeof(*new_brickprocess),
|
||||
gf_gld_mt_glusterd_brick_proc_t);
|
||||
|
||||
if (!new_brickprocess)
|
||||
goto out;
|
||||
|
||||
CDS_INIT_LIST_HEAD (&new_brickprocess->bricks);
|
||||
CDS_INIT_LIST_HEAD (&new_brickprocess->brick_proc_list);
|
||||
|
||||
new_brickprocess->brick_count = 0;
|
||||
*brickprocess = new_brickprocess;
|
||||
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
gf_msg_debug (THIS->name, 0, "Returning %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t
|
||||
glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo)
|
||||
{
|
||||
@ -2033,6 +2100,15 @@ retry:
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = glusterd_brick_process_add_brick (brickinfo, volinfo);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s "
|
||||
"to brick process failed.", brickinfo->hostname,
|
||||
brickinfo->path);
|
||||
goto out;
|
||||
}
|
||||
|
||||
connect:
|
||||
ret = glusterd_brick_connect (volinfo, brickinfo, socketpath);
|
||||
if (ret) {
|
||||
@ -2096,6 +2172,200 @@ glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static gf_boolean_t
|
||||
unsafe_option (dict_t *this, char *key, data_t *value, void *arg)
|
||||
{
|
||||
/*
|
||||
* Certain options are safe because they're already being handled other
|
||||
* ways, such as being copied down to the bricks (all auth options) or
|
||||
* being made irrelevant (event-threads). All others are suspect and
|
||||
* must be checked in the next function.
|
||||
*/
|
||||
if (fnmatch ("*auth*", key, 0) == 0) {
|
||||
return _gf_false;
|
||||
}
|
||||
|
||||
if (fnmatch ("*event-threads", key, 0) == 0) {
|
||||
return _gf_false;
|
||||
}
|
||||
|
||||
return _gf_true;
|
||||
}
|
||||
|
||||
static int
|
||||
opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2)
|
||||
{
|
||||
data_t *value2 = dict_get (dict2, key);
|
||||
int32_t min_len;
|
||||
|
||||
/*
|
||||
* If the option is only present on one, we can either look at the
|
||||
* default or assume a mismatch. Looking at the default is pretty
|
||||
* hard, because that's part of a structure within each translator and
|
||||
* there's no dlopen interface to get at it, so we assume a mismatch.
|
||||
* If the user really wants them to match (and for their bricks to be
|
||||
* multiplexed, they can always reset the option).
|
||||
*/
|
||||
if (!value2) {
|
||||
gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key);
|
||||
return -1;
|
||||
}
|
||||
|
||||
min_len = MIN (value1->len, value2->len);
|
||||
if (strncmp (value1->data, value2->data, min_len) != 0) {
|
||||
gf_log (THIS->name, GF_LOG_DEBUG,
|
||||
"option mismatch, %s, %s != %s",
|
||||
key, value1->data, value2->data);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc)
|
||||
{
|
||||
cds_list_del_init (&brick_proc->brick_proc_list);
|
||||
cds_list_del_init (&brick_proc->bricks);
|
||||
|
||||
GF_FREE (brick_proc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo)
|
||||
{
|
||||
int ret = -1;
|
||||
xlator_t *this = NULL;
|
||||
glusterd_conf_t *priv = NULL;
|
||||
glusterd_brick_proc_t *brick_proc = NULL;
|
||||
glusterd_brickinfo_t *brickinfoiter = NULL;
|
||||
glusterd_brick_proc_t *brick_proc_tmp = NULL;
|
||||
glusterd_brickinfo_t *tmp = NULL;
|
||||
|
||||
this = THIS;
|
||||
GF_VALIDATE_OR_GOTO ("glusterd", this, out);
|
||||
|
||||
priv = this->private;
|
||||
GF_VALIDATE_OR_GOTO (this->name, priv, out);
|
||||
GF_VALIDATE_OR_GOTO (this->name, brickinfo, out);
|
||||
|
||||
cds_list_for_each_entry_safe (brick_proc, brick_proc_tmp,
|
||||
&priv->brick_procs, brick_proc_list) {
|
||||
if (brickinfo->port != brick_proc->port) {
|
||||
continue;
|
||||
}
|
||||
|
||||
GF_VALIDATE_OR_GOTO (this->name, (brick_proc->brick_count > 0), out);
|
||||
|
||||
cds_list_for_each_entry_safe (brickinfoiter, tmp,
|
||||
&brick_proc->bricks, brick_list) {
|
||||
if (strcmp (brickinfoiter->path, brickinfo->path) == 0) {
|
||||
cds_list_del_init (&brickinfoiter->brick_list);
|
||||
|
||||
GF_FREE (brickinfoiter->logfile);
|
||||
GF_FREE (brickinfoiter);
|
||||
brick_proc->brick_count--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If all bricks have been removed, delete the brick process */
|
||||
if (brick_proc->brick_count == 0) {
|
||||
ret = glusterd_brickprocess_delete (brick_proc);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,
|
||||
glusterd_volinfo_t *volinfo)
|
||||
{
|
||||
int ret = -1;
|
||||
xlator_t *this = NULL;
|
||||
glusterd_conf_t *priv = NULL;
|
||||
glusterd_brick_proc_t *brick_proc = NULL;
|
||||
glusterd_brickinfo_t *brickinfo_dup = NULL;
|
||||
|
||||
this = THIS;
|
||||
GF_VALIDATE_OR_GOTO ("glusterd", this, out);
|
||||
|
||||
priv = this->private;
|
||||
GF_VALIDATE_OR_GOTO (this->name, priv, out);
|
||||
GF_VALIDATE_OR_GOTO (this->name, brickinfo, out);
|
||||
|
||||
ret = glusterd_brickinfo_new (&brickinfo_dup);
|
||||
if (ret) {
|
||||
gf_msg ("glusterd", GF_LOG_ERROR, 0,
|
||||
GD_MSG_BRICK_NEW_INFO_FAIL,
|
||||
"Failed to create new brickinfo");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = glusterd_brickinfo_dup (brickinfo, brickinfo_dup);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_BRICK_SET_INFO_FAIL, "Failed to dup brickinfo");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = glusterd_brick_proc_for_port (brickinfo->port, &brick_proc);
|
||||
if (ret) {
|
||||
ret = glusterd_brickprocess_new (&brick_proc);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_BRICKPROC_NEW_FAILED, "Failed to create "
|
||||
"new brick process instance");
|
||||
goto out;
|
||||
}
|
||||
|
||||
brick_proc->port = brickinfo->port;
|
||||
|
||||
cds_list_add_tail (&brick_proc->brick_proc_list, &priv->brick_procs);
|
||||
}
|
||||
|
||||
cds_list_add_tail (&brickinfo_dup->brick_list, &brick_proc->bricks);
|
||||
brick_proc->brick_count++;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* ret = 0 only when you get a brick process associated with the port
|
||||
* ret = -1 otherwise
|
||||
*/
|
||||
int
|
||||
glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess)
|
||||
{
|
||||
int ret = -1;
|
||||
xlator_t *this = NULL;
|
||||
glusterd_conf_t *priv = NULL;
|
||||
glusterd_brick_proc_t *brick_proc = NULL;
|
||||
|
||||
this = THIS;
|
||||
GF_VALIDATE_OR_GOTO ("glusterd", this, out);
|
||||
|
||||
priv = this->private;
|
||||
GF_VALIDATE_OR_GOTO (this->name, priv, out);
|
||||
|
||||
cds_list_for_each_entry (brick_proc, &priv->brick_procs, brick_proc_list) {
|
||||
if (brick_proc->port == port) {
|
||||
*brickprocess = brick_proc;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t
|
||||
glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
|
||||
glusterd_brickinfo_t *brickinfo,
|
||||
@ -2118,6 +2388,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
|
||||
|
||||
ret = 0;
|
||||
|
||||
ret = glusterd_brick_process_remove_brick (brickinfo);
|
||||
if (ret) {
|
||||
gf_msg_debug (this->name, 0, "Couldn't remove brick from"
|
||||
" brick process");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (del_brick)
|
||||
cds_list_del_init (&brickinfo->brick_list);
|
||||
|
||||
@ -2149,11 +2426,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
|
||||
GF_FREE (op_errstr);
|
||||
}
|
||||
}
|
||||
|
||||
(void) glusterd_brick_disconnect (brickinfo);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
|
||||
|
||||
gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile);
|
||||
(void) sys_unlink (pidfile);
|
||||
|
||||
@ -2161,7 +2440,6 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
|
||||
|
||||
if (del_brick)
|
||||
glusterd_delete_brick (volinfo, brickinfo);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@ -5090,6 +5368,7 @@ attach_brick (xlator_t *this,
|
||||
}
|
||||
(void) build_volfile_path (full_id, path, sizeof(path), NULL);
|
||||
|
||||
|
||||
for (tries = 15; tries > 0; --tries) {
|
||||
rpc = rpc_clnt_ref (other_brick->rpc);
|
||||
if (rpc) {
|
||||
@ -5105,6 +5384,23 @@ attach_brick (xlator_t *this,
|
||||
brickinfo->status = GF_BRICK_STARTED;
|
||||
brickinfo->rpc =
|
||||
rpc_clnt_ref (other_brick->rpc);
|
||||
ret = glusterd_brick_process_add_brick (brickinfo,
|
||||
volinfo);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_BRICKPROC_ADD_BRICK_FAILED,
|
||||
"Adding brick %s:%s to brick "
|
||||
"process failed", brickinfo->hostname,
|
||||
brickinfo->path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
gf_msg_debug (this->name, 0, "Add brick"
|
||||
" to brick process failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -5126,56 +5422,6 @@ attach_brick (xlator_t *this,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gf_boolean_t
|
||||
unsafe_option (dict_t *this, char *key, data_t *value, void *arg)
|
||||
{
|
||||
/*
|
||||
* Certain options are safe because they're already being handled other
|
||||
* ways, such as being copied down to the bricks (all auth options) or
|
||||
* being made irrelevant (event-threads). All others are suspect and
|
||||
* must be checked in the next function.
|
||||
*/
|
||||
if (fnmatch ("*auth*", key, 0) == 0) {
|
||||
return _gf_false;
|
||||
}
|
||||
|
||||
if (fnmatch ("*event-threads", key, 0) == 0) {
|
||||
return _gf_false;
|
||||
}
|
||||
|
||||
return _gf_true;
|
||||
}
|
||||
|
||||
static int
|
||||
opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2)
|
||||
{
|
||||
data_t *value2 = dict_get (dict2, key);
|
||||
int32_t min_len;
|
||||
|
||||
/*
|
||||
* If the option is only present on one, we can either look at the
|
||||
* default or assume a mismatch. Looking at the default is pretty
|
||||
* hard, because that's part of a structure within each translator and
|
||||
* there's no dlopen interface to get at it, so we assume a mismatch.
|
||||
* If the user really wants them to match (and for their bricks to be
|
||||
* multiplexed, they can always reset the option).
|
||||
*/
|
||||
if (!value2) {
|
||||
gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key);
|
||||
return -1;
|
||||
}
|
||||
|
||||
min_len = MIN (value1->len, value2->len);
|
||||
if (strncmp (value1->data, value2->data, min_len) != 0) {
|
||||
gf_log (THIS->name, GF_LOG_DEBUG,
|
||||
"option mismatch, %s, %s != %s",
|
||||
key, value1->data, value2->data);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This name was just getting too long, hence the abbreviations. */
|
||||
static glusterd_brickinfo_t *
|
||||
find_compat_brick_in_vol (glusterd_conf_t *conf,
|
||||
@ -5184,10 +5430,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
|
||||
glusterd_brickinfo_t *brickinfo)
|
||||
{
|
||||
xlator_t *this = THIS;
|
||||
glusterd_brickinfo_t *other_brick;
|
||||
glusterd_brickinfo_t *other_brick = NULL;
|
||||
glusterd_brick_proc_t *brick_proc = NULL;
|
||||
char pidfile2[PATH_MAX] = {0};
|
||||
int32_t pid2 = -1;
|
||||
int16_t retries = 15;
|
||||
int mux_limit = -1;
|
||||
int ret = -1;
|
||||
|
||||
/*
|
||||
* If comp_vol is provided, we have to check *volume* compatibility
|
||||
@ -5219,6 +5468,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
|
||||
gf_log (THIS->name, GF_LOG_DEBUG, "all options match");
|
||||
}
|
||||
|
||||
ret = get_mux_limit_per_process (&mux_limit);
|
||||
if (ret) {
|
||||
gf_msg_debug (THIS->name, 0, "Retrieving brick mux "
|
||||
"limit failed. Returning NULL");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cds_list_for_each_entry (other_brick, &srch_vol->bricks,
|
||||
brick_list) {
|
||||
if (other_brick == brickinfo) {
|
||||
@ -5232,6 +5488,30 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = glusterd_brick_proc_for_port (other_brick->port,
|
||||
&brick_proc);
|
||||
if (ret) {
|
||||
gf_msg_debug (THIS->name, 0, "Couldn't get brick "
|
||||
"process corresponding to brick %s:%s",
|
||||
other_brick->hostname, other_brick->path);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (mux_limit != -1) {
|
||||
if (brick_proc->brick_count >= mux_limit)
|
||||
continue;
|
||||
} else {
|
||||
/* This means that the "cluster.max-bricks-per-process"
|
||||
* options hasn't yet been explicitly set. Continue
|
||||
* as if there's no limit set
|
||||
*/
|
||||
gf_msg (THIS->name, GF_LOG_WARNING, 0,
|
||||
GD_MSG_NO_MUX_LIMIT,
|
||||
"cluster.max-bricks-per-process options isn't "
|
||||
"set. Continuing with no limit set for "
|
||||
"brick multiplexing.");
|
||||
}
|
||||
|
||||
GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
|
||||
conf);
|
||||
|
||||
@ -5508,6 +5788,16 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
||||
|
||||
(void) glusterd_brick_connect (volinfo, brickinfo,
|
||||
socketpath);
|
||||
|
||||
ret = glusterd_brick_process_add_brick (brickinfo, volinfo);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_BRICKPROC_ADD_BRICK_FAILED,
|
||||
"Adding brick %s:%s to brick process "
|
||||
"failed.", brickinfo->hostname,
|
||||
brickinfo->path);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -146,6 +146,9 @@ glusterd_auth_cleanup (glusterd_volinfo_t *volinfo);
|
||||
gf_boolean_t
|
||||
glusterd_check_volume_exists (char *volname);
|
||||
|
||||
int32_t
|
||||
glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess);
|
||||
|
||||
int32_t
|
||||
glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo);
|
||||
|
||||
@ -175,6 +178,16 @@ glusterd_get_next_available_brickid (glusterd_volinfo_t *volinfo);
|
||||
int32_t
|
||||
glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo);
|
||||
|
||||
int
|
||||
glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,
|
||||
glusterd_volinfo_t *volinfo);
|
||||
|
||||
int
|
||||
glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo);
|
||||
|
||||
int
|
||||
glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess);
|
||||
|
||||
int32_t
|
||||
glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
|
||||
glusterd_brickinfo_t *brickinfo,
|
||||
|
@ -2214,8 +2214,6 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
|
||||
goto out;
|
||||
}
|
||||
|
||||
count = volinfo->brick_count;
|
||||
|
||||
ret = dict_get_str (dict, "bricks", &bricks);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
@ -2364,6 +2362,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
|
||||
free_ptr = brick_list;
|
||||
}
|
||||
|
||||
count = volinfo->brick_count;
|
||||
|
||||
if (count)
|
||||
brick = strtok_r (brick_list+1, " \n", &saveptr);
|
||||
caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
|
||||
|
@ -1000,6 +1000,38 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
validate_mux_limit (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
|
||||
char *value, char **op_errstr)
|
||||
{
|
||||
xlator_t *this = NULL;
|
||||
uint val = 0;
|
||||
int ret = -1;
|
||||
|
||||
this = THIS;
|
||||
GF_VALIDATE_OR_GOTO ("glusterd", this, out);
|
||||
|
||||
if (!is_brick_mx_enabled()) {
|
||||
gf_asprintf (op_errstr, "Brick-multiplexing is not enabled. "
|
||||
"Please enable brick multiplexing before trying "
|
||||
"to set this option.");
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_WRONG_OPTS_SETTING, "%s", *op_errstr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = gf_string2uint (value, &val);
|
||||
if (ret) {
|
||||
gf_asprintf (op_errstr, "%s is not a valid count. "
|
||||
"%s expects an unsigned integer.", value, key);
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_INVALID_ENTRY, "%s", *op_errstr);
|
||||
}
|
||||
out:
|
||||
gf_msg_debug ("glusterd", 0, "Returning %d", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
validate_boolean (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
|
||||
@ -3408,6 +3440,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
||||
.op_version = GD_OP_VERSION_3_10_0,
|
||||
.validate_fn = validate_boolean
|
||||
},
|
||||
{ .key = GLUSTERD_BRICKMUX_LIMIT_KEY,
|
||||
.voltype = "mgmt/glusterd",
|
||||
.value = "1",
|
||||
.op_version = GD_OP_VERSION_3_12_0,
|
||||
.validate_fn = validate_mux_limit
|
||||
},
|
||||
{ .key = "disperse.optimistic-change-log",
|
||||
.voltype = "cluster/disperse",
|
||||
.type = NO_DOC,
|
||||
|
@ -1723,6 +1723,7 @@ init (xlator_t *this)
|
||||
CDS_INIT_LIST_HEAD (&conf->volumes);
|
||||
CDS_INIT_LIST_HEAD (&conf->snapshots);
|
||||
CDS_INIT_LIST_HEAD (&conf->missed_snaps_list);
|
||||
CDS_INIT_LIST_HEAD (&conf->brick_procs);
|
||||
|
||||
pthread_mutex_init (&conf->mutex, NULL);
|
||||
conf->rpc = rpc;
|
||||
|
@ -55,6 +55,8 @@
|
||||
#define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage"
|
||||
#define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex"
|
||||
|
||||
#define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process"
|
||||
|
||||
#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256
|
||||
#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90
|
||||
#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100
|
||||
@ -154,6 +156,7 @@ typedef struct {
|
||||
struct pmap_registry *pmap;
|
||||
struct cds_list_head volumes;
|
||||
struct cds_list_head snapshots; /*List of snap volumes */
|
||||
struct cds_list_head brick_procs; /* List of brick processes */
|
||||
pthread_mutex_t xprt_lock;
|
||||
struct list_head xprt_list;
|
||||
gf_store_handle_t *handle;
|
||||
@ -233,6 +236,15 @@ struct glusterd_brickinfo {
|
||||
|
||||
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
|
||||
|
||||
struct glusterd_brick_proc {
|
||||
int port;
|
||||
uint32_t brick_count;
|
||||
struct cds_list_head brick_proc_list;
|
||||
struct cds_list_head bricks;
|
||||
};
|
||||
|
||||
typedef struct glusterd_brick_proc glusterd_brick_proc_t;
|
||||
|
||||
struct gf_defrag_brickinfo_ {
|
||||
char *name;
|
||||
int files;
|
||||
|
Loading…
x
Reference in New Issue
Block a user