glusterfsd: process attach and detach request inside lock
With brick multiplexing, there is a high possibility that attach and detach
requests are processed in parallel. To avoid concurrent updates to the same
graph list, a mutex lock is required.

Credits: Rafi (rkavunga@redhat.com) for the RCA of this issue

Change-Id: Ic8e6d1708655c8a143c5a3690968dfa572a32a9c
BUG: 1454865
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://review.gluster.org/17374
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
commit 3ca5ae2f3b (parent 23930326e0), committed by Jeff Darcy
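
To make the race concrete before the diff hunks that follow: below is a minimal, hypothetical sketch (plain pthreads and invented types, not GlusterFS code) of two brick-op handlers mutating one shared child list. Unless both the traversal and the link update are serialized under a single mutex, a parallel attach and detach can leave the list half-updated; the commit applies the same idea to the real graph list using LOCK/UNLOCK on ctx->volfile_lock.

/* Minimal illustration only -- hypothetical types and names, not the actual
 * GlusterFS structures.  Two RPC handler threads share one child list. */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct child {
        char         *name;
        struct child *next;
};

static struct child    *children;                       /* shared graph list */
static pthread_mutex_t  graph_lock = PTHREAD_MUTEX_INITIALIZER;

/* "attach": prepend a new child; the link update happens under the lock. */
static void handle_attach (const char *name)
{
        struct child *c = calloc (1, sizeof (*c));

        c->name = strdup (name);
        pthread_mutex_lock (&graph_lock);
        c->next  = children;
        children = c;
        pthread_mutex_unlock (&graph_lock);
}

/* "detach": walk and unlink under the same lock, so a concurrent attach can
 * never observe (or relink against) a half-updated list. */
static void handle_detach (const char *name)
{
        pthread_mutex_lock (&graph_lock);
        for (struct child **trav_p = &children; *trav_p;
             trav_p = &(*trav_p)->next) {
                if (strcmp ((*trav_p)->name, name) == 0) {
                        struct child *victim = *trav_p;

                        *trav_p = victim->next;
                        free (victim->name);
                        free (victim);
                        break;
                }
        }
        pthread_mutex_unlock (&graph_lock);
}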
@@ -198,8 +198,9 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
 {
         gd1_mgmt_brick_op_req   xlator_req = {0,};
         ssize_t                 ret;
-        xlator_t                *top;
-        xlator_t                *victim;
+        xlator_t                *top = NULL;
+        xlator_t                *victim = NULL;
+        glusterfs_ctx_t         *ctx = NULL;
         xlator_list_t           **trav_p;
 
         ret = xdr_to_generic (req->msg[0], &xlator_req,
@@ -208,52 +209,62 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
                 req->rpc_err = GARBAGE_ARGS;
                 return -1;
         }
+        ctx = glusterfsd_ctx;
 
-        /* Find the xlator_list_t that points to our victim. */
-        top = glusterfsd_ctx->active->first;
-        for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
-                victim = (*trav_p)->xlator;
-                if (strcmp (victim->name, xlator_req.name) == 0) {
-                        break;
-                }
-        }
-
-        if (!*trav_p) {
-                gf_log (THIS->name, GF_LOG_ERROR,
-                        "can't terminate %s - not found", xlator_req.name);
-                /*
-                 * Used to be -ENOENT. However, the caller asked us to make
-                 * sure it's down and if it's already down that's good enough.
-                 */
-                glusterfs_terminate_response_send (req, 0);
-                goto err;
-        }
-
-        glusterfs_terminate_response_send (req, 0);
-        if ((trav_p == &top->children) && !(*trav_p)->next) {
-                gf_log (THIS->name, GF_LOG_INFO,
-                        "terminating after loss of last child %s",
-                        xlator_req.name);
-                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
-                cleanup_and_exit (SIGTERM);
-        } else {
-                /*
-                 * This is terribly unsafe without quiescing or shutting things
-                 * down properly (or even locking) but it gets us to the point
-                 * where we can test other stuff.
-                 *
-                 * TBD: finish implementing this "detach" code properly
-                 */
-                gf_log (THIS->name, GF_LOG_INFO, "detaching not-only child %s",
-                        xlator_req.name);
-                top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
-                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
-
-                *trav_p = (*trav_p)->next;
-                glusterfs_autoscale_threads (THIS->ctx, -1);
-        }
-
+        LOCK (&ctx->volfile_lock);
+        {
+                /* Find the xlator_list_t that points to our victim. */
+                top = glusterfsd_ctx->active->first;
+                for (trav_p = &top->children; *trav_p;
+                                              trav_p = &(*trav_p)->next) {
+                        victim = (*trav_p)->xlator;
+                        if (strcmp (victim->name, xlator_req.name) == 0) {
+                                break;
+                        }
+                }
+
+                if (!*trav_p) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "can't terminate %s - not found",
+                                xlator_req.name);
+                        /*
+                         * Used to be -ENOENT. However, the caller asked us to
+                         * make sure it's down and if it's already down that's
+                         * good enough.
+                         */
+                        glusterfs_terminate_response_send (req, 0);
+                        goto err;
+                }
+
+                glusterfs_terminate_response_send (req, 0);
+                if ((trav_p == &top->children) && !(*trav_p)->next) {
+                        gf_log (THIS->name, GF_LOG_INFO,
+                                "terminating after loss of last child %s",
+                                xlator_req.name);
+                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+                                                     xlator_req.name);
+                        kill (getpid(), SIGTERM);
+                } else {
+                        /*
+                         * This is terribly unsafe without quiescing or shutting
+                         * things down properly but it gets us to the point
+                         * where we can test other stuff.
+                         *
+                         * TBD: finish implementing this "detach" code properly
+                         */
+                        gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+                                " child %s", xlator_req.name);
+                        top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
+                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+                                                     xlator_req.name);
+
+                        *trav_p = (*trav_p)->next;
+                        glusterfs_autoscale_threads (THIS->ctx, -1);
+                }
+
+        }
 err:
+        UNLOCK (&ctx->volfile_lock);
         free (xlator_req.name);
         xlator_req.name = NULL;
         return 0;
@@ -828,6 +839,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
         gd1_mgmt_brick_op_req   xlator_req = {0,};
         xlator_t                *this = NULL;
         glusterfs_graph_t       *newgraph = NULL;
+        glusterfs_ctx_t         *ctx = NULL;
 
         GF_ASSERT (req);
         this = THIS;
@@ -842,32 +854,38 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
                 return -1;
         }
         ret = 0;
+        ctx = this->ctx;
 
-        if (this->ctx->active) {
-                gf_log (this->name, GF_LOG_INFO,
-                        "got attach for %s", xlator_req.name);
-                ret = glusterfs_graph_attach (this->ctx->active,
-                                              xlator_req.name, &newgraph);
-                if (ret == 0) {
-                        ret = glusterfs_graph_parent_up (newgraph);
-                        if (ret) {
-                                gf_msg (this->name, GF_LOG_ERROR, 0,
-                                        LG_MSG_EVENT_NOTIFY_FAILED,
-                                        "Parent up notification "
-                                        "failed");
-                                goto out;
-                        }
-                }
-                glusterfs_autoscale_threads (this->ctx, 1);
-        } else {
-                gf_log (this->name, GF_LOG_WARNING,
-                        "got attach for %s but no active graph",
-                        xlator_req.name);
-        }
+        LOCK (&ctx->volfile_lock);
+        {
+                if (this->ctx->active) {
+                        gf_log (this->name, GF_LOG_INFO,
+                                "got attach for %s", xlator_req.name);
+                        ret = glusterfs_graph_attach (this->ctx->active,
+                                                      xlator_req.name,
+                                                      &newgraph);
+                        if (ret == 0) {
+                                ret = glusterfs_graph_parent_up (newgraph);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                LG_MSG_EVENT_NOTIFY_FAILED,
+                                                "Parent up notification "
+                                                "failed");
+                                        goto out;
+                                }
+                                glusterfs_autoscale_threads (this->ctx, 1);
+                        }
+                } else {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "got attach for %s but no active graph",
+                                xlator_req.name);
+                }
 
         glusterfs_translator_info_response_send (req, ret, NULL, NULL);
 
 out:
+                UNLOCK (&ctx->volfile_lock);
+        }
         free (xlator_req.input.input_val);
         free (xlator_req.name);
 
@@ -1981,23 +1999,28 @@ glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
         xlator_list_t   *trav;
         int             ret;
 
-        if (ctx->active) {
-                server_xl = ctx->active->first;
-                if (strcmp (server_xl->type, "protocol/server") != 0) {
-                        server_xl = NULL;
-                }
-        }
-        if (!server_xl) {
-                /* Startup (ctx->active not set) or non-server. */
-                return glusterfs_volfile_fetch_one (ctx,
-                                                    ctx->cmd_args.volfile_id);
-        }
-
-        ret = 0;
-        for (trav = server_xl->children; trav; trav = trav->next) {
-                ret |= glusterfs_volfile_fetch_one (ctx,
-                                                    trav->xlator->volfile_id);
-        }
+        LOCK (&ctx->volfile_lock);
+        {
+                if (ctx->active) {
+                        server_xl = ctx->active->first;
+                        if (strcmp (server_xl->type, "protocol/server") != 0) {
+                                server_xl = NULL;
+                        }
+                }
+                if (!server_xl) {
+                        /* Startup (ctx->active not set) or non-server. */
+                        UNLOCK (&ctx->volfile_lock);
+                        return glusterfs_volfile_fetch_one
+                               (ctx, ctx->cmd_args.volfile_id);
+                }
+
+                ret = 0;
+                for (trav = server_xl->children; trav; trav = trav->next) {
+                        ret |= glusterfs_volfile_fetch_one
+                               (ctx, trav->xlator->volfile_id);
+                }
+        }
+        UNLOCK (&ctx->volfile_lock);
         return ret;
 }
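
One detail worth calling out in the hunk above: the startup/non-server case returns from the middle of the critical section, so the lock has to be released on that early path as well as at the normal exit. A small sketch of the same shape, with hypothetical helpers standing in for the glusterfs_volfile_fetch_one calls:

/* Sketch of the lock / early-return shape used above (hypothetical names;
 * the real code uses GlusterFS's LOCK/UNLOCK on ctx->volfile_lock). */
#include <pthread.h>

static pthread_mutex_t volfile_lock = PTHREAD_MUTEX_INITIALIZER;

static int fetch_one (void)  { return 0; }  /* stand-in: fetch own volfile    */
static int fetch_many (void) { return 0; }  /* stand-in: per-child fetch loop */

static int fetch_all (int have_server_xl)
{
        int ret = 0;

        pthread_mutex_lock (&volfile_lock);
        if (!have_server_xl) {
                /* Early-return path: drop the lock before leaving. */
                pthread_mutex_unlock (&volfile_lock);
                return fetch_one ();
        }
        ret = fetch_many ();
        pthread_mutex_unlock (&volfile_lock);

        return ret;
}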
@@ -412,7 +412,7 @@ server_setvolume (rpcsvc_request_t *req)
         rpc_transport_t     *xprt = NULL;
         int32_t              fop_version = 0;
         int32_t              mgmt_version = 0;
-
+        glusterfs_ctx_t     *ctx = NULL;
 
         params = dict_new ();
         reply = dict_new ();
@@ -423,6 +423,7 @@ server_setvolume (rpcsvc_request_t *req)
                 req->rpc_err = GARBAGE_ARGS;
                 goto fail;
         }
+        ctx = THIS->ctx;
 
         this = req->svc->xl;
         /* this is to ensure config_params is populated with the first brick
@@ -468,7 +469,11 @@ server_setvolume (rpcsvc_request_t *req)
                 goto fail;
         }
 
-        xl = get_xlator_by_name (this, name);
+        LOCK (&ctx->volfile_lock);
+        {
+                xl = get_xlator_by_name (this, name);
+        }
+        UNLOCK (&ctx->volfile_lock);
         if (xl == NULL) {
                 ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
                                    name);
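
The last two hunks take the same lock on the client-handshake side, so looking up the requested brick cannot race with a detach that unlinks and frees the xlator. Continuing the hypothetical sketch from the top (struct child, children, and graph_lock as declared there), a lookup guarded the same way might look like:

/* Hypothetical continuation of the earlier sketch: find a child by name
 * entirely under the lock so a concurrent detach cannot free it mid-walk. */
static struct child *
lookup_child (const char *name)
{
        struct child *found = NULL;

        pthread_mutex_lock (&graph_lock);
        for (struct child *c = children; c; c = c->next) {
                if (strcmp (c->name, name) == 0) {
                        found = c;
                        break;
                }
        }
        pthread_mutex_unlock (&graph_lock);

        return found;  /* caller must ensure no detach runs while it uses this */
}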