rpc: destroy transport after client_t

Problem:
1. Ref counting increment on the client_t object is done in
   rpcsvc_request_init() which is incorrect.
2. Ref not taken when delegating to grace_time_handler()

Solution:
1. Only fop requests which require processing down the graph via
   stack 'frames' now ref count the request in get_frame_from_request()
2. Take ref on client_t object in server_rpc_notify() but avoid
   dropping in RPCSVC_EVENT_TRANSPORT_DESRTROY. Drop the ref
   unconditionally when exiting out of grace_time_handler().
   Also, avoid dropping ref on client_t in
   RPCSVC_EVENT_TRANSPORT_DESTROY when ref mangement as been
   delegated to grace_time_handler()

Change-Id: Ic16246bebc7ea4490545b26564658f4b081675e4
BUG: 1481600
Reported-by: Raghavendra G <rgowdapp@redhat.com>
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: https://review.gluster.org/17982
Tested-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
This commit is contained in:
Milind Changire 2017-08-30 11:25:29 +05:30 committed by Raghavendra G
parent a4c43ba937
commit 24b95089a1
4 changed files with 70 additions and 27 deletions

View File

@ -411,9 +411,6 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans,
req->progver = rpc_call_progver (callmsg);
req->procnum = rpc_call_progproc (callmsg);
req->trans = rpc_transport_ref (trans);
if (req->trans->xl_private) {
gf_client_ref (req->trans->xl_private);
}
req->count = msg->count;
req->msg[0] = progmsg;
req->iobref = iobref_ref (msg->iobref);

View File

@ -640,9 +640,24 @@ server_setvolume (rpcsvc_request_t *req)
gf_msg_debug (this->name, 0, "Connected to %s", client->client_uid);
cancelled = server_cancel_grace_timer (this, client);
if (cancelled)//Do gf_client_put on behalf of grace-timer-handler.
if (cancelled) {
/* If timer has been successfully cancelled then it means
* that the client has reconnected within grace period.
* Since we've bumped up the bind count with a gf_client_get()
* for this connect attempt, we need to drop the bind count
* for earlier connect, since grace timer handler couldn't
* drop it since the timer was cancelled.
*/
gf_client_put (client, NULL);
/* We need to drop the ref count for this reconnected client
* since one ref was taken before delegating to the grace
* timer handler. Since grace timer handler was cancelled,
* it couldn't run and drop the ref either.
*/
gf_client_unref (client);
}
serv_ctx = server_ctx_get (client, client->this);
if (serv_ctx == NULL) {
gf_msg (this->name, GF_LOG_INFO, 0,

View File

@ -490,6 +490,10 @@ get_frame_from_request (rpcsvc_request_t *req)
}
}
/* Add a ref for this fop */
if (client)
gf_client_ref (client);
frame->root->uid = req->uid;
frame->root->gid = req->gid;
frame->root->pid = req->pid;

View File

@ -73,10 +73,8 @@ grace_time_handler (void *data)
if (cancelled) {
/*
* client must not be destroyed in gf_client_put(),
* so take a ref.
* ref has already been taken in server_rpc_notify()
*/
gf_client_ref (client);
gf_client_put (client, &detached);
if (detached) /* reconnection did not happen :-( */
@ -543,13 +541,12 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
* new transport will be created upon reconnect.
*/
pthread_mutex_lock (&conf->mutex);
client = trans->xl_private;
list_del_init (&trans->list);
rpc_transport_unref (trans);
pthread_mutex_unlock (&conf->mutex);
client = trans->xl_private;
if (!client)
break;
goto unref_transport;
gf_msg (this->name, GF_LOG_INFO, 0,
PS_MSG_CLIENT_DISCONNECTING, "disconnecting connection"
@ -582,18 +579,13 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
trans->myinfo.identifier,
auth_path);
}
gf_client_unref (client);
break;
/*
* gf_client_unref will be done while handling
* RPC_EVENT_TRANSPORT_DESTROY
*/
goto unref_transport;
}
trans->xl_private = NULL;
server_connection_cleanup (this, client, INTERNAL_LOCKS);
gf_event (EVENT_CLIENT_DISCONNECT, "client_uid=%s;"
"client_identifier=%s;server_identifier=%s;"
"brick_path=%s",
client->client_uid,
trans->peerinfo.identifier,
trans->myinfo.identifier,
auth_path);
serv_ctx = server_ctx_get (client, this);
@ -601,7 +593,7 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
gf_msg (this->name, GF_LOG_INFO, 0,
PS_MSG_SERVER_CTX_GET_FAILED,
"server_ctx_get() failed");
goto out;
goto unref_transport;
}
grace_ts.tv_sec = conf->grace_timeout;
@ -616,6 +608,12 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
"starting a grace timer for %s",
client->client_uid);
/* ref to protect against client destruction
* in RPCSVC_EVENT_TRANSPORT_DESTROY while
* we are starting a grace timer
*/
gf_client_ref (client);
serv_ctx->grace_timer =
gf_timer_call_after (this->ctx,
grace_ts,
@ -624,13 +622,42 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
}
}
UNLOCK (&serv_ctx->fdtable_lock);
ret = dict_get_str (this->options, "auth-path", &auth_path);
if (ret) {
gf_msg (this->name, GF_LOG_WARNING, 0,
PS_MSG_DICT_GET_FAILED,
"failed to get auth-path");
auth_path = NULL;
}
gf_event (EVENT_CLIENT_DISCONNECT, "client_uid=%s;"
"client_identifier=%s;server_identifier=%s;"
"brick_path=%s",
client->client_uid,
trans->peerinfo.identifier,
trans->myinfo.identifier,
auth_path);
unref_transport:
/* rpc_transport_unref() causes a RPCSVC_EVENT_TRANSPORT_DESTROY
* to be called in blocking manner
* So no code should ideally be after this unref
*/
rpc_transport_unref (trans);
break;
case RPCSVC_EVENT_TRANSPORT_DESTROY:
/*- conn obj has been disassociated from trans on first
* disconnect.
* conn cleanup and destruction is handed over to
* grace_time_handler or the subsequent handler that 'owns'
* the conn. Nothing left to be done here. */
client = trans->xl_private;
if (!client)
break;
/* unref only for if (!client->lk_heal) */
if (!conf->lk_heal)
gf_client_unref (client);
trans->xl_private = NULL;
break;
default:
break;