rpc: fix for race between rpc and protocol/client
It is possible that the notification thread which notifies protocol/client layer about the disconnection is put to sleep and meanwhile, a fuse thread or a timer thread initiates and completes reconnection to the brick. The notification thread is then woken up and protocol/client layer updates its flags to indicate that network is disconnected. No reconnection is initiated because reconnection is rpc-lib layer's responsibility and its flags indicate that connection is connected. Fix: Serialize connect and disconnect notify Credit: Raghavendra Talur <rtalur@redhat.com> Change-Id: I8ff5d1a3283b47f5c26848a42016a40bc34ffc1d BUG: 1386626 Signed-off-by: Rajesh Joseph <rjoseph@redhat.com> Reviewed-on: http://review.gluster.org/15916 Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra Talur <rtalur@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
This commit is contained in:
parent
45f914ec9c
commit
aa22f24f5d
@ -871,6 +871,41 @@ rpc_clnt_destroy (struct rpc_clnt *rpc);
|
||||
|
||||
#define RPC_THIS_RESTORE (THIS = old_THIS)
|
||||
|
||||
static int
|
||||
rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
|
||||
{
|
||||
struct timespec ts = {0, };
|
||||
gf_boolean_t unref_clnt = _gf_false;
|
||||
|
||||
rpc_clnt_connection_cleanup (conn);
|
||||
|
||||
pthread_mutex_lock (&conn->lock);
|
||||
{
|
||||
if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) {
|
||||
ts.tv_sec = 10;
|
||||
ts.tv_nsec = 0;
|
||||
|
||||
rpc_clnt_ref (clnt);
|
||||
conn->reconnect = gf_timer_call_after (clnt->ctx, ts,
|
||||
rpc_clnt_reconnect, conn);
|
||||
if (conn->reconnect == NULL) {
|
||||
gf_log (conn->name, GF_LOG_WARNING,
|
||||
"Cannot create rpc_clnt_reconnect timer");
|
||||
unref_clnt = _gf_true;
|
||||
}
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock (&conn->lock);
|
||||
|
||||
if (clnt->notifyfn)
|
||||
clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
|
||||
|
||||
if (unref_clnt)
|
||||
rpc_clnt_ref (clnt);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
|
||||
rpc_transport_event_t event, void *data, ...)
|
||||
@ -880,9 +915,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
|
||||
int ret = -1;
|
||||
rpc_request_info_t *req_info = NULL;
|
||||
rpc_transport_pollin_t *pollin = NULL;
|
||||
struct timespec ts = {0, };
|
||||
void *clnt_mydata = NULL;
|
||||
gf_boolean_t unref_clnt = _gf_false;
|
||||
DECLARE_OLD_THIS;
|
||||
|
||||
conn = mydata;
|
||||
@ -898,35 +931,11 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
|
||||
switch (event) {
|
||||
case RPC_TRANSPORT_DISCONNECT:
|
||||
{
|
||||
rpc_clnt_connection_cleanup (conn);
|
||||
|
||||
pthread_mutex_lock (&conn->lock);
|
||||
pthread_mutex_lock (&clnt->notifylock);
|
||||
{
|
||||
if (!conn->rpc_clnt->disabled
|
||||
&& (conn->reconnect == NULL)) {
|
||||
ts.tv_sec = 10;
|
||||
ts.tv_nsec = 0;
|
||||
|
||||
rpc_clnt_ref (clnt);
|
||||
conn->reconnect =
|
||||
gf_timer_call_after (clnt->ctx, ts,
|
||||
rpc_clnt_reconnect,
|
||||
conn);
|
||||
if (conn->reconnect == NULL) {
|
||||
gf_log (conn->name, GF_LOG_WARNING,
|
||||
"Cannot create rpc_clnt_reconnect timer");
|
||||
unref_clnt = _gf_true;
|
||||
}
|
||||
}
|
||||
rpc_clnt_handle_disconnect (clnt, conn);
|
||||
}
|
||||
pthread_mutex_unlock (&conn->lock);
|
||||
|
||||
if (clnt->notifyfn)
|
||||
ret = clnt->notifyfn (clnt, clnt->mydata,
|
||||
RPC_CLNT_DISCONNECT, NULL);
|
||||
if (unref_clnt)
|
||||
rpc_clnt_ref (clnt);
|
||||
|
||||
pthread_mutex_unlock (&clnt->notifylock);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -981,17 +990,21 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
|
||||
|
||||
case RPC_TRANSPORT_CONNECT:
|
||||
{
|
||||
/* Every time there is a disconnection, processes
|
||||
should try to connect to 'glusterd' (ie, default
|
||||
port) or whichever port given as 'option remote-port'
|
||||
in volume file. */
|
||||
/* Below code makes sure the (re-)configured port lasts
|
||||
for just one successful attempt */
|
||||
conn->config.remote_port = 0;
|
||||
pthread_mutex_lock (&clnt->notifylock);
|
||||
{
|
||||
/* Every time there is a disconnection, processes
|
||||
* should try to connect to 'glusterd' (ie, default
|
||||
* port) or whichever port given as 'option remote-port'
|
||||
* in volume file. */
|
||||
/* Below code makes sure the (re-)configured port lasts
|
||||
* for just one successful attempt */
|
||||
conn->config.remote_port = 0;
|
||||
|
||||
if (clnt->notifyfn)
|
||||
ret = clnt->notifyfn (clnt, clnt->mydata,
|
||||
RPC_CLNT_CONNECT, NULL);
|
||||
if (clnt->notifyfn)
|
||||
ret = clnt->notifyfn (clnt, clnt->mydata,
|
||||
RPC_CLNT_CONNECT, NULL);
|
||||
}
|
||||
pthread_mutex_unlock (&clnt->notifylock);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1115,6 +1128,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
|
||||
}
|
||||
|
||||
pthread_mutex_init (&rpc->lock, NULL);
|
||||
pthread_mutex_init (&rpc->notifylock, NULL);
|
||||
rpc->ctx = ctx;
|
||||
rpc->owner = owner;
|
||||
|
||||
@ -1124,6 +1138,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
|
||||
rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
|
||||
if (rpc->reqpool == NULL) {
|
||||
pthread_mutex_destroy (&rpc->lock);
|
||||
pthread_mutex_destroy (&rpc->notifylock);
|
||||
GF_FREE (rpc);
|
||||
rpc = NULL;
|
||||
goto out;
|
||||
@ -1133,6 +1148,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
|
||||
reqpool_size);
|
||||
if (rpc->saved_frames_pool == NULL) {
|
||||
pthread_mutex_destroy (&rpc->lock);
|
||||
pthread_mutex_destroy (&rpc->notifylock);
|
||||
mem_pool_destroy (rpc->reqpool);
|
||||
GF_FREE (rpc);
|
||||
rpc = NULL;
|
||||
@ -1142,6 +1158,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
|
||||
ret = rpc_clnt_connection_init (rpc, ctx, options, name);
|
||||
if (ret == -1) {
|
||||
pthread_mutex_destroy (&rpc->lock);
|
||||
pthread_mutex_destroy (&rpc->notifylock);
|
||||
mem_pool_destroy (rpc->reqpool);
|
||||
mem_pool_destroy (rpc->saved_frames_pool);
|
||||
GF_FREE (rpc);
|
||||
@ -1737,6 +1754,7 @@ rpc_clnt_destroy (struct rpc_clnt *rpc)
|
||||
saved_frames_destroy (rpc->conn.saved_frames);
|
||||
pthread_mutex_destroy (&rpc->lock);
|
||||
pthread_mutex_destroy (&rpc->conn.lock);
|
||||
pthread_mutex_destroy (&rpc->notifylock);
|
||||
|
||||
/* mem-pool should be destroyed, otherwise,
|
||||
it will cause huge memory leaks */
|
||||
|
@ -172,6 +172,7 @@ struct rpc_req {
|
||||
|
||||
typedef struct rpc_clnt {
|
||||
pthread_mutex_t lock;
|
||||
pthread_mutex_t notifylock;
|
||||
rpc_clnt_notify_t notifyfn;
|
||||
rpc_clnt_connection_t conn;
|
||||
void *mydata;
|
||||
|
Loading…
x
Reference in New Issue
Block a user