protocol/client: sequence CHILD_UP, CHILD_DOWN etc notifications
... from all bricks in the volume

This patch is important in the context of MT epoll. With MT epoll, notification events from client xlators could reach cluster xlators like afr, dht, ec, stripe, etc. in different orders. For example, in a distributed replicate volume of 2 bricks, namely Brick1 and Brick2, the following network events are observed by a mount process:
- connection to Brick1 is broken.
- connection to Brick1 has been restored.
- connection to Brick2 is broken.
- connection to Brick2 has been restored.

Without establishing a total ordering of events, we can't guarantee that cluster xlators like afr and dht perceive them in the same order. While we would expect afr (say) to perceive only one of Brick1 and Brick2 as being down at any given time, it is possible for the notification of Brick2 going offline to race with the notification of Brick1 coming back online.

Change-Id: I78f5a52bfb05593335d0e9ad53ebfff98995593d
BUG: 1104462
Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-on: http://review.gluster.org/9591
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
committed by
Vijay Bellur
parent
a7f5893c92
commit
f18a3f30bb
@ -1394,6 +1394,8 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
|
||||
goto out;
|
||||
|
||||
pthread_mutex_init (&(ctx->lock), NULL);
|
||||
pthread_mutex_init (&ctx->notify_lock, NULL);
|
||||
pthread_cond_init (&ctx->notify_cond, NULL);
|
||||
|
||||
ctx->clienttable = gf_clienttable_alloc();
|
||||
if (!ctx->clienttable)
|
||||
|
@ -533,6 +533,10 @@ struct _glusterfs_ctx {
|
||||
/* Buffer to 'save' backtrace even under OOM-kill like situations*/
|
||||
char btbuf[GF_BACKTRACE_LEN];
|
||||
|
||||
pthread_mutex_t notify_lock;
|
||||
pthread_cond_t notify_cond;
|
||||
int notifying;
|
||||
|
||||
};
|
||||
typedef struct _glusterfs_ctx glusterfs_ctx_t;
|
||||
|
||||
|
@ -131,12 +131,11 @@ client_notify_parents_child_up (xlator_t *this)
|
||||
int ret = 0;
|
||||
|
||||
conf = this->private;
|
||||
ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
|
||||
ret = client_notify_dispatch (this, GF_EVENT_CHILD_UP, NULL);
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"notify of CHILD_UP failed");
|
||||
|
||||
conf->last_sent_event = GF_EVENT_CHILD_UP;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1146,11 +1145,12 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
|
||||
op_ret = 0;
|
||||
}
|
||||
if (op_errno == ESTALE) {
|
||||
ret = default_notify (this, GF_EVENT_VOLFILE_MODIFIED, NULL);
|
||||
ret = client_notify_dispatch (this,
|
||||
GF_EVENT_VOLFILE_MODIFIED,
|
||||
NULL);
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"notify of VOLFILE_MODIFIED failed");
|
||||
conf->last_sent_event = GF_EVENT_VOLFILE_MODIFIED;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
@ -1223,13 +1223,12 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
|
||||
out:
|
||||
if (auth_fail) {
|
||||
gf_log (this->name, GF_LOG_INFO, "sending AUTH_FAILED event");
|
||||
ret = default_notify (this, GF_EVENT_AUTH_FAILED, NULL);
|
||||
ret = client_notify_dispatch (this, GF_EVENT_AUTH_FAILED, NULL);
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"notify of AUTH_FAILED failed");
|
||||
conf->connecting = 0;
|
||||
conf->connected = 0;
|
||||
conf->last_sent_event = GF_EVENT_AUTH_FAILED;
|
||||
ret = -1;
|
||||
}
|
||||
if (-1 == op_ret) {
|
||||
@ -1238,11 +1237,11 @@ out:
|
||||
* tell the parents that i am all ok..
|
||||
*/
|
||||
gf_log (this->name, GF_LOG_INFO, "sending CHILD_CONNECTING event");
|
||||
ret = default_notify (this, GF_EVENT_CHILD_CONNECTING, NULL);
|
||||
ret = client_notify_dispatch (this, GF_EVENT_CHILD_CONNECTING,
|
||||
NULL);
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"notify of CHILD_CONNECTING failed");
|
||||
conf->last_sent_event = GF_EVENT_CHILD_CONNECTING;
|
||||
conf->connecting= 1;
|
||||
ret = 0;
|
||||
}
|
||||
|
@ -34,6 +34,55 @@ int client_init_rpc (xlator_t *this);
|
||||
int client_destroy_rpc (xlator_t *this);
|
||||
int client_mark_fd_bad (xlator_t *this);
|
||||
|
||||
static int
|
||||
client_notify_dispatch_uniq (xlator_t *this, int32_t event, void *data, ...)
|
||||
{
|
||||
clnt_conf_t *conf = this->private;
|
||||
|
||||
if (conf->last_sent_event == event)
|
||||
return 0;
|
||||
|
||||
return client_notify_dispatch (this, event, data);
|
||||
}
|
||||
|
||||
int
|
||||
client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...)
|
||||
{
|
||||
int ret = -1;
|
||||
glusterfs_ctx_t *ctx = this->ctx;
|
||||
clnt_conf_t *conf = this->private;
|
||||
|
||||
pthread_mutex_lock (&ctx->notify_lock);
|
||||
{
|
||||
while (ctx->notifying)
|
||||
pthread_cond_wait (&ctx->notify_cond,
|
||||
&ctx->notify_lock);
|
||||
ctx->notifying = 1;
|
||||
}
|
||||
pthread_mutex_unlock (&ctx->notify_lock);
|
||||
|
||||
/* We assume that all translators in the graph handle notification
|
||||
* events in sequence.
|
||||
* */
|
||||
ret = default_notify (this, event, data);
|
||||
|
||||
/* NB (Even) with MT-epoll and EPOLLET|EPOLLONESHOT we are guaranteed
|
||||
* that there would be atmost one poller thread executing this
|
||||
* notification function. This allows us to update last_sent_event
|
||||
* without explicit synchronization. See epoll(7).
|
||||
*/
|
||||
conf->last_sent_event = event;
|
||||
|
||||
pthread_mutex_lock (&ctx->notify_lock);
|
||||
{
|
||||
ctx->notifying = 0;
|
||||
pthread_cond_signal (&ctx->notify_cond);
|
||||
}
|
||||
pthread_mutex_unlock (&ctx->notify_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t
|
||||
client_type_to_gf_type (short l_type)
|
||||
{
|
||||
@ -2169,14 +2218,12 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
|
||||
"handshake msg returned %d", ret);
|
||||
} else {
|
||||
//conf->rpc->connected = 1;
|
||||
if (conf->last_sent_event != GF_EVENT_CHILD_UP) {
|
||||
ret = default_notify (this, GF_EVENT_CHILD_UP,
|
||||
NULL);
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"CHILD_UP notify failed");
|
||||
conf->last_sent_event = GF_EVENT_CHILD_UP;
|
||||
}
|
||||
ret = client_notify_dispatch_uniq (this,
|
||||
GF_EVENT_CHILD_UP,
|
||||
NULL);
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"CHILD_UP notify failed");
|
||||
}
|
||||
|
||||
/* Cancel grace timer if set */
|
||||
@ -2224,14 +2271,13 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
|
||||
may get screwed up.. (eg. CHILD_MODIFIED event in
|
||||
replicate), hence make sure events which are passed
|
||||
to parent are genuine */
|
||||
if (conf->last_sent_event != GF_EVENT_CHILD_DOWN) {
|
||||
ret = default_notify (this, GF_EVENT_CHILD_DOWN,
|
||||
NULL);
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"CHILD_DOWN notify failed");
|
||||
conf->last_sent_event = GF_EVENT_CHILD_DOWN;
|
||||
}
|
||||
ret = client_notify_dispatch_uniq (this,
|
||||
GF_EVENT_CHILD_DOWN,
|
||||
NULL);
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"CHILD_DOWN notify failed");
|
||||
|
||||
} else {
|
||||
if (conf->connected)
|
||||
gf_log (this->name, GF_LOG_DEBUG,
|
||||
|
@ -262,4 +262,6 @@ int client_fd_fop_prepare_local (call_frame_t *frame, fd_t *fd,
|
||||
int64_t remote_fd);
|
||||
gf_boolean_t
|
||||
__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx);
|
||||
int
|
||||
client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...);
|
||||
#endif /* !_CLIENT_H */
|
||||
|
Reference in New Issue
Block a user