protocol/client: sequence CHILD_UP, CHILD_DOWN etc notifications

... from all bricks in the volume

This patch is important in the context of MT epoll. With MT epoll,
notification events from client xlators could reach cluster xlators like
afr, dht, ec, stripe etc. in different orders.

For example, in a distributed replicate volume of 2 bricks, namely Brick1
and Brick2, the following network events are observed by a mount
process.

- connection to Brick1 is broken.
- connection to Brick1 has been restored.

- connection to Brick2 is broken.
- connection to Brick2 has been restored.

Without establishing a total ordering of events, we can't guarantee that
cluster xlators like afr, dht perceive them in the same order.  While we
would expect afr (say) to perceive it as only one of Brick1 and Brick2
going down at any given time, it is possible for the notification of
Brick2 going offline to race with the notification of Brick1 coming back
online.

Change-Id: I78f5a52bfb05593335d0e9ad53ebfff98995593d
BUG: 1104462
Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-on: http://review.gluster.org/9591
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Krishnan Parthasarathi
2015-02-05 15:41:35 +05:30
committed by Vijay Bellur
parent a7f5893c92
commit f18a3f30bb
5 changed files with 77 additions and 24 deletions

View File

@ -1394,6 +1394,8 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
goto out;
pthread_mutex_init (&(ctx->lock), NULL);
pthread_mutex_init (&ctx->notify_lock, NULL);
pthread_cond_init (&ctx->notify_cond, NULL);
ctx->clienttable = gf_clienttable_alloc();
if (!ctx->clienttable)

View File

@ -533,6 +533,10 @@ struct _glusterfs_ctx {
/* Buffer to 'save' backtrace even under OOM-kill like situations*/
char btbuf[GF_BACKTRACE_LEN];
pthread_mutex_t notify_lock;
pthread_cond_t notify_cond;
int notifying;
};
typedef struct _glusterfs_ctx glusterfs_ctx_t;

View File

@ -131,12 +131,11 @@ client_notify_parents_child_up (xlator_t *this)
int ret = 0;
conf = this->private;
ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
ret = client_notify_dispatch (this, GF_EVENT_CHILD_UP, NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"notify of CHILD_UP failed");
conf->last_sent_event = GF_EVENT_CHILD_UP;
return 0;
}
@ -1146,11 +1145,12 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
op_ret = 0;
}
if (op_errno == ESTALE) {
ret = default_notify (this, GF_EVENT_VOLFILE_MODIFIED, NULL);
ret = client_notify_dispatch (this,
GF_EVENT_VOLFILE_MODIFIED,
NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"notify of VOLFILE_MODIFIED failed");
conf->last_sent_event = GF_EVENT_VOLFILE_MODIFIED;
}
goto out;
}
@ -1223,13 +1223,12 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
out:
if (auth_fail) {
gf_log (this->name, GF_LOG_INFO, "sending AUTH_FAILED event");
ret = default_notify (this, GF_EVENT_AUTH_FAILED, NULL);
ret = client_notify_dispatch (this, GF_EVENT_AUTH_FAILED, NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"notify of AUTH_FAILED failed");
conf->connecting = 0;
conf->connected = 0;
conf->last_sent_event = GF_EVENT_AUTH_FAILED;
ret = -1;
}
if (-1 == op_ret) {
@ -1238,11 +1237,11 @@ out:
* tell the parents that i am all ok..
*/
gf_log (this->name, GF_LOG_INFO, "sending CHILD_CONNECTING event");
ret = default_notify (this, GF_EVENT_CHILD_CONNECTING, NULL);
ret = client_notify_dispatch (this, GF_EVENT_CHILD_CONNECTING,
NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"notify of CHILD_CONNECTING failed");
conf->last_sent_event = GF_EVENT_CHILD_CONNECTING;
conf->connecting= 1;
ret = 0;
}

View File

@ -34,6 +34,55 @@ int client_init_rpc (xlator_t *this);
int client_destroy_rpc (xlator_t *this);
int client_mark_fd_bad (xlator_t *this);
/* Dispatch 'event' through client_notify_dispatch() only when it differs
 * from the event this client recorded as last sent.
 *
 * Returns 0 without dispatching anything when 'event' equals
 * conf->last_sent_event; otherwise returns whatever
 * client_notify_dispatch() returns. The variadic tail is not inspected.
 */
static int
client_notify_dispatch_uniq (xlator_t *this, int32_t event, void *data, ...)
{
        clnt_conf_t *priv = this->private;
        int          rc   = 0;

        /* A repeat of the previous event is dropped here. */
        if (priv->last_sent_event != event)
                rc = client_notify_dispatch (this, event, data);

        return rc;
}
/* Deliver 'event' via default_notify() while holding the ctx-wide
 * "notifying" turn, so that at most one caller sharing this glusterfs_ctx_t
 * is inside default_notify() on behalf of this function at any time.
 *
 * After delivery the event is recorded in conf->last_sent_event, whether or
 * not default_notify() reported success.
 *
 * Returns the value returned by default_notify(). The variadic tail is not
 * inspected.
 */
int
client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...)
{
        glusterfs_ctx_t *gctx   = this->ctx;
        clnt_conf_t     *priv   = this->private;
        int              status = -1;

        /* Wait for any in-flight notification to finish, then claim the
         * turn by raising the flag before dropping the lock. */
        pthread_mutex_lock (&gctx->notify_lock);
        while (gctx->notifying != 0)
                pthread_cond_wait (&gctx->notify_cond, &gctx->notify_lock);
        gctx->notifying = 1;
        pthread_mutex_unlock (&gctx->notify_lock);

        /* The lock is deliberately not held across the notification itself;
         * the flag alone serialises callers. This relies on every
         * translator in the graph handling notification events in
         * sequence. */
        status = default_notify (this, event, data);

        /* No extra locking around last_sent_event: per the original
         * author's note, even with multi-threaded epoll, EPOLLET |
         * EPOLLONESHOT guarantees at most one poller thread runs this
         * notification function for a given client. See epoll(7). */
        priv->last_sent_event = event;

        /* Give up the turn and wake one waiter, if any. */
        pthread_mutex_lock (&gctx->notify_lock);
        gctx->notifying = 0;
        pthread_cond_signal (&gctx->notify_cond);
        pthread_mutex_unlock (&gctx->notify_lock);

        return status;
}
int32_t
client_type_to_gf_type (short l_type)
{
@ -2169,14 +2218,12 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
"handshake msg returned %d", ret);
} else {
//conf->rpc->connected = 1;
if (conf->last_sent_event != GF_EVENT_CHILD_UP) {
ret = default_notify (this, GF_EVENT_CHILD_UP,
NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"CHILD_UP notify failed");
conf->last_sent_event = GF_EVENT_CHILD_UP;
}
ret = client_notify_dispatch_uniq (this,
GF_EVENT_CHILD_UP,
NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"CHILD_UP notify failed");
}
/* Cancel grace timer if set */
@ -2224,14 +2271,13 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
may get screwed up.. (eg. CHILD_MODIFIED event in
replicate), hence make sure events which are passed
to parent are genuine */
if (conf->last_sent_event != GF_EVENT_CHILD_DOWN) {
ret = default_notify (this, GF_EVENT_CHILD_DOWN,
NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"CHILD_DOWN notify failed");
conf->last_sent_event = GF_EVENT_CHILD_DOWN;
}
ret = client_notify_dispatch_uniq (this,
GF_EVENT_CHILD_DOWN,
NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"CHILD_DOWN notify failed");
} else {
if (conf->connected)
gf_log (this->name, GF_LOG_DEBUG,

View File

@ -262,4 +262,6 @@ int client_fd_fop_prepare_local (call_frame_t *frame, fd_t *fd,
int64_t remote_fd);
gf_boolean_t
__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx);
/* Serialised notification entry point for the client translator.
 *
 * Waits until no other caller sharing this->ctx is in the middle of a
 * notification, forwards 'event' and 'data' to default_notify(), records
 * 'event' as the client's last_sent_event and then lets the next waiter
 * proceed. Returns the result of default_notify().
 */
int
client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...);
#endif /* !_CLIENT_H */