glusterd: compare friend data within mutex
During the friend handshake, if glusterd receives more than one friend update, it is quite possible for two threads to end up working on two different volinfo references, and glusterd might then update the store with an old volinfo reference. This was observed while debugging a glusterd crash in the validating-server-quorum.t test file from the line-coverage regression. The solution is to run glusterd_compare_friend_data under a mutex. Test: As the crash was more visible in the line-coverage run (given that lcov does some instrumentation and exposes the races), 6 manual lcov runs were triggered, from https://build.gluster.org/job/line-coverage/443 to https://build.gluster.org/job/line-coverage/449/, and no crash was observed from validating-server-quorum.t. Change-Id: I86fce473a76fd24742d51bf17a685d28b90a8941 Fixes: bz#1603063 Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
This commit is contained in:
parent
48b93c292c
commit
29d5557854
@ -937,54 +937,59 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
|
||||
*/
|
||||
|
||||
//Build comparison logic here.
|
||||
ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
|
||||
event->peername);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (GLUSTERD_VOL_COMP_RJT != status) {
|
||||
event_type = GD_FRIEND_EVENT_LOCAL_ACC;
|
||||
op_ret = 0;
|
||||
} else {
|
||||
event_type = GD_FRIEND_EVENT_LOCAL_RJT;
|
||||
op_errno = GF_PROBE_VOLUME_CONFLICT;
|
||||
op_ret = -1;
|
||||
}
|
||||
|
||||
/* Compare missed_snapshot list with the peer *
|
||||
* if volume comparison is successful */
|
||||
if ((op_ret == 0) &&
|
||||
(conf->op_version >= GD_OP_VERSION_3_6_0)) {
|
||||
ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols);
|
||||
pthread_mutex_lock (&conf->import_volumes);
|
||||
{
|
||||
ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
|
||||
event->peername);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
|
||||
"Failed to import peer's "
|
||||
"missed_snaps_list.");
|
||||
pthread_mutex_unlock (&conf->import_volumes);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (GLUSTERD_VOL_COMP_RJT != status) {
|
||||
event_type = GD_FRIEND_EVENT_LOCAL_ACC;
|
||||
op_ret = 0;
|
||||
} else {
|
||||
event_type = GD_FRIEND_EVENT_LOCAL_RJT;
|
||||
op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
|
||||
op_errno = GF_PROBE_VOLUME_CONFLICT;
|
||||
op_ret = -1;
|
||||
}
|
||||
|
||||
/* glusterd_compare_friend_snapshots and functions only require
|
||||
* a peers hostname and uuid. It also does updates, which
|
||||
* require use of synchronize_rcu. So we pass the hostname and
|
||||
* id from the event instead of the peerinfo object to prevent
|
||||
* deadlocks as above.
|
||||
*/
|
||||
ret = glusterd_compare_friend_snapshots (ev_ctx->vols,
|
||||
event->peername,
|
||||
event->peerid);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_SNAP_COMPARE_CONFLICT,
|
||||
"Conflict in comparing peer's snapshots");
|
||||
event_type = GD_FRIEND_EVENT_LOCAL_RJT;
|
||||
op_errno = GF_PROBE_SNAP_CONFLICT;
|
||||
op_ret = -1;
|
||||
/* Compare missed_snapshot list with the peer *
|
||||
* if volume comparison is successful */
|
||||
if ((op_ret == 0) &&
|
||||
(conf->op_version >= GD_OP_VERSION_3_6_0)) {
|
||||
ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
|
||||
"Failed to import peer's "
|
||||
"missed_snaps_list.");
|
||||
event_type = GD_FRIEND_EVENT_LOCAL_RJT;
|
||||
op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
|
||||
op_ret = -1;
|
||||
}
|
||||
|
||||
/* glusterd_compare_friend_snapshots and functions only require
|
||||
* a peers hostname and uuid. It also does updates, which
|
||||
* require use of synchronize_rcu. So we pass the hostname and
|
||||
* id from the event instead of the peerinfo object to prevent
|
||||
* deadlocks as above.
|
||||
*/
|
||||
ret = glusterd_compare_friend_snapshots (ev_ctx->vols,
|
||||
event->peername,
|
||||
event->peerid);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
GD_MSG_SNAP_COMPARE_CONFLICT,
|
||||
"Conflict in comparing peer's snapshots");
|
||||
event_type = GD_FRIEND_EVENT_LOCAL_RJT;
|
||||
op_errno = GF_PROBE_SNAP_CONFLICT;
|
||||
op_ret = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock (&conf->import_volumes);
|
||||
ret = glusterd_friend_sm_new_event (event_type, &new_event);
|
||||
|
||||
if (ret) {
|
||||
|
@ -1854,6 +1854,7 @@ init (xlator_t *this)
|
||||
synclock_init (&conf->big_lock, SYNC_LOCK_RECURSIVE);
|
||||
pthread_mutex_init (&conf->xprt_lock, NULL);
|
||||
INIT_LIST_HEAD (&conf->xprt_list);
|
||||
pthread_mutex_init (&conf->import_volumes, NULL);
|
||||
|
||||
glusterd_friend_sm_init ();
|
||||
glusterd_op_sm_init ();
|
||||
|
@ -162,6 +162,7 @@ typedef struct {
|
||||
struct cds_list_head brick_procs; /* List of brick processes */
|
||||
pthread_mutex_t xprt_lock;
|
||||
struct list_head xprt_list;
|
||||
pthread_mutex_t import_volumes;
|
||||
gf_store_handle_t *handle;
|
||||
gf_timer_t *timer;
|
||||
glusterd_sm_tr_log_t op_sm_log;
|
||||
|
Loading…
x
Reference in New Issue
Block a user