ceph: fix locking for waking session requests after reconnect

The session->s_waiting list is protected by mdsc->mutex, not s_mutex.  This
was causing (rare) s_waiting list corruption.

Fix errors paths too, while we're here.  A more thorough cleanup of this
function is coming soon.

Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
Sage Weil 2010-05-10 21:58:38 -07:00
parent d85b705663
commit 9abf82b8bc

View File

@ -2136,7 +2136,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
struct ceph_mds_session *session = NULL;
struct ceph_msg *reply;
struct rb_node *p;
int err;
int err = -ENOMEM;
struct ceph_pagelist *pagelist;
pr_info("reconnect to recovering mds%d\n", mds);
@ -2185,7 +2185,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
goto fail;
err = iterate_session_caps(session, encode_caps_cb, pagelist);
if (err < 0)
goto out;
goto fail;
/*
* snaprealms. we provide mds with the ino, seq (version), and
@ -2213,28 +2213,31 @@ send:
reply->nr_pages = calc_pages_for(0, pagelist->length);
ceph_con_send(&session->s_con, reply);
if (session) {
session->s_state = CEPH_MDS_SESSION_OPEN;
__wake_requests(mdsc, &session->s_waiting);
}
session->s_state = CEPH_MDS_SESSION_OPEN;
mutex_unlock(&session->s_mutex);
mutex_lock(&mdsc->mutex);
__wake_requests(mdsc, &session->s_waiting);
mutex_unlock(&mdsc->mutex);
ceph_put_mds_session(session);
out:
up_read(&mdsc->snap_rwsem);
if (session) {
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
}
mutex_lock(&mdsc->mutex);
return;
fail:
ceph_msg_put(reply);
up_read(&mdsc->snap_rwsem);
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
fail_nomsg:
ceph_pagelist_release(pagelist);
kfree(pagelist);
fail_nopagelist:
pr_err("ENOMEM preparing reconnect for mds%d\n", mds);
goto out;
pr_err("error %d preparing reconnect for mds%d\n", err, mds);
mutex_lock(&mdsc->mutex);
return;
}