ceph: fix flushing of caps vs cap import

If we are mid-flush and a cap is migrated to another node, we need to
resend the cap flush message to the new MDS, and do so with the original
flush_seq to avoid leaking across a sync boundary.  Previously we didn't
redo the flush (we only flushed newly dirty data), which would cause a
later sync to hang forever.

Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
Sage Weil 2011-01-18 08:56:01 -08:00
parent 24be0c4810
commit 088b3f5e9e

View File

@ -1560,8 +1560,9 @@ retry_locked:
/* NOTE: no side-effects allowed, until we take s_mutex */
revoking = cap->implemented & ~cap->issued;
if (revoking)
dout(" mds%d revoking %s\n", cap->mds,
dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
cap->mds, cap, ceph_cap_string(cap->issued),
ceph_cap_string(cap->implemented),
ceph_cap_string(revoking));
if (cap == ci->i_auth_cap &&
@ -1942,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
}
}
/*
 * Re-send an in-progress cap flush for a single inode.
 *
 * Under inode->i_lock, grab the inode's auth cap, kick any pending snap
 * flushes via __ceph_flush_snaps(), and, if i_flushing_caps is still
 * non-zero, send a CEPH_CAP_OP_FLUSH covering exactly those caps through
 * the auth cap.  If __send_cap() reports the send as delayed, the inode
 * is put back on the cap delay list.
 *
 * Locking: i_lock is taken here.  On the no-flush path it is dropped
 * explicitly.  On the flush path this function does not drop it and
 * re-takes it before requeueing, so __send_cap() is presumably expected
 * to return with i_lock released -- NOTE(review): confirm against
 * __send_cap().
 */
static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
				     struct ceph_mds_session *session,
				     struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_cap *auth_cap;

	spin_lock(&inode->i_lock);
	auth_cap = ci->i_auth_cap;
	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);

	__ceph_flush_snaps(ci, &session, 1);

	/* Nothing mid-flush: just release the lock and leave. */
	if (!ci->i_flushing_caps) {
		spin_unlock(&inode->i_lock);
		return;
	}

	/* Redo the flush for the caps that are still marked as flushing. */
	if (!__send_cap(mdsc, auth_cap, CEPH_CAP_OP_FLUSH,
			__ceph_caps_used(ci),
			__ceph_caps_wanted(ci),
			auth_cap->issued | auth_cap->implemented,
			ci->i_flushing_caps, NULL))
		return;

	/* The send was delayed; requeue the inode under i_lock. */
	spin_lock(&inode->i_lock);
	__cap_delay_requeue(mdsc, ci);
	spin_unlock(&inode->i_lock);
}
/*
* Take references to capabilities we hold, so that we don't release
@ -2689,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
ceph_add_cap(inode, session, cap_id, -1,
issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
NULL /* no caps context */);
try_flush_caps(inode, session, NULL);
kick_flushing_inode_caps(mdsc, session, inode);
up_read(&mdsc->snap_rwsem);
/* make sure we re-request max_size, if necessary */