ceph: make sure syncfs flushes all cap snaps
Signed-off-by: Yan, Zheng <zyan@redhat.com>
This commit is contained in:
parent
622f3e250f
commit
affbc19a68
@ -1259,14 +1259,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
|
|||||||
* asynchronously back to the MDS once sync writes complete and dirty
|
* asynchronously back to the MDS once sync writes complete and dirty
|
||||||
* data is written out.
|
* data is written out.
|
||||||
*
|
*
|
||||||
* Unless @again is true, skip cap_snaps that were already sent to
|
* Unless @kick is true, skip cap_snaps that were already sent to
|
||||||
* the MDS (i.e., during this session).
|
* the MDS (i.e., during this session).
|
||||||
*
|
*
|
||||||
* Called under i_ceph_lock. Takes s_mutex as needed.
|
* Called under i_ceph_lock. Takes s_mutex as needed.
|
||||||
*/
|
*/
|
||||||
void __ceph_flush_snaps(struct ceph_inode_info *ci,
|
void __ceph_flush_snaps(struct ceph_inode_info *ci,
|
||||||
struct ceph_mds_session **psession,
|
struct ceph_mds_session **psession,
|
||||||
int again)
|
int kick)
|
||||||
__releases(ci->i_ceph_lock)
|
__releases(ci->i_ceph_lock)
|
||||||
__acquires(ci->i_ceph_lock)
|
__acquires(ci->i_ceph_lock)
|
||||||
{
|
{
|
||||||
@ -1307,7 +1307,7 @@ retry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* only flush each capsnap once */
|
/* only flush each capsnap once */
|
||||||
if (!again && !list_empty(&capsnap->flushing_item)) {
|
if (!kick && !list_empty(&capsnap->flushing_item)) {
|
||||||
dout("already flushed %p, skipping\n", capsnap);
|
dout("already flushed %p, skipping\n", capsnap);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1317,6 +1317,9 @@ retry:
|
|||||||
|
|
||||||
if (session && session->s_mds != mds) {
|
if (session && session->s_mds != mds) {
|
||||||
dout("oops, wrong session %p mutex\n", session);
|
dout("oops, wrong session %p mutex\n", session);
|
||||||
|
if (kick)
|
||||||
|
goto out;
|
||||||
|
|
||||||
mutex_unlock(&session->s_mutex);
|
mutex_unlock(&session->s_mutex);
|
||||||
ceph_put_mds_session(session);
|
ceph_put_mds_session(session);
|
||||||
session = NULL;
|
session = NULL;
|
||||||
@ -1342,10 +1345,9 @@ retry:
|
|||||||
|
|
||||||
capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
|
capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
|
||||||
atomic_inc(&capsnap->nref);
|
atomic_inc(&capsnap->nref);
|
||||||
if (!list_empty(&capsnap->flushing_item))
|
if (list_empty(&capsnap->flushing_item))
|
||||||
list_del_init(&capsnap->flushing_item);
|
list_add_tail(&capsnap->flushing_item,
|
||||||
list_add_tail(&capsnap->flushing_item,
|
&session->s_cap_snaps_flushing);
|
||||||
&session->s_cap_snaps_flushing);
|
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
|
|
||||||
dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
|
dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
|
||||||
@ -2876,6 +2878,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
|
|||||||
struct ceph_mds_session *session)
|
struct ceph_mds_session *session)
|
||||||
{
|
{
|
||||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||||
|
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
|
||||||
u64 follows = le64_to_cpu(m->snap_follows);
|
u64 follows = le64_to_cpu(m->snap_follows);
|
||||||
struct ceph_cap_snap *capsnap;
|
struct ceph_cap_snap *capsnap;
|
||||||
int drop = 0;
|
int drop = 0;
|
||||||
@ -2899,6 +2902,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
|
|||||||
list_del(&capsnap->ci_item);
|
list_del(&capsnap->ci_item);
|
||||||
list_del(&capsnap->flushing_item);
|
list_del(&capsnap->flushing_item);
|
||||||
ceph_put_cap_snap(capsnap);
|
ceph_put_cap_snap(capsnap);
|
||||||
|
wake_up_all(&mdsc->cap_flushing_wq);
|
||||||
drop = 1;
|
drop = 1;
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1488,17 +1488,22 @@ out_unlocked:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
|
static int check_cap_flush(struct ceph_inode_info *ci,
|
||||||
|
u64 want_flush_seq, u64 want_snap_seq)
|
||||||
{
|
{
|
||||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
int ret1 = 1, ret2 = 1;
|
||||||
int ret;
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
if (ci->i_flushing_caps)
|
if (want_flush_seq > 0 && ci->i_flushing_caps)
|
||||||
ret = ci->i_cap_flush_seq >= want_flush_seq;
|
ret1 = ci->i_cap_flush_seq >= want_flush_seq;
|
||||||
else
|
|
||||||
ret = 1;
|
if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) {
|
||||||
|
struct ceph_cap_snap *capsnap =
|
||||||
|
list_first_entry(&ci->i_cap_snaps,
|
||||||
|
struct ceph_cap_snap, ci_item);
|
||||||
|
ret2 = capsnap->follows >= want_snap_seq;
|
||||||
|
}
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
return ret;
|
return ret1 && ret2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1506,44 +1511,71 @@ static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
|
|||||||
*
|
*
|
||||||
* returns true if we've flushed through want_flush_seq
|
* returns true if we've flushed through want_flush_seq
|
||||||
*/
|
*/
|
||||||
static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
|
static void wait_caps_flush(struct ceph_mds_client *mdsc,
|
||||||
|
u64 want_flush_seq, u64 want_snap_seq)
|
||||||
{
|
{
|
||||||
int mds;
|
int mds;
|
||||||
|
|
||||||
dout("check_cap_flush want %lld\n", want_flush_seq);
|
dout("check_cap_flush want %lld\n", want_flush_seq);
|
||||||
mutex_lock(&mdsc->mutex);
|
mutex_lock(&mdsc->mutex);
|
||||||
for (mds = 0; mds < mdsc->max_sessions; mds++) {
|
for (mds = 0; mds < mdsc->max_sessions; ) {
|
||||||
struct ceph_mds_session *session = mdsc->sessions[mds];
|
struct ceph_mds_session *session = mdsc->sessions[mds];
|
||||||
struct inode *inode = NULL;
|
struct inode *inode1 = NULL, *inode2 = NULL;
|
||||||
|
|
||||||
if (!session)
|
if (!session) {
|
||||||
|
mds++;
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
get_session(session);
|
get_session(session);
|
||||||
mutex_unlock(&mdsc->mutex);
|
mutex_unlock(&mdsc->mutex);
|
||||||
|
|
||||||
mutex_lock(&session->s_mutex);
|
mutex_lock(&session->s_mutex);
|
||||||
if (!list_empty(&session->s_cap_flushing)) {
|
if (!list_empty(&session->s_cap_flushing)) {
|
||||||
struct ceph_inode_info *ci =
|
struct ceph_inode_info *ci =
|
||||||
list_entry(session->s_cap_flushing.next,
|
list_first_entry(&session->s_cap_flushing,
|
||||||
struct ceph_inode_info,
|
struct ceph_inode_info,
|
||||||
i_flushing_item);
|
i_flushing_item);
|
||||||
|
|
||||||
if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) {
|
if (!check_cap_flush(ci, want_flush_seq, 0)) {
|
||||||
dout("check_cap_flush still flushing %p "
|
dout("check_cap_flush still flushing %p "
|
||||||
"seq %lld <= %lld to mds%d\n",
|
"seq %lld <= %lld to mds%d\n",
|
||||||
&ci->vfs_inode, ci->i_cap_flush_seq,
|
&ci->vfs_inode, ci->i_cap_flush_seq,
|
||||||
want_flush_seq, session->s_mds);
|
want_flush_seq, mds);
|
||||||
inode = igrab(&ci->vfs_inode);
|
inode1 = igrab(&ci->vfs_inode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!list_empty(&session->s_cap_snaps_flushing)) {
|
||||||
|
struct ceph_cap_snap *capsnap =
|
||||||
|
list_first_entry(&session->s_cap_snaps_flushing,
|
||||||
|
struct ceph_cap_snap,
|
||||||
|
flushing_item);
|
||||||
|
struct ceph_inode_info *ci = capsnap->ci;
|
||||||
|
if (!check_cap_flush(ci, 0, want_snap_seq)) {
|
||||||
|
dout("check_cap_flush still flushing snap %p "
|
||||||
|
"follows %lld <= %lld to mds%d\n",
|
||||||
|
&ci->vfs_inode, capsnap->follows,
|
||||||
|
want_snap_seq, mds);
|
||||||
|
inode2 = igrab(&ci->vfs_inode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mutex_unlock(&session->s_mutex);
|
mutex_unlock(&session->s_mutex);
|
||||||
ceph_put_mds_session(session);
|
ceph_put_mds_session(session);
|
||||||
|
|
||||||
if (inode) {
|
if (inode1) {
|
||||||
wait_event(mdsc->cap_flushing_wq,
|
wait_event(mdsc->cap_flushing_wq,
|
||||||
check_cap_flush(inode, want_flush_seq));
|
check_cap_flush(ceph_inode(inode1),
|
||||||
iput(inode);
|
want_flush_seq, 0));
|
||||||
|
iput(inode1);
|
||||||
}
|
}
|
||||||
|
if (inode2) {
|
||||||
|
wait_event(mdsc->cap_flushing_wq,
|
||||||
|
check_cap_flush(ceph_inode(inode2),
|
||||||
|
0, want_snap_seq));
|
||||||
|
iput(inode2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!inode1 && !inode2)
|
||||||
|
mds++;
|
||||||
|
|
||||||
mutex_lock(&mdsc->mutex);
|
mutex_lock(&mdsc->mutex);
|
||||||
}
|
}
|
||||||
@ -3391,6 +3423,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
|
|||||||
atomic_set(&mdsc->num_sessions, 0);
|
atomic_set(&mdsc->num_sessions, 0);
|
||||||
mdsc->max_sessions = 0;
|
mdsc->max_sessions = 0;
|
||||||
mdsc->stopping = 0;
|
mdsc->stopping = 0;
|
||||||
|
mdsc->last_snap_seq = 0;
|
||||||
init_rwsem(&mdsc->snap_rwsem);
|
init_rwsem(&mdsc->snap_rwsem);
|
||||||
mdsc->snap_realms = RB_ROOT;
|
mdsc->snap_realms = RB_ROOT;
|
||||||
INIT_LIST_HEAD(&mdsc->snap_empty);
|
INIT_LIST_HEAD(&mdsc->snap_empty);
|
||||||
@ -3517,7 +3550,7 @@ restart:
|
|||||||
|
|
||||||
void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
|
void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
|
||||||
{
|
{
|
||||||
u64 want_tid, want_flush;
|
u64 want_tid, want_flush, want_snap;
|
||||||
|
|
||||||
if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
|
if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
|
||||||
return;
|
return;
|
||||||
@ -3532,10 +3565,15 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
|
|||||||
want_flush = mdsc->cap_flush_seq;
|
want_flush = mdsc->cap_flush_seq;
|
||||||
spin_unlock(&mdsc->cap_dirty_lock);
|
spin_unlock(&mdsc->cap_dirty_lock);
|
||||||
|
|
||||||
dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
|
down_read(&mdsc->snap_rwsem);
|
||||||
|
want_snap = mdsc->last_snap_seq;
|
||||||
|
up_read(&mdsc->snap_rwsem);
|
||||||
|
|
||||||
|
dout("sync want tid %lld flush_seq %lld snap_seq %lld\n",
|
||||||
|
want_tid, want_flush, want_snap);
|
||||||
|
|
||||||
wait_unsafe_requests(mdsc, want_tid);
|
wait_unsafe_requests(mdsc, want_tid);
|
||||||
wait_caps_flush(mdsc, want_flush);
|
wait_caps_flush(mdsc, want_flush, want_snap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -290,6 +290,7 @@ struct ceph_mds_client {
|
|||||||
* references (implying they contain no inodes with caps) that
|
* references (implying they contain no inodes with caps) that
|
||||||
* should be destroyed.
|
* should be destroyed.
|
||||||
*/
|
*/
|
||||||
|
u64 last_snap_seq;
|
||||||
struct rw_semaphore snap_rwsem;
|
struct rw_semaphore snap_rwsem;
|
||||||
struct rb_root snap_realms;
|
struct rb_root snap_realms;
|
||||||
struct list_head snap_empty;
|
struct list_head snap_empty;
|
||||||
|
@ -730,6 +730,8 @@ more:
|
|||||||
|
|
||||||
/* queue realm for cap_snap creation */
|
/* queue realm for cap_snap creation */
|
||||||
list_add(&realm->dirty_item, &dirty_realms);
|
list_add(&realm->dirty_item, &dirty_realms);
|
||||||
|
if (realm->seq > mdsc->last_snap_seq)
|
||||||
|
mdsc->last_snap_seq = realm->seq;
|
||||||
|
|
||||||
invalidate = 1;
|
invalidate = 1;
|
||||||
} else if (!realm->cached_context) {
|
} else if (!realm->cached_context) {
|
||||||
|
Loading…
Reference in New Issue
Block a user