ceph: unify cap flush and snapcap flush
This patch includes the following changes:
- Assign a flush tid to each snapcap flush.
- Remove the session's s_cap_snaps_flushing list and add the inode to the session's s_cap_flushing list instead. The inode is removed from the list when there is no pending snapcap flush or cap flush.
- Make __kick_flushing_caps() re-send both snapcap flushes and cap flushes.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
This commit is contained in:
parent
e4500b5e35
commit
0e29438789
291
fs/ceph/caps.c
291
fs/ceph/caps.c
@ -40,6 +40,7 @@
|
|||||||
* cluster to release server state.
|
* cluster to release server state.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generate readable cap strings for debugging output.
|
* Generate readable cap strings for debugging output.
|
||||||
@ -1217,6 +1218,22 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
|
|||||||
return delayed;
|
return delayed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int __send_flush_snap(struct inode *inode,
|
||||||
|
struct ceph_mds_session *session,
|
||||||
|
struct ceph_cap_snap *capsnap,
|
||||||
|
u32 mseq, u64 oldest_flush_tid)
|
||||||
|
{
|
||||||
|
return send_cap_msg(session, ceph_vino(inode).ino, 0,
|
||||||
|
CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
|
||||||
|
capsnap->dirty, 0, capsnap->cap_flush.tid,
|
||||||
|
oldest_flush_tid, 0, mseq, capsnap->size, 0,
|
||||||
|
&capsnap->mtime, &capsnap->atime,
|
||||||
|
&capsnap->ctime, capsnap->time_warp_seq,
|
||||||
|
capsnap->uid, capsnap->gid, capsnap->mode,
|
||||||
|
capsnap->xattr_version, capsnap->xattr_blob,
|
||||||
|
capsnap->follows, capsnap->inline_data);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When a snapshot is taken, clients accumulate dirty metadata on
|
* When a snapshot is taken, clients accumulate dirty metadata on
|
||||||
* inodes with capabilities in ceph_cap_snaps to describe the file
|
* inodes with capabilities in ceph_cap_snaps to describe the file
|
||||||
@ -1224,14 +1241,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
|
|||||||
* asynchronously back to the MDS once sync writes complete and dirty
|
* asynchronously back to the MDS once sync writes complete and dirty
|
||||||
* data is written out.
|
* data is written out.
|
||||||
*
|
*
|
||||||
* Unless @kick is true, skip cap_snaps that were already sent to
|
|
||||||
* the MDS (i.e., during this session).
|
|
||||||
*
|
|
||||||
* Called under i_ceph_lock. Takes s_mutex as needed.
|
* Called under i_ceph_lock. Takes s_mutex as needed.
|
||||||
*/
|
*/
|
||||||
void __ceph_flush_snaps(struct ceph_inode_info *ci,
|
void __ceph_flush_snaps(struct ceph_inode_info *ci,
|
||||||
struct ceph_mds_session **psession,
|
struct ceph_mds_session **psession)
|
||||||
int kick)
|
|
||||||
__releases(ci->i_ceph_lock)
|
__releases(ci->i_ceph_lock)
|
||||||
__acquires(ci->i_ceph_lock)
|
__acquires(ci->i_ceph_lock)
|
||||||
{
|
{
|
||||||
@ -1242,6 +1255,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
|
|||||||
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
|
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
|
||||||
struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
|
struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
|
||||||
session->s_mutex */
|
session->s_mutex */
|
||||||
|
u64 oldest_flush_tid;
|
||||||
u64 next_follows = 0; /* keep track of how far we've gotten through the
|
u64 next_follows = 0; /* keep track of how far we've gotten through the
|
||||||
i_cap_snaps list, and skip these entries next time
|
i_cap_snaps list, and skip these entries next time
|
||||||
around to avoid an infinite loop */
|
around to avoid an infinite loop */
|
||||||
@ -1272,7 +1286,7 @@ retry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* only flush each capsnap once */
|
/* only flush each capsnap once */
|
||||||
if (!kick && !list_empty(&capsnap->flushing_item)) {
|
if (capsnap->cap_flush.tid > 0) {
|
||||||
dout("already flushed %p, skipping\n", capsnap);
|
dout("already flushed %p, skipping\n", capsnap);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1282,8 +1296,6 @@ retry:
|
|||||||
|
|
||||||
if (session && session->s_mds != mds) {
|
if (session && session->s_mds != mds) {
|
||||||
dout("oops, wrong session %p mutex\n", session);
|
dout("oops, wrong session %p mutex\n", session);
|
||||||
if (kick)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
mutex_unlock(&session->s_mutex);
|
mutex_unlock(&session->s_mutex);
|
||||||
ceph_put_mds_session(session);
|
ceph_put_mds_session(session);
|
||||||
@ -1309,26 +1321,27 @@ retry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
spin_lock(&mdsc->cap_dirty_lock);
|
spin_lock(&mdsc->cap_dirty_lock);
|
||||||
capsnap->flush_tid = ++mdsc->last_cap_flush_tid;
|
capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid;
|
||||||
|
list_add_tail(&capsnap->cap_flush.g_list,
|
||||||
|
&mdsc->cap_flush_list);
|
||||||
|
oldest_flush_tid = __get_oldest_flush_tid(mdsc);
|
||||||
|
|
||||||
|
if (list_empty(&ci->i_flushing_item)) {
|
||||||
|
list_add_tail(&ci->i_flushing_item,
|
||||||
|
&session->s_cap_flushing);
|
||||||
|
}
|
||||||
spin_unlock(&mdsc->cap_dirty_lock);
|
spin_unlock(&mdsc->cap_dirty_lock);
|
||||||
|
|
||||||
|
list_add_tail(&capsnap->cap_flush.i_list,
|
||||||
|
&ci->i_cap_flush_list);
|
||||||
|
|
||||||
atomic_inc(&capsnap->nref);
|
atomic_inc(&capsnap->nref);
|
||||||
if (list_empty(&capsnap->flushing_item))
|
|
||||||
list_add_tail(&capsnap->flushing_item,
|
|
||||||
&session->s_cap_snaps_flushing);
|
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
|
|
||||||
dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
|
dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
|
||||||
inode, capsnap, capsnap->follows, capsnap->flush_tid);
|
inode, capsnap, capsnap->follows, capsnap->cap_flush.tid);
|
||||||
send_cap_msg(session, ceph_vino(inode).ino, 0,
|
__send_flush_snap(inode, session, capsnap, mseq,
|
||||||
CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
|
oldest_flush_tid);
|
||||||
capsnap->dirty, 0, capsnap->flush_tid, 0,
|
|
||||||
0, mseq, capsnap->size, 0,
|
|
||||||
&capsnap->mtime, &capsnap->atime,
|
|
||||||
&capsnap->ctime, capsnap->time_warp_seq,
|
|
||||||
capsnap->uid, capsnap->gid, capsnap->mode,
|
|
||||||
capsnap->xattr_version, capsnap->xattr_blob,
|
|
||||||
capsnap->follows, capsnap->inline_data);
|
|
||||||
|
|
||||||
next_follows = capsnap->follows + 1;
|
next_follows = capsnap->follows + 1;
|
||||||
ceph_put_cap_snap(capsnap);
|
ceph_put_cap_snap(capsnap);
|
||||||
@ -1354,7 +1367,7 @@ out:
|
|||||||
static void ceph_flush_snaps(struct ceph_inode_info *ci)
|
static void ceph_flush_snaps(struct ceph_inode_info *ci)
|
||||||
{
|
{
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
__ceph_flush_snaps(ci, NULL, 0);
|
__ceph_flush_snaps(ci, NULL);
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1476,11 +1489,6 @@ static int __mark_caps_flushing(struct inode *inode,
|
|||||||
if (list_empty(&ci->i_flushing_item)) {
|
if (list_empty(&ci->i_flushing_item)) {
|
||||||
list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
|
list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
|
||||||
mdsc->num_cap_flushing++;
|
mdsc->num_cap_flushing++;
|
||||||
dout(" inode %p now flushing tid %llu\n", inode, cf->tid);
|
|
||||||
} else {
|
|
||||||
list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
|
|
||||||
dout(" inode %p now flushing (more) tid %llu\n",
|
|
||||||
inode, cf->tid);
|
|
||||||
}
|
}
|
||||||
spin_unlock(&mdsc->cap_dirty_lock);
|
spin_unlock(&mdsc->cap_dirty_lock);
|
||||||
|
|
||||||
@ -1556,7 +1564,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
|
|||||||
|
|
||||||
/* flush snaps first time around only */
|
/* flush snaps first time around only */
|
||||||
if (!list_empty(&ci->i_cap_snaps))
|
if (!list_empty(&ci->i_cap_snaps))
|
||||||
__ceph_flush_snaps(ci, &session, 0);
|
__ceph_flush_snaps(ci, &session);
|
||||||
goto retry_locked;
|
goto retry_locked;
|
||||||
retry:
|
retry:
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
@ -1997,80 +2005,74 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
|
||||||
* After a recovering MDS goes active, we need to resend any caps
|
struct ceph_mds_session *session,
|
||||||
* we were flushing.
|
struct ceph_inode_info *ci,
|
||||||
*
|
u64 oldest_flush_tid)
|
||||||
* Caller holds session->s_mutex.
|
__releases(ci->i_ceph_lock)
|
||||||
*/
|
__acquires(ci->i_ceph_lock)
|
||||||
static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
|
|
||||||
struct ceph_mds_session *session)
|
|
||||||
{
|
|
||||||
struct ceph_cap_snap *capsnap;
|
|
||||||
|
|
||||||
dout("kick_flushing_capsnaps mds%d\n", session->s_mds);
|
|
||||||
list_for_each_entry(capsnap, &session->s_cap_snaps_flushing,
|
|
||||||
flushing_item) {
|
|
||||||
struct ceph_inode_info *ci = capsnap->ci;
|
|
||||||
struct inode *inode = &ci->vfs_inode;
|
|
||||||
struct ceph_cap *cap;
|
|
||||||
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
|
||||||
cap = ci->i_auth_cap;
|
|
||||||
if (cap && cap->session == session) {
|
|
||||||
dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
|
|
||||||
cap, capsnap);
|
|
||||||
__ceph_flush_snaps(ci, &session, 1);
|
|
||||||
} else {
|
|
||||||
pr_err("%p auth cap %p not mds%d ???\n", inode,
|
|
||||||
cap, session->s_mds);
|
|
||||||
}
|
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
|
|
||||||
struct ceph_mds_session *session,
|
|
||||||
struct ceph_inode_info *ci)
|
|
||||||
{
|
{
|
||||||
struct inode *inode = &ci->vfs_inode;
|
struct inode *inode = &ci->vfs_inode;
|
||||||
struct ceph_cap *cap;
|
struct ceph_cap *cap;
|
||||||
struct ceph_cap_flush *cf;
|
struct ceph_cap_flush *cf;
|
||||||
int delayed = 0;
|
int ret;
|
||||||
u64 first_tid = 0;
|
u64 first_tid = 0;
|
||||||
u64 oldest_flush_tid;
|
|
||||||
|
|
||||||
spin_lock(&mdsc->cap_dirty_lock);
|
|
||||||
oldest_flush_tid = __get_oldest_flush_tid(mdsc);
|
|
||||||
spin_unlock(&mdsc->cap_dirty_lock);
|
|
||||||
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
|
||||||
list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
|
list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
|
||||||
if (cf->tid < first_tid)
|
if (cf->tid < first_tid)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
cap = ci->i_auth_cap;
|
cap = ci->i_auth_cap;
|
||||||
if (!(cap && cap->session == session)) {
|
if (!(cap && cap->session == session)) {
|
||||||
pr_err("%p auth cap %p not mds%d ???\n", inode,
|
pr_err("%p auth cap %p not mds%d ???\n",
|
||||||
cap, session->s_mds);
|
inode, cap, session->s_mds);
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
first_tid = cf->tid + 1;
|
first_tid = cf->tid + 1;
|
||||||
|
|
||||||
dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode,
|
if (cf->caps) {
|
||||||
cap, cf->tid, ceph_cap_string(cf->caps));
|
dout("kick_flushing_caps %p cap %p tid %llu %s\n",
|
||||||
delayed |= __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
|
inode, cap, cf->tid, ceph_cap_string(cf->caps));
|
||||||
__ceph_caps_used(ci),
|
ci->i_ceph_flags |= CEPH_I_NODELAY;
|
||||||
__ceph_caps_wanted(ci),
|
ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
|
||||||
cap->issued | cap->implemented,
|
__ceph_caps_used(ci),
|
||||||
cf->caps, cf->tid, oldest_flush_tid);
|
__ceph_caps_wanted(ci),
|
||||||
|
cap->issued | cap->implemented,
|
||||||
|
cf->caps, cf->tid, oldest_flush_tid);
|
||||||
|
if (ret) {
|
||||||
|
pr_err("kick_flushing_caps: error sending "
|
||||||
|
"cap flush, ino (%llx.%llx) "
|
||||||
|
"tid %llu flushing %s\n",
|
||||||
|
ceph_vinop(inode), cf->tid,
|
||||||
|
ceph_cap_string(cf->caps));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
struct ceph_cap_snap *capsnap =
|
||||||
|
container_of(cf, struct ceph_cap_snap,
|
||||||
|
cap_flush);
|
||||||
|
dout("kick_flushing_caps %p capsnap %p tid %llu %s\n",
|
||||||
|
inode, capsnap, cf->tid,
|
||||||
|
ceph_cap_string(capsnap->dirty));
|
||||||
|
|
||||||
|
atomic_inc(&capsnap->nref);
|
||||||
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
|
|
||||||
|
ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
|
||||||
|
oldest_flush_tid);
|
||||||
|
if (ret < 0) {
|
||||||
|
pr_err("kick_flushing_caps: error sending "
|
||||||
|
"cap flushsnap, ino (%llx.%llx) "
|
||||||
|
"tid %llu follows %llu\n",
|
||||||
|
ceph_vinop(inode), cf->tid,
|
||||||
|
capsnap->follows);
|
||||||
|
}
|
||||||
|
|
||||||
|
ceph_put_cap_snap(capsnap);
|
||||||
|
}
|
||||||
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
}
|
}
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
|
||||||
return delayed;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
|
void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
|
||||||
@ -2078,8 +2080,14 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
|
|||||||
{
|
{
|
||||||
struct ceph_inode_info *ci;
|
struct ceph_inode_info *ci;
|
||||||
struct ceph_cap *cap;
|
struct ceph_cap *cap;
|
||||||
|
u64 oldest_flush_tid;
|
||||||
|
|
||||||
dout("early_kick_flushing_caps mds%d\n", session->s_mds);
|
dout("early_kick_flushing_caps mds%d\n", session->s_mds);
|
||||||
|
|
||||||
|
spin_lock(&mdsc->cap_dirty_lock);
|
||||||
|
oldest_flush_tid = __get_oldest_flush_tid(mdsc);
|
||||||
|
spin_unlock(&mdsc->cap_dirty_lock);
|
||||||
|
|
||||||
list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
|
list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
cap = ci->i_auth_cap;
|
cap = ci->i_auth_cap;
|
||||||
@ -2099,10 +2107,8 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
|
|||||||
*/
|
*/
|
||||||
if ((cap->issued & ci->i_flushing_caps) !=
|
if ((cap->issued & ci->i_flushing_caps) !=
|
||||||
ci->i_flushing_caps) {
|
ci->i_flushing_caps) {
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
__kick_flushing_caps(mdsc, session, ci,
|
||||||
if (!__kick_flushing_caps(mdsc, session, ci))
|
oldest_flush_tid);
|
||||||
continue;
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
@ -2113,50 +2119,43 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
|
|||||||
struct ceph_mds_session *session)
|
struct ceph_mds_session *session)
|
||||||
{
|
{
|
||||||
struct ceph_inode_info *ci;
|
struct ceph_inode_info *ci;
|
||||||
|
u64 oldest_flush_tid;
|
||||||
kick_flushing_capsnaps(mdsc, session);
|
|
||||||
|
|
||||||
dout("kick_flushing_caps mds%d\n", session->s_mds);
|
dout("kick_flushing_caps mds%d\n", session->s_mds);
|
||||||
|
|
||||||
|
spin_lock(&mdsc->cap_dirty_lock);
|
||||||
|
oldest_flush_tid = __get_oldest_flush_tid(mdsc);
|
||||||
|
spin_unlock(&mdsc->cap_dirty_lock);
|
||||||
|
|
||||||
list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
|
list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
|
||||||
int delayed = __kick_flushing_caps(mdsc, session, ci);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
if (delayed) {
|
__kick_flushing_caps(mdsc, session, ci, oldest_flush_tid);
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
__cap_delay_requeue(mdsc, ci);
|
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
|
static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
|
||||||
struct ceph_mds_session *session,
|
struct ceph_mds_session *session,
|
||||||
struct inode *inode)
|
struct inode *inode)
|
||||||
|
__releases(ci->i_ceph_lock)
|
||||||
{
|
{
|
||||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||||
struct ceph_cap *cap;
|
struct ceph_cap *cap;
|
||||||
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
|
||||||
cap = ci->i_auth_cap;
|
cap = ci->i_auth_cap;
|
||||||
dout("kick_flushing_inode_caps %p flushing %s\n", inode,
|
dout("kick_flushing_inode_caps %p flushing %s\n", inode,
|
||||||
ceph_cap_string(ci->i_flushing_caps));
|
ceph_cap_string(ci->i_flushing_caps));
|
||||||
|
|
||||||
__ceph_flush_snaps(ci, &session, 1);
|
if (!list_empty(&ci->i_cap_flush_list)) {
|
||||||
|
u64 oldest_flush_tid;
|
||||||
if (ci->i_flushing_caps) {
|
|
||||||
int delayed;
|
|
||||||
|
|
||||||
spin_lock(&mdsc->cap_dirty_lock);
|
spin_lock(&mdsc->cap_dirty_lock);
|
||||||
list_move_tail(&ci->i_flushing_item,
|
list_move_tail(&ci->i_flushing_item,
|
||||||
&cap->session->s_cap_flushing);
|
&cap->session->s_cap_flushing);
|
||||||
|
oldest_flush_tid = __get_oldest_flush_tid(mdsc);
|
||||||
spin_unlock(&mdsc->cap_dirty_lock);
|
spin_unlock(&mdsc->cap_dirty_lock);
|
||||||
|
|
||||||
|
__kick_flushing_caps(mdsc, session, ci, oldest_flush_tid);
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
|
|
||||||
delayed = __kick_flushing_caps(mdsc, session, ci);
|
|
||||||
if (delayed) {
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
|
||||||
__cap_delay_requeue(mdsc, ci);
|
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
}
|
}
|
||||||
@ -2487,12 +2486,11 @@ static int ceph_try_drop_cap_snap(struct ceph_cap_snap *capsnap)
|
|||||||
{
|
{
|
||||||
if (!capsnap->need_flush &&
|
if (!capsnap->need_flush &&
|
||||||
!capsnap->writing && !capsnap->dirty_pages) {
|
!capsnap->writing && !capsnap->dirty_pages) {
|
||||||
|
|
||||||
dout("dropping cap_snap %p follows %llu\n",
|
dout("dropping cap_snap %p follows %llu\n",
|
||||||
capsnap, capsnap->follows);
|
capsnap, capsnap->follows);
|
||||||
|
BUG_ON(capsnap->cap_flush.tid > 0);
|
||||||
ceph_put_snap_context(capsnap->context);
|
ceph_put_snap_context(capsnap->context);
|
||||||
list_del(&capsnap->ci_item);
|
list_del(&capsnap->ci_item);
|
||||||
list_del(&capsnap->flushing_item);
|
|
||||||
ceph_put_cap_snap(capsnap);
|
ceph_put_cap_snap(capsnap);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -2891,13 +2889,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
|
|||||||
fill_inline = true;
|
fill_inline = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
|
||||||
|
|
||||||
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
|
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
|
||||||
kick_flushing_inode_caps(mdsc, session, inode);
|
|
||||||
up_read(&mdsc->snap_rwsem);
|
|
||||||
if (newcaps & ~issued)
|
if (newcaps & ~issued)
|
||||||
wake = true;
|
wake = true;
|
||||||
|
kick_flushing_inode_caps(mdsc, session, inode);
|
||||||
|
up_read(&mdsc->snap_rwsem);
|
||||||
|
} else {
|
||||||
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fill_inline)
|
if (fill_inline)
|
||||||
@ -2951,6 +2949,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
|
|||||||
list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
|
list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
|
||||||
if (cf->tid == flush_tid)
|
if (cf->tid == flush_tid)
|
||||||
cleaned = cf->caps;
|
cleaned = cf->caps;
|
||||||
|
if (cf->caps == 0) /* capsnap */
|
||||||
|
continue;
|
||||||
if (cf->tid <= flush_tid) {
|
if (cf->tid <= flush_tid) {
|
||||||
list_del(&cf->i_list);
|
list_del(&cf->i_list);
|
||||||
list_add_tail(&cf->i_list, &to_remove);
|
list_add_tail(&cf->i_list, &to_remove);
|
||||||
@ -2985,13 +2985,16 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ci->i_flushing_caps == 0) {
|
if (ci->i_flushing_caps == 0) {
|
||||||
list_del_init(&ci->i_flushing_item);
|
if (list_empty(&ci->i_cap_flush_list)) {
|
||||||
if (!list_empty(&session->s_cap_flushing))
|
list_del_init(&ci->i_flushing_item);
|
||||||
dout(" mds%d still flushing cap on %p\n",
|
if (!list_empty(&session->s_cap_flushing)) {
|
||||||
session->s_mds,
|
dout(" mds%d still flushing cap on %p\n",
|
||||||
&list_entry(session->s_cap_flushing.next,
|
session->s_mds,
|
||||||
struct ceph_inode_info,
|
&list_first_entry(&session->s_cap_flushing,
|
||||||
i_flushing_item)->vfs_inode);
|
struct ceph_inode_info,
|
||||||
|
i_flushing_item)->vfs_inode);
|
||||||
|
}
|
||||||
|
}
|
||||||
mdsc->num_cap_flushing--;
|
mdsc->num_cap_flushing--;
|
||||||
dout(" inode %p now !flushing\n", inode);
|
dout(" inode %p now !flushing\n", inode);
|
||||||
|
|
||||||
@ -3039,7 +3042,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
|
|||||||
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
|
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
|
||||||
u64 follows = le64_to_cpu(m->snap_follows);
|
u64 follows = le64_to_cpu(m->snap_follows);
|
||||||
struct ceph_cap_snap *capsnap;
|
struct ceph_cap_snap *capsnap;
|
||||||
int drop = 0;
|
int flushed = 0;
|
||||||
|
|
||||||
dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
|
dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
|
||||||
inode, ci, session->s_mds, follows);
|
inode, ci, session->s_mds, follows);
|
||||||
@ -3047,30 +3050,47 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
|
|||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
|
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
|
||||||
if (capsnap->follows == follows) {
|
if (capsnap->follows == follows) {
|
||||||
if (capsnap->flush_tid != flush_tid) {
|
if (capsnap->cap_flush.tid != flush_tid) {
|
||||||
dout(" cap_snap %p follows %lld tid %lld !="
|
dout(" cap_snap %p follows %lld tid %lld !="
|
||||||
" %lld\n", capsnap, follows,
|
" %lld\n", capsnap, follows,
|
||||||
flush_tid, capsnap->flush_tid);
|
flush_tid, capsnap->cap_flush.tid);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
WARN_ON(capsnap->dirty_pages || capsnap->writing);
|
flushed = 1;
|
||||||
dout(" removing %p cap_snap %p follows %lld\n",
|
|
||||||
inode, capsnap, follows);
|
|
||||||
ceph_put_snap_context(capsnap->context);
|
|
||||||
list_del(&capsnap->ci_item);
|
|
||||||
list_del(&capsnap->flushing_item);
|
|
||||||
ceph_put_cap_snap(capsnap);
|
|
||||||
wake_up_all(&mdsc->cap_flushing_wq);
|
|
||||||
drop = 1;
|
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
dout(" skipping cap_snap %p follows %lld\n",
|
dout(" skipping cap_snap %p follows %lld\n",
|
||||||
capsnap, capsnap->follows);
|
capsnap, capsnap->follows);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (flushed) {
|
||||||
|
u64 oldest_flush_tid;
|
||||||
|
WARN_ON(capsnap->dirty_pages || capsnap->writing);
|
||||||
|
dout(" removing %p cap_snap %p follows %lld\n",
|
||||||
|
inode, capsnap, follows);
|
||||||
|
list_del(&capsnap->ci_item);
|
||||||
|
list_del(&capsnap->cap_flush.i_list);
|
||||||
|
|
||||||
|
spin_lock(&mdsc->cap_dirty_lock);
|
||||||
|
|
||||||
|
if (list_empty(&ci->i_cap_flush_list))
|
||||||
|
list_del_init(&ci->i_flushing_item);
|
||||||
|
|
||||||
|
list_del(&capsnap->cap_flush.g_list);
|
||||||
|
|
||||||
|
oldest_flush_tid = __get_oldest_flush_tid(mdsc);
|
||||||
|
if (oldest_flush_tid == 0 || oldest_flush_tid > flush_tid)
|
||||||
|
wake_up_all(&mdsc->cap_flushing_wq);
|
||||||
|
|
||||||
|
spin_unlock(&mdsc->cap_dirty_lock);
|
||||||
|
wake_up_all(&ci->i_cap_wq);
|
||||||
|
}
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
if (drop)
|
if (flushed) {
|
||||||
|
ceph_put_snap_context(capsnap->context);
|
||||||
|
ceph_put_cap_snap(capsnap);
|
||||||
iput(inode);
|
iput(inode);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3175,7 +3195,8 @@ retry:
|
|||||||
tcap->implemented |= issued;
|
tcap->implemented |= issued;
|
||||||
if (cap == ci->i_auth_cap)
|
if (cap == ci->i_auth_cap)
|
||||||
ci->i_auth_cap = tcap;
|
ci->i_auth_cap = tcap;
|
||||||
if (ci->i_flushing_caps && ci->i_auth_cap == tcap) {
|
if (!list_empty(&ci->i_cap_flush_list) &&
|
||||||
|
ci->i_auth_cap == tcap) {
|
||||||
spin_lock(&mdsc->cap_dirty_lock);
|
spin_lock(&mdsc->cap_dirty_lock);
|
||||||
list_move_tail(&ci->i_flushing_item,
|
list_move_tail(&ci->i_flushing_item,
|
||||||
&tcap->session->s_cap_flushing);
|
&tcap->session->s_cap_flushing);
|
||||||
|
@ -472,7 +472,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
|
|||||||
s->s_cap_iterator = NULL;
|
s->s_cap_iterator = NULL;
|
||||||
INIT_LIST_HEAD(&s->s_cap_releases);
|
INIT_LIST_HEAD(&s->s_cap_releases);
|
||||||
INIT_LIST_HEAD(&s->s_cap_flushing);
|
INIT_LIST_HEAD(&s->s_cap_flushing);
|
||||||
INIT_LIST_HEAD(&s->s_cap_snaps_flushing);
|
|
||||||
|
|
||||||
dout("register_session mds%d\n", mds);
|
dout("register_session mds%d\n", mds);
|
||||||
if (mds >= mdsc->max_sessions) {
|
if (mds >= mdsc->max_sessions) {
|
||||||
@ -1479,21 +1478,6 @@ static int trim_caps(struct ceph_mds_client *mdsc,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int check_capsnap_flush(struct ceph_inode_info *ci,
|
|
||||||
u64 want_snap_seq)
|
|
||||||
{
|
|
||||||
int ret = 1;
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
|
||||||
if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) {
|
|
||||||
struct ceph_cap_snap *capsnap =
|
|
||||||
list_first_entry(&ci->i_cap_snaps,
|
|
||||||
struct ceph_cap_snap, ci_item);
|
|
||||||
ret = capsnap->follows >= want_snap_seq;
|
|
||||||
}
|
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int check_caps_flush(struct ceph_mds_client *mdsc,
|
static int check_caps_flush(struct ceph_mds_client *mdsc,
|
||||||
u64 want_flush_tid)
|
u64 want_flush_tid)
|
||||||
{
|
{
|
||||||
@ -1520,54 +1504,9 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
|
|||||||
* returns true if we've flushed through want_flush_tid
|
* returns true if we've flushed through want_flush_tid
|
||||||
*/
|
*/
|
||||||
static void wait_caps_flush(struct ceph_mds_client *mdsc,
|
static void wait_caps_flush(struct ceph_mds_client *mdsc,
|
||||||
u64 want_flush_tid, u64 want_snap_seq)
|
u64 want_flush_tid)
|
||||||
{
|
{
|
||||||
int mds;
|
dout("check_caps_flush want %llu\n", want_flush_tid);
|
||||||
|
|
||||||
dout("check_caps_flush want %llu snap want %llu\n",
|
|
||||||
want_flush_tid, want_snap_seq);
|
|
||||||
mutex_lock(&mdsc->mutex);
|
|
||||||
for (mds = 0; mds < mdsc->max_sessions; ) {
|
|
||||||
struct ceph_mds_session *session = mdsc->sessions[mds];
|
|
||||||
struct inode *inode = NULL;
|
|
||||||
|
|
||||||
if (!session) {
|
|
||||||
mds++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
get_session(session);
|
|
||||||
mutex_unlock(&mdsc->mutex);
|
|
||||||
|
|
||||||
mutex_lock(&session->s_mutex);
|
|
||||||
if (!list_empty(&session->s_cap_snaps_flushing)) {
|
|
||||||
struct ceph_cap_snap *capsnap =
|
|
||||||
list_first_entry(&session->s_cap_snaps_flushing,
|
|
||||||
struct ceph_cap_snap,
|
|
||||||
flushing_item);
|
|
||||||
struct ceph_inode_info *ci = capsnap->ci;
|
|
||||||
if (!check_capsnap_flush(ci, want_snap_seq)) {
|
|
||||||
dout("check_cap_flush still flushing snap %p "
|
|
||||||
"follows %lld <= %lld to mds%d\n",
|
|
||||||
&ci->vfs_inode, capsnap->follows,
|
|
||||||
want_snap_seq, mds);
|
|
||||||
inode = igrab(&ci->vfs_inode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mutex_unlock(&session->s_mutex);
|
|
||||||
ceph_put_mds_session(session);
|
|
||||||
|
|
||||||
if (inode) {
|
|
||||||
wait_event(mdsc->cap_flushing_wq,
|
|
||||||
check_capsnap_flush(ceph_inode(inode),
|
|
||||||
want_snap_seq));
|
|
||||||
iput(inode);
|
|
||||||
} else {
|
|
||||||
mds++;
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_lock(&mdsc->mutex);
|
|
||||||
}
|
|
||||||
mutex_unlock(&mdsc->mutex);
|
|
||||||
|
|
||||||
wait_event(mdsc->cap_flushing_wq,
|
wait_event(mdsc->cap_flushing_wq,
|
||||||
check_caps_flush(mdsc, want_flush_tid));
|
check_caps_flush(mdsc, want_flush_tid));
|
||||||
@ -3584,7 +3523,7 @@ restart:
|
|||||||
|
|
||||||
void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
|
void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
|
||||||
{
|
{
|
||||||
u64 want_tid, want_flush, want_snap;
|
u64 want_tid, want_flush;
|
||||||
|
|
||||||
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
||||||
return;
|
return;
|
||||||
@ -3599,15 +3538,11 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
|
|||||||
want_flush = mdsc->last_cap_flush_tid;
|
want_flush = mdsc->last_cap_flush_tid;
|
||||||
spin_unlock(&mdsc->cap_dirty_lock);
|
spin_unlock(&mdsc->cap_dirty_lock);
|
||||||
|
|
||||||
down_read(&mdsc->snap_rwsem);
|
dout("sync want tid %lld flush_seq %lld\n",
|
||||||
want_snap = mdsc->last_snap_seq;
|
want_tid, want_flush);
|
||||||
up_read(&mdsc->snap_rwsem);
|
|
||||||
|
|
||||||
dout("sync want tid %lld flush_seq %lld snap_seq %lld\n",
|
|
||||||
want_tid, want_flush, want_snap);
|
|
||||||
|
|
||||||
wait_unsafe_requests(mdsc, want_tid);
|
wait_unsafe_requests(mdsc, want_tid);
|
||||||
wait_caps_flush(mdsc, want_flush, want_snap);
|
wait_caps_flush(mdsc, want_flush);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -152,7 +152,6 @@ struct ceph_mds_session {
|
|||||||
|
|
||||||
/* protected by mutex */
|
/* protected by mutex */
|
||||||
struct list_head s_cap_flushing; /* inodes w/ flushing caps */
|
struct list_head s_cap_flushing; /* inodes w/ flushing caps */
|
||||||
struct list_head s_cap_snaps_flushing;
|
|
||||||
unsigned long s_renew_requested; /* last time we sent a renew req */
|
unsigned long s_renew_requested; /* last time we sent a renew req */
|
||||||
u64 s_renew_seq;
|
u64 s_renew_seq;
|
||||||
|
|
||||||
|
@ -520,9 +520,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
|
|||||||
ihold(inode);
|
ihold(inode);
|
||||||
|
|
||||||
atomic_set(&capsnap->nref, 1);
|
atomic_set(&capsnap->nref, 1);
|
||||||
capsnap->ci = ci;
|
|
||||||
INIT_LIST_HEAD(&capsnap->ci_item);
|
INIT_LIST_HEAD(&capsnap->ci_item);
|
||||||
INIT_LIST_HEAD(&capsnap->flushing_item);
|
|
||||||
|
|
||||||
capsnap->follows = old_snapc->seq;
|
capsnap->follows = old_snapc->seq;
|
||||||
capsnap->issued = __ceph_caps_issued(ci, NULL);
|
capsnap->issued = __ceph_caps_issued(ci, NULL);
|
||||||
@ -800,7 +798,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
|
|||||||
ihold(inode);
|
ihold(inode);
|
||||||
spin_unlock(&mdsc->snap_flush_lock);
|
spin_unlock(&mdsc->snap_flush_lock);
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
__ceph_flush_snaps(ci, &session, 0);
|
__ceph_flush_snaps(ci, &session);
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
iput(inode);
|
iput(inode);
|
||||||
spin_lock(&mdsc->snap_flush_lock);
|
spin_lock(&mdsc->snap_flush_lock);
|
||||||
|
@ -147,6 +147,13 @@ struct ceph_cap {
|
|||||||
#define CHECK_CAPS_AUTHONLY 2 /* only check auth cap */
|
#define CHECK_CAPS_AUTHONLY 2 /* only check auth cap */
|
||||||
#define CHECK_CAPS_FLUSH 4 /* flush any dirty caps */
|
#define CHECK_CAPS_FLUSH 4 /* flush any dirty caps */
|
||||||
|
|
||||||
|
struct ceph_cap_flush {
|
||||||
|
u64 tid;
|
||||||
|
int caps; /* 0 means capsnap */
|
||||||
|
struct list_head g_list; // global
|
||||||
|
struct list_head i_list; // per inode
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Snapped cap state that is pending flush to mds. When a snapshot occurs,
|
* Snapped cap state that is pending flush to mds. When a snapshot occurs,
|
||||||
* we first complete any in-process sync writes and writeback any dirty
|
* we first complete any in-process sync writes and writeback any dirty
|
||||||
@ -154,10 +161,11 @@ struct ceph_cap {
|
|||||||
*/
|
*/
|
||||||
struct ceph_cap_snap {
|
struct ceph_cap_snap {
|
||||||
atomic_t nref;
|
atomic_t nref;
|
||||||
struct ceph_inode_info *ci;
|
struct list_head ci_item;
|
||||||
struct list_head ci_item, flushing_item;
|
|
||||||
|
|
||||||
u64 follows, flush_tid;
|
struct ceph_cap_flush cap_flush;
|
||||||
|
|
||||||
|
u64 follows;
|
||||||
int issued, dirty;
|
int issued, dirty;
|
||||||
struct ceph_snap_context *context;
|
struct ceph_snap_context *context;
|
||||||
|
|
||||||
@ -186,13 +194,6 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ceph_cap_flush {
|
|
||||||
u64 tid;
|
|
||||||
int caps;
|
|
||||||
struct list_head g_list; // global
|
|
||||||
struct list_head i_list; // per inode
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The frag tree describes how a directory is fragmented, potentially across
|
* The frag tree describes how a directory is fragmented, potentially across
|
||||||
* multiple metadata servers. It is also used to indicate points where
|
* multiple metadata servers. It is also used to indicate points where
|
||||||
@ -888,8 +889,7 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
|
|||||||
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
|
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
|
||||||
struct ceph_snap_context *snapc);
|
struct ceph_snap_context *snapc);
|
||||||
extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
|
extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
|
||||||
struct ceph_mds_session **psession,
|
struct ceph_mds_session **psession);
|
||||||
int again);
|
|
||||||
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
|
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
|
||||||
struct ceph_mds_session *session);
|
struct ceph_mds_session *session);
|
||||||
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
|
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
|
||||||
|
Loading…
Reference in New Issue
Block a user