ceph: use I_COMPLETE inode flag instead of D_COMPLETE flag
commit c6ffe10015
moved the flag that tracks if the dcache contents
for a directory are complete to the dentry. The problem is that there
are lots of places that use ceph_dir_{set,clear,test}_complete() while
holding i_ceph_lock, but ceph_dir_{set,clear,test}_complete() may
sleep because they call dput().
This patch basically reverts that commit. ceph_d_prune() is
called with both the dentry to prune and its parent dentry
locked, so it is safe to access the parent dentry's d_inode and
clear the I_COMPLETE flag.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
This commit is contained in:
parent
964266cce9
commit
a8673d61ad
@ -490,15 +490,17 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
|
|||||||
ci->i_rdcache_gen++;
|
ci->i_rdcache_gen++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if we are newly issued FILE_SHARED, clear D_COMPLETE; we
|
* if we are newly issued FILE_SHARED, clear I_COMPLETE; we
|
||||||
* don't know what happened to this directory while we didn't
|
* don't know what happened to this directory while we didn't
|
||||||
* have the cap.
|
* have the cap.
|
||||||
*/
|
*/
|
||||||
if ((issued & CEPH_CAP_FILE_SHARED) &&
|
if ((issued & CEPH_CAP_FILE_SHARED) &&
|
||||||
(had & CEPH_CAP_FILE_SHARED) == 0) {
|
(had & CEPH_CAP_FILE_SHARED) == 0) {
|
||||||
ci->i_shared_gen++;
|
ci->i_shared_gen++;
|
||||||
if (S_ISDIR(ci->vfs_inode.i_mode))
|
if (S_ISDIR(ci->vfs_inode.i_mode)) {
|
||||||
ceph_dir_clear_complete(&ci->vfs_inode);
|
dout(" marking %p NOT complete\n", &ci->vfs_inode);
|
||||||
|
ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -107,7 +107,7 @@ static unsigned fpos_off(loff_t p)
|
|||||||
* falling back to a "normal" sync readdir if any dentries in the dir
|
* falling back to a "normal" sync readdir if any dentries in the dir
|
||||||
* are dropped.
|
* are dropped.
|
||||||
*
|
*
|
||||||
* D_COMPLETE tells indicates we have all dentries in the dir. It is
|
* I_COMPLETE tells indicates we have all dentries in the dir. It is
|
||||||
* defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
|
* defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
|
||||||
* the MDS if/when the directory is modified).
|
* the MDS if/when the directory is modified).
|
||||||
*/
|
*/
|
||||||
@ -198,8 +198,8 @@ more:
|
|||||||
filp->f_pos++;
|
filp->f_pos++;
|
||||||
|
|
||||||
/* make sure a dentry wasn't dropped while we didn't have parent lock */
|
/* make sure a dentry wasn't dropped while we didn't have parent lock */
|
||||||
if (!ceph_dir_test_complete(dir)) {
|
if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
|
||||||
dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
|
dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
|
||||||
err = -EAGAIN;
|
err = -EAGAIN;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -284,7 +284,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
|||||||
if ((filp->f_pos == 2 || fi->dentry) &&
|
if ((filp->f_pos == 2 || fi->dentry) &&
|
||||||
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
|
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
|
||||||
ceph_snap(inode) != CEPH_SNAPDIR &&
|
ceph_snap(inode) != CEPH_SNAPDIR &&
|
||||||
ceph_dir_test_complete(inode) &&
|
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
|
||||||
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
|
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
err = __dcache_readdir(filp, dirent, filldir);
|
err = __dcache_readdir(filp, dirent, filldir);
|
||||||
@ -350,7 +350,7 @@ more:
|
|||||||
|
|
||||||
if (!req->r_did_prepopulate) {
|
if (!req->r_did_prepopulate) {
|
||||||
dout("readdir !did_prepopulate");
|
dout("readdir !did_prepopulate");
|
||||||
fi->dir_release_count--; /* preclude D_COMPLETE */
|
fi->dir_release_count--; /* preclude I_COMPLETE */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* note next offset and last dentry name */
|
/* note next offset and last dentry name */
|
||||||
@ -429,7 +429,8 @@ more:
|
|||||||
*/
|
*/
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
if (ci->i_release_count == fi->dir_release_count) {
|
if (ci->i_release_count == fi->dir_release_count) {
|
||||||
ceph_dir_set_complete(inode);
|
dout(" marking %p complete\n", inode);
|
||||||
|
ci->i_ceph_flags |= CEPH_I_COMPLETE;
|
||||||
ci->i_max_offset = filp->f_pos;
|
ci->i_max_offset = filp->f_pos;
|
||||||
}
|
}
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
@ -604,7 +605,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
|
|||||||
fsc->mount_options->snapdir_name,
|
fsc->mount_options->snapdir_name,
|
||||||
dentry->d_name.len) &&
|
dentry->d_name.len) &&
|
||||||
!is_root_ceph_dentry(dir, dentry) &&
|
!is_root_ceph_dentry(dir, dentry) &&
|
||||||
ceph_dir_test_complete(dir) &&
|
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
|
||||||
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
|
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
dout(" dir %p complete, -ENOENT\n", dir);
|
dout(" dir %p complete, -ENOENT\n", dir);
|
||||||
@ -908,7 +909,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* d_move screws up d_subdirs order */
|
/* d_move screws up d_subdirs order */
|
||||||
ceph_dir_clear_complete(new_dir);
|
ceph_i_clear(new_dir, CEPH_I_COMPLETE);
|
||||||
|
|
||||||
d_move(old_dentry, new_dentry);
|
d_move(old_dentry, new_dentry);
|
||||||
|
|
||||||
@ -1064,44 +1065,6 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Set/clear/test dir complete flag on the dir's dentry.
|
|
||||||
*/
|
|
||||||
void ceph_dir_set_complete(struct inode *inode)
|
|
||||||
{
|
|
||||||
struct dentry *dentry = d_find_any_alias(inode);
|
|
||||||
|
|
||||||
if (dentry && ceph_dentry(dentry) &&
|
|
||||||
ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) {
|
|
||||||
dout(" marking %p (%p) complete\n", inode, dentry);
|
|
||||||
set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
|
|
||||||
}
|
|
||||||
dput(dentry);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ceph_dir_clear_complete(struct inode *inode)
|
|
||||||
{
|
|
||||||
struct dentry *dentry = d_find_any_alias(inode);
|
|
||||||
|
|
||||||
if (dentry && ceph_dentry(dentry)) {
|
|
||||||
dout(" marking %p (%p) complete\n", inode, dentry);
|
|
||||||
set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
|
|
||||||
}
|
|
||||||
dput(dentry);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ceph_dir_test_complete(struct inode *inode)
|
|
||||||
{
|
|
||||||
struct dentry *dentry = d_find_any_alias(inode);
|
|
||||||
|
|
||||||
if (dentry && ceph_dentry(dentry)) {
|
|
||||||
dout(" marking %p (%p) NOT complete\n", inode, dentry);
|
|
||||||
clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
|
|
||||||
}
|
|
||||||
dput(dentry);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When the VFS prunes a dentry from the cache, we need to clear the
|
* When the VFS prunes a dentry from the cache, we need to clear the
|
||||||
* complete flag on the parent directory.
|
* complete flag on the parent directory.
|
||||||
@ -1110,15 +1073,13 @@ bool ceph_dir_test_complete(struct inode *inode)
|
|||||||
*/
|
*/
|
||||||
static void ceph_d_prune(struct dentry *dentry)
|
static void ceph_d_prune(struct dentry *dentry)
|
||||||
{
|
{
|
||||||
struct ceph_dentry_info *di;
|
|
||||||
|
|
||||||
dout("ceph_d_prune %p\n", dentry);
|
dout("ceph_d_prune %p\n", dentry);
|
||||||
|
|
||||||
/* do we have a valid parent? */
|
/* do we have a valid parent? */
|
||||||
if (IS_ROOT(dentry))
|
if (IS_ROOT(dentry))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* if we are not hashed, we don't affect D_COMPLETE */
|
/* if we are not hashed, we don't affect I_COMPLETE */
|
||||||
if (d_unhashed(dentry))
|
if (d_unhashed(dentry))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -1126,8 +1087,7 @@ static void ceph_d_prune(struct dentry *dentry)
|
|||||||
* we hold d_lock, so d_parent is stable, and d_fsdata is never
|
* we hold d_lock, so d_parent is stable, and d_fsdata is never
|
||||||
* cleared until d_release
|
* cleared until d_release
|
||||||
*/
|
*/
|
||||||
di = ceph_dentry(dentry->d_parent);
|
ceph_i_clear(dentry->d_parent->d_inode, CEPH_I_COMPLETE);
|
||||||
clear_bit(CEPH_D_COMPLETE, &di->flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -561,7 +561,6 @@ static int fill_inode(struct inode *inode,
|
|||||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||||
int i;
|
int i;
|
||||||
int issued = 0, implemented;
|
int issued = 0, implemented;
|
||||||
int updating_inode = 0;
|
|
||||||
struct timespec mtime, atime, ctime;
|
struct timespec mtime, atime, ctime;
|
||||||
u32 nsplits;
|
u32 nsplits;
|
||||||
struct ceph_buffer *xattr_blob = NULL;
|
struct ceph_buffer *xattr_blob = NULL;
|
||||||
@ -601,7 +600,6 @@ static int fill_inode(struct inode *inode,
|
|||||||
(ci->i_version & ~1) >= le64_to_cpu(info->version))
|
(ci->i_version & ~1) >= le64_to_cpu(info->version))
|
||||||
goto no_change;
|
goto no_change;
|
||||||
|
|
||||||
updating_inode = 1;
|
|
||||||
issued = __ceph_caps_issued(ci, &implemented);
|
issued = __ceph_caps_issued(ci, &implemented);
|
||||||
issued |= implemented | __ceph_caps_dirty(ci);
|
issued |= implemented | __ceph_caps_dirty(ci);
|
||||||
|
|
||||||
@ -717,6 +715,17 @@ static int fill_inode(struct inode *inode,
|
|||||||
ceph_vinop(inode), inode->i_mode);
|
ceph_vinop(inode), inode->i_mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* set dir completion flag? */
|
||||||
|
if (S_ISDIR(inode->i_mode) &&
|
||||||
|
ci->i_files == 0 && ci->i_subdirs == 0 &&
|
||||||
|
ceph_snap(inode) == CEPH_NOSNAP &&
|
||||||
|
(le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
|
||||||
|
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
|
||||||
|
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
|
||||||
|
dout(" marking %p complete (empty)\n", inode);
|
||||||
|
ci->i_ceph_flags |= CEPH_I_COMPLETE;
|
||||||
|
ci->i_max_offset = 2;
|
||||||
|
}
|
||||||
no_change:
|
no_change:
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
|
|
||||||
@ -767,19 +776,6 @@ no_change:
|
|||||||
__ceph_get_fmode(ci, cap_fmode);
|
__ceph_get_fmode(ci, cap_fmode);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set dir completion flag? */
|
|
||||||
if (S_ISDIR(inode->i_mode) &&
|
|
||||||
updating_inode && /* didn't jump to no_change */
|
|
||||||
ci->i_files == 0 && ci->i_subdirs == 0 &&
|
|
||||||
ceph_snap(inode) == CEPH_NOSNAP &&
|
|
||||||
(le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
|
|
||||||
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
|
|
||||||
!ceph_dir_test_complete(inode)) {
|
|
||||||
dout(" marking %p complete (empty)\n", inode);
|
|
||||||
ceph_dir_set_complete(inode);
|
|
||||||
ci->i_max_offset = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update delegation info? */
|
/* update delegation info? */
|
||||||
if (dirinfo)
|
if (dirinfo)
|
||||||
ceph_fill_dirfrag(inode, dirinfo);
|
ceph_fill_dirfrag(inode, dirinfo);
|
||||||
@ -861,7 +857,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
|
|||||||
di = ceph_dentry(dn);
|
di = ceph_dentry(dn);
|
||||||
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
if (!ceph_dir_test_complete(inode)) {
|
if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -1066,7 +1062,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
|
|||||||
* d_move() puts the renamed dentry at the end of
|
* d_move() puts the renamed dentry at the end of
|
||||||
* d_subdirs. We need to assign it an appropriate
|
* d_subdirs. We need to assign it an appropriate
|
||||||
* directory offset so we can behave when holding
|
* directory offset so we can behave when holding
|
||||||
* D_COMPLETE.
|
* I_COMPLETE.
|
||||||
*/
|
*/
|
||||||
ceph_set_dentry_offset(req->r_old_dentry);
|
ceph_set_dentry_offset(req->r_old_dentry);
|
||||||
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
|
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
|
||||||
|
@ -2029,7 +2029,7 @@ out:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS
|
* Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS
|
||||||
* namespace request.
|
* namespace request.
|
||||||
*/
|
*/
|
||||||
void ceph_invalidate_dir_request(struct ceph_mds_request *req)
|
void ceph_invalidate_dir_request(struct ceph_mds_request *req)
|
||||||
@ -2037,9 +2037,9 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
|
|||||||
struct inode *inode = req->r_locked_dir;
|
struct inode *inode = req->r_locked_dir;
|
||||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||||
|
|
||||||
dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
|
dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode);
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
ceph_dir_clear_complete(inode);
|
ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
|
||||||
ci->i_release_count++;
|
ci->i_release_count++;
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
|
|
||||||
|
@ -204,7 +204,6 @@ struct ceph_inode_xattr {
|
|||||||
* Ceph dentry state
|
* Ceph dentry state
|
||||||
*/
|
*/
|
||||||
struct ceph_dentry_info {
|
struct ceph_dentry_info {
|
||||||
unsigned long flags;
|
|
||||||
struct ceph_mds_session *lease_session;
|
struct ceph_mds_session *lease_session;
|
||||||
u32 lease_gen, lease_shared_gen;
|
u32 lease_gen, lease_shared_gen;
|
||||||
u32 lease_seq;
|
u32 lease_seq;
|
||||||
@ -215,18 +214,6 @@ struct ceph_dentry_info {
|
|||||||
u64 offset;
|
u64 offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* dentry flags
|
|
||||||
*
|
|
||||||
* The locking for D_COMPLETE is a bit odd:
|
|
||||||
* - we can clear it at almost any time (see ceph_d_prune)
|
|
||||||
* - it is only meaningful if:
|
|
||||||
* - we hold dir inode i_ceph_lock
|
|
||||||
* - we hold dir FILE_SHARED caps
|
|
||||||
* - the dentry D_COMPLETE is set
|
|
||||||
*/
|
|
||||||
#define CEPH_D_COMPLETE 1 /* if set, d_u.d_subdirs is complete directory */
|
|
||||||
|
|
||||||
struct ceph_inode_xattrs_info {
|
struct ceph_inode_xattrs_info {
|
||||||
/*
|
/*
|
||||||
* (still encoded) xattr blob. we avoid the overhead of parsing
|
* (still encoded) xattr blob. we avoid the overhead of parsing
|
||||||
@ -267,7 +254,7 @@ struct ceph_inode_info {
|
|||||||
struct timespec i_rctime;
|
struct timespec i_rctime;
|
||||||
u64 i_rbytes, i_rfiles, i_rsubdirs;
|
u64 i_rbytes, i_rfiles, i_rsubdirs;
|
||||||
u64 i_files, i_subdirs;
|
u64 i_files, i_subdirs;
|
||||||
u64 i_max_offset; /* largest readdir offset, set with D_COMPLETE */
|
u64 i_max_offset; /* largest readdir offset, set with I_COMPLETE */
|
||||||
|
|
||||||
struct rb_root i_fragtree;
|
struct rb_root i_fragtree;
|
||||||
struct mutex i_fragtree_mutex;
|
struct mutex i_fragtree_mutex;
|
||||||
@ -432,6 +419,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
|
|||||||
/*
|
/*
|
||||||
* Ceph inode.
|
* Ceph inode.
|
||||||
*/
|
*/
|
||||||
|
#define CEPH_I_COMPLETE 1 /* we have complete directory cached */
|
||||||
#define CEPH_I_NODELAY 4 /* do not delay cap release */
|
#define CEPH_I_NODELAY 4 /* do not delay cap release */
|
||||||
#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
|
#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
|
||||||
#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
|
#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
|
||||||
@ -488,13 +476,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
|
|||||||
return ((loff_t)frag << 32) | (loff_t)off;
|
return ((loff_t)frag << 32) | (loff_t)off;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* set/clear directory D_COMPLETE flag
|
|
||||||
*/
|
|
||||||
void ceph_dir_set_complete(struct inode *inode);
|
|
||||||
void ceph_dir_clear_complete(struct inode *inode);
|
|
||||||
bool ceph_dir_test_complete(struct inode *inode);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* caps helpers
|
* caps helpers
|
||||||
*/
|
*/
|
||||||
|
Loading…
Reference in New Issue
Block a user