xfs: replace i_flock with a sleeping bitlock
We almost never block on i_flock, the exception is synchronous inode flushing. Instead of bloating the inode with a 16/24-byte completion that we abuse as a semaphore just implement it as a bitlock that uses a bit waitqueue for the rare sleeping path. This primarily is a tradeoff between a much smaller inode and a faster non-blocking path vs faster wakeups, and we are much better off with the former. A small downside is that we will lose lockdep checking for i_flock, but given that it's always taken inside the ilock that should be acceptable. Note that for example the inode writeback locking is implemented in a very similar way. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Alex Elder <aelder@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
parent
49e4c70e52
commit
474fce0675
@ -77,7 +77,7 @@ xfs_inode_alloc(
|
||||
|
||||
ASSERT(atomic_read(&ip->i_pincount) == 0);
|
||||
ASSERT(!spin_is_locked(&ip->i_flags_lock));
|
||||
ASSERT(completion_done(&ip->i_flush));
|
||||
ASSERT(!xfs_isiflocked(ip));
|
||||
ASSERT(ip->i_ino == 0);
|
||||
|
||||
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
|
||||
@ -150,7 +150,7 @@ xfs_inode_free(
|
||||
/* asserts to verify all state is correct here */
|
||||
ASSERT(atomic_read(&ip->i_pincount) == 0);
|
||||
ASSERT(!spin_is_locked(&ip->i_flags_lock));
|
||||
ASSERT(completion_done(&ip->i_flush));
|
||||
ASSERT(!xfs_isiflocked(ip));
|
||||
|
||||
/*
|
||||
* Because we use RCU freeing we need to ensure the inode always
|
||||
@ -713,3 +713,19 @@ xfs_isilocked(
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
__xfs_iflock(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
|
||||
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
|
||||
|
||||
do {
|
||||
prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
|
||||
if (xfs_isiflocked(ip))
|
||||
io_schedule();
|
||||
} while (!xfs_iflock_nowait(ip));
|
||||
|
||||
finish_wait(wq, &wait.wait);
|
||||
}
|
||||
|
@ -2396,7 +2396,7 @@ xfs_iflush(
|
||||
XFS_STATS_INC(xs_iflush_count);
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
|
||||
ASSERT(!completion_done(&ip->i_flush));
|
||||
ASSERT(xfs_isiflocked(ip));
|
||||
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
||||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
|
||||
|
||||
@ -2512,7 +2512,7 @@ xfs_iflush_int(
|
||||
#endif
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
|
||||
ASSERT(!completion_done(&ip->i_flush));
|
||||
ASSERT(xfs_isiflocked(ip));
|
||||
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
||||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
|
||||
|
||||
|
@ -237,7 +237,6 @@ typedef struct xfs_inode {
|
||||
struct xfs_inode_log_item *i_itemp; /* logging information */
|
||||
mrlock_t i_lock; /* inode lock */
|
||||
mrlock_t i_iolock; /* inode IO lock */
|
||||
struct completion i_flush; /* inode flush completion q */
|
||||
atomic_t i_pincount; /* inode pin count */
|
||||
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
|
||||
spinlock_t i_flags_lock; /* inode i_flags lock */
|
||||
@ -324,6 +323,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int
|
||||
xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
ret = ip->i_flags & flags;
|
||||
if (!ret)
|
||||
ip->i_flags |= flags;
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Project quota id helpers (previously projid was 16bit only
|
||||
* and using two 16bit values to hold new 32bit projid was chosen
|
||||
@ -343,36 +355,18 @@ xfs_set_projid(struct xfs_inode *ip,
|
||||
ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
|
||||
}
|
||||
|
||||
/*
|
||||
* Manage the i_flush queue embedded in the inode. This completion
|
||||
* queue synchronizes processes attempting to flush the in-core
|
||||
* inode back to disk.
|
||||
*/
|
||||
static inline void xfs_iflock(xfs_inode_t *ip)
|
||||
{
|
||||
wait_for_completion(&ip->i_flush);
|
||||
}
|
||||
|
||||
static inline int xfs_iflock_nowait(xfs_inode_t *ip)
|
||||
{
|
||||
return try_wait_for_completion(&ip->i_flush);
|
||||
}
|
||||
|
||||
static inline void xfs_ifunlock(xfs_inode_t *ip)
|
||||
{
|
||||
complete(&ip->i_flush);
|
||||
}
|
||||
|
||||
/*
|
||||
* In-core inode flags.
|
||||
*/
|
||||
#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
|
||||
#define XFS_ISTALE 0x0002 /* inode has been staled */
|
||||
#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
|
||||
#define XFS_INEW 0x0008 /* inode has just been allocated */
|
||||
#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
|
||||
#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
|
||||
#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
|
||||
#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
|
||||
#define XFS_ISTALE (1 << 1) /* inode has been staled */
|
||||
#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
|
||||
#define XFS_INEW (1 << 3) /* inode has just been allocated */
|
||||
#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
|
||||
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
|
||||
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
|
||||
#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
|
||||
#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
|
||||
|
||||
/*
|
||||
* Per-lifetime flags need to be reset when re-using a reclaimable inode during
|
||||
@ -384,6 +378,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
|
||||
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
|
||||
XFS_IFILESTREAM);
|
||||
|
||||
/*
|
||||
* Synchronize processes attempting to flush the in-core inode back to disk.
|
||||
*/
|
||||
|
||||
extern void __xfs_iflock(struct xfs_inode *ip);
|
||||
|
||||
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
|
||||
{
|
||||
return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
|
||||
}
|
||||
|
||||
static inline void xfs_iflock(struct xfs_inode *ip)
|
||||
{
|
||||
if (!xfs_iflock_nowait(ip))
|
||||
__xfs_iflock(ip);
|
||||
}
|
||||
|
||||
static inline void xfs_ifunlock(struct xfs_inode *ip)
|
||||
{
|
||||
xfs_iflags_clear(ip, XFS_IFLOCK);
|
||||
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
|
||||
}
|
||||
|
||||
static inline int xfs_isiflocked(struct xfs_inode *ip)
|
||||
{
|
||||
return xfs_iflags_test(ip, XFS_IFLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Flags for inode locking.
|
||||
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
|
||||
|
@ -717,7 +717,7 @@ xfs_inode_item_pushbuf(
|
||||
* If a flush is not in progress anymore, chances are that the
|
||||
* inode was taken off the AIL. So, just get out.
|
||||
*/
|
||||
if (completion_done(&ip->i_flush) ||
|
||||
if (!xfs_isiflocked(ip) ||
|
||||
!(lip->li_flags & XFS_LI_IN_AIL)) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
return true;
|
||||
@ -750,7 +750,7 @@ xfs_inode_item_push(
|
||||
struct xfs_inode *ip = iip->ili_inode;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
|
||||
ASSERT(!completion_done(&ip->i_flush));
|
||||
ASSERT(xfs_isiflocked(ip));
|
||||
|
||||
/*
|
||||
* Since we were able to lock the inode's flush lock and
|
||||
|
@ -829,13 +829,6 @@ xfs_fs_inode_init_once(
|
||||
atomic_set(&ip->i_pincount, 0);
|
||||
spin_lock_init(&ip->i_flags_lock);
|
||||
init_waitqueue_head(&ip->i_ipin_wait);
|
||||
/*
|
||||
* Because we want to use a counting completion, complete
|
||||
* the flush completion once to allow a single access to
|
||||
* the flush completion without blocking.
|
||||
*/
|
||||
init_completion(&ip->i_flush);
|
||||
complete(&ip->i_flush);
|
||||
|
||||
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
|
||||
"xfsino", ip->i_ino);
|
||||
|
@ -707,14 +707,13 @@ xfs_reclaim_inode_grab(
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* do some unlocked checks first to avoid unnecessary lock traffic.
|
||||
* The first is a flush lock check, the second is a already in reclaim
|
||||
* check. Only do these checks if we are not going to block on locks.
|
||||
* If we are asked for non-blocking operation, do unlocked checks to
|
||||
* see if the inode already is being flushed or in reclaim to avoid
|
||||
* lock traffic.
|
||||
*/
|
||||
if ((flags & SYNC_TRYLOCK) &&
|
||||
(!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
|
||||
__xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The radix tree lock here protects a thread in xfs_iget from racing
|
||||
|
Loading…
Reference in New Issue
Block a user