[XFS] Fix race when looking up reclaimable inodes

If we get a race looking up a reclaimable inode, we can end up with the
winner proceeding to use the inode before it has been completely
re-initialised. This is a Bad Thing.

Fix the race by checking whether we are still initialising the inod eonce
we have a reference to it, and if so wait for the initialisation to
complete before continuing.

While there, fix a leaked reference count in the same code when
encountering an unlinked inode and we are not doing a lookup for a create
operation.

SGI-PV: 987246

SGI-Modid: xfs-linux-melb:xfs-kern:32429a

Signed-off-by: David Chinner <david@fromorbit.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
This commit is contained in:
David Chinner 2008-10-30 18:32:43 +11:00 committed by Lachlan McIlroy
parent e0b8e8b65d
commit 6bfb3d065f
2 changed files with 23 additions and 10 deletions

View File

@ -77,6 +77,7 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/writeback.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/div64.h> #include <asm/div64.h>

View File

@ -52,7 +52,7 @@ xfs_iget_cache_hit(
int lock_flags) __releases(pag->pag_ici_lock) int lock_flags) __releases(pag->pag_ici_lock)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
int error = 0; int error = EAGAIN;
/* /*
* If INEW is set this inode is being set up * If INEW is set this inode is being set up
@ -60,7 +60,6 @@ xfs_iget_cache_hit(
* Pause and try again. * Pause and try again.
*/ */
if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
error = EAGAIN;
XFS_STATS_INC(xs_ig_frecycle); XFS_STATS_INC(xs_ig_frecycle);
goto out_error; goto out_error;
} }
@ -73,7 +72,6 @@ xfs_iget_cache_hit(
* error immediately so we don't remove it from the reclaim * error immediately so we don't remove it from the reclaim
* list and potentially leak the inode. * list and potentially leak the inode.
*/ */
if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
error = ENOENT; error = ENOENT;
goto out_error; goto out_error;
@ -91,27 +89,42 @@ xfs_iget_cache_hit(
error = ENOMEM; error = ENOMEM;
goto out_error; goto out_error;
} }
/*
* We must set the XFS_INEW flag before clearing the
* XFS_IRECLAIMABLE flag so that if a racing lookup does
* not find the XFS_IRECLAIMABLE above but has the igrab()
* below succeed we can safely check XFS_INEW to detect
* that this inode is still being initialised.
*/
xfs_iflags_set(ip, XFS_INEW); xfs_iflags_set(ip, XFS_INEW);
xfs_iflags_clear(ip, XFS_IRECLAIMABLE); xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
/* clear the radix tree reclaim flag as well. */ /* clear the radix tree reclaim flag as well. */
__xfs_inode_clear_reclaim_tag(mp, pag, ip); __xfs_inode_clear_reclaim_tag(mp, pag, ip);
read_unlock(&pag->pag_ici_lock);
} else if (!igrab(VFS_I(ip))) { } else if (!igrab(VFS_I(ip))) {
/* If the VFS inode is being torn down, pause and try again. */ /* If the VFS inode is being torn down, pause and try again. */
error = EAGAIN;
XFS_STATS_INC(xs_ig_frecycle); XFS_STATS_INC(xs_ig_frecycle);
goto out_error; goto out_error;
} else { } else if (xfs_iflags_test(ip, XFS_INEW)) {
/* we've got a live one */ /*
read_unlock(&pag->pag_ici_lock); * We are racing with another cache hit that is
* currently recycling this inode out of the XFS_IRECLAIMABLE
* state. Wait for the initialisation to complete before
* continuing.
*/
wait_on_inode(VFS_I(ip));
} }
if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
error = ENOENT; error = ENOENT;
goto out; iput(VFS_I(ip));
goto out_error;
} }
/* We've got a live one. */
read_unlock(&pag->pag_ici_lock);
if (lock_flags != 0) if (lock_flags != 0)
xfs_ilock(ip, lock_flags); xfs_ilock(ip, lock_flags);
@ -122,7 +135,6 @@ xfs_iget_cache_hit(
out_error: out_error:
read_unlock(&pag->pag_ici_lock); read_unlock(&pag->pag_ici_lock);
out:
return error; return error;
} }