Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
Pull XFS update from Ben Myers: - Removal of xfsbufd - Background CIL flushes have been moved to a workqueue. - Fix to xfs_check_page_type applicable to filesystems where blocksize < page size - Fix for stale data exposure when extsize hints are used. - A series of xfs_buf cache cleanups. - Fix for XFS_IOC_ALLOCSP - Cleanups for includes and removal of xfs_lrw.[ch]. - Moved all busy extent handling to its own file so that it is easier to merge with userspace. - Fix for log mount failure. - Fix to enable inode reclaim during quotacheck at mount time. - Fix for delalloc quota accounting. - Fix for memory reclaim deadlock on agi buffer. - Fixes for failed writes and to clean up stale delalloc blocks. - Fix to use GFP_NOFS in blkdev_issue_flush - SEEK_DATA/SEEK_HOLE support * 'for-linus' of git://oss.sgi.com/xfs/xfs: (57 commits) xfs: add trace points for log forces xfs: fix memory reclaim deadlock on agi buffer xfs: fix delalloc quota accounting on failure xfs: protect xfs_sync_worker with s_umount semaphore xfs: introduce SEEK_DATA/SEEK_HOLE support xfs: make xfs_extent_busy_trim not static xfs: make XBF_MAPPED the default behaviour xfs: flush outstanding buffers on log mount failure xfs: Properly exclude IO type flags from buffer flags xfs: clean up xfs_bit.h includes xfs: move xfs_do_force_shutdown() and kill xfs_rw.c xfs: move xfs_get_extsz_hint() and kill xfs_rw.h xfs: move xfs_fsb_to_db to xfs_bmap.h xfs: clean up busy extent naming xfs: move busy extent handling to it's own file xfs: move xfsagino_t to xfs_types.h xfs: use iolock on XFS_IOC_ALLOCSP calls xfs: kill XBF_DONTBLOCK xfs: kill xfs_read_buf() xfs: kill XBF_LOCK ...
This commit is contained in:
commit
9978306e31
@ -7623,7 +7623,7 @@ XFS FILESYSTEM
|
||||
P: Silicon Graphics Inc
|
||||
M: Ben Myers <bpm@sgi.com>
|
||||
M: Alex Elder <elder@kernel.org>
|
||||
M: xfs-masters@oss.sgi.com
|
||||
M: xfs@oss.sgi.com
|
||||
L: xfs@oss.sgi.com
|
||||
W: http://oss.sgi.com/projects/xfs
|
||||
T: git git://oss.sgi.com/xfs/xfs.git
|
||||
|
@ -33,6 +33,7 @@ xfs-y += xfs_aops.o \
|
||||
xfs_discard.o \
|
||||
xfs_error.o \
|
||||
xfs_export.o \
|
||||
xfs_extent_busy.o \
|
||||
xfs_file.o \
|
||||
xfs_filestream.o \
|
||||
xfs_fsops.o \
|
||||
@ -49,7 +50,6 @@ xfs-y += xfs_aops.o \
|
||||
xfs_sync.o \
|
||||
xfs_xattr.o \
|
||||
xfs_rename.o \
|
||||
xfs_rw.o \
|
||||
xfs_utils.o \
|
||||
xfs_vnodeops.o \
|
||||
kmem.o \
|
||||
|
@ -174,24 +174,6 @@ typedef struct xfs_agfl {
|
||||
__be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */
|
||||
} xfs_agfl_t;
|
||||
|
||||
/*
|
||||
* Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that
|
||||
* have been freed but whose transactions aren't committed to disk yet.
|
||||
*
|
||||
* Note that we use the transaction ID to record the transaction, not the
|
||||
* transaction structure itself. See xfs_alloc_busy_insert() for details.
|
||||
*/
|
||||
struct xfs_busy_extent {
|
||||
struct rb_node rb_node; /* ag by-bno indexed search tree */
|
||||
struct list_head list; /* transaction busy extent list */
|
||||
xfs_agnumber_t agno;
|
||||
xfs_agblock_t bno;
|
||||
xfs_extlen_t length;
|
||||
unsigned int flags;
|
||||
#define XFS_ALLOC_BUSY_DISCARDED 0x01 /* undergoing a discard op. */
|
||||
#define XFS_ALLOC_BUSY_SKIP_DISCARD 0x02 /* do not discard */
|
||||
};
|
||||
|
||||
/*
|
||||
* Per-ag incore structure, copies of information in agf and agi,
|
||||
* to improve the performance of allocation group selection.
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -32,6 +31,7 @@
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_btree.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_extent_busy.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
@ -47,8 +47,6 @@ STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
|
||||
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
|
||||
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
|
||||
xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
|
||||
STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
|
||||
xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
|
||||
|
||||
/*
|
||||
* Lookup the record equal to [bno, len] in the btree given by cur.
|
||||
@ -152,7 +150,7 @@ xfs_alloc_compute_aligned(
|
||||
xfs_extlen_t len;
|
||||
|
||||
/* Trim busy sections out of found extent */
|
||||
xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
|
||||
xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
|
||||
|
||||
if (args->alignment > 1 && len >= args->minlen) {
|
||||
xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
|
||||
@ -536,7 +534,7 @@ xfs_alloc_ag_vextent(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
|
||||
ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
|
||||
args->agbno, args->len));
|
||||
}
|
||||
|
||||
@ -603,7 +601,7 @@ xfs_alloc_ag_vextent_exact(
|
||||
/*
|
||||
* Check for overlapping busy extents.
|
||||
*/
|
||||
xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
|
||||
xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
|
||||
|
||||
/*
|
||||
* Give up if the start of the extent is busy, or the freespace isn't
|
||||
@ -1391,7 +1389,7 @@ xfs_alloc_ag_vextent_small(
|
||||
if (error)
|
||||
goto error0;
|
||||
if (fbno != NULLAGBLOCK) {
|
||||
xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
|
||||
xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
|
||||
args->userdata);
|
||||
|
||||
if (args->userdata) {
|
||||
@ -2496,579 +2494,8 @@ xfs_free_extent(
|
||||
|
||||
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
|
||||
if (!error)
|
||||
xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0);
|
||||
xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
|
||||
error0:
|
||||
xfs_perag_put(args.pag);
|
||||
return error;
|
||||
}
|
||||
|
||||
void
|
||||
xfs_alloc_busy_insert(
|
||||
struct xfs_trans *tp,
|
||||
xfs_agnumber_t agno,
|
||||
xfs_agblock_t bno,
|
||||
xfs_extlen_t len,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct xfs_busy_extent *new;
|
||||
struct xfs_busy_extent *busyp;
|
||||
struct xfs_perag *pag;
|
||||
struct rb_node **rbp;
|
||||
struct rb_node *parent = NULL;
|
||||
|
||||
new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
|
||||
if (!new) {
|
||||
/*
|
||||
* No Memory! Since it is now not possible to track the free
|
||||
* block, make this a synchronous transaction to ensure that
|
||||
* the block is not reused before this transaction commits.
|
||||
*/
|
||||
trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
|
||||
xfs_trans_set_sync(tp);
|
||||
return;
|
||||
}
|
||||
|
||||
new->agno = agno;
|
||||
new->bno = bno;
|
||||
new->length = len;
|
||||
INIT_LIST_HEAD(&new->list);
|
||||
new->flags = flags;
|
||||
|
||||
/* trace before insert to be able to see failed inserts */
|
||||
trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
|
||||
|
||||
pag = xfs_perag_get(tp->t_mountp, new->agno);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
rbp = &pag->pagb_tree.rb_node;
|
||||
while (*rbp) {
|
||||
parent = *rbp;
|
||||
busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
|
||||
|
||||
if (new->bno < busyp->bno) {
|
||||
rbp = &(*rbp)->rb_left;
|
||||
ASSERT(new->bno + new->length <= busyp->bno);
|
||||
} else if (new->bno > busyp->bno) {
|
||||
rbp = &(*rbp)->rb_right;
|
||||
ASSERT(bno >= busyp->bno + busyp->length);
|
||||
} else {
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(&new->rb_node, parent, rbp);
|
||||
rb_insert_color(&new->rb_node, &pag->pagb_tree);
|
||||
|
||||
list_add(&new->list, &tp->t_busy);
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
|
||||
/*
|
||||
* Search for a busy extent within the range of the extent we are about to
|
||||
* allocate. You need to be holding the busy extent tree lock when calling
|
||||
* xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
|
||||
* extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
|
||||
* match. This is done so that a non-zero return indicates an overlap that
|
||||
* will require a synchronous transaction, but it can still be
|
||||
* used to distinguish between a partial or exact match.
|
||||
*/
|
||||
int
|
||||
xfs_alloc_busy_search(
|
||||
struct xfs_mount *mp,
|
||||
xfs_agnumber_t agno,
|
||||
xfs_agblock_t bno,
|
||||
xfs_extlen_t len)
|
||||
{
|
||||
struct xfs_perag *pag;
|
||||
struct rb_node *rbp;
|
||||
struct xfs_busy_extent *busyp;
|
||||
int match = 0;
|
||||
|
||||
pag = xfs_perag_get(mp, agno);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
|
||||
rbp = pag->pagb_tree.rb_node;
|
||||
|
||||
/* find closest start bno overlap */
|
||||
while (rbp) {
|
||||
busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
|
||||
if (bno < busyp->bno) {
|
||||
/* may overlap, but exact start block is lower */
|
||||
if (bno + len > busyp->bno)
|
||||
match = -1;
|
||||
rbp = rbp->rb_left;
|
||||
} else if (bno > busyp->bno) {
|
||||
/* may overlap, but exact start block is higher */
|
||||
if (bno < busyp->bno + busyp->length)
|
||||
match = -1;
|
||||
rbp = rbp->rb_right;
|
||||
} else {
|
||||
/* bno matches busyp, length determines exact match */
|
||||
match = (busyp->length == len) ? 1 : -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
return match;
|
||||
}
|
||||
|
||||
/*
|
||||
* The found free extent [fbno, fend] overlaps part or all of the given busy
|
||||
* extent. If the overlap covers the beginning, the end, or all of the busy
|
||||
* extent, the overlapping portion can be made unbusy and used for the
|
||||
* allocation. We can't split a busy extent because we can't modify a
|
||||
* transaction/CIL context busy list, but we can update an entry's block
|
||||
* number or length.
|
||||
*
|
||||
* Returns true if the extent can safely be reused, or false if the search
|
||||
* needs to be restarted.
|
||||
*/
|
||||
STATIC bool
|
||||
xfs_alloc_busy_update_extent(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_perag *pag,
|
||||
struct xfs_busy_extent *busyp,
|
||||
xfs_agblock_t fbno,
|
||||
xfs_extlen_t flen,
|
||||
bool userdata)
|
||||
{
|
||||
xfs_agblock_t fend = fbno + flen;
|
||||
xfs_agblock_t bbno = busyp->bno;
|
||||
xfs_agblock_t bend = bbno + busyp->length;
|
||||
|
||||
/*
|
||||
* This extent is currently being discarded. Give the thread
|
||||
* performing the discard a chance to mark the extent unbusy
|
||||
* and retry.
|
||||
*/
|
||||
if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
delay(1);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is a busy extent overlapping a user allocation, we have
|
||||
* no choice but to force the log and retry the search.
|
||||
*
|
||||
* Fortunately this does not happen during normal operation, but
|
||||
* only if the filesystem is very low on space and has to dip into
|
||||
* the AGFL for normal allocations.
|
||||
*/
|
||||
if (userdata)
|
||||
goto out_force_log;
|
||||
|
||||
if (bbno < fbno && bend > fend) {
|
||||
/*
|
||||
* Case 1:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +---------+
|
||||
* fbno fend
|
||||
*/
|
||||
|
||||
/*
|
||||
* We would have to split the busy extent to be able to track
|
||||
* it correctly, which we cannot do because we would have to
|
||||
* modify the list of busy extents attached to the transaction
|
||||
* or CIL context, which is immutable.
|
||||
*
|
||||
* Force out the log to clear the busy extent and retry the
|
||||
* search.
|
||||
*/
|
||||
goto out_force_log;
|
||||
} else if (bbno >= fbno && bend <= fend) {
|
||||
/*
|
||||
* Case 2:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-----------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 3:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +--------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 4:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +--------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 5:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-----------------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* The busy extent is fully covered by the extent we are
|
||||
* allocating, and can simply be removed from the rbtree.
|
||||
* However we cannot remove it from the immutable list
|
||||
* tracking busy extents in the transaction or CIL context,
|
||||
* so set the length to zero to mark it invalid.
|
||||
*
|
||||
* We also need to restart the busy extent search from the
|
||||
* tree root, because erasing the node can rearrange the
|
||||
* tree topology.
|
||||
*/
|
||||
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
||||
busyp->length = 0;
|
||||
return false;
|
||||
} else if (fend < bend) {
|
||||
/*
|
||||
* Case 6:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +---------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 7:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +------------------+
|
||||
* fbno fend
|
||||
*
|
||||
*/
|
||||
busyp->bno = fend;
|
||||
} else if (bbno < fbno) {
|
||||
/*
|
||||
* Case 8:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 9:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +----------------------+
|
||||
* fbno fend
|
||||
*/
|
||||
busyp->length = fbno - busyp->bno;
|
||||
} else {
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
|
||||
return true;
|
||||
|
||||
out_force_log:
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_log_force(mp, XFS_LOG_SYNC);
|
||||
trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* For a given extent [fbno, flen], make sure we can reuse it safely.
|
||||
*/
|
||||
void
|
||||
xfs_alloc_busy_reuse(
|
||||
struct xfs_mount *mp,
|
||||
xfs_agnumber_t agno,
|
||||
xfs_agblock_t fbno,
|
||||
xfs_extlen_t flen,
|
||||
bool userdata)
|
||||
{
|
||||
struct xfs_perag *pag;
|
||||
struct rb_node *rbp;
|
||||
|
||||
ASSERT(flen > 0);
|
||||
|
||||
pag = xfs_perag_get(mp, agno);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
restart:
|
||||
rbp = pag->pagb_tree.rb_node;
|
||||
while (rbp) {
|
||||
struct xfs_busy_extent *busyp =
|
||||
rb_entry(rbp, struct xfs_busy_extent, rb_node);
|
||||
xfs_agblock_t bbno = busyp->bno;
|
||||
xfs_agblock_t bend = bbno + busyp->length;
|
||||
|
||||
if (fbno + flen <= bbno) {
|
||||
rbp = rbp->rb_left;
|
||||
continue;
|
||||
} else if (fbno >= bend) {
|
||||
rbp = rbp->rb_right;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
|
||||
userdata))
|
||||
goto restart;
|
||||
}
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
|
||||
/*
|
||||
* For a given extent [fbno, flen], search the busy extent list to find a
|
||||
* subset of the extent that is not busy. If *rlen is smaller than
|
||||
* args->minlen no suitable extent could be found, and the higher level
|
||||
* code needs to force out the log and retry the allocation.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_alloc_busy_trim(
|
||||
struct xfs_alloc_arg *args,
|
||||
xfs_agblock_t bno,
|
||||
xfs_extlen_t len,
|
||||
xfs_agblock_t *rbno,
|
||||
xfs_extlen_t *rlen)
|
||||
{
|
||||
xfs_agblock_t fbno;
|
||||
xfs_extlen_t flen;
|
||||
struct rb_node *rbp;
|
||||
|
||||
ASSERT(len > 0);
|
||||
|
||||
spin_lock(&args->pag->pagb_lock);
|
||||
restart:
|
||||
fbno = bno;
|
||||
flen = len;
|
||||
rbp = args->pag->pagb_tree.rb_node;
|
||||
while (rbp && flen >= args->minlen) {
|
||||
struct xfs_busy_extent *busyp =
|
||||
rb_entry(rbp, struct xfs_busy_extent, rb_node);
|
||||
xfs_agblock_t fend = fbno + flen;
|
||||
xfs_agblock_t bbno = busyp->bno;
|
||||
xfs_agblock_t bend = bbno + busyp->length;
|
||||
|
||||
if (fend <= bbno) {
|
||||
rbp = rbp->rb_left;
|
||||
continue;
|
||||
} else if (fbno >= bend) {
|
||||
rbp = rbp->rb_right;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a metadata allocation, try to reuse the busy
|
||||
* extent instead of trimming the allocation.
|
||||
*/
|
||||
if (!args->userdata &&
|
||||
!(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
|
||||
if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
|
||||
busyp, fbno, flen,
|
||||
false))
|
||||
goto restart;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bbno <= fbno) {
|
||||
/* start overlap */
|
||||
|
||||
/*
|
||||
* Case 1:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +---------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 2:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 3:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 4:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-----------------+
|
||||
* fbno fend
|
||||
*
|
||||
* No unbusy region in extent, return failure.
|
||||
*/
|
||||
if (fend <= bend)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Case 5:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +----------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 6:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +--------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Needs to be trimmed to:
|
||||
* +-------+
|
||||
* fbno fend
|
||||
*/
|
||||
fbno = bend;
|
||||
} else if (bend >= fend) {
|
||||
/* end overlap */
|
||||
|
||||
/*
|
||||
* Case 7:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 8:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +--------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Needs to be trimmed to:
|
||||
* +-------+
|
||||
* fbno fend
|
||||
*/
|
||||
fend = bbno;
|
||||
} else {
|
||||
/* middle overlap */
|
||||
|
||||
/*
|
||||
* Case 9:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-----------------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Can be trimmed to:
|
||||
* +-------+ OR +-------+
|
||||
* fbno fend fbno fend
|
||||
*
|
||||
* Backward allocation leads to significant
|
||||
* fragmentation of directories, which degrades
|
||||
* directory performance, therefore we always want to
|
||||
* choose the option that produces forward allocation
|
||||
* patterns.
|
||||
* Preferring the lower bno extent will make the next
|
||||
* request use "fend" as the start of the next
|
||||
* allocation; if the segment is no longer busy at
|
||||
* that point, we'll get a contiguous allocation, but
|
||||
* even if it is still busy, we will get a forward
|
||||
* allocation.
|
||||
* We try to avoid choosing the segment at "bend",
|
||||
* because that can lead to the next allocation
|
||||
* taking the segment at "fbno", which would be a
|
||||
* backward allocation. We only use the segment at
|
||||
* "fbno" if it is much larger than the current
|
||||
* requested size, because in that case there's a
|
||||
* good chance subsequent allocations will be
|
||||
* contiguous.
|
||||
*/
|
||||
if (bbno - fbno >= args->maxlen) {
|
||||
/* left candidate fits perfectly */
|
||||
fend = bbno;
|
||||
} else if (fend - bend >= args->maxlen * 4) {
|
||||
/* right candidate has enough free space */
|
||||
fbno = bend;
|
||||
} else if (bbno - fbno >= args->minlen) {
|
||||
/* left candidate fits minimum requirement */
|
||||
fend = bbno;
|
||||
} else {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
flen = fend - fbno;
|
||||
}
|
||||
spin_unlock(&args->pag->pagb_lock);
|
||||
|
||||
if (fbno != bno || flen != len) {
|
||||
trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
|
||||
fbno, flen);
|
||||
}
|
||||
*rbno = fbno;
|
||||
*rlen = flen;
|
||||
return;
|
||||
fail:
|
||||
/*
|
||||
* Return a zero extent length as a failure indication. All callers
|
||||
* re-check if the trimmed extent satisfies the minlen requirement.
|
||||
*/
|
||||
spin_unlock(&args->pag->pagb_lock);
|
||||
trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
|
||||
*rbno = fbno;
|
||||
*rlen = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_alloc_busy_clear_one(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_perag *pag,
|
||||
struct xfs_busy_extent *busyp)
|
||||
{
|
||||
if (busyp->length) {
|
||||
trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
|
||||
busyp->length);
|
||||
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
||||
}
|
||||
|
||||
list_del_init(&busyp->list);
|
||||
kmem_free(busyp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove all extents on the passed in list from the busy extents tree.
|
||||
* If do_discard is set skip extents that need to be discarded, and mark
|
||||
* these as undergoing a discard operation instead.
|
||||
*/
|
||||
void
|
||||
xfs_alloc_busy_clear(
|
||||
struct xfs_mount *mp,
|
||||
struct list_head *list,
|
||||
bool do_discard)
|
||||
{
|
||||
struct xfs_busy_extent *busyp, *n;
|
||||
struct xfs_perag *pag = NULL;
|
||||
xfs_agnumber_t agno = NULLAGNUMBER;
|
||||
|
||||
list_for_each_entry_safe(busyp, n, list, list) {
|
||||
if (busyp->agno != agno) {
|
||||
if (pag) {
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
pag = xfs_perag_get(mp, busyp->agno);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
agno = busyp->agno;
|
||||
}
|
||||
|
||||
if (do_discard && busyp->length &&
|
||||
!(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD))
|
||||
busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
|
||||
else
|
||||
xfs_alloc_busy_clear_one(mp, pag, busyp);
|
||||
}
|
||||
|
||||
if (pag) {
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback for list_sort to sort busy extents by the AG they reside in.
|
||||
*/
|
||||
int
|
||||
xfs_busy_extent_ag_cmp(
|
||||
void *priv,
|
||||
struct list_head *a,
|
||||
struct list_head *b)
|
||||
{
|
||||
return container_of(a, struct xfs_busy_extent, list)->agno -
|
||||
container_of(b, struct xfs_busy_extent, list)->agno;
|
||||
}
|
||||
|
@ -23,7 +23,6 @@ struct xfs_btree_cur;
|
||||
struct xfs_mount;
|
||||
struct xfs_perag;
|
||||
struct xfs_trans;
|
||||
struct xfs_busy_extent;
|
||||
|
||||
extern struct workqueue_struct *xfs_alloc_wq;
|
||||
|
||||
@ -139,33 +138,6 @@ xfs_extlen_t
|
||||
xfs_alloc_longest_free_extent(struct xfs_mount *mp,
|
||||
struct xfs_perag *pag);
|
||||
|
||||
#ifdef __KERNEL__
|
||||
void
|
||||
xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
|
||||
xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
|
||||
|
||||
void
|
||||
xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list,
|
||||
bool do_discard);
|
||||
|
||||
int
|
||||
xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||
xfs_agblock_t bno, xfs_extlen_t len);
|
||||
|
||||
void
|
||||
xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||
xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
|
||||
|
||||
int
|
||||
xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
|
||||
|
||||
static inline void xfs_alloc_busy_sort(struct list_head *list)
|
||||
{
|
||||
list_sort(NULL, list, xfs_busy_extent_ag_cmp);
|
||||
}
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
/*
|
||||
* Compute and fill in value of m_ag_maxlevels.
|
||||
*/
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -32,6 +30,7 @@
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_btree.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_extent_busy.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
@ -94,7 +93,7 @@ xfs_allocbt_alloc_block(
|
||||
return 0;
|
||||
}
|
||||
|
||||
xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
|
||||
xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
|
||||
|
||||
xfs_trans_agbtree_delta(cur->bc_tp, 1);
|
||||
new->s = cpu_to_be32(bno);
|
||||
@ -119,8 +118,8 @@ xfs_allocbt_free_block(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
|
||||
XFS_ALLOC_BUSY_SKIP_DISCARD);
|
||||
xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
|
||||
XFS_EXTENT_BUSY_SKIP_DISCARD);
|
||||
xfs_trans_agbtree_delta(cur->bc_tp, -1);
|
||||
return 0;
|
||||
}
|
||||
|
@ -16,9 +16,7 @@
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_trans.h"
|
||||
@ -29,7 +27,6 @@
|
||||
#include "xfs_inode_item.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_iomap.h"
|
||||
#include "xfs_vnodeops.h"
|
||||
#include "xfs_trace.h"
|
||||
@ -623,7 +620,7 @@ xfs_map_at_offset(
|
||||
* or delayed allocate extent.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_is_delayed_page(
|
||||
xfs_check_page_type(
|
||||
struct page *page,
|
||||
unsigned int type)
|
||||
{
|
||||
@ -637,11 +634,11 @@ xfs_is_delayed_page(
|
||||
bh = head = page_buffers(page);
|
||||
do {
|
||||
if (buffer_unwritten(bh))
|
||||
acceptable = (type == IO_UNWRITTEN);
|
||||
acceptable += (type == IO_UNWRITTEN);
|
||||
else if (buffer_delay(bh))
|
||||
acceptable = (type == IO_DELALLOC);
|
||||
acceptable += (type == IO_DELALLOC);
|
||||
else if (buffer_dirty(bh) && buffer_mapped(bh))
|
||||
acceptable = (type == IO_OVERWRITE);
|
||||
acceptable += (type == IO_OVERWRITE);
|
||||
else
|
||||
break;
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
@ -684,7 +681,7 @@ xfs_convert_page(
|
||||
goto fail_unlock_page;
|
||||
if (page->mapping != inode->i_mapping)
|
||||
goto fail_unlock_page;
|
||||
if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
|
||||
if (!xfs_check_page_type(page, (*ioendp)->io_type))
|
||||
goto fail_unlock_page;
|
||||
|
||||
/*
|
||||
@ -834,7 +831,7 @@ xfs_aops_discard_page(
|
||||
struct buffer_head *bh, *head;
|
||||
loff_t offset = page_offset(page);
|
||||
|
||||
if (!xfs_is_delayed_page(page, IO_DELALLOC))
|
||||
if (!xfs_check_page_type(page, IO_DELALLOC))
|
||||
goto out_invalidate;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
@ -1146,7 +1143,14 @@ __xfs_get_blocks(
|
||||
if (!create && direct && offset >= i_size_read(inode))
|
||||
return 0;
|
||||
|
||||
if (create) {
|
||||
/*
|
||||
* Direct I/O is usually done on preallocated files, so try getting
|
||||
* a block mapping without an exclusive lock first. For buffered
|
||||
* writes we already have the exclusive iolock anyway, so avoiding
|
||||
* a lock roundtrip here by taking the ilock exclusive from the
|
||||
* beginning is a useful micro optimization.
|
||||
*/
|
||||
if (create && !direct) {
|
||||
lockmode = XFS_ILOCK_EXCL;
|
||||
xfs_ilock(ip, lockmode);
|
||||
} else {
|
||||
@ -1168,23 +1172,45 @@ __xfs_get_blocks(
|
||||
(!nimaps ||
|
||||
(imap.br_startblock == HOLESTARTBLOCK ||
|
||||
imap.br_startblock == DELAYSTARTBLOCK))) {
|
||||
if (direct) {
|
||||
if (direct || xfs_get_extsz_hint(ip)) {
|
||||
/*
|
||||
* Drop the ilock in preparation for starting the block
|
||||
* allocation transaction. It will be retaken
|
||||
* exclusively inside xfs_iomap_write_direct for the
|
||||
* actual allocation.
|
||||
*/
|
||||
xfs_iunlock(ip, lockmode);
|
||||
error = xfs_iomap_write_direct(ip, offset, size,
|
||||
&imap, nimaps);
|
||||
if (error)
|
||||
return -error;
|
||||
new = 1;
|
||||
} else {
|
||||
/*
|
||||
* Delalloc reservations do not require a transaction,
|
||||
* we can go on without dropping the lock here. If we
|
||||
* are allocating a new delalloc block, make sure that
|
||||
* we set the new flag so that we mark the buffer new so
|
||||
* that we know that it is newly allocated if the write
|
||||
* fails.
|
||||
*/
|
||||
if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
|
||||
new = 1;
|
||||
error = xfs_iomap_write_delay(ip, offset, size, &imap);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
xfs_iunlock(ip, lockmode);
|
||||
}
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
|
||||
} else if (nimaps) {
|
||||
trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
} else {
|
||||
trace_xfs_get_blocks_notfound(ip, offset, size);
|
||||
goto out_unlock;
|
||||
}
|
||||
xfs_iunlock(ip, lockmode);
|
||||
|
||||
if (imap.br_startblock != HOLESTARTBLOCK &&
|
||||
imap.br_startblock != DELAYSTARTBLOCK) {
|
||||
@ -1386,52 +1412,91 @@ out_destroy_ioend:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Punch out the delalloc blocks we have already allocated.
|
||||
*
|
||||
* Don't bother with xfs_setattr given that nothing can have made it to disk yet
|
||||
* as the page is still locked at this point.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_vm_write_failed(
|
||||
struct address_space *mapping,
|
||||
loff_t to)
|
||||
xfs_vm_kill_delalloc_range(
|
||||
struct inode *inode,
|
||||
loff_t start,
|
||||
loff_t end)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
xfs_fileoff_t start_fsb;
|
||||
xfs_fileoff_t end_fsb;
|
||||
int error;
|
||||
|
||||
if (to > inode->i_size) {
|
||||
/*
|
||||
* Punch out the delalloc blocks we have already allocated.
|
||||
*
|
||||
* Don't bother with xfs_setattr given that nothing can have
|
||||
* made it to disk yet as the page is still locked at this
|
||||
* point.
|
||||
*/
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
xfs_fileoff_t start_fsb;
|
||||
xfs_fileoff_t end_fsb;
|
||||
int error;
|
||||
start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
|
||||
end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
|
||||
if (end_fsb <= start_fsb)
|
||||
return;
|
||||
|
||||
truncate_pagecache(inode, to, inode->i_size);
|
||||
|
||||
/*
|
||||
* Check if there are any blocks that are outside of i_size
|
||||
* that need to be trimmed back.
|
||||
*/
|
||||
start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
|
||||
end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
|
||||
if (end_fsb <= start_fsb)
|
||||
return;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
|
||||
end_fsb - start_fsb);
|
||||
if (error) {
|
||||
/* something screwed, just bail */
|
||||
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
|
||||
xfs_alert(ip->i_mount,
|
||||
"xfs_vm_write_failed: unable to clean up ino %lld",
|
||||
ip->i_ino);
|
||||
}
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
|
||||
end_fsb - start_fsb);
|
||||
if (error) {
|
||||
/* something screwed, just bail */
|
||||
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
|
||||
xfs_alert(ip->i_mount,
|
||||
"xfs_vm_write_failed: unable to clean up ino %lld",
|
||||
ip->i_ino);
|
||||
}
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_vm_write_failed(
|
||||
struct inode *inode,
|
||||
struct page *page,
|
||||
loff_t pos,
|
||||
unsigned len)
|
||||
{
|
||||
loff_t block_offset = pos & PAGE_MASK;
|
||||
loff_t block_start;
|
||||
loff_t block_end;
|
||||
loff_t from = pos & (PAGE_CACHE_SIZE - 1);
|
||||
loff_t to = from + len;
|
||||
struct buffer_head *bh, *head;
|
||||
|
||||
ASSERT(block_offset + from == pos);
|
||||
|
||||
head = page_buffers(page);
|
||||
block_start = 0;
|
||||
for (bh = head; bh != head || !block_start;
|
||||
bh = bh->b_this_page, block_start = block_end,
|
||||
block_offset += bh->b_size) {
|
||||
block_end = block_start + bh->b_size;
|
||||
|
||||
/* skip buffers before the write */
|
||||
if (block_end <= from)
|
||||
continue;
|
||||
|
||||
/* if the buffer is after the write, we're done */
|
||||
if (block_start >= to)
|
||||
break;
|
||||
|
||||
if (!buffer_delay(bh))
|
||||
continue;
|
||||
|
||||
if (!buffer_new(bh) && block_offset < i_size_read(inode))
|
||||
continue;
|
||||
|
||||
xfs_vm_kill_delalloc_range(inode, block_offset,
|
||||
block_offset + bh->b_size);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* This used to call block_write_begin(), but it unlocks and releases the page
|
||||
* on error, and we need that page to be able to punch stale delalloc blocks out
|
||||
* on failure. Hence we copy-n-waste it here and call xfs_vm_write_failed() at
|
||||
* the appropriate point.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_vm_write_begin(
|
||||
struct file *file,
|
||||
@ -1442,15 +1507,40 @@ xfs_vm_write_begin(
|
||||
struct page **pagep,
|
||||
void **fsdata)
|
||||
{
|
||||
int ret;
|
||||
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
|
||||
struct page *page;
|
||||
int status;
|
||||
|
||||
ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
|
||||
pagep, xfs_get_blocks);
|
||||
if (unlikely(ret))
|
||||
xfs_vm_write_failed(mapping, pos + len);
|
||||
return ret;
|
||||
ASSERT(len <= PAGE_CACHE_SIZE);
|
||||
|
||||
page = grab_cache_page_write_begin(mapping, index,
|
||||
flags | AOP_FLAG_NOFS);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
status = __block_write_begin(page, pos, len, xfs_get_blocks);
|
||||
if (unlikely(status)) {
|
||||
struct inode *inode = mapping->host;
|
||||
|
||||
xfs_vm_write_failed(inode, page, pos, len);
|
||||
unlock_page(page);
|
||||
|
||||
if (pos + len > i_size_read(inode))
|
||||
truncate_pagecache(inode, pos + len, i_size_read(inode));
|
||||
|
||||
page_cache_release(page);
|
||||
page = NULL;
|
||||
}
|
||||
|
||||
*pagep = page;
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* On failure, we only need to kill delalloc blocks beyond EOF because they
|
||||
* will never be written. For blocks within EOF, generic_write_end() zeros them
|
||||
* so they are safe to leave alone and be written with all the other valid data.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_vm_write_end(
|
||||
struct file *file,
|
||||
@ -1463,9 +1553,19 @@ xfs_vm_write_end(
|
||||
{
|
||||
int ret;
|
||||
|
||||
ASSERT(len <= PAGE_CACHE_SIZE);
|
||||
|
||||
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
|
||||
if (unlikely(ret < len))
|
||||
xfs_vm_write_failed(mapping, pos + len);
|
||||
if (unlikely(ret < len)) {
|
||||
struct inode *inode = mapping->host;
|
||||
size_t isize = i_size_read(inode);
|
||||
loff_t to = pos + len;
|
||||
|
||||
if (to > isize) {
|
||||
truncate_pagecache(inode, to, isize);
|
||||
xfs_vm_kill_delalloc_range(inode, isize, to);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -39,7 +38,6 @@
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_trans_space.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_vnodeops.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
@ -1987,14 +1985,12 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
|
||||
(map[i].br_startblock != HOLESTARTBLOCK));
|
||||
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
|
||||
blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
|
||||
error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
|
||||
blkcnt, XBF_LOCK | XBF_DONT_BLOCK,
|
||||
&bp);
|
||||
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
|
||||
dblkno, blkcnt, 0, &bp);
|
||||
if (error)
|
||||
return(error);
|
||||
|
||||
tmp = (valuelen < XFS_BUF_SIZE(bp))
|
||||
? valuelen : XFS_BUF_SIZE(bp);
|
||||
tmp = min_t(int, valuelen, BBTOB(bp->b_length));
|
||||
xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
|
||||
xfs_buf_relse(bp);
|
||||
dst += tmp;
|
||||
@ -2097,6 +2093,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
|
||||
lblkno = args->rmtblkno;
|
||||
valuelen = args->valuelen;
|
||||
while (valuelen > 0) {
|
||||
int buflen;
|
||||
|
||||
/*
|
||||
* Try to remember where we decided to put the value.
|
||||
*/
|
||||
@ -2114,15 +2112,16 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
|
||||
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
|
||||
blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
|
||||
|
||||
bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
|
||||
XBF_LOCK | XBF_DONT_BLOCK);
|
||||
bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
|
||||
if (!bp)
|
||||
return ENOMEM;
|
||||
tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
|
||||
XFS_BUF_SIZE(bp);
|
||||
|
||||
buflen = BBTOB(bp->b_length);
|
||||
tmp = min_t(int, valuelen, buflen);
|
||||
xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
|
||||
if (tmp < XFS_BUF_SIZE(bp))
|
||||
xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
|
||||
if (tmp < buflen)
|
||||
xfs_buf_zero(bp, tmp, buflen - tmp);
|
||||
|
||||
error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
|
||||
xfs_buf_relse(bp);
|
||||
if (error)
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -2983,7 +2982,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
|
||||
map.br_blockcount);
|
||||
bp = xfs_trans_get_buf(*trans,
|
||||
dp->i_mount->m_ddev_targp,
|
||||
dblkno, dblkcnt, XBF_LOCK);
|
||||
dblkno, dblkcnt, 0);
|
||||
if (!bp)
|
||||
return ENOMEM;
|
||||
xfs_trans_binval(*trans, bp);
|
||||
|
@ -41,7 +41,6 @@
|
||||
#include "xfs_rtalloc.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_attr_leaf.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_trans_space.h"
|
||||
#include "xfs_buf_item.h"
|
||||
@ -4527,7 +4526,7 @@ out_unreserve_blocks:
|
||||
xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
|
||||
out_unreserve_quota:
|
||||
if (XFS_IS_QUOTA_ON(mp))
|
||||
xfs_trans_unreserve_quota_nblks(NULL, ip, alen, 0, rt ?
|
||||
xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
|
||||
XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
|
||||
return error;
|
||||
}
|
||||
@ -5621,8 +5620,20 @@ xfs_getbmap(
|
||||
XFS_FSB_TO_BB(mp, map[i].br_blockcount);
|
||||
out[cur_ext].bmv_unused1 = 0;
|
||||
out[cur_ext].bmv_unused2 = 0;
|
||||
ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
|
||||
(map[i].br_startblock != DELAYSTARTBLOCK));
|
||||
|
||||
/*
|
||||
* delayed allocation extents that start beyond EOF can
|
||||
* occur due to speculative EOF allocation when the
|
||||
* delalloc extent is larger than the largest freespace
|
||||
* extent at conversion time. These extents cannot be
|
||||
* converted by data writeback, so can exist here even
|
||||
* if we are not supposed to be finding delalloc
|
||||
* extents.
|
||||
*/
|
||||
if (map[i].br_startblock == DELAYSTARTBLOCK &&
|
||||
map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
|
||||
ASSERT((iflags & BMV_IF_DELALLOC) != 0);
|
||||
|
||||
if (map[i].br_startblock == HOLESTARTBLOCK &&
|
||||
whichfork == XFS_ATTR_FORK) {
|
||||
/* came to the end of attribute fork */
|
||||
@ -6157,3 +6168,16 @@ next_block:
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert the given file system block to a disk block. We have to treat it
|
||||
* differently based on whether the file is a real time file or not, because the
|
||||
* bmap code does.
|
||||
*/
|
||||
xfs_daddr_t
|
||||
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
|
||||
{
|
||||
return (XFS_IS_REALTIME_INODE(ip) ? \
|
||||
(xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
|
||||
XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
|
||||
}
|
||||
|
@ -211,6 +211,9 @@ int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
|
||||
int whichfork, int *count);
|
||||
int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
|
||||
xfs_fileoff_t start_fsb, xfs_fileoff_t length);
|
||||
|
||||
xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* __XFS_BMAP_H__ */
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
611
fs/xfs/xfs_buf.c
611
fs/xfs/xfs_buf.c
File diff suppressed because it is too large
Load Diff
@ -32,11 +32,6 @@
|
||||
|
||||
#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
|
||||
|
||||
#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
|
||||
#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
|
||||
#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
|
||||
#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
|
||||
|
||||
typedef enum {
|
||||
XBRW_READ = 1, /* transfer into target memory */
|
||||
XBRW_WRITE = 2, /* transfer from target memory */
|
||||
@ -46,11 +41,9 @@ typedef enum {
|
||||
#define XBF_READ (1 << 0) /* buffer intended for reading from device */
|
||||
#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
|
||||
#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
|
||||
#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */
|
||||
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
|
||||
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
|
||||
#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
|
||||
#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
|
||||
#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
|
||||
|
||||
/* I/O hints for the BIO layer */
|
||||
#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
|
||||
@ -58,14 +51,13 @@ typedef enum {
|
||||
#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
|
||||
|
||||
/* flags used only as arguments to access routines */
|
||||
#define XBF_LOCK (1 << 15)/* lock requested */
|
||||
#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
|
||||
#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
|
||||
#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */
|
||||
|
||||
/* flags used only internally */
|
||||
#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
|
||||
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
|
||||
#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */
|
||||
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
|
||||
|
||||
typedef unsigned int xfs_buf_flags_t;
|
||||
|
||||
@ -73,25 +65,18 @@ typedef unsigned int xfs_buf_flags_t;
|
||||
{ XBF_READ, "READ" }, \
|
||||
{ XBF_WRITE, "WRITE" }, \
|
||||
{ XBF_READ_AHEAD, "READ_AHEAD" }, \
|
||||
{ XBF_MAPPED, "MAPPED" }, \
|
||||
{ XBF_ASYNC, "ASYNC" }, \
|
||||
{ XBF_DONE, "DONE" }, \
|
||||
{ XBF_DELWRI, "DELWRI" }, \
|
||||
{ XBF_STALE, "STALE" }, \
|
||||
{ XBF_SYNCIO, "SYNCIO" }, \
|
||||
{ XBF_FUA, "FUA" }, \
|
||||
{ XBF_FLUSH, "FLUSH" }, \
|
||||
{ XBF_LOCK, "LOCK" }, /* should never be set */\
|
||||
{ XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
|
||||
{ XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
|
||||
{ XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
|
||||
{ XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
|
||||
{ _XBF_PAGES, "PAGES" }, \
|
||||
{ _XBF_KMEM, "KMEM" }, \
|
||||
{ _XBF_DELWRI_Q, "DELWRI_Q" }
|
||||
|
||||
typedef enum {
|
||||
XBT_FORCE_FLUSH = 0,
|
||||
} xfs_buftarg_flags_t;
|
||||
|
||||
typedef struct xfs_buftarg {
|
||||
dev_t bt_dev;
|
||||
struct block_device *bt_bdev;
|
||||
@ -101,12 +86,6 @@ typedef struct xfs_buftarg {
|
||||
unsigned int bt_sshift;
|
||||
size_t bt_smask;
|
||||
|
||||
/* per device delwri queue */
|
||||
struct task_struct *bt_task;
|
||||
struct list_head bt_delwri_queue;
|
||||
spinlock_t bt_delwri_lock;
|
||||
unsigned long bt_flags;
|
||||
|
||||
/* LRU control structures */
|
||||
struct shrinker bt_shrinker;
|
||||
struct list_head bt_lru;
|
||||
@ -128,8 +107,8 @@ typedef struct xfs_buf {
|
||||
* fast-path on locking.
|
||||
*/
|
||||
struct rb_node b_rbnode; /* rbtree node */
|
||||
xfs_off_t b_file_offset; /* offset in file */
|
||||
size_t b_buffer_length;/* size of buffer in bytes */
|
||||
xfs_daddr_t b_bn; /* block number for I/O */
|
||||
int b_length; /* size of buffer in BBs */
|
||||
atomic_t b_hold; /* reference count */
|
||||
atomic_t b_lru_ref; /* lru reclaim ref count */
|
||||
xfs_buf_flags_t b_flags; /* status flags */
|
||||
@ -140,8 +119,6 @@ typedef struct xfs_buf {
|
||||
struct list_head b_list;
|
||||
struct xfs_perag *b_pag; /* contains rbtree root */
|
||||
xfs_buftarg_t *b_target; /* buffer target (device) */
|
||||
xfs_daddr_t b_bn; /* block number for I/O */
|
||||
size_t b_count_desired;/* desired transfer size */
|
||||
void *b_addr; /* virtual address of buffer */
|
||||
struct work_struct b_iodone_work;
|
||||
xfs_buf_iodone_t b_iodone; /* I/O completion function */
|
||||
@ -150,7 +127,7 @@ typedef struct xfs_buf {
|
||||
struct xfs_trans *b_transp;
|
||||
struct page **b_pages; /* array of page pointers */
|
||||
struct page *b_page_array[XB_PAGES]; /* inline pages */
|
||||
unsigned long b_queuetime; /* time buffer was queued */
|
||||
int b_io_length; /* IO size in BBs */
|
||||
atomic_t b_pin_count; /* pin count */
|
||||
atomic_t b_io_remaining; /* #outstanding I/O requests */
|
||||
unsigned int b_page_count; /* size of page array */
|
||||
@ -163,26 +140,30 @@ typedef struct xfs_buf {
|
||||
|
||||
|
||||
/* Finding and Reading Buffers */
|
||||
extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
|
||||
xfs_buf_flags_t, xfs_buf_t *);
|
||||
struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno,
|
||||
size_t numblks, xfs_buf_flags_t flags,
|
||||
struct xfs_buf *new_bp);
|
||||
#define xfs_incore(buftarg,blkno,len,lockit) \
|
||||
_xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
|
||||
|
||||
extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
|
||||
xfs_buf_flags_t);
|
||||
extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
|
||||
xfs_buf_flags_t);
|
||||
struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno,
|
||||
size_t numblks, xfs_buf_flags_t flags);
|
||||
struct xfs_buf *xfs_buf_read(struct xfs_buftarg *target, xfs_daddr_t blkno,
|
||||
size_t numblks, xfs_buf_flags_t flags);
|
||||
void xfs_buf_readahead(struct xfs_buftarg *target, xfs_daddr_t blkno,
|
||||
size_t numblks);
|
||||
|
||||
struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *, xfs_off_t, size_t,
|
||||
xfs_buf_flags_t);
|
||||
extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
|
||||
extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
|
||||
extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
|
||||
extern void xfs_buf_hold(xfs_buf_t *);
|
||||
extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
|
||||
struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
|
||||
struct xfs_buftarg *target,
|
||||
xfs_daddr_t daddr, size_t length, int flags);
|
||||
struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks);
|
||||
struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *target, xfs_daddr_t blkno,
|
||||
size_t numblks, xfs_buf_flags_t flags);
|
||||
void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
|
||||
int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
|
||||
|
||||
struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
|
||||
int flags);
|
||||
struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target,
|
||||
xfs_daddr_t daddr, size_t numblks, int flags);
|
||||
void xfs_buf_hold(struct xfs_buf *bp);
|
||||
|
||||
/* Releasing Buffers */
|
||||
extern void xfs_buf_free(xfs_buf_t *);
|
||||
@ -204,7 +185,7 @@ extern int xfs_bdstrat_cb(struct xfs_buf *);
|
||||
extern void xfs_buf_ioend(xfs_buf_t *, int);
|
||||
extern void xfs_buf_ioerror(xfs_buf_t *, int);
|
||||
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
|
||||
extern int xfs_buf_iorequest(xfs_buf_t *);
|
||||
extern void xfs_buf_iorequest(xfs_buf_t *);
|
||||
extern int xfs_buf_iowait(xfs_buf_t *);
|
||||
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
|
||||
xfs_buf_rw_t);
|
||||
@ -220,24 +201,22 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
|
||||
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
|
||||
|
||||
/* Delayed Write Buffer Routines */
|
||||
extern void xfs_buf_delwri_queue(struct xfs_buf *);
|
||||
extern void xfs_buf_delwri_dequeue(struct xfs_buf *);
|
||||
extern void xfs_buf_delwri_promote(struct xfs_buf *);
|
||||
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
|
||||
extern int xfs_buf_delwri_submit(struct list_head *);
|
||||
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
|
||||
|
||||
/* Buffer Daemon Setup Routines */
|
||||
extern int xfs_buf_init(void);
|
||||
extern void xfs_buf_terminate(void);
|
||||
|
||||
#define XFS_BUF_ZEROFLAGS(bp) \
|
||||
((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
|
||||
((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
|
||||
XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
|
||||
|
||||
void xfs_buf_stale(struct xfs_buf *bp);
|
||||
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
|
||||
#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
|
||||
|
||||
#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
|
||||
|
||||
#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
|
||||
#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)
|
||||
#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)
|
||||
@ -256,12 +235,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
|
||||
|
||||
#define XFS_BUF_ADDR(bp) ((bp)->b_bn)
|
||||
#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))
|
||||
#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset)
|
||||
#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off))
|
||||
#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired)
|
||||
#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt))
|
||||
#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
|
||||
#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
|
||||
|
||||
static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
|
||||
{
|
||||
@ -287,7 +260,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
|
||||
extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
|
||||
extern void xfs_wait_buftarg(xfs_buftarg_t *);
|
||||
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
|
||||
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
|
||||
|
||||
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
|
||||
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -123,11 +122,11 @@ xfs_buf_item_log_check(
|
||||
ASSERT(bip->bli_logged != NULL);
|
||||
|
||||
bp = bip->bli_buf;
|
||||
ASSERT(XFS_BUF_COUNT(bp) > 0);
|
||||
ASSERT(bp->b_length > 0);
|
||||
ASSERT(bp->b_addr != NULL);
|
||||
orig = bip->bli_orig;
|
||||
buffer = bp->b_addr;
|
||||
for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
|
||||
for (x = 0; x < BBTOB(bp->b_length); x++) {
|
||||
if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
|
||||
xfs_emerg(bp->b_mount,
|
||||
"%s: bip %x buffer %x orig %x index %d",
|
||||
@ -418,7 +417,6 @@ xfs_buf_item_unpin(
|
||||
if (freed && stale) {
|
||||
ASSERT(bip->bli_flags & XFS_BLI_STALE);
|
||||
ASSERT(xfs_buf_islocked(bp));
|
||||
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
|
||||
ASSERT(XFS_BUF_ISSTALE(bp));
|
||||
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
|
||||
|
||||
@ -455,42 +453,42 @@ xfs_buf_item_unpin(
|
||||
bp->b_iodone = NULL;
|
||||
} else {
|
||||
spin_lock(&ailp->xa_lock);
|
||||
xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
|
||||
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
|
||||
xfs_buf_item_relse(bp);
|
||||
ASSERT(bp->b_fspriv == NULL);
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
} else if (freed && remove) {
|
||||
xfs_buf_lock(bp);
|
||||
xfs_buf_ioerror(bp, EIO);
|
||||
XFS_BUF_UNDONE(bp);
|
||||
xfs_buf_stale(bp);
|
||||
xfs_buf_ioend(bp, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called to attempt to lock the buffer associated with this
|
||||
* buf log item. Don't sleep on the buffer lock. If we can't get
|
||||
* the lock right away, return 0. If we can get the lock, take a
|
||||
* reference to the buffer. If this is a delayed write buffer that
|
||||
* needs AIL help to be written back, invoke the pushbuf routine
|
||||
* rather than the normal success path.
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_buf_item_trylock(
|
||||
struct xfs_log_item *lip)
|
||||
xfs_buf_item_push(
|
||||
struct xfs_log_item *lip,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
||||
struct xfs_buf *bp = bip->bli_buf;
|
||||
uint rval = XFS_ITEM_SUCCESS;
|
||||
|
||||
if (xfs_buf_ispinned(bp))
|
||||
return XFS_ITEM_PINNED;
|
||||
if (!xfs_buf_trylock(bp))
|
||||
return XFS_ITEM_LOCKED;
|
||||
|
||||
/* take a reference to the buffer. */
|
||||
xfs_buf_hold(bp);
|
||||
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
||||
trace_xfs_buf_item_trylock(bip);
|
||||
if (XFS_BUF_ISDELAYWRITE(bp))
|
||||
return XFS_ITEM_PUSHBUF;
|
||||
return XFS_ITEM_SUCCESS;
|
||||
|
||||
trace_xfs_buf_item_push(bip);
|
||||
|
||||
if (!xfs_buf_delwri_queue(bp, buffer_list))
|
||||
rval = XFS_ITEM_FLUSHING;
|
||||
xfs_buf_unlock(bp);
|
||||
return rval;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -603,49 +601,6 @@ xfs_buf_item_committed(
|
||||
return lsn;
|
||||
}
|
||||
|
||||
/*
|
||||
* The buffer is locked, but is not a delayed write buffer. This happens
|
||||
* if we race with IO completion and hence we don't want to try to write it
|
||||
* again. Just release the buffer.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_buf_item_push(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
||||
struct xfs_buf *bp = bip->bli_buf;
|
||||
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
||||
ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
|
||||
|
||||
trace_xfs_buf_item_push(bip);
|
||||
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* The buffer is locked and is a delayed write buffer. Promote the buffer
|
||||
* in the delayed write queue as the caller knows that they must invoke
|
||||
* the xfsbufd to get this buffer written. We have to unlock the buffer
|
||||
* to allow the xfsbufd to write it, too.
|
||||
*/
|
||||
STATIC bool
|
||||
xfs_buf_item_pushbuf(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
||||
struct xfs_buf *bp = bip->bli_buf;
|
||||
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
||||
ASSERT(XFS_BUF_ISDELAYWRITE(bp));
|
||||
|
||||
trace_xfs_buf_item_pushbuf(bip);
|
||||
|
||||
xfs_buf_delwri_promote(bp);
|
||||
xfs_buf_relse(bp);
|
||||
return true;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_buf_item_committing(
|
||||
struct xfs_log_item *lip,
|
||||
@ -661,11 +616,9 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
|
||||
.iop_format = xfs_buf_item_format,
|
||||
.iop_pin = xfs_buf_item_pin,
|
||||
.iop_unpin = xfs_buf_item_unpin,
|
||||
.iop_trylock = xfs_buf_item_trylock,
|
||||
.iop_unlock = xfs_buf_item_unlock,
|
||||
.iop_committed = xfs_buf_item_committed,
|
||||
.iop_push = xfs_buf_item_push,
|
||||
.iop_pushbuf = xfs_buf_item_pushbuf,
|
||||
.iop_committing = xfs_buf_item_committing
|
||||
};
|
||||
|
||||
@ -703,7 +656,8 @@ xfs_buf_item_init(
|
||||
* truncate any pieces. map_size is the size of the
|
||||
* bitmap needed to describe the chunks of the buffer.
|
||||
*/
|
||||
chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT);
|
||||
chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >>
|
||||
XFS_BLF_SHIFT);
|
||||
map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);
|
||||
|
||||
bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
|
||||
@ -713,7 +667,7 @@ xfs_buf_item_init(
|
||||
xfs_buf_hold(bp);
|
||||
bip->bli_format.blf_type = XFS_LI_BUF;
|
||||
bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
|
||||
bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
|
||||
bip->bli_format.blf_len = (ushort)bp->b_length;
|
||||
bip->bli_format.blf_map_size = map_size;
|
||||
|
||||
#ifdef XFS_TRANS_DEBUG
|
||||
@ -725,9 +679,9 @@ xfs_buf_item_init(
|
||||
* the buffer to indicate which bytes the callers have asked
|
||||
* to have logged.
|
||||
*/
|
||||
bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
|
||||
memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp));
|
||||
bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);
|
||||
bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
|
||||
memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
|
||||
bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -984,20 +938,27 @@ xfs_buf_iodone_callbacks(
|
||||
* If the write was asynchronous then no one will be looking for the
|
||||
* error. Clear the error state and write the buffer out again.
|
||||
*
|
||||
* During sync or umount we'll write all pending buffers again
|
||||
* synchronous, which will catch these errors if they keep hanging
|
||||
* around.
|
||||
* XXX: This helps against transient write errors, but we need to find
|
||||
* a way to shut the filesystem down if the writes keep failing.
|
||||
*
|
||||
* In practice we'll shut the filesystem down soon as non-transient
|
||||
* errors tend to affect the whole device and a failing log write
|
||||
* will make us give up. But we really ought to do better here.
|
||||
*/
|
||||
if (XFS_BUF_ISASYNC(bp)) {
|
||||
ASSERT(bp->b_iodone != NULL);
|
||||
|
||||
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
|
||||
|
||||
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
|
||||
|
||||
if (!XFS_BUF_ISSTALE(bp)) {
|
||||
xfs_buf_delwri_queue(bp);
|
||||
XFS_BUF_DONE(bp);
|
||||
bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
|
||||
xfs_bdstrat_cb(bp);
|
||||
} else {
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
ASSERT(bp->b_iodone != NULL);
|
||||
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
|
||||
xfs_buf_relse(bp);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1045,6 +1006,6 @@ xfs_buf_iodone(
|
||||
* Either way, AIL is useless if we're forcing a shutdown.
|
||||
*/
|
||||
spin_lock(&ailp->xa_lock);
|
||||
xfs_trans_ail_delete(ailp, lip);
|
||||
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
|
||||
xfs_buf_item_free(BUF_ITEM(lip));
|
||||
}
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -2277,20 +2276,20 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
|
||||
if (nbuf == 1) {
|
||||
dabuf->nbuf = 1;
|
||||
bp = bps[0];
|
||||
dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
|
||||
dabuf->bbcount = bp->b_length;
|
||||
dabuf->data = bp->b_addr;
|
||||
dabuf->bps[0] = bp;
|
||||
} else {
|
||||
dabuf->nbuf = nbuf;
|
||||
for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
|
||||
dabuf->bps[i] = bp = bps[i];
|
||||
dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
|
||||
dabuf->bbcount += bp->b_length;
|
||||
}
|
||||
dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
|
||||
for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
|
||||
for (i = off = 0; i < nbuf; i++, off += BBTOB(bp->b_length)) {
|
||||
bp = bps[i];
|
||||
memcpy((char *)dabuf->data + off, bp->b_addr,
|
||||
XFS_BUF_COUNT(bp));
|
||||
BBTOB(bp->b_length));
|
||||
}
|
||||
}
|
||||
return dabuf;
|
||||
@ -2310,10 +2309,10 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf)
|
||||
ASSERT(dabuf->nbuf > 1);
|
||||
dabuf->dirty = 0;
|
||||
for (i = off = 0; i < dabuf->nbuf;
|
||||
i++, off += XFS_BUF_COUNT(bp)) {
|
||||
i++, off += BBTOB(bp->b_length)) {
|
||||
bp = dabuf->bps[i];
|
||||
memcpy(bp->b_addr, dabuf->data + off,
|
||||
XFS_BUF_COUNT(bp));
|
||||
BBTOB(bp->b_length));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2356,10 +2355,10 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
|
||||
}
|
||||
dabuf->dirty = 1;
|
||||
ASSERT(first <= last);
|
||||
for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
|
||||
for (i = off = 0; i < dabuf->nbuf; i++, off += BBTOB(bp->b_length)) {
|
||||
bp = dabuf->bps[i];
|
||||
f = off;
|
||||
l = f + XFS_BUF_COUNT(bp) - 1;
|
||||
l = f + BBTOB(bp->b_length) - 1;
|
||||
if (f < first)
|
||||
f = first;
|
||||
if (l > last)
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -17,7 +17,6 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_mount.h"
|
||||
@ -30,6 +29,7 @@
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_extent_busy.h"
|
||||
#include "xfs_discard.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
@ -118,7 +118,7 @@ xfs_trim_extents(
|
||||
* If any blocks in the range are still busy, skip the
|
||||
* discard and try again the next time.
|
||||
*/
|
||||
if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
|
||||
if (xfs_extent_busy_search(mp, agno, fbno, flen)) {
|
||||
trace_xfs_discard_busy(mp, agno, fbno, flen);
|
||||
goto next_extent;
|
||||
}
|
||||
@ -212,7 +212,7 @@ xfs_discard_extents(
|
||||
struct xfs_mount *mp,
|
||||
struct list_head *list)
|
||||
{
|
||||
struct xfs_busy_extent *busyp;
|
||||
struct xfs_extent_busy *busyp;
|
||||
int error = 0;
|
||||
|
||||
list_for_each_entry(busyp, list, list) {
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -857,7 +856,7 @@ xfs_qm_dqflush_done(
|
||||
/* xfs_trans_ail_delete() drops the AIL lock. */
|
||||
spin_lock(&ailp->xa_lock);
|
||||
if (lip->li_lsn == qip->qli_flush_lsn)
|
||||
xfs_trans_ail_delete(ailp, lip);
|
||||
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
|
||||
else
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
@ -878,8 +877,8 @@ xfs_qm_dqflush_done(
|
||||
*/
|
||||
int
|
||||
xfs_qm_dqflush(
|
||||
xfs_dquot_t *dqp,
|
||||
uint flags)
|
||||
struct xfs_dquot *dqp,
|
||||
struct xfs_buf **bpp)
|
||||
{
|
||||
struct xfs_mount *mp = dqp->q_mount;
|
||||
struct xfs_buf *bp;
|
||||
@ -891,25 +890,30 @@ xfs_qm_dqflush(
|
||||
|
||||
trace_xfs_dqflush(dqp);
|
||||
|
||||
/*
|
||||
* If not dirty, or it's pinned and we are not supposed to block, nada.
|
||||
*/
|
||||
if (!XFS_DQ_IS_DIRTY(dqp) ||
|
||||
((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
|
||||
xfs_dqfunlock(dqp);
|
||||
return 0;
|
||||
}
|
||||
*bpp = NULL;
|
||||
|
||||
xfs_qm_dqunpin_wait(dqp);
|
||||
|
||||
/*
|
||||
* This may have been unpinned because the filesystem is shutting
|
||||
* down forcibly. If that's the case we must not write this dquot
|
||||
* to disk, because the log record didn't make it to disk!
|
||||
* to disk, because the log record didn't make it to disk.
|
||||
*
|
||||
* We also have to remove the log item from the AIL in this case,
|
||||
* as we wait for an empty AIL as part of the unmount process.
|
||||
*/
|
||||
if (XFS_FORCED_SHUTDOWN(mp)) {
|
||||
struct xfs_log_item *lip = &dqp->q_logitem.qli_item;
|
||||
dqp->dq_flags &= ~XFS_DQ_DIRTY;
|
||||
xfs_dqfunlock(dqp);
|
||||
return XFS_ERROR(EIO);
|
||||
|
||||
spin_lock(&mp->m_ail->xa_lock);
|
||||
if (lip->li_flags & XFS_LI_IN_AIL)
|
||||
xfs_trans_ail_delete(mp->m_ail, lip,
|
||||
SHUTDOWN_CORRUPT_INCORE);
|
||||
else
|
||||
spin_unlock(&mp->m_ail->xa_lock);
|
||||
error = XFS_ERROR(EIO);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -917,11 +921,8 @@ xfs_qm_dqflush(
|
||||
*/
|
||||
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
|
||||
mp->m_quotainfo->qi_dqchunklen, 0, &bp);
|
||||
if (error) {
|
||||
ASSERT(error != ENOENT);
|
||||
xfs_dqfunlock(dqp);
|
||||
return error;
|
||||
}
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Calculate the location of the dquot inside the buffer.
|
||||
@ -967,20 +968,13 @@ xfs_qm_dqflush(
|
||||
xfs_log_force(mp, 0);
|
||||
}
|
||||
|
||||
if (flags & SYNC_WAIT)
|
||||
error = xfs_bwrite(bp);
|
||||
else
|
||||
xfs_buf_delwri_queue(bp);
|
||||
|
||||
xfs_buf_relse(bp);
|
||||
|
||||
trace_xfs_dqflush_done(dqp);
|
||||
*bpp = bp;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* dqp is still locked, but caller is free to unlock it now.
|
||||
*/
|
||||
return error;
|
||||
|
||||
out_unlock:
|
||||
xfs_dqfunlock(dqp);
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1011,39 +1005,6 @@ xfs_dqlock2(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Give the buffer a little push if it is incore and
|
||||
* wait on the flush lock.
|
||||
*/
|
||||
void
|
||||
xfs_dqflock_pushbuf_wait(
|
||||
xfs_dquot_t *dqp)
|
||||
{
|
||||
xfs_mount_t *mp = dqp->q_mount;
|
||||
xfs_buf_t *bp;
|
||||
|
||||
/*
|
||||
* Check to see if the dquot has been flushed delayed
|
||||
* write. If so, grab its buffer and send it
|
||||
* out immediately. We'll be able to acquire
|
||||
* the flush lock when the I/O completes.
|
||||
*/
|
||||
bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
|
||||
mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
|
||||
if (!bp)
|
||||
goto out_lock;
|
||||
|
||||
if (XFS_BUF_ISDELAYWRITE(bp)) {
|
||||
if (xfs_buf_ispinned(bp))
|
||||
xfs_log_force(mp, 0);
|
||||
xfs_buf_delwri_promote(bp);
|
||||
wake_up_process(bp->b_target->bt_task);
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
out_lock:
|
||||
xfs_dqflock(dqp);
|
||||
}
|
||||
|
||||
int __init
|
||||
xfs_qm_init(void)
|
||||
{
|
||||
|
@ -141,7 +141,7 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
|
||||
extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
|
||||
uint, struct xfs_dquot **);
|
||||
extern void xfs_qm_dqdestroy(xfs_dquot_t *);
|
||||
extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
|
||||
extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **);
|
||||
extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
|
||||
extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
|
||||
xfs_disk_dquot_t *);
|
||||
@ -152,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
|
||||
extern void xfs_qm_dqput(xfs_dquot_t *);
|
||||
|
||||
extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
|
||||
extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
|
||||
|
||||
static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
|
||||
{
|
||||
|
@ -17,9 +17,7 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -108,38 +106,6 @@ xfs_qm_dquot_logitem_unpin(
|
||||
wake_up(&dqp->q_pinwait);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given the logitem, this writes the corresponding dquot entry to disk
|
||||
* asynchronously. This is called with the dquot entry securely locked;
|
||||
* we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
|
||||
* at the end.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_qm_dquot_logitem_push(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
||||
int error;
|
||||
|
||||
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
||||
ASSERT(!completion_done(&dqp->q_flush));
|
||||
|
||||
/*
|
||||
* Since we were able to lock the dquot's flush lock and
|
||||
* we found it on the AIL, the dquot must be dirty. This
|
||||
* is because the dquot is removed from the AIL while still
|
||||
* holding the flush lock in xfs_dqflush_done(). Thus, if
|
||||
* we found it in the AIL and were able to obtain the flush
|
||||
* lock without sleeping, then there must not have been
|
||||
* anyone in the process of flushing the dquot.
|
||||
*/
|
||||
error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
|
||||
if (error)
|
||||
xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
|
||||
__func__, error, dqp);
|
||||
xfs_dqunlock(dqp);
|
||||
}
|
||||
|
||||
STATIC xfs_lsn_t
|
||||
xfs_qm_dquot_logitem_committed(
|
||||
struct xfs_log_item *lip,
|
||||
@ -171,67 +137,15 @@ xfs_qm_dqunpin_wait(
|
||||
wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
|
||||
* the dquot is locked by us, but the flush lock isn't. So, here we are
|
||||
* going to see if the relevant dquot buffer is incore, waiting on DELWRI.
|
||||
* If so, we want to push it out to help us take this item off the AIL as soon
|
||||
* as possible.
|
||||
*
|
||||
* We must not be holding the AIL lock at this point. Calling incore() to
|
||||
* search the buffer cache can be a time consuming thing, and AIL lock is a
|
||||
* spinlock.
|
||||
*/
|
||||
STATIC bool
|
||||
xfs_qm_dquot_logitem_pushbuf(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
|
||||
struct xfs_dquot *dqp = qlip->qli_dquot;
|
||||
struct xfs_buf *bp;
|
||||
bool ret = true;
|
||||
|
||||
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
||||
|
||||
/*
|
||||
* If flushlock isn't locked anymore, chances are that the
|
||||
* inode flush completed and the inode was taken off the AIL.
|
||||
* So, just get out.
|
||||
*/
|
||||
if (completion_done(&dqp->q_flush) ||
|
||||
!(lip->li_flags & XFS_LI_IN_AIL)) {
|
||||
xfs_dqunlock(dqp);
|
||||
return true;
|
||||
}
|
||||
|
||||
bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
|
||||
dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
|
||||
xfs_dqunlock(dqp);
|
||||
if (!bp)
|
||||
return true;
|
||||
if (XFS_BUF_ISDELAYWRITE(bp))
|
||||
xfs_buf_delwri_promote(bp);
|
||||
if (xfs_buf_ispinned(bp))
|
||||
ret = false;
|
||||
xfs_buf_relse(bp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called to attempt to lock the dquot associated with this
|
||||
* dquot log item. Don't sleep on the dquot lock or the flush lock.
|
||||
* If the flush lock is already held, indicating that the dquot has
|
||||
* been or is in the process of being flushed, then see if we can
|
||||
* find the dquot's buffer in the buffer cache without sleeping. If
|
||||
* we can and it is marked delayed write, then we want to send it out.
|
||||
* We delay doing so until the push routine, though, to avoid sleeping
|
||||
* in any device strategy routines.
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_qm_dquot_logitem_trylock(
|
||||
struct xfs_log_item *lip)
|
||||
xfs_qm_dquot_logitem_push(
|
||||
struct xfs_log_item *lip,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
||||
struct xfs_buf *bp = NULL;
|
||||
uint rval = XFS_ITEM_SUCCESS;
|
||||
int error;
|
||||
|
||||
if (atomic_read(&dqp->q_pincount) > 0)
|
||||
return XFS_ITEM_PINNED;
|
||||
@ -239,16 +153,41 @@ xfs_qm_dquot_logitem_trylock(
|
||||
if (!xfs_dqlock_nowait(dqp))
|
||||
return XFS_ITEM_LOCKED;
|
||||
|
||||
if (!xfs_dqflock_nowait(dqp)) {
|
||||
/*
|
||||
* dquot has already been flushed to the backing buffer,
|
||||
* leave it locked, pushbuf routine will unlock it.
|
||||
*/
|
||||
return XFS_ITEM_PUSHBUF;
|
||||
/*
|
||||
* Re-check the pincount now that we stabilized the value by
|
||||
* taking the quota lock.
|
||||
*/
|
||||
if (atomic_read(&dqp->q_pincount) > 0) {
|
||||
rval = XFS_ITEM_PINNED;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ASSERT(lip->li_flags & XFS_LI_IN_AIL);
|
||||
return XFS_ITEM_SUCCESS;
|
||||
/*
|
||||
* Someone else is already flushing the dquot. Nothing we can do
|
||||
* here but wait for the flush to finish and remove the item from
|
||||
* the AIL.
|
||||
*/
|
||||
if (!xfs_dqflock_nowait(dqp)) {
|
||||
rval = XFS_ITEM_FLUSHING;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
spin_unlock(&lip->li_ailp->xa_lock);
|
||||
|
||||
error = xfs_qm_dqflush(dqp, &bp);
|
||||
if (error) {
|
||||
xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
|
||||
__func__, error, dqp);
|
||||
} else {
|
||||
if (!xfs_buf_delwri_queue(bp, buffer_list))
|
||||
rval = XFS_ITEM_FLUSHING;
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
|
||||
spin_lock(&lip->li_ailp->xa_lock);
|
||||
out_unlock:
|
||||
xfs_dqunlock(dqp);
|
||||
return rval;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -299,11 +238,9 @@ static const struct xfs_item_ops xfs_dquot_item_ops = {
|
||||
.iop_format = xfs_qm_dquot_logitem_format,
|
||||
.iop_pin = xfs_qm_dquot_logitem_pin,
|
||||
.iop_unpin = xfs_qm_dquot_logitem_unpin,
|
||||
.iop_trylock = xfs_qm_dquot_logitem_trylock,
|
||||
.iop_unlock = xfs_qm_dquot_logitem_unlock,
|
||||
.iop_committed = xfs_qm_dquot_logitem_committed,
|
||||
.iop_push = xfs_qm_dquot_logitem_push,
|
||||
.iop_pushbuf = xfs_qm_dquot_logitem_pushbuf,
|
||||
.iop_committing = xfs_qm_dquot_logitem_committing
|
||||
};
|
||||
|
||||
@ -398,11 +335,13 @@ xfs_qm_qoff_logitem_unpin(
|
||||
}
|
||||
|
||||
/*
|
||||
* Quotaoff items have no locking, so just return success.
|
||||
* There isn't much you can do to push a quotaoff item. It is simply
|
||||
* stuck waiting for the log to be flushed to disk.
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_qm_qoff_logitem_trylock(
|
||||
struct xfs_log_item *lip)
|
||||
xfs_qm_qoff_logitem_push(
|
||||
struct xfs_log_item *lip,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
return XFS_ITEM_LOCKED;
|
||||
}
|
||||
@ -429,17 +368,6 @@ xfs_qm_qoff_logitem_committed(
|
||||
return lsn;
|
||||
}
|
||||
|
||||
/*
|
||||
* There isn't much you can do to push on an quotaoff item. It is simply
|
||||
* stuck waiting for the log to be flushed to disk.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_qm_qoff_logitem_push(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
STATIC xfs_lsn_t
|
||||
xfs_qm_qoffend_logitem_committed(
|
||||
struct xfs_log_item *lip,
|
||||
@ -454,7 +382,7 @@ xfs_qm_qoffend_logitem_committed(
|
||||
* xfs_trans_ail_delete() drops the AIL lock.
|
||||
*/
|
||||
spin_lock(&ailp->xa_lock);
|
||||
xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
|
||||
xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR);
|
||||
|
||||
kmem_free(qfs);
|
||||
kmem_free(qfe);
|
||||
@ -487,7 +415,6 @@ static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
|
||||
.iop_format = xfs_qm_qoff_logitem_format,
|
||||
.iop_pin = xfs_qm_qoff_logitem_pin,
|
||||
.iop_unpin = xfs_qm_qoff_logitem_unpin,
|
||||
.iop_trylock = xfs_qm_qoff_logitem_trylock,
|
||||
.iop_unlock = xfs_qm_qoff_logitem_unlock,
|
||||
.iop_committed = xfs_qm_qoffend_logitem_committed,
|
||||
.iop_push = xfs_qm_qoff_logitem_push,
|
||||
@ -502,7 +429,6 @@ static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
|
||||
.iop_format = xfs_qm_qoff_logitem_format,
|
||||
.iop_pin = xfs_qm_qoff_logitem_pin,
|
||||
.iop_unpin = xfs_qm_qoff_logitem_unpin,
|
||||
.iop_trylock = xfs_qm_qoff_logitem_trylock,
|
||||
.iop_unlock = xfs_qm_qoff_logitem_unlock,
|
||||
.iop_committed = xfs_qm_qoff_logitem_committed,
|
||||
.iop_push = xfs_qm_qoff_logitem_push,
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -17,7 +17,6 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
|
603
fs/xfs/xfs_extent_busy.c
Normal file
603
fs/xfs/xfs_extent_busy.c
Normal file
@ -0,0 +1,603 @@
|
||||
/*
|
||||
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
|
||||
* Copyright (c) 2010 David Chinner.
|
||||
* Copyright (c) 2011 Christoph Hellwig.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_bmap_btree.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_extent_busy.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
void
|
||||
xfs_extent_busy_insert(
|
||||
struct xfs_trans *tp,
|
||||
xfs_agnumber_t agno,
|
||||
xfs_agblock_t bno,
|
||||
xfs_extlen_t len,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct xfs_extent_busy *new;
|
||||
struct xfs_extent_busy *busyp;
|
||||
struct xfs_perag *pag;
|
||||
struct rb_node **rbp;
|
||||
struct rb_node *parent = NULL;
|
||||
|
||||
new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
|
||||
if (!new) {
|
||||
/*
|
||||
* No Memory! Since it is now not possible to track the free
|
||||
* block, make this a synchronous transaction to insure that
|
||||
* the block is not reused before this transaction commits.
|
||||
*/
|
||||
trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
|
||||
xfs_trans_set_sync(tp);
|
||||
return;
|
||||
}
|
||||
|
||||
new->agno = agno;
|
||||
new->bno = bno;
|
||||
new->length = len;
|
||||
INIT_LIST_HEAD(&new->list);
|
||||
new->flags = flags;
|
||||
|
||||
/* trace before insert to be able to see failed inserts */
|
||||
trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);
|
||||
|
||||
pag = xfs_perag_get(tp->t_mountp, new->agno);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
rbp = &pag->pagb_tree.rb_node;
|
||||
while (*rbp) {
|
||||
parent = *rbp;
|
||||
busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);
|
||||
|
||||
if (new->bno < busyp->bno) {
|
||||
rbp = &(*rbp)->rb_left;
|
||||
ASSERT(new->bno + new->length <= busyp->bno);
|
||||
} else if (new->bno > busyp->bno) {
|
||||
rbp = &(*rbp)->rb_right;
|
||||
ASSERT(bno >= busyp->bno + busyp->length);
|
||||
} else {
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(&new->rb_node, parent, rbp);
|
||||
rb_insert_color(&new->rb_node, &pag->pagb_tree);
|
||||
|
||||
list_add(&new->list, &tp->t_busy);
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
|
||||
/*
|
||||
* Search for a busy extent within the range of the extent we are about to
|
||||
* allocate. You need to be holding the busy extent tree lock when calling
|
||||
* xfs_extent_busy_search(). This function returns 0 for no overlapping busy
|
||||
* extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
|
||||
* match. This is done so that a non-zero return indicates an overlap that
|
||||
* will require a synchronous transaction, but it can still be
|
||||
* used to distinguish between a partial or exact match.
|
||||
*/
|
||||
int
|
||||
xfs_extent_busy_search(
|
||||
struct xfs_mount *mp,
|
||||
xfs_agnumber_t agno,
|
||||
xfs_agblock_t bno,
|
||||
xfs_extlen_t len)
|
||||
{
|
||||
struct xfs_perag *pag;
|
||||
struct rb_node *rbp;
|
||||
struct xfs_extent_busy *busyp;
|
||||
int match = 0;
|
||||
|
||||
pag = xfs_perag_get(mp, agno);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
|
||||
rbp = pag->pagb_tree.rb_node;
|
||||
|
||||
/* find closest start bno overlap */
|
||||
while (rbp) {
|
||||
busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
|
||||
if (bno < busyp->bno) {
|
||||
/* may overlap, but exact start block is lower */
|
||||
if (bno + len > busyp->bno)
|
||||
match = -1;
|
||||
rbp = rbp->rb_left;
|
||||
} else if (bno > busyp->bno) {
|
||||
/* may overlap, but exact start block is higher */
|
||||
if (bno < busyp->bno + busyp->length)
|
||||
match = -1;
|
||||
rbp = rbp->rb_right;
|
||||
} else {
|
||||
/* bno matches busyp, length determines exact match */
|
||||
match = (busyp->length == len) ? 1 : -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
return match;
|
||||
}
|
||||
|
||||
/*
|
||||
* The found free extent [fbno, fend] overlaps part or all of the given busy
|
||||
* extent. If the overlap covers the beginning, the end, or all of the busy
|
||||
* extent, the overlapping portion can be made unbusy and used for the
|
||||
* allocation. We can't split a busy extent because we can't modify a
|
||||
* transaction/CIL context busy list, but we can update an entries block
|
||||
* number or length.
|
||||
*
|
||||
* Returns true if the extent can safely be reused, or false if the search
|
||||
* needs to be restarted.
|
||||
*/
|
||||
STATIC bool
|
||||
xfs_extent_busy_update_extent(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_perag *pag,
|
||||
struct xfs_extent_busy *busyp,
|
||||
xfs_agblock_t fbno,
|
||||
xfs_extlen_t flen,
|
||||
bool userdata)
|
||||
{
|
||||
xfs_agblock_t fend = fbno + flen;
|
||||
xfs_agblock_t bbno = busyp->bno;
|
||||
xfs_agblock_t bend = bbno + busyp->length;
|
||||
|
||||
/*
|
||||
* This extent is currently being discarded. Give the thread
|
||||
* performing the discard a chance to mark the extent unbusy
|
||||
* and retry.
|
||||
*/
|
||||
if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
delay(1);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is a busy extent overlapping a user allocation, we have
|
||||
* no choice but to force the log and retry the search.
|
||||
*
|
||||
* Fortunately this does not happen during normal operation, but
|
||||
* only if the filesystem is very low on space and has to dip into
|
||||
* the AGFL for normal allocations.
|
||||
*/
|
||||
if (userdata)
|
||||
goto out_force_log;
|
||||
|
||||
if (bbno < fbno && bend > fend) {
|
||||
/*
|
||||
* Case 1:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +---------+
|
||||
* fbno fend
|
||||
*/
|
||||
|
||||
/*
|
||||
* We would have to split the busy extent to be able to track
|
||||
* it correct, which we cannot do because we would have to
|
||||
* modify the list of busy extents attached to the transaction
|
||||
* or CIL context, which is immutable.
|
||||
*
|
||||
* Force out the log to clear the busy extent and retry the
|
||||
* search.
|
||||
*/
|
||||
goto out_force_log;
|
||||
} else if (bbno >= fbno && bend <= fend) {
|
||||
/*
|
||||
* Case 2:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-----------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 3:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +--------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 4:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +--------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 5:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-----------------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* The busy extent is fully covered by the extent we are
|
||||
* allocating, and can simply be removed from the rbtree.
|
||||
* However we cannot remove it from the immutable list
|
||||
* tracking busy extents in the transaction or CIL context,
|
||||
* so set the length to zero to mark it invalid.
|
||||
*
|
||||
* We also need to restart the busy extent search from the
|
||||
* tree root, because erasing the node can rearrange the
|
||||
* tree topology.
|
||||
*/
|
||||
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
||||
busyp->length = 0;
|
||||
return false;
|
||||
} else if (fend < bend) {
|
||||
/*
|
||||
* Case 6:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +---------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 7:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +------------------+
|
||||
* fbno fend
|
||||
*
|
||||
*/
|
||||
busyp->bno = fend;
|
||||
} else if (bbno < fbno) {
|
||||
/*
|
||||
* Case 8:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 9:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +----------------------+
|
||||
* fbno fend
|
||||
*/
|
||||
busyp->length = fbno - busyp->bno;
|
||||
} else {
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
|
||||
return true;
|
||||
|
||||
out_force_log:
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_log_force(mp, XFS_LOG_SYNC);
|
||||
trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* For a given extent [fbno, flen], make sure we can reuse it safely.
|
||||
*/
|
||||
void
|
||||
xfs_extent_busy_reuse(
|
||||
struct xfs_mount *mp,
|
||||
xfs_agnumber_t agno,
|
||||
xfs_agblock_t fbno,
|
||||
xfs_extlen_t flen,
|
||||
bool userdata)
|
||||
{
|
||||
struct xfs_perag *pag;
|
||||
struct rb_node *rbp;
|
||||
|
||||
ASSERT(flen > 0);
|
||||
|
||||
pag = xfs_perag_get(mp, agno);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
restart:
|
||||
rbp = pag->pagb_tree.rb_node;
|
||||
while (rbp) {
|
||||
struct xfs_extent_busy *busyp =
|
||||
rb_entry(rbp, struct xfs_extent_busy, rb_node);
|
||||
xfs_agblock_t bbno = busyp->bno;
|
||||
xfs_agblock_t bend = bbno + busyp->length;
|
||||
|
||||
if (fbno + flen <= bbno) {
|
||||
rbp = rbp->rb_left;
|
||||
continue;
|
||||
} else if (fbno >= bend) {
|
||||
rbp = rbp->rb_right;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
|
||||
userdata))
|
||||
goto restart;
|
||||
}
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
|
||||
/*
|
||||
* For a given extent [fbno, flen], search the busy extent list to find a
|
||||
* subset of the extent that is not busy. If *rlen is smaller than
|
||||
* args->minlen no suitable extent could be found, and the higher level
|
||||
* code needs to force out the log and retry the allocation.
|
||||
*/
|
||||
void
|
||||
xfs_extent_busy_trim(
|
||||
struct xfs_alloc_arg *args,
|
||||
xfs_agblock_t bno,
|
||||
xfs_extlen_t len,
|
||||
xfs_agblock_t *rbno,
|
||||
xfs_extlen_t *rlen)
|
||||
{
|
||||
xfs_agblock_t fbno;
|
||||
xfs_extlen_t flen;
|
||||
struct rb_node *rbp;
|
||||
|
||||
ASSERT(len > 0);
|
||||
|
||||
spin_lock(&args->pag->pagb_lock);
|
||||
restart:
|
||||
fbno = bno;
|
||||
flen = len;
|
||||
rbp = args->pag->pagb_tree.rb_node;
|
||||
while (rbp && flen >= args->minlen) {
|
||||
struct xfs_extent_busy *busyp =
|
||||
rb_entry(rbp, struct xfs_extent_busy, rb_node);
|
||||
xfs_agblock_t fend = fbno + flen;
|
||||
xfs_agblock_t bbno = busyp->bno;
|
||||
xfs_agblock_t bend = bbno + busyp->length;
|
||||
|
||||
if (fend <= bbno) {
|
||||
rbp = rbp->rb_left;
|
||||
continue;
|
||||
} else if (fbno >= bend) {
|
||||
rbp = rbp->rb_right;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a metadata allocation, try to reuse the busy
|
||||
* extent instead of trimming the allocation.
|
||||
*/
|
||||
if (!args->userdata &&
|
||||
!(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
|
||||
if (!xfs_extent_busy_update_extent(args->mp, args->pag,
|
||||
busyp, fbno, flen,
|
||||
false))
|
||||
goto restart;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bbno <= fbno) {
|
||||
/* start overlap */
|
||||
|
||||
/*
|
||||
* Case 1:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +---------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 2:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 3:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 4:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-----------------+
|
||||
* fbno fend
|
||||
*
|
||||
* No unbusy region in extent, return failure.
|
||||
*/
|
||||
if (fend <= bend)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Case 5:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +----------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 6:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +--------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Needs to be trimmed to:
|
||||
* +-------+
|
||||
* fbno fend
|
||||
*/
|
||||
fbno = bend;
|
||||
} else if (bend >= fend) {
|
||||
/* end overlap */
|
||||
|
||||
/*
|
||||
* Case 7:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Case 8:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +--------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Needs to be trimmed to:
|
||||
* +-------+
|
||||
* fbno fend
|
||||
*/
|
||||
fend = bbno;
|
||||
} else {
|
||||
/* middle overlap */
|
||||
|
||||
/*
|
||||
* Case 9:
|
||||
* bbno bend
|
||||
* +BBBBBBBBBBBBBBBBB+
|
||||
* +-----------------------------------+
|
||||
* fbno fend
|
||||
*
|
||||
* Can be trimmed to:
|
||||
* +-------+ OR +-------+
|
||||
* fbno fend fbno fend
|
||||
*
|
||||
* Backward allocation leads to significant
|
||||
* fragmentation of directories, which degrades
|
||||
* directory performance, therefore we always want to
|
||||
* choose the option that produces forward allocation
|
||||
* patterns.
|
||||
* Preferring the lower bno extent will make the next
|
||||
* request use "fend" as the start of the next
|
||||
* allocation; if the segment is no longer busy at
|
||||
* that point, we'll get a contiguous allocation, but
|
||||
* even if it is still busy, we will get a forward
|
||||
* allocation.
|
||||
* We try to avoid choosing the segment at "bend",
|
||||
* because that can lead to the next allocation
|
||||
* taking the segment at "fbno", which would be a
|
||||
* backward allocation. We only use the segment at
|
||||
* "fbno" if it is much larger than the current
|
||||
* requested size, because in that case there's a
|
||||
* good chance subsequent allocations will be
|
||||
* contiguous.
|
||||
*/
|
||||
if (bbno - fbno >= args->maxlen) {
|
||||
/* left candidate fits perfect */
|
||||
fend = bbno;
|
||||
} else if (fend - bend >= args->maxlen * 4) {
|
||||
/* right candidate has enough free space */
|
||||
fbno = bend;
|
||||
} else if (bbno - fbno >= args->minlen) {
|
||||
/* left candidate fits minimum requirement */
|
||||
fend = bbno;
|
||||
} else {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
flen = fend - fbno;
|
||||
}
|
||||
spin_unlock(&args->pag->pagb_lock);
|
||||
|
||||
if (fbno != bno || flen != len) {
|
||||
trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
|
||||
fbno, flen);
|
||||
}
|
||||
*rbno = fbno;
|
||||
*rlen = flen;
|
||||
return;
|
||||
fail:
|
||||
/*
|
||||
* Return a zero extent length as failure indications. All callers
|
||||
* re-check if the trimmed extent satisfies the minlen requirement.
|
||||
*/
|
||||
spin_unlock(&args->pag->pagb_lock);
|
||||
trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
|
||||
*rbno = fbno;
|
||||
*rlen = 0;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_extent_busy_clear_one(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_perag *pag,
|
||||
struct xfs_extent_busy *busyp)
|
||||
{
|
||||
if (busyp->length) {
|
||||
trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
|
||||
busyp->length);
|
||||
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
||||
}
|
||||
|
||||
list_del_init(&busyp->list);
|
||||
kmem_free(busyp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove all extents on the passed in list from the busy extents tree.
|
||||
* If do_discard is set skip extents that need to be discarded, and mark
|
||||
* these as undergoing a discard operation instead.
|
||||
*/
|
||||
void
|
||||
xfs_extent_busy_clear(
|
||||
struct xfs_mount *mp,
|
||||
struct list_head *list,
|
||||
bool do_discard)
|
||||
{
|
||||
struct xfs_extent_busy *busyp, *n;
|
||||
struct xfs_perag *pag = NULL;
|
||||
xfs_agnumber_t agno = NULLAGNUMBER;
|
||||
|
||||
list_for_each_entry_safe(busyp, n, list, list) {
|
||||
if (busyp->agno != agno) {
|
||||
if (pag) {
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
pag = xfs_perag_get(mp, busyp->agno);
|
||||
spin_lock(&pag->pagb_lock);
|
||||
agno = busyp->agno;
|
||||
}
|
||||
|
||||
if (do_discard && busyp->length &&
|
||||
!(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
|
||||
busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
|
||||
else
|
||||
xfs_extent_busy_clear_one(mp, pag, busyp);
|
||||
}
|
||||
|
||||
if (pag) {
|
||||
spin_unlock(&pag->pagb_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback for list_sort to sort busy extents by the AG they reside in.
|
||||
*/
|
||||
int
|
||||
xfs_extent_busy_ag_cmp(
|
||||
void *priv,
|
||||
struct list_head *a,
|
||||
struct list_head *b)
|
||||
{
|
||||
return container_of(a, struct xfs_extent_busy, list)->agno -
|
||||
container_of(b, struct xfs_extent_busy, list)->agno;
|
||||
}
|
69
fs/xfs/xfs_extent_busy.h
Normal file
69
fs/xfs/xfs_extent_busy.h
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
|
||||
* Copyright (c) 2010 David Chinner.
|
||||
* Copyright (c) 2011 Christoph Hellwig.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#ifndef __XFS_EXTENT_BUSY_H__
|
||||
#define __XFS_EXTENT_BUSY_H__
|
||||
|
||||
/*
|
||||
* Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that
|
||||
* have been freed but whose transactions aren't committed to disk yet.
|
||||
*
|
||||
* Note that we use the transaction ID to record the transaction, not the
|
||||
* transaction structure itself. See xfs_extent_busy_insert() for details.
|
||||
*/
|
||||
struct xfs_extent_busy {
|
||||
struct rb_node rb_node; /* ag by-bno indexed search tree */
|
||||
struct list_head list; /* transaction busy extent list */
|
||||
xfs_agnumber_t agno;
|
||||
xfs_agblock_t bno;
|
||||
xfs_extlen_t length;
|
||||
unsigned int flags;
|
||||
#define XFS_EXTENT_BUSY_DISCARDED 0x01 /* undergoing a discard op. */
|
||||
#define XFS_EXTENT_BUSY_SKIP_DISCARD 0x02 /* do not discard */
|
||||
};
|
||||
|
||||
void
|
||||
xfs_extent_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
|
||||
xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
|
||||
|
||||
void
|
||||
xfs_extent_busy_clear(struct xfs_mount *mp, struct list_head *list,
|
||||
bool do_discard);
|
||||
|
||||
int
|
||||
xfs_extent_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||
xfs_agblock_t bno, xfs_extlen_t len);
|
||||
|
||||
void
|
||||
xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||
xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
|
||||
|
||||
void
|
||||
xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno,
|
||||
xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen);
|
||||
|
||||
int
|
||||
xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
|
||||
|
||||
static inline void xfs_extent_busy_sort(struct list_head *list)
|
||||
{
|
||||
list_sort(NULL, list, xfs_extent_busy_ag_cmp);
|
||||
}
|
||||
|
||||
#endif /* __XFS_EXTENT_BUSY_H__ */
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_buf_item.h"
|
||||
#include "xfs_sb.h"
|
||||
@ -64,7 +63,8 @@ __xfs_efi_release(
|
||||
if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) {
|
||||
spin_lock(&ailp->xa_lock);
|
||||
/* xfs_trans_ail_delete() drops the AIL lock. */
|
||||
xfs_trans_ail_delete(ailp, &efip->efi_item);
|
||||
xfs_trans_ail_delete(ailp, &efip->efi_item,
|
||||
SHUTDOWN_LOG_IO_ERROR);
|
||||
xfs_efi_item_free(efip);
|
||||
}
|
||||
}
|
||||
@ -147,22 +147,20 @@ xfs_efi_item_unpin(
|
||||
}
|
||||
|
||||
/*
|
||||
* Efi items have no locking or pushing. However, since EFIs are
|
||||
* pulled from the AIL when their corresponding EFDs are committed
|
||||
* to disk, their situation is very similar to being pinned. Return
|
||||
* XFS_ITEM_PINNED so that the caller will eventually flush the log.
|
||||
* This should help in getting the EFI out of the AIL.
|
||||
* Efi items have no locking or pushing. However, since EFIs are pulled from
|
||||
* the AIL when their corresponding EFDs are committed to disk, their situation
|
||||
* is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
|
||||
* will eventually flush the log. This should help in getting the EFI out of
|
||||
* the AIL.
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_efi_item_trylock(
|
||||
struct xfs_log_item *lip)
|
||||
xfs_efi_item_push(
|
||||
struct xfs_log_item *lip,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
return XFS_ITEM_PINNED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Efi items have no locking, so just return.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_efi_item_unlock(
|
||||
struct xfs_log_item *lip)
|
||||
@ -189,17 +187,6 @@ xfs_efi_item_committed(
|
||||
return lsn;
|
||||
}
|
||||
|
||||
/*
|
||||
* There isn't much you can do to push on an efi item. It is simply
|
||||
* stuck waiting for all of its corresponding efd items to be
|
||||
* committed to disk.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_efi_item_push(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* The EFI dependency tracking op doesn't do squat. It can't because
|
||||
* it doesn't know where the free extent is coming from. The dependency
|
||||
@ -222,7 +209,6 @@ static const struct xfs_item_ops xfs_efi_item_ops = {
|
||||
.iop_format = xfs_efi_item_format,
|
||||
.iop_pin = xfs_efi_item_pin,
|
||||
.iop_unpin = xfs_efi_item_unpin,
|
||||
.iop_trylock = xfs_efi_item_trylock,
|
||||
.iop_unlock = xfs_efi_item_unlock,
|
||||
.iop_committed = xfs_efi_item_committed,
|
||||
.iop_push = xfs_efi_item_push,
|
||||
@ -404,19 +390,17 @@ xfs_efd_item_unpin(
|
||||
}
|
||||
|
||||
/*
|
||||
* Efd items have no locking, so just return success.
|
||||
* There isn't much you can do to push on an efd item. It is simply stuck
|
||||
* waiting for the log to be flushed to disk.
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_efd_item_trylock(
|
||||
struct xfs_log_item *lip)
|
||||
xfs_efd_item_push(
|
||||
struct xfs_log_item *lip,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
return XFS_ITEM_LOCKED;
|
||||
return XFS_ITEM_PINNED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Efd items have no locking or pushing, so return failure
|
||||
* so that the caller doesn't bother with us.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_efd_item_unlock(
|
||||
struct xfs_log_item *lip)
|
||||
@ -450,16 +434,6 @@ xfs_efd_item_committed(
|
||||
return (xfs_lsn_t)-1;
|
||||
}
|
||||
|
||||
/*
|
||||
* There isn't much you can do to push on an efd item. It is simply
|
||||
* stuck waiting for the log to be flushed to disk.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_efd_item_push(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* The EFD dependency tracking op doesn't do squat. It can't because
|
||||
* it doesn't know where the free extent is coming from. The dependency
|
||||
@ -482,7 +456,6 @@ static const struct xfs_item_ops xfs_efd_item_ops = {
|
||||
.iop_format = xfs_efd_item_format,
|
||||
.iop_pin = xfs_efd_item_pin,
|
||||
.iop_unpin = xfs_efd_item_unpin,
|
||||
.iop_trylock = xfs_efd_item_trylock,
|
||||
.iop_unlock = xfs_efd_item_unlock,
|
||||
.iop_committed = xfs_efd_item_committed,
|
||||
.iop_push = xfs_efd_item_push,
|
||||
|
@ -17,9 +17,7 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_trans.h"
|
||||
@ -396,114 +394,96 @@ xfs_file_splice_write(
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is called to handle zeroing any space in the last
|
||||
* block of the file that is beyond the EOF. We do this since the
|
||||
* size is being increased without writing anything to that block
|
||||
* and we don't want anyone to read the garbage on the disk.
|
||||
* This routine is called to handle zeroing any space in the last block of the
|
||||
* file that is beyond the EOF. We do this since the size is being increased
|
||||
* without writing anything to that block and we don't want to read the
|
||||
* garbage on the disk.
|
||||
*/
|
||||
STATIC int /* error (positive) */
|
||||
xfs_zero_last_block(
|
||||
xfs_inode_t *ip,
|
||||
xfs_fsize_t offset,
|
||||
xfs_fsize_t isize)
|
||||
struct xfs_inode *ip,
|
||||
xfs_fsize_t offset,
|
||||
xfs_fsize_t isize)
|
||||
{
|
||||
xfs_fileoff_t last_fsb;
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
int nimaps;
|
||||
int zero_offset;
|
||||
int zero_len;
|
||||
int error = 0;
|
||||
xfs_bmbt_irec_t imap;
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
|
||||
int zero_offset = XFS_B_FSB_OFFSET(mp, isize);
|
||||
int zero_len;
|
||||
int nimaps = 1;
|
||||
int error = 0;
|
||||
struct xfs_bmbt_irec imap;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
||||
|
||||
zero_offset = XFS_B_FSB_OFFSET(mp, isize);
|
||||
if (zero_offset == 0) {
|
||||
/*
|
||||
* There are no extra bytes in the last block on disk to
|
||||
* zero, so return.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
last_fsb = XFS_B_TO_FSBT(mp, isize);
|
||||
nimaps = 1;
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
ASSERT(nimaps > 0);
|
||||
|
||||
/*
|
||||
* If the block underlying isize is just a hole, then there
|
||||
* is nothing to zero.
|
||||
*/
|
||||
if (imap.br_startblock == HOLESTARTBLOCK) {
|
||||
if (imap.br_startblock == HOLESTARTBLOCK)
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Zero the part of the last block beyond the EOF, and write it
|
||||
* out sync. We need to drop the ilock while we do this so we
|
||||
* don't deadlock when the buffer cache calls back to us.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
zero_len = mp->m_sb.sb_blocksize - zero_offset;
|
||||
if (isize + zero_len > offset)
|
||||
zero_len = offset - isize;
|
||||
error = xfs_iozero(ip, isize, zero_len);
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ASSERT(error >= 0);
|
||||
return error;
|
||||
return xfs_iozero(ip, isize, zero_len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero any on disk space between the current EOF and the new,
|
||||
* larger EOF. This handles the normal case of zeroing the remainder
|
||||
* of the last block in the file and the unusual case of zeroing blocks
|
||||
* out beyond the size of the file. This second case only happens
|
||||
* with fixed size extents and when the system crashes before the inode
|
||||
* size was updated but after blocks were allocated. If fill is set,
|
||||
* then any holes in the range are filled and zeroed. If not, the holes
|
||||
* are left alone as holes.
|
||||
* Zero any on disk space between the current EOF and the new, larger EOF.
|
||||
*
|
||||
* This handles the normal case of zeroing the remainder of the last block in
|
||||
* the file and the unusual case of zeroing blocks out beyond the size of the
|
||||
* file. This second case only happens with fixed size extents and when the
|
||||
* system crashes before the inode size was updated but after blocks were
|
||||
* allocated.
|
||||
*
|
||||
* Expects the iolock to be held exclusive, and will take the ilock internally.
|
||||
*/
|
||||
|
||||
int /* error (positive) */
|
||||
xfs_zero_eof(
|
||||
xfs_inode_t *ip,
|
||||
xfs_off_t offset, /* starting I/O offset */
|
||||
xfs_fsize_t isize) /* current inode size */
|
||||
struct xfs_inode *ip,
|
||||
xfs_off_t offset, /* starting I/O offset */
|
||||
xfs_fsize_t isize) /* current inode size */
|
||||
{
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
xfs_fileoff_t start_zero_fsb;
|
||||
xfs_fileoff_t end_zero_fsb;
|
||||
xfs_fileoff_t zero_count_fsb;
|
||||
xfs_fileoff_t last_fsb;
|
||||
xfs_fileoff_t zero_off;
|
||||
xfs_fsize_t zero_len;
|
||||
int nimaps;
|
||||
int error = 0;
|
||||
xfs_bmbt_irec_t imap;
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
xfs_fileoff_t start_zero_fsb;
|
||||
xfs_fileoff_t end_zero_fsb;
|
||||
xfs_fileoff_t zero_count_fsb;
|
||||
xfs_fileoff_t last_fsb;
|
||||
xfs_fileoff_t zero_off;
|
||||
xfs_fsize_t zero_len;
|
||||
int nimaps;
|
||||
int error = 0;
|
||||
struct xfs_bmbt_irec imap;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
|
||||
ASSERT(offset > isize);
|
||||
|
||||
/*
|
||||
* First handle zeroing the block on which isize resides.
|
||||
*
|
||||
* We only zero a part of that block so it is handled specially.
|
||||
*/
|
||||
error = xfs_zero_last_block(ip, offset, isize);
|
||||
if (error) {
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
return error;
|
||||
if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
|
||||
error = xfs_zero_last_block(ip, offset, isize);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the range between the new size and the old
|
||||
* where blocks needing to be zeroed may exist. To get the
|
||||
* block where the last byte in the file currently resides,
|
||||
* we need to subtract one from the size and truncate back
|
||||
* to a block boundary. We subtract 1 in case the size is
|
||||
* exactly on a block boundary.
|
||||
* Calculate the range between the new size and the old where blocks
|
||||
* needing to be zeroed may exist.
|
||||
*
|
||||
* To get the block where the last byte in the file currently resides,
|
||||
* we need to subtract one from the size and truncate back to a block
|
||||
* boundary. We subtract 1 in case the size is exactly on a block
|
||||
* boundary.
|
||||
*/
|
||||
last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
|
||||
start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
|
||||
@ -521,23 +501,18 @@ xfs_zero_eof(
|
||||
while (start_zero_fsb <= end_zero_fsb) {
|
||||
nimaps = 1;
|
||||
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
|
||||
&imap, &nimaps, 0);
|
||||
if (error) {
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
ASSERT(nimaps > 0);
|
||||
|
||||
if (imap.br_state == XFS_EXT_UNWRITTEN ||
|
||||
imap.br_startblock == HOLESTARTBLOCK) {
|
||||
/*
|
||||
* This loop handles initializing pages that were
|
||||
* partially initialized by the code below this
|
||||
* loop. It basically zeroes the part of the page
|
||||
* that sits on a hole and sets the page as P_HOLE
|
||||
* and calls remapf if it is a mapped file.
|
||||
*/
|
||||
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
|
||||
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
|
||||
continue;
|
||||
@ -545,11 +520,7 @@ xfs_zero_eof(
|
||||
|
||||
/*
|
||||
* There are blocks we need to zero.
|
||||
* Drop the inode lock while we're doing the I/O.
|
||||
* We'll still have the iolock to protect us.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
|
||||
zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
|
||||
|
||||
@ -557,22 +528,14 @@ xfs_zero_eof(
|
||||
zero_len = offset - zero_off;
|
||||
|
||||
error = xfs_iozero(ip, zero_off, zero_len);
|
||||
if (error) {
|
||||
goto out_lock;
|
||||
}
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
|
||||
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_lock:
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ASSERT(error >= 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -593,35 +556,29 @@ xfs_file_aio_write_checks(
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
int error = 0;
|
||||
|
||||
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
|
||||
restart:
|
||||
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
|
||||
if (error) {
|
||||
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the offset is beyond the size of the file, we need to zero any
|
||||
* blocks that fall between the existing EOF and the start of this
|
||||
* write. If zeroing is needed and we are currently holding the
|
||||
* iolock shared, we need to update it to exclusive which involves
|
||||
* dropping all locks and relocking to maintain correct locking order.
|
||||
* If we do this, restart the function to ensure all checks and values
|
||||
* are still valid.
|
||||
* iolock shared, we need to update it to exclusive which implies
|
||||
* having to redo all checks before.
|
||||
*/
|
||||
if (*pos > i_size_read(inode)) {
|
||||
if (*iolock == XFS_IOLOCK_SHARED) {
|
||||
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
|
||||
xfs_rw_iunlock(ip, *iolock);
|
||||
*iolock = XFS_IOLOCK_EXCL;
|
||||
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
|
||||
xfs_rw_ilock(ip, *iolock);
|
||||
goto restart;
|
||||
}
|
||||
error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* Updating the timestamps will grab the ilock again from
|
||||
@ -638,7 +595,6 @@ restart:
|
||||
* people from modifying setuid and setgid binaries.
|
||||
*/
|
||||
return file_remove_suid(file);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1007,8 +963,149 @@ xfs_vm_page_mkwrite(
|
||||
return block_page_mkwrite(vma, vmf, xfs_get_blocks);
|
||||
}
|
||||
|
||||
STATIC loff_t
|
||||
xfs_seek_data(
|
||||
struct file *file,
|
||||
loff_t start,
|
||||
u32 type)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
struct xfs_bmbt_irec map[2];
|
||||
int nmap = 2;
|
||||
loff_t uninitialized_var(offset);
|
||||
xfs_fsize_t isize;
|
||||
xfs_fileoff_t fsbno;
|
||||
xfs_filblks_t end;
|
||||
uint lock;
|
||||
int error;
|
||||
|
||||
lock = xfs_ilock_map_shared(ip);
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (start >= isize) {
|
||||
error = ENXIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
fsbno = XFS_B_TO_FSBT(mp, start);
|
||||
|
||||
/*
|
||||
* Try to read extents from the first block indicated
|
||||
* by fsbno to the end block of the file.
|
||||
*/
|
||||
end = XFS_B_TO_FSB(mp, isize);
|
||||
|
||||
error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
|
||||
XFS_BMAPI_ENTIRE);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Treat unwritten extent as data extent since it might
|
||||
* contains dirty data in page cache.
|
||||
*/
|
||||
if (map[0].br_startblock != HOLESTARTBLOCK) {
|
||||
offset = max_t(loff_t, start,
|
||||
XFS_FSB_TO_B(mp, map[0].br_startoff));
|
||||
} else {
|
||||
if (nmap == 1) {
|
||||
error = ENXIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
offset = max_t(loff_t, start,
|
||||
XFS_FSB_TO_B(mp, map[1].br_startoff));
|
||||
}
|
||||
|
||||
if (offset != file->f_pos)
|
||||
file->f_pos = offset;
|
||||
|
||||
out_unlock:
|
||||
xfs_iunlock_map_shared(ip, lock);
|
||||
|
||||
if (error)
|
||||
return -error;
|
||||
return offset;
|
||||
}
|
||||
|
||||
STATIC loff_t
|
||||
xfs_seek_hole(
|
||||
struct file *file,
|
||||
loff_t start,
|
||||
u32 type)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
loff_t uninitialized_var(offset);
|
||||
loff_t holeoff;
|
||||
xfs_fsize_t isize;
|
||||
xfs_fileoff_t fsbno;
|
||||
uint lock;
|
||||
int error;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return -XFS_ERROR(EIO);
|
||||
|
||||
lock = xfs_ilock_map_shared(ip);
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (start >= isize) {
|
||||
error = ENXIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
fsbno = XFS_B_TO_FSBT(mp, start);
|
||||
error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
holeoff = XFS_FSB_TO_B(mp, fsbno);
|
||||
if (holeoff <= start)
|
||||
offset = start;
|
||||
else {
|
||||
/*
|
||||
* xfs_bmap_first_unused() could return a value bigger than
|
||||
* isize if there are no more holes past the supplied offset.
|
||||
*/
|
||||
offset = min_t(loff_t, holeoff, isize);
|
||||
}
|
||||
|
||||
if (offset != file->f_pos)
|
||||
file->f_pos = offset;
|
||||
|
||||
out_unlock:
|
||||
xfs_iunlock_map_shared(ip, lock);
|
||||
|
||||
if (error)
|
||||
return -error;
|
||||
return offset;
|
||||
}
|
||||
|
||||
STATIC loff_t
|
||||
xfs_file_llseek(
|
||||
struct file *file,
|
||||
loff_t offset,
|
||||
int origin)
|
||||
{
|
||||
switch (origin) {
|
||||
case SEEK_END:
|
||||
case SEEK_CUR:
|
||||
case SEEK_SET:
|
||||
return generic_file_llseek(file, offset, origin);
|
||||
case SEEK_DATA:
|
||||
return xfs_seek_data(file, offset, origin);
|
||||
case SEEK_HOLE:
|
||||
return xfs_seek_hole(file, offset, origin);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
const struct file_operations xfs_file_operations = {
|
||||
.llseek = generic_file_llseek,
|
||||
.llseek = xfs_file_llseek,
|
||||
.read = do_sync_read,
|
||||
.write = do_sync_write,
|
||||
.aio_read = xfs_file_aio_read,
|
||||
|
@ -18,8 +18,6 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
@ -39,7 +37,6 @@
|
||||
#include "xfs_itable.h"
|
||||
#include "xfs_trans_space.h"
|
||||
#include "xfs_rtalloc.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_filestream.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
@ -147,9 +144,9 @@ xfs_growfs_data_private(
|
||||
if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
|
||||
return error;
|
||||
dpct = pct - mp->m_sb.sb_imax_pct;
|
||||
bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
|
||||
bp = xfs_buf_read_uncached(mp->m_ddev_targp,
|
||||
XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
|
||||
BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
|
||||
XFS_FSS_TO_BB(mp, 1), 0);
|
||||
if (!bp)
|
||||
return EIO;
|
||||
xfs_buf_relse(bp);
|
||||
@ -193,7 +190,7 @@ xfs_growfs_data_private(
|
||||
*/
|
||||
bp = xfs_buf_get(mp->m_ddev_targp,
|
||||
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
|
||||
XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
|
||||
XFS_FSS_TO_BB(mp, 1), 0);
|
||||
if (!bp) {
|
||||
error = ENOMEM;
|
||||
goto error0;
|
||||
@ -230,7 +227,7 @@ xfs_growfs_data_private(
|
||||
*/
|
||||
bp = xfs_buf_get(mp->m_ddev_targp,
|
||||
XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
|
||||
XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
|
||||
XFS_FSS_TO_BB(mp, 1), 0);
|
||||
if (!bp) {
|
||||
error = ENOMEM;
|
||||
goto error0;
|
||||
@ -259,8 +256,7 @@ xfs_growfs_data_private(
|
||||
*/
|
||||
bp = xfs_buf_get(mp->m_ddev_targp,
|
||||
XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
|
||||
BTOBB(mp->m_sb.sb_blocksize),
|
||||
XBF_LOCK | XBF_MAPPED);
|
||||
BTOBB(mp->m_sb.sb_blocksize), 0);
|
||||
if (!bp) {
|
||||
error = ENOMEM;
|
||||
goto error0;
|
||||
@ -286,8 +282,7 @@ xfs_growfs_data_private(
|
||||
*/
|
||||
bp = xfs_buf_get(mp->m_ddev_targp,
|
||||
XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
|
||||
BTOBB(mp->m_sb.sb_blocksize),
|
||||
XBF_LOCK | XBF_MAPPED);
|
||||
BTOBB(mp->m_sb.sb_blocksize), 0);
|
||||
if (!bp) {
|
||||
error = ENOMEM;
|
||||
goto error0;
|
||||
@ -314,8 +309,7 @@ xfs_growfs_data_private(
|
||||
*/
|
||||
bp = xfs_buf_get(mp->m_ddev_targp,
|
||||
XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
|
||||
BTOBB(mp->m_sb.sb_blocksize),
|
||||
XBF_LOCK | XBF_MAPPED);
|
||||
BTOBB(mp->m_sb.sb_blocksize), 0);
|
||||
if (!bp) {
|
||||
error = ENOMEM;
|
||||
goto error0;
|
||||
@ -405,7 +399,7 @@ xfs_growfs_data_private(
|
||||
|
||||
/* update secondary superblocks. */
|
||||
for (agno = 1; agno < nagcount; agno++) {
|
||||
error = xfs_read_buf(mp, mp->m_ddev_targp,
|
||||
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
|
||||
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
|
||||
XFS_FSS_TO_BB(mp, 1), 0, &bp);
|
||||
if (error) {
|
||||
@ -693,3 +687,63 @@ xfs_fs_goingdown(
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Force a shutdown of the filesystem instantly while keeping the filesystem
|
||||
* consistent. We don't do an unmount here; just shutdown the shop, make sure
|
||||
* that absolutely nothing persistent happens to this filesystem after this
|
||||
* point.
|
||||
*/
|
||||
void
|
||||
xfs_do_force_shutdown(
|
||||
xfs_mount_t *mp,
|
||||
int flags,
|
||||
char *fname,
|
||||
int lnnum)
|
||||
{
|
||||
int logerror;
|
||||
|
||||
logerror = flags & SHUTDOWN_LOG_IO_ERROR;
|
||||
|
||||
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
|
||||
xfs_notice(mp,
|
||||
"%s(0x%x) called from line %d of file %s. Return address = 0x%p",
|
||||
__func__, flags, lnnum, fname, __return_address);
|
||||
}
|
||||
/*
|
||||
* No need to duplicate efforts.
|
||||
*/
|
||||
if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
|
||||
return;
|
||||
|
||||
/*
|
||||
* This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
|
||||
* queue up anybody new on the log reservations, and wakes up
|
||||
* everybody who's sleeping on log reservations to tell them
|
||||
* the bad news.
|
||||
*/
|
||||
if (xfs_log_force_umount(mp, logerror))
|
||||
return;
|
||||
|
||||
if (flags & SHUTDOWN_CORRUPT_INCORE) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
|
||||
"Corruption of in-memory data detected. Shutting down filesystem");
|
||||
if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
|
||||
xfs_stack_trace();
|
||||
} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
|
||||
if (logerror) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
|
||||
"Log I/O Error Detected. Shutting down filesystem");
|
||||
} else if (flags & SHUTDOWN_DEVICE_REQ) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
|
||||
"All device paths lost. Shutting down filesystem");
|
||||
} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
|
||||
"I/O Error Detected. Shutting down filesystem");
|
||||
}
|
||||
}
|
||||
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
|
||||
xfs_alert(mp,
|
||||
"Please umount the filesystem and rectify the problem(s)");
|
||||
}
|
||||
}
|
||||
|
@ -200,8 +200,7 @@ xfs_ialloc_inode_init(
|
||||
*/
|
||||
d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
|
||||
fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
|
||||
mp->m_bsize * blks_per_cluster,
|
||||
XBF_LOCK);
|
||||
mp->m_bsize * blks_per_cluster, 0);
|
||||
if (!fbuf)
|
||||
return ENOMEM;
|
||||
/*
|
||||
@ -610,6 +609,13 @@ xfs_ialloc_get_rec(
|
||||
/*
|
||||
* Visible inode allocation functions.
|
||||
*/
|
||||
/*
|
||||
* Find a free (set) bit in the inode bitmask.
|
||||
*/
|
||||
static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
|
||||
{
|
||||
return xfs_lowbit64(*fp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an inode on disk.
|
||||
|
@ -46,15 +46,6 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
|
||||
(xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a free (set) bit in the inode bitmask.
|
||||
*/
|
||||
static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
|
||||
{
|
||||
return xfs_lowbit64(*fp);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Allocate an inode on disk.
|
||||
* Mode is used to tell whether the new inode will need space, and whether
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_acl.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
@ -123,23 +122,7 @@ xfs_inode_free(
|
||||
xfs_idestroy_fork(ip, XFS_ATTR_FORK);
|
||||
|
||||
if (ip->i_itemp) {
|
||||
/*
|
||||
* Only if we are shutting down the fs will we see an
|
||||
* inode still in the AIL. If it is there, we should remove
|
||||
* it to prevent a use-after-free from occurring.
|
||||
*/
|
||||
xfs_log_item_t *lip = &ip->i_itemp->ili_item;
|
||||
struct xfs_ail *ailp = lip->li_ailp;
|
||||
|
||||
ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
|
||||
XFS_FORCED_SHUTDOWN(ip->i_mount));
|
||||
if (lip->li_flags & XFS_LI_IN_AIL) {
|
||||
spin_lock(&ailp->xa_lock);
|
||||
if (lip->li_flags & XFS_LI_IN_AIL)
|
||||
xfs_trans_ail_delete(ailp, lip);
|
||||
else
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
|
||||
xfs_inode_item_destroy(ip);
|
||||
ip->i_itemp = NULL;
|
||||
}
|
||||
@ -334,9 +317,10 @@ xfs_iget_cache_miss(
|
||||
/*
|
||||
* Preload the radix tree so we can insert safely under the
|
||||
* write spinlock. Note that we cannot sleep inside the preload
|
||||
* region.
|
||||
* region. Since we can be called from transaction context, don't
|
||||
* recurse into the file system.
|
||||
*/
|
||||
if (radix_tree_preload(GFP_KERNEL)) {
|
||||
if (radix_tree_preload(GFP_NOFS)) {
|
||||
error = EAGAIN;
|
||||
goto out_destroy;
|
||||
}
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
@ -61,6 +60,20 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
|
||||
STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
|
||||
STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
|
||||
|
||||
/*
|
||||
* helper function to extract extent size hint from inode
|
||||
*/
|
||||
xfs_extlen_t
|
||||
xfs_get_extsz_hint(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
|
||||
return ip->i_d.di_extsize;
|
||||
if (XFS_IS_REALTIME_INODE(ip))
|
||||
return ip->i_mount->m_sb.sb_rextsize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
/*
|
||||
* Make sure that the extents in the given memory buffer
|
||||
@ -137,6 +150,7 @@ xfs_imap_to_bp(
|
||||
int ni;
|
||||
xfs_buf_t *bp;
|
||||
|
||||
buf_flags |= XBF_UNMAPPED;
|
||||
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
|
||||
(int)imap->im_len, buf_flags, &bp);
|
||||
if (error) {
|
||||
@ -226,7 +240,7 @@ xfs_inotobp(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
|
||||
error = xfs_imap_to_bp(mp, tp, &imap, &bp, 0, imap_flags);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@ -782,8 +796,7 @@ xfs_iread(
|
||||
/*
|
||||
* Get pointers to the on-disk inode and the buffer containing it.
|
||||
*/
|
||||
error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
|
||||
XBF_LOCK, iget_flags);
|
||||
error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 0, iget_flags);
|
||||
if (error)
|
||||
return error;
|
||||
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
|
||||
@ -1342,7 +1355,7 @@ xfs_iunlink(
|
||||
* Here we put the head pointer into our next pointer,
|
||||
* and then we fall through to point the head at us.
|
||||
*/
|
||||
error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
|
||||
error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@ -1423,7 +1436,7 @@ xfs_iunlink_remove(
|
||||
* of dealing with the buffer when there is no need to
|
||||
* change it.
|
||||
*/
|
||||
error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
|
||||
error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
|
||||
if (error) {
|
||||
xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
|
||||
__func__, error);
|
||||
@ -1484,7 +1497,7 @@ xfs_iunlink_remove(
|
||||
* Now last_ibp points to the buffer previous to us on
|
||||
* the unlinked list. Pull us from the list.
|
||||
*/
|
||||
error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
|
||||
error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
|
||||
if (error) {
|
||||
xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
|
||||
__func__, error);
|
||||
@ -1566,8 +1579,7 @@ xfs_ifree_cluster(
|
||||
* to mark all the active inodes on the buffer stale.
|
||||
*/
|
||||
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
|
||||
mp->m_bsize * blks_per_cluster,
|
||||
XBF_LOCK);
|
||||
mp->m_bsize * blks_per_cluster, 0);
|
||||
|
||||
if (!bp)
|
||||
return ENOMEM;
|
||||
@ -1737,7 +1749,7 @@ xfs_ifree(
|
||||
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
|
||||
error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK);
|
||||
error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@ -2347,11 +2359,11 @@ cluster_corrupt_out:
|
||||
*/
|
||||
rcu_read_unlock();
|
||||
/*
|
||||
* Clean up the buffer. If it was B_DELWRI, just release it --
|
||||
* Clean up the buffer. If it was delwri, just release it --
|
||||
* brelse can handle it with no problems. If not, shut down the
|
||||
* filesystem before releasing the buffer.
|
||||
*/
|
||||
bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
|
||||
bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
|
||||
if (bufwasdelwri)
|
||||
xfs_buf_relse(bp);
|
||||
|
||||
@ -2377,30 +2389,29 @@ cluster_corrupt_out:
|
||||
/*
|
||||
* Unlocks the flush lock
|
||||
*/
|
||||
xfs_iflush_abort(iq);
|
||||
xfs_iflush_abort(iq, false);
|
||||
kmem_free(ilist);
|
||||
xfs_perag_put(pag);
|
||||
return XFS_ERROR(EFSCORRUPTED);
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_iflush() will write a modified inode's changes out to the
|
||||
* inode's on disk home. The caller must have the inode lock held
|
||||
* in at least shared mode and the inode flush completion must be
|
||||
* active as well. The inode lock will still be held upon return from
|
||||
* the call and the caller is free to unlock it.
|
||||
* The inode flush will be completed when the inode reaches the disk.
|
||||
* The flags indicate how the inode's buffer should be written out.
|
||||
* Flush dirty inode metadata into the backing buffer.
|
||||
*
|
||||
* The caller must have the inode lock and the inode flush lock held. The
|
||||
* inode lock will still be held upon return to the caller, and the inode
|
||||
* flush lock will be released after the inode has reached the disk.
|
||||
*
|
||||
* The caller must write out the buffer returned in *bpp and release it.
|
||||
*/
|
||||
int
|
||||
xfs_iflush(
|
||||
xfs_inode_t *ip,
|
||||
uint flags)
|
||||
struct xfs_inode *ip,
|
||||
struct xfs_buf **bpp)
|
||||
{
|
||||
xfs_inode_log_item_t *iip;
|
||||
xfs_buf_t *bp;
|
||||
xfs_dinode_t *dip;
|
||||
xfs_mount_t *mp;
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
struct xfs_buf *bp;
|
||||
struct xfs_dinode *dip;
|
||||
int error;
|
||||
|
||||
XFS_STATS_INC(xs_iflush_count);
|
||||
@ -2410,25 +2421,8 @@ xfs_iflush(
|
||||
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
||||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
|
||||
|
||||
iip = ip->i_itemp;
|
||||
mp = ip->i_mount;
|
||||
*bpp = NULL;
|
||||
|
||||
/*
|
||||
* We can't flush the inode until it is unpinned, so wait for it if we
|
||||
* are allowed to block. We know no one new can pin it, because we are
|
||||
* holding the inode lock shared and you need to hold it exclusively to
|
||||
* pin the inode.
|
||||
*
|
||||
* If we are not allowed to block, force the log out asynchronously so
|
||||
* that when we come back the inode will be unpinned. If other inodes
|
||||
* in the same cluster are dirty, they will probably write the inode
|
||||
* out for us if they occur after the log force completes.
|
||||
*/
|
||||
if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
|
||||
xfs_iunpin(ip);
|
||||
xfs_ifunlock(ip);
|
||||
return EAGAIN;
|
||||
}
|
||||
xfs_iunpin_wait(ip);
|
||||
|
||||
/*
|
||||
@ -2447,20 +2441,20 @@ xfs_iflush(
|
||||
/*
|
||||
* This may have been unpinned because the filesystem is shutting
|
||||
* down forcibly. If that's the case we must not write this inode
|
||||
* to disk, because the log record didn't make it to disk!
|
||||
* to disk, because the log record didn't make it to disk.
|
||||
*
|
||||
* We also have to remove the log item from the AIL in this case,
|
||||
* as we wait for an empty AIL as part of the unmount process.
|
||||
*/
|
||||
if (XFS_FORCED_SHUTDOWN(mp)) {
|
||||
if (iip)
|
||||
iip->ili_fields = 0;
|
||||
xfs_ifunlock(ip);
|
||||
return XFS_ERROR(EIO);
|
||||
error = XFS_ERROR(EIO);
|
||||
goto abort_out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the buffer containing the on-disk inode.
|
||||
*/
|
||||
error = xfs_itobp(mp, NULL, ip, &dip, &bp,
|
||||
(flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
|
||||
error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK);
|
||||
if (error || !bp) {
|
||||
xfs_ifunlock(ip);
|
||||
return error;
|
||||
@ -2488,23 +2482,20 @@ xfs_iflush(
|
||||
if (error)
|
||||
goto cluster_corrupt_out;
|
||||
|
||||
if (flags & SYNC_WAIT)
|
||||
error = xfs_bwrite(bp);
|
||||
else
|
||||
xfs_buf_delwri_queue(bp);
|
||||
|
||||
xfs_buf_relse(bp);
|
||||
return error;
|
||||
*bpp = bp;
|
||||
return 0;
|
||||
|
||||
corrupt_out:
|
||||
xfs_buf_relse(bp);
|
||||
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
|
||||
cluster_corrupt_out:
|
||||
error = XFS_ERROR(EFSCORRUPTED);
|
||||
abort_out:
|
||||
/*
|
||||
* Unlocks the flush lock
|
||||
*/
|
||||
xfs_iflush_abort(ip);
|
||||
return XFS_ERROR(EFSCORRUPTED);
|
||||
xfs_iflush_abort(ip, false);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
@ -2706,27 +2697,6 @@ corrupt_out:
|
||||
return XFS_ERROR(EFSCORRUPTED);
|
||||
}
|
||||
|
||||
void
|
||||
xfs_promote_inode(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
struct xfs_buf *bp;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
|
||||
|
||||
bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
|
||||
ip->i_imap.im_len, XBF_TRYLOCK);
|
||||
if (!bp)
|
||||
return;
|
||||
|
||||
if (XFS_BUF_ISDELAYWRITE(bp)) {
|
||||
xfs_buf_delwri_promote(bp);
|
||||
wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
|
||||
}
|
||||
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a pointer to the extent record at file index idx.
|
||||
*/
|
||||
|
@ -529,11 +529,12 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
|
||||
|
||||
void xfs_iext_realloc(xfs_inode_t *, int, int);
|
||||
void xfs_iunpin_wait(xfs_inode_t *);
|
||||
int xfs_iflush(xfs_inode_t *, uint);
|
||||
void xfs_promote_inode(struct xfs_inode *);
|
||||
int xfs_iflush(struct xfs_inode *, struct xfs_buf **);
|
||||
void xfs_lock_inodes(xfs_inode_t **, int, uint);
|
||||
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
|
||||
|
||||
xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
|
||||
|
||||
#define IHOLD(ip) \
|
||||
do { \
|
||||
ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -480,25 +478,16 @@ xfs_inode_item_unpin(
|
||||
wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called to attempt to lock the inode associated with this
|
||||
* inode log item, in preparation for the push routine which does the actual
|
||||
* iflush. Don't sleep on the inode lock or the flush lock.
|
||||
*
|
||||
* If the flush lock is already held, indicating that the inode has
|
||||
* been or is in the process of being flushed, then (ideally) we'd like to
|
||||
* see if the inode's buffer is still incore, and if so give it a nudge.
|
||||
* We delay doing so until the pushbuf routine, though, to avoid holding
|
||||
* the AIL lock across a call to the blackhole which is the buffer cache.
|
||||
* Also we don't want to sleep in any device strategy routines, which can happen
|
||||
* if we do the subsequent bawrite in here.
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_inode_item_trylock(
|
||||
struct xfs_log_item *lip)
|
||||
xfs_inode_item_push(
|
||||
struct xfs_log_item *lip,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
|
||||
struct xfs_inode *ip = iip->ili_inode;
|
||||
struct xfs_buf *bp = NULL;
|
||||
uint rval = XFS_ITEM_SUCCESS;
|
||||
int error;
|
||||
|
||||
if (xfs_ipincount(ip) > 0)
|
||||
return XFS_ITEM_PINNED;
|
||||
@ -506,30 +495,50 @@ xfs_inode_item_trylock(
|
||||
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
|
||||
return XFS_ITEM_LOCKED;
|
||||
|
||||
if (!xfs_iflock_nowait(ip)) {
|
||||
/*
|
||||
* inode has already been flushed to the backing buffer,
|
||||
* leave it locked in shared mode, pushbuf routine will
|
||||
* unlock it.
|
||||
*/
|
||||
return XFS_ITEM_PUSHBUF;
|
||||
/*
|
||||
* Re-check the pincount now that we stabilized the value by
|
||||
* taking the ilock.
|
||||
*/
|
||||
if (xfs_ipincount(ip) > 0) {
|
||||
rval = XFS_ITEM_PINNED;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* Stale items should force out the iclog */
|
||||
/*
|
||||
* Someone else is already flushing the inode. Nothing we can do
|
||||
* here but wait for the flush to finish and remove the item from
|
||||
* the AIL.
|
||||
*/
|
||||
if (!xfs_iflock_nowait(ip)) {
|
||||
rval = XFS_ITEM_FLUSHING;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stale inode items should force out the iclog.
|
||||
*/
|
||||
if (ip->i_flags & XFS_ISTALE) {
|
||||
xfs_ifunlock(ip);
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
return XFS_ITEM_PINNED;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
|
||||
ASSERT(iip->ili_fields != 0);
|
||||
ASSERT(iip->ili_logged == 0);
|
||||
ASSERT(lip->li_flags & XFS_LI_IN_AIL);
|
||||
ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
|
||||
ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
|
||||
|
||||
spin_unlock(&lip->li_ailp->xa_lock);
|
||||
|
||||
error = xfs_iflush(ip, &bp);
|
||||
if (!error) {
|
||||
if (!xfs_buf_delwri_queue(bp, buffer_list))
|
||||
rval = XFS_ITEM_FLUSHING;
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
#endif
|
||||
return XFS_ITEM_SUCCESS;
|
||||
|
||||
spin_lock(&lip->li_ailp->xa_lock);
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
return rval;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -613,86 +622,6 @@ xfs_inode_item_committed(
|
||||
return lsn;
|
||||
}
|
||||
|
||||
/*
|
||||
* This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
|
||||
* failed to get the inode flush lock but did get the inode locked SHARED.
|
||||
* Here we're trying to see if the inode buffer is incore, and if so whether it's
|
||||
* marked delayed write. If that's the case, we'll promote it and that will
|
||||
* allow the caller to write the buffer by triggering the xfsbufd to run.
|
||||
*/
|
||||
STATIC bool
|
||||
xfs_inode_item_pushbuf(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
|
||||
struct xfs_inode *ip = iip->ili_inode;
|
||||
struct xfs_buf *bp;
|
||||
bool ret = true;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
|
||||
|
||||
/*
|
||||
* If a flush is not in progress anymore, chances are that the
|
||||
* inode was taken off the AIL. So, just get out.
|
||||
*/
|
||||
if (!xfs_isiflocked(ip) ||
|
||||
!(lip->li_flags & XFS_LI_IN_AIL)) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
return true;
|
||||
}
|
||||
|
||||
bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
|
||||
iip->ili_format.ilf_len, XBF_TRYLOCK);
|
||||
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
if (!bp)
|
||||
return true;
|
||||
if (XFS_BUF_ISDELAYWRITE(bp))
|
||||
xfs_buf_delwri_promote(bp);
|
||||
if (xfs_buf_ispinned(bp))
|
||||
ret = false;
|
||||
xfs_buf_relse(bp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called to asynchronously write the inode associated with this
|
||||
* inode log item out to disk. The inode will already have been locked by
|
||||
* a successful call to xfs_inode_item_trylock().
|
||||
*/
|
||||
STATIC void
|
||||
xfs_inode_item_push(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
|
||||
struct xfs_inode *ip = iip->ili_inode;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
|
||||
ASSERT(xfs_isiflocked(ip));
|
||||
|
||||
/*
|
||||
* Since we were able to lock the inode's flush lock and
|
||||
* we found it on the AIL, the inode must be dirty. This
|
||||
* is because the inode is removed from the AIL while still
|
||||
* holding the flush lock in xfs_iflush_done(). Thus, if
|
||||
* we found it in the AIL and were able to obtain the flush
|
||||
* lock without sleeping, then there must not have been
|
||||
* anyone in the process of flushing the inode.
|
||||
*/
|
||||
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0);
|
||||
|
||||
/*
|
||||
* Push the inode to it's backing buffer. This will not remove the
|
||||
* inode from the AIL - a further push will be required to trigger a
|
||||
* buffer push. However, this allows all the dirty inodes to be pushed
|
||||
* to the buffer before it is pushed to disk. The buffer IO completion
|
||||
* will pull the inode from the AIL, mark it clean and unlock the flush
|
||||
* lock.
|
||||
*/
|
||||
(void) xfs_iflush(ip, SYNC_TRYLOCK);
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX rcc - this one really has to do something. Probably needs
|
||||
* to stamp in a new field in the incore inode.
|
||||
@ -713,11 +642,9 @@ static const struct xfs_item_ops xfs_inode_item_ops = {
|
||||
.iop_format = xfs_inode_item_format,
|
||||
.iop_pin = xfs_inode_item_pin,
|
||||
.iop_unpin = xfs_inode_item_unpin,
|
||||
.iop_trylock = xfs_inode_item_trylock,
|
||||
.iop_unlock = xfs_inode_item_unlock,
|
||||
.iop_committed = xfs_inode_item_committed,
|
||||
.iop_push = xfs_inode_item_push,
|
||||
.iop_pushbuf = xfs_inode_item_pushbuf,
|
||||
.iop_committing = xfs_inode_item_committing
|
||||
};
|
||||
|
||||
@ -848,7 +775,8 @@ xfs_iflush_done(
|
||||
ASSERT(i <= need_ail);
|
||||
}
|
||||
/* xfs_trans_ail_delete_bulk() drops the AIL lock. */
|
||||
xfs_trans_ail_delete_bulk(ailp, log_items, i);
|
||||
xfs_trans_ail_delete_bulk(ailp, log_items, i,
|
||||
SHUTDOWN_CORRUPT_INCORE);
|
||||
}
|
||||
|
||||
|
||||
@ -869,16 +797,15 @@ xfs_iflush_done(
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the inode flushing abort routine. It is called
|
||||
* from xfs_iflush when the filesystem is shutting down to clean
|
||||
* up the inode state.
|
||||
* It is responsible for removing the inode item
|
||||
* from the AIL if it has not been re-logged, and unlocking the inode's
|
||||
* flush lock.
|
||||
* This is the inode flushing abort routine. It is called from xfs_iflush when
|
||||
* the filesystem is shutting down to clean up the inode state. It is
|
||||
* responsible for removing the inode item from the AIL if it has not been
|
||||
* re-logged, and unlocking the inode's flush lock.
|
||||
*/
|
||||
void
|
||||
xfs_iflush_abort(
|
||||
xfs_inode_t *ip)
|
||||
xfs_inode_t *ip,
|
||||
bool stale)
|
||||
{
|
||||
xfs_inode_log_item_t *iip = ip->i_itemp;
|
||||
|
||||
@ -888,7 +815,10 @@ xfs_iflush_abort(
|
||||
spin_lock(&ailp->xa_lock);
|
||||
if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
|
||||
/* xfs_trans_ail_delete() drops the AIL lock. */
|
||||
xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip);
|
||||
xfs_trans_ail_delete(ailp, &iip->ili_item,
|
||||
stale ?
|
||||
SHUTDOWN_LOG_IO_ERROR :
|
||||
SHUTDOWN_CORRUPT_INCORE);
|
||||
} else
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
@ -915,7 +845,7 @@ xfs_istale_done(
|
||||
struct xfs_buf *bp,
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
xfs_iflush_abort(INODE_ITEM(lip)->ili_inode);
|
||||
xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -165,7 +165,7 @@ extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
|
||||
extern void xfs_inode_item_destroy(struct xfs_inode *);
|
||||
extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *);
|
||||
extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
|
||||
extern void xfs_iflush_abort(struct xfs_inode *);
|
||||
extern void xfs_iflush_abort(struct xfs_inode *, bool);
|
||||
extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
|
||||
xfs_inode_log_format_t *);
|
||||
|
||||
|
@ -26,11 +26,6 @@
|
||||
* high agno_log-agblklog-inopblog bits - 0
|
||||
*/
|
||||
|
||||
typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */
|
||||
|
||||
#define NULLFSINO ((xfs_ino_t)-1)
|
||||
#define NULLAGINO ((xfs_agino_t)-1)
|
||||
|
||||
struct xfs_mount;
|
||||
|
||||
#define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1)
|
||||
|
@ -17,9 +17,7 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -22,9 +22,7 @@
|
||||
#include <asm/uaccess.h>
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -17,9 +17,7 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -37,7 +35,6 @@
|
||||
#include "xfs_rtalloc.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_itable.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_attr.h"
|
||||
#include "xfs_buf_item.h"
|
||||
#include "xfs_trans_space.h"
|
||||
@ -142,11 +139,7 @@ xfs_iomap_write_direct(
|
||||
int committed;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Make sure that the dquots are there. This doesn't hold
|
||||
* the ilock across a disk read.
|
||||
*/
|
||||
error = xfs_qm_dqattach_locked(ip, 0);
|
||||
error = xfs_qm_dqattach(ip, 0);
|
||||
if (error)
|
||||
return XFS_ERROR(error);
|
||||
|
||||
@ -158,7 +151,7 @@ xfs_iomap_write_direct(
|
||||
if ((offset + count) > XFS_ISIZE(ip)) {
|
||||
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
|
||||
if (error)
|
||||
goto error_out;
|
||||
return XFS_ERROR(error);
|
||||
} else {
|
||||
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
|
||||
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
|
||||
@ -190,7 +183,6 @@ xfs_iomap_write_direct(
|
||||
/*
|
||||
* Allocate and setup the transaction
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
|
||||
error = xfs_trans_reserve(tp, resblks,
|
||||
XFS_WRITE_LOG_RES(mp), resrtextents,
|
||||
@ -199,15 +191,16 @@ xfs_iomap_write_direct(
|
||||
/*
|
||||
* Check for running out of space, note: need lock to return
|
||||
*/
|
||||
if (error)
|
||||
if (error) {
|
||||
xfs_trans_cancel(tp, 0);
|
||||
return XFS_ERROR(error);
|
||||
}
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (error)
|
||||
goto error_out;
|
||||
|
||||
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
|
||||
if (error)
|
||||
goto error1;
|
||||
goto out_trans_cancel;
|
||||
|
||||
xfs_trans_ijoin(tp, ip, 0);
|
||||
|
||||
@ -224,42 +217,39 @@ xfs_iomap_write_direct(
|
||||
error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag,
|
||||
&firstfsb, 0, imap, &nimaps, &free_list);
|
||||
if (error)
|
||||
goto error0;
|
||||
goto out_bmap_cancel;
|
||||
|
||||
/*
|
||||
* Complete the transaction
|
||||
*/
|
||||
error = xfs_bmap_finish(&tp, &free_list, &committed);
|
||||
if (error)
|
||||
goto error0;
|
||||
goto out_bmap_cancel;
|
||||
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
|
||||
if (error)
|
||||
goto error_out;
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Copy any maps to caller's array and return any error.
|
||||
*/
|
||||
if (nimaps == 0) {
|
||||
error = ENOSPC;
|
||||
goto error_out;
|
||||
error = XFS_ERROR(ENOSPC);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) {
|
||||
if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
|
||||
error = xfs_alert_fsblock_zero(ip, imap);
|
||||
goto error_out;
|
||||
}
|
||||
|
||||
return 0;
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
return error;
|
||||
|
||||
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
|
||||
out_bmap_cancel:
|
||||
xfs_bmap_cancel(&free_list);
|
||||
xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
|
||||
|
||||
error1: /* Just cancel transaction */
|
||||
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
|
||||
out_trans_cancel:
|
||||
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
|
||||
|
||||
error_out:
|
||||
return XFS_ERROR(error);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -422,6 +412,15 @@ retry:
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure preallocation does not create extents beyond the range we
|
||||
* actually support in this filesystem.
|
||||
*/
|
||||
if (last_fsb > XFS_B_TO_FSB(mp, mp->m_maxioffset))
|
||||
last_fsb = XFS_B_TO_FSB(mp, mp->m_maxioffset);
|
||||
|
||||
ASSERT(last_fsb > offset_fsb);
|
||||
|
||||
nimaps = XFS_WRITE_IMAPS;
|
||||
error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb,
|
||||
imap, &nimaps, XFS_BMAPI_ENTIRE);
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_acl.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -34,7 +32,6 @@
|
||||
#include "xfs_rtalloc.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_itable.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_attr.h"
|
||||
#include "xfs_buf_item.h"
|
||||
#include "xfs_utils.h"
|
||||
@ -700,7 +697,7 @@ xfs_setattr_size(
|
||||
xfs_off_t oldsize, newsize;
|
||||
struct xfs_trans *tp;
|
||||
int error;
|
||||
uint lock_flags;
|
||||
uint lock_flags = 0;
|
||||
uint commit_flags = 0;
|
||||
|
||||
trace_xfs_setattr(ip);
|
||||
@ -720,10 +717,10 @@ xfs_setattr_size(
|
||||
ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
|
||||
ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
|
||||
|
||||
lock_flags = XFS_ILOCK_EXCL;
|
||||
if (!(flags & XFS_ATTR_NOLOCK))
|
||||
if (!(flags & XFS_ATTR_NOLOCK)) {
|
||||
lock_flags |= XFS_IOLOCK_EXCL;
|
||||
xfs_ilock(ip, lock_flags);
|
||||
xfs_ilock(ip, lock_flags);
|
||||
}
|
||||
|
||||
oldsize = inode->i_size;
|
||||
newsize = iattr->ia_size;
|
||||
@ -746,7 +743,7 @@ xfs_setattr_size(
|
||||
/*
|
||||
* Make sure that the dquots are attached to the inode.
|
||||
*/
|
||||
error = xfs_qm_dqattach_locked(ip, 0);
|
||||
error = xfs_qm_dqattach(ip, 0);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
@ -768,8 +765,6 @@ xfs_setattr_size(
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
}
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
lock_flags &= ~XFS_ILOCK_EXCL;
|
||||
|
||||
/*
|
||||
* We are going to log the inode size change in this transaction so
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -35,7 +33,6 @@
|
||||
#include "xfs_trans_priv.h"
|
||||
#include "xfs_dinode.h"
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
kmem_zone_t *xfs_log_ticket_zone;
|
||||
@ -916,27 +913,42 @@ xfs_log_need_covered(xfs_mount_t *mp)
|
||||
* We may be holding the log iclog lock upon entering this routine.
|
||||
*/
|
||||
xfs_lsn_t
|
||||
xlog_assign_tail_lsn(
|
||||
xlog_assign_tail_lsn_locked(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
xfs_lsn_t tail_lsn;
|
||||
struct log *log = mp->m_log;
|
||||
struct xfs_log_item *lip;
|
||||
xfs_lsn_t tail_lsn;
|
||||
|
||||
assert_spin_locked(&mp->m_ail->xa_lock);
|
||||
|
||||
/*
|
||||
* To make sure we always have a valid LSN for the log tail we keep
|
||||
* track of the last LSN which was committed in log->l_last_sync_lsn,
|
||||
* and use that when the AIL was empty and xfs_ail_min_lsn returns 0.
|
||||
*
|
||||
* If the AIL has been emptied we also need to wake any process
|
||||
* waiting for this condition.
|
||||
* and use that when the AIL was empty.
|
||||
*/
|
||||
tail_lsn = xfs_ail_min_lsn(mp->m_ail);
|
||||
if (!tail_lsn)
|
||||
lip = xfs_ail_min(mp->m_ail);
|
||||
if (lip)
|
||||
tail_lsn = lip->li_lsn;
|
||||
else
|
||||
tail_lsn = atomic64_read(&log->l_last_sync_lsn);
|
||||
atomic64_set(&log->l_tail_lsn, tail_lsn);
|
||||
return tail_lsn;
|
||||
}
|
||||
|
||||
xfs_lsn_t
|
||||
xlog_assign_tail_lsn(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
xfs_lsn_t tail_lsn;
|
||||
|
||||
spin_lock(&mp->m_ail->xa_lock);
|
||||
tail_lsn = xlog_assign_tail_lsn_locked(mp);
|
||||
spin_unlock(&mp->m_ail->xa_lock);
|
||||
|
||||
return tail_lsn;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the space in the log between the tail and the head. The head
|
||||
* is passed in the cycle/bytes formal parms. In the special case where
|
||||
@ -1172,7 +1184,7 @@ xlog_alloc_log(xfs_mount_t *mp,
|
||||
xlog_get_iclog_buffer_size(mp, log);
|
||||
|
||||
error = ENOMEM;
|
||||
bp = xfs_buf_alloc(mp->m_logdev_targp, 0, log->l_iclog_size, 0);
|
||||
bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
|
||||
if (!bp)
|
||||
goto out_free_log;
|
||||
bp->b_iodone = xlog_iodone;
|
||||
@ -1182,9 +1194,6 @@ xlog_alloc_log(xfs_mount_t *mp,
|
||||
spin_lock_init(&log->l_icloglock);
|
||||
init_waitqueue_head(&log->l_flush_wait);
|
||||
|
||||
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
|
||||
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
|
||||
|
||||
iclogp = &log->l_iclog;
|
||||
/*
|
||||
* The amount of memory to allocate for the iclog structure is
|
||||
@ -1204,7 +1213,7 @@ xlog_alloc_log(xfs_mount_t *mp,
|
||||
prev_iclog = iclog;
|
||||
|
||||
bp = xfs_buf_get_uncached(mp->m_logdev_targp,
|
||||
log->l_iclog_size, 0);
|
||||
BTOBB(log->l_iclog_size), 0);
|
||||
if (!bp)
|
||||
goto out_free_iclog;
|
||||
|
||||
@ -1224,7 +1233,7 @@ xlog_alloc_log(xfs_mount_t *mp,
|
||||
head->h_fmt = cpu_to_be32(XLOG_FMT);
|
||||
memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
|
||||
|
||||
iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
|
||||
iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;
|
||||
iclog->ic_state = XLOG_STATE_ACTIVE;
|
||||
iclog->ic_log = log;
|
||||
atomic_set(&iclog->ic_refcnt, 0);
|
||||
@ -1475,7 +1484,7 @@ xlog_sync(xlog_t *log,
|
||||
} else {
|
||||
iclog->ic_bwritecnt = 1;
|
||||
}
|
||||
XFS_BUF_SET_COUNT(bp, count);
|
||||
bp->b_io_length = BTOBB(count);
|
||||
bp->b_fspriv = iclog;
|
||||
XFS_BUF_ZEROFLAGS(bp);
|
||||
XFS_BUF_ASYNC(bp);
|
||||
@ -1573,7 +1582,7 @@ xlog_dealloc_log(xlog_t *log)
|
||||
* always need to ensure that the extra buffer does not point to memory
|
||||
* owned by another log buffer before we free it.
|
||||
*/
|
||||
xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
|
||||
xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
|
||||
xfs_buf_free(log->l_xbuf);
|
||||
|
||||
iclog = log->l_iclog;
|
||||
@ -2932,6 +2941,7 @@ xfs_log_force(
|
||||
{
|
||||
int error;
|
||||
|
||||
trace_xfs_log_force(mp, 0);
|
||||
error = _xfs_log_force(mp, flags, NULL);
|
||||
if (error)
|
||||
xfs_warn(mp, "%s: error %d returned.", __func__, error);
|
||||
@ -3080,6 +3090,7 @@ xfs_log_force_lsn(
|
||||
{
|
||||
int error;
|
||||
|
||||
trace_xfs_log_force(mp, lsn);
|
||||
error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
|
||||
if (error)
|
||||
xfs_warn(mp, "%s: error %d returned.", __func__, error);
|
||||
|
@ -152,6 +152,7 @@ int xfs_log_mount(struct xfs_mount *mp,
|
||||
int num_bblocks);
|
||||
int xfs_log_mount_finish(struct xfs_mount *mp);
|
||||
xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
|
||||
xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
|
||||
void xfs_log_space_wake(struct xfs_mount *mp);
|
||||
int xfs_log_notify(struct xfs_mount *mp,
|
||||
struct xlog_in_core *iclog,
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_trans_priv.h"
|
||||
#include "xfs_log_priv.h"
|
||||
@ -29,60 +27,9 @@
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_extent_busy.h"
|
||||
#include "xfs_discard.h"
|
||||
|
||||
/*
|
||||
* Perform initial CIL structure initialisation.
|
||||
*/
|
||||
int
|
||||
xlog_cil_init(
|
||||
struct log *log)
|
||||
{
|
||||
struct xfs_cil *cil;
|
||||
struct xfs_cil_ctx *ctx;
|
||||
|
||||
cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
|
||||
if (!cil)
|
||||
return ENOMEM;
|
||||
|
||||
ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
|
||||
if (!ctx) {
|
||||
kmem_free(cil);
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&cil->xc_cil);
|
||||
INIT_LIST_HEAD(&cil->xc_committing);
|
||||
spin_lock_init(&cil->xc_cil_lock);
|
||||
init_rwsem(&cil->xc_ctx_lock);
|
||||
init_waitqueue_head(&cil->xc_commit_wait);
|
||||
|
||||
INIT_LIST_HEAD(&ctx->committing);
|
||||
INIT_LIST_HEAD(&ctx->busy_extents);
|
||||
ctx->sequence = 1;
|
||||
ctx->cil = cil;
|
||||
cil->xc_ctx = ctx;
|
||||
cil->xc_current_sequence = ctx->sequence;
|
||||
|
||||
cil->xc_log = log;
|
||||
log->l_cilp = cil;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
xlog_cil_destroy(
|
||||
struct log *log)
|
||||
{
|
||||
if (log->l_cilp->xc_ctx) {
|
||||
if (log->l_cilp->xc_ctx->ticket)
|
||||
xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
|
||||
kmem_free(log->l_cilp->xc_ctx);
|
||||
}
|
||||
|
||||
ASSERT(list_empty(&log->l_cilp->xc_cil));
|
||||
kmem_free(log->l_cilp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a new ticket. Failing to get a new ticket makes it really hard to
|
||||
* recover, so we don't allow failure here. Also, we allocate in a context that
|
||||
@ -390,8 +337,8 @@ xlog_cil_committed(
|
||||
xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
|
||||
ctx->start_lsn, abort);
|
||||
|
||||
xfs_alloc_busy_sort(&ctx->busy_extents);
|
||||
xfs_alloc_busy_clear(mp, &ctx->busy_extents,
|
||||
xfs_extent_busy_sort(&ctx->busy_extents);
|
||||
xfs_extent_busy_clear(mp, &ctx->busy_extents,
|
||||
(mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
|
||||
|
||||
spin_lock(&ctx->cil->xc_cil_lock);
|
||||
@ -404,7 +351,7 @@ xlog_cil_committed(
|
||||
ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
|
||||
|
||||
xfs_discard_extents(mp, &ctx->busy_extents);
|
||||
xfs_alloc_busy_clear(mp, &ctx->busy_extents, false);
|
||||
xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
|
||||
}
|
||||
|
||||
kmem_free(ctx);
|
||||
@ -426,8 +373,7 @@ xlog_cil_committed(
|
||||
*/
|
||||
STATIC int
|
||||
xlog_cil_push(
|
||||
struct log *log,
|
||||
xfs_lsn_t push_seq)
|
||||
struct log *log)
|
||||
{
|
||||
struct xfs_cil *cil = log->l_cilp;
|
||||
struct xfs_log_vec *lv;
|
||||
@ -443,39 +389,36 @@ xlog_cil_push(
|
||||
struct xfs_log_iovec lhdr;
|
||||
struct xfs_log_vec lvhdr = { NULL };
|
||||
xfs_lsn_t commit_lsn;
|
||||
xfs_lsn_t push_seq;
|
||||
|
||||
if (!cil)
|
||||
return 0;
|
||||
|
||||
ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
|
||||
|
||||
new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
|
||||
new_ctx->ticket = xlog_cil_ticket_alloc(log);
|
||||
|
||||
/*
|
||||
* Lock out transaction commit, but don't block for background pushes
|
||||
* unless we are well over the CIL space limit. See the definition of
|
||||
* XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
|
||||
* used here.
|
||||
*/
|
||||
if (!down_write_trylock(&cil->xc_ctx_lock)) {
|
||||
if (!push_seq &&
|
||||
cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
|
||||
goto out_free_ticket;
|
||||
down_write(&cil->xc_ctx_lock);
|
||||
}
|
||||
down_write(&cil->xc_ctx_lock);
|
||||
ctx = cil->xc_ctx;
|
||||
|
||||
/* check if we've anything to push */
|
||||
if (list_empty(&cil->xc_cil))
|
||||
goto out_skip;
|
||||
spin_lock(&cil->xc_cil_lock);
|
||||
push_seq = cil->xc_push_seq;
|
||||
ASSERT(push_seq <= ctx->sequence);
|
||||
|
||||
/* check for spurious background flush */
|
||||
if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
|
||||
/*
|
||||
* Check if we've anything to push. If there is nothing, then we don't
|
||||
* move on to a new sequence number and so we have to be able to push
|
||||
* this sequence again later.
|
||||
*/
|
||||
if (list_empty(&cil->xc_cil)) {
|
||||
cil->xc_push_seq = 0;
|
||||
spin_unlock(&cil->xc_cil_lock);
|
||||
goto out_skip;
|
||||
}
|
||||
spin_unlock(&cil->xc_cil_lock);
|
||||
|
||||
|
||||
/* check for a previously pushed sequence */
|
||||
if (push_seq && push_seq < cil->xc_ctx->sequence)
|
||||
if (push_seq < cil->xc_ctx->sequence)
|
||||
goto out_skip;
|
||||
|
||||
/*
|
||||
@ -629,7 +572,6 @@ restart:
|
||||
|
||||
out_skip:
|
||||
up_write(&cil->xc_ctx_lock);
|
||||
out_free_ticket:
|
||||
xfs_log_ticket_put(new_ctx->ticket);
|
||||
kmem_free(new_ctx);
|
||||
return 0;
|
||||
@ -641,6 +583,82 @@ out_abort:
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
|
||||
static void
|
||||
xlog_cil_push_work(
|
||||
struct work_struct *work)
|
||||
{
|
||||
struct xfs_cil *cil = container_of(work, struct xfs_cil,
|
||||
xc_push_work);
|
||||
xlog_cil_push(cil->xc_log);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to push CIL every so often so we don't cache more than we can fit in
|
||||
* the log. The limit really is that a checkpoint can't be more than half the
|
||||
* log (the current checkpoint is not allowed to overwrite the previous
|
||||
* checkpoint), but commit latency and memory usage limit this to a smaller
|
||||
* size.
|
||||
*/
|
||||
static void
|
||||
xlog_cil_push_background(
|
||||
struct log *log)
|
||||
{
|
||||
struct xfs_cil *cil = log->l_cilp;
|
||||
|
||||
/*
|
||||
* The cil won't be empty because we are called while holding the
|
||||
* context lock so whatever we added to the CIL will still be there
|
||||
*/
|
||||
ASSERT(!list_empty(&cil->xc_cil));
|
||||
|
||||
/*
|
||||
* don't do a background push if we haven't used up all the
|
||||
* space available yet.
|
||||
*/
|
||||
if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
|
||||
return;
|
||||
|
||||
spin_lock(&cil->xc_cil_lock);
|
||||
if (cil->xc_push_seq < cil->xc_current_sequence) {
|
||||
cil->xc_push_seq = cil->xc_current_sequence;
|
||||
queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
|
||||
}
|
||||
spin_unlock(&cil->xc_cil_lock);
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
xlog_cil_push_foreground(
|
||||
struct log *log,
|
||||
xfs_lsn_t push_seq)
|
||||
{
|
||||
struct xfs_cil *cil = log->l_cilp;
|
||||
|
||||
if (!cil)
|
||||
return;
|
||||
|
||||
ASSERT(push_seq && push_seq <= cil->xc_current_sequence);
|
||||
|
||||
/* start on any pending background push to minimise wait time on it */
|
||||
flush_work(&cil->xc_push_work);
|
||||
|
||||
/*
|
||||
* If the CIL is empty or we've already pushed the sequence then
|
||||
* there's no work we need to do.
|
||||
*/
|
||||
spin_lock(&cil->xc_cil_lock);
|
||||
if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
|
||||
spin_unlock(&cil->xc_cil_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
cil->xc_push_seq = push_seq;
|
||||
spin_unlock(&cil->xc_cil_lock);
|
||||
|
||||
/* do the push now */
|
||||
xlog_cil_push(log);
|
||||
}
|
||||
|
||||
/*
|
||||
* Commit a transaction with the given vector to the Committed Item List.
|
||||
*
|
||||
@ -667,7 +685,6 @@ xfs_log_commit_cil(
|
||||
{
|
||||
struct log *log = mp->m_log;
|
||||
int log_flags = 0;
|
||||
int push = 0;
|
||||
struct xfs_log_vec *log_vector;
|
||||
|
||||
if (flags & XFS_TRANS_RELEASE_LOG_RES)
|
||||
@ -719,21 +736,9 @@ xfs_log_commit_cil(
|
||||
*/
|
||||
xfs_trans_free_items(tp, *commit_lsn, 0);
|
||||
|
||||
/* check for background commit before unlock */
|
||||
if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
|
||||
push = 1;
|
||||
xlog_cil_push_background(log);
|
||||
|
||||
up_read(&log->l_cilp->xc_ctx_lock);
|
||||
|
||||
/*
|
||||
* We need to push CIL every so often so we don't cache more than we
|
||||
* can fit in the log. The limit really is that a checkpoint can't be
|
||||
* more than half the log (the current checkpoint is not allowed to
|
||||
* overwrite the previous checkpoint), but commit latency and memory
|
||||
* usage limit this to a smaller size in most cases.
|
||||
*/
|
||||
if (push)
|
||||
xlog_cil_push(log, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -746,9 +751,6 @@ xfs_log_commit_cil(
|
||||
*
|
||||
* We return the current commit lsn to allow the callers to determine if a
|
||||
* iclog flush is necessary following this call.
|
||||
*
|
||||
* XXX: Initially, just push the CIL unconditionally and return whatever
|
||||
* commit lsn is there. It'll be empty, so this is broken for now.
|
||||
*/
|
||||
xfs_lsn_t
|
||||
xlog_cil_force_lsn(
|
||||
@ -766,8 +768,7 @@ xlog_cil_force_lsn(
|
||||
* xlog_cil_push() handles racing pushes for the same sequence,
|
||||
* so no need to deal with it here.
|
||||
*/
|
||||
if (sequence == cil->xc_current_sequence)
|
||||
xlog_cil_push(log, sequence);
|
||||
xlog_cil_push_foreground(log, sequence);
|
||||
|
||||
/*
|
||||
* See if we can find a previous sequence still committing.
|
||||
@ -826,3 +827,57 @@ xfs_log_item_in_current_chkpt(
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform initial CIL structure initialisation.
|
||||
*/
|
||||
int
|
||||
xlog_cil_init(
|
||||
struct log *log)
|
||||
{
|
||||
struct xfs_cil *cil;
|
||||
struct xfs_cil_ctx *ctx;
|
||||
|
||||
cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
|
||||
if (!cil)
|
||||
return ENOMEM;
|
||||
|
||||
ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
|
||||
if (!ctx) {
|
||||
kmem_free(cil);
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
|
||||
INIT_LIST_HEAD(&cil->xc_cil);
|
||||
INIT_LIST_HEAD(&cil->xc_committing);
|
||||
spin_lock_init(&cil->xc_cil_lock);
|
||||
init_rwsem(&cil->xc_ctx_lock);
|
||||
init_waitqueue_head(&cil->xc_commit_wait);
|
||||
|
||||
INIT_LIST_HEAD(&ctx->committing);
|
||||
INIT_LIST_HEAD(&ctx->busy_extents);
|
||||
ctx->sequence = 1;
|
||||
ctx->cil = cil;
|
||||
cil->xc_ctx = ctx;
|
||||
cil->xc_current_sequence = ctx->sequence;
|
||||
|
||||
cil->xc_log = log;
|
||||
log->l_cilp = cil;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
xlog_cil_destroy(
|
||||
struct log *log)
|
||||
{
|
||||
if (log->l_cilp->xc_ctx) {
|
||||
if (log->l_cilp->xc_ctx->ticket)
|
||||
xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
|
||||
kmem_free(log->l_cilp->xc_ctx);
|
||||
}
|
||||
|
||||
ASSERT(list_empty(&log->l_cilp->xc_cil));
|
||||
kmem_free(log->l_cilp);
|
||||
}
|
||||
|
||||
|
@ -417,6 +417,8 @@ struct xfs_cil {
|
||||
struct list_head xc_committing;
|
||||
wait_queue_head_t xc_commit_wait;
|
||||
xfs_lsn_t xc_current_sequence;
|
||||
struct work_struct xc_push_work;
|
||||
xfs_lsn_t xc_push_seq;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -40,7 +40,6 @@
|
||||
#include "xfs_extfree_item.h"
|
||||
#include "xfs_trans_priv.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_utils.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
@ -120,7 +119,7 @@ xlog_get_bp(
|
||||
nbblks += log->l_sectBBsize;
|
||||
nbblks = round_up(nbblks, log->l_sectBBsize);
|
||||
|
||||
bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0);
|
||||
bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0);
|
||||
if (bp)
|
||||
xfs_buf_unlock(bp);
|
||||
return bp;
|
||||
@ -146,7 +145,7 @@ xlog_align(
|
||||
{
|
||||
xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
|
||||
|
||||
ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
|
||||
ASSERT(offset + nbblks <= bp->b_length);
|
||||
return bp->b_addr + BBTOB(offset);
|
||||
}
|
||||
|
||||
@ -174,11 +173,12 @@ xlog_bread_noalign(
|
||||
nbblks = round_up(nbblks, log->l_sectBBsize);
|
||||
|
||||
ASSERT(nbblks > 0);
|
||||
ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
|
||||
ASSERT(nbblks <= bp->b_length);
|
||||
|
||||
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
|
||||
XFS_BUF_READ(bp);
|
||||
XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
|
||||
bp->b_io_length = nbblks;
|
||||
bp->b_error = 0;
|
||||
|
||||
xfsbdstrat(log->l_mp, bp);
|
||||
error = xfs_buf_iowait(bp);
|
||||
@ -218,7 +218,7 @@ xlog_bread_offset(
|
||||
xfs_caddr_t offset)
|
||||
{
|
||||
xfs_caddr_t orig_offset = bp->b_addr;
|
||||
int orig_len = bp->b_buffer_length;
|
||||
int orig_len = BBTOB(bp->b_length);
|
||||
int error, error2;
|
||||
|
||||
error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
|
||||
@ -259,13 +259,14 @@ xlog_bwrite(
|
||||
nbblks = round_up(nbblks, log->l_sectBBsize);
|
||||
|
||||
ASSERT(nbblks > 0);
|
||||
ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
|
||||
ASSERT(nbblks <= bp->b_length);
|
||||
|
||||
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
|
||||
XFS_BUF_ZEROFLAGS(bp);
|
||||
xfs_buf_hold(bp);
|
||||
xfs_buf_lock(bp);
|
||||
XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
|
||||
bp->b_io_length = nbblks;
|
||||
bp->b_error = 0;
|
||||
|
||||
error = xfs_bwrite(bp);
|
||||
if (error)
|
||||
@ -440,6 +441,8 @@ xlog_find_verify_cycle(
|
||||
* a log sector, or we're out of luck.
|
||||
*/
|
||||
bufblks = 1 << ffs(nbblks);
|
||||
while (bufblks > log->l_logBBsize)
|
||||
bufblks >>= 1;
|
||||
while (!(bp = xlog_get_bp(log, bufblks))) {
|
||||
bufblks >>= 1;
|
||||
if (bufblks < log->l_sectBBsize)
|
||||
@ -1225,6 +1228,8 @@ xlog_write_log_records(
|
||||
* log sector, or we're out of luck.
|
||||
*/
|
||||
bufblks = 1 << ffs(blocks);
|
||||
while (bufblks > log->l_logBBsize)
|
||||
bufblks >>= 1;
|
||||
while (!(bp = xlog_get_bp(log, bufblks))) {
|
||||
bufblks >>= 1;
|
||||
if (bufblks < sectbb)
|
||||
@ -1772,7 +1777,7 @@ xlog_recover_do_inode_buffer(
|
||||
|
||||
trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
|
||||
|
||||
inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
|
||||
inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
|
||||
for (i = 0; i < inodes_per_buf; i++) {
|
||||
next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
|
||||
offsetof(xfs_dinode_t, di_next_unlinked);
|
||||
@ -1814,7 +1819,8 @@ xlog_recover_do_inode_buffer(
|
||||
|
||||
ASSERT(item->ri_buf[item_index].i_addr != NULL);
|
||||
ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
|
||||
ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp));
|
||||
ASSERT((reg_buf_offset + reg_buf_bytes) <=
|
||||
BBTOB(bp->b_io_length));
|
||||
|
||||
/*
|
||||
* The current logged region contains a copy of the
|
||||
@ -1873,8 +1879,8 @@ xlog_recover_do_reg_buffer(
|
||||
ASSERT(nbits > 0);
|
||||
ASSERT(item->ri_buf[i].i_addr != NULL);
|
||||
ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
|
||||
ASSERT(XFS_BUF_COUNT(bp) >=
|
||||
((uint)bit << XFS_BLF_SHIFT)+(nbits<<XFS_BLF_SHIFT));
|
||||
ASSERT(BBTOB(bp->b_io_length) >=
|
||||
((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
|
||||
|
||||
/*
|
||||
* Do a sanity check if this is a dquot buffer. Just checking
|
||||
@ -2103,6 +2109,7 @@ xlog_recover_do_dquot_buffer(
|
||||
STATIC int
|
||||
xlog_recover_buffer_pass2(
|
||||
xlog_t *log,
|
||||
struct list_head *buffer_list,
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
|
||||
@ -2123,9 +2130,9 @@ xlog_recover_buffer_pass2(
|
||||
|
||||
trace_xfs_log_recover_buf_recover(log, buf_f);
|
||||
|
||||
buf_flags = XBF_LOCK;
|
||||
if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF))
|
||||
buf_flags |= XBF_MAPPED;
|
||||
buf_flags = 0;
|
||||
if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
|
||||
buf_flags |= XBF_UNMAPPED;
|
||||
|
||||
bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
|
||||
buf_flags);
|
||||
@ -2166,14 +2173,14 @@ xlog_recover_buffer_pass2(
|
||||
*/
|
||||
if (XFS_DINODE_MAGIC ==
|
||||
be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
|
||||
(XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize,
|
||||
(BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize,
|
||||
(__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
|
||||
xfs_buf_stale(bp);
|
||||
error = xfs_bwrite(bp);
|
||||
} else {
|
||||
ASSERT(bp->b_target->bt_mount == mp);
|
||||
bp->b_iodone = xlog_recover_iodone;
|
||||
xfs_buf_delwri_queue(bp);
|
||||
xfs_buf_delwri_queue(bp, buffer_list);
|
||||
}
|
||||
|
||||
xfs_buf_relse(bp);
|
||||
@ -2183,6 +2190,7 @@ xlog_recover_buffer_pass2(
|
||||
STATIC int
|
||||
xlog_recover_inode_pass2(
|
||||
xlog_t *log,
|
||||
struct list_head *buffer_list,
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_inode_log_format_t *in_f;
|
||||
@ -2220,8 +2228,7 @@ xlog_recover_inode_pass2(
|
||||
}
|
||||
trace_xfs_log_recover_inode_recover(log, in_f);
|
||||
|
||||
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
|
||||
XBF_LOCK);
|
||||
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0);
|
||||
if (!bp) {
|
||||
error = ENOMEM;
|
||||
goto error;
|
||||
@ -2436,7 +2443,7 @@ xlog_recover_inode_pass2(
|
||||
write_inode_buffer:
|
||||
ASSERT(bp->b_target->bt_mount == mp);
|
||||
bp->b_iodone = xlog_recover_iodone;
|
||||
xfs_buf_delwri_queue(bp);
|
||||
xfs_buf_delwri_queue(bp, buffer_list);
|
||||
xfs_buf_relse(bp);
|
||||
error:
|
||||
if (need_free)
|
||||
@ -2477,6 +2484,7 @@ xlog_recover_quotaoff_pass1(
|
||||
STATIC int
|
||||
xlog_recover_dquot_pass2(
|
||||
xlog_t *log,
|
||||
struct list_head *buffer_list,
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_mount_t *mp = log->l_mp;
|
||||
@ -2530,14 +2538,11 @@ xlog_recover_dquot_pass2(
|
||||
return XFS_ERROR(EIO);
|
||||
ASSERT(dq_f->qlf_len == 1);
|
||||
|
||||
error = xfs_read_buf(mp, mp->m_ddev_targp,
|
||||
dq_f->qlf_blkno,
|
||||
XFS_FSB_TO_BB(mp, dq_f->qlf_len),
|
||||
0, &bp);
|
||||
if (error) {
|
||||
xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#3)");
|
||||
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
|
||||
XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
ASSERT(bp);
|
||||
ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
|
||||
|
||||
@ -2558,7 +2563,7 @@ xlog_recover_dquot_pass2(
|
||||
ASSERT(dq_f->qlf_size == 2);
|
||||
ASSERT(bp->b_target->bt_mount == mp);
|
||||
bp->b_iodone = xlog_recover_iodone;
|
||||
xfs_buf_delwri_queue(bp);
|
||||
xfs_buf_delwri_queue(bp, buffer_list);
|
||||
xfs_buf_relse(bp);
|
||||
|
||||
return (0);
|
||||
@ -2642,7 +2647,8 @@ xlog_recover_efd_pass2(
|
||||
* xfs_trans_ail_delete() drops the
|
||||
* AIL lock.
|
||||
*/
|
||||
xfs_trans_ail_delete(ailp, lip);
|
||||
xfs_trans_ail_delete(ailp, lip,
|
||||
SHUTDOWN_CORRUPT_INCORE);
|
||||
xfs_efi_item_free(efip);
|
||||
spin_lock(&ailp->xa_lock);
|
||||
break;
|
||||
@ -2712,21 +2718,22 @@ STATIC int
|
||||
xlog_recover_commit_pass2(
|
||||
struct log *log,
|
||||
struct xlog_recover *trans,
|
||||
struct list_head *buffer_list,
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
|
||||
|
||||
switch (ITEM_TYPE(item)) {
|
||||
case XFS_LI_BUF:
|
||||
return xlog_recover_buffer_pass2(log, item);
|
||||
return xlog_recover_buffer_pass2(log, buffer_list, item);
|
||||
case XFS_LI_INODE:
|
||||
return xlog_recover_inode_pass2(log, item);
|
||||
return xlog_recover_inode_pass2(log, buffer_list, item);
|
||||
case XFS_LI_EFI:
|
||||
return xlog_recover_efi_pass2(log, item, trans->r_lsn);
|
||||
case XFS_LI_EFD:
|
||||
return xlog_recover_efd_pass2(log, item);
|
||||
case XFS_LI_DQUOT:
|
||||
return xlog_recover_dquot_pass2(log, item);
|
||||
return xlog_recover_dquot_pass2(log, buffer_list, item);
|
||||
case XFS_LI_QUOTAOFF:
|
||||
/* nothing to do in pass2 */
|
||||
return 0;
|
||||
@ -2750,8 +2757,9 @@ xlog_recover_commit_trans(
|
||||
struct xlog_recover *trans,
|
||||
int pass)
|
||||
{
|
||||
int error = 0;
|
||||
int error = 0, error2;
|
||||
xlog_recover_item_t *item;
|
||||
LIST_HEAD (buffer_list);
|
||||
|
||||
hlist_del(&trans->r_list);
|
||||
|
||||
@ -2760,16 +2768,27 @@ xlog_recover_commit_trans(
|
||||
return error;
|
||||
|
||||
list_for_each_entry(item, &trans->r_itemq, ri_list) {
|
||||
if (pass == XLOG_RECOVER_PASS1)
|
||||
switch (pass) {
|
||||
case XLOG_RECOVER_PASS1:
|
||||
error = xlog_recover_commit_pass1(log, trans, item);
|
||||
else
|
||||
error = xlog_recover_commit_pass2(log, trans, item);
|
||||
break;
|
||||
case XLOG_RECOVER_PASS2:
|
||||
error = xlog_recover_commit_pass2(log, trans,
|
||||
&buffer_list, item);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
if (error)
|
||||
return error;
|
||||
goto out;
|
||||
}
|
||||
|
||||
xlog_recover_free_trans(trans);
|
||||
return 0;
|
||||
|
||||
out:
|
||||
error2 = xfs_buf_delwri_submit(&buffer_list);
|
||||
return error ? error : error2;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
@ -3079,7 +3098,7 @@ xlog_recover_process_one_iunlink(
|
||||
/*
|
||||
* Get the on disk inode to find the next inode in the bucket.
|
||||
*/
|
||||
error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK);
|
||||
error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0);
|
||||
if (error)
|
||||
goto fail_iput;
|
||||
|
||||
@ -3639,11 +3658,8 @@ xlog_do_recover(
|
||||
* First replay the images in the log.
|
||||
*/
|
||||
error = xlog_do_log_recovery(log, head_blk, tail_blk);
|
||||
if (error) {
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
xfs_flush_buftarg(log->l_mp->m_ddev_targp, 1);
|
||||
|
||||
/*
|
||||
* If IO errors happened during recovery, bail out.
|
||||
@ -3670,7 +3686,6 @@ xlog_do_recover(
|
||||
bp = xfs_getsb(log->l_mp, 0);
|
||||
XFS_BUF_UNDONE(bp);
|
||||
ASSERT(!(XFS_BUF_ISWRITE(bp)));
|
||||
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
|
||||
XFS_BUF_READ(bp);
|
||||
XFS_BUF_UNASYNC(bp);
|
||||
xfsbdstrat(log->l_mp, bp);
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_trans_priv.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_dir2.h"
|
||||
@ -37,7 +38,6 @@
|
||||
#include "xfs_rtalloc.h"
|
||||
#include "xfs_bmap.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_fsops.h"
|
||||
#include "xfs_utils.h"
|
||||
@ -683,8 +683,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
|
||||
sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
|
||||
|
||||
reread:
|
||||
bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
|
||||
XFS_SB_DADDR, sector_size, 0);
|
||||
bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
|
||||
BTOBB(sector_size), 0);
|
||||
if (!bp) {
|
||||
if (loud)
|
||||
xfs_warn(mp, "SB buffer read failed");
|
||||
@ -1032,9 +1032,9 @@ xfs_check_sizes(xfs_mount_t *mp)
|
||||
xfs_warn(mp, "filesystem size mismatch detected");
|
||||
return XFS_ERROR(EFBIG);
|
||||
}
|
||||
bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
|
||||
bp = xfs_buf_read_uncached(mp->m_ddev_targp,
|
||||
d - XFS_FSS_TO_BB(mp, 1),
|
||||
BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
|
||||
XFS_FSS_TO_BB(mp, 1), 0);
|
||||
if (!bp) {
|
||||
xfs_warn(mp, "last sector read failed");
|
||||
return EIO;
|
||||
@ -1047,9 +1047,9 @@ xfs_check_sizes(xfs_mount_t *mp)
|
||||
xfs_warn(mp, "log size mismatch detected");
|
||||
return XFS_ERROR(EFBIG);
|
||||
}
|
||||
bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp,
|
||||
bp = xfs_buf_read_uncached(mp->m_logdev_targp,
|
||||
d - XFS_FSB_TO_BB(mp, 1),
|
||||
XFS_FSB_TO_B(mp, 1), 0);
|
||||
XFS_FSB_TO_BB(mp, 1), 0);
|
||||
if (!bp) {
|
||||
xfs_warn(mp, "log device read failed");
|
||||
return EIO;
|
||||
@ -1288,7 +1288,7 @@ xfs_mountfs(
|
||||
XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
|
||||
if (error) {
|
||||
xfs_warn(mp, "log mount failed");
|
||||
goto out_free_perag;
|
||||
goto out_fail_wait;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1315,7 +1315,7 @@ xfs_mountfs(
|
||||
!mp->m_sb.sb_inprogress) {
|
||||
error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
|
||||
if (error)
|
||||
goto out_free_perag;
|
||||
goto out_fail_wait;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1439,6 +1439,10 @@ xfs_mountfs(
|
||||
IRELE(rip);
|
||||
out_log_dealloc:
|
||||
xfs_log_unmount(mp);
|
||||
out_fail_wait:
|
||||
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
|
||||
xfs_wait_buftarg(mp->m_logdev_targp);
|
||||
xfs_wait_buftarg(mp->m_ddev_targp);
|
||||
out_free_perag:
|
||||
xfs_free_perag(mp);
|
||||
out_remove_uuid:
|
||||
@ -1475,15 +1479,15 @@ xfs_unmountfs(
|
||||
xfs_log_force(mp, XFS_LOG_SYNC);
|
||||
|
||||
/*
|
||||
* Do a delwri reclaim pass first so that as many dirty inodes are
|
||||
* queued up for IO as possible. Then flush the buffers before making
|
||||
* a synchronous path to catch all the remaining inodes are reclaimed.
|
||||
* This makes the reclaim process as quick as possible by avoiding
|
||||
* synchronous writeout and blocking on inodes already in the delwri
|
||||
* state as much as possible.
|
||||
* Flush all pending changes from the AIL.
|
||||
*/
|
||||
xfs_ail_push_all_sync(mp->m_ail);
|
||||
|
||||
/*
|
||||
* And reclaim all inodes. At this point there should be no dirty
|
||||
* inode, and none should be pinned or locked, but use synchronous
|
||||
* reclaim just to be sure.
|
||||
*/
|
||||
xfs_reclaim_inodes(mp, 0);
|
||||
xfs_flush_buftarg(mp->m_ddev_targp, 1);
|
||||
xfs_reclaim_inodes(mp, SYNC_WAIT);
|
||||
|
||||
xfs_qm_unmount(mp);
|
||||
@ -1519,15 +1523,12 @@ xfs_unmountfs(
|
||||
if (error)
|
||||
xfs_warn(mp, "Unable to update superblock counters. "
|
||||
"Freespace may not be correct on next mount.");
|
||||
xfs_unmountfs_writesb(mp);
|
||||
|
||||
/*
|
||||
* Make sure all buffers have been flushed and completed before
|
||||
* unmounting the log.
|
||||
* At this point we might have modified the superblock again and thus
|
||||
* added an item to the AIL, thus flush it again.
|
||||
*/
|
||||
error = xfs_flush_buftarg(mp->m_ddev_targp, 1);
|
||||
if (error)
|
||||
xfs_warn(mp, "%d busy buffers during unmount.", error);
|
||||
xfs_ail_push_all_sync(mp->m_ail);
|
||||
xfs_wait_buftarg(mp->m_ddev_targp);
|
||||
|
||||
xfs_log_unmount_write(mp);
|
||||
@ -1588,36 +1589,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
|
||||
return error;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_unmountfs_writesb(xfs_mount_t *mp)
|
||||
{
|
||||
xfs_buf_t *sbp;
|
||||
int error = 0;
|
||||
|
||||
/*
|
||||
* skip superblock write if fs is read-only, or
|
||||
* if we are doing a forced umount.
|
||||
*/
|
||||
if (!((mp->m_flags & XFS_MOUNT_RDONLY) ||
|
||||
XFS_FORCED_SHUTDOWN(mp))) {
|
||||
|
||||
sbp = xfs_getsb(mp, 0);
|
||||
|
||||
XFS_BUF_UNDONE(sbp);
|
||||
XFS_BUF_UNREAD(sbp);
|
||||
xfs_buf_delwri_dequeue(sbp);
|
||||
XFS_BUF_WRITE(sbp);
|
||||
XFS_BUF_UNASYNC(sbp);
|
||||
ASSERT(sbp->b_target == mp->m_ddev_targp);
|
||||
xfsbdstrat(mp, sbp);
|
||||
error = xfs_buf_iowait(sbp);
|
||||
if (error)
|
||||
xfs_buf_ioerror_alert(sbp, __func__);
|
||||
xfs_buf_relse(sbp);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_mod_sb() can be used to copy arbitrary changes to the
|
||||
* in-core superblock into the superblock buffer to be logged.
|
||||
|
@ -214,6 +214,7 @@ typedef struct xfs_mount {
|
||||
|
||||
struct workqueue_struct *m_data_workqueue;
|
||||
struct workqueue_struct *m_unwritten_workqueue;
|
||||
struct workqueue_struct *m_cil_workqueue;
|
||||
} xfs_mount_t;
|
||||
|
||||
/*
|
||||
@ -378,7 +379,6 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
|
||||
extern int xfs_mountfs(xfs_mount_t *mp);
|
||||
|
||||
extern void xfs_unmountfs(xfs_mount_t *);
|
||||
extern int xfs_unmountfs_writesb(xfs_mount_t *);
|
||||
extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
|
||||
extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
|
||||
uint, int);
|
||||
|
196
fs/xfs/xfs_qm.c
196
fs/xfs/xfs_qm.c
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -65,7 +64,8 @@ STATIC int
|
||||
xfs_qm_dquot_walk(
|
||||
struct xfs_mount *mp,
|
||||
int type,
|
||||
int (*execute)(struct xfs_dquot *dqp))
|
||||
int (*execute)(struct xfs_dquot *dqp, void *data),
|
||||
void *data)
|
||||
{
|
||||
struct xfs_quotainfo *qi = mp->m_quotainfo;
|
||||
struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
|
||||
@ -97,7 +97,7 @@ restart:
|
||||
|
||||
next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
|
||||
|
||||
error = execute(batch[i]);
|
||||
error = execute(batch[i], data);
|
||||
if (error == EAGAIN) {
|
||||
skipped++;
|
||||
continue;
|
||||
@ -129,7 +129,8 @@ restart:
|
||||
*/
|
||||
STATIC int
|
||||
xfs_qm_dqpurge(
|
||||
struct xfs_dquot *dqp)
|
||||
struct xfs_dquot *dqp,
|
||||
void *data)
|
||||
{
|
||||
struct xfs_mount *mp = dqp->q_mount;
|
||||
struct xfs_quotainfo *qi = mp->m_quotainfo;
|
||||
@ -153,21 +154,7 @@ xfs_qm_dqpurge(
|
||||
|
||||
dqp->dq_flags |= XFS_DQ_FREEING;
|
||||
|
||||
/*
|
||||
* If we're turning off quotas, we have to make sure that, for
|
||||
* example, we don't delete quota disk blocks while dquots are
|
||||
* in the process of getting written to those disk blocks.
|
||||
* This dquot might well be on AIL, and we can't leave it there
|
||||
* if we're turning off quotas. Basically, we need this flush
|
||||
* lock, and are willing to block on it.
|
||||
*/
|
||||
if (!xfs_dqflock_nowait(dqp)) {
|
||||
/*
|
||||
* Block on the flush lock after nudging dquot buffer,
|
||||
* if it is incore.
|
||||
*/
|
||||
xfs_dqflock_pushbuf_wait(dqp);
|
||||
}
|
||||
xfs_dqflock(dqp);
|
||||
|
||||
/*
|
||||
* If we are turning this type of quotas off, we don't care
|
||||
@ -175,16 +162,21 @@ xfs_qm_dqpurge(
|
||||
* we're unmounting, we do care, so we flush it and wait.
|
||||
*/
|
||||
if (XFS_DQ_IS_DIRTY(dqp)) {
|
||||
int error;
|
||||
struct xfs_buf *bp = NULL;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* We don't care about getting disk errors here. We need
|
||||
* to purge this dquot anyway, so we go ahead regardless.
|
||||
*/
|
||||
error = xfs_qm_dqflush(dqp, SYNC_WAIT);
|
||||
if (error)
|
||||
error = xfs_qm_dqflush(dqp, &bp);
|
||||
if (error) {
|
||||
xfs_warn(mp, "%s: dquot %p flush failed",
|
||||
__func__, dqp);
|
||||
} else {
|
||||
error = xfs_bwrite(bp);
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
xfs_dqflock(dqp);
|
||||
}
|
||||
|
||||
@ -226,11 +218,11 @@ xfs_qm_dqpurge_all(
|
||||
uint flags)
|
||||
{
|
||||
if (flags & XFS_QMOPT_UQUOTA)
|
||||
xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge);
|
||||
xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
|
||||
if (flags & XFS_QMOPT_GQUOTA)
|
||||
xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge);
|
||||
xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
|
||||
if (flags & XFS_QMOPT_PQUOTA)
|
||||
xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge);
|
||||
xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -483,6 +475,23 @@ done:
|
||||
xfs_dqunlock(udq);
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_qm_need_dqattach(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
|
||||
if (!XFS_IS_QUOTA_RUNNING(mp))
|
||||
return false;
|
||||
if (!XFS_IS_QUOTA_ON(mp))
|
||||
return false;
|
||||
if (!XFS_NOT_DQATTACHED(mp, ip))
|
||||
return false;
|
||||
if (ip->i_ino == mp->m_sb.sb_uquotino ||
|
||||
ip->i_ino == mp->m_sb.sb_gquotino)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
|
||||
@ -500,11 +509,7 @@ xfs_qm_dqattach_locked(
|
||||
uint nquotas = 0;
|
||||
int error = 0;
|
||||
|
||||
if (!XFS_IS_QUOTA_RUNNING(mp) ||
|
||||
!XFS_IS_QUOTA_ON(mp) ||
|
||||
!XFS_NOT_DQATTACHED(mp, ip) ||
|
||||
ip->i_ino == mp->m_sb.sb_uquotino ||
|
||||
ip->i_ino == mp->m_sb.sb_gquotino)
|
||||
if (!xfs_qm_need_dqattach(ip))
|
||||
return 0;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
||||
@ -575,6 +580,9 @@ xfs_qm_dqattach(
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!xfs_qm_need_dqattach(ip))
|
||||
return 0;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
error = xfs_qm_dqattach_locked(ip, flags);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
@ -855,15 +863,16 @@ xfs_qm_reset_dqcounts(
|
||||
|
||||
STATIC int
|
||||
xfs_qm_dqiter_bufs(
|
||||
xfs_mount_t *mp,
|
||||
xfs_dqid_t firstid,
|
||||
xfs_fsblock_t bno,
|
||||
xfs_filblks_t blkcnt,
|
||||
uint flags)
|
||||
struct xfs_mount *mp,
|
||||
xfs_dqid_t firstid,
|
||||
xfs_fsblock_t bno,
|
||||
xfs_filblks_t blkcnt,
|
||||
uint flags,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
xfs_buf_t *bp;
|
||||
int error;
|
||||
int type;
|
||||
struct xfs_buf *bp;
|
||||
int error;
|
||||
int type;
|
||||
|
||||
ASSERT(blkcnt > 0);
|
||||
type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
|
||||
@ -887,7 +896,7 @@ xfs_qm_dqiter_bufs(
|
||||
break;
|
||||
|
||||
xfs_qm_reset_dqcounts(mp, bp, firstid, type);
|
||||
xfs_buf_delwri_queue(bp);
|
||||
xfs_buf_delwri_queue(bp, buffer_list);
|
||||
xfs_buf_relse(bp);
|
||||
/*
|
||||
* goto the next block.
|
||||
@ -895,6 +904,7 @@ xfs_qm_dqiter_bufs(
|
||||
bno++;
|
||||
firstid += mp->m_quotainfo->qi_dqperchunk;
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -904,11 +914,12 @@ xfs_qm_dqiter_bufs(
|
||||
*/
|
||||
STATIC int
|
||||
xfs_qm_dqiterate(
|
||||
xfs_mount_t *mp,
|
||||
xfs_inode_t *qip,
|
||||
uint flags)
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_inode *qip,
|
||||
uint flags,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
xfs_bmbt_irec_t *map;
|
||||
struct xfs_bmbt_irec *map;
|
||||
int i, nmaps; /* number of map entries */
|
||||
int error; /* return value */
|
||||
xfs_fileoff_t lblkno;
|
||||
@ -975,21 +986,17 @@ xfs_qm_dqiterate(
|
||||
* Iterate thru all the blks in the extent and
|
||||
* reset the counters of all the dquots inside them.
|
||||
*/
|
||||
if ((error = xfs_qm_dqiter_bufs(mp,
|
||||
firstid,
|
||||
map[i].br_startblock,
|
||||
map[i].br_blockcount,
|
||||
flags))) {
|
||||
break;
|
||||
}
|
||||
error = xfs_qm_dqiter_bufs(mp, firstid,
|
||||
map[i].br_startblock,
|
||||
map[i].br_blockcount,
|
||||
flags, buffer_list);
|
||||
if (error)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (error)
|
||||
break;
|
||||
} while (nmaps > 0);
|
||||
|
||||
out:
|
||||
kmem_free(map);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -1182,8 +1189,11 @@ error0:
|
||||
|
||||
STATIC int
|
||||
xfs_qm_flush_one(
|
||||
struct xfs_dquot *dqp)
|
||||
struct xfs_dquot *dqp,
|
||||
void *data)
|
||||
{
|
||||
struct list_head *buffer_list = data;
|
||||
struct xfs_buf *bp = NULL;
|
||||
int error = 0;
|
||||
|
||||
xfs_dqlock(dqp);
|
||||
@ -1192,11 +1202,13 @@ xfs_qm_flush_one(
|
||||
if (!XFS_DQ_IS_DIRTY(dqp))
|
||||
goto out_unlock;
|
||||
|
||||
if (!xfs_dqflock_nowait(dqp))
|
||||
xfs_dqflock_pushbuf_wait(dqp);
|
||||
|
||||
error = xfs_qm_dqflush(dqp, 0);
|
||||
xfs_dqflock(dqp);
|
||||
error = xfs_qm_dqflush(dqp, &bp);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
xfs_buf_delwri_queue(bp, buffer_list);
|
||||
xfs_buf_relse(bp);
|
||||
out_unlock:
|
||||
xfs_dqunlock(dqp);
|
||||
return error;
|
||||
@ -1215,6 +1227,7 @@ xfs_qm_quotacheck(
|
||||
size_t structsz;
|
||||
xfs_inode_t *uip, *gip;
|
||||
uint flags;
|
||||
LIST_HEAD (buffer_list);
|
||||
|
||||
count = INT_MAX;
|
||||
structsz = 1;
|
||||
@ -1233,7 +1246,8 @@ xfs_qm_quotacheck(
|
||||
*/
|
||||
uip = mp->m_quotainfo->qi_uquotaip;
|
||||
if (uip) {
|
||||
error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
|
||||
error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA,
|
||||
&buffer_list);
|
||||
if (error)
|
||||
goto error_return;
|
||||
flags |= XFS_UQUOTA_CHKD;
|
||||
@ -1242,7 +1256,8 @@ xfs_qm_quotacheck(
|
||||
gip = mp->m_quotainfo->qi_gquotaip;
|
||||
if (gip) {
|
||||
error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
|
||||
XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
|
||||
XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA,
|
||||
&buffer_list);
|
||||
if (error)
|
||||
goto error_return;
|
||||
flags |= XFS_OQUOTA_CHKD;
|
||||
@ -1265,19 +1280,27 @@ xfs_qm_quotacheck(
|
||||
* We've made all the changes that we need to make incore. Flush them
|
||||
* down to disk buffers if everything was updated successfully.
|
||||
*/
|
||||
if (XFS_IS_UQUOTA_ON(mp))
|
||||
error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one);
|
||||
if (XFS_IS_UQUOTA_ON(mp)) {
|
||||
error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one,
|
||||
&buffer_list);
|
||||
}
|
||||
if (XFS_IS_GQUOTA_ON(mp)) {
|
||||
error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one);
|
||||
error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one,
|
||||
&buffer_list);
|
||||
if (!error)
|
||||
error = error2;
|
||||
}
|
||||
if (XFS_IS_PQUOTA_ON(mp)) {
|
||||
error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one);
|
||||
error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one,
|
||||
&buffer_list);
|
||||
if (!error)
|
||||
error = error2;
|
||||
}
|
||||
|
||||
error2 = xfs_buf_delwri_submit(&buffer_list);
|
||||
if (!error)
|
||||
error = error2;
|
||||
|
||||
/*
|
||||
* We can get this error if we couldn't do a dquot allocation inside
|
||||
* xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
|
||||
@ -1290,15 +1313,6 @@ xfs_qm_quotacheck(
|
||||
goto error_return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We didn't log anything, because if we crashed, we'll have to
|
||||
* start the quotacheck from scratch anyway. However, we must make
|
||||
* sure that our dquot changes are secure before we put the
|
||||
* quotacheck'd stamp on the superblock. So, here we do a synchronous
|
||||
* flush.
|
||||
*/
|
||||
xfs_flush_buftarg(mp->m_ddev_targp, 1);
|
||||
|
||||
/*
|
||||
* If one type of quotas is off, then it will lose its
|
||||
* quotachecked status, since we won't be doing accounting for
|
||||
@ -1308,6 +1322,13 @@ xfs_qm_quotacheck(
|
||||
mp->m_qflags |= flags;
|
||||
|
||||
error_return:
|
||||
while (!list_empty(&buffer_list)) {
|
||||
struct xfs_buf *bp =
|
||||
list_first_entry(&buffer_list, struct xfs_buf, b_list);
|
||||
list_del_init(&bp->b_list);
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
xfs_warn(mp,
|
||||
"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
|
||||
@ -1424,6 +1445,7 @@ xfs_qm_dqfree_one(
|
||||
STATIC void
|
||||
xfs_qm_dqreclaim_one(
|
||||
struct xfs_dquot *dqp,
|
||||
struct list_head *buffer_list,
|
||||
struct list_head *dispose_list)
|
||||
{
|
||||
struct xfs_mount *mp = dqp->q_mount;
|
||||
@ -1456,25 +1478,20 @@ xfs_qm_dqreclaim_one(
|
||||
if (!xfs_dqflock_nowait(dqp))
|
||||
goto out_busy;
|
||||
|
||||
/*
|
||||
* We have the flush lock so we know that this is not in the
|
||||
* process of being flushed. So, if this is dirty, flush it
|
||||
* DELWRI so that we don't get a freelist infested with
|
||||
* dirty dquots.
|
||||
*/
|
||||
if (XFS_DQ_IS_DIRTY(dqp)) {
|
||||
struct xfs_buf *bp = NULL;
|
||||
|
||||
trace_xfs_dqreclaim_dirty(dqp);
|
||||
|
||||
/*
|
||||
* We flush it delayed write, so don't bother releasing the
|
||||
* freelist lock.
|
||||
*/
|
||||
error = xfs_qm_dqflush(dqp, 0);
|
||||
error = xfs_qm_dqflush(dqp, &bp);
|
||||
if (error) {
|
||||
xfs_warn(mp, "%s: dquot %p flush failed",
|
||||
__func__, dqp);
|
||||
goto out_busy;
|
||||
}
|
||||
|
||||
xfs_buf_delwri_queue(bp, buffer_list);
|
||||
xfs_buf_relse(bp);
|
||||
/*
|
||||
* Give the dquot another try on the freelist, as the
|
||||
* flushing will take some time.
|
||||
@ -1518,8 +1535,10 @@ xfs_qm_shake(
|
||||
struct xfs_quotainfo *qi =
|
||||
container_of(shrink, struct xfs_quotainfo, qi_shrinker);
|
||||
int nr_to_scan = sc->nr_to_scan;
|
||||
LIST_HEAD (buffer_list);
|
||||
LIST_HEAD (dispose_list);
|
||||
struct xfs_dquot *dqp;
|
||||
int error;
|
||||
|
||||
if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
|
||||
return 0;
|
||||
@ -1532,15 +1551,20 @@ xfs_qm_shake(
|
||||
break;
|
||||
dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
|
||||
q_lru);
|
||||
xfs_qm_dqreclaim_one(dqp, &dispose_list);
|
||||
xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list);
|
||||
}
|
||||
mutex_unlock(&qi->qi_lru_lock);
|
||||
|
||||
error = xfs_buf_delwri_submit(&buffer_list);
|
||||
if (error)
|
||||
xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
|
||||
|
||||
while (!list_empty(&dispose_list)) {
|
||||
dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
|
||||
list_del_init(&dqp->q_lru);
|
||||
xfs_qm_dqfree_one(dqp);
|
||||
}
|
||||
|
||||
out:
|
||||
return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
|
||||
}
|
||||
|
@ -17,9 +17,7 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -17,7 +17,6 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_mount.h"
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -34,7 +33,6 @@
|
||||
#include "xfs_rtalloc.h"
|
||||
#include "xfs_fsops.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_inode_item.h"
|
||||
#include "xfs_trans_space.h"
|
||||
#include "xfs_utils.h"
|
||||
@ -1872,9 +1870,9 @@ xfs_growfs_rt(
|
||||
/*
|
||||
* Read in the last block of the device, make sure it exists.
|
||||
*/
|
||||
bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
|
||||
bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
|
||||
XFS_FSB_TO_BB(mp, nrblocks - 1),
|
||||
XFS_FSB_TO_B(mp, 1), 0);
|
||||
XFS_FSB_TO_BB(mp, 1), 0);
|
||||
if (!bp)
|
||||
return EIO;
|
||||
xfs_buf_relse(bp);
|
||||
@ -2219,9 +2217,9 @@ xfs_rtmount_init(
|
||||
(unsigned long long) mp->m_sb.sb_rblocks);
|
||||
return XFS_ERROR(EFBIG);
|
||||
}
|
||||
bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
|
||||
bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
|
||||
d - XFS_FSB_TO_BB(mp, 1),
|
||||
XFS_FSB_TO_B(mp, 1), 0);
|
||||
XFS_FSB_TO_BB(mp, 1), 0);
|
||||
if (!bp) {
|
||||
xfs_warn(mp, "realtime device size check failed");
|
||||
return EIO;
|
||||
|
156
fs/xfs/xfs_rw.c
156
fs/xfs/xfs_rw.c
@ -1,156 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_bmap_btree.h"
|
||||
#include "xfs_dinode.h"
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_rw.h"
|
||||
|
||||
/*
|
||||
* Force a shutdown of the filesystem instantly while keeping
|
||||
* the filesystem consistent. We don't do an unmount here; just shutdown
|
||||
* the shop, make sure that absolutely nothing persistent happens to
|
||||
* this filesystem after this point.
|
||||
*/
|
||||
void
|
||||
xfs_do_force_shutdown(
|
||||
xfs_mount_t *mp,
|
||||
int flags,
|
||||
char *fname,
|
||||
int lnnum)
|
||||
{
|
||||
int logerror;
|
||||
|
||||
logerror = flags & SHUTDOWN_LOG_IO_ERROR;
|
||||
|
||||
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
|
||||
xfs_notice(mp,
|
||||
"%s(0x%x) called from line %d of file %s. Return address = 0x%p",
|
||||
__func__, flags, lnnum, fname, __return_address);
|
||||
}
|
||||
/*
|
||||
* No need to duplicate efforts.
|
||||
*/
|
||||
if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
|
||||
return;
|
||||
|
||||
/*
|
||||
* This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
|
||||
* queue up anybody new on the log reservations, and wakes up
|
||||
* everybody who's sleeping on log reservations to tell them
|
||||
* the bad news.
|
||||
*/
|
||||
if (xfs_log_force_umount(mp, logerror))
|
||||
return;
|
||||
|
||||
if (flags & SHUTDOWN_CORRUPT_INCORE) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
|
||||
"Corruption of in-memory data detected. Shutting down filesystem");
|
||||
if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
|
||||
xfs_stack_trace();
|
||||
} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
|
||||
if (logerror) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
|
||||
"Log I/O Error Detected. Shutting down filesystem");
|
||||
} else if (flags & SHUTDOWN_DEVICE_REQ) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
|
||||
"All device paths lost. Shutting down filesystem");
|
||||
} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
|
||||
"I/O Error Detected. Shutting down filesystem");
|
||||
}
|
||||
}
|
||||
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
|
||||
xfs_alert(mp,
|
||||
"Please umount the filesystem and rectify the problem(s)");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This isn't an absolute requirement, but it is
|
||||
* just a good idea to call xfs_read_buf instead of
|
||||
* directly doing a read_buf call. For one, we shouldn't
|
||||
* be doing this disk read if we are in SHUTDOWN state anyway,
|
||||
* so this stops that from happening. Secondly, this does all
|
||||
* the error checking stuff and the brelse if appropriate for
|
||||
* the caller, so the code can be a little leaner.
|
||||
*/
|
||||
|
||||
int
|
||||
xfs_read_buf(
|
||||
struct xfs_mount *mp,
|
||||
xfs_buftarg_t *target,
|
||||
xfs_daddr_t blkno,
|
||||
int len,
|
||||
uint flags,
|
||||
xfs_buf_t **bpp)
|
||||
{
|
||||
xfs_buf_t *bp;
|
||||
int error;
|
||||
|
||||
if (!flags)
|
||||
flags = XBF_LOCK | XBF_MAPPED;
|
||||
|
||||
bp = xfs_buf_read(target, blkno, len, flags);
|
||||
if (!bp)
|
||||
return XFS_ERROR(EIO);
|
||||
error = bp->b_error;
|
||||
if (!error && !XFS_FORCED_SHUTDOWN(mp)) {
|
||||
*bpp = bp;
|
||||
} else {
|
||||
*bpp = NULL;
|
||||
if (error) {
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
} else {
|
||||
error = XFS_ERROR(EIO);
|
||||
}
|
||||
if (bp) {
|
||||
XFS_BUF_UNDONE(bp);
|
||||
xfs_buf_stale(bp);
|
||||
/*
|
||||
* brelse clears B_ERROR and b_error
|
||||
*/
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* helper function to extract extent size hint from inode
|
||||
*/
|
||||
xfs_extlen_t
|
||||
xfs_get_extsz_hint(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
|
||||
return ip->i_d.di_extsize;
|
||||
if (XFS_IS_REALTIME_INODE(ip))
|
||||
return ip->i_mount->m_sb.sb_rextsize;
|
||||
return 0;
|
||||
}
|
@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#ifndef __XFS_RW_H__
|
||||
#define __XFS_RW_H__
|
||||
|
||||
struct xfs_buf;
|
||||
struct xfs_inode;
|
||||
struct xfs_mount;
|
||||
|
||||
/*
|
||||
* Convert the given file system block to a disk block.
|
||||
* We have to treat it differently based on whether the
|
||||
* file is a real time file or not, because the bmap code
|
||||
* does.
|
||||
*/
|
||||
static inline xfs_daddr_t
|
||||
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
|
||||
{
|
||||
return (XFS_IS_REALTIME_INODE(ip) ? \
|
||||
(xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
|
||||
XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Prototypes for functions in xfs_rw.c.
|
||||
*/
|
||||
extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
|
||||
xfs_daddr_t blkno, int len, uint flags,
|
||||
struct xfs_buf **bpp);
|
||||
extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
|
||||
|
||||
#endif /* __XFS_RW_H__ */
|
@ -17,7 +17,6 @@
|
||||
*/
|
||||
|
||||
#include "xfs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
@ -622,7 +621,7 @@ void
|
||||
xfs_blkdev_issue_flush(
|
||||
xfs_buftarg_t *buftarg)
|
||||
{
|
||||
blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
|
||||
blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
@ -773,8 +772,14 @@ xfs_init_mount_workqueues(
|
||||
if (!mp->m_unwritten_workqueue)
|
||||
goto out_destroy_data_iodone_queue;
|
||||
|
||||
mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
|
||||
WQ_MEM_RECLAIM, 0, mp->m_fsname);
|
||||
if (!mp->m_cil_workqueue)
|
||||
goto out_destroy_unwritten;
|
||||
return 0;
|
||||
|
||||
out_destroy_unwritten:
|
||||
destroy_workqueue(mp->m_unwritten_workqueue);
|
||||
out_destroy_data_iodone_queue:
|
||||
destroy_workqueue(mp->m_data_workqueue);
|
||||
out:
|
||||
@ -785,6 +790,7 @@ STATIC void
|
||||
xfs_destroy_mount_workqueues(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
destroy_workqueue(mp->m_cil_workqueue);
|
||||
destroy_workqueue(mp->m_data_workqueue);
|
||||
destroy_workqueue(mp->m_unwritten_workqueue);
|
||||
}
|
||||
@ -981,18 +987,9 @@ xfs_fs_put_super(
|
||||
{
|
||||
struct xfs_mount *mp = XFS_M(sb);
|
||||
|
||||
xfs_syncd_stop(mp);
|
||||
|
||||
/*
|
||||
* Blow away any referenced inode in the filestreams cache.
|
||||
* This can and will cause log traffic as inodes go inactive
|
||||
* here.
|
||||
*/
|
||||
xfs_filestream_unmount(mp);
|
||||
|
||||
xfs_flush_buftarg(mp->m_ddev_targp, 1);
|
||||
|
||||
xfs_unmountfs(mp);
|
||||
xfs_syncd_stop(mp);
|
||||
xfs_freesb(mp);
|
||||
xfs_icsb_destroy_counters(mp);
|
||||
xfs_destroy_mount_workqueues(mp);
|
||||
@ -1072,7 +1069,7 @@ xfs_fs_statfs(
|
||||
|
||||
spin_unlock(&mp->m_sb_lock);
|
||||
|
||||
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
|
||||
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
|
||||
((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
|
||||
(XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
|
||||
xfs_qm_statvfs(ip, statp);
|
||||
@ -1362,31 +1359,32 @@ xfs_fs_fill_super(
|
||||
sb->s_time_gran = 1;
|
||||
set_posix_acl_flag(sb);
|
||||
|
||||
error = xfs_mountfs(mp);
|
||||
error = xfs_syncd_init(mp);
|
||||
if (error)
|
||||
goto out_filestream_unmount;
|
||||
|
||||
error = xfs_syncd_init(mp);
|
||||
error = xfs_mountfs(mp);
|
||||
if (error)
|
||||
goto out_unmount;
|
||||
goto out_syncd_stop;
|
||||
|
||||
root = igrab(VFS_I(mp->m_rootip));
|
||||
if (!root) {
|
||||
error = ENOENT;
|
||||
goto out_syncd_stop;
|
||||
goto out_unmount;
|
||||
}
|
||||
if (is_bad_inode(root)) {
|
||||
error = EINVAL;
|
||||
goto out_syncd_stop;
|
||||
goto out_unmount;
|
||||
}
|
||||
sb->s_root = d_make_root(root);
|
||||
if (!sb->s_root) {
|
||||
error = ENOMEM;
|
||||
goto out_syncd_stop;
|
||||
goto out_unmount;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_syncd_stop:
|
||||
xfs_syncd_stop(mp);
|
||||
out_filestream_unmount:
|
||||
xfs_filestream_unmount(mp);
|
||||
out_free_sb:
|
||||
@ -1403,19 +1401,10 @@ out_destroy_workqueues:
|
||||
out:
|
||||
return -error;
|
||||
|
||||
out_syncd_stop:
|
||||
xfs_syncd_stop(mp);
|
||||
out_unmount:
|
||||
/*
|
||||
* Blow away any referenced inode in the filestreams cache.
|
||||
* This can and will cause log traffic as inodes go inactive
|
||||
* here.
|
||||
*/
|
||||
xfs_filestream_unmount(mp);
|
||||
|
||||
xfs_flush_buftarg(mp->m_ddev_targp, 1);
|
||||
|
||||
xfs_unmountfs(mp);
|
||||
xfs_syncd_stop(mp);
|
||||
goto out_free_sb;
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
@ -241,45 +240,6 @@ xfs_sync_inode_data(
|
||||
return error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_sync_inode_attr(
|
||||
struct xfs_inode *ip,
|
||||
struct xfs_perag *pag,
|
||||
int flags)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||
if (xfs_inode_clean(ip))
|
||||
goto out_unlock;
|
||||
if (!xfs_iflock_nowait(ip)) {
|
||||
if (!(flags & SYNC_WAIT))
|
||||
goto out_unlock;
|
||||
xfs_iflock(ip);
|
||||
}
|
||||
|
||||
if (xfs_inode_clean(ip)) {
|
||||
xfs_ifunlock(ip);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
error = xfs_iflush(ip, flags);
|
||||
|
||||
/*
|
||||
* We don't want to try again on non-blocking flushes that can't run
|
||||
* again immediately. If an inode really must be written, then that's
|
||||
* what the SYNC_WAIT flag is for.
|
||||
*/
|
||||
if (error == EAGAIN) {
|
||||
ASSERT(!(flags & SYNC_WAIT));
|
||||
error = 0;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write out pagecache data for the whole filesystem.
|
||||
*/
|
||||
@ -300,19 +260,6 @@ xfs_sync_data(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write out inode metadata (attributes) for the whole filesystem.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_sync_attr(
|
||||
struct xfs_mount *mp,
|
||||
int flags)
|
||||
{
|
||||
ASSERT((flags & ~SYNC_WAIT) == 0);
|
||||
|
||||
return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_sync_fsdata(
|
||||
struct xfs_mount *mp)
|
||||
@ -350,7 +297,7 @@ xfs_sync_fsdata(
|
||||
* First stage of freeze - no writers will make progress now we are here,
|
||||
* so we flush delwri and delalloc buffers here, then wait for all I/O to
|
||||
* complete. Data is frozen at that point. Metadata is not frozen,
|
||||
* transactions can still occur here so don't bother flushing the buftarg
|
||||
* transactions can still occur here so don't bother emptying the AIL
|
||||
* because it'll just get dirty again.
|
||||
*/
|
||||
int
|
||||
@ -365,47 +312,13 @@ xfs_quiesce_data(
|
||||
/* write superblock and hoover up shutdown errors */
|
||||
error = xfs_sync_fsdata(mp);
|
||||
|
||||
/* make sure all delwri buffers are written out */
|
||||
xfs_flush_buftarg(mp->m_ddev_targp, 1);
|
||||
|
||||
/* mark the log as covered if needed */
|
||||
if (xfs_log_need_covered(mp))
|
||||
error2 = xfs_fs_log_dummy(mp);
|
||||
|
||||
/* flush data-only devices */
|
||||
if (mp->m_rtdev_targp)
|
||||
xfs_flush_buftarg(mp->m_rtdev_targp, 1);
|
||||
|
||||
return error ? error : error2;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_quiesce_fs(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
int count = 0, pincount;
|
||||
|
||||
xfs_reclaim_inodes(mp, 0);
|
||||
xfs_flush_buftarg(mp->m_ddev_targp, 0);
|
||||
|
||||
/*
|
||||
* This loop must run at least twice. The first instance of the loop
|
||||
* will flush most meta data but that will generate more meta data
|
||||
* (typically directory updates). Which then must be flushed and
|
||||
* logged before we can write the unmount record. We also so sync
|
||||
* reclaim of inodes to catch any that the above delwri flush skipped.
|
||||
*/
|
||||
do {
|
||||
xfs_reclaim_inodes(mp, SYNC_WAIT);
|
||||
xfs_sync_attr(mp, SYNC_WAIT);
|
||||
pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
|
||||
if (!pincount) {
|
||||
delay(50);
|
||||
count++;
|
||||
}
|
||||
} while (count < 2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Second stage of a quiesce. The data is already synced, now we have to take
|
||||
* care of the metadata. New transactions are already blocked, so we need to
|
||||
@ -421,8 +334,12 @@ xfs_quiesce_attr(
|
||||
while (atomic_read(&mp->m_active_trans) > 0)
|
||||
delay(100);
|
||||
|
||||
/* flush inodes and push all remaining buffers out to disk */
|
||||
xfs_quiesce_fs(mp);
|
||||
/* reclaim inodes to do any IO before the freeze completes */
|
||||
xfs_reclaim_inodes(mp, 0);
|
||||
xfs_reclaim_inodes(mp, SYNC_WAIT);
|
||||
|
||||
/* flush all pending changes from the AIL */
|
||||
xfs_ail_push_all_sync(mp->m_ail);
|
||||
|
||||
/*
|
||||
* Just warn here till VFS can correctly support
|
||||
@ -436,7 +353,12 @@ xfs_quiesce_attr(
|
||||
xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
|
||||
"Frozen image may not be consistent.");
|
||||
xfs_log_unmount_write(mp);
|
||||
xfs_unmountfs_writesb(mp);
|
||||
|
||||
/*
|
||||
* At this point we might have modified the superblock again and thus
|
||||
* added an item to the AIL, thus flush it again.
|
||||
*/
|
||||
xfs_ail_push_all_sync(mp->m_ail);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -460,16 +382,27 @@ xfs_sync_worker(
|
||||
struct xfs_mount, m_sync_work);
|
||||
int error;
|
||||
|
||||
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
|
||||
/* dgc: errors ignored here */
|
||||
if (mp->m_super->s_frozen == SB_UNFROZEN &&
|
||||
xfs_log_need_covered(mp))
|
||||
error = xfs_fs_log_dummy(mp);
|
||||
else
|
||||
xfs_log_force(mp, 0);
|
||||
/*
|
||||
* We shouldn't write/force the log if we are in the mount/unmount
|
||||
* process or on a read only filesystem. The workqueue still needs to be
|
||||
* active in both cases, however, because it is used for inode reclaim
|
||||
* during these times. Use the s_umount semaphore to provide exclusion
|
||||
* with unmount.
|
||||
*/
|
||||
if (down_read_trylock(&mp->m_super->s_umount)) {
|
||||
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
|
||||
/* dgc: errors ignored here */
|
||||
if (mp->m_super->s_frozen == SB_UNFROZEN &&
|
||||
xfs_log_need_covered(mp))
|
||||
error = xfs_fs_log_dummy(mp);
|
||||
else
|
||||
xfs_log_force(mp, 0);
|
||||
|
||||
/* start pushing all the metadata that is currently dirty */
|
||||
xfs_ail_push_all(mp->m_ail);
|
||||
/* start pushing all the metadata that is currently
|
||||
* dirty */
|
||||
xfs_ail_push_all(mp->m_ail);
|
||||
}
|
||||
up_read(&mp->m_super->s_umount);
|
||||
}
|
||||
|
||||
/* queue us up again */
|
||||
@ -488,14 +421,6 @@ xfs_syncd_queue_reclaim(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
|
||||
/*
|
||||
* We can have inodes enter reclaim after we've shut down the syncd
|
||||
* workqueue during unmount, so don't allow reclaim work to be queued
|
||||
* during unmount.
|
||||
*/
|
||||
if (!(mp->m_super->s_flags & MS_ACTIVE))
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
|
||||
queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
|
||||
@ -564,7 +489,6 @@ xfs_syncd_init(
|
||||
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
|
||||
|
||||
xfs_syncd_queue_sync(mp);
|
||||
xfs_syncd_queue_reclaim(mp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -702,11 +626,8 @@ xfs_reclaim_inode_grab(
|
||||
}
|
||||
|
||||
/*
|
||||
* Inodes in different states need to be treated differently, and the return
|
||||
* value of xfs_iflush is not sufficient to get this right. The following table
|
||||
* lists the inode states and the reclaim actions necessary for non-blocking
|
||||
* reclaim:
|
||||
*
|
||||
* Inodes in different states need to be treated differently. The following
|
||||
* table lists the inode states and the reclaim actions necessary:
|
||||
*
|
||||
* inode state iflush ret required action
|
||||
* --------------- ---------- ---------------
|
||||
@ -716,39 +637,31 @@ xfs_reclaim_inode_grab(
|
||||
* stale, unpinned 0 reclaim
|
||||
* clean, pinned(*) 0 requeue
|
||||
* stale, pinned EAGAIN requeue
|
||||
* dirty, delwri ok 0 requeue
|
||||
* dirty, delwri blocked EAGAIN requeue
|
||||
* dirty, sync flush 0 reclaim
|
||||
* dirty, async - requeue
|
||||
* dirty, sync 0 reclaim
|
||||
*
|
||||
* (*) dgc: I don't think the clean, pinned state is possible but it gets
|
||||
* handled anyway given the order of checks implemented.
|
||||
*
|
||||
* As can be seen from the table, the return value of xfs_iflush() is not
|
||||
* sufficient to correctly decide the reclaim action here. The checks in
|
||||
* xfs_iflush() might look like duplicates, but they are not.
|
||||
*
|
||||
* Also, because we get the flush lock first, we know that any inode that has
|
||||
* been flushed delwri has had the flush completed by the time we check that
|
||||
* the inode is clean. The clean inode check needs to be done before flushing
|
||||
* the inode delwri otherwise we would loop forever requeuing clean inodes as
|
||||
* we cannot tell apart a successful delwri flush and a clean inode from the
|
||||
* return value of xfs_iflush().
|
||||
* the inode is clean.
|
||||
*
|
||||
* Note that because the inode is flushed delayed write by background
|
||||
* writeback, the flush lock may already be held here and waiting on it can
|
||||
* result in very long latencies. Hence for sync reclaims, where we wait on the
|
||||
* flush lock, the caller should push out delayed write inodes first before
|
||||
* trying to reclaim them to minimise the amount of time spent waiting. For
|
||||
* background reclaim, we just requeue the inode for the next pass.
|
||||
* Note that because the inode is flushed delayed write by AIL pushing, the
|
||||
* flush lock may already be held here and waiting on it can result in very
|
||||
* long latencies. Hence for sync reclaims, where we wait on the flush lock,
|
||||
* the caller should push the AIL first before trying to reclaim inodes to
|
||||
* minimise the amount of time spent waiting. For background reclaim, we only
|
||||
* bother to reclaim clean inodes anyway.
|
||||
*
|
||||
* Hence the order of actions after gaining the locks should be:
|
||||
* bad => reclaim
|
||||
* shutdown => unpin and reclaim
|
||||
* pinned, delwri => requeue
|
||||
* pinned, async => requeue
|
||||
* pinned, sync => unpin
|
||||
* stale => reclaim
|
||||
* clean => reclaim
|
||||
* dirty, delwri => flush and requeue
|
||||
* dirty, async => requeue
|
||||
* dirty, sync => flush, wait and reclaim
|
||||
*/
|
||||
STATIC int
|
||||
@ -757,7 +670,8 @@ xfs_reclaim_inode(
|
||||
struct xfs_perag *pag,
|
||||
int sync_mode)
|
||||
{
|
||||
int error;
|
||||
struct xfs_buf *bp = NULL;
|
||||
int error;
|
||||
|
||||
restart:
|
||||
error = 0;
|
||||
@ -765,17 +679,6 @@ restart:
|
||||
if (!xfs_iflock_nowait(ip)) {
|
||||
if (!(sync_mode & SYNC_WAIT))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* If we only have a single dirty inode in a cluster there is
|
||||
* a fair chance that the AIL push may have pushed it into
|
||||
* the buffer, but xfsbufd won't touch it until 30 seconds
|
||||
* from now, and thus we will lock up here.
|
||||
*
|
||||
* Promote the inode buffer to the front of the delwri list
|
||||
* and wake up xfsbufd now.
|
||||
*/
|
||||
xfs_promote_inode(ip);
|
||||
xfs_iflock(ip);
|
||||
}
|
||||
|
||||
@ -783,13 +686,12 @@ restart:
|
||||
goto reclaim;
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
|
||||
xfs_iunpin_wait(ip);
|
||||
xfs_iflush_abort(ip, false);
|
||||
goto reclaim;
|
||||
}
|
||||
if (xfs_ipincount(ip)) {
|
||||
if (!(sync_mode & SYNC_WAIT)) {
|
||||
xfs_ifunlock(ip);
|
||||
goto out;
|
||||
}
|
||||
if (!(sync_mode & SYNC_WAIT))
|
||||
goto out_ifunlock;
|
||||
xfs_iunpin_wait(ip);
|
||||
}
|
||||
if (xfs_iflags_test(ip, XFS_ISTALE))
|
||||
@ -797,61 +699,43 @@ restart:
|
||||
if (xfs_inode_clean(ip))
|
||||
goto reclaim;
|
||||
|
||||
/*
|
||||
* Never flush out dirty data during non-blocking reclaim, as it would
|
||||
* just contend with AIL pushing trying to do the same job.
|
||||
*/
|
||||
if (!(sync_mode & SYNC_WAIT))
|
||||
goto out_ifunlock;
|
||||
|
||||
/*
|
||||
* Now we have an inode that needs flushing.
|
||||
*
|
||||
* We do a nonblocking flush here even if we are doing a SYNC_WAIT
|
||||
* reclaim as we can deadlock with inode cluster removal.
|
||||
* Note that xfs_iflush will never block on the inode buffer lock, as
|
||||
* xfs_ifree_cluster() can lock the inode buffer before it locks the
|
||||
* ip->i_lock, and we are doing the exact opposite here. As a result,
|
||||
* doing a blocking xfs_itobp() to get the cluster buffer will result
|
||||
* ip->i_lock, and we are doing the exact opposite here. As a result,
|
||||
* doing a blocking xfs_itobp() to get the cluster buffer would result
|
||||
* in an ABBA deadlock with xfs_ifree_cluster().
|
||||
*
|
||||
* As xfs_ifree_cluster() must gather all inodes that are active in the
|
||||
* cache to mark them stale, if we hit this case we don't actually want
|
||||
* to do IO here - we want the inode marked stale so we can simply
|
||||
* reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
|
||||
* just unlock the inode, back off and try again. Hopefully the next
|
||||
* pass through will see the stale flag set on the inode.
|
||||
* reclaim it. Hence if we get an EAGAIN error here, just unlock the
|
||||
* inode, back off and try again. Hopefully the next pass through will
|
||||
* see the stale flag set on the inode.
|
||||
*/
|
||||
error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
|
||||
if (sync_mode & SYNC_WAIT) {
|
||||
if (error == EAGAIN) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
/* backoff longer than in xfs_ifree_cluster */
|
||||
delay(2);
|
||||
goto restart;
|
||||
}
|
||||
xfs_iflock(ip);
|
||||
goto reclaim;
|
||||
error = xfs_iflush(ip, &bp);
|
||||
if (error == EAGAIN) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
/* backoff longer than in xfs_ifree_cluster */
|
||||
delay(2);
|
||||
goto restart;
|
||||
}
|
||||
|
||||
/*
|
||||
* When we have to flush an inode but don't have SYNC_WAIT set, we
|
||||
* flush the inode out using a delwri buffer and wait for the next
|
||||
* call into reclaim to find it in a clean state instead of waiting for
|
||||
* it now. We also don't return errors here - if the error is transient
|
||||
* then the next reclaim pass will flush the inode, and if the error
|
||||
* is permanent then the next sync reclaim will reclaim the inode and
|
||||
* pass on the error.
|
||||
*/
|
||||
if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
|
||||
xfs_warn(ip->i_mount,
|
||||
"inode 0x%llx background reclaim flush failed with %d",
|
||||
(long long)ip->i_ino, error);
|
||||
if (!error) {
|
||||
error = xfs_bwrite(bp);
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
out:
|
||||
xfs_iflags_clear(ip, XFS_IRECLAIM);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
/*
|
||||
* We could return EAGAIN here to make reclaim rescan the inode tree in
|
||||
* a short while. However, this just burns CPU time scanning the tree
|
||||
* waiting for IO to complete and xfssyncd never goes back to the idle
|
||||
* state. Instead, return 0 to let the next scheduled background reclaim
|
||||
* attempt to reclaim the inode again.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
xfs_iflock(ip);
|
||||
reclaim:
|
||||
xfs_ifunlock(ip);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
@ -884,8 +768,21 @@ reclaim:
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
xfs_inode_free(ip);
|
||||
|
||||
return error;
|
||||
|
||||
out_ifunlock:
|
||||
xfs_ifunlock(ip);
|
||||
out:
|
||||
xfs_iflags_clear(ip, XFS_IRECLAIM);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
/*
|
||||
* We could return EAGAIN here to make reclaim rescan the inode tree in
|
||||
* a short while. However, this just burns CPU time scanning the tree
|
||||
* waiting for IO to complete and xfssyncd never goes back to the idle
|
||||
* state. Instead, return 0 to let the next scheduled background reclaim
|
||||
* attempt to reclaim the inode again.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -281,7 +281,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_daddr_t, bno)
|
||||
__field(size_t, buffer_length)
|
||||
__field(int, nblks)
|
||||
__field(int, hold)
|
||||
__field(int, pincount)
|
||||
__field(unsigned, lockval)
|
||||
@ -291,18 +291,18 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
|
||||
TP_fast_assign(
|
||||
__entry->dev = bp->b_target->bt_dev;
|
||||
__entry->bno = bp->b_bn;
|
||||
__entry->buffer_length = bp->b_buffer_length;
|
||||
__entry->nblks = bp->b_length;
|
||||
__entry->hold = atomic_read(&bp->b_hold);
|
||||
__entry->pincount = atomic_read(&bp->b_pin_count);
|
||||
__entry->lockval = bp->b_sema.count;
|
||||
__entry->flags = bp->b_flags;
|
||||
__entry->caller_ip = caller_ip;
|
||||
),
|
||||
TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
|
||||
TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
|
||||
"lock %d flags %s caller %pf",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
(unsigned long long)__entry->bno,
|
||||
__entry->buffer_length,
|
||||
__entry->nblks,
|
||||
__entry->hold,
|
||||
__entry->pincount,
|
||||
__entry->lockval,
|
||||
@ -328,7 +328,7 @@ DEFINE_BUF_EVENT(xfs_buf_unlock);
|
||||
DEFINE_BUF_EVENT(xfs_buf_iowait);
|
||||
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
|
||||
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
|
||||
DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
|
||||
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
|
||||
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
|
||||
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
|
||||
DEFINE_BUF_EVENT(xfs_bdstrat_shut);
|
||||
@ -362,7 +362,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
|
||||
TP_fast_assign(
|
||||
__entry->dev = bp->b_target->bt_dev;
|
||||
__entry->bno = bp->b_bn;
|
||||
__entry->buffer_length = bp->b_buffer_length;
|
||||
__entry->buffer_length = BBTOB(bp->b_length);
|
||||
__entry->flags = flags;
|
||||
__entry->hold = atomic_read(&bp->b_hold);
|
||||
__entry->pincount = atomic_read(&bp->b_pin_count);
|
||||
@ -406,7 +406,7 @@ TRACE_EVENT(xfs_buf_ioerror,
|
||||
TP_fast_assign(
|
||||
__entry->dev = bp->b_target->bt_dev;
|
||||
__entry->bno = bp->b_bn;
|
||||
__entry->buffer_length = bp->b_buffer_length;
|
||||
__entry->buffer_length = BBTOB(bp->b_length);
|
||||
__entry->hold = atomic_read(&bp->b_hold);
|
||||
__entry->pincount = atomic_read(&bp->b_pin_count);
|
||||
__entry->lockval = bp->b_sema.count;
|
||||
@ -450,7 +450,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
|
||||
__entry->bli_recur = bip->bli_recur;
|
||||
__entry->bli_refcount = atomic_read(&bip->bli_refcount);
|
||||
__entry->buf_bno = bip->bli_buf->b_bn;
|
||||
__entry->buf_len = bip->bli_buf->b_buffer_length;
|
||||
__entry->buf_len = BBTOB(bip->bli_buf->b_length);
|
||||
__entry->buf_flags = bip->bli_buf->b_flags;
|
||||
__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
|
||||
__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
|
||||
@ -486,12 +486,10 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
|
||||
@ -876,15 +874,30 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
|
||||
__print_flags(__entry->flags, "|", XFS_LI_FLAGS))
|
||||
)
|
||||
|
||||
TRACE_EVENT(xfs_log_force,
|
||||
TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn),
|
||||
TP_ARGS(mp, lsn),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_lsn_t, lsn)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->dev = mp->m_super->s_dev;
|
||||
__entry->lsn = lsn;
|
||||
),
|
||||
TP_printk("dev %d:%d lsn 0x%llx",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->lsn)
|
||||
)
|
||||
|
||||
#define DEFINE_LOG_ITEM_EVENT(name) \
|
||||
DEFINE_EVENT(xfs_log_item_class, name, \
|
||||
TP_PROTO(struct xfs_log_item *lip), \
|
||||
TP_ARGS(lip))
|
||||
DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
|
||||
DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf);
|
||||
DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf_pinned);
|
||||
DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
|
||||
DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
|
||||
DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
|
||||
|
||||
|
||||
DECLARE_EVENT_CLASS(xfs_file_class,
|
||||
@ -1145,7 +1158,7 @@ TRACE_EVENT(xfs_bunmap,
|
||||
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(xfs_busy_class,
|
||||
DECLARE_EVENT_CLASS(xfs_extent_busy_class,
|
||||
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||
xfs_agblock_t agbno, xfs_extlen_t len),
|
||||
TP_ARGS(mp, agno, agbno, len),
|
||||
@ -1168,17 +1181,17 @@ DECLARE_EVENT_CLASS(xfs_busy_class,
|
||||
__entry->len)
|
||||
);
|
||||
#define DEFINE_BUSY_EVENT(name) \
|
||||
DEFINE_EVENT(xfs_busy_class, name, \
|
||||
DEFINE_EVENT(xfs_extent_busy_class, name, \
|
||||
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
|
||||
xfs_agblock_t agbno, xfs_extlen_t len), \
|
||||
TP_ARGS(mp, agno, agbno, len))
|
||||
DEFINE_BUSY_EVENT(xfs_alloc_busy);
|
||||
DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
|
||||
DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
|
||||
DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
|
||||
DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
|
||||
DEFINE_BUSY_EVENT(xfs_extent_busy);
|
||||
DEFINE_BUSY_EVENT(xfs_extent_busy_enomem);
|
||||
DEFINE_BUSY_EVENT(xfs_extent_busy_force);
|
||||
DEFINE_BUSY_EVENT(xfs_extent_busy_reuse);
|
||||
DEFINE_BUSY_EVENT(xfs_extent_busy_clear);
|
||||
|
||||
TRACE_EVENT(xfs_alloc_busy_trim,
|
||||
TRACE_EVENT(xfs_extent_busy_trim,
|
||||
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||
xfs_agblock_t agbno, xfs_extlen_t len,
|
||||
xfs_agblock_t tbno, xfs_extlen_t tlen),
|
||||
|
@ -19,9 +19,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -36,6 +34,7 @@
|
||||
#include "xfs_btree.h"
|
||||
#include "xfs_ialloc.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_extent_busy.h"
|
||||
#include "xfs_bmap.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_trans_priv.h"
|
||||
@ -608,8 +607,8 @@ STATIC void
|
||||
xfs_trans_free(
|
||||
struct xfs_trans *tp)
|
||||
{
|
||||
xfs_alloc_busy_sort(&tp->t_busy);
|
||||
xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false);
|
||||
xfs_extent_busy_sort(&tp->t_busy);
|
||||
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
|
||||
|
||||
atomic_dec(&tp->t_mountp->m_active_trans);
|
||||
xfs_trans_free_dqinfo(tp);
|
||||
|
@ -345,11 +345,9 @@ struct xfs_item_ops {
|
||||
void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
|
||||
void (*iop_pin)(xfs_log_item_t *);
|
||||
void (*iop_unpin)(xfs_log_item_t *, int remove);
|
||||
uint (*iop_trylock)(xfs_log_item_t *);
|
||||
uint (*iop_push)(struct xfs_log_item *, struct list_head *);
|
||||
void (*iop_unlock)(xfs_log_item_t *);
|
||||
xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
|
||||
void (*iop_push)(xfs_log_item_t *);
|
||||
bool (*iop_pushbuf)(xfs_log_item_t *);
|
||||
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
|
||||
};
|
||||
|
||||
@ -357,20 +355,18 @@ struct xfs_item_ops {
|
||||
#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
|
||||
#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
|
||||
#define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove)
|
||||
#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
|
||||
#define IOP_PUSH(ip, list) (*(ip)->li_ops->iop_push)(ip, list)
|
||||
#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
|
||||
#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
|
||||
#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
|
||||
#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
|
||||
#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
|
||||
|
||||
/*
|
||||
* Return values for the IOP_TRYLOCK() routines.
|
||||
* Return values for the IOP_PUSH() routines.
|
||||
*/
|
||||
#define XFS_ITEM_SUCCESS 0
|
||||
#define XFS_ITEM_PINNED 1
|
||||
#define XFS_ITEM_LOCKED 2
|
||||
#define XFS_ITEM_PUSHBUF 3
|
||||
#define XFS_ITEM_SUCCESS 0
|
||||
#define XFS_ITEM_PINNED 1
|
||||
#define XFS_ITEM_LOCKED 2
|
||||
#define XFS_ITEM_FLUSHING 3
|
||||
|
||||
/*
|
||||
* This is the type of function which can be given to xfs_trans_callback()
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -79,7 +78,7 @@ xfs_ail_check(
|
||||
* Return a pointer to the first item in the AIL. If the AIL is empty, then
|
||||
* return NULL.
|
||||
*/
|
||||
static xfs_log_item_t *
|
||||
xfs_log_item_t *
|
||||
xfs_ail_min(
|
||||
struct xfs_ail *ailp)
|
||||
{
|
||||
@ -364,30 +363,31 @@ xfsaild_push(
|
||||
xfs_log_item_t *lip;
|
||||
xfs_lsn_t lsn;
|
||||
xfs_lsn_t target;
|
||||
long tout = 10;
|
||||
long tout;
|
||||
int stuck = 0;
|
||||
int flushing = 0;
|
||||
int count = 0;
|
||||
int push_xfsbufd = 0;
|
||||
|
||||
/*
|
||||
* If last time we ran we encountered pinned items, force the log first
|
||||
* and wait for it before pushing again.
|
||||
* If we encountered pinned items or did not finish writing out all
|
||||
* buffers the last time we ran, force the log first and wait for it
|
||||
* before pushing again.
|
||||
*/
|
||||
spin_lock(&ailp->xa_lock);
|
||||
if (ailp->xa_last_pushed_lsn == 0 && ailp->xa_log_flush &&
|
||||
!list_empty(&ailp->xa_ail)) {
|
||||
if (ailp->xa_log_flush && ailp->xa_last_pushed_lsn == 0 &&
|
||||
(!list_empty_careful(&ailp->xa_buf_list) ||
|
||||
xfs_ail_min_lsn(ailp))) {
|
||||
ailp->xa_log_flush = 0;
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
|
||||
XFS_STATS_INC(xs_push_ail_flush);
|
||||
xfs_log_force(mp, XFS_LOG_SYNC);
|
||||
spin_lock(&ailp->xa_lock);
|
||||
}
|
||||
|
||||
target = ailp->xa_target;
|
||||
spin_lock(&ailp->xa_lock);
|
||||
lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
|
||||
if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
|
||||
if (!lip) {
|
||||
/*
|
||||
* AIL is empty or our push has reached the end.
|
||||
* If the AIL is empty or our push has reached the end we are
|
||||
* done now.
|
||||
*/
|
||||
xfs_trans_ail_cursor_done(ailp, &cur);
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
@ -396,54 +396,42 @@ xfsaild_push(
|
||||
|
||||
XFS_STATS_INC(xs_push_ail);
|
||||
|
||||
/*
|
||||
* While the item we are looking at is below the given threshold
|
||||
* try to flush it out. We'd like not to stop until we've at least
|
||||
* tried to push on everything in the AIL with an LSN less than
|
||||
* the given threshold.
|
||||
*
|
||||
* However, we will stop after a certain number of pushes and wait
|
||||
* for a reduced timeout to fire before pushing further. This
|
||||
* prevents us from spinning when we can't do anything or there is
|
||||
* lots of contention on the AIL lists.
|
||||
*/
|
||||
lsn = lip->li_lsn;
|
||||
target = ailp->xa_target;
|
||||
while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
|
||||
int lock_result;
|
||||
|
||||
/*
|
||||
* If we can lock the item without sleeping, unlock the AIL
|
||||
* lock and flush the item. Then re-grab the AIL lock so we
|
||||
* can look for the next item on the AIL. List changes are
|
||||
* handled by the AIL lookup functions internally
|
||||
*
|
||||
* If we can't lock the item, either its holder will flush it
|
||||
* or it is already being flushed or it is being relogged. In
|
||||
* any of these case it is being taken care of and we can just
|
||||
* skip to the next item in the list.
|
||||
* Note that IOP_PUSH may unlock and reacquire the AIL lock. We
|
||||
* rely on the AIL cursor implementation to be able to deal with
|
||||
* the dropped lock.
|
||||
*/
|
||||
lock_result = IOP_TRYLOCK(lip);
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
lock_result = IOP_PUSH(lip, &ailp->xa_buf_list);
|
||||
switch (lock_result) {
|
||||
case XFS_ITEM_SUCCESS:
|
||||
XFS_STATS_INC(xs_push_ail_success);
|
||||
trace_xfs_ail_push(lip);
|
||||
|
||||
IOP_PUSH(lip);
|
||||
ailp->xa_last_pushed_lsn = lsn;
|
||||
break;
|
||||
|
||||
case XFS_ITEM_PUSHBUF:
|
||||
XFS_STATS_INC(xs_push_ail_pushbuf);
|
||||
trace_xfs_ail_pushbuf(lip);
|
||||
case XFS_ITEM_FLUSHING:
|
||||
/*
|
||||
* The item or its backing buffer is already being
|
||||
* flushed. The typical reason for that is that an
|
||||
* inode buffer is locked because we already pushed the
|
||||
* updates to it as part of inode clustering.
|
||||
*
|
||||
* We do not want to stop flushing just because lots
|
||||
* of items are already being flushed, but we need to
|
||||
* re-try the flushing relatively soon if most of the
|
||||
* AIL is being flushed.
|
||||
*/
|
||||
XFS_STATS_INC(xs_push_ail_flushing);
|
||||
trace_xfs_ail_flushing(lip);
|
||||
|
||||
if (!IOP_PUSHBUF(lip)) {
|
||||
trace_xfs_ail_pushbuf_pinned(lip);
|
||||
stuck++;
|
||||
ailp->xa_log_flush++;
|
||||
} else {
|
||||
ailp->xa_last_pushed_lsn = lsn;
|
||||
}
|
||||
push_xfsbufd = 1;
|
||||
flushing++;
|
||||
ailp->xa_last_pushed_lsn = lsn;
|
||||
break;
|
||||
|
||||
case XFS_ITEM_PINNED:
|
||||
@ -453,28 +441,22 @@ xfsaild_push(
|
||||
stuck++;
|
||||
ailp->xa_log_flush++;
|
||||
break;
|
||||
|
||||
case XFS_ITEM_LOCKED:
|
||||
XFS_STATS_INC(xs_push_ail_locked);
|
||||
trace_xfs_ail_locked(lip);
|
||||
|
||||
stuck++;
|
||||
break;
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
spin_lock(&ailp->xa_lock);
|
||||
/* should we bother continuing? */
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
break;
|
||||
ASSERT(mp->m_log);
|
||||
|
||||
count++;
|
||||
|
||||
/*
|
||||
* Are there too many items we can't do anything with?
|
||||
*
|
||||
* If we are skipping too many items because we can't flush
|
||||
* them or they are already being flushed, we back off and
|
||||
* give them time to complete whatever operation is being
|
||||
@ -496,42 +478,36 @@ xfsaild_push(
|
||||
xfs_trans_ail_cursor_done(ailp, &cur);
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
|
||||
if (push_xfsbufd) {
|
||||
/* we've got delayed write buffers to flush */
|
||||
wake_up_process(mp->m_ddev_targp->bt_task);
|
||||
}
|
||||
if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))
|
||||
ailp->xa_log_flush++;
|
||||
|
||||
/* assume we have more work to do in a short while */
|
||||
if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
|
||||
out_done:
|
||||
if (!count) {
|
||||
/* We're past our target or empty, so idle */
|
||||
ailp->xa_last_pushed_lsn = 0;
|
||||
ailp->xa_log_flush = 0;
|
||||
|
||||
tout = 50;
|
||||
} else if (XFS_LSN_CMP(lsn, target) >= 0) {
|
||||
/*
|
||||
* We reached the target so wait a bit longer for I/O to
|
||||
* complete and remove pushed items from the AIL before we
|
||||
* start the next scan from the start of the AIL.
|
||||
* We reached the target or the AIL is empty, so wait a bit
|
||||
* longer for I/O to complete and remove pushed items from the
|
||||
* AIL before we start the next scan from the start of the AIL.
|
||||
*/
|
||||
tout = 50;
|
||||
ailp->xa_last_pushed_lsn = 0;
|
||||
} else if ((stuck * 100) / count > 90) {
|
||||
} else if (((stuck + flushing) * 100) / count > 90) {
|
||||
/*
|
||||
* Either there is a lot of contention on the AIL or we
|
||||
* are stuck due to operations in progress. "Stuck" in this
|
||||
* case is defined as >90% of the items we tried to push
|
||||
* were stuck.
|
||||
* Either there is a lot of contention on the AIL or we are
|
||||
* stuck due to operations in progress. "Stuck" in this case
|
||||
* is defined as >90% of the items we tried to push were stuck.
|
||||
*
|
||||
* Backoff a bit more to allow some I/O to complete before
|
||||
* restarting from the start of the AIL. This prevents us
|
||||
* from spinning on the same items, and if they are pinned will
|
||||
* allow the restart to issue a log force to unpin the stuck
|
||||
* items.
|
||||
* restarting from the start of the AIL. This prevents us from
|
||||
* spinning on the same items, and if they are pinned will allow
|
||||
* the restart to issue a log force to unpin the stuck items.
|
||||
*/
|
||||
tout = 20;
|
||||
ailp->xa_last_pushed_lsn = 0;
|
||||
} else {
|
||||
/*
|
||||
* Assume we have more work to do in a short while.
|
||||
*/
|
||||
tout = 10;
|
||||
}
|
||||
|
||||
return tout;
|
||||
@ -544,6 +520,8 @@ xfsaild(
|
||||
struct xfs_ail *ailp = data;
|
||||
long tout = 0; /* milliseconds */
|
||||
|
||||
current->flags |= PF_MEMALLOC;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
if (tout && tout <= 20)
|
||||
__set_current_state(TASK_KILLABLE);
|
||||
@ -610,6 +588,30 @@ xfs_ail_push_all(
|
||||
xfs_ail_push(ailp, threshold_lsn);
|
||||
}
|
||||
|
||||
/*
|
||||
* Push out all items in the AIL immediately and wait until the AIL is empty.
|
||||
*/
|
||||
void
|
||||
xfs_ail_push_all_sync(
|
||||
struct xfs_ail *ailp)
|
||||
{
|
||||
struct xfs_log_item *lip;
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
spin_lock(&ailp->xa_lock);
|
||||
while ((lip = xfs_ail_max(ailp)) != NULL) {
|
||||
prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE);
|
||||
ailp->xa_target = lip->li_lsn;
|
||||
wake_up_process(ailp->xa_task);
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
schedule();
|
||||
spin_lock(&ailp->xa_lock);
|
||||
}
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
|
||||
finish_wait(&ailp->xa_empty, &wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_trans_ail_update - bulk AIL insertion operation.
|
||||
*
|
||||
@ -667,11 +669,15 @@ xfs_trans_ail_update_bulk(
|
||||
|
||||
if (!list_empty(&tmp))
|
||||
xfs_ail_splice(ailp, cur, &tmp, lsn);
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
|
||||
if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
|
||||
xlog_assign_tail_lsn(ailp->xa_mount);
|
||||
if (mlip_changed) {
|
||||
if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
|
||||
xlog_assign_tail_lsn_locked(ailp->xa_mount);
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
|
||||
xfs_log_space_wake(ailp->xa_mount);
|
||||
} else {
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -700,7 +706,8 @@ void
|
||||
xfs_trans_ail_delete_bulk(
|
||||
struct xfs_ail *ailp,
|
||||
struct xfs_log_item **log_items,
|
||||
int nr_items) __releases(ailp->xa_lock)
|
||||
int nr_items,
|
||||
int shutdown_type) __releases(ailp->xa_lock)
|
||||
{
|
||||
xfs_log_item_t *mlip;
|
||||
int mlip_changed = 0;
|
||||
@ -718,7 +725,7 @@ xfs_trans_ail_delete_bulk(
|
||||
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
|
||||
"%s: attempting to delete a log item that is not in the AIL",
|
||||
__func__);
|
||||
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
|
||||
xfs_force_shutdown(mp, shutdown_type);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -729,28 +736,20 @@ xfs_trans_ail_delete_bulk(
|
||||
if (mlip == lip)
|
||||
mlip_changed = 1;
|
||||
}
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
|
||||
if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
|
||||
xlog_assign_tail_lsn(ailp->xa_mount);
|
||||
if (mlip_changed) {
|
||||
if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
|
||||
xlog_assign_tail_lsn_locked(ailp->xa_mount);
|
||||
if (list_empty(&ailp->xa_ail))
|
||||
wake_up_all(&ailp->xa_empty);
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
|
||||
xfs_log_space_wake(ailp->xa_mount);
|
||||
} else {
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The active item list (AIL) is a doubly linked list of log
|
||||
* items sorted by ascending lsn. The base of the list is
|
||||
* a forw/back pointer pair embedded in the xfs mount structure.
|
||||
* The base is initialized with both pointers pointing to the
|
||||
* base. This case always needs to be distinguished, because
|
||||
* the base has no lsn to look at. We almost always insert
|
||||
* at the end of the list, so on inserts we search from the
|
||||
* end of the list to find where the new item belongs.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Initialize the doubly linked list to point only to itself.
|
||||
*/
|
||||
int
|
||||
xfs_trans_ail_init(
|
||||
xfs_mount_t *mp)
|
||||
@ -765,6 +764,8 @@ xfs_trans_ail_init(
|
||||
INIT_LIST_HEAD(&ailp->xa_ail);
|
||||
INIT_LIST_HEAD(&ailp->xa_cursors);
|
||||
spin_lock_init(&ailp->xa_lock);
|
||||
INIT_LIST_HEAD(&ailp->xa_buf_list);
|
||||
init_waitqueue_head(&ailp->xa_empty);
|
||||
|
||||
ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
|
||||
ailp->xa_mount->m_fsname);
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -33,7 +31,6 @@
|
||||
#include "xfs_buf_item.h"
|
||||
#include "xfs_trans_priv.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
/*
|
||||
@ -56,7 +53,7 @@ xfs_trans_buf_item_match(
|
||||
if (blip->bli_item.li_type == XFS_LI_BUF &&
|
||||
blip->bli_buf->b_target == target &&
|
||||
XFS_BUF_ADDR(blip->bli_buf) == blkno &&
|
||||
XFS_BUF_COUNT(blip->bli_buf) == len)
|
||||
BBTOB(blip->bli_buf->b_length) == len)
|
||||
return blip->bli_buf;
|
||||
}
|
||||
|
||||
@ -141,15 +138,11 @@ xfs_trans_get_buf(xfs_trans_t *tp,
|
||||
xfs_buf_t *bp;
|
||||
xfs_buf_log_item_t *bip;
|
||||
|
||||
if (flags == 0)
|
||||
flags = XBF_LOCK | XBF_MAPPED;
|
||||
|
||||
/*
|
||||
* Default to a normal get_buf() call if the tp is NULL.
|
||||
*/
|
||||
if (tp == NULL)
|
||||
return xfs_buf_get(target_dev, blkno, len,
|
||||
flags | XBF_DONT_BLOCK);
|
||||
return xfs_buf_get(target_dev, blkno, len, flags);
|
||||
|
||||
/*
|
||||
* If we find the buffer in the cache with this transaction
|
||||
@ -165,14 +158,6 @@ xfs_trans_get_buf(xfs_trans_t *tp,
|
||||
XFS_BUF_DONE(bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the buffer is stale then it was binval'ed
|
||||
* since last read. This doesn't matter since the
|
||||
* caller isn't allowed to use the data anyway.
|
||||
*/
|
||||
else if (XFS_BUF_ISSTALE(bp))
|
||||
ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
|
||||
|
||||
ASSERT(bp->b_transp == tp);
|
||||
bip = bp->b_fspriv;
|
||||
ASSERT(bip != NULL);
|
||||
@ -182,15 +167,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
|
||||
return (bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* We always specify the XBF_DONT_BLOCK flag within a transaction
|
||||
* so that get_buf does not try to push out a delayed write buffer
|
||||
* which might cause another transaction to take place (if the
|
||||
* buffer was delayed alloc). Such recursive transactions can
|
||||
* easily deadlock with our current transaction as well as cause
|
||||
* us to run out of stack space.
|
||||
*/
|
||||
bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK);
|
||||
bp = xfs_buf_get(target_dev, blkno, len, flags);
|
||||
if (bp == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
@ -282,14 +259,13 @@ xfs_trans_read_buf(
|
||||
xfs_buf_log_item_t *bip;
|
||||
int error;
|
||||
|
||||
if (flags == 0)
|
||||
flags = XBF_LOCK | XBF_MAPPED;
|
||||
*bpp = NULL;
|
||||
|
||||
/*
|
||||
* Default to a normal get_buf() call if the tp is NULL.
|
||||
*/
|
||||
if (tp == NULL) {
|
||||
bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
|
||||
bp = xfs_buf_read(target, blkno, len, flags);
|
||||
if (!bp)
|
||||
return (flags & XBF_TRYLOCK) ?
|
||||
EAGAIN : XFS_ERROR(ENOMEM);
|
||||
@ -297,6 +273,8 @@ xfs_trans_read_buf(
|
||||
if (bp->b_error) {
|
||||
error = bp->b_error;
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
XFS_BUF_UNDONE(bp);
|
||||
xfs_buf_stale(bp);
|
||||
xfs_buf_relse(bp);
|
||||
return error;
|
||||
}
|
||||
@ -371,15 +349,7 @@ xfs_trans_read_buf(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We always specify the XBF_DONT_BLOCK flag within a transaction
|
||||
* so that get_buf does not try to push out a delayed write buffer
|
||||
* which might cause another transaction to take place (if the
|
||||
* buffer was delayed alloc). Such recursive transactions can
|
||||
* easily deadlock with our current transaction as well as cause
|
||||
* us to run out of stack space.
|
||||
*/
|
||||
bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
|
||||
bp = xfs_buf_read(target, blkno, len, flags);
|
||||
if (bp == NULL) {
|
||||
*bpp = NULL;
|
||||
return (flags & XBF_TRYLOCK) ?
|
||||
@ -418,19 +388,6 @@ xfs_trans_read_buf(
|
||||
return 0;
|
||||
|
||||
shutdown_abort:
|
||||
/*
|
||||
* the theory here is that buffer is good but we're
|
||||
* bailing out because the filesystem is being forcibly
|
||||
* shut down. So we should leave the b_flags alone since
|
||||
* the buffer's not staled and just get out.
|
||||
*/
|
||||
#if defined(DEBUG)
|
||||
if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
|
||||
xfs_notice(mp, "about to pop assert, bp == 0x%p", bp);
|
||||
#endif
|
||||
ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) !=
|
||||
(XBF_STALE|XBF_DELWRI));
|
||||
|
||||
trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
|
||||
xfs_buf_relse(bp);
|
||||
*bpp = NULL;
|
||||
@ -606,7 +563,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
|
||||
|
||||
ASSERT(bp->b_transp == tp);
|
||||
ASSERT(bip != NULL);
|
||||
ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
|
||||
ASSERT(first <= last && last < BBTOB(bp->b_length));
|
||||
ASSERT(bp->b_iodone == NULL ||
|
||||
bp->b_iodone == xfs_buf_iodone_callbacks);
|
||||
|
||||
@ -626,8 +583,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
|
||||
bp->b_iodone = xfs_buf_iodone_callbacks;
|
||||
bip->bli_item.li_cb = xfs_buf_iodone;
|
||||
|
||||
xfs_buf_delwri_queue(bp);
|
||||
|
||||
trace_xfs_trans_log_buf(bip);
|
||||
|
||||
/*
|
||||
@ -651,22 +606,33 @@ xfs_trans_log_buf(xfs_trans_t *tp,
|
||||
|
||||
|
||||
/*
|
||||
* This called to invalidate a buffer that is being used within
|
||||
* a transaction. Typically this is because the blocks in the
|
||||
* buffer are being freed, so we need to prevent it from being
|
||||
* written out when we're done. Allowing it to be written again
|
||||
* might overwrite data in the free blocks if they are reallocated
|
||||
* to a file.
|
||||
* Invalidate a buffer that is being used within a transaction.
|
||||
*
|
||||
* We prevent the buffer from being written out by clearing the
|
||||
* B_DELWRI flag. We can't always
|
||||
* get rid of the buf log item at this point, though, because
|
||||
* the buffer may still be pinned by another transaction. If that
|
||||
* is the case, then we'll wait until the buffer is committed to
|
||||
* disk for the last time (we can tell by the ref count) and
|
||||
* free it in xfs_buf_item_unpin(). Until it is cleaned up we
|
||||
* will keep the buffer locked so that the buffer and buf log item
|
||||
* are not reused.
|
||||
* Typically this is because the blocks in the buffer are being freed, so we
|
||||
* need to prevent it from being written out when we're done. Allowing it
|
||||
* to be written again might overwrite data in the free blocks if they are
|
||||
* reallocated to a file.
|
||||
*
|
||||
* We prevent the buffer from being written out by marking it stale. We can't
|
||||
* get rid of the buf log item at this point because the buffer may still be
|
||||
* pinned by another transaction. If that is the case, then we'll wait until
|
||||
* the buffer is committed to disk for the last time (we can tell by the ref
|
||||
* count) and free it in xfs_buf_item_unpin(). Until that happens we will
|
||||
* keep the buffer locked so that the buffer and buf log item are not reused.
|
||||
*
|
||||
* We also set the XFS_BLF_CANCEL flag in the buf log format structure and log
|
||||
* the buf item. This will be used at recovery time to determine that copies
|
||||
* of the buffer in the log before this should not be replayed.
|
||||
*
|
||||
* We mark the item descriptor and the transaction dirty so that we'll hold
|
||||
* the buffer until after the commit.
|
||||
*
|
||||
* Since we're invalidating the buffer, we also clear the state about which
|
||||
* parts of the buffer have been logged. We also clear the flag indicating
|
||||
* that this is an inode buffer since the data in the buffer will no longer
|
||||
* be valid.
|
||||
*
|
||||
* We set the stale bit in the buffer as well since we're getting rid of it.
|
||||
*/
|
||||
void
|
||||
xfs_trans_binval(
|
||||
@ -686,7 +652,6 @@ xfs_trans_binval(
|
||||
* If the buffer is already invalidated, then
|
||||
* just return.
|
||||
*/
|
||||
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
|
||||
ASSERT(XFS_BUF_ISSTALE(bp));
|
||||
ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
|
||||
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF));
|
||||
@ -696,27 +661,8 @@ xfs_trans_binval(
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the dirty bit in the buffer and set the STALE flag
|
||||
* in the buf log item. The STALE flag will be used in
|
||||
* xfs_buf_item_unpin() to determine if it should clean up
|
||||
* when the last reference to the buf item is given up.
|
||||
* We set the XFS_BLF_CANCEL flag in the buf log format structure
|
||||
* and log the buf item. This will be used at recovery time
|
||||
* to determine that copies of the buffer in the log before
|
||||
* this should not be replayed.
|
||||
* We mark the item descriptor and the transaction dirty so
|
||||
* that we'll hold the buffer until after the commit.
|
||||
*
|
||||
* Since we're invalidating the buffer, we also clear the state
|
||||
* about which parts of the buffer have been logged. We also
|
||||
* clear the flag indicating that this is an inode buffer since
|
||||
* the data in the buffer will no longer be valid.
|
||||
*
|
||||
* We set the stale bit in the buffer as well since we're getting
|
||||
* rid of it.
|
||||
*/
|
||||
xfs_buf_stale(bp);
|
||||
|
||||
bip->bli_flags |= XFS_BLI_STALE;
|
||||
bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
|
||||
bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
|
||||
|
@ -17,9 +17,7 @@
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -71,6 +71,8 @@ struct xfs_ail {
|
||||
spinlock_t xa_lock;
|
||||
xfs_lsn_t xa_last_pushed_lsn;
|
||||
int xa_log_flush;
|
||||
struct list_head xa_buf_list;
|
||||
wait_queue_head_t xa_empty;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -90,18 +92,22 @@ xfs_trans_ail_update(
|
||||
}
|
||||
|
||||
void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
|
||||
struct xfs_log_item **log_items, int nr_items)
|
||||
struct xfs_log_item **log_items, int nr_items,
|
||||
int shutdown_type)
|
||||
__releases(ailp->xa_lock);
|
||||
static inline void
|
||||
xfs_trans_ail_delete(
|
||||
struct xfs_ail *ailp,
|
||||
xfs_log_item_t *lip) __releases(ailp->xa_lock)
|
||||
xfs_log_item_t *lip,
|
||||
int shutdown_type) __releases(ailp->xa_lock)
|
||||
{
|
||||
xfs_trans_ail_delete_bulk(ailp, &lip, 1);
|
||||
xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type);
|
||||
}
|
||||
|
||||
void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
|
||||
void xfs_ail_push_all(struct xfs_ail *);
|
||||
void xfs_ail_push_all_sync(struct xfs_ail *);
|
||||
struct xfs_log_item *xfs_ail_min(struct xfs_ail *ailp);
|
||||
xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
|
||||
|
||||
struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
|
||||
|
@ -57,6 +57,7 @@ typedef __uint64_t __psunsigned_t;
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
typedef __uint32_t xfs_agblock_t; /* blockno in alloc. group */
|
||||
typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */
|
||||
typedef __uint32_t xfs_extlen_t; /* extent length in blocks */
|
||||
typedef __uint32_t xfs_agnumber_t; /* allocation group number */
|
||||
typedef __int32_t xfs_extnum_t; /* # of extents in a file */
|
||||
@ -101,6 +102,7 @@ typedef __uint64_t xfs_fileoff_t; /* block number in a file */
|
||||
typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
|
||||
typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */
|
||||
|
||||
|
||||
/*
|
||||
* Null values for the types.
|
||||
*/
|
||||
@ -120,6 +122,9 @@ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */
|
||||
|
||||
#define NULLCOMMITLSN ((xfs_lsn_t)-1)
|
||||
|
||||
#define NULLFSINO ((xfs_ino_t)-1)
|
||||
#define NULLAGINO ((xfs_agino_t)-1)
|
||||
|
||||
/*
|
||||
* Max values for extlen, extnum, aextnum.
|
||||
*/
|
||||
|
@ -18,9 +18,7 @@
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
|
@ -21,7 +21,6 @@
|
||||
#include "xfs_types.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
@ -39,7 +38,6 @@
|
||||
#include "xfs_bmap.h"
|
||||
#include "xfs_acl.h"
|
||||
#include "xfs_attr.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_utils.h"
|
||||
@ -81,8 +79,7 @@ xfs_readlink_bmap(
|
||||
d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
|
||||
byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
|
||||
|
||||
bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
|
||||
XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
|
||||
bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0);
|
||||
if (!bp)
|
||||
return XFS_ERROR(ENOMEM);
|
||||
error = bp->b_error;
|
||||
@ -1919,7 +1916,7 @@ xfs_alloc_file_space(
|
||||
|
||||
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
|
||||
xfs_bmap_cancel(&free_list);
|
||||
xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
|
||||
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
|
||||
|
||||
error1: /* Just cancel transaction */
|
||||
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
|
||||
@ -1966,7 +1963,7 @@ xfs_zero_remaining_bytes(
|
||||
|
||||
bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
|
||||
mp->m_rtdev_targp : mp->m_ddev_targp,
|
||||
mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
|
||||
BTOBB(mp->m_sb.sb_blocksize), 0);
|
||||
if (!bp)
|
||||
return XFS_ERROR(ENOMEM);
|
||||
|
||||
@ -2315,17 +2312,33 @@ xfs_change_file_space(
|
||||
case XFS_IOC_ALLOCSP64:
|
||||
case XFS_IOC_FREESP:
|
||||
case XFS_IOC_FREESP64:
|
||||
/*
|
||||
* These operations actually do IO when extending the file, but
|
||||
* the allocation is done separately to the zeroing that is
|
||||
* done. This set of operations needs to be serialised against
|
||||
* other IO operations, such as truncate and buffered IO. We
|
||||
* need to take the IOLOCK here to serialise the allocation and
|
||||
* zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
|
||||
* truncate, direct IO) from racing against the transient
|
||||
* allocated but not written state we can have here.
|
||||
*/
|
||||
xfs_ilock(ip, XFS_IOLOCK_EXCL);
|
||||
if (startoffset > fsize) {
|
||||
error = xfs_alloc_file_space(ip, fsize,
|
||||
startoffset - fsize, 0, attr_flags);
|
||||
if (error)
|
||||
startoffset - fsize, 0,
|
||||
attr_flags | XFS_ATTR_NOLOCK);
|
||||
if (error) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
iattr.ia_valid = ATTR_SIZE;
|
||||
iattr.ia_size = startoffset;
|
||||
|
||||
error = xfs_setattr_size(ip, &iattr, attr_flags);
|
||||
error = xfs_setattr_size(ip, &iattr,
|
||||
attr_flags | XFS_ATTR_NOLOCK);
|
||||
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
||||
|
||||
if (error)
|
||||
return error;
|
||||
|
Loading…
x
Reference in New Issue
Block a user