New code for 6.3-rc1, part 2:

* Fix a deadlock in the free space allocator due to the AG-walking
    algorithm forgetting to follow AG-order locking rules.
  * Make the inode allocator prefer existing free inodes instead of
    failing to allocate new inode chunks when free space is low.
  * Set minleft correctly when setting allocator parameters for bmap
    changes.
  * Fix uninitialized variable access in the getfsmap code.
  * Make a distinction between active and passive per-AG structure
    references.  For now, active references are taken to perform some
    work in an AG on behalf of a high level operation; passive references
    are used by lower level code to finish operations started by other
    threads.  Eventually this will become part of online shrink.
  * Split out all the different allocator strategies into separate
    functions to move us away from the design antipattern of filling out a
    huge structure for various differentish things and issuing a single
    function multiplexing call.
  * Various cleanups in the filestreams allocator code, which we might
    very well want to deprecate instead of continuing.
  * Fix a bug with the agi rotor code that was introduced earlier in this
    series.
 
 Signed-off-by: Darrick J. Wong <djwong@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQ2qTKExjcn+O1o2YRKO3ySh0YRpgUCY/zgqgAKCRBKO3ySh0YR
 plIkAQDIscqdqXGH01gF19/ncqG2GUaXY+/zeOReuk1Iv3VEVgD+MVXf+QvHk7LD
 /LTWNl2K6NQmE/9RtaBt0aFNDzvIAgU=
 =k7r8
 -----END PGP SIGNATURE-----

Merge tag 'xfs-6.3-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull moar xfs updates from Darrick Wong:
 "This contains a fix for a deadlock in the allocator. It continues the
  slow march towards being able to offline AGs, and it refactors the
  interface to the xfs allocator to be less indirection happy.

  Summary:

   - Fix a deadlock in the free space allocator due to the AG-walking
     algorithm forgetting to follow AG-order locking rules

   - Make the inode allocator prefer existing free inodes instead of
     failing to allocate new inode chunks when free space is low

   - Set minleft correctly when setting allocator parameters for bmap
     changes

   - Fix uninitialized variable access in the getfsmap code

   - Make a distinction between active and passive per-AG structure
     references. For now, active references are taken to perform some
     work in an AG on behalf of a high level operation; passive
     references are used by lower level code to finish operations
     started by other threads. Eventually this will become part of
     online shrink

   - Split out all the different allocator strategies into separate
     functions to move us away from the design antipattern of filling out a
     huge structure for various differentish things and issuing a single
     function multiplexing call

   - Various cleanups in the filestreams allocator code, which we might
     very well want to deprecate instead of continuing

   - Fix a bug with the agi rotor code that was introduced earlier in
     this series"

* tag 'xfs-6.3-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (44 commits)
  xfs: restore old agirotor behavior
  xfs: fix uninitialized variable access
  xfs: refactor the filestreams allocator pick functions
  xfs: return a referenced perag from filestreams allocator
  xfs: pass perag to filestreams tracing
  xfs: use for_each_perag_wrap in xfs_filestream_pick_ag
  xfs: track an active perag reference in filestreams
  xfs: factor out MRU hit case in xfs_filestream_select_ag
  xfs: remove xfs_filestream_select_ag() longest extent check
  xfs: merge new filestream AG selection into xfs_filestream_select_ag()
  xfs: merge filestream AG lookup into xfs_filestream_select_ag()
  xfs: move xfs_bmap_btalloc_filestreams() to xfs_filestreams.c
  xfs: use xfs_bmap_longest_free_extent() in filestreams
  xfs: get rid of notinit from xfs_bmap_longest_free_extent
  xfs: factor out filestreams from xfs_bmap_btalloc_nullfb
  xfs: convert trim to use for_each_perag_range
  xfs: convert xfs_alloc_vextent_iterate_ags() to use perag walker
  xfs: move the minimum agno checks into xfs_alloc_vextent_check_args
  xfs: fold xfs_alloc_ag_vextent() into callers
  xfs: move allocation accounting to xfs_alloc_vextent_set_fsbno()
  ...
This commit is contained in:
Linus Torvalds 2023-02-28 16:08:30 -08:00
commit c0927a7a53
36 changed files with 1564 additions and 1271 deletions

View File

@ -44,16 +44,15 @@ xfs_perag_get(
xfs_agnumber_t agno) xfs_agnumber_t agno)
{ {
struct xfs_perag *pag; struct xfs_perag *pag;
int ref = 0;
rcu_read_lock(); rcu_read_lock();
pag = radix_tree_lookup(&mp->m_perag_tree, agno); pag = radix_tree_lookup(&mp->m_perag_tree, agno);
if (pag) { if (pag) {
trace_xfs_perag_get(pag, _RET_IP_);
ASSERT(atomic_read(&pag->pag_ref) >= 0); ASSERT(atomic_read(&pag->pag_ref) >= 0);
ref = atomic_inc_return(&pag->pag_ref); atomic_inc(&pag->pag_ref);
} }
rcu_read_unlock(); rcu_read_unlock();
trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
return pag; return pag;
} }
@ -68,7 +67,6 @@ xfs_perag_get_tag(
{ {
struct xfs_perag *pag; struct xfs_perag *pag;
int found; int found;
int ref;
rcu_read_lock(); rcu_read_lock();
found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
@ -77,9 +75,9 @@ xfs_perag_get_tag(
rcu_read_unlock(); rcu_read_unlock();
return NULL; return NULL;
} }
ref = atomic_inc_return(&pag->pag_ref); trace_xfs_perag_get_tag(pag, _RET_IP_);
atomic_inc(&pag->pag_ref);
rcu_read_unlock(); rcu_read_unlock();
trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
return pag; return pag;
} }
@ -87,11 +85,68 @@ void
xfs_perag_put( xfs_perag_put(
struct xfs_perag *pag) struct xfs_perag *pag)
{ {
int ref; trace_xfs_perag_put(pag, _RET_IP_);
ASSERT(atomic_read(&pag->pag_ref) > 0); ASSERT(atomic_read(&pag->pag_ref) > 0);
ref = atomic_dec_return(&pag->pag_ref); atomic_dec(&pag->pag_ref);
trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); }
/*
 * Take an active reference on the per-AG structure for @agno.
 *
 * Active references are meant for short term access to the per-AG
 * structures, such as walking trees or reading state.  The lookup is done
 * under rcu_read_lock() and the reference is only taken if
 * pag_active_ref is still non-zero, so NULL is returned both when there is
 * no perag at @agno and when the active count has already dropped to zero
 * (an AG that is being shrunk or is offline).
 *
 * A non-NULL return must be dropped with xfs_perag_rele().
 */
struct xfs_perag *
xfs_perag_grab(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*found;
	struct xfs_perag	*grabbed = NULL;

	rcu_read_lock();
	found = radix_tree_lookup(&mp->m_perag_tree, agno);
	if (found) {
		/* Trace the attempt before we know whether it succeeds. */
		trace_xfs_perag_grab(found, _RET_IP_);
		if (atomic_inc_not_zero(&found->pag_active_ref))
			grabbed = found;
	}
	rcu_read_unlock();
	return grabbed;
}
/*
 * Starting the search at AG index @first, find the next perag that has @tag
 * set in the mount's perag radix tree and take an active reference on it.
 *
 * Returns NULL if no tagged perag is found, or if the one that was found
 * no longer has a non-zero active reference count.  A non-NULL return must
 * be dropped with xfs_perag_rele().
 */
struct xfs_perag *
xfs_perag_grab_tag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		first,
	int			tag)
{
	struct xfs_perag	*pag;
	struct xfs_perag	*grabbed = NULL;
	int			nr_found;

	rcu_read_lock();
	/* Ask for at most one tagged entry at or after @first. */
	nr_found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
			(void **)&pag, first, 1, tag);
	if (nr_found > 0) {
		trace_xfs_perag_grab_tag(pag, _RET_IP_);
		if (atomic_inc_not_zero(&pag->pag_active_ref))
			grabbed = pag;
	}
	rcu_read_unlock();
	return grabbed;
}
void
xfs_perag_rele(
struct xfs_perag *pag)
{
trace_xfs_perag_rele(pag, _RET_IP_);
if (atomic_dec_and_test(&pag->pag_active_ref))
wake_up(&pag->pag_active_wq);
} }
/* /*
@ -196,6 +251,10 @@ xfs_free_perag(
cancel_delayed_work_sync(&pag->pag_blockgc_work); cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_buf_hash_destroy(pag); xfs_buf_hash_destroy(pag);
/* drop the mount's active reference */
xfs_perag_rele(pag);
XFS_IS_CORRUPT(pag->pag_mount,
atomic_read(&pag->pag_active_ref) != 0);
call_rcu(&pag->rcu_head, __xfs_free_perag); call_rcu(&pag->rcu_head, __xfs_free_perag);
} }
} }
@ -314,6 +373,7 @@ xfs_initialize_perag(
INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
init_waitqueue_head(&pag->pagb_wait); init_waitqueue_head(&pag->pagb_wait);
init_waitqueue_head(&pag->pag_active_wq);
pag->pagb_count = 0; pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT; pag->pagb_tree = RB_ROOT;
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
@ -322,6 +382,9 @@ xfs_initialize_perag(
if (error) if (error)
goto out_remove_pag; goto out_remove_pag;
/* Active ref owned by mount indicates AG is online. */
atomic_set(&pag->pag_active_ref, 1);
/* first new pag is fully initialized */ /* first new pag is fully initialized */
if (first_initialised == NULLAGNUMBER) if (first_initialised == NULLAGNUMBER)
first_initialised = index; first_initialised = index;
@ -824,7 +887,7 @@ xfs_ag_shrink_space(
struct xfs_alloc_arg args = { struct xfs_alloc_arg args = {
.tp = *tpp, .tp = *tpp,
.mp = mp, .mp = mp,
.type = XFS_ALLOCTYPE_THIS_BNO, .pag = pag,
.minlen = delta, .minlen = delta,
.maxlen = delta, .maxlen = delta,
.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE,
@ -856,14 +919,11 @@ xfs_ag_shrink_space(
if (delta >= aglen) if (delta >= aglen)
return -EINVAL; return -EINVAL;
args.fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta);
/* /*
* Make sure that the last inode cluster cannot overlap with the new * Make sure that the last inode cluster cannot overlap with the new
* end of the AG, even if it's sparse. * end of the AG, even if it's sparse.
*/ */
error = xfs_ialloc_check_shrink(*tpp, pag->pag_agno, agibp, error = xfs_ialloc_check_shrink(pag, *tpp, agibp, aglen - delta);
aglen - delta);
if (error) if (error)
return error; return error;
@ -876,7 +936,8 @@ xfs_ag_shrink_space(
return error; return error;
/* internal log shouldn't also show up in the free space btrees */ /* internal log shouldn't also show up in the free space btrees */
error = xfs_alloc_vextent(&args); error = xfs_alloc_vextent_exact_bno(&args,
XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta));
if (!error && args.agbno == NULLAGBLOCK) if (!error && args.agbno == NULLAGBLOCK)
error = -ENOSPC; error = -ENOSPC;

View File

@ -32,14 +32,12 @@ struct xfs_ag_resv {
struct xfs_perag { struct xfs_perag {
struct xfs_mount *pag_mount; /* owner filesystem */ struct xfs_mount *pag_mount; /* owner filesystem */
xfs_agnumber_t pag_agno; /* AG this structure belongs to */ xfs_agnumber_t pag_agno; /* AG this structure belongs to */
atomic_t pag_ref; /* perag reference count */ atomic_t pag_ref; /* passive reference count */
char pagf_init; /* this agf's entry is initialized */ atomic_t pag_active_ref; /* active reference count */
char pagi_init; /* this agi's entry is initialized */ wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */
char pagf_metadata; /* the agf is preferred to be metadata */ unsigned long pag_opstate;
char pagi_inodeok; /* The agi is ok for inodes */
uint8_t pagf_levels[XFS_BTNUM_AGF]; uint8_t pagf_levels[XFS_BTNUM_AGF];
/* # of levels in bno & cnt btree */ /* # of levels in bno & cnt btree */
bool pagf_agflreset; /* agfl requires reset before use */
uint32_t pagf_flcount; /* count of blocks in freelist */ uint32_t pagf_flcount; /* count of blocks in freelist */
xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_freeblks; /* total free blocks */
xfs_extlen_t pagf_longest; /* longest free space */ xfs_extlen_t pagf_longest; /* longest free space */
@ -106,16 +104,44 @@ struct xfs_perag {
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
}; };
/*
* Per-AG operational state. These are atomic flag bits.
*
* Each value is a bit number (not a mask) within pag->pag_opstate, intended
* for use with test_bit()/set_bit()/clear_bit().  They take over from the
* individual char/bool flag fields that used to live in struct xfs_perag.
*/
/* AGF contents have been loaded into the perag (formerly pagf_init). */
#define XFS_AGSTATE_AGF_INIT 0
/* AGI counterpart of AGF_INIT; presumably replaces pagi_init. */
#define XFS_AGSTATE_AGI_INIT 1
/* AG is preferred for metadata allocations (formerly pagf_metadata). */
#define XFS_AGSTATE_PREFERS_METADATA 2
/* Presumably replaces pagi_inodeok ("the agi is ok for inodes"). */
#define XFS_AGSTATE_ALLOWS_INODES 3
/* AGFL requires a reset before use (formerly pagf_agflreset). */
#define XFS_AGSTATE_AGFL_NEEDS_RESET 4
/*
* Generate a read-only accessor xfs_perag_<name>() that returns whether the
* XFS_AGSTATE_<NAME> bit is currently set in pag->pag_opstate.  Only the
* test helper is generated here; setting and clearing is done by callers
* with set_bit()/clear_bit() directly.
*/
#define __XFS_AG_OPSTATE(name, NAME) \
static inline bool xfs_perag_ ## name (struct xfs_perag *pag) \
{ \
return test_bit(XFS_AGSTATE_ ## NAME, &pag->pag_opstate); \
}
/* Instantiate one xfs_perag_*() predicate per state bit. */
__XFS_AG_OPSTATE(initialised_agf, AGF_INIT)
__XFS_AG_OPSTATE(initialised_agi, AGI_INIT)
__XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA)
__XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES)
__XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET)
int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
void xfs_free_perag(struct xfs_mount *mp); void xfs_free_perag(struct xfs_mount *mp);
/* Passive AG references */
struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
unsigned int tag); unsigned int tag);
void xfs_perag_put(struct xfs_perag *pag); void xfs_perag_put(struct xfs_perag *pag);
/* Active AG references */
struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t);
struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t,
int tag);
void xfs_perag_rele(struct xfs_perag *pag);
/* /*
* Per-ag geometry infomation and validation * Per-ag geometry infomation and validation
*/ */
@ -193,31 +219,86 @@ xfs_perag_next(
struct xfs_mount *mp = pag->pag_mount; struct xfs_mount *mp = pag->pag_mount;
*agno = pag->pag_agno + 1; *agno = pag->pag_agno + 1;
xfs_perag_put(pag); xfs_perag_rele(pag);
if (*agno > end_agno) while (*agno <= end_agno) {
pag = xfs_perag_grab(mp, *agno);
if (pag)
return pag;
(*agno)++;
}
return NULL; return NULL;
return xfs_perag_get(mp, *agno);
} }
#define for_each_perag_range(mp, agno, end_agno, pag) \ #define for_each_perag_range(mp, agno, end_agno, pag) \
for ((pag) = xfs_perag_get((mp), (agno)); \ for ((pag) = xfs_perag_grab((mp), (agno)); \
(pag) != NULL; \ (pag) != NULL; \
(pag) = xfs_perag_next((pag), &(agno), (end_agno))) (pag) = xfs_perag_next((pag), &(agno), (end_agno)))
#define for_each_perag_from(mp, agno, pag) \ #define for_each_perag_from(mp, agno, pag) \
for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
#define for_each_perag(mp, agno, pag) \ #define for_each_perag(mp, agno, pag) \
(agno) = 0; \ (agno) = 0; \
for_each_perag_from((mp), (agno), (pag)) for_each_perag_from((mp), (agno), (pag))
#define for_each_perag_tag(mp, agno, pag, tag) \ #define for_each_perag_tag(mp, agno, pag, tag) \
for ((agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \ for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
(pag) != NULL; \ (pag) != NULL; \
(agno) = (pag)->pag_agno + 1, \ (agno) = (pag)->pag_agno + 1, \
xfs_perag_put(pag), \ xfs_perag_rele(pag), \
(pag) = xfs_perag_get_tag((mp), (agno), (tag))) (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
/*
 * Step helper for the wrapping perag iterators.
 *
 * Drops the active reference on @pag and returns the next AG that can be
 * actively grabbed, or NULL once the walk has come back around to
 * @stop_agno.  On the way:
 *
 *  - when the cursor reaches @wrap_agno (exclusive upper bound) it jumps
 *    back to @restart_agno, unless @restart_agno is not below @stop_agno,
 *    in which case there is nothing left to visit and the walk ends;
 *  - AGs for which xfs_perag_grab() fails are skipped.
 *
 * *@agno is updated to track the cursor and holds the AG number of the
 * returned perag on success.
 */
static inline struct xfs_perag *
xfs_perag_next_wrap(
	struct xfs_perag	*pag,
	xfs_agnumber_t		*agno,
	xfs_agnumber_t		stop_agno,
	xfs_agnumber_t		restart_agno,
	xfs_agnumber_t		wrap_agno)
{
	struct xfs_mount	*mp = pag->pag_mount;
	struct xfs_perag	*next;

	/* Read everything we need from @pag before letting go of it. */
	*agno = pag->pag_agno + 1;
	xfs_perag_rele(pag);

	for (; *agno != stop_agno; (*agno)++) {
		if (*agno >= wrap_agno) {
			/* Nothing below the starting point left to walk. */
			if (restart_agno >= stop_agno)
				return NULL;
			*agno = restart_agno;
		}

		next = xfs_perag_grab(mp, *agno);
		if (next)
			return next;
	}
	return NULL;
}
/*
* Iterate all AGs from start_agno through (wrap_agno - 1), then restart_agno
* through (start_agno - 1).
*
* wrap_agno is an exclusive bound: xfs_perag_next_wrap() jumps back to
* restart_agno as soon as the cursor is >= wrap_agno.
*
* Each iteration holds an active reference on pag, taken with
* xfs_perag_grab() and dropped by xfs_perag_next_wrap() when stepping.  An
* early break therefore leaves that reference held and the caller has to
* drop it with xfs_perag_rele().  If the initial grab of start_agno fails
* the loop body never runs; AGs that fail to grab later in the walk are
* skipped.  start_agno is evaluated on every step, so it must not have side
* effects.
*/
#define for_each_perag_wrap_range(mp, start_agno, restart_agno, wrap_agno, agno, pag) \
for ((agno) = (start_agno), (pag) = xfs_perag_grab((mp), (agno)); \
(pag) != NULL; \
(pag) = xfs_perag_next_wrap((pag), &(agno), (start_agno), \
(restart_agno), (wrap_agno)))
/*
* Iterate all AGs from start_agno through (wrap_agno - 1), then 0 through
* (start_agno - 1).  Same reference rules as for_each_perag_wrap_range().
*/
#define for_each_perag_wrap_at(mp, start_agno, wrap_agno, agno, pag) \
for_each_perag_wrap_range((mp), (start_agno), 0, (wrap_agno), (agno), (pag))
/*
* Iterate all AGs from start_agno through to the end of the filesystem
* (sb_agcount - 1), then 0 through (start_agno - 1).  Same reference rules
* as for_each_perag_wrap_range().
*/
#define for_each_perag_wrap(mp, start_agno, agno, pag) \
for_each_perag_wrap_at((mp), (start_agno), (mp)->m_sb.sb_agcount, \
(agno), (pag))
struct aghdr_init_data { struct aghdr_init_data {
/* per ag data */ /* per ag data */

View File

@ -264,7 +264,7 @@ xfs_ag_resv_init(
if (error) if (error)
goto out; goto out;
error = xfs_finobt_calc_reserves(mp, tp, pag, &ask, &used); error = xfs_finobt_calc_reserves(pag, tp, &ask, &used);
if (error) if (error)
goto out; goto out;

View File

@ -36,10 +36,6 @@ struct workqueue_struct *xfs_alloc_wq;
#define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2 #define XFSA_FIXUP_CNT_OK 2
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
/* /*
* Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
* the beginning of the block for a proper header with the location information * the beginning of the block for a proper header with the location information
@ -772,8 +768,6 @@ xfs_alloc_cur_setup(
int error; int error;
int i; int i;
ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
acur->cur_len = args->maxlen; acur->cur_len = args->maxlen;
acur->rec_bno = 0; acur->rec_bno = 0;
acur->rec_len = 0; acur->rec_len = 0;
@ -887,7 +881,6 @@ xfs_alloc_cur_check(
* We have an aligned record that satisfies minlen and beats or matches * We have an aligned record that satisfies minlen and beats or matches
* the candidate extent size. Compare locality for near allocation mode. * the candidate extent size. Compare locality for near allocation mode.
*/ */
ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
diff = xfs_alloc_compute_diff(args->agbno, args->len, diff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, args->datatype, args->alignment, args->datatype,
bnoa, lena, &bnew); bnoa, lena, &bnew);
@ -1132,78 +1125,6 @@ error:
return error; return error;
} }
/*
* Allocate a variable extent in the allocation group agno.
* Type and bno are used to determine where in the allocation group the
* extent will start.
* Extent's length (returned in *len) will be between minlen and maxlen,
* and of the form k * prod + mod unless there's nothing that large.
* Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
*/
STATIC int /* error */
xfs_alloc_ag_vextent(
xfs_alloc_arg_t *args) /* argument structure for allocation */
{
int error=0;
ASSERT(args->minlen > 0);
ASSERT(args->maxlen > 0);
ASSERT(args->minlen <= args->maxlen);
ASSERT(args->mod < args->prod);
ASSERT(args->alignment > 0);
/*
* Branch to correct routine based on the type.
*/
args->wasfromfl = 0;
switch (args->type) {
case XFS_ALLOCTYPE_THIS_AG:
error = xfs_alloc_ag_vextent_size(args);
break;
case XFS_ALLOCTYPE_NEAR_BNO:
error = xfs_alloc_ag_vextent_near(args);
break;
case XFS_ALLOCTYPE_THIS_BNO:
error = xfs_alloc_ag_vextent_exact(args);
break;
default:
ASSERT(0);
/* NOTREACHED */
}
if (error || args->agbno == NULLAGBLOCK)
return error;
ASSERT(args->len >= args->minlen);
ASSERT(args->len <= args->maxlen);
ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
ASSERT(args->agbno % args->alignment == 0);
/* if not file data, insert new block into the reverse map btree */
if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
args->agbno, args->len, &args->oinfo);
if (error)
return error;
}
if (!args->wasfromfl) {
error = xfs_alloc_update_counters(args->tp, args->agbp,
-((long)(args->len)));
if (error)
return error;
ASSERT(!xfs_extent_busy_search(args->mp, args->pag,
args->agbno, args->len));
}
xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
XFS_STATS_INC(args->mp, xs_allocx);
XFS_STATS_ADD(args->mp, xs_allocb, args->len);
return error;
}
/* /*
* Allocate a variable extent at exactly agno/bno. * Allocate a variable extent at exactly agno/bno.
* Extent's length (returned in *len) will be between minlen and maxlen, * Extent's length (returned in *len) will be between minlen and maxlen,
@ -1389,7 +1310,6 @@ xfs_alloc_ag_vextent_locality(
bool fbinc; bool fbinc;
ASSERT(acur->len == 0); ASSERT(acur->len == 0);
ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
*stat = 0; *stat = 0;
@ -2435,7 +2355,7 @@ xfs_agfl_reset(
struct xfs_mount *mp = tp->t_mountp; struct xfs_mount *mp = tp->t_mountp;
struct xfs_agf *agf = agbp->b_addr; struct xfs_agf *agf = agbp->b_addr;
ASSERT(pag->pagf_agflreset); ASSERT(xfs_perag_agfl_needs_reset(pag));
trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
xfs_warn(mp, xfs_warn(mp,
@ -2450,7 +2370,7 @@ xfs_agfl_reset(
XFS_AGF_FLCOUNT); XFS_AGF_FLCOUNT);
pag->pagf_flcount = 0; pag->pagf_flcount = 0;
pag->pagf_agflreset = false; clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
} }
/* /*
@ -2605,7 +2525,7 @@ xfs_alloc_fix_freelist(
/* deferred ops (AGFL block frees) require permanent transactions */ /* deferred ops (AGFL block frees) require permanent transactions */
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
if (!pag->pagf_init) { if (!xfs_perag_initialised_agf(pag)) {
error = xfs_alloc_read_agf(pag, tp, flags, &agbp); error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
if (error) { if (error) {
/* Couldn't lock the AGF so skip this AG. */ /* Couldn't lock the AGF so skip this AG. */
@ -2620,7 +2540,8 @@ xfs_alloc_fix_freelist(
* somewhere else if we are not being asked to try harder at this * somewhere else if we are not being asked to try harder at this
* point * point
*/ */
if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) && if (xfs_perag_prefers_metadata(pag) &&
(args->datatype & XFS_ALLOC_USERDATA) &&
(flags & XFS_ALLOC_FLAG_TRYLOCK)) { (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
goto out_agbp_relse; goto out_agbp_relse;
@ -2646,7 +2567,7 @@ xfs_alloc_fix_freelist(
} }
/* reset a padding mismatched agfl before final free space check */ /* reset a padding mismatched agfl before final free space check */
if (pag->pagf_agflreset) if (xfs_perag_agfl_needs_reset(pag))
xfs_agfl_reset(tp, agbp, pag); xfs_agfl_reset(tp, agbp, pag);
/* If there isn't enough total space or single-extent, reject it. */ /* If there isn't enough total space or single-extent, reject it. */
@ -2707,7 +2628,6 @@ xfs_alloc_fix_freelist(
targs.agbp = agbp; targs.agbp = agbp;
targs.agno = args->agno; targs.agno = args->agno;
targs.alignment = targs.minlen = targs.prod = 1; targs.alignment = targs.minlen = targs.prod = 1;
targs.type = XFS_ALLOCTYPE_THIS_AG;
targs.pag = pag; targs.pag = pag;
error = xfs_alloc_read_agfl(pag, tp, &agflbp); error = xfs_alloc_read_agfl(pag, tp, &agflbp);
if (error) if (error)
@ -2720,7 +2640,7 @@ xfs_alloc_fix_freelist(
targs.resv = XFS_AG_RESV_AGFL; targs.resv = XFS_AG_RESV_AGFL;
/* Allocate as many blocks as possible at once. */ /* Allocate as many blocks as possible at once. */
error = xfs_alloc_ag_vextent(&targs); error = xfs_alloc_ag_vextent_size(&targs);
if (error) if (error)
goto out_agflbp_relse; goto out_agflbp_relse;
@ -2734,6 +2654,18 @@ xfs_alloc_fix_freelist(
break; break;
goto out_agflbp_relse; goto out_agflbp_relse;
} }
if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
error = xfs_rmap_alloc(tp, agbp, pag,
targs.agbno, targs.len, &targs.oinfo);
if (error)
goto out_agflbp_relse;
}
error = xfs_alloc_update_counters(tp, agbp,
-((long)(targs.len)));
if (error)
goto out_agflbp_relse;
/* /*
* Put each allocated block on the list. * Put each allocated block on the list.
*/ */
@ -2803,7 +2735,7 @@ xfs_alloc_get_freelist(
if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
agf->agf_flfirst = 0; agf->agf_flfirst = 0;
ASSERT(!pag->pagf_agflreset); ASSERT(!xfs_perag_agfl_needs_reset(pag));
be32_add_cpu(&agf->agf_flcount, -1); be32_add_cpu(&agf->agf_flcount, -1);
pag->pagf_flcount--; pag->pagf_flcount--;
@ -2892,7 +2824,7 @@ xfs_alloc_put_freelist(
if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
agf->agf_fllast = 0; agf->agf_fllast = 0;
ASSERT(!pag->pagf_agflreset); ASSERT(!xfs_perag_agfl_needs_reset(pag));
be32_add_cpu(&agf->agf_flcount, 1); be32_add_cpu(&agf->agf_flcount, 1);
pag->pagf_flcount++; pag->pagf_flcount++;
@ -3099,7 +3031,7 @@ xfs_alloc_read_agf(
return error; return error;
agf = agfbp->b_addr; agf = agfbp->b_addr;
if (!pag->pagf_init) { if (!xfs_perag_initialised_agf(pag)) {
pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
@ -3111,8 +3043,8 @@ xfs_alloc_read_agf(
pag->pagf_levels[XFS_BTNUM_RMAPi] = pag->pagf_levels[XFS_BTNUM_RMAPi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
pag->pagf_init = 1; if (xfs_agfl_needs_reset(pag->pag_mount, agf))
pag->pagf_agflreset = xfs_agfl_needs_reset(pag->pag_mount, agf); set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
/* /*
* Update the in-core allocbt counter. Filter out the rmapbt * Update the in-core allocbt counter. Filter out the rmapbt
@ -3127,6 +3059,8 @@ xfs_alloc_read_agf(
if (allocbt_blks > 0) if (allocbt_blks > 0)
atomic64_add(allocbt_blks, atomic64_add(allocbt_blks,
&pag->pag_mount->m_allocbt_blks); &pag->pag_mount->m_allocbt_blks);
set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
} }
#ifdef DEBUG #ifdef DEBUG
else if (!xfs_is_shutdown(pag->pag_mount)) { else if (!xfs_is_shutdown(pag->pag_mount)) {
@ -3148,26 +3082,25 @@ xfs_alloc_read_agf(
} }
/* /*
* Allocate an extent (variable-size). * Pre-proces allocation arguments to set initial state that we don't require
* Depending on the allocation type, we either look in a single allocation * callers to set up correctly, as well as bounds check the allocation args
* group or loop over the allocation groups to find the result. * that are set up.
*/ */
int /* error */ static int
xfs_alloc_vextent( xfs_alloc_vextent_check_args(
struct xfs_alloc_arg *args) /* allocation argument structure */ struct xfs_alloc_arg *args,
xfs_fsblock_t target,
xfs_agnumber_t *minimum_agno)
{ {
xfs_agblock_t agsize; /* allocation group size */ struct xfs_mount *mp = args->mp;
int error; xfs_agblock_t agsize;
int flags; /* XFS_ALLOC_FLAG_... locking flags */
struct xfs_mount *mp; /* mount structure pointer */ args->fsbno = NULLFSBLOCK;
xfs_agnumber_t sagno; /* starting allocation group number */
xfs_alloctype_t type; /* input allocation type */ *minimum_agno = 0;
int bump_rotor = 0; if (args->tp->t_highest_agno != NULLAGNUMBER)
xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ *minimum_agno = args->tp->t_highest_agno;
mp = args->mp;
type = args->otype = args->type;
args->agbno = NULLAGBLOCK;
/* /*
* Just fix this up, for the case where the last a.g. is shorter * Just fix this up, for the case where the last a.g. is shorter
* (or there's only one a.g.) and the caller couldn't easily figure * (or there's only one a.g.) and the caller couldn't easily figure
@ -3178,170 +3111,416 @@ xfs_alloc_vextent(
args->maxlen = agsize; args->maxlen = agsize;
if (args->alignment == 0) if (args->alignment == 0)
args->alignment = 1; args->alignment = 1;
ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); ASSERT(args->minlen > 0);
ASSERT(args->maxlen > 0);
ASSERT(args->alignment > 0);
ASSERT(args->resv != XFS_AG_RESV_AGFL);
ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
ASSERT(args->minlen <= args->maxlen); ASSERT(args->minlen <= args->maxlen);
ASSERT(args->minlen <= agsize); ASSERT(args->minlen <= agsize);
ASSERT(args->mod < args->prod); ASSERT(args->mod < args->prod);
if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
args->minlen > args->maxlen || args->minlen > agsize || args->minlen > args->maxlen || args->minlen > agsize ||
args->mod >= args->prod) { args->mod >= args->prod) {
args->fsbno = NULLFSBLOCK;
trace_xfs_alloc_vextent_badargs(args); trace_xfs_alloc_vextent_badargs(args);
return 0; return -ENOSPC;
}
if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
trace_xfs_alloc_vextent_skip_deadlock(args);
return -ENOSPC;
}
return 0;
} }
switch (type) {
case XFS_ALLOCTYPE_THIS_AG:
case XFS_ALLOCTYPE_NEAR_BNO:
case XFS_ALLOCTYPE_THIS_BNO:
/* /*
* These three force us into a single a.g. * Prepare an AG for allocation. If the AG is not prepared to accept the
* allocation, return failure.
*
* XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
* modified to hold their own perag references.
*/ */
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); static int
args->pag = xfs_perag_get(mp, args->agno); xfs_alloc_vextent_prepare_ag(
struct xfs_alloc_arg *args)
{
bool need_pag = !args->pag;
int error;
if (need_pag)
args->pag = xfs_perag_get(args->mp, args->agno);
args->agbp = NULL;
error = xfs_alloc_fix_freelist(args, 0); error = xfs_alloc_fix_freelist(args, 0);
if (error) { if (error) {
trace_xfs_alloc_vextent_nofix(args); trace_xfs_alloc_vextent_nofix(args);
goto error0; if (need_pag)
xfs_perag_put(args->pag);
args->agbno = NULLAGBLOCK;
return error;
} }
if (!args->agbp) { if (!args->agbp) {
/* cannot allocate in this AG at all */
trace_xfs_alloc_vextent_noagbp(args); trace_xfs_alloc_vextent_noagbp(args);
args->agbno = NULLAGBLOCK;
return 0;
}
args->wasfromfl = 0;
return 0;
}
/*
* Post-process allocation results to account for the allocation if it succeeded
* and set the allocated block number correctly for the caller.
*
* XXX: we should really be returning ENOSPC for ENOSPC, not
* hiding it behind a "successful" NULLFSBLOCK allocation.
*/
static int
xfs_alloc_vextent_finish(
struct xfs_alloc_arg *args,
xfs_agnumber_t minimum_agno,
int alloc_error,
bool drop_perag)
{
struct xfs_mount *mp = args->mp;
int error = 0;
/*
* We can end up here with a locked AGF. If we failed, the caller is
* likely going to try to allocate again with different parameters, and
* that can widen the AGs that are searched for free space. If we have
* to do BMBT block allocation, we have to do a new allocation.
*
* Hence leaving this function with the AGF locked opens up potential
* ABBA AGF deadlocks because a future allocation attempt in this
* transaction may attempt to lock a lower number AGF.
*
* We can't release the AGF until the transaction is commited, so at
* this point we must update the "first allocation" tracker to point at
* this AG if the tracker is empty or points to a lower AG. This allows
* the next allocation attempt to be modified appropriately to avoid
* deadlocks.
*/
if (args->agbp &&
(args->tp->t_highest_agno == NULLAGNUMBER ||
args->agno > minimum_agno))
args->tp->t_highest_agno = args->agno;
/*
* If the allocation failed with an error or we had an ENOSPC result,
* preserve the returned error whilst also marking the allocation result
* as "no extent allocated". This ensures that callers that fail to
* capture the error will still treat it as a failed allocation.
*/
if (alloc_error || args->agbno == NULLAGBLOCK) {
args->fsbno = NULLFSBLOCK;
error = alloc_error;
goto out_drop_perag;
}
args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
ASSERT(args->len >= args->minlen);
ASSERT(args->len <= args->maxlen);
ASSERT(args->agbno % args->alignment == 0);
XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
/* if not file data, insert new block into the reverse map btree */
if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
args->agbno, args->len, &args->oinfo);
if (error)
goto out_drop_perag;
}
if (!args->wasfromfl) {
error = xfs_alloc_update_counters(args->tp, args->agbp,
-((long)(args->len)));
if (error)
goto out_drop_perag;
ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
args->len));
}
xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
XFS_STATS_INC(mp, xs_allocx);
XFS_STATS_ADD(mp, xs_allocb, args->len);
out_drop_perag:
if (drop_perag && args->pag) {
xfs_perag_rele(args->pag);
args->pag = NULL;
}
return error;
}
/*
 * Allocate an extent from one specific AG and nowhere else.
 *
 * No locality is requested within the AG; the search is a best-fit by length,
 * which makes this the fastest option for an exact sized allocation that has
 * no locality constraints.
 *
 * @args: allocation arguments; the caller is expected to hold a perag
 *	  reference in args->pag, and it is not dropped here.
 * @agno: the AG to allocate from.
 *
 * Returns 0 or a negative errno. -ENOSPC from the argument check is not
 * propagated: 0 is returned instead.
 */
int
xfs_alloc_vextent_this_ag(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		agno)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	int			error;

	/* Size based search, so there is no target block inside the AG. */
	args->agbno = 0;
	args->agno = agno;

	error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
			&minimum_agno);
	if (error == -ENOSPC)
		return 0;
	if (error)
		return error;

	error = xfs_alloc_vextent_prepare_ag(args);
	if (error == 0 && args->agbp != NULL)
		error = xfs_alloc_ag_vextent_size(args);

	/* The perag belongs to the caller: finish must not release it. */
	return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
}
/*
* Iterate all AGs trying to allocate an extent starting from @start_agno.
*
* If @target_agbno is non-zero, it means the allocation attempt in @start_agno
* has locality information. If we fail to allocate in that AG, then we revert
* to anywhere-in-AG for all the other AGs we attempt to allocate in as there is
* no locality optimisation possible for those allocations.
*
* On return, args->pag may be left referenced if we finish before the "all
* failed" return point. The allocation finish still needs the perag, and
* so the caller will release it once they've finished the allocation.
*
* When we wrap the AG iteration at the end of the filesystem, we have to be
* careful not to wrap into AGs below ones we already have locked in the
* transaction if we are doing a blocking iteration. This will result in an
* out-of-order locking of AGFs and hence can cause deadlocks.
*/
static int
xfs_alloc_vextent_iterate_ags(
struct xfs_alloc_arg *args,
xfs_agnumber_t minimum_agno,
xfs_agnumber_t start_agno,
xfs_agblock_t target_agbno,
uint32_t flags)
{
struct xfs_mount *mp = args->mp;
xfs_agnumber_t agno;
int error = 0;
restart:
for_each_perag_wrap_range(mp, start_agno, minimum_agno,
mp->m_sb.sb_agcount, agno, args->pag) {
args->agno = agno;
error = xfs_alloc_vextent_prepare_ag(args);
if (error)
break;
if (!args->agbp) {
trace_xfs_alloc_vextent_loopfailed(args);
continue;
}
/*
* Allocation is supposed to succeed now, so break out of the
* loop regardless of whether we succeed or not.
*/
if (args->agno == start_agno && target_agbno) {
args->agbno = target_agbno;
error = xfs_alloc_ag_vextent_near(args);
} else {
args->agbno = 0;
error = xfs_alloc_ag_vextent_size(args);
}
break; break;
} }
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); if (error) {
if ((error = xfs_alloc_ag_vextent(args))) xfs_perag_rele(args->pag);
goto error0; args->pag = NULL;
break; return error;
case XFS_ALLOCTYPE_START_BNO: }
if (args->agbp)
return 0;
/* /*
* Try near allocation first, then anywhere-in-ag after * We didn't find an AG we can alloation from. If we were given
* the first a.g. fails. * constraining flags by the caller, drop them and retry the allocation
* without any constraints being set.
*/ */
if (flags) {
flags = 0;
goto restart;
}
ASSERT(args->pag == NULL);
trace_xfs_alloc_vextent_allfailed(args);
return 0;
}
/*
* Iterate over the AGs from the start AG to the end of the filesystem, trying
* to allocate blocks. It starts with a near allocation attempt in the initial
* AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
* back to zero if allowed by previous allocations in this transaction,
* otherwise will wrap back to the start AG and run a second blocking pass to
* the end of the filesystem.
*/
int
xfs_alloc_vextent_start_ag(
struct xfs_alloc_arg *args,
xfs_fsblock_t target)
{
struct xfs_mount *mp = args->mp;
xfs_agnumber_t minimum_agno;
xfs_agnumber_t start_agno;
xfs_agnumber_t rotorstep = xfs_rotorstep;
bool bump_rotor = false;
int error;
args->agno = NULLAGNUMBER;
args->agbno = NULLAGBLOCK;
error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0;
return error;
}
if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
xfs_is_inode32(mp)) { xfs_is_inode32(mp)) {
args->fsbno = XFS_AGB_TO_FSB(mp, target = XFS_AGB_TO_FSB(mp,
((mp->m_agfrotor / rotorstep) % ((mp->m_agfrotor / rotorstep) %
mp->m_sb.sb_agcount), 0); mp->m_sb.sb_agcount), 0);
bump_rotor = 1; bump_rotor = 1;
} }
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
args->type = XFS_ALLOCTYPE_NEAR_BNO;
fallthrough;
case XFS_ALLOCTYPE_FIRST_AG:
/*
* Rotate through the allocation groups looking for a winner.
*/
if (type == XFS_ALLOCTYPE_FIRST_AG) {
/*
* Start with allocation group given by bno.
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
args->type = XFS_ALLOCTYPE_THIS_AG;
sagno = 0;
flags = 0;
} else {
/*
* Start with the given allocation group.
*/
args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
flags = XFS_ALLOC_FLAG_TRYLOCK;
}
/*
* Loop over allocation groups twice; first time with
* trylock set, second time without.
*/
for (;;) {
args->pag = xfs_perag_get(mp, args->agno);
error = xfs_alloc_fix_freelist(args, flags);
if (error) {
trace_xfs_alloc_vextent_nofix(args);
goto error0;
}
/*
* If we get a buffer back then the allocation will fly.
*/
if (args->agbp) {
if ((error = xfs_alloc_ag_vextent(args)))
goto error0;
break;
}
trace_xfs_alloc_vextent_loopfailed(args); start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK);
/*
* Didn't work, figure out the next iteration.
*/
if (args->agno == sagno &&
type == XFS_ALLOCTYPE_START_BNO)
args->type = XFS_ALLOCTYPE_THIS_AG;
/*
* For the first allocation, we can try any AG to get
* space. However, if we already have allocated a
* block, we don't want to try AGs whose number is below
* sagno. Otherwise, we may end up with out-of-order
* locking of AGF, which might cause deadlock.
*/
if (++(args->agno) == mp->m_sb.sb_agcount) {
if (args->tp->t_firstblock != NULLFSBLOCK)
args->agno = sagno;
else
args->agno = 0;
}
/*
* Reached the starting a.g., must either be done
* or switch to non-trylock mode.
*/
if (args->agno == sagno) {
if (flags == 0) {
args->agbno = NULLAGBLOCK;
trace_xfs_alloc_vextent_allfailed(args);
break;
}
flags = 0;
if (type == XFS_ALLOCTYPE_START_BNO) {
args->agbno = XFS_FSB_TO_AGBNO(mp,
args->fsbno);
args->type = XFS_ALLOCTYPE_NEAR_BNO;
}
}
xfs_perag_put(args->pag);
}
if (bump_rotor) { if (bump_rotor) {
if (args->agno == sagno) if (args->agno == start_agno)
mp->m_agfrotor = (mp->m_agfrotor + 1) % mp->m_agfrotor = (mp->m_agfrotor + 1) %
(mp->m_sb.sb_agcount * rotorstep); (mp->m_sb.sb_agcount * rotorstep);
else else
mp->m_agfrotor = (args->agno * rotorstep + 1) % mp->m_agfrotor = (args->agno * rotorstep + 1) %
(mp->m_sb.sb_agcount * rotorstep); (mp->m_sb.sb_agcount * rotorstep);
} }
break;
default:
ASSERT(0);
/* NOTREACHED */
}
if (args->agbno == NULLAGBLOCK)
args->fsbno = NULLFSBLOCK;
else {
args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
ASSERT(args->len >= args->minlen);
ASSERT(args->len <= args->maxlen);
ASSERT(args->agbno % args->alignment == 0);
XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
args->len);
#endif
return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
} }
xfs_perag_put(args->pag);
/*
* Iterate from the agno indicated via @target through to the end of the
* filesystem attempting blocking allocation. This does not wrap or try a second
* pass, so will not recurse into AGs lower than indicated by the target.
*/
int
xfs_alloc_vextent_first_ag(
struct xfs_alloc_arg *args,
xfs_fsblock_t target)
{
struct xfs_mount *mp = args->mp;
xfs_agnumber_t minimum_agno;
xfs_agnumber_t start_agno;
int error;
args->agno = NULLAGNUMBER;
args->agbno = NULLAGBLOCK;
error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0; return 0;
error0:
xfs_perag_put(args->pag);
return error; return error;
} }
start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
XFS_FSB_TO_AGBNO(mp, target), 0);
return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
}
/*
 * Exact-block allocation: the extent has to start at @target, otherwise the
 * allocation fails.
 *
 * @args:   allocation arguments; the caller is expected to hold a perag
 *	    reference in args->pag, and it is not dropped here.
 * @target: filesystem block the extent must start at.
 *
 * Returns 0 or a negative errno. -ENOSPC from the argument check is not
 * propagated: 0 is returned instead.
 */
int
xfs_alloc_vextent_exact_bno(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	int			error;

	/* Split the target into its AG number and AG-relative block. */
	args->agbno = XFS_FSB_TO_AGBNO(mp, target);
	args->agno = XFS_FSB_TO_AGNO(mp, target);

	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
	if (error)
		return error == -ENOSPC ? 0 : error;

	error = xfs_alloc_vextent_prepare_ag(args);
	if (error == 0 && args->agbp != NULL)
		error = xfs_alloc_ag_vextent_exact(args);

	/* The perag belongs to the caller: finish must not release it. */
	return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
}
/*
 * Locality based allocation: find an extent as close to @target as possible.
 * If there are no viable candidates in the AG, the allocation fails.
 *
 * @args:   allocation arguments. args->pag may or may not be set on entry;
 *	    when it is NULL a reference is grabbed here and handed to the
 *	    finish step to be released.
 * @target: filesystem block to allocate near.
 *
 * Returns 0 or a negative errno. -ENOSPC from the argument check is not
 * propagated: 0 is returned instead.
 */
int
xfs_alloc_vextent_near_bno(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	bool			own_pag = !args->pag;
	int			error;

	/* Split the target into its AG number and AG-relative block. */
	args->agbno = XFS_FSB_TO_AGBNO(mp, target);
	args->agno = XFS_FSB_TO_AGNO(mp, target);

	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
	if (error)
		return error == -ENOSPC ? 0 : error;

	if (own_pag)
		args->pag = xfs_perag_grab(mp, args->agno);

	error = xfs_alloc_vextent_prepare_ag(args);
	if (error == 0 && args->agbp != NULL)
		error = xfs_alloc_ag_vextent_near(args);

	/* Only a reference taken above is dropped by the finish step. */
	return xfs_alloc_vextent_finish(args, minimum_agno, error, own_pag);
}
/* Ensure that the freelist is at full capacity. */ /* Ensure that the freelist is at full capacity. */
int int
xfs_free_extent_fix_freelist( xfs_free_extent_fix_freelist(

View File

@ -16,25 +16,6 @@ extern struct workqueue_struct *xfs_alloc_wq;
unsigned int xfs_agfl_size(struct xfs_mount *mp); unsigned int xfs_agfl_size(struct xfs_mount *mp);
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
*/
#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */
#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */
#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */
#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */
#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */
/* this should become an enum again when the tracing code is fixed */
typedef unsigned int xfs_alloctype_t;
#define XFS_ALLOC_TYPES \
{ XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \
{ XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \
{ XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \
{ XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \
{ XFS_ALLOCTYPE_THIS_BNO, "THIS_BNO" }
/* /*
* Flags for xfs_alloc_fix_freelist. * Flags for xfs_alloc_fix_freelist.
*/ */
@ -68,8 +49,6 @@ typedef struct xfs_alloc_arg {
xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */ xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */
xfs_agblock_t max_agbno; /* ... */ xfs_agblock_t max_agbno; /* ... */
xfs_extlen_t len; /* output: actual size of extent */ xfs_extlen_t len; /* output: actual size of extent */
xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
xfs_alloctype_t otype; /* original allocation type */
int datatype; /* mask defining data type treatment */ int datatype; /* mask defining data type treatment */
char wasdel; /* set if allocation was prev delayed */ char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */ char wasfromfl; /* set if allocation is from freelist */
@ -118,11 +97,43 @@ xfs_alloc_log_agf(
uint32_t fields);/* mask of fields to be logged (XFS_AGF_...) */ uint32_t fields);/* mask of fields to be logged (XFS_AGF_...) */
/* /*
* Allocate an extent (variable-size). * Allocate an extent anywhere in the specific AG given. If there is no
* space matching the requirements in that AG, then the allocation will fail.
*/ */
int /* error */ int xfs_alloc_vextent_this_ag(struct xfs_alloc_arg *args, xfs_agnumber_t agno);
xfs_alloc_vextent(
xfs_alloc_arg_t *args); /* allocation argument structure */ /*
* Allocate an extent as close to the target as possible. If there are not
* viable candidates in the AG, then fail the allocation.
*/
int xfs_alloc_vextent_near_bno(struct xfs_alloc_arg *args,
xfs_fsblock_t target);
/*
* Allocate an extent exactly at the target given. If this is not possible
* then the allocation fails.
*/
int xfs_alloc_vextent_exact_bno(struct xfs_alloc_arg *args,
xfs_fsblock_t target);
/*
* Best effort full filesystem allocation scan.
*
* Locality aware allocation will be attempted in the initial AG, but on failure
* non-localised attempts will be made. The AGs are constrained by previous
* allocations in the current transaction. Two passes will be made - the first
* non-blocking, the second blocking.
*/
int xfs_alloc_vextent_start_ag(struct xfs_alloc_arg *args,
xfs_fsblock_t target);
/*
* Iterate from the AG indicated by @target through to the end of the
* filesystem attempting blocking allocation. This is for use in last
* resort allocation attempts when everything else has failed.
*/
int xfs_alloc_vextent_first_ag(struct xfs_alloc_arg *args,
xfs_fsblock_t target);
/* /*
* Free an extent. * Free an extent.

View File

@ -315,7 +315,7 @@ xfs_allocbt_verify(
level = be16_to_cpu(block->bb_level); level = be16_to_cpu(block->bb_level);
if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC))
btnum = XFS_BTNUM_CNTi; btnum = XFS_BTNUM_CNTi;
if (pag && pag->pagf_init) { if (pag && xfs_perag_initialised_agf(pag)) {
if (level >= pag->pagf_levels[btnum]) if (level >= pag->pagf_levels[btnum])
return __this_address; return __this_address;
} else if (level >= mp->m_alloc_maxlevels) } else if (level >= mp->m_alloc_maxlevels)

View File

@ -645,34 +645,23 @@ xfs_bmap_extents_to_btree(
args.tp = tp; args.tp = tp;
args.mp = mp; args.mp = mp;
xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork); xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
if (tp->t_firstblock == NULLFSBLOCK) {
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = tp->t_firstblock;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.fsbno = tp->t_firstblock;
}
args.minlen = args.maxlen = args.prod = 1; args.minlen = args.maxlen = args.prod = 1;
args.wasdel = wasdel; args.wasdel = wasdel;
*logflagsp = 0; *logflagsp = 0;
error = xfs_alloc_vextent(&args); error = xfs_alloc_vextent_start_ag(&args,
XFS_INO_TO_FSB(mp, ip->i_ino));
if (error) if (error)
goto out_root_realloc; goto out_root_realloc;
/*
* Allocation can't fail, the space was reserved.
*/
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
error = -ENOSPC; error = -ENOSPC;
goto out_root_realloc; goto out_root_realloc;
} }
/*
* Allocation can't fail, the space was reserved.
*/
ASSERT(tp->t_firstblock == NULLFSBLOCK ||
args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
tp->t_firstblock = args.fsbno;
cur->bc_ino.allocated++; cur->bc_ino.allocated++;
ip->i_nblocks++; ip->i_nblocks++;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
@ -799,28 +788,24 @@ xfs_bmap_local_to_extents(
memset(&args, 0, sizeof(args)); memset(&args, 0, sizeof(args));
args.tp = tp; args.tp = tp;
args.mp = ip->i_mount; args.mp = ip->i_mount;
args.total = total;
args.minlen = args.maxlen = args.prod = 1;
xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0); xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
/* /*
* Allocate a block. We know we need only one, since the * Allocate a block. We know we need only one, since the
* file currently fits in an inode. * file currently fits in an inode.
*/ */
if (tp->t_firstblock == NULLFSBLOCK) {
args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
args.type = XFS_ALLOCTYPE_START_BNO;
} else {
args.fsbno = tp->t_firstblock;
args.type = XFS_ALLOCTYPE_NEAR_BNO;
}
args.total = total; args.total = total;
args.minlen = args.maxlen = args.prod = 1; args.minlen = args.maxlen = args.prod = 1;
error = xfs_alloc_vextent(&args); error = xfs_alloc_vextent_start_ag(&args,
XFS_INO_TO_FSB(args.mp, ip->i_ino));
if (error) if (error)
goto done; goto done;
/* Can't fail, the space was reserved. */ /* Can't fail, the space was reserved. */
ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(args.len == 1); ASSERT(args.len == 1);
tp->t_firstblock = args.fsbno;
error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
XFS_FSB_TO_DADDR(args.mp, args.fsbno), XFS_FSB_TO_DADDR(args.mp, args.fsbno),
args.mp->m_bsize, 0, &bp); args.mp->m_bsize, 0, &bp);
@ -854,8 +839,7 @@ xfs_bmap_local_to_extents(
ifp->if_nextents = 1; ifp->if_nextents = 1;
ip->i_nblocks = 1; ip->i_nblocks = 1;
xfs_trans_mod_dquot_byino(tp, ip, xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
XFS_TRANS_DQ_BCOUNT, 1L);
flags |= xfs_ilog_fext(whichfork); flags |= xfs_ilog_fext(whichfork);
done: done:
@ -3025,9 +3009,7 @@ xfs_bmap_adjacent(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */ struct xfs_bmalloca *ap) /* bmap alloc argument struct */
{ {
xfs_fsblock_t adjust; /* adjustment to block numbers */ xfs_fsblock_t adjust; /* adjustment to block numbers */
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
xfs_mount_t *mp; /* mount point structure */ xfs_mount_t *mp; /* mount point structure */
int nullfb; /* true if ap->firstblock isn't set */
int rt; /* true if inode is realtime */ int rt; /* true if inode is realtime */
#define ISVALID(x,y) \ #define ISVALID(x,y) \
@ -3038,11 +3020,8 @@ xfs_bmap_adjacent(
XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
mp = ap->ip->i_mount; mp = ap->ip->i_mount;
nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
rt = XFS_IS_REALTIME_INODE(ap->ip) && rt = XFS_IS_REALTIME_INODE(ap->ip) &&
(ap->datatype & XFS_ALLOC_USERDATA); (ap->datatype & XFS_ALLOC_USERDATA);
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
ap->tp->t_firstblock);
/* /*
* If allocating at eof, and there's a previous real block, * If allocating at eof, and there's a previous real block,
* try to use its last block as our starting point. * try to use its last block as our starting point.
@ -3101,13 +3080,6 @@ xfs_bmap_adjacent(
prevbno += adjust; prevbno += adjust;
else else
prevdiff += adjust; prevdiff += adjust;
/*
* If the firstblock forbids it, can't use it,
* must use default.
*/
if (!rt && !nullfb &&
XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
prevbno = NULLFSBLOCK;
} }
/* /*
* No previous block or can't follow it, just default. * No previous block or can't follow it, just default.
@ -3143,13 +3115,6 @@ xfs_bmap_adjacent(
gotdiff += adjust - ap->length; gotdiff += adjust - ap->length;
} else } else
gotdiff += adjust; gotdiff += adjust;
/*
* If the firstblock forbids it, can't use it,
* must use default.
*/
if (!rt && !nullfb &&
XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
gotbno = NULLFSBLOCK;
} }
/* /*
* No next block, just default. * No next block, just default.
@ -3170,147 +3135,91 @@ xfs_bmap_adjacent(
#undef ISVALID #undef ISVALID
} }
static int int
xfs_bmap_longest_free_extent( xfs_bmap_longest_free_extent(
struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
xfs_agnumber_t ag, xfs_extlen_t *blen)
xfs_extlen_t *blen,
int *notinit)
{ {
struct xfs_mount *mp = tp->t_mountp;
struct xfs_perag *pag;
xfs_extlen_t longest; xfs_extlen_t longest;
int error = 0; int error = 0;
pag = xfs_perag_get(mp, ag); if (!xfs_perag_initialised_agf(pag)) {
if (!pag->pagf_init) {
error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK, error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK,
NULL); NULL);
if (error) { if (error)
/* Couldn't lock the AGF, so skip this AG. */ return error;
if (error == -EAGAIN) {
*notinit = 1;
error = 0;
}
goto out;
}
} }
longest = xfs_alloc_longest_free_extent(pag, longest = xfs_alloc_longest_free_extent(pag,
xfs_alloc_min_freelist(mp, pag), xfs_alloc_min_freelist(pag->pag_mount, pag),
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
if (*blen < longest) if (*blen < longest)
*blen = longest; *blen = longest;
out: return 0;
xfs_perag_put(pag);
return error;
} }
static void static xfs_extlen_t
xfs_bmap_select_minlen( xfs_bmap_select_minlen(
struct xfs_bmalloca *ap, struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args, struct xfs_alloc_arg *args,
xfs_extlen_t *blen, xfs_extlen_t blen)
int notinit)
{ {
if (notinit || *blen < ap->minlen) {
/* /*
* Since we did a BUF_TRYLOCK above, it is possible that * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
* there is space for this request. * possible that there is enough contiguous free space for this request.
*/ */
args->minlen = ap->minlen; if (blen < ap->minlen)
} else if (*blen < args->maxlen) { return ap->minlen;
/* /*
* If the best seen length is less than the request length, * If the best seen length is less than the request length,
* use the best as the minimum. * use the best as the minimum, otherwise we've got the maxlen we
* were asked for.
*/ */
args->minlen = *blen; if (blen < args->maxlen)
} else { return blen;
/* return args->maxlen;
* Otherwise we've seen an extent as big as maxlen, use that
* as the minimum.
*/
args->minlen = args->maxlen;
}
} }
STATIC int static int
xfs_bmap_btalloc_nullfb( xfs_bmap_btalloc_select_lengths(
struct xfs_bmalloca *ap, struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args, struct xfs_alloc_arg *args,
xfs_extlen_t *blen) xfs_extlen_t *blen)
{ {
struct xfs_mount *mp = ap->ip->i_mount; struct xfs_mount *mp = args->mp;
xfs_agnumber_t ag, startag; struct xfs_perag *pag;
int notinit = 0; xfs_agnumber_t agno, startag;
int error; int error = 0;
if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
args->total = ap->minlen;
args->minlen = ap->minlen;
return 0;
}
args->type = XFS_ALLOCTYPE_START_BNO;
args->total = ap->total; args->total = ap->total;
startag = XFS_FSB_TO_AGNO(mp, ap->blkno);
startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
if (startag == NULLAGNUMBER) if (startag == NULLAGNUMBER)
startag = ag = 0; startag = 0;
while (*blen < args->maxlen) { *blen = 0;
error = xfs_bmap_longest_free_extent(args->tp, ag, blen, for_each_perag_wrap(mp, startag, agno, pag) {
&notinit); error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
if (error) if (error && error != -EAGAIN)
return error; break;
error = 0;
if (++ag == mp->m_sb.sb_agcount) if (*blen >= args->maxlen)
ag = 0;
if (ag == startag)
break; break;
} }
if (pag)
xfs_perag_rele(pag);
xfs_bmap_select_minlen(ap, args, blen, notinit); args->minlen = xfs_bmap_select_minlen(ap, args, *blen);
return 0;
}
STATIC int
xfs_bmap_btalloc_filestreams(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_extlen_t *blen)
{
struct xfs_mount *mp = ap->ip->i_mount;
xfs_agnumber_t ag;
int notinit = 0;
int error;
args->type = XFS_ALLOCTYPE_NEAR_BNO;
args->total = ap->total;
ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
if (ag == NULLAGNUMBER)
ag = 0;
error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
if (error)
return error; return error;
if (*blen < args->maxlen) {
error = xfs_filestream_new_ag(ap, &ag);
if (error)
return error;
error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
&notinit);
if (error)
return error;
}
xfs_bmap_select_minlen(ap, args, blen, notinit);
/*
* Set the failure fallback case to look in the selected AG as stream
* may have moved.
*/
ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
return 0;
} }
/* Update all inode and quota accounting for the allocation we just did. */ /* Update all inode and quota accounting for the allocation we just did. */
@ -3413,21 +3322,7 @@ xfs_bmap_process_allocated_extent(
xfs_fileoff_t orig_offset, xfs_fileoff_t orig_offset,
xfs_extlen_t orig_length) xfs_extlen_t orig_length)
{ {
int nullfb;
nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
/*
* check the allocation happened at the same or higher AG than
* the first block that was allocated.
*/
ASSERT(nullfb ||
XFS_FSB_TO_AGNO(args->mp, ap->tp->t_firstblock) <=
XFS_FSB_TO_AGNO(args->mp, args->fsbno));
ap->blkno = args->fsbno; ap->blkno = args->fsbno;
if (nullfb)
ap->tp->t_firstblock = args->fsbno;
ap->length = args->len; ap->length = args->len;
/* /*
* If the extent size hint is active, we tried to round the * If the extent size hint is active, we tried to round the
@ -3474,7 +3369,6 @@ xfs_bmap_exact_minlen_extent_alloc(
xfs_bmap_compute_alignments(ap, &args); xfs_bmap_compute_alignments(ap, &args);
if (ap->tp->t_firstblock == NULLFSBLOCK) {
/* /*
* Unlike the longest extent available in an AG, we don't track * Unlike the longest extent available in an AG, we don't track
* the length of an AG's shortest extent. * the length of an AG's shortest extent.
@ -3484,13 +3378,8 @@ xfs_bmap_exact_minlen_extent_alloc(
* "debug only" code paths. * "debug only" code paths.
*/ */
ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0); ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
} else {
ap->blkno = ap->tp->t_firstblock;
}
args.fsbno = ap->blkno;
args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
args.type = XFS_ALLOCTYPE_FIRST_AG;
args.minlen = args.maxlen = ap->minlen; args.minlen = args.maxlen = ap->minlen;
args.total = ap->total; args.total = ap->total;
@ -3502,7 +3391,7 @@ xfs_bmap_exact_minlen_extent_alloc(
args.resv = XFS_AG_RESV_NONE; args.resv = XFS_AG_RESV_NONE;
args.datatype = ap->datatype; args.datatype = ap->datatype;
error = xfs_alloc_vextent(&args); error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
if (error) if (error)
return error; return error;
@ -3522,22 +3411,250 @@ xfs_bmap_exact_minlen_extent_alloc(
#endif #endif
STATIC int /*
* If we are not low on available data blocks and we are allocating at
* EOF, optimise allocation for contiguous file extension and/or stripe
* alignment of the new extent.
*
* NOTE: ap->aeof is only set if the allocation length is >= the
* stripe unit and the allocation offset is at the end of file.
*/
static int
xfs_bmap_btalloc_at_eof(
	struct xfs_bmalloca	*ap,
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		blen,
	int			stripe_align,
	bool			ag_only)
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*caller_pag = args->pag;
	int			error;

	/*
	 * Parameters:
	 *   ap           - bmap allocation state; ap->blkno is the target block
	 *                  and ap->offset tells us whether the file already has
	 *                  extents.
	 *   args         - allocator arguments. args->pag may carry a perag
	 *                  reference owned by the caller; it is the same pointer
	 *                  on return.
	 *   blen         - longest free extent length seen by the caller.
	 *   stripe_align - alignment to apply to the aligned attempt.
	 *   ag_only      - if true, the aligned attempt is a near-bno allocation
	 *                  (single AG) instead of a multi-AG scan from ap->blkno.
	 *
	 * Returns 0 with args->fsbno set to the allocated block, 0 with the
	 * args returned to a non-aligned state if nothing could be allocated,
	 * or a negative errno.
	 */

	/*
	 * If there are already extents in the file, try an exact EOF block
	 * allocation to extend the file as a contiguous extent. If that fails,
	 * or it's the first allocation in a file, just try for a stripe aligned
	 * allocation.
	 */
	if (ap->offset) {
		xfs_extlen_t	nextminlen = 0;

		/*
		 * Compute the minlen+alignment for the next case.  Set slop so
		 * that the value of minlen+alignment+slop doesn't go up between
		 * the calls.
		 */
		args->alignment = 1;
		if (blen > stripe_align && blen <= args->maxlen)
			nextminlen = blen - stripe_align;
		else
			nextminlen = args->minlen;
		if (nextminlen + stripe_align > args->minlen + 1)
			args->minalignslop = nextminlen + stripe_align -
					args->minlen - 1;
		else
			args->minalignslop = 0;

		/*
		 * The exact-bno allocator expects a perag in args->pag. Only
		 * take (and later drop) a reference of our own when the caller
		 * did not supply one.
		 */
		if (!caller_pag)
			args->pag = xfs_perag_get(mp,
					XFS_FSB_TO_AGNO(mp, ap->blkno));
		error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
		if (!caller_pag) {
			/*
			 * Clear the pointer together with the reference so no
			 * return path leaves a stale perag in args. A caller
			 * supplied perag must be left in place: the caller
			 * still owns it, and the near-bno attempt below will
			 * use it rather than grabbing and releasing its own.
			 */
			xfs_perag_put(args->pag);
			args->pag = NULL;
		}
		if (error)
			return error;

		if (args->fsbno != NULLFSBLOCK)
			return 0;
		/*
		 * Exact allocation failed. Reset to try an aligned allocation
		 * according to the original allocation specification.
		 */
		args->alignment = stripe_align;
		args->minlen = nextminlen;
		args->minalignslop = 0;
	} else {
		/*
		 * Adjust minlen to try and preserve alignment if we
		 * can't guarantee an aligned maxlen extent.
		 */
		args->alignment = stripe_align;
		if (blen > args->alignment &&
		    blen <= args->maxlen + args->alignment)
			args->minlen = blen - args->alignment;
		args->minalignslop = 0;
	}

	if (ag_only) {
		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
	} else {
		/*
		 * The multi-AG scan manages its own perag references, so hide
		 * the caller's perag for the duration and put it back after.
		 */
		args->pag = NULL;
		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
		ASSERT(args->pag == NULL);
		args->pag = caller_pag;
	}
	if (error)
		return error;

	if (args->fsbno != NULLFSBLOCK)
		return 0;

	/*
	 * Allocation failed, so return the allocation args to their
	 * original non-aligned state so the caller can proceed on allocation
	 * failure as if this function was never called.
	 */
	args->fsbno = ap->blkno;
	args->alignment = 1;
	return 0;
}
/*
 * Low space fallback, used once the earlier allocation attempts have failed.
 *
 * First, if the minimum length was raised above what the caller asked for,
 * drop it back to ap->minlen and retry a locality-first scan starting at
 * ap->blkno, still honouring the total block reservation.
 *
 * If that finds nothing we are critically low on space, so make a last resort
 * attempt: no reserve, no locality, blocking, minimum length, scanning the
 * whole filesystem. When that attempt completes without error the transaction
 * is flagged XFS_TRANS_LOWMODE so that later allocations in it do not waste
 * time on allocation modes that are unlikely to succeed.
 *
 * Returns 0 or a negative errno; args->fsbno tells the caller whether an
 * extent was actually allocated.
 */
int
xfs_bmap_btalloc_low_space(
	struct xfs_bmalloca	*ap,
	struct xfs_alloc_arg	*args)
{
	int			error;

	if (args->minlen > ap->minlen) {
		args->minlen = ap->minlen;
		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
		if (error)
			return error;
		if (args->fsbno != NULLFSBLOCK)
			return 0;
	}

	/* Last ditch attempt before failure is declared. */
	args->total = ap->minlen;
	error = xfs_alloc_vextent_first_ag(args, 0);
	if (!error)
		ap->tp->t_flags |= XFS_TRANS_LOWMODE;
	return error;
}
/*
* Allocation path that goes through xfs_filestream_select_ag().
*
* The selected AG's perag is expected in args->pag after the select call
* (asserted below). Allocation is attempted in that AG only: an EOF-optimised
* attempt when ap->aeof is set, then a near-bno attempt. If neither allocates
* anything, the perag is released and the low space algorithm takes over.
*
* @ap: bmap allocation state.
* @args: allocator arguments; args->pag is NULL on return.
* @stripe_align: alignment handed to the EOF-optimised attempt.
*
* Returns 0 or a negative errno.
*/
static int
xfs_bmap_btalloc_filestreams(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
int stripe_align)
{
xfs_extlen_t blen = 0;
int error = 0;
error = xfs_filestream_select_ag(ap, args, &blen);
if (error)
return error;
ASSERT(args->pag);
/*
* If we are in low space mode, then optimal allocation will fail so
* prepare for minimal allocation and jump to the low space algorithm
* immediately.
*/
if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
args->minlen = ap->minlen;
ASSERT(args->fsbno == NULLFSBLOCK);
goto out_low_space;
}
args->minlen = xfs_bmap_select_minlen(ap, args, blen);
/* ag_only == true: the EOF attempt must stay within the selected AG. */
if (ap->aeof)
error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
true);
/* Nothing allocated yet and no error: try near ap->blkno. */
if (!error && args->fsbno == NULLFSBLOCK)
error = xfs_alloc_vextent_near_bno(args, ap->blkno);
out_low_space:
/*
* We are now done with the perag reference for the filestreams
* association provided by xfs_filestream_select_ag(). Release it now as
* we've either succeeded, had a fatal error or we are out of space and
* need to do a full filesystem scan for free space which will take its
* own references.
*/
xfs_perag_rele(args->pag);
args->pag = NULL;
if (error || args->fsbno != NULLFSBLOCK)
return error;
return xfs_bmap_btalloc_low_space(ap, args);
}
static int
xfs_bmap_btalloc_best_length(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
int stripe_align)
{
xfs_extlen_t blen = 0;
int error;
ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
xfs_bmap_adjacent(ap);
/*
* Search for an allocation group with a single extent large enough for
* the request. If one isn't found, then adjust the minimum allocation
* size to the largest space found.
*/
error = xfs_bmap_btalloc_select_lengths(ap, args, &blen);
if (error)
return error;
/*
* Don't attempt optimal EOF allocation if previous allocations barely
* succeeded due to being near ENOSPC. It is highly unlikely we'll get
* optimal or even aligned allocations in this case, so don't waste time
* trying.
*/
if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
false);
if (error || args->fsbno != NULLFSBLOCK)
return error;
}
error = xfs_alloc_vextent_start_ag(args, ap->blkno);
if (error || args->fsbno != NULLFSBLOCK)
return error;
return xfs_bmap_btalloc_low_space(ap, args);
}
static int
xfs_bmap_btalloc( xfs_bmap_btalloc(
struct xfs_bmalloca *ap) struct xfs_bmalloca *ap)
{ {
struct xfs_mount *mp = ap->ip->i_mount; struct xfs_mount *mp = ap->ip->i_mount;
struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp }; struct xfs_alloc_arg args = {
xfs_alloctype_t atype = 0; .tp = ap->tp,
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ .mp = mp,
xfs_agnumber_t ag; .fsbno = NULLFSBLOCK,
.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE,
.minleft = ap->minleft,
.wasdel = ap->wasdel,
.resv = XFS_AG_RESV_NONE,
.datatype = ap->datatype,
.alignment = 1,
.minalignslop = 0,
};
xfs_fileoff_t orig_offset; xfs_fileoff_t orig_offset;
xfs_extlen_t orig_length; xfs_extlen_t orig_length;
xfs_extlen_t blen;
xfs_extlen_t nextminlen = 0;
int nullfb; /* true if ap->firstblock isn't set */
int isaligned;
int tryagain;
int error; int error;
int stripe_align; int stripe_align;
@ -3547,167 +3664,16 @@ xfs_bmap_btalloc(
stripe_align = xfs_bmap_compute_alignments(ap, &args); stripe_align = xfs_bmap_compute_alignments(ap, &args);
nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
ap->tp->t_firstblock);
if (nullfb) {
if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
}
} else
ap->blkno = ap->tp->t_firstblock;
xfs_bmap_adjacent(ap);
/*
* If allowed, use ap->blkno; otherwise must use firstblock since
* it's in the right allocation group.
*/
if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
;
else
ap->blkno = ap->tp->t_firstblock;
/*
* Normal allocation, done through xfs_alloc_vextent.
*/
tryagain = isaligned = 0;
args.fsbno = ap->blkno;
args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
/* Trim the allocation back to the maximum an AG can fit. */ /* Trim the allocation back to the maximum an AG can fit. */
args.maxlen = min(ap->length, mp->m_ag_max_usable); args.maxlen = min(ap->length, mp->m_ag_max_usable);
blen = 0;
if (nullfb) {
/*
* Search for an allocation group with a single extent large
* enough for the request. If one isn't found, then adjust
* the minimum allocation size to the largest space found.
*/
if ((ap->datatype & XFS_ALLOC_USERDATA) && if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs_inode_is_filestream(ap->ip)) xfs_inode_is_filestream(ap->ip))
error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
else else
error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
if (error) if (error)
return error; return error;
} else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
if (xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_FIRST_AG;
else
args.type = XFS_ALLOCTYPE_START_BNO;
args.total = args.minlen = ap->minlen;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.total = ap->total;
args.minlen = ap->minlen;
}
/*
* If we are not low on available data blocks, and the underlying
* logical volume manager is a stripe, and the file offset is zero then
* try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
* is only set if the allocation length is >= the stripe unit and the
* allocation offset is at the end of file.
*/
if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
if (!ap->offset) {
args.alignment = stripe_align;
atype = args.type;
isaligned = 1;
/*
* Adjust minlen to try and preserve alignment if we
* can't guarantee an aligned maxlen extent.
*/
if (blen > args.alignment &&
blen <= args.maxlen + args.alignment)
args.minlen = blen - args.alignment;
args.minalignslop = 0;
} else {
/*
* First try an exact bno allocation.
* If it fails then do a near or start bno
* allocation with alignment turned on.
*/
atype = args.type;
tryagain = 1;
args.type = XFS_ALLOCTYPE_THIS_BNO;
args.alignment = 1;
/*
* Compute the minlen+alignment for the
* next case. Set slop so that the value
* of minlen+alignment+slop doesn't go up
* between the calls.
*/
if (blen > stripe_align && blen <= args.maxlen)
nextminlen = blen - stripe_align;
else
nextminlen = args.minlen;
if (nextminlen + stripe_align > args.minlen + 1)
args.minalignslop =
nextminlen + stripe_align -
args.minlen - 1;
else
args.minalignslop = 0;
}
} else {
args.alignment = 1;
args.minalignslop = 0;
}
args.minleft = ap->minleft;
args.wasdel = ap->wasdel;
args.resv = XFS_AG_RESV_NONE;
args.datatype = ap->datatype;
error = xfs_alloc_vextent(&args);
if (error)
return error;
if (tryagain && args.fsbno == NULLFSBLOCK) {
/*
* Exact allocation failed. Now try with alignment
* turned on.
*/
args.type = atype;
args.fsbno = ap->blkno;
args.alignment = stripe_align;
args.minlen = nextminlen;
args.minalignslop = 0;
isaligned = 1;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
if (isaligned && args.fsbno == NULLFSBLOCK) {
/*
* allocation failed, so turn off alignment and
* try again.
*/
args.type = atype;
args.fsbno = ap->blkno;
args.alignment = 0;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
if (args.fsbno == NULLFSBLOCK && nullfb &&
args.minlen > ap->minlen) {
args.minlen = ap->minlen;
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = ap->blkno;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
if (args.fsbno == NULLFSBLOCK && nullfb) {
args.fsbno = 0;
args.type = XFS_ALLOCTYPE_FIRST_AG;
args.total = ap->minlen;
if ((error = xfs_alloc_vextent(&args)))
return error;
ap->tp->t_flags |= XFS_TRANS_LOWMODE;
}
if (args.fsbno != NULLFSBLOCK) { if (args.fsbno != NULLFSBLOCK) {
xfs_bmap_process_allocated_extent(ap, &args, orig_offset, xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
@ -4256,7 +4222,7 @@ xfs_bmapi_convert_unwritten(
return 0; return 0;
} }
static inline xfs_extlen_t xfs_extlen_t
xfs_bmapi_minleft( xfs_bmapi_minleft(
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_inode *ip, struct xfs_inode *ip,
@ -4264,7 +4230,7 @@ xfs_bmapi_minleft(
{ {
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, fork); struct xfs_ifork *ifp = xfs_ifork_ptr(ip, fork);
if (tp && tp->t_firstblock != NULLFSBLOCK) if (tp && tp->t_highest_agno != NULLAGNUMBER)
return 0; return 0;
if (ifp->if_format != XFS_DINODE_FMT_BTREE) if (ifp->if_format != XFS_DINODE_FMT_BTREE)
return 1; return 1;
@ -6151,7 +6117,7 @@ xfs_bmap_finish_one(
struct xfs_bmbt_irec *bmap = &bi->bi_bmap; struct xfs_bmbt_irec *bmap = &bi->bi_bmap;
int error = 0; int error = 0;
ASSERT(tp->t_firstblock == NULLFSBLOCK); ASSERT(tp->t_highest_agno == NULLAGNUMBER);
trace_xfs_bmap_deferred(tp->t_mountp, trace_xfs_bmap_deferred(tp->t_mountp,
XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),

View File

@ -12,6 +12,7 @@ struct xfs_ifork;
struct xfs_inode; struct xfs_inode;
struct xfs_mount; struct xfs_mount;
struct xfs_trans; struct xfs_trans;
struct xfs_alloc_arg;
/* /*
* Argument structure for xfs_bmap_alloc. * Argument structure for xfs_bmap_alloc.
@ -168,6 +169,8 @@ static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec)
#define xfs_valid_startblock(ip, startblock) \ #define xfs_valid_startblock(ip, startblock) \
((startblock) != 0 || XFS_IS_REALTIME_INODE(ip)) ((startblock) != 0 || XFS_IS_REALTIME_INODE(ip))
int xfs_bmap_longest_free_extent(struct xfs_perag *pag,
struct xfs_trans *tp, xfs_extlen_t *blen);
void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len); xfs_filblks_t len);
unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp); unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp);
@ -220,6 +223,10 @@ int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp,
struct xfs_inode *ip, int whichfork, struct xfs_inode *ip, int whichfork,
struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp,
struct xfs_bmbt_irec *new, int *logflagsp); struct xfs_bmbt_irec *new, int *logflagsp);
xfs_extlen_t xfs_bmapi_minleft(struct xfs_trans *tp, struct xfs_inode *ip,
int fork);
int xfs_bmap_btalloc_low_space(struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args);
enum xfs_bmap_intent_type { enum xfs_bmap_intent_type {
XFS_BMAP_MAP = 1, XFS_BMAP_MAP = 1,

View File

@ -21,6 +21,7 @@
#include "xfs_quota.h" #include "xfs_quota.h"
#include "xfs_trace.h" #include "xfs_trace.h"
#include "xfs_rmap.h" #include "xfs_rmap.h"
#include "xfs_ag.h"
static struct kmem_cache *xfs_bmbt_cur_cache; static struct kmem_cache *xfs_bmbt_cur_cache;
@ -184,11 +185,11 @@ xfs_bmbt_update_cursor(
struct xfs_btree_cur *src, struct xfs_btree_cur *src,
struct xfs_btree_cur *dst) struct xfs_btree_cur *dst)
{ {
ASSERT((dst->bc_tp->t_firstblock != NULLFSBLOCK) || ASSERT((dst->bc_tp->t_highest_agno != NULLAGNUMBER) ||
(dst->bc_ino.ip->i_diflags & XFS_DIFLAG_REALTIME)); (dst->bc_ino.ip->i_diflags & XFS_DIFLAG_REALTIME));
dst->bc_ino.allocated += src->bc_ino.allocated; dst->bc_ino.allocated += src->bc_ino.allocated;
dst->bc_tp->t_firstblock = src->bc_tp->t_firstblock; dst->bc_tp->t_highest_agno = src->bc_tp->t_highest_agno;
src->bc_ino.allocated = 0; src->bc_ino.allocated = 0;
} }
@ -200,46 +201,32 @@ xfs_bmbt_alloc_block(
union xfs_btree_ptr *new, union xfs_btree_ptr *new,
int *stat) int *stat)
{ {
xfs_alloc_arg_t args; /* block allocation args */ struct xfs_alloc_arg args;
int error; /* error return value */ int error;
memset(&args, 0, sizeof(args)); memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp; args.tp = cur->bc_tp;
args.mp = cur->bc_mp; args.mp = cur->bc_mp;
args.fsbno = cur->bc_tp->t_firstblock;
xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino, xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino,
cur->bc_ino.whichfork); cur->bc_ino.whichfork);
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
args.type = XFS_ALLOCTYPE_START_BNO;
/*
* Make sure there is sufficient room left in the AG to
* complete a full tree split for an extent insert. If
* we are converting the middle part of an extent then
* we may need space for two tree splits.
*
* We are relying on the caller to make the correct block
* reservation for this operation to succeed. If the
* reservation amount is insufficient then we may fail a
* block allocation here and corrupt the filesystem.
*/
args.minleft = args.tp->t_blk_res;
} else if (cur->bc_tp->t_flags & XFS_TRANS_LOWMODE) {
args.type = XFS_ALLOCTYPE_START_BNO;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
}
args.minlen = args.maxlen = args.prod = 1; args.minlen = args.maxlen = args.prod = 1;
args.wasdel = cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL; args.wasdel = cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL;
if (!args.wasdel && args.tp->t_blk_res == 0) { if (!args.wasdel && args.tp->t_blk_res == 0)
error = -ENOSPC; return -ENOSPC;
goto error0;
} /*
error = xfs_alloc_vextent(&args); * If we are coming here from something like unwritten extent
* conversion, there has been no data extent allocation already done, so
* we have to ensure that we attempt to locate the entire set of bmbt
* allocations in the same AG, as xfs_bmapi_write() would have reserved.
*/
if (cur->bc_tp->t_highest_agno == NULLAGNUMBER)
args.minleft = xfs_bmapi_minleft(cur->bc_tp, cur->bc_ino.ip,
cur->bc_ino.whichfork);
error = xfs_alloc_vextent_start_ag(&args, be64_to_cpu(start->l));
if (error) if (error)
goto error0; return error;
if (args.fsbno == NULLFSBLOCK && args.minleft) { if (args.fsbno == NULLFSBLOCK && args.minleft) {
/* /*
@ -247,11 +234,10 @@ xfs_bmbt_alloc_block(
* a full btree split. Try again and if * a full btree split. Try again and if
* successful activate the lowspace algorithm. * successful activate the lowspace algorithm.
*/ */
args.fsbno = 0; args.minleft = 0;
args.type = XFS_ALLOCTYPE_FIRST_AG; error = xfs_alloc_vextent_start_ag(&args, 0);
error = xfs_alloc_vextent(&args);
if (error) if (error)
goto error0; return error;
cur->bc_tp->t_flags |= XFS_TRANS_LOWMODE; cur->bc_tp->t_flags |= XFS_TRANS_LOWMODE;
} }
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
@ -260,7 +246,6 @@ xfs_bmbt_alloc_block(
} }
ASSERT(args.len == 1); ASSERT(args.len == 1);
cur->bc_tp->t_firstblock = args.fsbno;
cur->bc_ino.allocated++; cur->bc_ino.allocated++;
cur->bc_ino.ip->i_nblocks++; cur->bc_ino.ip->i_nblocks++;
xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE); xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE);
@ -271,9 +256,6 @@ xfs_bmbt_alloc_block(
*stat = 1; *stat = 1;
return 0; return 0;
error0:
return error;
} }
STATIC int STATIC int

View File

@ -2943,7 +2943,7 @@ xfs_btree_split(
DECLARE_COMPLETION_ONSTACK(done); DECLARE_COMPLETION_ONSTACK(done);
if (cur->bc_btnum != XFS_BTNUM_BMAP || if (cur->bc_btnum != XFS_BTNUM_BMAP ||
cur->bc_tp->t_firstblock == NULLFSBLOCK) cur->bc_tp->t_highest_agno == NULLAGNUMBER)
return __xfs_btree_split(cur, level, ptrp, key, curp, stat); return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
args.cur = cur; args.cur = cur;

View File

@ -169,10 +169,9 @@ xfs_inobt_insert_rec(
*/ */
STATIC int STATIC int
xfs_inobt_insert( xfs_inobt_insert(
struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_buf *agbp, struct xfs_buf *agbp,
struct xfs_perag *pag,
xfs_agino_t newino, xfs_agino_t newino,
xfs_agino_t newlen, xfs_agino_t newlen,
xfs_btnum_t btnum) xfs_btnum_t btnum)
@ -182,7 +181,7 @@ xfs_inobt_insert(
int i; int i;
int error; int error;
cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum); cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
for (thisino = newino; for (thisino = newino;
thisino < newino + newlen; thisino < newino + newlen;
@ -514,20 +513,20 @@ __xfs_inobt_rec_merge(
*/ */
STATIC int STATIC int
xfs_inobt_insert_sprec( xfs_inobt_insert_sprec(
struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_buf *agbp, struct xfs_buf *agbp,
struct xfs_perag *pag,
int btnum, int btnum,
struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */ struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */
bool merge) /* merge or replace */ bool merge) /* merge or replace */
{ {
struct xfs_mount *mp = pag->pag_mount;
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
int error; int error;
int i; int i;
struct xfs_inobt_rec_incore rec; struct xfs_inobt_rec_incore rec;
cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum); cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
/* the new record is pre-aligned so we know where to look */ /* the new record is pre-aligned so we know where to look */
error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
@ -609,9 +608,9 @@ error:
*/ */
STATIC int STATIC int
xfs_ialloc_ag_alloc( xfs_ialloc_ag_alloc(
struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_buf *agbp, struct xfs_buf *agbp)
struct xfs_perag *pag)
{ {
struct xfs_agi *agi; struct xfs_agi *agi;
struct xfs_alloc_arg args; struct xfs_alloc_arg args;
@ -631,6 +630,7 @@ xfs_ialloc_ag_alloc(
args.mp = tp->t_mountp; args.mp = tp->t_mountp;
args.fsbno = NULLFSBLOCK; args.fsbno = NULLFSBLOCK;
args.oinfo = XFS_RMAP_OINFO_INODES; args.oinfo = XFS_RMAP_OINFO_INODES;
args.pag = pag;
#ifdef DEBUG #ifdef DEBUG
/* randomly do sparse inode allocations */ /* randomly do sparse inode allocations */
@ -662,8 +662,6 @@ xfs_ialloc_ag_alloc(
goto sparse_alloc; goto sparse_alloc;
if (likely(newino != NULLAGINO && if (likely(newino != NULLAGINO &&
(args.agbno < be32_to_cpu(agi->agi_length)))) { (args.agbno < be32_to_cpu(agi->agi_length)))) {
args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
args.type = XFS_ALLOCTYPE_THIS_BNO;
args.prod = 1; args.prod = 1;
/* /*
@ -684,7 +682,10 @@ xfs_ialloc_ag_alloc(
/* Allow space for the inode btree to split. */ /* Allow space for the inode btree to split. */
args.minleft = igeo->inobt_maxlevels; args.minleft = igeo->inobt_maxlevels;
if ((error = xfs_alloc_vextent(&args))) error = xfs_alloc_vextent_exact_bno(&args,
XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
args.agbno));
if (error)
return error; return error;
/* /*
@ -716,23 +717,18 @@ xfs_ialloc_ag_alloc(
isaligned = 1; isaligned = 1;
} else } else
args.alignment = igeo->cluster_align; args.alignment = igeo->cluster_align;
/*
* Need to figure out where to allocate the inode blocks.
* Ideally they should be spaced out through the a.g.
* For now, just allocate blocks up front.
*/
args.agbno = be32_to_cpu(agi->agi_root);
args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
/* /*
* Allocate a fixed-size extent of inodes. * Allocate a fixed-size extent of inodes.
*/ */
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.prod = 1; args.prod = 1;
/* /*
* Allow space for the inode btree to split. * Allow space for the inode btree to split.
*/ */
args.minleft = igeo->inobt_maxlevels; args.minleft = igeo->inobt_maxlevels;
if ((error = xfs_alloc_vextent(&args))) error = xfs_alloc_vextent_near_bno(&args,
XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
be32_to_cpu(agi->agi_root)));
if (error)
return error; return error;
} }
@ -741,11 +737,11 @@ xfs_ialloc_ag_alloc(
* alignment. * alignment.
*/ */
if (isaligned && args.fsbno == NULLFSBLOCK) { if (isaligned && args.fsbno == NULLFSBLOCK) {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.agbno = be32_to_cpu(agi->agi_root);
args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
args.alignment = igeo->cluster_align; args.alignment = igeo->cluster_align;
if ((error = xfs_alloc_vextent(&args))) error = xfs_alloc_vextent_near_bno(&args,
XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
be32_to_cpu(agi->agi_root)));
if (error)
return error; return error;
} }
@ -757,9 +753,6 @@ xfs_ialloc_ag_alloc(
igeo->ialloc_min_blks < igeo->ialloc_blks && igeo->ialloc_min_blks < igeo->ialloc_blks &&
args.fsbno == NULLFSBLOCK) { args.fsbno == NULLFSBLOCK) {
sparse_alloc: sparse_alloc:
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.agbno = be32_to_cpu(agi->agi_root);
args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
args.alignment = args.mp->m_sb.sb_spino_align; args.alignment = args.mp->m_sb.sb_spino_align;
args.prod = 1; args.prod = 1;
@ -781,7 +774,9 @@ sparse_alloc:
args.mp->m_sb.sb_inoalignmt) - args.mp->m_sb.sb_inoalignmt) -
igeo->ialloc_blks; igeo->ialloc_blks;
error = xfs_alloc_vextent(&args); error = xfs_alloc_vextent_near_bno(&args,
XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
be32_to_cpu(agi->agi_root)));
if (error) if (error)
return error; return error;
@ -831,7 +826,7 @@ sparse_alloc:
* if necessary. If a merge does occur, rec is updated to the * if necessary. If a merge does occur, rec is updated to the
* merged record. * merged record.
*/ */
error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag, error = xfs_inobt_insert_sprec(pag, tp, agbp,
XFS_BTNUM_INO, &rec, true); XFS_BTNUM_INO, &rec, true);
if (error == -EFSCORRUPTED) { if (error == -EFSCORRUPTED) {
xfs_alert(args.mp, xfs_alert(args.mp,
@ -856,20 +851,20 @@ sparse_alloc:
* existing record with this one. * existing record with this one.
*/ */
if (xfs_has_finobt(args.mp)) { if (xfs_has_finobt(args.mp)) {
error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag, error = xfs_inobt_insert_sprec(pag, tp, agbp,
XFS_BTNUM_FINO, &rec, false); XFS_BTNUM_FINO, &rec, false);
if (error) if (error)
return error; return error;
} }
} else { } else {
/* full chunk - insert new records to both btrees */ /* full chunk - insert new records to both btrees */
error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, newlen, error = xfs_inobt_insert(pag, tp, agbp, newino, newlen,
XFS_BTNUM_INO); XFS_BTNUM_INO);
if (error) if (error)
return error; return error;
if (xfs_has_finobt(args.mp)) { if (xfs_has_finobt(args.mp)) {
error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, error = xfs_inobt_insert(pag, tp, agbp, newino,
newlen, XFS_BTNUM_FINO); newlen, XFS_BTNUM_FINO);
if (error) if (error)
return error; return error;
@ -981,9 +976,9 @@ xfs_inobt_first_free_inode(
*/ */
STATIC int STATIC int
xfs_dialloc_ag_inobt( xfs_dialloc_ag_inobt(
struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_buf *agbp, struct xfs_buf *agbp,
struct xfs_perag *pag,
xfs_ino_t parent, xfs_ino_t parent,
xfs_ino_t *inop) xfs_ino_t *inop)
{ {
@ -999,12 +994,12 @@ xfs_dialloc_ag_inobt(
int i, j; int i, j;
int searchdistance = 10; int searchdistance = 10;
ASSERT(pag->pagi_init); ASSERT(xfs_perag_initialised_agi(pag));
ASSERT(pag->pagi_inodeok); ASSERT(xfs_perag_allows_inodes(pag));
ASSERT(pag->pagi_freecount > 0); ASSERT(pag->pagi_freecount > 0);
restart_pagno: restart_pagno:
cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
/* /*
* If pagino is 0 (this is the root inode allocation) use newino. * If pagino is 0 (this is the root inode allocation) use newino.
* This must work because we've just allocated some. * This must work because we've just allocated some.
@ -1429,9 +1424,9 @@ xfs_dialloc_ag_update_inobt(
*/ */
static int static int
xfs_dialloc_ag( xfs_dialloc_ag(
struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_buf *agbp, struct xfs_buf *agbp,
struct xfs_perag *pag,
xfs_ino_t parent, xfs_ino_t parent,
xfs_ino_t *inop) xfs_ino_t *inop)
{ {
@ -1448,7 +1443,7 @@ xfs_dialloc_ag(
int i; int i;
if (!xfs_has_finobt(mp)) if (!xfs_has_finobt(mp))
return xfs_dialloc_ag_inobt(tp, agbp, pag, parent, inop); return xfs_dialloc_ag_inobt(pag, tp, agbp, parent, inop);
/* /*
* If pagino is 0 (this is the root inode allocation) use newino. * If pagino is 0 (this is the root inode allocation) use newino.
@ -1457,7 +1452,7 @@ xfs_dialloc_ag(
if (!pagino) if (!pagino)
pagino = be32_to_cpu(agi->agi_newino); pagino = be32_to_cpu(agi->agi_newino);
cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO); cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
error = xfs_check_agi_freecount(cur); error = xfs_check_agi_freecount(cur);
if (error) if (error)
@ -1500,7 +1495,7 @@ xfs_dialloc_ag(
* the original freecount. If all is well, make the equivalent update to * the original freecount. If all is well, make the equivalent update to
* the inobt using the finobt record and offset information. * the inobt using the finobt record and offset information.
*/ */
icur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
error = xfs_check_agi_freecount(icur); error = xfs_check_agi_freecount(icur);
if (error) if (error)
@ -1577,25 +1572,10 @@ xfs_dialloc_roll(
return error; return error;
} }
/*
 * Return the current value of the mount's inode allocation rotor and advance
 * the rotor by one, wrapping back to 0 once it reaches m_maxagi. The read and
 * the update are both done under m_agirotor_lock.
 */
static xfs_agnumber_t
xfs_ialloc_next_ag(
	xfs_mount_t	*mp)
{
	xfs_agnumber_t	agno;

	spin_lock(&mp->m_agirotor_lock);
	agno = mp->m_agirotor;
	mp->m_agirotor++;
	if (mp->m_agirotor >= mp->m_maxagi)
		mp->m_agirotor = 0;
	spin_unlock(&mp->m_agirotor_lock);

	return agno;
}
static bool static bool
xfs_dialloc_good_ag( xfs_dialloc_good_ag(
struct xfs_trans *tp,
struct xfs_perag *pag, struct xfs_perag *pag,
struct xfs_trans *tp,
umode_t mode, umode_t mode,
int flags, int flags,
bool ok_alloc) bool ok_alloc)
@ -1606,10 +1586,12 @@ xfs_dialloc_good_ag(
int needspace; int needspace;
int error; int error;
if (!pag->pagi_inodeok) if (!pag)
return false;
if (!xfs_perag_allows_inodes(pag))
return false; return false;
if (!pag->pagi_init) { if (!xfs_perag_initialised_agi(pag)) {
error = xfs_ialloc_read_agi(pag, tp, NULL); error = xfs_ialloc_read_agi(pag, tp, NULL);
if (error) if (error)
return false; return false;
@ -1620,7 +1602,7 @@ xfs_dialloc_good_ag(
if (!ok_alloc) if (!ok_alloc)
return false; return false;
if (!pag->pagf_init) { if (!xfs_perag_initialised_agf(pag)) {
error = xfs_alloc_read_agf(pag, tp, flags, NULL); error = xfs_alloc_read_agf(pag, tp, flags, NULL);
if (error) if (error)
return false; return false;
@ -1665,8 +1647,8 @@ xfs_dialloc_good_ag(
static int static int
xfs_dialloc_try_ag( xfs_dialloc_try_ag(
struct xfs_trans **tpp,
struct xfs_perag *pag, struct xfs_perag *pag,
struct xfs_trans **tpp,
xfs_ino_t parent, xfs_ino_t parent,
xfs_ino_t *new_ino, xfs_ino_t *new_ino,
bool ok_alloc) bool ok_alloc)
@ -1689,7 +1671,7 @@ xfs_dialloc_try_ag(
goto out_release; goto out_release;
} }
error = xfs_ialloc_ag_alloc(*tpp, agbp, pag); error = xfs_ialloc_ag_alloc(pag, *tpp, agbp);
if (error < 0) if (error < 0)
goto out_release; goto out_release;
@ -1705,7 +1687,7 @@ xfs_dialloc_try_ag(
} }
/* Allocate an inode in the found AG */ /* Allocate an inode in the found AG */
error = xfs_dialloc_ag(*tpp, agbp, pag, parent, &ino); error = xfs_dialloc_ag(pag, *tpp, agbp, parent, &ino);
if (!error) if (!error)
*new_ino = ino; *new_ino = ino;
return error; return error;
@ -1737,8 +1719,9 @@ xfs_dialloc(
struct xfs_perag *pag; struct xfs_perag *pag;
struct xfs_ino_geometry *igeo = M_IGEO(mp); struct xfs_ino_geometry *igeo = M_IGEO(mp);
bool ok_alloc = true; bool ok_alloc = true;
bool low_space = false;
int flags; int flags;
xfs_ino_t ino; xfs_ino_t ino = NULLFSINO;
/* /*
* Directories, symlinks, and regular files frequently allocate at least * Directories, symlinks, and regular files frequently allocate at least
@ -1746,7 +1729,8 @@ xfs_dialloc(
* an AG has enough space for file creation. * an AG has enough space for file creation.
*/ */
if (S_ISDIR(mode)) if (S_ISDIR(mode))
start_agno = xfs_ialloc_next_ag(mp); start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) %
mp->m_maxagi;
else { else {
start_agno = XFS_INO_TO_AGNO(mp, parent); start_agno = XFS_INO_TO_AGNO(mp, parent);
if (start_agno >= mp->m_maxagi) if (start_agno >= mp->m_maxagi)
@ -1767,42 +1751,56 @@ xfs_dialloc(
ok_alloc = false; ok_alloc = false;
} }
/*
* If we are near to ENOSPC, we want to prefer allocation from AGs that
* have free inodes in them rather than use up free space allocating new
* inode chunks. Hence we turn off allocation for the first non-blocking
* pass through the AGs if we are near ENOSPC to consume free inodes
* that we can immediately allocate, but then we allow allocation on the
* second pass if we fail to find an AG with free inodes in it.
*/
if (percpu_counter_read_positive(&mp->m_fdblocks) <
mp->m_low_space[XFS_LOWSP_1_PCNT]) {
ok_alloc = false;
low_space = true;
}
/* /*
* Loop until we find an allocation group that either has free inodes * Loop until we find an allocation group that either has free inodes
* or in which we can allocate some inodes. Iterate through the * or in which we can allocate some inodes. Iterate through the
* allocation groups upward, wrapping at the end. * allocation groups upward, wrapping at the end.
*/ */
agno = start_agno;
flags = XFS_ALLOC_FLAG_TRYLOCK; flags = XFS_ALLOC_FLAG_TRYLOCK;
for (;;) { retry:
pag = xfs_perag_get(mp, agno); for_each_perag_wrap_at(mp, start_agno, mp->m_maxagi, agno, pag) {
if (xfs_dialloc_good_ag(*tpp, pag, mode, flags, ok_alloc)) { if (xfs_dialloc_good_ag(pag, *tpp, mode, flags, ok_alloc)) {
error = xfs_dialloc_try_ag(tpp, pag, parent, error = xfs_dialloc_try_ag(pag, tpp, parent,
&ino, ok_alloc); &ino, ok_alloc);
if (error != -EAGAIN) if (error != -EAGAIN)
break; break;
error = 0;
} }
if (xfs_is_shutdown(mp)) { if (xfs_is_shutdown(mp)) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
break; break;
} }
if (++agno == mp->m_maxagi)
agno = 0;
if (agno == start_agno) {
if (!flags) {
error = -ENOSPC;
break;
} }
flags = 0; if (pag)
} xfs_perag_rele(pag);
xfs_perag_put(pag); if (error)
}
if (!error)
*new_ino = ino;
xfs_perag_put(pag);
return error; return error;
if (ino == NULLFSINO) {
if (flags) {
flags = 0;
if (low_space)
ok_alloc = true;
goto retry;
}
return -ENOSPC;
}
*new_ino = ino;
return 0;
} }
/* /*
@ -1885,14 +1883,14 @@ next:
STATIC int STATIC int
xfs_difree_inobt( xfs_difree_inobt(
struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_buf *agbp, struct xfs_buf *agbp,
struct xfs_perag *pag,
xfs_agino_t agino, xfs_agino_t agino,
struct xfs_icluster *xic, struct xfs_icluster *xic,
struct xfs_inobt_rec_incore *orec) struct xfs_inobt_rec_incore *orec)
{ {
struct xfs_mount *mp = pag->pag_mount;
struct xfs_agi *agi = agbp->b_addr; struct xfs_agi *agi = agbp->b_addr;
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
struct xfs_inobt_rec_incore rec; struct xfs_inobt_rec_incore rec;
@ -1907,7 +1905,7 @@ xfs_difree_inobt(
/* /*
* Initialize the cursor. * Initialize the cursor.
*/ */
cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
error = xfs_check_agi_freecount(cur); error = xfs_check_agi_freecount(cur);
if (error) if (error)
@ -2019,20 +2017,20 @@ error0:
*/ */
STATIC int STATIC int
xfs_difree_finobt( xfs_difree_finobt(
struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_buf *agbp, struct xfs_buf *agbp,
struct xfs_perag *pag,
xfs_agino_t agino, xfs_agino_t agino,
struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
{ {
struct xfs_mount *mp = pag->pag_mount;
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
struct xfs_inobt_rec_incore rec; struct xfs_inobt_rec_incore rec;
int offset = agino - ibtrec->ir_startino; int offset = agino - ibtrec->ir_startino;
int error; int error;
int i; int i;
cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO); cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
if (error) if (error)
@ -2179,7 +2177,7 @@ xfs_difree(
/* /*
* Fix up the inode allocation btree. * Fix up the inode allocation btree.
*/ */
error = xfs_difree_inobt(mp, tp, agbp, pag, agino, xic, &rec); error = xfs_difree_inobt(pag, tp, agbp, agino, xic, &rec);
if (error) if (error)
goto error0; goto error0;
@ -2187,7 +2185,7 @@ xfs_difree(
* Fix up the free inode btree. * Fix up the free inode btree.
*/ */
if (xfs_has_finobt(mp)) { if (xfs_has_finobt(mp)) {
error = xfs_difree_finobt(mp, tp, agbp, pag, agino, &rec); error = xfs_difree_finobt(pag, tp, agbp, agino, &rec);
if (error) if (error)
goto error0; goto error0;
} }
@ -2200,15 +2198,15 @@ error0:
STATIC int STATIC int
xfs_imap_lookup( xfs_imap_lookup(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_perag *pag, struct xfs_perag *pag,
struct xfs_trans *tp,
xfs_agino_t agino, xfs_agino_t agino,
xfs_agblock_t agbno, xfs_agblock_t agbno,
xfs_agblock_t *chunk_agbno, xfs_agblock_t *chunk_agbno,
xfs_agblock_t *offset_agbno, xfs_agblock_t *offset_agbno,
int flags) int flags)
{ {
struct xfs_mount *mp = pag->pag_mount;
struct xfs_inobt_rec_incore rec; struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
struct xfs_buf *agbp; struct xfs_buf *agbp;
@ -2229,7 +2227,7 @@ xfs_imap_lookup(
* we have a record, we need to ensure it contains the inode number * we have a record, we need to ensure it contains the inode number
* we are looking up. * we are looking up.
*/ */
cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
if (!error) { if (!error) {
if (i) if (i)
@ -2263,12 +2261,13 @@ xfs_imap_lookup(
*/ */
int int
xfs_imap( xfs_imap(
struct xfs_mount *mp, /* file system mount structure */ struct xfs_perag *pag,
struct xfs_trans *tp, /* transaction pointer */ struct xfs_trans *tp,
xfs_ino_t ino, /* inode to locate */ xfs_ino_t ino, /* inode to locate */
struct xfs_imap *imap, /* location map structure */ struct xfs_imap *imap, /* location map structure */
uint flags) /* flags for inode btree lookup */ uint flags) /* flags for inode btree lookup */
{ {
struct xfs_mount *mp = pag->pag_mount;
xfs_agblock_t agbno; /* block number of inode in the alloc group */ xfs_agblock_t agbno; /* block number of inode in the alloc group */
xfs_agino_t agino; /* inode number within alloc group */ xfs_agino_t agino; /* inode number within alloc group */
xfs_agblock_t chunk_agbno; /* first block in inode chunk */ xfs_agblock_t chunk_agbno; /* first block in inode chunk */
@ -2276,17 +2275,15 @@ xfs_imap(
int error; /* error code */ int error; /* error code */
int offset; /* index of inode in its buffer */ int offset; /* index of inode in its buffer */
xfs_agblock_t offset_agbno; /* blks from chunk start to inode */ xfs_agblock_t offset_agbno; /* blks from chunk start to inode */
struct xfs_perag *pag;
ASSERT(ino != NULLFSINO); ASSERT(ino != NULLFSINO);
/* /*
* Split up the inode number into its parts. * Split up the inode number into its parts.
*/ */
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
agino = XFS_INO_TO_AGINO(mp, ino); agino = XFS_INO_TO_AGINO(mp, ino);
agbno = XFS_AGINO_TO_AGBNO(mp, agino); agbno = XFS_AGINO_TO_AGBNO(mp, agino);
if (!pag || agbno >= mp->m_sb.sb_agblocks || if (agbno >= mp->m_sb.sb_agblocks ||
ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
error = -EINVAL; error = -EINVAL;
#ifdef DEBUG #ifdef DEBUG
@ -2295,20 +2292,14 @@ xfs_imap(
* as they can be invalid without implying corruption. * as they can be invalid without implying corruption.
*/ */
if (flags & XFS_IGET_UNTRUSTED) if (flags & XFS_IGET_UNTRUSTED)
goto out_drop; return error;
if (!pag) {
xfs_alert(mp,
"%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
__func__, XFS_INO_TO_AGNO(mp, ino),
mp->m_sb.sb_agcount);
}
if (agbno >= mp->m_sb.sb_agblocks) { if (agbno >= mp->m_sb.sb_agblocks) {
xfs_alert(mp, xfs_alert(mp,
"%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
__func__, (unsigned long long)agbno, __func__, (unsigned long long)agbno,
(unsigned long)mp->m_sb.sb_agblocks); (unsigned long)mp->m_sb.sb_agblocks);
} }
if (pag && ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
xfs_alert(mp, xfs_alert(mp,
"%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
__func__, ino, __func__, ino,
@ -2316,7 +2307,7 @@ xfs_imap(
} }
xfs_stack_trace(); xfs_stack_trace();
#endif /* DEBUG */ #endif /* DEBUG */
goto out_drop; return error;
} }
/* /*
@ -2327,10 +2318,10 @@ xfs_imap(
* in all cases where an untrusted inode number is passed. * in all cases where an untrusted inode number is passed.
*/ */
if (flags & XFS_IGET_UNTRUSTED) { if (flags & XFS_IGET_UNTRUSTED) {
error = xfs_imap_lookup(mp, tp, pag, agino, agbno, error = xfs_imap_lookup(pag, tp, agino, agbno,
&chunk_agbno, &offset_agbno, flags); &chunk_agbno, &offset_agbno, flags);
if (error) if (error)
goto out_drop; return error;
goto out_map; goto out_map;
} }
@ -2346,8 +2337,7 @@ xfs_imap(
imap->im_len = XFS_FSB_TO_BB(mp, 1); imap->im_len = XFS_FSB_TO_BB(mp, 1);
imap->im_boffset = (unsigned short)(offset << imap->im_boffset = (unsigned short)(offset <<
mp->m_sb.sb_inodelog); mp->m_sb.sb_inodelog);
error = 0; return 0;
goto out_drop;
} }
/* /*
@ -2359,10 +2349,10 @@ xfs_imap(
offset_agbno = agbno & M_IGEO(mp)->inoalign_mask; offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
chunk_agbno = agbno - offset_agbno; chunk_agbno = agbno - offset_agbno;
} else { } else {
error = xfs_imap_lookup(mp, tp, pag, agino, agbno, error = xfs_imap_lookup(pag, tp, agino, agbno,
&chunk_agbno, &offset_agbno, flags); &chunk_agbno, &offset_agbno, flags);
if (error) if (error)
goto out_drop; return error;
} }
out_map: out_map:
@ -2390,14 +2380,9 @@ out_map:
__func__, (unsigned long long) imap->im_blkno, __func__, (unsigned long long) imap->im_blkno,
(unsigned long long) imap->im_len, (unsigned long long) imap->im_len,
XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
error = -EINVAL; return -EINVAL;
goto out_drop;
} }
error = 0; return 0;
out_drop:
if (pag)
xfs_perag_put(pag);
return error;
} }
/* /*
@ -2613,10 +2598,10 @@ xfs_ialloc_read_agi(
return error; return error;
agi = agibp->b_addr; agi = agibp->b_addr;
if (!pag->pagi_init) { if (!xfs_perag_initialised_agi(pag)) {
pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
pag->pagi_count = be32_to_cpu(agi->agi_count); pag->pagi_count = be32_to_cpu(agi->agi_count);
pag->pagi_init = 1; set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
} }
/* /*
@ -2924,26 +2909,24 @@ xfs_ialloc_calc_rootino(
*/ */
int int
xfs_ialloc_check_shrink( xfs_ialloc_check_shrink(
struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
xfs_agnumber_t agno,
struct xfs_buf *agibp, struct xfs_buf *agibp,
xfs_agblock_t new_length) xfs_agblock_t new_length)
{ {
struct xfs_inobt_rec_incore rec; struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
struct xfs_mount *mp = tp->t_mountp; xfs_agino_t agino;
struct xfs_perag *pag;
xfs_agino_t agino = XFS_AGB_TO_AGINO(mp, new_length);
int has; int has;
int error; int error;
if (!xfs_has_sparseinodes(mp)) if (!xfs_has_sparseinodes(pag->pag_mount))
return 0; return 0;
pag = xfs_perag_get(mp, agno); cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO);
cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO);
/* Look up the inobt record that would correspond to the new EOFS. */ /* Look up the inobt record that would correspond to the new EOFS. */
agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
if (error || !has) if (error || !has)
goto out; goto out;
@ -2964,6 +2947,5 @@ xfs_ialloc_check_shrink(
} }
out: out:
xfs_btree_del_cursor(cur, error); xfs_btree_del_cursor(cur, error);
xfs_perag_put(pag);
return error; return error;
} }

View File

@ -12,6 +12,7 @@ struct xfs_imap;
struct xfs_mount; struct xfs_mount;
struct xfs_trans; struct xfs_trans;
struct xfs_btree_cur; struct xfs_btree_cur;
struct xfs_perag;
/* Move inodes in clusters of this size */ /* Move inodes in clusters of this size */
#define XFS_INODE_BIG_CLUSTER_SIZE 8192 #define XFS_INODE_BIG_CLUSTER_SIZE 8192
@ -47,7 +48,7 @@ int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag,
*/ */
int int
xfs_imap( xfs_imap(
struct xfs_mount *mp, /* file system mount structure */ struct xfs_perag *pag,
struct xfs_trans *tp, /* transaction pointer */ struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t ino, /* inode to locate */ xfs_ino_t ino, /* inode to locate */
struct xfs_imap *imap, /* location map structure */ struct xfs_imap *imap, /* location map structure */
@ -106,7 +107,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp); void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno, int xfs_ialloc_check_shrink(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_buf *agibp, xfs_agblock_t new_length); struct xfs_buf *agibp, xfs_agblock_t new_length);
#endif /* __XFS_IALLOC_H__ */ #endif /* __XFS_IALLOC_H__ */

View File

@ -36,8 +36,8 @@ STATIC struct xfs_btree_cur *
xfs_inobt_dup_cursor( xfs_inobt_dup_cursor(
struct xfs_btree_cur *cur) struct xfs_btree_cur *cur)
{ {
return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, return xfs_inobt_init_cursor(cur->bc_ag.pag, cur->bc_tp,
cur->bc_ag.agbp, cur->bc_ag.pag, cur->bc_btnum); cur->bc_ag.agbp, cur->bc_btnum);
} }
STATIC void STATIC void
@ -103,15 +103,15 @@ __xfs_inobt_alloc_block(
memset(&args, 0, sizeof(args)); memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp; args.tp = cur->bc_tp;
args.mp = cur->bc_mp; args.mp = cur->bc_mp;
args.pag = cur->bc_ag.pag;
args.oinfo = XFS_RMAP_OINFO_INOBT; args.oinfo = XFS_RMAP_OINFO_INOBT;
args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_ag.pag->pag_agno, sbno);
args.minlen = 1; args.minlen = 1;
args.maxlen = 1; args.maxlen = 1;
args.prod = 1; args.prod = 1;
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.resv = resv; args.resv = resv;
error = xfs_alloc_vextent(&args); error = xfs_alloc_vextent_near_bno(&args,
XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, sbno));
if (error) if (error)
return error; return error;
@ -291,8 +291,8 @@ xfs_inobt_verify(
* Similarly, during log recovery we will have a perag structure * Similarly, during log recovery we will have a perag structure
* attached, but the agi information will not yet have been initialised * attached, but the agi information will not yet have been initialised
* from the on disk AGI. We don't currently use any of this information, * from the on disk AGI. We don't currently use any of this information,
* but beware of the landmine (i.e. need to check pag->pagi_init) if we * but beware of the landmine (i.e. need to check
* ever do. * xfs_perag_initialised_agi(pag)) if we ever do.
*/ */
if (xfs_has_crc(mp)) { if (xfs_has_crc(mp)) {
fa = xfs_btree_sblock_v5hdr_verify(bp); fa = xfs_btree_sblock_v5hdr_verify(bp);
@ -427,11 +427,11 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
*/ */
static struct xfs_btree_cur * static struct xfs_btree_cur *
xfs_inobt_init_common( xfs_inobt_init_common(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
struct xfs_perag *pag, struct xfs_perag *pag,
struct xfs_trans *tp, /* transaction pointer */
xfs_btnum_t btnum) /* ialloc or free ino btree */ xfs_btnum_t btnum) /* ialloc or free ino btree */
{ {
struct xfs_mount *mp = pag->pag_mount;
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
cur = xfs_btree_alloc_cursor(mp, tp, btnum, cur = xfs_btree_alloc_cursor(mp, tp, btnum,
@ -456,16 +456,15 @@ xfs_inobt_init_common(
/* Create an inode btree cursor. */ /* Create an inode btree cursor. */
struct xfs_btree_cur * struct xfs_btree_cur *
xfs_inobt_init_cursor( xfs_inobt_init_cursor(
struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_buf *agbp, struct xfs_buf *agbp,
struct xfs_perag *pag,
xfs_btnum_t btnum) xfs_btnum_t btnum)
{ {
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
struct xfs_agi *agi = agbp->b_addr; struct xfs_agi *agi = agbp->b_addr;
cur = xfs_inobt_init_common(mp, tp, pag, btnum); cur = xfs_inobt_init_common(pag, tp, btnum);
if (btnum == XFS_BTNUM_INO) if (btnum == XFS_BTNUM_INO)
cur->bc_nlevels = be32_to_cpu(agi->agi_level); cur->bc_nlevels = be32_to_cpu(agi->agi_level);
else else
@ -477,14 +476,13 @@ xfs_inobt_init_cursor(
/* Create an inode btree cursor with a fake root for staging. */ /* Create an inode btree cursor with a fake root for staging. */
struct xfs_btree_cur * struct xfs_btree_cur *
xfs_inobt_stage_cursor( xfs_inobt_stage_cursor(
struct xfs_mount *mp,
struct xbtree_afakeroot *afake,
struct xfs_perag *pag, struct xfs_perag *pag,
struct xbtree_afakeroot *afake,
xfs_btnum_t btnum) xfs_btnum_t btnum)
{ {
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
cur = xfs_inobt_init_common(mp, NULL, pag, btnum); cur = xfs_inobt_init_common(pag, NULL, btnum);
xfs_btree_stage_afakeroot(cur, afake); xfs_btree_stage_afakeroot(cur, afake);
return cur; return cur;
} }
@ -708,9 +706,8 @@ xfs_inobt_max_size(
/* Read AGI and create inobt cursor. */ /* Read AGI and create inobt cursor. */
int int
xfs_inobt_cur( xfs_inobt_cur(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_perag *pag, struct xfs_perag *pag,
struct xfs_trans *tp,
xfs_btnum_t which, xfs_btnum_t which,
struct xfs_btree_cur **curpp, struct xfs_btree_cur **curpp,
struct xfs_buf **agi_bpp) struct xfs_buf **agi_bpp)
@ -725,16 +722,15 @@ xfs_inobt_cur(
if (error) if (error)
return error; return error;
cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, pag, which); cur = xfs_inobt_init_cursor(pag, tp, *agi_bpp, which);
*curpp = cur; *curpp = cur;
return 0; return 0;
} }
static int static int
xfs_inobt_count_blocks( xfs_inobt_count_blocks(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_perag *pag, struct xfs_perag *pag,
struct xfs_trans *tp,
xfs_btnum_t btnum, xfs_btnum_t btnum,
xfs_extlen_t *tree_blocks) xfs_extlen_t *tree_blocks)
{ {
@ -742,7 +738,7 @@ xfs_inobt_count_blocks(
struct xfs_btree_cur *cur = NULL; struct xfs_btree_cur *cur = NULL;
int error; int error;
error = xfs_inobt_cur(mp, tp, pag, btnum, &cur, &agbp); error = xfs_inobt_cur(pag, tp, btnum, &cur, &agbp);
if (error) if (error)
return error; return error;
@ -779,22 +775,21 @@ xfs_finobt_read_blocks(
*/ */
int int
xfs_finobt_calc_reserves( xfs_finobt_calc_reserves(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_perag *pag, struct xfs_perag *pag,
struct xfs_trans *tp,
xfs_extlen_t *ask, xfs_extlen_t *ask,
xfs_extlen_t *used) xfs_extlen_t *used)
{ {
xfs_extlen_t tree_len = 0; xfs_extlen_t tree_len = 0;
int error; int error;
if (!xfs_has_finobt(mp)) if (!xfs_has_finobt(pag->pag_mount))
return 0; return 0;
if (xfs_has_inobtcounts(mp)) if (xfs_has_inobtcounts(pag->pag_mount))
error = xfs_finobt_read_blocks(pag, tp, &tree_len); error = xfs_finobt_read_blocks(pag, tp, &tree_len);
else else
error = xfs_inobt_count_blocks(mp, tp, pag, XFS_BTNUM_FINO, error = xfs_inobt_count_blocks(pag, tp, XFS_BTNUM_FINO,
&tree_len); &tree_len);
if (error) if (error)
return error; return error;

View File

@ -46,12 +46,10 @@ struct xfs_perag;
(maxrecs) * sizeof(xfs_inobt_key_t) + \ (maxrecs) * sizeof(xfs_inobt_key_t) + \
((index) - 1) * sizeof(xfs_inobt_ptr_t))) ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *mp, extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_trans *tp, struct xfs_buf *agbp, xfs_btnum_t btnum);
struct xfs_perag *pag, xfs_btnum_t btnum); struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_perag *pag,
struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_mount *mp, struct xbtree_afakeroot *afake, xfs_btnum_t btnum);
struct xbtree_afakeroot *afake, struct xfs_perag *pag,
xfs_btnum_t btnum);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
/* ir_holemask to inode allocation bitmap conversion */ /* ir_holemask to inode allocation bitmap conversion */
@ -64,13 +62,13 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
#define xfs_inobt_rec_check_count(mp, rec) 0 #define xfs_inobt_rec_check_count(mp, rec) 0
#endif /* DEBUG */ #endif /* DEBUG */
int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, int xfs_finobt_calc_reserves(struct xfs_perag *perag, struct xfs_trans *tp,
struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); xfs_extlen_t *ask, xfs_extlen_t *used);
extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
unsigned long long len); unsigned long long len);
int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, int xfs_inobt_cur(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_perag *pag, xfs_btnum_t btnum, xfs_btnum_t btnum, struct xfs_btree_cur **curpp,
struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); struct xfs_buf **agi_bpp);
void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur,
struct xfs_trans *tp, struct xfs_buf *agbp); struct xfs_trans *tp, struct xfs_buf *agbp);

View File

@ -67,14 +67,14 @@ xfs_refcountbt_alloc_block(
memset(&args, 0, sizeof(args)); memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp; args.tp = cur->bc_tp;
args.mp = cur->bc_mp; args.mp = cur->bc_mp;
args.type = XFS_ALLOCTYPE_NEAR_BNO; args.pag = cur->bc_ag.pag;
args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno,
xfs_refc_block(args.mp));
args.oinfo = XFS_RMAP_OINFO_REFC; args.oinfo = XFS_RMAP_OINFO_REFC;
args.minlen = args.maxlen = args.prod = 1; args.minlen = args.maxlen = args.prod = 1;
args.resv = XFS_AG_RESV_METADATA; args.resv = XFS_AG_RESV_METADATA;
error = xfs_alloc_vextent(&args); error = xfs_alloc_vextent_near_bno(&args,
XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno,
xfs_refc_block(args.mp)));
if (error) if (error)
goto out_error; goto out_error;
trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
@ -227,7 +227,7 @@ xfs_refcountbt_verify(
return fa; return fa;
level = be16_to_cpu(block->bb_level); level = be16_to_cpu(block->bb_level);
if (pag && pag->pagf_init) { if (pag && xfs_perag_initialised_agf(pag)) {
if (level >= pag->pagf_refcount_level) if (level >= pag->pagf_refcount_level)
return __this_address; return __this_address;
} else if (level >= mp->m_refc_maxlevels) } else if (level >= mp->m_refc_maxlevels)

View File

@ -313,7 +313,7 @@ xfs_rmapbt_verify(
return fa; return fa;
level = be16_to_cpu(block->bb_level); level = be16_to_cpu(block->bb_level);
if (pag && pag->pagf_init) { if (pag && xfs_perag_initialised_agf(pag)) {
if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
return __this_address; return __this_address;
} else if (level >= mp->m_rmap_maxlevels) } else if (level >= mp->m_rmap_maxlevels)

View File

@ -909,7 +909,8 @@ xfs_sb_mount_common(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_sb *sbp) struct xfs_sb *sbp)
{ {
mp->m_agfrotor = mp->m_agirotor = 0; mp->m_agfrotor = 0;
atomic_set(&mp->m_agirotor, 0);
mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_maxagi = mp->m_sb.sb_agcount;
mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;

View File

@ -191,14 +191,15 @@ xrep_agf_init_header(
struct xfs_agf *old_agf) struct xfs_agf *old_agf)
{ {
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
struct xfs_perag *pag = sc->sa.pag;
struct xfs_agf *agf = agf_bp->b_addr; struct xfs_agf *agf = agf_bp->b_addr;
memcpy(old_agf, agf, sizeof(*old_agf)); memcpy(old_agf, agf, sizeof(*old_agf));
memset(agf, 0, BBTOB(agf_bp->b_length)); memset(agf, 0, BBTOB(agf_bp->b_length));
agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
agf->agf_seqno = cpu_to_be32(sc->sa.pag->pag_agno); agf->agf_seqno = cpu_to_be32(pag->pag_agno);
agf->agf_length = cpu_to_be32(sc->sa.pag->block_count); agf->agf_length = cpu_to_be32(pag->block_count);
agf->agf_flfirst = old_agf->agf_flfirst; agf->agf_flfirst = old_agf->agf_flfirst;
agf->agf_fllast = old_agf->agf_fllast; agf->agf_fllast = old_agf->agf_fllast;
agf->agf_flcount = old_agf->agf_flcount; agf->agf_flcount = old_agf->agf_flcount;
@ -206,8 +207,8 @@ xrep_agf_init_header(
uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
/* Mark the incore AGF data stale until we're done fixing things. */ /* Mark the incore AGF data stale until we're done fixing things. */
ASSERT(sc->sa.pag->pagf_init); ASSERT(xfs_perag_initialised_agf(pag));
sc->sa.pag->pagf_init = 0; clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
} }
/* Set btree root information in an AGF. */ /* Set btree root information in an AGF. */
@ -333,7 +334,7 @@ xrep_agf_commit_new(
pag->pagf_levels[XFS_BTNUM_RMAPi] = pag->pagf_levels[XFS_BTNUM_RMAPi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
pag->pagf_init = 1; set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
return 0; return 0;
} }
@ -434,7 +435,7 @@ xrep_agf(
out_revert: out_revert:
/* Mark the incore AGF state stale and revert the AGF. */ /* Mark the incore AGF state stale and revert the AGF. */
sc->sa.pag->pagf_init = 0; clear_bit(XFS_AGSTATE_AGF_INIT, &sc->sa.pag->pag_opstate);
memcpy(agf, &old_agf, sizeof(old_agf)); memcpy(agf, &old_agf, sizeof(old_agf));
return error; return error;
} }
@ -618,7 +619,7 @@ xrep_agfl_update_agf(
xfs_force_summary_recalc(sc->mp); xfs_force_summary_recalc(sc->mp);
/* Update the AGF counters. */ /* Update the AGF counters. */
if (sc->sa.pag->pagf_init) if (xfs_perag_initialised_agf(sc->sa.pag))
sc->sa.pag->pagf_flcount = flcount; sc->sa.pag->pagf_flcount = flcount;
agf->agf_flfirst = cpu_to_be32(0); agf->agf_flfirst = cpu_to_be32(0);
agf->agf_flcount = cpu_to_be32(flcount); agf->agf_flcount = cpu_to_be32(flcount);
@ -822,14 +823,15 @@ xrep_agi_init_header(
struct xfs_agi *old_agi) struct xfs_agi *old_agi)
{ {
struct xfs_agi *agi = agi_bp->b_addr; struct xfs_agi *agi = agi_bp->b_addr;
struct xfs_perag *pag = sc->sa.pag;
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
memcpy(old_agi, agi, sizeof(*old_agi)); memcpy(old_agi, agi, sizeof(*old_agi));
memset(agi, 0, BBTOB(agi_bp->b_length)); memset(agi, 0, BBTOB(agi_bp->b_length));
agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
agi->agi_seqno = cpu_to_be32(sc->sa.pag->pag_agno); agi->agi_seqno = cpu_to_be32(pag->pag_agno);
agi->agi_length = cpu_to_be32(sc->sa.pag->block_count); agi->agi_length = cpu_to_be32(pag->block_count);
agi->agi_newino = cpu_to_be32(NULLAGINO); agi->agi_newino = cpu_to_be32(NULLAGINO);
agi->agi_dirino = cpu_to_be32(NULLAGINO); agi->agi_dirino = cpu_to_be32(NULLAGINO);
if (xfs_has_crc(mp)) if (xfs_has_crc(mp))
@ -840,8 +842,8 @@ xrep_agi_init_header(
sizeof(agi->agi_unlinked)); sizeof(agi->agi_unlinked));
/* Mark the incore AGF data stale until we're done fixing things. */ /* Mark the incore AGF data stale until we're done fixing things. */
ASSERT(sc->sa.pag->pagi_init); ASSERT(xfs_perag_initialised_agi(pag));
sc->sa.pag->pagi_init = 0; clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
} }
/* Set btree root information in an AGI. */ /* Set btree root information in an AGI. */
@ -873,8 +875,7 @@ xrep_agi_calc_from_btrees(
xfs_agino_t freecount; xfs_agino_t freecount;
int error; int error;
cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, agi_bp, XFS_BTNUM_INO);
sc->sa.pag, XFS_BTNUM_INO);
error = xfs_ialloc_count_inodes(cur, &count, &freecount); error = xfs_ialloc_count_inodes(cur, &count, &freecount);
if (error) if (error)
goto err; goto err;
@ -894,8 +895,8 @@ xrep_agi_calc_from_btrees(
if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) { if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) {
xfs_agblock_t blocks; xfs_agblock_t blocks;
cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, agi_bp,
sc->sa.pag, XFS_BTNUM_FINO); XFS_BTNUM_FINO);
error = xfs_btree_count_blocks(cur, &blocks); error = xfs_btree_count_blocks(cur, &blocks);
if (error) if (error)
goto err; goto err;
@ -929,7 +930,7 @@ xrep_agi_commit_new(
pag = sc->sa.pag; pag = sc->sa.pag;
pag->pagi_count = be32_to_cpu(agi->agi_count); pag->pagi_count = be32_to_cpu(agi->agi_count);
pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
pag->pagi_init = 1; set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
return 0; return 0;
} }
@ -994,7 +995,7 @@ xrep_agi(
out_revert: out_revert:
/* Mark the incore AGI state stale and revert the AGI. */ /* Mark the incore AGI state stale and revert the AGI. */
sc->sa.pag->pagi_init = 0; clear_bit(XFS_AGSTATE_AGI_INIT, &sc->sa.pag->pag_opstate);
memcpy(agi, &old_agi, sizeof(old_agi)); memcpy(agi, &old_agi, sizeof(old_agi));
return error; return error;
} }

View File

@ -662,7 +662,7 @@ xchk_bmap_check_rmaps(
error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
if (error || if (error ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
xfs_perag_put(pag); xfs_perag_rele(pag);
return error; return error;
} }
} }

View File

@ -478,15 +478,15 @@ xchk_ag_btcur_init(
/* Set up a inobt cursor for cross-referencing. */ /* Set up a inobt cursor for cross-referencing. */
if (sa->agi_bp && if (sa->agi_bp &&
xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) { xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
sa->pag, XFS_BTNUM_INO); XFS_BTNUM_INO);
} }
/* Set up a finobt cursor for cross-referencing. */ /* Set up a finobt cursor for cross-referencing. */
if (sa->agi_bp && xfs_has_finobt(mp) && if (sa->agi_bp && xfs_has_finobt(mp) &&
xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) { xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, sa->fino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
sa->pag, XFS_BTNUM_FINO); XFS_BTNUM_FINO);
} }
/* Set up a rmapbt cursor for cross-referencing. */ /* Set up a rmapbt cursor for cross-referencing. */
@ -636,6 +636,7 @@ xchk_get_inode(
{ {
struct xfs_imap imap; struct xfs_imap imap;
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
struct xfs_perag *pag;
struct xfs_inode *ip_in = XFS_I(file_inode(sc->file)); struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
struct xfs_inode *ip = NULL; struct xfs_inode *ip = NULL;
int error; int error;
@ -671,10 +672,14 @@ xchk_get_inode(
* Otherwise, we really couldn't find it so tell userspace * Otherwise, we really couldn't find it so tell userspace
* that it no longer exists. * that it no longer exists.
*/ */
error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap, pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
if (pag) {
error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
xfs_perag_put(pag);
if (error) if (error)
return -ENOENT; return -ENOENT;
}
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
fallthrough; fallthrough;
default: default:

View File

@ -86,7 +86,8 @@ xchk_fscount_warmup(
for_each_perag(mp, agno, pag) { for_each_perag(mp, agno, pag) {
if (xchk_should_terminate(sc, &error)) if (xchk_should_terminate(sc, &error))
break; break;
if (pag->pagi_init && pag->pagf_init) if (xfs_perag_initialised_agi(pag) &&
xfs_perag_initialised_agf(pag))
continue; continue;
/* Lock both AG headers. */ /* Lock both AG headers. */
@ -101,7 +102,8 @@ xchk_fscount_warmup(
* These are supposed to be initialized by the header read * These are supposed to be initialized by the header read
* function. * function.
*/ */
if (!pag->pagi_init || !pag->pagf_init) { if (!xfs_perag_initialised_agi(pag) ||
!xfs_perag_initialised_agf(pag)) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
break; break;
} }
@ -117,7 +119,7 @@ xchk_fscount_warmup(
if (agi_bp) if (agi_bp)
xfs_buf_relse(agi_bp); xfs_buf_relse(agi_bp);
if (pag) if (pag)
xfs_perag_put(pag); xfs_perag_rele(pag);
return error; return error;
} }
@ -220,7 +222,8 @@ retry:
break; break;
/* This somehow got unset since the warmup? */ /* This somehow got unset since the warmup? */
if (!pag->pagi_init || !pag->pagf_init) { if (!xfs_perag_initialised_agi(pag) ||
!xfs_perag_initialised_agf(pag)) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
break; break;
} }
@ -249,7 +252,7 @@ retry:
} }
if (pag) if (pag)
xfs_perag_put(pag); xfs_perag_rele(pag);
if (error) { if (error) {
xchk_set_incomplete(sc); xchk_set_incomplete(sc);
return error; return error;

View File

@ -206,7 +206,7 @@ xrep_calc_ag_resblks(
return 0; return 0;
pag = xfs_perag_get(mp, sm->sm_agno); pag = xfs_perag_get(mp, sm->sm_agno);
if (pag->pagi_init) { if (xfs_perag_initialised_agi(pag)) {
/* Use in-core icount if possible. */ /* Use in-core icount if possible. */
icount = pag->pagi_count; icount = pag->pagi_count;
} else { } else {
@ -326,15 +326,14 @@ xrep_alloc_ag_block(
args.tp = sc->tp; args.tp = sc->tp;
args.mp = sc->mp; args.mp = sc->mp;
args.pag = sc->sa.pag;
args.oinfo = *oinfo; args.oinfo = *oinfo;
args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.pag->pag_agno, 0);
args.minlen = 1; args.minlen = 1;
args.maxlen = 1; args.maxlen = 1;
args.prod = 1; args.prod = 1;
args.type = XFS_ALLOCTYPE_THIS_AG;
args.resv = resv; args.resv = resv;
error = xfs_alloc_vextent(&args); error = xfs_alloc_vextent_this_ag(&args, sc->sa.pag->pag_agno);
if (error) if (error)
return error; return error;
if (args.fsbno == NULLFSBLOCK) if (args.fsbno == NULLFSBLOCK)

View File

@ -1410,7 +1410,7 @@ xfs_swap_extent_rmap(
/* Unmap the old blocks in the source file. */ /* Unmap the old blocks in the source file. */
while (tirec.br_blockcount) { while (tirec.br_blockcount) {
ASSERT(tp->t_firstblock == NULLFSBLOCK); ASSERT(tp->t_highest_agno == NULLAGNUMBER);
trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec); trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
/* Read extent from the source file */ /* Read extent from the source file */

View File

@ -21,23 +21,20 @@
STATIC int STATIC int
xfs_trim_extents( xfs_trim_extents(
struct xfs_mount *mp, struct xfs_perag *pag,
xfs_agnumber_t agno,
xfs_daddr_t start, xfs_daddr_t start,
xfs_daddr_t end, xfs_daddr_t end,
xfs_daddr_t minlen, xfs_daddr_t minlen,
uint64_t *blocks_trimmed) uint64_t *blocks_trimmed)
{ {
struct xfs_mount *mp = pag->pag_mount;
struct block_device *bdev = mp->m_ddev_targp->bt_bdev; struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
struct xfs_btree_cur *cur; struct xfs_btree_cur *cur;
struct xfs_buf *agbp; struct xfs_buf *agbp;
struct xfs_agf *agf; struct xfs_agf *agf;
struct xfs_perag *pag;
int error; int error;
int i; int i;
pag = xfs_perag_get(mp, agno);
/* /*
* Force out the log. This means any transactions that might have freed * Force out the log. This means any transactions that might have freed
* space before we take the AGF buffer lock are now on disk, and the * space before we take the AGF buffer lock are now on disk, and the
@ -47,7 +44,7 @@ xfs_trim_extents(
error = xfs_alloc_read_agf(pag, NULL, 0, &agbp); error = xfs_alloc_read_agf(pag, NULL, 0, &agbp);
if (error) if (error)
goto out_put_perag; return error;
agf = agbp->b_addr; agf = agbp->b_addr;
cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT); cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT);
@ -71,10 +68,10 @@ xfs_trim_extents(
error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
if (error) if (error)
goto out_del_cursor; break;
if (XFS_IS_CORRUPT(mp, i != 1)) { if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
goto out_del_cursor; break;
} }
ASSERT(flen <= be32_to_cpu(agf->agf_longest)); ASSERT(flen <= be32_to_cpu(agf->agf_longest));
@ -83,15 +80,15 @@ xfs_trim_extents(
* the format the range/len variables are supplied in by * the format the range/len variables are supplied in by
* userspace. * userspace.
*/ */
dbno = XFS_AGB_TO_DADDR(mp, agno, fbno); dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
dlen = XFS_FSB_TO_BB(mp, flen); dlen = XFS_FSB_TO_BB(mp, flen);
/* /*
* Too small? Give up. * Too small? Give up.
*/ */
if (dlen < minlen) { if (dlen < minlen) {
trace_xfs_discard_toosmall(mp, agno, fbno, flen); trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen);
goto out_del_cursor; break;
} }
/* /*
@ -100,7 +97,7 @@ xfs_trim_extents(
* down partially overlapping ranges for now. * down partially overlapping ranges for now.
*/ */
if (dbno + dlen < start || dbno > end) { if (dbno + dlen < start || dbno > end) {
trace_xfs_discard_exclude(mp, agno, fbno, flen); trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
goto next_extent; goto next_extent;
} }
@ -109,32 +106,30 @@ xfs_trim_extents(
* discard and try again the next time. * discard and try again the next time.
*/ */
if (xfs_extent_busy_search(mp, pag, fbno, flen)) { if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
trace_xfs_discard_busy(mp, agno, fbno, flen); trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
goto next_extent; goto next_extent;
} }
trace_xfs_discard_extent(mp, agno, fbno, flen); trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen);
error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS); error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
if (error) if (error)
goto out_del_cursor; break;
*blocks_trimmed += flen; *blocks_trimmed += flen;
next_extent: next_extent:
error = xfs_btree_decrement(cur, 0, &i); error = xfs_btree_decrement(cur, 0, &i);
if (error) if (error)
goto out_del_cursor; break;
if (fatal_signal_pending(current)) { if (fatal_signal_pending(current)) {
error = -ERESTARTSYS; error = -ERESTARTSYS;
goto out_del_cursor; break;
} }
} }
out_del_cursor: out_del_cursor:
xfs_btree_del_cursor(cur, error); xfs_btree_del_cursor(cur, error);
xfs_buf_relse(agbp); xfs_buf_relse(agbp);
out_put_perag:
xfs_perag_put(pag);
return error; return error;
} }
@ -152,11 +147,12 @@ xfs_ioc_trim(
struct xfs_mount *mp, struct xfs_mount *mp,
struct fstrim_range __user *urange) struct fstrim_range __user *urange)
{ {
struct xfs_perag *pag;
unsigned int granularity = unsigned int granularity =
bdev_discard_granularity(mp->m_ddev_targp->bt_bdev); bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
struct fstrim_range range; struct fstrim_range range;
xfs_daddr_t start, end, minlen; xfs_daddr_t start, end, minlen;
xfs_agnumber_t start_agno, end_agno, agno; xfs_agnumber_t agno;
uint64_t blocks_trimmed = 0; uint64_t blocks_trimmed = 0;
int error, last_error = 0; int error, last_error = 0;
@ -195,18 +191,18 @@ xfs_ioc_trim(
if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1; end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
start_agno = xfs_daddr_to_agno(mp, start); agno = xfs_daddr_to_agno(mp, start);
end_agno = xfs_daddr_to_agno(mp, end); for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
error = xfs_trim_extents(pag, start, end, minlen,
for (agno = start_agno; agno <= end_agno; agno++) {
error = xfs_trim_extents(mp, agno, start, end, minlen,
&blocks_trimmed); &blocks_trimmed);
if (error) { if (error) {
last_error = error; last_error = error;
if (error == -ERESTARTSYS) if (error == -ERESTARTSYS) {
xfs_perag_rele(pag);
break; break;
} }
} }
}
if (last_error) if (last_error)
return last_error; return last_error;

View File

@ -12,6 +12,7 @@
#include "xfs_mount.h" #include "xfs_mount.h"
#include "xfs_inode.h" #include "xfs_inode.h"
#include "xfs_bmap.h" #include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_alloc.h" #include "xfs_alloc.h"
#include "xfs_mru_cache.h" #include "xfs_mru_cache.h"
#include "xfs_trace.h" #include "xfs_trace.h"
@ -22,7 +23,7 @@
struct xfs_fstrm_item { struct xfs_fstrm_item {
struct xfs_mru_cache_elem mru; struct xfs_mru_cache_elem mru;
xfs_agnumber_t ag; /* AG in use for this directory */ struct xfs_perag *pag; /* AG in use for this directory */
}; };
enum xfs_fstrm_alloc { enum xfs_fstrm_alloc {
@ -30,117 +31,68 @@ enum xfs_fstrm_alloc {
XFS_PICK_LOWSPACE = 2, XFS_PICK_LOWSPACE = 2,
}; };
/*
 * Allocation group filestream associations are tracked with per-ag atomic
 * counters. These counters allow xfs_filestream_pick_ag() to tell whether a
 * particular AG already has active filestreams associated with it.
 *
 * Return the current value of that counter (pagf_fstrms) for AG @agno. The
 * perag is looked up with xfs_perag_get() and put back before returning, so
 * no reference is held once the caller gets the value.
 */
int
xfs_filestream_peek_ag(
	xfs_mount_t		*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*perag = xfs_perag_get(mp, agno);
	int			nr_streams = atomic_read(&perag->pagf_fstrms);

	xfs_perag_put(perag);
	return nr_streams;
}
/*
 * Increment the filestream counter (pagf_fstrms) of AG @agno and return the
 * result of atomic_inc_return() on it. The perag reference taken for the
 * lookup is dropped again before returning.
 */
static int
xfs_filestream_get_ag(
	xfs_mount_t		*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*perag = xfs_perag_get(mp, agno);
	int			nr_streams = atomic_inc_return(&perag->pagf_fstrms);

	xfs_perag_put(perag);
	return nr_streams;
}
/*
 * Decrement the filestream counter (pagf_fstrms) of AG @agno. The perag
 * reference taken for the lookup is dropped again before returning.
 */
static void
xfs_filestream_put_ag(
	xfs_mount_t		*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*perag = xfs_perag_get(mp, agno);

	atomic_dec(&perag->pagf_fstrms);
	xfs_perag_put(perag);
}
static void static void
xfs_fstrm_free_func( xfs_fstrm_free_func(
void *data, void *data,
struct xfs_mru_cache_elem *mru) struct xfs_mru_cache_elem *mru)
{ {
struct xfs_mount *mp = data;
struct xfs_fstrm_item *item = struct xfs_fstrm_item *item =
container_of(mru, struct xfs_fstrm_item, mru); container_of(mru, struct xfs_fstrm_item, mru);
struct xfs_perag *pag = item->pag;
xfs_filestream_put_ag(mp, item->ag); trace_xfs_filestream_free(pag, mru->key);
trace_xfs_filestream_free(mp, mru->key, item->ag); atomic_dec(&pag->pagf_fstrms);
xfs_perag_rele(pag);
kmem_free(item); kmem_free(item);
} }
/* /*
* Scan the AGs starting at startag looking for an AG that isn't in use and has * Scan the AGs starting at start_agno looking for an AG that isn't in use and
* at least minlen blocks free. * has at least minlen blocks free. If no AG is found to match the allocation
* requirements, pick the AG with the most free space in it.
*/ */
static int static int
xfs_filestream_pick_ag( xfs_filestream_pick_ag(
struct xfs_inode *ip, struct xfs_alloc_arg *args,
xfs_agnumber_t startag, xfs_ino_t pino,
xfs_agnumber_t *agp, xfs_agnumber_t start_agno,
int flags, int flags,
xfs_extlen_t minlen) xfs_extlen_t *longest)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = args->mp;
struct xfs_fstrm_item *item;
struct xfs_perag *pag; struct xfs_perag *pag;
xfs_extlen_t longest, free = 0, minfree, maxfree = 0; struct xfs_perag *max_pag = NULL;
xfs_agnumber_t ag, max_ag = NULLAGNUMBER; xfs_extlen_t minlen = *longest;
int err, trylock, nscan; xfs_extlen_t free = 0, minfree, maxfree = 0;
xfs_agnumber_t agno;
ASSERT(S_ISDIR(VFS_I(ip)->i_mode)); bool first_pass = true;
int err;
/* 2% of an AG's blocks must be free for it to be chosen. */ /* 2% of an AG's blocks must be free for it to be chosen. */
minfree = mp->m_sb.sb_agblocks / 50; minfree = mp->m_sb.sb_agblocks / 50;
ag = startag; restart:
*agp = NULLAGNUMBER; for_each_perag_wrap(mp, start_agno, agno, pag) {
trace_xfs_filestream_scan(pag, pino);
/* For the first pass, don't sleep trying to init the per-AG. */ *longest = 0;
trylock = XFS_ALLOC_FLAG_TRYLOCK; err = xfs_bmap_longest_free_extent(pag, NULL, longest);
for (nscan = 0; 1; nscan++) {
trace_xfs_filestream_scan(mp, ip->i_ino, ag);
pag = xfs_perag_get(mp, ag);
if (!pag->pagf_init) {
err = xfs_alloc_read_agf(pag, NULL, trylock, NULL);
if (err) { if (err) {
if (err != -EAGAIN) { xfs_perag_rele(pag);
xfs_perag_put(pag); if (err != -EAGAIN)
return err; break;
}
/* Couldn't lock the AGF, skip this AG. */ /* Couldn't lock the AGF, skip this AG. */
goto next_ag; err = 0;
} continue;
} }
/* Keep track of the AG with the most free blocks. */ /* Keep track of the AG with the most free blocks. */
if (pag->pagf_freeblks > maxfree) { if (pag->pagf_freeblks > maxfree) {
maxfree = pag->pagf_freeblks; maxfree = pag->pagf_freeblks;
max_ag = ag; if (max_pag)
xfs_perag_rele(max_pag);
atomic_inc(&pag->pag_active_ref);
max_pag = pag;
} }
/* /*
@ -149,93 +101,73 @@ xfs_filestream_pick_ag(
* loop, and it guards against two filestreams being established * loop, and it guards against two filestreams being established
* in the same AG as each other. * in the same AG as each other.
*/ */
if (xfs_filestream_get_ag(mp, ag) > 1) { if (atomic_inc_return(&pag->pagf_fstrms) <= 1) {
xfs_filestream_put_ag(mp, ag); if (((minlen && *longest >= minlen) ||
goto next_ag;
}
longest = xfs_alloc_longest_free_extent(pag,
xfs_alloc_min_freelist(mp, pag),
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
if (((minlen && longest >= minlen) ||
(!minlen && pag->pagf_freeblks >= minfree)) && (!minlen && pag->pagf_freeblks >= minfree)) &&
(!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || (!xfs_perag_prefers_metadata(pag) ||
!(flags & XFS_PICK_USERDATA) ||
(flags & XFS_PICK_LOWSPACE))) { (flags & XFS_PICK_LOWSPACE))) {
/* Break out, retaining the reference on the AG. */ /* Break out, retaining the reference on the AG. */
free = pag->pagf_freeblks; free = pag->pagf_freeblks;
xfs_perag_put(pag);
*agp = ag;
break; break;
} }
}
/* Drop the reference on this AG, it's not usable. */ /* Drop the reference on this AG, it's not usable. */
xfs_filestream_put_ag(mp, ag); atomic_dec(&pag->pagf_fstrms);
next_ag:
xfs_perag_put(pag);
/* Move to the next AG, wrapping to AG 0 if necessary. */
if (++ag >= mp->m_sb.sb_agcount)
ag = 0;
/* If a full pass of the AGs hasn't been done yet, continue. */
if (ag != startag)
continue;
/* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */
if (trylock != 0) {
trylock = 0;
continue;
} }
/* Finally, if lowspace wasn't set, set it for the 3rd pass. */ if (err) {
if (!(flags & XFS_PICK_LOWSPACE)) { xfs_perag_rele(pag);
flags |= XFS_PICK_LOWSPACE; if (max_pag)
continue; xfs_perag_rele(max_pag);
return err;
}
if (!pag) {
/*
* Allow a second pass to give xfs_bmap_longest_free_extent()
* another attempt at locking AGFs that it might have skipped
* over before we fail.
*/
if (first_pass) {
first_pass = false;
goto restart;
} }
/* /*
* Take the AG with the most free space, regardless of whether * We must be low on data space, so run a final lowspace
* it's already in use by another filestream. * optimised selection pass if we haven't already.
*/ */
if (max_ag != NULLAGNUMBER) { if (!(flags & XFS_PICK_LOWSPACE)) {
xfs_filestream_get_ag(mp, max_ag); flags |= XFS_PICK_LOWSPACE;
free = maxfree; goto restart;
*agp = max_ag; }
/*
* No unassociated AGs are available, so select the AG with the
* most free space, regardless of whether it's already in use by
* another filestream. If none suit, just use whatever AG we can
* grab.
*/
if (!max_pag) {
for_each_perag_wrap(args->mp, 0, start_agno, args->pag)
break; break;
atomic_inc(&args->pag->pagf_fstrms);
*longest = 0;
} else {
pag = max_pag;
free = maxfree;
atomic_inc(&pag->pagf_fstrms);
}
} else if (max_pag) {
xfs_perag_rele(max_pag);
} }
/* take AG 0 if none matched */ trace_xfs_filestream_pick(pag, pino, free);
trace_xfs_filestream_pick(ip, *agp, free, nscan); args->pag = pag;
*agp = 0;
return 0;
}
trace_xfs_filestream_pick(ip, *agp, free, nscan);
if (*agp == NULLAGNUMBER)
return 0; return 0;
err = -ENOMEM;
item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
if (!item)
goto out_put_ag;
item->ag = *agp;
err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
if (err) {
if (err == -EEXIST)
err = 0;
goto out_free_item;
}
return 0;
out_free_item:
kmem_free(item);
out_put_ag:
xfs_filestream_put_ag(mp, *agp);
return err;
} }
static struct xfs_inode * static struct xfs_inode *
@ -263,104 +195,187 @@ out:
} }
/* /*
* Find the right allocation group for a file, either by finding an * Lookup the mru cache for an existing association. If one exists and we can
* existing file stream or creating a new one. * use it, return with an active perag reference indicating that the allocation
* will proceed with that association.
* *
* Returns NULLAGNUMBER in case of an error. * If we have no association, or we cannot use the current one and have to
* destroy it, return with longest = 0 to tell the caller to create a new
* association.
*/ */
xfs_agnumber_t static int
xfs_filestream_lookup_ag( xfs_filestream_lookup_association(
struct xfs_inode *ip) struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_ino_t pino,
xfs_extlen_t *longest)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = args->mp;
struct xfs_inode *pip = NULL; struct xfs_perag *pag;
xfs_agnumber_t startag, ag = NULLAGNUMBER;
struct xfs_mru_cache_elem *mru; struct xfs_mru_cache_elem *mru;
int error = 0;
ASSERT(S_ISREG(VFS_I(ip)->i_mode)); *longest = 0;
mru = xfs_mru_cache_lookup(mp->m_filestream, pino);
pip = xfs_filestream_get_parent(ip); if (!mru)
if (!pip) return 0;
return NULLAGNUMBER; /*
* Grab the pag and take an extra active reference for the caller whilst
mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); * the mru item cannot go away. This means we'll pin the perag with
if (mru) { * the reference we get here even if the filestreams association is torn
ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; * down immediately after we mark the lookup as done.
*/
pag = container_of(mru, struct xfs_fstrm_item, mru)->pag;
atomic_inc(&pag->pag_active_ref);
xfs_mru_cache_done(mp->m_filestream); xfs_mru_cache_done(mp->m_filestream);
trace_xfs_filestream_lookup(mp, ip->i_ino, ag); trace_xfs_filestream_lookup(pag, ap->ip->i_ino);
goto out;
} ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0);
xfs_bmap_adjacent(ap);
/* /*
* Set the starting AG using the rotor for inode32, otherwise * If there is very little free space before we start a filestreams
* use the directory inode's AG. * allocation, we're almost guaranteed to fail to find a large enough
* free space available so just use the cached AG.
*/ */
if (xfs_is_inode32(mp)) { if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
xfs_agnumber_t rotorstep = xfs_rotorstep; *longest = 1;
startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; goto out_done;
mp->m_agfrotor = (mp->m_agfrotor + 1) %
(mp->m_sb.sb_agcount * rotorstep);
} else
startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
ag = NULLAGNUMBER;
out:
xfs_irele(pip);
return ag;
} }
/* error = xfs_bmap_longest_free_extent(pag, args->tp, longest);
* Pick a new allocation group for the current file and its file stream. if (error == -EAGAIN)
* error = 0;
* This is called when the allocator can't find a suitable extent in the if (error || *longest < args->maxlen) {
* current AG, and we have to move the stream into a new AG with more space. /* We aren't going to use this perag */
*/ *longest = 0;
int xfs_perag_rele(pag);
xfs_filestream_new_ag( return error;
}
out_done:
args->pag = pag;
return 0;
}
static int
xfs_filestream_create_association(
struct xfs_bmalloca *ap, struct xfs_bmalloca *ap,
xfs_agnumber_t *agp) struct xfs_alloc_arg *args,
xfs_ino_t pino,
xfs_extlen_t *longest)
{ {
struct xfs_inode *ip = ap->ip, *pip; struct xfs_mount *mp = args->mp;
struct xfs_mount *mp = ip->i_mount;
xfs_extlen_t minlen = ap->length;
xfs_agnumber_t startag = 0;
int flags = 0;
int err = 0;
struct xfs_mru_cache_elem *mru; struct xfs_mru_cache_elem *mru;
struct xfs_fstrm_item *item;
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, pino);
int flags = 0;
int error;
*agp = NULLAGNUMBER; /* Changing parent AG association now, so remove the existing one. */
mru = xfs_mru_cache_remove(mp->m_filestream, pino);
pip = xfs_filestream_get_parent(ip);
if (!pip)
goto exit;
mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
if (mru) { if (mru) {
struct xfs_fstrm_item *item = struct xfs_fstrm_item *item =
container_of(mru, struct xfs_fstrm_item, mru); container_of(mru, struct xfs_fstrm_item, mru);
startag = (item->ag + 1) % mp->m_sb.sb_agcount;
agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount;
xfs_fstrm_free_func(mp, mru);
} else if (xfs_is_inode32(mp)) {
xfs_agnumber_t rotorstep = xfs_rotorstep;
agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
mp->m_agfrotor = (mp->m_agfrotor + 1) %
(mp->m_sb.sb_agcount * rotorstep);
} }
ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
xfs_bmap_adjacent(ap);
if (ap->datatype & XFS_ALLOC_USERDATA) if (ap->datatype & XFS_ALLOC_USERDATA)
flags |= XFS_PICK_USERDATA; flags |= XFS_PICK_USERDATA;
if (ap->tp->t_flags & XFS_TRANS_LOWMODE) if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
flags |= XFS_PICK_LOWSPACE; flags |= XFS_PICK_LOWSPACE;
err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); *longest = ap->length;
error = xfs_filestream_pick_ag(args, pino, agno, flags, longest);
if (error)
return error;
/* /*
* Only free the item here so we skip over the old AG earlier. * We are going to use this perag now, so create an association for it.
* xfs_filestream_pick_ag() has already bumped the perag fstrms counter
* for us, so all we need to do here is take another active reference to
* the perag for the cached association.
*
* If we fail to store the association, we need to drop the fstrms
* counter as well as drop the perag reference we take here for the
* item. We do not need to return an error for this failure - as long as
* we return a referenced AG, the allocation can still go ahead just
* fine.
*/ */
if (mru) item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
xfs_fstrm_free_func(mp, mru); if (!item)
goto out_put_fstrms;
atomic_inc(&args->pag->pag_active_ref);
item->pag = args->pag;
error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
if (error)
goto out_free_item;
return 0;
out_free_item:
xfs_perag_rele(item->pag);
kmem_free(item);
out_put_fstrms:
atomic_dec(&args->pag->pagf_fstrms);
return 0;
}
/*
* Search for an allocation group with a single extent large enough for
* the request. First we look for an existing association and use that if it
* is found. Otherwise, we create a new association by selecting an AG that fits
* the allocation criteria.
*
* We return with a referenced perag in args->pag to indicate which AG we are
* allocating into or an error with no references held.
*/
int
xfs_filestream_select_ag(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_extlen_t *longest)
{
struct xfs_mount *mp = args->mp;
struct xfs_inode *pip;
xfs_ino_t ino = 0;
int error = 0;
*longest = 0;
args->total = ap->total;
pip = xfs_filestream_get_parent(ap->ip);
if (pip) {
ino = pip->i_ino;
error = xfs_filestream_lookup_association(ap, args, ino,
longest);
xfs_irele(pip); xfs_irele(pip);
exit: if (error)
if (*agp == NULLAGNUMBER) return error;
*agp = 0; if (*longest >= args->maxlen)
return err; goto out_select;
if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
goto out_select;
}
error = xfs_filestream_create_association(ap, args, ino, longest);
if (error)
return error;
out_select:
ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0);
return 0;
} }
void void

View File

@ -9,13 +9,13 @@
struct xfs_mount; struct xfs_mount;
struct xfs_inode; struct xfs_inode;
struct xfs_bmalloca; struct xfs_bmalloca;
struct xfs_alloc_arg;
int xfs_filestream_mount(struct xfs_mount *mp); int xfs_filestream_mount(struct xfs_mount *mp);
void xfs_filestream_unmount(struct xfs_mount *mp); void xfs_filestream_unmount(struct xfs_mount *mp);
void xfs_filestream_deassociate(struct xfs_inode *ip); void xfs_filestream_deassociate(struct xfs_inode *ip);
xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); int xfs_filestream_select_ag(struct xfs_bmalloca *ap,
int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); struct xfs_alloc_arg *args, xfs_extlen_t *blen);
int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno);
static inline int static inline int
xfs_inode_is_filestream( xfs_inode_is_filestream(

View File

@ -688,11 +688,11 @@ __xfs_getfsmap_datadev(
info->agf_bp = NULL; info->agf_bp = NULL;
} }
if (info->pag) { if (info->pag) {
xfs_perag_put(info->pag); xfs_perag_rele(info->pag);
info->pag = NULL; info->pag = NULL;
} else if (pag) { } else if (pag) {
/* loop termination case */ /* loop termination case */
xfs_perag_put(pag); xfs_perag_rele(pag);
} }
return error; return error;
@ -761,6 +761,7 @@ xfs_getfsmap_datadev_bnobt(
{ {
struct xfs_alloc_rec_incore akeys[2]; struct xfs_alloc_rec_incore akeys[2];
memset(akeys, 0, sizeof(akeys));
info->missing_owner = XFS_FMR_OWN_UNKNOWN; info->missing_owner = XFS_FMR_OWN_UNKNOWN;
return __xfs_getfsmap_datadev(tp, keys, info, return __xfs_getfsmap_datadev(tp, keys, info,
xfs_getfsmap_datadev_bnobt_query, &akeys[0]); xfs_getfsmap_datadev_bnobt_query, &akeys[0]);

View File

@ -255,7 +255,7 @@ xfs_perag_set_inode_tag(
break; break;
} }
trace_xfs_perag_set_inode_tag(mp, pag->pag_agno, tag, _RET_IP_); trace_xfs_perag_set_inode_tag(pag, _RET_IP_);
} }
/* Clear a tag on both the AG incore inode tree and the AG radix tree. */ /* Clear a tag on both the AG incore inode tree and the AG radix tree. */
@ -289,7 +289,7 @@ xfs_perag_clear_inode_tag(
radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag); radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag);
spin_unlock(&mp->m_perag_lock); spin_unlock(&mp->m_perag_lock);
trace_xfs_perag_clear_inode_tag(mp, pag->pag_agno, tag, _RET_IP_); trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
} }
/* /*
@ -586,7 +586,7 @@ xfs_iget_cache_miss(
if (!ip) if (!ip)
return -ENOMEM; return -ENOMEM;
error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags); error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags);
if (error) if (error)
goto out_destroy; goto out_destroy;
@ -1767,7 +1767,7 @@ xfs_icwalk(
if (error) { if (error) {
last_error = error; last_error = error;
if (error == -EFSCORRUPTED) { if (error == -EFSCORRUPTED) {
xfs_perag_put(pag); xfs_perag_rele(pag);
break; break;
} }
} }

View File

@ -1367,7 +1367,7 @@ xfs_itruncate_extents_flags(
unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1; unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
while (unmap_len > 0) { while (unmap_len > 0) {
ASSERT(tp->t_firstblock == NULLFSBLOCK); ASSERT(tp->t_highest_agno == NULLAGNUMBER);
error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len, error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
flags, XFS_ITRUNC_MAX_EXTENTS); flags, XFS_ITRUNC_MAX_EXTENTS);
if (error) if (error)

View File

@ -275,7 +275,7 @@ xfs_iwalk_ag_start(
/* Set up a fresh cursor and empty the inobt cache. */ /* Set up a fresh cursor and empty the inobt cache. */
iwag->nr_recs = 0; iwag->nr_recs = 0;
error = xfs_inobt_cur(mp, tp, pag, XFS_BTNUM_INO, curpp, agi_bpp); error = xfs_inobt_cur(pag, tp, XFS_BTNUM_INO, curpp, agi_bpp);
if (error) if (error)
return error; return error;
@ -390,7 +390,7 @@ xfs_iwalk_run_callbacks(
} }
/* ...and recreate the cursor just past where we left off. */ /* ...and recreate the cursor just past where we left off. */
error = xfs_inobt_cur(mp, iwag->tp, iwag->pag, XFS_BTNUM_INO, curpp, error = xfs_inobt_cur(iwag->pag, iwag->tp, XFS_BTNUM_INO, curpp,
agi_bpp); agi_bpp);
if (error) if (error)
return error; return error;
@ -591,7 +591,7 @@ xfs_iwalk(
} }
if (iwag.pag) if (iwag.pag)
xfs_perag_put(pag); xfs_perag_rele(pag);
xfs_iwalk_free(&iwag); xfs_iwalk_free(&iwag);
return error; return error;
} }
@ -683,7 +683,7 @@ xfs_iwalk_threaded(
break; break;
} }
if (pag) if (pag)
xfs_perag_put(pag); xfs_perag_rele(pag);
if (polled) if (polled)
xfs_pwork_poll(&pctl); xfs_pwork_poll(&pctl);
return xfs_pwork_destroy(&pctl); return xfs_pwork_destroy(&pctl);
@ -776,7 +776,7 @@ xfs_inobt_walk(
} }
if (iwag.pag) if (iwag.pag)
xfs_perag_put(pag); xfs_perag_rele(pag);
xfs_iwalk_free(&iwag); xfs_iwalk_free(&iwag);
return error; return error;
} }

View File

@ -210,8 +210,7 @@ typedef struct xfs_mount {
struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
struct xstats m_stats; /* per-fs stats */ struct xstats m_stats; /* per-fs stats */
xfs_agnumber_t m_agfrotor; /* last ag where space found */ xfs_agnumber_t m_agfrotor; /* last ag where space found */
xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ atomic_t m_agirotor; /* last ag dir inode alloced */
spinlock_t m_agirotor_lock;/* .. and lock protecting it */
/* Memory shrinker to throttle and reprioritize inodegc */ /* Memory shrinker to throttle and reprioritize inodegc */
struct shrinker m_inodegc_shrinker; struct shrinker m_inodegc_shrinker;

View File

@ -610,7 +610,7 @@ xfs_reflink_cancel_cow_blocks(
if (error) if (error)
break; break;
} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
ASSERT((*tpp)->t_firstblock == NULLFSBLOCK); ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
/* Free the CoW orphan record. */ /* Free the CoW orphan record. */
xfs_refcount_free_cow_extent(*tpp, del.br_startblock, xfs_refcount_free_cow_extent(*tpp, del.br_startblock,
@ -927,7 +927,7 @@ xfs_reflink_recover_cow(
for_each_perag(mp, agno, pag) { for_each_perag(mp, agno, pag) {
error = xfs_refcount_recover_cow_leftovers(mp, pag); error = xfs_refcount_recover_cow_leftovers(mp, pag);
if (error) { if (error) {
xfs_perag_put(pag); xfs_perag_rele(pag);
break; break;
} }
} }

View File

@ -247,6 +247,32 @@ xfs_fs_show_options(
return 0; return 0;
} }
/*
 * Set the inode allocation state bits of one AG.
 *
 * @pag:		AG whose pag_opstate bits are updated
 * @ino:		inode number tested against XFS_MAXINUMBER_32
 * @max_metadata:	AGs with pag_agno below this get PREFERS_METADATA set
 *
 * XFS_AGSTATE_ALLOWS_INODES is set unless the mount is inode32 and @ino is
 * above XFS_MAXINUMBER_32. XFS_AGSTATE_PREFERS_METADATA is set only for an
 * inode32 mount where @ino is within range and pag_agno < @max_metadata; in
 * every other case it is cleared.
 *
 * Returns true only for an inode32 mount where @ino is within range, false
 * otherwise (including every non-inode32 mount).
 */
static bool
xfs_set_inode_alloc_perag(
	struct xfs_perag	*pag,
	xfs_ino_t		ino,
	xfs_agnumber_t		max_metadata)
{
	const bool		inode32 = xfs_is_inode32(pag->pag_mount);
	bool			allows_inodes = true;
	bool			prefers_metadata = false;

	if (inode32) {
		if (ino > XFS_MAXINUMBER_32)
			allows_inodes = false;
		else
			prefers_metadata = pag->pag_agno < max_metadata;
	}

	/* Update ALLOWS_INODES first, then PREFERS_METADATA. */
	if (allows_inodes)
		set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
	else
		clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);

	if (prefers_metadata)
		set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
	else
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);

	return inode32 && allows_inodes;
}
/* /*
* Set parameters for inode allocation heuristics, taking into account * Set parameters for inode allocation heuristics, taking into account
* filesystem size and inode32/inode64 mount options; i.e. specifically * filesystem size and inode32/inode64 mount options; i.e. specifically
@ -310,24 +336,8 @@ xfs_set_inode_alloc(
ino = XFS_AGINO_TO_INO(mp, index, agino); ino = XFS_AGINO_TO_INO(mp, index, agino);
pag = xfs_perag_get(mp, index); pag = xfs_perag_get(mp, index);
if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
if (xfs_is_inode32(mp)) {
if (ino > XFS_MAXINUMBER_32) {
pag->pagi_inodeok = 0;
pag->pagf_metadata = 0;
} else {
pag->pagi_inodeok = 1;
maxagi++; maxagi++;
if (index < max_metadata)
pag->pagf_metadata = 1;
else
pag->pagf_metadata = 0;
}
} else {
pag->pagi_inodeok = 1;
pag->pagf_metadata = 0;
}
xfs_perag_put(pag); xfs_perag_put(pag);
} }
@ -1922,7 +1932,6 @@ static int xfs_init_fs_context(
return -ENOMEM; return -ENOMEM;
spin_lock_init(&mp->m_sb_lock); spin_lock_init(&mp->m_sb_lock);
spin_lock_init(&mp->m_agirotor_lock);
INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
spin_lock_init(&mp->m_perag_lock); spin_lock_init(&mp->m_perag_lock);
mutex_init(&mp->m_growlock); mutex_init(&mp->m_growlock);

View File

@ -74,6 +74,7 @@ struct xfs_inobt_rec_incore;
union xfs_btree_ptr; union xfs_btree_ptr;
struct xfs_dqtrx; struct xfs_dqtrx;
struct xfs_icwalk; struct xfs_icwalk;
struct xfs_perag;
#define XFS_ATTR_FILTER_FLAGS \ #define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \ { XFS_ATTR_ROOT, "ROOT" }, \
@ -159,36 +160,40 @@ TRACE_EVENT(xlog_intent_recovery_failed,
); );
DECLARE_EVENT_CLASS(xfs_perag_class, DECLARE_EVENT_CLASS(xfs_perag_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip),
unsigned long caller_ip), TP_ARGS(pag, caller_ip),
TP_ARGS(mp, agno, refcount, caller_ip),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
__field(xfs_agnumber_t, agno) __field(xfs_agnumber_t, agno)
__field(int, refcount) __field(int, refcount)
__field(int, active_refcount)
__field(unsigned long, caller_ip) __field(unsigned long, caller_ip)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = mp->m_super->s_dev; __entry->dev = pag->pag_mount->m_super->s_dev;
__entry->agno = agno; __entry->agno = pag->pag_agno;
__entry->refcount = refcount; __entry->refcount = atomic_read(&pag->pag_ref);
__entry->active_refcount = atomic_read(&pag->pag_active_ref);
__entry->caller_ip = caller_ip; __entry->caller_ip = caller_ip;
), ),
TP_printk("dev %d:%d agno 0x%x refcount %d caller %pS", TP_printk("dev %d:%d agno 0x%x passive refs %d active refs %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno, __entry->agno,
__entry->refcount, __entry->refcount,
__entry->active_refcount,
(char *)__entry->caller_ip) (char *)__entry->caller_ip)
); );
#define DEFINE_PERAG_REF_EVENT(name) \ #define DEFINE_PERAG_REF_EVENT(name) \
DEFINE_EVENT(xfs_perag_class, name, \ DEFINE_EVENT(xfs_perag_class, name, \
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \
unsigned long caller_ip), \ TP_ARGS(pag, caller_ip))
TP_ARGS(mp, agno, refcount, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get); DEFINE_PERAG_REF_EVENT(xfs_perag_get);
DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_grab);
DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_rele);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
@ -634,8 +639,8 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
DECLARE_EVENT_CLASS(xfs_filestream_class, DECLARE_EVENT_CLASS(xfs_filestream_class,
TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino),
TP_ARGS(mp, ino, agno), TP_ARGS(pag, ino),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
__field(xfs_ino_t, ino) __field(xfs_ino_t, ino)
@ -643,10 +648,10 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
__field(int, streams) __field(int, streams)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = mp->m_super->s_dev; __entry->dev = pag->pag_mount->m_super->s_dev;
__entry->ino = ino; __entry->ino = ino;
__entry->agno = agno; __entry->agno = pag->pag_agno;
__entry->streams = xfs_filestream_peek_ag(mp, agno); __entry->streams = atomic_read(&pag->pagf_fstrms);
), ),
TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d", TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
@ -656,39 +661,40 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
) )
#define DEFINE_FILESTREAM_EVENT(name) \ #define DEFINE_FILESTREAM_EVENT(name) \
DEFINE_EVENT(xfs_filestream_class, name, \ DEFINE_EVENT(xfs_filestream_class, name, \
TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), \ TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), \
TP_ARGS(mp, ino, agno)) TP_ARGS(pag, ino))
DEFINE_FILESTREAM_EVENT(xfs_filestream_free); DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
TRACE_EVENT(xfs_filestream_pick, TRACE_EVENT(xfs_filestream_pick,
TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno, TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free),
xfs_extlen_t free, int nscan), TP_ARGS(pag, ino, free),
TP_ARGS(ip, agno, free, nscan),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
__field(xfs_ino_t, ino) __field(xfs_ino_t, ino)
__field(xfs_agnumber_t, agno) __field(xfs_agnumber_t, agno)
__field(int, streams) __field(int, streams)
__field(xfs_extlen_t, free) __field(xfs_extlen_t, free)
__field(int, nscan)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->dev = pag->pag_mount->m_super->s_dev;
__entry->ino = ip->i_ino; __entry->ino = ino;
__entry->agno = agno; if (pag) {
__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); __entry->agno = pag->pag_agno;
__entry->streams = atomic_read(&pag->pagf_fstrms);
} else {
__entry->agno = NULLAGNUMBER;
__entry->streams = 0;
}
__entry->free = free; __entry->free = free;
__entry->nscan = nscan;
), ),
TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d nscan %d", TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino, __entry->ino,
__entry->agno, __entry->agno,
__entry->streams, __entry->streams,
__entry->free, __entry->free)
__entry->nscan)
); );
DECLARE_EVENT_CLASS(xfs_lock_class, DECLARE_EVENT_CLASS(xfs_lock_class,
@ -1795,13 +1801,11 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
__field(xfs_extlen_t, alignment) __field(xfs_extlen_t, alignment)
__field(xfs_extlen_t, minalignslop) __field(xfs_extlen_t, minalignslop)
__field(xfs_extlen_t, len) __field(xfs_extlen_t, len)
__field(short, type)
__field(short, otype)
__field(char, wasdel) __field(char, wasdel)
__field(char, wasfromfl) __field(char, wasfromfl)
__field(int, resv) __field(int, resv)
__field(int, datatype) __field(int, datatype)
__field(xfs_fsblock_t, firstblock) __field(xfs_agnumber_t, highest_agno)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = args->mp->m_super->s_dev; __entry->dev = args->mp->m_super->s_dev;
@ -1816,18 +1820,16 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
__entry->alignment = args->alignment; __entry->alignment = args->alignment;
__entry->minalignslop = args->minalignslop; __entry->minalignslop = args->minalignslop;
__entry->len = args->len; __entry->len = args->len;
__entry->type = args->type;
__entry->otype = args->otype;
__entry->wasdel = args->wasdel; __entry->wasdel = args->wasdel;
__entry->wasfromfl = args->wasfromfl; __entry->wasfromfl = args->wasfromfl;
__entry->resv = args->resv; __entry->resv = args->resv;
__entry->datatype = args->datatype; __entry->datatype = args->datatype;
__entry->firstblock = args->tp->t_firstblock; __entry->highest_agno = args->tp->t_highest_agno;
), ),
TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u " TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u "
"prod %u minleft %u total %u alignment %u minalignslop %u " "prod %u minleft %u total %u alignment %u minalignslop %u "
"len %u type %s otype %s wasdel %d wasfromfl %d resv %d " "len %u wasdel %d wasfromfl %d resv %d "
"datatype 0x%x firstblock 0x%llx", "datatype 0x%x highest_agno 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno, __entry->agno,
__entry->agbno, __entry->agbno,
@ -1840,13 +1842,11 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
__entry->alignment, __entry->alignment,
__entry->minalignslop, __entry->minalignslop,
__entry->len, __entry->len,
__print_symbolic(__entry->type, XFS_ALLOC_TYPES),
__print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
__entry->wasdel, __entry->wasdel,
__entry->wasfromfl, __entry->wasfromfl,
__entry->resv, __entry->resv,
__entry->datatype, __entry->datatype,
(unsigned long long)__entry->firstblock) __entry->highest_agno)
) )
#define DEFINE_ALLOC_EVENT(name) \ #define DEFINE_ALLOC_EVENT(name) \
@ -1877,6 +1877,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
DEFINE_ALLOC_EVENT(xfs_alloc_small_done); DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
DEFINE_ALLOC_EVENT(xfs_alloc_small_error); DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_skip_deadlock);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);

View File

@ -102,7 +102,7 @@ xfs_trans_dup(
INIT_LIST_HEAD(&ntp->t_items); INIT_LIST_HEAD(&ntp->t_items);
INIT_LIST_HEAD(&ntp->t_busy); INIT_LIST_HEAD(&ntp->t_busy);
INIT_LIST_HEAD(&ntp->t_dfops); INIT_LIST_HEAD(&ntp->t_dfops);
ntp->t_firstblock = NULLFSBLOCK; ntp->t_highest_agno = NULLAGNUMBER;
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(tp->t_ticket != NULL); ASSERT(tp->t_ticket != NULL);
@ -278,7 +278,7 @@ retry:
INIT_LIST_HEAD(&tp->t_items); INIT_LIST_HEAD(&tp->t_items);
INIT_LIST_HEAD(&tp->t_busy); INIT_LIST_HEAD(&tp->t_busy);
INIT_LIST_HEAD(&tp->t_dfops); INIT_LIST_HEAD(&tp->t_dfops);
tp->t_firstblock = NULLFSBLOCK; tp->t_highest_agno = NULLAGNUMBER;
error = xfs_trans_reserve(tp, resp, blocks, rtextents); error = xfs_trans_reserve(tp, resp, blocks, rtextents);
if (error == -ENOSPC && want_retry) { if (error == -ENOSPC && want_retry) {
@ -1078,10 +1078,10 @@ xfs_trans_cancel(
/* /*
* It's never valid to cancel a transaction with deferred ops attached, * It's never valid to cancel a transaction with deferred ops attached,
* because the transaction is effectively dirty. Complain about this * because the transaction is effectively dirty. Complain about this
* loudly before freeing the in-memory defer items. * loudly before freeing the in-memory defer items and shutting down the
* filesystem.
*/ */
if (!list_empty(&tp->t_dfops)) { if (!list_empty(&tp->t_dfops)) {
ASSERT(xfs_is_shutdown(mp) || list_empty(&tp->t_dfops));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
dirty = true; dirty = true;
xfs_defer_cancel(tp); xfs_defer_cancel(tp);

View File

@ -132,7 +132,7 @@ typedef struct xfs_trans {
unsigned int t_rtx_res; /* # of rt extents resvd */ unsigned int t_rtx_res; /* # of rt extents resvd */
unsigned int t_rtx_res_used; /* # of resvd rt extents used */ unsigned int t_rtx_res_used; /* # of resvd rt extents used */
unsigned int t_flags; /* misc flags */ unsigned int t_flags; /* misc flags */
xfs_fsblock_t t_firstblock; /* first block allocated */ xfs_agnumber_t t_highest_agno; /* highest AGF locked */
struct xlog_ticket *t_ticket; /* log mgr ticket */ struct xlog_ticket *t_ticket; /* log mgr ticket */
struct xfs_mount *t_mountp; /* ptr to fs mount struct */ struct xfs_mount *t_mountp; /* ptr to fs mount struct */
struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */