2018-06-05 19:42:14 -07:00
// SPDX-License-Identifier: GPL-2.0
2005-04-16 15:20:36 -07:00
/*
2005-11-02 14:58:39 +11:00
* Copyright ( c ) 2000 - 2005 Silicon Graphics , Inc .
* All Rights Reserved .
2005-04-16 15:20:36 -07:00
*/
# include "xfs.h"
2005-11-02 14:38:42 +11:00
# include "xfs_fs.h"
2013-10-23 10:36:05 +11:00
# include "xfs_shared.h"
2013-10-23 10:50:10 +11:00
# include "xfs_format.h"
# include "xfs_log_format.h"
# include "xfs_trans_resv.h"
2005-11-02 14:38:42 +11:00
# include "xfs_bit.h"
2005-04-16 15:20:36 -07:00
# include "xfs_mount.h"
# include "xfs_inode.h"
# include "xfs_bmap.h"
2013-10-23 10:51:50 +11:00
# include "xfs_bmap_btree.h"
2023-12-18 05:57:21 +01:00
# include "xfs_bmap_util.h"
2013-10-23 10:50:10 +11:00
# include "xfs_trans.h"
2005-04-16 15:20:36 -07:00
# include "xfs_trans_space.h"
2012-10-08 21:56:11 +11:00
# include "xfs_icache.h"
2013-10-15 09:17:56 +11:00
# include "xfs_rtalloc.h"
2020-10-07 13:57:52 -07:00
# include "xfs_sb.h"
2023-10-16 09:21:47 -07:00
# include "xfs_rtbitmap.h"
2023-12-18 05:57:21 +01:00
# include "xfs_quota.h"
2024-02-22 12:32:44 -08:00
# include "xfs_log_priv.h"
# include "xfs_health.h"
2005-04-16 15:20:36 -07:00
/*
2013-10-15 09:17:56 +11:00
* Return whether there are any free extents in the size range given
* by low and high , for the bitmap block bbno .
2005-04-16 15:20:36 -07:00
*/
2023-10-16 09:54:19 -07:00
STATIC int
2013-10-15 09:17:56 +11:00
xfs_rtany_summary (
2023-10-16 09:54:19 -07:00
struct xfs_rtalloc_args * args ,
int low , /* low log2 extent size */
int high , /* high log2 extent size */
xfs_fileoff_t bbno , /* bitmap block number */
2023-10-16 10:43:42 -07:00
int * maxlog ) /* out: max log2 extent size free */
2005-04-16 15:20:36 -07:00
{
2023-10-16 09:54:19 -07:00
struct xfs_mount * mp = args - > mp ;
int error ;
int log ; /* loop counter, log2 of ext. size */
xfs_suminfo_t sum ; /* summary data */
2005-04-16 15:20:36 -07:00
2023-10-16 10:41:55 -07:00
/* There are no extents at levels >= m_rsum_cache[bbno]. */
if ( mp - > m_rsum_cache ) {
high = min ( high , mp - > m_rsum_cache [ bbno ] - 1 ) ;
if ( low > high ) {
2023-10-16 10:43:42 -07:00
* maxlog = - 1 ;
2023-10-16 10:41:55 -07:00
return 0 ;
}
}
xfs: cache minimum realtime summary level
The realtime summary is a two-dimensional array on disk, effectively:
u32 rsum[log2(number of realtime extents) + 1][number of blocks in the bitmap]
rsum[log][bbno] is the number of extents of size 2**log which start in
bitmap block bbno.
xfs_rtallocate_extent_near() uses xfs_rtany_summary() to check whether
rsum[log][bbno] != 0 for any log level. However, the summary array is
stored in row-major order (i.e., like an array in C), so all of these
entries are not adjacent, but rather spread across the entire summary
file. In the worst case (a full bitmap block), xfs_rtany_summary() has
to check every level.
This means that on a moderately-used realtime device, an allocation will
waste a lot of time finding, reading, and releasing buffers for the
realtime summary. In particular, one of our storage services (which runs
on servers with 8 very slow CPUs and 15 8 TB XFS realtime filesystems)
spends almost 5% of its CPU cycles in xfs_rtbuf_get() and
xfs_trans_brelse() called from xfs_rtany_summary().
One solution would be to also store the summary with the dimensions
swapped. However, this would require a disk format change to a very old
component of XFS.
Instead, we can cache the minimum size which contains any extents. We do
so lazily; rather than guaranteeing that the cache contains the precise
minimum, it always contains a loose lower bound which we tighten when we
read or update a summary block. This only uses a few kilobytes of memory
and is already serialized via the realtime bitmap and summary inode
locks, so the cost is minimal. With this change, the same workload only
spends 0.2% of its CPU cycles in the realtime allocator.
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-12-12 08:46:32 -08:00
2005-04-16 15:20:36 -07:00
/*
xfs: cache minimum realtime summary level
The realtime summary is a two-dimensional array on disk, effectively:
u32 rsum[log2(number of realtime extents) + 1][number of blocks in the bitmap]
rsum[log][bbno] is the number of extents of size 2**log which start in
bitmap block bbno.
xfs_rtallocate_extent_near() uses xfs_rtany_summary() to check whether
rsum[log][bbno] != 0 for any log level. However, the summary array is
stored in row-major order (i.e., like an array in C), so all of these
entries are not adjacent, but rather spread across the entire summary
file. In the worst case (a full bitmap block), xfs_rtany_summary() has
to check every level.
This means that on a moderately-used realtime device, an allocation will
waste a lot of time finding, reading, and releasing buffers for the
realtime summary. In particular, one of our storage services (which runs
on servers with 8 very slow CPUs and 15 8 TB XFS realtime filesystems)
spends almost 5% of its CPU cycles in xfs_rtbuf_get() and
xfs_trans_brelse() called from xfs_rtany_summary().
One solution would be to also store the summary with the dimensions
swapped. However, this would require a disk format change to a very old
component of XFS.
Instead, we can cache the minimum size which contains any extents. We do
so lazily; rather than guaranteeing that the cache contains the precise
minimum, it always contains a loose lower bound which we tighten when we
read or update a summary block. This only uses a few kilobytes of memory
and is already serialized via the realtime bitmap and summary inode
locks, so the cost is minimal. With this change, the same workload only
spends 0.2% of its CPU cycles in the realtime allocator.
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-12-12 08:46:32 -08:00
* Loop over logs of extent sizes .
2005-04-16 15:20:36 -07:00
*/
2023-10-16 10:41:55 -07:00
for ( log = high ; log > = low ; log - - ) {
2005-04-16 15:20:36 -07:00
/*
2013-10-15 09:17:56 +11:00
* Get one summary datum .
2005-04-16 15:20:36 -07:00
*/
2023-10-16 10:13:22 -07:00
error = xfs_rtget_summary ( args , log , bbno , & sum ) ;
2005-04-16 15:20:36 -07:00
if ( error ) {
return error ;
}
/*
2013-10-15 09:17:56 +11:00
* If there are any , return success .
2005-04-16 15:20:36 -07:00
*/
2013-10-15 09:17:56 +11:00
if ( sum ) {
2023-10-16 10:43:42 -07:00
* maxlog = log ;
xfs: cache minimum realtime summary level
The realtime summary is a two-dimensional array on disk, effectively:
u32 rsum[log2(number of realtime extents) + 1][number of blocks in the bitmap]
rsum[log][bbno] is the number of extents of size 2**log which start in
bitmap block bbno.
xfs_rtallocate_extent_near() uses xfs_rtany_summary() to check whether
rsum[log][bbno] != 0 for any log level. However, the summary array is
stored in row-major order (i.e., like an array in C), so all of these
entries are not adjacent, but rather spread across the entire summary
file. In the worst case (a full bitmap block), xfs_rtany_summary() has
to check every level.
This means that on a moderately-used realtime device, an allocation will
waste a lot of time finding, reading, and releasing buffers for the
realtime summary. In particular, one of our storage services (which runs
on servers with 8 very slow CPUs and 15 8 TB XFS realtime filesystems)
spends almost 5% of its CPU cycles in xfs_rtbuf_get() and
xfs_trans_brelse() called from xfs_rtany_summary().
One solution would be to also store the summary with the dimensions
swapped. However, this would require a disk format change to a very old
component of XFS.
Instead, we can cache the minimum size which contains any extents. We do
so lazily; rather than guaranteeing that the cache contains the precise
minimum, it always contains a loose lower bound which we tighten when we
read or update a summary block. This only uses a few kilobytes of memory
and is already serialized via the realtime bitmap and summary inode
locks, so the cost is minimal. With this change, the same workload only
spends 0.2% of its CPU cycles in the realtime allocator.
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-12-12 08:46:32 -08:00
goto out ;
2005-04-16 15:20:36 -07:00
}
}
/*
2013-10-15 09:17:56 +11:00
* Found nothing , return failure .
2005-04-16 15:20:36 -07:00
*/
2023-10-16 10:43:42 -07:00
* maxlog = - 1 ;
xfs: cache minimum realtime summary level
The realtime summary is a two-dimensional array on disk, effectively:
u32 rsum[log2(number of realtime extents) + 1][number of blocks in the bitmap]
rsum[log][bbno] is the number of extents of size 2**log which start in
bitmap block bbno.
xfs_rtallocate_extent_near() uses xfs_rtany_summary() to check whether
rsum[log][bbno] != 0 for any log level. However, the summary array is
stored in row-major order (i.e., like an array in C), so all of these
entries are not adjacent, but rather spread across the entire summary
file. In the worst case (a full bitmap block), xfs_rtany_summary() has
to check every level.
This means that on a moderately-used realtime device, an allocation will
waste a lot of time finding, reading, and releasing buffers for the
realtime summary. In particular, one of our storage services (which runs
on servers with 8 very slow CPUs and 15 8 TB XFS realtime filesystems)
spends almost 5% of its CPU cycles in xfs_rtbuf_get() and
xfs_trans_brelse() called from xfs_rtany_summary().
One solution would be to also store the summary with the dimensions
swapped. However, this would require a disk format change to a very old
component of XFS.
Instead, we can cache the minimum size which contains any extents. We do
so lazily; rather than guaranteeing that the cache contains the precise
minimum, it always contains a loose lower bound which we tighten when we
read or update a summary block. This only uses a few kilobytes of memory
and is already serialized via the realtime bitmap and summary inode
locks, so the cost is minimal. With this change, the same workload only
spends 0.2% of its CPU cycles in the realtime allocator.
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-12-12 08:46:32 -08:00
out :
2023-10-16 10:41:55 -07:00
/* There were no extents at levels > log. */
if ( mp - > m_rsum_cache & & log + 1 < mp - > m_rsum_cache [ bbno ] )
mp - > m_rsum_cache [ bbno ] = log + 1 ;
2013-10-15 09:17:56 +11:00
return 0 ;
}
2005-04-16 15:20:36 -07:00
2013-10-15 09:17:56 +11:00
/*
* Copy and transform the summary file , given the old and new
* parameters in the mount structures .
*/
2023-10-16 09:54:19 -07:00
STATIC int
2013-10-15 09:17:56 +11:00
xfs_rtcopy_summary (
2023-10-16 09:54:19 -07:00
struct xfs_rtalloc_args * oargs ,
struct xfs_rtalloc_args * nargs )
2013-10-15 09:17:56 +11:00
{
2023-10-16 09:54:19 -07:00
xfs_fileoff_t bbno ; /* bitmap block number */
int error ;
int log ; /* summary level number (log length) */
xfs_suminfo_t sum ; /* summary data */
2013-10-15 09:17:56 +11:00
2023-10-16 09:54:19 -07:00
for ( log = oargs - > mp - > m_rsumlevels - 1 ; log > = 0 ; log - - ) {
for ( bbno = oargs - > mp - > m_sb . sb_rbmblocks - 1 ;
2013-10-15 09:17:56 +11:00
( xfs_srtblock_t ) bbno > = 0 ;
bbno - - ) {
2023-10-16 10:13:22 -07:00
error = xfs_rtget_summary ( oargs , log , bbno , & sum ) ;
2013-10-15 09:17:56 +11:00
if ( error )
2023-10-16 10:13:22 -07:00
goto out ;
2013-10-15 09:17:56 +11:00
if ( sum = = 0 )
continue ;
2023-10-16 10:13:22 -07:00
error = xfs_rtmodify_summary ( oargs , log , bbno , - sum ) ;
2013-10-15 09:17:56 +11:00
if ( error )
2023-10-16 10:13:22 -07:00
goto out ;
error = xfs_rtmodify_summary ( nargs , log , bbno , sum ) ;
2013-10-15 09:17:56 +11:00
if ( error )
2023-10-16 10:13:22 -07:00
goto out ;
2013-10-15 09:17:56 +11:00
ASSERT ( sum > 0 ) ;
}
}
2023-10-16 10:13:22 -07:00
error = 0 ;
out :
xfs_rtbuf_cache_relse ( oargs ) ;
2013-10-15 09:17:56 +11:00
return 0 ;
}
/*
* Mark an extent specified by start and len allocated .
* Updates all the summary information as well as the bitmap .
*/
2023-10-16 09:54:19 -07:00
STATIC int
2013-10-15 09:17:56 +11:00
xfs_rtallocate_range (
2023-10-16 09:54:19 -07:00
struct xfs_rtalloc_args * args ,
xfs_rtxnum_t start , /* start rtext to allocate */
2023-10-16 10:13:22 -07:00
xfs_rtxlen_t len ) /* in/out: summary block number */
2013-10-15 09:17:56 +11:00
{
2023-10-16 09:54:19 -07:00
struct xfs_mount * mp = args - > mp ;
xfs_rtxnum_t end ; /* end of the allocated rtext */
int error ;
xfs_rtxnum_t postblock = 0 ; /* first rtext allocated > end */
xfs_rtxnum_t preblock = 0 ; /* first rtext allocated < start */
2013-10-15 09:17:56 +11:00
end = start + len - 1 ;
/*
* Assume we ' re allocating out of the middle of a free extent .
* We need to find the beginning and end of the extent so we can
* properly update the summary .
*/
2023-10-16 09:54:19 -07:00
error = xfs_rtfind_back ( args , start , 0 , & preblock ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2013-10-15 09:17:56 +11:00
return error ;
2023-12-18 05:57:22 +01:00
2013-10-15 09:17:56 +11:00
/*
* Find the next allocated block ( end of free extent ) .
*/
2023-10-16 09:54:19 -07:00
error = xfs_rtfind_forw ( args , end , mp - > m_sb . sb_rextents - 1 ,
2023-10-16 10:13:22 -07:00
& postblock ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2013-10-15 09:17:56 +11:00
return error ;
2023-12-18 05:57:22 +01:00
2013-10-15 09:17:56 +11:00
/*
* Decrement the summary information corresponding to the entire
* ( old ) free extent .
*/
2023-10-16 09:54:19 -07:00
error = xfs_rtmodify_summary ( args ,
2023-12-18 05:57:33 +01:00
xfs_highbit64 ( postblock + 1 - preblock ) ,
2023-10-16 10:13:22 -07:00
xfs_rtx_to_rbmblock ( mp , preblock ) , - 1 ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2013-10-15 09:17:56 +11:00
return error ;
2023-12-18 05:57:22 +01:00
2013-10-15 09:17:56 +11:00
/*
* If there are blocks not being allocated at the front of the
* old extent , add summary data for them to be free .
*/
if ( preblock < start ) {
2023-10-16 09:54:19 -07:00
error = xfs_rtmodify_summary ( args ,
2023-12-18 05:57:33 +01:00
xfs_highbit64 ( start - preblock ) ,
2023-10-16 10:13:22 -07:00
xfs_rtx_to_rbmblock ( mp , preblock ) , 1 ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2013-10-15 09:17:56 +11:00
return error ;
}
2023-12-18 05:57:22 +01:00
2013-10-15 09:17:56 +11:00
/*
* If there are blocks not being allocated at the end of the
* old extent , add summary data for them to be free .
*/
if ( postblock > end ) {
2023-10-16 09:54:19 -07:00
error = xfs_rtmodify_summary ( args ,
2023-12-18 05:57:33 +01:00
xfs_highbit64 ( postblock - end ) ,
2023-10-16 10:13:22 -07:00
xfs_rtx_to_rbmblock ( mp , end + 1 ) , 1 ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2013-10-15 09:17:56 +11:00
return error ;
}
2023-12-18 05:57:22 +01:00
2013-10-15 09:17:56 +11:00
/*
* Modify the bitmap to mark this extent allocated .
*/
2023-12-18 05:57:22 +01:00
return xfs_rtmodify_range ( args , start , len , 0 ) ;
2013-10-15 09:17:56 +11:00
}
2023-10-16 09:21:38 -07:00
/*
* Make sure we don ' t run off the end of the rt volume . Be careful that
* adjusting maxlen downwards doesn ' t cause us to fail the alignment checks .
*/
2023-10-16 09:31:11 -07:00
static inline xfs_rtxlen_t
2023-10-16 09:21:38 -07:00
xfs_rtallocate_clamp_len (
struct xfs_mount * mp ,
2023-10-16 09:32:45 -07:00
xfs_rtxnum_t startrtx ,
2023-10-16 09:31:11 -07:00
xfs_rtxlen_t rtxlen ,
xfs_rtxlen_t prod )
2023-10-16 09:21:38 -07:00
{
2023-10-16 09:31:11 -07:00
xfs_rtxlen_t ret ;
2023-10-16 09:21:38 -07:00
ret = min ( mp - > m_sb . sb_rextents , startrtx + rtxlen ) - startrtx ;
return rounddown ( ret , prod ) ;
}
2013-10-15 09:17:56 +11:00
/*
* Attempt to allocate an extent minlen < = len < = maxlen starting from
* bitmap block bbno . If we don ' t get maxlen then use prod to trim
2023-10-16 09:32:45 -07:00
* the length , if given . Returns error ; returns starting block in * rtx .
2013-10-15 09:17:56 +11:00
* The lengths are all in rtextents .
*/
2023-10-16 09:54:19 -07:00
STATIC int
2013-10-15 09:17:56 +11:00
xfs_rtallocate_extent_block (
2023-10-16 09:54:19 -07:00
struct xfs_rtalloc_args * args ,
xfs_fileoff_t bbno , /* bitmap block number */
xfs_rtxlen_t minlen , /* minimum length to allocate */
xfs_rtxlen_t maxlen , /* maximum length to allocate */
xfs_rtxlen_t * len , /* out: actual length allocated */
xfs_rtxnum_t * nextp , /* out: next rtext to try */
xfs_rtxlen_t prod , /* extent product factor */
xfs_rtxnum_t * rtx ) /* out: start rtext allocated */
2013-10-15 09:17:56 +11:00
{
2023-10-16 09:54:19 -07:00
struct xfs_mount * mp = args - > mp ;
xfs_rtxnum_t besti ; /* best rtext found so far */
xfs_rtxnum_t bestlen ; /* best length found so far */
xfs_rtxnum_t end ; /* last rtext in chunk */
int error ;
xfs_rtxnum_t i ; /* current rtext trying */
xfs_rtxnum_t next ; /* next rtext to try */
int stat ; /* status from internal calls */
2013-10-15 09:17:56 +11:00
/*
* Loop over all the extents starting in this bitmap block ,
* looking for one that ' s long enough .
*/
2023-10-16 09:44:13 -07:00
for ( i = xfs_rbmblock_to_rtx ( mp , bbno ) , besti = - 1 , bestlen = 0 ,
end = xfs_rbmblock_to_rtx ( mp , bbno + 1 ) - 1 ;
2013-10-15 09:17:56 +11:00
i < = end ;
i + + ) {
2020-09-09 14:21:06 -07:00
/* Make sure we don't scan off the end of the rt volume. */
2023-10-16 09:21:38 -07:00
maxlen = xfs_rtallocate_clamp_len ( mp , i , maxlen , prod ) ;
2020-09-09 14:21:06 -07:00
2013-10-15 09:17:56 +11:00
/*
* See if there ' s a free extent of maxlen starting at i .
* If it ' s not so then next will contain the first non - free .
*/
2023-10-16 09:54:19 -07:00
error = xfs_rtcheck_range ( args , i , maxlen , 1 , & next , & stat ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2013-10-15 09:17:56 +11:00
return error ;
if ( stat ) {
/*
* i for maxlen is all free , allocate and return that .
*/
2023-12-18 05:57:30 +01:00
bestlen = maxlen ;
besti = i ;
goto allocate ;
2013-10-15 09:17:56 +11:00
}
2023-12-18 05:57:30 +01:00
2013-10-15 09:17:56 +11:00
/*
* In the case where we have a variable - sized allocation
* request , figure out how big this free piece is ,
* and if it ' s big enough for the minimum , and the best
* so far , remember it .
*/
if ( minlen < maxlen ) {
2023-10-16 09:32:45 -07:00
xfs_rtxnum_t thislen ; /* this extent size */
2013-10-15 09:17:56 +11:00
thislen = next - i ;
if ( thislen > = minlen & & thislen > bestlen ) {
besti = i ;
bestlen = thislen ;
}
}
/*
* If not done yet , find the start of the next free space .
*/
2023-12-18 05:57:28 +01:00
if ( next > = end )
2013-10-15 09:17:56 +11:00
break ;
2023-12-18 05:57:28 +01:00
error = xfs_rtfind_forw ( args , next , end , & i ) ;
if ( error )
return error ;
2013-10-15 09:17:56 +11:00
}
2023-12-18 05:57:29 +01:00
2013-10-15 09:17:56 +11:00
/*
* Searched the whole thing & didn ' t find a maxlen free extent .
*/
2023-12-18 05:57:29 +01:00
if ( minlen > maxlen | | besti = = - 1 ) {
2013-10-15 09:17:56 +11:00
/*
2023-12-18 05:57:29 +01:00
* Allocation failed . Set * nextp to the next block to try .
2013-10-15 09:17:56 +11:00
*/
2023-12-18 05:57:29 +01:00
* nextp = next ;
return - ENOSPC ;
}
2018-06-08 09:54:22 -07:00
2023-12-18 05:57:29 +01:00
/*
* If size should be a multiple of prod , make that so .
*/
if ( prod > 1 ) {
xfs_rtxlen_t p ; /* amount to trim length by */
div_u64_rem ( bestlen , prod , & p ) ;
if ( p )
bestlen - = p ;
2005-04-16 15:20:36 -07:00
}
2023-12-18 05:57:29 +01:00
2005-04-16 15:20:36 -07:00
/*
2023-12-18 05:57:29 +01:00
* Allocate besti for bestlen & return that .
2005-04-16 15:20:36 -07:00
*/
2023-12-18 05:57:30 +01:00
allocate :
2023-12-18 05:57:29 +01:00
error = xfs_rtallocate_range ( args , besti , bestlen ) ;
if ( error )
return error ;
* len = bestlen ;
* rtx = besti ;
return 0 ;
2005-04-16 15:20:36 -07:00
}
/*
* Allocate an extent of length minlen < = len < = maxlen , starting at block
* bno . If we don ' t get maxlen then use prod to trim the length , if given .
2023-10-16 09:32:45 -07:00
* Returns error ; returns starting block in * rtx .
2005-04-16 15:20:36 -07:00
* The lengths are all in rtextents .
*/
2023-10-16 09:54:19 -07:00
STATIC int
2005-04-16 15:20:36 -07:00
xfs_rtallocate_extent_exact (
2023-10-16 09:54:19 -07:00
struct xfs_rtalloc_args * args ,
xfs_rtxnum_t start , /* starting rtext number to allocate */
xfs_rtxlen_t minlen , /* minimum length to allocate */
xfs_rtxlen_t maxlen , /* maximum length to allocate */
xfs_rtxlen_t * len , /* out: actual length allocated */
xfs_rtxlen_t prod , /* extent product factor */
xfs_rtxnum_t * rtx ) /* out: start rtext allocated */
2005-04-16 15:20:36 -07:00
{
2023-10-16 09:54:19 -07:00
int error ;
xfs_rtxlen_t i ; /* extent length trimmed due to prod */
int isfree ; /* extent is free */
xfs_rtxnum_t next ; /* next rtext to try (dummy) */
2005-04-16 15:20:36 -07:00
2023-10-16 09:21:38 -07:00
ASSERT ( minlen % prod = = 0 ) ;
ASSERT ( maxlen % prod = = 0 ) ;
2005-04-16 15:20:36 -07:00
/*
* Check if the range in question ( for maxlen ) is free .
*/
2023-10-16 09:54:19 -07:00
error = xfs_rtcheck_range ( args , start , maxlen , 1 , & next , & isfree ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2005-04-16 15:20:36 -07:00
return error ;
2023-12-18 05:57:22 +01:00
2023-12-18 05:57:31 +01:00
if ( ! isfree ) {
2005-04-16 15:20:36 -07:00
/*
2023-12-18 05:57:31 +01:00
* If not , allocate what there is , if it ' s at least minlen .
2005-04-16 15:20:36 -07:00
*/
2023-12-18 05:57:31 +01:00
maxlen = next - start ;
2023-12-18 05:57:22 +01:00
if ( maxlen < minlen )
return - ENOSPC ;
2023-12-18 05:57:31 +01:00
/*
* Trim off tail of extent , if prod is specified .
*/
if ( prod > 1 & & ( i = maxlen % prod ) ) {
maxlen - = i ;
if ( maxlen < minlen )
return - ENOSPC ;
}
2005-04-16 15:20:36 -07:00
}
2023-12-18 05:57:31 +01:00
2005-04-16 15:20:36 -07:00
/*
* Allocate what we can and return it .
*/
2023-10-16 10:13:22 -07:00
error = xfs_rtallocate_range ( args , start , maxlen ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2005-04-16 15:20:36 -07:00
return error ;
* len = maxlen ;
2023-10-16 09:32:45 -07:00
* rtx = start ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
/*
* Allocate an extent of length minlen < = len < = maxlen , starting as near
2023-10-16 09:32:45 -07:00
* to start as possible . If we don ' t get maxlen then use prod to trim
2005-04-16 15:20:36 -07:00
* the length , if given . The lengths are all in rtextents .
*/
2023-10-16 09:54:19 -07:00
STATIC int
2005-04-16 15:20:36 -07:00
xfs_rtallocate_extent_near (
2023-10-16 09:54:19 -07:00
struct xfs_rtalloc_args * args ,
xfs_rtxnum_t start , /* starting rtext number to allocate */
xfs_rtxlen_t minlen , /* minimum length to allocate */
xfs_rtxlen_t maxlen , /* maximum length to allocate */
xfs_rtxlen_t * len , /* out: actual length allocated */
xfs_rtxlen_t prod , /* extent product factor */
xfs_rtxnum_t * rtx ) /* out: start rtext allocated */
2005-04-16 15:20:36 -07:00
{
2023-10-16 09:54:19 -07:00
struct xfs_mount * mp = args - > mp ;
2023-10-16 10:43:42 -07:00
int maxlog ; /* max useful extent from summary */
2023-10-16 09:54:19 -07:00
xfs_fileoff_t bbno ; /* bitmap block number */
int error ;
int i ; /* bitmap block offset (loop control) */
int j ; /* secondary loop control */
int log2len ; /* log2 of minlen */
xfs_rtxnum_t n ; /* next rtext to try */
2005-04-16 15:20:36 -07:00
2023-10-16 09:21:38 -07:00
ASSERT ( minlen % prod = = 0 ) ;
ASSERT ( maxlen % prod = = 0 ) ;
2005-04-16 15:20:36 -07:00
/*
* If the block number given is off the end , silently set it to
* the last block .
*/
2023-10-16 09:32:45 -07:00
if ( start > = mp - > m_sb . sb_rextents )
start = mp - > m_sb . sb_rextents - 1 ;
2020-09-09 14:21:06 -07:00
/* Make sure we don't run off the end of the rt volume. */
2023-10-16 09:32:45 -07:00
maxlen = xfs_rtallocate_clamp_len ( mp , start , maxlen , prod ) ;
2023-12-18 05:57:22 +01:00
if ( maxlen < minlen )
return - ENOSPC ;
2020-09-09 14:21:06 -07:00
2005-04-16 15:20:36 -07:00
/*
* Try the exact allocation first .
*/
2023-10-16 09:54:19 -07:00
error = xfs_rtallocate_extent_exact ( args , start , minlen , maxlen , len ,
2023-12-18 05:57:22 +01:00
prod , rtx ) ;
if ( error ! = - ENOSPC )
2005-04-16 15:20:36 -07:00
return error ;
2023-12-18 05:57:22 +01:00
2023-10-16 09:44:13 -07:00
bbno = xfs_rtx_to_rbmblock ( mp , start ) ;
2005-04-16 15:20:36 -07:00
i = 0 ;
xfs: don't try redundant allocations in xfs_rtallocate_extent_near()
xfs_rtallocate_extent_near() tries to find a free extent as close to a
target bitmap block given by bbno as possible, which may be before or
after bbno. Searching backwards has a complication: the realtime summary
accounts for free space _starting_ in a bitmap block, but not straddling
or ending in a bitmap block. So, when the negative search finds a free
extent in the realtime summary, in order to end up closer to the target,
it looks for the end of the free extent. For example, if bbno - 2 has a
free extent, then it will check bbno - 1, then bbno - 2. But then if
bbno - 3 has a free extent, it will check bbno - 1 again, then bbno - 2
again, and then bbno - 3. This results in a quadratic loop, which is
completely pointless since the repeated checks won't find anything new.
Fix it by remembering where we last checked up to and continue from
there. This also obviates the need for a check of the realtime summary.
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2023-10-16 10:47:04 -07:00
j = - 1 ;
2008-08-13 15:41:12 +10:00
ASSERT ( minlen ! = 0 ) ;
2005-04-16 15:20:36 -07:00
log2len = xfs_highbit32 ( minlen ) ;
/*
* Loop over all bitmap blocks ( bbno + i is current block ) .
*/
for ( ; ; ) {
/*
* Get summary information of extents of all useful levels
* starting in this bitmap block .
*/
2023-10-16 09:54:19 -07:00
error = xfs_rtany_summary ( args , log2len , mp - > m_rsumlevels - 1 ,
2023-10-16 10:43:42 -07:00
bbno + i , & maxlog ) ;
2023-12-18 05:57:22 +01:00
if ( error )
2005-04-16 15:20:36 -07:00
return error ;
2023-12-18 05:57:22 +01:00
2005-04-16 15:20:36 -07:00
/*
* If there are any useful extents starting here , try
* allocating one .
*/
2023-10-16 10:43:42 -07:00
if ( maxlog > = 0 ) {
2023-10-16 10:45:46 -07:00
xfs_extlen_t maxavail =
min_t ( xfs_rtblock_t , maxlen ,
( 1ULL < < ( maxlog + 1 ) ) - 1 ) ;
2005-04-16 15:20:36 -07:00
/*
* On the positive side of the starting location .
*/
if ( i > = 0 ) {
/*
* Try to allocate an extent starting in
* this block .
*/
2023-10-16 09:54:19 -07:00
error = xfs_rtallocate_extent_block ( args ,
2023-10-16 10:45:46 -07:00
bbno + i , minlen , maxavail , len ,
2023-12-18 05:57:22 +01:00
& n , prod , rtx ) ;
if ( error ! = - ENOSPC )
2005-04-16 15:20:36 -07:00
return error ;
}
/*
* On the negative side of the starting location .
*/
else { /* i < 0 */
2023-10-16 10:48:50 -07:00
int maxblocks ;
2005-04-16 15:20:36 -07:00
/*
2023-10-16 10:48:50 -07:00
* Loop backwards to find the end of the extent
* we found in the realtime summary .
*
* maxblocks is the maximum possible number of
* bitmap blocks from the start of the extent
* to the end of the extent .
2005-04-16 15:20:36 -07:00
*/
2023-10-16 10:48:50 -07:00
if ( maxlog = = 0 )
maxblocks = 0 ;
else if ( maxlog < mp - > m_blkbit_log )
maxblocks = 1 ;
else
maxblocks = 2 < < ( maxlog - mp - > m_blkbit_log ) ;
2005-04-16 15:20:36 -07:00
/*
2023-10-16 10:48:50 -07:00
* We need to check bbno + i + maxblocks down to
* bbno + i . We already checked bbno down to
* bbno + j + 1 , so we don ' t need to check those
* again .
2005-04-16 15:20:36 -07:00
*/
2023-10-16 10:48:50 -07:00
j = min ( i + maxblocks , j ) ;
xfs: don't try redundant allocations in xfs_rtallocate_extent_near()
xfs_rtallocate_extent_near() tries to find a free extent as close to a
target bitmap block given by bbno as possible, which may be before or
after bbno. Searching backwards has a complication: the realtime summary
accounts for free space _starting_ in a bitmap block, but not straddling
or ending in a bitmap block. So, when the negative search finds a free
extent in the realtime summary, in order to end up closer to the target,
it looks for the end of the free extent. For example, if bbno - 2 has a
free extent, then it will check bbno - 1, then bbno - 2. But then if
bbno - 3 has a free extent, it will check bbno - 1 again, then bbno - 2
again, and then bbno - 3. This results in a quadratic loop, which is
completely pointless since the repeated checks won't find anything new.
Fix it by remembering where we last checked up to and continue from
there. This also obviates the need for a check of the realtime summary.
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2023-10-16 10:47:04 -07:00
for ( ; j > = i ; j - - ) {
2023-10-16 09:54:19 -07:00
error = xfs_rtallocate_extent_block ( args ,
2023-10-16 10:13:22 -07:00
bbno + j , minlen ,
2023-10-16 10:45:46 -07:00
maxavail , len , & n , prod ,
2023-12-18 05:57:22 +01:00
rtx ) ;
if ( error ! = - ENOSPC )
2005-04-16 15:20:36 -07:00
return error ;
}
}
}
/*
* Loop control . If we were on the positive side , and there ' s
* still more blocks on the negative side , go there .
*/
if ( i > 0 & & ( int ) bbno - i > = 0 )
i = - i ;
/*
* If positive , and no more negative , but there are more
* positive , go there .
*/
else if ( i > 0 & & ( int ) bbno + i < mp - > m_sb . sb_rbmblocks - 1 )
i + + ;
/*
* If negative or 0 ( just started ) , and there are positive
* blocks to go , go there . The 0 case moves to block 1.
*/
else if ( i < = 0 & & ( int ) bbno - i < mp - > m_sb . sb_rbmblocks - 1 )
i = 1 - i ;
/*
* If negative or 0 and there are more negative blocks ,
* go there .
*/
else if ( i < = 0 & & ( int ) bbno + i > 0 )
i - - ;
/*
* Must be done . Return failure .
*/
else
break ;
}
2023-12-18 05:57:22 +01:00
return - ENOSPC ;
2005-04-16 15:20:36 -07:00
}
2023-12-18 05:57:32 +01:00
/*
 * Scan every bitmap block at summary level l and try to allocate an extent
 * of minlen <= len <= maxlen rtextents from the first block whose summary
 * counter is non-zero and that can satisfy the request.
 *
 * Returns 0 with *len and *rtx filled in, -ENOSPC if no block at this level
 * produced an allocation, or an error from the helpers.
 */
static int
xfs_rtalloc_sumlevel(
	struct xfs_rtalloc_args	*args,
	int			l,	/* level number */
	xfs_rtxlen_t		minlen,	/* minimum length to allocate */
	xfs_rtxlen_t		maxlen,	/* maximum length to allocate */
	xfs_rtxlen_t		prod,	/* extent product factor */
	xfs_rtxlen_t		*len,	/* out: actual length allocated */
	xfs_rtxnum_t		*rtx)	/* out: start rtext allocated */
{
	struct xfs_mount	*mp = args->mp;
	xfs_fileoff_t		bbno;	/* bitmap block number */

	for (bbno = 0; bbno < mp->m_sb.sb_rbmblocks; bbno++) {
		xfs_suminfo_t	count;	/* summary counter for this slot */
		xfs_rtxnum_t	n;	/* next rtext to be tried */
		int		error;

		error = xfs_rtget_summary(args, l, bbno, &count);
		if (error)
			return error;

		if (count) {
			/* The summary says something starts here; try it. */
			error = xfs_rtallocate_extent_block(args, bbno, minlen,
					maxlen, len, &n, prod, rtx);
			if (error != -ENOSPC)
				return error;

			/*
			 * If the allocator's "next rtext to try" lies beyond
			 * the following bitmap block, jump ahead to it (the
			 * loop increment supplies the final +1).
			 */
			if (xfs_rtx_to_rbmblock(mp, n) > bbno + 1)
				bbno = xfs_rtx_to_rbmblock(mp, n) - 1;
		}
	}

	return -ENOSPC;
}
2005-04-16 15:20:36 -07:00
/*
* Allocate an extent of length minlen < = len < = maxlen , with no position
* specified . If we don ' t get maxlen then use prod to trim
* the length , if given . The lengths are all in rtextents .
*/
2023-10-16 09:54:19 -07:00
STATIC int
2005-04-16 15:20:36 -07:00
xfs_rtallocate_extent_size (
2023-10-16 09:54:19 -07:00
struct xfs_rtalloc_args * args ,
xfs_rtxlen_t minlen , /* minimum length to allocate */
xfs_rtxlen_t maxlen , /* maximum length to allocate */
xfs_rtxlen_t * len , /* out: actual length allocated */
xfs_rtxlen_t prod , /* extent product factor */
xfs_rtxnum_t * rtx ) /* out: start rtext allocated */
2005-04-16 15:20:36 -07:00
{
2023-10-16 09:54:19 -07:00
int error ;
int l ; /* level number (loop control) */
2005-04-16 15:20:36 -07:00
2023-10-16 09:21:38 -07:00
ASSERT ( minlen % prod = = 0 ) ;
ASSERT ( maxlen % prod = = 0 ) ;
2008-08-13 15:41:12 +10:00
ASSERT ( maxlen ! = 0 ) ;
2005-04-16 15:20:36 -07:00
/*
* Loop over all the levels starting with maxlen .
2023-12-18 05:57:32 +01:00
*
* At each level , look at all the bitmap blocks , to see if there are
* extents starting there that are long enough ( > = maxlen ) .
*
* Note , only on the initial level can the allocation fail if the
* summary says there ' s an extent .
2005-04-16 15:20:36 -07:00
*/
2023-12-18 05:57:32 +01:00
for ( l = xfs_highbit32 ( maxlen ) ; l < args - > mp - > m_rsumlevels ; l + + ) {
error = xfs_rtalloc_sumlevel ( args , l , minlen , maxlen , prod , len ,
rtx ) ;
if ( error ! = - ENOSPC )
return error ;
2005-04-16 15:20:36 -07:00
}
2023-12-18 05:57:32 +01:00
2005-04-16 15:20:36 -07:00
/*
2023-12-18 05:57:32 +01:00
* Didn ' t find any maxlen blocks . Try smaller ones , unless we are
* looking for a fixed size extent .
2005-04-16 15:20:36 -07:00
*/
2023-12-18 05:57:22 +01:00
if ( minlen > - - maxlen )
return - ENOSPC ;
2013-10-15 09:17:56 +11:00
ASSERT ( minlen ! = 0 ) ;
ASSERT ( maxlen ! = 0 ) ;
2005-04-16 15:20:36 -07:00
/*
2013-10-15 09:17:56 +11:00
* Loop over sizes , from maxlen down to minlen .
2023-12-18 05:57:32 +01:00
*
* This time , when we do the allocations , allow smaller ones to succeed ,
* but make sure the specified minlen / maxlen are in the possible range
* for this summary level .
2005-04-16 15:20:36 -07:00
*/
2013-10-15 09:17:56 +11:00
for ( l = xfs_highbit32 ( maxlen ) ; l > = xfs_highbit32 ( minlen ) ; l - - ) {
2023-12-18 05:57:34 +01:00
error = xfs_rtalloc_sumlevel ( args , l ,
max_t ( xfs_rtxlen_t , minlen , 1 < < l ) ,
min_t ( xfs_rtxlen_t , maxlen , ( 1 < < ( l + 1 ) ) - 1 ) ,
prod , len , rtx ) ;
2023-12-18 05:57:32 +01:00
if ( error ! = - ENOSPC )
return error ;
2005-04-16 15:20:36 -07:00
}
2023-12-18 05:57:32 +01:00
2023-12-18 05:57:22 +01:00
return - ENOSPC ;
2005-04-16 15:20:36 -07:00
}
/*
2013-10-15 09:17:56 +11:00
* Allocate space to the bitmap or summary file , and zero it , for growfs .
2005-04-16 15:20:36 -07:00
*/
2015-08-19 10:01:40 +10:00
STATIC int
2013-10-15 09:17:56 +11:00
xfs_growfs_rt_alloc (
2015-08-19 10:01:40 +10:00
struct xfs_mount * mp , /* file system mount point */
xfs_extlen_t oblocks , /* old count of blocks */
xfs_extlen_t nblocks , /* new count of blocks */
struct xfs_inode * ip ) /* inode (bitmap/summary) */
2005-04-16 15:20:36 -07:00
{
2015-08-19 10:01:40 +10:00
xfs_fileoff_t bno ; /* block number in file */
struct xfs_buf * bp ; /* temporary buffer for zeroing */
xfs_daddr_t d ; /* disk block address */
int error ; /* error return value */
xfs_fsblock_t fsbno ; /* filesystem block for bno */
struct xfs_bmbt_irec map ; /* block map output */
int nmap ; /* number of block maps */
int resblks ; /* space reservation */
2020-09-15 20:50:42 -07:00
enum xfs_blft buf_type ;
2015-08-19 10:01:40 +10:00
struct xfs_trans * tp ;
2005-04-16 15:20:36 -07:00
2020-09-15 20:50:42 -07:00
if ( ip = = mp - > m_rsumip )
buf_type = XFS_BLFT_RTSUMMARY_BUF ;
else
buf_type = XFS_BLFT_RTBITMAP_BUF ;
2005-04-16 15:20:36 -07:00
/*
2013-10-15 09:17:56 +11:00
* Allocate space to the file , as necessary .
2005-04-16 15:20:36 -07:00
*/
2013-10-15 09:17:56 +11:00
while ( oblocks < nblocks ) {
resblks = XFS_GROWFSRT_SPACE_RES ( mp , nblocks - oblocks ) ;
2005-04-16 15:20:36 -07:00
/*
2013-10-15 09:17:56 +11:00
* Reserve space & log for one extent added to the file .
2005-04-16 15:20:36 -07:00
*/
2016-04-06 09:19:55 +10:00
error = xfs_trans_alloc ( mp , & M_RES ( mp ) - > tr_growrtalloc , resblks ,
0 , 0 , & tp ) ;
2013-10-15 09:17:56 +11:00
if ( error )
2016-04-06 09:19:55 +10:00
return error ;
2005-04-16 15:20:36 -07:00
/*
2013-10-15 09:17:56 +11:00
* Lock the inode .
2005-04-16 15:20:36 -07:00
*/
2013-10-15 09:17:56 +11:00
xfs_ilock ( ip , XFS_ILOCK_EXCL ) ;
xfs_trans_ijoin ( tp , ip , XFS_ILOCK_EXCL ) ;
2021-01-22 16:48:11 -08:00
error = xfs_iext_count_may_overflow ( ip , XFS_DATA_FORK ,
XFS_IEXT_ADD_NOSPLIT_CNT ) ;
2022-03-09 07:49:36 +00:00
if ( error = = - EFBIG )
error = xfs_iext_count_upgrade ( tp , ip ,
XFS_IEXT_ADD_NOSPLIT_CNT ) ;
2021-01-22 16:48:11 -08:00
if ( error )
goto out_trans_cancel ;
2013-10-15 09:17:56 +11:00
/*
* Allocate blocks to the bitmap file .
*/
nmap = 1 ;
error = xfs_bmapi_write ( tp , ip , oblocks , nblocks - oblocks ,
xfs: don't set bmapi total block req where minleft is
xfs_bmapi_write() takes a total block requirement parameter that is
passed down to the block allocation code and is used to specify the
total block requirement of the associated transaction. This is used
to try and select an AG that can not only satisfy the requested
extent allocation, but can also accommodate subsequent allocations
that might be required to complete the transaction. For example,
additional bmbt block allocations may be required on insertion of
the resulting extent to an inode data fork.
While it's important for callers to calculate and reserve such extra
blocks in the transaction, it is not necessary to pass the total
value to xfs_bmapi_write() in all cases. The latter automatically
sets minleft to ensure that sufficient free blocks remain after the
allocation attempt to expand the format of the associated inode
(i.e., such as extent to btree conversion, btree splits, etc).
Therefore, any callers that pass a total block requirement of the
bmap mapping length plus worst case bmbt expansion essentially
specify the additional reservation requirement twice. These callers
can pass a total of zero to rely on the bmapi minleft policy.
Beyond being superfluous, the primary motivation for this change is
that the total reservation logic in the bmbt code is dubious in
scenarios where minlen < maxlen and a maxlen extent cannot be
allocated (which is more common for data extent allocations where
contiguity is not required). The total value is based on maxlen in
the xfs_bmapi_write() caller. If the bmbt code falls back to an
allocation between minlen and maxlen, that allocation will not
succeed until total is reset to minlen, which essentially throws
away any additional reservation included in total by the caller. In
addition, the total value is not reset until after alignment is
dropped, which means that such callers drop alignment far too
aggressively than necessary.
Update all callers of xfs_bmapi_write() that pass a total block
value of the mapping length plus bmbt reservation to instead pass
zero and rely on xfs_bmapi_minleft() to enforce the bmbt reservation
requirement. This trades off slightly less conservative AG selection
for the ability to preserve alignment in more scenarios.
xfs_bmapi_write() callers that incorporate unrelated or additional
reservations in total beyond what is already included in minleft
must continue to use the former.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-10-21 09:26:48 -07:00
XFS_BMAPI_METADATA , 0 , & map , & nmap ) ;
2013-10-15 09:17:56 +11:00
if ( ! error & & nmap < 1 )
2014-06-25 14:58:08 +10:00
error = - ENOSPC ;
2013-10-15 09:17:56 +11:00
if ( error )
2018-07-24 13:43:13 -07:00
goto out_trans_cancel ;
2013-10-15 09:17:56 +11:00
/*
* Free any blocks freed up in the transaction , then commit .
*/
2015-06-04 13:48:08 +10:00
error = xfs_trans_commit ( tp ) ;
2013-10-15 09:17:56 +11:00
if ( error )
2015-08-19 10:01:40 +10:00
return error ;
2013-10-15 09:17:56 +11:00
/*
* Now we need to clear the allocated blocks .
* Do this one block per transaction , to keep it simple .
*/
for ( bno = map . br_startoff , fsbno = map . br_startblock ;
bno < map . br_startoff + map . br_blockcount ;
bno + + , fsbno + + ) {
/*
* Reserve log for one block zeroing .
*/
2016-04-06 09:19:55 +10:00
error = xfs_trans_alloc ( mp , & M_RES ( mp ) - > tr_growrtzero ,
0 , 0 , 0 , & tp ) ;
2013-10-15 09:17:56 +11:00
if ( error )
2016-04-06 09:19:55 +10:00
return error ;
2013-10-15 09:17:56 +11:00
/*
* Lock the bitmap inode .
*/
xfs_ilock ( ip , XFS_ILOCK_EXCL ) ;
xfs_trans_ijoin ( tp , ip , XFS_ILOCK_EXCL ) ;
/*
* Get a buffer for the block .
*/
d = XFS_FSB_TO_DADDR ( mp , fsbno ) ;
2020-01-23 17:01:18 -08:00
error = xfs_trans_get_buf ( tp , mp - > m_ddev_targp , d ,
mp - > m_bsize , 0 , & bp ) ;
if ( error )
2015-08-19 10:01:40 +10:00
goto out_trans_cancel ;
2020-09-15 20:50:42 -07:00
xfs_trans_buf_set_type ( tp , bp , buf_type ) ;
xfs: Set xfs_buf's b_ops member when zeroing bitmap/summary files
In xfs_growfs_rt(), we enlarge bitmap and summary files by allocating
new blocks for both files. For each of the new blocks allocated, we
allocate an xfs_buf, zero the payload, log the contents and commit the
transaction. Hence these buffers will eventually find themselves
appended to list at xfs_ail->ail_buf_list.
Later, xfs_growfs_rt() loops across all of the new blocks belonging to
the bitmap inode to set the bitmap values to 1. In doing so, it
allocates a new transaction and invokes the following sequence of
functions,
- xfs_rtfree_range()
- xfs_rtmodify_range()
- xfs_rtbuf_get()
We pass '&xfs_rtbuf_ops' as the ops pointer to xfs_trans_read_buf().
- xfs_trans_read_buf()
We find the xfs_buf of interest in per-ag hash table, invoke
xfs_buf_reverify() which ends up assigning '&xfs_rtbuf_ops' to
xfs_buf->b_ops.
On the other hand, if xfs_growfs_rt_alloc() had allocated a few blocks
for the bitmap inode and returned with an error, all the xfs_bufs
corresponding to the new bitmap blocks that have been allocated would
continue to be on xfs_ail->ail_buf_list list without ever having a
non-NULL value assigned to their b_ops members. An AIL flush operation
would then trigger the following warning message to be printed on the
console,
XFS (loop0): _xfs_buf_ioapply: no buf ops on daddr 0x58 len 8
00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
CPU: 3 PID: 449 Comm: xfsaild/loop0 Not tainted 5.8.0-rc4-chandan-00038-g4d8c2b9de9ab-dirty #37
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
Call Trace:
dump_stack+0x57/0x70
_xfs_buf_ioapply+0x37c/0x3b0
? xfs_rw_bdev+0x1e0/0x1e0
? xfs_buf_delwri_submit_buffers+0xd4/0x210
__xfs_buf_submit+0x6d/0x1f0
xfs_buf_delwri_submit_buffers+0xd4/0x210
xfsaild+0x2c8/0x9e0
? __switch_to_asm+0x42/0x70
? xfs_trans_ail_cursor_first+0x80/0x80
kthread+0xfe/0x140
? kthread_park+0x90/0x90
ret_from_fork+0x22/0x30
This message indicates that the xfs_buf had its b_ops member set to
NULL.
This commit fixes the issue by assigning "&xfs_rtbuf_ops" to b_ops
member of each of the xfs_bufs logged by xfs_growfs_rt_alloc().
Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2020-09-17 11:12:08 -07:00
bp - > b_ops = & xfs_rtbuf_ops ;
2013-10-15 09:17:56 +11:00
memset ( bp - > b_addr , 0 , mp - > m_sb . sb_blocksize ) ;
xfs_trans_log_buf ( tp , bp , 0 , mp - > m_sb . sb_blocksize - 1 ) ;
/*
* Commit the transaction .
*/
2015-06-04 13:48:08 +10:00
error = xfs_trans_commit ( tp ) ;
2013-10-15 09:17:56 +11:00
if ( error )
2015-08-19 10:01:40 +10:00
return error ;
2005-04-16 15:20:36 -07:00
}
2013-10-15 09:17:56 +11:00
/*
* Go on to the next extent , if any .
*/
oblocks = map . br_startoff + map . br_blockcount ;
2005-04-16 15:20:36 -07:00
}
2015-08-19 10:01:40 +10:00
2005-04-16 15:20:36 -07:00
return 0 ;
2013-10-15 09:17:56 +11:00
2015-08-19 10:01:40 +10:00
out_trans_cancel :
xfs_trans_cancel ( tp ) ;
2013-10-15 09:17:56 +11:00
return error ;
2005-04-16 15:20:36 -07:00
}
2018-12-21 18:45:18 -08:00
/*
 * Allocate mp->m_rsum_cache with one byte per realtime bitmap block.
 *
 * Every entry starts out as 0xff (all bits set), the maximum value, which is
 * trivially an upper bound on the highest summary level with free extents.
 * Allocation failure is not fatal: m_rsum_cache is left NULL, a warning is
 * logged, and callers carry on without the cache.
 *
 * The previous value of mp->m_rsum_cache is overwritten, not freed; the
 * caller is responsible for it.
 */
static void
xfs_alloc_rsum_cache(
	xfs_mount_t	*mp,		/* file system mount structure */
	xfs_extlen_t	rbmblocks)	/* number of rt bitmap blocks */
{
	mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL);
	if (!mp->m_rsum_cache) {
		xfs_warn(mp, "could not allocate realtime summary cache");
		return;
	}

	memset(mp->m_rsum_cache, -1, rbmblocks);
}
2005-04-16 15:20:36 -07:00
/*
 * Visible (exported) functions.
 */
/*
 * Grow the realtime area of the filesystem.
 *
 * @mp: mount of the filesystem being grown
 * @in: requested geometry; in->newblocks is the new realtime block count
 *      and in->extsize the realtime extent size in filesystem blocks
 *
 * The request is validated, the last block of the new size is read from the
 * realtime device to prove it exists, and the bitmap and summary files are
 * extended to their final sizes.  The volume is then grown one bitmap block
 * at a time: each round builds a scratch copy of the mount ("nmp") that
 * describes the geometry after that round, and a single transaction resizes
 * the metadata inodes, copies the summary into the new layout if needed,
 * updates the superblock counters and frees the newly added extents.
 * When all rounds succeed the secondary superblocks are updated.
 *
 * Returns 0 on success or a negative errno:
 *   -EPERM       caller lacks CAP_SYS_ADMIN
 *   -EINVAL      no rt device/metadata inodes, shrink request, bad extent
 *                size, bad rt extent count, or summary larger than half
 *                the log
 *   -EOPNOTSUPP  rmapbt, reflink or quota is enabled
 *   other        errors propagated from the helpers called below
 */
int
xfs_growfs_rt(
	xfs_mount_t	*mp,		/* mount point for filesystem */
	xfs_growfs_rt_t	*in)		/* growfs rt input struct */
{
	xfs_fileoff_t	bmbno;		/* bitmap block number */
	struct xfs_buf	*bp;		/* temporary buffer */
	int		error;		/* error return value */
	xfs_mount_t	*nmp;		/* new (fake) mount structure */
	xfs_rfsblock_t	nrblocks;	/* new number of realtime blocks */
	xfs_extlen_t	nrbmblocks;	/* new number of rt bitmap blocks */
	xfs_rtxnum_t	nrextents;	/* new number of realtime extents */
	uint8_t		nrextslog;	/* new log2 of sb_rextents */
	xfs_extlen_t	nrsumblocks;	/* new number of summary blocks */
	uint		nrsumlevels;	/* new rt summary levels */
	uint		nrsumsize;	/* new size of rt summary, bytes */
	xfs_sb_t	*nsbp;		/* new superblock */
	xfs_extlen_t	rbmblocks;	/* current number of rt bitmap blocks */
	xfs_extlen_t	rsumblocks;	/* current number of rt summary blks */
	xfs_sb_t	*sbp;		/* old superblock */
	uint8_t		*rsum_cache;	/* old summary cache */

	sbp = &mp->m_sb;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Needs to have been mounted with an rt device. */
	if (!XFS_IS_REALTIME_MOUNT(mp))
		return -EINVAL;
	/*
	 * Mount should fail if the rt bitmap/summary files don't load, but
	 * we'll check anyway.
	 */
	if (!mp->m_rbmip || !mp->m_rsumip)
		return -EINVAL;

	/* Shrink not supported. */
	if (in->newblocks <= sbp->sb_rblocks)
		return -EINVAL;

	/* Can only change rt extent size when adding rt volume. */
	if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
		return -EINVAL;

	/* Range check the extent size. */
	if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
	    XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
		return -EINVAL;

	/* Unsupported realtime features. */
	if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp))
		return -EOPNOTSUPP;

	nrblocks = in->newblocks;
	error = xfs_sb_validate_fsb_count(sbp, nrblocks);
	if (error)
		return error;

	/*
	 * Read in the last block of the device, make sure it exists.
	 */
	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
				XFS_FSB_TO_BB(mp, nrblocks - 1),
				XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
	if (error)
		return error;
	xfs_buf_relse(bp);

	/*
	 * Calculate new parameters.  These are the final values to be reached.
	 */
	nrextents = nrblocks;
	do_div(nrextents, in->extsize);
	if (!xfs_validate_rtextents(nrextents))
		return -EINVAL;
	nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents);
	nrextslog = xfs_compute_rextslog(nrextents);
	nrsumlevels = nrextslog + 1;
	nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, nrbmblocks);
	nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);

	/*
	 * New summary size can't be more than half the size of
	 * the log.  This prevents us from getting a log overflow,
	 * since we'll log basically the whole summary file at once.
	 */
	if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
		return -EINVAL;

	/*
	 * Get the old block counts for bitmap and summary inodes.
	 * These can't change since other growfs callers are locked out.
	 */
	rbmblocks = XFS_B_TO_FSB(mp, mp->m_rbmip->i_disk_size);
	rsumblocks = XFS_B_TO_FSB(mp, mp->m_rsumip->i_disk_size);

	/*
	 * Allocate space to the bitmap and summary files, as necessary.
	 */
	error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip);
	if (error)
		return error;
	error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
	if (error)
		return error;

	/*
	 * Remember the current summary cache so it can be freed or restored
	 * at the end, then install one sized for the new bitmap if the
	 * bitmap block count changes.
	 */
	rsum_cache = mp->m_rsum_cache;
	if (nrbmblocks != sbp->sb_rbmblocks)
		xfs_alloc_rsum_cache(mp, nrbmblocks);

	/*
	 * Allocate a new (fake) mount/sb.
	 */
	nmp = kmalloc(sizeof(*nmp), GFP_KERNEL | __GFP_NOFAIL);

	/*
	 * Loop over the bitmap blocks.
	 * We will do everything one bitmap block at a time.
	 * Skip the current block if it is exactly full.
	 * This also deals with the case where there were no rtextents before.
	 */
	for (bmbno = sbp->sb_rbmblocks -
		     ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
	     bmbno < nrbmblocks;
	     bmbno++) {
		struct xfs_rtalloc_args	args = {
			.mp		= mp,
		};
		struct xfs_rtalloc_args	nargs = {
			.mp		= nmp,
		};
		struct xfs_trans	*tp;
		xfs_rfsblock_t		nrblocks_step;

		/* Start each round from a fresh copy of the live mount. */
		*nmp = *mp;
		nsbp = &nmp->m_sb;

		/*
		 * Calculate new sb and mount fields for this round.
		 */
		nsbp->sb_rextsize = in->extsize;
		nmp->m_rtxblklog = -1; /* don't use shift or masking */
		nsbp->sb_rbmblocks = bmbno + 1;
		nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize *
				nsbp->sb_rextsize;
		nsbp->sb_rblocks = min(nrblocks, nrblocks_step);
		nsbp->sb_rextents = xfs_rtb_to_rtx(nmp, nsbp->sb_rblocks);
		ASSERT(nsbp->sb_rextents != 0);
		nsbp->sb_rextslog = xfs_compute_rextslog(nsbp->sb_rextents);
		nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
		nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels,
				nsbp->sb_rbmblocks);
		nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
		/* recompute growfsrt reservation from new rsumsize */
		xfs_trans_resv_calc(nmp, &nmp->m_resv);

		/*
		 * Start a transaction, get the log reservation.  Use the
		 * reservation just recomputed for the new geometry (nmp):
		 * this transaction logs the summary at its new, larger size,
		 * so the old mount's reservation may be too small.
		 */
		error = xfs_trans_alloc(mp, &M_RES(nmp)->tr_growrtfree, 0, 0, 0,
				&tp);
		if (error)
			break;
		args.tp = tp;
		nargs.tp = tp;

		/*
		 * Lock out other callers by grabbing the bitmap and summary
		 * inode locks and joining them to the transaction.
		 */
		xfs_rtbitmap_lock(tp, mp);

		/*
		 * Update the bitmap inode's size ondisk and incore.  We need
		 * to update the incore size so that inode inactivation won't
		 * punch what it thinks are "posteof" blocks.
		 */
		mp->m_rbmip->i_disk_size =
			nsbp->sb_rbmblocks * nsbp->sb_blocksize;
		i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_disk_size);
		xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);

		/*
		 * Update the summary inode's size.  We need to update the
		 * incore size so that inode inactivation won't punch what it
		 * thinks are "posteof" blocks.
		 */
		mp->m_rsumip->i_disk_size = nmp->m_rsumsize;
		i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_disk_size);
		xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);

		/*
		 * Copy summary data from old to new sizes.
		 * Do this when the real size (not block-aligned) changes.
		 */
		if (sbp->sb_rbmblocks != nsbp->sb_rbmblocks ||
		    mp->m_rsumlevels != nmp->m_rsumlevels) {
			error = xfs_rtcopy_summary(&args, &nargs);
			if (error)
				goto error_cancel;
		}

		/*
		 * Update superblock fields.
		 */
		if (nsbp->sb_rextsize != sbp->sb_rextsize)
			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSIZE,
				nsbp->sb_rextsize - sbp->sb_rextsize);
		if (nsbp->sb_rbmblocks != sbp->sb_rbmblocks)
			xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
				nsbp->sb_rbmblocks - sbp->sb_rbmblocks);
		if (nsbp->sb_rblocks != sbp->sb_rblocks)
			xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBLOCKS,
				nsbp->sb_rblocks - sbp->sb_rblocks);
		if (nsbp->sb_rextents != sbp->sb_rextents)
			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTENTS,
				nsbp->sb_rextents - sbp->sb_rextents);
		if (nsbp->sb_rextslog != sbp->sb_rextslog)
			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
				nsbp->sb_rextslog - sbp->sb_rextslog);

		/*
		 * Free new extent.
		 */
		error = xfs_rtfree_range(&nargs, sbp->sb_rextents,
				nsbp->sb_rextents - sbp->sb_rextents);
		xfs_rtbuf_cache_relse(&nargs);
		if (error) {
error_cancel:
			/* Also the landing point for a failed summary copy. */
			xfs_trans_cancel(tp);
			break;
		}

		/*
		 * Mark more blocks free in the superblock.
		 */
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS,
			nsbp->sb_rextents - sbp->sb_rextents);

		/*
		 * Update mp values into the real mp structure.
		 */
		mp->m_rsumlevels = nrsumlevels;
		mp->m_rsumsize = nrsumsize;
		/* recompute growfsrt reservation from new rsumsize */
		xfs_trans_resv_calc(mp, &mp->m_resv);

		error = xfs_trans_commit(tp);
		if (error)
			break;

		/* Ensure the mount RT feature flag is now set. */
		mp->m_features |= XFS_FEAT_REALTIME;
	}
	if (error)
		goto out_free;

	/* Update secondary superblocks now the physical grow has completed */
	error = xfs_update_secondary_sbs(mp);

out_free:
	/*
	 * Free the fake mp structure.
	 */
	kfree(nmp);

	/*
	 * If we had to allocate a new rsum_cache, we either need to free the
	 * old one (if we succeeded) or free the new one and restore the old one
	 * (if there was an error).
	 */
	if (rsum_cache != mp->m_rsum_cache) {
		if (error) {
			kvfree(mp->m_rsum_cache);
			mp->m_rsum_cache = rsum_cache;
		} else {
			kvfree(rsum_cache);
		}
	}

	return error;
}
/*
* Initialize realtime fields in the mount structure .
*/
int /* error */
xfs_rtmount_init (
2014-10-02 09:05:32 +10:00
struct xfs_mount * mp ) /* file system mount structure */
2005-04-16 15:20:36 -07:00
{
2014-10-02 09:05:32 +10:00
struct xfs_buf * bp ; /* buffer for last block of subvolume */
struct xfs_sb * sbp ; /* filesystem superblock copy in mount */
xfs_daddr_t d ; /* address of last block of subvolume */
2023-10-16 09:50:34 -07:00
unsigned int rsumblocks ;
2014-10-02 09:05:32 +10:00
int error ;
2005-04-16 15:20:36 -07:00
sbp = & mp - > m_sb ;
if ( sbp - > sb_rblocks = = 0 )
return 0 ;
if ( mp - > m_rtdev_targp = = NULL ) {
2011-03-07 10:08:35 +11:00
xfs_warn ( mp ,
" Filesystem has a realtime volume, use rtdev=device option " ) ;
2014-06-25 14:58:08 +10:00
return - ENODEV ;
2005-04-16 15:20:36 -07:00
}
mp - > m_rsumlevels = sbp - > sb_rextslog + 1 ;
2023-10-16 09:50:34 -07:00
rsumblocks = xfs_rtsummary_blockcount ( mp , mp - > m_rsumlevels ,
mp - > m_sb . sb_rbmblocks ) ;
mp - > m_rsumsize = XFS_FSB_TO_B ( mp , rsumblocks ) ;
2005-04-16 15:20:36 -07:00
mp - > m_rbmip = mp - > m_rsumip = NULL ;
/*
* Check that the realtime section is an ok size .
*/
d = ( xfs_daddr_t ) XFS_FSB_TO_BB ( mp , mp - > m_sb . sb_rblocks ) ;
if ( XFS_BB_TO_FSB ( mp , d ) ! = mp - > m_sb . sb_rblocks ) {
2011-03-07 10:08:35 +11:00
xfs_warn ( mp , " realtime mount -- %llu != %llu " ,
2005-04-16 15:20:36 -07:00
( unsigned long long ) XFS_BB_TO_FSB ( mp , d ) ,
( unsigned long long ) mp - > m_sb . sb_rblocks ) ;
2014-06-25 14:58:08 +10:00
return - EFBIG ;
2005-04-16 15:20:36 -07:00
}
2014-10-02 09:05:32 +10:00
error = xfs_buf_read_uncached ( mp - > m_rtdev_targp ,
2010-09-22 10:47:20 +10:00
d - XFS_FSB_TO_BB ( mp , 1 ) ,
2014-10-02 09:05:32 +10:00
XFS_FSB_TO_BB ( mp , 1 ) , 0 , & bp , NULL ) ;
if ( error ) {
2011-03-07 10:08:35 +11:00
xfs_warn ( mp , " realtime device size check failed " ) ;
2014-10-02 09:05:32 +10:00
return error ;
2005-04-16 15:20:36 -07:00
}
xfs_buf_relse ( bp ) ;
return 0 ;
}
2022-04-12 06:49:42 +10:00
static int
xfs_rtalloc_count_frextent (
struct xfs_mount * mp ,
struct xfs_trans * tp ,
const struct xfs_rtalloc_rec * rec ,
void * priv )
{
uint64_t * valp = priv ;
* valp + = rec - > ar_extcount ;
return 0 ;
}
/*
* Reinitialize the number of free realtime extents from the realtime bitmap .
* Callers must ensure that there is no other activity in the filesystem .
*/
int
xfs_rtalloc_reinit_frextents (
struct xfs_mount * mp )
{
uint64_t val = 0 ;
int error ;
2024-04-22 13:20:08 +02:00
xfs_rtbitmap_lock_shared ( mp , XFS_RBMLOCK_BITMAP ) ;
2022-04-12 06:49:42 +10:00
error = xfs_rtalloc_query_all ( mp , NULL , xfs_rtalloc_count_frextent ,
& val ) ;
2024-04-22 13:20:08 +02:00
xfs_rtbitmap_unlock_shared ( mp , XFS_RBMLOCK_BITMAP ) ;
2022-04-12 06:49:42 +10:00
if ( error )
return error ;
spin_lock ( & mp - > m_sb_lock ) ;
mp - > m_sb . sb_frextents = val ;
spin_unlock ( & mp - > m_sb_lock ) ;
2022-04-12 06:49:42 +10:00
percpu_counter_set ( & mp - > m_frextents , mp - > m_sb . sb_frextents ) ;
2022-04-12 06:49:42 +10:00
return 0 ;
}
2022-11-06 17:03:18 -08:00
/*
* Read in the bmbt of an rt metadata inode so that we never have to load them
* at runtime . This enables the use of shared ILOCKs for rtbitmap scans . Use
* an empty transaction to avoid deadlocking on loops in the bmbt .
*/
static inline int
xfs_rtmount_iread_extents (
struct xfs_inode * ip ,
unsigned int lock_class )
{
struct xfs_trans * tp ;
int error ;
error = xfs_trans_alloc_empty ( ip - > i_mount , & tp ) ;
if ( error )
return error ;
xfs_ilock ( ip , XFS_ILOCK_EXCL | lock_class ) ;
error = xfs_iread_extents ( tp , ip , XFS_DATA_FORK ) ;
if ( error )
goto out_unlock ;
if ( xfs_inode_has_attr_fork ( ip ) ) {
error = xfs_iread_extents ( tp , ip , XFS_ATTR_FORK ) ;
if ( error )
goto out_unlock ;
}
out_unlock :
xfs_iunlock ( ip , XFS_ILOCK_EXCL | lock_class ) ;
xfs_trans_cancel ( tp ) ;
return error ;
}
2005-04-16 15:20:36 -07:00
/*
 * Get the bitmap and summary inodes and the summary cache into the mount
 * structure at mount time.
 *
 * Background on the summary cache: the realtime summary is a two-dimensional
 * array on disk, effectively
 *
 *	u32 rsum[log2(number of realtime extents) + 1][number of bitmap blocks]
 *
 * where rsum[log][bbno] is the number of extents of size 2**log which start
 * in bitmap block bbno.  The array is stored in row-major order, so the
 * entries for a single bitmap block are spread across the entire summary
 * file, and checking every level for one bitmap block means finding, reading
 * and releasing many summary buffers.  Swapping the dimensions would require
 * a disk format change, so instead a small in-memory cache of per-bitmap-block
 * level bounds is kept.  The cache is maintained lazily: it is only a loose
 * bound that gets tightened whenever a summary block is read or updated, and
 * it is serialized by the realtime bitmap and summary inode locks.
 *
 * On success both mp->m_rbmip and mp->m_rsumip hold inode references that
 * xfs_rtunmount_inodes() drops.  On failure any reference taken here is
 * released and the corresponding mount pointer is reset to NULL, so that a
 * later xfs_rtunmount_inodes() (which treats non-NULL as "held") cannot
 * release an inode a second time.
 *
 * Returns 0 or a negative errno.
 */
int					/* error */
xfs_rtmount_inodes(
	xfs_mount_t	*mp)		/* file system mount structure */
{
	int		error;		/* error return value */
	xfs_sb_t	*sbp;

	sbp = &mp->m_sb;

	/* Realtime bitmap inode. */
	error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
	if (xfs_metadata_is_sick(error))
		xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP);
	if (error)
		return error;
	ASSERT(mp->m_rbmip != NULL);

	error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP);
	if (error)
		goto out_rele_bitmap;

	/* Realtime summary inode. */
	error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
	if (xfs_metadata_is_sick(error))
		xfs_rt_mark_sick(mp, XFS_SICK_RT_SUMMARY);
	if (error)
		goto out_rele_bitmap;
	ASSERT(mp->m_rsumip != NULL);

	error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM);
	if (error)
		goto out_rele_summary;

	xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
	return 0;

out_rele_summary:
	xfs_irele(mp->m_rsumip);
	mp->m_rsumip = NULL;	/* don't leave a stale pointer behind */
out_rele_bitmap:
	xfs_irele(mp->m_rbmip);
	mp->m_rbmip = NULL;	/* don't leave a stale pointer behind */
	return error;
}
2009-02-04 09:33:58 +01:00
void
xfs_rtunmount_inodes (
struct xfs_mount * mp )
{
2024-01-16 09:59:42 +11:00
kvfree ( mp - > m_rsum_cache ) ;
2009-02-04 09:33:58 +01:00
if ( mp - > m_rbmip )
2018-07-25 12:52:32 -07:00
xfs_irele ( mp - > m_rbmip ) ;
2009-02-04 09:33:58 +01:00
if ( mp - > m_rsumip )
2018-07-25 12:52:32 -07:00
xfs_irele ( mp - > m_rsumip ) ;
2009-02-04 09:33:58 +01:00
}
2005-04-16 15:20:36 -07:00
/*
* Pick an extent for allocation at the start of a new realtime file .
* Use the sequence number stored in the atime field of the bitmap inode .
* Translate this to a fraction of the rtextents , and return the product
* of rtextents and the fraction .
* The fraction sequence is 0 , 1 / 2 , 1 / 4 , 3 / 4 , 1 / 8 , . . . , 7 / 8 , 1 / 16 , . . .
*/
2023-12-18 05:57:21 +01:00
static int
2005-04-16 15:20:36 -07:00
xfs_rtpick_extent (
2023-10-04 14:53:02 -04:00
xfs_mount_t * mp , /* file system mount point */
xfs_trans_t * tp , /* transaction pointer */
2023-11-08 13:22:16 -08:00
xfs_rtxlen_t len , /* allocation length (rtextents) */
xfs_rtxnum_t * pick ) /* result rt extent */
2005-04-16 15:20:36 -07:00
{
2023-11-08 13:22:16 -08:00
xfs_rtxnum_t b ; /* result rtext */
2023-10-04 14:53:02 -04:00
int log2 ; /* log of sequence number */
uint64_t resid ; /* residual after log removed */
uint64_t seq ; /* sequence number of file creation */
2023-11-08 13:22:16 -08:00
struct timespec64 ts ; /* timespec in inode */
2005-04-16 15:20:36 -07:00
2024-02-19 15:41:12 +00:00
xfs_assert_ilocked ( mp - > m_rbmip , XFS_ILOCK_EXCL ) ;
2011-01-25 09:06:19 +00:00
2023-11-08 13:22:16 -08:00
ts = inode_get_atime ( VFS_I ( mp - > m_rbmip ) ) ;
2021-03-29 11:11:44 -07:00
if ( ! ( mp - > m_rbmip - > i_diflags & XFS_DIFLAG_NEWRTBM ) ) {
mp - > m_rbmip - > i_diflags | = XFS_DIFLAG_NEWRTBM ;
2023-10-04 14:53:02 -04:00
seq = 0 ;
} else {
2023-11-08 13:22:16 -08:00
seq = ts . tv_sec ;
2005-04-16 15:20:36 -07:00
}
if ( ( log2 = xfs_highbit64 ( seq ) ) = = - 1 )
b = 0 ;
else {
resid = seq - ( 1ULL < < log2 ) ;
b = ( mp - > m_sb . sb_rextents * ( ( resid < < 1 ) + 1ULL ) ) > >
( log2 + 1 ) ;
if ( b > = mp - > m_sb . sb_rextents )
2018-06-08 09:54:22 -07:00
div64_u64_rem ( b , mp - > m_sb . sb_rextents , & b ) ;
2005-04-16 15:20:36 -07:00
if ( b + len > mp - > m_sb . sb_rextents )
b = mp - > m_sb . sb_rextents - len ;
}
2023-11-08 13:22:16 -08:00
ts . tv_sec = seq + 1 ;
2023-10-04 14:53:02 -04:00
inode_set_atime_to_ts ( VFS_I ( mp - > m_rbmip ) , ts ) ;
2011-01-25 09:06:19 +00:00
xfs_trans_log_inode ( tp , mp - > m_rbmip , XFS_ILOG_CORE ) ;
2005-04-16 15:20:36 -07:00
* pick = b ;
return 0 ;
}
2023-12-18 05:57:21 +01:00
2023-12-18 05:57:36 +01:00
/*
 * Try to make the min/max allocation lengths multiples of *prod: the maximum
 * is rounded down and the minimum is rounded up.
 *
 * If that would leave the maximum below the minimum, the lengths are left
 * untouched and *prod is set to 1 instead, i.e. the allocation proceeds
 * without the alignment.  *prod must be non-zero on entry.
 */
static void
xfs_rtalloc_align_minmax(
	xfs_rtxlen_t		*raminlen,
	xfs_rtxlen_t		*ramaxlen,
	xfs_rtxlen_t		*prod)
{
	const xfs_rtxlen_t	step = *prod;
	xfs_rtxlen_t		hi = *ramaxlen;
	xfs_rtxlen_t		lo = *raminlen;
	xfs_rtxlen_t		rem;

	/* Round the upper bound down to a multiple of step. */
	hi -= hi % step;

	/* Round the lower bound up to a multiple of step. */
	rem = lo % step;
	if (rem)
		lo += step - rem;

	/*
	 * An aligned range that is empty is of no use; fall back to the
	 * unaligned lengths.
	 */
	if (hi < lo) {
		*prod = 1;
		return;
	}

	*ramaxlen = hi;
	*raminlen = lo;
}
2023-12-18 05:57:21 +01:00
int
xfs_bmap_rtalloc (
struct xfs_bmalloca * ap )
{
struct xfs_mount * mp = ap - > ip - > i_mount ;
xfs_fileoff_t orig_offset = ap - > offset ;
2023-12-18 05:57:25 +01:00
xfs_rtxnum_t start ; /* allocation hint rtextent no */
xfs_rtxnum_t rtx ; /* actually allocated rtextent no */
2023-12-18 05:57:21 +01:00
xfs_rtxlen_t prod = 0 ; /* product factor for allocators */
xfs_extlen_t mod = 0 ; /* product factor for allocators */
xfs_rtxlen_t ralen = 0 ; /* realtime allocation length */
xfs_extlen_t align ; /* minimum allocation alignment */
xfs_extlen_t orig_length = ap - > length ;
xfs_extlen_t minlen = mp - > m_sb . sb_rextsize ;
xfs_rtxlen_t raminlen ;
bool rtlocked = false ;
bool ignore_locality = false ;
2023-12-18 05:57:37 +01:00
struct xfs_rtalloc_args args = {
. mp = mp ,
. tp = ap - > tp ,
} ;
2023-12-18 05:57:21 +01:00
int error ;
align = xfs_get_extsz_hint ( ap - > ip ) ;
2024-04-22 13:20:19 +02:00
if ( ! align )
align = 1 ;
2023-12-18 05:57:21 +01:00
retry :
error = xfs_bmap_extsize_align ( mp , & ap - > got , & ap - > prev ,
align , 1 , ap - > eof , 0 ,
ap - > conv , & ap - > offset , & ap - > length ) ;
if ( error )
return error ;
ASSERT ( ap - > length ) ;
ASSERT ( xfs_extlen_to_rtxmod ( mp , ap - > length ) = = 0 ) ;
/*
* If we shifted the file offset downward to satisfy an extent size
* hint , increase minlen by that amount so that the allocator won ' t
* give us an allocation that ' s too short to cover at least one of the
* blocks that the caller asked for .
*/
if ( ap - > offset ! = orig_offset )
minlen + = orig_offset - ap - > offset ;
/*
* Set ralen to be the actual requested length in rtextents .
*
* If the old value was close enough to XFS_BMBT_MAX_EXTLEN that
* we rounded up to it , cut it back so it ' s valid again .
* Note that if it ' s a really large request ( bigger than
* XFS_BMBT_MAX_EXTLEN ) , we don ' t hear about that number , and can ' t
* adjust the starting point to match it .
*/
ralen = xfs_extlen_to_rtxlen ( mp , min ( ap - > length , XFS_MAX_BMBT_EXTLEN ) ) ;
2023-12-18 05:57:35 +01:00
raminlen = max_t ( xfs_rtxlen_t , 1 , xfs_extlen_to_rtxlen ( mp , minlen ) ) ;
2023-12-18 05:57:37 +01:00
ASSERT ( raminlen > 0 ) ;
ASSERT ( raminlen < = ralen ) ;
2023-12-18 05:57:21 +01:00
/*
* Lock out modifications to both the RT bitmap and summary inodes
*/
if ( ! rtlocked ) {
2024-04-22 13:20:08 +02:00
xfs_rtbitmap_lock ( ap - > tp , mp ) ;
2023-12-18 05:57:21 +01:00
rtlocked = true ;
}
2023-12-18 05:57:25 +01:00
if ( ignore_locality ) {
start = 0 ;
} else if ( xfs_bmap_adjacent ( ap ) ) {
start = xfs_rtb_to_rtx ( mp , ap - > blkno ) ;
} else if ( ap - > eof & & ap - > offset = = 0 ) {
/*
* If it ' s an allocation to an empty file at offset 0 , pick an
* extent that will space things out in the rt area .
*/
error = xfs_rtpick_extent ( mp , ap - > tp , ralen , & start ) ;
2023-12-18 05:57:21 +01:00
if ( error )
return error ;
} else {
2023-12-18 05:57:25 +01:00
start = 0 ;
2023-12-18 05:57:21 +01:00
}
2023-12-18 05:57:35 +01:00
/*
* Only bother calculating a real prod factor if offset & length are
* perfectly aligned , otherwise it will just get us in trouble .
*/
div_u64_rem ( ap - > offset , align , & mod ) ;
2023-12-18 05:57:36 +01:00
if ( mod | | ap - > length % align ) {
2023-12-18 05:57:35 +01:00
prod = 1 ;
2023-12-18 05:57:36 +01:00
} else {
2023-12-18 05:57:35 +01:00
prod = xfs_extlen_to_rtxlen ( mp , align ) ;
2023-12-18 05:57:36 +01:00
if ( prod > 1 )
xfs_rtalloc_align_minmax ( & raminlen , & ralen , & prod ) ;
}
2023-12-18 05:57:35 +01:00
2023-12-18 05:57:37 +01:00
if ( start ) {
error = xfs_rtallocate_extent_near ( & args , start , raminlen ,
ralen , & ralen , prod , & rtx ) ;
} else {
error = xfs_rtallocate_extent_size ( & args , raminlen ,
ralen , & ralen , prod , & rtx ) ;
}
xfs_rtbuf_cache_relse ( & args ) ;
2023-12-18 05:57:23 +01:00
if ( error = = - ENOSPC ) {
if ( align > mp - > m_sb . sb_rextsize ) {
/*
* We previously enlarged the request length to try to
* satisfy an extent size hint . The allocator didn ' t
* return anything , so reset the parameters to the
* original values and try again without alignment
* criteria .
*/
ap - > offset = orig_offset ;
ap - > length = orig_length ;
minlen = align = mp - > m_sb . sb_rextsize ;
goto retry ;
}
2023-12-18 05:57:22 +01:00
2023-12-18 05:57:25 +01:00
if ( ! ignore_locality & & start ! = 0 ) {
2023-12-18 05:57:23 +01:00
/*
* If we can ' t allocate near a specific rt extent , try
* again without locality criteria .
*/
ignore_locality = true ;
goto retry ;
}
2023-12-18 05:57:21 +01:00
2023-12-18 05:57:23 +01:00
ap - > blkno = NULLFSBLOCK ;
ap - > length = 0 ;
return 0 ;
2023-12-18 05:57:21 +01:00
}
2023-12-18 05:57:23 +01:00
if ( error )
return error ;
2023-12-18 05:57:21 +01:00
2023-12-18 05:57:37 +01:00
xfs_trans_mod_sb ( ap - > tp , ap - > wasdel ?
XFS_TRANS_SB_RES_FREXTENTS : XFS_TRANS_SB_FREXTENTS ,
- ( long ) ralen ) ;
2023-12-18 05:57:23 +01:00
ap - > blkno = xfs_rtx_to_rtb ( mp , rtx ) ;
ap - > length = xfs_rtxlen_to_extlen ( mp , ralen ) ;
xfs_bmap_alloc_account ( ap ) ;
2023-12-18 05:57:21 +01:00
return 0 ;
}