// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/newbt.h"

/*
 * Estimate proper slack values for a btree that's being reloaded.
 *
 * Under most circumstances, we'll take whatever default loading value the
 * btree bulk loading code calculates for us.  However, there are some
 * exceptions to this rule:
 *
 * (0) If someone turned one of the debug knobs.
 * (1) If this is a per-AG btree and the AG has less than 10% space free.
 * (2) If this is an inode btree and the FS has less than 10% space free.
 *
 * In either case, format the new btree blocks almost completely full to
 * minimize space usage.
 */
static void
xrep_newbt_estimate_slack(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_btree_bload	*bload = &xnr->bload;
	uint64_t		free;
	uint64_t		sz;

	/*
	 * The xfs_globals values are set to -1 (i.e. take the bload defaults)
	 * unless someone has set them otherwise, so we just pull the values
	 * here.
	 */
	bload->leaf_slack = xfs_globals.bload_leaf_slack;
	bload->node_slack = xfs_globals.bload_node_slack;

	if (sc->ops->type == ST_PERAG) {
		free = sc->sa.pag->pagf_freeblks;
		sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
	} else {
		free = percpu_counter_sum(&sc->mp->m_fdblocks);
		sz = sc->mp->m_sb.sb_dblocks;
	}

	/* No further changes if there's more than 10% free space left. */
	if (free >= div_u64(sz, 10))
		return;

	/*
	 * We're low on space; load the btrees as tightly as possible.  Leave
	 * a couple of open slots in each btree block so that we don't end up
	 * splitting the btrees like crazy after a mount.
	 */
	if (bload->leaf_slack < 0)
		bload->leaf_slack = 2;
	if (bload->node_slack < 0)
		bload->node_slack = 2;
}

/* Initialize accounting resources for staging a new AG btree. */
void
xrep_newbt_init_ag(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc,
	const struct xfs_owner_info *oinfo,
	xfs_fsblock_t		alloc_hint,
	enum xfs_ag_resv_type	resv)
{
	memset(xnr, 0, sizeof(struct xrep_newbt));
	xnr->sc = sc;
	xnr->oinfo = *oinfo; /* structure copy */
	xnr->alloc_hint = alloc_hint;
	xnr->resv = resv;
	INIT_LIST_HEAD(&xnr->resv_list);

	xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
	xrep_newbt_estimate_slack(xnr);
}

/* Initialize accounting resources for staging a new inode fork btree. */
int
xrep_newbt_init_inode(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc,
	int			whichfork,
	const struct xfs_owner_info *oinfo)
{
	struct xfs_ifork	*ifp;

	ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
	if (!ifp)
		return -ENOMEM;

	xrep_newbt_init_ag(xnr, sc, oinfo,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
			XFS_AG_RESV_NONE);
	xnr->ifake.if_fork = ifp;
	xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
	return 0;
}

/*
 * Initialize accounting resources for staging a new btree.  Callers are
 * expected to add their own reservations (and clean them up) manually.
 */
void
xrep_newbt_init_bare(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc)
{
	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
			XFS_AG_RESV_NONE);
}

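/*
 * Illustrative sketch only: one way a repair function might drive this
 * machinery end to end, assuming a hypothetical xrep_mybt_build() helper
 * that bulk-loads records and obtains blocks via xrep_newbt_claim_block():
 *
 *	xrep_newbt_init_ag(&xnr, sc, &oinfo, alloc_hint, XFS_AG_RESV_NONE);
 *	error = xrep_newbt_alloc_blocks(&xnr, nr_blocks);
 *	if (!error)
 *		error = xrep_mybt_build(sc, &xnr);
 *	if (error)
 *		xrep_newbt_cancel(&xnr);
 *	else
 *		error = xrep_newbt_commit(&xnr);
 */
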
/*
 * Designate specific blocks to be used to build our new btree.  @pag must be
 * a passive reference.
 */
STATIC int
xrep_newbt_add_blocks(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	const struct xfs_alloc_arg *args)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xrep_newbt_resv	*resv;
	int			error;

	resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
	if (!resv)
		return -ENOMEM;

	INIT_LIST_HEAD(&resv->list);
	resv->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
	resv->len = args->len;
	resv->used = 0;
	resv->pag = xfs_perag_hold(pag);

	if (args->tp) {
		ASSERT(xnr->oinfo.oi_offset == 0);

		error = xfs_alloc_schedule_autoreap(args, true, &resv->autoreap);
		if (error)
			goto out_pag;
	}

	list_add_tail(&resv->list, &xnr->resv_list);
	return 0;
out_pag:
	xfs_perag_put(resv->pag);
	kfree(resv);
	return error;
}

/*
 * Add an extent to the new btree reservation pool.  Callers are required to
 * reap this reservation manually if the repair is cancelled.  @pag must be a
 * passive reference.
 */
int
xrep_newbt_add_extent(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xfs_alloc_arg	args = {
		.tp		= NULL, /* no autoreap */
		.oinfo		= xnr->oinfo,
		.fsbno		= XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno),
		.len		= len,
		.resv		= xnr->resv,
	};

	return xrep_newbt_add_blocks(xnr, pag, &args);
}

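/*
 * Note: because args.tp is NULL here, xrep_newbt_add_blocks() does not
 * schedule automatic reaping for extents added this way, which is why the
 * comment above requires callers to reap these reservations themselves if
 * the repair is cancelled.
 */
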
/* Don't let our allocation hint take us beyond this AG */
static inline void
xrep_newbt_validate_ag_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);

	if (agno == sc->sa.pag->pag_agno &&
	    xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
					 XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for a new per-AG btree. */
STATIC int
xrep_newbt_alloc_ag_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	ASSERT(sc->sa.pag != NULL);

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		xfs_agnumber_t		agno;

		xrep_newbt_validate_ag_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_near_bno(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_ag_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		if (agno != sc->sa.pag->pag_agno) {
			ASSERT(agno == sc->sa.pag->pag_agno);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, sc->sa.pag, &args);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Don't let our allocation hint take us beyond EOFS */
static inline void
xrep_newbt_validate_file_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;

	if (xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for our new file-based btree. */
STATIC int
xrep_newbt_alloc_file_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		struct xfs_perag	*pag;
		xfs_agnumber_t		agno;

		xrep_newbt_validate_file_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_start_ag(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_file_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		pag = xfs_perag_get(mp, agno);
		if (!pag) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, pag, &args);
		xfs_perag_put(pag);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Allocate disk space for our new btree. */
int
xrep_newbt_alloc_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	if (xnr->sc->ip)
		return xrep_newbt_alloc_file_blocks(xnr, nr_blocks);
	return xrep_newbt_alloc_ag_blocks(xnr, nr_blocks);
}

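/*
 * Illustrative only: the block count passed in above normally comes from the
 * bulk loader's geometry computation, e.g. (assuming a staging cursor @cur
 * and an estimated record count @nr_records):
 *
 *	error = xfs_btree_bload_compute_geometry(cur, &xnr->bload, nr_records);
 *	if (!error)
 *		error = xrep_newbt_alloc_blocks(xnr, xnr->bload.nr_blocks);
 */
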
/*
 * Free the unused part of a space extent that was reserved for a new ondisk
 * structure.  Returns the number of EFIs logged or a negative errno.
 */
STATIC int
xrep_newbt_free_extent(
	struct xrep_newbt	*xnr,
	struct xrep_newbt_resv	*resv,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agblock_t		free_agbno = resv->agbno;
	xfs_extlen_t		free_aglen = resv->len;
	xfs_fsblock_t		fsbno;
	int			error;

	if (!btree_committed || resv->used == 0) {
		/*
		 * If we're not committing a new btree or we didn't use the
		 * space reservation, let the existing EFI free the entire
		 * space extent.
		 */
		trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno,
				free_agbno, free_aglen, xnr->oinfo.oi_owner);
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		return 1;
	}

	/*
	 * We used space and committed the btree.  Cancel the autoreap, remove
	 * the written blocks from the reservation, and possibly log a new EFI
	 * to free any unused reservation space.
	 */
	xfs_alloc_cancel_autoreap(sc->tp, &resv->autoreap);
	free_agbno += resv->used;
	free_aglen -= resv->used;

	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, xnr->oinfo.oi_owner);

	ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
	ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);

	/*
	 * Use EFIs to free the reservations.  This reduces the chance
	 * that we leak blocks if the system goes down.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
	error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
			xnr->resv, true);
	if (error)
		return error;

	return 1;
}

/* Free all the accounting info and disk space we reserved for a new btree. */
STATIC int
xrep_newbt_free(
	struct xrep_newbt	*xnr,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xrep_newbt_resv	*resv, *n;
	unsigned int		freed = 0;
	int			error = 0;

	/*
	 * If the filesystem already went down, we can't free the blocks.  Skip
	 * ahead to freeing the incore metadata because we can't fix anything.
	 */
	if (xfs_is_shutdown(sc->mp))
		goto junkit;

	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		int ret;

		ret = xrep_newbt_free_extent(xnr, resv, btree_committed);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
		if (ret < 0) {
			error = ret;
			goto junkit;
		}

		freed += ret;
		if (freed >= XREP_MAX_ITRUNCATE_EFIS) {
			error = xrep_defer_finish(sc);
			if (error)
				goto junkit;
			freed = 0;
		}
	}

	if (freed)
		error = xrep_defer_finish(sc);

junkit:
	/*
	 * If we still have reservations attached to @newbt, cleanup must have
	 * failed and the filesystem is about to go down.  Clean up the incore
	 * reservations and try to commit to freeing the space we used.
	 */
	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
	}

	if (sc->ip) {
		kmem_cache_free(xfs_ifork_cache, xnr->ifake.if_fork);
		xnr->ifake.if_fork = NULL;
	}

	return error;
}

/*
 * Free all the accounting info and unused disk space allocations after
 * committing a new btree.
 */
int
xrep_newbt_commit(
	struct xrep_newbt	*xnr)
{
	return xrep_newbt_free(xnr, true);
}

/*
 * Free all the accounting info and all of the disk space we reserved for a
 * new btree that we're not going to commit.  We want to try to roll things
 * back cleanly for things like ENOSPC midway through allocation.
 */
void
xrep_newbt_cancel(
	struct xrep_newbt	*xnr)
{
	xrep_newbt_free(xnr, false);
}

/* Feed one of the reserved btree blocks to the bulk loader. */
int
xrep_newbt_claim_block(
	struct xfs_btree_cur	*cur,
	struct xrep_newbt	*xnr,
	union xfs_btree_ptr	*ptr)
{
	struct xrep_newbt_resv	*resv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_agblock_t		agbno;

	/*
	 * The first item in the list should always have a free block unless
	 * we're completely out.
	 */
	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
	if (resv->used == resv->len)
		return -ENOSPC;

	/*
	 * Peel off a block from the start of the reservation.  We allocate
	 * blocks in order to place blocks on disk in increasing record or key
	 * order.  The block reservations tend to end up on the list in
	 * decreasing order, which hopefully results in leaf blocks ending up
	 * together.
	 */
	agbno = resv->agbno + resv->used;
	resv->used++;

	/* If we used all the blocks in this reservation, move it to the end. */
	if (resv->used == resv->len)
		list_move_tail(&resv->list, &xnr->resv_list);

	trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
			xnr->oinfo.oi_owner);

	if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
		ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
								agbno));
	else
		ptr->s = cpu_to_be32(agbno);

	/* Relog all the EFIs. */
	return xrep_defer_finish(xnr->sc);
}

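/*
 * This function is typically wired up (possibly via a thin wrapper) as the
 * ->claim_block callback of struct xfs_btree_bload, so that xfs_btree_bload()
 * pulls its new btree blocks from the reservations staged above.
 */
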
/* How many reserved blocks are unused? */
unsigned int
xrep_newbt_unused_blocks(
	struct xrep_newbt	*xnr)
{
	struct xrep_newbt_resv	*resv;
	unsigned int		unused = 0;

	list_for_each_entry(resv, &xnr->resv_list, list)
		unused += resv->len - resv->used;
	return unused;
}