2018-06-06 05:42:14 +03:00
// SPDX-License-Identifier: GPL-2.0
2005-04-17 02:20:36 +04:00
/*
2006-06-09 08:48:12 +04:00
* Copyright ( c ) 2000 - 2006 Silicon Graphics , Inc .
2005-11-02 06:58:39 +03:00
* All Rights Reserved .
2005-04-17 02:20:36 +04:00
*/
# include "xfs.h"
2005-11-02 06:38:42 +03:00
# include "xfs_fs.h"
2013-10-23 03:36:05 +04:00
# include "xfs_shared.h"
2013-10-23 03:50:10 +04:00
# include "xfs_format.h"
# include "xfs_log_format.h"
# include "xfs_trans_resv.h"
2005-11-02 06:38:42 +03:00
# include "xfs_bit.h"
2005-04-17 02:20:36 +04:00
# include "xfs_sb.h"
2013-04-24 12:58:02 +04:00
# include "xfs_mount.h"
2016-08-03 04:15:38 +03:00
# include "xfs_defer.h"
2013-08-12 14:49:37 +04:00
# include "xfs_dir2.h"
2005-04-17 02:20:36 +04:00
# include "xfs_inode.h"
2005-11-02 06:38:42 +03:00
# include "xfs_btree.h"
2013-10-23 03:50:10 +04:00
# include "xfs_trans.h"
2005-04-17 02:20:36 +04:00
# include "xfs_alloc.h"
# include "xfs_bmap.h"
2013-08-12 14:49:42 +04:00
# include "xfs_bmap_util.h"
2013-10-23 03:51:50 +04:00
# include "xfs_bmap_btree.h"
2005-04-17 02:20:36 +04:00
# include "xfs_rtalloc.h"
2017-10-31 22:04:49 +03:00
# include "xfs_errortag.h"
2005-04-17 02:20:36 +04:00
# include "xfs_error.h"
# include "xfs_quota.h"
# include "xfs_trans_space.h"
# include "xfs_buf_item.h"
2009-12-15 02:14:59 +03:00
# include "xfs_trace.h"
2013-10-23 03:51:50 +04:00
# include "xfs_attr_leaf.h"
# include "xfs_filestream.h"
2016-08-03 04:33:42 +03:00
# include "xfs_rmap.h"
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is in contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
# include "xfs_ag_resv.h"
2016-10-03 19:11:23 +03:00
# include "xfs_refcount.h"
2016-11-28 06:57:42 +03:00
# include "xfs_icache.h"
2005-04-17 02:20:36 +04:00
/*
 * Allocation zone used by __xfs_bmap_add_free() to obtain the
 * struct xfs_extent_free_item it queues for each extent to be freed.
 */
kmem_zone_t * xfs_bmap_free_item_zone ;
/*
2013-02-25 05:31:26 +04:00
* Miscellaneous helper functions
2005-04-17 02:20:36 +04:00
*/
/*
2013-02-25 05:31:26 +04:00
* Compute and fill in the value of the maximum depth of a bmap btree
* in this filesystem . Done once , during mount .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
void
xfs_bmap_compute_maxlevels (
xfs_mount_t * mp , /* file system mount structure */
int whichfork ) /* data or attr fork */
{
int level ; /* btree level */
uint maxblocks ; /* max blocks at this level */
uint maxleafents ; /* max leaf entries possible */
int maxrootrecs ; /* max records in root block */
int minleafrecs ; /* min records in leaf block */
int minnoderecs ; /* min records in node block */
int sz ; /* root block size */
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
/*
* The maximum number of extents in a file , hence the maximum
* number of leaf entries , is controlled by the type of di_nextents
* ( a signed 32 - bit number , xfs_extnum_t ) , or by di_anextents
* ( a signed 16 - bit number , xfs_aextnum_t ) .
*
* Note that we can no longer assume that if we are in ATTR1 that
* the fork offset of all the inodes will be
* ( xfs_default_attroffset ( ip ) > > 3 ) because we could have mounted
* with ATTR2 and then mounted back with ATTR1 , keeping the
* di_forkoff ' s fixed but probably at various positions . Therefore ,
* for both ATTR1 and ATTR2 we have to assume the worst case scenario
* of a minimum size available .
*/
if ( whichfork = = XFS_DATA_FORK ) {
maxleafents = MAXEXTNUM ;
sz = XFS_BMDR_SPACE_CALC ( MINDBTPTRS ) ;
} else {
maxleafents = MAXAEXTNUM ;
sz = XFS_BMDR_SPACE_CALC ( MINABTPTRS ) ;
}
2014-04-14 12:58:51 +04:00
maxrootrecs = xfs_bmdr_maxrecs ( sz , 0 ) ;
2013-02-25 05:31:26 +04:00
minleafrecs = mp - > m_bmap_dmnr [ 0 ] ;
minnoderecs = mp - > m_bmap_dmnr [ 1 ] ;
maxblocks = ( maxleafents + minleafrecs - 1 ) / minleafrecs ;
for ( level = 1 ; maxblocks > 1 ; level + + ) {
if ( maxblocks < = maxrootrecs )
maxblocks = 1 ;
else
maxblocks = ( maxblocks + minnoderecs - 1 ) / minnoderecs ;
}
mp - > m_bm_maxlevels [ whichfork ] = level ;
}
2005-11-02 07:10:24 +03:00
2008-10-30 08:56:09 +03:00
STATIC int /* error */
xfs_bmbt_lookup_eq (
struct xfs_btree_cur * cur ,
2017-10-18 00:16:26 +03:00
struct xfs_bmbt_irec * irec ,
2008-10-30 08:56:09 +03:00
int * stat ) /* success/failure */
{
2017-10-18 00:16:26 +03:00
cur - > bc_rec . b = * irec ;
2008-10-30 08:56:09 +03:00
return xfs_btree_lookup ( cur , XFS_LOOKUP_EQ , stat ) ;
}
STATIC int /* error */
2017-10-18 00:16:27 +03:00
xfs_bmbt_lookup_first (
2008-10-30 08:56:09 +03:00
struct xfs_btree_cur * cur ,
int * stat ) /* success/failure */
{
2017-10-18 00:16:27 +03:00
cur - > bc_rec . b . br_startoff = 0 ;
cur - > bc_rec . b . br_startblock = 0 ;
cur - > bc_rec . b . br_blockcount = 0 ;
2008-10-30 08:56:09 +03:00
return xfs_btree_lookup ( cur , XFS_LOOKUP_GE , stat ) ;
}
2008-10-30 08:56:32 +03:00
/*
2011-12-19 00:00:07 +04:00
* Check if the inode needs to be converted to btree format .
*/
static inline bool xfs_bmap_needs_btree ( struct xfs_inode * ip , int whichfork )
{
2016-10-03 19:11:34 +03:00
return whichfork ! = XFS_COW_FORK & &
XFS_IFORK_FORMAT ( ip , whichfork ) = = XFS_DINODE_FMT_EXTENTS & &
2011-12-19 00:00:07 +04:00
XFS_IFORK_NEXTENTS ( ip , whichfork ) >
XFS_IFORK_MAXEXT ( ip , whichfork ) ;
}
/*
* Check if the inode should be converted to extent format .
*/
static inline bool xfs_bmap_wants_extents ( struct xfs_inode * ip , int whichfork )
{
2016-10-03 19:11:34 +03:00
return whichfork ! = XFS_COW_FORK & &
XFS_IFORK_FORMAT ( ip , whichfork ) = = XFS_DINODE_FMT_BTREE & &
2011-12-19 00:00:07 +04:00
XFS_IFORK_NEXTENTS ( ip , whichfork ) < =
XFS_IFORK_MAXEXT ( ip , whichfork ) ;
}
/*
2017-10-18 00:16:26 +03:00
* Update the record referred to by cur to the value given by irec
2008-10-30 08:56:32 +03:00
* This either works ( return 0 ) or gets an EFSCORRUPTED error .
*/
STATIC int
xfs_bmbt_update (
struct xfs_btree_cur * cur ,
2017-10-18 00:16:26 +03:00
struct xfs_bmbt_irec * irec )
2008-10-30 08:56:32 +03:00
{
union xfs_btree_rec rec ;
2017-10-18 00:16:26 +03:00
xfs_bmbt_disk_set_all ( & rec . bmbt , irec ) ;
2008-10-30 08:56:32 +03:00
return xfs_btree_update ( cur , & rec ) ;
}
2008-10-30 08:56:09 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Compute the worst - case number of indirect blocks that will be used
* for ip ' s delayed extent of length " len " .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
STATIC xfs_filblks_t
xfs_bmap_worst_indlen (
xfs_inode_t * ip , /* incore inode pointer */
xfs_filblks_t len ) /* delayed extent length */
2005-04-17 02:20:36 +04:00
{
2013-02-25 05:31:26 +04:00
int level ; /* btree level number */
int maxrecs ; /* maximum record count at this level */
xfs_mount_t * mp ; /* mount structure */
xfs_filblks_t rval ; /* return value */
2005-04-17 02:20:36 +04:00
mp = ip - > i_mount ;
2013-02-25 05:31:26 +04:00
maxrecs = mp - > m_bmap_dmxr [ 0 ] ;
for ( level = 0 , rval = 0 ;
level < XFS_BM_MAXLEVELS ( mp , XFS_DATA_FORK ) ;
level + + ) {
len + = maxrecs - 1 ;
do_div ( len , maxrecs ) ;
rval + = len ;
2017-09-18 19:41:17 +03:00
if ( len = = 1 )
return rval + XFS_BM_MAXLEVELS ( mp , XFS_DATA_FORK ) -
2013-02-25 05:31:26 +04:00
level - 1 ;
if ( level = = 0 )
maxrecs = mp - > m_bmap_dmxr [ 1 ] ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
return rval ;
2005-04-17 02:20:36 +04:00
}
/*
2013-02-25 05:31:26 +04:00
* Calculate the default attribute fork offset for newly created inodes .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
uint
xfs_default_attroffset (
struct xfs_inode * ip )
2005-04-17 02:20:36 +04:00
{
2013-02-25 05:31:26 +04:00
struct xfs_mount * mp = ip - > i_mount ;
uint offset ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
if ( mp - > m_sb . sb_inodesize = = 256 ) {
2013-03-12 16:30:36 +04:00
offset = XFS_LITINO ( mp , ip - > i_d . di_version ) -
2013-02-25 05:31:26 +04:00
XFS_BMDR_SPACE_CALC ( MINABTPTRS ) ;
} else {
offset = XFS_BMDR_SPACE_CALC ( 6 * MINABTPTRS ) ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
2013-03-12 16:30:36 +04:00
ASSERT ( offset < XFS_LITINO ( mp , ip - > i_d . di_version ) ) ;
2013-02-25 05:31:26 +04:00
return offset ;
2005-04-17 02:20:36 +04:00
}
/*
2013-02-25 05:31:26 +04:00
* Helper routine to reset inode di_forkoff field when switching
* attribute fork from local to extent format - we reset it where
* possible to make space available for inline data fork extents .
2013-02-11 08:58:13 +04:00
*/
STATIC void
2013-02-25 05:31:26 +04:00
xfs_bmap_forkoff_reset (
xfs_inode_t * ip ,
int whichfork )
2013-02-11 08:58:13 +04:00
{
2013-02-25 05:31:26 +04:00
if ( whichfork = = XFS_ATTR_FORK & &
ip - > i_d . di_format ! = XFS_DINODE_FMT_DEV & &
ip - > i_d . di_format ! = XFS_DINODE_FMT_BTREE ) {
uint dfl_forkoff = xfs_default_attroffset ( ip ) > > 3 ;
if ( dfl_forkoff > ip - > i_d . di_forkoff )
ip - > i_d . di_forkoff = dfl_forkoff ;
}
2013-02-11 08:58:13 +04:00
}
2013-02-25 05:31:26 +04:00
# ifdef DEBUG
/*
 * Look for a buffer whose XFS_BUF_ADDR() equals bno without doing any I/O:
 * first among the buffers held by the cursor, then among the buffer log
 * items attached to the cursor's transaction.  Returns NULL if there is no
 * cursor or no match.  Callers in this file pass XFS_FSB_TO_DADDR() values.
 */
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item	*lip;
	int			lvl;

	if (!cur)
		return NULL;

	/* The scan of the cursor's buffers stops at the first empty slot. */
	for (lvl = 0; lvl < XFS_BTREE_MAXLEVELS && cur->bc_bufs[lvl]; lvl++) {
		if (XFS_BUF_ADDR(cur->bc_bufs[lvl]) == bno)
			return cur->bc_bufs[lvl];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
		struct xfs_buf_log_item	*bip;

		bip = (struct xfs_buf_log_item *)lip;
		if (bip->bli_item.li_type != XFS_LI_BUF)
			continue;
		if (XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}
2009-12-15 02:14:59 +03:00
2013-02-25 05:31:26 +04:00
/*
 * Debug sanity check of one interior (level > 0) bmap btree block:
 *  - keys must be in strictly increasing br_startoff order;
 *  - no two child pointers in the block may be equal.
 *
 * @block: the btree block to check
 * @mp:    mount structure
 * @root:  non-zero if @block is the root held in the inode fork, in which
 *         case pointers are located with XFS_BMAP_BROOT_PTR_ADDR() and @sz
 * @sz:    size passed to XFS_BMAP_BROOT_PTR_ADDR(); only used when @root
 *
 * Duplicate pointers are reported and the filesystem is shut down with
 * SHUTDOWN_CORRUPT_INCORE.
 */
STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	int			numrecs;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	/*
	 * This block is always a node (asserted above), so pointers must be
	 * located with the node-level record count, index 1, as the caller
	 * in this file does; index 0 is the leaf-level count.  The value
	 * does not change inside the loops, so load it and the record count
	 * once.
	 */
	dmxr = mp->m_bmap_dmxr[1];
	numrecs = xfs_btree_get_numrecs(block);

	prevp = NULL;
	for (i = 1; i <= numrecs; i++) {
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		for (j = i + 1; j <= numrecs; j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				/*
				 * The block number is unsigned; print it with
				 * the standard unsigned 64-bit specifier so
				 * large values do not show up negative.
				 */
				xfs_warn(mp, " %s: thispa(%d) == pp(%d) %llu ",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				xfs_err(mp, " %s: ptrs are equal in node \n ",
					__func__);
				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			}
		}
	}
}
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
/*
* Check that the extents for the inode ip are in the right order in all
2016-01-08 03:28:49 +03:00
* btree leaves . THis becomes prohibitively expensive for large extent count
* files , so don ' t bother with inodes that have more than 10 , 000 extents in
* them . The btree record ordering checks will still be done , so for such large
* bmapbt constructs that is going to catch most corruptions .
2013-02-25 05:31:26 +04:00
*/
STATIC void
xfs_bmap_check_leaf_extents (
xfs_btree_cur_t * cur , /* btree cursor or null */
xfs_inode_t * ip , /* incore inode pointer */
int whichfork ) /* data or attr fork */
{
struct xfs_btree_block * block ; /* current btree block */
xfs_fsblock_t bno ; /* block # of "block" */
xfs_buf_t * bp ; /* buffer for "block" */
int error ; /* error return value */
xfs_extnum_t i = 0 , j ; /* index into the extents list */
2018-07-18 02:51:50 +03:00
struct xfs_ifork * ifp ; /* fork structure */
2013-02-25 05:31:26 +04:00
int level ; /* btree level, for checking */
xfs_mount_t * mp ; /* file system mount structure */
__be64 * pp ; /* pointer to block address */
xfs_bmbt_rec_t * ep ; /* pointer to current extent */
xfs_bmbt_rec_t last = { 0 , 0 } ; /* last extent in prev block */
xfs_bmbt_rec_t * nextp ; /* pointer to next extent */
int bp_release = 0 ;
if ( XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_BTREE ) {
return ;
}
2016-01-08 03:28:49 +03:00
/* skip large extent count inodes */
if ( ip - > i_d . di_nextents > 10000 )
return ;
2013-02-25 05:31:26 +04:00
bno = NULLFSBLOCK ;
mp = ip - > i_mount ;
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
block = ifp - > if_broot ;
/*
* Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out .
*/
level = be16_to_cpu ( block - > bb_level ) ;
ASSERT ( level > 0 ) ;
xfs_check_block ( block , mp , 1 , ifp - > if_broot_bytes ) ;
pp = XFS_BMAP_BROOT_PTR_ADDR ( mp , block , 1 , ifp - > if_broot_bytes ) ;
bno = be64_to_cpu ( * pp ) ;
2014-07-30 03:12:05 +04:00
ASSERT ( bno ! = NULLFSBLOCK ) ;
2013-02-25 05:31:26 +04:00
ASSERT ( XFS_FSB_TO_AGNO ( mp , bno ) < mp - > m_sb . sb_agcount ) ;
ASSERT ( XFS_FSB_TO_AGBNO ( mp , bno ) < mp - > m_sb . sb_agblocks ) ;
/*
* Go down the tree until leaf level is reached , following the first
* pointer ( leftmost ) at each level .
*/
while ( level - - > 0 ) {
/* See if buf is in cur first */
bp_release = 0 ;
bp = xfs_bmap_get_bp ( cur , XFS_FSB_TO_DADDR ( mp , bno ) ) ;
if ( ! bp ) {
bp_release = 1 ;
2019-06-12 19:00:00 +03:00
error = xfs_btree_read_bufl ( mp , NULL , bno , & bp ,
2013-02-25 05:31:26 +04:00
XFS_BMAP_BTREE_REF ,
& xfs_bmbt_buf_ops ) ;
2011-09-19 00:41:04 +04:00
if ( error )
2013-02-25 05:31:26 +04:00
goto error_norelse ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
block = XFS_BUF_TO_BLOCK ( bp ) ;
if ( level = = 0 )
break ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Check this block for basic sanity ( increasing keys and
* no duplicate blocks ) .
2005-04-17 02:20:36 +04:00
*/
2009-12-15 02:14:59 +03:00
2013-02-25 05:31:26 +04:00
xfs_check_block ( block , mp , 0 , 0 ) ;
pp = XFS_BMBT_PTR_ADDR ( mp , block , 1 , mp - > m_bmap_dmxr [ 1 ] ) ;
bno = be64_to_cpu ( * pp ) ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp ,
2018-01-08 21:51:00 +03:00
xfs_verify_fsbno ( mp , bno ) , error0 ) ;
2013-02-25 05:31:26 +04:00
if ( bp_release ) {
bp_release = 0 ;
xfs_trans_brelse ( NULL , bp ) ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
}
2011-05-23 12:52:53 +04:00
2013-02-25 05:31:26 +04:00
/*
* Here with bp and block set to the leftmost leaf node in the tree .
*/
i = 0 ;
/*
* Loop over all leaf nodes checking that all extents are in the right order .
*/
for ( ; ; ) {
xfs_fsblock_t nextbno ;
xfs_extnum_t num_recs ;
num_recs = xfs_btree_get_numrecs ( block ) ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Read - ahead the next leaf block , if any .
2005-04-17 02:20:36 +04:00
*/
2011-12-19 00:00:07 +04:00
2013-02-25 05:31:26 +04:00
nextbno = be64_to_cpu ( block - > bb_u . l . bb_rightsib ) ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Check all the extents to make sure they are OK .
* If we had a previous block , the last entry should
* conform with the first entry in this one .
2005-04-17 02:20:36 +04:00
*/
2011-05-23 12:52:53 +04:00
2013-02-25 05:31:26 +04:00
ep = XFS_BMBT_REC_ADDR ( mp , block , 1 ) ;
if ( i ) {
ASSERT ( xfs_bmbt_disk_get_startoff ( & last ) +
xfs_bmbt_disk_get_blockcount ( & last ) < =
xfs_bmbt_disk_get_startoff ( ep ) ) ;
}
for ( j = 1 ; j < num_recs ; j + + ) {
nextp = XFS_BMBT_REC_ADDR ( mp , block , j + 1 ) ;
ASSERT ( xfs_bmbt_disk_get_startoff ( ep ) +
xfs_bmbt_disk_get_blockcount ( ep ) < =
xfs_bmbt_disk_get_startoff ( nextp ) ) ;
ep = nextp ;
}
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
last = * ep ;
i + = num_recs ;
if ( bp_release ) {
bp_release = 0 ;
xfs_trans_brelse ( NULL , bp ) ;
}
bno = nextbno ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* If we ' ve reached the end , stop .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( bno = = NULLFSBLOCK )
break ;
2011-12-19 00:00:07 +04:00
2013-02-25 05:31:26 +04:00
bp_release = 0 ;
bp = xfs_bmap_get_bp ( cur , XFS_FSB_TO_DADDR ( mp , bno ) ) ;
if ( ! bp ) {
bp_release = 1 ;
2019-06-12 19:00:00 +03:00
error = xfs_btree_read_bufl ( mp , NULL , bno , & bp ,
2013-02-25 05:31:26 +04:00
XFS_BMAP_BTREE_REF ,
& xfs_bmbt_buf_ops ) ;
2011-09-19 00:40:42 +04:00
if ( error )
2013-02-25 05:31:26 +04:00
goto error_norelse ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
block = XFS_BUF_TO_BLOCK ( bp ) ;
2011-09-19 00:40:54 +04:00
}
2016-03-09 00:17:56 +03:00
2013-02-25 05:31:26 +04:00
return ;
2011-09-19 00:40:54 +04:00
2013-02-25 05:31:26 +04:00
error0 :
xfs_warn ( mp , " %s: at error0 " , __func__ ) ;
if ( bp_release )
xfs_trans_brelse ( NULL , bp ) ;
error_norelse :
xfs_warn ( mp , " %s: BAD after btree leaves for %d extents " ,
__func__ , i ) ;
2018-05-05 01:31:21 +03:00
xfs_err ( mp , " %s: CORRUPTED BTREE OR SOMETHING " , __func__ ) ;
xfs_force_shutdown ( mp , SHUTDOWN_CORRUPT_INCORE ) ;
2013-02-25 05:31:26 +04:00
return ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
/*
 * Validate that the bmbt_irecs being returned from bmapi are sane given the
 * caller's original parameters.  In particular, the returned mappings may
 * only extend beyond the requested range [bno, bno + len) when the
 * XFS_BMAPI_ENTIRE flag was set.  Everything is checked with ASSERT().
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;		/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		xfs_bmbt_irec_t	*map = &mval[i];

		ASSERT(map->br_blockcount > 0);
		if (flags & XFS_BMAPI_ENTIRE) {
			/* Only has to overlap the requested range. */
			ASSERT(map->br_startoff < bno + len);
			ASSERT(map->br_startoff + map->br_blockcount >
			       bno);
		} else {
			/* Has to lie entirely within the requested range. */
			ASSERT(map->br_startoff >= bno);
			ASSERT(map->br_blockcount <= len);
			ASSERT(map->br_startoff + map->br_blockcount <=
			       bno + len);
		}

		/* Consecutive mappings are contiguous in file offset. */
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       map->br_startoff);
		ASSERT(map->br_startblock != DELAYSTARTBLOCK &&
		       map->br_startblock != HOLESTARTBLOCK);
		ASSERT(map->br_state == XFS_EXT_NORM ||
		       map->br_state == XFS_EXT_UNWRITTEN);
	}
}
2009-11-25 03:00:19 +03:00
2013-02-25 05:31:26 +04:00
# else
/*
 * Non-DEBUG builds: the two checkers defined in the DEBUG branch above
 * are replaced by empty statements.
 */
# define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
2017-09-01 01:11:06 +03:00
# define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do { } while (0)
2013-02-25 05:31:26 +04:00
# endif /* DEBUG */
2009-11-25 03:00:19 +03:00
2013-02-25 05:31:26 +04:00
/*
* bmap free list manipulation functions
*/
2009-11-25 03:00:19 +03:00
2013-02-25 05:31:26 +04:00
/*
* Add the extent to the list of extents to be free at transaction end .
* The list is maintained sorted ( by block number ) .
*/
void
2018-05-09 18:45:04 +03:00
__xfs_bmap_add_free (
2018-08-01 17:20:34 +03:00
struct xfs_trans * tp ,
2016-08-03 04:33:42 +03:00
xfs_fsblock_t bno ,
xfs_filblks_t len ,
2018-12-12 19:46:23 +03:00
const struct xfs_owner_info * oinfo ,
2018-05-09 18:45:04 +03:00
bool skip_discard )
2013-02-25 05:31:26 +04:00
{
2016-08-03 04:18:10 +03:00
struct xfs_extent_free_item * new ; /* new element */
2013-02-25 05:31:26 +04:00
# ifdef DEBUG
2018-08-01 17:20:34 +03:00
struct xfs_mount * mp = tp - > t_mountp ;
xfs_agnumber_t agno ;
xfs_agblock_t agbno ;
2013-02-25 05:31:26 +04:00
ASSERT ( bno ! = NULLFSBLOCK ) ;
ASSERT ( len > 0 ) ;
ASSERT ( len < = MAXEXTLEN ) ;
ASSERT ( ! isnullstartblock ( bno ) ) ;
agno = XFS_FSB_TO_AGNO ( mp , bno ) ;
agbno = XFS_FSB_TO_AGBNO ( mp , bno ) ;
ASSERT ( agno < mp - > m_sb . sb_agcount ) ;
ASSERT ( agbno < mp - > m_sb . sb_agblocks ) ;
ASSERT ( len < mp - > m_sb . sb_agblocks ) ;
ASSERT ( agbno + len < = mp - > m_sb . sb_agblocks ) ;
# endif
ASSERT ( xfs_bmap_free_item_zone ! = NULL ) ;
2016-08-03 04:33:42 +03:00
2013-02-25 05:31:26 +04:00
new = kmem_zone_alloc ( xfs_bmap_free_item_zone , KM_SLEEP ) ;
2016-08-03 04:18:10 +03:00
new - > xefi_startblock = bno ;
new - > xefi_blockcount = ( xfs_extlen_t ) len ;
2016-08-03 04:33:42 +03:00
if ( oinfo )
new - > xefi_oinfo = * oinfo ;
else
2018-12-12 19:46:23 +03:00
new - > xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE ;
2018-05-09 18:45:04 +03:00
new - > xefi_skip_discard = skip_discard ;
2018-08-01 17:20:34 +03:00
trace_xfs_bmap_free_defer ( tp - > t_mountp ,
XFS_FSB_TO_AGNO ( tp - > t_mountp , bno ) , 0 ,
XFS_FSB_TO_AGBNO ( tp - > t_mountp , bno ) , len ) ;
xfs_defer_add ( tp , XFS_DEFER_OPS_TYPE_FREE , & new - > xefi_list ) ;
2013-02-25 05:31:26 +04:00
}
2009-12-15 02:14:59 +03:00
2013-02-25 05:31:26 +04:00
/*
* Inode fork format manipulation functions
*/
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
/*
2019-02-15 19:02:47 +03:00
* Convert the inode format to extent format if it currently is in btree format ,
* but the extent list is small enough that it fits into the extent format .
*
* Since the extents are already in - core , all we have to do is give up the space
* for the btree root and pitch the leaf block .
2013-02-25 05:31:26 +04:00
*/
STATIC int /* error */
xfs_bmap_btree_to_extents (
2019-02-15 19:02:47 +03:00
struct xfs_trans * tp , /* transaction pointer */
struct xfs_inode * ip , /* incore inode pointer */
struct xfs_btree_cur * cur , /* btree cursor */
2013-02-25 05:31:26 +04:00
int * logflagsp , /* inode logging flags */
int whichfork ) /* data or attr fork */
{
2019-02-15 19:02:47 +03:00
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
struct xfs_mount * mp = ip - > i_mount ;
struct xfs_btree_block * rblock = ifp - > if_broot ;
2013-02-25 05:31:26 +04:00
struct xfs_btree_block * cblock ; /* child btree block */
xfs_fsblock_t cbno ; /* child block number */
xfs_buf_t * cbp ; /* child block's buffer */
int error ; /* error return value */
__be64 * pp ; /* ptr to block address */
2016-08-03 04:33:42 +03:00
struct xfs_owner_info oinfo ;
2005-04-17 02:20:36 +04:00
2019-02-15 19:02:47 +03:00
/* check if we actually need the extent format first: */
if ( ! xfs_bmap_wants_extents ( ip , whichfork ) )
return 0 ;
ASSERT ( cur ) ;
2016-10-03 19:11:34 +03:00
ASSERT ( whichfork ! = XFS_COW_FORK ) ;
2013-02-25 05:31:26 +04:00
ASSERT ( ifp - > if_flags & XFS_IFEXTENTS ) ;
ASSERT ( XFS_IFORK_FORMAT ( ip , whichfork ) = = XFS_DINODE_FMT_BTREE ) ;
ASSERT ( be16_to_cpu ( rblock - > bb_level ) = = 1 ) ;
ASSERT ( be16_to_cpu ( rblock - > bb_numrecs ) = = 1 ) ;
ASSERT ( xfs_bmbt_maxrecs ( mp , ifp - > if_broot_bytes , 0 ) = = 1 ) ;
2019-02-15 19:02:47 +03:00
2013-02-25 05:31:26 +04:00
pp = XFS_BMAP_BROOT_PTR_ADDR ( mp , rblock , 1 , ifp - > if_broot_bytes ) ;
cbno = be64_to_cpu ( * pp ) ;
# ifdef DEBUG
2017-10-18 07:37:33 +03:00
XFS_WANT_CORRUPTED_RETURN ( cur - > bc_mp ,
xfs_btree_check_lptr ( cur , cbno , 1 ) ) ;
2013-02-25 05:31:26 +04:00
# endif
2019-06-12 19:00:00 +03:00
error = xfs_btree_read_bufl ( mp , tp , cbno , & cbp , XFS_BMAP_BTREE_REF ,
2013-02-25 05:31:26 +04:00
& xfs_bmbt_buf_ops ) ;
if ( error )
return error ;
cblock = XFS_BUF_TO_BLOCK ( cbp ) ;
if ( ( error = xfs_btree_check_block ( cur , cblock , 0 , cbp ) ) )
return error ;
2016-08-03 04:33:42 +03:00
xfs_rmap_ino_bmbt_owner ( & oinfo , ip - > i_ino , whichfork ) ;
2018-08-01 17:20:34 +03:00
xfs_bmap_add_free ( cur - > bc_tp , cbno , 1 , & oinfo ) ;
2013-02-25 05:31:26 +04:00
ip - > i_d . di_nblocks - - ;
xfs_trans_mod_dquot_byino ( tp , ip , XFS_TRANS_DQ_BCOUNT , - 1L ) ;
xfs_trans_binval ( tp , cbp ) ;
if ( cur - > bc_bufs [ 0 ] = = cbp )
cur - > bc_bufs [ 0 ] = NULL ;
xfs_iroot_realloc ( ip , - 1 , whichfork ) ;
ASSERT ( ifp - > if_broot = = NULL ) ;
ASSERT ( ( ifp - > if_flags & XFS_IFBROOT ) = = 0 ) ;
XFS_IFORK_FMT_SET ( ip , whichfork , XFS_DINODE_FMT_EXTENTS ) ;
2019-02-15 19:02:47 +03:00
* logflagsp | = XFS_ILOG_CORE | xfs_ilog_fext ( whichfork ) ;
2013-02-25 05:31:26 +04:00
return 0 ;
}
2009-12-15 02:14:59 +03:00
2013-02-25 05:31:26 +04:00
/*
* Convert an extents - format file into a btree - format file .
* The new file will have a root block ( in the inode ) and a single child block .
*/
STATIC int /* error */
xfs_bmap_extents_to_btree (
2018-07-12 08:26:16 +03:00
struct xfs_trans * tp , /* transaction pointer */
struct xfs_inode * ip , /* incore inode pointer */
struct xfs_btree_cur * * curp , /* cursor returned to caller */
2013-02-25 05:31:26 +04:00
int wasdel , /* converting a delayed alloc */
int * logflagsp , /* inode logging flags */
int whichfork ) /* data or attr fork */
{
struct xfs_btree_block * ablock ; /* allocated (child) bt block */
2018-07-12 08:26:16 +03:00
struct xfs_buf * abp ; /* buffer for ablock */
struct xfs_alloc_arg args ; /* allocation arguments */
struct xfs_bmbt_rec * arp ; /* child record pointer */
2013-02-25 05:31:26 +04:00
struct xfs_btree_block * block ; /* btree root block */
2018-07-12 08:26:16 +03:00
struct xfs_btree_cur * cur ; /* bmap btree cursor */
2013-02-25 05:31:26 +04:00
int error ; /* error return value */
2018-07-12 08:26:16 +03:00
struct xfs_ifork * ifp ; /* inode fork pointer */
struct xfs_bmbt_key * kp ; /* root block key pointer */
struct xfs_mount * mp ; /* mount structure */
2013-02-25 05:31:26 +04:00
xfs_bmbt_ptr_t * pp ; /* root block address pointer */
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ;
2017-11-03 20:34:43 +03:00
struct xfs_bmbt_irec rec ;
2017-11-03 20:34:43 +03:00
xfs_extnum_t cnt = 0 ;
2005-04-17 02:20:36 +04:00
2013-04-21 23:53:46 +04:00
mp = ip - > i_mount ;
2016-10-03 19:11:34 +03:00
ASSERT ( whichfork ! = XFS_COW_FORK ) ;
2013-02-25 05:31:26 +04:00
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
ASSERT ( XFS_IFORK_FORMAT ( ip , whichfork ) = = XFS_DINODE_FMT_EXTENTS ) ;
2009-12-15 02:14:59 +03:00
2013-02-25 05:31:26 +04:00
/*
2018-10-01 01:11:07 +03:00
* Make space in the inode incore . This needs to be undone if we fail
* to expand the root .
2013-02-25 05:31:26 +04:00
*/
xfs_iroot_realloc ( ip , 1 , whichfork ) ;
ifp - > if_flags | = XFS_IFBROOT ;
2011-05-23 12:52:53 +04:00
2013-02-25 05:31:26 +04:00
/*
* Fill in the root .
*/
block = ifp - > if_broot ;
2017-01-28 10:16:39 +03:00
xfs_btree_init_block_int ( mp , block , XFS_BUF_DADDR_NULL ,
XFS_BTNUM_BMAP , 1 , 1 , ip - > i_ino ,
2017-01-28 10:16:37 +03:00
XFS_BTREE_LONG_PTRS ) ;
2013-02-25 05:31:26 +04:00
/*
* Need a cursor . Can ' t allocate until bb_level is filled in .
*/
cur = xfs_bmbt_init_cursor ( mp , tp , ip , whichfork ) ;
cur - > bc_private . b . flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0 ;
/*
* Convert to a btree with two levels , one record in root .
*/
XFS_IFORK_FMT_SET ( ip , whichfork , XFS_DINODE_FMT_BTREE ) ;
memset ( & args , 0 , sizeof ( args ) ) ;
args . tp = tp ;
args . mp = mp ;
2016-08-03 04:33:42 +03:00
xfs_rmap_ino_bmbt_owner ( & args . oinfo , ip - > i_ino , whichfork ) ;
2018-07-12 08:26:29 +03:00
if ( tp - > t_firstblock = = NULLFSBLOCK ) {
2013-02-25 05:31:26 +04:00
args . type = XFS_ALLOCTYPE_START_BNO ;
args . fsbno = XFS_INO_TO_FSB ( mp , ip - > i_ino ) ;
2018-08-01 17:20:31 +03:00
} else if ( tp - > t_flags & XFS_TRANS_LOWMODE ) {
2013-02-25 05:31:26 +04:00
args . type = XFS_ALLOCTYPE_START_BNO ;
2018-07-12 08:26:29 +03:00
args . fsbno = tp - > t_firstblock ;
2013-02-25 05:31:26 +04:00
} else {
args . type = XFS_ALLOCTYPE_NEAR_BNO ;
2018-07-12 08:26:29 +03:00
args . fsbno = tp - > t_firstblock ;
2013-02-25 05:31:26 +04:00
}
args . minlen = args . maxlen = args . prod = 1 ;
args . wasdel = wasdel ;
* logflagsp = 0 ;
2018-10-01 01:11:07 +03:00
error = xfs_alloc_vextent ( & args ) ;
if ( error )
goto out_root_realloc ;
2016-10-03 19:11:45 +03:00
2017-03-08 21:38:53 +03:00
if ( WARN_ON_ONCE ( args . fsbno = = NULLFSBLOCK ) ) {
2018-08-11 03:55:55 +03:00
error = - ENOSPC ;
2018-10-01 01:11:07 +03:00
goto out_root_realloc ;
2017-03-08 21:38:53 +03:00
}
2018-10-01 01:11:07 +03:00
2013-02-25 05:31:26 +04:00
/*
* Allocation can ' t fail , the space was reserved .
*/
2018-07-12 08:26:29 +03:00
ASSERT ( tp - > t_firstblock = = NULLFSBLOCK | |
args . agno > = XFS_FSB_TO_AGNO ( mp , tp - > t_firstblock ) ) ;
2018-07-12 08:26:29 +03:00
tp - > t_firstblock = args . fsbno ;
2013-02-25 05:31:26 +04:00
cur - > bc_private . b . allocated + + ;
ip - > i_d . di_nblocks + + ;
xfs_trans_mod_dquot_byino ( tp , ip , XFS_TRANS_DQ_BCOUNT , 1L ) ;
2019-06-12 19:00:00 +03:00
abp = xfs_btree_get_bufl ( mp , tp , args . fsbno ) ;
2018-08-11 03:55:55 +03:00
if ( ! abp ) {
2018-10-01 01:11:07 +03:00
error = - EFSCORRUPTED ;
goto out_unreserve_dquot ;
2018-08-11 03:55:55 +03:00
}
2018-10-01 01:11:07 +03:00
2013-02-25 05:31:26 +04:00
/*
* Fill in the child block .
*/
abp - > b_ops = & xfs_bmbt_buf_ops ;
ablock = XFS_BUF_TO_BLOCK ( abp ) ;
2017-01-28 10:16:39 +03:00
xfs_btree_init_block_int ( mp , ablock , abp - > b_bn ,
XFS_BTNUM_BMAP , 0 , 0 , ip - > i_ino ,
2013-04-21 23:53:46 +04:00
XFS_BTREE_LONG_PTRS ) ;
2017-11-03 20:34:43 +03:00
for_each_xfs_iext ( ifp , & icur , & rec ) {
2017-11-03 20:34:43 +03:00
if ( isnullstartblock ( rec . br_startblock ) )
continue ;
arp = XFS_BMBT_REC_ADDR ( mp , ablock , 1 + cnt ) ;
xfs_bmbt_disk_set_all ( arp , & rec ) ;
cnt + + ;
2013-02-25 05:31:26 +04:00
}
ASSERT ( cnt = = XFS_IFORK_NEXTENTS ( ip , whichfork ) ) ;
xfs_btree_set_numrecs ( ablock , cnt ) ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
/*
* Fill in the root key and pointer .
*/
kp = XFS_BMBT_KEY_ADDR ( mp , block , 1 ) ;
arp = XFS_BMBT_REC_ADDR ( mp , ablock , 1 ) ;
kp - > br_startoff = cpu_to_be64 ( xfs_bmbt_disk_get_startoff ( arp ) ) ;
pp = XFS_BMBT_PTR_ADDR ( mp , block , 1 , xfs_bmbt_get_maxrecs ( cur ,
be16_to_cpu ( block - > bb_level ) ) ) ;
* pp = cpu_to_be64 ( args . fsbno ) ;
2011-05-23 12:52:53 +04:00
2013-02-25 05:31:26 +04:00
/*
* Do all this logging at the end so that
* the root is at the right level .
*/
xfs_btree_log_block ( cur , abp , XFS_BB_ALL_BITS ) ;
xfs_btree_log_recs ( cur , abp , 1 , be16_to_cpu ( ablock - > bb_numrecs ) ) ;
ASSERT ( * curp = = NULL ) ;
* curp = cur ;
* logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot ( whichfork ) ;
return 0 ;
2018-08-11 03:55:55 +03:00
2018-10-01 01:11:07 +03:00
out_unreserve_dquot :
2018-08-11 03:55:55 +03:00
xfs_trans_mod_dquot_byino ( tp , ip , XFS_TRANS_DQ_BCOUNT , - 1L ) ;
2018-10-01 01:11:07 +03:00
out_root_realloc :
2018-08-11 03:55:55 +03:00
xfs_iroot_realloc ( ip , - 1 , whichfork ) ;
XFS_IFORK_FMT_SET ( ip , whichfork , XFS_DINODE_FMT_EXTENTS ) ;
2018-10-01 01:11:07 +03:00
ASSERT ( ifp - > if_broot = = NULL ) ;
2018-08-11 03:55:55 +03:00
xfs_btree_del_cursor ( cur , XFS_BTREE_ERROR ) ;
return error ;
2013-02-25 05:31:26 +04:00
}
2011-05-23 12:52:53 +04:00
2013-02-25 05:31:26 +04:00
/*
* Convert a local file to an extents file .
* This code is out of bounds for data forks of regular files ,
* since the file data needs to get logged so things will stay consistent .
* ( The bmap - level manipulations are ok , though ) .
*/
2013-07-10 01:04:00 +04:00
void
xfs_bmap_local_to_extents_empty (
struct xfs_inode * ip ,
int whichfork )
{
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
2016-10-03 19:11:34 +03:00
ASSERT ( whichfork ! = XFS_COW_FORK ) ;
2013-07-10 01:04:00 +04:00
ASSERT ( XFS_IFORK_FORMAT ( ip , whichfork ) = = XFS_DINODE_FMT_LOCAL ) ;
ASSERT ( ifp - > if_bytes = = 0 ) ;
ASSERT ( XFS_IFORK_NEXTENTS ( ip , whichfork ) = = 0 ) ;
2014-04-14 12:59:26 +04:00
xfs_bmap_forkoff_reset ( ip , whichfork ) ;
2013-07-10 01:04:00 +04:00
ifp - > if_flags & = ~ XFS_IFINLINE ;
ifp - > if_flags | = XFS_IFEXTENTS ;
2017-11-03 20:34:46 +03:00
ifp - > if_u1 . if_root = NULL ;
ifp - > if_height = 0 ;
2013-07-10 01:04:00 +04:00
XFS_IFORK_FMT_SET ( ip , whichfork , XFS_DINODE_FMT_EXTENTS ) ;
}
2013-02-25 05:31:26 +04:00
/*
 * Convert a local-format fork into an extents-format fork backed by a single
 * newly allocated block.
 *
 * If the fork holds no data, the format is simply switched and only the inode
 * core needs logging.  Otherwise one block is allocated, @init_fn is called to
 * initialise and log the new buffer from the inline data, the inline data is
 * released and a single extent record mapping file offset 0 to the new block
 * is inserted.
 *
 * The inode logging flags the caller must apply are returned in @logflagsp on
 * every path, including failure.  Returns 0 or a negative errno.
 */
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags = 0;	/* logging flags returned */
	struct xfs_ifork *ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	struct xfs_bmbt_irec rec;
	struct xfs_iext_cursor icur;

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	/* Nothing stored inline: no block is needed, just switch formats. */
	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);

	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (tp->t_firstblock == NULLFSBLOCK) {
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		args.fsbno = tp->t_firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	tp->t_firstblock = args.fsbno;

	/*
	 * The buffer lookup can come back empty; init_fn dereferences the
	 * buffer unconditionally, so bail out instead of crashing.
	 */
	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno);
	if (!bp) {
		error = -EFSCORRUPTED;
		goto done;
	}

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much to
	 * log here. Note that init_fn must also set the buffer log item type
	 * correctly.
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(ip, whichfork);
	flags |= XFS_ILOG_CORE;

	ifp->if_u1.if_root = NULL;
	ifp->if_height = 0;

	/* Map file offset 0 onto the single block we just allocated. */
	rec.br_startoff = 0;
	rec.br_startblock = args.fsbno;
	rec.br_blockcount = 1;
	rec.br_state = XFS_EXT_NORM;
	xfs_iext_first(ifp, &icur);
	xfs_iext_insert(ip, &icur, &rec, 0);

	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
	ip->i_d.di_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}
2011-05-23 12:52:53 +04:00
2013-02-25 05:31:26 +04:00
/*
 * Called from xfs_bmap_add_attrfork to handle btree format files.
 *
 * If the in-core btree root still fits in the (now smaller) data fork area,
 * only the root needs logging.  Otherwise xfs_btree_new_iroot() is asked to
 * make room; if it reports that it could not, -ENOSPC is returned.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_btree_cur_t		*cur;		/* btree cursor */
	int			stat;		/* lookup / newroot status */
	int			error;

	/* Root still fits inline: just make sure it gets logged. */
	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) {
		*flags |= XFS_ILOG_DBROOT;
		return 0;
	}

	cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);

	error = xfs_bmbt_lookup_first(cur, &stat);
	if (error)
		goto error0;

	/* must be at least one entry */
	XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);

	error = xfs_btree_new_iroot(cur, flags, &stat);
	if (error)
		goto error0;

	if (stat == 0) {
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
		return -ENOSPC;
	}

	cur->bc_private.b.allocated = 0;
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return 0;

error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
2011-09-19 00:40:54 +04:00
2013-02-25 05:31:26 +04:00
/*
* Called from xfs_bmap_add_attrfork to handle extents format files .
*/
STATIC int /* error */
xfs_bmap_add_attrfork_extents (
2018-07-12 08:26:16 +03:00
struct xfs_trans * tp , /* transaction pointer */
struct xfs_inode * ip , /* incore inode pointer */
2013-02-25 05:31:26 +04:00
int * flags ) /* inode logging flags */
{
xfs_btree_cur_t * cur ; /* bmap btree cursor */
int error ; /* error return value */
2011-09-19 00:40:54 +04:00
2013-02-25 05:31:26 +04:00
if ( ip - > i_d . di_nextents * sizeof ( xfs_bmbt_rec_t ) < = XFS_IFORK_DSIZE ( ip ) )
return 0 ;
cur = NULL ;
2018-07-12 08:26:29 +03:00
error = xfs_bmap_extents_to_btree ( tp , ip , & cur , 0 , flags ,
XFS_DATA_FORK ) ;
2011-09-19 00:40:54 +04:00
if ( cur ) {
cur - > bc_private . b . allocated = 0 ;
2018-07-19 22:26:31 +03:00
xfs_btree_del_cursor ( cur , error ) ;
2011-09-19 00:40:54 +04:00
}
2005-04-17 02:20:36 +04:00
return error ;
}
2013-02-25 05:31:26 +04:00
/*
* Called from xfs_bmap_add_attrfork to handle local format files . Each
* different data fork content type needs a different callout to do the
* conversion . Some are basic and only require special block initialisation
* callouts for the data formating , others ( directories ) are so specialised they
* handle everything themselves .
*
* XXX ( dgc ) : investigate whether directory conversion can use the generic
* formatting callout . It should be possible - it ' s just a very complex
2013-04-21 23:53:46 +04:00
* formatter .
2013-02-25 05:31:26 +04:00
*/
STATIC int /* error */
xfs_bmap_add_attrfork_local (
2018-07-12 08:26:21 +03:00
struct xfs_trans * tp , /* transaction pointer */
struct xfs_inode * ip , /* incore inode pointer */
2013-02-25 05:31:26 +04:00
int * flags ) /* inode logging flags */
{
2018-07-12 08:26:21 +03:00
struct xfs_da_args dargs ; /* args for dir/attr code */
2009-11-25 03:00:19 +03:00
2013-02-25 05:31:26 +04:00
if ( ip - > i_df . if_bytes < = XFS_IFORK_DSIZE ( ip ) )
return 0 ;
2009-11-25 03:00:19 +03:00
2016-02-09 08:54:58 +03:00
if ( S_ISDIR ( VFS_I ( ip ) - > i_mode ) ) {
2013-02-25 05:31:26 +04:00
memset ( & dargs , 0 , sizeof ( dargs ) ) ;
2014-06-06 09:14:11 +04:00
dargs . geo = ip - > i_mount - > m_dir_geo ;
2013-02-25 05:31:26 +04:00
dargs . dp = ip ;
2014-06-06 09:14:11 +04:00
dargs . total = dargs . geo - > fsbcount ;
2013-02-25 05:31:26 +04:00
dargs . whichfork = XFS_DATA_FORK ;
dargs . trans = tp ;
return xfs_dir2_sf_to_block ( & dargs ) ;
2005-04-17 02:20:36 +04:00
}
2009-11-25 03:00:19 +03:00
2016-02-09 08:54:58 +03:00
if ( S_ISLNK ( VFS_I ( ip ) - > i_mode ) )
2018-07-12 08:26:29 +03:00
return xfs_bmap_local_to_extents ( tp , ip , 1 , flags ,
XFS_DATA_FORK ,
2013-02-25 05:31:26 +04:00
xfs_symlink_local_to_remote ) ;
2009-11-25 03:00:19 +03:00
2013-07-10 01:04:00 +04:00
/* should only be called for types that support local format data */
ASSERT ( 0 ) ;
2014-06-25 08:58:08 +04:00
return - EFSCORRUPTED ;
2013-02-25 05:31:26 +04:00
}
2009-12-15 02:14:59 +03:00
2018-10-18 09:21:16 +03:00
/* Set an inode attr fork off based on the format */
int
xfs_bmap_set_attrforkoff (
struct xfs_inode * ip ,
int size ,
int * version )
{
switch ( ip - > i_d . di_format ) {
case XFS_DINODE_FMT_DEV :
ip - > i_d . di_forkoff = roundup ( sizeof ( xfs_dev_t ) , 8 ) > > 3 ;
break ;
case XFS_DINODE_FMT_LOCAL :
case XFS_DINODE_FMT_EXTENTS :
case XFS_DINODE_FMT_BTREE :
ip - > i_d . di_forkoff = xfs_attr_shortform_bytesfit ( ip , size ) ;
if ( ! ip - > i_d . di_forkoff )
ip - > i_d . di_forkoff = xfs_default_attroffset ( ip ) > > 3 ;
else if ( ( ip - > i_mount - > m_flags & XFS_MOUNT_ATTR2 ) & & version )
* version = 2 ;
break ;
default :
ASSERT ( 0 ) ;
return - EINVAL ;
}
return 0 ;
}
2013-02-25 05:31:26 +04:00
/*
 * Convert inode from non-attributed to attributed.
 * Must not be in a transaction, ip must not be locked.
 *
 * Allocates its own transaction, takes the inode ILOCK exclusively, sets the
 * attr fork offset, allocates the in-core attr fork and then lets the
 * format-specific helper shrink the data fork if required.  The superblock
 * attr feature bits are turned on (and the superblock logged) when they were
 * not already set.  If an attr fork turns up once the lock is held, the
 * transaction is cancelled and 0 is returned.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t	*ip,		/* incore inode pointer */
	int		size,		/* space new attribute needs */
	int		rsvd)		/* xact may use reserved blks */
{
	xfs_mount_t	*mp = ip->i_mount;	/* mount structure */
	xfs_trans_t	*tp;			/* transaction pointer */
	int		blks;			/* space reservation */
	int		version = 1;		/* superblock attr version */
	int		logflags = 0;		/* logging flags */
	int		error;			/* error return value */

	ASSERT(XFS_IFORK_Q(ip) == 0);
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	blks = XFS_ADDAFORK_SPACE_RES(mp);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;

	/* An attr fork is already present: back out, error is still 0. */
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;

	/* No attr fork, yet attr extents are claimed: on-disk corruption. */
	if (ip->i_d.di_anextents != 0) {
		error = -EFSCORRUPTED;
		goto trans_cancel;
	}

	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	error = xfs_bmap_set_attrforkoff(ip, size, &version);
	if (error)
		goto trans_cancel;

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;

	/* The data fork may no longer fit in its reduced area. */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
		break;
	default:
		error = 0;
		break;
	}

	/* Log whatever the helper dirtied, even if it then failed. */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto trans_cancel;

	/*
	 * Unlocked peek first; the feature bits are rechecked under
	 * m_sb_lock before anything is modified.
	 */
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	    (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool	log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);

		if (log_sb)
			xfs_log_sb(tp);
	}

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
/*
2013-02-25 05:31:26 +04:00
* Internal and external extent tree search functions .
2005-04-17 02:20:36 +04:00
*/
2011-09-19 00:40:54 +04:00
2013-02-25 05:31:26 +04:00
/*
2017-10-24 02:32:39 +03:00
* Read in extents from a btree - format inode .
2013-02-25 05:31:26 +04:00
*/
2017-10-24 02:32:39 +03:00
int
xfs_iread_extents (
struct xfs_trans * tp ,
struct xfs_inode * ip ,
int whichfork )
2013-02-25 05:31:26 +04:00
{
2017-10-24 02:32:39 +03:00
struct xfs_mount * mp = ip - > i_mount ;
2017-10-19 21:06:29 +03:00
int state = xfs_bmap_fork_to_state ( whichfork ) ;
2017-10-24 02:32:39 +03:00
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
xfs_extnum_t nextents = XFS_IFORK_NEXTENTS ( ip , whichfork ) ;
struct xfs_btree_block * block = ifp - > if_broot ;
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ;
2017-11-03 20:34:46 +03:00
struct xfs_bmbt_irec new ;
2017-10-24 02:32:39 +03:00
xfs_fsblock_t bno ;
struct xfs_buf * bp ;
xfs_extnum_t i , j ;
int level ;
__be64 * pp ;
int error ;
ASSERT ( xfs_isilocked ( ip , XFS_ILOCK_EXCL ) ) ;
if ( unlikely ( XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_BTREE ) ) {
XFS_ERROR_REPORT ( __func__ , XFS_ERRLEVEL_LOW , mp ) ;
return - EFSCORRUPTED ;
}
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
level = be16_to_cpu ( block - > bb_level ) ;
2019-03-18 01:21:49 +03:00
if ( unlikely ( level = = 0 ) ) {
XFS_ERROR_REPORT ( __func__ , XFS_ERRLEVEL_LOW , mp ) ;
return - EFSCORRUPTED ;
}
2013-02-25 05:31:26 +04:00
pp = XFS_BMAP_BROOT_PTR_ADDR ( mp , block , 1 , ifp - > if_broot_bytes ) ;
bno = be64_to_cpu ( * pp ) ;
2017-02-03 02:13:58 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Go down the tree until leaf level is reached , following the first
* pointer ( leftmost ) at each level .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
while ( level - - > 0 ) {
2019-06-12 19:00:00 +03:00
error = xfs_btree_read_bufl ( mp , tp , bno , & bp ,
2013-02-25 05:31:26 +04:00
XFS_BMAP_BTREE_REF , & xfs_bmbt_buf_ops ) ;
if ( error )
2017-10-24 02:32:39 +03:00
goto out ;
2013-02-25 05:31:26 +04:00
block = XFS_BUF_TO_BLOCK ( bp ) ;
if ( level = = 0 )
break ;
pp = XFS_BMBT_PTR_ADDR ( mp , block , 1 , mp - > m_bmap_dmxr [ 1 ] ) ;
bno = be64_to_cpu ( * pp ) ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp ,
2018-01-08 21:51:00 +03:00
xfs_verify_fsbno ( mp , bno ) , out_brelse ) ;
2013-02-25 05:31:26 +04:00
xfs_trans_brelse ( tp , bp ) ;
2005-04-17 02:20:36 +04:00
}
2017-10-24 02:32:39 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Here with bp and block set to the leftmost leaf node in the tree .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
i = 0 ;
2017-11-03 20:34:43 +03:00
xfs_iext_first ( ifp , & icur ) ;
2017-10-24 02:32:39 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Loop over all leaf nodes . Copy information to the extent records .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
for ( ; ; ) {
xfs_bmbt_rec_t * frp ;
xfs_fsblock_t nextbno ;
xfs_extnum_t num_recs ;
2009-12-15 02:14:59 +03:00
2013-02-25 05:31:26 +04:00
num_recs = xfs_btree_get_numrecs ( block ) ;
2017-10-24 02:32:39 +03:00
if ( unlikely ( i + num_recs > nextents ) ) {
2013-02-25 05:31:26 +04:00
xfs_warn ( ip - > i_mount ,
" corrupt dinode %Lu, (btree extents). " ,
( unsigned long long ) ip - > i_ino ) ;
2018-03-23 20:06:52 +03:00
xfs_inode_verifier_error ( ip , - EFSCORRUPTED ,
__func__ , block , sizeof ( * block ) ,
__this_address ) ;
2017-10-24 02:32:39 +03:00
error = - EFSCORRUPTED ;
goto out_brelse ;
2005-04-17 02:20:36 +04:00
}
/*
2013-02-25 05:31:26 +04:00
* Read - ahead the next leaf block , if any .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
nextbno = be64_to_cpu ( block - > bb_u . l . bb_rightsib ) ;
if ( nextbno ! = NULLFSBLOCK )
xfs_btree_reada_bufl ( mp , nextbno , 1 ,
& xfs_bmbt_buf_ops ) ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Copy records into the extent records .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
frp = XFS_BMBT_REC_ADDR ( mp , block , 1 ) ;
2017-11-03 20:34:46 +03:00
for ( j = 0 ; j < num_recs ; j + + , frp + + , i + + ) {
2018-03-23 20:06:52 +03:00
xfs_failaddr_t fa ;
2017-11-03 20:34:47 +03:00
xfs_bmbt_disk_get_all ( frp , & new ) ;
2018-03-23 20:06:52 +03:00
fa = xfs_bmap_validate_extent ( ip , whichfork , & new ) ;
if ( fa ) {
2017-10-24 02:32:39 +03:00
error = - EFSCORRUPTED ;
2018-03-23 20:06:52 +03:00
xfs_inode_verifier_error ( ip , error ,
" xfs_iread_extents(2) " ,
frp , sizeof ( * frp ) , fa ) ;
2017-10-24 02:32:39 +03:00
goto out_brelse ;
2013-02-25 05:31:26 +04:00
}
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , & icur , & new , state ) ;
2017-11-03 20:34:43 +03:00
trace_xfs_read_extent ( ip , & icur , state , _THIS_IP_ ) ;
xfs_iext_next ( ifp , & icur ) ;
2013-02-25 05:31:26 +04:00
}
xfs_trans_brelse ( tp , bp ) ;
bno = nextbno ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* If we ' ve reached the end , stop .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( bno = = NULLFSBLOCK )
break ;
2019-06-12 19:00:00 +03:00
error = xfs_btree_read_bufl ( mp , tp , bno , & bp ,
2013-02-25 05:31:26 +04:00
XFS_BMAP_BTREE_REF , & xfs_bmbt_buf_ops ) ;
if ( error )
2017-10-24 02:32:39 +03:00
goto out ;
2013-02-25 05:31:26 +04:00
block = XFS_BUF_TO_BLOCK ( bp ) ;
2005-04-17 02:20:36 +04:00
}
2017-10-24 02:32:39 +03:00
if ( i ! = XFS_IFORK_NEXTENTS ( ip , whichfork ) ) {
error = - EFSCORRUPTED ;
goto out ;
}
2016-11-08 04:59:42 +03:00
ASSERT ( i = = xfs_iext_count ( ifp ) ) ;
2017-10-24 02:32:39 +03:00
ifp - > if_flags | = XFS_IFEXTENTS ;
2013-02-25 05:31:26 +04:00
return 0 ;
2017-10-24 02:32:39 +03:00
out_brelse :
2013-02-25 05:31:26 +04:00
xfs_trans_brelse ( tp , bp ) ;
2017-10-24 02:32:39 +03:00
out :
xfs_iext_destroy ( ifp ) ;
return error ;
2013-02-25 05:31:26 +04:00
}
2011-09-19 00:40:54 +04:00
2013-02-25 05:31:26 +04:00
/*
 * Returns the relative block number of the first unused block(s) in the given
 * fork with at least "len" logically contiguous blocks free.  This is the
 * lowest-address hole if the fork has holes, else the first block past the end
 * of fork.  Return 0 if the fork is currently local (in-inode).
 *
 * On entry *first_unused is the lowest file offset the caller will accept;
 * on successful return it holds the offset that was found.  The extent list
 * is read in first if it is not yet in core.
 */
int						/* error */
xfs_bmap_first_unused(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_extlen_t		len,		/* size of hole to find */
	xfs_fileoff_t		*first_unused,	/* unused block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	xfs_fileoff_t		lowest;		/* caller's lower bound */
	xfs_fileoff_t		candidate;	/* best hole start so far */
	xfs_fileoff_t		got_end;
	int			error;

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*first_unused = 0;
		return 0;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	lowest = *first_unused;
	candidate = lowest;
	for_each_xfs_iext(ifp, &icur, &got) {
		/*
		 * Is the gap in front of this extent both above the lower
		 * bound and big enough?
		 */
		if (got.br_startoff >= lowest + len &&
		    got.br_startoff - candidate >= len)
			break;

		/* No: the next candidate starts where this extent ends. */
		got_end = got.br_startoff + got.br_blockcount;
		candidate = XFS_FILEOFF_MAX(got_end, lowest);
	}

	*first_unused = candidate;
	return 0;
}
/*
 * Returns the file-relative block number of the last block - 1 before
 * last_block (input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 *
 * *last_block is also set to 0 when the extent lookup finds nothing.  Forks
 * that are neither local, extents nor btree format yield -EIO.
 */
int						/* error */
xfs_bmap_last_before(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		*last_block,	/* last block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	int			error;

	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*last_block = 0;
		return 0;
	}

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return -EIO;

	/* Make sure the extent list is in core before searching it. */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	/*
	 * NOTE(review): on a successful lookup the helper presumably updates
	 * *last_block itself - confirm against xfs_iext_lookup_extent_before.
	 */
	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
		*last_block = 0;

	return 0;
}
2013-08-12 14:49:42 +04:00
int
2013-02-25 05:31:26 +04:00
xfs_bmap_last_extent (
struct xfs_trans * tp ,
struct xfs_inode * ip ,
int whichfork ,
struct xfs_bmbt_irec * rec ,
int * is_empty )
{
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ;
2013-02-25 05:31:26 +04:00
int error ;
if ( ! ( ifp - > if_flags & XFS_IFEXTENTS ) ) {
error = xfs_iread_extents ( tp , ip , whichfork ) ;
if ( error )
return error ;
2006-01-11 07:28:28 +03:00
}
2017-11-03 20:34:43 +03:00
xfs_iext_last ( ifp , & icur ) ;
if ( ! xfs_iext_get_extent ( ifp , & icur , rec ) )
2013-02-25 05:31:26 +04:00
* is_empty = 1 ;
2017-11-03 20:34:43 +03:00
else
* is_empty = 0 ;
2006-01-11 07:28:28 +03:00
return 0 ;
}
2013-02-25 05:31:26 +04:00
/*
* Check the last inode extent to determine whether this allocation will result
* in blocks being allocated at the end of the file . When we allocate new data
* blocks at the end of the file which do not start at the previous data block ,
* we will try to align the new blocks at stripe unit boundaries .
*
2013-11-22 03:41:16 +04:00
* Returns 1 in bma - > aeof if the file ( fork ) is empty as any new write will be
2013-02-25 05:31:26 +04:00
* at , or past the EOF .
*/
STATIC int
xfs_bmap_isaeof (
struct xfs_bmalloca * bma ,
int whichfork )
2005-04-17 02:20:36 +04:00
{
2013-02-25 05:31:26 +04:00
struct xfs_bmbt_irec rec ;
int is_empty ;
int error ;
2005-04-17 02:20:36 +04:00
2017-10-09 21:38:54 +03:00
bma - > aeof = false ;
2013-02-25 05:31:26 +04:00
error = xfs_bmap_last_extent ( NULL , bma - > ip , whichfork , & rec ,
& is_empty ) ;
2013-11-22 03:41:16 +04:00
if ( error )
2013-02-25 05:31:26 +04:00
return error ;
2005-04-17 02:20:36 +04:00
2013-11-22 03:41:16 +04:00
if ( is_empty ) {
2017-10-09 21:38:54 +03:00
bma - > aeof = true ;
2013-11-22 03:41:16 +04:00
return 0 ;
}
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Check if we are allocation or past the last extent , or at least into
* the last delayed allocated extent .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
bma - > aeof = bma - > offset > = rec . br_startoff + rec . br_blockcount | |
( bma - > offset > = rec . br_startoff & &
isnullstartblock ( rec . br_startblock ) ) ;
return 0 ;
}
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
/*
 * Returns the file-relative block number of the first block past eof in
 * the file.  This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 *
 * *last_block is also left at 0 when the fork has no extents or when an error
 * is returned.  Forks that are neither local, extents nor btree format yield
 * -EIO.
 */
int
xfs_bmap_last_offset(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*last_block,
	int			whichfork)
{
	struct xfs_bmbt_irec	last;
	int			is_empty;
	int			error;

	*last_block = 0;

	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		return 0;
	case XFS_DINODE_FMT_BTREE:
	case XFS_DINODE_FMT_EXTENTS:
		break;
	default:
		return -EIO;
	}

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &last, &is_empty);
	if (error)
		return error;
	if (is_empty)
		return 0;

	*last_block = last.br_startoff + last.br_blockcount;
	return 0;
}
2013-02-25 05:31:26 +04:00
/*
 * Returns whether the selected fork of the inode has exactly one
 * block or not.  For the data fork we check this matches di_size,
 * implying the file's range is 0..bsize-1.
 *
 * Non-DEBUG builds answer for the data fork purely from the inode size.
 * Otherwise the fork must be in extents format with exactly one extent, and
 * that extent must start at offset 0 and be one block long.  A fork whose
 * in-core extent list turns out to be empty is reported as "not one block".
 */
int					/* 1=>1 block, 0=>otherwise */
xfs_bmap_one_block(
	xfs_inode_t	*ip,		/* incore inode */
	int		whichfork)	/* data or attr fork */
{
	struct xfs_ifork *ifp;		/* inode fork pointer */
	int		rval;		/* return value */
	xfs_bmbt_irec_t	s;		/* internal version of extent */
	struct xfs_iext_cursor icur;

#ifndef DEBUG
	if (whichfork == XFS_DATA_FORK)
		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
#endif	/* !DEBUG */
	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
		return 0;
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return 0;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	xfs_iext_first(ifp, &icur);

	/*
	 * Don't inspect 's' unless the lookup actually filled it in; an empty
	 * in-core list would otherwise be read as uninitialised stack data.
	 */
	if (!xfs_iext_get_extent(ifp, &icur, &s))
		return 0;

	rval = s.br_startoff == 0 && s.br_blockcount == 1;
	if (rval && whichfork == XFS_DATA_FORK)
		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
	return rval;
}
2010-02-16 02:34:42 +03:00
2013-02-25 05:31:26 +04:00
/*
* Extent tree manipulation functions used during allocation .
*/
2010-02-16 02:34:42 +03:00
2013-02-25 05:31:26 +04:00
/*
 * Convert a delayed allocation to a real allocation.
 *
 * bma->got describes the real extent being added ("new"); bma->icur points
 * at the delayed-allocation extent (PREV) in the in-core extent list of the
 * selected fork. The asserts below require that new has a real start block,
 * that PREV is a delalloc record, and that new lies entirely within PREV.
 * whichfork must not be XFS_ATTR_FORK (asserted).
 *
 * The function classifies the conversion with BMAP_* state bits:
 *   - LEFT_FILLING / RIGHT_FILLING: new starts / ends where PREV does;
 *   - LEFT_CONTIG / RIGHT_CONTIG: the neighbouring record is a real extent
 *     that abuts new in both file offset and disk block, has the same
 *     br_state, and the merged length stays within MAXEXTLEN.
 * It then switches on those four bits. Each case rewrites the in-core
 * records through the xfs_iext_* helpers and, when bma->cur is non-NULL,
 * applies the matching lookup/update/insert/delete to the bmap btree.
 * Cases that leave part of PREV as delalloc compute da_new for the
 * remaining delalloc record(s).
 *
 * After the switch the function:
 *   - calls xfs_rmap_map_extent() for new unless XFS_BMAPI_NORMAP is set
 *     in bma->flags;
 *   - converts the fork to btree format if xfs_bmap_needs_btree() says so;
 *   - passes (da_new - da_old) to xfs_mod_delalloc() when they differ;
 *   - adds bma->cur->bc_private.b.allocated into da_new (and zeroes it),
 *     then passes (da_old - da_new) to xfs_mod_fdblocks() when they differ.
 *
 * Returns 0 or the first error encountered. The accumulated logging flags
 * (rval) are OR-ed into bma->logflags on every exit path, except when
 * whichfork is XFS_COW_FORK.
 */
STATIC int /* error */
xfs_bmap_add_extent_delay_real (
2016-10-03 19:11:34 +03:00
struct xfs_bmalloca * bma ,
int whichfork )
2013-02-25 05:31:26 +04:00
{
struct xfs_bmbt_irec * new = & bma - > got ; /* real extent being added */
int error ; /* error return value */
int i ; /* temp state */
2018-07-18 02:51:50 +03:00
struct xfs_ifork * ifp ; /* inode fork pointer */
2013-02-25 05:31:26 +04:00
xfs_fileoff_t new_endoff ; /* end offset of new entry */
xfs_bmbt_irec_t r [ 3 ] ; /* neighbor extent entries */
/* left is 0, right is 1, prev is 2 */
int rval = 0 ; /* return value (logging flags) */
2017-10-19 21:02:29 +03:00
int state = xfs_bmap_fork_to_state ( whichfork ) ; /* BMAP_* bits are OR-ed in below */
2013-02-25 05:31:26 +04:00
xfs_filblks_t da_new ; /* new count del alloc blocks used */
xfs_filblks_t da_old ; /* old count del alloc blocks used */
xfs_filblks_t temp = 0 ; /* value for da_new calculations */
int tmp_rval ; /* partial logging flags */
2015-02-23 14:39:08 +03:00
struct xfs_mount * mp ; /* mount of bma->ip */
2016-10-03 19:11:34 +03:00
xfs_extnum_t * nextents ; /* extent counter of the selected fork */
2017-10-18 00:16:24 +03:00
struct xfs_bmbt_irec old ; /* pre-update copy used as btree lookup key */
2010-02-16 02:34:42 +03:00
2016-01-04 08:10:42 +03:00
mp = bma - > ip - > i_mount ;
2016-01-04 08:12:42 +03:00
ifp = XFS_IFORK_PTR ( bma - > ip , whichfork ) ;
2016-10-03 19:11:34 +03:00
ASSERT ( whichfork ! = XFS_ATTR_FORK ) ;
/* COW fork extents are counted in i_cnextents, otherwise in di_nextents. */
nextents = ( whichfork = = XFS_COW_FORK ? & bma - > ip - > i_cnextents :
& bma - > ip - > i_d . di_nextents ) ;
2010-02-16 02:34:42 +03:00
2013-02-25 05:31:26 +04:00
ASSERT ( ! isnullstartblock ( new - > br_startblock ) ) ;
ASSERT ( ! bma - > cur | |
( bma - > cur - > bc_private . b . flags & XFS_BTCUR_BPRV_WASDEL ) ) ;
2010-02-16 02:34:42 +03:00
2015-10-12 10:21:22 +03:00
XFS_STATS_INC ( mp , xs_add_exlist ) ;
2010-02-16 02:34:42 +03:00
2013-02-25 05:31:26 +04:00
# define LEFT r[0]
# define RIGHT r[1]
# define PREV r[2]
2010-02-16 02:34:42 +03:00
/*
2013-02-25 05:31:26 +04:00
* Set up a bunch of variables to make the tests simpler .
* PREV is loaded from the record at bma->icur; it must be a delalloc
* record that fully contains the range of new.
2010-02-16 02:34:42 +03:00
*/
2017-11-03 20:34:43 +03:00
xfs_iext_get_extent ( ifp , & bma - > icur , & PREV ) ;
2013-02-25 05:31:26 +04:00
new_endoff = new - > br_startoff + new - > br_blockcount ;
2017-10-18 00:16:24 +03:00
ASSERT ( isnullstartblock ( PREV . br_startblock ) ) ;
2013-02-25 05:31:26 +04:00
ASSERT ( PREV . br_startoff < = new - > br_startoff ) ;
ASSERT ( PREV . br_startoff + PREV . br_blockcount > = new_endoff ) ;
/* da_old is the value carried in the start block field of PREV. */
da_old = startblockval ( PREV . br_startblock ) ;
da_new = 0 ;
2010-02-16 02:34:42 +03:00
/*
2013-02-25 05:31:26 +04:00
* Set flags determining what part of the previous delayed allocation
* extent is being replaced by a real allocation .
2010-02-16 02:34:42 +03:00
*/
2013-02-25 05:31:26 +04:00
if ( PREV . br_startoff = = new - > br_startoff )
state | = BMAP_LEFT_FILLING ;
if ( PREV . br_startoff + PREV . br_blockcount = = new_endoff )
state | = BMAP_RIGHT_FILLING ;
2010-02-16 02:34:42 +03:00
/*
2013-02-25 05:31:26 +04:00
* Check and set flags if this segment has a left neighbor .
* Don ' t set contiguous if the combined extent would be too large .
2010-02-16 02:34:42 +03:00
*/
2017-11-03 20:34:43 +03:00
if ( xfs_iext_peek_prev_extent ( ifp , & bma - > icur , & LEFT ) ) {
2013-02-25 05:31:26 +04:00
state | = BMAP_LEFT_VALID ;
if ( isnullstartblock ( LEFT . br_startblock ) )
state | = BMAP_LEFT_DELAY ;
2006-03-14 05:34:16 +03:00
}
2013-02-25 05:31:26 +04:00
if ( ( state & BMAP_LEFT_VALID ) & & ! ( state & BMAP_LEFT_DELAY ) & &
LEFT . br_startoff + LEFT . br_blockcount = = new - > br_startoff & &
LEFT . br_startblock + LEFT . br_blockcount = = new - > br_startblock & &
LEFT . br_state = = new - > br_state & &
LEFT . br_blockcount + new - > br_blockcount < = MAXEXTLEN )
state | = BMAP_LEFT_CONTIG ;
2006-03-14 05:34:16 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Check and set flags if this segment has a right neighbor .
* Don ' t set contiguous if the combined extent would be too large .
* Also check for all - three - contiguous being too large .
2005-04-17 02:20:36 +04:00
*/
2017-11-03 20:34:43 +03:00
if ( xfs_iext_peek_next_extent ( ifp , & bma - > icur , & RIGHT ) ) {
2013-02-25 05:31:26 +04:00
state | = BMAP_RIGHT_VALID ;
if ( isnullstartblock ( RIGHT . br_startblock ) )
state | = BMAP_RIGHT_DELAY ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
if ( ( state & BMAP_RIGHT_VALID ) & & ! ( state & BMAP_RIGHT_DELAY ) & &
new_endoff = = RIGHT . br_startoff & &
new - > br_startblock + new - > br_blockcount = = RIGHT . br_startblock & &
new - > br_state = = RIGHT . br_state & &
new - > br_blockcount + RIGHT . br_blockcount < = MAXEXTLEN & &
( ( state & ( BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING ) ) ! =
( BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING ) | |
LEFT . br_blockcount + new - > br_blockcount + RIGHT . br_blockcount
< = MAXEXTLEN ) )
state | = BMAP_RIGHT_CONTIG ;
error = 0 ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Switch out based on the FILLING and CONTIG state bits .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
switch ( state & ( BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG ) ) {
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG :
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Filling in all of a previously delayed allocation extent .
* The left and right neighbors are both contiguous with new .
* LEFT grows to cover PREV and RIGHT.
2005-04-17 02:20:36 +04:00
*/
2017-10-18 00:16:24 +03:00
LEFT . br_blockcount + = PREV . br_blockcount + RIGHT . br_blockcount ;
2013-02-25 05:31:26 +04:00
/* Remove two in-core records at the cursor, step back, rewrite LEFT. */
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( bma - > ip , & bma - > icur , state ) ;
xfs_iext_remove ( bma - > ip , & bma - > icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , & bma - > icur ) ;
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & LEFT ) ;
2016-10-03 19:11:34 +03:00
( * nextents ) - - ;
2017-11-03 20:34:39 +03:00
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
/* Btree: find and delete RIGHT, step back one record, rewrite it as LEFT. */
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , & RIGHT , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
error = xfs_btree_delete ( bma - > cur , & i ) ;
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
error = xfs_btree_decrement ( bma - > cur , 0 , & i ) ;
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( bma - > cur , & LEFT ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
}
break ;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG :
2006-03-14 05:34:16 +03:00
/*
2013-02-25 05:31:26 +04:00
* Filling in all of a previously delayed allocation extent .
* The left neighbor is contiguous , the right is not .
2006-03-14 05:34:16 +03:00
*/
/* Keep the pre-merge LEFT so its btree record can be looked up below. */
2017-10-18 00:16:24 +03:00
old = LEFT ;
LEFT . br_blockcount + = PREV . br_blockcount ;
2017-11-03 20:34:39 +03:00
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( bma - > ip , & bma - > icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , & bma - > icur ) ;
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & LEFT ) ;
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_DEXT ;
else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , & old , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( bma - > cur , & LEFT ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
}
break ;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG :
2011-09-19 00:40:57 +04:00
/*
2013-02-25 05:31:26 +04:00
* Filling in all of a previously delayed allocation extent .
xfs: delalloc -> unwritten COW fork allocation can go wrong
Long saga. There have been days spent following this through dead end
after dead end in multi-GB event traces. This morning, after writing
a trace-cmd wrapper that enabled me to be more selective about XFS
trace points, I discovered that I could get just enough essential
tracepoints enabled that there was a 50:50 chance the fsx config
would fail at ~115k ops. If it didn't fail at op 115547, I stopped
fsx at op 115548 anyway.
That gave me two traces - one where the problem manifested, and one
where it didn't. After refining the traces to have the necessary
information, I found that in the failing case there was a real
extent in the COW fork compared to an unwritten extent in the
working case.
Walking back through the two traces to the point where the COW fork
extents actually diverged, I found that the bad case had an extra
unwritten extent in it. This is likely because the bug it led me to
had triggered multiple times in those 115k ops, leaving stray
COW extents around. What I saw was a COW delalloc conversion to an
unwritten extent (as they should always be through
xfs_iomap_write_allocate()) resulted in a /written extent/:
xfs_writepage: dev 259:0 ino 0x83 pgoff 0x17000 size 0x79a00 offset 0 length 0
xfs_iext_remove: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/2 offset 32 block 152 count 20 flag 1 caller xfs_bmap_add_extent_delay_real
xfs_bmap_pre_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 4503599627239429 count 31 flag 0 caller xfs_bmap_add_extent_delay_real
xfs_bmap_post_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 121 count 51 flag 0 caller xfs_bmap_add_ex
Basically, Cow fork before:
0 1 32 52
+H+DDDDDDDDDDDD+UUUUUUUUUUU+
PREV RIGHT
COW delalloc conversion allocates:
1 32
+uuuuuuuuuuuu+
NEW
And the result according to the xfs_bmap_post_update trace was:
0 1 32 52
+H+wwwwwwwwwwwwwwwwwwwwwwww+
PREV
Which is clearly wrong - it should be a merged unwritten extent,
not a written extent.
That led me to look at the LEFT_FILLING|RIGHT_FILLING|RIGHT_CONTIG
case in xfs_bmap_add_extent_delay_real(), and sure enough, there's
the bug.
It takes the old delalloc extent (PREV) and adds the length of the
RIGHT extent to it, takes the start block from NEW, removes the
RIGHT extent and then updates PREV with the new extent.
What it fails to do is update PREV.br_state. For delalloc, this is
always XFS_EXT_NORM, while in this case we are converting the
delayed allocation to unwritten, so it needs to be updated to
XFS_EXT_UNWRITTEN. This LF|RF|RC case does not do this, and so
the resultant extent is always written.
And that's the bug I've been chasing for a week - a bmap btree bug,
not a reflink/dedupe/copy_file_range bug, but a BMBT bug introduced
with the recent in core extent tree scalability enhancements.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-11-20 09:50:08 +03:00
* The right neighbor is contiguous , the left is not . Take care
* with delay - > unwritten extent allocation here because the
* delalloc record we are overwriting is always written .
2011-09-19 00:40:57 +04:00
*/
/* PREV is rewritten in place as the merged real extent (new + RIGHT). */
2017-10-18 00:16:24 +03:00
PREV . br_startblock = new - > br_startblock ;
PREV . br_blockcount + = RIGHT . br_blockcount ;
xfs: delalloc -> unwritten COW fork allocation can go wrong
Long saga. There have been days spent following this through dead end
after dead end in multi-GB event traces. This morning, after writing
a trace-cmd wrapper that enabled me to be more selective about XFS
trace points, I discovered that I could get just enough essential
tracepoints enabled that there was a 50:50 chance the fsx config
would fail at ~115k ops. If it didn't fail at op 115547, I stopped
fsx at op 115548 anyway.
That gave me two traces - one where the problem manifested, and one
where it didn't. After refining the traces to have the necessary
information, I found that in the failing case there was a real
extent in the COW fork compared to an unwritten extent in the
working case.
Walking back through the two traces to the point where the CWO fork
extents actually diverged, I found that the bad case had an extra
unwritten extent in it. This is likely because the bug it led me to
had triggered multiple times in those 115k ops, leaving stray
COW extents around. What I saw was a COW delalloc conversion to an
unwritten extent (as they should always be through
xfs_iomap_write_allocate()) resulted in a /written extent/:
xfs_writepage: dev 259:0 ino 0x83 pgoff 0x17000 size 0x79a00 offset 0 length 0
xfs_iext_remove: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/2 offset 32 block 152 count 20 flag 1 caller xfs_bmap_add_extent_delay_real
xfs_bmap_pre_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 4503599627239429 count 31 flag 0 caller xfs_bmap_add_extent_delay_real
xfs_bmap_post_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 121 count 51 flag 0 caller xfs_bmap_add_ex
Basically, Cow fork before:
0 1 32 52
+H+DDDDDDDDDDDD+UUUUUUUUUUU+
PREV RIGHT
COW delalloc conversion allocates:
1 32
+uuuuuuuuuuuu+
NEW
And the result according to the xfs_bmap_post_update trace was:
0 1 32 52
+H+wwwwwwwwwwwwwwwwwwwwwwww+
PREV
Which is clearly wrong - it should be a merged unwritten extent,
not an unwritten extent.
That lead me to look at the LEFT_FILLING|RIGHT_FILLING|RIGHT_CONTIG
case in xfs_bmap_add_extent_delay_real(), and sure enough, there's
the bug.
It takes the old delalloc extent (PREV) and adds the length of the
RIGHT extent to it, takes the start block from NEW, removes the
RIGHT extent and then updates PREV with the new extent.
What it fails to do is update PREV.br_state. For delalloc, this is
always XFS_EXT_NORM, while in this case we are converting the
delayed allocation to unwritten, so it needs to be updated to
XFS_EXT_UNWRITTEN. This LF|RF|RC case does not do this, and so
the resultant extent is always written.
And that's the bug I've been chasing for a week - a bmap btree bug,
not a reflink/dedupe/copy_file_range bug, but a BMBT bug introduced
with the recent in core extent tree scalability enhancements.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-11-20 09:50:08 +03:00
PREV . br_state = new - > br_state ;
2017-11-03 20:34:39 +03:00
2017-11-03 20:34:43 +03:00
xfs_iext_next ( ifp , & bma - > icur ) ;
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( bma - > ip , & bma - > icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , & bma - > icur ) ;
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & PREV ) ;
2011-09-19 00:40:57 +04:00
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_DEXT ;
else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , & RIGHT , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( bma - > cur , & PREV ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
}
break ;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING :
2006-03-14 05:34:16 +03:00
/*
2013-02-25 05:31:26 +04:00
* Filling in all of a previously delayed allocation extent .
* Neither the left nor right neighbors are contiguous with
* the new one .
2006-03-14 05:34:16 +03:00
*/
2017-10-18 00:16:24 +03:00
PREV . br_startblock = new - > br_startblock ;
PREV . br_state = new - > br_state ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & PREV ) ;
2006-03-14 05:34:16 +03:00
2016-10-03 19:11:34 +03:00
( * nextents ) + + ;
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , new , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 0 , done ) ;
2013-02-25 05:31:26 +04:00
error = xfs_btree_insert ( bma - > cur , & i ) ;
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
}
break ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG :
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Filling in the first part of a previous delayed allocation .
* The left neighbor is contiguous .
2005-04-17 02:20:36 +04:00
*/
2017-10-18 00:16:24 +03:00
old = LEFT ;
temp = PREV . br_blockcount - new - > br_blockcount ;
da_new = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( bma - > ip , temp ) ,
startblockval ( PREV . br_startblock ) ) ;
LEFT . br_blockcount + = new - > br_blockcount ;
2005-04-17 02:20:36 +04:00
2017-11-03 20:34:38 +03:00
PREV . br_blockcount = temp ;
2017-10-18 00:16:24 +03:00
PREV . br_startoff + = new - > br_blockcount ;
PREV . br_startblock = nullstartblock ( da_new ) ;
2017-11-03 20:34:39 +03:00
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & PREV ) ;
xfs_iext_prev ( ifp , & bma - > icur ) ;
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & LEFT ) ;
2017-10-18 00:16:24 +03:00
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_DEXT ;
else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , & old , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( bma - > cur , & LEFT ) ;
2011-07-08 16:34:47 +04:00
if ( error )
2005-04-17 02:20:36 +04:00
goto done ;
}
2013-02-25 05:31:26 +04:00
break ;
case BMAP_LEFT_FILLING :
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Filling in the first part of a previous delayed allocation .
* The left neighbor is not contiguous .
2005-04-17 02:20:36 +04:00
*/
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , new ) ;
2016-10-03 19:11:34 +03:00
( * nextents ) + + ;
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
2005-04-17 02:20:36 +04:00
else {
2013-02-25 05:31:26 +04:00
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , new , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 0 , done ) ;
2013-02-25 05:31:26 +04:00
error = xfs_btree_insert ( bma - > cur , & i ) ;
if ( error )
2005-04-17 02:20:36 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2005-04-17 02:20:36 +04:00
}
2011-05-11 19:04:05 +04:00
2016-01-04 08:12:42 +03:00
if ( xfs_bmap_needs_btree ( bma - > ip , whichfork ) ) {
2013-02-25 05:31:26 +04:00
error = xfs_bmap_extents_to_btree ( bma - > tp , bma - > ip ,
2018-07-12 08:26:29 +03:00
& bma - > cur , 1 , & tmp_rval , whichfork ) ;
2013-02-25 05:31:26 +04:00
rval | = tmp_rval ;
if ( error )
goto done ;
2005-04-17 02:20:36 +04:00
}
2017-10-18 00:16:24 +03:00
temp = PREV . br_blockcount - new - > br_blockcount ;
2013-02-25 05:31:26 +04:00
da_new = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( bma - > ip , temp ) ,
startblockval ( PREV . br_startblock ) -
( bma - > cur ? bma - > cur - > bc_private . b . allocated : 0 ) ) ;
2017-10-18 00:16:24 +03:00
PREV . br_startoff = new_endoff ;
PREV . br_blockcount = temp ;
PREV . br_startblock = nullstartblock ( da_new ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_next ( ifp , & bma - > icur ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( bma - > ip , & bma - > icur , & PREV , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , & bma - > icur ) ;
2005-04-17 02:20:36 +04:00
break ;
2013-02-25 05:31:26 +04:00
case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG :
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Filling in the last part of a previous delayed allocation .
* The right neighbor is contiguous with the new allocation .
2005-04-17 02:20:36 +04:00
*/
2017-10-18 00:16:24 +03:00
old = RIGHT ;
RIGHT . br_startoff = new - > br_startoff ;
RIGHT . br_startblock = new - > br_startblock ;
RIGHT . br_blockcount + = new - > br_blockcount ;
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_DEXT ;
else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , & old , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( bma - > cur , & RIGHT ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
2017-10-18 00:16:24 +03:00
temp = PREV . br_blockcount - new - > br_blockcount ;
2013-02-25 05:31:26 +04:00
da_new = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( bma - > ip , temp ) ,
startblockval ( PREV . br_startblock ) ) ;
2017-10-18 00:16:24 +03:00
PREV . br_blockcount = temp ;
PREV . br_startblock = nullstartblock ( da_new ) ;
2013-02-25 05:31:26 +04:00
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & PREV ) ;
xfs_iext_next ( ifp , & bma - > icur ) ;
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & RIGHT ) ;
2005-04-17 02:20:36 +04:00
break ;
2013-02-25 05:31:26 +04:00
case BMAP_RIGHT_FILLING :
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Filling in the last part of a previous delayed allocation .
* The right neighbor is not contiguous .
2005-04-17 02:20:36 +04:00
*/
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , new ) ;
2016-10-03 19:11:34 +03:00
( * nextents ) + + ;
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , new , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 0 , done ) ;
2013-02-25 05:31:26 +04:00
error = xfs_btree_insert ( bma - > cur , & i ) ;
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
2016-01-04 08:12:42 +03:00
if ( xfs_bmap_needs_btree ( bma - > ip , whichfork ) ) {
2013-02-25 05:31:26 +04:00
error = xfs_bmap_extents_to_btree ( bma - > tp , bma - > ip ,
2018-07-12 08:26:29 +03:00
& bma - > cur , 1 , & tmp_rval , whichfork ) ;
2013-02-25 05:31:26 +04:00
rval | = tmp_rval ;
if ( error )
goto done ;
2005-04-17 02:20:36 +04:00
}
2017-10-18 00:16:24 +03:00
temp = PREV . br_blockcount - new - > br_blockcount ;
2013-02-25 05:31:26 +04:00
da_new = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( bma - > ip , temp ) ,
startblockval ( PREV . br_startblock ) -
( bma - > cur ? bma - > cur - > bc_private . b . allocated : 0 ) ) ;
2017-10-18 00:16:24 +03:00
PREV . br_startblock = nullstartblock ( da_new ) ;
PREV . br_blockcount = temp ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( bma - > ip , & bma - > icur , & PREV , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_next ( ifp , & bma - > icur ) ;
2005-04-17 02:20:36 +04:00
break ;
case 0 :
/*
2013-02-25 05:31:26 +04:00
* Filling in the middle part of a previous delayed allocation .
* Contiguity is impossible here .
* This case is avoided almost all the time .
*
* We start with a delayed allocation :
*
* + ddddddddddddddddddddddddddddddddddddddddddddddddddddddd +
* PREV @ idx
*
* and we are allocating :
* + rrrrrrrrrrrrrrrrr +
* new
*
* and we set it up for insertion as :
* + ddddddddddddddddddd + rrrrrrrrrrrrrrrrr + ddddddddddddddddd +
* new
* PREV @ idx LEFT RIGHT
* inserted at idx + 1
2005-04-17 02:20:36 +04:00
*/
2017-10-18 00:16:24 +03:00
old = PREV ;
/* LEFT is the new middle */
2013-02-25 05:31:26 +04:00
LEFT = * new ;
2017-10-18 00:16:24 +03:00
/* RIGHT is the new right */
2013-02-25 05:31:26 +04:00
RIGHT . br_state = PREV . br_state ;
RIGHT . br_startoff = new_endoff ;
2017-10-18 00:16:24 +03:00
RIGHT . br_blockcount =
PREV . br_startoff + PREV . br_blockcount - new_endoff ;
RIGHT . br_startblock =
nullstartblock ( xfs_bmap_worst_indlen ( bma - > ip ,
RIGHT . br_blockcount ) ) ;
/* truncate PREV */
PREV . br_blockcount = new - > br_startoff - PREV . br_startoff ;
PREV . br_startblock =
nullstartblock ( xfs_bmap_worst_indlen ( bma - > ip ,
PREV . br_blockcount ) ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( bma - > ip , state , & bma - > icur , & PREV ) ;
2017-10-18 00:16:24 +03:00
2017-11-03 20:34:43 +03:00
xfs_iext_next ( ifp , & bma - > icur ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( bma - > ip , & bma - > icur , & RIGHT , state ) ;
xfs_iext_insert ( bma - > ip , & bma - > icur , & LEFT , state ) ;
2016-10-03 19:11:34 +03:00
( * nextents ) + + ;
2017-10-18 00:16:24 +03:00
2013-02-25 05:31:26 +04:00
if ( bma - > cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( bma - > cur , new , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 0 , done ) ;
2013-02-25 05:31:26 +04:00
error = xfs_btree_insert ( bma - > cur , & i ) ;
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2005-04-17 02:20:36 +04:00
}
2016-01-04 08:12:42 +03:00
if ( xfs_bmap_needs_btree ( bma - > ip , whichfork ) ) {
2013-02-25 05:31:26 +04:00
error = xfs_bmap_extents_to_btree ( bma - > tp , bma - > ip ,
2018-07-12 08:26:29 +03:00
& bma - > cur , 1 , & tmp_rval , whichfork ) ;
2013-02-25 05:31:26 +04:00
rval | = tmp_rval ;
if ( error )
goto done ;
}
2017-10-18 00:16:24 +03:00
da_new = startblockval ( PREV . br_startblock ) +
startblockval ( RIGHT . br_startblock ) ;
2013-02-25 05:31:26 +04:00
break ;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG :
case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG :
case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG :
case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG :
case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG :
case BMAP_LEFT_CONTIG :
case BMAP_RIGHT_CONTIG :
/*
* These cases are all impossible .
*/
ASSERT ( 0 ) ;
}
2018-05-09 20:02:32 +03:00
/* add reverse mapping unless caller opted out */
if ( ! ( bma - > flags & XFS_BMAPI_NORMAP ) ) {
2018-08-01 17:20:34 +03:00
error = xfs_rmap_map_extent ( bma - > tp , bma - > ip , whichfork , new ) ;
2018-05-09 20:02:32 +03:00
if ( error )
goto done ;
}
xfs: propagate bmap updates to rmapbt
When we map, unmap, or convert an extent in a file's data or attr
fork, schedule a respective update in the rmapbt. Previous versions
of this patch required a 1:1 correspondence between bmap and rmap,
but this is no longer true as we now have ability to make interval
queries against the rmapbt.
We use the deferred operations code to handle redo operations
atomically and deadlock free. This plumbs in all five rmap actions
(map, unmap, convert extent, alloc, free); we'll use the first three
now for file data, and reflink will want the last two. We also add
an error injection site to test log recovery.
Finally, we need to fix the bmap shift extent code to adjust the
rmaps correctly.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 05:16:05 +03:00
2013-02-25 05:31:26 +04:00
/* convert to a btree if necessary */
2016-01-04 08:12:42 +03:00
if ( xfs_bmap_needs_btree ( bma - > ip , whichfork ) ) {
2013-02-25 05:31:26 +04:00
int tmp_logflags ; /* partial log flag return val */
ASSERT ( bma - > cur = = NULL ) ;
error = xfs_bmap_extents_to_btree ( bma - > tp , bma - > ip ,
2018-07-12 08:26:29 +03:00
& bma - > cur , da_old > 0 , & tmp_logflags ,
whichfork ) ;
2013-02-25 05:31:26 +04:00
bma - > logflags | = tmp_logflags ;
if ( error )
goto done ;
}
2019-04-26 04:26:22 +03:00
if ( da_new ! = da_old )
xfs_mod_delalloc ( mp , ( int64_t ) da_new - da_old ) ;
2017-10-18 00:16:25 +03:00
if ( bma - > cur ) {
da_new + = bma - > cur - > bc_private . b . allocated ;
bma - > cur - > bc_private . b . allocated = 0 ;
2010-09-30 06:25:55 +04:00
}
2013-02-25 05:31:26 +04:00
2017-10-18 00:16:25 +03:00
/* adjust for changes in reserved delayed indirect blocks */
if ( da_new ! = da_old ) {
ASSERT ( state = = 0 | | da_new < da_old ) ;
error = xfs_mod_fdblocks ( mp , ( int64_t ) ( da_old - da_new ) ,
false ) ;
}
2013-02-25 05:31:26 +04:00
2016-01-04 08:12:42 +03:00
xfs_bmap_check_leaf_extents ( bma - > cur , bma - > ip , whichfork ) ;
2005-04-17 02:20:36 +04:00
done :
2016-10-03 19:11:34 +03:00
if ( whichfork ! = XFS_COW_FORK )
bma - > logflags | = rval ;
2005-04-17 02:20:36 +04:00
return error ;
2013-02-25 05:31:26 +04:00
# undef LEFT
# undef RIGHT
# undef PREV
2005-04-17 02:20:36 +04:00
}
/*
2013-02-25 05:31:26 +04:00
* Convert an unwritten allocation to a real allocation or vice versa .
2005-04-17 02:20:36 +04:00
*/
2019-02-18 20:38:48 +03:00
int /* error */
2013-02-25 05:31:26 +04:00
xfs_bmap_add_extent_unwritten_real (
struct xfs_trans * tp ,
xfs_inode_t * ip , /* incore inode pointer */
2017-02-03 02:14:01 +03:00
int whichfork ,
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor * icur ,
2013-02-25 05:31:26 +04:00
xfs_btree_cur_t * * curp , /* if *curp is null, not a btree */
xfs_bmbt_irec_t * new , /* new data to add to file extents */
int * logflagsp ) /* inode logging flags */
2005-04-17 02:20:36 +04:00
{
2013-02-25 05:31:26 +04:00
xfs_btree_cur_t * cur ; /* btree cursor */
int error ; /* error return value */
int i ; /* temp state */
2018-07-18 02:51:50 +03:00
struct xfs_ifork * ifp ; /* inode fork pointer */
2013-02-25 05:31:26 +04:00
xfs_fileoff_t new_endoff ; /* end offset of new entry */
xfs_bmbt_irec_t r [ 3 ] ; /* neighbor extent entries */
/* left is 0, right is 1, prev is 2 */
int rval = 0 ; /* return value (logging flags) */
2017-10-19 21:02:29 +03:00
int state = xfs_bmap_fork_to_state ( whichfork ) ;
2017-02-03 02:14:01 +03:00
struct xfs_mount * mp = ip - > i_mount ;
2017-10-18 00:16:25 +03:00
struct xfs_bmbt_irec old ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
* logflagsp = 0 ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
cur = * curp ;
2017-02-03 02:14:01 +03:00
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
2011-12-19 00:00:07 +04:00
2013-02-25 05:31:26 +04:00
ASSERT ( ! isnullstartblock ( new - > br_startblock ) ) ;
2015-10-12 10:21:22 +03:00
XFS_STATS_INC ( mp , xs_add_exlist ) ;
2013-02-25 05:31:26 +04:00
# define LEFT r[0]
# define RIGHT r[1]
# define PREV r[2]
2008-10-30 09:14:34 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Set up a bunch of variables to make the tests simpler .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
error = 0 ;
2017-11-03 20:34:43 +03:00
xfs_iext_get_extent ( ifp , icur , & PREV ) ;
2017-10-18 00:16:25 +03:00
ASSERT ( new - > br_state ! = PREV . br_state ) ;
2013-02-25 05:31:26 +04:00
new_endoff = new - > br_startoff + new - > br_blockcount ;
ASSERT ( PREV . br_startoff < = new - > br_startoff ) ;
ASSERT ( PREV . br_startoff + PREV . br_blockcount > = new_endoff ) ;
2008-10-30 09:14:34 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Set flags determining what part of the previous oldext allocation
* extent is being replaced by a newext allocation .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( PREV . br_startoff = = new - > br_startoff )
state | = BMAP_LEFT_FILLING ;
if ( PREV . br_startoff + PREV . br_blockcount = = new_endoff )
state | = BMAP_RIGHT_FILLING ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Check and set flags if this segment has a left neighbor .
* Don ' t set contiguous if the combined extent would be too large .
2005-04-17 02:20:36 +04:00
*/
2017-11-03 20:34:43 +03:00
if ( xfs_iext_peek_prev_extent ( ifp , icur , & LEFT ) ) {
2013-02-25 05:31:26 +04:00
state | = BMAP_LEFT_VALID ;
if ( isnullstartblock ( LEFT . br_startblock ) )
state | = BMAP_LEFT_DELAY ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
if ( ( state & BMAP_LEFT_VALID ) & & ! ( state & BMAP_LEFT_DELAY ) & &
LEFT . br_startoff + LEFT . br_blockcount = = new - > br_startoff & &
LEFT . br_startblock + LEFT . br_blockcount = = new - > br_startblock & &
2017-10-18 00:16:25 +03:00
LEFT . br_state = = new - > br_state & &
2013-02-25 05:31:26 +04:00
LEFT . br_blockcount + new - > br_blockcount < = MAXEXTLEN )
state | = BMAP_LEFT_CONTIG ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Check and set flags if this segment has a right neighbor .
* Don ' t set contiguous if the combined extent would be too large .
* Also check for all - three - contiguous being too large .
2005-04-17 02:20:36 +04:00
*/
2017-11-03 20:34:43 +03:00
if ( xfs_iext_peek_next_extent ( ifp , icur , & RIGHT ) ) {
2013-02-25 05:31:26 +04:00
state | = BMAP_RIGHT_VALID ;
if ( isnullstartblock ( RIGHT . br_startblock ) )
state | = BMAP_RIGHT_DELAY ;
2005-04-17 02:20:36 +04:00
}
2008-10-30 09:14:34 +03:00
2013-02-25 05:31:26 +04:00
if ( ( state & BMAP_RIGHT_VALID ) & & ! ( state & BMAP_RIGHT_DELAY ) & &
new_endoff = = RIGHT . br_startoff & &
new - > br_startblock + new - > br_blockcount = = RIGHT . br_startblock & &
2017-10-18 00:16:25 +03:00
new - > br_state = = RIGHT . br_state & &
2013-02-25 05:31:26 +04:00
new - > br_blockcount + RIGHT . br_blockcount < = MAXEXTLEN & &
( ( state & ( BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING ) ) ! =
( BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING ) | |
LEFT . br_blockcount + new - > br_blockcount + RIGHT . br_blockcount
< = MAXEXTLEN ) )
state | = BMAP_RIGHT_CONTIG ;
2008-10-30 09:11:40 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Switch out based on the FILLING and CONTIG state bits .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
switch ( state & ( BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG ) ) {
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG :
/*
* Setting all of a previous oldext extent to newext .
* The left and right neighbors are both contiguous with new .
*/
2017-10-18 00:16:25 +03:00
LEFT . br_blockcount + = PREV . br_blockcount + RIGHT . br_blockcount ;
2009-03-29 21:26:46 +04:00
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( ip , icur , state ) ;
xfs_iext_remove ( ip , icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & LEFT ) ;
2017-02-03 02:14:01 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) - 2 ) ;
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & RIGHT , & i ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_delete ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_decrement ( cur , 0 , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_delete ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_decrement ( cur , 0 , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & LEFT ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
}
break ;
2009-03-29 21:26:46 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG :
/*
* Setting all of a previous oldext extent to newext .
* The left neighbor is contiguous , the right is not .
*/
2017-10-18 00:16:25 +03:00
LEFT . br_blockcount + = PREV . br_blockcount ;
2005-04-17 02:20:36 +04:00
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( ip , icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & LEFT ) ;
2017-02-03 02:14:01 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) - 1 ) ;
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & PREV , & i ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_delete ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_decrement ( cur , 0 , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & LEFT ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
}
break ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG :
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Setting all of a previous oldext extent to newext .
* The right neighbor is contiguous , the left is not .
2005-04-17 02:20:36 +04:00
*/
2017-10-18 00:16:25 +03:00
PREV . br_blockcount + = RIGHT . br_blockcount ;
PREV . br_state = new - > br_state ;
2017-11-03 20:34:40 +03:00
2017-11-03 20:34:43 +03:00
xfs_iext_next ( ifp , icur ) ;
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( ip , icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & PREV ) ;
2017-10-18 00:16:25 +03:00
2017-02-03 02:14:01 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) - 1 ) ;
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & RIGHT , & i ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_delete ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_decrement ( cur , 0 , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & PREV ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
break ;
2013-02-11 08:58:13 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING :
/*
* Setting all of a previous oldext extent to newext .
* Neither the left nor right neighbors are contiguous with
* the new one .
*/
2017-10-18 00:16:25 +03:00
PREV . br_state = new - > br_state ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & PREV ) ;
2013-02-11 08:58:13 +04:00
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_DEXT ;
else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , new , & i ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & PREV ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
}
break ;
2013-02-11 08:58:13 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG :
/*
* Setting the first part of a previous oldext extent to newext .
* The left neighbor is contiguous .
*/
2017-10-18 00:16:25 +03:00
LEFT . br_blockcount + = new - > br_blockcount ;
2005-04-17 02:20:36 +04:00
2017-10-18 00:16:25 +03:00
old = PREV ;
PREV . br_startoff + = new - > br_blockcount ;
PREV . br_startblock + = new - > br_blockcount ;
PREV . br_blockcount - = new - > br_blockcount ;
2006-03-14 05:30:23 +03:00
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & PREV ) ;
xfs_iext_prev ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & LEFT ) ;
[XFS] There are a few problems with the new
xfs_bmap_search_multi_extents() wrapper function that I introduced in mod
xfs-linux:xfs-kern:207393a. The function was added as a wrapper around
xfs_bmap_do_search_extents() to avoid breaking the top-of-tree CXFS
interface. The idea of the function was basically to extract the target
extent buffer (if muli- level extent allocation mode), then call
xfs_bmap_do_search_extents() with either a pointer to the first extent in
the target buffer or a pointer to the first extent in the file, depending
on which extent mode was being used. However, in addition to locating the
target extent record for block bno, xfs_bmap_do_search_extents() also sets
four parameters needed by the caller: *lastx, *eofp, *gotp, *prevp.
Passing only the target extent buffer to xfs_bmap_do_search_extents()
causes *eofp to be set incorrectly if the extent is at the end of the
target list but there are actually more extents in the next er_extbuf.
Likewise, if the extent is the first one in the buffer but NOT the first
in the file, *prevp is incorrectly set to NULL. Adding the needed
functionality to xfs_bmap_search_multi_extents() to re-set any incorrectly
set fields is redundant and makes the call to xfs_bmap_do_search_extents()
not make much sense when multi-level extent allocation mode is being used.
This mod basically extracts the two functional components from
xfs_bmap_do_search_extents(), with the intent of obsoleting/removing
xfs_bmap_do_search_extents() after the CXFS mult-level in-core extent
changes are checked in. The two components are: 1) The binary search to
locate the target extent record, and 2) Setting the four parameters needed
by the caller (*lastx, *eofp, *gotp, *prevp). Component 1: I created a
new function in xfs_inode.c called xfs_iext_bno_to_ext(), which executes
the binary search to find the target extent record.
xfs_bmap_search_multi_extents() has been modified to call
xfs_iext_bno_to_ext() rather than xfs_bmap_do_search_extents(). Component
2: The parameter setting functionality has been added to
xfs_bmap_search_multi_extents(), eliminating the need for
xfs_bmap_do_search_extents(). These changes make the removal of
xfs_bmap_do_search_extents() trival once the CXFS changes are in place.
They also allow us to maintain the current XFS interface, using the new
search function introduced in mod xfs-linux:xfs-kern:207393a.
SGI-PV: 928864
SGI-Modid: xfs-linux-melb:xfs-kern:207866a
Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
2006-03-17 09:25:04 +03:00
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_DEXT ;
else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & old , & i ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & PREV ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2017-10-18 00:16:25 +03:00
error = xfs_btree_decrement ( cur , 0 , & i ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & LEFT ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
[XFS] There are a few problems with the new
xfs_bmap_search_multi_extents() wrapper function that I introduced in mod
xfs-linux:xfs-kern:207393a. The function was added as a wrapper around
xfs_bmap_do_search_extents() to avoid breaking the top-of-tree CXFS
interface. The idea of the function was basically to extract the target
extent buffer (if muli- level extent allocation mode), then call
xfs_bmap_do_search_extents() with either a pointer to the first extent in
the target buffer or a pointer to the first extent in the file, depending
on which extent mode was being used. However, in addition to locating the
target extent record for block bno, xfs_bmap_do_search_extents() also sets
four parameters needed by the caller: *lastx, *eofp, *gotp, *prevp.
Passing only the target extent buffer to xfs_bmap_do_search_extents()
causes *eofp to be set incorrectly if the extent is at the end of the
target list but there are actually more extents in the next er_extbuf.
Likewise, if the extent is the first one in the buffer but NOT the first
in the file, *prevp is incorrectly set to NULL. Adding the needed
functionality to xfs_bmap_search_multi_extents() to re-set any incorrectly
set fields is redundant and makes the call to xfs_bmap_do_search_extents()
not make much sense when multi-level extent allocation mode is being used.
This mod basically extracts the two functional components from
xfs_bmap_do_search_extents(), with the intent of obsoleting/removing
xfs_bmap_do_search_extents() after the CXFS mult-level in-core extent
changes are checked in. The two components are: 1) The binary search to
locate the target extent record, and 2) Setting the four parameters needed
by the caller (*lastx, *eofp, *gotp, *prevp). Component 1: I created a
new function in xfs_inode.c called xfs_iext_bno_to_ext(), which executes
the binary search to find the target extent record.
xfs_bmap_search_multi_extents() has been modified to call
xfs_iext_bno_to_ext() rather than xfs_bmap_do_search_extents(). Component
2: The parameter setting functionality has been added to
xfs_bmap_search_multi_extents(), eliminating the need for
xfs_bmap_do_search_extents(). These changes make the removal of
xfs_bmap_do_search_extents() trival once the CXFS changes are in place.
They also allow us to maintain the current XFS interface, using the new
search function introduced in mod xfs-linux:xfs-kern:207393a.
SGI-PV: 928864
SGI-Modid: xfs-linux-melb:xfs-kern:207866a
Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
2006-03-17 09:25:04 +03:00
}
2013-02-25 05:31:26 +04:00
break ;
2006-03-14 05:30:23 +03:00
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_FILLING :
/*
* Setting the first part of a previous oldext extent to newext .
* The left neighbor is not contiguous .
*/
2017-10-18 00:16:25 +03:00
old = PREV ;
PREV . br_startoff + = new - > br_blockcount ;
PREV . br_startblock + = new - > br_blockcount ;
PREV . br_blockcount - = new - > br_blockcount ;
2005-04-17 02:20:36 +04:00
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & PREV ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , icur , new , state ) ;
2017-02-03 02:14:01 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) + 1 ) ;
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & old , & i ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & PREV ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
cur - > bc_rec . b = * new ;
if ( ( error = xfs_btree_insert ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
}
break ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG :
/*
* Setting the last part of a previous oldext extent to newext .
* The right neighbor is contiguous with the new allocation .
*/
2017-10-18 00:16:25 +03:00
old = PREV ;
PREV . br_blockcount - = new - > br_blockcount ;
2005-04-17 02:20:36 +04:00
2017-10-18 00:16:25 +03:00
RIGHT . br_startoff = new - > br_startoff ;
RIGHT . br_startblock = new - > br_startblock ;
RIGHT . br_blockcount + = new - > br_blockcount ;
2017-11-03 20:34:40 +03:00
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & PREV ) ;
xfs_iext_next ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & RIGHT ) ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_DEXT ;
else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & old , & i ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & PREV ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2017-10-18 00:16:25 +03:00
error = xfs_btree_increment ( cur , 0 , & i ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & RIGHT ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
}
break ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_RIGHT_FILLING :
/*
* Setting the last part of a previous oldext extent to newext .
* The right neighbor is not contiguous .
*/
2017-10-18 00:16:25 +03:00
old = PREV ;
PREV . br_blockcount - = new - > br_blockcount ;
2005-04-17 02:20:36 +04:00
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & PREV ) ;
xfs_iext_next ( ifp , icur ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , icur , new , state ) ;
2005-11-02 02:34:53 +03:00
2017-02-03 02:14:01 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) + 1 ) ;
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & old , & i ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & PREV ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , new , & i ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 0 , done ) ;
2013-02-25 05:31:26 +04:00
if ( ( error = xfs_btree_insert ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
}
break ;
case 0 :
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Setting the middle part of a previous oldext extent to
* newext . Contiguity is impossible here .
* One extent becomes three extents .
2005-04-17 02:20:36 +04:00
*/
2017-10-18 00:16:25 +03:00
old = PREV ;
PREV . br_blockcount = new - > br_startoff - PREV . br_startoff ;
2010-06-24 05:36:58 +04:00
2013-02-25 05:31:26 +04:00
r [ 0 ] = * new ;
r [ 1 ] . br_startoff = new_endoff ;
r [ 1 ] . br_blockcount =
2017-10-18 00:16:25 +03:00
old . br_startoff + old . br_blockcount - new_endoff ;
2013-02-25 05:31:26 +04:00
r [ 1 ] . br_startblock = new - > br_startblock + new - > br_blockcount ;
2017-10-18 00:16:25 +03:00
r [ 1 ] . br_state = PREV . br_state ;
2010-06-24 05:36:58 +04:00
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & PREV ) ;
xfs_iext_next ( ifp , icur ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , icur , & r [ 1 ] , state ) ;
xfs_iext_insert ( ip , icur , & r [ 0 ] , state ) ;
2013-02-25 05:31:26 +04:00
2017-02-03 02:14:01 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) + 2 ) ;
2013-02-25 05:31:26 +04:00
if ( cur = = NULL )
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT ;
else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & old , & i ) ;
2017-10-18 00:16:25 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
/* new right extent - oldext */
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & r [ 1 ] ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
/* new left extent - oldext */
cur - > bc_rec . b = PREV ;
if ( ( error = xfs_btree_insert ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
/*
* Reset the cursor to the position of the new extent
* we are about to insert as we can ' t trust it after
* the previous insert .
*/
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , new , & i ) ;
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 0 , done ) ;
2013-02-25 05:31:26 +04:00
/* new middle extent - newext */
if ( ( error = xfs_btree_insert ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
}
2005-04-17 02:20:36 +04:00
break ;
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG :
case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG :
case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG :
case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG :
case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG :
case BMAP_LEFT_CONTIG :
case BMAP_RIGHT_CONTIG :
/*
* These cases are all impossible .
*/
2005-04-17 02:20:36 +04:00
ASSERT ( 0 ) ;
}
2011-12-19 00:00:07 +04:00
xfs: propagate bmap updates to rmapbt
When we map, unmap, or convert an extent in a file's data or attr
fork, schedule a respective update in the rmapbt. Previous versions
of this patch required a 1:1 correspondence between bmap and rmap,
but this is no longer true as we now have ability to make interval
queries against the rmapbt.
We use the deferred operations code to handle redo operations
atomically and deadlock free. This plumbs in all five rmap actions
(map, unmap, convert extent, alloc, free); we'll use the first three
now for file data, and reflink will want the last two. We also add
an error injection site to test log recovery.
Finally, we need to fix the bmap shift extent code to adjust the
rmaps correctly.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 05:16:05 +03:00
/* update reverse mappings */
2018-08-01 17:20:34 +03:00
error = xfs_rmap_convert_extent ( mp , tp , ip , whichfork , new ) ;
xfs: propagate bmap updates to rmapbt
When we map, unmap, or convert an extent in a file's data or attr
fork, schedule a respective update in the rmapbt. Previous versions
of this patch required a 1:1 correspondence between bmap and rmap,
but this is no longer true as we now have ability to make interval
queries against the rmapbt.
We use the deferred operations code to handle redo operations
atomically and deadlock free. This plumbs in all five rmap actions
(map, unmap, convert extent, alloc, free); we'll use the first three
now for file data, and reflink will want the last two. We also add
an error injection site to test log recovery.
Finally, we need to fix the bmap shift extent code to adjust the
rmaps correctly.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 05:16:05 +03:00
if ( error )
goto done ;
2013-02-25 05:31:26 +04:00
/* convert to a btree if necessary */
2017-02-03 02:14:01 +03:00
if ( xfs_bmap_needs_btree ( ip , whichfork ) ) {
2013-02-25 05:31:26 +04:00
int tmp_logflags ; /* partial log flag return val */
ASSERT ( cur = = NULL ) ;
2018-07-12 08:26:29 +03:00
error = xfs_bmap_extents_to_btree ( tp , ip , & cur , 0 ,
& tmp_logflags , whichfork ) ;
2013-02-25 05:31:26 +04:00
* logflagsp | = tmp_logflags ;
if ( error )
goto done ;
2005-04-17 02:20:36 +04:00
}
2005-11-02 07:00:20 +03:00
2013-02-25 05:31:26 +04:00
/* clear out the allocated field, done with it now in any case. */
if ( cur ) {
cur - > bc_private . b . allocated = 0 ;
* curp = cur ;
2005-04-17 02:20:36 +04:00
}
2011-12-19 00:00:07 +04:00
2017-02-03 02:14:01 +03:00
xfs_bmap_check_leaf_extents ( * curp , ip , whichfork ) ;
2013-02-25 05:31:26 +04:00
done :
* logflagsp | = rval ;
2005-04-17 02:20:36 +04:00
return error ;
2013-02-25 05:31:26 +04:00
# undef LEFT
# undef RIGHT
# undef PREV
2005-04-17 02:20:36 +04:00
}
/*
2013-02-25 05:31:26 +04:00
* Convert a hole to a delayed allocation .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
STATIC void
xfs_bmap_add_extent_hole_delay (
xfs_inode_t * ip , /* incore inode pointer */
2016-10-03 19:11:32 +03:00
int whichfork ,
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor * icur ,
2013-02-25 05:31:26 +04:00
xfs_bmbt_irec_t * new ) /* new data to add to file extents */
2005-04-17 02:20:36 +04:00
{
2018-07-18 02:51:50 +03:00
struct xfs_ifork * ifp ; /* inode fork pointer */
2013-02-25 05:31:26 +04:00
xfs_bmbt_irec_t left ; /* left neighbor extent entry */
xfs_filblks_t newlen = 0 ; /* new indirect size */
xfs_filblks_t oldlen = 0 ; /* old indirect size */
xfs_bmbt_irec_t right ; /* right neighbor extent entry */
2017-10-19 21:02:29 +03:00
int state = xfs_bmap_fork_to_state ( whichfork ) ;
2017-10-18 00:16:23 +03:00
xfs_filblks_t temp ; /* temp for indirect calculations */
2005-04-17 02:20:36 +04:00
2016-10-03 19:11:32 +03:00
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
2013-02-25 05:31:26 +04:00
ASSERT ( isnullstartblock ( new - > br_startblock ) ) ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Check and set flags if this segment has a left neighbor
2005-04-17 02:20:36 +04:00
*/
2017-11-03 20:34:43 +03:00
if ( xfs_iext_peek_prev_extent ( ifp , icur , & left ) ) {
2013-02-25 05:31:26 +04:00
state | = BMAP_LEFT_VALID ;
if ( isnullstartblock ( left . br_startblock ) )
state | = BMAP_LEFT_DELAY ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
/*
* Check and set flags if the current ( right ) segment exists .
* If it doesn ' t exist , we ' re converting the hole at end - of - file .
*/
2017-11-03 20:34:43 +03:00
if ( xfs_iext_get_extent ( ifp , icur , & right ) ) {
2013-02-25 05:31:26 +04:00
state | = BMAP_RIGHT_VALID ;
if ( isnullstartblock ( right . br_startblock ) )
state | = BMAP_RIGHT_DELAY ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Set contiguity flags on the left and right neighbors .
* Don ' t let extents get too large , even if the pieces are contiguous .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( ( state & BMAP_LEFT_VALID ) & & ( state & BMAP_LEFT_DELAY ) & &
left . br_startoff + left . br_blockcount = = new - > br_startoff & &
left . br_blockcount + new - > br_blockcount < = MAXEXTLEN )
state | = BMAP_LEFT_CONTIG ;
if ( ( state & BMAP_RIGHT_VALID ) & & ( state & BMAP_RIGHT_DELAY ) & &
new - > br_startoff + new - > br_blockcount = = right . br_startoff & &
new - > br_blockcount + right . br_blockcount < = MAXEXTLEN & &
( ! ( state & BMAP_LEFT_CONTIG ) | |
( left . br_blockcount + new - > br_blockcount +
right . br_blockcount < = MAXEXTLEN ) ) )
state | = BMAP_RIGHT_CONTIG ;
2008-11-17 09:37:10 +03:00
/*
2013-02-25 05:31:26 +04:00
* Switch out based on the contiguity flags .
2008-11-17 09:37:10 +03:00
*/
2013-02-25 05:31:26 +04:00
switch ( state & ( BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG ) ) {
case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG :
/*
* New allocation is contiguous with delayed allocations
* on the left and on the right .
* Merge all three into a single extent record .
*/
temp = left . br_blockcount + new - > br_blockcount +
right . br_blockcount ;
2008-11-17 09:37:10 +03:00
2013-02-25 05:31:26 +04:00
oldlen = startblockval ( left . br_startblock ) +
startblockval ( new - > br_startblock ) +
startblockval ( right . br_startblock ) ;
xfs: handle indlen shortage on delalloc extent merge
When a delalloc extent is created, it can be merged with pre-existing,
contiguous, delalloc extents. When this occurs,
xfs_bmap_add_extent_hole_delay() merges the extents along with the
associated indirect block reservations. The expectation here is that the
combined worst case indlen reservation is always less than or equal to
the indlen reservation for the individual extents.
This is not always the case, however, as existing extents can less than
the expected indlen reservation if the extent was previously split due
to a hole punch. If a new extent merges with such an extent, the total
indlen requirement may be larger than the sum of the indlen reservations
held by both extents.
xfs_bmap_add_extent_hole_delay() assumes that the worst case indlen
reservation is always available and assigns it to the merged extent
without consideration for the indlen held by the pre-existing extent. As
a result, the subsequent xfs_mod_fdblocks() call can attempt an
unintentional allocation rather than a free (indicated by an ASSERT()
failure). Further, if the allocation happens to fail in this context,
the failure goes unhandled and creates a filesystem wide block
accounting inconsistency.
Fix xfs_bmap_add_extent_hole_delay() to function as designed. Cap the
indlen reservation assigned to the merged extent to the sum of the
indlen reservations held by each of the individual extents.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-02-14 09:48:18 +03:00
newlen = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( ip , temp ) ,
oldlen ) ;
2017-10-18 00:16:23 +03:00
left . br_startblock = nullstartblock ( newlen ) ;
left . br_blockcount = temp ;
2005-04-17 02:20:36 +04:00
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( ip , icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & left ) ;
2013-02-25 05:31:26 +04:00
break ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_CONTIG :
/*
* New allocation is contiguous with a delayed allocation
* on the left .
* Merge the new allocation with the left neighbor .
*/
temp = left . br_blockcount + new - > br_blockcount ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
oldlen = startblockval ( left . br_startblock ) +
startblockval ( new - > br_startblock ) ;
xfs: handle indlen shortage on delalloc extent merge
When a delalloc extent is created, it can be merged with pre-existing,
contiguous, delalloc extents. When this occurs,
xfs_bmap_add_extent_hole_delay() merges the extents along with the
associated indirect block reservations. The expectation here is that the
combined worst case indlen reservation is always less than or equal to
the indlen reservation for the individual extents.
This is not always the case, however, as existing extents can less than
the expected indlen reservation if the extent was previously split due
to a hole punch. If a new extent merges with such an extent, the total
indlen requirement may be larger than the sum of the indlen reservations
held by both extents.
xfs_bmap_add_extent_hole_delay() assumes that the worst case indlen
reservation is always available and assigns it to the merged extent
without consideration for the indlen held by the pre-existing extent. As
a result, the subsequent xfs_mod_fdblocks() call can attempt an
unintentional allocation rather than a free (indicated by an ASSERT()
failure). Further, if the allocation happens to fail in this context,
the failure goes unhandled and creates a filesystem wide block
accounting inconsistency.
Fix xfs_bmap_add_extent_hole_delay() to function as designed. Cap the
indlen reservation assigned to the merged extent to the sum of the
indlen reservations held by each of the individual extents.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-02-14 09:48:18 +03:00
newlen = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( ip , temp ) ,
oldlen ) ;
2017-10-18 00:16:23 +03:00
left . br_blockcount = temp ;
left . br_startblock = nullstartblock ( newlen ) ;
2017-11-03 20:34:39 +03:00
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & left ) ;
2013-02-25 05:31:26 +04:00
break ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_RIGHT_CONTIG :
/*
* New allocation is contiguous with a delayed allocation
* on the right .
* Merge the new allocation with the right neighbor .
*/
temp = new - > br_blockcount + right . br_blockcount ;
oldlen = startblockval ( new - > br_startblock ) +
startblockval ( right . br_startblock ) ;
xfs: handle indlen shortage on delalloc extent merge
When a delalloc extent is created, it can be merged with pre-existing,
contiguous, delalloc extents. When this occurs,
xfs_bmap_add_extent_hole_delay() merges the extents along with the
associated indirect block reservations. The expectation here is that the
combined worst case indlen reservation is always less than or equal to
the indlen reservation for the individual extents.
This is not always the case, however, as existing extents can less than
the expected indlen reservation if the extent was previously split due
to a hole punch. If a new extent merges with such an extent, the total
indlen requirement may be larger than the sum of the indlen reservations
held by both extents.
xfs_bmap_add_extent_hole_delay() assumes that the worst case indlen
reservation is always available and assigns it to the merged extent
without consideration for the indlen held by the pre-existing extent. As
a result, the subsequent xfs_mod_fdblocks() call can attempt an
unintentional allocation rather than a free (indicated by an ASSERT()
failure). Further, if the allocation happens to fail in this context,
the failure goes unhandled and creates a filesystem wide block
accounting inconsistency.
Fix xfs_bmap_add_extent_hole_delay() to function as designed. Cap the
indlen reservation assigned to the merged extent to the sum of the
indlen reservations held by each of the individual extents.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-02-14 09:48:18 +03:00
newlen = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( ip , temp ) ,
oldlen ) ;
2017-10-18 00:16:23 +03:00
right . br_startoff = new - > br_startoff ;
right . br_startblock = nullstartblock ( newlen ) ;
right . br_blockcount = temp ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & right ) ;
2013-02-25 05:31:26 +04:00
break ;
case 0 :
/*
* New allocation is not contiguous with another
* delayed allocation .
* Insert a new entry .
*/
oldlen = newlen = 0 ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , icur , new , state ) ;
2013-02-25 05:31:26 +04:00
break ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
if ( oldlen ! = newlen ) {
ASSERT ( oldlen > newlen ) ;
2015-02-23 13:22:03 +03:00
xfs_mod_fdblocks ( ip - > i_mount , ( int64_t ) ( oldlen - newlen ) ,
false ) ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Nothing to do for disk quota accounting here .
2005-04-17 02:20:36 +04:00
*/
2019-04-26 04:26:22 +03:00
xfs_mod_delalloc ( ip - > i_mount , ( int64_t ) newlen - oldlen ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
2013-02-25 05:31:26 +04:00
* Convert a hole to a real allocation .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
STATIC int /* error */
xfs_bmap_add_extent_hole_real (
2017-04-12 02:45:54 +03:00
struct xfs_trans * tp ,
struct xfs_inode * ip ,
int whichfork ,
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor * icur ,
2017-04-12 02:45:54 +03:00
struct xfs_btree_cur * * curp ,
struct xfs_bmbt_irec * new ,
2018-05-09 20:02:32 +03:00
int * logflagsp ,
int flags )
2011-09-19 00:40:53 +04:00
{
2017-04-12 02:45:54 +03:00
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
struct xfs_mount * mp = ip - > i_mount ;
struct xfs_btree_cur * cur = * curp ;
2013-02-25 05:31:26 +04:00
int error ; /* error return value */
int i ; /* temp state */
xfs_bmbt_irec_t left ; /* left neighbor extent entry */
xfs_bmbt_irec_t right ; /* right neighbor extent entry */
int rval = 0 ; /* return value (logging flags) */
2017-10-19 21:02:29 +03:00
int state = xfs_bmap_fork_to_state ( whichfork ) ;
2017-10-18 00:16:24 +03:00
struct xfs_bmbt_irec old ;
2011-09-19 00:40:53 +04:00
2013-02-25 05:31:26 +04:00
ASSERT ( ! isnullstartblock ( new - > br_startblock ) ) ;
2017-04-12 02:45:54 +03:00
ASSERT ( ! cur | | ! ( cur - > bc_private . b . flags & XFS_BTCUR_BPRV_WASDEL ) ) ;
2011-09-19 00:40:53 +04:00
2015-10-12 10:21:22 +03:00
XFS_STATS_INC ( mp , xs_add_exlist ) ;
2011-09-19 00:40:53 +04:00
2013-02-25 05:31:26 +04:00
/*
* Check and set flags if this segment has a left neighbor .
*/
2017-11-03 20:34:43 +03:00
if ( xfs_iext_peek_prev_extent ( ifp , icur , & left ) ) {
2013-02-25 05:31:26 +04:00
state | = BMAP_LEFT_VALID ;
if ( isnullstartblock ( left . br_startblock ) )
state | = BMAP_LEFT_DELAY ;
}
2011-09-19 00:40:53 +04:00
/*
2013-02-25 05:31:26 +04:00
* Check and set flags if this segment has a current value .
* Not true if we ' re inserting into the " hole " at eof .
2011-09-19 00:40:53 +04:00
*/
2017-11-03 20:34:43 +03:00
if ( xfs_iext_get_extent ( ifp , icur , & right ) ) {
2013-02-25 05:31:26 +04:00
state | = BMAP_RIGHT_VALID ;
if ( isnullstartblock ( right . br_startblock ) )
state | = BMAP_RIGHT_DELAY ;
}
2011-09-19 00:40:53 +04:00
2013-02-25 05:31:26 +04:00
/*
* We ' re inserting a real allocation between " left " and " right " .
* Set the contiguity flags . Don ' t let extents get too large .
*/
if ( ( state & BMAP_LEFT_VALID ) & & ! ( state & BMAP_LEFT_DELAY ) & &
left . br_startoff + left . br_blockcount = = new - > br_startoff & &
left . br_startblock + left . br_blockcount = = new - > br_startblock & &
left . br_state = = new - > br_state & &
left . br_blockcount + new - > br_blockcount < = MAXEXTLEN )
state | = BMAP_LEFT_CONTIG ;
2011-09-19 00:40:53 +04:00
2013-02-25 05:31:26 +04:00
if ( ( state & BMAP_RIGHT_VALID ) & & ! ( state & BMAP_RIGHT_DELAY ) & &
new - > br_startoff + new - > br_blockcount = = right . br_startoff & &
new - > br_startblock + new - > br_blockcount = = right . br_startblock & &
new - > br_state = = right . br_state & &
new - > br_blockcount + right . br_blockcount < = MAXEXTLEN & &
( ! ( state & BMAP_LEFT_CONTIG ) | |
left . br_blockcount + new - > br_blockcount +
right . br_blockcount < = MAXEXTLEN ) )
state | = BMAP_RIGHT_CONTIG ;
2011-09-19 00:40:53 +04:00
2013-02-25 05:31:26 +04:00
error = 0 ;
/*
* Select which case we ' re in here , and implement it .
*/
switch ( state & ( BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG ) ) {
case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG :
/*
* New allocation is contiguous with real allocations on the
* left and on the right .
* Merge all three into a single extent record .
*/
2017-10-18 00:16:24 +03:00
left . br_blockcount + = new - > br_blockcount + right . br_blockcount ;
2011-09-19 00:40:53 +04:00
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( ip , icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & left ) ;
2011-09-19 00:40:53 +04:00
2017-04-12 02:45:54 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) - 1 ) ;
if ( cur = = NULL ) {
2013-02-25 05:31:26 +04:00
rval = XFS_ILOG_CORE | xfs_ilog_fext ( whichfork ) ;
} else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & right , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-04-12 02:45:54 +03:00
error = xfs_btree_delete ( cur , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-04-12 02:45:54 +03:00
error = xfs_btree_decrement ( cur , 0 , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & left ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
}
break ;
2011-09-19 00:40:53 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_LEFT_CONTIG :
/*
* New allocation is contiguous with a real allocation
* on the left .
* Merge the new allocation with the left neighbor .
*/
2017-10-18 00:16:24 +03:00
old = left ;
left . br_blockcount + = new - > br_blockcount ;
2017-11-03 20:34:40 +03:00
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
xfs_iext_update_extent ( ip , state , icur , & left ) ;
2005-04-17 02:20:36 +04:00
2017-04-12 02:45:54 +03:00
if ( cur = = NULL ) {
2013-02-25 05:31:26 +04:00
rval = xfs_ilog_fext ( whichfork ) ;
} else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & old , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & left ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
}
break ;
2011-09-19 00:40:53 +04:00
2013-02-25 05:31:26 +04:00
case BMAP_RIGHT_CONTIG :
/*
* New allocation is contiguous with a real allocation
* on the right .
* Merge the new allocation with the right neighbor .
*/
2017-10-18 00:16:24 +03:00
old = right ;
2017-10-19 21:04:44 +03:00
2017-10-18 00:16:24 +03:00
right . br_startoff = new - > br_startoff ;
right . br_startblock = new - > br_startblock ;
right . br_blockcount + = new - > br_blockcount ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & right ) ;
2011-09-19 00:40:53 +04:00
2017-04-12 02:45:54 +03:00
if ( cur = = NULL ) {
2013-02-25 05:31:26 +04:00
rval = xfs_ilog_fext ( whichfork ) ;
} else {
rval = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & old , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & right ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
}
break ;
case 0 :
/*
* New allocation is not contiguous with another
* real allocation .
* Insert a new entry .
*/
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , icur , new , state ) ;
2017-04-12 02:45:54 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) + 1 ) ;
if ( cur = = NULL ) {
2013-02-25 05:31:26 +04:00
rval = XFS_ILOG_CORE | xfs_ilog_fext ( whichfork ) ;
} else {
rval = XFS_ILOG_CORE ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , new , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 0 , done ) ;
2017-04-12 02:45:54 +03:00
error = xfs_btree_insert ( cur , & i ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
}
break ;
}
2018-05-09 20:02:32 +03:00
/* add reverse mapping unless caller opted out */
if ( ! ( flags & XFS_BMAPI_NORMAP ) ) {
2018-08-01 17:20:34 +03:00
error = xfs_rmap_map_extent ( tp , ip , whichfork , new ) ;
2018-05-09 20:02:32 +03:00
if ( error )
goto done ;
}
xfs: propagate bmap updates to rmapbt
When we map, unmap, or convert an extent in a file's data or attr
fork, schedule a respective update in the rmapbt. Previous versions
of this patch required a 1:1 correspondence between bmap and rmap,
but this is no longer true as we now have ability to make interval
queries against the rmapbt.
We use the deferred operations code to handle redo operations
atomically and deadlock free. This plumbs in all five rmap actions
(map, unmap, convert extent, alloc, free); we'll use the first three
now for file data, and reflink will want the last two. We also add
an error injection site to test log recovery.
Finally, we need to fix the bmap shift extent code to adjust the
rmaps correctly.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 05:16:05 +03:00
2013-02-25 05:31:26 +04:00
/* convert to a btree if necessary */
2017-04-12 02:45:54 +03:00
if ( xfs_bmap_needs_btree ( ip , whichfork ) ) {
2013-02-25 05:31:26 +04:00
int tmp_logflags ; /* partial log flag return val */
2017-04-12 02:45:54 +03:00
ASSERT ( cur = = NULL ) ;
2018-07-12 08:26:29 +03:00
error = xfs_bmap_extents_to_btree ( tp , ip , curp , 0 ,
& tmp_logflags , whichfork ) ;
2017-04-12 02:45:54 +03:00
* logflagsp | = tmp_logflags ;
cur = * curp ;
2013-02-25 05:31:26 +04:00
if ( error )
goto done ;
}
/* clear out the allocated field, done with it now in any case. */
2017-04-12 02:45:54 +03:00
if ( cur )
cur - > bc_private . b . allocated = 0 ;
2013-02-25 05:31:26 +04:00
2017-04-12 02:45:54 +03:00
xfs_bmap_check_leaf_extents ( cur , ip , whichfork ) ;
2013-02-25 05:31:26 +04:00
done :
2017-04-12 02:45:54 +03:00
* logflagsp | = rval ;
2013-02-25 05:31:26 +04:00
return error ;
2005-04-17 02:20:36 +04:00
}
/*
2013-02-25 05:31:26 +04:00
* Functions used in the extent read , allocate and remove paths
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
/*
* Adjust the size of the new extent based on di_extsize and rt extsize .
*/
2013-08-12 14:49:42 +04:00
int
2013-02-25 05:31:26 +04:00
xfs_bmap_extsize_align (
xfs_mount_t * mp ,
xfs_bmbt_irec_t * gotp , /* next extent pointer */
xfs_bmbt_irec_t * prevp , /* previous extent pointer */
xfs_extlen_t extsz , /* align to this extent size */
int rt , /* is this a realtime inode? */
int eof , /* is extent at end-of-file? */
int delay , /* creating delalloc extent? */
int convert , /* overwriting unwritten extent? */
xfs_fileoff_t * offp , /* in/out: aligned offset */
xfs_extlen_t * lenp ) /* in/out: aligned length */
2008-10-30 09:14:43 +03:00
{
2013-02-25 05:31:26 +04:00
xfs_fileoff_t orig_off ; /* original offset */
xfs_extlen_t orig_alen ; /* original length */
xfs_fileoff_t orig_end ; /* original off+len */
xfs_fileoff_t nexto ; /* next file offset */
xfs_fileoff_t prevo ; /* previous file offset */
xfs_fileoff_t align_off ; /* temp for offset */
xfs_extlen_t align_alen ; /* temp for length */
xfs_extlen_t temp ; /* temp for calculations */
2008-10-30 09:14:43 +03:00
2013-02-25 05:31:26 +04:00
if ( convert )
2008-10-30 09:14:43 +03:00
return 0 ;
2013-02-25 05:31:26 +04:00
orig_off = align_off = * offp ;
orig_alen = align_alen = * lenp ;
orig_end = orig_off + orig_alen ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* If this request overlaps an existing extent , then don ' t
* attempt to perform any additional alignment .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( ! delay & & ! eof & &
( orig_off > = gotp - > br_startoff ) & &
( orig_end < = gotp - > br_startoff + gotp - > br_blockcount ) ) {
return 0 ;
}
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* If the file offset is unaligned vs . the extent size
* we need to align it . This will be possible unless
* the file was previously written with a kernel that didn ' t
* perform this alignment , or if a truncate shot us in the
* foot .
2005-04-17 02:20:36 +04:00
*/
2018-06-08 19:54:22 +03:00
div_u64_rem ( orig_off , extsz , & temp ) ;
2013-02-25 05:31:26 +04:00
if ( temp ) {
align_alen + = temp ;
align_off - = temp ;
2005-04-17 02:20:36 +04:00
}
xfs: extent size hints can round up extents past MAXEXTLEN
This results in BMBT corruption, as seen by this test:
# mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc
....
# mount /dev/vdc /mnt/scratch
# xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo
which results in this failure on a debug kernel:
XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211
....
Call Trace:
[<ffffffff814cf0ff>] xfs_bmbt_set_allf+0x8f/0x100
[<ffffffff814cf18d>] xfs_bmbt_set_all+0x1d/0x20
[<ffffffff814f2efe>] xfs_iext_insert+0x9e/0x120
[<ffffffff814c7956>] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70
[<ffffffff814c7956>] xfs_bmap_add_extent_hole_real+0x1c6/0xc70
[<ffffffff814caaab>] xfs_bmapi_write+0x72b/0xed0
[<ffffffff811c72ac>] ? kmem_cache_alloc+0x15c/0x170
[<ffffffff814fe070>] xfs_alloc_file_space+0x160/0x400
[<ffffffff81ddcc29>] ? down_write+0x29/0x60
[<ffffffff815063eb>] xfs_file_fallocate+0x29b/0x310
[<ffffffff811d2bc8>] ? __sb_start_write+0x58/0x120
[<ffffffff811e3e18>] ? do_vfs_ioctl+0x318/0x570
[<ffffffff811cd680>] vfs_fallocate+0x140/0x260
[<ffffffff811ce6f8>] SyS_fallocate+0x48/0x80
[<ffffffff81ddec09>] system_call_fastpath+0x12/0x17
The tracepoint that indicates the extent that triggered the assert
failure is:
xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1
Clearly indicating that the extent length is greater than MAXEXTLEN,
which is 2097151. A prior trace point shows the allocation was an
exact size match and that a length greater than MAXEXTLEN was asked
for:
xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152
^^^^^^^ ^^^^^^^
We don't see this problem with extent size hints through the IO path
because we can't do single IOs large enough to trigger MAXEXTLEN
allocation. fallocate(), OTOH, is not limited in its allocation
sizes and so needs help here.
The issue is that the extent size hint alignment is rounding up the
extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking
into account extent size hints when calculating the maximum extent
length to allocate. xfs_bmapi_reserve_delalloc() is already doing
this, but direct extent allocation is not.
Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is
wrong, and it works only because delayed allocation extents are not
limited in size to MAXEXTLEN in the in-core extent tree. hence this
calculation does not work for direct allocation, and the delalloc
code needs fixing. This may, in fact be the underlying bug that
occasionally causes transaction overruns in delayed allocation
extent conversion, so now we know it's wrong we should fix it, too.
Many thanks to Brian Foster for finding this problem during review
of this patch.
Hence the fix, after much code reading, is to allow
xfs_bmap_extsize_align() to align partial extents when full
alignment would extend the alignment past MAXEXTLEN. We can safely
do this because all callers have higher layer allocation loops that
already handle short allocations, and so will simply run another
allocation to cover the remainder of the requested allocation range
that we ignored during alignment. The advantage of this approach is
that it also removes the need for callers to do anything other than
limit their requests to MAXEXTLEN - they don't really need to be
aware of extent size hints at all.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 00:40:06 +03:00
/* Same adjustment for the end of the requested area. */
temp = ( align_alen % extsz ) ;
if ( temp )
align_alen + = extsz - temp ;
2005-04-17 02:20:36 +04:00
/*
xfs: extent size hints can round up extents past MAXEXTLEN
This results in BMBT corruption, as seen by this test:
# mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc
....
# mount /dev/vdc /mnt/scratch
# xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo
which results in this failure on a debug kernel:
XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211
....
Call Trace:
[<ffffffff814cf0ff>] xfs_bmbt_set_allf+0x8f/0x100
[<ffffffff814cf18d>] xfs_bmbt_set_all+0x1d/0x20
[<ffffffff814f2efe>] xfs_iext_insert+0x9e/0x120
[<ffffffff814c7956>] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70
[<ffffffff814c7956>] xfs_bmap_add_extent_hole_real+0x1c6/0xc70
[<ffffffff814caaab>] xfs_bmapi_write+0x72b/0xed0
[<ffffffff811c72ac>] ? kmem_cache_alloc+0x15c/0x170
[<ffffffff814fe070>] xfs_alloc_file_space+0x160/0x400
[<ffffffff81ddcc29>] ? down_write+0x29/0x60
[<ffffffff815063eb>] xfs_file_fallocate+0x29b/0x310
[<ffffffff811d2bc8>] ? __sb_start_write+0x58/0x120
[<ffffffff811e3e18>] ? do_vfs_ioctl+0x318/0x570
[<ffffffff811cd680>] vfs_fallocate+0x140/0x260
[<ffffffff811ce6f8>] SyS_fallocate+0x48/0x80
[<ffffffff81ddec09>] system_call_fastpath+0x12/0x17
The tracepoint that indicates the extent that triggered the assert
failure is:
xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1
Clearly indicating that the extent length is greater than MAXEXTLEN,
which is 2097151. A prior trace point shows the allocation was an
exact size match and that a length greater than MAXEXTLEN was asked
for:
xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152
^^^^^^^ ^^^^^^^
We don't see this problem with extent size hints through the IO path
because we can't do single IOs large enough to trigger MAXEXTLEN
allocation. fallocate(), OTOH, is not limited in its allocation
sizes and so needs help here.
The issue is that the extent size hint alignment is rounding up the
extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking
into account extent size hints when calculating the maximum extent
length to allocate. xfs_bmapi_reserve_delalloc() is already doing
this, but direct extent allocation is not.
Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is
wrong, and it works only because delayed allocation extents are not
limited in size to MAXEXTLEN in the in-core extent tree. hence this
calculation does not work for direct allocation, and the delalloc
code needs fixing. This may, in fact be the underlying bug that
occasionally causes transaction overruns in delayed allocation
extent conversion, so now we know it's wrong we should fix it, too.
Many thanks to Brian Foster for finding this problem during review
of this patch.
Hence the fix, after much code reading, is to allow
xfs_bmap_extsize_align() to align partial extents when full
alignment would extend the alignment past MAXEXTLEN. We can safely
do this because all callers have higher layer allocation loops that
already handle short allocations, and so will simply run another
allocation to cover the remainder of the requested allocation range
that we ignored during alignment. The advantage of this approach is
that it also removes the need for callers to do anything other than
limit their requests to MAXEXTLEN - they don't really need to be
aware of extent size hints at all.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 00:40:06 +03:00
* For large extent hint sizes , the aligned extent might be larger than
* MAXEXTLEN . In that case , reduce the size by an extsz so that it pulls
* the length back under MAXEXTLEN . The outer allocation loops handle
* short allocation just fine , so it is safe to do this . We only want to
* do it when we are forced to , though , because it means more allocation
* operations are required .
2005-04-17 02:20:36 +04:00
*/
xfs: extent size hints can round up extents past MAXEXTLEN
This results in BMBT corruption, as seen by this test:
# mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc
....
# mount /dev/vdc /mnt/scratch
# xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo
which results in this failure on a debug kernel:
XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211
....
Call Trace:
[<ffffffff814cf0ff>] xfs_bmbt_set_allf+0x8f/0x100
[<ffffffff814cf18d>] xfs_bmbt_set_all+0x1d/0x20
[<ffffffff814f2efe>] xfs_iext_insert+0x9e/0x120
[<ffffffff814c7956>] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70
[<ffffffff814c7956>] xfs_bmap_add_extent_hole_real+0x1c6/0xc70
[<ffffffff814caaab>] xfs_bmapi_write+0x72b/0xed0
[<ffffffff811c72ac>] ? kmem_cache_alloc+0x15c/0x170
[<ffffffff814fe070>] xfs_alloc_file_space+0x160/0x400
[<ffffffff81ddcc29>] ? down_write+0x29/0x60
[<ffffffff815063eb>] xfs_file_fallocate+0x29b/0x310
[<ffffffff811d2bc8>] ? __sb_start_write+0x58/0x120
[<ffffffff811e3e18>] ? do_vfs_ioctl+0x318/0x570
[<ffffffff811cd680>] vfs_fallocate+0x140/0x260
[<ffffffff811ce6f8>] SyS_fallocate+0x48/0x80
[<ffffffff81ddec09>] system_call_fastpath+0x12/0x17
The tracepoint that indicates the extent that triggered the assert
failure is:
xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1
Clearly indicating that the extent length is greater than MAXEXTLEN,
which is 2097151. A prior trace point shows the allocation was an
exact size match and that a length greater than MAXEXTLEN was asked
for:
xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152
^^^^^^^ ^^^^^^^
We don't see this problem with extent size hints through the IO path
because we can't do single IOs large enough to trigger MAXEXTLEN
allocation. fallocate(), OTOH, is not limited in its allocation
sizes and so needs help here.
The issue is that the extent size hint alignment is rounding up the
extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking
into account extent size hints when calculating the maximum extent
length to allocate. xfs_bmapi_reserve_delalloc() is already doing
this, but direct extent allocation is not.
Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is
wrong, and it works only because delayed allocation extents are not
limited in size to MAXEXTLEN in the in-core extent tree. Hence this
calculation does not work for direct allocation, and the delalloc
code needs fixing. This may, in fact, be the underlying bug that
occasionally causes transaction overruns in delayed allocation
extent conversion, so now we know it's wrong we should fix it, too.
Many thanks to Brian Foster for finding this problem during review
of this patch.
Hence the fix, after much code reading, is to allow
xfs_bmap_extsize_align() to align partial extents when full
alignment would extend the alignment past MAXEXTLEN. We can safely
do this because all callers have higher layer allocation loops that
already handle short allocations, and so will simply run another
allocation to cover the remainder of the requested allocation range
that we ignored during alignment. The advantage of this approach is
that it also removes the need for callers to do anything other than
limit their requests to MAXEXTLEN - they don't really need to be
aware of extent size hints at all.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 00:40:06 +03:00
while ( align_alen > MAXEXTLEN )
align_alen - = extsz ;
ASSERT ( align_alen < = MAXEXTLEN ) ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* If the previous block overlaps with this proposed allocation
* then move the start forward without adjusting the length .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( prevp - > br_startoff ! = NULLFILEOFF ) {
if ( prevp - > br_startblock = = HOLESTARTBLOCK )
prevo = prevp - > br_startoff ;
else
prevo = prevp - > br_startoff + prevp - > br_blockcount ;
} else
prevo = 0 ;
if ( align_off ! = orig_off & & align_off < prevo )
align_off = prevo ;
/*
* If the next block overlaps with this proposed allocation
* then move the start back without adjusting the length ,
* but not before offset 0.
* This may of course make the start overlap previous block ,
* and if we hit the offset 0 limit then the next block
* can still overlap too .
*/
if ( ! eof & & gotp - > br_startoff ! = NULLFILEOFF ) {
if ( ( delay & & gotp - > br_startblock = = HOLESTARTBLOCK ) | |
( ! delay & & gotp - > br_startblock = = DELAYSTARTBLOCK ) )
nexto = gotp - > br_startoff + gotp - > br_blockcount ;
else
nexto = gotp - > br_startoff ;
} else
nexto = NULLFILEOFF ;
if ( ! eof & &
align_off + align_alen ! = orig_end & &
align_off + align_alen > nexto )
align_off = nexto > align_alen ? nexto - align_alen : 0 ;
/*
* If we ' re now overlapping the next or previous extent that
* means we can ' t fit an extsz piece in this hole . Just move
* the start forward to the first valid spot and set
* the length so we hit the end .
*/
if ( align_off ! = orig_off & & align_off < prevo )
align_off = prevo ;
if ( align_off + align_alen ! = orig_end & &
align_off + align_alen > nexto & &
nexto ! = NULLFILEOFF ) {
ASSERT ( nexto > prevo ) ;
align_alen = nexto - align_off ;
}
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
/*
* If realtime , and the result isn ' t a multiple of the realtime
* extent size we need to remove blocks until it is .
*/
if ( rt & & ( temp = ( align_alen % mp - > m_sb . sb_rextsize ) ) ) {
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* We ' re not covering the original request , or
* we won ' t be able to once we fix the length .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( orig_off < align_off | |
orig_end > align_off + align_alen | |
align_alen - temp < orig_alen )
2014-06-25 08:58:08 +04:00
return - EINVAL ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Try to fix it by moving the start up .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( align_off + temp < = orig_off ) {
align_alen - = temp ;
align_off + = temp ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
/*
* Try to fix it by moving the end in .
*/
else if ( align_off + align_alen - temp > = orig_end )
align_alen - = temp ;
/*
* Set the start to the minimum then trim the length .
*/
else {
align_alen - = orig_off - align_off ;
align_off = orig_off ;
align_alen - = align_alen % mp - > m_sb . sb_rextsize ;
2005-04-17 02:20:36 +04:00
}
/*
2013-02-25 05:31:26 +04:00
* Result doesn ' t cover the request , fail it .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( orig_off < align_off | | orig_end > align_off + align_alen )
2014-06-25 08:58:08 +04:00
return - EINVAL ;
2013-02-25 05:31:26 +04:00
} else {
ASSERT ( orig_off > = align_off ) ;
xfs: extent size hints can round up extents past MAXEXTLEN
This results in BMBT corruption, as seen by this test:
# mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc
....
# mount /dev/vdc /mnt/scratch
# xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo
which results in this failure on a debug kernel:
XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211
....
Call Trace:
[<ffffffff814cf0ff>] xfs_bmbt_set_allf+0x8f/0x100
[<ffffffff814cf18d>] xfs_bmbt_set_all+0x1d/0x20
[<ffffffff814f2efe>] xfs_iext_insert+0x9e/0x120
[<ffffffff814c7956>] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70
[<ffffffff814c7956>] xfs_bmap_add_extent_hole_real+0x1c6/0xc70
[<ffffffff814caaab>] xfs_bmapi_write+0x72b/0xed0
[<ffffffff811c72ac>] ? kmem_cache_alloc+0x15c/0x170
[<ffffffff814fe070>] xfs_alloc_file_space+0x160/0x400
[<ffffffff81ddcc29>] ? down_write+0x29/0x60
[<ffffffff815063eb>] xfs_file_fallocate+0x29b/0x310
[<ffffffff811d2bc8>] ? __sb_start_write+0x58/0x120
[<ffffffff811e3e18>] ? do_vfs_ioctl+0x318/0x570
[<ffffffff811cd680>] vfs_fallocate+0x140/0x260
[<ffffffff811ce6f8>] SyS_fallocate+0x48/0x80
[<ffffffff81ddec09>] system_call_fastpath+0x12/0x17
The tracepoint that indicates the extent that triggered the assert
failure is:
xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1
Clearly indicating that the extent length is greater than MAXEXTLEN,
which is 2097151. A prior trace point shows the allocation was an
exact size match and that a length greater than MAXEXTLEN was asked
for:
xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152
^^^^^^^ ^^^^^^^
We don't see this problem with extent size hints through the IO path
because we can't do single IOs large enough to trigger MAXEXTLEN
allocation. fallocate(), OTOH, is not limited in its allocation
sizes and so needs help here.
The issue is that the extent size hint alignment is rounding up the
extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking
into account extent size hints when calculating the maximum extent
length to allocate. xfs_bmapi_reserve_delalloc() is already doing
this, but direct extent allocation is not.
Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is
wrong, and it works only because delayed allocation extents are not
limited in size to MAXEXTLEN in the in-core extent tree. Hence this
calculation does not work for direct allocation, and the delalloc
code needs fixing. This may, in fact, be the underlying bug that
occasionally causes transaction overruns in delayed allocation
extent conversion, so now we know it's wrong we should fix it, too.
Many thanks to Brian Foster for finding this problem during review
of this patch.
Hence the fix, after much code reading, is to allow
xfs_bmap_extsize_align() to align partial extents when full
alignment would extend the alignment past MAXEXTLEN. We can safely
do this because all callers have higher layer allocation loops that
already handle short allocations, and so will simply run another
allocation to cover the remainder of the requested allocation range
that we ignored during alignment. The advantage of this approach is
that it also removes the need for callers to do anything other than
limit their requests to MAXEXTLEN - they don't really need to be
aware of extent size hints at all.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 00:40:06 +03:00
/* see MAXEXTLEN handling above */
ASSERT ( orig_end < = align_off + align_alen | |
align_alen + extsz > MAXEXTLEN ) ;
2005-04-17 02:20:36 +04:00
}
2009-12-15 02:14:59 +03:00
# ifdef DEBUG
2013-02-25 05:31:26 +04:00
if ( ! eof & & gotp - > br_startoff ! = NULLFILEOFF )
ASSERT ( align_off + align_alen < = gotp - > br_startoff ) ;
if ( prevp - > br_startoff ! = NULLFILEOFF )
ASSERT ( align_off > = prevp - > br_startoff + prevp - > br_blockcount ) ;
# endif
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
* lenp = align_alen ;
* offp = align_off ;
return 0 ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
/*
 * Multiplier applied to the allocation length when xfs_bmap_adjacent()
 * decides whether the file-offset gap to a neighbouring extent is small
 * enough to offset the target block number by that gap.
 */
# define XFS_ALLOC_GAP_UNITS 4
2013-08-12 14:49:42 +04:00
/*
 * Choose a target start block (ap->blkno) for a new allocation from the
 * extents neighbouring ap->offset.
 *
 * At EOF with a real previous extent, the target is the block following
 * that extent, advanced by the file-offset gap when the result is still
 * valid.  When not at EOF, one candidate is derived from the previous
 * extent (ap->prev) and one from the next extent (ap->got), and the one
 * with the smaller difference value is used; if only one candidate exists
 * it is used, and if neither does ap->blkno is left as the caller set it.
 *
 * For non-realtime allocations where the transaction already has a first
 * block, candidates outside that block's AG are discarded.
 */
void
2013-02-25 05:31:26 +04:00
xfs_bmap_adjacent (
2013-08-12 14:49:42 +04:00
struct xfs_bmalloca * ap ) /* bmap alloc argument struct */
2005-04-17 02:20:36 +04:00
{
2013-02-25 05:31:26 +04:00
xfs_fsblock_t adjust ; /* adjustment to block numbers */
xfs_agnumber_t fb_agno ; /* ag number of ap->firstblock */
xfs_mount_t * mp ; /* mount point structure */
int nullfb ; /* true if ap->firstblock isn't set */
int rt ; /* true if inode is realtime */
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
/*
 * ISVALID(x, y): for realtime allocations, x must lie below sb_rblocks;
 * otherwise x must be in the same AG as y, with an AG number below
 * sb_agcount and an AG block number below sb_agblocks.
 */
# define ISVALID(x,y) \
( rt ? \
( x ) < mp - > m_sb . sb_rblocks : \
XFS_FSB_TO_AGNO ( mp , x ) = = XFS_FSB_TO_AGNO ( mp , y ) & & \
XFS_FSB_TO_AGNO ( mp , x ) < mp - > m_sb . sb_agcount & & \
XFS_FSB_TO_AGBNO ( mp , x ) < mp - > m_sb . sb_agblocks )
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
mp = ap - > ip - > i_mount ;
2018-07-12 08:26:28 +03:00
nullfb = ap - > tp - > t_firstblock = = NULLFSBLOCK ;
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
rt = XFS_IS_REALTIME_INODE ( ap - > ip ) & &
xfs_alloc_is_userdata ( ap - > datatype ) ;
2018-07-12 08:26:28 +03:00
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO ( mp ,
ap - > tp - > t_firstblock ) ;
2013-02-25 05:31:26 +04:00
/*
* If allocating at eof , and there ' s a previous real block ,
* try to use its last block as our starting point .
*/
if ( ap - > eof & & ap - > prev . br_startoff ! = NULLFILEOFF & &
! isnullstartblock ( ap - > prev . br_startblock ) & &
ISVALID ( ap - > prev . br_startblock + ap - > prev . br_blockcount ,
ap - > prev . br_startblock ) ) {
ap - > blkno = ap - > prev . br_startblock + ap - > prev . br_blockcount ;
/*
* Adjust for the gap between prevp and us .
*/
adjust = ap - > offset -
( ap - > prev . br_startoff + ap - > prev . br_blockcount ) ;
if ( adjust & &
ISVALID ( ap - > blkno + adjust , ap - > prev . br_startblock ) )
ap - > blkno + = adjust ;
}
/*
* If not at eof , then compare the two neighbor blocks .
* Figure out whether either one gives us a good starting point ,
* and pick the better one .
*/
else if ( ! ap - > eof ) {
xfs_fsblock_t gotbno ; /* right side block number */
xfs_fsblock_t gotdiff = 0 ; /* right side difference */
xfs_fsblock_t prevbno ; /* left side block number */
xfs_fsblock_t prevdiff = 0 ; /* left side difference */
/*
* If there ' s a previous ( left ) block , select a requested
* start block based on it .
*/
if ( ap - > prev . br_startoff ! = NULLFILEOFF & &
! isnullstartblock ( ap - > prev . br_startblock ) & &
( prevbno = ap - > prev . br_startblock +
ap - > prev . br_blockcount ) & &
ISVALID ( prevbno , ap - > prev . br_startblock ) ) {
/*
* Calculate gap to end of previous block .
*/
adjust = prevdiff = ap - > offset -
( ap - > prev . br_startoff +
ap - > prev . br_blockcount ) ;
/*
* Figure the startblock based on the previous block ' s
* end and the gap size .
* Heuristic !
* If the gap is large relative to the piece we ' re
* allocating , or using it gives us an invalid block
* number , then just use the end of the previous block .
*/
if ( prevdiff < = XFS_ALLOC_GAP_UNITS * ap - > length & &
ISVALID ( prevbno + prevdiff ,
ap - > prev . br_startblock ) )
prevbno + = adjust ;
else
prevdiff + = adjust ;
/*
* If the firstblock forbids it , can ' t use it ,
* must use default .
*/
if ( ! rt & & ! nullfb & &
XFS_FSB_TO_AGNO ( mp , prevbno ) ! = fb_agno )
prevbno = NULLFSBLOCK ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
/*
* No previous block or can ' t follow it , just default .
*/
else
prevbno = NULLFSBLOCK ;
/*
* If there ' s a following ( right ) block , select a requested
* start block based on it .
*/
if ( ! isnullstartblock ( ap - > got . br_startblock ) ) {
/*
* Calculate gap to start of next block .
*/
adjust = gotdiff = ap - > got . br_startoff - ap - > offset ;
/*
* Figure the startblock based on the next block ' s
* start and the gap size .
*/
gotbno = ap - > got . br_startblock ;
/*
* Heuristic !
* If the gap is large relative to the piece we ' re
* allocating , or using it gives us an invalid block
* number , then just use the start of the next block
* offset by our length .
*/
if ( gotdiff < = XFS_ALLOC_GAP_UNITS * ap - > length & &
ISVALID ( gotbno - gotdiff , gotbno ) )
gotbno - = adjust ;
else if ( ISVALID ( gotbno - ap - > length , gotbno ) ) {
gotbno - = ap - > length ;
gotdiff + = adjust - ap - > length ;
} else
gotdiff + = adjust ;
/*
* If the firstblock forbids it , can ' t use it ,
* must use default .
*/
if ( ! rt & & ! nullfb & &
XFS_FSB_TO_AGNO ( mp , gotbno ) ! = fb_agno )
gotbno = NULLFSBLOCK ;
}
/*
* No next block , just default .
*/
else
gotbno = NULLFSBLOCK ;
/*
* If both valid , pick the better one , else the only good
* one , else ap - > blkno is already set ( to 0 or the inode block ) .
*/
if ( prevbno ! = NULLFSBLOCK & & gotbno ! = NULLFSBLOCK )
ap - > blkno = prevdiff < = gotdiff ? prevbno : gotbno ;
else if ( prevbno ! = NULLFSBLOCK )
ap - > blkno = prevbno ;
else if ( gotbno ! = NULLFSBLOCK )
ap - > blkno = gotbno ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
# undef ISVALID
2005-04-17 02:20:36 +04:00
}
2014-04-23 01:11:41 +04:00
/*
 * Raise *blen to the longest free extent length reported for AG @ag, if
 * that length is larger than the current *blen.
 *
 * If the per-AG free space data is not initialised (pagf_init clear),
 * xfs_alloc_pagf_init() is called with XFS_ALLOC_FLAG_TRYLOCK.  If the data
 * is still uninitialised afterwards, *notinit is set to 1 and *blen is left
 * untouched.
 *
 * The perag reference taken here is dropped on every path.  Returns 0, or
 * the error returned by xfs_alloc_pagf_init().
 */
static int
xfs_bmap_longest_free_extent (
struct xfs_trans * tp ,
xfs_agnumber_t ag ,
xfs_extlen_t * blen ,
int * notinit )
{
struct xfs_mount * mp = tp - > t_mountp ;
struct xfs_perag * pag ;
xfs_extlen_t longest ;
int error = 0 ;
pag = xfs_perag_get ( mp , ag ) ;
if ( ! pag - > pagf_init ) {
error = xfs_alloc_pagf_init ( mp , tp , ag , XFS_ALLOC_FLAG_TRYLOCK ) ;
if ( error )
goto out ;
if ( ! pag - > pagf_init ) {
* notinit = 1 ;
goto out ;
}
}
2018-04-06 20:09:42 +03:00
longest = xfs_alloc_longest_free_extent ( pag ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
xfs_alloc_min_freelist ( mp , pag ) ,
xfs_ag_resv_needed ( pag , XFS_AG_RESV_NONE ) ) ;
2014-04-23 01:11:41 +04:00
if ( * blen < longest )
* blen = longest ;
out :
xfs_perag_put ( pag ) ;
return error ;
}
/*
 * Set args->minlen from the best free extent length seen so far (*blen).
 *
 *  - If notinit is set, or *blen is below ap->minlen, use ap->minlen.
 *    (Callers in this file set notinit when an AG's free space data could
 *    not be initialised during the scan.)
 *  - Otherwise, if *blen is below args->maxlen, use *blen.
 *  - Otherwise use args->maxlen.
 *
 * *blen is only read here; it is passed by pointer but never written.
 */
static void
xfs_bmap_select_minlen (
struct xfs_bmalloca * ap ,
struct xfs_alloc_arg * args ,
xfs_extlen_t * blen ,
int notinit )
{
if ( notinit | | * blen < ap - > minlen ) {
/*
* Since we did a BUF_TRYLOCK above , it is possible that
* there is space for this request .
*/
args - > minlen = ap - > minlen ;
} else if ( * blen < args - > maxlen ) {
/*
* If the best seen length is less than the request length ,
* use the best as the minimum .
*/
args - > minlen = * blen ;
} else {
/*
* Otherwise we ' ve seen an extent as big as maxlen , use that
* as the minimum .
*/
args - > minlen = args - > maxlen ;
}
}
2011-09-19 00:40:47 +04:00
/*
 * Prepare @args for an allocation by scanning AGs for free space.
 *
 * The allocation type is set to XFS_ALLOCTYPE_START_BNO and args->total to
 * ap->total.  Starting at the AG of args->fsbno (AG 0 if that evaluates to
 * NULLAGNUMBER), AGs are examined in order, wrapping at sb_agcount, until
 * *blen reaches args->maxlen or every AG has been visited once.
 * args->minlen is then chosen by xfs_bmap_select_minlen().
 *
 * Returns 0, or the first error from xfs_bmap_longest_free_extent().
 */
STATIC int
2013-02-25 05:31:26 +04:00
xfs_bmap_btalloc_nullfb (
struct xfs_bmalloca * ap ,
struct xfs_alloc_arg * args ,
xfs_extlen_t * blen )
2011-09-19 00:40:47 +04:00
{
2013-02-25 05:31:26 +04:00
struct xfs_mount * mp = ap - > ip - > i_mount ;
xfs_agnumber_t ag , startag ;
int notinit = 0 ;
2011-09-19 00:40:47 +04:00
int error ;
2014-04-23 01:11:41 +04:00
args - > type = XFS_ALLOCTYPE_START_BNO ;
2013-02-25 05:31:26 +04:00
args - > total = ap - > total ;
2011-09-19 00:40:47 +04:00
2013-02-25 05:31:26 +04:00
startag = ag = XFS_FSB_TO_AGNO ( mp , args - > fsbno ) ;
if ( startag = = NULLAGNUMBER )
startag = ag = 0 ;
2011-09-19 00:40:47 +04:00
2013-02-25 05:31:26 +04:00
/* Stop early once an extent at least args->maxlen long has been seen. */
while ( * blen < args - > maxlen ) {
2014-04-23 01:11:41 +04:00
error = xfs_bmap_longest_free_extent ( args - > tp , ag , blen ,
& notinit ) ;
if ( error )
return error ;
2011-09-19 00:40:47 +04:00
2013-02-25 05:31:26 +04:00
/* Advance to the next AG, wrapping to 0; stop after a full lap. */
if ( + + ag = = mp - > m_sb . sb_agcount )
ag = 0 ;
if ( ag = = startag )
break ;
}
2011-09-19 00:40:47 +04:00
2014-04-23 01:11:41 +04:00
xfs_bmap_select_minlen ( ap , args , blen , notinit ) ;
return 0 ;
}
/*
 * Prepare @args for an allocation using the filestreams AG selection.
 *
 * The allocation type is set to XFS_ALLOCTYPE_NEAR_BNO and args->total to
 * ap->total.  Only the AG of args->fsbno (AG 0 if that evaluates to
 * NULLAGNUMBER) is examined at first; if its longest free extent is below
 * args->maxlen, xfs_filestream_new_ag() is asked for another AG and that
 * one is examined as well.  args->minlen is then chosen by
 * xfs_bmap_select_minlen(), and both ap->blkno and args->fsbno are pointed
 * at block 0 of the AG that ended up selected.
 *
 * Returns 0, or the first error from xfs_bmap_longest_free_extent() or
 * xfs_filestream_new_ag().
 */
STATIC int
xfs_bmap_btalloc_filestreams (
struct xfs_bmalloca * ap ,
struct xfs_alloc_arg * args ,
xfs_extlen_t * blen )
{
struct xfs_mount * mp = ap - > ip - > i_mount ;
xfs_agnumber_t ag ;
int notinit = 0 ;
int error ;
args - > type = XFS_ALLOCTYPE_NEAR_BNO ;
args - > total = ap - > total ;
ag = XFS_FSB_TO_AGNO ( mp , args - > fsbno ) ;
if ( ag = = NULLAGNUMBER )
ag = 0 ;
error = xfs_bmap_longest_free_extent ( args - > tp , ag , blen , & notinit ) ;
if ( error )
return error ;
/* Current AG has no extent of at least maxlen: ask for a new AG. */
if ( * blen < args - > maxlen ) {
error = xfs_filestream_new_ag ( ap , & ag ) ;
if ( error )
return error ;
error = xfs_bmap_longest_free_extent ( args - > tp , ag , blen ,
& notinit ) ;
if ( error )
return error ;
}
xfs_bmap_select_minlen ( ap , args , blen , notinit ) ;
2011-09-19 00:40:47 +04:00
/*
2014-04-23 01:11:41 +04:00
* Set the failure fallback case to look in the selected AG as stream
* may have moved .
2011-09-19 00:40:47 +04:00
*/
2014-04-23 01:11:41 +04:00
ap - > blkno = args - > fsbno = XFS_AGB_TO_FSB ( mp , ag , 0 ) ;
2011-09-19 00:40:47 +04:00
return 0 ;
}
2018-01-26 00:58:13 +03:00
/*
 * Update all inode and quota accounting for the allocation we just did.
 *
 * COW fork (XFS_BMAPI_COWFORK set in ap->flags):
 *  - converted delalloc extent (ap->wasdel): only xfs_mod_delalloc() is
 *    called with -args->len; nothing else changes.
 *  - allocation into a hole: i_delayed_blks grows by args->len and the
 *    transaction's XFS_TRANS_DQ_RES_BLKS is reduced by args->len.
 *
 * Data/attr fork:
 *  - di_nblocks grows by args->len and the inode core is logged.
 *  - for a converted delalloc extent, i_delayed_blks shrinks by args->len
 *    and xfs_mod_delalloc() is called with -args->len.
 *  - the dquot is updated by args->len under XFS_TRANS_DQ_DELBCOUNT
 *    (wasdel) or XFS_TRANS_DQ_BCOUNT.
 */
static void
xfs_bmap_btalloc_accounting (
struct xfs_bmalloca * ap ,
struct xfs_alloc_arg * args )
{
2018-01-19 20:05:48 +03:00
if ( ap - > flags & XFS_BMAPI_COWFORK ) {
/*
* COW fork blocks are in - core only and thus are treated as
* in - core quota reservation ( like delalloc blocks ) even when
* converted to real blocks . The quota reservation is not
* accounted to disk until blocks are remapped to the data
* fork . So if these blocks were previously delalloc , we
* already have quota reservation and there ' s nothing to do
* yet .
*/
2019-04-26 04:26:22 +03:00
if ( ap - > wasdel ) {
xfs_mod_delalloc ( ap - > ip - > i_mount , - ( int64_t ) args - > len ) ;
2018-01-19 20:05:48 +03:00
return ;
2019-04-26 04:26:22 +03:00
}
2018-01-19 20:05:48 +03:00
/*
* Otherwise , we ' ve allocated blocks in a hole . The transaction
* has acquired in - core quota reservation for this extent .
* Rather than account these as real blocks , however , we reduce
* the transaction quota reservation based on the allocation .
* This essentially transfers the transaction quota reservation
* to that of a delalloc extent .
*/
ap - > ip - > i_delayed_blks + = args - > len ;
xfs_trans_mod_dquot_byino ( ap - > tp , ap - > ip , XFS_TRANS_DQ_RES_BLKS ,
- ( long ) args - > len ) ;
return ;
}
/* data/attr fork only */
ap - > ip - > i_d . di_nblocks + = args - > len ;
2018-01-26 00:58:13 +03:00
xfs_trans_log_inode ( ap - > tp , ap - > ip , XFS_ILOG_CORE ) ;
2019-04-26 04:26:22 +03:00
if ( ap - > wasdel ) {
2018-01-26 00:58:13 +03:00
ap - > ip - > i_delayed_blks - = args - > len ;
2019-04-26 04:26:22 +03:00
xfs_mod_delalloc ( ap - > ip - > i_mount , - ( int64_t ) args - > len ) ;
}
2018-01-26 00:58:13 +03:00
xfs_trans_mod_dquot_byino ( ap - > tp , ap - > ip ,
ap - > wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT ,
args - > len ) ;
}
2013-02-25 05:31:26 +04:00
STATIC int
xfs_bmap_btalloc (
2013-08-12 14:49:42 +04:00
struct xfs_bmalloca * ap ) /* bmap alloc argument struct */
2011-09-19 00:40:48 +04:00
{
2013-02-25 05:31:26 +04:00
xfs_mount_t * mp ; /* mount point structure */
xfs_alloctype_t atype = 0 ; /* type for allocation routines */
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptable due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is one that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
xfs_extlen_t align = 0 ; /* minimum allocation alignment */
2013-02-25 05:31:26 +04:00
xfs_agnumber_t fb_agno ; /* ag number of ap->firstblock */
xfs_agnumber_t ag ;
xfs_alloc_arg_t args ;
2018-01-20 04:47:36 +03:00
xfs_fileoff_t orig_offset ;
xfs_extlen_t orig_length ;
2013-02-25 05:31:26 +04:00
xfs_extlen_t blen ;
xfs_extlen_t nextminlen = 0 ;
int nullfb ; /* true if ap->firstblock isn't set */
int isaligned ;
int tryagain ;
int error ;
2013-12-12 09:34:36 +04:00
int stripe_align ;
2011-09-19 00:40:48 +04:00
2013-02-25 05:31:26 +04:00
ASSERT ( ap - > length ) ;
2018-01-20 04:47:36 +03:00
orig_offset = ap - > offset ;
orig_length = ap - > length ;
2011-09-19 00:40:48 +04:00
2013-02-25 05:31:26 +04:00
mp = ap - > ip - > i_mount ;
2013-12-12 09:34:36 +04:00
/* stripe alignment for allocation is determined by mount parameters */
stripe_align = 0 ;
if ( mp - > m_swidth & & ( mp - > m_flags & XFS_MOUNT_SWALLOC ) )
stripe_align = mp - > m_swidth ;
else if ( mp - > m_dalign )
stripe_align = mp - > m_dalign ;
2016-10-03 19:11:43 +03:00
if ( ap - > flags & XFS_BMAPI_COWFORK )
align = xfs_get_cowextsz_hint ( ap - > ip ) ;
else if ( xfs_alloc_is_userdata ( ap - > datatype ) )
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptable due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is one that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
align = xfs_get_extsz_hint ( ap - > ip ) ;
2017-01-25 19:59:43 +03:00
if ( align ) {
2013-02-25 05:31:26 +04:00
error = xfs_bmap_extsize_align ( mp , & ap - > got , & ap - > prev ,
align , 0 , ap - > eof , 0 , ap - > conv ,
& ap - > offset , & ap - > length ) ;
ASSERT ( ! error ) ;
ASSERT ( ap - > length ) ;
2011-09-19 00:40:48 +04:00
}
2013-12-12 09:34:36 +04:00
2018-07-12 08:26:28 +03:00
nullfb = ap - > tp - > t_firstblock = = NULLFSBLOCK ;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO ( mp ,
ap - > tp - > t_firstblock ) ;
2013-02-25 05:31:26 +04:00
if ( nullfb ) {
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptable due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is one that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
if ( xfs_alloc_is_userdata ( ap - > datatype ) & &
xfs_inode_is_filestream ( ap - > ip ) ) {
2013-02-25 05:31:26 +04:00
ag = xfs_filestream_lookup_ag ( ap - > ip ) ;
ag = ( ag ! = NULLAGNUMBER ) ? ag : 0 ;
ap - > blkno = XFS_AGB_TO_FSB ( mp , ag , 0 ) ;
} else {
ap - > blkno = XFS_INO_TO_FSB ( mp , ap - > ip - > i_ino ) ;
}
} else
2018-07-12 08:26:28 +03:00
ap - > blkno = ap - > tp - > t_firstblock ;
2011-09-19 00:40:48 +04:00
2013-02-25 05:31:26 +04:00
xfs_bmap_adjacent ( ap ) ;
2011-09-19 00:40:48 +04:00
2013-02-25 05:31:26 +04:00
/*
* If allowed , use ap - > blkno ; otherwise must use firstblock since
* it ' s in the right allocation group .
*/
if ( nullfb | | XFS_FSB_TO_AGNO ( mp , ap - > blkno ) = = fb_agno )
;
else
2018-07-12 08:26:28 +03:00
ap - > blkno = ap - > tp - > t_firstblock ;
2013-02-25 05:31:26 +04:00
/*
* Normal allocation , done through xfs_alloc_vextent .
*/
tryagain = isaligned = 0 ;
memset ( & args , 0 , sizeof ( args ) ) ;
args . tp = ap - > tp ;
args . mp = mp ;
args . fsbno = ap - > blkno ;
2018-12-12 19:46:23 +03:00
args . oinfo = XFS_RMAP_OINFO_SKIP_UPDATE ;
2011-09-19 00:40:48 +04:00
2013-02-25 05:31:26 +04:00
/* Trim the allocation back to the maximum an AG can fit. */
2018-06-07 17:54:02 +03:00
args . maxlen = min ( ap - > length , mp - > m_ag_max_usable ) ;
2013-02-25 05:31:26 +04:00
blen = 0 ;
if ( nullfb ) {
2014-04-23 01:11:41 +04:00
/*
* Search for an allocation group with a single extent large
* enough for the request . If one isn ' t found , then adjust
* the minimum allocation size to the largest space found .
*/
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptable due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is one that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, this patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
if ( xfs_alloc_is_userdata ( ap - > datatype ) & &
xfs_inode_is_filestream ( ap - > ip ) )
2014-04-23 01:11:41 +04:00
error = xfs_bmap_btalloc_filestreams ( ap , & args , & blen ) ;
else
error = xfs_bmap_btalloc_nullfb ( ap , & args , & blen ) ;
2011-09-19 00:40:48 +04:00
if ( error )
return error ;
2018-08-01 17:20:31 +03:00
} else if ( ap - > tp - > t_flags & XFS_TRANS_LOWMODE ) {
2013-02-25 05:31:26 +04:00
if ( xfs_inode_is_filestream ( ap - > ip ) )
args . type = XFS_ALLOCTYPE_FIRST_AG ;
else
args . type = XFS_ALLOCTYPE_START_BNO ;
args . total = args . minlen = ap - > minlen ;
} else {
args . type = XFS_ALLOCTYPE_NEAR_BNO ;
args . total = ap - > total ;
args . minlen = ap - > minlen ;
2011-09-19 00:40:48 +04:00
}
2013-02-25 05:31:26 +04:00
/* apply extent size hints if obtained earlier */
2017-01-25 19:59:43 +03:00
if ( align ) {
2013-02-25 05:31:26 +04:00
args . prod = align ;
2018-06-08 19:54:22 +03:00
div_u64_rem ( ap - > offset , args . prod , & args . mod ) ;
if ( args . mod )
args . mod = args . prod - args . mod ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized, and it is unlikely that it ever will.
We have many places where PAGE_CACHE_SIZE is assumed to be equal to
PAGE_SIZE. And it's a constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is a revert of the changes to the
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are a few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
} else if ( mp - > m_sb . sb_blocksize > = PAGE_SIZE ) {
2013-02-25 05:31:26 +04:00
args . prod = 1 ;
args . mod = 0 ;
} else {
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized, and it is unlikely that it ever will.
We have many places where PAGE_CACHE_SIZE is assumed to be equal to
PAGE_SIZE. And it's a constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is a revert of the changes to the
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are a few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
args . prod = PAGE_SIZE > > mp - > m_sb . sb_blocklog ;
2018-06-08 19:54:22 +03:00
div_u64_rem ( ap - > offset , args . prod , & args . mod ) ;
if ( args . mod )
args . mod = args . prod - args . mod ;
2011-09-19 00:40:48 +04:00
}
2011-09-19 00:40:50 +04:00
/*
2013-02-25 05:31:26 +04:00
* If we are not low on available data blocks , and the
* underlying logical volume manager is a stripe , and
* the file offset is zero then try to allocate data
* blocks on stripe unit boundary .
* NOTE : ap - > aeof is only set if the allocation length
* is > = the stripe unit and the allocation offset is
* at the end of file .
2011-09-19 00:40:50 +04:00
*/
2018-08-01 17:20:31 +03:00
if ( ! ( ap - > tp - > t_flags & XFS_TRANS_LOWMODE ) & & ap - > aeof ) {
2013-02-25 05:31:26 +04:00
if ( ! ap - > offset ) {
2013-12-12 09:34:36 +04:00
args . alignment = stripe_align ;
2013-02-25 05:31:26 +04:00
atype = args . type ;
isaligned = 1 ;
/*
* Adjust for alignment
*/
if ( blen > args . alignment & & blen < = args . maxlen )
args . minlen = blen - args . alignment ;
args . minalignslop = 0 ;
} else {
/*
* First try an exact bno allocation .
* If it fails then do a near or start bno
* allocation with alignment turned on .
*/
atype = args . type ;
tryagain = 1 ;
args . type = XFS_ALLOCTYPE_THIS_BNO ;
args . alignment = 1 ;
/*
* Compute the minlen + alignment for the
* next case . Set slop so that the value
* of minlen + alignment + slop doesn ' t go up
* between the calls .
*/
2013-12-12 09:34:36 +04:00
if ( blen > stripe_align & & blen < = args . maxlen )
nextminlen = blen - stripe_align ;
2013-02-25 05:31:26 +04:00
else
nextminlen = args . minlen ;
2013-12-12 09:34:36 +04:00
if ( nextminlen + stripe_align > args . minlen + 1 )
2013-02-25 05:31:26 +04:00
args . minalignslop =
2013-12-12 09:34:36 +04:00
nextminlen + stripe_align -
2013-02-25 05:31:26 +04:00
args . minlen - 1 ;
else
args . minalignslop = 0 ;
2011-09-19 00:40:50 +04:00
}
} else {
2013-02-25 05:31:26 +04:00
args . alignment = 1 ;
args . minalignslop = 0 ;
2011-09-19 00:40:50 +04:00
}
2013-02-25 05:31:26 +04:00
args . minleft = ap - > minleft ;
args . wasdel = ap - > wasdel ;
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is in contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
args . resv = XFS_AG_RESV_NONE ;
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptable due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is one that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, this patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
args . datatype = ap - > datatype ;
if ( ap - > datatype & XFS_ALLOC_USERDATA_ZERO )
2015-11-03 04:27:22 +03:00
args . ip = ap - > ip ;
error = xfs_alloc_vextent ( & args ) ;
if ( error )
2013-02-25 05:31:26 +04:00
return error ;
2015-11-03 04:27:22 +03:00
2013-02-25 05:31:26 +04:00
if ( tryagain & & args . fsbno = = NULLFSBLOCK ) {
/*
* Exact allocation failed . Now try with alignment
* turned on .
*/
args . type = atype ;
args . fsbno = ap - > blkno ;
2013-12-12 09:34:36 +04:00
args . alignment = stripe_align ;
2013-02-25 05:31:26 +04:00
args . minlen = nextminlen ;
args . minalignslop = 0 ;
isaligned = 1 ;
if ( ( error = xfs_alloc_vextent ( & args ) ) )
return error ;
2011-09-19 00:40:50 +04:00
}
2013-02-25 05:31:26 +04:00
if ( isaligned & & args . fsbno = = NULLFSBLOCK ) {
/*
* allocation failed , so turn off alignment and
* try again .
*/
args . type = atype ;
args . fsbno = ap - > blkno ;
args . alignment = 0 ;
if ( ( error = xfs_alloc_vextent ( & args ) ) )
2011-09-19 00:40:50 +04:00
return error ;
}
2013-02-25 05:31:26 +04:00
if ( args . fsbno = = NULLFSBLOCK & & nullfb & &
args . minlen > ap - > minlen ) {
args . minlen = ap - > minlen ;
args . type = XFS_ALLOCTYPE_START_BNO ;
args . fsbno = ap - > blkno ;
if ( ( error = xfs_alloc_vextent ( & args ) ) )
return error ;
2011-09-19 00:40:50 +04:00
}
2013-02-25 05:31:26 +04:00
if ( args . fsbno = = NULLFSBLOCK & & nullfb ) {
args . fsbno = 0 ;
args . type = XFS_ALLOCTYPE_FIRST_AG ;
args . total = ap - > minlen ;
if ( ( error = xfs_alloc_vextent ( & args ) ) )
return error ;
2018-08-01 17:20:31 +03:00
ap - > tp - > t_flags | = XFS_TRANS_LOWMODE ;
2013-02-25 05:31:26 +04:00
}
if ( args . fsbno ! = NULLFSBLOCK ) {
/*
* check the allocation happened at the same or higher AG than
* the first block that was allocated .
*/
2018-07-12 08:26:28 +03:00
ASSERT ( ap - > tp - > t_firstblock = = NULLFSBLOCK | |
XFS_FSB_TO_AGNO ( mp , ap - > tp - > t_firstblock ) < =
2017-02-17 04:12:51 +03:00
XFS_FSB_TO_AGNO ( mp , args . fsbno ) ) ;
2011-09-19 00:40:50 +04:00
2013-02-25 05:31:26 +04:00
ap - > blkno = args . fsbno ;
2018-07-12 08:26:28 +03:00
if ( ap - > tp - > t_firstblock = = NULLFSBLOCK )
ap - > tp - > t_firstblock = args . fsbno ;
2017-02-17 04:12:51 +03:00
ASSERT ( nullfb | | fb_agno < = args . agno ) ;
2013-02-25 05:31:26 +04:00
ap - > length = args . len ;
2018-01-20 04:47:36 +03:00
/*
* If the extent size hint is active , we tried to round the
* caller ' s allocation request offset down to extsz and the
* length up to another extsz boundary . If we found a free
* extent we mapped it in starting at this new offset . If the
* newly mapped space isn ' t long enough to cover any of the
* range of offsets that was originally requested , move the
* mapping up so that we can fill as much of the caller ' s
* original request as possible . Free space is apparently
* very fragmented so we ' re unlikely to be able to satisfy the
* hints anyway .
*/
if ( ap - > length < = orig_length )
ap - > offset = orig_offset ;
else if ( ap - > offset + ap - > length < orig_offset + orig_length )
ap - > offset = orig_offset + orig_length - ap - > length ;
2018-01-26 00:58:13 +03:00
xfs_bmap_btalloc_accounting ( ap , & args ) ;
2013-02-25 05:31:26 +04:00
} else {
ap - > blkno = NULLFSBLOCK ;
ap - > length = 0 ;
}
return 0 ;
}
2011-09-19 00:40:50 +04:00
2013-02-25 05:31:26 +04:00
/*
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file .
* It figures out where to ask the underlying allocator to put the new extent .
*/
STATIC int
xfs_bmap_alloc (
2013-08-12 14:49:42 +04:00
struct xfs_bmalloca * ap ) /* bmap alloc argument struct */
2013-02-25 05:31:26 +04:00
{
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
if ( XFS_IS_REALTIME_INODE ( ap - > ip ) & &
xfs_alloc_is_userdata ( ap - > datatype ) )
2013-02-25 05:31:26 +04:00
return xfs_bmap_rtalloc ( ap ) ;
return xfs_bmap_btalloc ( ap ) ;
}
2011-09-19 00:40:54 +04:00
2016-10-20 07:51:50 +03:00
/*
 * Trim extent @irec so that it fits inside the logical block range
 * [@bno, @bno + @len).
 *
 * If the extent does not overlap the range at all, its br_blockcount is set
 * to zero and nothing else is touched.  Otherwise the front and/or the tail
 * of the extent is cut back to the range boundaries.
 */
void
xfs_trim_extent(
	struct xfs_bmbt_irec	*irec,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len)
{
	xfs_fileoff_t		range_end = bno + len;
	/* Cutting the front moves startoff and blockcount in step, so the
	 * extent's end offset stays the same for the tail check below. */
	xfs_fileoff_t		ext_end = irec->br_startoff +
					  irec->br_blockcount;
	xfs_fileoff_t		cut;

	/* No overlap: report an empty extent. */
	if (ext_end <= bno || irec->br_startoff >= range_end) {
		irec->br_blockcount = 0;
		return;
	}

	/* Extent starts before the range: drop the leading blocks. */
	if (irec->br_startoff < bno) {
		cut = bno - irec->br_startoff;

		/*
		 * Null start blocks are normalised to DELAYSTARTBLOCK; only
		 * start blocks that are neither DELAYSTARTBLOCK nor
		 * HOLESTARTBLOCK are advanced along with the offset.
		 */
		if (isnullstartblock(irec->br_startblock))
			irec->br_startblock = DELAYSTARTBLOCK;
		else if (irec->br_startblock != DELAYSTARTBLOCK &&
			 irec->br_startblock != HOLESTARTBLOCK)
			irec->br_startblock += cut;

		irec->br_startoff += cut;
		irec->br_blockcount -= cut;
	}

	/* Extent runs past the range: drop the trailing blocks. */
	if (range_end < ext_end)
		irec->br_blockcount -= ext_end - range_end;
}
2013-02-25 05:31:26 +04:00
/*
 * Trim the returned map to the required bounds.
 *
 * Fills @mval from the extent record @got.  When XFS_BMAPI_ENTIRE is set, or
 * when @got ends at or before @obno, the record is copied whole.  Otherwise
 * the mapping is clipped to start no earlier than @obno and to end no later
 * than @end, and *@bno is raised to @obno if it was below it.  In both cases
 * a null start block is reported as DELAYSTARTBLOCK.
 */
STATIC void
xfs_bmapi_trim_map(
	struct xfs_bmbt_irec	*mval,
	struct xfs_bmbt_irec	*got,
	xfs_fileoff_t		*bno,
	xfs_filblks_t		len,
	xfs_fileoff_t		obno,
	xfs_fileoff_t		end,
	int			n,
	int			flags)
{
	if (!(flags & XFS_BMAPI_ENTIRE) &&
	    got->br_startoff + got->br_blockcount > obno) {
		/* Never hand back anything below the original offset. */
		if (*bno < obno)
			*bno = obno;
		ASSERT((*bno >= obno) || (n == 0));
		ASSERT(*bno < end);

		mval->br_startoff = *bno;
		mval->br_startblock = isnullstartblock(got->br_startblock) ?
				DELAYSTARTBLOCK :
				got->br_startblock + (*bno - got->br_startoff);

		/*
		 * The mapping length is the smaller of what is left of the
		 * request and what is left of the extent past *bno.
		 */
		mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
				got->br_blockcount - (*bno - got->br_startoff));
		mval->br_state = got->br_state;
		ASSERT(mval->br_blockcount <= len);
		return;
	}

	/* Whole-extent case: copy the record as is. */
	*mval = *got;
	if (isnullstartblock(got->br_startblock))
		mval->br_startblock = DELAYSTARTBLOCK;
}
2013-02-25 05:31:26 +04:00
/*
 * Update and validate the extent map to return.
 *
 * Advances *@bno to the end of the mapping in the current slot (*@map) and
 * recomputes *@len as the distance from there to @end.  The current mapping
 * is then either folded into the previous slot (same start offset, physically
 * contiguous with matching state, or logically contiguous delalloc) or kept
 * as a slot of its own, in which case *@map and *@n are advanced.  The very
 * first mapping is not kept if it ends at or before @obno.
 */
STATIC void
xfs_bmapi_update_map(
	struct xfs_bmbt_irec	**map,
	xfs_fileoff_t		*bno,
	xfs_filblks_t		*len,
	xfs_fileoff_t		obno,
	xfs_fileoff_t		end,
	int			*n,
	int			flags)
{
	xfs_bmbt_irec_t		*cur = *map;
	xfs_bmbt_irec_t		*prev;
	int			keep_slot = 0;

	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
	       ((cur->br_startoff + cur->br_blockcount) <= end));
	ASSERT((flags & XFS_BMAPI_ENTIRE) || (cur->br_blockcount <= *len) ||
	       (cur->br_startoff < obno));

	*bno = cur->br_startoff + cur->br_blockcount;
	*len = end - *bno;

	if (*n > 0) {
		/* Only look at the previous slot once one has been filled. */
		prev = cur - 1;

		if (cur->br_startoff == prev->br_startoff) {
			/* update previous map with new information */
			ASSERT(cur->br_startblock == prev->br_startblock);
			ASSERT(cur->br_blockcount > prev->br_blockcount);
			ASSERT(cur->br_state == prev->br_state);
			prev->br_blockcount = cur->br_blockcount;
			prev->br_state = cur->br_state;
		} else if (cur->br_startblock != DELAYSTARTBLOCK &&
			   prev->br_startblock != DELAYSTARTBLOCK &&
			   prev->br_startblock != HOLESTARTBLOCK &&
			   cur->br_startblock ==
				prev->br_startblock + prev->br_blockcount &&
			   prev->br_state == cur->br_state) {
			/* Physically contiguous real extents: merge. */
			ASSERT(cur->br_startoff ==
			       prev->br_startoff + prev->br_blockcount);
			prev->br_blockcount += cur->br_blockcount;
		} else if (cur->br_startblock == DELAYSTARTBLOCK &&
			   prev->br_startblock == DELAYSTARTBLOCK &&
			   cur->br_startoff ==
				prev->br_startoff + prev->br_blockcount) {
			/* Logically contiguous delalloc extents: merge. */
			prev->br_blockcount += cur->br_blockcount;
			prev->br_state = cur->br_state;
		} else {
			keep_slot = 1;
		}
	} else if (*n != 0 ||
		   (cur->br_startoff + cur->br_blockcount) > obno) {
		/* First mapping is kept unless it ends at or before obno. */
		keep_slot = 1;
	}

	if (keep_slot) {
		cur++;
		(*n)++;
	}
	*map = cur;
}
/*
2013-02-25 05:31:26 +04:00
* Map file blocks to filesystem blocks without allocation .
2012-10-05 05:06:59 +04:00
*/
int
2013-02-25 05:31:26 +04:00
xfs_bmapi_read (
struct xfs_inode * ip ,
xfs_fileoff_t bno ,
2011-09-19 00:40:51 +04:00
xfs_filblks_t len ,
2013-02-25 05:31:26 +04:00
struct xfs_bmbt_irec * mval ,
int * nmap ,
2011-09-19 00:41:02 +04:00
int flags )
2011-09-19 00:40:51 +04:00
{
2013-02-25 05:31:26 +04:00
struct xfs_mount * mp = ip - > i_mount ;
struct xfs_ifork * ifp ;
struct xfs_bmbt_irec got ;
xfs_fileoff_t obno ;
xfs_fileoff_t end ;
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ;
2013-02-25 05:31:26 +04:00
int error ;
2016-11-24 03:39:43 +03:00
bool eof = false ;
2013-02-25 05:31:26 +04:00
int n = 0 ;
2016-10-03 19:11:32 +03:00
int whichfork = xfs_bmapi_whichfork ( flags ) ;
2011-09-19 00:40:51 +04:00
2013-02-25 05:31:26 +04:00
ASSERT ( * nmap > = 1 ) ;
ASSERT ( ! ( flags & ~ ( XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE |
2018-07-12 08:26:01 +03:00
XFS_BMAPI_COWFORK ) ) ) ;
2013-12-07 00:30:17 +04:00
ASSERT ( xfs_isilocked ( ip , XFS_ILOCK_SHARED | XFS_ILOCK_EXCL ) ) ;
2011-09-19 00:40:51 +04:00
2013-02-25 05:31:26 +04:00
if ( unlikely ( XFS_TEST_ERROR (
( XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_EXTENTS & &
XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_BTREE ) ,
2017-06-21 03:54:47 +03:00
mp , XFS_ERRTAG_BMAPIFORMAT ) ) ) {
2013-02-25 05:31:26 +04:00
XFS_ERROR_REPORT ( " xfs_bmapi_read " , XFS_ERRLEVEL_LOW , mp ) ;
2014-06-25 08:58:08 +04:00
return - EFSCORRUPTED ;
2013-02-25 05:31:26 +04:00
}
2011-09-19 00:40:51 +04:00
2013-02-25 05:31:26 +04:00
if ( XFS_FORCED_SHUTDOWN ( mp ) )
2014-06-25 08:58:08 +04:00
return - EIO ;
2013-02-25 05:31:26 +04:00
2015-10-12 10:21:22 +03:00
XFS_STATS_INC ( mp , xs_blk_mapr ) ;
2013-02-25 05:31:26 +04:00
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
2016-10-03 19:11:32 +03:00
/* No CoW fork? Return a hole. */
if ( whichfork = = XFS_COW_FORK & & ! ifp ) {
mval - > br_startoff = bno ;
mval - > br_startblock = HOLESTARTBLOCK ;
mval - > br_blockcount = len ;
mval - > br_state = XFS_EXT_NORM ;
* nmap = 1 ;
return 0 ;
}
2013-02-25 05:31:26 +04:00
if ( ! ( ifp - > if_flags & XFS_IFEXTENTS ) ) {
error = xfs_iread_extents ( NULL , ip , whichfork ) ;
if ( error )
return error ;
2011-09-19 00:40:51 +04:00
}
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_lookup_extent ( ip , ifp , bno , & icur , & got ) )
2016-11-24 03:39:43 +03:00
eof = true ;
2013-02-25 05:31:26 +04:00
end = bno + len ;
obno = bno ;
2011-09-19 00:40:51 +04:00
2013-02-25 05:31:26 +04:00
while ( bno < end & & n < * nmap ) {
/* Reading past eof, act as though there's a hole up to end. */
if ( eof )
got . br_startoff = end ;
if ( got . br_startoff > bno ) {
/* Reading in a hole. */
mval - > br_startoff = bno ;
mval - > br_startblock = HOLESTARTBLOCK ;
mval - > br_blockcount =
XFS_FILBLKS_MIN ( len , got . br_startoff - bno ) ;
mval - > br_state = XFS_EXT_NORM ;
bno + = mval - > br_blockcount ;
len - = mval - > br_blockcount ;
mval + + ;
n + + ;
continue ;
}
2011-09-19 00:40:51 +04:00
2013-02-25 05:31:26 +04:00
/* set up the extent map to return. */
xfs_bmapi_trim_map ( mval , & got , & bno , len , obno , end , n , flags ) ;
xfs_bmapi_update_map ( & mval , & bno , & len , obno , end , & n , flags ) ;
/* If we're done, stop now. */
if ( bno > = end | | n > = * nmap )
break ;
/* Else go on to the next record. */
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_next_extent ( ifp , & icur , & got ) )
2016-11-24 03:39:43 +03:00
eof = true ;
2013-02-25 05:31:26 +04:00
}
* nmap = n ;
2011-09-19 00:40:51 +04:00
return 0 ;
}
2017-03-08 20:58:08 +03:00
/*
* Add a delayed allocation extent to an inode . Blocks are reserved from the
* global pool and the extent inserted into the inode in - core extent tree .
*
* On entry , got refers to the first extent beyond the offset of the extent to
* allocate or eof is specified if no such extent exists . On return , got refers
* to the extent record that was inserted to the inode fork .
*
* Note that the allocated extent may have been merged with contiguous extents
* during insertion into the inode fork . Thus , got does not reflect the current
* state of the inode fork on return . If necessary , the caller can use icur to
* look up the updated record in the inode fork .
*/
2016-09-19 04:10:21 +03:00
int
2013-02-25 05:31:26 +04:00
xfs_bmapi_reserve_delalloc (
struct xfs_inode * ip ,
2016-10-03 19:11:32 +03:00
int whichfork ,
2016-11-28 06:57:42 +03:00
xfs_fileoff_t off ,
2013-02-25 05:31:26 +04:00
xfs_filblks_t len ,
2016-11-28 06:57:42 +03:00
xfs_filblks_t prealloc ,
2013-02-25 05:31:26 +04:00
struct xfs_bmbt_irec * got ,
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor * icur ,
2013-02-25 05:31:26 +04:00
int eof )
2005-04-17 02:20:36 +04:00
{
2011-09-19 00:40:52 +04:00
struct xfs_mount * mp = ip - > i_mount ;
2016-10-03 19:11:32 +03:00
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
2013-02-25 05:31:26 +04:00
xfs_extlen_t alen ;
xfs_extlen_t indlen ;
int error ;
2016-11-28 06:57:42 +03:00
xfs_fileoff_t aoff = off ;
2011-09-19 00:40:52 +04:00
2016-11-28 06:57:42 +03:00
/*
* Cap the alloc length . Keep track of prealloc so we know whether to
* tag the inode before we return .
*/
alen = XFS_FILBLKS_MIN ( len + prealloc , MAXEXTLEN ) ;
2013-02-25 05:31:26 +04:00
if ( ! eof )
alen = XFS_FILBLKS_MIN ( alen , got - > br_startoff - aoff ) ;
2016-11-28 06:57:42 +03:00
if ( prealloc & & alen > = len )
prealloc = alen - len ;
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
/* Figure out the extent size, adjust alen */
2018-01-24 00:56:11 +03:00
if ( whichfork = = XFS_COW_FORK ) {
2016-11-24 03:39:44 +03:00
struct xfs_bmbt_irec prev ;
2018-01-24 00:56:11 +03:00
xfs_extlen_t extsz = xfs_get_cowextsz_hint ( ip ) ;
2016-11-24 03:39:44 +03:00
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_peek_prev_extent ( ifp , icur , & prev ) )
2016-11-24 03:39:44 +03:00
prev . br_startoff = NULLFILEOFF ;
2018-01-24 00:56:11 +03:00
error = xfs_bmap_extsize_align ( mp , got , & prev , extsz , 0 , eof ,
2013-02-25 05:31:26 +04:00
1 , 0 , & aoff , & alen ) ;
ASSERT ( ! error ) ;
}
/*
* Make a transaction - less quota reservation for delayed allocation
* blocks . This number gets adjusted later . We return if we haven ' t
* allocated blocks already inside this loop .
*/
error = xfs_trans_reserve_quota_nblks ( NULL , ip , ( long ) alen , 0 ,
2018-01-24 00:56:11 +03:00
XFS_QMOPT_RES_REGBLKS ) ;
2013-02-25 05:31:26 +04:00
if ( error )
return error ;
/*
* Split changing sb for alen and indlen since they could be coming
* from different places .
*/
indlen = ( xfs_extlen_t ) xfs_bmap_worst_indlen ( ip , alen ) ;
ASSERT ( indlen > 0 ) ;
2018-01-24 00:56:11 +03:00
error = xfs_mod_fdblocks ( mp , - ( ( int64_t ) alen ) , false ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto out_unreserve_quota ;
2015-02-23 13:22:03 +03:00
error = xfs_mod_fdblocks ( mp , - ( ( int64_t ) indlen ) , false ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto out_unreserve_blocks ;
ip - > i_delayed_blks + = alen ;
2019-04-26 04:26:22 +03:00
xfs_mod_delalloc ( ip - > i_mount , alen + indlen ) ;
2013-02-25 05:31:26 +04:00
got - > br_startoff = aoff ;
got - > br_startblock = nullstartblock ( indlen ) ;
got - > br_blockcount = alen ;
got - > br_state = XFS_EXT_NORM ;
2017-11-03 20:34:43 +03:00
xfs_bmap_add_extent_hole_delay ( ip , whichfork , icur , got ) ;
2013-02-25 05:31:26 +04:00
2016-11-28 06:57:42 +03:00
/*
* Tag the inode if blocks were preallocated . Note that COW fork
* preallocation can occur at the start or end of the extent , even when
* prealloc = = 0 , so we must also check the aligned offset and length .
*/
if ( whichfork = = XFS_DATA_FORK & & prealloc )
xfs_inode_set_eofblocks_tag ( ip ) ;
if ( whichfork = = XFS_COW_FORK & & ( prealloc | | aoff < off | | alen > len ) )
xfs_inode_set_cowblocks_tag ( ip ) ;
2013-02-25 05:31:26 +04:00
return 0 ;
out_unreserve_blocks :
2018-01-24 00:56:11 +03:00
xfs_mod_fdblocks ( mp , alen , false ) ;
2013-02-25 05:31:26 +04:00
out_unreserve_quota :
if ( XFS_IS_QUOTA_ON ( mp ) )
2018-01-24 00:56:11 +03:00
xfs_trans_unreserve_quota_nblks ( NULL , ip , ( long ) alen , 0 ,
XFS_QMOPT_RES_REGBLKS ) ;
2013-02-25 05:31:26 +04:00
return error ;
}
xfs: refine the allocation stack switch
The allocation stack switch at xfs_bmapi_allocate() has served its
purpose, but is no longer a sufficient solution to the stack usage
problem we have in the XFS allocation path.
Whilst the kernel stack size is now 16k, that is not a valid reason
for undoing all our "keep stack usage down" modifications. What it
does allow us to do is have the freedom to refine and perfect the
modifications knowing that if we get it wrong it won't blow up in
our faces - we have a safety net now.
This is important because we still have the issue of older kernels
having smaller stacks and that they are still supported and are
demonstrating a wide range of different stack overflows. Red Hat
has several open bugs for allocation based stack overflows from
directory modifications and direct IO block allocation and these
problems still need to be solved. If we can solve them upstream,
then distro's won't need to bake their own unique solutions.
To that end, I've observed that every allocation based stack
overflow report has had a specific characteristic - it has happened
during or directly after a bmap btree block split. That event
requires a new block to be allocated to the tree, and so we
effectively stack one allocation stack on top of another, and that's
when we get into trouble.
A further observation is that bmap btree block splits are much rarer
than writeback allocation - over a range of different workloads I've
observed the ratio of bmap btree inserts to splits ranges from 100:1
(xfstests run) to 10000:1 (local VM image server with sparse files
that range in the hundreds of thousands to millions of extents).
Either way, bmap btree split events are much, much rarer than
allocation events.
Finally, we have to move the kswapd state to the allocation workqueue
work when allocation is done on behalf of kswapd. This is proving to
cause significant perturbation in performance under memory pressure
and appears to be generating allocation deadlock warnings under some
workloads, so avoiding the use of a workqueue for the majority of
kswapd writeback allocation will minimise the impact of such
behaviour.
Hence it makes sense to move the stack switch to xfs_btree_split()
and only do it for bmap btree splits. Stack switches during
allocation will be much rarer, so there won't be significant
performance overhead caused by switching stacks. The worst case
stack from all allocation paths will be split, not just writeback.
And the majority of memory allocations will be done in the correct
context (e.g. kswapd) without causing additional latency, and so we
simplify the memory reclaim interactions between processes,
workqueues and kswapd.
The worst stack I've been able to generate with this patch in place
is 5600 bytes deep. It's very revealing because we exit XFS at:
37) 1768 64 kmem_cache_alloc+0x13b/0x170
about 1800 bytes of stack consumed, and the remaining 3800 bytes
(and 36 functions) is memory reclaim, swap and the IO stack. And
this occurs in the inode allocation from an open(O_CREAT) syscall,
not writeback.
The amount of stack being used is much less than I've previously been
able to generate - fs_mark testing has been able to generate stack
usage of around 7k without too much trouble; with this patch it's
only just getting to 5.5k. This is primarily because the metadata
allocation paths (e.g. directory blocks) are no longer causing
double splits on the same stack, and hence now stack tracing is
showing swapping being the worst stack consumer rather than XFS.
Performance of fs_mark inode create workloads is unchanged.
Performance of fs_mark async fsync workloads is consistently good
with context switches reduced by around 150,000/s (30%).
Performance of dbench, streaming IO and postmark is unchanged.
Allocation deadlock warnings have not been seen on the workloads
that generated them since adding this patch.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-07-15 01:08:24 +04:00
static int
xfs_bmapi_allocate (
2013-02-25 05:31:26 +04:00
struct xfs_bmalloca * bma )
{
struct xfs_mount * mp = bma - > ip - > i_mount ;
2016-10-03 19:11:34 +03:00
int whichfork = xfs_bmapi_whichfork ( bma - > flags ) ;
2013-02-25 05:31:26 +04:00
struct xfs_ifork * ifp = XFS_IFORK_PTR ( bma - > ip , whichfork ) ;
int tmp_logflags = 0 ;
int error ;
ASSERT ( bma - > length > 0 ) ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* For the wasdelay case , we could also just allocate the stuff asked
* for in this bmap call but that wouldn ' t be as good .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( bma - > wasdel ) {
bma - > length = ( xfs_extlen_t ) bma - > got . br_blockcount ;
bma - > offset = bma - > got . br_startoff ;
2017-11-03 20:34:43 +03:00
xfs_iext_peek_prev_extent ( ifp , & bma - > icur , & bma - > prev ) ;
2013-02-25 05:31:26 +04:00
} else {
bma - > length = XFS_FILBLKS_MIN ( bma - > length , MAXEXTLEN ) ;
if ( ! bma - > eof )
bma - > length = XFS_FILBLKS_MIN ( bma - > length ,
bma - > got . br_startoff - bma - > offset ) ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
/*
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
* Set the data type being allocated . For the data fork , the first data
* in the file is treated differently to all other allocations . For the
* attribute fork , we only need to ensure the allocated range is not on
* the busy list .
2013-02-25 05:31:26 +04:00
*/
if ( ! ( bma - > flags & XFS_BMAPI_METADATA ) ) {
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
bma - > datatype = XFS_ALLOC_NOBUSY ;
if ( whichfork = = XFS_DATA_FORK ) {
if ( bma - > offset = = 0 )
bma - > datatype | = XFS_ALLOC_INITIAL_USER_DATA ;
else
bma - > datatype | = XFS_ALLOC_USERDATA ;
}
2015-11-03 04:27:22 +03:00
if ( bma - > flags & XFS_BMAPI_ZERO )
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
bma - > datatype | = XFS_ALLOC_USERDATA_ZERO ;
2013-02-25 05:31:26 +04:00
}
2005-04-17 02:20:36 +04:00
2013-02-25 05:31:26 +04:00
bma - > minlen = ( bma - > flags & XFS_BMAPI_CONTIG ) ? bma - > length : 1 ;
2009-12-15 02:14:59 +03:00
2013-02-25 05:31:26 +04:00
/*
* Only want to do the alignment at the eof if it is userdata and
* allocation length is larger than a stripe unit .
*/
if ( mp - > m_dalign & & bma - > length > = mp - > m_dalign & &
! ( bma - > flags & XFS_BMAPI_METADATA ) & & whichfork = = XFS_DATA_FORK ) {
error = xfs_bmap_isaeof ( bma , whichfork ) ;
if ( error )
return error ;
2005-04-17 02:20:36 +04:00
}
2011-12-19 00:00:07 +04:00
2013-02-25 05:31:26 +04:00
error = xfs_bmap_alloc ( bma ) ;
if ( error )
2005-04-17 02:20:36 +04:00
return error ;
2013-02-25 05:31:26 +04:00
if ( bma - > blkno = = NULLFSBLOCK )
2005-04-17 02:20:36 +04:00
return 0 ;
2018-07-12 08:26:29 +03:00
if ( ( ifp - > if_flags & XFS_IFBROOT ) & & ! bma - > cur )
2013-02-25 05:31:26 +04:00
bma - > cur = xfs_bmbt_init_cursor ( mp , bma - > tp , bma - > ip , whichfork ) ;
/*
* Bump the number of extents we ' ve allocated
* in this call .
*/
bma - > nallocs + + ;
if ( bma - > cur )
bma - > cur - > bc_private . b . flags =
bma - > wasdel ? XFS_BTCUR_BPRV_WASDEL : 0 ;
bma - > got . br_startoff = bma - > offset ;
bma - > got . br_startblock = bma - > blkno ;
bma - > got . br_blockcount = bma - > length ;
bma - > got . br_state = XFS_EXT_NORM ;
2010-06-23 12:11:15 +04:00
2005-04-17 02:20:36 +04:00
/*
2017-02-03 02:14:01 +03:00
* In the data fork , a wasdelay extent has been initialized , so
* shouldn ' t be flagged as unwritten .
*
* For the cow fork , however , we convert delalloc reservations
* ( extents allocated for speculative preallocation ) to
* allocated unwritten extents , and only convert the unwritten
* extents to real extents when we ' re about to write the data .
2005-04-17 02:20:36 +04:00
*/
2017-02-03 02:14:01 +03:00
if ( ( ! bma - > wasdel | | ( bma - > flags & XFS_BMAPI_COWFORK ) ) & &
2018-10-18 09:18:58 +03:00
( bma - > flags & XFS_BMAPI_PREALLOC ) )
2013-02-25 05:31:26 +04:00
bma - > got . br_state = XFS_EXT_UNWRITTEN ;
if ( bma - > wasdel )
2016-10-03 19:11:34 +03:00
error = xfs_bmap_add_extent_delay_real ( bma , whichfork ) ;
2013-02-25 05:31:26 +04:00
else
2017-04-12 02:45:54 +03:00
error = xfs_bmap_add_extent_hole_real ( bma - > tp , bma - > ip ,
2017-11-03 20:34:43 +03:00
whichfork , & bma - > icur , & bma - > cur , & bma - > got ,
2018-07-12 08:26:28 +03:00
& bma - > logflags , bma - > flags ) ;
2013-02-25 05:31:26 +04:00
bma - > logflags | = tmp_logflags ;
if ( error )
return error ;
/*
* Update our extent pointer , given that xfs_bmap_add_extent_delay_real
* or xfs_bmap_add_extent_hole_real might have merged it into one of
* the neighbouring ones .
*/
2017-11-03 20:34:43 +03:00
xfs_iext_get_extent ( ifp , & bma - > icur , & bma - > got ) ;
2013-02-25 05:31:26 +04:00
ASSERT ( bma - > got . br_startoff < = bma - > offset ) ;
ASSERT ( bma - > got . br_startoff + bma - > got . br_blockcount > =
bma - > offset + bma - > length ) ;
ASSERT ( bma - > got . br_state = = XFS_EXT_NORM | |
bma - > got . br_state = = XFS_EXT_UNWRITTEN ) ;
return 0 ;
}
/*
 * Flip the written/unwritten state of the mapping in mval if the flags ask
 * for it, and push the change into the fork selected by flags.
 *
 * Returns 0 when no conversion is needed or the conversion covered the whole
 * mapping, -EAGAIN when the converted mapping is shorter than len so the
 * caller should issue another request, or the error from zeroing or from the
 * extent update.
 */
STATIC int
xfs_bmapi_convert_unwritten(
	struct xfs_bmalloca	*bma,
	struct xfs_bmbt_irec	*mval,
	xfs_filblks_t		len,
	int			flags)
{
	const int		to_unwritten = XFS_BMAPI_PREALLOC |
					       XFS_BMAPI_CONVERT;
	int			whichfork = xfs_bmapi_whichfork(flags);
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	int			conv_logflags = 0;
	int			error;

	if (mval->br_state == XFS_EXT_UNWRITTEN) {
		/* Preallocation wants the extent to stay unwritten. */
		if (flags & XFS_BMAPI_PREALLOC)
			return 0;
	} else if (mval->br_state == XFS_EXT_NORM) {
		/* real->unwritten only when both flags are set */
		if ((flags & to_unwritten) != to_unwritten)
			return 0;
	}

	ASSERT(mval->br_blockcount <= len);

	/* A btree-format fork needs a cursor for the update below. */
	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
					bma->ip, whichfork);

	/* Toggle the extent state. */
	if (mval->br_state == XFS_EXT_UNWRITTEN)
		mval->br_state = XFS_EXT_NORM;
	else
		mval->br_state = XFS_EXT_UNWRITTEN;

	/*
	 * Before insertion into the bmbt, zero the range being converted
	 * if required.
	 */
	if (flags & XFS_BMAPI_ZERO) {
		error = xfs_zero_extent(bma->ip, mval->br_startblock,
					mval->br_blockcount);
		if (error)
			return error;
	}

	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
			&bma->icur, &bma->cur, mval, &conv_logflags);

	/*
	 * Log the inode core unconditionally in the unwritten extent conversion
	 * path because the conversion might not have done so (e.g., if the
	 * extent count hasn't changed).  We need to make sure the inode is dirty
	 * in the transaction for the sake of fsync(), even if nothing has
	 * changed, because fsync() will not force the log for this transaction
	 * unless it sees the inode pinned.
	 *
	 * Note: If we're only converting cow fork extents, there aren't
	 * any on-disk updates to make, so we don't need to log anything.
	 *
	 * This is done before the error check so the flags are recorded even
	 * when the conversion failed.
	 */
	if (whichfork != XFS_COW_FORK)
		bma->logflags |= conv_logflags | XFS_ILOG_CORE;

	if (error)
		return error;

	/*
	 * Update our extent pointer, given that
	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
	 * of the neighbouring ones.
	 */
	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);

	/*
	 * We may have combined previously unwritten space with written space,
	 * so generate another request.
	 */
	return (mval->br_blockcount < len) ? -EAGAIN : 0;
}
2019-02-15 19:02:48 +03:00
static inline xfs_extlen_t
xfs_bmapi_minleft (
struct xfs_trans * tp ,
struct xfs_inode * ip ,
int fork )
{
if ( tp & & tp - > t_firstblock ! = NULLFSBLOCK )
return 0 ;
if ( XFS_IFORK_FORMAT ( ip , fork ) ! = XFS_DINODE_FMT_BTREE )
return 1 ;
return be16_to_cpu ( XFS_IFORK_PTR ( ip , fork ) - > if_broot - > bb_level ) + 1 ;
}
/*
* Log whatever the flags say , even if error . Otherwise we might miss detecting
* a case where the data is changed , there ' s an error , and it ' s not logged so we
* don ' t shutdown when we should . Don ' t bother logging extents / btree changes if
* we converted to the other format .
*/
static void
xfs_bmapi_finish (
struct xfs_bmalloca * bma ,
int whichfork ,
int error )
{
if ( ( bma - > logflags & xfs_ilog_fext ( whichfork ) ) & &
XFS_IFORK_FORMAT ( bma - > ip , whichfork ) ! = XFS_DINODE_FMT_EXTENTS )
bma - > logflags & = ~ xfs_ilog_fext ( whichfork ) ;
else if ( ( bma - > logflags & xfs_ilog_fbroot ( whichfork ) ) & &
XFS_IFORK_FORMAT ( bma - > ip , whichfork ) ! = XFS_DINODE_FMT_BTREE )
bma - > logflags & = ~ xfs_ilog_fbroot ( whichfork ) ;
if ( bma - > logflags )
xfs_trans_log_inode ( bma - > tp , bma - > ip , bma - > logflags ) ;
if ( bma - > cur )
xfs_btree_del_cursor ( bma - > cur , error ) ;
}
2013-02-25 05:31:26 +04:00
/*
 * Map file blocks to filesystem blocks, and allocate blocks or convert the
 * extent state if necessary.  Detailed behaviour is controlled by the flags
 * parameter.  Only allocates blocks from a single allocation group, to avoid
 * locking problems.
 *
 * On success returns 0 and sets *nmap to the number of mval entries filled
 * in.  On failure returns the error; the inode is still logged with whatever
 * flags were accumulated and the btree cursor is released.
 */
int
xfs_bmapi_write(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		bno,		/* starting file offs. mapped */
	xfs_filblks_t		len,		/* length to map in file */
	int			flags,		/* XFS_BMAPI_... */
	xfs_extlen_t		total,		/* total blocks needed */
	struct xfs_bmbt_irec	*mval,		/* output: map values */
	int			*nmap)		/* i/o: mval size/count */
{
	struct xfs_bmalloca	bma = {
		.tp		= tp,
		.ip		= ip,
		.total		= total,
	};
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	int			whichfork = xfs_bmapi_whichfork(flags);
	xfs_fileoff_t		first_bno;	/* offset the request began at */
	xfs_fileoff_t		end_bno;	/* end of mapped file region */
	bool			at_eof;		/* after the end of extents */
	int			nr_mapped;	/* mval entries filled so far */
	int			error;

#ifdef DEBUG
	/* Snapshot of the arguments for xfs_bmap_validate_ret(). */
	xfs_fileoff_t		orig_bno = bno;
	xfs_filblks_t		orig_len = len;
	int			orig_flags = flags;
	struct xfs_bmbt_irec	*orig_mval = mval;
	int			orig_nmap = *nmap;
#endif

	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
	ASSERT(tp != NULL);
	ASSERT(len > 0);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(!(flags & XFS_BMAPI_REMAP));

	/* zeroing is currently only for data extents, not metadata */
	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
	/*
	 * we can allocate unwritten extents or pre-zero allocated blocks,
	 * but it makes no sense to do both at once. This would result in
	 * zeroing the unwritten extent twice, but it still being an
	 * unwritten extent....
	 */
	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));

	/* Only extents and btree format forks can be mapped here. */
	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);

	XFS_STATS_INC(mp, xs_blk_mapw);

	/* Read the extent list in if it is not in memory yet. */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			goto error0;
	}

	at_eof = !xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got);
	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
		bma.prev.br_startoff = NULLFILEOFF;
	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);

	nr_mapped = 0;
	end_bno = bno + len;
	first_bno = bno;
	while (bno < end_bno && nr_mapped < *nmap) {
		/* in hole or beyond EOF? */
		bool	in_hole = at_eof || bma.got.br_startoff > bno;
		/* sitting on a delalloc extent? */
		bool	wasdelay = !in_hole &&
				   isnullstartblock(bma.got.br_startblock);

		/*
		 * CoW fork conversions should /never/ hit EOF or
		 * holes.  There should always be something for us
		 * to work on.
		 */
		if (in_hole)
			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
				 (flags & XFS_BMAPI_COWFORK)));

		/*
		 * First, deal with the hole before the allocated space
		 * that we found, if any.
		 */
		if (in_hole || wasdelay) {
			bma.eof = at_eof;
			bma.conv = (flags & XFS_BMAPI_CONVERT) != 0;
			bma.wasdel = wasdelay;
			bma.offset = bno;
			bma.flags = flags;

			/*
			 * There's a 32/64 bit type mismatch between the
			 * allocation length request (which can be 64 bits in
			 * length) and the bma length request, which is
			 * xfs_extlen_t and therefore 32 bits. Hence we have to
			 * check for 32-bit overflows and handle them here.
			 */
			bma.length = (len > (xfs_filblks_t)MAXEXTLEN) ?
					MAXEXTLEN : len;

			ASSERT(len > 0);
			ASSERT(bma.length > 0);
			error = xfs_bmapi_allocate(&bma);
			if (error)
				goto error0;
			/* Nothing could be allocated; return what we have. */
			if (bma.blkno == NULLFSBLOCK)
				break;

			/*
			 * If this is a CoW allocation, record the data in
			 * the refcount btree for orphan recovery.
			 */
			if (whichfork == XFS_COW_FORK) {
				error = xfs_refcount_alloc_cow_extent(tp,
						bma.blkno, bma.length);
				if (error)
					goto error0;
			}
		}

		/* Deal with the allocated space we found.  */
		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, first_bno,
				   end_bno, nr_mapped, flags);

		/* Execute unwritten extent conversion if necessary */
		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
		if (error == -EAGAIN)
			continue;
		if (error)
			goto error0;

		/* update the extent map to return */
		xfs_bmapi_update_map(&mval, &bno, &len, first_bno, end_bno,
				     &nr_mapped, flags);

		/*
		 * If we're done, stop now.  Stop when we've allocated
		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
		 * the transaction may get too big.
		 */
		if (bno >= end_bno || nr_mapped >= *nmap ||
		    bma.nallocs >= *nmap)
			break;

		/* Else go on to the next record. */
		bma.prev = bma.got;
		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
			at_eof = true;
	}
	*nmap = nr_mapped;

	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
			whichfork);
	if (error)
		goto error0;

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_NEXTENTS(ip, whichfork) >
		XFS_IFORK_MAXEXT(ip, whichfork));
	xfs_bmapi_finish(&bma, whichfork, 0);
	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
		orig_nmap, *nmap);
	return 0;

error0:
	xfs_bmapi_finish(&bma, whichfork, error);
	return error;
}
2005-06-21 09:48:47 +04:00
2019-02-01 20:14:23 +03:00
/*
* Convert an existing delalloc extent to real blocks based on file offset . This
* attempts to allocate the entire delalloc extent and may require multiple
* invocations to allocate the target offset if a large enough physical extent
* is not available .
*/
int
xfs_bmapi_convert_delalloc (
struct xfs_inode * ip ,
int whichfork ,
2019-02-15 19:02:49 +03:00
xfs_fileoff_t offset_fsb ,
struct xfs_bmbt_irec * imap ,
unsigned int * seq )
2019-02-01 20:14:23 +03:00
{
2019-02-15 19:02:48 +03:00
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
2019-02-15 19:02:49 +03:00
struct xfs_mount * mp = ip - > i_mount ;
2019-02-15 19:02:48 +03:00
struct xfs_bmalloca bma = { NULL } ;
2019-02-15 19:02:49 +03:00
struct xfs_trans * tp ;
2019-02-01 20:14:23 +03:00
int error ;
2019-02-15 19:02:49 +03:00
/*
* Space for the extent and indirect blocks was reserved when the
* delalloc extent was created so there ' s no need to do so here .
*/
error = xfs_trans_alloc ( mp , & M_RES ( mp ) - > tr_write , 0 , 0 ,
XFS_TRANS_RESERVE , & tp ) ;
if ( error )
return error ;
xfs_ilock ( ip , XFS_ILOCK_EXCL ) ;
xfs_trans_ijoin ( tp , ip , 0 ) ;
2019-02-15 19:02:48 +03:00
if ( ! xfs_iext_lookup_extent ( ip , ifp , offset_fsb , & bma . icur , & bma . got ) | |
bma . got . br_startoff > offset_fsb ) {
/*
* No extent found in the range we are trying to convert . This
* should only happen for the COW fork , where another thread
* might have moved the extent to the data fork in the meantime .
*/
WARN_ON_ONCE ( whichfork ! = XFS_COW_FORK ) ;
2019-02-15 19:02:49 +03:00
error = - EAGAIN ;
goto out_trans_cancel ;
2019-02-15 19:02:48 +03:00
}
2019-02-01 20:14:23 +03:00
/*
2019-02-15 19:02:48 +03:00
* If we find a real extent here we raced with another thread converting
* the extent . Just return the real extent at this offset .
2019-02-01 20:14:23 +03:00
*/
2019-02-15 19:02:48 +03:00
if ( ! isnullstartblock ( bma . got . br_startblock ) ) {
* imap = bma . got ;
2019-02-15 19:02:49 +03:00
* seq = READ_ONCE ( ifp - > if_seq ) ;
goto out_trans_cancel ;
2019-02-15 19:02:48 +03:00
}
bma . tp = tp ;
bma . ip = ip ;
bma . wasdel = true ;
bma . offset = bma . got . br_startoff ;
bma . length = max_t ( xfs_filblks_t , bma . got . br_blockcount , MAXEXTLEN ) ;
bma . total = XFS_EXTENTADD_SPACE_RES ( ip - > i_mount , XFS_DATA_FORK ) ;
bma . minleft = xfs_bmapi_minleft ( tp , ip , whichfork ) ;
if ( whichfork = = XFS_COW_FORK )
bma . flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC ;
if ( ! xfs_iext_peek_prev_extent ( ifp , & bma . icur , & bma . prev ) )
bma . prev . br_startoff = NULLFILEOFF ;
error = xfs_bmapi_allocate ( & bma ) ;
if ( error )
goto out_finish ;
error = - ENOSPC ;
if ( WARN_ON_ONCE ( bma . blkno = = NULLFSBLOCK ) )
goto out_finish ;
error = - EFSCORRUPTED ;
if ( WARN_ON_ONCE ( ! bma . got . br_startblock & & ! XFS_IS_REALTIME_INODE ( ip ) ) )
goto out_finish ;
2019-02-15 19:02:49 +03:00
XFS_STATS_ADD ( mp , xs_xstrat_bytes , XFS_FSB_TO_B ( mp , bma . length ) ) ;
XFS_STATS_INC ( mp , xs_xstrat_quick ) ;
2019-02-15 19:02:48 +03:00
ASSERT ( ! isnullstartblock ( bma . got . br_startblock ) ) ;
* imap = bma . got ;
2019-02-15 19:02:49 +03:00
* seq = READ_ONCE ( ifp - > if_seq ) ;
2019-02-15 19:02:48 +03:00
if ( whichfork = = XFS_COW_FORK ) {
error = xfs_refcount_alloc_cow_extent ( tp , bma . blkno ,
bma . length ) ;
if ( error )
goto out_finish ;
}
error = xfs_bmap_btree_to_extents ( tp , ip , bma . cur , & bma . logflags ,
whichfork ) ;
2019-02-15 19:02:49 +03:00
if ( error )
goto out_finish ;
xfs_bmapi_finish ( & bma , whichfork , 0 ) ;
error = xfs_trans_commit ( tp ) ;
xfs_iunlock ( ip , XFS_ILOCK_EXCL ) ;
return error ;
2019-02-15 19:02:48 +03:00
out_finish :
xfs_bmapi_finish ( & bma , whichfork , error ) ;
2019-02-15 19:02:49 +03:00
out_trans_cancel :
xfs_trans_cancel ( tp ) ;
xfs_iunlock ( ip , XFS_ILOCK_EXCL ) ;
2019-02-01 20:14:23 +03:00
return error ;
}
2018-05-14 16:34:34 +03:00
int
2017-04-12 02:45:55 +03:00
xfs_bmapi_remap (
struct xfs_trans * tp ,
struct xfs_inode * ip ,
xfs_fileoff_t bno ,
xfs_filblks_t len ,
xfs_fsblock_t startblock ,
2018-05-14 16:34:34 +03:00
int flags )
2017-04-12 02:45:55 +03:00
{
struct xfs_mount * mp = ip - > i_mount ;
2018-05-14 16:34:34 +03:00
struct xfs_ifork * ifp ;
2017-04-12 02:45:55 +03:00
struct xfs_btree_cur * cur = NULL ;
struct xfs_bmbt_irec got ;
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ;
2018-05-14 16:34:34 +03:00
int whichfork = xfs_bmapi_whichfork ( flags ) ;
2017-04-12 02:45:55 +03:00
int logflags = 0 , error ;
2018-05-14 16:34:34 +03:00
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
2017-04-12 02:45:55 +03:00
ASSERT ( len > 0 ) ;
ASSERT ( len < = ( xfs_filblks_t ) MAXEXTLEN ) ;
ASSERT ( xfs_isilocked ( ip , XFS_ILOCK_EXCL ) ) ;
2018-05-14 16:34:35 +03:00
ASSERT ( ! ( flags & ~ ( XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
XFS_BMAPI_NORMAP ) ) ) ;
ASSERT ( ( flags & ( XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC ) ) ! =
( XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC ) ) ;
2017-04-12 02:45:55 +03:00
if ( unlikely ( XFS_TEST_ERROR (
2018-05-14 16:34:34 +03:00
( XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_EXTENTS & &
XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_BTREE ) ,
2017-06-21 03:54:47 +03:00
mp , XFS_ERRTAG_BMAPIFORMAT ) ) ) {
2017-04-12 02:45:55 +03:00
XFS_ERROR_REPORT ( " xfs_bmapi_remap " , XFS_ERRLEVEL_LOW , mp ) ;
return - EFSCORRUPTED ;
}
if ( XFS_FORCED_SHUTDOWN ( mp ) )
return - EIO ;
if ( ! ( ifp - > if_flags & XFS_IFEXTENTS ) ) {
2018-05-14 16:34:34 +03:00
error = xfs_iread_extents ( tp , ip , whichfork ) ;
2017-04-12 02:45:55 +03:00
if ( error )
return error ;
}
2017-11-03 20:34:43 +03:00
if ( xfs_iext_lookup_extent ( ip , ifp , bno , & icur , & got ) ) {
2017-04-12 02:45:55 +03:00
/* make sure we only reflink into a hole. */
ASSERT ( got . br_startoff > bno ) ;
ASSERT ( got . br_startoff - bno > = len ) ;
}
2017-04-12 02:45:56 +03:00
ip - > i_d . di_nblocks + = len ;
xfs_trans_log_inode ( tp , ip , XFS_ILOG_CORE ) ;
2017-04-12 02:45:55 +03:00
if ( ifp - > if_flags & XFS_IFBROOT ) {
2018-05-14 16:34:34 +03:00
cur = xfs_bmbt_init_cursor ( mp , tp , ip , whichfork ) ;
2017-04-12 02:45:55 +03:00
cur - > bc_private . b . flags = 0 ;
}
got . br_startoff = bno ;
got . br_startblock = startblock ;
got . br_blockcount = len ;
2018-05-14 16:34:35 +03:00
if ( flags & XFS_BMAPI_PREALLOC )
got . br_state = XFS_EXT_UNWRITTEN ;
else
got . br_state = XFS_EXT_NORM ;
2017-04-12 02:45:55 +03:00
2018-05-14 16:34:34 +03:00
error = xfs_bmap_add_extent_hole_real ( tp , ip , whichfork , & icur ,
2018-07-12 08:26:28 +03:00
& cur , & got , & logflags , flags ) ;
2017-04-12 02:45:55 +03:00
if ( error )
goto error0 ;
2019-02-15 19:02:47 +03:00
error = xfs_bmap_btree_to_extents ( tp , ip , cur , & logflags , whichfork ) ;
2017-04-12 02:45:55 +03:00
error0 :
if ( ip - > i_d . di_format ! = XFS_DINODE_FMT_EXTENTS )
logflags & = ~ XFS_ILOG_DEXT ;
else if ( ip - > i_d . di_format ! = XFS_DINODE_FMT_BTREE )
logflags & = ~ XFS_ILOG_DBROOT ;
if ( logflags )
xfs_trans_log_inode ( tp , ip , logflags ) ;
2018-07-19 22:26:31 +03:00
if ( cur )
xfs_btree_del_cursor ( cur , error ) ;
2017-04-12 02:45:55 +03:00
return error ;
}
2016-03-15 03:42:46 +03:00
/*
* When a delalloc extent is split ( e . g . , due to a hole punch ) , the original
* indlen reservation must be shared across the two new extents that are left
* behind .
*
* Given the original reservation and the worst case indlen for the two new
* extents ( as calculated by xfs_bmap_worst_indlen ( ) ) , split the original
2016-03-15 03:42:47 +03:00
* reservation fairly across the two new extents . If necessary , steal available
* blocks from a deleted extent to make up a reservation deficiency ( e . g . , if
* ores = = 1 ) . The number of stolen blocks is returned . The availability and
* subsequent accounting of stolen blocks is the responsibility of the caller .
2016-03-15 03:42:46 +03:00
*/
2016-03-15 03:42:47 +03:00
static xfs_filblks_t
2016-03-15 03:42:46 +03:00
xfs_bmap_split_indlen (
xfs_filblks_t ores , /* original res. */
xfs_filblks_t * indlen1 , /* ext1 worst indlen */
2016-03-15 03:42:47 +03:00
xfs_filblks_t * indlen2 , /* ext2 worst indlen */
xfs_filblks_t avail ) /* stealable blocks */
2016-03-15 03:42:46 +03:00
{
xfs_filblks_t len1 = * indlen1 ;
xfs_filblks_t len2 = * indlen2 ;
xfs_filblks_t nres = len1 + len2 ; /* new total res. */
2016-03-15 03:42:47 +03:00
xfs_filblks_t stolen = 0 ;
2017-02-14 09:48:30 +03:00
xfs_filblks_t resfactor ;
2016-03-15 03:42:47 +03:00
/*
* Steal as many blocks as we can to try and satisfy the worst case
* indlen for both new extents .
*/
2017-02-14 09:48:30 +03:00
if ( ores < nres & & avail )
stolen = XFS_FILBLKS_MIN ( nres - ores , avail ) ;
ores + = stolen ;
/* nothing else to do if we've satisfied the new reservation */
if ( ores > = nres )
return stolen ;
/*
* We can ' t meet the total required reservation for the two extents .
* Calculate the percent of the overall shortage between both extents
* and apply this percentage to each of the requested indlen values .
* This distributes the shortage fairly and reduces the chances that one
* of the two extents is left with nothing when extents are repeatedly
* split .
*/
resfactor = ( ores * 100 ) ;
do_div ( resfactor , nres ) ;
len1 * = resfactor ;
do_div ( len1 , 100 ) ;
len2 * = resfactor ;
do_div ( len2 , 100 ) ;
ASSERT ( len1 + len2 < = ores ) ;
ASSERT ( len1 < * indlen1 & & len2 < * indlen2 ) ;
2016-03-15 03:42:46 +03:00
/*
2017-02-14 09:48:30 +03:00
* Hand out the remainder to each extent . If one of the two reservations
* is zero , we want to make sure that one gets a block first . The loop
* below starts with len1 , so hand len2 a block right off the bat if it
* is zero .
2016-03-15 03:42:46 +03:00
*/
2017-02-14 09:48:30 +03:00
ores - = ( len1 + len2 ) ;
ASSERT ( ( * indlen1 - len1 ) + ( * indlen2 - len2 ) > = ores ) ;
if ( ores & & ! len2 & & * indlen2 ) {
len2 + + ;
ores - - ;
}
while ( ores ) {
if ( len1 < * indlen1 ) {
len1 + + ;
ores - - ;
2016-03-15 03:42:46 +03:00
}
2017-02-14 09:48:30 +03:00
if ( ! ores )
2016-03-15 03:42:46 +03:00
break ;
2017-02-14 09:48:30 +03:00
if ( len2 < * indlen2 ) {
len2 + + ;
ores - - ;
2016-03-15 03:42:46 +03:00
}
}
* indlen1 = len1 ;
* indlen2 = len2 ;
2016-03-15 03:42:47 +03:00
return stolen ;
2016-03-15 03:42:46 +03:00
}
2016-10-20 07:54:14 +03:00
int
xfs_bmap_del_extent_delay (
struct xfs_inode * ip ,
int whichfork ,
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor * icur ,
2016-10-20 07:54:14 +03:00
struct xfs_bmbt_irec * got ,
struct xfs_bmbt_irec * del )
{
struct xfs_mount * mp = ip - > i_mount ;
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
struct xfs_bmbt_irec new ;
int64_t da_old , da_new , da_diff = 0 ;
xfs_fileoff_t del_endoff , got_endoff ;
xfs_filblks_t got_indlen , new_indlen , stolen ;
2017-10-19 21:02:29 +03:00
int state = xfs_bmap_fork_to_state ( whichfork ) ;
int error = 0 ;
2016-10-20 07:54:14 +03:00
bool isrt ;
XFS_STATS_INC ( mp , xs_del_exlist ) ;
isrt = ( whichfork = = XFS_DATA_FORK ) & & XFS_IS_REALTIME_INODE ( ip ) ;
del_endoff = del - > br_startoff + del - > br_blockcount ;
got_endoff = got - > br_startoff + got - > br_blockcount ;
da_old = startblockval ( got - > br_startblock ) ;
da_new = 0 ;
ASSERT ( del - > br_blockcount > 0 ) ;
ASSERT ( got - > br_startoff < = del - > br_startoff ) ;
ASSERT ( got_endoff > = del_endoff ) ;
if ( isrt ) {
2017-04-20 01:19:32 +03:00
uint64_t rtexts = XFS_FSB_TO_B ( mp , del - > br_blockcount ) ;
2016-10-20 07:54:14 +03:00
do_div ( rtexts , mp - > m_sb . sb_rextsize ) ;
xfs_mod_frextents ( mp , rtexts ) ;
}
/*
* Update the inode delalloc counter now and wait to update the
* sb counters as we might have to borrow some blocks for the
* indirect block accounting .
*/
2016-11-08 03:59:26 +03:00
error = xfs_trans_reserve_quota_nblks ( NULL , ip ,
- ( ( long ) del - > br_blockcount ) , 0 ,
2016-10-20 07:54:14 +03:00
isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS ) ;
2016-11-08 03:59:26 +03:00
if ( error )
return error ;
2016-10-20 07:54:14 +03:00
ip - > i_delayed_blks - = del - > br_blockcount ;
if ( got - > br_startoff = = del - > br_startoff )
2017-10-18 00:16:22 +03:00
state | = BMAP_LEFT_FILLING ;
2016-10-20 07:54:14 +03:00
if ( got_endoff = = del_endoff )
2017-10-18 00:16:22 +03:00
state | = BMAP_RIGHT_FILLING ;
2016-10-20 07:54:14 +03:00
2017-10-18 00:16:22 +03:00
switch ( state & ( BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING ) ) {
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING :
2016-10-20 07:54:14 +03:00
/*
* Matches the whole extent . Delete the entry .
*/
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( ip , icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
2016-10-20 07:54:14 +03:00
break ;
2017-10-18 00:16:22 +03:00
case BMAP_LEFT_FILLING :
2016-10-20 07:54:14 +03:00
/*
* Deleting the first part of the extent .
*/
got - > br_startoff = del_endoff ;
got - > br_blockcount - = del - > br_blockcount ;
da_new = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( ip ,
got - > br_blockcount ) , da_old ) ;
got - > br_startblock = nullstartblock ( ( int ) da_new ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , got ) ;
2016-10-20 07:54:14 +03:00
break ;
2017-10-18 00:16:22 +03:00
case BMAP_RIGHT_FILLING :
2016-10-20 07:54:14 +03:00
/*
* Deleting the last part of the extent .
*/
got - > br_blockcount = got - > br_blockcount - del - > br_blockcount ;
da_new = XFS_FILBLKS_MIN ( xfs_bmap_worst_indlen ( ip ,
got - > br_blockcount ) , da_old ) ;
got - > br_startblock = nullstartblock ( ( int ) da_new ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , got ) ;
2016-10-20 07:54:14 +03:00
break ;
case 0 :
/*
* Deleting the middle of the extent .
*
* Distribute the original indlen reservation across the two new
* extents . Steal blocks from the deleted extent if necessary .
* Stealing blocks simply fudges the fdblocks accounting below .
* Warn if either of the new indlen reservations is zero as this
* can lead to delalloc problems .
*/
got - > br_blockcount = del - > br_startoff - got - > br_startoff ;
got_indlen = xfs_bmap_worst_indlen ( ip , got - > br_blockcount ) ;
new . br_blockcount = got_endoff - del_endoff ;
new_indlen = xfs_bmap_worst_indlen ( ip , new . br_blockcount ) ;
WARN_ON_ONCE ( ! got_indlen | | ! new_indlen ) ;
stolen = xfs_bmap_split_indlen ( da_old , & got_indlen , & new_indlen ,
del - > br_blockcount ) ;
got - > br_startblock = nullstartblock ( ( int ) got_indlen ) ;
new . br_startoff = del_endoff ;
new . br_state = got - > br_state ;
new . br_startblock = nullstartblock ( ( int ) new_indlen ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , got ) ;
xfs_iext_next ( ifp , icur ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , icur , & new , state ) ;
2016-10-20 07:54:14 +03:00
da_new = got_indlen + new_indlen - stolen ;
del - > br_blockcount - = stolen ;
break ;
}
ASSERT ( da_old > = da_new ) ;
da_diff = da_old - da_new ;
if ( ! isrt )
da_diff + = del - > br_blockcount ;
2019-04-26 04:26:22 +03:00
if ( da_diff ) {
2016-10-20 07:54:14 +03:00
xfs_mod_fdblocks ( mp , da_diff , false ) ;
2019-04-26 04:26:22 +03:00
xfs_mod_delalloc ( mp , - da_diff ) ;
}
2016-10-20 07:54:14 +03:00
return error ;
}
void
xfs_bmap_del_extent_cow (
struct xfs_inode * ip ,
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor * icur ,
2016-10-20 07:54:14 +03:00
struct xfs_bmbt_irec * got ,
struct xfs_bmbt_irec * del )
{
struct xfs_mount * mp = ip - > i_mount ;
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , XFS_COW_FORK ) ;
struct xfs_bmbt_irec new ;
xfs_fileoff_t del_endoff , got_endoff ;
int state = BMAP_COWFORK ;
XFS_STATS_INC ( mp , xs_del_exlist ) ;
del_endoff = del - > br_startoff + del - > br_blockcount ;
got_endoff = got - > br_startoff + got - > br_blockcount ;
ASSERT ( del - > br_blockcount > 0 ) ;
ASSERT ( got - > br_startoff < = del - > br_startoff ) ;
ASSERT ( got_endoff > = del_endoff ) ;
ASSERT ( ! isnullstartblock ( got - > br_startblock ) ) ;
if ( got - > br_startoff = = del - > br_startoff )
2017-10-18 00:16:22 +03:00
state | = BMAP_LEFT_FILLING ;
2016-10-20 07:54:14 +03:00
if ( got_endoff = = del_endoff )
2017-10-18 00:16:22 +03:00
state | = BMAP_RIGHT_FILLING ;
2016-10-20 07:54:14 +03:00
2017-10-18 00:16:22 +03:00
switch ( state & ( BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING ) ) {
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING :
2016-10-20 07:54:14 +03:00
/*
* Matches the whole extent . Delete the entry .
*/
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( ip , icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
2016-10-20 07:54:14 +03:00
break ;
2017-10-18 00:16:22 +03:00
case BMAP_LEFT_FILLING :
2016-10-20 07:54:14 +03:00
/*
* Deleting the first part of the extent .
*/
got - > br_startoff = del_endoff ;
got - > br_blockcount - = del - > br_blockcount ;
got - > br_startblock = del - > br_startblock + del - > br_blockcount ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , got ) ;
2016-10-20 07:54:14 +03:00
break ;
2017-10-18 00:16:22 +03:00
case BMAP_RIGHT_FILLING :
2016-10-20 07:54:14 +03:00
/*
* Deleting the last part of the extent .
*/
got - > br_blockcount - = del - > br_blockcount ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , got ) ;
2016-10-20 07:54:14 +03:00
break ;
case 0 :
/*
* Deleting the middle of the extent .
*/
got - > br_blockcount = del - > br_startoff - got - > br_startoff ;
new . br_startoff = del_endoff ;
new . br_blockcount = got_endoff - del_endoff ;
new . br_state = got - > br_state ;
new . br_startblock = del - > br_startblock + del - > br_blockcount ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , got ) ;
xfs_iext_next ( ifp , icur ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , icur , & new , state ) ;
2016-10-20 07:54:14 +03:00
break ;
}
2018-01-19 20:05:48 +03:00
ip - > i_delayed_blks - = del - > br_blockcount ;
2016-10-20 07:54:14 +03:00
}
2013-02-25 05:31:26 +04:00
/*
* Called by xfs_bmapi to update file extent records and the btree
2017-10-18 00:16:21 +03:00
* after removing space .
2013-02-25 05:31:26 +04:00
*/
STATIC int /* error */
2017-10-18 00:16:21 +03:00
xfs_bmap_del_extent_real (
2013-02-25 05:31:26 +04:00
xfs_inode_t * ip , /* incore inode pointer */
xfs_trans_t * tp , /* current transaction pointer */
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor * icur ,
2013-02-25 05:31:26 +04:00
xfs_btree_cur_t * cur , /* if null, not a btree */
xfs_bmbt_irec_t * del , /* data to remove from extents */
int * logflagsp , /* inode logging flags */
2016-10-03 19:11:27 +03:00
int whichfork , /* data or attr fork */
int bflags ) /* bmapi flags */
2013-02-25 05:31:26 +04:00
{
xfs_fsblock_t del_endblock = 0 ; /* first block past del */
xfs_fileoff_t del_endoff ; /* first offset past del */
int do_fx ; /* free extent at end of routine */
int error ; /* error return value */
2017-10-18 00:16:22 +03:00
int flags = 0 ; /* inode logging flags */
2017-10-18 00:16:23 +03:00
struct xfs_bmbt_irec got ; /* current extent entry */
2013-02-25 05:31:26 +04:00
xfs_fileoff_t got_endoff ; /* first offset past got */
int i ; /* temp state */
2018-07-18 02:51:50 +03:00
struct xfs_ifork * ifp ; /* inode fork pointer */
2013-02-25 05:31:26 +04:00
xfs_mount_t * mp ; /* mount structure */
xfs_filblks_t nblks ; /* quota/sb block count */
xfs_bmbt_irec_t new ; /* new record to be inserted */
/* REFERENCED */
uint qfield ; /* quota field to update */
2017-10-19 21:02:29 +03:00
int state = xfs_bmap_fork_to_state ( whichfork ) ;
2017-10-18 00:16:23 +03:00
struct xfs_bmbt_irec old ;
2013-02-25 05:31:26 +04:00
2015-10-12 10:21:22 +03:00
mp = ip - > i_mount ;
XFS_STATS_INC ( mp , xs_del_exlist ) ;
2013-02-25 05:31:26 +04:00
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
ASSERT ( del - > br_blockcount > 0 ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_get_extent ( ifp , icur , & got ) ;
2013-02-25 05:31:26 +04:00
ASSERT ( got . br_startoff < = del - > br_startoff ) ;
del_endoff = del - > br_startoff + del - > br_blockcount ;
got_endoff = got . br_startoff + got . br_blockcount ;
ASSERT ( got_endoff > = del_endoff ) ;
2017-10-18 00:16:21 +03:00
ASSERT ( ! isnullstartblock ( got . br_startblock ) ) ;
2013-02-25 05:31:26 +04:00
qfield = 0 ;
error = 0 ;
2017-10-18 00:16:21 +03:00
2017-10-18 00:16:22 +03:00
/*
* If it ' s the case where the directory code is running with no block
* reservation , and the deleted block is in the middle of its extent ,
* and the resulting insert of an extent would cause transformation to
* btree format , then reject it . The calling code will then swap blocks
* around instead . We have to do this now , rather than waiting for the
* conversion to btree format , since the transaction will be dirty then .
*/
if ( tp - > t_blk_res = = 0 & &
XFS_IFORK_FORMAT ( ip , whichfork ) = = XFS_DINODE_FMT_EXTENTS & &
XFS_IFORK_NEXTENTS ( ip , whichfork ) > =
XFS_IFORK_MAXEXT ( ip , whichfork ) & &
del - > br_startoff > got . br_startoff & & del_endoff < got_endoff )
return - ENOSPC ;
flags = XFS_ILOG_CORE ;
2017-10-18 00:16:21 +03:00
if ( whichfork = = XFS_DATA_FORK & & XFS_IS_REALTIME_INODE ( ip ) ) {
xfs_fsblock_t bno ;
xfs_filblks_t len ;
2018-06-08 19:54:22 +03:00
xfs_extlen_t mod ;
bno = div_u64_rem ( del - > br_startblock , mp - > m_sb . sb_rextsize ,
& mod ) ;
ASSERT ( mod = = 0 ) ;
len = div_u64_rem ( del - > br_blockcount , mp - > m_sb . sb_rextsize ,
& mod ) ;
ASSERT ( mod = = 0 ) ;
2017-10-18 00:16:21 +03:00
error = xfs_rtfree_extent ( tp , bno , ( xfs_extlen_t ) len ) ;
if ( error )
goto done ;
2013-02-25 05:31:26 +04:00
do_fx = 0 ;
2017-10-18 00:16:21 +03:00
nblks = len * mp - > m_sb . sb_rextsize ;
qfield = XFS_TRANS_DQ_RTBCOUNT ;
} else {
do_fx = 1 ;
nblks = del - > br_blockcount ;
qfield = XFS_TRANS_DQ_BCOUNT ;
}
del_endblock = del - > br_startblock + del - > br_blockcount ;
if ( cur ) {
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & got , & i ) ;
2017-10-18 00:16:21 +03:00
if ( error )
goto done ;
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2005-04-17 02:20:36 +04:00
}
2016-08-03 04:33:42 +03:00
2017-10-18 00:16:23 +03:00
if ( got . br_startoff = = del - > br_startoff )
state | = BMAP_LEFT_FILLING ;
if ( got_endoff = = del_endoff )
state | = BMAP_RIGHT_FILLING ;
switch ( state & ( BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING ) ) {
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING :
2013-02-25 05:31:26 +04:00
/*
* Matches the whole extent . Delete the entry .
*/
2017-11-03 20:34:47 +03:00
xfs_iext_remove ( ip , icur , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_prev ( ifp , icur ) ;
2013-02-25 05:31:26 +04:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) - 1 ) ;
flags | = XFS_ILOG_CORE ;
if ( ! cur ) {
flags | = xfs_ilog_fext ( whichfork ) ;
break ;
}
if ( ( error = xfs_btree_delete ( cur , & i ) ) )
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2013-02-25 05:31:26 +04:00
break ;
2017-10-18 00:16:23 +03:00
case BMAP_LEFT_FILLING :
2013-02-25 05:31:26 +04:00
/*
* Deleting the first part of the extent .
*/
2017-10-18 00:16:23 +03:00
got . br_startoff = del_endoff ;
got . br_startblock = del_endblock ;
got . br_blockcount - = del - > br_blockcount ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & got ) ;
2013-02-25 05:31:26 +04:00
if ( ! cur ) {
flags | = xfs_ilog_fext ( whichfork ) ;
break ;
}
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & got ) ;
2017-10-18 00:16:23 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
break ;
2017-10-18 00:16:23 +03:00
case BMAP_RIGHT_FILLING :
2013-02-25 05:31:26 +04:00
/*
* Deleting the last part of the extent .
*/
2017-10-18 00:16:23 +03:00
got . br_blockcount - = del - > br_blockcount ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & got ) ;
2013-02-25 05:31:26 +04:00
if ( ! cur ) {
flags | = xfs_ilog_fext ( whichfork ) ;
break ;
}
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & got ) ;
2017-10-18 00:16:23 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
break ;
case 0 :
/*
* Deleting the middle of the extent .
*/
2017-10-18 00:16:23 +03:00
old = got ;
2017-10-19 21:04:44 +03:00
2017-10-18 00:16:23 +03:00
got . br_blockcount = del - > br_startoff - got . br_startoff ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & got ) ;
2017-10-18 00:16:23 +03:00
2013-02-25 05:31:26 +04:00
new . br_startoff = del_endoff ;
2017-10-18 00:16:23 +03:00
new . br_blockcount = got_endoff - del_endoff ;
2013-02-25 05:31:26 +04:00
new . br_state = got . br_state ;
2017-10-18 00:16:21 +03:00
new . br_startblock = del_endblock ;
2017-10-18 00:16:23 +03:00
2017-10-18 00:16:21 +03:00
flags | = XFS_ILOG_CORE ;
if ( cur ) {
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & got ) ;
2017-10-18 00:16:21 +03:00
if ( error )
goto done ;
error = xfs_btree_increment ( cur , 0 , & i ) ;
if ( error )
goto done ;
cur - > bc_rec . b = new ;
error = xfs_btree_insert ( cur , & i ) ;
if ( error & & error ! = - ENOSPC )
goto done ;
/*
* If get no - space back from btree insert , it tried a
* split , and we have a zero block reservation . Fix up
* our state and return the error .
*/
if ( error = = - ENOSPC ) {
2013-02-25 05:31:26 +04:00
/*
2017-10-18 00:16:21 +03:00
* Reset the cursor , don ' t trust it after any
* insert operation .
2013-02-25 05:31:26 +04:00
*/
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & got , & i ) ;
2017-10-18 00:16:21 +03:00
if ( error )
2013-02-25 05:31:26 +04:00
goto done ;
2015-02-23 14:39:08 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
2017-10-18 00:16:21 +03:00
/*
* Update the btree record back
* to the original value .
*/
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & old ) ;
2017-10-18 00:16:21 +03:00
if ( error )
goto done ;
/*
* Reset the extent record back
* to the original value .
*/
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , state , icur , & old ) ;
2017-10-18 00:16:21 +03:00
flags = 0 ;
error = - ENOSPC ;
goto done ;
}
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , done ) ;
} else
flags | = xfs_ilog_fext ( whichfork ) ;
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) + 1 ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_next ( ifp , icur ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , icur , & new , state ) ;
2013-02-25 05:31:26 +04:00
break ;
2005-04-17 02:20:36 +04:00
}
xfs: propagate bmap updates to rmapbt
When we map, unmap, or convert an extent in a file's data or attr
fork, schedule a respective update in the rmapbt. Previous versions
of this patch required a 1:1 correspondence between bmap and rmap,
but this is no longer true as we now have ability to make interval
queries against the rmapbt.
We use the deferred operations code to handle redo operations
atomically and deadlock free. This plumbs in all five rmap actions
(map, unmap, convert extent, alloc, free); we'll use the first three
now for file data, and reflink will want the last two. We also add
an error injection site to test log recovery.
Finally, we need to fix the bmap shift extent code to adjust the
rmaps correctly.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 05:16:05 +03:00
/* remove reverse mapping */
2018-08-01 17:20:34 +03:00
error = xfs_rmap_unmap_extent ( tp , ip , whichfork , del ) ;
2017-10-18 00:16:21 +03:00
if ( error )
goto done ;
xfs: propagate bmap updates to rmapbt
When we map, unmap, or convert an extent in a file's data or attr
fork, schedule a respective update in the rmapbt. Previous versions
of this patch required a 1:1 correspondence between bmap and rmap,
but this is no longer true as we now have ability to make interval
queries against the rmapbt.
We use the deferred operations code to handle redo operations
atomically and deadlock free. This plumbs in all five rmap actions
(map, unmap, convert extent, alloc, free); we'll use the first three
now for file data, and reflink will want the last two. We also add
an error injection site to test log recovery.
Finally, we need to fix the bmap shift extent code to adjust the
rmaps correctly.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 05:16:05 +03:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* If we need to , add to list of extents to delete .
2005-04-17 02:20:36 +04:00
*/
2016-10-03 19:11:27 +03:00
if ( do_fx & & ! ( bflags & XFS_BMAPI_REMAP ) ) {
2016-10-03 19:11:23 +03:00
if ( xfs_is_reflink_inode ( ip ) & & whichfork = = XFS_DATA_FORK ) {
2018-08-01 17:20:34 +03:00
error = xfs_refcount_decrease_extent ( tp , del ) ;
2016-10-03 19:11:23 +03:00
if ( error )
goto done ;
2018-05-09 18:45:04 +03:00
} else {
2018-08-01 17:20:34 +03:00
__xfs_bmap_add_free ( tp , del - > br_startblock ,
2018-05-10 19:35:42 +03:00
del - > br_blockcount , NULL ,
( bflags & XFS_BMAPI_NODISCARD ) | |
del - > br_state = = XFS_EXT_UNWRITTEN ) ;
2018-05-09 18:45:04 +03:00
}
2016-10-03 19:11:23 +03:00
}
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Adjust inode # blocks in the file .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( nblks )
ip - > i_d . di_nblocks - = nblks ;
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Adjust quota data .
2005-04-17 02:20:36 +04:00
*/
2016-10-03 19:11:27 +03:00
if ( qfield & & ! ( bflags & XFS_BMAPI_REMAP ) )
2013-02-25 05:31:26 +04:00
xfs_trans_mod_dquot_byino ( tp , ip , qfield , ( long ) - nblks ) ;
done :
* logflagsp = flags ;
2005-04-17 02:20:36 +04:00
return error ;
}
2007-08-16 09:20:25 +04:00
/*
2013-02-25 05:31:26 +04:00
* Unmap ( remove ) blocks from a file .
* If nexts is nonzero then the number of extents to remove is limited to
* that value . If not all extents in the block range can be removed then
* * done is set .
2007-08-16 09:20:25 +04:00
*/
2013-02-25 05:31:26 +04:00
int /* error */
2016-10-03 19:11:29 +03:00
__xfs_bunmapi (
2018-07-12 08:26:13 +03:00
struct xfs_trans * tp , /* transaction pointer */
2013-02-25 05:31:26 +04:00
struct xfs_inode * ip , /* incore inode */
2017-10-18 00:16:21 +03:00
xfs_fileoff_t start , /* first file offset deleted */
2016-10-03 19:11:29 +03:00
xfs_filblks_t * rlen , /* i/o: amount remaining */
2013-02-25 05:31:26 +04:00
int flags , /* misc flags */
2018-07-12 08:26:25 +03:00
xfs_extnum_t nexts ) /* number of extents max */
2007-08-16 09:20:25 +04:00
{
2018-07-12 08:26:13 +03:00
struct xfs_btree_cur * cur ; /* bmap btree cursor */
struct xfs_bmbt_irec del ; /* extent being deleted */
2013-02-25 05:31:26 +04:00
int error ; /* error return value */
xfs_extnum_t extno ; /* extent number in list */
2018-07-12 08:26:13 +03:00
struct xfs_bmbt_irec got ; /* current extent record */
2018-07-18 02:51:50 +03:00
struct xfs_ifork * ifp ; /* inode fork pointer */
2013-02-25 05:31:26 +04:00
int isrt ; /* freeing in rt area */
int logflags ; /* transaction logging flags */
xfs_extlen_t mod ; /* rt extent offset */
2018-07-12 08:26:13 +03:00
struct xfs_mount * mp ; /* mount structure */
2013-02-25 05:31:26 +04:00
int tmp_logflags ; /* partial logging flags */
int wasdel ; /* was a delayed alloc extent */
int whichfork ; /* data or attribute fork */
xfs_fsblock_t sum ;
2016-10-03 19:11:29 +03:00
xfs_filblks_t len = * rlen ; /* length to unmap in file */
2017-06-15 07:25:57 +03:00
xfs_fileoff_t max_len ;
2017-07-18 21:16:51 +03:00
xfs_agnumber_t prev_agno = NULLAGNUMBER , agno ;
2017-10-18 00:16:21 +03:00
xfs_fileoff_t end ;
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ;
bool done = false ;
2005-04-17 02:20:36 +04:00
2017-10-18 00:16:21 +03:00
trace_xfs_bunmap ( ip , start , len , flags , _RET_IP_ ) ;
2005-04-17 02:20:36 +04:00
2016-10-03 19:11:32 +03:00
whichfork = xfs_bmapi_whichfork ( flags ) ;
ASSERT ( whichfork ! = XFS_COW_FORK ) ;
2013-02-25 05:31:26 +04:00
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
if ( unlikely (
XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_EXTENTS & &
XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_BTREE ) ) {
XFS_ERROR_REPORT ( " xfs_bunmapi " , XFS_ERRLEVEL_LOW ,
ip - > i_mount ) ;
2014-06-25 08:58:08 +04:00
return - EFSCORRUPTED ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
mp = ip - > i_mount ;
if ( XFS_FORCED_SHUTDOWN ( mp ) )
2014-06-25 08:58:08 +04:00
return - EIO ;
2005-04-17 02:20:36 +04:00
2013-12-07 00:30:17 +04:00
ASSERT ( xfs_isilocked ( ip , XFS_ILOCK_EXCL ) ) ;
2013-02-25 05:31:26 +04:00
ASSERT ( len > 0 ) ;
ASSERT ( nexts > = 0 ) ;
2005-04-17 02:20:36 +04:00
2017-06-15 07:25:57 +03:00
/*
* Guesstimate how many blocks we can unmap without running the risk of
* blowing out the transaction with a mix of EFIs and reflink
* adjustments .
*/
2017-12-11 05:03:53 +03:00
if ( tp & & xfs_is_reflink_inode ( ip ) & & whichfork = = XFS_DATA_FORK )
2017-06-15 07:25:57 +03:00
max_len = min ( len , xfs_refcount_max_unmap ( tp - > t_log_res ) ) ;
else
max_len = len ;
2013-02-25 05:31:26 +04:00
if ( ! ( ifp - > if_flags & XFS_IFEXTENTS ) & &
( error = xfs_iread_extents ( tp , ip , whichfork ) ) )
return error ;
2016-11-08 04:59:42 +03:00
if ( xfs_iext_count ( ifp ) = = 0 ) {
2016-10-03 19:11:29 +03:00
* rlen = 0 ;
2013-02-25 05:31:26 +04:00
return 0 ;
}
2015-10-12 10:21:22 +03:00
XFS_STATS_INC ( mp , xs_blk_unmap ) ;
2013-02-25 05:31:26 +04:00
isrt = ( whichfork = = XFS_DATA_FORK ) & & XFS_IS_REALTIME_INODE ( ip ) ;
2017-10-24 02:32:39 +03:00
end = start + len ;
2005-04-17 02:20:36 +04:00
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_lookup_extent_before ( ip , ifp , & end , & icur , & got ) ) {
2017-10-24 02:32:39 +03:00
* rlen = 0 ;
return 0 ;
2013-02-25 05:31:26 +04:00
}
2017-10-24 02:32:39 +03:00
end - - ;
2016-11-24 03:39:44 +03:00
2013-02-25 05:31:26 +04:00
logflags = 0 ;
if ( ifp - > if_flags & XFS_IFBROOT ) {
ASSERT ( XFS_IFORK_FORMAT ( ip , whichfork ) = = XFS_DINODE_FMT_BTREE ) ;
cur = xfs_bmbt_init_cursor ( mp , tp , ip , whichfork ) ;
cur - > bc_private . b . flags = 0 ;
} else
cur = NULL ;
if ( isrt ) {
/*
* Synchronize by locking the bitmap inode .
*/
2016-08-03 04:00:42 +03:00
xfs_ilock ( mp - > m_rbmip , XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP ) ;
2013-02-25 05:31:26 +04:00
xfs_trans_ijoin ( tp , mp - > m_rbmip , XFS_ILOCK_EXCL ) ;
2016-08-03 04:00:42 +03:00
xfs_ilock ( mp - > m_rsumip , XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM ) ;
xfs_trans_ijoin ( tp , mp - > m_rsumip , XFS_ILOCK_EXCL ) ;
2013-02-25 05:31:26 +04:00
}
2012-04-29 15:16:17 +04:00
2013-02-25 05:31:26 +04:00
extno = 0 ;
2017-11-03 20:34:43 +03:00
while ( end ! = ( xfs_fileoff_t ) - 1 & & end > = start & &
2017-06-15 07:25:57 +03:00
( nexts = = 0 | | extno < nexts ) & & max_len > 0 ) {
2013-02-25 05:31:26 +04:00
/*
2017-10-18 00:16:21 +03:00
* Is the found extent after a hole in which end lives ?
2013-02-25 05:31:26 +04:00
* Just back up to the previous extent , if so .
*/
2017-11-03 20:34:43 +03:00
if ( got . br_startoff > end & &
! xfs_iext_prev_extent ( ifp , & icur , & got ) ) {
done = true ;
break ;
2013-02-25 05:31:26 +04:00
}
/*
* Is the last block of this extent before the range
* we ' re supposed to delete ? If so , we ' re done .
*/
2017-10-18 00:16:21 +03:00
end = XFS_FILEOFF_MIN ( end ,
2013-02-25 05:31:26 +04:00
got . br_startoff + got . br_blockcount - 1 ) ;
2017-10-18 00:16:21 +03:00
if ( end < start )
2013-02-25 05:31:26 +04:00
break ;
/*
* Then deal with the ( possibly delayed ) allocated space
* we found .
*/
del = got ;
wasdel = isnullstartblock ( del . br_startblock ) ;
2017-07-18 21:16:51 +03:00
/*
* Make sure we don ' t touch multiple AGF headers out of order
* in a single transaction , as that could cause AB - BA deadlocks .
*/
if ( ! wasdel ) {
agno = XFS_FSB_TO_AGNO ( mp , del . br_startblock ) ;
if ( prev_agno ! = NULLAGNUMBER & & prev_agno > agno )
break ;
prev_agno = agno ;
}
2013-02-25 05:31:26 +04:00
if ( got . br_startoff < start ) {
del . br_startoff = start ;
del . br_blockcount - = start - got . br_startoff ;
if ( ! wasdel )
del . br_startblock + = start - got . br_startoff ;
}
2017-10-18 00:16:21 +03:00
if ( del . br_startoff + del . br_blockcount > end + 1 )
del . br_blockcount = end + 1 - del . br_startoff ;
2017-06-15 07:25:57 +03:00
/* How much can we safely unmap? */
if ( max_len < del . br_blockcount ) {
del . br_startoff + = del . br_blockcount - max_len ;
if ( ! wasdel )
del . br_startblock + = del . br_blockcount - max_len ;
del . br_blockcount = max_len ;
}
2018-06-08 19:54:22 +03:00
if ( ! isrt )
goto delete ;
2013-02-25 05:31:26 +04:00
sum = del . br_startblock + del . br_blockcount ;
2018-06-08 19:54:22 +03:00
div_u64_rem ( sum , mp - > m_sb . sb_rextsize , & mod ) ;
if ( mod ) {
2012-04-29 15:16:17 +04:00
/*
2013-02-25 05:31:26 +04:00
* Realtime extent not lined up at the end .
* The extent could have been split into written
* and unwritten pieces , or we could just be
* unmapping part of it . But we can ' t really
* get rid of part of a realtime extent .
2012-04-29 15:16:17 +04:00
*/
2018-10-18 09:18:58 +03:00
if ( del . br_state = = XFS_EXT_UNWRITTEN ) {
2013-02-25 05:31:26 +04:00
/*
* This piece is unwritten , or we ' re not
* using unwritten extents . Skip over it .
*/
2017-10-18 00:16:21 +03:00
ASSERT ( end > = mod ) ;
end - = mod > del . br_blockcount ?
2013-02-25 05:31:26 +04:00
del . br_blockcount : mod ;
2017-11-03 20:34:43 +03:00
if ( end < got . br_startoff & &
! xfs_iext_prev_extent ( ifp , & icur , & got ) ) {
done = true ;
break ;
2013-02-25 05:31:26 +04:00
}
continue ;
2005-04-17 02:20:36 +04:00
}
xfs: Make fiemap work with sparse files
In xfs_vn_fiemap, we set bvm_count to fi_extent_max + 1 and want
to return fi_extent_max extents, but actually it won't work for
a sparse file. The reason is that in xfs_getbmap we will
calculate holes and set it in 'out', while out is malloced by
bmv_count(fi_extent_max+1) which didn't consider holes. So in the
worst case, if 'out' vector looks like
[hole, extent, hole, extent, hole, ... hole, extent, hole],
we will only return half of fi_extent_max extents.
This patch add a new parameter BMV_IF_NO_HOLES for bvm_iflags.
So with this flags, we don't use our 'out' in xfs_getbmap for
a hole. The solution is a bit ugly by just don't increasing
index of 'out' vector. I felt that it is not easy to skip it
at the very beginning since we have the complicated check and
some function like xfs_getbmapx_fix_eof_hole to adjust 'out'.
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-08-30 06:44:03 +04:00
/*
2013-02-25 05:31:26 +04:00
* It ' s written , turn it unwritten .
* This is better than zeroing it .
xfs: Make fiemap work with sparse files
In xfs_vn_fiemap, we set bvm_count to fi_extent_max + 1 and want
to return fi_extent_max extents, but actually it won't work for
a sparse file. The reason is that in xfs_getbmap we will
calculate holes and set it in 'out', while out is malloced by
bmv_count(fi_extent_max+1) which didn't consider holes. So in the
worst case, if 'out' vector looks like
[hole, extent, hole, extent, hole, ... hole, extent, hole],
we will only return half of fi_extent_max extents.
This patch add a new parameter BMV_IF_NO_HOLES for bvm_iflags.
So with this flags, we don't use our 'out' in xfs_getbmap for
a hole. The solution is a bit ugly by just don't increasing
index of 'out' vector. I felt that it is not easy to skip it
at the very beginning since we have the complicated check and
some function like xfs_getbmapx_fix_eof_hole to adjust 'out'.
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-08-30 06:44:03 +04:00
*/
2013-02-25 05:31:26 +04:00
ASSERT ( del . br_state = = XFS_EXT_NORM ) ;
2016-03-02 01:58:21 +03:00
ASSERT ( tp - > t_blk_res > 0 ) ;
2013-02-25 05:31:26 +04:00
/*
* If this spans a realtime extent boundary ,
* chop it back to the start of the one we end at .
*/
if ( del . br_blockcount > mod ) {
del . br_startoff + = del . br_blockcount - mod ;
del . br_startblock + = del . br_blockcount - mod ;
del . br_blockcount = mod ;
}
del . br_state = XFS_EXT_UNWRITTEN ;
error = xfs_bmap_add_extent_unwritten_real ( tp , ip ,
2017-11-03 20:34:43 +03:00
whichfork , & icur , & cur , & del ,
2018-07-12 08:26:28 +03:00
& logflags ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto error0 ;
goto nodelete ;
}
2018-06-08 19:54:22 +03:00
div_u64_rem ( del . br_startblock , mp - > m_sb . sb_rextsize , & mod ) ;
if ( mod ) {
2013-02-25 05:31:26 +04:00
/*
* Realtime extent is lined up at the end but not
* at the front . We ' ll get rid of full extents if
* we can .
*/
mod = mp - > m_sb . sb_rextsize - mod ;
if ( del . br_blockcount > mod ) {
del . br_blockcount - = mod ;
del . br_startoff + = mod ;
del . br_startblock + = mod ;
2018-10-18 09:18:58 +03:00
} else if ( del . br_startoff = = start & &
( del . br_state = = XFS_EXT_UNWRITTEN | |
tp - > t_blk_res = = 0 ) ) {
2013-02-25 05:31:26 +04:00
/*
* Can ' t make it unwritten . There isn ' t
* a full extent here so just skip it .
*/
2017-10-18 00:16:21 +03:00
ASSERT ( end > = del . br_blockcount ) ;
end - = del . br_blockcount ;
2017-11-03 20:34:43 +03:00
if ( got . br_startoff > end & &
! xfs_iext_prev_extent ( ifp , & icur , & got ) ) {
done = true ;
break ;
}
xfs: Make fiemap work with sparse files
In xfs_vn_fiemap, we set bvm_count to fi_extent_max + 1 and want
to return fi_extent_max extents, but actually it won't work for
a sparse file. The reason is that in xfs_getbmap we will
calculate holes and set it in 'out', while out is malloced by
bmv_count(fi_extent_max+1) which didn't consider holes. So in the
worst case, if 'out' vector looks like
[hole, extent, hole, extent, hole, ... hole, extent, hole],
we will only return half of fi_extent_max extents.
This patch add a new parameter BMV_IF_NO_HOLES for bvm_iflags.
So with this flags, we don't use our 'out' in xfs_getbmap for
a hole. The solution is a bit ugly by just don't increasing
index of 'out' vector. I felt that it is not easy to skip it
at the very beginning since we have the complicated check and
some function like xfs_getbmapx_fix_eof_hole to adjust 'out'.
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-08-30 06:44:03 +04:00
continue ;
2013-02-25 05:31:26 +04:00
} else if ( del . br_state = = XFS_EXT_UNWRITTEN ) {
2016-11-24 03:39:44 +03:00
struct xfs_bmbt_irec prev ;
2013-02-25 05:31:26 +04:00
/*
* This one is already unwritten .
* It must have a written left neighbor .
* Unwrite the killed part of that one and
* try again .
*/
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_prev_extent ( ifp , & icur , & prev ) )
ASSERT ( 0 ) ;
2013-02-25 05:31:26 +04:00
ASSERT ( prev . br_state = = XFS_EXT_NORM ) ;
ASSERT ( ! isnullstartblock ( prev . br_startblock ) ) ;
ASSERT ( del . br_startblock = =
prev . br_startblock + prev . br_blockcount ) ;
if ( prev . br_startoff < start ) {
mod = start - prev . br_startoff ;
prev . br_blockcount - = mod ;
prev . br_startblock + = mod ;
prev . br_startoff = start ;
}
prev . br_state = XFS_EXT_UNWRITTEN ;
error = xfs_bmap_add_extent_unwritten_real ( tp ,
2017-11-03 20:34:43 +03:00
ip , whichfork , & icur , & cur ,
2018-07-12 08:26:28 +03:00
& prev , & logflags ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto error0 ;
goto nodelete ;
} else {
ASSERT ( del . br_state = = XFS_EXT_NORM ) ;
del . br_state = XFS_EXT_UNWRITTEN ;
error = xfs_bmap_add_extent_unwritten_real ( tp ,
2017-11-03 20:34:43 +03:00
ip , whichfork , & icur , & cur ,
2018-07-12 08:26:28 +03:00
& del , & logflags ) ;
2013-02-25 05:31:26 +04:00
if ( error )
goto error0 ;
goto nodelete ;
xfs: Make fiemap work with sparse files
In xfs_vn_fiemap, we set bvm_count to fi_extent_max + 1 and want
to return fi_extent_max extents, but actually it won't work for
a sparse file. The reason is that in xfs_getbmap we will
calculate holes and set it in 'out', while out is malloced by
bmv_count(fi_extent_max+1) which didn't consider holes. So in the
worst case, if 'out' vector looks like
[hole, extent, hole, extent, hole, ... hole, extent, hole],
we will only return half of fi_extent_max extents.
This patch add a new parameter BMV_IF_NO_HOLES for bvm_iflags.
So with this flags, we don't use our 'out' in xfs_getbmap for
a hole. The solution is a bit ugly by just don't increasing
index of 'out' vector. I felt that it is not easy to skip it
at the very beginning since we have the complicated check and
some function like xfs_getbmapx_fix_eof_hole to adjust 'out'.
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-08-30 06:44:03 +04:00
}
2005-04-17 02:20:36 +04:00
}
2018-06-08 19:54:22 +03:00
delete :
2016-03-15 03:42:46 +03:00
if ( wasdel ) {
2017-11-03 20:34:43 +03:00
error = xfs_bmap_del_extent_delay ( ip , whichfork , & icur ,
2017-10-18 00:16:21 +03:00
& got , & del ) ;
} else {
2018-07-12 08:26:16 +03:00
error = xfs_bmap_del_extent_real ( ip , tp , & icur , cur ,
& del , & tmp_logflags , whichfork ,
2017-10-18 00:16:21 +03:00
flags ) ;
logflags | = tmp_logflags ;
2017-10-18 00:16:20 +03:00
}
2016-03-15 03:42:46 +03:00
2013-02-25 05:31:26 +04:00
if ( error )
goto error0 ;
2016-03-15 03:42:46 +03:00
2017-06-15 07:25:57 +03:00
max_len - = del . br_blockcount ;
2017-10-18 00:16:21 +03:00
end = del . br_startoff - 1 ;
2013-02-25 05:31:26 +04:00
nodelete :
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* If not done go on to the next ( previous ) record .
2005-04-17 02:20:36 +04:00
*/
2017-10-18 00:16:21 +03:00
if ( end ! = ( xfs_fileoff_t ) - 1 & & end > = start ) {
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_get_extent ( ifp , & icur , & got ) | |
( got . br_startoff > end & &
! xfs_iext_prev_extent ( ifp , & icur , & got ) ) ) {
done = true ;
break ;
2005-04-17 02:20:36 +04:00
}
2013-02-25 05:31:26 +04:00
extno + + ;
2005-04-17 02:20:36 +04:00
}
}
2017-11-03 20:34:43 +03:00
if ( done | | end = = ( xfs_fileoff_t ) - 1 | | end < start )
2016-10-03 19:11:29 +03:00
* rlen = 0 ;
else
2017-10-18 00:16:21 +03:00
* rlen = end - start + 1 ;
2006-09-28 04:58:06 +04:00
2005-04-17 02:20:36 +04:00
/*
2013-02-25 05:31:26 +04:00
* Convert to a btree if necessary .
2005-04-17 02:20:36 +04:00
*/
2013-02-25 05:31:26 +04:00
if ( xfs_bmap_needs_btree ( ip , whichfork ) ) {
ASSERT ( cur = = NULL ) ;
2018-07-12 08:26:29 +03:00
error = xfs_bmap_extents_to_btree ( tp , ip , & cur , 0 ,
& tmp_logflags , whichfork ) ;
2013-02-25 05:31:26 +04:00
logflags | = tmp_logflags ;
2019-02-15 19:02:47 +03:00
} else {
error = xfs_bmap_btree_to_extents ( tp , ip , cur , & logflags ,
2013-02-25 05:31:26 +04:00
whichfork ) ;
}
2019-02-15 19:02:47 +03:00
2013-02-25 05:31:26 +04:00
error0 :
/*
* Log everything . Do this after conversion , there ' s no point in
* logging the extent records if we ' ve converted to btree format .
*/
if ( ( logflags & xfs_ilog_fext ( whichfork ) ) & &
XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_EXTENTS )
logflags & = ~ xfs_ilog_fext ( whichfork ) ;
else if ( ( logflags & xfs_ilog_fbroot ( whichfork ) ) & &
XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_BTREE )
logflags & = ~ xfs_ilog_fbroot ( whichfork ) ;
/*
* Log inode even in the error case , if the transaction
* is dirty we ' ll need to shut down the filesystem .
*/
if ( logflags )
xfs_trans_log_inode ( tp , ip , logflags ) ;
if ( cur ) {
2018-07-12 08:26:29 +03:00
if ( ! error )
2013-02-25 05:31:26 +04:00
cur - > bc_private . b . allocated = 0 ;
2018-07-19 22:26:31 +03:00
xfs_btree_del_cursor ( cur , error ) ;
2013-02-25 05:31:26 +04:00
}
return error ;
}
2014-02-24 03:58:19 +04:00
2016-10-03 19:11:29 +03:00
/*
 * Unmap a range of a file.
 *
 * Thin wrapper around __xfs_bunmapi() for callers that only want to know
 * whether the request finished: *done is set to nonzero when no blocks
 * remain to be processed and to zero otherwise.  The error from
 * __xfs_bunmapi() is returned unchanged.
 */
int
xfs_bunmapi(
	xfs_trans_t		*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_extnum_t		nexts,
	int			*done)
{
	xfs_filblks_t		remaining = len;
	int			error;

	error = __xfs_bunmapi(tp, ip, bno, &remaining, flags, nexts);

	/* *done is written even when an error is being returned. */
	*done = remaining ? 0 : 1;
	return error;
}
2014-09-23 09:38:09 +04:00
/*
 * Decide whether shifting @got down by @shift blocks would leave it directly
 * mergeable with @left, the extent that precedes the hole being closed.
 *
 * All of the following must hold for a merge to be possible:
 *  - the shifted extent starts exactly where @left ends in the file,
 *  - @got starts exactly where @left ends on disk,
 *  - both extents have the same state,
 *  - the combined length does not exceed MAXEXTLEN.
 */
STATIC bool
xfs_bmse_can_merge(
	struct xfs_bmbt_irec	*left,	/* preceding extent */
	struct xfs_bmbt_irec	*got,	/* current extent to shift */
	xfs_fileoff_t		shift)	/* shift fsb */
{
	xfs_fileoff_t		shifted_off = got->br_startoff - shift;

	/* Must be adjacent in the file once shifted ... */
	if (left->br_startoff + left->br_blockcount != shifted_off)
		return false;

	/* ... and already adjacent on disk ... */
	if (left->br_startblock + left->br_blockcount != got->br_startblock)
		return false;

	/* ... with matching written/unwritten state ... */
	if (left->br_state != got->br_state)
		return false;

	/* ... and the result must still fit in a single extent record. */
	return left->br_blockcount + got->br_blockcount <= MAXEXTLEN;
}
/*
 * A bmap extent shift adjusts the file offset of an extent to fill a
 * preceding hole in the file.  When the shifted extent would end up fully
 * adjacent to the extent that currently precedes the hole, the two are
 * merged into one record instead of performing the shift.
 *
 * The caller must already have established, via xfs_bmse_can_merge(), that
 * @left and @got can be merged for this @shift.
 *
 * The fork's extent count is decremented, the bmap btree (if @cur is
 * non-NULL) and the incore extent list are updated, and rmap unmap/map
 * updates are issued for the moved range.  Inode log flags are accumulated
 * in *logflags.  Returns 0 or a negative errno.
 */
STATIC int
xfs_bmse_merge(
	struct xfs_trans		*tp,
	struct xfs_inode		*ip,
	int				whichfork,
	xfs_fileoff_t			shift,		/* shift fsb */
	struct xfs_iext_cursor		*icur,
	struct xfs_bmbt_irec		*got,		/* extent to shift */
	struct xfs_bmbt_irec		*left,		/* preceding extent */
	struct xfs_btree_cur		*cur,
	int				*logflags)	/* output */
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_bmbt_irec		merged;		/* left + got */
	struct xfs_bmbt_irec		remap;		/* got at new offset */
	int				error;
	int				found;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(xfs_bmse_can_merge(left, got, shift));

	/* The merged record is @left grown by the length of @got. */
	merged = *left;
	merged.br_blockcount = left->br_blockcount + got->br_blockcount;

	/*
	 * Update the on-disk extent count, the btree if necessary and log the
	 * inode.
	 */
	XFS_IFORK_NEXT_SET(ip, whichfork,
			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
	*logflags |= XFS_ILOG_CORE;

	if (cur) {
		/* lookup and remove the extent to merge */
		error = xfs_bmbt_lookup_eq(cur, got, &found);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, found == 1);

		error = xfs_btree_delete(cur, &found);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, found == 1);

		/* lookup and update size of the previous extent */
		error = xfs_bmbt_lookup_eq(cur, left, &found);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, found == 1);

		error = xfs_bmbt_update(cur, &merged);
		if (error)
			return error;
	} else {
		/* Extents-format fork: the inline extent list gets logged. */
		*logflags |= XFS_ILOG_DEXT;
	}

	/* Drop @got from the incore list and grow its left neighbour. */
	xfs_iext_remove(ip, icur, 0);
	xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
			&merged);

	/* update reverse mapping. rmap functions merge the rmaps for us */
	error = xfs_rmap_unmap_extent(tp, ip, whichfork, got);
	if (error)
		return error;

	/* Re-map the blocks of @got at the offset right after @left. */
	remap = *got;
	remap.br_startoff = left->br_startoff + left->br_blockcount;
	return xfs_rmap_map_extent(tp, ip, whichfork, &remap);
}
2017-10-19 21:07:34 +03:00
/*
 * Move the extent @got to the new file offset @startoff.
 *
 * @got is modified in place.  The bmap btree record is rewritten when @cur
 * is non-NULL; otherwise XFS_ILOG_DEXT is added to *logflags.  The incore
 * extent at @icur is updated, and the reverse mapping is moved by unmapping
 * the old record and mapping the new one.  XFS_ILOG_CORE is always added to
 * *logflags.  Returns 0 or a negative errno.
 */
static int
xfs_bmap_shift_update_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_iext_cursor	*icur,
	struct xfs_bmbt_irec	*got,
	struct xfs_btree_cur	*cur,
	int			*logflags,
	xfs_fileoff_t		startoff)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	old = *got;	/* record before the shift */
	int			error;
	int			found;

	*logflags |= XFS_ILOG_CORE;

	got->br_startoff = startoff;

	if (!cur) {
		*logflags |= XFS_ILOG_DEXT;
	} else {
		/* Find the record under its old key, then rewrite it. */
		error = xfs_bmbt_lookup_eq(cur, &old, &found);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, found == 1);

		error = xfs_bmbt_update(cur, got);
		if (error)
			return error;
	}

	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
			got);

	/* update reverse mapping */
	error = xfs_rmap_unmap_extent(tp, ip, whichfork, &old);
	if (error)
		return error;
	return xfs_rmap_map_extent(tp, ip, whichfork, got);
}
2014-02-24 03:58:19 +04:00
int
2017-10-19 21:07:11 +03:00
xfs_bmap_collapse_extents (
2014-02-24 03:58:19 +04:00
struct xfs_trans * tp ,
struct xfs_inode * ip ,
2015-03-25 07:08:56 +03:00
xfs_fileoff_t * next_fsb ,
2014-02-24 03:58:19 +04:00
xfs_fileoff_t offset_shift_fsb ,
2018-07-12 08:26:27 +03:00
bool * done )
2014-02-24 03:58:19 +04:00
{
2017-10-19 21:07:11 +03:00
int whichfork = XFS_DATA_FORK ;
struct xfs_mount * mp = ip - > i_mount ;
struct xfs_ifork * ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
struct xfs_btree_cur * cur = NULL ;
2017-10-19 21:07:34 +03:00
struct xfs_bmbt_irec got , prev ;
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ;
2017-10-19 21:07:34 +03:00
xfs_fileoff_t new_startoff ;
2017-10-19 21:07:11 +03:00
int error = 0 ;
int logflags = 0 ;
2014-02-24 03:58:19 +04:00
if ( unlikely ( XFS_TEST_ERROR (
( XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_EXTENTS & &
XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_BTREE ) ,
2017-06-21 03:54:47 +03:00
mp , XFS_ERRTAG_BMAPIFORMAT ) ) ) {
2017-10-19 21:07:11 +03:00
XFS_ERROR_REPORT ( __func__ , XFS_ERRLEVEL_LOW , mp ) ;
2014-06-25 08:58:08 +04:00
return - EFSCORRUPTED ;
2014-02-24 03:58:19 +04:00
}
if ( XFS_FORCED_SHUTDOWN ( mp ) )
2014-06-25 08:58:08 +04:00
return - EIO ;
2014-02-24 03:58:19 +04:00
2017-10-19 21:07:11 +03:00
ASSERT ( xfs_isilocked ( ip , XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL ) ) ;
2014-02-24 03:58:19 +04:00
if ( ! ( ifp - > if_flags & XFS_IFEXTENTS ) ) {
error = xfs_iread_extents ( tp , ip , whichfork ) ;
if ( error )
return error ;
}
2014-09-23 09:38:09 +04:00
if ( ifp - > if_flags & XFS_IFBROOT ) {
cur = xfs_bmbt_init_cursor ( mp , tp , ip , whichfork ) ;
cur - > bc_private . b . flags = 0 ;
}
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_lookup_extent ( ip , ifp , * next_fsb , & icur , & got ) ) {
2017-10-19 21:07:11 +03:00
* done = true ;
goto del_cursor ;
}
2017-11-28 05:23:32 +03:00
XFS_WANT_CORRUPTED_GOTO ( mp , ! isnullstartblock ( got . br_startblock ) ,
del_cursor ) ;
2017-10-19 21:07:11 +03:00
2017-10-19 21:07:34 +03:00
new_startoff = got . br_startoff - offset_shift_fsb ;
2017-11-03 20:34:43 +03:00
if ( xfs_iext_peek_prev_extent ( ifp , & icur , & prev ) ) {
2017-10-19 21:07:34 +03:00
if ( new_startoff < prev . br_startoff + prev . br_blockcount ) {
error = - EINVAL ;
goto del_cursor ;
}
if ( xfs_bmse_can_merge ( & prev , & got , offset_shift_fsb ) ) {
2018-08-01 17:20:34 +03:00
error = xfs_bmse_merge ( tp , ip , whichfork ,
offset_shift_fsb , & icur , & got , & prev ,
cur , & logflags ) ;
2017-10-19 21:07:34 +03:00
if ( error )
goto del_cursor ;
goto done ;
}
} else {
if ( got . br_startoff < offset_shift_fsb ) {
error = - EINVAL ;
goto del_cursor ;
}
}
2018-08-01 17:20:34 +03:00
error = xfs_bmap_shift_update_extent ( tp , ip , whichfork , & icur , & got ,
cur , & logflags , new_startoff ) ;
2017-10-19 21:07:11 +03:00
if ( error )
goto del_cursor ;
2017-10-19 21:08:51 +03:00
2017-11-03 20:34:41 +03:00
done :
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_next_extent ( ifp , & icur , & got ) ) {
* done = true ;
goto del_cursor ;
2017-10-19 21:07:11 +03:00
}
2017-10-19 21:07:34 +03:00
* next_fsb = got . br_startoff ;
2017-10-19 21:07:11 +03:00
del_cursor :
if ( cur )
2018-07-19 22:26:31 +03:00
xfs_btree_del_cursor ( cur , error ) ;
2017-10-19 21:07:11 +03:00
if ( logflags )
xfs_trans_log_inode ( tp , ip , logflags ) ;
return error ;
}
2018-06-22 09:26:57 +03:00
/*
 * Make sure we won't be right-shifting an extent past the maximum bound.
 *
 * Takes and drops XFS_ILOCK_EXCL around a lookup of the last data fork
 * extent.  If that extent starts at or after @off and adding @shift to its
 * start offset wraps within BMBT_STARTOFF_MASK, -EINVAL is returned.
 * Returns -EIO on a shut down filesystem, otherwise the result of
 * xfs_bmap_last_extent().
 */
int
xfs_bmap_can_insert_extents(
	struct xfs_inode	*ip,
	xfs_fileoff_t		off,
	xfs_fileoff_t		shift)
{
	struct xfs_bmbt_irec	last;
	int			is_empty;
	int			error = 0;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &last, &is_empty);
	if (!error && !is_empty && last.br_startoff >= off) {
		xfs_fileoff_t	shifted;

		/* A masked sum smaller than the original means it wrapped. */
		shifted = (last.br_startoff + shift) & BMBT_STARTOFF_MASK;
		if (shifted < last.br_startoff)
			error = -EINVAL;
	}
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	return error;
}
2017-10-19 21:07:11 +03:00
/*
 * Shift one data fork extent towards the end of the file.
 *
 * When *next_fsb is NULLFSBLOCK the walk starts at the last extent of the
 * fork; otherwise the extent is found by looking up *next_fsb.  That extent
 * is moved up by @offset_shift_fsb blocks.  On success *next_fsb is set to
 * the start offset of the preceding extent; *done is set to true when no
 * starting extent is found, or when the preceding extent is missing or ends
 * at or before @stop_fsb.
 *
 * Returns -EIO if the filesystem is shut down or @stop_fsb lies at or beyond
 * the end of the extent to shift; -EINVAL if the shifted extent would
 * overlap its right neighbour; -EFSCORRUPTED for an unexpected fork format
 * or a delalloc extent.
 */
int
xfs_bmap_insert_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done,
	xfs_fileoff_t		stop_fsb)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, next;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	bool			found;
	int			error = 0;
	int			logflags = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.flags = 0;
	}

	/* Locate the extent to shift in this call. */
	if (*next_fsb == NULLFSBLOCK) {
		/* First call: start from the last extent of the fork. */
		xfs_iext_last(ifp, &icur);
		found = xfs_iext_get_extent(ifp, &icur, &got) &&
			stop_fsb <= got.br_startoff;
	} else {
		found = xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur,
				&got);
	}
	if (!found) {
		*done = true;
		goto out_cursor;
	}
	XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
				out_cursor);

	if (stop_fsb >= got.br_startoff + got.br_blockcount) {
		error = -EIO;
		goto out_cursor;
	}

	new_startoff = got.br_startoff + offset_shift_fsb;
	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
		/* The shifted extent must not run into its right neighbour. */
		if (new_startoff + got.br_blockcount > next.br_startoff) {
			error = -EINVAL;
			goto out_cursor;
		}

		/*
		 * Unlike a left shift (which involves a hole punch), a right
		 * shift does not modify extent neighbors in any way.  We should
		 * never find mergeable extents in this scenario.  Check anyways
		 * and warn if we encounter two extents that could be one.
		 */
		WARN_ON_ONCE(xfs_bmse_can_merge(&got, &next,
				offset_shift_fsb));
	}

	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
			cur, &logflags, new_startoff);
	if (error)
		goto out_cursor;

	/* Step to the previous extent, or stop once we pass stop_fsb. */
	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
	    stop_fsb >= got.br_startoff + got.br_blockcount)
		*done = true;
	else
		*next_fsb = got.br_startoff;

out_cursor:
	if (cur)
		xfs_btree_del_cursor(cur, error);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}
2015-03-25 07:08:56 +03:00
/*
 * Split the data fork extent that contains @split_fsb into two extents so
 * that @split_fsb becomes the first block of the second extent.
 *
 * @tp is the transaction the changes are logged in, @ip is the inode whose
 * data fork is modified, and @split_fsb is the file offset (in filesystem
 * blocks) at which to split.
 *
 * If the lookup finds no extent, or the extent it finds starts at or after
 * @split_fsb (i.e. @split_fsb lies in a hole or is already the first block of
 * an extent), nothing is changed and 0 is returned.
 *
 * Returns 0 on success, -EFSCORRUPTED if the fork is in neither extents nor
 * btree format, -EIO if the filesystem has been forcibly shut down, or the
 * error returned by one of the helpers called below.
 */
STATIC int
xfs_bmap_split_extent_at (
struct xfs_trans * tp ,
struct xfs_inode * ip ,
2018-07-12 08:26:27 +03:00
xfs_fileoff_t split_fsb )
2015-03-25 07:08:56 +03:00
{
int whichfork = XFS_DATA_FORK ;
struct xfs_btree_cur * cur = NULL ;
struct xfs_bmbt_irec got ;
struct xfs_bmbt_irec new ; /* split extent */
struct xfs_mount * mp = ip - > i_mount ;
struct xfs_ifork * ifp ;
xfs_fsblock_t gotblkcnt ; /* new block count for got */
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ;
2015-03-25 07:08:56 +03:00
int error = 0 ;
int logflags = 0 ;
int i = 0 ; /* status returned by the btree lookup/insert calls */
/* Only extents-format and btree-format forks can be split. */
if ( unlikely ( XFS_TEST_ERROR (
( XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_EXTENTS & &
XFS_IFORK_FORMAT ( ip , whichfork ) ! = XFS_DINODE_FMT_BTREE ) ,
2017-06-21 03:54:47 +03:00
mp , XFS_ERRTAG_BMAPIFORMAT ) ) ) {
2015-03-25 07:08:56 +03:00
XFS_ERROR_REPORT ( " xfs_bmap_split_extent_at " ,
XFS_ERRLEVEL_LOW , mp ) ;
return - EFSCORRUPTED ;
}
if ( XFS_FORCED_SHUTDOWN ( mp ) )
return - EIO ;
ifp = XFS_IFORK_PTR ( ip , whichfork ) ;
if ( ! ( ifp - > if_flags & XFS_IFEXTENTS ) ) {
/* Read in all the extents */
error = xfs_iread_extents ( tp , ip , whichfork ) ;
if ( error )
return error ;
}
/*
 * If there are no extents, or split_fsb lies in a hole or on the first
 * block of an extent, we are done.
 */
2017-11-03 20:34:43 +03:00
if ( ! xfs_iext_lookup_extent ( ip , ifp , split_fsb , & icur , & got ) | |
2017-08-30 01:44:13 +03:00
got . br_startoff > = split_fsb )
2015-03-25 07:08:56 +03:00
return 0 ;
/*
 * "got" keeps the blocks in front of split_fsb; "new" describes the
 * remainder, starting at split_fsb and inheriting got's state.
 */
gotblkcnt = split_fsb - got . br_startoff ;
new . br_startoff = split_fsb ;
new . br_startblock = got . br_startblock + gotblkcnt ;
new . br_blockcount = got . br_blockcount - gotblkcnt ;
new . br_state = got . br_state ;
/*
 * If the fork has a btree root, set up a bmap btree cursor and look up
 * the record for the still-unmodified "got". The lookup must report a
 * match (i == 1); otherwise we bail out through del_cursor (presumably
 * flagged as corruption by XFS_WANT_CORRUPTED_GOTO).
 */
if ( ifp - > if_flags & XFS_IFBROOT ) {
cur = xfs_bmbt_init_cursor ( mp , tp , ip , whichfork ) ;
cur - > bc_private . b . flags = 0 ;
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & got , & i ) ;
2015-03-25 07:08:56 +03:00
if ( error )
goto del_cursor ;
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , del_cursor ) ;
}
/* Shrink "got" in the in-core extent list to end just before split_fsb. */
got . br_blockcount = gotblkcnt ;
2017-11-03 20:34:43 +03:00
xfs_iext_update_extent ( ip , xfs_bmap_fork_to_state ( whichfork ) , & icur ,
& got ) ;
2015-03-25 07:08:56 +03:00
logflags = XFS_ILOG_CORE ;
/*
 * With a btree cursor the shortened record is written through the
 * cursor; without one the XFS_ILOG_DEXT flag is added to logflags
 * instead.
 */
if ( cur ) {
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_update ( cur , & got ) ;
2015-03-25 07:08:56 +03:00
if ( error )
goto del_cursor ;
} else
logflags | = XFS_ILOG_DEXT ;
/* Add new extent: step past "got" and insert "new" at that position. */
2017-11-03 20:34:43 +03:00
xfs_iext_next ( ifp , & icur ) ;
2017-11-03 20:34:46 +03:00
xfs_iext_insert ( ip , & icur , & new , 0 ) ;
2015-03-25 07:08:56 +03:00
XFS_IFORK_NEXT_SET ( ip , whichfork ,
XFS_IFORK_NEXTENTS ( ip , whichfork ) + 1 ) ;
/*
 * Mirror the insertion in the btree: "new" must not be there yet
 * (i == 0 after the lookup) and the insert must report success (i == 1).
 */
if ( cur ) {
2017-10-18 00:16:26 +03:00
error = xfs_bmbt_lookup_eq ( cur , & new , & i ) ;
2015-03-25 07:08:56 +03:00
if ( error )
goto del_cursor ;
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 0 , del_cursor ) ;
error = xfs_btree_insert ( cur , & i ) ;
if ( error )
goto del_cursor ;
XFS_WANT_CORRUPTED_GOTO ( mp , i = = 1 , del_cursor ) ;
}
/*
 * Convert to a btree if necessary. No cursor may exist at this point
 * (see the ASSERT); the conversion hands one back through &cur and its
 * log flags are merged into ours.
 */
if ( xfs_bmap_needs_btree ( ip , whichfork ) ) {
int tmp_logflags ; /* partial log flag return val */
ASSERT ( cur = = NULL ) ;
2018-07-12 08:26:29 +03:00
error = xfs_bmap_extents_to_btree ( tp , ip , & cur , 0 ,
& tmp_logflags , whichfork ) ;
2015-03-25 07:08:56 +03:00
logflags | = tmp_logflags ;
}
/*
 * Common exit: reset the cursor's allocated-block count and delete the
 * cursor (passing along the error state), then log the inode with
 * whatever flags were accumulated, even on the error paths.
 */
del_cursor :
if ( cur ) {
cur - > bc_private . b . allocated = 0 ;
2018-07-19 22:26:31 +03:00
xfs_btree_del_cursor ( cur , error ) ;
2015-03-25 07:08:56 +03:00
}
if ( logflags )
xfs_trans_log_inode ( tp , ip , logflags ) ;
return error ;
}
/*
 * Split the data fork extent of @ip at @split_fsb inside a transaction of
 * its own.
 *
 * A write transaction is allocated, the inode is locked with XFS_ILOCK_EXCL
 * and joined to the transaction, and xfs_bmap_split_extent_at() does the
 * work.  The transaction is committed on success and cancelled on failure.
 *
 * Returns 0 or a negative errno from allocation, the split, or the commit.
 */
int
xfs_bmap_split_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		split_fsb)
{
	struct xfs_mount	*mount = ip->i_mount;
	struct xfs_trans	*tp;
	int			rc;

	rc = xfs_trans_alloc(mount, &M_RES(mount)->tr_write,
			XFS_DIOSTRAT_SPACE_RES(mount, 0), 0, 0, &tp);
	if (rc)
		return rc;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	rc = xfs_bmap_split_extent_at(tp, ip, split_fsb);
	if (rc) {
		/* The split failed: throw the transaction away. */
		xfs_trans_cancel(tp);
		return rc;
	}

	return xfs_trans_commit(tp);
}
2016-10-03 19:11:28 +03:00
/* Deferred mapping is only for real extents in the data fork. */
static bool
xfs_bmap_is_update_needed (
struct xfs_bmbt_irec * bmap )
{
return bmap - > br_startblock ! = HOLESTARTBLOCK & &
bmap - > br_startblock ! = DELAYSTARTBLOCK ;
}
/* Record a bmap intent. */
static int
__xfs_bmap_add (
2018-08-01 17:20:34 +03:00
struct xfs_trans * tp ,
2016-10-03 19:11:28 +03:00
enum xfs_bmap_intent_type type ,
struct xfs_inode * ip ,
int whichfork ,
struct xfs_bmbt_irec * bmap )
{
struct xfs_bmap_intent * bi ;
2018-08-01 17:20:34 +03:00
trace_xfs_bmap_defer ( tp - > t_mountp ,
XFS_FSB_TO_AGNO ( tp - > t_mountp , bmap - > br_startblock ) ,
2016-10-03 19:11:28 +03:00
type ,
2018-08-01 17:20:34 +03:00
XFS_FSB_TO_AGBNO ( tp - > t_mountp , bmap - > br_startblock ) ,
2016-10-03 19:11:28 +03:00
ip - > i_ino , whichfork ,
bmap - > br_startoff ,
bmap - > br_blockcount ,
bmap - > br_state ) ;
bi = kmem_alloc ( sizeof ( struct xfs_bmap_intent ) , KM_SLEEP | KM_NOFS ) ;
INIT_LIST_HEAD ( & bi - > bi_list ) ;
bi - > bi_type = type ;
bi - > bi_owner = ip ;
bi - > bi_whichfork = whichfork ;
bi - > bi_bmap = * bmap ;
2018-08-01 17:20:34 +03:00
xfs_defer_add ( tp , XFS_DEFER_OPS_TYPE_BMAP , & bi - > bi_list ) ;
2016-10-03 19:11:28 +03:00
return 0 ;
}
/* Map an extent into a file. */
int
xfs_bmap_map_extent (
2018-08-01 17:20:34 +03:00
struct xfs_trans * tp ,
2016-10-03 19:11:28 +03:00
struct xfs_inode * ip ,
struct xfs_bmbt_irec * PREV )
{
if ( ! xfs_bmap_is_update_needed ( PREV ) )
return 0 ;
2018-08-01 17:20:34 +03:00
return __xfs_bmap_add ( tp , XFS_BMAP_MAP , ip , XFS_DATA_FORK , PREV ) ;
2016-10-03 19:11:28 +03:00
}
/* Unmap an extent out of a file. */
int
xfs_bmap_unmap_extent (
2018-08-01 17:20:34 +03:00
struct xfs_trans * tp ,
2016-10-03 19:11:28 +03:00
struct xfs_inode * ip ,
struct xfs_bmbt_irec * PREV )
{
if ( ! xfs_bmap_is_update_needed ( PREV ) )
return 0 ;
2018-08-01 17:20:34 +03:00
return __xfs_bmap_add ( tp , XFS_BMAP_UNMAP , ip , XFS_DATA_FORK , PREV ) ;
2016-10-03 19:11:28 +03:00
}
/*
* Process one of the deferred bmap operations . We pass back the
* btree cursor to maintain our lock on the bmapbt between calls .
*/
int
xfs_bmap_finish_one (
struct xfs_trans * tp ,
struct xfs_inode * ip ,
enum xfs_bmap_intent_type type ,
int whichfork ,
xfs_fileoff_t startoff ,
xfs_fsblock_t startblock ,
2017-06-15 07:25:57 +03:00
xfs_filblks_t * blockcount ,
2016-10-03 19:11:28 +03:00
xfs_exntst_t state )
{
2017-06-15 07:25:57 +03:00
int error = 0 ;
2016-10-03 19:11:28 +03:00
2018-07-12 08:26:23 +03:00
ASSERT ( tp - > t_firstblock = = NULLFSBLOCK ) ;
2017-07-18 00:30:51 +03:00
2016-10-03 19:11:28 +03:00
trace_xfs_bmap_deferred ( tp - > t_mountp ,
XFS_FSB_TO_AGNO ( tp - > t_mountp , startblock ) , type ,
XFS_FSB_TO_AGBNO ( tp - > t_mountp , startblock ) ,
2017-06-15 07:25:57 +03:00
ip - > i_ino , whichfork , startoff , * blockcount , state ) ;
2016-10-03 19:11:28 +03:00
2017-04-12 02:45:53 +03:00
if ( WARN_ON_ONCE ( whichfork ! = XFS_DATA_FORK ) )
2016-10-03 19:11:28 +03:00
return - EFSCORRUPTED ;
if ( XFS_TEST_ERROR ( false , tp - > t_mountp ,
2017-06-21 03:54:47 +03:00
XFS_ERRTAG_BMAP_FINISH_ONE ) )
2016-10-03 19:11:28 +03:00
return - EIO ;
switch ( type ) {
case XFS_BMAP_MAP :
2017-06-15 07:25:57 +03:00
error = xfs_bmapi_remap ( tp , ip , startoff , * blockcount ,
2018-07-12 08:26:14 +03:00
startblock , 0 ) ;
2017-06-15 07:25:57 +03:00
* blockcount = 0 ;
2016-10-03 19:11:28 +03:00
break ;
case XFS_BMAP_UNMAP :
2017-06-15 07:25:57 +03:00
error = __xfs_bunmapi ( tp , ip , startoff , blockcount ,
2018-07-12 08:26:25 +03:00
XFS_BMAPI_REMAP , 1 ) ;
2016-10-03 19:11:28 +03:00
break ;
default :
ASSERT ( 0 ) ;
error = - EFSCORRUPTED ;
}
return error ;
}
2018-03-23 20:06:52 +03:00
/* Check that an inode's extent does not have invalid flags or bad ranges. */
xfs_failaddr_t
xfs_bmap_validate_extent (
struct xfs_inode * ip ,
int whichfork ,
struct xfs_bmbt_irec * irec )
{
struct xfs_mount * mp = ip - > i_mount ;
xfs_fsblock_t endfsb ;
bool isrt ;
isrt = XFS_IS_REALTIME_INODE ( ip ) ;
endfsb = irec - > br_startblock + irec - > br_blockcount - 1 ;
if ( isrt ) {
if ( ! xfs_verify_rtbno ( mp , irec - > br_startblock ) )
return __this_address ;
if ( ! xfs_verify_rtbno ( mp , endfsb ) )
return __this_address ;
} else {
if ( ! xfs_verify_fsbno ( mp , irec - > br_startblock ) )
return __this_address ;
if ( ! xfs_verify_fsbno ( mp , endfsb ) )
return __this_address ;
if ( XFS_FSB_TO_AGNO ( mp , irec - > br_startblock ) ! =
XFS_FSB_TO_AGNO ( mp , endfsb ) )
return __this_address ;
}
2018-10-18 09:18:58 +03:00
if ( irec - > br_state ! = XFS_EXT_NORM & & whichfork ! = XFS_DATA_FORK )
return __this_address ;
2018-03-23 20:06:52 +03:00
return NULL ;
}