2018-06-06 05:42:14 +03:00
// SPDX-License-Identifier: GPL-2.0
2005-04-17 02:20:36 +04:00
/*
2006-06-09 08:48:12 +04:00
* Copyright ( c ) 2000 - 2006 Silicon Graphics , Inc .
2005-11-02 06:58:39 +03:00
* All Rights Reserved .
2005-04-17 02:20:36 +04:00
*/
# ifndef __XFS_BMAP_H__
# define __XFS_BMAP_H__
struct getbmap ;
struct xfs_bmbt_irec ;
2006-03-14 05:29:52 +03:00
struct xfs_ifork ;
2005-04-17 02:20:36 +04:00
struct xfs_inode ;
struct xfs_mount ;
struct xfs_trans ;
2007-11-23 08:28:09 +03:00
extern kmem_zone_t * xfs_bmap_free_item_zone ;
2015-01-09 02:46:49 +03:00
/*
 * Argument structure for xfs_bmap_alloc.
 *
 * Bundles everything one allocation request carries: the transaction and
 * incore inode, the extent records before and after the new extent, the
 * file offset and length being filled in, the btree and incore extent
 * cursors, and the logging flags, length limits and state booleans
 * described on each field below.
 */
struct xfs_bmalloca {
xfs_fsblock_t * firstblock ; /* i/o first block allocated */
struct xfs_trans * tp ; /* transaction pointer */
struct xfs_inode * ip ; /* incore inode pointer */
struct xfs_bmbt_irec prev ; /* extent before the new one */
struct xfs_bmbt_irec got ; /* extent after, or delayed */
xfs_fileoff_t offset ; /* offset in file filling in */
xfs_extlen_t length ; /* i/o length asked/allocated */
xfs_fsblock_t blkno ; /* starting block of new extent */
struct xfs_btree_cur * cur ; /* btree cursor */
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor icur ; /* incore extent cursor */
2015-01-09 02:46:49 +03:00
int nallocs ; /* number of extents alloc'd */
int logflags ; /* flags for transaction logging */
xfs_extlen_t total ; /* total blocks needed for xaction */
xfs_extlen_t minlen ; /* minimum allocation size (blocks) */
xfs_extlen_t minleft ; /* amount must be left after alloc */
bool eof ; /* set if allocating past last extent */
bool wasdel ; /* replacing a delayed allocation */
bool aeof ; /* allocated space at eof */
bool conv ; /* overwriting unwritten extents */
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
int datatype ; /* data type being allocated */
2015-01-09 02:46:49 +03:00
int flags ; /* NOTE(review): presumably the XFS_BMAPI_* flags of the request; confirm */
} ;
2005-04-17 02:20:36 +04:00
/*
* List of extents to be freed "later".
* The list is kept sorted on xefi_startblock.
*/
2016-08-03 04:18:10 +03:00
/* One extent queued for freeing: its start block, length and owner. */
struct xfs_extent_free_item
2005-04-17 02:20:36 +04:00
{
2016-08-03 04:18:10 +03:00
xfs_fsblock_t xefi_startblock ; /* starting fs block number */
xfs_extlen_t xefi_blockcount ; /* number of blocks in extent */
struct list_head xefi_list ; /* list linkage for this item */
2016-08-03 04:33:42 +03:00
struct xfs_owner_info xefi_oinfo ; /* extent owner */
2018-05-09 18:45:04 +03:00
bool xefi_skip_discard ; /* NOTE(review): presumably mirrors the skip_discard argument of __xfs_bmap_add_free; confirm */
2016-06-21 04:53:28 +03:00
} ;
2005-04-17 02:20:36 +04:00
# define XFS_BMAP_MAX_NMAP 4
/*
2011-09-19 00:40:52 +04:00
* Flags for xfs_bmapi_ *
2005-04-17 02:20:36 +04:00
*/
2011-09-19 00:40:52 +04:00
# define XFS_BMAPI_ENTIRE 0x001 /* return entire extent, not trimmed */
# define XFS_BMAPI_METADATA 0x002 /* mapping metadata not user data */
# define XFS_BMAPI_ATTRFORK 0x004 /* use attribute fork not data */
# define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */
# define XFS_BMAPI_CONTIG 0x020 /* must allocate only one extent */
2010-08-24 06:02:11 +04:00
/*
* Unwritten extent conversion - this needs write cache flushing and no
* additional allocation alignments. When specified with XFS_BMAPI_PREALLOC it
* converts from written to unwritten, otherwise it converts from unwritten to
* written.
*/
2011-09-19 00:40:52 +04:00
# define XFS_BMAPI_CONVERT 0x040
2005-04-17 02:20:36 +04:00
2015-11-03 04:27:22 +03:00
/*
* allocate zeroed extents - this requires all newly allocated user data extents
* to be initialised to zero . It will be ignored if XFS_BMAPI_METADATA is set .
* Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found
* during the allocation range to zeroed written extents .
*/
# define XFS_BMAPI_ZERO 0x080
2016-10-03 19:11:27 +03:00
/*
* Map the inode offset to the block given in ap->firstblock. Primarily
* used for reflink. The range must be in a hole, and this flag cannot be
* turned on with PREALLOC or CONVERT, and cannot be used on the attr fork.
2016-10-03 19:11:27 +03:00
*
* For bunmapi , this flag unmaps the range without adjusting quota , reducing
* refcount , or freeing the blocks .
2016-10-03 19:11:27 +03:00
*/
# define XFS_BMAPI_REMAP 0x100
2016-10-03 19:11:32 +03:00
/* Map something in the CoW fork. */
# define XFS_BMAPI_COWFORK 0x200
2017-01-20 20:31:54 +03:00
/* Only convert delalloc space, don't allocate entirely new extents */
# define XFS_BMAPI_DELALLOC 0x400
2017-11-03 20:34:44 +03:00
/* Only convert unwritten extents, don't allocate new blocks */
# define XFS_BMAPI_CONVERT_ONLY 0x800
2018-05-09 18:45:04 +03:00
/* Skip online discard of freed extents */
# define XFS_BMAPI_NODISCARD 0x1000
2018-05-09 20:02:32 +03:00
/* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */
# define XFS_BMAPI_NORMAP 0x2000
2009-12-15 02:14:59 +03:00
/*
 * Initializer list pairing each XFS_BMAPI_* flag with a printable name,
 * one { flag, string } entry per flag.
 * NOTE(review): presumably consumed by flag-printing code such as
 * tracepoints; the user is not visible in this header - confirm.
 */
# define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE , " ENTIRE " } , \
{ XFS_BMAPI_METADATA , " METADATA " } , \
{ XFS_BMAPI_ATTRFORK , " ATTRFORK " } , \
{ XFS_BMAPI_PREALLOC , " PREALLOC " } , \
{ XFS_BMAPI_CONTIG , " CONTIG " } , \
2015-11-03 04:27:22 +03:00
{ XFS_BMAPI_CONVERT , " CONVERT " } , \
2016-10-03 19:11:27 +03:00
{ XFS_BMAPI_ZERO , " ZERO " } , \
2016-10-03 19:11:32 +03:00
{ XFS_BMAPI_REMAP , " REMAP " } , \
2017-01-20 20:31:54 +03:00
{ XFS_BMAPI_COWFORK , " COWFORK " } , \
2017-11-03 20:34:44 +03:00
{ XFS_BMAPI_DELALLOC , " DELALLOC " } , \
2018-05-09 18:45:04 +03:00
{ XFS_BMAPI_CONVERT_ONLY , " CONVERT_ONLY " } , \
2018-05-09 20:02:32 +03:00
{ XFS_BMAPI_NODISCARD , " NODISCARD " } , \
{ XFS_BMAPI_NORMAP , " NORMAP " }
2009-12-15 02:14:59 +03:00
2005-11-02 06:38:42 +03:00
static inline int xfs_bmapi_aflag ( int w )
{
2016-10-03 19:11:32 +03:00
return ( w = = XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK :
( w = = XFS_COW_FORK ? XFS_BMAPI_COWFORK : 0 ) ) ;
}
/*
 * Convert XFS_BMAPI_* flags into a fork selector.  XFS_BMAPI_COWFORK takes
 * priority over XFS_BMAPI_ATTRFORK when both are set; with neither flag set
 * the result is XFS_DATA_FORK.
 */
static inline int xfs_bmapi_whichfork(int bmapi_flags)
{
	/* Neither fork flag present: plain data fork. */
	if (!(bmapi_flags & (XFS_BMAPI_COWFORK | XFS_BMAPI_ATTRFORK)))
		return XFS_DATA_FORK;

	/* At least one is set; the CoW flag wins over the attr flag. */
	return (bmapi_flags & XFS_BMAPI_COWFORK) ? XFS_COW_FORK : XFS_ATTR_FORK;
}
2005-04-17 02:20:36 +04:00
/*
* Special values for xfs_bmbt_irec_t br_startblock field .
*/
# define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL)
# define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL)
2009-11-25 03:00:19 +03:00
/*
* Flags for xfs_bmap_add_extent * .
*/
# define BMAP_LEFT_CONTIG (1 << 0)
# define BMAP_RIGHT_CONTIG (1 << 1)
# define BMAP_LEFT_FILLING (1 << 2)
# define BMAP_RIGHT_FILLING (1 << 3)
# define BMAP_LEFT_DELAY (1 << 4)
# define BMAP_RIGHT_DELAY (1 << 5)
# define BMAP_LEFT_VALID (1 << 6)
# define BMAP_RIGHT_VALID (1 << 7)
2009-11-25 03:00:21 +03:00
# define BMAP_ATTRFORK (1 << 8)
2016-10-03 19:11:32 +03:00
# define BMAP_COWFORK (1 << 9)
2009-11-25 03:00:19 +03:00
2009-12-15 02:14:59 +03:00
/*
 * Initializer list pairing BMAP_* extent state flags with short printable
 * names, one { flag, string } entry per flag.
 * NOTE(review): BMAP_LEFT_DELAY, BMAP_RIGHT_DELAY, BMAP_LEFT_VALID and
 * BMAP_RIGHT_VALID have no entry in this table - confirm that is intended.
 */
# define XFS_BMAP_EXT_FLAGS \
{ BMAP_LEFT_CONTIG , " LC " } , \
{ BMAP_RIGHT_CONTIG , " RC " } , \
{ BMAP_LEFT_FILLING , " LF " } , \
{ BMAP_RIGHT_FILLING , " RF " } , \
2016-10-03 19:11:32 +03:00
{ BMAP_ATTRFORK , " ATTR " } , \
{ BMAP_COWFORK , " COW " }
2005-04-17 02:20:36 +04:00
2014-02-24 03:58:19 +04:00
2017-03-29 00:53:35 +03:00
/*
* Return true if the extent is a real , allocated extent , or false if it is a
* delayed allocation , and unwritten extent or a hole .
*/
static inline bool xfs_bmap_is_real_extent ( struct xfs_bmbt_irec * irec )
{
return irec - > br_state ! = XFS_EXT_UNWRITTEN & &
irec - > br_startblock ! = HOLESTARTBLOCK & &
irec - > br_startblock ! = DELAYSTARTBLOCK & &
! isnullstartblock ( irec - > br_startblock ) ;
}
2016-10-20 07:51:50 +03:00
void xfs_trim_extent ( struct xfs_bmbt_irec * irec , xfs_fileoff_t bno ,
xfs_filblks_t len ) ;
xfs: trim writepage mapping to within eof
The writeback rework in commit fbcc02561359 ("xfs: Introduce
writeback context for writepages") introduced a subtle change in
behavior with regard to the block mapping used across the
->writepages() sequence. The previous xfs_cluster_write() code would
only flush pages up to EOF at the time of the writepage, thus
ensuring that any pages due to file-extending writes would be
handled on a separate cycle and with a new, updated block mapping.
The updated code establishes a block mapping in xfs_writepage_map()
that could extend beyond EOF if the file has post-eof preallocation.
Because we now use the generic writeback infrastructure and pass the
cached mapping to each writepage call, there is no implicit EOF
limit in place. If eofblocks trimming occurs during ->writepages(),
any post-eof portion of the cached mapping becomes invalid. The
eofblocks code has no means to serialize against writeback because
there are no pages associated with post-eof blocks. Therefore if an
eofblocks trim occurs and is followed by a file-extending buffered
write, not only has the mapping become invalid, but we could end up
writing a page to disk based on the invalid mapping.
Consider the following sequence of events:
- A buffered write creates a delalloc extent and post-eof
speculative preallocation.
- Writeback starts and on the first writepage cycle, the delalloc
extent is converted to real blocks (including the post-eof blocks)
and the mapping is cached.
- The file is closed and xfs_release() trims post-eof blocks. The
cached writeback mapping is now invalid.
- Another buffered write appends the file with a delalloc extent.
- The concurrent writeback cycle picks up the just written page
because the writeback range end is LLONG_MAX. xfs_writepage_map()
attributes it to the (now invalid) cached mapping and writes the
data to an incorrect location on disk (and where the file offset is
still backed by a delalloc extent).
This problem is reproduced by xfstests test generic/464, which
triggers racing writes, appends, open/closes and writeback requests.
To address this problem, trim the mapping used during writeback to
within EOF when the mapping is validated. This ensures the mapping
is revalidated for any pages encountered beyond EOF as of the time
the current mapping was cached or last validated.
Reported-by: Eryu Guan <eguan@redhat.com>
Diagnosed-by: Eryu Guan <eguan@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-10-13 19:47:46 +03:00
/*
 * Trim the extent mapping in @irec against the EOF of inode @ip.
 * NOTE(review): the definition is not visible in this header; the summary
 * follows the change description that introduced the function ("trim the
 * mapping used during writeback to within EOF") - confirm against the
 * implementation.
 */
void xfs_trim_extent_eof(struct xfs_bmbt_irec *irec, struct xfs_inode *ip);
2011-09-19 00:41:07 +04:00
int xfs_bmap_add_attrfork ( struct xfs_inode * ip , int size , int rsvd ) ;
2013-07-10 01:04:00 +04:00
void xfs_bmap_local_to_extents_empty ( struct xfs_inode * ip , int whichfork ) ;
2018-05-09 18:45:04 +03:00
void __xfs_bmap_add_free ( struct xfs_mount * mp , struct xfs_defer_ops * dfops ,
2016-08-03 04:33:42 +03:00
xfs_fsblock_t bno , xfs_filblks_t len ,
2018-05-09 18:45:04 +03:00
struct xfs_owner_info * oinfo , bool skip_discard ) ;
2011-09-19 00:41:07 +04:00
void xfs_bmap_compute_maxlevels ( struct xfs_mount * mp , int whichfork ) ;
int xfs_bmap_first_unused ( struct xfs_trans * tp , struct xfs_inode * ip ,
xfs_extlen_t len , xfs_fileoff_t * unused , int whichfork ) ;
int xfs_bmap_last_before ( struct xfs_trans * tp , struct xfs_inode * ip ,
xfs_fileoff_t * last_block , int whichfork ) ;
2014-04-14 12:58:05 +04:00
int xfs_bmap_last_offset ( struct xfs_inode * ip , xfs_fileoff_t * unused ,
int whichfork ) ;
2011-09-19 00:41:07 +04:00
int xfs_bmap_one_block ( struct xfs_inode * ip , int whichfork ) ;
2011-09-19 00:40:45 +04:00
int xfs_bmapi_read ( struct xfs_inode * ip , xfs_fileoff_t bno ,
xfs_filblks_t len , struct xfs_bmbt_irec * mval ,
int * nmap , int flags ) ;
2011-09-19 00:40:52 +04:00
int xfs_bmapi_write ( struct xfs_trans * tp , struct xfs_inode * ip ,
xfs_fileoff_t bno , xfs_filblks_t len , int flags ,
xfs_fsblock_t * firstblock , xfs_extlen_t total ,
2018-07-12 08:26:12 +03:00
struct xfs_bmbt_irec * mval , int * nmap ) ;
2016-10-03 19:11:29 +03:00
int __xfs_bunmapi ( struct xfs_trans * tp , struct xfs_inode * ip ,
xfs_fileoff_t bno , xfs_filblks_t * rlen , int flags ,
2018-07-12 08:26:13 +03:00
xfs_extnum_t nexts , xfs_fsblock_t * firstblock ) ;
2011-09-19 00:41:07 +04:00
int xfs_bunmapi ( struct xfs_trans * tp , struct xfs_inode * ip ,
xfs_fileoff_t bno , xfs_filblks_t len , int flags ,
xfs_extnum_t nexts , xfs_fsblock_t * firstblock ,
2018-07-12 08:26:13 +03:00
int * done ) ;
2016-10-20 07:54:14 +03:00
int xfs_bmap_del_extent_delay ( struct xfs_inode * ip , int whichfork ,
2017-11-03 20:34:43 +03:00
struct xfs_iext_cursor * cur , struct xfs_bmbt_irec * got ,
struct xfs_bmbt_irec * del ) ;
void xfs_bmap_del_extent_cow ( struct xfs_inode * ip ,
struct xfs_iext_cursor * cur , struct xfs_bmbt_irec * got ,
2016-10-20 07:54:14 +03:00
struct xfs_bmbt_irec * del ) ;
2011-09-19 00:41:07 +04:00
uint xfs_default_attroffset ( struct xfs_inode * ip ) ;
2017-10-19 21:07:11 +03:00
int xfs_bmap_collapse_extents ( struct xfs_trans * tp , struct xfs_inode * ip ,
2015-03-25 07:08:56 +03:00
xfs_fileoff_t * next_fsb , xfs_fileoff_t offset_shift_fsb ,
2018-07-12 08:26:15 +03:00
bool * done , xfs_fsblock_t * firstblock ) ;
2018-06-22 09:26:57 +03:00
int xfs_bmap_can_insert_extents ( struct xfs_inode * ip , xfs_fileoff_t off ,
xfs_fileoff_t shift ) ;
2017-10-19 21:07:11 +03:00
int xfs_bmap_insert_extents ( struct xfs_trans * tp , struct xfs_inode * ip ,
xfs_fileoff_t * next_fsb , xfs_fileoff_t offset_shift_fsb ,
2018-07-12 08:26:15 +03:00
bool * done , xfs_fileoff_t stop_fsb , xfs_fsblock_t * firstblock ) ;
2015-03-25 07:08:56 +03:00
int xfs_bmap_split_extent ( struct xfs_inode * ip , xfs_fileoff_t split_offset ) ;
2016-10-03 19:11:32 +03:00
int xfs_bmapi_reserve_delalloc ( struct xfs_inode * ip , int whichfork ,
2016-11-28 06:57:42 +03:00
xfs_fileoff_t off , xfs_filblks_t len , xfs_filblks_t prealloc ,
2017-11-03 20:34:43 +03:00
struct xfs_bmbt_irec * got , struct xfs_iext_cursor * cur ,
int eof ) ;
2009-03-29 21:26:46 +04:00
2018-05-09 18:45:04 +03:00
/*
 * Wrapper around __xfs_bmap_add_free(): forwards @mp, @dfops, @bno, @len
 * and @oinfo unchanged and always passes false for the skip_discard
 * argument.
 */
static inline void xfs_bmap_add_free(struct xfs_mount *mp,
		struct xfs_defer_ops *dfops, xfs_fsblock_t bno,
		xfs_filblks_t len, struct xfs_owner_info *oinfo)
{
	const bool	skip_discard = false;

	__xfs_bmap_add_free(mp, dfops, bno, len, oinfo, skip_discard);
}
2016-10-03 19:11:26 +03:00
/*
 * Operation type stored in struct xfs_bmap_intent.  Numbering starts at 1,
 * so 0 never names a valid type.
 */
enum xfs_bmap_intent_type {
	XFS_BMAP_MAP	= 1,
	XFS_BMAP_UNMAP	= 2,
};
/*
 * One bmap intent record: an operation type together with an inode
 * pointer, a fork selector and an extent record, linkable on a list.
 */
struct xfs_bmap_intent {
struct list_head bi_list ; /* list linkage */
enum xfs_bmap_intent_type bi_type ; /* XFS_BMAP_MAP or XFS_BMAP_UNMAP */
struct xfs_inode * bi_owner ; /* NOTE(review): presumably the inode being mapped or unmapped; confirm */
int bi_whichfork ; /* NOTE(review): presumably an XFS_*_FORK selector; confirm */
struct xfs_bmbt_irec bi_bmap ; /* extent record for the operation */
} ;
2016-10-03 19:11:28 +03:00
int xfs_bmap_finish_one ( struct xfs_trans * tp , struct xfs_defer_ops * dfops ,
struct xfs_inode * ip , enum xfs_bmap_intent_type type ,
int whichfork , xfs_fileoff_t startoff , xfs_fsblock_t startblock ,
2017-06-15 07:25:57 +03:00
xfs_filblks_t * blockcount , xfs_exntst_t state ) ;
2016-10-03 19:11:28 +03:00
int xfs_bmap_map_extent ( struct xfs_mount * mp , struct xfs_defer_ops * dfops ,
struct xfs_inode * ip , struct xfs_bmbt_irec * imap ) ;
int xfs_bmap_unmap_extent ( struct xfs_mount * mp , struct xfs_defer_ops * dfops ,
struct xfs_inode * ip , struct xfs_bmbt_irec * imap ) ;
2017-10-19 21:02:29 +03:00
/*
 * Convert a fork selector into the matching BMAP_* state flag:
 * XFS_ATTR_FORK gives BMAP_ATTRFORK, XFS_COW_FORK gives BMAP_COWFORK, and
 * any other value gives 0.
 */
static inline int xfs_bmap_fork_to_state(int whichfork)
{
	if (whichfork == XFS_ATTR_FORK)
		return BMAP_ATTRFORK;
	if (whichfork == XFS_COW_FORK)
		return BMAP_COWFORK;
	return 0;
}
2018-03-23 20:06:52 +03:00
xfs_failaddr_t xfs_bmap_validate_extent ( struct xfs_inode * ip , int whichfork ,
struct xfs_bmbt_irec * irec ) ;
2018-05-14 16:34:34 +03:00
int xfs_bmapi_remap ( struct xfs_trans * tp , struct xfs_inode * ip ,
xfs_fileoff_t bno , xfs_filblks_t len , xfs_fsblock_t startblock ,
2018-07-12 08:26:14 +03:00
int flags ) ;
2018-05-14 16:34:34 +03:00
2005-04-17 02:20:36 +04:00
# endif /* __XFS_BMAP_H__ */