2018-06-06 05:42:14 +03:00
// SPDX-License-Identifier: GPL-2.0
2005-04-17 02:20:36 +04:00
/*
2005-11-02 06:58:39 +03:00
* Copyright ( c ) 2003 - 2005 Silicon Graphics , Inc .
* All Rights Reserved .
2005-04-17 02:20:36 +04:00
*/
# ifndef __XFS_IOMAP_H__
# define __XFS_IOMAP_H__
2016-06-21 02:53:44 +03:00
# include <linux/iomap.h>
2005-04-17 02:20:36 +04:00
struct xfs_inode ;
struct xfs_bmbt_irec ;
2019-10-30 22:24:59 +03:00
int xfs_iomap_write_direct ( struct xfs_inode * ip , xfs_fileoff_t offset_fsb ,
2021-11-29 13:21:58 +03:00
xfs_fileoff_t count_fsb , unsigned int flags ,
2022-11-29 01:09:17 +03:00
struct xfs_bmbt_irec * imap , u64 * sequence ) ;
2017-09-21 21:26:18 +03:00
int xfs_iomap_write_unwritten ( struct xfs_inode * , xfs_off_t , xfs_off_t , bool ) ;
2019-10-30 22:24:59 +03:00
xfs_fileoff_t xfs_iomap_eof_align_last_fsb ( struct xfs_inode * ip ,
xfs_fileoff_t end_fsb ) ;
2005-04-17 02:20:36 +04:00
2022-11-29 01:09:17 +03:00
u64 xfs_iomap_inode_sequence ( struct xfs_inode * ip , u16 iomap_flags ) ;
2021-11-29 13:21:57 +03:00
int xfs_bmbt_to_iomap ( struct xfs_inode * ip , struct iomap * iomap ,
struct xfs_bmbt_irec * imap , unsigned int mapping_flags ,
2022-11-29 01:09:17 +03:00
u16 iomap_flags , u64 sequence_cookie ) ;
2016-06-21 02:52:47 +03:00
2021-11-29 13:21:49 +03:00
int xfs_zero_range ( struct xfs_inode * ip , loff_t pos , loff_t len ,
bool * did_zero ) ;
int xfs_truncate_page ( struct xfs_inode * ip , loff_t pos , bool * did_zero ) ;
2017-02-06 21:42:26 +03:00
static inline xfs_filblks_t
xfs_aligned_fsb_count (
xfs_fileoff_t offset_fsb ,
xfs_filblks_t count_fsb ,
xfs_extlen_t extsz )
{
if ( extsz ) {
xfs_extlen_t align ;
2018-06-08 19:54:22 +03:00
div_u64_rem ( offset_fsb , extsz , & align ) ;
2017-02-06 21:42:26 +03:00
if ( align )
count_fsb + = align ;
2018-06-08 19:54:22 +03:00
div_u64_rem ( count_fsb , extsz , & align ) ;
2017-02-06 21:42:26 +03:00
if ( align )
count_fsb + = extsz - align ;
}
return count_fsb ;
}
2019-10-19 19:09:46 +03:00
extern const struct iomap_ops xfs_buffered_write_iomap_ops ;
xfs: write page faults in iomap are not buffered writes
When we reserve a delalloc region in xfs_buffered_write_iomap_begin,
we mark the iomap as IOMAP_F_NEW so that the the write context
understands that it allocated the delalloc region.
If we then fail that buffered write, xfs_buffered_write_iomap_end()
checks for the IOMAP_F_NEW flag and if it is set, it punches out
the unused delalloc region that was allocated for the write.
The assumption this code makes is that all buffered write operations
that can allocate space are run under an exclusive lock (i_rwsem).
This is an invalid assumption: page faults in mmap()d regions call
through this same function pair to map the file range being faulted
and this runs only holding the inode->i_mapping->invalidate_lock in
shared mode.
IOWs, we can have races between page faults and write() calls that
fail the nested page cache write operation that result in data loss.
That is, the failing iomap_end call will punch out the data that
the other racing iomap iteration brought into the page cache. This
can be reproduced with generic/34[46] if we arbitrarily fail page
cache copy-in operations from write() syscalls.
Code analysis tells us that the iomap_page_mkwrite() function holds
the already instantiated and uptodate folio locked across the iomap
mapping iterations. Hence the folio cannot be removed from memory
whilst we are mapping the range it covers, and as such we do not
care if the mapping changes state underneath the iomap iteration
loop:
1. if the folio is not already dirty, there is no writeback races
possible.
2. if we allocated the mapping (delalloc or unwritten), the folio
cannot already be dirty. See #1.
3. If the folio is already dirty, it must be up to date. As we hold
it locked, it cannot be reclaimed from memory. Hence we always
have valid data in the page cache while iterating the mapping.
4. Valid data in the page cache can exist when the underlying
mapping is DELALLOC, UNWRITTEN or WRITTEN. Having the mapping
change from DELALLOC->UNWRITTEN or UNWRITTEN->WRITTEN does not
change the data in the page - it only affects actions if we are
initialising a new page. Hence #3 applies and we don't care
about these extent map transitions racing with
iomap_page_mkwrite().
5. iomap_page_mkwrite() checks for page invalidation races
(truncate, hole punch, etc) after it locks the folio. We also
hold the mapping->invalidation_lock here, and hence the mapping
cannot change due to extent removal operations while we are
iterating the folio.
As such, filesystems that don't use bufferheads will never fail
the iomap_folio_mkwrite_iter() operation on the current mapping,
regardless of whether the iomap should be considered stale.
Further, the range we are asked to iterate is limited to the range
inside EOF that the folio spans. Hence, for XFS, we will only map
the exact range we are asked for, and we will only do speculative
preallocation with delalloc if we are mapping a hole at the EOF
page. The iterator will consume the entire range of the folio that
is within EOF, and anything beyond the EOF block cannot be accessed.
We never need to truncate this post-EOF speculative prealloc away in
the context of the iomap_page_mkwrite() iterator because if it
remains unused we'll remove it when the last reference to the inode
goes away.
Hence we don't actually need an .iomap_end() cleanup/error handling
path at all for iomap_page_mkwrite() for XFS. This means we can
separate the page fault processing from the complexity of the
.iomap_end() processing in the buffered write path. This also means
that the buffered write path will also be able to take the
mapping->invalidate_lock as necessary.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
2022-11-07 02:09:11 +03:00
extern const struct iomap_ops xfs_page_mkwrite_iomap_ops ;
2019-10-19 19:09:46 +03:00
extern const struct iomap_ops xfs_direct_write_iomap_ops ;
2019-10-19 19:09:45 +03:00
extern const struct iomap_ops xfs_read_iomap_ops ;
2019-02-18 20:38:46 +03:00
extern const struct iomap_ops xfs_seek_iomap_ops ;
2017-01-28 10:20:26 +03:00
extern const struct iomap_ops xfs_xattr_iomap_ops ;
2022-06-03 08:37:37 +03:00
extern const struct iomap_ops xfs_dax_write_iomap_ops ;
2016-06-21 02:53:44 +03:00
2005-04-17 02:20:36 +04:00
# endif /* __XFS_IOMAP_H__*/