// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_quota.h"
#include "xfs_bmap_util.h"
#include "xfs_reflink.h"
#include "xfs_trace.h"
#include "xfs_exchrange.h"
#include "xfs_exchmaps.h"
#include "xfs_sb.h"
#include "xfs_icache.h"
#include "xfs_log.h"
#include "xfs_rtbitmap.h"
#include <linux/fsnotify.h>

/* Lock (and optionally join) two inodes for a file range exchange. */
void
xfs_exchrange_ilock(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip1,
	struct xfs_inode	*ip2)
{
	if (ip1 != ip2)
		xfs_lock_two_inodes(ip1, XFS_ILOCK_EXCL,
				    ip2, XFS_ILOCK_EXCL);
	else
		xfs_ilock(ip1, XFS_ILOCK_EXCL);
	if (tp) {
		xfs_trans_ijoin(tp, ip1, 0);
		if (ip2 != ip1)
			xfs_trans_ijoin(tp, ip2, 0);
	}
}

/* Unlock two inodes after a file range exchange operation. */
void
xfs_exchrange_iunlock(
	struct xfs_inode	*ip1,
	struct xfs_inode	*ip2)
{
	if (ip2 != ip1)
		xfs_iunlock(ip2, XFS_ILOCK_EXCL);
	xfs_iunlock(ip1, XFS_ILOCK_EXCL);
}

/*
 * Estimate the resource requirements to exchange file contents between the two
 * files.  The caller is required to hold the IOLOCK and the MMAPLOCK and to
 * have flushed both inodes' pagecache and active direct-ios.
 */
int
xfs_exchrange_estimate(
	struct xfs_exchmaps_req	*req)
{
	int			error;

	xfs_exchrange_ilock(NULL, req->ip1, req->ip2);
	error = xfs_exchmaps_estimate(req);
	xfs_exchrange_iunlock(req->ip1, req->ip2);
	return error;
}
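
/*
 * Set in *qretry when the quota reservation for the corresponding inode
 * fails with -EDQUOT or -ENOSPC, so that the caller can free speculative
 * preallocations charged to that inode's dquots and retry once.
 */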
#define QRETRY_IP1	(0x1)
#define QRETRY_IP2	(0x2)

/*
 * Obtain a quota reservation to make sure we don't hit EDQUOT.  We can skip
 * this if quota enforcement is disabled or if both inodes' dquots are the
 * same.  The qretry structure must be initialized to zeroes before the first
 * call to this function.
 */
STATIC int
xfs_exchrange_reserve_quota(
	struct xfs_trans		*tp,
	const struct xfs_exchmaps_req	*req,
	unsigned int			*qretry)
{
	int64_t				ddelta, rdelta;
	int				ip1_error = 0;
	int				error;

	/*
	 * Don't bother with a quota reservation if we're not enforcing them
	 * or the two inodes have the same dquots.
	 */
	if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
	    (req->ip1->i_udquot == req->ip2->i_udquot &&
	     req->ip1->i_gdquot == req->ip2->i_gdquot &&
	     req->ip1->i_pdquot == req->ip2->i_pdquot))
		return 0;

	*qretry = 0;

	/*
	 * For each file, compute the net gain in the number of regular blocks
	 * that will be mapped into that file and reserve that much quota.  The
	 * quota counts must be able to absorb at least that much space.
	 */
	ddelta = req->ip2_bcount - req->ip1_bcount;
	rdelta = req->ip2_rtbcount - req->ip1_rtbcount;
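	/*
	 * For example, if ip1_bcount is 10 and ip2_bcount is 25, ddelta is
	 * +15 and those 15 blocks are reserved against ip1's dquots below;
	 * the second branch handles the mirror-image case where ddelta is
	 * negative and ip2 needs the reservation instead.
	 */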
	if (ddelta > 0 || rdelta > 0) {
		error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
				ddelta > 0 ? ddelta : 0,
				rdelta > 0 ? rdelta : 0,
				false);
		if (error == -EDQUOT || error == -ENOSPC) {
			/*
			 * Save this error and see what happens if we try to
			 * reserve quota for ip2.  Then report both.
			 */
			*qretry |= QRETRY_IP1;
			ip1_error = error;
			error = 0;
		}
		if (error)
			return error;
	}
	if (ddelta < 0 || rdelta < 0) {
		error = xfs_trans_reserve_quota_nblks(tp, req->ip2,
				ddelta < 0 ? -ddelta : 0,
				rdelta < 0 ? -rdelta : 0,
				false);
		if (error == -EDQUOT || error == -ENOSPC)
			*qretry |= QRETRY_IP2;
		if (error)
			return error;
	}
	if (ip1_error)
		return ip1_error;

	/*
	 * For each file, forcibly reserve the gross gain in mapped blocks so
	 * that we don't trip over any quota block reservation assertions.
	 * We must reserve the gross gain because the quota code subtracts from
	 * bcount the number of blocks that we unmap; it does not add that
	 * quantity back to the quota block reservation.
	 */
	error = xfs_trans_reserve_quota_nblks(tp, req->ip1, req->ip1_bcount,
			req->ip1_rtbcount, true);
	if (error)
		return error;

	return xfs_trans_reserve_quota_nblks(tp, req->ip2, req->ip2_bcount,
			req->ip2_rtbcount, true);
}

/* Exchange the mappings (and hence the contents) of two files' forks. */
STATIC int
xfs_exchrange_mappings(
	const struct xfs_exchrange	*fxr,
	struct xfs_inode		*ip1,
	struct xfs_inode		*ip2)
{
	struct xfs_mount		*mp = ip1->i_mount;
	struct xfs_exchmaps_req		req = {
		.ip1			= ip1,
		.ip2			= ip2,
		.startoff1		= XFS_B_TO_FSBT(mp, fxr->file1_offset),
		.startoff2		= XFS_B_TO_FSBT(mp, fxr->file2_offset),
		.blockcount		= XFS_B_TO_FSB(mp, fxr->length),
	};
	struct xfs_trans		*tp;
	unsigned int			qretry;
	bool				retried = false;
	int				error;

	trace_xfs_exchrange_mappings(fxr, ip1, ip2);

	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
		req.flags |= XFS_EXCHMAPS_SET_SIZES;
	if (fxr->flags & XFS_EXCHANGE_RANGE_FILE1_WRITTEN)
		req.flags |= XFS_EXCHMAPS_INO1_WRITTEN;

	/*
	 * Round the request length up to the nearest file allocation unit.
	 * The prep function already checked that the request offsets and
	 * length in @fxr are safe to round up.
	 */
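	/*
	 * For example, with 4-block realtime allocation units, an 11-block
	 * request becomes 12 blocks so that only whole rt extents are
	 * exchanged.
	 */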
	if (xfs_inode_has_bigrtalloc(ip2))
		req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount);

	error = xfs_exchrange_estimate(&req);
	if (error)
		return error;

retry:
	/* Allocate the transaction, lock the inodes, and join them. */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, req.resblks, 0,
			XFS_TRANS_RES_FDBLKS, &tp);
	if (error)
		return error;

	xfs_exchrange_ilock(tp, ip1, ip2);

	trace_xfs_exchrange_before(ip2, 2);
	trace_xfs_exchrange_before(ip1, 1);

	error = xfs_exchmaps_check_forks(mp, &req);
	if (error)
		goto out_trans_cancel;

	/*
	 * Reserve ourselves some quota if any of them are in enforcing mode.
	 * In theory we only need enough to satisfy the change in the number
	 * of blocks between the two ranges being remapped.
	 */
	error = xfs_exchrange_reserve_quota(tp, &req, &qretry);
	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
		xfs_trans_cancel(tp);
		xfs_exchrange_iunlock(ip1, ip2);
		if (qretry & QRETRY_IP1)
			xfs_blockgc_free_quota(ip1, 0);
		if (qretry & QRETRY_IP2)
			xfs_blockgc_free_quota(ip2, 0);
		retried = true;
		goto retry;
	}
	if (error)
		goto out_trans_cancel;

	/* If we got this far on a dry run, all parameters are ok. */
	if (fxr->flags & XFS_EXCHANGE_RANGE_DRY_RUN)
		goto out_trans_cancel;

	/* Update the mtime and ctime of both files. */
	if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME1)
		xfs_trans_ichgtime(tp, ip1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME2)
		xfs_trans_ichgtime(tp, ip2, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	xfs_exchange_mappings(tp, &req);

	/*
	 * Force the log to persist metadata updates if the caller or the
	 * administrator requires this.  The generic prep function already
	 * flushed the relevant parts of the page cache.
	 */
	if (xfs_has_wsync(mp) || (fxr->flags & XFS_EXCHANGE_RANGE_DSYNC))
		xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp);

	trace_xfs_exchrange_after(ip2, 2);
	trace_xfs_exchrange_after(ip1, 1);

	if (error)
		goto out_unlock;

	/*
	 * If the caller wanted us to exchange the contents of two complete
	 * files of unequal length, exchange the incore sizes now.  This should
	 * be safe because we flushed both files' page caches, exchanged all
	 * the mappings, and updated the ondisk sizes.
	 */
	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
		loff_t	temp;

		temp = i_size_read(VFS_I(ip2));
		i_size_write(VFS_I(ip2), i_size_read(VFS_I(ip1)));
		i_size_write(VFS_I(ip1), temp);
	}

out_unlock:
	xfs_exchrange_iunlock(ip1, ip2);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

/*
 * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE.
 * This part deals with struct file objects and byte ranges and does not deal
 * with XFS-specific data structures such as xfs_inodes and block ranges.  This
 * separation may some day facilitate porting to another filesystem.
 *
 * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in
 * file1 with the same number of bytes starting at fxr.file2_offset in file2.
 * Implementations must call xfs_exchange_range_prep to prepare the two
 * files prior to taking locks; and they must update the inode change and mod
 * times of both files as part of the metadata update.  The timestamp update
 * and freshness checks must be done atomically as part of the data exchange
 * operation to ensure correctness of the freshness check.
 * xfs_exchange_range_finish must be called after the operation completes
 * successfully but before locks are dropped.
 */

/* Verify that we have security clearance to perform this operation. */
static int
xfs_exchange_range_verify_area(
	struct xfs_exchrange	*fxr)
{
	int			ret;

	ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
			true);
	if (ret)
		return ret;

	return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
			true);
}

/*
 * Performs necessary checks before doing a range exchange, having stabilized
 * mutable inode attributes via i_rwsem.
 */
static inline int
xfs_exchange_range_checks(
	struct xfs_exchrange	*fxr,
	unsigned int		alloc_unit)
{
	struct inode		*inode1 = file_inode(fxr->file1);
	struct inode		*inode2 = file_inode(fxr->file2);
	uint64_t		allocmask = alloc_unit - 1;
	int64_t			test_len;
	uint64_t		blen;
	loff_t			size1, size2, tmp;
	int			error;

	/* Don't touch certain kinds of inodes */
	if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2))
		return -EPERM;
	if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
		return -ETXTBSY;

	size1 = i_size_read(inode1);
	size2 = i_size_read(inode2);

	/* Ranges cannot start after EOF. */
	if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
		return -EINVAL;

	/*
	 * If the caller said to exchange to EOF, we set the length of the
	 * request large enough to cover everything to the end of both files.
	 */
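	/*
	 * For example, if 96k of file1 and 52k of file2 lie beyond the two
	 * starting offsets, the length becomes 96k so that both tails are
	 * covered.
	 */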
	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
		fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
					     size2 - fxr->file2_offset);

		error = xfs_exchange_range_verify_area(fxr);
		if (error)
			return error;
	}

	/*
	 * The start of both ranges must be aligned to the file allocation
	 * unit.
	 */
	if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) ||
	    !IS_ALIGNED(fxr->file2_offset, alloc_unit))
		return -EINVAL;

	/* Ensure offsets don't wrap. */
	if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) ||
	    check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
		return -EINVAL;

	/*
	 * We require both ranges to end within EOF, unless we're exchanging
	 * to EOF.
	 */
	if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
	    (fxr->file1_offset + fxr->length > size1 ||
	     fxr->file2_offset + fxr->length > size2))
		return -EINVAL;

	/*
	 * Make sure we don't hit any file size limits.  If we hit any size
	 * limits such that test_length was adjusted, we abort the whole
	 * operation.
	 */
	test_len = fxr->length;
	error = generic_write_check_limits(fxr->file2, fxr->file2_offset,
			&test_len);
	if (error)
		return error;
	error = generic_write_check_limits(fxr->file1, fxr->file1_offset,
			&test_len);
	if (error)
		return error;
	if (test_len != fxr->length)
		return -EINVAL;

	/*
	 * If the user wanted us to exchange up to the infile's EOF, round up
	 * to the next allocation unit boundary for this check.  Do the same
	 * for the outfile.
	 *
	 * Otherwise, reject the range length if it's not aligned to an
	 * allocation unit.
	 */
	if (fxr->file1_offset + fxr->length == size1)
		blen = ALIGN(size1, alloc_unit) - fxr->file1_offset;
	else if (fxr->file2_offset + fxr->length == size2)
		blen = ALIGN(size2, alloc_unit) - fxr->file2_offset;
	else if (!IS_ALIGNED(fxr->length, alloc_unit))
		return -EINVAL;
	else
		blen = fxr->length;

	/* Don't allow overlapped exchanges within the same file. */
	if (inode1 == inode2 &&
	    fxr->file2_offset + blen > fxr->file1_offset &&
	    fxr->file1_offset + blen > fxr->file2_offset)
		return -EINVAL;

	/*
	 * Ensure that we don't exchange a partial EOF block into the middle of
	 * another file.
	 */
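	/*
	 * For example, with a 4k allocation unit and a 6k length, the 2k
	 * tail is a partial block; it is only allowed when offset + 6k
	 * reaches the end of both files, since otherwise the partial block
	 * would land in the middle of the other file.
	 */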
	if ((fxr->length & allocmask) == 0)
		return 0;

	blen = fxr->length;
	if (fxr->file2_offset + blen < size2)
		blen &= ~allocmask;

	if (fxr->file1_offset + blen < size1)
		blen &= ~allocmask;

	return blen == fxr->length ? 0 : -EINVAL;
}

/*
 * Check that the two inodes are eligible for range exchanges, the ranges make
 * sense, and then flush all dirty data.  Caller must ensure that the inodes
 * have been locked against any other modifications.
 */
static inline int
xfs_exchange_range_prep(
	struct xfs_exchrange	*fxr,
	unsigned int		alloc_unit)
{
	struct inode		*inode1 = file_inode(fxr->file1);
	struct inode		*inode2 = file_inode(fxr->file2);
	bool			same_inode = (inode1 == inode2);
	int			error;

	/* Check that we don't violate system file offset limits. */
	error = xfs_exchange_range_checks(fxr, alloc_unit);
	if (error || fxr->length == 0)
		return error;

	/* Wait for the completion of any pending IOs on both files */
	inode_dio_wait(inode1);
	if (!same_inode)
		inode_dio_wait(inode2);

	error = filemap_write_and_wait_range(inode1->i_mapping,
			fxr->file1_offset,
			fxr->file1_offset + fxr->length - 1);
	if (error)
		return error;

	error = filemap_write_and_wait_range(inode2->i_mapping,
			fxr->file2_offset,
			fxr->file2_offset + fxr->length - 1);
	if (error)
		return error;

	/*
	 * If the files or inodes involved require synchronous writes, amend
	 * the request to force the filesystem to flush all data and metadata
	 * to disk after the operation completes.
	 */
	if (((fxr->file1->f_flags | fxr->file2->f_flags) & O_SYNC) ||
	    IS_SYNC(inode1) || IS_SYNC(inode2))
		fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC;

	return 0;
}

/*
 * Finish a range exchange operation, if it was successful.  Caller must ensure
 * that the inodes are still locked against any other modifications.
 */
static inline int
xfs_exchange_range_finish(
	struct xfs_exchrange	*fxr)
{
	int			error;

	error = file_remove_privs(fxr->file1);
	if (error)
		return error;
	if (file_inode(fxr->file1) == file_inode(fxr->file2))
		return 0;

	return file_remove_privs(fxr->file2);
}

/*
 * Check the alignment of an exchange request when the allocation unit size
 * isn't a power of two.  The generic file-level helpers use (fast)
 * bitmask-based alignment checks, but here we have to use slow long division.
 */
static int
xfs_exchrange_check_rtalign(
	const struct xfs_exchrange	*fxr,
	struct xfs_inode		*ip1,
	struct xfs_inode		*ip2,
	unsigned int			alloc_unit)
{
	uint64_t			length = fxr->length;
	uint64_t			blen;
	loff_t				size1, size2;

	size1 = i_size_read(VFS_I(ip1));
	size2 = i_size_read(VFS_I(ip2));

	/* The start of both ranges must be aligned to a rt extent. */
	if (!isaligned_64(fxr->file1_offset, alloc_unit) ||
	    !isaligned_64(fxr->file2_offset, alloc_unit))
		return -EINVAL;

	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
		length = max_t(int64_t, size1 - fxr->file1_offset,
					size2 - fxr->file2_offset);

	/*
	 * If the user wanted us to exchange up to the infile's EOF, round up
	 * to the next rt extent boundary for this check.  Do the same for the
	 * outfile.
	 *
	 * Otherwise, reject the range length if it's not rt extent aligned.
	 * We already confirmed the starting offsets' rt extent block
	 * alignment.
	 */
	if (fxr->file1_offset + length == size1)
		blen = roundup_64(size1, alloc_unit) - fxr->file1_offset;
	else if (fxr->file2_offset + length == size2)
		blen = roundup_64(size2, alloc_unit) - fxr->file2_offset;
	else if (!isaligned_64(length, alloc_unit))
		return -EINVAL;
	else
		blen = length;

	/* Don't allow overlapped exchanges within the same file. */
	if (ip1 == ip2 &&
	    fxr->file2_offset + blen > fxr->file1_offset &&
	    fxr->file1_offset + blen > fxr->file2_offset)
		return -EINVAL;

	/*
	 * Ensure that we don't exchange a partial EOF rt extent into the
	 * middle of another file.
	 */
	if (isaligned_64(length, alloc_unit))
		return 0;

	blen = length;
	if (fxr->file2_offset + length < size2)
		blen = rounddown_64(blen, alloc_unit);

	if (fxr->file1_offset + blen < size1)
		blen = rounddown_64(blen, alloc_unit);

	return blen == length ? 0 : -EINVAL;
}

/* Prepare two files to have their data exchanged. */
STATIC int
xfs_exchrange_prep(
	struct xfs_exchrange	*fxr,
	struct xfs_inode	*ip1,
	struct xfs_inode	*ip2)
{
	struct xfs_mount	*mp = ip2->i_mount;
	unsigned int		alloc_unit = xfs_inode_alloc_unitsize(ip2);
	int			error;

	trace_xfs_exchrange_prep(fxr, ip1, ip2);

	/* Verify both files are either real-time or non-realtime */
	if (XFS_IS_REALTIME_INODE(ip1) != XFS_IS_REALTIME_INODE(ip2))
		return -EINVAL;

	/* Check non-power of two alignment issues, if necessary. */
	if (!is_power_of_2(alloc_unit)) {
		error = xfs_exchrange_check_rtalign(fxr, ip1, ip2, alloc_unit);
		if (error)
			return error;

		/*
		 * Do the generic file-level checks with the regular block
		 * alignment.
		 */
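		/*
		 * The rt extent checks above already validated the larger
		 * alignment, so the (power-of-two) fs block size is all that
		 * the mask-based generic checks still need to enforce.
		 */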
		alloc_unit = mp->m_sb.sb_blocksize;
	}

	error = xfs_exchange_range_prep(fxr, alloc_unit);
	if (error || fxr->length == 0)
		return error;

	/* Attach dquots to both inodes before changing block maps. */
	error = xfs_qm_dqattach(ip2);
	if (error)
		return error;
	error = xfs_qm_dqattach(ip1);
	if (error)
		return error;

	trace_xfs_exchrange_flush(fxr, ip1, ip2);

	/* Flush the relevant ranges of both files. */
	error = xfs_flush_unmap_range(ip2, fxr->file2_offset, fxr->length);
	if (error)
		return error;
	error = xfs_flush_unmap_range(ip1, fxr->file1_offset, fxr->length);
	if (error)
		return error;

	/*
	 * Cancel CoW fork preallocations for the ranges of both files.  The
	 * prep function should have flushed all the dirty data, so the only
	 * CoW mappings remaining should be speculative.
	 */
	if (xfs_inode_has_cow_data(ip1)) {
		error = xfs_reflink_cancel_cow_range(ip1, fxr->file1_offset,
				fxr->length, true);
		if (error)
			return error;
	}

	if (xfs_inode_has_cow_data(ip2)) {
		error = xfs_reflink_cancel_cow_range(ip2, fxr->file2_offset,
				fxr->length, true);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Exchange contents of files.  This is the binding between the generic
 * file-level concepts and the XFS inode-specific implementation.
 */
STATIC int
xfs_exchrange_contents(
	struct xfs_exchrange	*fxr)
{
	struct inode		*inode1 = file_inode(fxr->file1);
	struct inode		*inode2 = file_inode(fxr->file2);
	struct xfs_inode	*ip1 = XFS_I(inode1);
	struct xfs_inode	*ip2 = XFS_I(inode2);
	struct xfs_mount	*mp = ip1->i_mount;
	int			error;

	if (!xfs_has_exchange_range(mp))
		return -EOPNOTSUPP;

	if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS |
			   XFS_EXCHANGE_RANGE_PRIV_FLAGS))
		return -EINVAL;

	if (xfs_is_shutdown(mp))
		return -EIO;

	/* Lock both files against IO */
	error = xfs_ilock2_io_mmap(ip1, ip2);
	if (error)
		goto out_err;

	/* Prepare and then exchange file contents. */
	error = xfs_exchrange_prep(fxr, ip1, ip2);
	if (error)
		goto out_unlock;

	error = xfs_exchrange_mappings(fxr, ip1, ip2);
	if (error)
		goto out_unlock;

	/*
	 * Finish the exchange by removing special file privileges like any
	 * other file write would do.  This may involve turning on support for
	 * logged xattrs if either file has security capabilities.
	 */
	error = xfs_exchange_range_finish(fxr);
	if (error)
		goto out_unlock;

out_unlock:
	xfs_iunlock2_io_mmap(ip1, ip2);
out_err:
	if (error)
		trace_xfs_exchrange_error(ip2, error, _RET_IP_);
	return error;
}

/* Exchange parts of two files. */
static int
xfs_exchange_range(
	struct xfs_exchrange	*fxr)
{
	struct inode		*inode1 = file_inode(fxr->file1);
	struct inode		*inode2 = file_inode(fxr->file2);
	int			ret;

	BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
		     XFS_EXCHANGE_RANGE_PRIV_FLAGS);

	/* Both files must be on the same mount/filesystem. */
	if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt)
		return -EXDEV;

	if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
		return -EINVAL;

	/* Userspace requests only honored for regular files. */
	if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
		return -EINVAL;

	/* Both files must be opened for read and write. */
	if (!(fxr->file1->f_mode & FMODE_READ) ||
	    !(fxr->file1->f_mode & FMODE_WRITE) ||
	    !(fxr->file2->f_mode & FMODE_READ) ||
	    !(fxr->file2->f_mode & FMODE_WRITE))
		return -EBADF;

	/* Neither file can be opened append-only. */
	if ((fxr->file1->f_flags & O_APPEND) ||
	    (fxr->file2->f_flags & O_APPEND))
		return -EBADF;

	/*
	 * If we're not exchanging to EOF, we can check the areas before
	 * stabilizing both files' i_size.
	 */
	if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
		ret = xfs_exchange_range_verify_area(fxr);
		if (ret)
			return ret;
	}

	/* Update cmtime if the fd/inode don't forbid it. */
	if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
		fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1;
	if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2))
		fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2;

	file_start_write(fxr->file2);
	ret = xfs_exchrange_contents(fxr);
	file_end_write(fxr->file2);
	if (ret)
		return ret;

	fsnotify_modify(fxr->file1);
	if (fxr->file2 != fxr->file1)
		fsnotify_modify(fxr->file2);
	return 0;
}

/* Collect exchange-range arguments from userspace. */
long
xfs_ioc_exchange_range(
	struct file			*file,
	struct xfs_exchange_range __user *argp)
{
	struct xfs_exchrange		fxr = {
		.file2			= file,
	};
	struct xfs_exchange_range	args;
	struct fd			file1;
	int				error;

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;
	if (memchr_inv(&args.pad, 0, sizeof(args.pad)))
		return -EINVAL;
	if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
		return -EINVAL;

	fxr.file1_offset	= args.file1_offset;
	fxr.file2_offset	= args.file2_offset;
	fxr.length		= args.length;
	fxr.flags		= args.flags;

	file1 = fdget(args.file1_fd);
	if (!file1.file)
		return -EBADF;
	fxr.file1 = file1.file;

	error = xfs_exchange_range(&fxr);
	fdput(file1);
	return error;
}