2018-06-06 05:42:14 +03:00
// SPDX-License-Identifier: GPL-2.0+
2017-03-29 00:56:37 +03:00
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
# include "xfs.h"
# include "xfs_fs.h"
# include "xfs_shared.h"
# include "xfs_format.h"
# include "xfs_log_format.h"
# include "xfs_trans_resv.h"
# include "xfs_mount.h"
# include "xfs_inode.h"
# include "xfs_trans.h"
# include "xfs_btree.h"
# include "xfs_rmap_btree.h"
# include "xfs_trace.h"
# include "xfs_rmap.h"
# include "xfs_alloc.h"
# include "xfs_bit.h"
# include <linux/fsmap.h>
# include "xfs_fsmap.h"
# include "xfs_refcount.h"
# include "xfs_refcount_btree.h"
2017-03-29 00:56:37 +03:00
# include "xfs_alloc_btree.h"
2023-10-16 19:21:47 +03:00
# include "xfs_rtbitmap.h"
2021-06-02 03:48:24 +03:00
# include "xfs_ag.h"
2017-03-29 00:56:37 +03:00
/* Convert an xfs_fsmap to an fsmap. */
2020-10-01 20:56:07 +03:00
static void
2017-03-29 00:56:37 +03:00
xfs_fsmap_from_internal (
struct fsmap * dest ,
struct xfs_fsmap * src )
{
dest - > fmr_device = src - > fmr_device ;
dest - > fmr_flags = src - > fmr_flags ;
dest - > fmr_physical = BBTOB ( src - > fmr_physical ) ;
dest - > fmr_owner = src - > fmr_owner ;
dest - > fmr_offset = BBTOB ( src - > fmr_offset ) ;
dest - > fmr_length = BBTOB ( src - > fmr_length ) ;
dest - > fmr_reserved [ 0 ] = 0 ;
dest - > fmr_reserved [ 1 ] = 0 ;
dest - > fmr_reserved [ 2 ] = 0 ;
}
/* Convert an fsmap to an xfs_fsmap. */
void
xfs_fsmap_to_internal (
struct xfs_fsmap * dest ,
struct fsmap * src )
{
dest - > fmr_device = src - > fmr_device ;
dest - > fmr_flags = src - > fmr_flags ;
dest - > fmr_physical = BTOBBT ( src - > fmr_physical ) ;
dest - > fmr_owner = src - > fmr_owner ;
dest - > fmr_offset = BTOBBT ( src - > fmr_offset ) ;
dest - > fmr_length = BTOBBT ( src - > fmr_length ) ;
}
/* Convert an fsmap owner into an rmapbt owner. */
static int
xfs_fsmap_owner_to_rmap (
struct xfs_rmap_irec * dest ,
2021-08-11 03:00:31 +03:00
const struct xfs_fsmap * src )
2017-03-29 00:56:37 +03:00
{
if ( ! ( src - > fmr_flags & FMR_OF_SPECIAL_OWNER ) ) {
dest - > rm_owner = src - > fmr_owner ;
return 0 ;
}
switch ( src - > fmr_owner ) {
case 0 : /* "lowest owner id possible" */
case - 1ULL : /* "highest owner id possible" */
dest - > rm_owner = 0 ;
break ;
case XFS_FMR_OWN_FREE :
dest - > rm_owner = XFS_RMAP_OWN_NULL ;
break ;
case XFS_FMR_OWN_UNKNOWN :
dest - > rm_owner = XFS_RMAP_OWN_UNKNOWN ;
break ;
case XFS_FMR_OWN_FS :
dest - > rm_owner = XFS_RMAP_OWN_FS ;
break ;
case XFS_FMR_OWN_LOG :
dest - > rm_owner = XFS_RMAP_OWN_LOG ;
break ;
case XFS_FMR_OWN_AG :
dest - > rm_owner = XFS_RMAP_OWN_AG ;
break ;
case XFS_FMR_OWN_INOBT :
dest - > rm_owner = XFS_RMAP_OWN_INOBT ;
break ;
case XFS_FMR_OWN_INODES :
dest - > rm_owner = XFS_RMAP_OWN_INODES ;
break ;
case XFS_FMR_OWN_REFC :
dest - > rm_owner = XFS_RMAP_OWN_REFC ;
break ;
case XFS_FMR_OWN_COW :
dest - > rm_owner = XFS_RMAP_OWN_COW ;
break ;
case XFS_FMR_OWN_DEFECTIVE : /* not implemented */
/* fall through */
default :
return - EINVAL ;
}
return 0 ;
}
/* Convert an rmapbt owner into an fsmap owner. */
static int
xfs_fsmap_owner_from_rmap (
2021-08-11 03:02:16 +03:00
struct xfs_fsmap * dest ,
const struct xfs_rmap_irec * src )
2017-03-29 00:56:37 +03:00
{
dest - > fmr_flags = 0 ;
if ( ! XFS_RMAP_NON_INODE_OWNER ( src - > rm_owner ) ) {
dest - > fmr_owner = src - > rm_owner ;
return 0 ;
}
dest - > fmr_flags | = FMR_OF_SPECIAL_OWNER ;
switch ( src - > rm_owner ) {
case XFS_RMAP_OWN_FS :
dest - > fmr_owner = XFS_FMR_OWN_FS ;
break ;
case XFS_RMAP_OWN_LOG :
dest - > fmr_owner = XFS_FMR_OWN_LOG ;
break ;
case XFS_RMAP_OWN_AG :
dest - > fmr_owner = XFS_FMR_OWN_AG ;
break ;
case XFS_RMAP_OWN_INOBT :
dest - > fmr_owner = XFS_FMR_OWN_INOBT ;
break ;
case XFS_RMAP_OWN_INODES :
dest - > fmr_owner = XFS_FMR_OWN_INODES ;
break ;
case XFS_RMAP_OWN_REFC :
dest - > fmr_owner = XFS_FMR_OWN_REFC ;
break ;
case XFS_RMAP_OWN_COW :
dest - > fmr_owner = XFS_FMR_OWN_COW ;
break ;
2017-03-29 00:56:37 +03:00
case XFS_RMAP_OWN_NULL : /* "free" */
dest - > fmr_owner = XFS_FMR_OWN_FREE ;
break ;
2017-03-29 00:56:37 +03:00
default :
2019-11-02 19:41:18 +03:00
ASSERT ( 0 ) ;
2017-03-29 00:56:37 +03:00
return - EFSCORRUPTED ;
}
return 0 ;
}
/* getfsmap query state */
struct xfs_getfsmap_info {
struct xfs_fsmap_head * head ;
2020-10-01 20:56:07 +03:00
struct fsmap * fsmap_recs ; /* mapping records */
2017-03-29 00:56:37 +03:00
struct xfs_buf * agf_bp ; /* AGF, for refcount queries */
2021-06-02 03:48:24 +03:00
struct xfs_perag * pag ; /* AG info, if applicable */
2017-03-29 00:56:37 +03:00
xfs_daddr_t next_daddr ; /* next daddr we expect */
2023-06-30 03:39:43 +03:00
/* daddr of low fsmap key when we're using the rtbitmap */
xfs_daddr_t low_daddr ;
2017-03-29 00:56:37 +03:00
u64 missing_owner ; /* owner of holes */
u32 dev ; /* device id */
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
/*
* Low rmap key for the query . If low . rm_blockcount is nonzero , this
* is the second ( or later ) call to retrieve the recordset in pieces .
* xfs_getfsmap_rec_before_start will compare all records retrieved
* by the rmapbt query to filter out any records that start before
* the last record .
*/
struct xfs_rmap_irec low ;
2017-03-29 00:56:37 +03:00
struct xfs_rmap_irec high ; /* high rmap key */
bool last ; /* last extent? */
} ;
/*
 * Pairs a device id with the handler that runs getfsmap queries against
 * that device.
 */
struct xfs_getfsmap_dev {
	u32	dev;		/* device id this handler serves */

	/* query routine for the device */
	int	(*fn)(struct xfs_trans *tp,
		      const struct xfs_fsmap *keys,
		      struct xfs_getfsmap_info *info);
};
/* Compare two getfsmap device handlers. */
static int
xfs_getfsmap_dev_compare (
const void * p1 ,
const void * p2 )
{
const struct xfs_getfsmap_dev * d1 = p1 ;
const struct xfs_getfsmap_dev * d2 = p2 ;
return d1 - > dev - d2 - > dev ;
}
/* Decide if this mapping is shared. */
STATIC int
xfs_getfsmap_is_shared (
struct xfs_trans * tp ,
struct xfs_getfsmap_info * info ,
2021-08-11 03:02:16 +03:00
const struct xfs_rmap_irec * rec ,
2017-03-29 00:56:37 +03:00
bool * stat )
{
struct xfs_mount * mp = tp - > t_mountp ;
struct xfs_btree_cur * cur ;
xfs_agblock_t fbno ;
xfs_extlen_t flen ;
int error ;
* stat = false ;
2021-08-19 04:46:37 +03:00
if ( ! xfs_has_reflink ( mp ) )
2017-03-29 00:56:37 +03:00
return 0 ;
2021-06-02 03:48:24 +03:00
/* rt files will have no perag structure */
if ( ! info - > pag )
2017-03-29 00:56:37 +03:00
return 0 ;
/* Are there any shared blocks here? */
flen = 0 ;
2021-06-02 03:48:24 +03:00
cur = xfs_refcountbt_init_cursor ( mp , tp , info - > agf_bp , info - > pag ) ;
2017-03-29 00:56:37 +03:00
error = xfs_refcount_find_shared ( cur , rec - > rm_startblock ,
rec - > rm_blockcount , & fbno , & flen , false ) ;
2018-07-19 22:26:31 +03:00
xfs_btree_del_cursor ( cur , error ) ;
2017-03-29 00:56:37 +03:00
if ( error )
return error ;
* stat = flen > 0 ;
return 0 ;
}
2020-10-01 20:56:07 +03:00
static inline void
xfs_getfsmap_format (
struct xfs_mount * mp ,
struct xfs_fsmap * xfm ,
struct xfs_getfsmap_info * info )
{
struct fsmap * rec ;
trace_xfs_getfsmap_mapping ( mp , xfm ) ;
rec = & info - > fsmap_recs [ info - > head - > fmh_entries + + ] ;
xfs_fsmap_from_internal ( rec , xfm ) ;
}
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
/*
 * Decide whether a record starts before the point this query should resume
 * from, in which case the caller must not report it again.
 *
 * If info->low_daddr is anything other than -1ULL, the decision is made
 * purely on @rec_daddr.  Otherwise, a nonzero info->low.rm_blockcount marks
 * a continuation call and @rec is compared against the low rmap key.  With
 * neither set, nothing is filtered.
 */
static inline bool
xfs_getfsmap_rec_before_start(
	struct xfs_getfsmap_info	*info,
	const struct xfs_rmap_irec	*rec,
	xfs_daddr_t			rec_daddr)
{
	/* A daddr-based low key takes priority over the rmap key. */
	if (info->low_daddr != -1ULL)
		return rec_daddr < info->low_daddr;

	/* Not a continuation call: keep everything. */
	if (!info->low.rm_blockcount)
		return false;

	return xfs_rmap_compare(rec, &info->low) < 0;
}
2017-03-29 00:56:37 +03:00
/*
 * Format a reverse mapping for getfsmap, having translated rm_startblock
 * into the appropriate daddr units.
 *
 * @tp:		transaction; also supplies the mount.
 * @info:	query state (output array, counters, low key, next_daddr).
 * @rec:	the reverse mapping to report.
 * @rec_daddr:	start of @rec in daddr units.
 * @len_daddr:	length of @rec in daddr units.  Pass zero to have it derived
 *		from rec->rm_blockcount; pass a nonzero value if the length
 *		could be larger than rm_blockcount can express.
 *
 * If @rec starts after info->next_daddr, a gap record owned by
 * info->missing_owner is reported first.  When info->last is set only that
 * trailing gap is reported, never @rec itself.  If head->fmh_count is zero,
 * nothing is formatted and the records are merely counted.
 *
 * Returns 0 on success, -EINTR if a fatal signal is pending, -ECANCELED once
 * the output array (or the entry counter) is full, or the error from the
 * owner conversion or the shared-extent lookup.
 */
STATIC int
xfs_getfsmap_helper(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	const struct xfs_rmap_irec	*rec,
	xfs_daddr_t			rec_daddr,
	xfs_daddr_t			len_daddr)
{
	struct xfs_fsmap		fmr;
	struct xfs_mount		*mp = tp->t_mountp;
	bool				shared;
	int				error;

	if (fatal_signal_pending(current))
		return -EINTR;

	if (len_daddr == 0)
		len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);

	/*
	 * Filter out records that start before our startpoint, if the
	 * caller requested that.  They still advance next_daddr.
	 */
	if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr))
		goto out;

	/* Are we just counting mappings? */
	if (info->head->fmh_count == 0) {
		if (info->head->fmh_entries == UINT_MAX)
			return -ECANCELED;

		/* Count the gap in front of this record. */
		if (rec_daddr > info->next_daddr)
			info->head->fmh_entries++;

		if (info->last)
			return 0;

		/*
		 * Counting the gap may have consumed the last representable
		 * value; stop here rather than wrapping the counter to zero.
		 */
		if (info->head->fmh_entries == UINT_MAX)
			return -ECANCELED;

		info->head->fmh_entries++;
		goto out;
	}

	/*
	 * If the record starts past the last physical block we saw,
	 * then we've found a gap.  Report the gap as being owned by
	 * whatever the caller specified is the missing owner.
	 */
	if (rec_daddr > info->next_daddr) {
		if (info->head->fmh_entries >= info->head->fmh_count)
			return -ECANCELED;

		fmr.fmr_device = info->dev;
		fmr.fmr_physical = info->next_daddr;
		fmr.fmr_owner = info->missing_owner;
		fmr.fmr_offset = 0;
		fmr.fmr_length = rec_daddr - info->next_daddr;
		fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
		xfs_getfsmap_format(mp, &fmr, info);
	}

	/* The final call only exists to report the trailing gap. */
	if (info->last)
		goto out;

	/* Fill out the extent we found */
	if (info->head->fmh_entries >= info->head->fmh_count)
		return -ECANCELED;

	trace_xfs_fsmap_mapping(mp, info->dev,
			info->pag ? info->pag->pag_agno : NULLAGNUMBER, rec);

	fmr.fmr_device = info->dev;
	fmr.fmr_physical = rec_daddr;
	/* This also initializes fmr.fmr_flags. */
	error = xfs_fsmap_owner_from_rmap(&fmr, rec);
	if (error)
		return error;
	fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
	fmr.fmr_length = len_daddr;
	if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
		fmr.fmr_flags |= FMR_OF_PREALLOC;
	if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
		fmr.fmr_flags |= FMR_OF_ATTR_FORK;
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		fmr.fmr_flags |= FMR_OF_EXTENT_MAP;

	/* Only mappings with no flags at all are checked for sharing. */
	if (fmr.fmr_flags == 0) {
		error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
		if (error)
			return error;
		if (shared)
			fmr.fmr_flags |= FMR_OF_SHARED;
	}

	xfs_getfsmap_format(mp, &fmr, info);
out:
	/* Remember the furthest point covered so far. */
	rec_daddr += len_daddr;
	if (info->next_daddr < rec_daddr)
		info->next_daddr = rec_daddr;
	return 0;
}
/* Transform a rmapbt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_helper (
struct xfs_btree_cur * cur ,
2021-08-11 03:02:16 +03:00
const struct xfs_rmap_irec * rec ,
2017-03-29 00:56:37 +03:00
void * priv )
{
struct xfs_mount * mp = cur - > bc_mp ;
struct xfs_getfsmap_info * info = priv ;
xfs_fsblock_t fsb ;
xfs_daddr_t rec_daddr ;
2021-06-02 03:48:24 +03:00
fsb = XFS_AGB_TO_FSB ( mp , cur - > bc_ag . pag - > pag_agno , rec - > rm_startblock ) ;
2017-03-29 00:56:37 +03:00
rec_daddr = XFS_FSB_TO_DADDR ( mp , fsb ) ;
2023-06-30 03:39:43 +03:00
return xfs_getfsmap_helper ( cur - > bc_tp , info , rec , rec_daddr , 0 ) ;
2017-03-29 00:56:37 +03:00
}
2017-03-29 00:56:37 +03:00
/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper (
struct xfs_btree_cur * cur ,
2021-08-11 03:02:16 +03:00
const struct xfs_alloc_rec_incore * rec ,
2017-03-29 00:56:37 +03:00
void * priv )
{
struct xfs_mount * mp = cur - > bc_mp ;
struct xfs_getfsmap_info * info = priv ;
struct xfs_rmap_irec irec ;
xfs_daddr_t rec_daddr ;
2021-06-02 03:48:24 +03:00
rec_daddr = XFS_AGB_TO_DADDR ( mp , cur - > bc_ag . pag - > pag_agno ,
2017-03-29 00:56:37 +03:00
rec - > ar_startblock ) ;
irec . rm_startblock = rec - > ar_startblock ;
irec . rm_blockcount = rec - > ar_blockcount ;
irec . rm_owner = XFS_RMAP_OWN_NULL ; /* "free" */
irec . rm_offset = 0 ;
irec . rm_flags = 0 ;
2023-06-30 03:39:43 +03:00
return xfs_getfsmap_helper ( cur - > bc_tp , info , & irec , rec_daddr , 0 ) ;
2017-03-29 00:56:37 +03:00
}
2017-03-29 00:56:37 +03:00
/* Set rmap flags based on the getfsmap flags */
static void
xfs_getfsmap_set_irec_flags (
struct xfs_rmap_irec * irec ,
2021-08-11 03:00:31 +03:00
const struct xfs_fsmap * fmr )
2017-03-29 00:56:37 +03:00
{
irec - > rm_flags = 0 ;
if ( fmr - > fmr_flags & FMR_OF_ATTR_FORK )
irec - > rm_flags | = XFS_RMAP_ATTR_FORK ;
if ( fmr - > fmr_flags & FMR_OF_EXTENT_MAP )
irec - > rm_flags | = XFS_RMAP_BMBT_BLOCK ;
if ( fmr - > fmr_flags & FMR_OF_PREALLOC )
irec - > rm_flags | = XFS_RMAP_UNWRITTEN ;
}
/* Execute a getfsmap query against the log device. */
STATIC int
xfs_getfsmap_logdev (
struct xfs_trans * tp ,
2021-08-11 03:00:31 +03:00
const struct xfs_fsmap * keys ,
2017-03-29 00:56:37 +03:00
struct xfs_getfsmap_info * info )
{
struct xfs_mount * mp = tp - > t_mountp ;
struct xfs_rmap_irec rmap ;
2023-06-30 03:39:43 +03:00
xfs_daddr_t rec_daddr , len_daddr ;
2023-06-30 03:39:45 +03:00
xfs_fsblock_t start_fsb , end_fsb ;
uint64_t eofs ;
2017-03-29 00:56:37 +03:00
2023-06-30 03:39:45 +03:00
eofs = XFS_FSB_TO_BB ( mp , mp - > m_sb . sb_logblocks ) ;
if ( keys [ 0 ] . fmr_physical > = eofs )
return 0 ;
2023-06-30 03:39:43 +03:00
start_fsb = XFS_BB_TO_FSBT ( mp ,
keys [ 0 ] . fmr_physical + keys [ 0 ] . fmr_length ) ;
2023-06-30 03:39:45 +03:00
end_fsb = XFS_BB_TO_FSB ( mp , min ( eofs - 1 , keys [ 1 ] . fmr_physical ) ) ;
2017-03-29 00:56:37 +03:00
2023-06-30 03:39:43 +03:00
/* Adjust the low key if we are continuing from where we left off. */
if ( keys [ 0 ] . fmr_length > 0 )
info - > low_daddr = XFS_FSB_TO_BB ( mp , start_fsb ) ;
2023-06-30 03:39:45 +03:00
trace_xfs_fsmap_low_key_linear ( mp , info - > dev , start_fsb ) ;
trace_xfs_fsmap_high_key_linear ( mp , info - > dev , end_fsb ) ;
2017-03-29 00:56:37 +03:00
2023-06-30 03:39:43 +03:00
if ( start_fsb > 0 )
2017-03-29 00:56:37 +03:00
return 0 ;
/* Fabricate an rmap entry for the external log device. */
rmap . rm_startblock = 0 ;
rmap . rm_blockcount = mp - > m_sb . sb_logblocks ;
rmap . rm_owner = XFS_RMAP_OWN_LOG ;
rmap . rm_offset = 0 ;
rmap . rm_flags = 0 ;
2023-06-30 03:39:43 +03:00
rec_daddr = XFS_FSB_TO_BB ( mp , rmap . rm_startblock ) ;
len_daddr = XFS_FSB_TO_BB ( mp , rmap . rm_blockcount ) ;
return xfs_getfsmap_helper ( tp , info , & rmap , rec_daddr , len_daddr ) ;
2017-03-29 00:56:37 +03:00
}
2017-10-13 19:47:47 +03:00
# ifdef CONFIG_XFS_RT
/* Transform a rtbitmap "record" into a fsmap */
STATIC int
xfs_getfsmap_rtdev_rtbitmap_helper (
2022-04-11 23:49:41 +03:00
struct xfs_mount * mp ,
2017-10-13 19:47:47 +03:00
struct xfs_trans * tp ,
2021-08-11 03:02:16 +03:00
const struct xfs_rtalloc_rec * rec ,
2017-10-13 19:47:47 +03:00
void * priv )
{
struct xfs_getfsmap_info * info = priv ;
struct xfs_rmap_irec irec ;
2023-06-30 03:39:43 +03:00
xfs_rtblock_t rtbno ;
xfs_daddr_t rec_daddr , len_daddr ;
2023-10-16 19:32:54 +03:00
rtbno = xfs_rtx_to_rtb ( mp , rec - > ar_startext ) ;
2023-06-30 03:39:43 +03:00
rec_daddr = XFS_FSB_TO_BB ( mp , rtbno ) ;
irec . rm_startblock = rtbno ;
2023-10-16 19:32:54 +03:00
rtbno = xfs_rtx_to_rtb ( mp , rec - > ar_extcount ) ;
2023-06-30 03:39:43 +03:00
len_daddr = XFS_FSB_TO_BB ( mp , rtbno ) ;
irec . rm_blockcount = rtbno ;
2017-10-13 19:47:47 +03:00
irec . rm_owner = XFS_RMAP_OWN_NULL ; /* "free" */
irec . rm_offset = 0 ;
irec . rm_flags = 0 ;
2023-06-30 03:39:43 +03:00
return xfs_getfsmap_helper ( tp , info , & irec , rec_daddr , len_daddr ) ;
2017-10-13 19:47:47 +03:00
}
2023-06-30 03:39:44 +03:00
/* Execute a getfsmap query against the realtime device rtbitmap. */
2017-03-29 00:56:38 +03:00
STATIC int
2023-06-30 03:39:44 +03:00
xfs_getfsmap_rtdev_rtbitmap (
2017-03-29 00:56:38 +03:00
struct xfs_trans * tp ,
2021-08-11 03:00:31 +03:00
const struct xfs_fsmap * keys ,
2017-03-29 00:56:38 +03:00
struct xfs_getfsmap_info * info )
{
2023-06-30 03:39:44 +03:00
struct xfs_rtalloc_rec alow = { 0 } ;
struct xfs_rtalloc_rec ahigh = { 0 } ;
2017-03-29 00:56:38 +03:00
struct xfs_mount * mp = tp - > t_mountp ;
2023-06-30 03:39:43 +03:00
xfs_rtblock_t start_rtb ;
xfs_rtblock_t end_rtb ;
2021-08-11 03:00:31 +03:00
uint64_t eofs ;
2023-06-30 03:39:44 +03:00
int error ;
2017-03-29 00:56:38 +03:00
2023-10-16 19:32:54 +03:00
eofs = XFS_FSB_TO_BB ( mp , xfs_rtx_to_rtb ( mp , mp - > m_sb . sb_rextents ) ) ;
2017-03-29 00:56:38 +03:00
if ( keys [ 0 ] . fmr_physical > = eofs )
return 0 ;
2023-06-30 03:39:43 +03:00
start_rtb = XFS_BB_TO_FSBT ( mp ,
keys [ 0 ] . fmr_physical + keys [ 0 ] . fmr_length ) ;
end_rtb = XFS_BB_TO_FSB ( mp , min ( eofs - 1 , keys [ 1 ] . fmr_physical ) ) ;
2017-03-29 00:56:38 +03:00
2023-06-30 03:39:44 +03:00
info - > missing_owner = XFS_FMR_OWN_UNKNOWN ;
2017-03-29 00:56:38 +03:00
2023-06-30 03:39:43 +03:00
/* Adjust the low key if we are continuing from where we left off. */
if ( keys [ 0 ] . fmr_length > 0 ) {
info - > low_daddr = XFS_FSB_TO_BB ( mp , start_rtb ) ;
if ( info - > low_daddr > = eofs )
return 0 ;
}
2023-06-30 03:39:44 +03:00
trace_xfs_fsmap_low_key_linear ( mp , info - > dev , start_rtb ) ;
trace_xfs_fsmap_high_key_linear ( mp , info - > dev , end_rtb ) ;
2017-03-29 00:56:38 +03:00
2022-11-07 04:03:18 +03:00
xfs_ilock ( mp - > m_rbmip , XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP ) ;
2017-03-29 00:56:38 +03:00
2021-08-11 03:00:31 +03:00
/*
* Set up query parameters to return free rtextents covering the range
* we want .
*/
2023-10-16 19:37:47 +03:00
alow . ar_startext = xfs_rtb_to_rtx ( mp , start_rtb ) ;
ahigh . ar_startext = xfs_rtb_to_rtxup ( mp , end_rtb ) ;
2022-04-11 23:49:41 +03:00
error = xfs_rtalloc_query_range ( mp , tp , & alow , & ahigh ,
2017-03-29 00:56:38 +03:00
xfs_getfsmap_rtdev_rtbitmap_helper , info ) ;
if ( error )
goto err ;
2021-08-11 03:00:31 +03:00
/*
* Report any gaps at the end of the rtbitmap by simulating a null
* rmap starting at the block after the end of the query range .
*/
2017-03-29 00:56:38 +03:00
info - > last = true ;
2021-08-11 03:00:31 +03:00
ahigh . ar_startext = min ( mp - > m_sb . sb_rextents , ahigh . ar_startext ) ;
2022-04-11 23:49:41 +03:00
error = xfs_getfsmap_rtdev_rtbitmap_helper ( mp , tp , & ahigh , info ) ;
2017-03-29 00:56:38 +03:00
if ( error )
goto err ;
err :
2022-11-07 04:03:18 +03:00
xfs_iunlock ( mp - > m_rbmip , XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP ) ;
2017-03-29 00:56:38 +03:00
return error ;
}
2017-10-09 21:37:22 +03:00
# endif /* CONFIG_XFS_RT */
2017-03-29 00:56:38 +03:00
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
Dave Chinner reported that xfs/273 fails if the AG size happens to be an
exact power of two. I traced this to an agbno integer overflow when the
current GETFSMAP call is a continuation of a previous GETFSMAP call, and
the last record returned was non-shareable space at the end of an AG.
__xfs_getfsmap_datadev sets up a data device query by converting the
incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno
and agbno pair. In the (failing) case of where fmr_blockcount of the
low key is nonzero and the record was for a non-shareable extent, it
will add fmr_blockcount to start_fsb and info->low.rm_startblock.
If the low key was actually the last record for that AG, then this
addition causes info->low.rm_startblock to point beyond EOAG. When the
rmapbt range query starts, it'll return an empty set, and fsmap moves on
to the next AG.
Or so I thought. Remember how we added to start_fsb?
If agsize < 1<<agblklog, start_fsb points to the same AG as the original
fmr_physical from the low key. We run the rmapbt query, which returns
nothing, so getfsmap zeroes info->low and moves on to the next AG.
If agsize == 1<<agblklog, start_fsb now points to the next AG. We run
the rmapbt query on the next AG with the excessively large
rm_startblock. If this next AG is actually the last AG, we'll set
info->high to EOFS (which now has a lower rm_startblock than
info->low), and the ranged btree query code will return -EINVAL. If
it's not the last AG, we ignore all records for the intermediate AGs.
Oops.
Fix this by decoding start_fsb into agno and agbno only after making
adjustments to start_fsb. This means that info->low.rm_startblock will
always be set to a valid agbno, and we always start the rmapbt iteration
in the correct AG.
While we're at it, fix the predicate for determining if an fsmap record
represents non-shareable space to include file data on pre-reflink
filesystems.
Reported-by: Dave Chinner <david@fromorbit.com>
Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-09-11 18:39:02 +03:00
static inline bool
rmap_not_shareable ( struct xfs_mount * mp , const struct xfs_rmap_irec * r )
{
if ( ! xfs_has_reflink ( mp ) )
return true ;
if ( XFS_RMAP_NON_INODE_OWNER ( r - > rm_owner ) )
return true ;
if ( r - > rm_flags & ( XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
XFS_RMAP_UNWRITTEN ) )
return true ;
return false ;
}
2017-03-29 00:56:37 +03:00
/* Execute a getfsmap query against the regular data device. */
STATIC int
__xfs_getfsmap_datadev (
struct xfs_trans * tp ,
2021-08-11 03:00:31 +03:00
const struct xfs_fsmap * keys ,
2017-03-29 00:56:37 +03:00
struct xfs_getfsmap_info * info ,
int ( * query_fn ) ( struct xfs_trans * ,
struct xfs_getfsmap_info * ,
struct xfs_btree_cur * * ,
void * ) ,
void * priv )
{
struct xfs_mount * mp = tp - > t_mountp ;
2021-06-02 03:48:24 +03:00
struct xfs_perag * pag ;
2017-03-29 00:56:37 +03:00
struct xfs_btree_cur * bt_cur = NULL ;
xfs_fsblock_t start_fsb ;
xfs_fsblock_t end_fsb ;
xfs_agnumber_t start_ag ;
xfs_agnumber_t end_ag ;
2021-08-11 03:00:31 +03:00
uint64_t eofs ;
2017-03-29 00:56:37 +03:00
int error = 0 ;
eofs = XFS_FSB_TO_BB ( mp , mp - > m_sb . sb_dblocks ) ;
if ( keys [ 0 ] . fmr_physical > = eofs )
return 0 ;
start_fsb = XFS_DADDR_TO_FSB ( mp , keys [ 0 ] . fmr_physical ) ;
2021-08-11 03:00:31 +03:00
end_fsb = XFS_DADDR_TO_FSB ( mp , min ( eofs - 1 , keys [ 1 ] . fmr_physical ) ) ;
2017-03-29 00:56:37 +03:00
/*
* Convert the fsmap low / high keys to AG based keys . Initialize
* low to the fsmap low key and max out the high key to the end
* of the AG .
*/
info - > low . rm_offset = XFS_BB_TO_FSBT ( mp , keys [ 0 ] . fmr_offset ) ;
error = xfs_fsmap_owner_to_rmap ( & info - > low , & keys [ 0 ] ) ;
if ( error )
return error ;
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
info - > low . rm_blockcount = XFS_BB_TO_FSBT ( mp , keys [ 0 ] . fmr_length ) ;
2017-03-29 00:56:37 +03:00
xfs_getfsmap_set_irec_flags ( & info - > low , & keys [ 0 ] ) ;
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
/* Adjust the low key if we are continuing from where we left off. */
if ( info - > low . rm_blockcount = = 0 ) {
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
Dave Chinner reported that xfs/273 fails if the AG size happens to be an
exact power of two. I traced this to an agbno integer overflow when the
current GETFSMAP call is a continuation of a previous GETFSMAP call, and
the last record returned was non-shareable space at the end of an AG.
__xfs_getfsmap_datadev sets up a data device query by converting the
incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno
and agbno pair. In the (failing) case of where fmr_blockcount of the
low key is nonzero and the record was for a non-shareable extent, it
will add fmr_blockcount to start_fsb and info->low.rm_startblock.
If the low key was actually the last record for that AG, then this
addition causes info->low.rm_startblock to point beyond EOAG. When the
rmapbt range query starts, it'll return an empty set, and fsmap moves on
to the next AG.
Or so I thought. Remember how we added to start_fsb?
If agsize < 1<<agblklog, start_fsb points to the same AG as the original
fmr_physical from the low key. We run the rmapbt query, which returns
nothing, so getfsmap zeroes info->low and moves on to the next AG.
If agsize == 1<<agblklog, start_fsb now points to the next AG. We run
the rmapbt query on the next AG with the excessively large
rm_startblock. If this next AG is actually the last AG, we'll set
info->high to EOFS (which is now has a lower rm_startblock than
info->low), and the ranged btree query code will return -EINVAL. If
it's not the last AG, we ignore all records for the intermediate AGs.
Oops.
Fix this by decoding start_fsb into agno and agbno only after making
adjustments to start_fsb. This means that info->low.rm_startblock will
always be set to a valid agbno, and we always start the rmapbt iteration
in the correct AG.
While we're at it, fix the predicate for determining if an fsmap record
represents non-shareable space to include file data on pre-reflink
filesystems.
Reported-by: Dave Chinner <david@fromorbit.com>
Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-09-11 18:39:02 +03:00
/* No previous record from which to continue */
} else if ( rmap_not_shareable ( mp , & info - > low ) ) {
/* Last record seen was an unshareable extent */
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
info - > low . rm_owner = 0 ;
info - > low . rm_offset = 0 ;
start_fsb + = info - > low . rm_blockcount ;
if ( XFS_FSB_TO_DADDR ( mp , start_fsb ) > = eofs )
return 0 ;
} else {
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
Dave Chinner reported that xfs/273 fails if the AG size happens to be an
exact power of two. I traced this to an agbno integer overflow when the
current GETFSMAP call is a continuation of a previous GETFSMAP call, and
the last record returned was non-shareable space at the end of an AG.
__xfs_getfsmap_datadev sets up a data device query by converting the
incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno
and agbno pair. In the (failing) case of where fmr_blockcount of the
low key is nonzero and the record was for a non-shareable extent, it
will add fmr_blockcount to start_fsb and info->low.rm_startblock.
If the low key was actually the last record for that AG, then this
addition causes info->low.rm_startblock to point beyond EOAG. When the
rmapbt range query starts, it'll return an empty set, and fsmap moves on
to the next AG.
Or so I thought. Remember how we added to start_fsb?
If agsize < 1<<agblklog, start_fsb points to the same AG as the original
fmr_physical from the low key. We run the rmapbt query, which returns
nothing, so getfsmap zeroes info->low and moves on to the next AG.
If agsize == 1<<agblklog, start_fsb now points to the next AG. We run
the rmapbt query on the next AG with the excessively large
rm_startblock. If this next AG is actually the last AG, we'll set
info->high to EOFS (which is now has a lower rm_startblock than
info->low), and the ranged btree query code will return -EINVAL. If
it's not the last AG, we ignore all records for the intermediate AGs.
Oops.
Fix this by decoding start_fsb into agno and agbno only after making
adjustments to start_fsb. This means that info->low.rm_startblock will
always be set to a valid agbno, and we always start the rmapbt iteration
in the correct AG.
While we're at it, fix the predicate for determining if an fsmap record
represents non-shareable space to include file data on pre-reflink
filesystems.
Reported-by: Dave Chinner <david@fromorbit.com>
Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-09-11 18:39:02 +03:00
/* Last record seen was a shareable file data extent */
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
info - > low . rm_offset + = info - > low . rm_blockcount ;
}
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
Dave Chinner reported that xfs/273 fails if the AG size happens to be an
exact power of two. I traced this to an agbno integer overflow when the
current GETFSMAP call is a continuation of a previous GETFSMAP call, and
the last record returned was non-shareable space at the end of an AG.
__xfs_getfsmap_datadev sets up a data device query by converting the
incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno
and agbno pair. In the (failing) case of where fmr_blockcount of the
low key is nonzero and the record was for a non-shareable extent, it
will add fmr_blockcount to start_fsb and info->low.rm_startblock.
If the low key was actually the last record for that AG, then this
addition causes info->low.rm_startblock to point beyond EOAG. When the
rmapbt range query starts, it'll return an empty set, and fsmap moves on
to the next AG.
Or so I thought. Remember how we added to start_fsb?
If agsize < 1<<agblklog, start_fsb points to the same AG as the original
fmr_physical from the low key. We run the rmapbt query, which returns
nothing, so getfsmap zeroes info->low and moves on to the next AG.
If agsize == 1<<agblklog, start_fsb now points to the next AG. We run
the rmapbt query on the next AG with the excessively large
rm_startblock. If this next AG is actually the last AG, we'll set
info->high to EOFS (which is now has a lower rm_startblock than
info->low), and the ranged btree query code will return -EINVAL. If
it's not the last AG, we ignore all records for the intermediate AGs.
Oops.
Fix this by decoding start_fsb into agno and agbno only after making
adjustments to start_fsb. This means that info->low.rm_startblock will
always be set to a valid agbno, and we always start the rmapbt iteration
in the correct AG.
While we're at it, fix the predicate for determining if an fsmap record
represents non-shareable space to include file data on pre-reflink
filesystems.
Reported-by: Dave Chinner <david@fromorbit.com>
Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-09-11 18:39:02 +03:00
info - > low . rm_startblock = XFS_FSB_TO_AGBNO ( mp , start_fsb ) ;
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
2017-03-29 00:56:37 +03:00
info - > high . rm_startblock = - 1U ;
info - > high . rm_owner = ULLONG_MAX ;
info - > high . rm_offset = ULLONG_MAX ;
info - > high . rm_blockcount = 0 ;
info - > high . rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS ;
start_ag = XFS_FSB_TO_AGNO ( mp , start_fsb ) ;
end_ag = XFS_FSB_TO_AGNO ( mp , end_fsb ) ;
2021-06-02 03:48:24 +03:00
for_each_perag_range ( mp , start_ag , end_ag , pag ) {
2017-03-29 00:56:37 +03:00
/*
* Set the AG high key from the fsmap high key if this
* is the last AG that we ' re querying .
*/
2021-06-02 03:48:24 +03:00
info - > pag = pag ;
if ( pag - > pag_agno = = end_ag ) {
2017-03-29 00:56:37 +03:00
info - > high . rm_startblock = XFS_FSB_TO_AGBNO ( mp ,
end_fsb ) ;
info - > high . rm_offset = XFS_BB_TO_FSBT ( mp ,
keys [ 1 ] . fmr_offset ) ;
error = xfs_fsmap_owner_to_rmap ( & info - > high , & keys [ 1 ] ) ;
if ( error )
2021-06-02 03:48:24 +03:00
break ;
2017-03-29 00:56:37 +03:00
xfs_getfsmap_set_irec_flags ( & info - > high , & keys [ 1 ] ) ;
}
if ( bt_cur ) {
xfs_btree_del_cursor ( bt_cur , XFS_BTREE_NOERROR ) ;
bt_cur = NULL ;
xfs_trans_brelse ( tp , info - > agf_bp ) ;
info - > agf_bp = NULL ;
}
2022-07-07 12:07:40 +03:00
error = xfs_alloc_read_agf ( pag , tp , 0 , & info - > agf_bp ) ;
2017-03-29 00:56:37 +03:00
if ( error )
2021-06-02 03:48:24 +03:00
break ;
2017-03-29 00:56:37 +03:00
2021-06-02 03:48:24 +03:00
trace_xfs_fsmap_low_key ( mp , info - > dev , pag - > pag_agno ,
& info - > low ) ;
trace_xfs_fsmap_high_key ( mp , info - > dev , pag - > pag_agno ,
2017-03-29 00:56:37 +03:00
& info - > high ) ;
error = query_fn ( tp , info , & bt_cur , priv ) ;
if ( error )
2021-06-02 03:48:24 +03:00
break ;
2017-03-29 00:56:37 +03:00
/*
* Set the AG low key to the start of the AG prior to
* moving on to the next AG .
*/
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
if ( pag - > pag_agno = = start_ag )
memset ( & info - > low , 0 , sizeof ( info - > low ) ) ;
2017-03-29 00:56:37 +03:00
2021-06-02 03:48:24 +03:00
/*
* If this is the last AG , report any gap at the end of it
* before we drop the reference to the perag when the loop
* terminates .
*/
if ( pag - > pag_agno = = end_ag ) {
info - > last = true ;
error = query_fn ( tp , info , & bt_cur , priv ) ;
if ( error )
break ;
}
info - > pag = NULL ;
}
2017-03-29 00:56:37 +03:00
if ( bt_cur )
xfs_btree_del_cursor ( bt_cur , error < 0 ? XFS_BTREE_ERROR :
XFS_BTREE_NOERROR ) ;
if ( info - > agf_bp ) {
xfs_trans_brelse ( tp , info - > agf_bp ) ;
info - > agf_bp = NULL ;
}
2021-06-02 03:48:24 +03:00
if ( info - > pag ) {
2023-02-13 01:14:42 +03:00
xfs_perag_rele ( info - > pag ) ;
2021-06-02 03:48:24 +03:00
info - > pag = NULL ;
} else if ( pag ) {
/* loop termination case */
2023-02-13 01:14:42 +03:00
xfs_perag_rele ( pag ) ;
2021-06-02 03:48:24 +03:00
}
2017-03-29 00:56:37 +03:00
return error ;
}
/* Actually query the rmap btree. */
STATIC int
xfs_getfsmap_datadev_rmapbt_query (
struct xfs_trans * tp ,
struct xfs_getfsmap_info * info ,
struct xfs_btree_cur * * curpp ,
void * priv )
{
/* Report any gap at the end of the last AG. */
if ( info - > last )
return xfs_getfsmap_datadev_helper ( * curpp , & info - > high , info ) ;
/* Allocate cursor for this AG and query_range it. */
* curpp = xfs_rmapbt_init_cursor ( tp - > t_mountp , tp , info - > agf_bp ,
2021-06-02 03:48:24 +03:00
info - > pag ) ;
2017-03-29 00:56:37 +03:00
return xfs_rmap_query_range ( * curpp , & info - > low , & info - > high ,
xfs_getfsmap_datadev_helper , info ) ;
}
/* Execute a getfsmap query against the regular data device rmapbt. */
STATIC int
xfs_getfsmap_datadev_rmapbt (
struct xfs_trans * tp ,
2021-08-11 03:00:31 +03:00
const struct xfs_fsmap * keys ,
2017-03-29 00:56:37 +03:00
struct xfs_getfsmap_info * info )
{
info - > missing_owner = XFS_FMR_OWN_FREE ;
return __xfs_getfsmap_datadev ( tp , keys , info ,
xfs_getfsmap_datadev_rmapbt_query , NULL ) ;
}
2017-03-29 00:56:37 +03:00
/* Actually query the bno btree. */
STATIC int
xfs_getfsmap_datadev_bnobt_query (
struct xfs_trans * tp ,
struct xfs_getfsmap_info * info ,
struct xfs_btree_cur * * curpp ,
void * priv )
{
struct xfs_alloc_rec_incore * key = priv ;
/* Report any gap at the end of the last AG. */
if ( info - > last )
return xfs_getfsmap_datadev_bnobt_helper ( * curpp , & key [ 1 ] , info ) ;
/* Allocate cursor for this AG and query_range it. */
2024-02-22 23:40:12 +03:00
* curpp = xfs_bnobt_init_cursor ( tp - > t_mountp , tp , info - > agf_bp ,
info - > pag ) ;
2017-03-29 00:56:37 +03:00
key - > ar_startblock = info - > low . rm_startblock ;
key [ 1 ] . ar_startblock = info - > high . rm_startblock ;
return xfs_alloc_query_range ( * curpp , key , & key [ 1 ] ,
xfs_getfsmap_datadev_bnobt_helper , info ) ;
}
/* Execute a getfsmap query against the regular data device's bnobt. */
STATIC int
xfs_getfsmap_datadev_bnobt (
struct xfs_trans * tp ,
2021-08-11 03:00:31 +03:00
const struct xfs_fsmap * keys ,
2017-03-29 00:56:37 +03:00
struct xfs_getfsmap_info * info )
{
struct xfs_alloc_rec_incore akeys [ 2 ] ;
2023-02-15 04:51:35 +03:00
memset ( akeys , 0 , sizeof ( akeys ) ) ;
2017-03-29 00:56:37 +03:00
info - > missing_owner = XFS_FMR_OWN_UNKNOWN ;
return __xfs_getfsmap_datadev ( tp , keys , info ,
xfs_getfsmap_datadev_bnobt_query , & akeys [ 0 ] ) ;
}
2017-03-29 00:56:37 +03:00
/* Do we recognize the device? */
STATIC bool
xfs_getfsmap_is_valid_device (
struct xfs_mount * mp ,
struct xfs_fsmap * fm )
{
if ( fm - > fmr_device = = 0 | | fm - > fmr_device = = UINT_MAX | |
fm - > fmr_device = = new_encode_dev ( mp - > m_ddev_targp - > bt_dev ) )
return true ;
if ( mp - > m_logdev_targp & &
fm - > fmr_device = = new_encode_dev ( mp - > m_logdev_targp - > bt_dev ) )
return true ;
2017-03-29 00:56:38 +03:00
if ( mp - > m_rtdev_targp & &
fm - > fmr_device = = new_encode_dev ( mp - > m_rtdev_targp - > bt_dev ) )
return true ;
2017-03-29 00:56:37 +03:00
return false ;
}
/* Ensure that the low key is less than the high key. */
STATIC bool
xfs_getfsmap_check_keys (
struct xfs_fsmap * low_key ,
struct xfs_fsmap * high_key )
{
2023-06-30 03:39:45 +03:00
if ( low_key - > fmr_flags & ( FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP ) ) {
if ( low_key - > fmr_offset )
return false ;
}
if ( high_key - > fmr_flags ! = - 1U & &
( high_key - > fmr_flags & ( FMR_OF_SPECIAL_OWNER |
FMR_OF_EXTENT_MAP ) ) ) {
if ( high_key - > fmr_offset & & high_key - > fmr_offset ! = - 1ULL )
return false ;
}
if ( high_key - > fmr_length & & high_key - > fmr_length ! = - 1ULL )
return false ;
2017-03-29 00:56:37 +03:00
if ( low_key - > fmr_device > high_key - > fmr_device )
return false ;
if ( low_key - > fmr_device < high_key - > fmr_device )
return true ;
if ( low_key - > fmr_physical > high_key - > fmr_physical )
return false ;
if ( low_key - > fmr_physical < high_key - > fmr_physical )
return true ;
if ( low_key - > fmr_owner > high_key - > fmr_owner )
return false ;
if ( low_key - > fmr_owner < high_key - > fmr_owner )
return true ;
if ( low_key - > fmr_offset > high_key - > fmr_offset )
return false ;
if ( low_key - > fmr_offset < high_key - > fmr_offset )
return true ;
return false ;
}
2017-10-09 21:37:22 +03:00
/*
 * Number of device handler slots used by getfsmap: two when realtime
 * support is not configured at build time, three when CONFIG_XFS_RT is set.
 */
#ifndef CONFIG_XFS_RT
# define XFS_GETFSMAP_DEVS	2
#else
# define XFS_GETFSMAP_DEVS	3
#endif /* !CONFIG_XFS_RT */
2017-03-29 00:56:37 +03:00
/*
2020-10-01 20:56:07 +03:00
* Get filesystem ' s extents as described in head , and format for output . Fills
* in the supplied records array until there are no more reverse mappings to
* return or head . fmh_entries = = head . fmh_count . In the second case , this
* function returns - ECANCELED to indicate that more records would have been
* returned .
2017-03-29 00:56:37 +03:00
*
* Key to Confusion
* - - - - - - - - - - - - - - - -
* There are multiple levels of keys and counters at work here :
* xfs_fsmap_head . fmh_keys - - low and high fsmap keys passed in ;
2023-06-30 03:39:45 +03:00
* these reflect fs - wide sector addrs .
2017-03-29 00:56:37 +03:00
* dkeys - - fmh_keys used to query each device ;
2023-06-30 03:39:45 +03:00
* these are fmh_keys but w / the low key
* bumped up by fmr_length .
2017-03-29 00:56:37 +03:00
* xfs_getfsmap_info . next_daddr - - next disk addr we expect to see ; this
* is how we detect gaps in the fsmap
* records and report them .
* xfs_getfsmap_info . low / high - - per - AG low / high keys computed from
2023-06-30 03:39:45 +03:00
* dkeys ; used to query the metadata .
2017-03-29 00:56:37 +03:00
*/
int
xfs_getfsmap (
struct xfs_mount * mp ,
struct xfs_fsmap_head * head ,
2020-10-01 20:56:07 +03:00
struct fsmap * fsmap_recs )
2017-03-29 00:56:37 +03:00
{
struct xfs_trans * tp = NULL ;
struct xfs_fsmap dkeys [ 2 ] ; /* per-dev keys */
struct xfs_getfsmap_dev handlers [ XFS_GETFSMAP_DEVS ] ;
2017-04-21 21:24:39 +03:00
struct xfs_getfsmap_info info = { NULL } ;
2017-05-12 20:44:10 +03:00
bool use_rmap ;
2017-03-29 00:56:37 +03:00
int i ;
int error = 0 ;
if ( head - > fmh_iflags & ~ FMH_IF_VALID )
return - EINVAL ;
if ( ! xfs_getfsmap_is_valid_device ( mp , & head - > fmh_keys [ 0 ] ) | |
! xfs_getfsmap_is_valid_device ( mp , & head - > fmh_keys [ 1 ] ) )
return - EINVAL ;
2023-06-30 03:39:45 +03:00
if ( ! xfs_getfsmap_check_keys ( & head - > fmh_keys [ 0 ] , & head - > fmh_keys [ 1 ] ) )
return - EINVAL ;
2017-03-29 00:56:37 +03:00
2022-02-26 03:18:30 +03:00
use_rmap = xfs_has_rmapbt ( mp ) & &
has_capability_noaudit ( current , CAP_SYS_ADMIN ) ;
2017-03-29 00:56:37 +03:00
head - > fmh_entries = 0 ;
/* Set up our device handlers. */
memset ( handlers , 0 , sizeof ( handlers ) ) ;
handlers [ 0 ] . dev = new_encode_dev ( mp - > m_ddev_targp - > bt_dev ) ;
2017-05-12 20:44:10 +03:00
if ( use_rmap )
2017-03-29 00:56:37 +03:00
handlers [ 0 ] . fn = xfs_getfsmap_datadev_rmapbt ;
else
handlers [ 0 ] . fn = xfs_getfsmap_datadev_bnobt ;
2017-03-29 00:56:37 +03:00
if ( mp - > m_logdev_targp ! = mp - > m_ddev_targp ) {
handlers [ 1 ] . dev = new_encode_dev ( mp - > m_logdev_targp - > bt_dev ) ;
handlers [ 1 ] . fn = xfs_getfsmap_logdev ;
}
2017-10-09 21:37:22 +03:00
# ifdef CONFIG_XFS_RT
2017-03-29 00:56:38 +03:00
if ( mp - > m_rtdev_targp ) {
handlers [ 2 ] . dev = new_encode_dev ( mp - > m_rtdev_targp - > bt_dev ) ;
handlers [ 2 ] . fn = xfs_getfsmap_rtdev_rtbitmap ;
}
2017-10-09 21:37:22 +03:00
# endif /* CONFIG_XFS_RT */
2017-03-29 00:56:37 +03:00
xfs_sort ( handlers , XFS_GETFSMAP_DEVS , sizeof ( struct xfs_getfsmap_dev ) ,
xfs_getfsmap_dev_compare ) ;
/*
* To continue where we left off , we allow userspace to use the
* last mapping from a previous call as the low key of the next .
* This is identified by a non - zero length in the low key . We
* have to increment the low key in this scenario to ensure we
* don ' t return the same mapping again , and instead return the
* very next mapping .
*
* If the low key mapping refers to file data , the same physical
* blocks could be mapped to several other files / offsets .
* According to rmapbt record ordering , the minimal next
* possible record for the block range is the next starting
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
* offset in the same inode . Therefore , each fsmap backend bumps
* the file offset to continue the search appropriately . For
* all other low key mapping types ( attr blocks , metadata ) , each
* fsmap backend bumps the physical offset as there can be no
* other mapping for the same physical block range .
2017-03-29 00:56:37 +03:00
*/
dkeys [ 0 ] = head - > fmh_keys [ 0 ] ;
memset ( & dkeys [ 1 ] , 0xFF , sizeof ( struct xfs_fsmap ) ) ;
info . next_daddr = head - > fmh_keys [ 0 ] . fmr_physical +
head - > fmh_keys [ 0 ] . fmr_length ;
2020-10-01 20:56:07 +03:00
info . fsmap_recs = fsmap_recs ;
2017-03-29 00:56:37 +03:00
info . head = head ;
/* For each device we support... */
for ( i = 0 ; i < XFS_GETFSMAP_DEVS ; i + + ) {
/* Is this device within the range the user asked for? */
if ( ! handlers [ i ] . fn )
continue ;
if ( head - > fmh_keys [ 0 ] . fmr_device > handlers [ i ] . dev )
continue ;
if ( head - > fmh_keys [ 1 ] . fmr_device < handlers [ i ] . dev )
break ;
/*
* If this device number matches the high key , we have
* to pass the high key to the handler to limit the
* query results . If the device number exceeds the
* low key , zero out the low key so that we get
* everything from the beginning .
*/
if ( handlers [ i ] . dev = = head - > fmh_keys [ 1 ] . fmr_device )
dkeys [ 1 ] = head - > fmh_keys [ 1 ] ;
if ( handlers [ i ] . dev > head - > fmh_keys [ 0 ] . fmr_device )
memset ( & dkeys [ 0 ] , 0 , sizeof ( struct xfs_fsmap ) ) ;
2021-03-22 19:51:50 +03:00
/*
* Grab an empty transaction so that we can use its recursive
* buffer locking abilities to detect cycles in the rmapbt
* without deadlocking .
*/
2017-03-29 00:56:37 +03:00
error = xfs_trans_alloc_empty ( mp , & tp ) ;
if ( error )
break ;
info . dev = handlers [ i ] . dev ;
info . last = false ;
2021-06-02 03:48:24 +03:00
info . pag = NULL ;
2023-06-30 03:39:43 +03:00
info . low_daddr = - 1ULL ;
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 03:39:43 +03:00
info . low . rm_blockcount = 0 ;
2017-03-29 00:56:37 +03:00
error = handlers [ i ] . fn ( tp , dkeys , & info ) ;
if ( error )
break ;
xfs_trans_cancel ( tp ) ;
tp = NULL ;
info . next_daddr = 0 ;
}
if ( tp )
xfs_trans_cancel ( tp ) ;
head - > fmh_oflags = FMH_OF_DEV_T ;
return error ;
}