xfs: refactor btree owner change into a separate visit-blocks function
Refactor the btree_change_owner function into a more generic apparatus which visits all blocks in a btree. We'll use this in a subsequent patch for counting btree blocks for AG reservations. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
committed by
Dave Chinner
parent
105f7d83db
commit
28a89567b8
@@ -4328,69 +4328,27 @@ xfs_btree_get_rec(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Change the owner of a btree.
|
||||
*
|
||||
* The mechanism we use here is ordered buffer logging. Because we don't know
|
||||
* how many buffers we are going to need to modify, we don't really want to
|
||||
* have to make transaction reservations for the worst case of every buffer in a
|
||||
* full size btree as that may be more space than we can fit in the log....
|
||||
*
|
||||
* We do the btree walk in the most optimal manner possible - we have sibling
|
||||
* pointers so we can just walk all the blocks on each level from left to right
|
||||
* in a single pass, and then move to the next level and do the same. We can
|
||||
* also do readahead on the sibling pointers to get IO moving more quickly,
|
||||
* though for slow disks this is unlikely to make much difference to performance
|
||||
* as the amount of CPU work we have to do before moving to the next block is
|
||||
* relatively small.
|
||||
*
|
||||
* For each btree block that we load, modify the owner appropriately, set the
|
||||
* buffer as an ordered buffer and log it appropriately. We need to ensure that
|
||||
* we mark the region we change dirty so that if the buffer is relogged in
|
||||
* a subsequent transaction the changes we make here as an ordered buffer are
|
||||
* correctly relogged in that transaction. If we are in recovery context, then
|
||||
* just queue the modified buffer as delayed write buffer so the transaction
|
||||
* recovery completion writes the changes to disk.
|
||||
*/
|
||||
static int
|
||||
xfs_btree_block_change_owner(
|
||||
/* Visit a block in a btree. */
|
||||
STATIC int
|
||||
xfs_btree_visit_block(
|
||||
struct xfs_btree_cur *cur,
|
||||
int level,
|
||||
__uint64_t new_owner,
|
||||
struct list_head *buffer_list)
|
||||
xfs_btree_visit_blocks_fn fn,
|
||||
void *data)
|
||||
{
|
||||
struct xfs_btree_block *block;
|
||||
struct xfs_buf *bp;
|
||||
union xfs_btree_ptr rptr;
|
||||
int error;
|
||||
|
||||
/* do right sibling readahead */
|
||||
xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
|
||||
|
||||
/* modify the owner */
|
||||
block = xfs_btree_get_block(cur, level, &bp);
|
||||
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
|
||||
block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
|
||||
else
|
||||
block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
|
||||
|
||||
/*
|
||||
* If the block is a root block hosted in an inode, we might not have a
|
||||
* buffer pointer here and we shouldn't attempt to log the change as the
|
||||
* information is already held in the inode and discarded when the root
|
||||
* block is formatted into the on-disk inode fork. We still change it,
|
||||
* though, so everything is consistent in memory.
|
||||
*/
|
||||
if (bp) {
|
||||
if (cur->bc_tp) {
|
||||
xfs_trans_ordered_buf(cur->bc_tp, bp);
|
||||
xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
|
||||
} else {
|
||||
xfs_buf_delwri_queue(bp, buffer_list);
|
||||
}
|
||||
} else {
|
||||
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
|
||||
ASSERT(level == cur->bc_nlevels - 1);
|
||||
}
|
||||
/* process the block */
|
||||
error = fn(cur, level, data);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* now read rh sibling block for next iteration */
|
||||
xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
|
||||
@@ -4400,11 +4358,13 @@ xfs_btree_block_change_owner(
|
||||
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
|
||||
}
|
||||
|
||||
|
||||
/* Visit every block in a btree. */
|
||||
int
|
||||
xfs_btree_change_owner(
|
||||
xfs_btree_visit_blocks(
|
||||
struct xfs_btree_cur *cur,
|
||||
__uint64_t new_owner,
|
||||
struct list_head *buffer_list)
|
||||
xfs_btree_visit_blocks_fn fn,
|
||||
void *data)
|
||||
{
|
||||
union xfs_btree_ptr lptr;
|
||||
int level;
|
||||
@@ -4433,9 +4393,7 @@ xfs_btree_change_owner(
|
||||
|
||||
/* for each buffer in the level */
|
||||
do {
|
||||
error = xfs_btree_block_change_owner(cur, level,
|
||||
new_owner,
|
||||
buffer_list);
|
||||
error = xfs_btree_visit_block(cur, level, fn, data);
|
||||
} while (!error);
|
||||
|
||||
if (error != -ENOENT)
|
||||
@@ -4445,6 +4403,89 @@ xfs_btree_change_owner(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Change the owner of a btree.
|
||||
*
|
||||
* The mechanism we use here is ordered buffer logging. Because we don't know
|
||||
* how many buffers we are going to need to modify, we don't really want to
|
||||
* have to make transaction reservations for the worst case of every buffer in a
|
||||
* full size btree as that may be more space than we can fit in the log....
|
||||
*
|
||||
* We do the btree walk in the most optimal manner possible - we have sibling
|
||||
* pointers so we can just walk all the blocks on each level from left to right
|
||||
* in a single pass, and then move to the next level and do the same. We can
|
||||
* also do readahead on the sibling pointers to get IO moving more quickly,
|
||||
* though for slow disks this is unlikely to make much difference to performance
|
||||
* as the amount of CPU work we have to do before moving to the next block is
|
||||
* relatively small.
|
||||
*
|
||||
* For each btree block that we load, modify the owner appropriately, set the
|
||||
* buffer as an ordered buffer and log it appropriately. We need to ensure that
|
||||
* we mark the region we change dirty so that if the buffer is relogged in
|
||||
* a subsequent transaction the changes we make here as an ordered buffer are
|
||||
* correctly relogged in that transaction. If we are in recovery context, then
|
||||
* just queue the modified buffer as delayed write buffer so the transaction
|
||||
* recovery completion writes the changes to disk.
|
||||
*/
|
||||
/*
 * Argument bundle for xfs_btree_block_change_owner(); xfs_btree_change_owner()
 * fills one in and passes its address as the visit callback's void *data.
 */
struct xfs_btree_block_change_owner_info {
|
||||
__uint64_t new_owner;	/* owner value stamped into each visited block */
|
||||
struct list_head *buffer_list;	/* handed to xfs_buf_delwri_queue() when the cursor has no transaction */
|
||||
};
|
||||
|
||||
static int
|
||||
xfs_btree_block_change_owner(
|
||||
struct xfs_btree_cur *cur,
|
||||
int level,
|
||||
void *data)
|
||||
{
|
||||
struct xfs_btree_block_change_owner_info *bbcoi = data;
|
||||
struct xfs_btree_block *block;
|
||||
struct xfs_buf *bp;
|
||||
|
||||
/* modify the owner */
|
||||
block = xfs_btree_get_block(cur, level, &bp);
|
||||
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
|
||||
block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
|
||||
else
|
||||
block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
|
||||
|
||||
/*
|
||||
* If the block is a root block hosted in an inode, we might not have a
|
||||
* buffer pointer here and we shouldn't attempt to log the change as the
|
||||
* information is already held in the inode and discarded when the root
|
||||
* block is formatted into the on-disk inode fork. We still change it,
|
||||
* though, so everything is consistent in memory.
|
||||
*/
|
||||
if (bp) {
|
||||
if (cur->bc_tp) {
|
||||
xfs_trans_ordered_buf(cur->bc_tp, bp);
|
||||
xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
|
||||
} else {
|
||||
xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
|
||||
}
|
||||
} else {
|
||||
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
|
||||
ASSERT(level == cur->bc_nlevels - 1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_btree_change_owner(
|
||||
struct xfs_btree_cur *cur,
|
||||
__uint64_t new_owner,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xfs_btree_block_change_owner_info bbcoi;
|
||||
|
||||
bbcoi.new_owner = new_owner;
|
||||
bbcoi.buffer_list = buffer_list;
|
||||
|
||||
return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
|
||||
&bbcoi);
|
||||
}
|
||||
|
||||
/**
|
||||
* xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
|
||||
* btree block
|
||||
|
@@ -536,4 +536,9 @@ int xfs_btree_query_range(struct xfs_btree_cur *cur,
|
||||
union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
|
||||
xfs_btree_query_range_fn fn, void *priv);
|
||||
|
||||
/*
 * Callback type for xfs_btree_visit_blocks().  It is called with the cursor,
 * the level of the block being visited, and the caller's opaque @data
 * pointer.  A nonzero return stops processing of the current block and is
 * handed back to the walker as an error.
 */
typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
|
||||
void *data);
|
||||
/*
 * Visit every block in a btree, invoking @fn on each one with @data passed
 * through untouched.
 */
int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
|
||||
xfs_btree_visit_blocks_fn fn, void *data);
|
||||
|
||||
#endif /* __XFS_BTREE_H__ */
|
||||
|
Reference in New Issue
Block a user