xfs: refactor btree owner change into a separate visit-blocks function

Refactor the btree_change_owner function into a more generic apparatus
which visits all blocks in a btree.  We'll use this in a subsequent
patch for counting btree blocks for AG reservations.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
Darrick J. Wong
2016-08-03 11:10:55 +10:00
committed by Dave Chinner
parent 105f7d83db
commit 28a89567b8
2 changed files with 96 additions and 50 deletions

View File

@@ -4328,69 +4328,27 @@ xfs_btree_get_rec(
return 0;
}
/*
* Change the owner of a btree.
*
* The mechanism we use here is ordered buffer logging. Because we don't know
* how many buffers were are going to need to modify, we don't really want to
* have to make transaction reservations for the worst case of every buffer in a
* full size btree as that may be more space that we can fit in the log....
*
* We do the btree walk in the most optimal manner possible - we have sibling
* pointers so we can just walk all the blocks on each level from left to right
* in a single pass, and then move to the next level and do the same. We can
* also do readahead on the sibling pointers to get IO moving more quickly,
* though for slow disks this is unlikely to make much difference to performance
* as the amount of CPU work we have to do before moving to the next block is
* relatively small.
*
* For each btree block that we load, modify the owner appropriately, set the
* buffer as an ordered buffer and log it appropriately. We need to ensure that
* we mark the region we change dirty so that if the buffer is relogged in
* a subsequent transaction the changes we make here as an ordered buffer are
* correctly relogged in that transaction. If we are in recovery context, then
* just queue the modified buffer as delayed write buffer so the transaction
* recovery completion writes the changes to disk.
*/
static int
xfs_btree_block_change_owner(
/* Visit a block in a btree. */
STATIC int
xfs_btree_visit_block(
struct xfs_btree_cur *cur,
int level,
__uint64_t new_owner,
struct list_head *buffer_list)
xfs_btree_visit_blocks_fn fn,
void *data)
{
struct xfs_btree_block *block;
struct xfs_buf *bp;
union xfs_btree_ptr rptr;
int error;
/* do right sibling readahead */
xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
/* modify the owner */
block = xfs_btree_get_block(cur, level, &bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
else
block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
/*
* If the block is a root block hosted in an inode, we might not have a
* buffer pointer here and we shouldn't attempt to log the change as the
* information is already held in the inode and discarded when the root
* block is formatted into the on-disk inode fork. We still change it,
* though, so everything is consistent in memory.
*/
if (bp) {
if (cur->bc_tp) {
xfs_trans_ordered_buf(cur->bc_tp, bp);
xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
} else {
xfs_buf_delwri_queue(bp, buffer_list);
}
} else {
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
ASSERT(level == cur->bc_nlevels - 1);
}
/* process the block */
error = fn(cur, level, data);
if (error)
return error;
/* now read rh sibling block for next iteration */
xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
@@ -4400,11 +4358,13 @@ xfs_btree_block_change_owner(
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}
/* Visit every block in a btree. */
int
xfs_btree_change_owner(
xfs_btree_visit_blocks(
struct xfs_btree_cur *cur,
__uint64_t new_owner,
struct list_head *buffer_list)
xfs_btree_visit_blocks_fn fn,
void *data)
{
union xfs_btree_ptr lptr;
int level;
@@ -4433,9 +4393,7 @@ xfs_btree_change_owner(
/* for each buffer in the level */
do {
error = xfs_btree_block_change_owner(cur, level,
new_owner,
buffer_list);
error = xfs_btree_visit_block(cur, level, fn, data);
} while (!error);
if (error != -ENOENT)
@@ -4445,6 +4403,89 @@ xfs_btree_change_owner(
return 0;
}
/*
* Change the owner of a btree.
*
* The mechanism we use here is ordered buffer logging. Because we don't know
* how many buffers we are going to need to modify, we don't really want to
* have to make transaction reservations for the worst case of every buffer in a
* full size btree as that may be more space than we can fit in the log....
*
* We do the btree walk in the most optimal manner possible - we have sibling
* pointers so we can just walk all the blocks on each level from left to right
* in a single pass, and then move to the next level and do the same. We can
* also do readahead on the sibling pointers to get IO moving more quickly,
* though for slow disks this is unlikely to make much difference to performance
* as the amount of CPU work we have to do before moving to the next block is
* relatively small.
*
* For each btree block that we load, modify the owner appropriately, set the
* buffer as an ordered buffer and log it appropriately. We need to ensure that
* we mark the region we change dirty so that if the buffer is relogged in
* a subsequent transaction the changes we make here as an ordered buffer are
* correctly relogged in that transaction. If we are in recovery context, then
* just queue the modified buffer as a delayed write buffer so the transaction
* recovery completion writes the changes to disk.
*/
/*
 * Arguments carried through the generic block visitor to
 * xfs_btree_block_change_owner() via its opaque data pointer.
 */
struct xfs_btree_block_change_owner_info {
	/* Owner value written into the bb_owner field of every block. */
	__uint64_t		new_owner;

	/*
	 * Delayed-write list that modified buffers are queued on when the
	 * cursor has no transaction attached.
	 */
	struct list_head	*buffer_list;
};
/*
 * Block-visitor callback: rewrite the owner field of the block the cursor
 * currently points to at @level.
 *
 * @cur:   btree cursor positioned on the block to modify
 * @level: btree level of that block
 * @data:  pointer to a struct xfs_btree_block_change_owner_info holding the
 *         new owner and the delayed-write buffer list
 *
 * Always returns 0.
 */
static int
xfs_btree_block_change_owner(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*data)
{
	struct xfs_btree_block_change_owner_info	*info = data;
	struct xfs_buf		*bp;
	struct xfs_btree_block	*block = xfs_btree_get_block(cur, level, &bp);

	/*
	 * Stamp the new owner: a 64-bit big-endian value for long-pointer
	 * btrees, a 32-bit big-endian value otherwise.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		block->bb_u.l.bb_owner = cpu_to_be64(info->new_owner);
	else
		block->bb_u.s.bb_owner = cpu_to_be32(info->new_owner);

	/*
	 * A root block hosted in an inode may have no buffer behind it.  The
	 * in-memory copy has been updated above for consistency, but there is
	 * nothing to log: the owner information lives in the inode and is
	 * discarded when the root is formatted into the on-disk inode fork.
	 */
	if (!bp) {
		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
		ASSERT(level == cur->bc_nlevels - 1);
		return 0;
	}

	/* No transaction: queue the buffer for delayed write instead. */
	if (!cur->bc_tp) {
		xfs_buf_delwri_queue(bp, info->buffer_list);
		return 0;
	}

	/* In a transaction: mark the buffer ordered and log the owner field. */
	xfs_trans_ordered_buf(cur->bc_tp, bp);
	xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
	return 0;
}
/*
 * Change the owner of a btree by running xfs_btree_block_change_owner()
 * over the blocks visited by xfs_btree_visit_blocks().
 *
 * @cur:         cursor for the btree whose blocks are to be re-owned
 * @new_owner:   owner value to write into each block
 * @buffer_list: delayed-write list used for modified buffers when the
 *               cursor carries no transaction
 *
 * Returns whatever xfs_btree_visit_blocks() returns.
 */
int
xfs_btree_change_owner(
	struct xfs_btree_cur	*cur,
	__uint64_t		new_owner,
	struct list_head	*buffer_list)
{
	struct xfs_btree_block_change_owner_info	bbcoi = {
		.new_owner	= new_owner,
		.buffer_list	= buffer_list,
	};

	return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
			&bbcoi);
}
/**
* xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
* btree block

View File

@@ -536,4 +536,9 @@ int xfs_btree_query_range(struct xfs_btree_cur *cur,
union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
xfs_btree_query_range_fn fn, void *priv);
/*
 * Callback invoked by xfs_btree_visit_blocks() to process one btree block.
 * It is handed the cursor, the level of the block being processed, and the
 * caller's opaque data pointer.  A non-zero return value is treated as an
 * error by the visitor and ends processing of that block.
 */
typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
void *data);
/*
 * Visit every block in a btree, calling fn on each one with the cursor, the
 * block's level and the caller-supplied data pointer.
 */
int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
xfs_btree_visit_blocks_fn fn, void *data);
#endif /* __XFS_BTREE_H__ */