btrfs: skip backref walking during fiemap if we know the leaf is shared
During fiemap, when checking if a data extent is shared we are doing the backref walking even if we already know the leaf is shared, which is a waste of time since if the leaf is shared then the data extent is also shared. So skip the backref walking when we know we are in a shared leaf. The following test measures the gains for a case where all leaves are shared due to a snapshot: $ cat test.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj umount $DEV &> /dev/null mkfs.btrfs -f $DEV # Use compression to quickly create files with a lot of extents # (each with a size of 128K). mount -o compress=lzo $DEV $MNT # 40G gives 327680 extents, each with a size of 128K. xfs_io -f -c "pwrite -S 0xab -b 1M 0 40G" $MNT/foobar # Add some more files to increase the size of the fs and extent # trees (in the real world there's a lot of files and extents # from other files). xfs_io -f -c "pwrite -S 0xcd -b 1M 0 20G" $MNT/file1 xfs_io -f -c "pwrite -S 0xef -b 1M 0 20G" $MNT/file2 xfs_io -f -c "pwrite -S 0x73 -b 1M 0 20G" $MNT/file3 # Create a snapshot so all the extents become indirectly shared # through subtrees, with a generation less than or equal to the # generation used to create the snapshot. btrfs subvolume snapshot -r $MNT $MNT/snap1 # Unmount and mount again to clear cached metadata. umount $MNT mount -o compress=lzo $DEV $MNT start=$(date +%s%N) # The filefrag tool uses the fiemap ioctl. filefrag $MNT/foobar end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "fiemap took $dur milliseconds (metadata not cached)" echo start=$(date +%s%N) filefrag $MNT/foobar end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "fiemap took $dur milliseconds (metadata cached)" umount $MNT The results were the following on a non-debug kernel (Debian's default kernel config). Before this patch: (...) 
/mnt/sdi/foobar: 327680 extents found fiemap took 1821 milliseconds (metadata not cached) /mnt/sdi/foobar: 327680 extents found fiemap took 399 milliseconds (metadata cached) After this patch: (...) /mnt/sdi/foobar: 327680 extents found fiemap took 591 milliseconds (metadata not cached) /mnt/sdi/foobar: 327680 extents found fiemap took 123 milliseconds (metadata cached) That's a speedup of 3.1x and 3.2x. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
4e4488d4ef
commit
e2fd83064a
@ -1872,6 +1872,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
|
||||
.have_delayed_delete_refs = false,
|
||||
};
|
||||
int level;
|
||||
bool leaf_cached;
|
||||
bool leaf_is_shared;
|
||||
|
||||
for (int i = 0; i < BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE; i++) {
|
||||
if (ctx->prev_extents_cache[i].bytenr == bytenr)
|
||||
@ -1893,6 +1895,23 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
|
||||
walk_ctx.time_seq = elem.seq;
|
||||
}
|
||||
|
||||
ctx->use_path_cache = true;
|
||||
|
||||
/*
|
||||
* We may have previously determined that the current leaf is shared.
|
||||
* If it is, then we have a data extent that is shared due to a shared
|
||||
* subtree (caused by snapshotting) and we don't need to check for data
|
||||
* backrefs. If the leaf is not shared, then we must do backref walking
|
||||
* to determine if the data extent is shared through reflinks.
|
||||
*/
|
||||
leaf_cached = lookup_backref_shared_cache(ctx, root,
|
||||
ctx->curr_leaf_bytenr, 0,
|
||||
&leaf_is_shared);
|
||||
if (leaf_cached && leaf_is_shared) {
|
||||
ret = 1;
|
||||
goto out_trans;
|
||||
}
|
||||
|
||||
walk_ctx.ignore_extent_item_pos = true;
|
||||
walk_ctx.trans = trans;
|
||||
walk_ctx.fs_info = fs_info;
|
||||
@ -1901,7 +1920,6 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
|
||||
/* -1 means we are in the bytenr of the data extent. */
|
||||
level = -1;
|
||||
ULIST_ITER_INIT(&uiter);
|
||||
ctx->use_path_cache = true;
|
||||
while (1) {
|
||||
bool is_shared;
|
||||
bool cached;
|
||||
@ -1972,6 +1990,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
|
||||
ctx->prev_extents_cache_slot = slot;
|
||||
}
|
||||
|
||||
out_trans:
|
||||
if (trans) {
|
||||
btrfs_put_tree_mod_seq(fs_info, &elem);
|
||||
btrfs_end_transaction(trans);
|
||||
|
Loading…
Reference in New Issue
Block a user