From eba1469f8f6a4c98d9cfdba2477fac82a2347c6c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 20 Jun 2024 19:51:32 +0100 Subject: [PATCH] btrfs: avoid allocating and running pointless delayed extent operations We always allocate a delayed extent op structure when allocating a tree block (except for log trees), but most of the time we don't need it as we only need to set the BTRFS_BLOCK_FLAG_FULL_BACKREF if we're dealing with a relocation tree and we only need to set the key of a tree block in a btrfs_tree_block_info structure if we are not using skinny metadata (feature enabled by default since btrfs-progs 3.18 and available as of kernel 3.10). In these cases, where we don't need neither to update flags nor to set the key, we only use the delayed extent op structure to set the tree block's level. This is a waste of memory and besides that, the memory allocation can fail and can add additional latency. Instead of using a delayed extent op structure to store the level of the tree block, use the delayed ref head to store it. This doesn't change the size of neither structure and helps us avoid allocating delayed extent ops structures when using the skinny metadata feature and there's no relocation going on. This also gets rid of a BUG_ON(). For example, for a fs_mark run, with 5 iterations, 8 threads and 100K files per iteration, before this patch there were 118109 allocations of delayed extent op structures and after it there were none. Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 9 +++++- fs/btrfs/delayed-ref.h | 6 ++-- fs/btrfs/extent-tree.c | 63 ++++++++++++++++++------------------------ 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 6b4296ab651f..2ac9296edccb 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -819,6 +819,12 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref, spin_lock_init(&head_ref->lock); mutex_init(&head_ref->mutex); + /* If not metadata set an impossible level to help debugging. */ + if (generic_ref->type == BTRFS_REF_METADATA) + head_ref->level = generic_ref->tree_ref.level; + else + head_ref->level = U8_MAX; + if (qrecord) { if (generic_ref->ref_root && reserved) { qrecord->data_rsv = reserved; @@ -1072,7 +1078,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, } int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, + u64 bytenr, u64 num_bytes, u8 level, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_delayed_ref_head *head_ref; @@ -1082,6 +1088,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, .action = BTRFS_UPDATE_DELAYED_HEAD, .bytenr = bytenr, .num_bytes = num_bytes, + .tree_ref.level = level, }; head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 405be46c420f..ef15e998be03 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -108,7 +108,6 @@ struct btrfs_delayed_ref_node { struct btrfs_delayed_extent_op { struct btrfs_disk_key key; - u8 level; bool update_key; bool update_flags; u64 flags_to_set; @@ -172,6 +171,9 @@ struct btrfs_delayed_ref_head { */ u64 reserved_bytes; + /* Tree block level, for metadata only. */ + u8 level; + /* * when a new extent is allocated, it is just reserved in memory * The actual extent isn't inserted into the extent allocation tree @@ -355,7 +357,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, u64 reserved); int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, + u64 bytenr, u64 num_bytes, u8 level, struct btrfs_delayed_extent_op *extent_op); void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 23a7cac108eb..d77498e7671c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -200,19 +200,8 @@ search_again: goto search_again; } spin_lock(&head->lock); - if (head->extent_op && head->extent_op->update_flags) { + if (head->extent_op && head->extent_op->update_flags) extent_flags |= head->extent_op->flags_to_set; - } else if (unlikely(num_refs == 0)) { - spin_unlock(&head->lock); - mutex_unlock(&head->mutex); - spin_unlock(&delayed_refs->lock); - ret = -EUCLEAN; - btrfs_err(fs_info, - "unexpected zero reference count for extent %llu (%s)", - bytenr, metadata ? "metadata" : "data"); - btrfs_abort_transaction(trans, ret); - goto out_free; - } num_refs += head->ref_mod; spin_unlock(&head->lock); @@ -1632,7 +1621,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, if (metadata) { key.type = BTRFS_METADATA_ITEM_KEY; - key.offset = extent_op->level; + key.offset = head->level; } else { key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = head->num_bytes; @@ -1667,7 +1656,7 @@ again: ret = -EUCLEAN; btrfs_err(fs_info, "missing extent item for extent %llu num_bytes %llu level %d", - head->bytenr, head->num_bytes, extent_op->level); + head->bytenr, head->num_bytes, head->level); goto out; } } @@ -1726,7 +1715,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, .generation = trans->transid, }; - BUG_ON(!extent_op || !extent_op->update_flags); ret = alloc_reserved_tree_block(trans, node, extent_op); if (!ret) btrfs_record_squota_delta(fs_info, &delta); @@ -2233,7 +2221,6 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, struct extent_buffer *eb, u64 flags) { struct btrfs_delayed_extent_op *extent_op; - int level = btrfs_header_level(eb); int ret; extent_op = btrfs_alloc_delayed_extent_op(); @@ -2243,9 +2230,9 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, extent_op->flags_to_set = flags; extent_op->update_flags = true; extent_op->update_key = false; - extent_op->level = level; - ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op); + ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, + btrfs_header_level(eb), extent_op); if (ret) btrfs_free_delayed_extent_op(extent_op); return ret; @@ -4866,7 +4853,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct extent_buffer *leaf; u32 size = sizeof(*extent_item) + sizeof(*iref); - u64 flags = extent_op->flags_to_set; + const u64 flags = (extent_op ? extent_op->flags_to_set : 0); /* The owner of a tree block is the level. */ int level = btrfs_delayed_ref_owner(node); bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); @@ -5123,7 +5110,6 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, struct btrfs_key ins; struct btrfs_block_rsv *block_rsv; struct extent_buffer *buf; - struct btrfs_delayed_extent_op *extent_op; u64 flags = 0; int ret; u32 blocksize = fs_info->nodesize; @@ -5166,6 +5152,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, BUG_ON(parent > 0); if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { + struct btrfs_delayed_extent_op *extent_op; struct btrfs_ref generic_ref = { .action = BTRFS_ADD_DELAYED_EXTENT, .bytenr = ins.objectid, @@ -5174,30 +5161,34 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, .owning_root = owning_root, .ref_root = root_objectid, }; - extent_op = btrfs_alloc_delayed_extent_op(); - if (!extent_op) { - ret = -ENOMEM; - goto out_free_buf; + + if (!skinny_metadata || flags != 0) { + extent_op = btrfs_alloc_delayed_extent_op(); + if (!extent_op) { + ret = -ENOMEM; + goto out_free_buf; + } + if (key) + memcpy(&extent_op->key, key, sizeof(extent_op->key)); + else + memset(&extent_op->key, 0, sizeof(extent_op->key)); + extent_op->flags_to_set = flags; + extent_op->update_key = (skinny_metadata ? false : true); + extent_op->update_flags = (flags != 0); + } else { + extent_op = NULL; } - if (key) - memcpy(&extent_op->key, key, sizeof(extent_op->key)); - else - memset(&extent_op->key, 0, sizeof(extent_op->key)); - extent_op->flags_to_set = flags; - extent_op->update_key = skinny_metadata ? false : true; - extent_op->update_flags = true; - extent_op->level = level; btrfs_init_tree_ref(&generic_ref, level, btrfs_root_id(root), false); btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op); - if (ret) - goto out_free_delayed; + if (ret) { + btrfs_free_delayed_extent_op(extent_op); + goto out_free_buf; + } } return buf; -out_free_delayed: - btrfs_free_delayed_extent_op(extent_op); out_free_buf: btrfs_tree_unlock(buf); free_extent_buffer(buf);