49d11bead7
I got the following lockdep splat with tree locks converted to rwsem patches on btrfs/104: ====================================================== WARNING: possible circular locking dependency detected 5.9.0+ #102 Not tainted ------------------------------------------------------ btrfs-cleaner/903 is trying to acquire lock: ffff8e7fab6ffe30 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x32/0x170 but task is already holding lock: ffff8e7fab628a88 (&fs_info->commit_root_sem){++++}-{3:3}, at: btrfs_find_all_roots+0x41/0x80 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&fs_info->commit_root_sem){++++}-{3:3}: down_read+0x40/0x130 caching_thread+0x53/0x5a0 btrfs_work_helper+0xfa/0x520 process_one_work+0x238/0x540 worker_thread+0x55/0x3c0 kthread+0x13a/0x150 ret_from_fork+0x1f/0x30 -> #2 (&caching_ctl->mutex){+.+.}-{3:3}: __mutex_lock+0x7e/0x7b0 btrfs_cache_block_group+0x1e0/0x510 find_free_extent+0xb6e/0x12f0 btrfs_reserve_extent+0xb3/0x1b0 btrfs_alloc_tree_block+0xb1/0x330 alloc_tree_block_no_bg_flush+0x4f/0x60 __btrfs_cow_block+0x11d/0x580 btrfs_cow_block+0x10c/0x220 commit_cowonly_roots+0x47/0x2e0 btrfs_commit_transaction+0x595/0xbd0 sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 deactivate_locked_super+0x36/0xa0 cleanup_mnt+0x12d/0x190 task_work_run+0x5c/0xa0 exit_to_user_mode_prepare+0x1df/0x200 syscall_exit_to_user_mode+0x54/0x280 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (&space_info->groups_sem){++++}-{3:3}: down_read+0x40/0x130 find_free_extent+0x2ed/0x12f0 btrfs_reserve_extent+0xb3/0x1b0 btrfs_alloc_tree_block+0xb1/0x330 alloc_tree_block_no_bg_flush+0x4f/0x60 __btrfs_cow_block+0x11d/0x580 btrfs_cow_block+0x10c/0x220 commit_cowonly_roots+0x47/0x2e0 btrfs_commit_transaction+0x595/0xbd0 sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 
deactivate_locked_super+0x36/0xa0 cleanup_mnt+0x12d/0x190 task_work_run+0x5c/0xa0 exit_to_user_mode_prepare+0x1df/0x200 syscall_exit_to_user_mode+0x54/0x280 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (btrfs-root-00){++++}-{3:3}: __lock_acquire+0x1167/0x2150 lock_acquire+0xb9/0x3d0 down_read_nested+0x43/0x130 __btrfs_tree_read_lock+0x32/0x170 __btrfs_read_lock_root_node+0x3a/0x50 btrfs_search_slot+0x614/0x9d0 btrfs_find_root+0x35/0x1b0 btrfs_read_tree_root+0x61/0x120 btrfs_get_root_ref+0x14b/0x600 find_parent_nodes+0x3e6/0x1b30 btrfs_find_all_roots_safe+0xb4/0x130 btrfs_find_all_roots+0x60/0x80 btrfs_qgroup_trace_extent_post+0x27/0x40 btrfs_add_delayed_data_ref+0x3fd/0x460 btrfs_free_extent+0x42/0x100 __btrfs_mod_ref+0x1d7/0x2f0 walk_up_proc+0x11c/0x400 walk_up_tree+0xf0/0x180 btrfs_drop_snapshot+0x1c7/0x780 btrfs_clean_one_deleted_snapshot+0xfb/0x110 cleaner_kthread+0xd4/0x140 kthread+0x13a/0x150 ret_from_fork+0x1f/0x30 other info that might help us debug this: Chain exists of: btrfs-root-00 --> &caching_ctl->mutex --> &fs_info->commit_root_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&fs_info->commit_root_sem); lock(&caching_ctl->mutex); lock(&fs_info->commit_root_sem); lock(btrfs-root-00); *** DEADLOCK *** 3 locks held by btrfs-cleaner/903: #0: ffff8e7fab628838 (&fs_info->cleaner_mutex){+.+.}-{3:3}, at: cleaner_kthread+0x6e/0x140 #1: ffff8e7faadac640 (sb_internal){.+.+}-{0:0}, at: start_transaction+0x40b/0x5c0 #2: ffff8e7fab628a88 (&fs_info->commit_root_sem){++++}-{3:3}, at: btrfs_find_all_roots+0x41/0x80 stack backtrace: CPU: 0 PID: 903 Comm: btrfs-cleaner Not tainted 5.9.0+ #102 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x8b/0xb0 check_noncircular+0xcf/0xf0 __lock_acquire+0x1167/0x2150 ? __bfs+0x42/0x210 lock_acquire+0xb9/0x3d0 ? __btrfs_tree_read_lock+0x32/0x170 down_read_nested+0x43/0x130 ? 
__btrfs_tree_read_lock+0x32/0x170 __btrfs_tree_read_lock+0x32/0x170 __btrfs_read_lock_root_node+0x3a/0x50 btrfs_search_slot+0x614/0x9d0 ? find_held_lock+0x2b/0x80 btrfs_find_root+0x35/0x1b0 ? do_raw_spin_unlock+0x4b/0xa0 btrfs_read_tree_root+0x61/0x120 btrfs_get_root_ref+0x14b/0x600 find_parent_nodes+0x3e6/0x1b30 btrfs_find_all_roots_safe+0xb4/0x130 btrfs_find_all_roots+0x60/0x80 btrfs_qgroup_trace_extent_post+0x27/0x40 btrfs_add_delayed_data_ref+0x3fd/0x460 btrfs_free_extent+0x42/0x100 __btrfs_mod_ref+0x1d7/0x2f0 walk_up_proc+0x11c/0x400 walk_up_tree+0xf0/0x180 btrfs_drop_snapshot+0x1c7/0x780 ? btrfs_clean_one_deleted_snapshot+0x73/0x110 btrfs_clean_one_deleted_snapshot+0xfb/0x110 cleaner_kthread+0xd4/0x140 ? btrfs_alloc_root+0x50/0x50 kthread+0x13a/0x150 ? kthread_create_worker_on_cpu+0x40/0x40 ret_from_fork+0x1f/0x30 BTRFS info (device sdb): disk space caching is enabled BTRFS info (device sdb): has skinny extents This happens because qgroups does a backref lookup when we create a delayed ref. From here it may have to look up a root from an indirect ref, which does a normal lookup on the tree_root, which takes the read lock on the tree_root nodes. To fix this we need to add a variant for looking up roots that searches the commit root of the tree_root. Then when we do the backref search using the commit root we are sure to not take any locks on the tree_root nodes. This gets rid of the lockdep splat when running btrfs/104. Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com>
151 lines
5.4 KiB
C
151 lines
5.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 */
|
|
|
|
#ifndef BTRFS_DISK_IO_H
|
|
#define BTRFS_DISK_IO_H
|
|
|
|
/*
 * BTRFS_SUPER_INFO_OFFSET is the value btrfs_sb_offset() returns for
 * mirror 0. BTRFS_SUPER_INFO_SIZE is presumably the size in bytes of one
 * superblock copy -- confirm against its users.
 */
#define BTRFS_SUPER_INFO_OFFSET SZ_64K
#define BTRFS_SUPER_INFO_SIZE 4096

/*
 * BTRFS_SUPER_MIRROR_SHIFT is multiplied by the mirror number in
 * btrfs_sb_offset() to compute the shift for non-zero mirrors.
 * BTRFS_SUPER_MIRROR_MAX is presumably the number of superblock copies --
 * confirm against its users.
 */
#define BTRFS_SUPER_MIRROR_MAX	 3
#define BTRFS_SUPER_MIRROR_SHIFT 12

/*
 * Fixed blocksize for all devices, applies to specific ways of reading
 * metadata like superblock. Must meet the set_blocksize requirements.
 *
 * Do not change.
 */
#define BTRFS_BDEV_BLOCKSIZE	(4096)
|
|
|
|
/*
 * Selector passed to btrfs_bio_wq_end_io() as its "metadata" argument.
 * Values are implicit and start at 0; keep the order stable.
 */
enum btrfs_wq_endio_type {
	BTRFS_WQ_ENDIO_DATA,
	BTRFS_WQ_ENDIO_METADATA,
	BTRFS_WQ_ENDIO_FREE_SPACE,
	BTRFS_WQ_ENDIO_RAID56,
};
|
|
|
|
static inline u64 btrfs_sb_offset(int mirror)
|
|
{
|
|
u64 start = SZ_16K;
|
|
if (mirror)
|
|
return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
|
|
return BTRFS_SUPER_INFO_OFFSET;
|
|
}
|
|
|
|
/* Forward declarations: only pointers to these types are used below. */
struct btrfs_device;
struct btrfs_fs_devices;
|
|
|
|
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
|
|
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
|
|
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
|
|
struct btrfs_key *first_key, u64 parent_transid);
|
|
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|
u64 parent_transid, int level,
|
|
struct btrfs_key *first_key);
|
|
void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr);
|
|
struct extent_buffer *btrfs_find_create_tree_block(
|
|
struct btrfs_fs_info *fs_info,
|
|
u64 bytenr);
|
|
void btrfs_clean_tree_block(struct extent_buffer *buf);
|
|
int __cold open_ctree(struct super_block *sb,
|
|
struct btrfs_fs_devices *fs_devices,
|
|
char *options);
|
|
void __cold close_ctree(struct btrfs_fs_info *fs_info);
|
|
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
|
|
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
|
|
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
|
|
int copy_num);
|
|
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
|
|
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
|
|
struct btrfs_key *key);
|
|
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_root *root);
|
|
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
|
|
|
|
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
|
|
u64 objectid, bool check_ref);
|
|
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
|
|
u64 objectid, dev_t anon_dev);
|
|
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_path *path,
|
|
u64 objectid);
|
|
|
|
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
|
|
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
|
|
void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
|
|
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
|
|
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_root *root);
|
|
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, u64 phy_offset,
|
|
struct page *page, u64 start, u64 end,
|
|
int mirror);
|
|
blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
|
|
int mirror_num, unsigned long bio_flags);
|
|
/* Only declared when the btrfs sanity tests are configured in. */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
#endif
|
|
|
|
/*
|
|
* This function is used to grab the root, and avoid it is freed when we
|
|
* access it. But it doesn't ensure that the tree is not dropped.
|
|
*
|
|
* If you want to ensure the whole tree is safe, you should use
|
|
* fs_info->subvol_srcu
|
|
*/
|
|
static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
|
|
{
|
|
if (!root)
|
|
return NULL;
|
|
if (refcount_inc_not_zero(&root->refs))
|
|
return root;
|
|
return NULL;
|
|
}
|
|
|
|
void btrfs_put_root(struct btrfs_root *root);
|
|
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
|
|
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
|
int atomic);
|
|
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
|
|
struct btrfs_key *first_key);
|
|
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
|
enum btrfs_wq_endio_type metadata);
|
|
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
|
int mirror_num, unsigned long bio_flags,
|
|
u64 bio_offset, void *private_data,
|
|
extent_submit_bio_start_t *submit_bio_start);
|
|
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
|
|
int mirror_num);
|
|
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info);
|
|
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root);
|
|
void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans,
|
|
struct btrfs_fs_info *fs_info);
|
|
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
|
|
struct btrfs_fs_info *fs_info);
|
|
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
|
u64 objectid);
|
|
int btree_lock_page_hook(struct page *page, void *data,
|
|
void (*flush_fn)(void *));
|
|
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
|
|
int __init btrfs_end_io_wq_init(void);
|
|
void __cold btrfs_end_io_wq_exit(void);
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
void btrfs_init_lockdep(void);
|
|
void btrfs_set_buffer_lockdep_class(u64 objectid,
|
|
struct extent_buffer *eb, int level);
|
|
#else
|
|
static inline void btrfs_init_lockdep(void)
|
|
{ }
|
|
static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
|
|
struct extent_buffer *eb, int level)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
#endif
|