btrfs: switch btrfs_root::delayed_nodes_tree to xarray from radix-tree
The radix-tree has been superseded by the xarray (https://lwn.net/Articles/745073). This patch converts btrfs_root::delayed_nodes_tree; the APIs are used in a simple way.

The first idea was to use xa_insert(), but that would require a GFP_ATOMIC allocation, which we want to avoid if possible. The preload mechanism of the radix-tree can be emulated within the xarray API:

- xa_reserve() with GFP_NOFS outside of the lock; the reserved entry is inserted atomically at most once
- xa_store() under a lock; in case something races in, we can detect that and xa_load() returns a valid pointer

All uses of xa_load() must check for a valid pointer in case they manage to get between the xa_reserve() and the xa_store(); this is handled in btrfs_get_delayed_node(). Otherwise the functionality is equivalent: the xarray implements the radix-tree, and there should be no performance difference.

The patch continues the efforts started in 253bf57555e451 ("btrfs: turn delayed_nodes_tree into an XArray") and fixes the locking and GFP-flag problems that led to 088aea3b97e0ae ("Revert "btrfs: turn delayed_nodes_tree into an XArray"").

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent eefaf0a1a6
commit 6140ba8a0a
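For reference, a minimal self-contained sketch of the reserve-then-store pattern described in the message above. The names struct obj, obj_xa, obj_lock and obj_get_or_create() are illustrative and not part of the patch, which operates on struct btrfs_delayed_node, root->delayed_nodes and root->inode_lock.

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/xarray.h>

struct obj {
	unsigned long id;
};

static DEFINE_XARRAY(obj_xa);
static DEFINE_SPINLOCK(obj_lock);

static struct obj *obj_get_or_create(unsigned long id)
{
	struct obj *obj, *cur;
	int ret;

again:
	/* A reserved-but-unfilled slot reads back as NULL from xa_load(). */
	spin_lock(&obj_lock);
	cur = xa_load(&obj_xa, id);
	spin_unlock(&obj_lock);
	if (cur)
		return cur;

	obj = kzalloc(sizeof(*obj), GFP_NOFS);
	if (!obj)
		return ERR_PTR(-ENOMEM);
	obj->id = id;

	/* Reserve the slot outside the lock; only this step may allocate. */
	ret = xa_reserve(&obj_xa, id, GFP_NOFS);
	if (ret) {
		kfree(obj);
		return ERR_PTR(ret);
	}

	spin_lock(&obj_lock);
	cur = xa_load(&obj_xa, id);
	if (cur) {
		/* Somebody else stored an object meanwhile, retry the lookup. */
		spin_unlock(&obj_lock);
		kfree(obj);
		goto again;
	}
	/* The slot is already reserved, so this store does not need to allocate. */
	xa_store(&obj_xa, id, obj, GFP_ATOMIC);
	spin_unlock(&obj_lock);

	return obj;
}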
@@ -227,10 +227,10 @@ struct btrfs_root {
 	struct rb_root inode_tree;
 
 	/*
-	 * radix tree that keeps track of delayed nodes of every inode,
-	 * protected by inode_lock
+	 * Xarray that keeps track of delayed nodes of every inode, protected
+	 * by @inode_lock.
 	 */
-	struct radix_tree_root delayed_nodes_tree;
+	struct xarray delayed_nodes;
 	/*
 	 * right now this just gets used so that a root has its own devid
 	 * for stat. It may be used for more later
@@ -71,7 +71,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 	}
 
 	spin_lock(&root->inode_lock);
-	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+	node = xa_load(&root->delayed_nodes, ino);
 
 	if (node) {
 		if (btrfs_inode->delayed_node) {
@@ -83,9 +83,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 
 		/*
 		 * It's possible that we're racing into the middle of removing
-		 * this node from the radix tree. In this case, the refcount
+		 * this node from the xarray. In this case, the refcount
 		 * was zero and it should never go back to one. Just return
-		 * NULL like it was never in the radix at all; our release
+		 * NULL like it was never in the xarray at all; our release
 		 * function is in the process of removing it.
 		 *
 		 * Some implementations of refcount_inc refuse to bump the
@@ -93,7 +93,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 		 * here, refcount_inc() may decide to just WARN_ONCE() instead
 		 * of actually bumping the refcount.
 		 *
-		 * If this node is properly in the radix, we want to bump the
+		 * If this node is properly in the xarray, we want to bump the
 		 * refcount twice, once for the inode and once for this get
 		 * operation.
 		 */
@@ -120,6 +120,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
 	struct btrfs_root *root = btrfs_inode->root;
 	u64 ino = btrfs_ino(btrfs_inode);
 	int ret;
+	void *ptr;
 
 again:
 	node = btrfs_get_delayed_node(btrfs_inode);
@@ -131,26 +132,30 @@ again:
 		return ERR_PTR(-ENOMEM);
 	btrfs_init_delayed_node(node, root, ino);
 
-	/* cached in the btrfs inode and can be accessed */
+	/* Cached in the inode and can be accessed. */
 	refcount_set(&node->refs, 2);
 
-	ret = radix_tree_preload(GFP_NOFS);
-	if (ret) {
+	/* Allocate and reserve the slot, from now it can return a NULL from xa_load(). */
+	ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS);
+	if (ret == -ENOMEM) {
 		kmem_cache_free(delayed_node_cache, node);
-		return ERR_PTR(ret);
+		return ERR_PTR(-ENOMEM);
 	}
-
 	spin_lock(&root->inode_lock);
-	ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
-	if (ret == -EEXIST) {
+	ptr = xa_load(&root->delayed_nodes, ino);
+	if (ptr) {
+		/* Somebody inserted it, go back and read it. */
 		spin_unlock(&root->inode_lock);
 		kmem_cache_free(delayed_node_cache, node);
-		radix_tree_preload_end();
+		node = NULL;
 		goto again;
 	}
+	ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC);
+	ASSERT(xa_err(ptr) != -EINVAL);
+	ASSERT(xa_err(ptr) != -ENOMEM);
+	ASSERT(ptr == NULL);
 	btrfs_inode->delayed_node = node;
 	spin_unlock(&root->inode_lock);
-	radix_tree_preload_end();
 
 	return node;
 }
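A note on the xa_store() call added above: because xa_reserve() has already allocated the slot for this index, the store itself does not need to allocate memory, so passing GFP_ATOMIC is harmless; that is what the ASSERT on xa_err(ptr) != -ENOMEM documents. xa_store() also returns the previous entry at the index, and a reserved slot reads back as NULL, which is why ptr is asserted to be NULL when nobody raced in.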
@@ -269,8 +274,7 @@ static void __btrfs_release_delayed_node(
 		 * back up. We can delete it now.
 		 */
 		ASSERT(refcount_read(&delayed_node->refs) == 0);
-		radix_tree_delete(&root->delayed_nodes_tree,
-				  delayed_node->inode_id);
+		xa_erase(&root->delayed_nodes, delayed_node->inode_id);
 		spin_unlock(&root->inode_lock);
 		kmem_cache_free(delayed_node_cache, delayed_node);
 	}
@@ -2038,34 +2042,36 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
 
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
 {
-	u64 inode_id = 0;
+	unsigned long index = 0;
 	struct btrfs_delayed_node *delayed_nodes[8];
-	int i, n;
 
 	while (1) {
+		struct btrfs_delayed_node *node;
+		int count;
+
 		spin_lock(&root->inode_lock);
-		n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
-					   (void **)delayed_nodes, inode_id,
-					   ARRAY_SIZE(delayed_nodes));
-		if (!n) {
+		if (xa_empty(&root->delayed_nodes)) {
 			spin_unlock(&root->inode_lock);
-			break;
+			return;
 		}
 
-		inode_id = delayed_nodes[n - 1]->inode_id + 1;
-		for (i = 0; i < n; i++) {
+		count = 0;
+		xa_for_each_start(&root->delayed_nodes, index, node, index) {
 			/*
 			 * Don't increase refs in case the node is dead and
 			 * about to be removed from the tree in the loop below
 			 */
-			if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
-				delayed_nodes[i] = NULL;
+			if (refcount_inc_not_zero(&node->refs)) {
+				delayed_nodes[count] = node;
+				count++;
+			}
+			if (count >= ARRAY_SIZE(delayed_nodes))
+				break;
 		}
 		spin_unlock(&root->inode_lock);
+		index++;
 
-		for (i = 0; i < n; i++) {
-			if (!delayed_nodes[i])
-				continue;
+		for (int i = 0; i < count; i++) {
 			__btrfs_kill_delayed_node(delayed_nodes[i]);
 			btrfs_release_delayed_node(delayed_nodes[i]);
 		}
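The new btrfs_kill_all_delayed_nodes() above also changes the iteration strategy: instead of radix_tree_gang_lookup(), entries are collected in batches with xa_for_each_start() and the scan resumes one past the last visited index. A minimal sketch of that batching pattern follows; struct obj, obj_put() and scan_all_objects() are illustrative names, and the delayed-node-specific kill logic is replaced by a trivial action.

#include <linux/kernel.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/xarray.h>

/* Illustrative object; the real code uses struct btrfs_delayed_node. */
struct obj {
	unsigned long id;
	refcount_t refs;
};

static void obj_put(struct xarray *xa, spinlock_t *lock, struct obj *obj)
{
	if (refcount_dec_and_test(&obj->refs)) {
		/* Last reference: remove it from the xarray before freeing. */
		spin_lock(lock);
		xa_erase(xa, obj->id);
		spin_unlock(lock);
		kfree(obj);
	}
}

static void scan_all_objects(struct xarray *xa, spinlock_t *lock)
{
	unsigned long index = 0;
	struct obj *batch[8];

	while (1) {
		struct obj *obj;
		int count = 0;

		/* Grab references to up to 8 live entries while holding the lock. */
		spin_lock(lock);
		xa_for_each_start(xa, index, obj, index) {
			if (refcount_inc_not_zero(&obj->refs))
				batch[count++] = obj;
			if (count >= ARRAY_SIZE(batch))
				break;
		}
		spin_unlock(lock);
		if (!count)
			return;
		/* The iterator left 'index' at the last entry it visited; resume after it. */
		index++;

		/* Work on the batch without the lock held. */
		for (int i = 0; i < count; i++) {
			pr_info("object %lu\n", batch[i]->id);
			obj_put(xa, lock, batch[i]);
		}
	}
}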
@@ -655,7 +655,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	root->nr_delalloc_inodes = 0;
 	root->nr_ordered_extents = 0;
 	root->inode_tree = RB_ROOT;
-	INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
+	/* GFP flags are compatible with XA_FLAGS_*. */
+	xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
 
 	btrfs_init_root_block_rsv(root);
 
@@ -3805,7 +3805,7 @@ cache_index:
 	 * cache.
 	 *
 	 * This is required for both inode re-read from disk and delayed inode
-	 * in delayed_nodes_tree.
+	 * in the delayed_nodes xarray.
 	 */
 	if (BTRFS_I(inode)->last_trans == btrfs_get_fs_generation(fs_info))
 		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,