lib: radix-tree: add entry deletion support to __radix_tree_replace()
Page cache shadow entry handling will be a lot simpler when it can use a single generic replacement function for pages, shadow entries, and emptying slots. Make __radix_tree_replace() properly account insertions and deletions in node->count and garbage collect nodes as they become empty. Then re-implement radix_tree_delete() on top of it. Link: http://lkml.kernel.org/r/20161117193058.GC23430@cmpxchg.org Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Jan Kara <jack@suse.cz> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Hugh Dickins <hughd@google.com> Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
6d75f366b9
commit
f4b109c6da
227
lib/radix-tree.c
227
lib/radix-tree.c
@ -538,6 +538,107 @@ out:
|
||||
return maxshift + RADIX_TREE_MAP_SHIFT;
|
||||
}
|
||||
|
||||
/**
|
||||
* radix_tree_shrink - shrink radix tree to minimum height
|
||||
* @root radix tree root
|
||||
*/
|
||||
static inline bool radix_tree_shrink(struct radix_tree_root *root)
|
||||
{
|
||||
bool shrunk = false;
|
||||
|
||||
for (;;) {
|
||||
struct radix_tree_node *node = root->rnode;
|
||||
struct radix_tree_node *child;
|
||||
|
||||
if (!radix_tree_is_internal_node(node))
|
||||
break;
|
||||
node = entry_to_node(node);
|
||||
|
||||
/*
|
||||
* The candidate node has more than one child, or its child
|
||||
* is not at the leftmost slot, or the child is a multiorder
|
||||
* entry, we cannot shrink.
|
||||
*/
|
||||
if (node->count != 1)
|
||||
break;
|
||||
child = node->slots[0];
|
||||
if (!child)
|
||||
break;
|
||||
if (!radix_tree_is_internal_node(child) && node->shift)
|
||||
break;
|
||||
|
||||
if (radix_tree_is_internal_node(child))
|
||||
entry_to_node(child)->parent = NULL;
|
||||
|
||||
/*
|
||||
* We don't need rcu_assign_pointer(), since we are simply
|
||||
* moving the node from one part of the tree to another: if it
|
||||
* was safe to dereference the old pointer to it
|
||||
* (node->slots[0]), it will be safe to dereference the new
|
||||
* one (root->rnode) as far as dependent read barriers go.
|
||||
*/
|
||||
root->rnode = child;
|
||||
|
||||
/*
|
||||
* We have a dilemma here. The node's slot[0] must not be
|
||||
* NULLed in case there are concurrent lookups expecting to
|
||||
* find the item. However if this was a bottom-level node,
|
||||
* then it may be subject to the slot pointer being visible
|
||||
* to callers dereferencing it. If item corresponding to
|
||||
* slot[0] is subsequently deleted, these callers would expect
|
||||
* their slot to become empty sooner or later.
|
||||
*
|
||||
* For example, lockless pagecache will look up a slot, deref
|
||||
* the page pointer, and if the page has 0 refcount it means it
|
||||
* was concurrently deleted from pagecache so try the deref
|
||||
* again. Fortunately there is already a requirement for logic
|
||||
* to retry the entire slot lookup -- the indirect pointer
|
||||
* problem (replacing direct root node with an indirect pointer
|
||||
* also results in a stale slot). So tag the slot as indirect
|
||||
* to force callers to retry.
|
||||
*/
|
||||
if (!radix_tree_is_internal_node(child))
|
||||
node->slots[0] = RADIX_TREE_RETRY;
|
||||
|
||||
radix_tree_node_free(node);
|
||||
shrunk = true;
|
||||
}
|
||||
|
||||
return shrunk;
|
||||
}
|
||||
|
||||
static bool delete_node(struct radix_tree_root *root,
|
||||
struct radix_tree_node *node)
|
||||
{
|
||||
bool deleted = false;
|
||||
|
||||
do {
|
||||
struct radix_tree_node *parent;
|
||||
|
||||
if (node->count) {
|
||||
if (node == entry_to_node(root->rnode))
|
||||
deleted |= radix_tree_shrink(root);
|
||||
return deleted;
|
||||
}
|
||||
|
||||
parent = node->parent;
|
||||
if (parent) {
|
||||
parent->slots[node->offset] = NULL;
|
||||
parent->count--;
|
||||
} else {
|
||||
root_tag_clear_all(root);
|
||||
root->rnode = NULL;
|
||||
}
|
||||
|
||||
radix_tree_node_free(node);
|
||||
deleted = true;
|
||||
|
||||
node = parent;
|
||||
} while (node);
|
||||
|
||||
return deleted;
|
||||
}
|
||||
|
||||
/**
|
||||
* __radix_tree_create - create a slot in a radix tree
|
||||
* @root: radix tree root
|
||||
@ -759,18 +860,20 @@ static void replace_slot(struct radix_tree_root *root,
|
||||
bool warn_typeswitch)
|
||||
{
|
||||
void *old = rcu_dereference_raw(*slot);
|
||||
int exceptional;
|
||||
int count, exceptional;
|
||||
|
||||
WARN_ON_ONCE(radix_tree_is_internal_node(item));
|
||||
WARN_ON_ONCE(!!item - !!old);
|
||||
|
||||
count = !!item - !!old;
|
||||
exceptional = !!radix_tree_exceptional_entry(item) -
|
||||
!!radix_tree_exceptional_entry(old);
|
||||
|
||||
WARN_ON_ONCE(warn_typeswitch && exceptional);
|
||||
WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
|
||||
|
||||
if (node)
|
||||
if (node) {
|
||||
node->count += count;
|
||||
node->exceptional += exceptional;
|
||||
}
|
||||
|
||||
rcu_assign_pointer(*slot, item);
|
||||
}
|
||||
@ -790,12 +893,14 @@ void __radix_tree_replace(struct radix_tree_root *root,
|
||||
void **slot, void *item)
|
||||
{
|
||||
/*
|
||||
* This function supports replacing exceptional entries, but
|
||||
* that needs accounting against the node unless the slot is
|
||||
* root->rnode.
|
||||
* This function supports replacing exceptional entries and
|
||||
* deleting entries, but that needs accounting against the
|
||||
* node unless the slot is root->rnode.
|
||||
*/
|
||||
replace_slot(root, node, slot, item,
|
||||
!node && slot != (void **)&root->rnode);
|
||||
|
||||
delete_node(root, node);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -810,8 +915,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
|
||||
*
|
||||
* NOTE: This cannot be used to switch between non-entries (empty slots),
|
||||
* regular entries, and exceptional entries, as that requires accounting
|
||||
* inside the radix tree node. When switching from one type of entry to
|
||||
* another, use __radix_tree_lookup() and __radix_tree_replace().
|
||||
* inside the radix tree node. When switching from one type of entry or
|
||||
* deleting, use __radix_tree_lookup() and __radix_tree_replace().
|
||||
*/
|
||||
void radix_tree_replace_slot(struct radix_tree_root *root,
|
||||
void **slot, void *item)
|
||||
@ -1466,75 +1571,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
|
||||
}
|
||||
#endif /* CONFIG_SHMEM && CONFIG_SWAP */
|
||||
|
||||
/**
|
||||
* radix_tree_shrink - shrink radix tree to minimum height
|
||||
* @root radix tree root
|
||||
*/
|
||||
static inline bool radix_tree_shrink(struct radix_tree_root *root)
|
||||
{
|
||||
bool shrunk = false;
|
||||
|
||||
for (;;) {
|
||||
struct radix_tree_node *node = root->rnode;
|
||||
struct radix_tree_node *child;
|
||||
|
||||
if (!radix_tree_is_internal_node(node))
|
||||
break;
|
||||
node = entry_to_node(node);
|
||||
|
||||
/*
|
||||
* The candidate node has more than one child, or its child
|
||||
* is not at the leftmost slot, or the child is a multiorder
|
||||
* entry, we cannot shrink.
|
||||
*/
|
||||
if (node->count != 1)
|
||||
break;
|
||||
child = node->slots[0];
|
||||
if (!child)
|
||||
break;
|
||||
if (!radix_tree_is_internal_node(child) && node->shift)
|
||||
break;
|
||||
|
||||
if (radix_tree_is_internal_node(child))
|
||||
entry_to_node(child)->parent = NULL;
|
||||
|
||||
/*
|
||||
* We don't need rcu_assign_pointer(), since we are simply
|
||||
* moving the node from one part of the tree to another: if it
|
||||
* was safe to dereference the old pointer to it
|
||||
* (node->slots[0]), it will be safe to dereference the new
|
||||
* one (root->rnode) as far as dependent read barriers go.
|
||||
*/
|
||||
root->rnode = child;
|
||||
|
||||
/*
|
||||
* We have a dilemma here. The node's slot[0] must not be
|
||||
* NULLed in case there are concurrent lookups expecting to
|
||||
* find the item. However if this was a bottom-level node,
|
||||
* then it may be subject to the slot pointer being visible
|
||||
* to callers dereferencing it. If item corresponding to
|
||||
* slot[0] is subsequently deleted, these callers would expect
|
||||
* their slot to become empty sooner or later.
|
||||
*
|
||||
* For example, lockless pagecache will look up a slot, deref
|
||||
* the page pointer, and if the page has 0 refcount it means it
|
||||
* was concurrently deleted from pagecache so try the deref
|
||||
* again. Fortunately there is already a requirement for logic
|
||||
* to retry the entire slot lookup -- the indirect pointer
|
||||
* problem (replacing direct root node with an indirect pointer
|
||||
* also results in a stale slot). So tag the slot as indirect
|
||||
* to force callers to retry.
|
||||
*/
|
||||
if (!radix_tree_is_internal_node(child))
|
||||
node->slots[0] = RADIX_TREE_RETRY;
|
||||
|
||||
radix_tree_node_free(node);
|
||||
shrunk = true;
|
||||
}
|
||||
|
||||
return shrunk;
|
||||
}
|
||||
|
||||
/**
|
||||
* __radix_tree_delete_node - try to free node after clearing a slot
|
||||
* @root: radix tree root
|
||||
@ -1549,33 +1585,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
|
||||
bool __radix_tree_delete_node(struct radix_tree_root *root,
|
||||
struct radix_tree_node *node)
|
||||
{
|
||||
bool deleted = false;
|
||||
|
||||
do {
|
||||
struct radix_tree_node *parent;
|
||||
|
||||
if (node->count) {
|
||||
if (node == entry_to_node(root->rnode))
|
||||
deleted |= radix_tree_shrink(root);
|
||||
return deleted;
|
||||
}
|
||||
|
||||
parent = node->parent;
|
||||
if (parent) {
|
||||
parent->slots[node->offset] = NULL;
|
||||
parent->count--;
|
||||
} else {
|
||||
root_tag_clear_all(root);
|
||||
root->rnode = NULL;
|
||||
}
|
||||
|
||||
radix_tree_node_free(node);
|
||||
deleted = true;
|
||||
|
||||
node = parent;
|
||||
} while (node);
|
||||
|
||||
return deleted;
|
||||
return delete_node(root, node);
|
||||
}
|
||||
|
||||
static inline void delete_sibling_entries(struct radix_tree_node *node,
|
||||
@ -1632,12 +1642,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
|
||||
node_tag_clear(root, node, tag, offset);
|
||||
|
||||
delete_sibling_entries(node, node_to_entry(slot), offset);
|
||||
node->slots[offset] = NULL;
|
||||
node->count--;
|
||||
if (radix_tree_exceptional_entry(entry))
|
||||
node->exceptional--;
|
||||
|
||||
__radix_tree_delete_node(root, node);
|
||||
__radix_tree_replace(root, node, slot, NULL);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user