tmpfs: convert shmem_unuse_inode to radix-swap
Convert shmem_unuse_inode() to use a lockless gang lookup of the radix
tree, searching for matching swap.

This is somewhat slower than the old method: because of repeated radix
tree descents, because of copying entries up, but probably most because
the old method noted and skipped once a vector page was cleared of
swap.  Perhaps we can devise a use of radix tree tagging to achieve
that later.

shmem_add_to_page_cache() uses shmem_radix_tree_replace() to compensate
for the lockless lookup by checking that the expected entry is in
place, under lock.  It is not very satisfactory to be copying this much
from add_to_page_cache_locked(), but I think easier to sell than
insisting that every caller of add_to_page_cache*() go through the
extras.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 7a5d0fbb29
commit 46f65ec15c

mm/shmem.c: 133 changes
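For reference, the shmem_radix_tree_replace() helper that both the
message and the first hunk lean on sits just above this hunk in
mm/shmem.c, added by the parent commit.  A sketch from memory of that
series, not quoted verbatim: it swaps in a new item only if the slot
still holds the expected one, under tree_lock.

/*
 * Replace item expected in radix tree by a new item, while holding
 * mapping->tree_lock.  Returns -ENOENT if the expected item has gone.
 */
static int shmem_radix_tree_replace(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	void **pslot;
	void *item = NULL;

	VM_BUG_ON(!expected);
	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
	if (pslot)
		item = radix_tree_deref_slot_protected(pslot,
							&mapping->tree_lock);
	if (item != expected)
		return -ENOENT;
	if (replacement)
		radix_tree_replace_slot(pslot, replacement);
	else
		radix_tree_delete(&mapping->page_tree, index);
	return 0;
}

This is what lets the lockless gang lookup stay honest: by the time
anything is modified, the slot is rechecked under lock.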
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -261,6 +261,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 	return 0;
 }
 
+/*
+ * Like add_to_page_cache_locked, but error if expected item has gone.
+ */
+static int shmem_add_to_page_cache(struct page *page,
+				   struct address_space *mapping,
+				   pgoff_t index, gfp_t gfp, void *expected)
+{
+	int error;
+
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(!PageSwapBacked(page));
+
+	error = mem_cgroup_cache_charge(page, current->mm,
+						gfp & GFP_RECLAIM_MASK);
+	if (error)
+		goto out;
+	if (!expected)
+		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+	if (!error) {
+		page_cache_get(page);
+		page->mapping = mapping;
+		page->index = index;
+
+		spin_lock_irq(&mapping->tree_lock);
+		if (!expected)
+			error = radix_tree_insert(&mapping->page_tree,
+							index, page);
+		else
+			error = shmem_radix_tree_replace(mapping, index,
+							 expected, page);
+		if (!error) {
+			mapping->nrpages++;
+			__inc_zone_page_state(page, NR_FILE_PAGES);
+			__inc_zone_page_state(page, NR_SHMEM);
+			spin_unlock_irq(&mapping->tree_lock);
+		} else {
+			page->mapping = NULL;
+			spin_unlock_irq(&mapping->tree_lock);
+			page_cache_release(page);
+		}
+		if (!expected)
+			radix_tree_preload_end();
+	}
+	if (error)
+		mem_cgroup_uncharge_cache_page(page);
+out:
+	return error;
+}
+
 /*
  * Like find_get_pages, but collecting swap entries as well as pages.
  */
@@ -308,6 +357,42 @@ export:
 	return ret;
 }
 
+/*
+ * Lockless lookup of swap entry in radix tree, avoiding refcount on pages.
+ */
+static pgoff_t shmem_find_swap(struct address_space *mapping, void *radswap)
+{
+	void **slots[PAGEVEC_SIZE];
+	pgoff_t indices[PAGEVEC_SIZE];
+	unsigned int nr_found;
+
+restart:
+	nr_found = 1;
+	indices[0] = -1;
+	while (nr_found) {
+		pgoff_t index = indices[nr_found - 1] + 1;
+		unsigned int i;
+
+		rcu_read_lock();
+		nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+					slots, indices, index, PAGEVEC_SIZE);
+		for (i = 0; i < nr_found; i++) {
+			void *item = radix_tree_deref_slot(slots[i]);
+			if (radix_tree_deref_retry(item)) {
+				rcu_read_unlock();
+				goto restart;
+			}
+			if (item == radswap) {
+				rcu_read_unlock();
+				return indices[i];
+			}
+		}
+		rcu_read_unlock();
+		cond_resched();
+	}
+	return -1;
+}
+
 /*
  * Remove swap entry from radix tree, free the swap and its page cache.
  */
@@ -515,23 +600,21 @@ static void shmem_evict_inode(struct inode *inode)
 	end_writeback(inode);
 }
 
+/*
+ * If swap found in inode, free it and move page from swapcache to filecache.
+ */
 static int shmem_unuse_inode(struct shmem_inode_info *info,
 			     swp_entry_t swap, struct page *page)
 {
 	struct address_space *mapping = info->vfs_inode.i_mapping;
+	void *radswap;
 	pgoff_t index;
 	int error;
 
-	for (index = 0; index < SHMEM_NR_DIRECT; index++)
-		if (shmem_get_swap(info, index).val == swap.val)
-			goto found;
-	return 0;
-found:
-	spin_lock(&info->lock);
-	if (shmem_get_swap(info, index).val != swap.val) {
-		spin_unlock(&info->lock);
-		return 0;
-	}
+	radswap = swp_to_radix_entry(swap);
+	index = shmem_find_swap(mapping, radswap);
+	if (index == -1)
+		return 0;
 
 	/*
 	 * Move _head_ to start search for next from here.
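The radswap cookie stored and matched above is the swap entry packed
as a radix-tree exceptional entry.  The encode/decode helpers come from
include/linux/shmem_fs.h in this series; roughly (a sketch from memory,
not quoted verbatim):

/* Low bits mark the cookie as a non-page, non-dereferenceable entry. */
static inline void *swp_to_radix_entry(swp_entry_t entry)
{
	unsigned long val = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;
	return (void *)(val | RADIX_TREE_EXCEPTIONAL_ENTRY);
}

static inline swp_entry_t radix_to_swp_entry(void *arg)
{
	swp_entry_t entry;
	entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
	return entry;
}

Equal swap entries encode to equal cookies, so shmem_find_swap() can
compare slot contents directly against radswap without taking any page
reference.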
@@ -547,23 +630,30 @@ found:
 	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
 	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	error = add_to_page_cache_locked(page, mapping, index, GFP_NOWAIT);
+	error = shmem_add_to_page_cache(page, mapping, index,
+						GFP_NOWAIT, radswap);
 	/* which does mem_cgroup_uncharge_cache_page on error */
 
 	if (error != -ENOMEM) {
+		/*
+		 * Truncation and eviction use free_swap_and_cache(), which
+		 * only does trylock page: if we raced, best clean up here.
+		 */
 		delete_from_swap_cache(page);
 		set_page_dirty(page);
-		shmem_put_swap(info, index, (swp_entry_t){0});
-		info->swapped--;
-		swap_free(swap);
+		if (!error) {
+			spin_lock(&info->lock);
+			info->swapped--;
+			spin_unlock(&info->lock);
+			swap_free(swap);
+		}
 		error = 1;	/* not an error, but entry was found */
 	}
-	spin_unlock(&info->lock);
 	return error;
 }
 
 /*
- * shmem_unuse() search for an eventually swapped out shmem page.
+ * Search through swapped inodes to find and replace swap by page.
  */
 int shmem_unuse(swp_entry_t swap, struct page *page)
 {
@@ -576,20 +666,12 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	 * Charge page using GFP_KERNEL while we can wait, before taking
 	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
 	 * Charged back to the user (not to caller) when swap account is used.
-	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 * shmem_add_to_page_cache() will be called with GFP_NOWAIT.
 	 */
 	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 	if (error)
 		goto out;
-	/*
-	 * Try to preload while we can wait, to not make a habit of
-	 * draining atomic reserves; but don't latch on to this cpu,
-	 * it's okay if sometimes we get rescheduled after this.
-	 */
-	error = radix_tree_preload(GFP_KERNEL);
-	if (error)
-		goto uncharge;
-	radix_tree_preload_end();
+	/* No radix_tree_preload: swap entry keeps a place for page in tree */
 
 	mutex_lock(&shmem_swaplist_mutex);
 	list_for_each_safe(this, next, &shmem_swaplist) {
@@ -608,7 +690,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
 
-uncharge:
 	if (!found)
 		mem_cgroup_uncharge_cache_page(page);
 	if (found < 0)
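On the tagging idea deferred in the commit message: one conceivable
shape, sketched here with an invented SHMEM_TAG_SWAP (hypothetical --
all three radix-tree tags are already spoken for upstream, and
radix_tree_gang_lookup_tag_slot() does not report indices, which is
part of why this is left for later), would mark slots as they go to
swap so a swapoff walk can skip swap-free mappings outright:

#define SHMEM_TAG_SWAP	2	/* hypothetical: no spare tag exists */

/* Swapout path sketch: stash the swap entry and tag the slot,
 * with mapping->tree_lock held. */
static void shmem_tag_swap_slot(struct address_space *mapping,
				pgoff_t index, void **pslot, void *radswap)
{
	radix_tree_replace_slot(pslot, radswap);
	radix_tree_tag_set(&mapping->page_tree, index, SHMEM_TAG_SWAP);
}

/* Swapoff fast path sketch: skip a mapping holding no swap at all. */
static bool shmem_mapping_has_swap(struct address_space *mapping)
{
	return radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_SWAP);
}

shmem_add_to_page_cache() would then radix_tree_tag_clear() the slot
when the page comes back, restoring the old method's ability to skip
ranges already cleared of swap.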