2013-07-11 03:05:03 +04:00
/*
* zswap . c - zswap driver file
*
* zswap is a backend for frontswap that takes pages that are in the process
* of being swapped out and attempts to compress and store them in a
* RAM - based memory pool . This can result in a significant I / O reduction on
* the swap device and , in the case where decompressing from RAM is faster
* than reading from the swap device , can also improve workload performance .
*
* Copyright ( C ) 2012 Seth Jennings < sjenning @ linux . vnet . ibm . com >
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version 2
* of the License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*/
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
# include <linux/module.h>
# include <linux/cpu.h>
# include <linux/highmem.h>
# include <linux/slab.h>
# include <linux/spinlock.h>
# include <linux/types.h>
# include <linux/atomic.h>
# include <linux/frontswap.h>
# include <linux/rbtree.h>
# include <linux/swap.h>
# include <linux/crypto.h>
# include <linux/mempool.h>
# include <linux/zbud.h>
# include <linux/mm_types.h>
# include <linux/page-flags.h>
# include <linux/swapops.h>
# include <linux/writeback.h>
# include <linux/pagemap.h>
/*********************************
* statistics
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Number of memory pages used by the compressed pool.
 * Refreshed from zbud_get_pool_size() of the tree being operated on in
 * zswap_frontswap_store() and zswap_free_entry(), and compared against the
 * configured limit in zswap_is_full().
 * NOTE(review): the value comes from a single tree's pool each time it is
 * written, so with several swap types it reflects only the pool touched
 * last - confirm whether that is intended.
 */
static u64 zswap_pool_pages ;
/* The number of compressed pages currently stored in zswap */
static atomic_t zswap_stored_pages = ATOMIC_INIT ( 0 ) ;
/*
 * The statistics below are not protected from concurrent access for
 * performance reasons, so they may not be 100% accurate. However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */
/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit ;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages ;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail ;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor ;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail ;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail ;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry ;
/*********************************
* tunables
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Enable/disable zswap (disabled by default, fixed at boot for now).
 * init_zswap() returns early without setting anything up when this is false.
 * The parameter is registered with a permission value of 0.
 */
static bool zswap_enabled __read_mostly ;
module_param_named ( enabled , zswap_enabled , bool , 0 ) ;
/*
 * Compressor to be used by zswap (fixed at boot for now).
 * zswap_comp_init() falls back to ZSWAP_COMPRESSOR_DEFAULT when the
 * requested compressor is not available.
 */
# define ZSWAP_COMPRESSOR_DEFAULT "lzo"
static char * zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT ;
module_param_named ( compressor , zswap_compressor , charp , 0 ) ;
/*
 * The maximum percentage of memory that the compressed pool can occupy.
 * zswap_is_full() applies this percentage to totalram_pages.
 */
static unsigned int zswap_max_pool_percent = 20 ;
module_param_named ( max_pool_percent ,
zswap_max_pool_percent , uint , 0644 ) ;
/*********************************
* compression functions
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Per-cpu compression transforms.
 * The per-cpu area is allocated in zswap_comp_init(); each CPU's slot is
 * filled and cleared by __zswap_cpu_notifier() and read by zswap_comp_op().
 */
static struct crypto_comp * __percpu * zswap_comp_pcpu_tfms ;
/* Selects which crypto operation zswap_comp_op() performs. */
enum comp_op {
ZSWAP_COMPOP_COMPRESS ,
ZSWAP_COMPOP_DECOMPRESS
} ;
/*
 * Compress or decompress @slen bytes from @src into @dst using the
 * transform stored in the current CPU's slot of zswap_comp_pcpu_tfms.
 *
 * @dlen is passed through to the crypto call unchanged.
 *
 * Returns the result of the crypto call, or -EINVAL if @op is neither
 * ZSWAP_COMPOP_COMPRESS nor ZSWAP_COMPOP_DECOMPRESS.
 */
static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
				u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *cpu_tfm;
	int rc = -EINVAL;

	/* the transform is used between get_cpu() and the put_cpu() below */
	cpu_tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());

	if (op == ZSWAP_COMPOP_COMPRESS)
		rc = crypto_comp_compress(cpu_tfm, src, slen, dst, dlen);
	else if (op == ZSWAP_COMPOP_DECOMPRESS)
		rc = crypto_comp_decompress(cpu_tfm, src, slen, dst, dlen);

	put_cpu();

	return rc;
}
/*
 * Select the compressor and allocate the per-cpu transform pointers.
 *
 * If the configured compressor is unavailable, zswap_compressor is reset to
 * ZSWAP_COMPRESSOR_DEFAULT.
 *
 * Returns 0 on success, -ENODEV if not even the default compressor is
 * available, or -ENOMEM if the per-cpu allocation fails.
 */
static int __init zswap_comp_init(void)
{
	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
		pr_info(" %s compressor not available \n ", zswap_compressor);

		/* retry with the built-in default before giving up */
		zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
		if (!crypto_has_comp(zswap_compressor, 0, 0))
			return -ENODEV;
	}
	pr_info(" using %s compressor \n ", zswap_compressor);

	/* one transform pointer per cpu; the notifier fills them in */
	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);

	return zswap_comp_pcpu_tfms ? 0 : -ENOMEM;
}
/*
 * Release the per-cpu transform pointer area allocated by
 * zswap_comp_init().
 *
 * free_percpu() ignores a NULL pointer, so no guard is needed here.
 */
static void zswap_comp_exit(void)
{
	free_percpu(zswap_comp_pcpu_tfms);
}
/*********************************
* data structures
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into the red-black tree for the appropriate
 *          swap type
 * offset - the swap offset for the entry. Index into the red-black tree.
 * refcount - the number of outstanding references to the entry. This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback. The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount. Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data. Needed during
 *          decompression.
 * handle - zbud allocation handle that stores the compressed page data
 *          (preceded by a struct zswap_header)
 */
struct zswap_entry {
struct rb_node rbnode ;
pgoff_t offset ;
int refcount ;
unsigned int length ;
unsigned long handle ;
} ;
/*
 * Header written at the start of every zbud allocation by
 * zswap_frontswap_store(); the compressed data follows it directly.
 * zswap_writeback_entry() reads swpentry back from a bare handle to find
 * the tree and offset the allocation belongs to.
 */
struct zswap_header {
swp_entry_t swpentry ;
} ;
/*
 * Per-swap-type state.
 *
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 *
 * pool is the zbud pool created for this swap type in
 * zswap_frontswap_init().
 */
struct zswap_tree {
struct rb_root rbroot ;
spinlock_t lock ;
struct zbud_pool * pool ;
} ;
/* One tree per swap type; a NULL slot means zswap is not set up for it. */
static struct zswap_tree * zswap_trees [ MAX_SWAPFILES ] ;
/*********************************
* zswap entry functions
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* slab cache that all struct zswap_entry objects are allocated from */
static struct kmem_cache *zswap_entry_cache;

/*
 * Create the zswap_entry slab cache.
 *
 * Returns 0 on success and 1 if the cache could not be created.
 */
static int zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	if (!zswap_entry_cache)
		return 1;

	return 0;
}
/*
 * Tear down the zswap_entry slab cache.
 * (The misspelled name is kept because init_zswap() calls it.)
 */
static void zswap_entry_cache_destory(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}
/*
 * Allocate a zswap_entry from the slab cache using @gfp.
 *
 * A new entry starts with a refcount of 1 and an rbnode marked as not
 * linked into any tree. The remaining fields are left for the caller to
 * fill in.
 *
 * Returns the entry, or NULL if the allocation failed.
 */
static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *new_entry;

	new_entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (new_entry) {
		new_entry->refcount = 1;
		RB_CLEAR_NODE(&new_entry->rbnode);
	}

	return new_entry;
}
/* Return @entry to the zswap_entry slab cache. */
static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}
/*********************************
* rbtree functions
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Look up the entry keyed by @offset in the red-black tree @root.
 *
 * Returns the matching entry, or NULL if there is none. No reference is
 * taken on the returned entry.
 */
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *cur = root->rb_node;

	while (cur) {
		struct zswap_entry *candidate;

		candidate = rb_entry(cur, struct zswap_entry, rbnode);
		if (candidate->offset == offset)
			return candidate;

		/* smaller keys live to the left, larger keys to the right */
		if (candidate->offset > offset)
			cur = cur->rb_left;
		else
			cur = cur->rb_right;
	}

	return NULL;
}
/*
* In the case that a entry with the same offset is found , a pointer to
* the existing entry is stored in dupentry and the function returns - EEXIST
*/
static int zswap_rb_insert ( struct rb_root * root , struct zswap_entry * entry ,
struct zswap_entry * * dupentry )
{
struct rb_node * * link = & root - > rb_node , * parent = NULL ;
struct zswap_entry * myentry ;
while ( * link ) {
parent = * link ;
myentry = rb_entry ( parent , struct zswap_entry , rbnode ) ;
if ( myentry - > offset > entry - > offset )
link = & ( * link ) - > rb_left ;
else if ( myentry - > offset < entry - > offset )
link = & ( * link ) - > rb_right ;
else {
* dupentry = myentry ;
return - EEXIST ;
}
}
rb_link_node ( & entry - > rbnode , parent , link ) ;
rb_insert_color ( & entry - > rbnode , root ) ;
return 0 ;
}
2013-11-13 03:08:27 +04:00
static void zswap_rb_erase ( struct rb_root * root , struct zswap_entry * entry )
{
if ( ! RB_EMPTY_NODE ( & entry - > rbnode ) ) {
rb_erase ( & entry - > rbnode , root ) ;
RB_CLEAR_NODE ( & entry - > rbnode ) ;
}
}
/*
* Carries out the common pattern of freeing and entry ' s zsmalloc allocation ,
* freeing the entry itself , and decrementing the number of stored pages .
*/
static void zswap_free_entry ( struct zswap_tree * tree ,
struct zswap_entry * entry )
{
zbud_free ( tree - > pool , entry - > handle ) ;
zswap_entry_cache_free ( entry ) ;
atomic_dec ( & zswap_stored_pages ) ;
zswap_pool_pages = zbud_get_pool_size ( tree - > pool ) ;
}
/* caller must hold the tree lock */
static void zswap_entry_get ( struct zswap_entry * entry )
{
entry - > refcount + + ;
}
/* caller must hold the tree lock
* remove from the tree and free it , if nobody reference the entry
*/
static void zswap_entry_put ( struct zswap_tree * tree ,
struct zswap_entry * entry )
{
int refcount = - - entry - > refcount ;
BUG_ON ( refcount < 0 ) ;
if ( refcount = = 0 ) {
zswap_rb_erase ( & tree - > rbroot , entry ) ;
zswap_free_entry ( tree , entry ) ;
}
}
/*
 * Look up the entry at @offset and, if found, take a reference on it.
 * Returns the entry or NULL.
 * Caller must hold the tree lock.
 */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *found = zswap_rb_search(root, offset);

	if (found)
		zswap_entry_get(found);

	return found;
}
2013-07-11 03:05:03 +04:00
/*********************************
* per - cpu code
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Per-cpu buffer that zswap_frontswap_store() compresses into before
 * copying the result into the zbud allocation. __zswap_cpu_notifier()
 * allocates it with a size of PAGE_SIZE * 2 and frees it again.
 */
static DEFINE_PER_CPU ( u8 * , zswap_dstmem ) ;
/*
 * Set up or tear down the per-cpu compression state for @cpu.
 *
 * CPU_UP_PREPARE allocates the compressor transform and the destination
 * buffer. CPU_DEAD and CPU_UP_CANCELED release both and clear the per-cpu
 * slots. Any other @action is ignored.
 *
 * Returns NOTIFY_BAD if an allocation fails during CPU_UP_PREPARE,
 * NOTIFY_OK otherwise.
 */
static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
{
	struct crypto_comp **tfm_slot;
	struct crypto_comp *new_tfm;
	u8 *buf;

	if (action == CPU_UP_PREPARE) {
		new_tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
		if (IS_ERR(new_tfm)) {
			pr_err(" can't allocate compressor transform \n ");
			return NOTIFY_BAD;
		}
		tfm_slot = per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
		*tfm_slot = new_tfm;

		buf = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
		if (!buf) {
			pr_err(" can't allocate compressor buffer \n ");
			/* undo the transform set up just above */
			crypto_free_comp(new_tfm);
			*tfm_slot = NULL;
			return NOTIFY_BAD;
		}
		per_cpu(zswap_dstmem, cpu) = buf;
	} else if (action == CPU_DEAD || action == CPU_UP_CANCELED) {
		tfm_slot = per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
		if (*tfm_slot) {
			crypto_free_comp(*tfm_slot);
			*tfm_slot = NULL;
		}

		buf = per_cpu(zswap_dstmem, cpu);
		kfree(buf);
		per_cpu(zswap_dstmem, cpu) = NULL;
	}

	return NOTIFY_OK;
}
/*
 * Notifier callback: converts the opaque @pcpu argument to a cpu number
 * and forwards to __zswap_cpu_notifier(). @nb is unused.
 */
static int zswap_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	return __zswap_cpu_notifier(action, (unsigned long)pcpu);
}
/* Notifier block registered by zswap_cpu_init() via register_cpu_notifier(). */
static struct notifier_block zswap_cpu_notifier_block = {
. notifier_call = zswap_cpu_notifier
} ;
static int zswap_cpu_init ( void )
{
unsigned long cpu ;
get_online_cpus ( ) ;
for_each_online_cpu ( cpu )
if ( __zswap_cpu_notifier ( CPU_UP_PREPARE , cpu ) ! = NOTIFY_OK )
goto cleanup ;
register_cpu_notifier ( & zswap_cpu_notifier_block ) ;
put_online_cpus ( ) ;
return 0 ;
cleanup :
for_each_online_cpu ( cpu )
__zswap_cpu_notifier ( CPU_UP_CANCELED , cpu ) ;
put_online_cpus ( ) ;
return - ENOMEM ;
}
/*********************************
* helpers
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Returns true when zswap_pool_pages exceeds zswap_max_pool_percent
 * percent of totalram_pages.
 */
static bool zswap_is_full(void)
{
	unsigned long limit_pages;

	limit_pages = totalram_pages * zswap_max_pool_percent / 100;

	return limit_pages < zswap_pool_pages;
}
/*********************************
* writeback code
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* result codes of zswap_get_swap_cache_page() */
enum zswap_get_swap_ret {
	/* a new page was added to the swap cache and is locked */
	ZSWAP_SWAPCACHE_NEW,
	/* the page was already present in the swap cache */
	ZSWAP_SWAPCACHE_EXIST,
	/* no page could be obtained */
	ZSWAP_SWAPCACHE_FAIL,
};
/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async().
 *
 * Looks for a page with the given swap entry in the swap cache. If one is
 * found it is returned in @retpage. Otherwise a page is allocated, added
 * to the swap cache and returned in @retpage.
 *
 * Returns ZSWAP_SWAPCACHE_EXIST if the page was already in the swap cache,
 * ZSWAP_SWAPCACHE_NEW if a new page was added to the swap cache (it is
 * locked and still needs to be populated), or ZSWAP_SWAPCACHE_FAIL on
 * error, in which case *retpage is NULL.
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	struct address_space *swapper_space = swap_address_space(entry);
	struct page *cached;
	struct page *fresh = NULL;
	int err;

	*retpage = NULL;

	for (;;) {
		/*
		 * First check the swap cache. Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		cached = find_get_page(swapper_space, entry.val);
		if (cached)
			break;

		/* Get a new page to read into from swap (kept across retries). */
		if (!fresh) {
			fresh = alloc_page(GFP_KERNEL);
			if (!fresh)
				break; /* Out of memory */
		}

		/* call radix_tree_preload() while we can wait. */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/* Swap entry may have been freed since our caller observed it. */
		err = swapcache_prepare(entry);
		if (err) {
			radix_tree_preload_end();
			if (err == -EEXIST) /* seems racy */
				continue;
			break; /* swp entry is obsolete ? */
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(fresh);
		SetPageSwapBacked(fresh);
		err = __add_to_swap_cache(fresh, entry);
		radix_tree_preload_end();
		if (likely(!err)) {
			lru_cache_add_anon(fresh);
			*retpage = fresh;
			return ZSWAP_SWAPCACHE_NEW;
		}

		ClearPageSwapBacked(fresh);
		__clear_page_locked(fresh);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);

		if (err == -ENOMEM)
			break;
	}

	/* the freshly allocated page was not used */
	if (fresh)
		page_cache_release(fresh);

	if (!cached)
		return ZSWAP_SWAPCACHE_FAIL;

	*retpage = cached;
	return ZSWAP_SWAPCACHE_EXIST;
}
/*
* Attempts to free an entry by adding a page to the swap cache ,
* decompressing the entry data into the page , and issuing a
* bio write to write the page back to the swap device .
*
* This can be thought of as a " resumed writeback " of the page
* to the swap device . We are basically resuming the same swap
* writeback path that was intercepted with the frontswap_store ( )
* in the first place . After the page has been decompressed into
* the swap cache , the compressed version stored by zswap can be
* freed .
*/
static int zswap_writeback_entry ( struct zbud_pool * pool , unsigned long handle )
{
struct zswap_header * zhdr ;
swp_entry_t swpentry ;
struct zswap_tree * tree ;
pgoff_t offset ;
struct zswap_entry * entry ;
struct page * page ;
u8 * src , * dst ;
unsigned int dlen ;
2013-11-13 03:08:27 +04:00
int ret ;
2013-07-11 03:05:03 +04:00
struct writeback_control wbc = {
. sync_mode = WB_SYNC_NONE ,
} ;
/* extract swpentry from data */
zhdr = zbud_map ( pool , handle ) ;
swpentry = zhdr - > swpentry ; /* here */
zbud_unmap ( pool , handle ) ;
tree = zswap_trees [ swp_type ( swpentry ) ] ;
offset = swp_offset ( swpentry ) ;
BUG_ON ( pool ! = tree - > pool ) ;
/* find and ref zswap entry */
spin_lock ( & tree - > lock ) ;
2013-11-13 03:08:27 +04:00
entry = zswap_entry_find_get ( & tree - > rbroot , offset ) ;
2013-07-11 03:05:03 +04:00
if ( ! entry ) {
/* entry was invalidated */
spin_unlock ( & tree - > lock ) ;
return 0 ;
}
spin_unlock ( & tree - > lock ) ;
BUG_ON ( offset ! = entry - > offset ) ;
/* try to allocate swap cache page */
switch ( zswap_get_swap_cache_page ( swpentry , & page ) ) {
2013-11-13 03:08:26 +04:00
case ZSWAP_SWAPCACHE_FAIL : /* no memory or invalidate happened */
2013-07-11 03:05:03 +04:00
ret = - ENOMEM ;
goto fail ;
2013-11-13 03:08:26 +04:00
case ZSWAP_SWAPCACHE_EXIST :
2013-07-11 03:05:03 +04:00
/* page is already in the swap cache, ignore for now */
page_cache_release ( page ) ;
ret = - EEXIST ;
goto fail ;
case ZSWAP_SWAPCACHE_NEW : /* page is locked */
/* decompress */
dlen = PAGE_SIZE ;
src = ( u8 * ) zbud_map ( tree - > pool , entry - > handle ) +
sizeof ( struct zswap_header ) ;
dst = kmap_atomic ( page ) ;
ret = zswap_comp_op ( ZSWAP_COMPOP_DECOMPRESS , src ,
entry - > length , dst , & dlen ) ;
kunmap_atomic ( dst ) ;
zbud_unmap ( tree - > pool , entry - > handle ) ;
BUG_ON ( ret ) ;
BUG_ON ( dlen ! = PAGE_SIZE ) ;
/* page is up to date */
SetPageUptodate ( page ) ;
}
2013-11-13 03:07:52 +04:00
/* move it to the tail of the inactive list after end_writeback */
SetPageReclaim ( page ) ;
2013-07-11 03:05:03 +04:00
/* start writeback */
__swap_writepage ( page , & wbc , end_swap_bio_write ) ;
page_cache_release ( page ) ;
zswap_written_back_pages + + ;
spin_lock ( & tree - > lock ) ;
/* drop local reference */
2013-11-13 03:08:27 +04:00
zswap_entry_put ( tree , entry ) ;
2013-07-11 03:05:03 +04:00
/*
2013-11-13 03:08:27 +04:00
* There are two possible situations for entry here :
* ( 1 ) refcount is 1 ( normal case ) , entry is valid and on the tree
* ( 2 ) refcount is 0 , entry is freed and not on the tree
* because invalidate happened during writeback
* search the tree and free the entry if find entry
*/
if ( entry = = zswap_rb_search ( & tree - > rbroot , offset ) )
zswap_entry_put ( tree , entry ) ;
2013-07-11 03:05:03 +04:00
spin_unlock ( & tree - > lock ) ;
2013-11-13 03:08:27 +04:00
goto end ;
/*
* if we get here due to ZSWAP_SWAPCACHE_EXIST
* a load may happening concurrently
* it is safe and okay to not free the entry
* if we free the entry in the following put
* it it either okay to return ! 0
*/
2013-07-11 03:05:03 +04:00
fail :
spin_lock ( & tree - > lock ) ;
2013-11-13 03:08:27 +04:00
zswap_entry_put ( tree , entry ) ;
2013-07-11 03:05:03 +04:00
spin_unlock ( & tree - > lock ) ;
2013-11-13 03:08:27 +04:00
end :
2013-07-11 03:05:03 +04:00
return ret ;
}
/*********************************
* frontswap hooks
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Attempts to compress and store a single page.
 *
 * @type:   swap type, index into zswap_trees
 * @offset: swap offset of the page, used as the rbtree key
 * @page:   the page to compress
 *
 * If the pool is over its limit, one zbud reclaim pass is attempted first.
 * The page is compressed into this cpu's zswap_dstmem buffer, copied into
 * a zbud allocation behind a struct zswap_header, and a zswap_entry is
 * inserted into the tree. An existing entry at the same offset is removed
 * and replaced.
 *
 * Returns 0 on success, -ENODEV if there is no tree for @type, -ENOMEM if
 * reclaim or the entry allocation fails, -EINVAL if compression fails, or
 * the error returned by zbud_alloc().
 */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	int ret;
	unsigned int dlen = PAGE_SIZE, len;
	unsigned long handle;
	u8 *buf;	/* matches the u8 * it is assigned from below */
	u8 *src, *dst;
	struct zswap_header *zhdr;

	if (!tree) {
		ret = -ENODEV;
		goto reject;
	}

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		if (zbud_reclaim_page(tree->pool, 8)) {
			zswap_reject_reclaim_fail++;
			ret = -ENOMEM;
			goto reject;
		}
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	/* compress; the per-cpu buffer is held until put_cpu_var() */
	dst = get_cpu_var(zswap_dstmem);
	src = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
	kunmap_atomic(src);
	if (ret) {
		ret = -EINVAL;
		goto freepage;
	}

	/* store */
	len = dlen + sizeof(struct zswap_header);
	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
		&handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto freepage;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto freepage;
	}
	zhdr = zbud_map(tree->pool, handle);
	zhdr->swpentry = swp_entry(type, offset);
	/* compressed data goes right after the header */
	buf = (u8 *)(zhdr + 1);
	memcpy(buf, dst, dlen);
	zbud_unmap(tree->pool, handle);
	put_cpu_var(zswap_dstmem);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove the old entry from the rbtree and retry */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_pool_pages = zbud_get_pool_size(tree->pool);

	return 0;

freepage:
	put_cpu_var(zswap_dstmem);
	zswap_entry_cache_free(entry);
reject:
	return ret;
}
/*
 * Decompress the page stored for (@type, @offset) into @page.
 *
 * Returns 0 if the page was successfully decompressed and -1 if no entry
 * exists for @offset. A decompression error triggers BUG_ON().
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	unsigned int out_len = PAGE_SIZE;
	u8 *comp, *plain;
	int err;

	/* find and take a reference so the entry survives the unlock */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	spin_unlock(&tree->lock);
	if (!entry)
		return -1; /* entry was written back */

	/* decompress; the data sits behind the zswap_header */
	comp = (u8 *)zbud_map(tree->pool, entry->handle) +
		sizeof(struct zswap_header);
	plain = kmap_atomic(page);
	err = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, comp, entry->length,
			plain, &out_len);
	kunmap_atomic(plain);
	zbud_unmap(tree->pool, entry->handle);
	BUG_ON(err);

	/* drop the reference taken above */
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return 0;
}
/*
 * Frees the entry stored for (@type, @offset), if any.
 *
 * The entry is unlinked from the tree and the initial reference from its
 * creation is dropped; it is freed once no other reference remains.
 */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *victim;

	spin_lock(&tree->lock);

	victim = zswap_rb_search(&tree->rbroot, offset);
	if (victim) {
		/* remove from rbtree */
		zswap_rb_erase(&tree->rbroot, victim);
		/* drop the initial reference from entry creation */
		zswap_entry_put(tree, victim);
	}
	/* otherwise the entry was already written back */

	spin_unlock(&tree->lock);
}
/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area ( unsigned type )
{
struct zswap_tree * tree = zswap_trees [ type ] ;
2013-09-12 01:25:33 +04:00
struct zswap_entry * entry , * n ;
2013-07-11 03:05:03 +04:00
if ( ! tree )
return ;
/* walk the tree and free everything */
spin_lock ( & tree - > lock ) ;
2013-11-13 03:08:27 +04:00
rbtree_postorder_for_each_entry_safe ( entry , n , & tree - > rbroot , rbnode )
zswap_free_entry ( tree , entry ) ;
2013-07-11 03:05:03 +04:00
tree - > rbroot = RB_ROOT ;
spin_unlock ( & tree - > lock ) ;
2013-10-17 00:46:54 +04:00
zbud_destroy_pool ( tree - > pool ) ;
kfree ( tree ) ;
zswap_trees [ type ] = NULL ;
2013-07-11 03:05:03 +04:00
}
/*
 * zbud callbacks handed to zbud_create_pool() in zswap_frontswap_init();
 * eviction is handled by writing the entry back to the swap device.
 */
static struct zbud_ops zswap_zbud_ops = {
. evict = zswap_writeback_entry
} ;
static void zswap_frontswap_init ( unsigned type )
{
struct zswap_tree * tree ;
tree = kzalloc ( sizeof ( struct zswap_tree ) , GFP_KERNEL ) ;
if ( ! tree )
goto err ;
tree - > pool = zbud_create_pool ( GFP_KERNEL , & zswap_zbud_ops ) ;
if ( ! tree - > pool )
goto freetree ;
tree - > rbroot = RB_ROOT ;
spin_lock_init ( & tree - > lock ) ;
zswap_trees [ type ] = tree ;
return ;
freetree :
kfree ( tree ) ;
err :
pr_err ( " alloc failed, zswap disabled for swap type %d \n " , type ) ;
}
/* frontswap callbacks, registered in init_zswap() */
static struct frontswap_ops zswap_frontswap_ops = {
. store = zswap_frontswap_store ,
. load = zswap_frontswap_load ,
. invalidate_page = zswap_frontswap_invalidate_page ,
. invalidate_area = zswap_frontswap_invalidate_area ,
. init = zswap_frontswap_init
} ;
/*********************************
* debugfs functions
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# ifdef CONFIG_DEBUG_FS
# include <linux/debugfs.h>
/* debugfs directory holding the zswap statistics files */
static struct dentry *zswap_debugfs_root;

/*
 * Create the zswap debugfs directory and one read-only file per statistic.
 *
 * Returns 0 on success, -ENODEV if debugfs is not initialized, or -ENOMEM
 * if the directory could not be created. Failures of the individual file
 * creations are not checked.
 */
static int __init zswap_debugfs_init(void)
{
	struct dentry *dir;

	if (!debugfs_initialized())
		return -ENODEV;

	dir = debugfs_create_dir(" zswap ", NULL);
	zswap_debugfs_root = dir;
	if (!dir)
		return -ENOMEM;

	debugfs_create_u64(" pool_limit_hit ", S_IRUGO,
			dir, &zswap_pool_limit_hit);
	debugfs_create_u64(" reject_reclaim_fail ", S_IRUGO,
			dir, &zswap_reject_reclaim_fail);
	debugfs_create_u64(" reject_alloc_fail ", S_IRUGO,
			dir, &zswap_reject_alloc_fail);
	debugfs_create_u64(" reject_kmemcache_fail ", S_IRUGO,
			dir, &zswap_reject_kmemcache_fail);
	debugfs_create_u64(" reject_compress_poor ", S_IRUGO,
			dir, &zswap_reject_compress_poor);
	debugfs_create_u64(" written_back_pages ", S_IRUGO,
			dir, &zswap_written_back_pages);
	debugfs_create_u64(" duplicate_entry ", S_IRUGO,
			dir, &zswap_duplicate_entry);
	debugfs_create_u64(" pool_pages ", S_IRUGO,
			dir, &zswap_pool_pages);
	debugfs_create_atomic_t(" stored_pages ", S_IRUGO,
			dir, &zswap_stored_pages);

	return 0;
}
/* Remove the zswap debugfs directory and everything below it. */
static void __exit zswap_debugfs_exit(void)
{
	debugfs_remove_recursive(zswap_debugfs_root);
}
# else
/* Stub used when CONFIG_DEBUG_FS is not set: nothing to create. */
static int __init zswap_debugfs_init(void)
{
	return 0;
}
/* Stub used when CONFIG_DEBUG_FS is not set: nothing to remove. */
static void __exit zswap_debugfs_exit(void)
{
}
# endif
/*********************************
* module init and exit
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Bring up zswap: entry cache, compressor, per-cpu state, then register
 * the frontswap ops and create the debugfs files.
 *
 * Does nothing and returns 0 when zswap is not enabled. Each failing step
 * unwinds the steps completed before it. A debugfs failure only produces
 * a warning.
 *
 * Returns 0 on success, -ENOMEM if any required step fails.
 */
static int __init init_zswap(void)
{
	if (!zswap_enabled)
		return 0;

	pr_info(" loading zswap \n ");

	if (zswap_entry_cache_create()) {
		pr_err(" entry cache creation failed \n ");
		return -ENOMEM;
	}

	if (zswap_comp_init()) {
		pr_err(" compressor initialization failed \n ");
		zswap_entry_cache_destory();
		return -ENOMEM;
	}

	if (zswap_cpu_init()) {
		pr_err(" per-cpu initialization failed \n ");
		zswap_comp_exit();
		zswap_entry_cache_destory();
		return -ENOMEM;
	}

	frontswap_register_ops(&zswap_frontswap_ops);

	if (zswap_debugfs_init())
		pr_warn(" debugfs initialization failed \n ");

	return 0;
}
/* must be late so crypto has time to come up */
late_initcall ( init_zswap ) ;
/* module metadata */
MODULE_LICENSE ( " GPL " ) ;
MODULE_AUTHOR ( " Seth Jennings <sjenning@linux.vnet.ibm.com> " ) ;
MODULE_DESCRIPTION ( " Compressed cache for swap pages " ) ;