2008-10-19 07:28:16 +04:00
# include <linux/mm.h>
# include <linux/mmzone.h>
# include <linux/bootmem.h>
# include <linux/bit_spinlock.h>
# include <linux/page_cgroup.h>
# include <linux/hash.h>
2008-10-23 01:15:05 +04:00
# include <linux/slab.h>
2008-10-19 07:28:16 +04:00
# include <linux/memory.h>
2008-10-23 01:14:58 +04:00
# include <linux/vmalloc.h>
2008-10-23 01:15:05 +04:00
# include <linux/cgroup.h>
2009-01-08 05:07:58 +03:00
# include <linux/swapops.h>
2010-07-19 14:54:14 +04:00
# include <linux/kmemleak.h>
2008-10-19 07:28:16 +04:00
2011-03-24 02:42:30 +03:00
/*
 * Initialize a single page_cgroup: clear the flags, record the array id
 * (node id for FLATMEM, section nr for SPARSEMEM) used by
 * lookup_cgroup_page() to map back to the struct page, clear the owning
 * memcg, and set up an empty LRU list head.
 */
static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
{
	pc->flags = 0;
	set_page_cgroup_array_id(pc, id);
	pc->mem_cgroup = NULL;
	INIT_LIST_HEAD(&pc->lru);
}
static unsigned long total_usage ;
# if !defined(CONFIG_SPARSEMEM)
2008-11-22 20:33:24 +03:00
/*
 * Flat memory model: per-node page_cgroup table is allocated later in
 * page_cgroup_init_flatmem(); just clear the pointer here.
 */
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
	pgdat->node_page_cgroup = NULL;
}
struct page_cgroup * lookup_page_cgroup ( struct page * page )
{
unsigned long pfn = page_to_pfn ( page ) ;
unsigned long offset ;
struct page_cgroup * base ;
base = NODE_DATA ( page_to_nid ( page ) ) - > node_page_cgroup ;
if ( unlikely ( ! base ) )
return NULL ;
offset = pfn - NODE_DATA ( page_to_nid ( page ) ) - > node_start_pfn ;
return base + offset ;
}
2011-03-24 02:42:30 +03:00
struct page * lookup_cgroup_page ( struct page_cgroup * pc )
{
unsigned long pfn ;
struct page * page ;
pg_data_t * pgdat ;
pgdat = NODE_DATA ( page_cgroup_array_id ( pc ) ) ;
pfn = pc - pgdat - > node_page_cgroup + pgdat - > node_start_pfn ;
page = pfn_to_page ( pfn ) ;
VM_BUG_ON ( pc ! = lookup_page_cgroup ( page ) ) ;
return page ;
}
2008-10-19 07:28:16 +04:00
/*
 * Allocate and initialize the page_cgroup table for one node at boot
 * (flat memory model).
 *
 * Returns 0 on success (including memoryless nodes), -ENOMEM on failure.
 * Note: the unused local 'start_pfn' from an older table layout was removed.
 */
static int __init alloc_node_page_cgroup(int nid)
{
	struct page_cgroup *base, *pc;
	unsigned long table_size;
	unsigned long nr_pages, index;

	nr_pages = NODE_DATA(nid)->node_spanned_pages;

	/* Nothing to allocate for a memoryless node. */
	if (!nr_pages)
		return 0;

	table_size = sizeof(struct page_cgroup) * nr_pages;

	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
	if (!base)
		return -ENOMEM;
	for (index = 0; index < nr_pages; index++) {
		pc = base + index;
		init_page_cgroup(pc, nid);
	}
	NODE_DATA(nid)->node_page_cgroup = base;
	total_usage += table_size;
	return 0;
}
2009-06-12 11:33:53 +04:00
/*
 * Boot-time setup for the flat memory model: allocate a page_cgroup
 * table for every online node.  Panics on allocation failure, since
 * memcg cannot operate without these tables.
 */
void __init page_cgroup_init_flatmem(void)
{

	int nid, fail;

	/* Nothing to do when the memory controller is disabled. */
	if (mem_cgroup_disabled())
		return;

	for_each_online_node(nid)  {
		fail = alloc_node_page_cgroup(nid);
		if (fail)
			goto fail;
	}
	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
	" don't want memory cgroups\n");
	return;
fail:
	printk(KERN_CRIT "allocation of page_cgroup failed.\n");
	printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n");
	panic("Out of memory");
}
# else /* CONFIG_FLAT_NODE_MEM_MAP */
/*
 * Map a struct page to its page_cgroup (sparse memory model).
 * Returns NULL when the page's section has no page_cgroup table.
 */
struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	struct mem_section *section = __pfn_to_section(pfn);

	if (!section->page_cgroup)
		return NULL;
	/* section->page_cgroup is stored pre-biased by the pfn (base - pfn). */
	return section->page_cgroup + pfn;
}
2011-03-24 02:42:30 +03:00
struct page * lookup_cgroup_page ( struct page_cgroup * pc )
{
struct mem_section * section ;
struct page * page ;
unsigned long nr ;
nr = page_cgroup_array_id ( pc ) ;
section = __nr_to_section ( nr ) ;
page = pfn_to_page ( pc - section - > page_cgroup ) ;
VM_BUG_ON ( pc ! = lookup_page_cgroup ( page ) ) ;
return page ;
}
2011-05-27 03:25:29 +04:00
/*
 * Allocate a page_cgroup table of @size bytes for node @nid.
 *
 * Physically contiguous pages on the requested node are preferred;
 * on failure we fall back to vmalloc (node-local when the node has
 * normal/high memory).  May return NULL.
 */
static void *__meminit alloc_page_cgroup(size_t size, int nid)
{
	gfp_t flags = GFP_KERNEL | __GFP_NOWARN;
	void *addr;

	addr = alloc_pages_exact_nid(nid, size, flags);
	if (addr) {
		/* alloc_pages_exact*() is invisible to kmemleak; register it. */
		kmemleak_alloc(addr, size, 1, flags);
		return addr;
	}

	if (node_state(nid, N_HIGH_MEMORY))
		return vmalloc_node(size, nid);
	return vmalloc(size);
}
# ifdef CONFIG_MEMORY_HOTPLUG
static void free_page_cgroup ( void * addr )
{
if ( is_vmalloc_addr ( addr ) ) {
vfree ( addr ) ;
} else {
struct page * page = virt_to_page ( addr ) ;
2011-03-24 02:42:41 +03:00
size_t table_size =
sizeof ( struct page_cgroup ) * PAGES_PER_SECTION ;
BUG_ON ( PageReserved ( page ) ) ;
free_pages_exact ( addr , table_size ) ;
2011-03-24 02:42:40 +03:00
}
}
# endif
2011-06-16 02:08:42 +04:00
/*
 * Allocate and initialize the page_cgroup table of the mem_section
 * containing @pfn, on node @nid.
 *
 * Returns 0 on success (or if the section is already populated),
 * -ENOMEM on allocation failure.
 *
 * Fix: check for allocation failure *before* calling
 * kmemleak_not_leak(), instead of passing a NULL pointer to it first.
 */
static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
{
	struct page_cgroup *base, *pc;
	struct mem_section *section;
	unsigned long table_size;
	unsigned long nr;
	int index;

	nr = pfn_to_section_nr(pfn);
	section = __nr_to_section(nr);

	/* Already set up (e.g. by an earlier overlapping node scan). */
	if (section->page_cgroup)
		return 0;

	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
	base = alloc_page_cgroup(table_size, nid);
	if (!base) {
		printk(KERN_ERR "page cgroup allocation failure\n");
		return -ENOMEM;
	}

	/*
	 * The value stored in section->page_cgroup is (base - pfn)
	 * and it does not point to the memory block allocated above,
	 * causing kmemleak false positives.
	 */
	kmemleak_not_leak(base);

	for (index = 0; index < PAGES_PER_SECTION; index++) {
		pc = base + index;
		init_page_cgroup(pc, nr);
	}
	/*
	 * The passed "pfn" may not be aligned to SECTION.  For the calculation
	 * we need to apply a mask.
	 */
	pfn &= PAGE_SECTION_MASK;
	section->page_cgroup = base - pfn;
	total_usage += table_size;
	return 0;
}
# ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Free the page_cgroup table of the section containing @pfn (if any)
 * and clear the section's pointer.  Memory-hotremove path; callers pass
 * section-aligned pfns, so base points at the allocation start.
 */
void __free_page_cgroup(unsigned long pfn)
{
	struct mem_section *ms;
	struct page_cgroup *base;

	ms = __pfn_to_section(pfn);
	if (!ms || !ms->page_cgroup)
		return;
	/* Un-bias the stored (base - pfn) pointer. */
	base = ms->page_cgroup + pfn;
	free_page_cgroup(base);
	ms->page_cgroup = NULL;
}
2008-11-22 20:33:24 +03:00
/*
 * Memory hotplug: allocate page_cgroup tables for every present section
 * overlapping [start_pfn, start_pfn + nr_pages).  On any failure the
 * sections done so far are rolled back.
 *
 * Returns 0 on success, -ENOMEM on failure.
 */
int __meminit online_page_cgroup(unsigned long start_pfn,
			unsigned long nr_pages,
			int nid)
{
	unsigned long start, end, pfn;
	int fail = 0;

	start = SECTION_ALIGN_DOWN(start_pfn);
	end = SECTION_ALIGN_UP(start_pfn + nr_pages);

	if (nid == -1) {
		/*
		 * In this case, "nid" already exists and contains valid memory.
		 * "start_pfn" passed to us is a pfn which is an arg for
		 * online__pages(), and start_pfn should exist.
		 */
		nid = pfn_to_nid(start_pfn);
		VM_BUG_ON(!node_state(nid, N_ONLINE));
	}

	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
		if (!pfn_present(pfn))
			continue;
		fail = init_section_page_cgroup(pfn, nid);
	}
	if (!fail)
		return 0;

	/* rollback */
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
		__free_page_cgroup(pfn);

	return -ENOMEM;
}
2008-11-22 20:33:24 +03:00
/*
 * Memory hotplug: free the page_cgroup tables of every section
 * overlapping [start_pfn, start_pfn + nr_pages).  Always returns 0.
 */
int __meminit offline_page_cgroup(unsigned long start_pfn,
		unsigned long nr_pages, int nid)
{
	unsigned long start, end, pfn;

	start = SECTION_ALIGN_DOWN(start_pfn);
	end = SECTION_ALIGN_UP(start_pfn + nr_pages);

	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
		__free_page_cgroup(pfn);
	return 0;

}
2008-11-22 20:33:24 +03:00
/*
 * Memory-hotplug notifier: keep page_cgroup tables in sync with
 * sections going online/offline.  Errors from the online path are
 * propagated back through notifier_from_errno().
 */
static int __meminit page_cgroup_callback(struct notifier_block *self,
			       unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;
	int ret = 0;

	switch (action) {
	case MEM_GOING_ONLINE:
		ret = online_page_cgroup(mn->start_pfn,
				   mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_OFFLINE:
		offline_page_cgroup(mn->start_pfn,
				mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_CANCEL_ONLINE:
	case MEM_GOING_OFFLINE:
	case MEM_ONLINE:
	case MEM_CANCEL_OFFLINE:
		/* No table work needed for these transitions. */
		break;
	}

	return notifier_from_errno(ret);
}
# endif
/*
 * Boot-time setup for the sparse memory model: walk each node's pfn
 * range and allocate a page_cgroup table for every valid section owned
 * by that node, then register the hotplug notifier.  Panics on
 * allocation failure.
 */
void __init page_cgroup_init(void)
{
	unsigned long pfn;
	int nid;

	/* Nothing to do when the memory controller is disabled. */
	if (mem_cgroup_disabled())
		return;

	for_each_node_state(nid, N_HIGH_MEMORY) {
		unsigned long start_pfn, end_pfn;

		start_pfn = node_start_pfn(nid);
		end_pfn = node_end_pfn(nid);
		/*
		 * start_pfn and end_pfn may not be aligned to SECTION and the
		 * page->flags of out of node pages are not initialized.  So we
		 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
		 */
		for (pfn = start_pfn;
		     pfn < end_pfn;
		     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {

			if (!pfn_valid(pfn))
				continue;
			/*
			 * Nodes's pfns can be overlapping.
			 * We know some arch can have a nodes layout such as
			 * -------------pfn-------------->
			 * N0 | N1 | N2 | N0 | N1 | N2|....
			 */
			if (pfn_to_nid(pfn) != nid)
				continue;
			if (init_section_page_cgroup(pfn, nid))
				goto oom;
		}
	}
	hotplug_memory_notifier(page_cgroup_callback, 0);
	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
			 "don't want memory cgroups\n");
	return;
oom:
	printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
	panic("Out of memory");
}
2008-11-22 20:33:24 +03:00
/*
 * Sparse memory model: tables live in mem_sections, so there is no
 * per-node state to initialize.
 */
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
	return;
}
# endif
2009-01-08 05:07:58 +03:00
# ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/* Serializes swapon/swapoff updates of swap_cgroup_ctrl[]. */
static DEFINE_MUTEX(swap_cgroup_mutex);

/* Per-swapfile control: an array of pages holding swap_cgroup entries. */
struct swap_cgroup_ctrl {
	struct page **map;	/* one page per SC_PER_PAGE swap slots */
	unsigned long length;	/* number of pages in @map */
	spinlock_t lock;	/* protects record/cmpxchg on entries */
};

static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
2009-01-08 05:07:58 +03:00
/* One entry per swap slot: css id of the owning mem_cgroup (0 = none). */
struct swap_cgroup {
	unsigned short id;
};
/* Number of swap_cgroup entries that fit in one backing page. */
#define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
/* Mask extracting the within-page slot index from a swap offset. */
#define SC_POS_MASK	(SC_PER_PAGE - 1)
/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache. At swap_free(), this is accessed directly from swap.
 *
 * This means,
 *  - we have no race in "exchange" when we're accessed via SwapCache because
 *    SwapCache (and its swp_entry) is under lock.
 *  - When called via swap_free(), there is no user of this entry and no race.
 * Then, we don't need lock around "exchange".
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */
/*
 * allocate buffer for swap_cgroup.
 */
static int swap_cgroup_prepare ( int type )
{
struct page * page ;
struct swap_cgroup_ctrl * ctrl ;
unsigned long idx , max ;
ctrl = & swap_cgroup_ctrl [ type ] ;
for ( idx = 0 ; idx < ctrl - > length ; idx + + ) {
page = alloc_page ( GFP_KERNEL | __GFP_ZERO ) ;
if ( ! page )
goto not_enough_page ;
ctrl - > map [ idx ] = page ;
}
return 0 ;
not_enough_page :
max = idx ;
for ( idx = 0 ; idx < max ; idx + + )
__free_page ( ctrl - > map [ idx ] ) ;
return - ENOMEM ;
}
2010-03-11 02:22:17 +03:00
/**
 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
 * @ent: swap entry to be cmpxchged
 * @old: old id
 * @new: new id
 *
 * Returns old id at success, 0 at failure.
 * (There is no mem_cgroup using 0 as its id)
 */
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
					unsigned short old, unsigned short new)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;	/* which backing page */
	unsigned long pos = offset & SC_POS_MASK;	/* slot within page */
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned long flags;
	unsigned short retval;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	/* Compare-and-swap must be atomic w.r.t. swap_cgroup_record(). */
	spin_lock_irqsave(&ctrl->lock, flags);
	retval = sc->id;
	if (retval == old)
		sc->id = new;
	else
		retval = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);
	return retval;
}
2009-01-08 05:07:58 +03:00
/**
 * swap_cgroup_record - record mem_cgroup for this swp_entry.
 * @ent: swap entry to be recorded into
 * @id: css id of the mem_cgroup to be recorded
 *
 * Returns old value at success, 0 at failure.
 * (Of course, old value can be 0.)
 */
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;	/* which backing page */
	unsigned long pos = offset & SC_POS_MASK;	/* slot within page */
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned short old;
	unsigned long flags;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	/* Exchange under the per-swapfile lock so readers see a full update. */
	spin_lock_irqsave(&ctrl->lock, flags);
	old = sc->id;
	sc->id = id;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	return old;
}
/**
 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
 *
 * NOTE: the read is lockless; callers are expected to guarantee the
 * entry is stable (SwapCache locked, or no remaining user at swap_free).
 */
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;	/* which backing page */
	unsigned long pos = offset & SC_POS_MASK;	/* slot within page */
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned short ret;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	ret = sc->id;
	return ret;
}
/*
 * swap_cgroup_swapon - allocate the swap_cgroup map for a swapfile.
 * @type: swapfile index
 * @max_pages: number of swap slots to cover
 *
 * Called on the swapon path.  No-op when swap accounting is disabled.
 * Returns 0 on success, -ENOMEM on failure.
 */
int swap_cgroup_swapon(int type, unsigned long max_pages)
{
	void *array;
	unsigned long array_size;
	unsigned long length;
	struct swap_cgroup_ctrl *ctrl;

	if (!do_swap_account)
		return 0;

	length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
	array_size = length * sizeof(void *);

	array = vzalloc(array_size);
	if (!array)
		goto nomem;

	ctrl = &swap_cgroup_ctrl[type];
	mutex_lock(&swap_cgroup_mutex);
	ctrl->length = length;
	ctrl->map = array;
	spin_lock_init(&ctrl->lock);
	if (swap_cgroup_prepare(type)) {
		/* memory shortage: detach the map before freeing it */
		ctrl->map = NULL;
		ctrl->length = 0;
		mutex_unlock(&swap_cgroup_mutex);
		vfree(array);
		goto nomem;
	}
	mutex_unlock(&swap_cgroup_mutex);

	return 0;
nomem:
	printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
	printk(KERN_INFO
		"swap_cgroup can be disabled by swapaccount=0 boot option\n");
	return -ENOMEM;
}
/*
 * swap_cgroup_swapoff - free the swap_cgroup map of a swapfile.
 * @type: swapfile index
 *
 * Detaches the map under the mutex first, then frees it outside the
 * lock so the (possibly long) page-freeing loop runs unlocked.
 */
void swap_cgroup_swapoff(int type)
{
	struct page **map;
	unsigned long i, length;
	struct swap_cgroup_ctrl *ctrl;

	if (!do_swap_account)
		return;

	mutex_lock(&swap_cgroup_mutex);
	ctrl = &swap_cgroup_ctrl[type];
	map = ctrl->map;
	length = ctrl->length;
	ctrl->map = NULL;
	ctrl->length = 0;
	mutex_unlock(&swap_cgroup_mutex);

	if (map) {
		for (i = 0; i < length; i++) {
			struct page *page = map[i];
			if (page)
				__free_page(page);
		}
		vfree(map);
	}
}
# endif