2010-05-25 01:32:27 +04:00
/*
 * linux/mm/compaction.c
 *
 * Memory compaction for the reduction of external fragmentation. Note that
 * this heavily depends upon page migration to do all the real heavy
 * lifting
 *
 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
 */
# include <linux/swap.h>
# include <linux/migrate.h>
# include <linux/compaction.h>
# include <linux/mm_inline.h>
# include <linux/backing-dev.h>
2010-05-25 01:32:28 +04:00
# include <linux/sysctl.h>
2010-05-25 01:32:29 +04:00
# include <linux/sysfs.h>
2010-05-25 01:32:27 +04:00
# include "internal.h"
2011-12-29 16:09:50 +04:00
# if defined CONFIG_COMPACTION || defined CONFIG_CMA
2011-01-14 02:45:54 +03:00
# define CREATE_TRACE_POINTS
# include <trace/events/compaction.h>
2010-05-25 01:32:27 +04:00
static unsigned long release_freepages ( struct list_head * freelist )
{
struct page * page , * next ;
unsigned long count = 0 ;
list_for_each_entry_safe ( page , next , freelist , lru ) {
list_del ( & page - > lru ) ;
__free_page ( page ) ;
count + + ;
}
return count ;
}
2011-12-29 16:09:50 +04:00
static void map_pages ( struct list_head * list )
{
struct page * page ;
list_for_each_entry ( page , list , lru ) {
arch_alloc_page ( page , 0 ) ;
kernel_map_pages ( page , 1 , 1 ) ;
}
}
2011-12-29 16:09:50 +04:00
static inline bool migrate_async_suitable ( int migratetype )
{
return is_migrate_cma ( migratetype ) | | migratetype = = MIGRATE_MOVABLE ;
}
2012-01-30 16:24:03 +04:00
/*
 * Isolate free pages in [blockpfn, end_pfn) onto the private @freelist.
 * Caller must hold zone->lock.
 *
 * Every buddy page found is split into order-0 pages which are linked on
 * @freelist. With @strict set, the first invalid PFN, non-free page or
 * failed split makes the function return 0 immediately; pages isolated
 * before that point stay on @freelist and are the caller's to release.
 *
 * Returns the number of order-0 pages isolated.
 */
static unsigned long isolate_freepages_block(unsigned long blockpfn,
				unsigned long end_pfn,
				struct list_head *freelist,
				bool strict)
{
	int nr_scanned = 0, total_isolated = 0;
	struct page *cursor = pfn_to_page(blockpfn);

	/* Walk the block; this assumes the block itself is valid */
	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
		int nr_split, idx;

		if (!pfn_valid_within(blockpfn))
			goto not_isolated;
		nr_scanned++;

		if (!PageBuddy(cursor))
			goto not_isolated;

		/* Found a free page, break it into order-0 pages */
		nr_split = split_free_page(cursor);
		if (!nr_split)
			goto not_isolated;

		total_isolated += nr_split;
		for (idx = 0; idx < nr_split; idx++)
			list_add(&cursor[idx].lru, freelist);

		/*
		 * Step onto the last page of the split; the loop increment
		 * then moves just past it.
		 */
		blockpfn += nr_split - 1;
		cursor += nr_split - 1;
		continue;

not_isolated:
		/* Strict callers treat any hole in the block as fatal */
		if (strict)
			return 0;
	}

	trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
	return total_isolated;
}
2012-01-30 16:24:03 +04:00
/**
* isolate_freepages_range ( ) - isolate free pages .
* @ start_pfn : The first PFN to start isolating .
* @ end_pfn : The one - past - last PFN .
*
* Non - free pages , invalid PFNs , or zone boundaries within the
* [ start_pfn , end_pfn ) range are considered errors , cause function to
* undo its actions and return zero .
*
* Otherwise , function returns one - past - the - last PFN of isolated page
* ( which may be greater then end_pfn if end fell in a middle of
* a free page ) .
*/
2011-12-29 16:09:50 +04:00
unsigned long
2012-01-30 16:24:03 +04:00
isolate_freepages_range ( unsigned long start_pfn , unsigned long end_pfn )
{
unsigned long isolated , pfn , block_end_pfn , flags ;
struct zone * zone = NULL ;
LIST_HEAD ( freelist ) ;
if ( pfn_valid ( start_pfn ) )
zone = page_zone ( pfn_to_page ( start_pfn ) ) ;
for ( pfn = start_pfn ; pfn < end_pfn ; pfn + = isolated ) {
if ( ! pfn_valid ( pfn ) | | zone ! = page_zone ( pfn_to_page ( pfn ) ) )
break ;
/*
* On subsequent iterations ALIGN ( ) is actually not needed ,
* but we keep it that we not to complicate the code .
*/
block_end_pfn = ALIGN ( pfn + 1 , pageblock_nr_pages ) ;
block_end_pfn = min ( block_end_pfn , end_pfn ) ;
spin_lock_irqsave ( & zone - > lock , flags ) ;
isolated = isolate_freepages_block ( pfn , block_end_pfn ,
& freelist , true ) ;
spin_unlock_irqrestore ( & zone - > lock , flags ) ;
/*
* In strict mode , isolate_freepages_block ( ) returns 0 if
* there are any holes in the block ( ie . invalid PFNs or
* non - free pages ) .
*/
if ( ! isolated )
break ;
/*
* If we managed to isolate pages , it is always ( 1 < < n ) *
* pageblock_nr_pages for some non - negative n . ( Max order
* page may span two pageblocks ) .
*/
}
/* split_free_page does not map the pages */
map_pages ( & freelist ) ;
if ( pfn < end_pfn ) {
/* Loop terminated early, cleanup. */
release_freepages ( & freelist ) ;
return 0 ;
}
/* We don't use freelists for anything. */
return pfn ;
}
2010-05-25 01:32:27 +04:00
/* Update the number of anon and file isolated pages in the zone */
static void acct_isolated ( struct zone * zone , struct compact_control * cc )
{
struct page * page ;
2011-11-01 04:06:44 +04:00
unsigned int count [ 2 ] = { 0 , } ;
2010-05-25 01:32:27 +04:00
2011-11-01 04:06:44 +04:00
list_for_each_entry ( page , & cc - > migratepages , lru )
count [ ! ! page_is_file_cache ( page ) ] + + ;
2010-05-25 01:32:27 +04:00
2011-11-01 04:06:44 +04:00
__mod_zone_page_state ( zone , NR_ISOLATED_ANON , count [ 0 ] ) ;
__mod_zone_page_state ( zone , NR_ISOLATED_FILE , count [ 1 ] ) ;
2010-05-25 01:32:27 +04:00
}
/* Similar to reclaim, but different enough that they don't share logic */
static bool too_many_isolated ( struct zone * zone )
{
2010-09-10 03:38:00 +04:00
unsigned long active , inactive , isolated ;
2010-05-25 01:32:27 +04:00
inactive = zone_page_state ( zone , NR_INACTIVE_FILE ) +
zone_page_state ( zone , NR_INACTIVE_ANON ) ;
2010-09-10 03:38:00 +04:00
active = zone_page_state ( zone , NR_ACTIVE_FILE ) +
zone_page_state ( zone , NR_ACTIVE_ANON ) ;
2010-05-25 01:32:27 +04:00
isolated = zone_page_state ( zone , NR_ISOLATED_FILE ) +
zone_page_state ( zone , NR_ISOLATED_ANON ) ;
2010-09-10 03:38:00 +04:00
return isolated > ( inactive + active ) / 2 ;
2010-05-25 01:32:27 +04:00
}
2012-01-30 16:16:26 +04:00
/**
* isolate_migratepages_range ( ) - isolate all migrate - able pages in range .
* @ zone : Zone pages are in .
* @ cc : Compaction control structure .
* @ low_pfn : The first PFN of the range .
* @ end_pfn : The one - past - the - last PFN of the range .
*
* Isolate all pages that can be migrated from the range specified by
* [ low_pfn , end_pfn ) . Returns zero if there is a fatal signal
* pending ) , otherwise PFN of the first page that was not scanned
* ( which may be both less , equal to or more then end_pfn ) .
*
* Assumes that cc - > migratepages is empty and cc - > nr_migratepages is
* zero .
*
* Apart from cc - > migratepages and cc - > nr_migratetypes this function
* does not modify any cc ' s fields , in particular it does not modify
* ( or read for that matter ) cc - > migrate_pfn .
2010-05-25 01:32:27 +04:00
*/
2011-12-29 16:09:50 +04:00
unsigned long
2012-01-30 16:16:26 +04:00
isolate_migratepages_range ( struct zone * zone , struct compact_control * cc ,
unsigned long low_pfn , unsigned long end_pfn )
2010-05-25 01:32:27 +04:00
{
2011-01-14 02:45:59 +03:00
unsigned long last_pageblock_nr = 0 , pageblock_nr ;
2011-01-14 02:45:54 +03:00
unsigned long nr_scanned = 0 , nr_isolated = 0 ;
2010-05-25 01:32:27 +04:00
struct list_head * migratelist = & cc - > migratepages ;
2012-05-30 02:06:54 +04:00
isolate_mode_t mode = 0 ;
2012-05-30 02:07:09 +04:00
struct lruvec * lruvec ;
2010-05-25 01:32:27 +04:00
/*
* Ensure that there are not too many pages isolated from the LRU
* list by either parallel reclaimers or compaction . If there are ,
* delay for some time until fewer pages are isolated
*/
while ( unlikely ( too_many_isolated ( zone ) ) ) {
2011-06-16 02:08:52 +04:00
/* async migration should just abort */
2012-06-04 07:05:57 +04:00
if ( ! cc - > sync )
2012-01-30 16:16:26 +04:00
return 0 ;
2011-06-16 02:08:52 +04:00
2010-05-25 01:32:27 +04:00
congestion_wait ( BLK_RW_ASYNC , HZ / 10 ) ;
if ( fatal_signal_pending ( current ) )
2012-01-30 16:16:26 +04:00
return 0 ;
2010-05-25 01:32:27 +04:00
}
/* Time to isolate some pages for migration */
2011-03-23 02:33:10 +03:00
cond_resched ( ) ;
2010-05-25 01:32:27 +04:00
spin_lock_irq ( & zone - > lru_lock ) ;
for ( ; low_pfn < end_pfn ; low_pfn + + ) {
struct page * page ;
2011-03-23 02:33:10 +03:00
bool locked = true ;
/* give a chance to irqs before checking need_resched() */
if ( ! ( ( low_pfn + 1 ) % SWAP_CLUSTER_MAX ) ) {
spin_unlock_irq ( & zone - > lru_lock ) ;
locked = false ;
}
if ( need_resched ( ) | | spin_is_contended ( & zone - > lru_lock ) ) {
if ( locked )
spin_unlock_irq ( & zone - > lru_lock ) ;
cond_resched ( ) ;
spin_lock_irq ( & zone - > lru_lock ) ;
if ( fatal_signal_pending ( current ) )
break ;
} else if ( ! locked )
spin_lock_irq ( & zone - > lru_lock ) ;
2012-02-04 03:37:18 +04:00
/*
* migrate_pfn does not necessarily start aligned to a
* pageblock . Ensure that pfn_valid is called when moving
* into a new MAX_ORDER_NR_PAGES range in case of large
* memory holes within the zone
*/
if ( ( low_pfn & ( MAX_ORDER_NR_PAGES - 1 ) ) = = 0 ) {
if ( ! pfn_valid ( low_pfn ) ) {
low_pfn + = MAX_ORDER_NR_PAGES - 1 ;
continue ;
}
}
2010-05-25 01:32:27 +04:00
if ( ! pfn_valid_within ( low_pfn ) )
continue ;
2011-01-14 02:45:54 +03:00
nr_scanned + + ;
2010-05-25 01:32:27 +04:00
2012-02-09 05:13:38 +04:00
/*
* Get the page and ensure the page is within the same zone .
* See the comment in isolate_freepages about overlapping
* nodes . It is deliberate that the new zone lock is not taken
* as memory compaction should not move pages between nodes .
*/
2010-05-25 01:32:27 +04:00
page = pfn_to_page ( low_pfn ) ;
2012-02-09 05:13:38 +04:00
if ( page_zone ( page ) ! = zone )
continue ;
/* Skip if free */
2010-05-25 01:32:27 +04:00
if ( PageBuddy ( page ) )
continue ;
2011-01-14 02:45:59 +03:00
/*
* For async migration , also only scan in MOVABLE blocks . Async
* migration is optimistic to see if the minimum amount of work
* satisfies the allocation
*/
pageblock_nr = low_pfn > > pageblock_order ;
2012-06-04 07:05:57 +04:00
if ( ! cc - > sync & & last_pageblock_nr ! = pageblock_nr & &
2011-12-29 16:09:50 +04:00
! migrate_async_suitable ( get_pageblock_migratetype ( page ) ) ) {
2011-01-14 02:45:59 +03:00
low_pfn + = pageblock_nr_pages ;
low_pfn = ALIGN ( low_pfn , pageblock_nr_pages ) - 1 ;
last_pageblock_nr = pageblock_nr ;
continue ;
}
2011-01-14 02:47:08 +03:00
if ( ! PageLRU ( page ) )
continue ;
/*
* PageLRU is set , and lru_lock excludes isolation ,
* splitting and collapsing ( collapsing has already
* happened if PageLRU is set ) .
*/
if ( PageTransHuge ( page ) ) {
low_pfn + = ( 1 < < compound_order ( page ) ) - 1 ;
continue ;
}
2012-06-04 07:05:57 +04:00
if ( ! cc - > sync )
2012-01-13 05:19:38 +04:00
mode | = ISOLATE_ASYNC_MIGRATE ;
2012-05-30 02:07:09 +04:00
lruvec = mem_cgroup_page_lruvec ( page , zone ) ;
2010-05-25 01:32:27 +04:00
/* Try isolate the page */
2012-05-30 02:06:54 +04:00
if ( __isolate_lru_page ( page , mode ) ! = 0 )
2010-05-25 01:32:27 +04:00
continue ;
2011-01-14 02:47:08 +03:00
VM_BUG_ON ( PageTransCompound ( page ) ) ;
2010-05-25 01:32:27 +04:00
/* Successfully isolated */
2012-05-30 02:07:09 +04:00
del_page_from_lru_list ( page , lruvec , page_lru ( page ) ) ;
2010-05-25 01:32:27 +04:00
list_add ( & page - > lru , migratelist ) ;
cc - > nr_migratepages + + ;
2011-01-14 02:45:54 +03:00
nr_isolated + + ;
2010-05-25 01:32:27 +04:00
/* Avoid isolating too much */
2012-01-11 03:07:59 +04:00
if ( cc - > nr_migratepages = = COMPACT_CLUSTER_MAX ) {
+ + low_pfn ;
2010-05-25 01:32:27 +04:00
break ;
2012-01-11 03:07:59 +04:00
}
2010-05-25 01:32:27 +04:00
}
acct_isolated ( zone , cc ) ;
spin_unlock_irq ( & zone - > lru_lock ) ;
2011-01-14 02:45:54 +03:00
trace_mm_compaction_isolate_migratepages ( nr_scanned , nr_isolated ) ;
2012-01-30 16:16:26 +04:00
return low_pfn ;
}
2011-12-29 16:09:50 +04:00
# endif /* CONFIG_COMPACTION || CONFIG_CMA */
# ifdef CONFIG_COMPACTION
2012-06-04 07:05:57 +04:00
/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target ( struct page * page )
2011-12-29 16:09:50 +04:00
{
int migratetype = get_pageblock_migratetype ( page ) ;
/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
if ( migratetype = = MIGRATE_ISOLATE | | migratetype = = MIGRATE_RESERVE )
2012-06-04 07:05:57 +04:00
return false ;
2011-12-29 16:09:50 +04:00
/* If the page is a large free page, then allow migration */
if ( PageBuddy ( page ) & & page_order ( page ) > = pageblock_order )
2012-06-04 07:05:57 +04:00
return true ;
2011-12-29 16:09:50 +04:00
2011-12-29 16:09:50 +04:00
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
2012-06-04 07:05:57 +04:00
if ( migrate_async_suitable ( migratetype ) )
return true ;
2011-12-29 16:09:50 +04:00
/* Otherwise skip the block */
2012-06-04 07:05:57 +04:00
return false ;
2011-12-29 16:09:50 +04:00
}
2012-01-30 16:16:26 +04:00
/*
2011-12-29 16:09:50 +04:00
* Based on information in the current compact_control , find blocks
* suitable for isolating free pages from and then isolate them .
2012-01-30 16:16:26 +04:00
*/
2011-12-29 16:09:50 +04:00
static void isolate_freepages ( struct zone * zone ,
struct compact_control * cc )
2012-01-30 16:16:26 +04:00
{
2011-12-29 16:09:50 +04:00
struct page * page ;
unsigned long high_pfn , low_pfn , pfn , zone_end_pfn , end_pfn ;
unsigned long flags ;
int nr_freepages = cc - > nr_freepages ;
struct list_head * freelist = & cc - > freepages ;
2012-01-30 16:16:26 +04:00
2011-12-29 16:09:50 +04:00
/*
* Initialise the free scanner . The starting point is where we last
* scanned from ( or the end of the zone if starting ) . The low point
* is the end of the pageblock the migration scanner is using .
*/
pfn = cc - > free_pfn ;
low_pfn = cc - > migrate_pfn + pageblock_nr_pages ;
2012-01-30 16:16:26 +04:00
2011-12-29 16:09:50 +04:00
/*
* Take care that if the migration scanner is at the end of the zone
* that the free scanner does not accidentally move to the next zone
* in the next isolation cycle .
*/
high_pfn = min ( low_pfn , pfn ) ;
2012-01-30 16:16:26 +04:00
2011-12-29 16:09:50 +04:00
zone_end_pfn = zone - > zone_start_pfn + zone - > spanned_pages ;
2012-01-30 16:16:26 +04:00
2011-12-29 16:09:50 +04:00
/*
* Isolate free pages until enough are available to migrate the
* pages on cc - > migratepages . We stop searching if the migrate
* and free page scanners meet or enough free pages are isolated .
*/
for ( ; pfn > low_pfn & & cc - > nr_migratepages > nr_freepages ;
pfn - = pageblock_nr_pages ) {
unsigned long isolated ;
2012-01-30 16:16:26 +04:00
2012-08-01 03:43:12 +04:00
/*
* Skip ahead if another thread is compacting in the area
* simultaneously . If we wrapped around , we can only skip
* ahead if zone - > compact_cached_free_pfn also wrapped to
* above our starting point .
*/
if ( cc - > order > 0 & & ( ! cc - > wrapped | |
zone - > compact_cached_free_pfn >
cc - > start_free_pfn ) )
pfn = min ( pfn , zone - > compact_cached_free_pfn ) ;
2011-12-29 16:09:50 +04:00
if ( ! pfn_valid ( pfn ) )
continue ;
2012-01-30 16:16:26 +04:00
2011-12-29 16:09:50 +04:00
/*
* Check for overlapping nodes / zones . It ' s possible on some
* configurations to have a setup like
* node0 node1 node0
* i . e . it ' s possible that all pages within a zones range of
* pages do not belong to a single zone .
*/
page = pfn_to_page ( pfn ) ;
if ( page_zone ( page ) ! = zone )
continue ;
/* Check the block is suitable for migration */
2012-06-04 07:05:57 +04:00
if ( ! suitable_migration_target ( page ) )
2011-12-29 16:09:50 +04:00
continue ;
2012-06-04 07:05:57 +04:00
2011-12-29 16:09:50 +04:00
/*
* Found a block suitable for isolating free pages from . Now
* we disabled interrupts , double check things are ok and
* isolate the pages . This is to minimise the time IRQs
* are disabled
*/
isolated = 0 ;
spin_lock_irqsave ( & zone - > lock , flags ) ;
2012-06-04 07:05:57 +04:00
if ( suitable_migration_target ( page ) ) {
2011-12-29 16:09:50 +04:00
end_pfn = min ( pfn + pageblock_nr_pages , zone_end_pfn ) ;
isolated = isolate_freepages_block ( pfn , end_pfn ,
freelist , false ) ;
nr_freepages + = isolated ;
2012-06-04 07:05:57 +04:00
}
2011-12-29 16:09:50 +04:00
spin_unlock_irqrestore ( & zone - > lock , flags ) ;
/*
* Record the highest PFN we isolated pages from . When next
* looking for free pages , the search will restart here as
* page migration may have returned some pages to the allocator
*/
2012-08-01 03:43:12 +04:00
if ( isolated ) {
2011-12-29 16:09:50 +04:00
high_pfn = max ( high_pfn , pfn ) ;
2012-08-01 03:43:12 +04:00
if ( cc - > order > 0 )
zone - > compact_cached_free_pfn = high_pfn ;
}
2011-12-29 16:09:50 +04:00
}
/* split_free_page does not map the pages */
map_pages ( freelist ) ;
cc - > free_pfn = high_pfn ;
cc - > nr_freepages = nr_freepages ;
2010-05-25 01:32:27 +04:00
}
/*
* This is a migrate - callback that " allocates " freepages by taking pages
* from the isolated freelists in the block we are migrating to .
*/
static struct page * compaction_alloc ( struct page * migratepage ,
unsigned long data ,
int * * result )
{
struct compact_control * cc = ( struct compact_control * ) data ;
struct page * freepage ;
/* Isolate free pages if necessary */
if ( list_empty ( & cc - > freepages ) ) {
isolate_freepages ( cc - > zone , cc ) ;
if ( list_empty ( & cc - > freepages ) )
return NULL ;
}
freepage = list_entry ( cc - > freepages . next , struct page , lru ) ;
list_del ( & freepage - > lru ) ;
cc - > nr_freepages - - ;
return freepage ;
}
/*
* We cannot control nr_migratepages and nr_freepages fully when migration is
* running as migrate_pages ( ) has no knowledge of compact_control . When
* migration is complete , we count the number of pages on the lists by hand .
*/
static void update_nr_listpages ( struct compact_control * cc )
{
int nr_migratepages = 0 ;
int nr_freepages = 0 ;
struct page * page ;
list_for_each_entry ( page , & cc - > migratepages , lru )
nr_migratepages + + ;
list_for_each_entry ( page , & cc - > freepages , lru )
nr_freepages + + ;
cc - > nr_migratepages = nr_migratepages ;
cc - > nr_freepages = nr_freepages ;
}
2011-12-29 16:09:50 +04:00
/*
 * Possible outcomes of isolate_migratepages(), consumed by the main loop in
 * compact_zone().
 */
typedef enum {
ISOLATE_ABORT , /* Abort compaction now (isolate_migratepages_range() returned 0) */
ISOLATE_NONE , /* No pages isolated, continue scanning (block skipped without scanning) */
ISOLATE_SUCCESS , /* Block was scanned; migrate whatever was isolated */
} isolate_migrate_t ;
/*
* Isolate all pages that can be migrated from the block pointed to by
* the migrate scanner within compact_control .
*/
static isolate_migrate_t isolate_migratepages ( struct zone * zone ,
struct compact_control * cc )
{
unsigned long low_pfn , end_pfn ;
/* Do not scan outside zone boundaries */
low_pfn = max ( cc - > migrate_pfn , zone - > zone_start_pfn ) ;
/* Only scan within a pageblock boundary */
end_pfn = ALIGN ( low_pfn + pageblock_nr_pages , pageblock_nr_pages ) ;
/* Do not cross the free scanner or scan within a memory hole */
if ( end_pfn > cc - > free_pfn | | ! pfn_valid ( low_pfn ) ) {
cc - > migrate_pfn = end_pfn ;
return ISOLATE_NONE ;
}
/* Perform the isolation */
low_pfn = isolate_migratepages_range ( zone , cc , low_pfn , end_pfn ) ;
if ( ! low_pfn )
return ISOLATE_ABORT ;
cc - > migrate_pfn = low_pfn ;
return ISOLATE_SUCCESS ;
}
2012-08-01 03:43:12 +04:00
/*
* Returns the start pfn of the last page block in a zone . This is the starting
* point for full compaction of a zone . Compaction searches for free pages from
* the end of each zone , while isolate_freepages_block scans forward inside each
* page block .
*/
static unsigned long start_free_pfn ( struct zone * zone )
{
unsigned long free_pfn ;
free_pfn = zone - > zone_start_pfn + zone - > spanned_pages ;
free_pfn & = ~ ( pageblock_nr_pages - 1 ) ;
return free_pfn ;
}
2010-05-25 01:32:27 +04:00
static int compact_finished ( struct zone * zone ,
2011-01-14 02:47:11 +03:00
struct compact_control * cc )
2010-05-25 01:32:27 +04:00
{
2010-05-25 01:32:30 +04:00
unsigned int order ;
2011-01-14 02:47:11 +03:00
unsigned long watermark ;
2010-05-25 01:32:30 +04:00
2010-05-25 01:32:27 +04:00
if ( fatal_signal_pending ( current ) )
return COMPACT_PARTIAL ;
2012-08-01 03:43:12 +04:00
/*
* A full ( order = = - 1 ) compaction run starts at the beginning and
* end of a zone ; it completes when the migrate and free scanner meet .
* A partial ( order > 0 ) compaction can start with the free scanner
* at a random point in the zone , and may have to restart .
*/
if ( cc - > free_pfn < = cc - > migrate_pfn ) {
if ( cc - > order > 0 & & ! cc - > wrapped ) {
/* We started partway through; restart at the end. */
unsigned long free_pfn = start_free_pfn ( zone ) ;
zone - > compact_cached_free_pfn = free_pfn ;
cc - > free_pfn = free_pfn ;
cc - > wrapped = 1 ;
return COMPACT_CONTINUE ;
}
return COMPACT_COMPLETE ;
}
/* We wrapped around and ended up where we started. */
if ( cc - > wrapped & & cc - > free_pfn < = cc - > start_free_pfn )
2010-05-25 01:32:27 +04:00
return COMPACT_COMPLETE ;
2011-01-21 01:44:21 +03:00
/*
* order = = - 1 is expected when compacting via
* / proc / sys / vm / compact_memory
*/
2010-05-25 01:32:30 +04:00
if ( cc - > order = = - 1 )
return COMPACT_CONTINUE ;
2011-06-16 02:08:25 +04:00
/* Compaction run is not finished if the watermark is not met */
watermark = low_wmark_pages ( zone ) ;
watermark + = ( 1 < < cc - > order ) ;
if ( ! zone_watermark_ok ( zone , cc - > order , watermark , 0 , 0 ) )
return COMPACT_CONTINUE ;
2010-05-25 01:32:30 +04:00
/* Direct compactor: Is a suitable page free? */
for ( order = cc - > order ; order < MAX_ORDER ; order + + ) {
/* Job done if page is free of the right migratetype */
if ( ! list_empty ( & zone - > free_area [ order ] . free_list [ cc - > migratetype ] ) )
return COMPACT_PARTIAL ;
/* Job done if allocation would set block type */
if ( order > = pageblock_order & & zone - > free_area [ order ] . nr_free )
return COMPACT_PARTIAL ;
}
2010-05-25 01:32:27 +04:00
return COMPACT_CONTINUE ;
}
2011-01-14 02:45:56 +03:00
/*
* compaction_suitable : Is this suitable to run compaction on this zone now ?
* Returns
* COMPACT_SKIPPED - If there are too few free pages for compaction
* COMPACT_PARTIAL - If the allocation would succeed without compaction
* COMPACT_CONTINUE - If compaction should run now
*/
unsigned long compaction_suitable ( struct zone * zone , int order )
{
int fragindex ;
unsigned long watermark ;
2011-06-16 02:08:25 +04:00
/*
* order = = - 1 is expected when compacting via
* / proc / sys / vm / compact_memory
*/
if ( order = = - 1 )
return COMPACT_CONTINUE ;
2011-01-14 02:45:56 +03:00
/*
* Watermarks for order - 0 must be met for compaction . Note the 2UL .
* This is because during migration , copies of pages need to be
* allocated and for a short time , the footprint is higher
*/
watermark = low_wmark_pages ( zone ) + ( 2UL < < order ) ;
if ( ! zone_watermark_ok ( zone , 0 , watermark , 0 , 0 ) )
return COMPACT_SKIPPED ;
/*
* fragmentation index determines if allocation failures are due to
* low memory or external fragmentation
*
2011-06-16 02:08:49 +04:00
* index of - 1000 implies allocations might succeed depending on
* watermarks
2011-01-14 02:45:56 +03:00
* index towards 0 implies failure is due to lack of memory
* index towards 1000 implies failure is due to fragmentation
*
* Only compact if a failure would be due to fragmentation .
*/
fragindex = fragmentation_index ( zone , order ) ;
if ( fragindex > = 0 & & fragindex < = sysctl_extfrag_threshold )
return COMPACT_SKIPPED ;
2011-06-16 02:08:49 +04:00
if ( fragindex = = - 1000 & & zone_watermark_ok ( zone , order , watermark ,
0 , 0 ) )
2011-01-14 02:45:56 +03:00
return COMPACT_PARTIAL ;
return COMPACT_CONTINUE ;
}
2010-05-25 01:32:27 +04:00
static int compact_zone ( struct zone * zone , struct compact_control * cc )
{
int ret ;
2011-01-14 02:45:56 +03:00
ret = compaction_suitable ( zone , cc - > order ) ;
switch ( ret ) {
case COMPACT_PARTIAL :
case COMPACT_SKIPPED :
/* Compaction is likely to fail */
return ret ;
case COMPACT_CONTINUE :
/* Fall through to compaction */
;
}
2010-05-25 01:32:27 +04:00
/* Setup to move all movable pages to the end of the zone */
cc - > migrate_pfn = zone - > zone_start_pfn ;
2012-08-01 03:43:12 +04:00
if ( cc - > order > 0 ) {
/* Incremental compaction. Start where the last one stopped. */
cc - > free_pfn = zone - > compact_cached_free_pfn ;
cc - > start_free_pfn = cc - > free_pfn ;
} else {
/* Order == -1 starts at the end of the zone. */
cc - > free_pfn = start_free_pfn ( zone ) ;
}
2010-05-25 01:32:27 +04:00
migrate_prep_local ( ) ;
while ( ( ret = compact_finished ( zone , cc ) ) = = COMPACT_CONTINUE ) {
unsigned long nr_migrate , nr_remaining ;
2011-03-23 02:30:39 +03:00
int err ;
2010-05-25 01:32:27 +04:00
2011-06-16 02:08:52 +04:00
switch ( isolate_migratepages ( zone , cc ) ) {
case ISOLATE_ABORT :
ret = COMPACT_PARTIAL ;
goto out ;
case ISOLATE_NONE :
2010-05-25 01:32:27 +04:00
continue ;
2011-06-16 02:08:52 +04:00
case ISOLATE_SUCCESS :
;
}
2010-05-25 01:32:27 +04:00
nr_migrate = cc - > nr_migratepages ;
2011-03-23 02:30:39 +03:00
err = migrate_pages ( & cc - > migratepages , compaction_alloc ,
2012-06-04 07:05:57 +04:00
( unsigned long ) cc , false ,
cc - > sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC ) ;
2010-05-25 01:32:27 +04:00
update_nr_listpages ( cc ) ;
nr_remaining = cc - > nr_migratepages ;
count_vm_event ( COMPACTBLOCKS ) ;
count_vm_events ( COMPACTPAGES , nr_migrate - nr_remaining ) ;
if ( nr_remaining )
count_vm_events ( COMPACTPAGEFAILED , nr_remaining ) ;
2011-01-14 02:45:54 +03:00
trace_mm_compaction_migratepages ( nr_migrate - nr_remaining ,
nr_remaining ) ;
2010-05-25 01:32:27 +04:00
/* Release LRU pages not migrated */
2011-03-23 02:30:39 +03:00
if ( err ) {
2010-05-25 01:32:27 +04:00
putback_lru_pages ( & cc - > migratepages ) ;
cc - > nr_migratepages = 0 ;
2012-07-12 01:02:13 +04:00
if ( err = = - ENOMEM ) {
ret = COMPACT_PARTIAL ;
goto out ;
}
2010-05-25 01:32:27 +04:00
}
}
2011-06-16 02:08:52 +04:00
out :
2010-05-25 01:32:27 +04:00
/* Release free pages and check accounting */
cc - > nr_freepages - = release_freepages ( & cc - > freepages ) ;
VM_BUG_ON ( cc - > nr_freepages ! = 0 ) ;
return ret ;
}
2010-05-25 01:32:28 +04:00
2011-11-01 04:09:08 +04:00
static unsigned long compact_zone_order ( struct zone * zone ,
2011-01-14 02:47:11 +03:00
int order , gfp_t gfp_mask ,
2012-06-04 07:05:57 +04:00
bool sync )
2010-05-25 01:32:30 +04:00
{
struct compact_control cc = {
. nr_freepages = 0 ,
. nr_migratepages = 0 ,
. order = order ,
. migratetype = allocflags_to_migratetype ( gfp_mask ) ,
. zone = zone ,
2012-06-04 07:05:57 +04:00
. sync = sync ,
2010-05-25 01:32:30 +04:00
} ;
INIT_LIST_HEAD ( & cc . freepages ) ;
INIT_LIST_HEAD ( & cc . migratepages ) ;
2012-06-04 07:05:57 +04:00
return compact_zone ( zone , & cc ) ;
2010-05-25 01:32:30 +04:00
}
2010-05-25 01:32:31 +04:00
/*
 * Fragmentation index threshold used by compaction_suitable(): a zone whose
 * index lies in [0, sysctl_extfrag_threshold] is skipped.
 */
int sysctl_extfrag_threshold = 500 ;
2010-05-25 01:32:30 +04:00
/**
 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 * @zonelist: The zonelist used for the current allocation
 * @order: The order of the current allocation
 * @gfp_mask: The GFP mask of the current allocation
 * @nodemask: The allowed nodes to allocate from
 * @sync: Whether migration is synchronous or not
 *
 * This is the main entry point for direct page compaction.
 *
 * Returns the highest COMPACT_* status seen over the zones tried, or
 * COMPACT_SKIPPED if compaction was not attempted at all.
 */
unsigned long try_to_compact_pages(struct zonelist *zonelist,
			int order, gfp_t gfp_mask, nodemask_t *nodemask,
			bool sync)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	int fs_allowed = gfp_mask & __GFP_FS;
	int io_allowed = gfp_mask & __GFP_IO;
	int best = COMPACT_SKIPPED;
	struct zoneref *z;
	struct zone *zone;

	/*
	 * Check whether it is worth even starting compaction. The order check
	 * is made because an assumption is made that the page allocator can
	 * satisfy the "cheaper" orders without taking special steps
	 */
	if (!order)
		return best;
	if (!fs_allowed || !io_allowed)
		return best;

	count_vm_event(COMPACTSTALL);

	/* Compact each zone in the list */
	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
								nodemask) {
		int status = compact_zone_order(zone, order, gfp_mask, sync);

		if (status > best)
			best = status;

		/* If a normal allocation would succeed, stop compacting */
		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
			break;
	}

	return best;
}
2010-05-25 01:32:28 +04:00
/* Compact all zones within a node */
2012-03-22 03:33:52 +04:00
static int __compact_pgdat ( pg_data_t * pgdat , struct compact_control * cc )
2010-05-25 01:32:28 +04:00
{
int zoneid ;
struct zone * zone ;
for ( zoneid = 0 ; zoneid < MAX_NR_ZONES ; zoneid + + ) {
zone = & pgdat - > node_zones [ zoneid ] ;
if ( ! populated_zone ( zone ) )
continue ;
2012-03-22 03:33:52 +04:00
cc - > nr_freepages = 0 ;
cc - > nr_migratepages = 0 ;
cc - > zone = zone ;
INIT_LIST_HEAD ( & cc - > freepages ) ;
INIT_LIST_HEAD ( & cc - > migratepages ) ;
2010-05-25 01:32:28 +04:00
2012-03-22 03:33:54 +04:00
if ( cc - > order = = - 1 | | ! compaction_deferred ( zone , cc - > order ) )
2012-03-22 03:33:52 +04:00
compact_zone ( zone , cc ) ;
2010-05-25 01:32:28 +04:00
2012-03-22 03:33:52 +04:00
if ( cc - > order > 0 ) {
int ok = zone_watermark_ok ( zone , cc - > order ,
low_wmark_pages ( zone ) , 0 , 0 ) ;
if ( ok & & cc - > order > zone - > compact_order_failed )
zone - > compact_order_failed = cc - > order + 1 ;
/* Currently async compaction is never deferred. */
2012-06-04 07:05:57 +04:00
else if ( ! ok & & cc - > sync )
2012-03-22 03:33:52 +04:00
defer_compaction ( zone , cc - > order ) ;
}
2012-03-22 03:33:52 +04:00
VM_BUG_ON ( ! list_empty ( & cc - > freepages ) ) ;
VM_BUG_ON ( ! list_empty ( & cc - > migratepages ) ) ;
2010-05-25 01:32:28 +04:00
}
return 0 ;
}
2012-03-22 03:33:52 +04:00
/*
 * Asynchronously compact every zone of @pgdat for allocations of @order.
 * Returns the result of __compact_pgdat().
 */
int compact_pgdat(pg_data_t *pgdat, int order)
{
	struct compact_control cc = {
		.sync = false,
		.order = order,
	};
	int rc;

	rc = __compact_pgdat(pgdat, &cc);
	return rc;
}
static int compact_node ( int nid )
{
struct compact_control cc = {
. order = - 1 ,
2012-06-04 07:05:57 +04:00
. sync = true ,
2012-03-22 03:33:52 +04:00
} ;
2012-03-22 03:33:53 +04:00
return __compact_pgdat ( NODE_DATA ( nid ) , & cc ) ;
2012-03-22 03:33:52 +04:00
}
2010-05-25 01:32:28 +04:00
/* Compact all nodes in the system */
static int compact_nodes ( void )
{
int nid ;
2012-03-22 03:33:53 +04:00
/* Flush pending updates to the LRU lists */
lru_add_drain_all ( ) ;
2010-05-25 01:32:28 +04:00
for_each_online_node ( nid )
compact_node ( nid ) ;
return COMPACT_COMPLETE ;
}
/*
 * The written value is actually unused, all memory is compacted:
 * sysctl_compaction_handler() compacts every online node on any write
 * without reading this variable.
 */
int sysctl_compact_memory ;
/*
 * This is the entry point for compacting all nodes via /proc/sys/vm.
 *
 * Any write triggers a full compaction of every online node; the written
 * value itself is not examined. Reads do nothing.
 *
 * Returns 0. The COMPACT_* status from compact_nodes() is deliberately not
 * passed on: it is a non-zero compaction result code, not an errno, and
 * must not be reported to the sysctl core as the handler's status.
 */
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		compact_nodes();

	return 0;
}
2010-05-25 01:32:29 +04:00
2010-05-25 01:32:31 +04:00
/*
 * sysctl handler that delegates to proc_dointvec_minmax().
 *
 * Returns whatever proc_dointvec_minmax() returns so that a failed or
 * rejected access is reported to the caller instead of being silently
 * turned into success.
 */
int sysctl_extfrag_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	return proc_dointvec_minmax(table, write, buffer, length, ppos);
}
2010-05-25 01:32:29 +04:00
# if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
2011-12-22 02:48:43 +04:00
/*
 * sysfs write callback: fully compact the node identified by dev->id.
 *
 * The request is silently ignored when the id is out of range or the node
 * is offline. @attr and @buf are not examined. Always returns @count.
 */
ssize_t sysfs_compact_node(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int nid = dev->id;

	if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
		return count;

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();
	compact_node(nid);

	return count;
}
2011-12-22 02:48:43 +04:00
/*
 * Device attribute "compact" (dev_attr_compact, used by
 * compaction_register_node()/compaction_unregister_node()): mode S_IWUSR,
 * with sysfs_compact_node as its handler and NULL for the other callback
 * (presumably store and show respectively - confirm against DEVICE_ATTR).
 */
static DEVICE_ATTR ( compact , S_IWUSR , NULL , sysfs_compact_node ) ;
2010-05-25 01:32:29 +04:00
int compaction_register_node ( struct node * node )
{
2011-12-22 02:48:43 +04:00
return device_create_file ( & node - > dev , & dev_attr_compact ) ;
2010-05-25 01:32:29 +04:00
}
void compaction_unregister_node ( struct node * node )
{
2011-12-22 02:48:43 +04:00
return device_remove_file ( & node - > dev , & dev_attr_compact ) ;
2010-05-25 01:32:29 +04:00
}
# endif /* CONFIG_SYSFS && CONFIG_NUMA */
2011-12-29 16:09:50 +04:00
# endif /* CONFIG_COMPACTION */