/*
 * linux/mm/compaction.c
 *
 * Memory compaction for the reduction of external fragmentation. Note that
 * this heavily depends upon page migration to do all the real heavy
 * lifting
 *
 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
 */
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
#include <linux/balloon_compaction.h>
#include <linux/page-isolation.h>
#include "internal.h"

#ifdef CONFIG_COMPACTION
static inline void count_compact_event(enum vm_event_item item)
{
	count_vm_event(item);
}

static inline void count_compact_events(enum vm_event_item item, long delta)
{
	count_vm_events(item, delta);
}
#else
#define count_compact_event(item) do { } while (0)
#define count_compact_events(item, delta) do { } while (0)
#endif

#if defined CONFIG_COMPACTION || defined CONFIG_CMA

#define CREATE_TRACE_POINTS
#include <trace/events/compaction.h>

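/* Give the pages on a private freelist back to the buddy allocator and return how many were freed. */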
static unsigned long release_freepages(struct list_head *freelist)
{
	struct page *page, *next;
	unsigned long count = 0;

	list_for_each_entry_safe(page, next, freelist, lru) {
		list_del(&page->lru);
		__free_page(page);
		count++;
	}

	return count;
}

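/* split_free_page() does not map the pages it splits; map each order-0 page before it is handed out. */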
static void map_pages(struct list_head *list)
{
	struct page *page;

	list_for_each_entry(page, list, lru) {
		arch_alloc_page(page, 0);
		kernel_map_pages(page, 1, 1);
	}
}

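/* MIGRATE_CMA and MIGRATE_MOVABLE pageblocks are suitable for async compaction. */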
static inline bool migrate_async_suitable(int migratetype)
{
	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
}

#ifdef CONFIG_COMPACTION
/* Returns true if the pageblock should be scanned for pages to isolate. */
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	if (cc->ignore_skip_hint)
		return true;

	return !get_pageblock_skip(page);
}

/*
 * This function is called to clear all cached information on pageblocks that
 * should be skipped for page isolation when the migrate and free page scanner
 * meet.
 */
static void __reset_isolation_suitable(struct zone *zone)
{
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long pfn;

	zone->compact_cached_migrate_pfn = start_pfn;
	zone->compact_cached_free_pfn = end_pfn;
	zone->compact_blockskip_flush = false;

	/* Walk the zone and mark every pageblock as suitable for isolation */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		cond_resched();

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);
		if (zone != page_zone(page))
			continue;

		clear_pageblock_skip(page);
	}
}

void reset_isolation_suitable(pg_data_t *pgdat)
{
	int zoneid;

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
		struct zone *zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		/* Only flush if a full compaction finished recently */
		if (zone->compact_blockskip_flush)
			__reset_isolation_suitable(zone);
	}
}

/*
 * If no pages were isolated then mark this pageblock to be skipped in the
 * future. The information is later cleared by __reset_isolation_suitable().
 */
static void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long nr_isolated,
			bool migrate_scanner)
{
	struct zone *zone = cc->zone;

	if (cc->ignore_skip_hint)
		return;

	if (!page)
		return;

	if (!nr_isolated) {
		unsigned long pfn = page_to_pfn(page);
		set_pageblock_skip(page);

		/* Update where compaction should restart */
		if (migrate_scanner) {
			if (!cc->finished_update_migrate &&
			    pfn > zone->compact_cached_migrate_pfn)
				zone->compact_cached_migrate_pfn = pfn;
		} else {
			if (!cc->finished_update_free &&
			    pfn < zone->compact_cached_free_pfn)
				zone->compact_cached_free_pfn = pfn;
		}
	}
}
#else
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	return true;
}

static void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long nr_isolated,
			bool migrate_scanner)
{
}
#endif /* CONFIG_COMPACTION */

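/* True if the lock should be dropped: the task needs to reschedule or the lock is contended. */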
static inline bool should_release_lock(spinlock_t *lock)
{
	return need_resched() || spin_is_contended(lock);
}

/*
 * Compaction requires the taking of some coarse locks that are potentially
 * very heavily contended. Check if the process needs to be scheduled or
 * if the lock is contended. For async compaction, back out if contention
 * is severe. For sync compaction, schedule.
 *
 * Returns true if the lock is held.
 * Returns false if the lock is released and compaction should abort.
 */
static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
				      bool locked, struct compact_control *cc)
{
	if (should_release_lock(lock)) {
		if (locked) {
			spin_unlock_irqrestore(lock, *flags);
			locked = false;
		}

		/* async aborts if taking too long or contended */
		if (!cc->sync) {
			cc->contended = true;
			return false;
		}

		cond_resched();
	}

	if (!locked)
		spin_lock_irqsave(lock, *flags);
	return true;
}

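/* Take @lock for the first time, applying the same contention rules as compact_checklock_irqsave(). */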
static inline bool compact_trylock_irqsave(spinlock_t *lock,
			unsigned long *flags, struct compact_control *cc)
{
	return compact_checklock_irqsave(lock, flags, false, cc);
}

/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct page *page)
{
	int migratetype = get_pageblock_migratetype(page);

	/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
	if (migratetype == MIGRATE_RESERVE)
		return false;

	if (is_migrate_isolate(migratetype))
		return false;

	/* If the page is a large free page, then allow migration */
	if (PageBuddy(page) && page_order(page) >= pageblock_order)
		return true;

	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
	if (migrate_async_suitable(migratetype))
		return true;

	/* Otherwise skip the block */
	return false;
}

/*
 * Isolate free pages onto a private freelist. If @strict is true, will abort
 * returning 0 on any invalid PFNs or non-free pages inside the pageblock
 * (even though it may still end up isolating some pages).
 */
static unsigned long isolate_freepages_block(struct compact_control *cc,
				unsigned long blockpfn,
				unsigned long end_pfn,
				struct list_head *freelist,
				bool strict)
{
	int nr_scanned = 0, total_isolated = 0;
	struct page *cursor, *valid_page = NULL;
	unsigned long nr_strict_required = end_pfn - blockpfn;
	unsigned long flags;
	bool locked = false;

	cursor = pfn_to_page(blockpfn);

	/* Isolate free pages. */
	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
		int isolated, i;
		struct page *page = cursor;

		nr_scanned++;
		if (!pfn_valid_within(blockpfn))
			continue;
		if (!valid_page)
			valid_page = page;
		if (!PageBuddy(page))
			continue;

		/*
		 * The zone lock must be held to isolate freepages.
		 * Unfortunately this is a very coarse lock and can be
		 * heavily contended if there are parallel allocations
		 * or parallel compactions. For async compaction, do not
		 * spin on the lock and acquire it as late as possible.
		 */
		locked = compact_checklock_irqsave(&cc->zone->lock, &flags,
								locked, cc);
		if (!locked)
			break;

		/* Recheck this is a suitable migration target under lock */
		if (!strict && !suitable_migration_target(page))
			break;

		/* Recheck this is a buddy page under lock */
		if (!PageBuddy(page))
			continue;

		/* Found a free page, break it into order-0 pages */
		isolated = split_free_page(page);
		if (!isolated && strict)
			break;
		total_isolated += isolated;
		for (i = 0; i < isolated; i++) {
			list_add(&page->lru, freelist);
			page++;
		}

		/* If a page was split, advance to the end of it */
		if (isolated) {
			blockpfn += isolated - 1;
			cursor += isolated - 1;
		}
	}

	trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);

	/*
	 * If strict isolation is requested by CMA then check that all the
	 * pages requested were isolated. If there were any failures, 0 is
	 * returned and CMA will fail.
	 */
	if (strict && nr_strict_required > total_isolated)
		total_isolated = 0;

	if (locked)
		spin_unlock_irqrestore(&cc->zone->lock, flags);

	/* Update the pageblock-skip if the whole pageblock was scanned */
	if (blockpfn == end_pfn)
		update_pageblock_skip(cc, valid_page, total_isolated, false);

	count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
	if (total_isolated)
		count_compact_events(COMPACTISOLATED, total_isolated);

	return total_isolated;
}

/**
 * isolate_freepages_range() - isolate free pages.
 * @start_pfn: The first PFN to start isolating.
 * @end_pfn:   The one-past-last PFN.
 *
 * Non-free pages, invalid PFNs, or zone boundaries within the
 * [start_pfn, end_pfn) range are considered errors, cause the function to
 * undo its actions and return zero.
 *
 * Otherwise, the function returns the one-past-the-last PFN of the isolated
 * pages (which may be greater than end_pfn if the end fell in the middle of
 * a free page).
 */
unsigned long
isolate_freepages_range(struct compact_control *cc,
			unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long isolated, pfn, block_end_pfn;
	LIST_HEAD(freelist);

	for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) {
		if (!pfn_valid(pfn) || cc->zone != page_zone(pfn_to_page(pfn)))
			break;

		/*
		 * On subsequent iterations ALIGN() is actually not needed,
		 * but we keep it so as not to complicate the code.
		 */
		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
		block_end_pfn = min(block_end_pfn, end_pfn);

		isolated = isolate_freepages_block(cc, pfn, block_end_pfn,
						   &freelist, true);

		/*
		 * In strict mode, isolate_freepages_block() returns 0 if
		 * there are any holes in the block (ie. invalid PFNs or
		 * non-free pages).
		 */
		if (!isolated)
			break;

		/*
		 * If we managed to isolate pages, it is always (1 << n) *
		 * pageblock_nr_pages for some non-negative n. (Max order
		 * page may span two pageblocks).
		 */
	}

	/* split_free_page does not map the pages */
	map_pages(&freelist);

	if (pfn < end_pfn) {
		/* Loop terminated early, cleanup. */
		release_freepages(&freelist);
		return 0;
	}

	/* We don't use freelists for anything. */
	return pfn;
}

/* Update the number of anon and file isolated pages in the zone */
static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
{
	struct page *page;
	unsigned int count[2] = { 0, };

	list_for_each_entry(page, &cc->migratepages, lru)
		count[!!page_is_file_cache(page)]++;

	/* If locked we can use the interrupt unsafe versions */
	if (locked) {
		__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
		__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
	} else {
		mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
		mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
	}
}

/* Similar to reclaim, but different enough that they don't share logic */
static bool too_many_isolated(struct zone *zone)
{
	unsigned long active, inactive, isolated;

	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
					zone_page_state(zone, NR_INACTIVE_ANON);
	active = zone_page_state(zone, NR_ACTIVE_FILE) +
					zone_page_state(zone, NR_ACTIVE_ANON);
	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
					zone_page_state(zone, NR_ISOLATED_ANON);

	return isolated > (inactive + active) / 2;
}

/**
 * isolate_migratepages_range() - isolate all migrate-able pages in range.
 * @zone:	Zone pages are in.
 * @cc:		Compaction control structure.
 * @low_pfn:	The first PFN of the range.
 * @end_pfn:	The one-past-the-last PFN of the range.
 * @unevictable: true if unevictable pages may also be isolated
 *
 * Isolate all pages that can be migrated from the range specified by
 * [low_pfn, end_pfn). Returns zero if there is a fatal signal pending,
 * otherwise the PFN of the first page that was not scanned
 * (which may be less than, equal to or greater than end_pfn).
 *
 * Assumes that cc->migratepages is empty and cc->nr_migratepages is
 * zero.
 *
 * Apart from cc->migratepages and cc->nr_migratepages this function
 * does not modify any cc's fields, in particular it does not modify
 * (or read for that matter) cc->migrate_pfn.
 */
unsigned long
isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
		unsigned long low_pfn, unsigned long end_pfn, bool unevictable)
{
	unsigned long last_pageblock_nr = 0, pageblock_nr;
	unsigned long nr_scanned = 0, nr_isolated = 0;
	struct list_head *migratelist = &cc->migratepages;
	isolate_mode_t mode = 0;
	struct lruvec *lruvec;
	unsigned long flags;
	bool locked = false;
	struct page *page = NULL, *valid_page = NULL;

	/*
	 * Ensure that there are not too many pages isolated from the LRU
	 * list by either parallel reclaimers or compaction. If there are,
	 * delay for some time until fewer pages are isolated
	 */
	while (unlikely(too_many_isolated(zone))) {
		/* async migration should just abort */
		if (!cc->sync)
			return 0;

		congestion_wait(BLK_RW_ASYNC, HZ/10);

		if (fatal_signal_pending(current))
			return 0;
	}

	/* Time to isolate some pages for migration */
	cond_resched();
	for (; low_pfn < end_pfn; low_pfn++) {
		/* give a chance to irqs before checking need_resched() */
		if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
			if (should_release_lock(&zone->lru_lock)) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				locked = false;
			}
		}

		/*
		 * migrate_pfn does not necessarily start aligned to a
		 * pageblock. Ensure that pfn_valid is called when moving
		 * into a new MAX_ORDER_NR_PAGES range in case of large
		 * memory holes within the zone
		 */
		if ((low_pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
			if (!pfn_valid(low_pfn)) {
				low_pfn += MAX_ORDER_NR_PAGES - 1;
				continue;
			}
		}

		if (!pfn_valid_within(low_pfn))
			continue;
		nr_scanned++;

		/*
		 * Get the page and ensure the page is within the same zone.
		 * See the comment in isolate_freepages about overlapping
		 * nodes. It is deliberate that the new zone lock is not taken
		 * as memory compaction should not move pages between nodes.
		 */
		page = pfn_to_page(low_pfn);
		if (page_zone(page) != zone)
			continue;

		if (!valid_page)
			valid_page = page;

		/* If isolation recently failed, do not retry */
		pageblock_nr = low_pfn >> pageblock_order;
		if (!isolation_suitable(cc, page))
			goto next_pageblock;

		/* Skip if free */
		if (PageBuddy(page))
			continue;

		/*
		 * For async migration, also only scan in MOVABLE blocks. Async
		 * migration is optimistic to see if the minimum amount of work
		 * satisfies the allocation
		 */
		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
		    !migrate_async_suitable(get_pageblock_migratetype(page))) {
			cc->finished_update_migrate = true;
			goto next_pageblock;
		}

		/*
		 * Check may be lockless but that's ok as we recheck later.
		 * It's possible to migrate LRU pages and balloon pages;
		 * skip any other type of page.
		 */
		if (!PageLRU(page)) {
			if (unlikely(balloon_page_movable(page))) {
				if (locked && balloon_page_isolate(page)) {
					/* Successfully isolated */
					cc->finished_update_migrate = true;
					list_add(&page->lru, migratelist);
					cc->nr_migratepages++;
					nr_isolated++;
					goto check_compact_cluster;
				}
			}
			continue;
		}

		/*
		 * PageLRU is set. lru_lock normally excludes isolation
		 * splitting and collapsing (collapsing has already happened
		 * if PageLRU is set) but the lock is not necessarily taken
		 * here and it is wasteful to take it just to check transhuge.
		 * Check TransHuge without lock and skip the whole pageblock if
		 * it's either a transhuge or hugetlbfs page, as calling
		 * compound_order() without preventing THP from splitting the
		 * page underneath us may return surprising results.
		 */
		if (PageTransHuge(page)) {
			if (!locked)
				goto next_pageblock;
			low_pfn += (1 << compound_order(page)) - 1;
			continue;
		}

		/* Check if it is ok to still hold the lock */
		locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
								locked, cc);
		if (!locked || fatal_signal_pending(current))
			break;

		/* Recheck PageLRU and PageTransHuge under lock */
		if (!PageLRU(page))
			continue;
		if (PageTransHuge(page)) {
			low_pfn += (1 << compound_order(page)) - 1;
			continue;
		}

		if (!cc->sync)
			mode |= ISOLATE_ASYNC_MIGRATE;

		if (unevictable)
			mode |= ISOLATE_UNEVICTABLE;

		lruvec = mem_cgroup_page_lruvec(page, zone);

		/* Try isolate the page */
		if (__isolate_lru_page(page, mode) != 0)
			continue;

		VM_BUG_ON(PageTransCompound(page));

		/* Successfully isolated */
		cc->finished_update_migrate = true;
		del_page_from_lru_list(page, lruvec, page_lru(page));
		list_add(&page->lru, migratelist);
		cc->nr_migratepages++;
		nr_isolated++;

check_compact_cluster:
		/* Avoid isolating too much */
		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
			++low_pfn;
			break;
		}

		continue;

next_pageblock:
		low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1;
		last_pageblock_nr = pageblock_nr;
	}

	acct_isolated(zone, locked, cc);

	if (locked)
		spin_unlock_irqrestore(&zone->lru_lock, flags);

	/* Update the pageblock-skip if the whole pageblock was scanned */
	if (low_pfn == end_pfn)
		update_pageblock_skip(cc, valid_page, nr_isolated, true);

	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);

	count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
	if (nr_isolated)
		count_compact_events(COMPACTISOLATED, nr_isolated);

	return low_pfn;
}

#endif /* CONFIG_COMPACTION || CONFIG_CMA */
#ifdef CONFIG_COMPACTION
/*
 * Based on information in the current compact_control, find blocks
 * suitable for isolating free pages from and then isolate them.
 */
static void isolate_freepages(struct zone *zone,
				struct compact_control *cc)
{
	struct page *page;
	unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn;
	int nr_freepages = cc->nr_freepages;
	struct list_head *freelist = &cc->freepages;

	/*
	 * Initialise the free scanner. The starting point is where we last
	 * scanned from (or the end of the zone if starting). The low point
	 * is the end of the pageblock the migration scanner is using.
	 */
	pfn = cc->free_pfn;
	low_pfn = cc->migrate_pfn + pageblock_nr_pages;

	/*
	 * Take care that if the migration scanner is at the end of the zone
	 * that the free scanner does not accidentally move to the next zone
	 * in the next isolation cycle.
	 */
	high_pfn = min(low_pfn, pfn);

	z_end_pfn = zone_end_pfn(zone);

	/*
	 * Isolate free pages until enough are available to migrate the
	 * pages on cc->migratepages. We stop searching if the migrate
	 * and free page scanners meet or enough free pages are isolated.
	 */
	for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
					pfn -= pageblock_nr_pages) {
		unsigned long isolated;

		/*
		 * This can iterate a massively long zone without finding any
		 * suitable migration targets, so periodically check if we need
		 * to schedule.
		 */
		cond_resched();

		if (!pfn_valid(pfn))
			continue;

		/*
		 * Check for overlapping nodes/zones. It's possible on some
		 * configurations to have a setup like
		 * node0 node1 node0
		 * i.e. it's possible that all pages within a zone's range of
		 * pages do not belong to a single zone.
		 */
		page = pfn_to_page(pfn);
		if (page_zone(page) != zone)
			continue;

		/* Check the block is suitable for migration */
		if (!suitable_migration_target(page))
			continue;

		/* If isolation recently failed, do not retry */
		if (!isolation_suitable(cc, page))
			continue;

		/* Found a block suitable for isolating free pages from */
		isolated = 0;

		/*
		 * As pfn may not start aligned, pfn+pageblock_nr_pages
		 * may cross a MAX_ORDER_NR_PAGES boundary and miss
		 * a pfn_valid check. Ensure isolate_freepages_block()
		 * only scans within a pageblock
		 */
		end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
		end_pfn = min(end_pfn, z_end_pfn);
		isolated = isolate_freepages_block(cc, pfn, end_pfn,
						   freelist, false);
		nr_freepages += isolated;

		/*
		 * Record the highest PFN we isolated pages from. When next
		 * looking for free pages, the search will restart here as
		 * page migration may have returned some pages to the allocator
		 */
		if (isolated) {
			cc->finished_update_free = true;
			high_pfn = max(high_pfn, pfn);
		}
	}

	/* split_free_page does not map the pages */
	map_pages(freelist);

	cc->free_pfn = high_pfn;
	cc->nr_freepages = nr_freepages;
}

/*
 * This is a migrate-callback that "allocates" freepages by taking pages
 * from the isolated freelists in the block we are migrating to.
 */
static struct page *compaction_alloc(struct page *migratepage,
					unsigned long data,
					int **result)
{
	struct compact_control *cc = (struct compact_control *)data;
	struct page *freepage;

	/* Isolate free pages if necessary */
	if (list_empty(&cc->freepages)) {
		isolate_freepages(cc->zone, cc);

		if (list_empty(&cc->freepages))
			return NULL;
	}

	freepage = list_entry(cc->freepages.next, struct page, lru);
	list_del(&freepage->lru);
	cc->nr_freepages--;

	return freepage;
}

/*
 * We cannot control nr_migratepages and nr_freepages fully when migration is
 * running as migrate_pages() has no knowledge of compact_control. When
 * migration is complete, we count the number of pages on the lists by hand.
 */
static void update_nr_listpages(struct compact_control *cc)
{
	int nr_migratepages = 0;
	int nr_freepages = 0;
	struct page *page;

	list_for_each_entry(page, &cc->migratepages, lru)
		nr_migratepages++;
	list_for_each_entry(page, &cc->freepages, lru)
		nr_freepages++;

	cc->nr_migratepages = nr_migratepages;
	cc->nr_freepages = nr_freepages;
}

/* possible outcome of isolate_migratepages */
typedef enum {
	ISOLATE_ABORT,		/* Abort compaction now */
	ISOLATE_NONE,		/* No pages isolated, continue scanning */
	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
} isolate_migrate_t;

/*
 * Isolate all pages that can be migrated from the block pointed to by
 * the migrate scanner within compact_control.
 */
static isolate_migrate_t isolate_migratepages(struct zone *zone,
					struct compact_control *cc)
{
	unsigned long low_pfn, end_pfn;

	/* Do not scan outside zone boundaries */
	low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);

	/* Only scan within a pageblock boundary */
	end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);

	/* Do not cross the free scanner or scan within a memory hole */
	if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
		cc->migrate_pfn = end_pfn;
		return ISOLATE_NONE;
	}

	/* Perform the isolation */
	low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn, false);
	if (!low_pfn || cc->contended)
		return ISOLATE_ABORT;

	cc->migrate_pfn = low_pfn;

	return ISOLATE_SUCCESS;
}

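/*
 * Check whether compaction of this zone is finished: the scanners have met
 * (COMPACT_COMPLETE), a suitably large free page is already available or a
 * fatal signal is pending (COMPACT_PARTIAL), or work remains
 * (COMPACT_CONTINUE).
 */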
static int compact_finished(struct zone *zone,
			    struct compact_control *cc)
{
	unsigned int order;
	unsigned long watermark;

	if (fatal_signal_pending(current))
		return COMPACT_PARTIAL;

	/* Compaction run completes if the migrate and free scanner meet */
	if (cc->free_pfn <= cc->migrate_pfn) {
		/*
		 * Mark that the PG_migrate_skip information should be cleared
		 * by kswapd when it goes to sleep. kswapd does not set the
		 * flag itself as the decision to be clear should be directly
		 * based on an allocation request.
		 */
		if (!current_is_kswapd())
			zone->compact_blockskip_flush = true;

		return COMPACT_COMPLETE;
	}

	/*
	 * order == -1 is expected when compacting via
	 * /proc/sys/vm/compact_memory
	 */
	if (cc->order == -1)
		return COMPACT_CONTINUE;

	/* Compaction run is not finished if the watermark is not met */
	watermark = low_wmark_pages(zone);
	watermark += (1 << cc->order);

	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
		return COMPACT_CONTINUE;

	/* Direct compactor: Is a suitable page free? */
	for (order = cc->order; order < MAX_ORDER; order++) {
		struct free_area *area = &zone->free_area[order];

		/* Job done if page is free of the right migratetype */
		if (!list_empty(&area->free_list[cc->migratetype]))
			return COMPACT_PARTIAL;

		/* Job done if allocation would set block type */
		if (cc->order >= pageblock_order && area->nr_free)
			return COMPACT_PARTIAL;
	}

	return COMPACT_CONTINUE;
}

/*
 * compaction_suitable: Is this suitable to run compaction on this zone now?
 * Returns
 *   COMPACT_SKIPPED  - If there are too few free pages for compaction
 *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
 *   COMPACT_CONTINUE - If compaction should run now
 */
unsigned long compaction_suitable(struct zone *zone, int order)
{
	int fragindex;
	unsigned long watermark;

	/*
	 * order == -1 is expected when compacting via
	 * /proc/sys/vm/compact_memory
	 */
	if (order == -1)
		return COMPACT_CONTINUE;

	/*
	 * Watermarks for order-0 must be met for compaction. Note the 2UL.
	 * This is because during migration, copies of pages need to be
	 * allocated and for a short time, the footprint is higher
	 */
	watermark = low_wmark_pages(zone) + (2UL << order);
	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
		return COMPACT_SKIPPED;

	/*
	 * fragmentation index determines if allocation failures are due to
	 * low memory or external fragmentation
	 *
	 * index of -1000 implies allocations might succeed depending on
	 * watermarks
	 * index towards 0 implies failure is due to lack of memory
	 * index towards 1000 implies failure is due to fragmentation
	 *
	 * Only compact if a failure would be due to fragmentation.
	 */
	fragindex = fragmentation_index(zone, order);
	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
		return COMPACT_SKIPPED;

	if (fragindex == -1000 && zone_watermark_ok(zone, order, watermark,
	    0, 0))
		return COMPACT_PARTIAL;

	return COMPACT_CONTINUE;
}

static int compact_zone(struct zone *zone, struct compact_control *cc)
{
	int ret;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);

	ret = compaction_suitable(zone, cc->order);
	switch (ret) {
	case COMPACT_PARTIAL:
	case COMPACT_SKIPPED:
		/* Compaction is likely to fail */
		return ret;
	case COMPACT_CONTINUE:
		/* Fall through to compaction */
		;
	}

	/*
	 * Setup to move all movable pages to the end of the zone. Use cached
	 * information on where the scanners should start but check that it
	 * is initialised by ensuring the values are within zone boundaries.
	 */
	cc->migrate_pfn = zone->compact_cached_migrate_pfn;
	cc->free_pfn = zone->compact_cached_free_pfn;
	if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
		cc->free_pfn = end_pfn & ~(pageblock_nr_pages - 1);
		zone->compact_cached_free_pfn = cc->free_pfn;
	}
	if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
		cc->migrate_pfn = start_pfn;
		zone->compact_cached_migrate_pfn = cc->migrate_pfn;
	}

	/*
	 * Clear pageblock skip if there were failures recently and compaction
	 * is about to be retried after being deferred. kswapd does not do
	 * this reset as it'll reset the cached information when going to sleep.
	 */
	if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
		__reset_isolation_suitable(zone);

	migrate_prep_local();

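	/*
	 * Main loop: repeatedly isolate one pageblock's worth of pages to
	 * migrate and move them to the free pages gathered by the free
	 * scanner, until compact_finished() reports completion or success.
	 */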
	while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
		unsigned long nr_migrate, nr_remaining;
		int err;

		switch (isolate_migratepages(zone, cc)) {
		case ISOLATE_ABORT:
			ret = COMPACT_PARTIAL;
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			goto out;
		case ISOLATE_NONE:
			continue;
		case ISOLATE_SUCCESS:
			;
		}

		nr_migrate = cc->nr_migratepages;
		err = migrate_pages(&cc->migratepages, compaction_alloc,
				(unsigned long)cc,
				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
				MR_COMPACTION);
		update_nr_listpages(cc);
		nr_remaining = cc->nr_migratepages;

		trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
						nr_remaining);

		/* Release isolated pages not migrated */
		if (err) {
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			if (err == -ENOMEM) {
				ret = COMPACT_PARTIAL;
				goto out;
			}
		}
	}

out:
	/* Release free pages and check accounting */
	cc->nr_freepages -= release_freepages(&cc->freepages);
	VM_BUG_ON(cc->nr_freepages != 0);

	return ret;
}

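/*
 * Run compact_zone() on a single zone for a direct compactor and report
 * whether the run was aborted because of lock contention.
 */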
static unsigned long compact_zone_order(struct zone *zone,
				 int order, gfp_t gfp_mask,
				 bool sync, bool *contended)
{
	unsigned long ret;
	struct compact_control cc = {
		.nr_freepages = 0,
		.nr_migratepages = 0,
		.order = order,
		.migratetype = allocflags_to_migratetype(gfp_mask),
		.zone = zone,
		.sync = sync,
	};
	INIT_LIST_HEAD(&cc.freepages);
	INIT_LIST_HEAD(&cc.migratepages);

	ret = compact_zone(zone, &cc);

	VM_BUG_ON(!list_empty(&cc.freepages));
	VM_BUG_ON(!list_empty(&cc.migratepages));

	*contended = cc.contended;
	return ret;
}

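/*
 * Tunable via /proc/sys/vm/extfrag_threshold: a non-negative fragmentation
 * index at or below this value causes compaction to be skipped for the zone.
 */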
int sysctl_extfrag_threshold = 500;

/**
 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 * @zonelist: The zonelist used for the current allocation
 * @order: The order of the current allocation
 * @gfp_mask: The GFP mask of the current allocation
 * @nodemask: The allowed nodes to allocate from
 * @sync: Whether migration is synchronous or not
 * @contended: Return value that is true if compaction was aborted due to lock contention
 *
 * This is the main entry point for direct page compaction.
 */
unsigned long try_to_compact_pages(struct zonelist *zonelist,
			int order, gfp_t gfp_mask, nodemask_t *nodemask,
			bool sync, bool *contended)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	int may_enter_fs = gfp_mask & __GFP_FS;
	int may_perform_io = gfp_mask & __GFP_IO;
	struct zoneref *z;
	struct zone *zone;
	int rc = COMPACT_SKIPPED;
	int alloc_flags = 0;

	/* Check if the GFP flags allow compaction */
	if (!order || !may_enter_fs || !may_perform_io)
		return rc;

	count_compact_event(COMPACTSTALL);

#ifdef CONFIG_CMA
	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
		alloc_flags |= ALLOC_CMA;
#endif
	/* Compact each zone in the list */
	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
								nodemask) {
		int status;

		status = compact_zone_order(zone, order, gfp_mask, sync,
						contended);
		rc = max(status, rc);

		/* If a normal allocation would succeed, stop compacting */
		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
				      alloc_flags))
			break;
	}

	return rc;
}

/* Compact all zones within a node */
static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
{
	int zoneid;
	struct zone *zone;

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc->nr_freepages = 0;
		cc->nr_migratepages = 0;
		cc->zone = zone;
		INIT_LIST_HEAD(&cc->freepages);
		INIT_LIST_HEAD(&cc->migratepages);

		if (cc->order == -1 || !compaction_deferred(zone, cc->order))
			compact_zone(zone, cc);

		if (cc->order > 0) {
			int ok = zone_watermark_ok(zone, cc->order,
						low_wmark_pages(zone), 0, 0);
			if (ok && cc->order >= zone->compact_order_failed)
				zone->compact_order_failed = cc->order + 1;
			/* Currently async compaction is never deferred. */
			else if (!ok && cc->sync)
				defer_compaction(zone, cc->order);
		}

		VM_BUG_ON(!list_empty(&cc->freepages));
		VM_BUG_ON(!list_empty(&cc->migratepages));
	}
}

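/*
 * Compact all of a node's zones using async migration. An order of 0 means
 * there is nothing worth compacting, so return immediately.
 */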
void compact_pgdat(pg_data_t *pgdat, int order)
{
	struct compact_control cc = {
		.order = order,
		.sync = false,
	};

	if (!order)
		return;

	__compact_pgdat(pgdat, &cc);
}

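/* Fully compact one node with synchronous migration (order == -1). */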
static void compact_node(int nid)
{
	struct compact_control cc = {
		.order = -1,
		.sync = true,
	};

	__compact_pgdat(NODE_DATA(nid), &cc);
}

/* Compact all nodes in the system */
static void compact_nodes(void)
{
	int nid;

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();

	for_each_online_node(nid)
		compact_node(nid);
}

/* The written value is actually unused, all memory is compacted */
int sysctl_compact_memory;

/* This is the entry point for compacting all nodes via /proc/sys/vm */
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		compact_nodes();

	return 0;
}

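/* Handler for /proc/sys/vm/extfrag_threshold; proc_dointvec_minmax() enforces the allowed range. */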
int sysctl_extfrag_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec_minmax(table, write, buffer, length, ppos);

	return 0;
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
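/* sysfs handler: writing anything to a node's 'compact' attribute fully compacts that node. */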
ssize_t sysfs_compact_node(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int nid = dev->id;

	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
		/* Flush pending updates to the LRU lists */
		lru_add_drain_all();

		compact_node(nid);
	}

	return count;
}
static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);

int compaction_register_node(struct node *node)
{
	return device_create_file(&node->dev, &dev_attr_compact);
}

void compaction_unregister_node(struct node *node)
{
	return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */

#endif /* CONFIG_COMPACTION */