[PATCH] mm: __alloc_pages cleanup
Clean up of __alloc_pages.

Restoration of previous behaviour, plus further cleanups by introducing 'alloc_flags' and removing the last of should_reclaim_zone.

Signed-off-by: Rohit Seth <rohit.seth@intel.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
51c6f666fc
commit
7fb1d9fca5
@ -329,7 +329,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive,
|
|||||||
void build_all_zonelists(void);
|
void build_all_zonelists(void);
|
||||||
void wakeup_kswapd(struct zone *zone, int order);
|
void wakeup_kswapd(struct zone *zone, int order);
|
||||||
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
||||||
int alloc_type, int can_try_harder, gfp_t gfp_high);
|
int classzone_idx, int alloc_flags);
|
||||||
|
|
||||||
#ifdef CONFIG_HAVE_MEMORY_PRESENT
|
#ifdef CONFIG_HAVE_MEMORY_PRESENT
|
||||||
void memory_present(int nid, unsigned long start, unsigned long end);
|
void memory_present(int nid, unsigned long start, unsigned long end);
|
||||||
|
179
mm/page_alloc.c
179
mm/page_alloc.c
@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
|
|||||||
}
|
}
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
put_cpu();
|
put_cpu();
|
||||||
}
|
} else {
|
||||||
|
|
||||||
if (page == NULL) {
|
|
||||||
spin_lock_irqsave(&zone->lock, flags);
|
spin_lock_irqsave(&zone->lock, flags);
|
||||||
page = __rmqueue(zone, order);
|
page = __rmqueue(zone, order);
|
||||||
spin_unlock_irqrestore(&zone->lock, flags);
|
spin_unlock_irqrestore(&zone->lock, flags);
|
||||||
@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
|
|||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */
|
||||||
|
#define ALLOC_HARDER 0x02 /* try to alloc harder */
|
||||||
|
#define ALLOC_HIGH 0x04 /* __GFP_HIGH set */
|
||||||
|
#define ALLOC_CPUSET 0x08 /* check for correct cpuset */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return 1 if free pages are above 'mark'. This takes into account the order
|
* Return 1 if free pages are above 'mark'. This takes into account the order
|
||||||
* of the allocation.
|
* of the allocation.
|
||||||
*/
|
*/
|
||||||
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
||||||
int classzone_idx, int can_try_harder, gfp_t gfp_high)
|
int classzone_idx, int alloc_flags)
|
||||||
{
|
{
|
||||||
/* free_pages may go negative - that's OK */
|
/* free_pages may go negative - that's OK */
|
||||||
long min = mark, free_pages = z->free_pages - (1 << order) + 1;
|
long min = mark, free_pages = z->free_pages - (1 << order) + 1;
|
||||||
int o;
|
int o;
|
||||||
|
|
||||||
if (gfp_high)
|
if (alloc_flags & ALLOC_HIGH)
|
||||||
min -= min / 2;
|
min -= min / 2;
|
||||||
if (can_try_harder)
|
if (alloc_flags & ALLOC_HARDER)
|
||||||
min -= min / 4;
|
min -= min / 4;
|
||||||
|
|
||||||
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
|
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
|
||||||
@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int
|
/*
|
||||||
should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
|
* get_page_from_freelist goes through the zonelist trying to allocate
|
||||||
|
* a page.
|
||||||
|
*/
|
||||||
|
static struct page *
|
||||||
|
get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
|
||||||
|
struct zonelist *zonelist, int alloc_flags)
|
||||||
{
|
{
|
||||||
if (!z->reclaim_pages)
|
struct zone **z = zonelist->zones;
|
||||||
return 0;
|
struct page *page = NULL;
|
||||||
if (gfp_mask & __GFP_NORECLAIM)
|
int classzone_idx = zone_idx(*z);
|
||||||
return 0;
|
|
||||||
return 1;
|
/*
|
||||||
|
* Go through the zonelist once, looking for a zone with enough free.
|
||||||
|
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
|
||||||
|
*/
|
||||||
|
do {
|
||||||
|
if ((alloc_flags & ALLOC_CPUSET) &&
|
||||||
|
!cpuset_zone_allowed(*z, gfp_mask))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
|
||||||
|
if (!zone_watermark_ok(*z, order, (*z)->pages_low,
|
||||||
|
classzone_idx, alloc_flags))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
page = buffered_rmqueue(*z, order, gfp_mask);
|
||||||
|
if (page) {
|
||||||
|
zone_statistics(zonelist, *z);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (*(++z) != NULL);
|
||||||
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
|
|||||||
struct zonelist *zonelist)
|
struct zonelist *zonelist)
|
||||||
{
|
{
|
||||||
const gfp_t wait = gfp_mask & __GFP_WAIT;
|
const gfp_t wait = gfp_mask & __GFP_WAIT;
|
||||||
struct zone **zones, *z;
|
struct zone **z;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
struct reclaim_state reclaim_state;
|
struct reclaim_state reclaim_state;
|
||||||
struct task_struct *p = current;
|
struct task_struct *p = current;
|
||||||
int i;
|
|
||||||
int classzone_idx;
|
|
||||||
int do_retry;
|
int do_retry;
|
||||||
int can_try_harder;
|
int alloc_flags;
|
||||||
int did_some_progress;
|
int did_some_progress;
|
||||||
|
|
||||||
might_sleep_if(wait);
|
might_sleep_if(wait);
|
||||||
|
|
||||||
/*
|
z = zonelist->zones; /* the list of zones suitable for gfp_mask */
|
||||||
* The caller may dip into page reserves a bit more if the caller
|
|
||||||
* cannot run direct reclaim, or is the caller has realtime scheduling
|
|
||||||
* policy
|
|
||||||
*/
|
|
||||||
can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
|
|
||||||
|
|
||||||
zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
|
if (unlikely(*z == NULL)) {
|
||||||
|
|
||||||
if (unlikely(zones[0] == NULL)) {
|
|
||||||
/* Should this ever happen?? */
|
/* Should this ever happen?? */
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
classzone_idx = zone_idx(zones[0]);
|
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
/*
|
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
|
||||||
* Go through the zonelist once, looking for a zone with enough free.
|
zonelist, ALLOC_CPUSET);
|
||||||
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
|
|
||||||
*/
|
|
||||||
for (i = 0; (z = zones[i]) != NULL; i++) {
|
|
||||||
int do_reclaim = should_reclaim_zone(z, gfp_mask);
|
|
||||||
|
|
||||||
if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the zone is to attempt early page reclaim then this loop
|
|
||||||
* will try to reclaim pages and check the watermark a second
|
|
||||||
* time before giving up and falling back to the next zone.
|
|
||||||
*/
|
|
||||||
zone_reclaim_retry:
|
|
||||||
if (!zone_watermark_ok(z, order, z->pages_low,
|
|
||||||
classzone_idx, 0, 0)) {
|
|
||||||
if (!do_reclaim)
|
|
||||||
continue;
|
|
||||||
else {
|
|
||||||
zone_reclaim(z, gfp_mask, order);
|
|
||||||
/* Only try reclaim once */
|
|
||||||
do_reclaim = 0;
|
|
||||||
goto zone_reclaim_retry;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
page = buffered_rmqueue(z, order, gfp_mask);
|
|
||||||
if (page)
|
if (page)
|
||||||
goto got_pg;
|
goto got_pg;
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; (z = zones[i]) != NULL; i++)
|
do
|
||||||
wakeup_kswapd(z, order);
|
wakeup_kswapd(*z, order);
|
||||||
|
while (*(++z));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* OK, we're below the kswapd watermark and have kicked background
|
||||||
|
* reclaim. Now things get more complex, so set up alloc_flags according
|
||||||
|
* to how we want to proceed.
|
||||||
|
*
|
||||||
|
* The caller may dip into page reserves a bit more if the caller
|
||||||
|
* cannot run direct reclaim, or if the caller has realtime scheduling
|
||||||
|
* policy.
|
||||||
|
*/
|
||||||
|
alloc_flags = 0;
|
||||||
|
if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
|
||||||
|
alloc_flags |= ALLOC_HARDER;
|
||||||
|
if (gfp_mask & __GFP_HIGH)
|
||||||
|
alloc_flags |= ALLOC_HIGH;
|
||||||
|
if (wait)
|
||||||
|
alloc_flags |= ALLOC_CPUSET;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Go through the zonelist again. Let __GFP_HIGH and allocations
|
* Go through the zonelist again. Let __GFP_HIGH and allocations
|
||||||
* coming from realtime tasks to go deeper into reserves
|
* coming from realtime tasks go deeper into reserves.
|
||||||
*
|
*
|
||||||
* This is the last chance, in general, before the goto nopage.
|
* This is the last chance, in general, before the goto nopage.
|
||||||
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
|
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
|
||||||
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
|
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
|
||||||
*/
|
*/
|
||||||
for (i = 0; (z = zones[i]) != NULL; i++) {
|
page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
|
||||||
if (!zone_watermark_ok(z, order, z->pages_min,
|
|
||||||
classzone_idx, can_try_harder,
|
|
||||||
gfp_mask & __GFP_HIGH))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (wait && !cpuset_zone_allowed(z, gfp_mask))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
page = buffered_rmqueue(z, order, gfp_mask);
|
|
||||||
if (page)
|
if (page)
|
||||||
goto got_pg;
|
goto got_pg;
|
||||||
}
|
|
||||||
|
|
||||||
/* This allocation should allow future memory freeing. */
|
/* This allocation should allow future memory freeing. */
|
||||||
|
|
||||||
@ -897,13 +894,10 @@ zone_reclaim_retry:
|
|||||||
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
|
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
|
||||||
nofail_alloc:
|
nofail_alloc:
|
||||||
/* go through the zonelist yet again, ignoring mins */
|
/* go through the zonelist yet again, ignoring mins */
|
||||||
for (i = 0; (z = zones[i]) != NULL; i++) {
|
page = get_page_from_freelist(gfp_mask, order,
|
||||||
if (!cpuset_zone_allowed(z, gfp_mask))
|
zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
|
||||||
continue;
|
|
||||||
page = buffered_rmqueue(z, order, gfp_mask);
|
|
||||||
if (page)
|
if (page)
|
||||||
goto got_pg;
|
goto got_pg;
|
||||||
}
|
|
||||||
if (gfp_mask & __GFP_NOFAIL) {
|
if (gfp_mask & __GFP_NOFAIL) {
|
||||||
blk_congestion_wait(WRITE, HZ/50);
|
blk_congestion_wait(WRITE, HZ/50);
|
||||||
goto nofail_alloc;
|
goto nofail_alloc;
|
||||||
@ -924,7 +918,7 @@ rebalance:
|
|||||||
reclaim_state.reclaimed_slab = 0;
|
reclaim_state.reclaimed_slab = 0;
|
||||||
p->reclaim_state = &reclaim_state;
|
p->reclaim_state = &reclaim_state;
|
||||||
|
|
||||||
did_some_progress = try_to_free_pages(zones, gfp_mask);
|
did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
|
||||||
|
|
||||||
p->reclaim_state = NULL;
|
p->reclaim_state = NULL;
|
||||||
p->flags &= ~PF_MEMALLOC;
|
p->flags &= ~PF_MEMALLOC;
|
||||||
@ -932,19 +926,10 @@ rebalance:
|
|||||||
cond_resched();
|
cond_resched();
|
||||||
|
|
||||||
if (likely(did_some_progress)) {
|
if (likely(did_some_progress)) {
|
||||||
for (i = 0; (z = zones[i]) != NULL; i++) {
|
page = get_page_from_freelist(gfp_mask, order,
|
||||||
if (!zone_watermark_ok(z, order, z->pages_min,
|
zonelist, alloc_flags);
|
||||||
classzone_idx, can_try_harder,
|
|
||||||
gfp_mask & __GFP_HIGH))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!cpuset_zone_allowed(z, gfp_mask))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
page = buffered_rmqueue(z, order, gfp_mask);
|
|
||||||
if (page)
|
if (page)
|
||||||
goto got_pg;
|
goto got_pg;
|
||||||
}
|
|
||||||
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
|
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
|
||||||
/*
|
/*
|
||||||
* Go through the zonelist yet one more time, keep
|
* Go through the zonelist yet one more time, keep
|
||||||
@ -952,18 +937,10 @@ rebalance:
|
|||||||
* a parallel oom killing, we must fail if we're still
|
* a parallel oom killing, we must fail if we're still
|
||||||
* under heavy pressure.
|
* under heavy pressure.
|
||||||
*/
|
*/
|
||||||
for (i = 0; (z = zones[i]) != NULL; i++) {
|
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
|
||||||
if (!zone_watermark_ok(z, order, z->pages_high,
|
zonelist, ALLOC_CPUSET);
|
||||||
classzone_idx, 0, 0))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
page = buffered_rmqueue(z, order, gfp_mask);
|
|
||||||
if (page)
|
if (page)
|
||||||
goto got_pg;
|
goto got_pg;
|
||||||
}
|
|
||||||
|
|
||||||
out_of_memory(gfp_mask, order);
|
out_of_memory(gfp_mask, order);
|
||||||
goto restart;
|
goto restart;
|
||||||
@ -996,9 +973,7 @@ nopage:
|
|||||||
dump_stack();
|
dump_stack();
|
||||||
show_mem();
|
show_mem();
|
||||||
}
|
}
|
||||||
return NULL;
|
|
||||||
got_pg:
|
got_pg:
|
||||||
zone_statistics(zonelist, z);
|
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1074,7 +1074,7 @@ loop_again:
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!zone_watermark_ok(zone, order,
|
if (!zone_watermark_ok(zone, order,
|
||||||
zone->pages_high, 0, 0, 0)) {
|
zone->pages_high, 0, 0)) {
|
||||||
end_zone = i;
|
end_zone = i;
|
||||||
goto scan;
|
goto scan;
|
||||||
}
|
}
|
||||||
@ -1111,7 +1111,7 @@ scan:
|
|||||||
|
|
||||||
if (nr_pages == 0) { /* Not software suspend */
|
if (nr_pages == 0) { /* Not software suspend */
|
||||||
if (!zone_watermark_ok(zone, order,
|
if (!zone_watermark_ok(zone, order,
|
||||||
zone->pages_high, end_zone, 0, 0))
|
zone->pages_high, end_zone, 0))
|
||||||
all_zones_ok = 0;
|
all_zones_ok = 0;
|
||||||
}
|
}
|
||||||
zone->temp_priority = priority;
|
zone->temp_priority = priority;
|
||||||
@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
pgdat = zone->zone_pgdat;
|
pgdat = zone->zone_pgdat;
|
||||||
if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
|
if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
|
||||||
return;
|
return;
|
||||||
if (pgdat->kswapd_max_order < order)
|
if (pgdat->kswapd_max_order < order)
|
||||||
pgdat->kswapd_max_order = order;
|
pgdat->kswapd_max_order = order;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user