/* 2005-04-16 15:20:36 -07:00 */
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * This file contains NUMA specific variables and functions which can
 * be split away from DISCONTIGMEM and are used on NUMA machines with
 * contiguous memory.
 *
 * 2002/08/07 Erich Focht <efocht@ess.nec.de>
 */
# include <linux/cpu.h>
# include <linux/kernel.h>
# include <linux/mm.h>
# include <linux/node.h>
# include <linux/init.h>
# include <linux/bootmem.h>
/* 2006-09-30 23:27:07 -07:00 */
# include <linux/module.h>
/* 2005-04-16 15:20:36 -07:00 */
# include <asm/mmzone.h>
# include <asm/numa.h>
/*
* The following structures are usually initialized by ACPI or
* similar mechanisms and describe the NUMA characteristics of the machine .
*/
int num_node_memblks ;
struct node_memblk_s node_memblk [ NR_NODE_MEMBLKS ] ;
2008-04-03 15:17:13 -05:00
struct node_cpuid_s node_cpuid [ NR_CPUS ] =
{ [ 0 . . . NR_CPUS - 1 ] = { . phys_id = 0 , . nid = NUMA_NO_NODE } } ;
/* 2005-04-16 15:20:36 -07:00 */
/*
 * This is a matrix with "distances" between nodes, they should be
 * proportional to the memory access latency ratios.
 * Stored flattened: numa_slit[from * MAX_NUMNODES + to].
 */
u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
/*
 * Identify which cnode a physical address resides on.
 *
 * Scans node_memblk[] for a block containing @paddr.  Returns the
 * owning node id on a hit; on a miss returns -1 when NUMA information
 * exists, or node 0 when no memory blocks are registered (non-NUMA
 * boot), so callers always get a usable default in that case.
 */
int
paddr_to_nid(unsigned long paddr)
{
	int i;

	for (i = 0; i < num_node_memblks; i++)
		if (paddr >= node_memblk[i].start_paddr &&
		    paddr < node_memblk[i].start_paddr + node_memblk[i].size)
			break;

	return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);
}
/* 2005-10-04 15:13:57 -04:00 */
# if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA)
/*
 * Because of holes evaluate on section limits.
 * If the section of memory exists, then return the node where the section
 * resides.  Otherwise return node 0 as the default.  This is used by
 * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
 * the section resides.
 */
/*
 * NOTE(review): the text below is a git commit message that was pasted
 * into the source during extraction; preserved here as a comment.
 *
 * mm: clean up for early_pfn_to_nid()
 *
 * What's happening is that the assertion in mm/page_alloc.c:move_freepages()
 * is triggering:
 *   BUG_ON(page_zone(start_page) != page_zone(end_page));
 * Once I knew this is what was happening, I added some annotations:
 *   if (unlikely(page_zone(start_page) != page_zone(end_page))) {
 *       printk(KERN_ERR "move_freepages: Bogus zones: "
 *              "start_page[%p] end_page[%p] zone[%p]\n",
 *              start_page, end_page, zone);
 *       printk(KERN_ERR "move_freepages: "
 *              "start_zone[%p] end_zone[%p]\n",
 *              page_zone(start_page), page_zone(end_page));
 *       printk(KERN_ERR "move_freepages: "
 *              "start_pfn[0x%lx] end_pfn[0x%lx]\n",
 *              page_to_pfn(start_page), page_to_pfn(end_page));
 *       printk(KERN_ERR "move_freepages: "
 *              "start_nid[%d] end_nid[%d]\n",
 *              page_to_nid(start_page), page_to_nid(end_page));
 *       ...
 * And here's what I got:
 *   move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00]
 *   move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00]
 *   move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff]
 *   move_freepages: start_nid[1] end_nid[0]
 * My memory layout on this box is:
 *   [    0.000000] Zone PFN ranges:
 *   [    0.000000]   Normal   0x00000000 -> 0x0081ff5d
 *   [    0.000000] Movable zone start PFN for each node
 *   [    0.000000] early_node_map[8] active PFN ranges
 *   [    0.000000]     0: 0x00000000 -> 0x00020000
 *   [    0.000000]     1: 0x00800000 -> 0x0081f7ff
 *   [    0.000000]     1: 0x0081f800 -> 0x0081fe50
 *   [    0.000000]     1: 0x0081fed1 -> 0x0081fed8
 *   [    0.000000]     1: 0x0081feda -> 0x0081fedb
 *   [    0.000000]     1: 0x0081fedd -> 0x0081fee5
 *   [    0.000000]     1: 0x0081fee7 -> 0x0081ff51
 *   [    0.000000]     1: 0x0081ff59 -> 0x0081ff5d
 * So it's a block move in that 0x81f600-->0x81f7ff region which triggers
 * the problem.
 *
 * This patch:
 * Declaration of early_pfn_to_nid() is scattered over per-arch include
 * files, and it seems it's complicated to know when the declaration is used.
 * I think it makes fix-for-memmap-init not easy.
 * This patch moves all declaration to include/linux/mm.h
 * After this,
 *  - if !CONFIG_NODES_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
 *      -> Use static definition in include/linux/mm.h
 *  - else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
 *      -> Use generic definition in mm/page_alloc.c
 *  - else
 *      -> per-arch back end function will be called.
 *
 * Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
 * Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
 * Reported-by: David Miller <davem@davemlloft.net>
 * Cc: Mel Gorman <mel@csn.ul.ie>
 * Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
 * Cc: <stable@kernel.org>        [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x]
 * Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 * Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 */
/* 2009-02-18 14:48:32 -08:00 */
/*
 * Section-granularity pfn-to-node lookup used by SPARSEMEM during
 * early boot.  Each memory block is rounded out to section boundaries
 * (the end is rounded up) so that holes within a section still map to
 * the node owning that section.
 *
 * Returns the node id, or -1 when the pfn is in no registered block
 * (the generic code then falls back to a default node).
 */
int __meminit __early_pfn_to_nid(unsigned long pfn)
{
	int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;

	for (i = 0; i < num_node_memblks; i++) {
		ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
		esec = (node_memblk[i].start_paddr + node_memblk[i].size +
			((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
		if (section >= ssec && section < esec)
			return node_memblk[i].nid;
	}

	return -1;
}
/* 2006-09-30 23:27:07 -07:00 */
# ifdef CONFIG_MEMORY_HOTPLUG
/*
 * SRAT information is stored in node_memblk[], then we can use SRAT
 * information at memory-hot-add if necessary.
 */
/*
 * Map a hot-added physical address to its node using the SRAT data in
 * node_memblk[].  Falls back to node 0 when paddr_to_nid() reports the
 * address as unknown, so hot-add always gets a valid node.
 */
int memory_add_physaddr_to_nid(u64 addr)
{
	int nid = paddr_to_nid(addr);

	if (nid < 0)
		return 0;
	return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
# endif
/* 2005-10-04 15:13:57 -04:00 */
# endif