2005-04-17 02:20:36 +04:00
/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */
# include <linux/kernel.h>
# include <linux/acpi.h>
# include <linux/mmzone.h>
# include <linux/bitmap.h>
# include <linux/module.h>
# include <linux/topology.h>
2006-04-07 21:49:18 +04:00
# include <linux/bootmem.h>
2010-08-26 00:39:17 +04:00
# include <linux/memblock.h>
2006-04-07 21:49:18 +04:00
# include <linux/mm.h>
2005-04-17 02:20:36 +04:00
# include <asm/proto.h>
# include <asm/numa.h>
2006-01-12 00:44:39 +03:00
# include <asm/e820.h>
2009-02-17 15:58:15 +03:00
# include <asm/apic.h>
2009-01-21 12:24:27 +03:00
# include <asm/uv/uv.h>
2005-04-17 02:20:36 +04:00
2006-09-26 12:52:33 +04:00
/*
 * ACPI NUMA state flag for this file: bad_srat() stores -1, the CPU affinity
 * callbacks store 1 after recording a mapping, and srat_disabled() tests
 * for a negative value.
 */
int acpi_numa __initdata ;
2005-04-17 02:20:36 +04:00
/*
 * Translate proximity domain @pxm into a node id by asking
 * acpi_map_pxm_to_node(). The callers in this file treat a negative
 * result as failure.
 */
static __init int setup_node(int pxm)
{
	const int node = acpi_map_pxm_to_node(pxm);

	return node;
}
/*
 * Report that the SRAT will not be used and disable ACPI NUMA setup by
 * making acpi_numa negative, which is the condition srat_disabled() tests.
 */
static __init void bad_srat(void)
{
	/* The message must end right at the newline: no trailing fragment. */
	printk(KERN_ERR "SRAT: SRAT not used.\n");
	acpi_numa = -1;
}
static __init inline int srat_disabled ( void )
{
2011-02-16 14:13:06 +03:00
return acpi_numa < 0 ;
2005-04-17 02:20:36 +04:00
}
/*
 * Callback for SLIT parsing.
 *
 * Walks the locality_count x locality_count matrix stored row-major in
 * slit->entry (entry[locality_count * i + j]) and records each value as the
 * distance between the nodes that proximity domains i and j map to.
 *
 * Proximity domains without a node mapping are skipped so that no invalid
 * node id is handed to numa_set_distance().
 */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
	int i, j;

	for (i = 0; i < slit->locality_count; i++) {
		const int from_node = pxm_to_node(i);

		/*
		 * NOTE(review): a negative value is taken to mean "no node for
		 * this PXM", matching the node < 0 failure checks used for
		 * PXM lookups elsewhere in this file.
		 */
		if (from_node < 0)
			continue;

		for (j = 0; j < slit->locality_count; j++) {
			const int to_node = pxm_to_node(j);

			if (to_node < 0)
				continue;

			numa_set_distance(from_node, to_node,
				slit->entry[slit->locality_count * i + j]);
		}
	}
}
2009-03-31 01:55:30 +04:00
/*
 * Callback for Proximity Domain -> x2APIC mapping.
 *
 * Validates one SRAT x2APIC affinity entry and, if it is usable, binds its
 * APIC ID to the node of its proximity domain, marks that node in
 * numa_nodes_parsed and sets acpi_numa to 1.
 *
 * The entry is silently ignored when SRAT handling is already disabled or
 * the entry is not flagged ACPI_SRAT_CPU_ENABLED. A too-short entry, or a
 * proximity domain that cannot be mapped to a node, disables SRAT usage via
 * bad_srat(). An APIC ID rejected by the APIC driver or outside
 * [0, MAX_LOCAL_APIC) is logged and skipped.
 */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
{
	int pxm, node;
	int apic_id;

	if (srat_disabled())
		return;
	if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
		bad_srat();
		return;
	}
	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
		return;

	pxm = pa->proximity_domain;
	apic_id = pa->apic_id;
	if (!apic->apic_id_valid(apic_id)) {
		printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
		       pxm, apic_id);
		return;
	}

	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
		bad_srat();
		return;
	}

	/*
	 * apic_id is a signed int: a table value with the top bit set turns
	 * negative and would slip past a plain upper-bound test, so reject
	 * negative ids explicitly before using the value as an APIC ID.
	 */
	if (apic_id < 0 || apic_id >= MAX_LOCAL_APIC) {
		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
		return;
	}

	set_apicid_to_node(apic_id, node);
	node_set(node, numa_nodes_parsed);
	acpi_numa = 1;
	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
	       pxm, apic_id, node);
}
2005-04-17 02:20:36 +04:00
/* Callback for Proximity Domain -> LAPIC mapping */
void __init
2007-02-02 19:48:22 +03:00
acpi_numa_processor_affinity_init ( struct acpi_srat_cpu_affinity * pa )
2005-04-17 02:20:36 +04:00
{
int pxm , node ;
2008-01-30 15:33:10 +03:00
int apic_id ;
2006-02-03 23:51:26 +03:00
if ( srat_disabled ( ) )
return ;
2007-02-02 19:48:22 +03:00
if ( pa - > header . length ! = sizeof ( struct acpi_srat_cpu_affinity ) ) {
2006-05-15 20:19:44 +04:00
bad_srat ( ) ;
2006-02-03 23:51:26 +03:00
return ;
}
2007-02-02 19:48:22 +03:00
if ( ( pa - > flags & ACPI_SRAT_CPU_ENABLED ) = = 0 )
2005-04-17 02:20:36 +04:00
return ;
2007-02-02 19:48:22 +03:00
pxm = pa - > proximity_domain_lo ;
2012-01-17 13:20:31 +04:00
if ( acpi_srat_revision > = 2 )
pxm | = * ( ( unsigned int * ) pa - > proximity_domain_hi ) < < 8 ;
2005-04-17 02:20:36 +04:00
node = setup_node ( pxm ) ;
if ( node < 0 ) {
printk ( KERN_ERR " SRAT: Too many proximity domains %x \n " , pxm ) ;
bad_srat ( ) ;
return ;
}
2008-02-17 10:00:22 +03:00
2008-09-24 00:37:13 +04:00
if ( get_uv_system_type ( ) > = UV_X2APIC )
2008-03-28 22:12:08 +03:00
apic_id = ( pa - > apic_id < < 8 ) | pa - > local_sapic_eid ;
else
apic_id = pa - > apic_id ;
2010-12-17 06:09:58 +03:00
if ( apic_id > = MAX_LOCAL_APIC ) {
printk ( KERN_INFO " SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big \n " , pxm , apic_id , node ) ;
return ;
}
2011-01-23 16:37:39 +03:00
set_apicid_to_node ( apic_id , node ) ;
2011-02-16 19:11:09 +03:00
node_set ( node , numa_nodes_parsed ) ;
2005-04-17 02:20:36 +04:00
acpi_numa = 1 ;
2009-11-21 11:23:37 +03:00
printk ( KERN_INFO " SRAT: PXM %u -> APIC 0x%02x -> Node %u \n " ,
2008-01-30 15:33:10 +03:00
pxm , apic_id , node ) ;
2005-04-17 02:20:36 +04:00
}
2011-05-02 16:18:51 +04:00
/*
 * Returns 1 when the kernel is built with CONFIG_MEMORY_HOTPLUG and 0
 * otherwise. The memory affinity callback uses this to decide whether
 * hot-pluggable SRAT ranges are accepted.
 */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	return 1;
#else
	return 0;
#endif
}
2006-04-07 21:49:18 +04:00
2005-04-17 02:20:36 +04:00
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
2012-07-31 19:41:09 +04:00
int __init
2007-02-02 19:48:22 +03:00
acpi_numa_memory_affinity_init ( struct acpi_srat_mem_affinity * ma )
2005-04-17 02:20:36 +04:00
{
2011-05-02 16:18:52 +04:00
u64 start , end ;
2005-04-17 02:20:36 +04:00
int node , pxm ;
2006-02-03 23:51:26 +03:00
if ( srat_disabled ( ) )
2012-07-31 19:41:09 +04:00
return - 1 ;
2007-02-02 19:48:22 +03:00
if ( ma - > header . length ! = sizeof ( struct acpi_srat_mem_affinity ) ) {
2006-02-03 23:51:26 +03:00
bad_srat ( ) ;
2012-07-31 19:41:09 +04:00
return - 1 ;
2006-02-03 23:51:26 +03:00
}
2007-02-02 19:48:22 +03:00
if ( ( ma - > flags & ACPI_SRAT_MEM_ENABLED ) = = 0 )
2012-07-31 19:41:09 +04:00
return - 1 ;
2007-02-02 19:48:22 +03:00
if ( ( ma - > flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ) & & ! save_add_info ( ) )
2012-07-31 19:41:09 +04:00
return - 1 ;
2007-02-02 19:48:22 +03:00
start = ma - > base_address ;
end = start + ma - > length ;
2005-04-17 02:20:36 +04:00
pxm = ma - > proximity_domain ;
2012-01-17 13:20:31 +04:00
if ( acpi_srat_revision < = 1 )
pxm & = 0xff ;
2005-04-17 02:20:36 +04:00
node = setup_node ( pxm ) ;
if ( node < 0 ) {
printk ( KERN_ERR " SRAT: Too many proximity domains. \n " ) ;
bad_srat ( ) ;
2012-07-31 19:41:09 +04:00
return - 1 ;
2005-04-17 02:20:36 +04:00
}
2011-02-16 19:11:07 +03:00
if ( numa_add_memblk ( node , start , end ) < 0 ) {
2005-04-17 02:20:36 +04:00
bad_srat ( ) ;
2012-07-31 19:41:09 +04:00
return - 1 ;
2005-04-17 02:20:36 +04:00
}
2006-04-07 21:49:18 +04:00
x86/numa: Set numa_nodes_parsed at acpi_numa_memory_affinity_init()
When hot-adding a CPU, the system outputs following messages
since node_to_cpumask_map[2] was not allocated memory.
Booting Node 2 Processor 32 APIC 0xc0
node_to_cpumask_map[2] NULL
Pid: 0, comm: swapper/32 Tainted: G A 3.3.5-acd #21
Call Trace:
[<ffffffff81048845>] debug_cpumask_set_cpu+0x155/0x160
[<ffffffff8105e28a>] ? add_timer_on+0xaa/0x120
[<ffffffff8150665f>] numa_add_cpu+0x1e/0x22
[<ffffffff815020bb>] identify_cpu+0x1df/0x1e4
[<ffffffff815020d6>] identify_econdary_cpu+0x16/0x1d
[<ffffffff81504614>] smp_store_cpu_info+0x3c/0x3e
[<ffffffff81505263>] smp_callin+0x139/0x1be
[<ffffffff815052fb>] start_secondary+0x13/0xeb
The reason is that the bit of node 2 was not set at
numa_nodes_parsed. numa_nodes_parsed is set by only
acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init. Thus even if hot-added memory
which is same PXM as hot-added CPU is written in ACPI SRAT
Table, if the hot-added CPU is not written in ACPI SRAT table,
numa_nodes_parsed is not set.
But according to ACPI Spec Rev 5.0, it says about ACPI SRAT
table as follows: This optional table provides information that
allows OSPM to associate processors and memory ranges, including
ranges of memory provided by hot-added memory devices, with
system localities / proximity domains and clock domains.
It means that ACPI SRAT table only provides information for CPUs
present at boot time and for memory including hot-added memory.
So hot-added memory is written in ACPI SRAT table, but hot-added
CPU is not written in it. Thus numa_nodes_parsed should be set
by not only acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init but also
acpi_numa_memory_affinity_init for the case.
Additionally, if system has cpuless memory node,
acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init cannot set numa_nodes_parseds
since these functions cannot find cpu description for the node.
In this case, numa_nodes_parsed needs to be set by
acpi_numa_memory_affinity_init.
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: liuj97@gmail.com
Cc: kosaki.motohiro@gmail.com
Link: http://lkml.kernel.org/r/4FCC2098.4030007@jp.fujitsu.com
[ merged it ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2012-06-04 06:42:32 +04:00
node_set ( node , numa_nodes_parsed ) ;
2012-05-30 02:06:29 +04:00
printk ( KERN_INFO " SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] \n " ,
node , pxm ,
( unsigned long long ) start , ( unsigned long long ) end - 1 ) ;
2012-07-31 19:41:09 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
/* Empty: no fixup work is performed in this file. */
void __init acpi_numa_arch_fixup(void)
{
}
2011-02-16 14:13:06 +03:00
/*
 * Run acpi_numa_init() and report whether the SRAT ended up usable.
 *
 * Returns the negative value from acpi_numa_init() if it fails, -EINVAL if
 * srat_disabled() is true afterwards, and 0 otherwise.
 */
int __init x86_acpi_numa_init(void)
{
	const int err = acpi_numa_init();

	if (err < 0)
		return err;

	if (srat_disabled())
		return -EINVAL;

	return 0;
}