2005-04-16 15:20:36 -07:00
/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */
# include <linux/kernel.h>
# include <linux/acpi.h>
# include <linux/mmzone.h>
# include <linux/bitmap.h>
# include <linux/module.h>
# include <linux/topology.h>
2006-04-07 19:49:18 +02:00
# include <linux/bootmem.h>
2010-08-25 13:39:17 -07:00
# include <linux/memblock.h>
2006-04-07 19:49:18 +02:00
# include <linux/mm.h>
2005-04-16 15:20:36 -07:00
# include <asm/proto.h>
# include <asm/numa.h>
2006-01-11 22:44:39 +01:00
# include <asm/e820.h>
2009-02-17 13:58:15 +01:00
# include <asm/apic.h>
2009-01-21 10:24:27 +01:00
# include <asm/uv/uv.h>
2005-04-16 15:20:36 -07:00
2006-09-26 10:52:33 +02:00
/*
 * SRAT parsing state for this file: bad_srat() sets it to -1 when the table
 * is rejected, the CPU affinity callbacks set it to 1 once an entry has been
 * accepted, and srat_disabled() treats any negative value as "disabled".
 */
int acpi_numa __initdata ;
2005-04-16 15:20:36 -07:00
/*
 * Translate an ACPI proximity domain (pxm) into a node id by delegating to
 * acpi_map_pxm_to_node().  Callers in this file treat a negative result as
 * "too many proximity domains".
 */
static __init int setup_node(int pxm)
{
	const int nid = acpi_map_pxm_to_node(pxm);

	return nid;
}
/*
 * Reject the SRAT: log the fact and flag acpi_numa as negative so that
 * srat_disabled() reports true from now on.
 */
static __init void bad_srat(void)
{
	printk(KERN_ERR " SRAT: SRAT not used. \n ");
	acpi_numa = -1;
}
static __init inline int srat_disabled ( void )
{
2011-02-16 12:13:06 +01:00
return acpi_numa < 0 ;
2005-04-16 15:20:36 -07:00
}
2014-01-21 14:33:15 -08:00
/*
* Callback for SLIT parsing . pxm_to_node ( ) returns NUMA_NO_NODE for
* I / O localities since SRAT does not list them . I / O localities are
* not supported at this point .
*/
2005-04-16 15:20:36 -07:00
void __init acpi_numa_slit_init ( struct acpi_table_slit * slit )
{
2011-02-16 17:11:09 +01:00
int i , j ;
2008-07-10 20:36:37 -07:00
2014-01-21 14:33:15 -08:00
for ( i = 0 ; i < slit - > locality_count ; i + + ) {
2014-01-26 13:01:42 -08:00
const int from_node = pxm_to_node ( i ) ;
if ( from_node = = NUMA_NO_NODE )
2014-01-21 14:33:15 -08:00
continue ;
2014-01-26 13:01:42 -08:00
2014-01-21 14:33:15 -08:00
for ( j = 0 ; j < slit - > locality_count ; j + + ) {
2014-01-26 13:01:42 -08:00
const int to_node = pxm_to_node ( j ) ;
if ( to_node = = NUMA_NO_NODE )
2014-01-21 14:33:15 -08:00
continue ;
2014-01-26 13:01:42 -08:00
numa_set_distance ( from_node , to_node ,
2011-02-16 17:11:09 +01:00
slit - > entry [ slit - > locality_count * i + j ] ) ;
2014-01-21 14:33:15 -08:00
}
}
2005-04-16 15:20:36 -07:00
}
2009-03-30 13:55:30 -08:00
/*
 * Callback for Proximity Domain -> x2APIC mapping.
 *
 * @pa: SRAT x2APIC CPU affinity entry being parsed.
 *
 * Does nothing if the SRAT has already been rejected or the entry is not
 * flagged ACPI_SRAT_CPU_ENABLED.  An entry shorter than the expected
 * structure rejects the whole SRAT via bad_srat(), as does a proximity
 * domain that setup_node() cannot map.  On success the APIC id is bound to
 * the node, the node is marked in numa_nodes_parsed and acpi_numa is set
 * to 1.
 */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
{
	int pxm, node;
	int apic_id;

	if (srat_disabled())
		return;
	if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
		bad_srat();
		return;
	}
	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
		return;

	pxm = pa->proximity_domain;
	apic_id = pa->apic_id;
	if (!apic->apic_id_valid(apic_id)) {
		printk(KERN_INFO " SRAT: PXM %u -> X2APIC 0x%04x ignored \n ",
		       pxm, apic_id);
		return;
	}

	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR " SRAT: Too many proximity domains %x \n ", pxm);
		bad_srat();
		return;
	}

	/*
	 * apic_id is a signed int filled from the table, so a firmware value
	 * above INT_MAX shows up negative here.  Reject it together with ids
	 * that are too large; otherwise it would slip past the upper-bound
	 * check and be handed to set_apicid_to_node() as a negative id.
	 */
	if (apic_id < 0 || apic_id >= MAX_LOCAL_APIC) {
		printk(KERN_INFO " SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big \n ", pxm, apic_id, node);
		return;
	}

	set_apicid_to_node(apic_id, node);
	node_set(node, numa_nodes_parsed);
	acpi_numa = 1;
	printk(KERN_INFO " SRAT: PXM %u -> APIC 0x%04x -> Node %u \n ",
	       pxm, apic_id, node);
}
2005-04-16 15:20:36 -07:00
/* Callback for Proximity Domain -> LAPIC mapping */
void __init
2007-02-02 19:48:22 +03:00
acpi_numa_processor_affinity_init ( struct acpi_srat_cpu_affinity * pa )
2005-04-16 15:20:36 -07:00
{
int pxm , node ;
2008-01-30 13:33:10 +01:00
int apic_id ;
2006-02-03 21:51:26 +01:00
if ( srat_disabled ( ) )
return ;
2007-02-02 19:48:22 +03:00
if ( pa - > header . length ! = sizeof ( struct acpi_srat_cpu_affinity ) ) {
2006-05-15 18:19:44 +02:00
bad_srat ( ) ;
2006-02-03 21:51:26 +01:00
return ;
}
2007-02-02 19:48:22 +03:00
if ( ( pa - > flags & ACPI_SRAT_CPU_ENABLED ) = = 0 )
2005-04-16 15:20:36 -07:00
return ;
2007-02-02 19:48:22 +03:00
pxm = pa - > proximity_domain_lo ;
2012-01-17 04:20:31 -05:00
if ( acpi_srat_revision > = 2 )
pxm | = * ( ( unsigned int * ) pa - > proximity_domain_hi ) < < 8 ;
2005-04-16 15:20:36 -07:00
node = setup_node ( pxm ) ;
if ( node < 0 ) {
printk ( KERN_ERR " SRAT: Too many proximity domains %x \n " , pxm ) ;
bad_srat ( ) ;
return ;
}
2008-02-16 23:00:22 -08:00
2008-09-23 15:37:13 -05:00
if ( get_uv_system_type ( ) > = UV_X2APIC )
2008-03-28 14:12:08 -05:00
apic_id = ( pa - > apic_id < < 8 ) | pa - > local_sapic_eid ;
else
apic_id = pa - > apic_id ;
2010-12-16 19:09:58 -08:00
if ( apic_id > = MAX_LOCAL_APIC ) {
printk ( KERN_INFO " SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big \n " , pxm , apic_id , node ) ;
return ;
}
2011-01-23 14:37:39 +01:00
set_apicid_to_node ( apic_id , node ) ;
2011-02-16 17:11:09 +01:00
node_set ( node , numa_nodes_parsed ) ;
2005-04-16 15:20:36 -07:00
acpi_numa = 1 ;
2009-11-21 00:23:37 -08:00
printk ( KERN_INFO " SRAT: PXM %u -> APIC 0x%02x -> Node %u \n " ,
2008-01-30 13:33:10 +01:00
pxm , apic_id , node ) ;
2005-04-16 15:20:36 -07:00
}
2011-05-02 14:18:51 +02:00
/*
 * Returns 1 when the kernel is built with CONFIG_MEMORY_HOTPLUG, 0 otherwise.
 * acpi_numa_memory_affinity_init() uses it to decide whether hot-pluggable
 * SRAT memory ranges are recorded at all.
 */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	return 1;
#else
	return 0;
#endif
}
2006-04-07 19:49:18 +02:00
2005-04-16 15:20:36 -07:00
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
2012-07-31 17:41:09 +02:00
int __init
2007-02-02 19:48:22 +03:00
acpi_numa_memory_affinity_init ( struct acpi_srat_mem_affinity * ma )
2005-04-16 15:20:36 -07:00
{
2011-05-02 14:18:52 +02:00
u64 start , end ;
2013-08-14 17:37:06 +08:00
u32 hotpluggable ;
2005-04-16 15:20:36 -07:00
int node , pxm ;
2006-02-03 21:51:26 +01:00
if ( srat_disabled ( ) )
2013-01-08 16:18:41 -08:00
goto out_err ;
if ( ma - > header . length ! = sizeof ( struct acpi_srat_mem_affinity ) )
goto out_err_bad_srat ;
2007-02-02 19:48:22 +03:00
if ( ( ma - > flags & ACPI_SRAT_MEM_ENABLED ) = = 0 )
2013-01-08 16:18:41 -08:00
goto out_err ;
2013-08-14 17:37:06 +08:00
hotpluggable = ma - > flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ;
if ( hotpluggable & & ! save_add_info ( ) )
2013-01-08 16:18:41 -08:00
goto out_err ;
2007-02-02 19:48:22 +03:00
start = ma - > base_address ;
end = start + ma - > length ;
2005-04-16 15:20:36 -07:00
pxm = ma - > proximity_domain ;
2012-01-17 04:20:31 -05:00
if ( acpi_srat_revision < = 1 )
pxm & = 0xff ;
2013-01-08 16:18:41 -08:00
2005-04-16 15:20:36 -07:00
node = setup_node ( pxm ) ;
if ( node < 0 ) {
printk ( KERN_ERR " SRAT: Too many proximity domains. \n " ) ;
2013-01-08 16:18:41 -08:00
goto out_err_bad_srat ;
2005-04-16 15:20:36 -07:00
}
2011-02-16 17:11:07 +01:00
2013-01-08 16:18:41 -08:00
if ( numa_add_memblk ( node , start , end ) < 0 )
goto out_err_bad_srat ;
2006-04-07 19:49:18 +02:00
x86/numa: Set numa_nodes_parsed at acpi_numa_memory_affinity_init()
When hot-adding a CPU, the system outputs following messages
since node_to_cpumask_map[2] was not allocated memory.
Booting Node 2 Processor 32 APIC 0xc0
node_to_cpumask_map[2] NULL
Pid: 0, comm: swapper/32 Tainted: G A 3.3.5-acd #21
Call Trace:
[<ffffffff81048845>] debug_cpumask_set_cpu+0x155/0x160
[<ffffffff8105e28a>] ? add_timer_on+0xaa/0x120
[<ffffffff8150665f>] numa_add_cpu+0x1e/0x22
[<ffffffff815020bb>] identify_cpu+0x1df/0x1e4
[<ffffffff815020d6>] identify_econdary_cpu+0x16/0x1d
[<ffffffff81504614>] smp_store_cpu_info+0x3c/0x3e
[<ffffffff81505263>] smp_callin+0x139/0x1be
[<ffffffff815052fb>] start_secondary+0x13/0xeb
The reason is that the bit of node 2 was not set at
numa_nodes_parsed. numa_nodes_parsed is set by only
acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init. Thus even if hot-added memory
which is same PXM as hot-added CPU is written in ACPI SRAT
Table, if the hot-added CPU is not written in ACPI SRAT table,
numa_nodes_parsed is not set.
But according to ACPI Spec Rev 5.0, it says about ACPI SRAT
table as follows: This optional table provides information that
allows OSPM to associate processors and memory ranges, including
ranges of memory provided by hot-added memory devices, with
system localities / proximity domains and clock domains.
It means that ACPI SRAT table only provides information for CPUs
present at boot time and for memory including hot-added memory.
So hot-added memory is written in ACPI SRAT table, but hot-added
CPU is not written in it. Thus numa_nodes_parsed should be set
by not only acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init but also
acpi_numa_memory_affinity_init for the case.
Additionally, if system has cpuless memory node,
acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init cannot set numa_nodes_parseds
since these functions cannot find cpu description for the node.
In this case, numa_nodes_parsed needs to be set by
acpi_numa_memory_affinity_init.
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: liuj97@gmail.com
Cc: kosaki.motohiro@gmail.com
Link: http://lkml.kernel.org/r/4FCC2098.4030007@jp.fujitsu.com
[ merged it ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2012-06-04 11:42:32 +09:00
node_set ( node , numa_nodes_parsed ) ;
2013-08-14 17:37:06 +08:00
pr_info ( " SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s \n " ,
node , pxm ,
( unsigned long long ) start , ( unsigned long long ) end - 1 ,
hotpluggable ? " hotplug " : " " ) ;
2013-01-08 16:18:41 -08:00
2014-01-21 15:49:29 -08:00
/* Mark hotplug range in memblock. */
if ( hotpluggable & & memblock_mark_hotplug ( start , ma - > length ) )
pr_warn ( " SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock \n " ,
( unsigned long long ) start , ( unsigned long long ) end - 1 ) ;
2012-07-31 17:41:09 +02:00
return 0 ;
2013-01-08 16:18:41 -08:00
out_err_bad_srat :
bad_srat ( ) ;
out_err :
return - 1 ;
2005-04-16 15:20:36 -07:00
}
/*
 * Architecture fixup hook; deliberately a no-op in this file.
 * NOTE(review): presumably invoked by the generic ACPI NUMA code after the
 * tables have been parsed - confirm against the caller.
 */
void __init acpi_numa_arch_fixup ( void ) { }
2011-02-16 12:13:06 +01:00
/*
 * Run acpi_numa_init() and report whether SRAT-based NUMA setup is usable.
 *
 * Returns the negative value from acpi_numa_init() if it failed, -EINVAL if
 * parsing left the SRAT disabled (see srat_disabled()), and 0 otherwise.
 */
int __init x86_acpi_numa_init(void)
{
	const int rc = acpi_numa_init();

	if (rc < 0)
		return rc;

	if (srat_disabled())
		return -EINVAL;

	return 0;
}