/* Common code for 32 and 64-bit NUMA */
#include <linux/topology.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <asm/numa.h>
#include <asm/acpi.h>

int __initdata numa_off;

static __init int numa_setup(char *opt)
{
        if (!opt)
                return -EINVAL;
        if (!strncmp(opt, "off", 3))
                numa_off = 1;
#ifdef CONFIG_NUMA_EMU
        if (!strncmp(opt, "fake=", 5))
                numa_emu_cmdline(opt + 5);
#endif
#ifdef CONFIG_ACPI_NUMA
        if (!strncmp(opt, "noacpi", 6))
                acpi_numa = -1;
#endif
        return 0;
}
early_param("numa", numa_setup);
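
/*
 * Example boot parameters parsed by numa_setup() above:
 *
 *      numa=off        disable NUMA; treat the machine as a single node
 *      numa=fake=4     emulate 4 NUMA nodes (CONFIG_NUMA_EMU only)
 *      numa=noacpi     ignore the ACPI SRAT table (CONFIG_ACPI_NUMA only)
 */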

/*
 * apicid, cpu, node mappings
 */
s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
        [0 ... MAX_LOCAL_APIC - 1] = NUMA_NO_NODE
};

int __cpuinit numa_cpu_node(int cpu)
{
        int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);

        if (apicid != BAD_APICID)
                return __apicid_to_node[apicid];
        return NUMA_NO_NODE;
}
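
/*
 * The lookup above chains the two boot-time tables: cpu -> apicid via
 * x86_cpu_to_apicid[], then apicid -> node via __apicid_to_node[],
 * which is filled from the firmware affinity information (e.g. the
 * ACPI SRAT). BAD_APICID means the CPU has not been enumerated yet.
 */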

cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);

/*
 * Map cpu index to node index
 */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

void __cpuinit numa_set_node(int cpu, int node)
{
        int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

        /* early setting, no percpu area yet */
        if (cpu_to_node_map) {
                cpu_to_node_map[cpu] = node;
                return;
        }

#ifdef CONFIG_DEBUG_PER_CPU_MAPS
        if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
                printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
                dump_stack();
                return;
        }
#endif
        per_cpu(x86_cpu_to_node_map, cpu) = node;

        if (node != NUMA_NO_NODE)
                set_cpu_numa_node(cpu, node);
}

void __cpuinit numa_clear_node(int cpu)
{
        numa_set_node(cpu, NUMA_NO_NODE);
}

/*
 * Allocate node_to_cpumask_map based on the number of available nodes.
 * Requires node_possible_map to be valid.
 *
 * Note: cpumask_of_node() is not valid until after this is done.
 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
 */
void __init setup_node_to_cpumask_map(void)
{
        unsigned int node, num = 0;

        /* setup nr_node_ids if not done yet */
        if (nr_node_ids == MAX_NUMNODES) {
                for_each_node_mask(node, node_possible_map)
                        num = node;
                nr_node_ids = num + 1;
        }

        /* allocate the map */
        for (node = 0; node < nr_node_ids; node++)
                alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

        /* cpumask_of_node() will now work */
        pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
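
/*
 * Example: if the firmware declares only nodes 0 and 2 possible, the
 * loop above leaves num == 2, so nr_node_ids becomes 3 and a cpumask
 * is still allocated for the nonexistent node 1; it simply stays
 * empty.
 */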

/*
 * There are unfortunately some poorly designed mainboards around that
 * only connect memory to a single CPU. This breaks the 1:1 cpu->node
 * mapping. To avoid this, fill in the mapping for all possible CPUs,
 * since the number of CPUs is not known yet. We round robin the
 * existing nodes.
 */
void __init numa_init_array(void)
{
        int rr, i;

        rr = first_node(node_online_map);
        for (i = 0; i < nr_cpu_ids; i++) {
                if (early_cpu_to_node(i) != NUMA_NO_NODE)
                        continue;
                numa_set_node(i, rr);
                rr = next_node(rr, node_online_map);
                if (rr == MAX_NUMNODES)
                        rr = first_node(node_online_map);
        }
}
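
/*
 * Example: with nodes {0,1} online and CPUs 4-7 still unmapped, the
 * loop above assigns cpu4->node0, cpu5->node1, cpu6->node0,
 * cpu7->node1, wrapping back via first_node() once next_node() runs
 * past the last online node.
 */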

static __init int find_near_online_node(int node)
{
        int n, val;
        int min_val = INT_MAX;
        int best_node = -1;

        for_each_online_node(n) {
                val = node_distance(node, n);

                if (val < min_val) {
                        min_val = val;
                        best_node = n;
                }
        }

        return best_node;
}

/*
 * Setup early cpu_to_node.
 *
 * Populate cpu_to_node[] only if the x86_cpu_to_apicid[] and
 * apicid_to_node[] tables have valid entries for a CPU.
 * This means we skip cpu_to_node[] initialisation for NUMA
 * emulation and the faked node case (when running a kernel compiled
 * for NUMA on a non-NUMA box), which is OK, as cpu_to_node[]
 * is already initialized in a round robin manner at numa_init_array(),
 * prior to this call, and this initialization is good enough
 * for the fake NUMA cases.
 *
 * Called before the per_cpu areas are setup.
 */
void __init init_cpu_to_node(void)
{
        int cpu;
        u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);

        BUG_ON(cpu_to_apicid == NULL);

        for_each_possible_cpu(cpu) {
                int node = numa_cpu_node(cpu);

                if (node == NUMA_NO_NODE)
                        continue;
                if (!node_online(node))
                        node = find_near_online_node(node);
                numa_set_node(cpu, node);
        }
}
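
/*
 * Example: a CPU whose SRAT entry points at a memoryless node that
 * never came online is re-homed to the nearest online node here, so
 * later cpu_to_node() lookups do not hand out an offline node.
 */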

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

# ifndef CONFIG_NUMA_EMU
void __cpuinit numa_add_cpu(int cpu)
{
        cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
        cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
# endif /* !CONFIG_NUMA_EMU */

#else /* !CONFIG_DEBUG_PER_CPU_MAPS */

int __cpu_to_node(int cpu)
{
        if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
                printk(KERN_WARNING
                        "cpu_to_node(%d): usage too early!\n", cpu);
                dump_stack();
                return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
        }
        return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(__cpu_to_node);

/*
 * Same function as cpu_to_node() but used if called before the
 * per_cpu areas are setup.
 */
int early_cpu_to_node(int cpu)
{
        if (early_per_cpu_ptr(x86_cpu_to_node_map))
                return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

        if (!cpu_possible(cpu)) {
                printk(KERN_WARNING
                        "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
                dump_stack();
                return NUMA_NO_NODE;
        }
        return per_cpu(x86_cpu_to_node_map, cpu);
}
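
/*
 * Note: the early map is copied into the real per-CPU area and the
 * early_per_cpu_ptr() pointer is cleared when setup_per_cpu_areas()
 * runs, which is what switches both accessors above over to the
 * per_cpu() path.
 */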

void debug_cpumask_set_cpu(int cpu, int node, bool enable)
{
        struct cpumask *mask;
        char buf[64];

        if (node == NUMA_NO_NODE) {
                /* early_cpu_to_node() already emits a warning and trace */
                return;
        }
        mask = node_to_cpumask_map[node];
        if (!mask) {
                pr_err("node_to_cpumask_map[%i] NULL\n", node);
                dump_stack();
                return;
        }

        if (enable)
                cpumask_set_cpu(cpu, mask);
        else
                cpumask_clear_cpu(cpu, mask);

        cpulist_scnprintf(buf, sizeof(buf), mask);
        printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
                enable ? "numa_add_cpu" : "numa_remove_cpu",
                cpu, node, buf);
}

# ifndef CONFIG_NUMA_EMU
static void __cpuinit numa_set_cpumask(int cpu, bool enable)
{
        debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
}

void __cpuinit numa_add_cpu(int cpu)
{
        numa_set_cpumask(cpu, true);
}

void __cpuinit numa_remove_cpu(int cpu)
{
        numa_set_cpumask(cpu, false);
}
# endif /* !CONFIG_NUMA_EMU */

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
const struct cpumask *cpumask_of_node(int node)
{
        if (node >= nr_node_ids) {
                printk(KERN_WARNING
                        "cpumask_of_node(%d): node >= nr_node_ids(%d)\n",
                        node, nr_node_ids);
                dump_stack();
                return cpu_none_mask;
        }
        if (node_to_cpumask_map[node] == NULL) {
                printk(KERN_WARNING
                        "cpumask_of_node(%d): no node_to_cpumask_map!\n",
                        node);
                dump_stack();
                return cpu_online_mask;
        }
        return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);
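
/*
 * Typical use (illustrative sketch only; 'nid' and do_work() are
 * placeholders, not kernel APIs):
 *
 *      int cpu;
 *
 *      for_each_cpu(cpu, cpumask_of_node(nid))
 *              do_work(cpu);
 */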

#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */