2016-04-08 15:50:25 -07:00
/*
 * OF NUMA Parsing support.
 *
 * Copyright (C) 2015 - 2016 Cavium Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
2016-09-01 14:54:58 +08:00
# define pr_fmt(fmt) "OF: NUMA: " fmt
2016-04-08 15:50:25 -07:00
# include <linux/of.h>
# include <linux/of_address.h>
# include <linux/nodemask.h>
# include <asm/numa.h>
/* define default numa node to 0 */
# define DEFAULT_NODE 0
/*
 * Record the NUMA node id of every CPU described under /cpus.
 *
 * Even though we connect cpus to numa domains later in SMP
 * init, we need to know the node ids now for all cpus.
 *
 * Children of /cpus whose type is not "cpu", or for which reading
 * "numa-node-id" fails, are skipped.  A node id at or above
 * MAX_NUMNODES is reported and ignored; every other id is set in
 * numa_nodes_parsed.
 */
static void __init of_numa_parse_cpu_nodes(void)
{
	struct device_node *cpus;
	struct device_node *np = NULL;
	u32 nid;
	int r;

	cpus = of_find_node_by_path("/cpus");
	if (!cpus)
		return;

	for_each_child_of_node(cpus, np) {
		/* Skip things that are not CPUs */
		if (of_node_cmp(np->type, "cpu") != 0)
			continue;

		r = of_property_read_u32(np, "numa-node-id", &nid);
		if (r)
			continue;

		pr_debug("CPU on %u\n", nid);
		if (nid >= MAX_NUMNODES)
			pr_warn("Node id %u exceeds maximum value\n", nid);
		else
			node_set(nid, numa_nodes_parsed);
	}

	/* Release the /cpus node looked up above. */
	of_node_put(cpus);
}
/*
 * Hand the address ranges of every "memory" node that carries a
 * "numa-node-id" property to numa_add_memblk().
 *
 * Memory nodes without the property are skipped.
 *
 * Returns 0 when all such nodes were processed.  Stops at the first bad
 * node and returns the error that was hit, or -EINVAL when the node id
 * is out of range or the node yielded no address range at all.
 */
static int __init of_numa_parse_memory_nodes(void)
{
	struct device_node *np = NULL;
	struct resource rsrc;
	u32 nid;
	int i, r;

	for_each_node_by_type(np, "memory") {
		r = of_property_read_u32(np, "numa-node-id", &nid);
		if (r == -EINVAL)
			/*
			 * property doesn't exist if -EINVAL, continue
			 * looking for more memory nodes with
			 * "numa-node-id" property
			 */
			continue;

		/*
		 * Only inspect nid when the read succeeded: on any other
		 * error nid has not been assigned, and the original error
		 * code must reach the caller unchanged.
		 */
		if (!r && nid >= MAX_NUMNODES) {
			pr_warn("Node id %u exceeds maximum value\n", nid);
			r = -EINVAL;
		}

		/* Walk the node's address ranges until one fails to resolve. */
		for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++)
			r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);

		/* i == 0 means not a single range was registered. */
		if (!i || r) {
			/* Leaving the iterator early: release the current node. */
			of_node_put(np);
			pr_err("bad property in memory node\n");
			return r ? : -EINVAL;
		}
	}

	return 0;
}
/*
 * Parse the "distance-matrix" property of a numa-distance-map-v1 node.
 *
 * The property is consumed as consecutive triples of u32 cells
 * (node A, node B, distance); each triple is passed to
 * numa_set_distance().  Trailing cells that do not form a complete
 * triple are ignored.
 *
 * @map: the distance-map device node.
 *
 * Returns 0 on success, -EINVAL if the property is missing or empty.
 */
static int __init of_numa_parse_distance_map_v1(struct device_node *map)
{
	const __be32 *matrix;
	int entry_count;
	int i;

	pr_info("parsing numa-distance-map-v1\n");

	matrix = of_get_property(map, "distance-matrix", NULL);
	if (!matrix) {
		pr_err("No distance-matrix property in distance-map\n");
		return -EINVAL;
	}

	entry_count = of_property_count_u32_elems(map, "distance-matrix");
	if (entry_count <= 0) {
		pr_err("Invalid distance-matrix\n");
		return -EINVAL;
	}

	for (i = 0; i + 2 < entry_count; i += 3) {
		u32 nodea, nodeb, distance;

		nodea = of_read_number(matrix, 1);
		matrix++;
		nodeb = of_read_number(matrix, 1);
		matrix++;
		distance = of_read_number(matrix, 1);
		matrix++;

		numa_set_distance(nodea, nodeb, distance);
		/* All three values are u32, so print them unsigned. */
		pr_debug("distance[node%u -> node%u] = %u\n",
			 nodea, nodeb, distance);

		/* Set default distance of node B->A same as A->B */
		if (nodeb > nodea)
			numa_set_distance(nodeb, nodea, distance);
	}

	return 0;
}
/*
 * Locate a node compatible with "numa-distance-map-v1" and parse it.
 *
 * Returns 0 when no such node exists, otherwise the result of
 * of_numa_parse_distance_map_v1().
 */
static int __init of_numa_parse_distance_map(void)
{
	int ret = 0;
	struct device_node *np;

	np = of_find_compatible_node(NULL, NULL,
				     "numa-distance-map-v1");
	if (np)
		ret = of_numa_parse_distance_map_v1(np);

	of_node_put(np);
	return ret;
}
int of_node_to_nid ( struct device_node * device )
{
struct device_node * np ;
u32 nid ;
int r = - ENODATA ;
np = of_node_get ( device ) ;
while ( np ) {
r = of_property_read_u32 ( np , " numa-node-id " , & nid ) ;
/*
* - EINVAL indicates the property was not found , and
* we walk up the tree trying to find a parent with a
* " numa-node-id " . Any other type of error indicates
* a bad device tree and we give up .
*/
if ( r ! = - EINVAL )
break ;
2016-09-01 14:54:57 +08:00
np = of_get_next_parent ( np ) ;
2016-04-08 15:50:25 -07:00
}
if ( np & & r )
2016-09-01 14:54:58 +08:00
pr_warn ( " Invalid \" numa-node-id \" property in node %s \n " ,
2016-04-08 15:50:25 -07:00
np - > name ) ;
of_node_put ( np ) ;
of, numa: Return NUMA_NO_NODE from disable of_node_to_nid() if nid not possible.
On arm64 NUMA kernels we can pass "numa=off" on the command line to
disable NUMA. A side effect of this is that kmalloc_node() calls to
non-zero nodes will crash the system with an OOPS:
[ 0.000000] ITS@0x0000901000020000: allocated 2097152 Devices @10002000000 (flat, esz 8, psz 64K, shr 1)
[ 0.000000] Unable to handle kernel NULL pointer dereference at virtual address 00001680
[ 0.000000] pgd = fffffc0009470000
[ 0.000000] [00001680] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff90003, *pte=0000000000000000
[ 0.000000] Internal error: Oops: 96000006 [#1] SMP
.
.
.
[ 0.000000] [<fffffc00081c8950>] __alloc_pages_nodemask+0xa4/0xe68
[ 0.000000] [<fffffc000821fa70>] new_slab+0xd0/0x564
[ 0.000000] [<fffffc0008221e24>] ___slab_alloc+0x2e4/0x514
[ 0.000000] [<fffffc0008239498>] __slab_alloc+0x48/0x58
[ 0.000000] [<fffffc0008222c20>] __kmalloc_node+0xd0/0x2dc
[ 0.000000] [<fffffc0008115374>] __irq_domain_add+0x7c/0x164
[ 0.000000] [<fffffc0008b461dc>] its_probe+0x784/0x81c
[ 0.000000] [<fffffc0008b462bc>] its_init+0x48/0x1b0
[ 0.000000] [<fffffc0008b4543c>] gic_init_bases+0x228/0x360
[ 0.000000] [<fffffc0008b456bc>] gic_of_init+0x148/0x1cc
[ 0.000000] [<fffffc0008b5aec8>] of_irq_init+0x184/0x298
[ 0.000000] [<fffffc0008b43f9c>] irqchip_init+0x14/0x38
[ 0.000000] [<fffffc0008b12d60>] init_IRQ+0xc/0x30
[ 0.000000] [<fffffc0008b10a3c>] start_kernel+0x240/0x3b8
[ 0.000000] [<fffffc0008b101c4>] __primary_switched+0x30/0x6c
[ 0.000000] Code: 912ec2a0 b9403809 0a0902fb 37b007db (f9400300)
.
.
.
This is caused by code like this in kernel/irq/irqdomain.c
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
GFP_KERNEL, of_node_to_nid(of_node));
When NUMA is disabled, the concept of a node is really undefined, so
of_node_to_nid() should unconditionally return NUMA_NO_NODE.
Fix by returning NUMA_NO_NODE when the nid is not in the set of
possible nodes.
Reported-by: Gilbert Netzer <noname@pdc.kth.se>
Signed-off-by: David Daney <david.daney@cavium.com>
Cc: stable@vger.kernel.org # 4.7+
Signed-off-by: Rob Herring <robh@kernel.org>
2016-10-28 14:15:02 -07:00
/*
* If numa = off passed on command line , or with a defective
* device tree , the nid may not be in the set of possible
* nodes . Check for this case and return NUMA_NO_NODE .
*/
if ( ! r & & nid < MAX_NUMNODES & & node_possible ( nid ) )
2016-09-01 14:54:55 +08:00
return nid ;
2016-04-08 15:50:25 -07:00
return NUMA_NO_NODE ;
}
EXPORT_SYMBOL ( of_node_to_nid ) ;
/*
 * Parse NUMA information from the device tree: CPU nodes first, then
 * memory nodes, then the distance map.
 *
 * Returns 0 on success, or the error from the memory-node or
 * distance-map parsing step that failed.
 */
int __init of_numa_init(void)
{
	int err;

	of_numa_parse_cpu_nodes();

	err = of_numa_parse_memory_nodes();
	/* The distance map is only parsed once memory nodes parsed cleanly. */
	if (!err)
		err = of_numa_parse_distance_map();

	return err;
}