Gavin Shan 58ae0b5150 Documentation, dt, numa: Add note to empty NUMA node
Empty memory nodes, in which no memory resides, are allowed.
The NUMA node IDs are still valid and parsed, but memory may be
added to them through hotplug afterwards. Currently, QEMU fails
to boot when multiple empty memory nodes are specified. It's
caused by device-tree population failure and duplicated memory
node names.

The device-tree specification doesn't describe how empty NUMA
nodes are handled. Besides, I had difficulty finding where
this case is documented. So let's add a section for empty memory
nodes to cover it in the NUMA binding document.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Link: https://lore.kernel.org/r/20210927064119.127285-2-gshan@redhat.com
Signed-off-by: Rob Herring <robh@kernel.org>
2021-10-04 13:13:36 -05:00

320 lines
7.8 KiB
Plaintext

==============================================================================
NUMA binding description.
==============================================================================
==============================================================================
1 - Introduction
==============================================================================
Systems employing a Non Uniform Memory Access (NUMA) architecture contain
collections of hardware resources including processors, memory, and I/O buses,
that comprise what is commonly known as a NUMA node.
Processor accesses to memory within the local NUMA node are generally faster
than processor accesses to memory outside of the local NUMA node.
DT defines interfaces that allow the platform to convey NUMA node
topology information to the OS.
==============================================================================
2 - numa-node-id
==============================================================================
For the purpose of identification, each NUMA node is associated with a unique
token known as a node id. For the purpose of this binding
a node id is a 32-bit integer.
A device node is associated with a NUMA node by the presence of a
numa-node-id property which contains the node id of the device.
Example:
/* numa node 0 */
numa-node-id = <0>;
/* numa node 1 */
numa-node-id = <1>;
==============================================================================
3 - distance-map
==============================================================================
The optional device tree node distance-map describes the relative
distance (memory latency) between all numa nodes.
- compatible : Should at least contain "numa-distance-map-v1".
- distance-matrix
This property defines a matrix to describe the relative distances
between all numa nodes.
It is represented as a list of node pairs and their relative distance.
Note:
1. Each entry represents distance from first node to second node.
The distances are equal in either direction.
2. The distance from a node to itself (local distance) is represented
with value 10 and all internode distances should be represented with
a value greater than 10.
3. distance-matrix should have entries in lexicographical ascending
order of nodes.
4. There must be only one device node distance-map which must
reside in the root node.
5. If the distance-map node is not present, a default
distance-matrix is used.
Example:
4 nodes connected in mesh/ring topology as below,
    0_______20______1
    |               |
    |               |
    20              20
    |               |
    |               |
    |_______________|
    3       20      2
if relative distance for each hop is 20,
then internode distance would be,
0 -> 1 = 20
1 -> 2 = 20
2 -> 3 = 20
3 -> 0 = 20
0 -> 2 = 40
1 -> 3 = 40
and dt presentation for this distance matrix is,
distance-map {
compatible = "numa-distance-map-v1";
distance-matrix = <0 0 10>,
<0 1 20>,
<0 2 40>,
<0 3 20>,
<1 0 20>,
<1 1 10>,
<1 2 20>,
<1 3 40>,
<2 0 40>,
<2 1 20>,
<2 2 10>,
<2 3 20>,
<3 0 20>,
<3 1 40>,
<3 2 20>,
<3 3 10>;
};
==============================================================================
4 - Empty memory nodes
==============================================================================
Empty memory nodes, in which no memory resides, are allowed. There are no
device nodes for these empty memory nodes. However, the NUMA node IDs and
distance maps are still valid, and memory may be added to them through
hotplug afterwards.
Example:
memory@0 {
device_type = "memory";
reg = <0x0 0x0 0x0 0x80000000>;
numa-node-id = <0>;
};
memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x80000000>;
numa-node-id = <1>;
};
/* Empty memory nodes 2 and 3 */
distance-map {
compatible = "numa-distance-map-v1";
distance-matrix = <0 0 10>,
<0 1 20>,
<0 2 40>,
<0 3 20>,
<1 0 20>,
<1 1 10>,
<1 2 20>,
<1 3 40>,
<2 0 40>,
<2 1 20>,
<2 2 10>,
<2 3 20>,
<3 0 20>,
<3 1 40>,
<3 2 20>,
<3 3 10>;
};
==============================================================================
5 - Example dts
==============================================================================
A dual-socket system consisting of 2 boards connected through a CCN bus, with
each board having one socket/SoC with 8 CPUs, memory and a PCI bus.
memory@c00000 {
device_type = "memory";
reg = <0x0 0xc00000 0x0 0x80000000>;
/* node 0 */
numa-node-id = <0>;
};
memory@10000000000 {
device_type = "memory";
reg = <0x100 0x0 0x0 0x80000000>;
/* node 1 */
numa-node-id = <1>;
};
cpus {
#address-cells = <2>;
#size-cells = <0>;
cpu@0 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x0>;
enable-method = "psci";
/* node 0 */
numa-node-id = <0>;
};
cpu@1 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x1>;
enable-method = "psci";
numa-node-id = <0>;
};
cpu@2 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x2>;
enable-method = "psci";
numa-node-id = <0>;
};
cpu@3 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x3>;
enable-method = "psci";
numa-node-id = <0>;
};
cpu@4 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x4>;
enable-method = "psci";
numa-node-id = <0>;
};
cpu@5 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x5>;
enable-method = "psci";
numa-node-id = <0>;
};
cpu@6 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x6>;
enable-method = "psci";
numa-node-id = <0>;
};
cpu@7 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x7>;
enable-method = "psci";
numa-node-id = <0>;
};
cpu@8 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x8>;
enable-method = "psci";
/* node 1 */
numa-node-id = <1>;
};
cpu@9 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x9>;
enable-method = "psci";
numa-node-id = <1>;
};
cpu@a {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0xa>;
enable-method = "psci";
numa-node-id = <1>;
};
cpu@b {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0xb>;
enable-method = "psci";
numa-node-id = <1>;
};
cpu@c {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0xc>;
enable-method = "psci";
numa-node-id = <1>;
};
cpu@d {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0xd>;
enable-method = "psci";
numa-node-id = <1>;
};
cpu@e {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0xe>;
enable-method = "psci";
numa-node-id = <1>;
};
cpu@f {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0xf>;
enable-method = "psci";
numa-node-id = <1>;
};
};
pcie0: pcie0@848000000000 {
compatible = "arm,armv8";
device_type = "pci";
bus-range = <0 255>;
#size-cells = <2>;
#address-cells = <3>;
reg = <0x8480 0x00000000 0 0x10000000>; /* Configuration space */
ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>;
/* node 0 */
numa-node-id = <0>;
};
pcie1: pcie1@948000000000 {
compatible = "arm,armv8";
device_type = "pci";
bus-range = <0 255>;
#size-cells = <2>;
#address-cells = <3>;
reg = <0x9480 0x00000000 0 0x10000000>; /* Configuration space */
ranges = <0x03000000 0x9010 0x00000000 0x9010 0x00000000 0x70 0x00000000>;
/* node 1 */
numa-node-id = <1>;
};
distance-map {
compatible = "numa-distance-map-v1";
distance-matrix = <0 0 10>,
<0 1 20>,
<1 1 10>;
};