e5e179aa3a
At memory hot-remove time we can retrieve an LMB's nid from its corresponding memory_block. There is no need to store the nid in multiple locations. Note that lmb_to_memblock() uses find_memory_block() to get the corresponding memory_block. As find_memory_block() runs in sub-linear time this approach is negligibly slower than what we do at present. In exchange for this lookup at hot-remove time we no longer need to call memory_add_physaddr_to_nid() during drmem_init() for each LMB. On powerpc, memory_add_physaddr_to_nid() is a linear search, so this spares us an O(n^2) initialization during boot. On systems with many LMBs that initialization overhead is palpable and disruptive. For example, on a box with 249854 LMBs we're seeing drmem_init() take upwards of 30 seconds to complete: [ 53.721639] drmem: initializing drmem v2 [ 80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! [swapper/0:1] [ 80.604377] Modules linked in: [ 80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4 [ 80.604397] NIP: c0000000000a4980 LR: c0000000000a4940 CTR: 0000000000000000 [ 80.604407] REGS: c0002dbff8493830 TRAP: 0901 Not tainted (5.6.0-rc2+) [ 80.604412] MSR: 8000000002009033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR: 44000248 XER: 0000000d [ 80.604431] CFAR: c0000000000a4a38 IRQMASK: 0 [ 80.604431] GPR00: c0000000000a4940 c0002dbff8493ac0 c000000001904400 c0003cfffffede30 [ 80.604431] GPR04: 0000000000000000 c000000000f4095a 000000000000002f 0000000010000000 [ 80.604431] GPR08: c0000bf7ecdb7fb8 c0000bf7ecc2d3c8 0000000000000008 c00c0002fdfb2001 [ 80.604431] GPR12: 0000000000000000 c00000001e8ec200 [ 80.604477] NIP [c0000000000a4980] hot_add_scn_to_nid+0xa0/0x3e0 [ 80.604486] LR [c0000000000a4940] hot_add_scn_to_nid+0x60/0x3e0 [ 80.604492] Call Trace: [ 80.604498] [c0002dbff8493ac0] [c0000000000a4940] hot_add_scn_to_nid+0x60/0x3e0 (unreliable) [ 80.604509] [c0002dbff8493b20] [c000000000087c10] memory_add_physaddr_to_nid+0x20/0x60 [ 80.604521] [c0002dbff8493b40] 
[c0000000010d4880] drmem_init+0x25c/0x2f0 [ 80.604530] [c0002dbff8493c10] [c000000000010154] do_one_initcall+0x64/0x2c0 [ 80.604540] [c0002dbff8493ce0] [c0000000010c4aa0] kernel_init_freeable+0x2d8/0x3a0 [ 80.604550] [c0002dbff8493db0] [c000000000010824] kernel_init+0x2c/0x148 [ 80.604560] [c0002dbff8493e20] [c00000000000b648] ret_from_kernel_thread+0x5c/0x74 [ 80.604567] Instruction dump: [ 80.604574] 392918e8 e9490000 e90a000a e92a0000 80ea000c 1d080018 3908ffe8 7d094214 [ 80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac e9490000 7fbe5040 [ 89.047390] drmem: 249854 LMB(s) With a patched kernel on the same machine we're no longer seeing the soft lockup. drmem_init() now completes in negligible time, even when the LMB count is large. Fixes: b2d3b5ee66f2 ("powerpc/pseries: Track LMB nid instead of using device tree") Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com> Reviewed-by: Nathan Lynch <nathanl@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20200811015115.63677-1-cheloha@linux.ibm.com
471 lines
10 KiB
C
471 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
 * Dynamic reconfiguration memory support
 *
 * Copyright 2017 IBM Corporation
 */
|
|
|
|
#define pr_fmt(fmt) "drmem: " fmt
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/of.h>
|
|
#include <linux/of_fdt.h>
|
|
#include <linux/memblock.h>
|
|
#include <asm/prom.h>
|
|
#include <asm/drmem.h>
|
|
|
|
static int n_root_addr_cells, n_root_size_cells;
|
|
|
|
static struct drmem_lmb_info __drmem_info;
|
|
struct drmem_lmb_info *drmem_info = &__drmem_info;
|
|
|
|
u64 drmem_lmb_memory_max(void)
|
|
{
|
|
struct drmem_lmb *last_lmb;
|
|
|
|
last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
|
|
return last_lmb->base_addr + drmem_lmb_size();
|
|
}
|
|
|
|
static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
|
|
{
|
|
/*
|
|
* Return the value of the lmb flags field minus the reserved
|
|
* bit used internally for hotplug processing.
|
|
*/
|
|
return lmb->flags & ~DRMEM_LMB_RESERVED;
|
|
}
|
|
|
|
/*
 * Allocate a property named like @prop with a zero-filled value buffer of
 * @prop_sz bytes. The caller is expected to fill in the value.
 *
 * Returns the new property, or NULL if any of the allocations fails; in
 * that case everything allocated so far is released again.
 */
static struct property *clone_property(struct property *prop, u32 prop_sz)
{
	struct property *copy = kzalloc(sizeof(*copy), GFP_KERNEL);

	if (!copy)
		return NULL;

	copy->name = kstrdup(prop->name, GFP_KERNEL);
	copy->value = kzalloc(prop_sz, GFP_KERNEL);
	if (copy->name && copy->value) {
		copy->length = prop_sz;
#if defined(CONFIG_OF_DYNAMIC)
		of_property_set_flag(copy, OF_DYNAMIC);
#endif
		return copy;
	}

	/* kfree(NULL) is a no-op, so both members can be released blindly. */
	kfree(copy->name);
	kfree(copy->value);
	kfree(copy);
	return NULL;
}
|
|
|
|
static int drmem_update_dt_v1(struct device_node *memory,
|
|
struct property *prop)
|
|
{
|
|
struct property *new_prop;
|
|
struct of_drconf_cell_v1 *dr_cell;
|
|
struct drmem_lmb *lmb;
|
|
u32 *p;
|
|
|
|
new_prop = clone_property(prop, prop->length);
|
|
if (!new_prop)
|
|
return -1;
|
|
|
|
p = new_prop->value;
|
|
*p++ = cpu_to_be32(drmem_info->n_lmbs);
|
|
|
|
dr_cell = (struct of_drconf_cell_v1 *)p;
|
|
|
|
for_each_drmem_lmb(lmb) {
|
|
dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
|
|
dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
|
|
dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
|
|
dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
|
|
|
|
dr_cell++;
|
|
}
|
|
|
|
of_update_property(memory, new_prop);
|
|
return 0;
|
|
}
|
|
|
|
static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
|
|
struct drmem_lmb *lmb)
|
|
{
|
|
dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
|
|
dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
|
|
dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
|
|
dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
|
|
}
|
|
|
|
static int drmem_update_dt_v2(struct device_node *memory,
|
|
struct property *prop)
|
|
{
|
|
struct property *new_prop;
|
|
struct of_drconf_cell_v2 *dr_cell;
|
|
struct drmem_lmb *lmb, *prev_lmb;
|
|
u32 lmb_sets, prop_sz, seq_lmbs;
|
|
u32 *p;
|
|
|
|
/* First pass, determine how many LMB sets are needed. */
|
|
lmb_sets = 0;
|
|
prev_lmb = NULL;
|
|
for_each_drmem_lmb(lmb) {
|
|
if (!prev_lmb) {
|
|
prev_lmb = lmb;
|
|
lmb_sets++;
|
|
continue;
|
|
}
|
|
|
|
if (prev_lmb->aa_index != lmb->aa_index ||
|
|
drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))
|
|
lmb_sets++;
|
|
|
|
prev_lmb = lmb;
|
|
}
|
|
|
|
prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32);
|
|
new_prop = clone_property(prop, prop_sz);
|
|
if (!new_prop)
|
|
return -1;
|
|
|
|
p = new_prop->value;
|
|
*p++ = cpu_to_be32(lmb_sets);
|
|
|
|
dr_cell = (struct of_drconf_cell_v2 *)p;
|
|
|
|
/* Second pass, populate the LMB set data */
|
|
prev_lmb = NULL;
|
|
seq_lmbs = 0;
|
|
for_each_drmem_lmb(lmb) {
|
|
if (prev_lmb == NULL) {
|
|
/* Start of first LMB set */
|
|
prev_lmb = lmb;
|
|
init_drconf_v2_cell(dr_cell, lmb);
|
|
seq_lmbs++;
|
|
continue;
|
|
}
|
|
|
|
if (prev_lmb->aa_index != lmb->aa_index ||
|
|
drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {
|
|
/* end of one set, start of another */
|
|
dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
|
|
dr_cell++;
|
|
|
|
init_drconf_v2_cell(dr_cell, lmb);
|
|
seq_lmbs = 1;
|
|
} else {
|
|
seq_lmbs++;
|
|
}
|
|
|
|
prev_lmb = lmb;
|
|
}
|
|
|
|
/* close out last LMB set */
|
|
dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
|
|
of_update_property(memory, new_prop);
|
|
return 0;
|
|
}
|
|
|
|
int drmem_update_dt(void)
|
|
{
|
|
struct device_node *memory;
|
|
struct property *prop;
|
|
int rc = -1;
|
|
|
|
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
|
|
if (!memory)
|
|
return -1;
|
|
|
|
prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
|
|
if (prop) {
|
|
rc = drmem_update_dt_v1(memory, prop);
|
|
} else {
|
|
prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL);
|
|
if (prop)
|
|
rc = drmem_update_dt_v2(memory, prop);
|
|
}
|
|
|
|
of_node_put(memory);
|
|
return rc;
|
|
}
|
|
|
|
static void read_drconf_v1_cell(struct drmem_lmb *lmb,
|
|
const __be32 **prop)
|
|
{
|
|
const __be32 *p = *prop;
|
|
|
|
lmb->base_addr = of_read_number(p, n_root_addr_cells);
|
|
p += n_root_addr_cells;
|
|
lmb->drc_index = of_read_number(p++, 1);
|
|
|
|
p++; /* skip reserved field */
|
|
|
|
lmb->aa_index = of_read_number(p++, 1);
|
|
lmb->flags = of_read_number(p++, 1);
|
|
|
|
*prop = p;
|
|
}
|
|
|
|
static int
|
|
__walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, void *data,
|
|
int (*func)(struct drmem_lmb *, const __be32 **, void *))
|
|
{
|
|
struct drmem_lmb lmb;
|
|
u32 i, n_lmbs;
|
|
int ret = 0;
|
|
|
|
n_lmbs = of_read_number(prop++, 1);
|
|
for (i = 0; i < n_lmbs; i++) {
|
|
read_drconf_v1_cell(&lmb, &prop);
|
|
ret = func(&lmb, &usm, data);
|
|
if (ret)
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
|
|
const __be32 **prop)
|
|
{
|
|
const __be32 *p = *prop;
|
|
|
|
dr_cell->seq_lmbs = of_read_number(p++, 1);
|
|
dr_cell->base_addr = of_read_number(p, n_root_addr_cells);
|
|
p += n_root_addr_cells;
|
|
dr_cell->drc_index = of_read_number(p++, 1);
|
|
dr_cell->aa_index = of_read_number(p++, 1);
|
|
dr_cell->flags = of_read_number(p++, 1);
|
|
|
|
*prop = p;
|
|
}
|
|
|
|
static int
|
|
__walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, void *data,
|
|
int (*func)(struct drmem_lmb *, const __be32 **, void *))
|
|
{
|
|
struct of_drconf_cell_v2 dr_cell;
|
|
struct drmem_lmb lmb;
|
|
u32 i, j, lmb_sets;
|
|
int ret = 0;
|
|
|
|
lmb_sets = of_read_number(prop++, 1);
|
|
for (i = 0; i < lmb_sets; i++) {
|
|
read_drconf_v2_cell(&dr_cell, &prop);
|
|
|
|
for (j = 0; j < dr_cell.seq_lmbs; j++) {
|
|
lmb.base_addr = dr_cell.base_addr;
|
|
dr_cell.base_addr += drmem_lmb_size();
|
|
|
|
lmb.drc_index = dr_cell.drc_index;
|
|
dr_cell.drc_index++;
|
|
|
|
lmb.aa_index = dr_cell.aa_index;
|
|
lmb.flags = dr_cell.flags;
|
|
|
|
ret = func(&lmb, &usm, data);
|
|
if (ret)
|
|
break;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
#ifdef CONFIG_PPC_PSERIES
/*
 * Flattened-device-tree variant of walk_drmem_lmbs(), for use during early
 * boot. Records the root cell counts and the LMB size from "ibm,lmb-size",
 * then walks "ibm,dynamic-memory" or, if that is absent,
 * "ibm,dynamic-memory-v2", calling @func for every LMB.
 *
 * Returns -ENODEV when "ibm,lmb-size" is missing or too short, or when
 * neither dynamic-memory property exists; otherwise the walker's result.
 * memblock_dump_all() is called once the LMB size has been read,
 * whatever the walk result.
 */
int __init walk_drmem_lmbs_early(unsigned long node, void *data,
		int (*func)(struct drmem_lmb *, const __be32 **, void *))
{
	const __be32 *size_prop, *lmbs, *usm;
	int len, ret = -ENODEV;

	size_prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
	if (!size_prop)
		return -ENODEV;
	if (len < dt_root_size_cells * sizeof(__be32))
		return -ENODEV;

	/* Get the address & size cells */
	n_root_addr_cells = dt_root_addr_cells;
	n_root_size_cells = dt_root_size_cells;

	drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &size_prop);

	usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);

	lmbs = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
	if (lmbs) {
		ret = __walk_drmem_v1_lmbs(lmbs, usm, data, func);
	} else {
		lmbs = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2", &len);
		if (lmbs)
			ret = __walk_drmem_v2_lmbs(lmbs, usm, data, func);
	}

	memblock_dump_all();
	return ret;
}

#endif
|
|
|
|
static int init_drmem_lmb_size(struct device_node *dn)
|
|
{
|
|
const __be32 *prop;
|
|
int len;
|
|
|
|
if (drmem_info->lmb_size)
|
|
return 0;
|
|
|
|
prop = of_get_property(dn, "ibm,lmb-size", &len);
|
|
if (!prop || len < n_root_size_cells * sizeof(__be32)) {
|
|
pr_info("Could not determine LMB size\n");
|
|
return -1;
|
|
}
|
|
|
|
drmem_info->lmb_size = of_read_number(prop, n_root_size_cells);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Returns the property linux,drconf-usable-memory if
|
|
* it exists (the property exists only in kexec/kdump kernels,
|
|
* added by kexec-tools)
|
|
*/
|
|
static const __be32 *of_get_usable_memory(struct device_node *dn)
|
|
{
|
|
const __be32 *prop;
|
|
u32 len;
|
|
|
|
prop = of_get_property(dn, "linux,drconf-usable-memory", &len);
|
|
if (!prop || len < sizeof(unsigned int))
|
|
return NULL;
|
|
|
|
return prop;
|
|
}
|
|
|
|
int walk_drmem_lmbs(struct device_node *dn, void *data,
|
|
int (*func)(struct drmem_lmb *, const __be32 **, void *))
|
|
{
|
|
const __be32 *prop, *usm;
|
|
int ret = -ENODEV;
|
|
|
|
if (!of_root)
|
|
return ret;
|
|
|
|
/* Get the address & size cells */
|
|
of_node_get(of_root);
|
|
n_root_addr_cells = of_n_addr_cells(of_root);
|
|
n_root_size_cells = of_n_size_cells(of_root);
|
|
of_node_put(of_root);
|
|
|
|
if (init_drmem_lmb_size(dn))
|
|
return ret;
|
|
|
|
usm = of_get_usable_memory(dn);
|
|
|
|
prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
|
|
if (prop) {
|
|
ret = __walk_drmem_v1_lmbs(prop, usm, data, func);
|
|
} else {
|
|
prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
|
|
if (prop)
|
|
ret = __walk_drmem_v2_lmbs(prop, usm, data, func);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Populate drmem_info->lmbs from a v1 dynamic-memory property: read the
 * leading LMB count, allocate the table and decode one cell per entry.
 *
 * If the table cannot be allocated the count is reset to zero so that
 * n_lmbs never advertises entries that do not exist; otherwise a later
 * for_each_drmem_lmb() would iterate from a NULL base pointer.
 */
static void __init init_drmem_v1_lmbs(const __be32 *prop)
{
	struct drmem_lmb *lmb;

	drmem_info->n_lmbs = of_read_number(prop++, 1);
	if (drmem_info->n_lmbs == 0)
		return;

	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
				   GFP_KERNEL);
	if (!drmem_info->lmbs) {
		drmem_info->n_lmbs = 0;
		return;
	}

	for_each_drmem_lmb(lmb)
		read_drconf_v1_cell(lmb, &prop);
}
|
|
|
|
/*
 * Populate drmem_info->lmbs from a v2 dynamic-memory property.
 *
 * The property is scanned twice: once to add up seq_lmbs over all set
 * descriptors so the table can be sized, and once more to expand every set
 * into individual entries. Within a set, base_addr advances by the LMB
 * size and drc_index by one per entry; aa_index and flags are shared.
 *
 * If the table cannot be allocated the count is reset to zero so that
 * n_lmbs never advertises entries that do not exist; otherwise a later
 * for_each_drmem_lmb() would iterate from a NULL base pointer.
 */
static void __init init_drmem_v2_lmbs(const __be32 *prop)
{
	struct drmem_lmb *lmb;
	struct of_drconf_cell_v2 dr_cell;
	const __be32 *p;
	u32 i, j, lmb_sets;
	int lmb_index;

	lmb_sets = of_read_number(prop++, 1);
	if (lmb_sets == 0)
		return;

	/* first pass, calculate the number of LMBs */
	p = prop;
	for (i = 0; i < lmb_sets; i++) {
		read_drconf_v2_cell(&dr_cell, &p);
		drmem_info->n_lmbs += dr_cell.seq_lmbs;
	}

	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
				   GFP_KERNEL);
	if (!drmem_info->lmbs) {
		drmem_info->n_lmbs = 0;
		return;
	}

	/* second pass, read in the LMB information */
	lmb_index = 0;
	p = prop;

	for (i = 0; i < lmb_sets; i++) {
		read_drconf_v2_cell(&dr_cell, &p);

		for (j = 0; j < dr_cell.seq_lmbs; j++) {
			lmb = &drmem_info->lmbs[lmb_index++];

			lmb->base_addr = dr_cell.base_addr;
			dr_cell.base_addr += drmem_info->lmb_size;

			lmb->drc_index = dr_cell.drc_index;
			dr_cell.drc_index++;

			lmb->aa_index = dr_cell.aa_index;
			lmb->flags = dr_cell.flags;
		}
	}
}
|
|
|
|
/*
 * Late initcall: build the in-kernel LMB table from the
 * /ibm,dynamic-reconfiguration-memory node. "ibm,dynamic-memory" (v1) is
 * used when present, otherwise "ibm,dynamic-memory-v2".
 *
 * Always returns 0; a missing node, an unknown LMB size or missing
 * properties simply leave the table unpopulated.
 */
static int __init drmem_init(void)
{
	struct device_node *dn;
	const __be32 *prop;

	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (!dn) {
		pr_info("No dynamic reconfiguration memory found\n");
		return 0;
	}

	if (init_drmem_lmb_size(dn))
		goto out;

	prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
	if (prop) {
		init_drmem_v1_lmbs(prop);
		goto out;
	}

	prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
	if (prop)
		init_drmem_v2_lmbs(prop);

out:
	/* Drop the reference taken by of_find_node_by_path(). */
	of_node_put(dn);
	return 0;
}
late_initcall(drmem_init);
|