9a245d0e1f
cpu_cpu_map holds all the CPUs in the DIE. However, on PowerPC this mask is
not updated when CPUs are onlined/offlined. It is, however, updated when CPUs
are added/removed. So when CPU online/offline and CPU add/remove operations
are done simultaneously, the cpumaps end up broken.

WARNING: CPU: 13 PID: 1142 at kernel/sched/topology.c:898 build_sched_domains+0xd48/0x1720
Modules linked in: rpadlpar_io rpaphp mptcp_diag xsk_diag tcp_diag udp_diag
raw_diag inet_diag unix_diag af_packet_diag netlink_diag bonding tls
nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4
nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack
nf_defrag_ipv6 nf_defrag_ipv4 ip_set rfkill nf_tables nfnetlink pseries_rng
xts vmx_crypto uio_pdrv_genirq uio binfmt_misc ip_tables xfs libcrc32c
dm_service_time sd_mod t10_pi sg ibmvfc scsi_transport_fc ibmveth
dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse
CPU: 13 PID: 1142 Comm: kworker/13:2 Not tainted 5.13.0-rc6+ #28
Workqueue: events cpuset_hotplug_workfn
NIP: c0000000001caac8 LR: c0000000001caac4 CTR: 00000000007088ec
REGS: c00000005596f220 TRAP: 0700 Not tainted (5.13.0-rc6+)
MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE> CR: 48828222 XER: 00000009
CFAR: c0000000001ea698 IRQMASK: 0
GPR00: c0000000001caac4 c00000005596f4c0 c000000001c4a400 0000000000000036
GPR04: 00000000fffdffff c00000005596f1d0 0000000000000027 c0000018cfd07f90
GPR08: 0000000000000023 0000000000000001 0000000000000027 c0000018fe68ffe8
GPR12: 0000000000008000 c00000001e9d1880 c00000013a047200 0000000000000800
GPR16: c000000001d3c7d0 0000000000000240 0000000000000048 c000000010aacd18
GPR20: 0000000000000001 c000000010aacc18 c00000013a047c00 c000000139ec2400
GPR24: 0000000000000280 c000000139ec2520 c000000136c1b400 c000000001c93060
GPR28: c00000013a047c20 c000000001d3c6c0 c000000001c978a0 000000000000000d
NIP [c0000000001caac8] build_sched_domains+0xd48/0x1720
LR [c0000000001caac4] build_sched_domains+0xd44/0x1720
Call Trace:
[c00000005596f4c0] [c0000000001caac4] build_sched_domains+0xd44/0x1720 (unreliable)
[c00000005596f670] [c0000000001cc5ec] partition_sched_domains_locked+0x3ac/0x4b0
[c00000005596f710] [c0000000002804e4] rebuild_sched_domains_locked+0x404/0x9e0
[c00000005596f810] [c000000000283e60] rebuild_sched_domains+0x40/0x70
[c00000005596f840] [c000000000284124] cpuset_hotplug_workfn+0x294/0xf10
[c00000005596fc60] [c000000000175040] process_one_work+0x290/0x590
[c00000005596fd00] [c0000000001753c8] worker_thread+0x88/0x620
[c00000005596fda0] [c000000000181704] kthread+0x194/0x1a0
[c00000005596fe10] [c00000000000ccec] ret_from_kernel_thread+0x5c/0x70
Instruction dump:
485af049 60000000 2fa30800 409e0028 80fe0000 e89a00f8 e86100e8 38da0120
7f88e378 7ce53b78 4801fb91 60000000 <0fe00000> 39000000 38e00000 38c00000

Fix this by updating cpu_cpu_map aka cpumask_of_node() on every CPU
online/offline.

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210826100521.412639-5-srikar@linux.vnet.ibm.com
152 lines
3.5 KiB
C
152 lines
3.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_POWERPC_TOPOLOGY_H
|
|
#define _ASM_POWERPC_TOPOLOGY_H
|
|
#ifdef __KERNEL__
|
|
|
|
|
|
struct device;
|
|
struct device_node;
|
|
struct drmem_lmb;
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
/*
|
|
* If zone_reclaim_mode is enabled, a RECLAIM_DISTANCE of 10 will mean that
|
|
* all zones on all nodes will be eligible for zone_reclaim().
|
|
*/
|
|
#define RECLAIM_DISTANCE 10
|
|
|
|
#include <asm/mmzone.h>
|
|
|
|
/*
 * cpumask_of_node(node) - CPU mask associated with a node id.
 *
 * Expands to cpu_all_mask when @node is -1 and to
 * node_to_cpumask_map[node] otherwise. @node appears twice in the
 * expansion, so do not pass an argument with side effects.
 */
#define cpumask_of_node(node) ((node) == -1 ?				\
			       cpu_all_mask :				\
			       node_to_cpumask_map[node])
|
|
|
|
struct pci_bus;
|
|
#ifdef CONFIG_PCI
|
|
extern int pcibus_to_node(struct pci_bus *bus);
|
|
#else
|
|
/*
 * Stub used when CONFIG_PCI is not set: no PCI bus has a node, so the
 * lookup always reports -1.
 */
static inline int pcibus_to_node(struct pci_bus *bus)
{
	return -1;
}
|
|
#endif
|
|
|
|
/*
 * cpumask_of_pcibus(bus) - CPU mask for the node that @bus maps to.
 *
 * Expands to cpu_all_mask when pcibus_to_node(bus) is -1, otherwise to
 * cpumask_of_node() of that node. pcibus_to_node(bus) is evaluated twice
 * in the non -1 case.
 */
#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
				 cpu_all_mask :				\
				 cpumask_of_node(pcibus_to_node(bus)))
|
|
|
|
int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
|
|
extern int __node_distance(int, int);
|
|
#define node_distance(a, b) __node_distance(a, b)
|
|
|
|
extern void __init dump_numa_cpu_topology(void);
|
|
|
|
extern int sysfs_add_device_to_node(struct device *dev, int nid);
|
|
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
|
|
|
|
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
|
|
{
|
|
numa_cpu_lookup_table[cpu] = node;
|
|
}
|
|
|
|
static inline int early_cpu_to_node(int cpu)
|
|
{
|
|
int nid;
|
|
|
|
nid = numa_cpu_lookup_table[cpu];
|
|
|
|
/*
|
|
* Fall back to node 0 if nid is unset (it should be, except bugs).
|
|
* This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
|
|
*/
|
|
return (nid < 0) ? 0 : nid;
|
|
}
|
|
|
|
int of_drconf_to_nid_single(struct drmem_lmb *lmb);
|
|
void update_numa_distance(struct device_node *node);
|
|
|
|
extern void map_cpu_to_node(int cpu, int node);
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
extern void unmap_cpu_from_node(unsigned long cpu);
|
|
#endif /* CONFIG_HOTPLUG_CPU */
|
|
|
|
#else
|
|
|
|
/* !CONFIG_NUMA stub: every CPU is reported as belonging to node 0. */
static inline int early_cpu_to_node(int cpu)
{
	return 0;
}
|
|
|
|
/* !CONFIG_NUMA stub: there is no NUMA topology to dump, so do nothing. */
static inline void dump_numa_cpu_topology(void)
{
}
|
|
|
|
/*
 * !CONFIG_NUMA stub: nothing is registered; @dev and @nid are ignored and
 * 0 is always returned.
 */
static inline int sysfs_add_device_to_node(struct device *dev, int nid)
{
	return 0;
}
|
|
|
|
/* !CONFIG_NUMA stub: @dev and @nid are ignored; this is a no-op. */
static inline void sysfs_remove_device_from_node(struct device *dev,
						 int nid)
{
}
|
|
|
|
/* !CONFIG_NUMA stub: there is no lookup table to update, so do nothing. */
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
{
}
|
|
|
|
/*
 * !CONFIG_NUMA stub: both associativity arrays are ignored and the
 * distance is always reported as 0.
 */
static inline int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
	return 0;
}
|
|
|
|
static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
|
|
{
|
|
return first_online_node;
|
|
}
|
|
|
|
/* !CONFIG_NUMA stub: @node is ignored; this is a no-op. */
static inline void update_numa_distance(struct device_node *node)
{
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
/* !CONFIG_NUMA (SMP) stub: @cpu and @node are ignored; this is a no-op. */
static inline void map_cpu_to_node(int cpu, int node)
{
}
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
/* !CONFIG_NUMA (SMP, HOTPLUG_CPU) stub: @cpu is ignored; this is a no-op. */
static inline void unmap_cpu_from_node(unsigned long cpu)
{
}
|
|
#endif /* CONFIG_HOTPLUG_CPU */
|
|
#endif /* CONFIG_SMP */
|
|
|
|
#endif /* CONFIG_NUMA */
|
|
|
|
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
|
|
extern int find_and_online_cpu_nid(int cpu);
|
|
extern int cpu_to_coregroup_id(int cpu);
|
|
#else
|
|
/*
 * Fallback used unless both CONFIG_NUMA and CONFIG_PPC_SPLPAR are set:
 * @cpu is ignored and 0 is always returned.
 */
static inline int find_and_online_cpu_nid(int cpu)
{
	return 0;
}
|
|
|
|
/*
 * Fallback used unless both CONFIG_NUMA and CONFIG_PPC_SPLPAR are set.
 *
 * With CONFIG_SMP the result is cpu_to_core_id(cpu); without it the
 * result is always 0.
 */
static inline int cpu_to_coregroup_id(int cpu)
{
#ifdef CONFIG_SMP
	return cpu_to_core_id(cpu);
#else
	return 0;
#endif
}
|
|
|
|
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
|
|
|
|
#include <asm-generic/topology.h>
|
|
|
|
#ifdef CONFIG_SMP
|
|
#include <asm/cputable.h>
|
|
|
|
#ifdef CONFIG_PPC64
|
|
#include <asm/smp.h>
|
|
|
|
/*
 * Topology accessors (CONFIG_SMP && CONFIG_PPC64). Each one forwards @cpu
 * to the helper or per-CPU variable named in its expansion.
 */
#define topology_physical_package_id(cpu)	(cpu_to_chip_id(cpu))

#define topology_sibling_cpumask(cpu)	(per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu)	(per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu)		(cpu_to_core_id(cpu))
|
|
|
|
#endif
|
|
#endif
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_TOPOLOGY_H */
|