On Power9, a pair of SMT4 cores can be presented by the firmware as a SMT8 core for backward compatibility reasons, with the fusion of two SMT4 cores. Powerpc allows LPARs to be live migrated from Power8 to Power9. Existing software developed/configured for Power8, expects to see a SMT8 core. In order to maintain userspace backward compatibility (with Power8 chips in case of Power9) in enterprise Linux systems, the topology_sibling_cpumask has to be set to SMT8 core. cpu_smt_mask() should generally point to the cpu mask of the SMT4 core. Hence override the default cpu_smt_mask() to be powerpc specific allowing for better scheduling behaviour on Power. schbench (latency measured in usecs, so lesser is better) Without patch With patch Latency percentiles (usec) Latency percentiles (usec) 50.0000th: 34 50.0000th: 38 75.0000th: 47 75.0000th: 52 90.0000th: 54 90.0000th: 60 95.0000th: 57 95.0000th: 64 *99.0000th: 62 *99.0000th: 72 99.5000th: 65 99.5000th: 75 99.9000th: 76 99.9000th: 3452 min=0, max=9205 min=0, max=9344 schbench (With Cede disabled) Without patch With patch Latency percentiles (usec) Latency percentiles (usec) 50.0000th: 20 50.0000th: 21 75.0000th: 28 75.0000th: 29 90.0000th: 33 90.0000th: 34 95.0000th: 35 95.0000th: 37 *99.0000th: 40 *99.0000th: 40 99.5000th: 48 99.5000th: 42 99.9000th: 94 99.9000th: 79 min=0, max=791 min=0, max=791 perf bench sched pipe usec/ops : lesser is better Without patch N Min Max Median Avg Stddev 101 5.095113 5.595269 5.204842 5.2298776 0.10762713 5.10 - 5.15 : ################################################## 23% (24) 5.15 - 5.20 : ############################################# 21% (22) 5.20 - 5.25 : ################################################## 23% (24) 5.25 - 5.30 : ######################### 11% (12) 5.30 - 5.35 : ########## 4% (5) 5.35 - 5.40 : ######## 3% (4) 5.40 - 5.45 : ######## 3% (4) 5.45 - 5.50 : #### 1% (2) 5.50 - 5.55 : ## 0% (1) 5.55 - 5.60 : #### 1% (2) With patch N Min Max Median Avg Stddev 101 5.134675 
8.524719 5.207658 5.2780985 0.34911969 5.1 - 5.5 : ################################################## 94% (95) 5.5 - 5.8 : ## 3% (4) 5.8 - 6.2 : 0% (1) 6.2 - 6.5 : 6.5 - 6.8 : 6.8 - 7.2 : 7.2 - 7.5 : 7.5 - 7.8 : 7.8 - 8.2 : 8.2 - 8.5 : perf bench sched pipe (cede disabled) usec/ops : lesser is better Without patch N Min Max Median Avg Stddev 101 7.884227 12.576538 7.956474 8.0170722 0.46159054 7.9 - 8.4 : ################################################## 99% (100) 8.4 - 8.8 : 8.8 - 9.3 : 9.3 - 9.8 : 9.8 - 10.2 : 10.2 - 10.7 : 10.7 - 11.2 : 11.2 - 11.6 : 11.6 - 12.1 : 12.1 - 12.6 : With patch N Min Max Median Avg Stddev 101 7.956021 8.217284 8.015615 8.0283866 0.049844967 7.96 - 7.98 : ###################### 12% (13) 7.98 - 8.01 : ################################################## 28% (29) 8.01 - 8.03 : #################################### 20% (21) 8.03 - 8.06 : ######################### 14% (15) 8.06 - 8.09 : ###################### 12% (13) 8.09 - 8.11 : ###### 3% (4) 8.11 - 8.14 : ### 1% (2) 8.14 - 8.17 : ### 1% (2) 8.17 - 8.19 : 8.19 - 8.22 : # 0% (1) Observations: With the patch, the initial run/iteration takes slightly longer. This can be attributed to the fact that we now pick a CPU from an idle core, which could be in sleep mode. Once we remove the cede state, the numbers improve in favour of the patch. 
ebizzy: transactions per second (higher is better) without patch N Min Max Median Avg Stddev 100 1018433 1304470 1193208 1182315.7 60018.733 1018433 - 1047037 : ###### 3% (3) 1047037 - 1075640 : ######## 4% (4) 1075640 - 1104244 : ######## 4% (4) 1104244 - 1132848 : ############### 7% (7) 1132848 - 1161452 : #################################### 17% (17) 1161452 - 1190055 : ########################## 12% (12) 1190055 - 1218659 : ############################################# 21% (21) 1218659 - 1247263 : ################################################## 23% (23) 1247263 - 1275866 : ######## 4% (4) 1275866 - 1304470 : ######## 4% (4) with patch N Min Max Median Avg Stddev 100 967014 1292938 1208819 1185281.8 69815.851 967014 - 999606 : ## 1% (1) 999606 - 1032199 : ## 1% (1) 1032199 - 1064791 : ############ 6% (6) 1064791 - 1097384 : ########## 5% (5) 1097384 - 1129976 : ################## 9% (9) 1129976 - 1162568 : #################### 10% (10) 1162568 - 1195161 : ########################## 13% (13) 1195161 - 1227753 : ############################################ 22% (22) 1227753 - 1260346 : ################################################## 25% (25) 1260346 - 1292938 : ############## 7% (7) Observations: Not much changes, ebizzy is not much impacted. Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20200807074517.27957-2-srikar@linux.vnet.ibm.com
269 lines
7.1 KiB
C
269 lines
7.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
* smp.h: PowerPC-specific SMP code.
|
|
*
|
|
* Original was a copy of sparc smp.h. Now heavily modified
|
|
* for PPC.
|
|
*
|
|
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
|
|
* Copyright (C) 1996-2001 Cort Dougan <cort@fsmlabs.com>
|
|
*/
|
|
|
|
#ifndef _ASM_POWERPC_SMP_H
|
|
#define _ASM_POWERPC_SMP_H
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/threads.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/irqreturn.h>
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#ifdef CONFIG_PPC64
|
|
#include <asm/paca.h>
|
|
#endif
|
|
#include <asm/percpu.h>
|
|
|
|
extern int boot_cpuid;
|
|
extern int spinning_secondaries;
|
|
extern u32 *cpu_to_phys_id;
|
|
|
|
extern void cpu_die(void);
|
|
extern int cpu_to_chip_id(int cpu);
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
/*
 * Table of SMP callbacks. A pointer to the table in use is exported as
 * smp_ops (declared further down in this header).
 *
 * Every member is a function pointer; the per-member notes are inferred
 * from the member names and signatures only and should be confirmed
 * against the implementations.
 */
struct smp_ops_t {
	/* presumably delivers IPI message @msg to @cpu */
	void (*message_pass)(int cpu, int msg);
#ifdef CONFIG_PPC_SMP_MUXED_IPI
	/* only present when CONFIG_PPC_SMP_MUXED_IPI is enabled */
	void (*cause_ipi)(int cpu);
#endif
	int (*cause_nmi_ipi)(int cpu);
	void (*probe)(void);
	/* per-CPU bring-up hooks, each taking the CPU number @nr */
	int (*kick_cpu)(int nr);
	int (*prepare_cpu)(int nr);
	void (*setup_cpu)(int nr);
	void (*bringup_done)(void);
	/* timebase hand-over pair */
	void (*take_timebase)(void);
	void (*give_timebase)(void);
	/* CPU offlining hooks */
	int (*cpu_disable)(void);
	void (*cpu_die)(unsigned int nr);
	int (*cpu_bootable)(unsigned int nr);
};
|
|
|
|
extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
|
|
extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
|
|
extern void smp_send_debugger_break(void);
|
|
extern void start_secondary_resume(void);
|
|
extern void smp_generic_give_timebase(void);
|
|
extern void smp_generic_take_timebase(void);
|
|
|
|
DECLARE_PER_CPU(unsigned int, cpu_pvr);
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
int generic_cpu_disable(void);
|
|
void generic_cpu_die(unsigned int cpu);
|
|
void generic_set_cpu_dead(unsigned int cpu);
|
|
void generic_set_cpu_up(unsigned int cpu);
|
|
int generic_check_cpu_restart(unsigned int cpu);
|
|
int is_cpu_dead(unsigned int cpu);
|
|
#else
|
|
#define generic_set_cpu_up(i) do { } while (0)
|
|
#endif
|
|
|
|
#ifdef CONFIG_PPC64
|
|
#define raw_smp_processor_id() (local_paca->paca_index)
|
|
#define hard_smp_processor_id() (get_paca()->hw_cpu_id)
|
|
#else
|
|
/* 32-bit */
|
|
extern int smp_hw_index[];
|
|
|
|
/*
|
|
* This is particularly ugly: it appears we can't actually get the definition
|
|
* of task_struct here, but we need access to the CPU this task is running on.
|
|
* Instead of using task_struct we're using _TASK_CPU which is extracted from
|
|
* asm-offsets.h by kbuild to get the current processor ID.
|
|
*
|
|
* This also needs to be safeguarded when building asm-offsets.s because at
|
|
* that time _TASK_CPU is not defined yet. It could have been guarded by
|
|
* _TASK_CPU itself, but we want the build to fail if _TASK_CPU is missing
|
|
* when building something else than asm-offsets.s
|
|
*/
|
|
#ifdef GENERATING_ASM_OFFSETS
|
|
#define raw_smp_processor_id() (0)
|
|
#else
|
|
#define raw_smp_processor_id() (*(unsigned int *)((void *)current + _TASK_CPU))
|
|
#endif
|
|
#define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
|
|
|
|
static inline int get_hard_smp_processor_id(int cpu)
|
|
{
|
|
return smp_hw_index[cpu];
|
|
}
|
|
|
|
static inline void set_hard_smp_processor_id(int cpu, int phys)
|
|
{
|
|
smp_hw_index[cpu] = phys;
|
|
}
|
|
#endif
|
|
|
|
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
|
|
DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
|
|
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
|
|
DECLARE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
|
|
|
|
static inline struct cpumask *cpu_sibling_mask(int cpu)
|
|
{
|
|
return per_cpu(cpu_sibling_map, cpu);
|
|
}
|
|
|
|
static inline struct cpumask *cpu_core_mask(int cpu)
|
|
{
|
|
return per_cpu(cpu_core_map, cpu);
|
|
}
|
|
|
|
static inline struct cpumask *cpu_l2_cache_mask(int cpu)
|
|
{
|
|
return per_cpu(cpu_l2_cache_map, cpu);
|
|
}
|
|
|
|
static inline struct cpumask *cpu_smallcore_mask(int cpu)
|
|
{
|
|
return per_cpu(cpu_smallcore_map, cpu);
|
|
}
|
|
|
|
extern int cpu_to_core_id(int cpu);
|
|
|
|
extern bool has_big_cores;
|
|
|
|
#define cpu_smt_mask cpu_smt_mask
|
|
#ifdef CONFIG_SCHED_SMT
|
|
static inline const struct cpumask *cpu_smt_mask(int cpu)
|
|
{
|
|
if (has_big_cores)
|
|
return per_cpu(cpu_smallcore_map, cpu);
|
|
|
|
return per_cpu(cpu_sibling_map, cpu);
|
|
}
|
|
#endif /* CONFIG_SCHED_SMT */
|
|
|
|
/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
|
|
*
|
|
* Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up
|
|
* in /proc/interrupts will be wrong!!! --Troy */
|
|
#define PPC_MSG_CALL_FUNCTION 0
|
|
#define PPC_MSG_RESCHEDULE 1
|
|
#define PPC_MSG_TICK_BROADCAST 2
|
|
#define PPC_MSG_NMI_IPI 3
|
|
|
|
/* This is only used by the powernv kernel */
|
|
#define PPC_MSG_RM_HOST_ACTION 4
|
|
|
|
#define NMI_IPI_ALL_OTHERS -2
|
|
|
|
#ifdef CONFIG_NMI_IPI
|
|
extern int smp_handle_nmi_ipi(struct pt_regs *regs);
|
|
#else
|
|
/* Stub used when CONFIG_NMI_IPI is off: ignores @regs and always returns 0. */
static inline int smp_handle_nmi_ipi(struct pt_regs *regs)
{
	return 0;
}
|
|
#endif
|
|
|
|
/* for irq controllers that have dedicated ipis per message (4) */
|
|
extern int smp_request_message_ipi(int virq, int message);
|
|
extern const char *smp_ipi_name[];
|
|
|
|
/* for irq controllers with only a single ipi */
|
|
extern void smp_muxed_ipi_message_pass(int cpu, int msg);
|
|
extern void smp_muxed_ipi_set_message(int cpu, int msg);
|
|
extern irqreturn_t smp_ipi_demux(void);
|
|
extern irqreturn_t smp_ipi_demux_relaxed(void);
|
|
|
|
void smp_init_pSeries(void);
|
|
void smp_init_cell(void);
|
|
void smp_setup_cpu_maps(void);
|
|
|
|
extern int __cpu_disable(void);
|
|
extern void __cpu_die(unsigned int cpu);
|
|
|
|
#else
|
|
/* for UP */
|
|
#define hard_smp_processor_id() get_hard_smp_processor_id(0)
|
|
#define smp_setup_cpu_maps()
|
|
/* UP build: intentionally empty. */
static inline void inhibit_secondary_onlining(void)
{
}
|
|
/* UP build: intentionally empty. */
static inline void uninhibit_secondary_onlining(void)
{
}
|
|
/* UP build: the sibling mask of @cpu is simply cpumask_of(@cpu). */
static inline const struct cpumask *cpu_sibling_mask(int cpu)
{
	return cpumask_of(cpu);
}
|
|
|
|
/* UP build: the small-core mask of @cpu is simply cpumask_of(@cpu). */
static inline const struct cpumask *cpu_smallcore_mask(int cpu)
{
	return cpumask_of(cpu);
}
|
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
#ifdef CONFIG_PPC64
|
|
static inline int get_hard_smp_processor_id(int cpu)
|
|
{
|
|
return paca_ptrs[cpu]->hw_cpu_id;
|
|
}
|
|
|
|
static inline void set_hard_smp_processor_id(int cpu, int phys)
|
|
{
|
|
paca_ptrs[cpu]->hw_cpu_id = phys;
|
|
}
|
|
#else
|
|
/* 32-bit */
|
|
#ifndef CONFIG_SMP
|
|
extern int boot_cpuid_phys;
|
|
static inline int get_hard_smp_processor_id(int cpu)
|
|
{
|
|
return boot_cpuid_phys;
|
|
}
|
|
|
|
static inline void set_hard_smp_processor_id(int cpu, int phys)
|
|
{
|
|
boot_cpuid_phys = phys;
|
|
}
|
|
#endif /* !CONFIG_SMP */
|
|
#endif /* !CONFIG_PPC64 */
|
|
|
|
#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE))
|
|
extern void smp_release_cpus(void);
|
|
#else
|
|
/* No-op stub for configurations where the real smp_release_cpus() is not declared. */
static inline void smp_release_cpus(void) { }
|
|
#endif
|
|
|
|
extern int smt_enabled_at_boot;
|
|
|
|
extern void smp_mpic_probe(void);
|
|
extern void smp_mpic_setup_cpu(int cpu);
|
|
extern int smp_generic_kick_cpu(int nr);
|
|
extern int smp_generic_cpu_bootable(unsigned int nr);
|
|
|
|
|
|
extern void smp_generic_give_timebase(void);
|
|
extern void smp_generic_take_timebase(void);
|
|
|
|
extern struct smp_ops_t *smp_ops;
|
|
|
|
extern void arch_send_call_function_single_ipi(int cpu);
|
|
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
|
|
|
|
/* Definitions relative to the secondary CPU spin loop
|
|
* and entry point. Not all of them exist on both 32 and
|
|
* 64-bit but defining them all here doesn't harm
|
|
*/
|
|
extern void generic_secondary_smp_init(void);
|
|
extern unsigned long __secondary_hold_spinloop;
|
|
extern unsigned long __secondary_hold_acknowledge;
|
|
extern char __secondary_hold;
|
|
extern unsigned int booting_thread_hwid;
|
|
|
|
extern void __early_start(void);
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_SMP_H */
|