2019-05-27 08:55:01 +02:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
2005-04-16 15:20:36 -07:00
/*
 * smp.h: PowerPC-specific SMP code.
 *
 * Original was a copy of sparc smp.h.  Now heavily modified
 * for PPC.
 *
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996-2001 Cort Dougan <cort@fsmlabs.com>
 */
2005-11-05 10:33:55 +11:00
# ifndef _ASM_POWERPC_SMP_H
# define _ASM_POWERPC_SMP_H
2005-04-16 15:20:36 -07:00
# ifdef __KERNEL__
# include <linux/threads.h>
# include <linux/cpumask.h>
# include <linux/kernel.h>
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimally marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelihood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implementation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-10 19:29:39 +00:00
# include <linux/irqreturn.h>
2005-04-16 15:20:36 -07:00
# ifndef __ASSEMBLY__
2005-11-05 10:33:55 +11:00
# ifdef CONFIG_PPC64
2005-04-16 15:20:36 -07:00
# include <asm/paca.h>
2005-11-05 10:33:55 +11:00
# endif
2007-10-16 01:24:05 -07:00
# include <asm/percpu.h>
2005-04-16 15:20:36 -07:00
extern int boot_cpuid ;
2011-05-25 18:09:12 +00:00
extern int spinning_secondaries ;
2018-02-14 01:08:18 +10:00
extern u32 * cpu_to_phys_id ;
2005-04-16 15:20:36 -07:00
extern void cpu_die ( void ) ;
2013-11-20 11:05:01 +11:00
extern int cpu_to_chip_id ( int cpu ) ;
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_SMP
2011-05-10 19:29:35 +00:00
/*
 * Machine-specific SMP operations.  Each platform supplies one of these
 * and points smp_ops at it; the generic SMP code dispatches through it
 * for IPI delivery, timebase sync, and CPU bringup/hotplug.
 */
struct smp_ops_t {
	void  (*message_pass)(int cpu, int msg);	/* deliver an IPI message to 'cpu' */
#ifdef CONFIG_PPC_SMP_MUXED_IPI
	void  (*cause_ipi)(int cpu);			/* raise the single muxed IPI on 'cpu' */
#endif
	int   (*cause_nmi_ipi)(int cpu);
	void  (*probe)(void);
	int   (*kick_cpu)(int nr);			/* start secondary cpu 'nr' */
	int   (*prepare_cpu)(int nr);
	void  (*setup_cpu)(int nr);
	void  (*bringup_done)(void);
	void  (*take_timebase)(void);
	void  (*give_timebase)(void);
	int   (*cpu_disable)(void);
	void  (*cpu_die)(unsigned int nr);
	int   (*cpu_bootable)(unsigned int nr);
};
2017-07-12 14:35:52 -07:00
extern int smp_send_nmi_ipi ( int cpu , void ( * fn ) ( struct pt_regs * ) , u64 delay_us ) ;
2018-05-02 23:07:27 +10:00
extern int smp_send_safe_nmi_ipi ( int cpu , void ( * fn ) ( struct pt_regs * ) , u64 delay_us ) ;
2011-05-10 19:29:06 +00:00
extern void smp_send_debugger_break ( void ) ;
2011-02-10 18:45:24 +11:00
extern void start_secondary_resume ( void ) ;
2012-12-21 14:04:10 -08:00
extern void smp_generic_give_timebase ( void ) ;
extern void smp_generic_take_timebase ( void ) ;
2005-04-16 15:20:36 -07:00
2009-10-29 22:34:14 +09:00
DECLARE_PER_CPU ( unsigned int , cpu_pvr ) ;
2008-05-08 14:27:19 +10:00
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_HOTPLUG_CPU
int generic_cpu_disable ( void ) ;
void generic_cpu_die ( unsigned int cpu ) ;
2011-04-01 09:23:37 +11:00
void generic_set_cpu_dead ( unsigned int cpu ) ;
2012-07-20 20:42:34 +08:00
void generic_set_cpu_up ( unsigned int cpu ) ;
2011-09-19 17:44:49 +00:00
int generic_check_cpu_restart ( unsigned int cpu ) ;
2015-11-20 17:14:01 +08:00
int is_cpu_dead ( unsigned int cpu ) ;
# else
# define generic_set_cpu_up(i) do { } while (0)
2005-04-16 15:20:36 -07:00
# endif
2005-11-05 10:33:55 +11:00
# ifdef CONFIG_PPC64
2006-11-01 05:44:54 +11:00
# define raw_smp_processor_id() (local_paca->paca_index)
2005-04-16 15:20:36 -07:00
# define hard_smp_processor_id() (get_paca()->hw_cpu_id)
2005-11-05 10:33:55 +11:00
# else
/* 32-bit */
extern int smp_hw_index [ ] ;
2019-01-31 10:08:58 +00:00
/*
 * This is particularly ugly: it appears we can't actually get the definition
 * of task_struct here, but we need access to the CPU this task is running on.
 * Instead of using task_struct we're using _TASK_CPU which is extracted from
 * asm-offsets.h by kbuild to get the current processor ID.
 *
 * This also needs to be safeguarded when building asm-offsets.s because at
 * that time _TASK_CPU is not defined yet. It could have been guarded by
 * _TASK_CPU itself, but we want the build to fail if _TASK_CPU is missing
 * when building something else than asm-offsets.s
 */
# ifdef GENERATING_ASM_OFFSETS
# define raw_smp_processor_id() (0)
# else
# define raw_smp_processor_id() (*(unsigned int *)((void *)current + _TASK_CPU))
# endif
2005-11-05 10:33:55 +11:00
# define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
2008-08-18 14:23:48 +10:00
/*
 * 32-bit SMP: the logical-to-physical cpu id mapping lives in the
 * smp_hw_index[] array (one entry per logical cpu).
 */
static inline int get_hard_smp_processor_id(int cpu)
{
	/* Physical (hardware) id backing logical 'cpu'. */
	return smp_hw_index[cpu];
}

static inline void set_hard_smp_processor_id(int cpu, int phys)
{
	/* Record the physical id for logical 'cpu'. */
	smp_hw_index[cpu] = phys;
}
2005-11-05 10:33:55 +11:00
# endif
2005-04-16 15:20:36 -07:00
2010-04-26 15:32:41 +00:00
DECLARE_PER_CPU ( cpumask_var_t , cpu_sibling_map ) ;
2017-06-29 17:12:55 +10:00
DECLARE_PER_CPU ( cpumask_var_t , cpu_l2_cache_map ) ;
2010-04-26 15:32:41 +00:00
DECLARE_PER_CPU ( cpumask_var_t , cpu_core_map ) ;
2018-10-11 11:03:01 +05:30
DECLARE_PER_CPU ( cpumask_var_t , cpu_smallcore_map ) ;
2010-04-26 15:32:41 +00:00
static inline struct cpumask * cpu_sibling_mask ( int cpu )
{
return per_cpu ( cpu_sibling_map , cpu ) ;
}
static inline struct cpumask * cpu_core_mask ( int cpu )
{
return per_cpu ( cpu_core_map , cpu ) ;
}
2017-06-29 17:12:55 +10:00
static inline struct cpumask * cpu_l2_cache_mask ( int cpu )
{
return per_cpu ( cpu_l2_cache_map , cpu ) ;
}
2018-10-11 11:03:01 +05:30
static inline struct cpumask * cpu_smallcore_mask ( int cpu )
{
return per_cpu ( cpu_smallcore_map , cpu ) ;
}
2008-07-27 15:24:54 +10:00
extern int cpu_to_core_id ( int cpu ) ;
2005-04-16 15:20:36 -07:00
powerpc/topology: Override cpu_smt_mask
On Power9, a pair of SMT4 cores can be presented by the firmware as a SMT8
core for backward compatibility reasons, with the fusion of two SMT4 cores.
Powerpc allows LPARs to be live migrated from Power8 to Power9. Existing
software developed/configured for Power8, expects to see a SMT8 core.
In order to maintain userspace backward compatibility (with Power8 chips in
case of Power9) in enterprise Linux systems, the topology_sibling_cpumask
has to be set to SMT8 core.
cpu_smt_mask() should generally point to the cpu mask of the SMT4 core.
Hence override the default cpu_smt_mask() to be powerpc specific
allowing for better scheduling behaviour on Power.
schbench
(latency measured in usecs, so lesser is better)
Without patch With patch
Latency percentiles (usec) Latency percentiles (usec)
50.0000th: 34 50.0000th: 38
75.0000th: 47 75.0000th: 52
90.0000th: 54 90.0000th: 60
95.0000th: 57 95.0000th: 64
*99.0000th: 62 *99.0000th: 72
99.5000th: 65 99.5000th: 75
99.9000th: 76 99.9000th: 3452
min=0, max=9205 min=0, max=9344
schbench (With Cede disabled)
Without patch With patch
Latency percentiles (usec) Latency percentiles (usec)
50.0000th: 20 50.0000th: 21
75.0000th: 28 75.0000th: 29
90.0000th: 33 90.0000th: 34
95.0000th: 35 95.0000th: 37
*99.0000th: 40 *99.0000th: 40
99.5000th: 48 99.5000th: 42
99.9000th: 94 99.9000th: 79
min=0, max=791 min=0, max=791
perf bench sched pipe
usec/ops : lesser is better
Without patch
N Min Max Median Avg Stddev
101 5.095113 5.595269 5.204842 5.2298776 0.10762713
5.10 - 5.15 : ################################################## 23% (24)
5.15 - 5.20 : ############################################# 21% (22)
5.20 - 5.25 : ################################################## 23% (24)
5.25 - 5.30 : ######################### 11% (12)
5.30 - 5.35 : ########## 4% (5)
5.35 - 5.40 : ######## 3% (4)
5.40 - 5.45 : ######## 3% (4)
5.45 - 5.50 : #### 1% (2)
5.50 - 5.55 : ## 0% (1)
5.55 - 5.60 : #### 1% (2)
With patch
N Min Max Median Avg Stddev
101 5.134675 8.524719 5.207658 5.2780985 0.34911969
5.1 - 5.5 : ################################################## 94% (95)
5.5 - 5.8 : ## 3% (4)
5.8 - 6.2 : 0% (1)
6.2 - 6.5 :
6.5 - 6.8 :
6.8 - 7.2 :
7.2 - 7.5 :
7.5 - 7.8 :
7.8 - 8.2 :
8.2 - 8.5 :
perf bench sched pipe (cede disabled)
usec/ops : lesser is better
Without patch
N Min Max Median Avg Stddev
101 7.884227 12.576538 7.956474 8.0170722 0.46159054
7.9 - 8.4 : ################################################## 99% (100)
8.4 - 8.8 :
8.8 - 9.3 :
9.3 - 9.8 :
9.8 - 10.2 :
10.2 - 10.7 :
10.7 - 11.2 :
11.2 - 11.6 :
11.6 - 12.1 :
12.1 - 12.6 :
With patch
N Min Max Median Avg Stddev
101 7.956021 8.217284 8.015615 8.0283866 0.049844967
7.96 - 7.98 : ###################### 12% (13)
7.98 - 8.01 : ################################################## 28% (29)
8.01 - 8.03 : #################################### 20% (21)
8.03 - 8.06 : ######################### 14% (15)
8.06 - 8.09 : ###################### 12% (13)
8.09 - 8.11 : ###### 3% (4)
8.11 - 8.14 : ### 1% (2)
8.14 - 8.17 : ### 1% (2)
8.17 - 8.19 :
8.19 - 8.22 : # 0% (1)
Observations: With the patch, the initial run/iteration takes a slight
longer time. This can be attributed to the fact that now we pick a CPU
from a idle core which could be sleep mode. Once we remove the cede,
state the numbers improve in favour of the patch.
ebizzy:
transactions per second (higher is better)
without patch
N Min Max Median Avg Stddev
100 1018433 1304470 1193208 1182315.7 60018.733
1018433 - 1047037 : ###### 3% (3)
1047037 - 1075640 : ######## 4% (4)
1075640 - 1104244 : ######## 4% (4)
1104244 - 1132848 : ############### 7% (7)
1132848 - 1161452 : #################################### 17% (17)
1161452 - 1190055 : ########################## 12% (12)
1190055 - 1218659 : ############################################# 21% (21)
1218659 - 1247263 : ################################################## 23% (23)
1247263 - 1275866 : ######## 4% (4)
1275866 - 1304470 : ######## 4% (4)
with patch
N Min Max Median Avg Stddev
100 967014 1292938 1208819 1185281.8 69815.851
967014 - 999606 : ## 1% (1)
999606 - 1032199 : ## 1% (1)
1032199 - 1064791 : ############ 6% (6)
1064791 - 1097384 : ########## 5% (5)
1097384 - 1129976 : ################## 9% (9)
1129976 - 1162568 : #################### 10% (10)
1162568 - 1195161 : ########################## 13% (13)
1195161 - 1227753 : ############################################ 22% (22)
1227753 - 1260346 : ################################################## 25% (25)
1260346 - 1292938 : ############## 7% (7)
Observations: Not much changes, ebizzy is not much impacted.
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200807074517.27957-2-srikar@linux.vnet.ibm.com
2020-08-07 13:15:17 +05:30
/*
 * On big-core (fused SMT8) systems the scheduler's SMT mask should be the
 * small-core sibling mask rather than the full SMT8 sibling mask; override
 * the generic cpu_smt_mask() accordingly.
 */
extern bool has_big_cores;

#define cpu_smt_mask cpu_smt_mask
#ifdef CONFIG_SCHED_SMT
static inline const struct cpumask *cpu_smt_mask(int cpu)
{
	return has_big_cores ? per_cpu(cpu_smallcore_map, cpu)
			     : per_cpu(cpu_sibling_map, cpu);
}
#endif /* CONFIG_SCHED_SMT */
2005-04-16 15:20:36 -07:00
/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
 *
 * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up
 * in /proc/interrupts will be wrong!!! --Troy */
2016-12-20 04:30:08 +10:00
# define PPC_MSG_CALL_FUNCTION 0
# define PPC_MSG_RESCHEDULE 1
2014-02-26 05:37:43 +05:30
# define PPC_MSG_TICK_BROADCAST 2
2016-12-20 04:30:08 +10:00
# define PPC_MSG_NMI_IPI 3
2005-04-16 15:20:36 -07:00
2015-12-17 14:59:03 -06:00
/* This is only used by the powernv kernel */
# define PPC_MSG_RM_HOST_ACTION 4
2016-12-20 04:30:08 +10:00
# define NMI_IPI_ALL_OTHERS -2
# ifdef CONFIG_NMI_IPI
extern int smp_handle_nmi_ipi ( struct pt_regs * regs ) ;
# else
static inline int smp_handle_nmi_ipi ( struct pt_regs * regs ) { return 0 ; }
# endif
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-10 19:29:39 +00:00
/* for irq controllers that have dedicated ipis per message (4) */
2008-11-14 20:11:49 +00:00
extern int smp_request_message_ipi ( int virq , int message ) ;
extern const char * smp_ipi_name [ ] ;
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-10 19:29:39 +00:00
/* for irq controllers with only a single ipi */
extern void smp_muxed_ipi_message_pass ( int cpu , int msg ) ;
2015-12-17 14:59:04 -06:00
extern void smp_muxed_ipi_set_message ( int cpu , int msg ) ;
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-10 19:29:39 +00:00
extern irqreturn_t smp_ipi_demux ( void ) ;
2017-04-13 20:16:22 +10:00
extern irqreturn_t smp_ipi_demux_relaxed ( void ) ;
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-10 19:29:39 +00:00
2005-04-16 15:20:36 -07:00
void smp_init_pSeries ( void ) ;
2005-10-31 20:08:38 -05:00
void smp_init_cell ( void ) ;
2005-11-05 10:33:55 +11:00
void smp_setup_cpu_maps ( void ) ;
2005-04-16 15:20:36 -07:00
extern int __cpu_disable ( void ) ;
extern void __cpu_die ( unsigned int cpu ) ;
2005-11-05 10:33:55 +11:00
# else
/* for UP */
2008-10-10 09:44:33 +00:00
# define hard_smp_processor_id() get_hard_smp_processor_id(0)
2005-11-05 10:33:55 +11:00
# define smp_setup_cpu_maps()
2013-04-15 20:28:01 +00:00
/* UP stubs: there are no secondaries to online, so these are no-ops. */
static inline void inhibit_secondary_onlining(void) { }
static inline void uninhibit_secondary_onlining(void) { }
2013-07-24 20:13:21 -05:00
/* UP: every topology mask degenerates to just this one cpu. */
static inline const struct cpumask *cpu_sibling_mask(int cpu)
{
	return cpumask_of(cpu);
}

static inline const struct cpumask *cpu_smallcore_mask(int cpu)
{
	return cpumask_of(cpu);
}
2005-04-16 15:20:36 -07:00
# endif /* CONFIG_SMP */
2005-11-05 10:33:55 +11:00
# ifdef CONFIG_PPC64
2008-08-18 14:23:48 +10:00
static inline int get_hard_smp_processor_id ( int cpu )
{
2018-02-14 01:08:12 +10:00
return paca_ptrs [ cpu ] - > hw_cpu_id ;
2008-08-18 14:23:48 +10:00
}
static inline void set_hard_smp_processor_id ( int cpu , int phys )
{
2018-02-14 01:08:12 +10:00
paca_ptrs [ cpu ] - > hw_cpu_id = phys ;
2008-08-18 14:23:48 +10:00
}
2005-11-05 10:33:55 +11:00
# else
/* 32-bit */
# ifndef CONFIG_SMP
2006-03-25 17:25:17 +11:00
extern int boot_cpuid_phys ;
2008-08-18 14:23:48 +10:00
static inline int get_hard_smp_processor_id(int cpu)
{
	/* 32-bit UP: only the boot cpu exists, so 'cpu' is ignored. */
	return boot_cpuid_phys;
}

static inline void set_hard_smp_processor_id(int cpu, int phys)
{
	boot_cpuid_phys = phys;
}
# endif /* !CONFIG_SMP */
# endif /* !CONFIG_PPC64 */
2005-04-16 15:20:36 -07:00
2016-11-29 23:45:50 +11:00
# if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE))
2016-07-05 15:07:51 +10:00
extern void smp_release_cpus ( void ) ;
# else
static inline void smp_release_cpus ( void ) { } ;
# endif
2005-04-16 15:20:36 -07:00
extern int smt_enabled_at_boot ;
2015-04-04 19:28:50 +11:00
extern void smp_mpic_probe ( void ) ;
2005-04-16 15:20:36 -07:00
extern void smp_mpic_setup_cpu ( int cpu ) ;
2011-04-11 21:46:19 +00:00
extern int smp_generic_kick_cpu ( int nr ) ;
2013-08-05 14:58:34 -05:00
extern int smp_generic_cpu_bootable ( unsigned int nr ) ;
2005-04-16 15:20:36 -07:00
extern void smp_generic_give_timebase ( void ) ;
extern void smp_generic_take_timebase ( void ) ;
extern struct smp_ops_t * smp_ops ;
2008-06-26 11:22:13 +02:00
extern void arch_send_call_function_single_ipi ( int cpu ) ;
2009-09-24 09:34:45 -06:00
extern void arch_send_call_function_ipi_mask ( const struct cpumask * mask ) ;
2008-06-26 11:22:13 +02:00
2009-07-23 23:15:28 +00:00
/* Definitions relative to the secondary CPU spin loop
 * and entry point.  Not all of them exist on both 32 and
 * 64-bit but defining them all here doesn't harm.
 */
extern void generic_secondary_smp_init ( void ) ;
extern unsigned long __secondary_hold_spinloop ;
extern unsigned long __secondary_hold_acknowledge ;
extern char __secondary_hold ;
2015-11-20 17:14:02 +08:00
extern unsigned int booting_thread_hwid ;
2009-07-23 23:15:28 +00:00
2012-07-20 20:42:36 +08:00
extern void __early_start ( void ) ;
2005-04-16 15:20:36 -07:00
# endif /* __ASSEMBLY__ */
# endif /* __KERNEL__ */
2005-11-05 10:33:55 +11:00
#endif /* _ASM_POWERPC_SMP_H */