2021-01-30 23:08:36 +10:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
# ifndef _ASM_POWERPC_INTERRUPT_H
# define _ASM_POWERPC_INTERRUPT_H
2021-04-14 19:00:33 +08:00
/*
 * Interrupt vector numbers, grouped by the platform families named in each
 * group comment. INTERRUPT_PERFMON and INTERRUPT_DOORBELL take different
 * values depending on CONFIG_BOOKE / CONFIG_PPC_BOOK3S.
 */

/* BookE/4xx */
#define INTERRUPT_CRITICAL_INPUT  0x100

/* BookE */
#define INTERRUPT_DEBUG           0xd00
#ifdef CONFIG_BOOKE
#define INTERRUPT_PERFMON         0x260
#define INTERRUPT_DOORBELL        0x280
#endif

/* BookS/4xx/8xx */
#define INTERRUPT_MACHINE_CHECK   0x200

/* BookS/8xx */
#define INTERRUPT_SYSTEM_RESET    0x100

/* BookS */
#define INTERRUPT_DATA_SEGMENT    0x380
#define INTERRUPT_INST_SEGMENT    0x480
#define INTERRUPT_TRACE           0xd00
#define INTERRUPT_H_DATA_STORAGE  0xe00
#define INTERRUPT_HMI             0xe60
#define INTERRUPT_H_FAC_UNAVAIL   0xf80
#ifdef CONFIG_PPC_BOOK3S
#define INTERRUPT_DOORBELL        0xa00
#define INTERRUPT_PERFMON         0xf00
#define INTERRUPT_ALTIVEC_UNAVAIL 0xf20
#endif

/* BookE/BookS/4xx/8xx */
#define INTERRUPT_DATA_STORAGE    0x300
#define INTERRUPT_INST_STORAGE    0x400
#define INTERRUPT_EXTERNAL        0x500
#define INTERRUPT_ALIGNMENT       0x600
#define INTERRUPT_PROGRAM         0x700
#define INTERRUPT_SYSCALL         0xc00
/* Same value as the BookS definition above, so the redefinition is benign. */
#define INTERRUPT_TRACE           0xd00

/* BookE/BookS/44x */
#define INTERRUPT_FP_UNAVAIL      0x800

/* BookE/BookS/44x/8xx */
#define INTERRUPT_DECREMENTER     0x900

/* Fallback so that INTERRUPT_PERFMON is defined on every configuration. */
#ifndef INTERRUPT_PERFMON
#define INTERRUPT_PERFMON         0x0
#endif

/* 8xx */
#define INTERRUPT_SOFT_EMU_8xx        0x1000
#define INTERRUPT_INST_TLB_MISS_8xx   0x1100
#define INTERRUPT_DATA_TLB_MISS_8xx   0x1200
#define INTERRUPT_INST_TLB_ERROR_8xx  0x1300
#define INTERRUPT_DATA_TLB_ERROR_8xx  0x1400
#define INTERRUPT_DATA_BREAKPOINT_8xx 0x1c00
#define INTERRUPT_INST_BREAKPOINT_8xx 0x1d00

/* 603 */
#define INTERRUPT_INST_TLB_MISS_603       0x1000
#define INTERRUPT_DATA_LOAD_TLB_MISS_603  0x1100
#define INTERRUPT_DATA_STORE_TLB_MISS_603 0x1200
2021-04-19 15:48:09 +00:00
# ifndef __ASSEMBLY__
# include <linux/context_tracking.h>
# include <linux/hardirq.h>
# include <asm/cputime.h>
# include <asm/ftrace.h>
# include <asm/kprobes.h>
# include <asm/runlatch.h>
2021-06-30 17:46:15 +10:00
# ifdef CONFIG_PPC_BOOK3S_64
2021-06-18 01:51:08 +10:00
/* Address used as the upper bound checked by is_implicit_soft_masked(). */
extern char __end_soft_masked[];

/* Address-table lookups implemented outside this header. */
bool search_kernel_soft_mask_table(unsigned long addr);
unsigned long search_kernel_restart_table(unsigned long addr);

DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
2021-06-30 17:46:15 +10:00
static inline bool is_implicit_soft_masked ( struct pt_regs * regs )
{
if ( regs - > msr & MSR_PR )
return false ;
if ( regs - > nip > = ( unsigned long ) __end_soft_masked )
return false ;
2021-06-30 17:46:16 +10:00
return search_kernel_soft_mask_table ( regs - > nip ) ;
2021-06-30 17:46:15 +10:00
}
2021-06-18 01:51:03 +10:00
/* Clear both the srr_valid and hsrr_valid flags in the local PACA. */
static inline void srr_regs_clobbered(void)
{
	local_paca->srr_valid = 0;
	local_paca->hsrr_valid = 0;
}
# else
2021-09-23 00:54:51 +10:00
/*
 * Stub for configurations without CONFIG_PPC_BOOK3S_64: @addr is ignored
 * and 0 is always returned.
 */
static inline unsigned long search_kernel_restart_table(unsigned long addr)
{
	return 0UL;
}
2021-06-30 17:46:15 +10:00
/*
 * Stub for configurations without CONFIG_PPC_BOOK3S_64: @regs is ignored
 * and false is always returned.
 */
static inline bool is_implicit_soft_masked(struct pt_regs *regs)
{
	return false;
}
2021-06-18 01:51:03 +10:00
/* Stub for configurations without CONFIG_PPC_BOOK3S_64: does nothing. */
static inline void srr_regs_clobbered(void)
{
}
# endif
2021-04-06 12:55:08 +10:00
/*
 * With CONFIG_PPC_970_NAP, if _TLF_NAPPING is set in the thread-local flags,
 * clear it and point the return address in @regs at power4_idle_nap_return.
 * Without that option this is a no-op.
 */
static inline void nap_adjust_return(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_970_NAP
	if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
		/* Can avoid a test-and-clear because NMIs do not call this */
		clear_thread_local_flags(_TLF_NAPPING);
		regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return);
	}
#endif
}
2021-01-30 23:08:37 +10:00
/*
 * State object passed to the interrupt enter/exit prepare helpers.
 * It currently has no members.
 */
struct interrupt_state {
};
2021-02-09 19:29:28 +00:00
/*
 * With CONFIG_PPC_ADV_DEBUG_REGS on a CONFIG_PPC32 build, when the current
 * thread's saved DBCR0 has DBCR0_IDM set: write -1 to DBSR and load DBCR0
 * from this CPU's global_dbcr0[] entry. Otherwise this does nothing.
 */
static inline void booke_restore_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
	unsigned long dbcr0 = current->thread.debug.dbcr0;

	if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) {
		mtspr(SPRN_DBSR, -1);
		mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);
	}
#endif
}
2021-01-30 23:08:37 +10:00
/*
 * Entry bookkeeping run at the start of interrupt handlers.
 *
 * On PPC32: notes hardirqs-off for tracing if the interrupted context had
 * interrupts enabled, locks KUAP (saving it first for kernel-mode regs) and
 * accounts user entry time.
 *
 * On PPC64: soft-masks all interrupts, enables MSR[EE] or only MSR[RI]
 * depending on PACA_IRQ_HARD_DIS, then performs the tracing, KUAP, context
 * tracking and time accounting steps. The statement order matters: anything
 * that can fault must come after RI has been enabled.
 *
 * @state is not used here.
 */
static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
#ifdef CONFIG_PPC32
	if (!arch_irq_disabled_regs(regs))
		trace_hardirqs_off();

	if (user_mode(regs))
		kuap_lock();
	else
		kuap_save_and_lock(regs);

	if (user_mode(regs))
		account_cpu_user_entry();
#endif

#ifdef CONFIG_PPC64
	bool trace_enable = false;

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) {
		if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
			trace_enable = true;
	} else {
		irq_soft_mask_set(IRQS_ALL_DISABLED);
	}

	/*
	 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
	 * Asynchronous interrupts get here with HARD_DIS set (see below), so
	 * this enables MSR[EE] for synchronous interrupts. IRQs remain
	 * soft-masked. The interrupt handler may later call
	 * interrupt_cond_local_irq_enable() to achieve a regular process
	 * context.
	 */
	if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
			BUG_ON(!(regs->msr & MSR_EE));

		__hard_irq_enable();
	} else {
		__hard_RI_enable();
	}

	/* Do this when RI=1 because it can cause SLB faults */
	if (trace_enable)
		trace_hardirqs_off();

	if (user_mode(regs)) {
		kuap_lock();
		CT_WARN_ON(ct_state() != CONTEXT_USER);
		user_exit_irqoff();

		account_cpu_user_entry();
		account_stolen_time();
	} else {
		kuap_save_and_lock(regs);
		/*
		 * CT_WARN_ON comes here via program_check_exception,
		 * so avoid recursion.
		 */
		if (TRAP(regs) != INTERRUPT_PROGRAM) {
			CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
			if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
				BUG_ON(is_implicit_soft_masked(regs));
		}

		/* Move this under a debugging check */
		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) &&
				arch_irq_disabled_regs(regs))
			BUG_ON(search_kernel_restart_table(regs->nip));
	}
	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
#endif

	booke_restore_dbcr0();
}
/*
 * Note that interrupt_exit_prepare and interrupt_async_exit_prepare do not
 * necessarily return immediately to the regs context (e.g., if regs is
 * usermode, we don't necessarily return to user mode). Other interrupts
 * might be taken between here and return, context switch / preemption may
 * occur in the exit path after this, or a signal may be delivered, etc.
 *
 * The real interrupt exit code is platform specific, e.g.,
 * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s.
 *
 * interrupt_nmi_exit_prepare, however, does return directly to regs, because
 * NMIs do not do "exit work" or replay soft-masked interrupts.
 *
 * This helper itself currently performs no work.
 */
static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
}
/*
 * Entry bookkeeping for asynchronous interrupt handlers.
 *
 * Marks PACA_IRQ_HARD_DIS on PPC64 before calling interrupt_enter_prepare(),
 * turns the runlatch on where applicable (Book3S-64), then calls irq_enter().
 */
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
#ifdef CONFIG_PPC64
	/* Ensure interrupt_enter_prepare does not enable MSR[EE] */
	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
#endif
	interrupt_enter_prepare(regs, state);
#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * RI=1 is set by interrupt_enter_prepare, so this thread flags access
	 * has to come afterward (it can cause SLB faults).
	 */
	if (cpu_has_feature(CPU_FTR_CTRL) &&
	    !test_thread_local_flags(_TLF_RUNLATCH))
		__ppc64_runlatch_on();
#endif
	irq_enter();
}
/*
 * Exit bookkeeping for asynchronous interrupt handlers: adjust the nap
 * return address, call irq_exit(), then interrupt_exit_prepare().
 */
static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
	/*
	 * Adjust at exit so the main handler sees the true NIA. This must
	 * come before irq_exit() because irq_exit can enable interrupts, and
	 * if another interrupt is taken before nap_adjust_return has run
	 * here, then that interrupt would return directly to idle nap return.
	 */
	nap_adjust_return(regs);

	irq_exit();
	interrupt_exit_prepare(regs, state);
}
/*
 * Values captured by interrupt_nmi_enter_prepare() and put back by
 * interrupt_nmi_exit_prepare(). It has members only on PPC64.
 */
struct interrupt_nmi_state {
#ifdef CONFIG_PPC64
	u8 irq_soft_mask;	/* saved local_paca->irq_soft_mask */
	u8 irq_happened;	/* saved local_paca->irq_happened */
	u8 ftrace_enabled;	/* saved this_cpu_get_ftrace_enabled() */
	u64 softe;		/* saved regs->softe */
#endif
};
2021-03-16 20:41:59 +10:00
static inline bool nmi_disables_ftrace ( struct pt_regs * regs )
{
/* Allow DEC and PMI to be traced when they are soft-NMI */
if ( IS_ENABLED ( CONFIG_PPC_BOOK3S_64 ) ) {
2021-04-14 19:00:33 +08:00
if ( TRAP ( regs ) = = INTERRUPT_DECREMENTER )
2021-03-16 20:41:59 +10:00
return false ;
2021-04-14 19:00:33 +08:00
if ( TRAP ( regs ) = = INTERRUPT_PERFMON )
2021-03-16 20:41:59 +10:00
return false ;
}
if ( IS_ENABLED ( CONFIG_PPC_BOOK3E ) ) {
2021-04-14 19:00:33 +08:00
if ( TRAP ( regs ) = = INTERRUPT_PERFMON )
2021-03-16 20:41:59 +10:00
return false ;
}
return true ;
}
2021-01-30 23:08:37 +10:00
static inline void interrupt_nmi_enter_prepare ( struct pt_regs * regs , struct interrupt_nmi_state * state )
{
2021-01-30 23:08:49 +10:00
# ifdef CONFIG_PPC64
2021-01-30 23:08:50 +10:00
state - > irq_soft_mask = local_paca - > irq_soft_mask ;
state - > irq_happened = local_paca - > irq_happened ;
2021-06-30 17:46:17 +10:00
state - > softe = regs - > softe ;
2021-01-30 23:08:50 +10:00
/*
* Set IRQS_ALL_DISABLED unconditionally so irqs_disabled ( ) does
* the right thing , and set IRQ_HARD_DIS . We do not want to reconcile
* because that goes through irq tracing which we don ' t want in NMI .
*/
local_paca - > irq_soft_mask = IRQS_ALL_DISABLED ;
local_paca - > irq_happened | = PACA_IRQ_HARD_DIS ;
2021-10-05 00:56:41 +10:00
if ( ! ( regs - > msr & MSR_EE ) | | is_implicit_soft_masked ( regs ) ) {
/*
* Adjust regs - > softe to be soft - masked if it had not been
* reconcied ( e . g . , interrupt entry with MSR [ EE ] = 0 but softe
* not yet set disabled ) , or if it was in an implicit soft
* masked state . This makes arch_irq_disabled_regs ( regs )
* behave as expected .
*/
powerpc/64s: Make NMI record implicitly soft-masked code as irqs disabled
scv support introduced the notion of code that implicitly soft-masks
irqs due to the instruction addresses. This is required because scv
enters the kernel with MSR[EE]=1.
If a NMI (including soft-NMI) interrupt hits when we are implicitly
soft-masked then its regs->softe does not reflect this because it is
derived from the explicit soft mask state (paca->irq_soft_mask). This
makes arch_irq_disabled_regs(regs) return false.
This can trigger a warning in the soft-NMI watchdog code (shown below).
Fix it by having NMI interrupts set regs->softe to disabled in case of
interrupting an implicit soft-masked region.
------------[ cut here ]------------
WARNING: CPU: 41 PID: 1103 at arch/powerpc/kernel/watchdog.c:259 soft_nmi_interrupt+0x3e4/0x5f0
CPU: 41 PID: 1103 Comm: (spawn) Not tainted
NIP: c000000000039534 LR: c000000000039234 CTR: c000000000009a00
REGS: c000007fffbcf940 TRAP: 0700 Not tainted
MSR: 9000000000021033 <SF,HV,ME,IR,DR,RI,LE> CR: 22042482 XER: 200400ad
CFAR: c000000000039260 IRQMASK: 3
GPR00: c000000000039204 c000007fffbcfbe0 c000000001d6c300 0000000000000003
GPR04: 00007ffffa45d078 0000000000000000 0000000000000008 0000000000000020
GPR08: 0000007ffd4e0000 0000000000000000 c000007ffffceb00 7265677368657265
GPR12: 9000000000009033 c000007ffffceb00 00000f7075bf4480 000000000000002a
GPR16: 00000f705745a528 00007ffffa45ddd8 00000f70574d0008 0000000000000000
GPR20: 00000f7075c58d70 00000f7057459c38 0000000000000001 0000000000000040
GPR24: 0000000000000000 0000000000000029 c000000001dae058 0000000000000029
GPR28: 0000000000000000 0000000000000800 0000000000000009 c000007fffbcfd60
NIP [c000000000039534] soft_nmi_interrupt+0x3e4/0x5f0
LR [c000000000039234] soft_nmi_interrupt+0xe4/0x5f0
Call Trace:
[c000007fffbcfbe0] [c000000000039204] soft_nmi_interrupt+0xb4/0x5f0 (unreliable)
[c000007fffbcfcf0] [c00000000000c0e8] soft_nmi_common+0x138/0x1c4
--- interrupt: 900 at end_real_trampolines+0x0/0x1000
NIP: c000000000003000 LR: 00007ca426adb03c CTR: 900000000280f033
REGS: c000007fffbcfd60 TRAP: 0900
MSR: 9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 44042482 XER: 200400ad
CFAR: 00007ca426946020 IRQMASK: 0
GPR00: 00000000000000ad 00007ffffa45d050 00007ca426b07f00 0000000000000035
GPR04: 00007ffffa45d078 0000000000000000 0000000000000008 0000000000000020
GPR08: 0000000000000000 0000000000100000 0000000010000000 00007ffffa45d110
GPR12: 0000000000000001 00007ca426d4e680 00000f7075bf4480 000000000000002a
GPR16: 00000f705745a528 00007ffffa45ddd8 00000f70574d0008 0000000000000000
GPR20: 00000f7075c58d70 00000f7057459c38 0000000000000001 0000000000000040
GPR24: 0000000000000000 00000f7057473f68 0000000000000003 000000000000041b
GPR28: 00007ffffa45d4c4 0000000000000035 0000000000000000 00000f7057473f68
NIP [c000000000003000] end_real_trampolines+0x0/0x1000
LR [00007ca426adb03c] 0x7ca426adb03c
--- interrupt: 900
Instruction dump:
60000000 60000000 60420000 38600001 482b3ae5 60000000 e93f0138 a36d0008
7daa6b78 71290001 7f7907b4 4082fd34 <0fe00000> 4bfffd2c 60420000 ea6100a8
---[ end trace dc75f67d819779da ]---
Fixes: 118178e62e2e ("powerpc: move NMI entry/exit code into wrapper")
Reported-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210503111708.758261-1-npiggin@gmail.com
2021-05-03 21:17:08 +10:00
regs - > softe = IRQS_ALL_DISABLED ;
}
powerpc/64s/interrupt: handle MSR EE and RI in interrupt entry wrapper
The mtmsrd to enable MSR[RI] can be combined with the mtmsrd to enable
MSR[EE] in interrupt entry code, for those interrupts which enable EE.
This helps performance of important synchronous interrupts (e.g., page
faults).
This is similar to what commit dd152f70bdc1 ("powerpc/64s: system call
avoid setting MSR[RI] until we set MSR[EE]") does for system calls.
Do this by enabling EE and RI together at the beginning of the entry
wrapper if PACA_IRQ_HARD_DIS is clear, and only enabling RI if it is
set.
Asynchronous interrupts set PACA_IRQ_HARD_DIS, but synchronous ones
leave it unchanged, so by default they always get EE=1 unless they have
interrupted a caller that is hard disabled. When the sync interrupt
later calls interrupt_cond_local_irq_enable(), it will not require
another mtmsrd because MSR[EE] was already enabled here.
This avoids one mtmsrd L=1 for synchronous interrupts on 64s, which
saves about 20 cycles on POWER9. And for kernel-mode interrupts, both
synchronous and asynchronous, this saves an additional 40 cycles due to
the mtmsrd being moved ahead of mfspr SPRN_AMR, which prevents a SPR
scoreboard stall.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210922145452.352571-3-npiggin@gmail.com
2021-09-23 00:54:48 +10:00
__hard_RI_enable ( ) ;
2021-01-30 23:08:50 +10:00
/* Don't do any per-CPU operations until interrupt state is fixed */
2021-03-16 20:41:59 +10:00
if ( nmi_disables_ftrace ( regs ) ) {
2021-01-30 23:08:49 +10:00
state - > ftrace_enabled = this_cpu_get_ftrace_enabled ( ) ;
this_cpu_set_ftrace_enabled ( 0 ) ;
}
# endif
/*
* Do not use nmi_enter ( ) for pseries hash guest taking a real - mode
* NMI because not everything it touches is within the RMA limit .
*/
if ( ! IS_ENABLED ( CONFIG_PPC_BOOK3S_64 ) | |
! firmware_has_feature ( FW_FEATURE_LPAR ) | |
radix_enabled ( ) | | ( mfmsr ( ) & MSR_DR ) )
nmi_enter ( ) ;
2021-01-30 23:08:37 +10:00
}
static inline void interrupt_nmi_exit_prepare ( struct pt_regs * regs , struct interrupt_nmi_state * state )
{
2021-01-30 23:08:49 +10:00
if ( ! IS_ENABLED ( CONFIG_PPC_BOOK3S_64 ) | |
! firmware_has_feature ( FW_FEATURE_LPAR ) | |
radix_enabled ( ) | | ( mfmsr ( ) & MSR_DR ) )
nmi_exit ( ) ;
2021-04-06 12:55:08 +10:00
/*
* nmi does not call nap_adjust_return because nmi should not create
* new work to do ( must use irq_work for that ) .
*/
2021-06-18 01:51:08 +10:00
# ifdef CONFIG_PPC64
2021-06-30 17:46:15 +10:00
# ifdef CONFIG_PPC_BOOK3S
2021-06-18 01:51:08 +10:00
if ( arch_irq_disabled_regs ( regs ) ) {
unsigned long rst = search_kernel_restart_table ( regs - > nip ) ;
if ( rst )
regs_set_return_ip ( regs , rst ) ;
}
# endif
2021-03-16 20:41:59 +10:00
if ( nmi_disables_ftrace ( regs ) )
2021-01-30 23:08:49 +10:00
this_cpu_set_ftrace_enabled ( state - > ftrace_enabled ) ;
2021-01-30 23:08:50 +10:00
/* Check we didn't change the pending interrupt mask. */
WARN_ON_ONCE ( ( state - > irq_happened | PACA_IRQ_HARD_DIS ) ! = local_paca - > irq_happened ) ;
2021-06-30 17:46:17 +10:00
regs - > softe = state - > softe ;
2021-01-30 23:08:50 +10:00
local_paca - > irq_happened = state - > irq_happened ;
local_paca - > irq_soft_mask = state - > irq_soft_mask ;
# endif
2021-01-30 23:08:37 +10:00
}
2021-02-11 16:36:36 +10:00
/*
 * Attribute set applied to every interrupt handler entry point.
 *
 * Don't use noinstr here like x86, but rather add NOKPROBE_SYMBOL to each
 * function definition. The reason for this is the noinstr section is placed
 * after the main text section, i.e., very far away from the interrupt entry
 * asm. That creates problems with fitting linker stubs when building large
 * kernels.
 */
#define interrupt_handler __visible noinline notrace __no_kcsan __no_sanitize_address
2021-01-30 23:08:36 +10:00
/**
 * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function
 * @func:	Function name of the entry point
 * @returns:	Returns a value back to asm caller
 *
 * Expands to a prototype only; follow the invocation with a semicolon.
 */
#define DECLARE_INTERRUPT_HANDLER_RAW(func)				\
	__visible long func(struct pt_regs *regs)
/**
 * DEFINE_INTERRUPT_HANDLER_RAW - Define raw interrupt handler function
 * @func:	Function name of the entry point
 * @returns:	Returns a value back to asm caller
 *
 * @func is called from ASM entry code.
 *
 * This is a plain function which does no tracing, reconciling, etc.
 * The macro is written so it acts as function definition. Append the
 * body with a pair of curly brackets.
 *
 * The generated entry point calls __hard_RI_enable() and then the inlined
 * body, whose return value it passes back.
 *
 * raw interrupt handlers must not enable or disable interrupts, or
 * schedule, tracing and instrumentation (ftrace, lockdep, etc) would
 * not be advisable either, although may be possible in a pinch, the
 * trace will look odd at least.
 *
 * A raw handler may call one of the other interrupt handler functions
 * to be converted into that interrupt context without these restrictions.
 *
 * On PPC64, _RAW handlers may return with fast_interrupt_return.
 *
 * Specific handlers may have additional restrictions.
 */
#define DEFINE_INTERRUPT_HANDLER_RAW(func)				\
static __always_inline long ____##func(struct pt_regs *regs);		\
									\
interrupt_handler long func(struct pt_regs *regs)			\
{									\
	long ret;							\
									\
	__hard_RI_enable();						\
									\
	ret = ____##func (regs);					\
									\
	return ret;							\
}									\
NOKPROBE_SYMBOL(func);							\
									\
static __always_inline long ____##func(struct pt_regs *regs)
/**
 * DECLARE_INTERRUPT_HANDLER - Declare synchronous interrupt handler function
 * @func:	Function name of the entry point
 *
 * Expands to a prototype only; follow the invocation with a semicolon.
 */
#define DECLARE_INTERRUPT_HANDLER(func)					\
	__visible void func(struct pt_regs *regs)
/**
 * DEFINE_INTERRUPT_HANDLER - Define synchronous interrupt handler function
 * @func:	Function name of the entry point
 *
 * @func is called from ASM entry code.
 *
 * The generated entry point brackets the inlined body with
 * interrupt_enter_prepare() and interrupt_exit_prepare().
 *
 * The macro is written so it acts as function definition. Append the
 * body with a pair of curly brackets.
 */
#define DEFINE_INTERRUPT_HANDLER(func)					\
static __always_inline void ____##func(struct pt_regs *regs);		\
									\
interrupt_handler void func(struct pt_regs *regs)			\
{									\
	struct interrupt_state state;					\
									\
	interrupt_enter_prepare(regs, &state);				\
									\
	____##func (regs);						\
									\
	interrupt_exit_prepare(regs, &state);				\
}									\
NOKPROBE_SYMBOL(func);							\
									\
static __always_inline void ____##func(struct pt_regs *regs)
/**
 * DECLARE_INTERRUPT_HANDLER_RET - Declare synchronous interrupt handler function
 * @func:	Function name of the entry point
 * @returns:	Returns a value back to asm caller
 *
 * Expands to the prototype of the entry point: a __visible function taking
 * a struct pt_regs pointer and returning long. The macro does not emit the
 * terminating semicolon; the use site supplies it.
 */
# define DECLARE_INTERRUPT_HANDLER_RET(func) \
__visible long func ( struct pt_regs * regs )
/**
 * DEFINE_INTERRUPT_HANDLER_RET - Define synchronous interrupt handler function
 * @func:	Function name of the entry point
 * @returns:	Returns a value back to asm caller
 *
 * @func is called from ASM entry code.
 *
 * The expansion has three parts: a forward declaration of the always-inline
 * body ____func(), the entry point func() itself, and the opening signature
 * of ____func(). func() declares a local struct interrupt_state, calls
 * interrupt_enter_prepare() with it, stores the long returned by
 * ____func(regs), calls interrupt_exit_prepare() with the same state, and
 * only then returns the stored value. func is also passed to
 * NOKPROBE_SYMBOL().
 *
 * The macro is written so it acts as function definition. Append the
 * body with a pair of curly brackets.
 */
# define DEFINE_INTERRUPT_HANDLER_RET(func) \
static __always_inline long ____ # # func ( struct pt_regs * regs ) ; \
\
2021-02-11 16:36:36 +10:00
interrupt_handler long func ( struct pt_regs * regs ) \
2021-01-30 23:08:36 +10:00
{ \
2021-01-30 23:08:37 +10:00
struct interrupt_state state ; \
2021-01-30 23:08:36 +10:00
long ret ; \
\
2021-01-30 23:08:37 +10:00
interrupt_enter_prepare ( regs , & state ) ; \
\
2021-01-30 23:08:36 +10:00
ret = ____ # # func ( regs ) ; \
\
2021-01-30 23:08:37 +10:00
interrupt_exit_prepare ( regs , & state ) ; \
\
2021-01-30 23:08:36 +10:00
return ret ; \
} \
2021-02-11 16:36:36 +10:00
NOKPROBE_SYMBOL ( func ) ; \
2021-01-30 23:08:36 +10:00
\
static __always_inline long ____ # # func ( struct pt_regs * regs )
/**
 * DECLARE_INTERRUPT_HANDLER_ASYNC - Declare asynchronous interrupt handler function
 * @func:	Function name of the entry point
 *
 * Expands to the prototype of the entry point: a __visible function taking
 * a struct pt_regs pointer and returning void. The macro does not emit the
 * terminating semicolon; the use site supplies it.
 */
# define DECLARE_INTERRUPT_HANDLER_ASYNC(func) \
__visible void func ( struct pt_regs * regs )
/**
 * DEFINE_INTERRUPT_HANDLER_ASYNC - Define asynchronous interrupt handler function
 * @func:	Function name of the entry point
 *
 * @func is called from ASM entry code.
 *
 * The expansion has three parts: a forward declaration of the always-inline
 * body ____func(), the entry point func() itself, and the opening signature
 * of ____func(). func() declares a local struct interrupt_state, calls
 * interrupt_async_enter_prepare() with it, runs ____func(regs), then calls
 * interrupt_async_exit_prepare() with the same state. func is also passed
 * to NOKPROBE_SYMBOL().
 *
 * The macro is written so it acts as function definition. Append the
 * body with a pair of curly brackets.
 */
# define DEFINE_INTERRUPT_HANDLER_ASYNC(func) \
static __always_inline void ____ # # func ( struct pt_regs * regs ) ; \
\
2021-02-11 16:36:36 +10:00
interrupt_handler void func ( struct pt_regs * regs ) \
2021-01-30 23:08:36 +10:00
{ \
2021-01-30 23:08:37 +10:00
struct interrupt_state state ; \
\
interrupt_async_enter_prepare ( regs , & state ) ; \
\
2021-01-30 23:08:36 +10:00
____ # # func ( regs ) ; \
2021-01-30 23:08:37 +10:00
\
interrupt_async_exit_prepare ( regs , & state ) ; \
2021-01-30 23:08:36 +10:00
} \
2021-02-11 16:36:36 +10:00
NOKPROBE_SYMBOL ( func ) ; \
2021-01-30 23:08:36 +10:00
\
static __always_inline void ____ # # func ( struct pt_regs * regs )
/**
 * DECLARE_INTERRUPT_HANDLER_NMI - Declare NMI interrupt handler function
 * @func:	Function name of the entry point
 * @returns:	Returns a value back to asm caller
 *
 * Expands to the prototype of the entry point: a __visible function taking
 * a struct pt_regs pointer and returning long. The macro does not emit the
 * terminating semicolon; the use site supplies it.
 */
# define DECLARE_INTERRUPT_HANDLER_NMI(func) \
__visible long func ( struct pt_regs * regs )
/**
 * DEFINE_INTERRUPT_HANDLER_NMI - Define NMI interrupt handler function
 * @func:	Function name of the entry point
 * @returns:	Returns a value back to asm caller
 *
 * @func is called from ASM entry code.
 *
 * The expansion has three parts: a forward declaration of the always-inline
 * body ____func(), the entry point func() itself, and the opening signature
 * of ____func(). func() declares a local struct interrupt_nmi_state (not
 * the struct interrupt_state used by the other variants), calls
 * interrupt_nmi_enter_prepare() with it, stores the long returned by
 * ____func(regs), calls interrupt_nmi_exit_prepare() with the same state,
 * and only then returns the stored value. func is also passed to
 * NOKPROBE_SYMBOL().
 *
 * The macro is written so it acts as function definition. Append the
 * body with a pair of curly brackets.
 */
# define DEFINE_INTERRUPT_HANDLER_NMI(func) \
static __always_inline long ____ # # func ( struct pt_regs * regs ) ; \
\
2021-02-11 16:36:36 +10:00
interrupt_handler long func ( struct pt_regs * regs ) \
2021-01-30 23:08:36 +10:00
{ \
2021-01-30 23:08:37 +10:00
struct interrupt_nmi_state state ; \
2021-01-30 23:08:36 +10:00
long ret ; \
\
2021-01-30 23:08:37 +10:00
interrupt_nmi_enter_prepare ( regs , & state ) ; \
\
2021-01-30 23:08:36 +10:00
ret = ____ # # func ( regs ) ; \
\
2021-01-30 23:08:37 +10:00
interrupt_nmi_exit_prepare ( regs , & state ) ; \
\
2021-01-30 23:08:36 +10:00
return ret ; \
} \
2021-02-11 16:36:36 +10:00
NOKPROBE_SYMBOL ( func ) ; \
2021-01-30 23:08:36 +10:00
\
static __always_inline long ____ # # func ( struct pt_regs * regs )
2021-01-30 23:08:38 +10:00
/*
 * Interrupt handlers
 *
 * Prototypes for the C interrupt handlers, emitted through the
 * DECLARE_INTERRUPT_HANDLER* macro family. Each sub-heading below names
 * the source file the following group is listed under.
 */
/* kernel/traps.c */
DECLARE_INTERRUPT_HANDLER_NMI ( system_reset_exception ) ;
/* machine_check_exception_async is declared only when CONFIG_PPC_BOOK3S_64 is set. */
# ifdef CONFIG_PPC_BOOK3S_64
2021-10-05 00:56:42 +10:00
DECLARE_INTERRUPT_HANDLER_ASYNC ( machine_check_exception_async ) ;
2021-01-30 23:08:38 +10:00
# endif
2021-10-05 00:56:42 +10:00
DECLARE_INTERRUPT_HANDLER_NMI ( machine_check_exception ) ;
2021-01-30 23:08:38 +10:00
DECLARE_INTERRUPT_HANDLER ( SMIException ) ;
DECLARE_INTERRUPT_HANDLER ( handle_hmi_exception ) ;
DECLARE_INTERRUPT_HANDLER ( unknown_exception ) ;
DECLARE_INTERRUPT_HANDLER_ASYNC ( unknown_async_exception ) ;
2021-03-16 20:41:59 +10:00
DECLARE_INTERRUPT_HANDLER_NMI ( unknown_nmi_exception ) ;
2021-01-30 23:08:38 +10:00
DECLARE_INTERRUPT_HANDLER ( instruction_breakpoint_exception ) ;
DECLARE_INTERRUPT_HANDLER ( RunModeException ) ;
DECLARE_INTERRUPT_HANDLER ( single_step_exception ) ;
DECLARE_INTERRUPT_HANDLER ( program_check_exception ) ;
DECLARE_INTERRUPT_HANDLER ( emulation_assist_interrupt ) ;
DECLARE_INTERRUPT_HANDLER ( alignment_exception ) ;
DECLARE_INTERRUPT_HANDLER ( StackOverflow ) ;
DECLARE_INTERRUPT_HANDLER ( stack_overflow_exception ) ;
DECLARE_INTERRUPT_HANDLER ( kernel_fp_unavailable_exception ) ;
DECLARE_INTERRUPT_HANDLER ( altivec_unavailable_exception ) ;
DECLARE_INTERRUPT_HANDLER ( vsx_unavailable_exception ) ;
DECLARE_INTERRUPT_HANDLER ( facility_unavailable_exception ) ;
DECLARE_INTERRUPT_HANDLER ( fp_unavailable_tm ) ;
DECLARE_INTERRUPT_HANDLER ( altivec_unavailable_tm ) ;
DECLARE_INTERRUPT_HANDLER ( vsx_unavailable_tm ) ;
DECLARE_INTERRUPT_HANDLER_NMI ( performance_monitor_exception_nmi ) ;
DECLARE_INTERRUPT_HANDLER_ASYNC ( performance_monitor_exception_async ) ;
DECLARE_INTERRUPT_HANDLER_RAW ( performance_monitor_exception ) ;
DECLARE_INTERRUPT_HANDLER ( DebugException ) ;
DECLARE_INTERRUPT_HANDLER ( altivec_assist_exception ) ;
DECLARE_INTERRUPT_HANDLER ( CacheLockingException ) ;
DECLARE_INTERRUPT_HANDLER ( SPEFloatingPointException ) ;
DECLARE_INTERRUPT_HANDLER ( SPEFloatingPointRoundException ) ;
2021-03-16 20:41:59 +10:00
DECLARE_INTERRUPT_HANDLER_NMI ( WatchdogException ) ;
2021-01-30 23:08:38 +10:00
DECLARE_INTERRUPT_HANDLER ( kernel_bad_stack ) ;
/* slb.c */
DECLARE_INTERRUPT_HANDLER_RAW ( do_slb_fault ) ;
2021-12-02 00:41:39 +10:00
DECLARE_INTERRUPT_HANDLER ( do_bad_segment_interrupt ) ;
2021-01-30 23:08:38 +10:00
/* hash_utils.c */
DECLARE_INTERRUPT_HANDLER_RAW ( do_hash_fault ) ;
/* fault.c */
2021-03-16 20:42:03 +10:00
DECLARE_INTERRUPT_HANDLER ( do_page_fault ) ;
2021-01-30 23:08:38 +10:00
DECLARE_INTERRUPT_HANDLER ( do_bad_page_fault_segv ) ;
/* process.c */
DECLARE_INTERRUPT_HANDLER ( do_break ) ;
/* time.c */
DECLARE_INTERRUPT_HANDLER_ASYNC ( timer_interrupt ) ;
/* mce.c */
DECLARE_INTERRUPT_HANDLER_NMI ( machine_check_early ) ;
DECLARE_INTERRUPT_HANDLER_NMI ( hmi_exception_realmode ) ;
DECLARE_INTERRUPT_HANDLER_ASYNC ( TAUException ) ;
powerpc/interrupt: Fix OOPS by not calling do_IRQ() from timer_interrupt()
An interrupt handler shall not be called from another interrupt
handler otherwise this leads to problems like the following:
Kernel attempted to write user page (afd4fa84) - exploit attempt? (uid: 1000)
------------[ cut here ]------------
Bug: Write fault blocked by KUAP!
WARNING: CPU: 0 PID: 1617 at arch/powerpc/mm/fault.c:230 do_page_fault+0x484/0x720
Modules linked in:
CPU: 0 PID: 1617 Comm: sshd Tainted: G W 5.13.0-pmac-00010-g8393422eb77 #7
NIP: c001b77c LR: c001b77c CTR: 00000000
REGS: cb9e5bc0 TRAP: 0700 Tainted: G W (5.13.0-pmac-00010-g8393422eb77)
MSR: 00021032 <ME,IR,DR,RI> CR: 24942424 XER: 00000000
GPR00: c001b77c cb9e5c80 c1582c00 00000021 3ffffbff 085b0000 00000027 c8eb644c
GPR08: 00000023 00000000 00000000 00000000 24942424 0063f8c8 00000000 000186a0
GPR16: afd52dd4 afd52dd0 afd52dcc afd52dc8 0065a990 c07640c4 cb9e5e98 cb9e5e90
GPR24: 00000040 afd4fa96 00000040 02000000 c1fda6c0 afd4fa84 00000300 cb9e5cc0
NIP [c001b77c] do_page_fault+0x484/0x720
LR [c001b77c] do_page_fault+0x484/0x720
Call Trace:
[cb9e5c80] [c001b77c] do_page_fault+0x484/0x720 (unreliable)
[cb9e5cb0] [c000424c] DataAccess_virt+0xd4/0xe4
--- interrupt: 300 at __copy_tofrom_user+0x110/0x20c
NIP: c001f9b4 LR: c03250a0 CTR: 00000004
REGS: cb9e5cc0 TRAP: 0300 Tainted: G W (5.13.0-pmac-00010-g8393422eb77)
MSR: 00009032 <EE,ME,IR,DR,RI> CR: 48028468 XER: 20000000
DAR: afd4fa84 DSISR: 0a000000
GPR00: 20726f6f cb9e5d80 c1582c00 00000004 cb9e5e3a 00000016 afd4fa80 00000000
GPR08: 3835202d 72777872 2d78722d 00000004 28028464 0063f8c8 00000000 000186a0
GPR16: afd52dd4 afd52dd0 afd52dcc afd52dc8 0065a990 c07640c4 cb9e5e98 cb9e5e90
GPR24: 00000040 afd4fa96 00000040 cb9e5e0c 00000daa a0000000 cb9e5e98 afd4fa56
NIP [c001f9b4] __copy_tofrom_user+0x110/0x20c
LR [c03250a0] _copy_to_iter+0x144/0x990
--- interrupt: 300
[cb9e5d80] [c03e89c0] n_tty_read+0xa4/0x598 (unreliable)
[cb9e5df0] [c03e2a0c] tty_read+0xdc/0x2b4
[cb9e5e80] [c0156bf8] vfs_read+0x274/0x340
[cb9e5f00] [c01571ac] ksys_read+0x70/0x118
[cb9e5f30] [c0016048] ret_from_syscall+0x0/0x28
--- interrupt: c00 at 0xa7855c88
NIP: a7855c88 LR: a7855c5c CTR: 00000000
REGS: cb9e5f40 TRAP: 0c00 Tainted: G W (5.13.0-pmac-00010-g8393422eb77)
MSR: 0000d032 <EE,PR,ME,IR,DR,RI> CR: 2402446c XER: 00000000
GPR00: 00000003 afd4ec70 a72137d0 0000000b afd4ecac 00004000 0065a990 00000800
GPR08: 00000000 a7947930 00000000 00000004 c15831b0 0063f8c8 00000000 000186a0
GPR16: afd52dd4 afd52dd0 afd52dcc afd52dc8 0065a990 0065a9e0 00000001 0065fac0
GPR24: 00000000 00000089 00664050 00000000 00668e30 a720c8dc a7943ff4 0065f9b0
NIP [a7855c88] 0xa7855c88
LR [a7855c5c] 0xa7855c5c
--- interrupt: c00
Instruction dump:
3884aa88 38630178 48076861 807f0080 48042e45 2f830000 419e0148 3c80c079
3c60c076 38841be4 386301c0 4801f705 <0fe00000> 3860000b 4bfffe30 3c80c06b
---[ end trace fd69b91a8046c2e5 ]---
Here the problem is that by re-entering an exception handler,
kuap_save_and_lock() is called a second time, this time with KUAP
access locked, leading to regs->kuap being overwritten and hence
KUAP not being unlocked at exception exit as expected.
Do not call do_IRQ() from timer_interrupt() directly. Instead,
redefine do_IRQ() as a standard function named __do_IRQ(), and
call it from both the do_IRQ() and timer_interrupt() handlers.
Fixes: 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers")
Cc: stable@vger.kernel.org # v5.12+
Reported-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/c17d234f4927d39a1d7100864a8e1145323d33a0.1628611927.git.christophe.leroy@csgroup.eu
2021-08-10 16:13:16 +00:00
/* irq.c */
/*
 * do_IRQ() is declared as an asynchronous interrupt entry point.
 * NOTE(review): the change log attached to this declaration states that an
 * interrupt handler must not be called from another interrupt handler, and
 * that the shared work lives in a plain function named __do_IRQ() -- confirm
 * against irq.c before adding new callers.
 */
DECLARE_INTERRUPT_HANDLER_ASYNC ( do_IRQ ) ;
powerpc/traps: Declare unrecoverable_exception() as __noreturn
unrecoverable_exception() is never expected to return; most callers
have an infinite loop in case it returns.
Ensure it really never returns by terminating it with a BUG(), and
declare it __noreturn.
This allows GCC to really simplify functions calling it. In the example
below, it avoids the stack frame in the likely fast path and avoids
code duplication for the exit.
With this patch:
00000348 <interrupt_exit_kernel_prepare>:
348: 81 43 00 84 lwz r10,132(r3)
34c: 71 48 00 02 andi. r8,r10,2
350: 41 82 00 2c beq 37c <interrupt_exit_kernel_prepare+0x34>
354: 71 4a 40 00 andi. r10,r10,16384
358: 40 82 00 20 bne 378 <interrupt_exit_kernel_prepare+0x30>
35c: 80 62 00 70 lwz r3,112(r2)
360: 74 63 00 01 andis. r3,r3,1
364: 40 82 00 28 bne 38c <interrupt_exit_kernel_prepare+0x44>
368: 7d 40 00 a6 mfmsr r10
36c: 7c 11 13 a6 mtspr 81,r0
370: 7c 12 13 a6 mtspr 82,r0
374: 4e 80 00 20 blr
378: 48 00 00 00 b 378 <interrupt_exit_kernel_prepare+0x30>
37c: 94 21 ff f0 stwu r1,-16(r1)
380: 7c 08 02 a6 mflr r0
384: 90 01 00 14 stw r0,20(r1)
388: 48 00 00 01 bl 388 <interrupt_exit_kernel_prepare+0x40>
388: R_PPC_REL24 unrecoverable_exception
38c: 38 e2 00 70 addi r7,r2,112
390: 3d 00 00 01 lis r8,1
394: 7c c0 38 28 lwarx r6,0,r7
398: 7c c6 40 78 andc r6,r6,r8
39c: 7c c0 39 2d stwcx. r6,0,r7
3a0: 40 a2 ff f4 bne 394 <interrupt_exit_kernel_prepare+0x4c>
3a4: 38 60 00 01 li r3,1
3a8: 4b ff ff c0 b 368 <interrupt_exit_kernel_prepare+0x20>
Without this patch:
00000348 <interrupt_exit_kernel_prepare>:
348: 94 21 ff f0 stwu r1,-16(r1)
34c: 93 e1 00 0c stw r31,12(r1)
350: 7c 7f 1b 78 mr r31,r3
354: 81 23 00 84 lwz r9,132(r3)
358: 71 2a 00 02 andi. r10,r9,2
35c: 41 82 00 34 beq 390 <interrupt_exit_kernel_prepare+0x48>
360: 71 29 40 00 andi. r9,r9,16384
364: 40 82 00 28 bne 38c <interrupt_exit_kernel_prepare+0x44>
368: 80 62 00 70 lwz r3,112(r2)
36c: 74 63 00 01 andis. r3,r3,1
370: 40 82 00 3c bne 3ac <interrupt_exit_kernel_prepare+0x64>
374: 7d 20 00 a6 mfmsr r9
378: 7c 11 13 a6 mtspr 81,r0
37c: 7c 12 13 a6 mtspr 82,r0
380: 83 e1 00 0c lwz r31,12(r1)
384: 38 21 00 10 addi r1,r1,16
388: 4e 80 00 20 blr
38c: 48 00 00 00 b 38c <interrupt_exit_kernel_prepare+0x44>
390: 7c 08 02 a6 mflr r0
394: 90 01 00 14 stw r0,20(r1)
398: 48 00 00 01 bl 398 <interrupt_exit_kernel_prepare+0x50>
398: R_PPC_REL24 unrecoverable_exception
39c: 80 01 00 14 lwz r0,20(r1)
3a0: 81 3f 00 84 lwz r9,132(r31)
3a4: 7c 08 03 a6 mtlr r0
3a8: 4b ff ff b8 b 360 <interrupt_exit_kernel_prepare+0x18>
3ac: 39 02 00 70 addi r8,r2,112
3b0: 3d 40 00 01 lis r10,1
3b4: 7c e0 40 28 lwarx r7,0,r8
3b8: 7c e7 50 78 andc r7,r7,r10
3bc: 7c e0 41 2d stwcx. r7,0,r8
3c0: 40 a2 ff f4 bne 3b4 <interrupt_exit_kernel_prepare+0x6c>
3c4: 38 60 00 01 li r3,1
3c8: 4b ff ff ac b 374 <interrupt_exit_kernel_prepare+0x2c>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1e883e9d93fdb256853d1434c8ad77c257349b2d.1615552866.git.christophe.leroy@csgroup.eu
2021-03-12 12:50:10 +00:00
/* Declared __noreturn, so the compiler treats a call to it as never coming back. */
void __noreturn unrecoverable_exception ( struct pt_regs * regs ) ;
2021-03-09 12:09:26 +00:00
2021-01-30 23:08:38 +10:00
/* Replay entry points; both take no arguments and return nothing. */
void replay_system_reset ( void ) ;
void replay_soft_interrupts ( void ) ;
2021-01-30 23:08:39 +10:00
/*
 * Conditionally re-enable local interrupts from within a handler.
 *
 * local_irq_enable() is called only when arch_irq_disabled_regs() reports
 * false for the saved register frame @regs; otherwise nothing is done.
 */
static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
{
	if (arch_irq_disabled_regs(regs))
		return;

	local_irq_enable();
}
2021-04-19 15:48:09 +00:00
# endif /* __ASSEMBLY__ */
2021-01-30 23:08:36 +10:00
# endif /* _ASM_POWERPC_INTERRUPT_H */