2018-03-27 23:01:44 +11:00
// SPDX-License-Identifier: GPL-2.0+
//
// Security related flags and so on.
//
// Copyright 2018, Michael Ellerman, IBM Corporation.
2018-10-22 11:54:12 -03:00
# include <linux/cpu.h>
2018-03-27 23:01:44 +11:00
# include <linux/kernel.h>
2018-03-27 23:01:48 +11:00
# include <linux/device.h>
2021-03-26 21:12:01 +11:00
# include <linux/memblock.h>
2020-04-02 23:49:29 +11:00
# include <linux/nospec.h>
# include <linux/prctl.h>
2018-03-27 23:01:49 +11:00
# include <linux/seq_buf.h>
2021-08-12 18:58:31 +05:30
# include <linux/debugfs.h>
2018-03-27 23:01:48 +11:00
2018-07-24 01:07:54 +10:00
# include <asm/asm-prototypes.h>
# include <asm/code-patching.h>
2018-03-27 23:01:44 +11:00
# include <asm/security_features.h>
2018-04-24 14:15:55 +10:00
# include <asm/setup.h>
2020-05-06 13:40:26 +10:00
# include <asm/inst.h>
2018-03-27 23:01:44 +11:00
2021-03-26 21:12:01 +11:00
# include "setup.h"
2018-03-27 23:01:44 +11:00
2019-10-21 16:23:09 +02:00
/*
 * Security feature flags word, initialised to SEC_FTR_DEFAULT. When
 * CONFIG_DEBUG_FS is set it is also exposed read-only (0400) through debugfs
 * by security_feature_debugfs_init().
 */
u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT ;
2018-03-27 23:01:48 +11:00
2020-06-09 17:06:04 +10:00
/*
 * How a branch cache (the count cache or the link stack) is flushed.
 */
enum branch_cache_flush_type {
	BRANCH_CACHE_FLUSH_NONE	= 0x1,	/* flush disabled */
	BRANCH_CACHE_FLUSH_SW	= 0x2,	/* software flush */
	BRANCH_CACHE_FLUSH_HW	= 0x4,	/* hardware flush */
};
2020-06-09 17:06:04 +10:00
static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE ;
2020-06-09 17:06:05 +10:00
static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE ;
2018-07-24 01:07:54 +10:00
2018-04-24 14:15:56 +10:00
/* Last state passed to enable_barrier_nospec(). */
bool barrier_nospec_enabled;
/* Set by handle_nospectre_v1(); makes setup_barrier_nospec() skip enabling. */
static bool no_nospec;
/* Set by setup_spectre_v2() when the BTB flush is left in place. */
static bool btb_flush_enabled;
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
/* Set by handle_nospectre_v2(). */
static bool no_spectrev2;
#endif
2018-04-24 14:15:55 +10:00
/*
 * Record the requested barrier state in barrier_nospec_enabled and pass it on
 * to do_barrier_nospec_fixups().
 */
static void enable_barrier_nospec(bool enable)
{
	barrier_nospec_enabled = enable;
	do_barrier_nospec_fixups(enable);
}
2018-04-24 14:15:57 +10:00
void setup_barrier_nospec ( void )
{
bool enable ;
/*
* It would make sense to check SEC_FTR_SPEC_BAR_ORI31 below as well .
* But there ' s a good reason not to . The two flags we check below are
* both are enabled by default in the kernel , so if the hcall is not
* functional they will be enabled .
* On a system where the host firmware has been updated ( so the ori
* functions as a barrier ) , but on which the hypervisor ( KVM / Qemu ) has
* not been updated , we would like to enable the barrier . Dropping the
* check for SEC_FTR_SPEC_BAR_ORI31 achieves that . The only downside is
* we potentially enable the barrier on systems where the host firmware
* is not updated , but that ' s harmless as it ' s a no - op .
*/
enable = security_ftr_enabled ( SEC_FTR_FAVOUR_SECURITY ) & &
security_ftr_enabled ( SEC_FTR_BNDS_CHK_SPEC_BAR ) ;
2019-04-12 15:39:30 -05:00
if ( ! no_nospec & & ! cpu_mitigations_off ( ) )
2018-07-28 09:06:32 +10:00
enable_barrier_nospec ( enable ) ;
2018-04-24 14:15:57 +10:00
}
2018-07-28 09:06:32 +10:00
/*
 * Early-parameter handler: remember in no_nospec that the barrier must not be
 * enabled at boot. The argument string is ignored. Always returns 0.
 */
static int __init handle_nospectre_v1(char *p)
{
	no_nospec = true;

	return 0;
}
early_param(" nospectre_v1 ", handle_nospectre_v1);
2018-04-24 14:15:57 +10:00
# ifdef CONFIG_DEBUG_FS
/*
 * Setter for the barrier_nospec debugfs attribute.
 *
 * Accepts 0 (disable) or 1 (enable); any other value yields -EINVAL. The
 * barrier is only re-applied when the requested state differs from the
 * current one. @data is unused.
 */
static int barrier_nospec_set(void *data, u64 val)
{
	bool enable;

	if (val > 1)
		return -EINVAL;

	enable = !!val;
	if (enable != barrier_nospec_enabled)
		enable_barrier_nospec(enable);

	return 0;
}
/*
 * Getter for the barrier_nospec debugfs attribute: stores 1 in *val when the
 * barrier is enabled and 0 otherwise. @data is unused. Always returns 0.
 */
static int barrier_nospec_get(void *data, u64 *val)
{
	if (barrier_nospec_enabled)
		*val = 1;
	else
		*val = 0;

	return 0;
}
2018-11-29 13:35:18 +00:00
DEFINE_DEBUGFS_ATTRIBUTE ( fops_barrier_nospec , barrier_nospec_get ,
barrier_nospec_set , " %llu \n " ) ;
2018-04-24 14:15:57 +10:00
static __init int barrier_nospec_debugfs_init ( void )
{
2018-11-29 13:35:18 +00:00
debugfs_create_file_unsafe ( " barrier_nospec " , 0600 ,
2021-08-12 18:58:31 +05:30
arch_debugfs_dir , NULL ,
2018-11-29 13:35:18 +00:00
& fops_barrier_nospec ) ;
2018-04-24 14:15:57 +10:00
return 0 ;
}
device_initcall ( barrier_nospec_debugfs_init ) ;
2019-04-09 23:14:20 +10:00
static __init int security_feature_debugfs_init ( void )
{
2021-08-12 18:58:31 +05:30
debugfs_create_x64 ( " security_features " , 0400 , arch_debugfs_dir ,
2019-10-21 16:23:09 +02:00
& powerpc_security_features ) ;
2019-04-09 23:14:20 +10:00
return 0 ;
}
device_initcall ( security_feature_debugfs_init ) ;
2018-04-24 14:15:57 +10:00
# endif /* CONFIG_DEBUG_FS */
2019-05-23 21:46:48 -05:00
# if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
2018-12-12 16:03:04 +02:00
/*
 * Early-parameter handler: set no_spectrev2 so the Spectre v2 setup code in
 * this file knows the user asked for the mitigation to be off. The argument
 * string is ignored. Always returns 0.
 */
static int __init handle_nospectre_v2(char *p)
{
	no_spectrev2 = true;

	return 0;
}
early_param(" nospectre_v2 ", handle_nospectre_v2);
2019-05-23 21:46:48 -05:00
# endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
# ifdef CONFIG_PPC_FSL_BOOK3E
2018-12-12 16:03:04 +02:00
void setup_spectre_v2 ( void )
{
2019-04-12 15:39:30 -05:00
if ( no_spectrev2 | | cpu_mitigations_off ( ) )
2018-12-12 16:03:04 +02:00
do_btb_flush_fixups ( ) ;
else
btb_flush_enabled = true ;
}
# endif /* CONFIG_PPC_FSL_BOOK3E */
2018-07-28 09:06:36 +10:00
# ifdef CONFIG_PPC_BOOK3S_64
2018-03-27 23:01:48 +11:00
ssize_t cpu_show_meltdown ( struct device * dev , struct device_attribute * attr , char * buf )
{
2018-03-27 23:01:49 +11:00
bool thread_priv ;
thread_priv = security_ftr_enabled ( SEC_FTR_L1D_THREAD_PRIV ) ;
2019-05-02 18:09:07 -03:00
if ( rfi_flush ) {
2018-03-27 23:01:49 +11:00
struct seq_buf s ;
seq_buf_init ( & s , buf , PAGE_SIZE - 1 ) ;
2019-05-02 18:09:07 -03:00
seq_buf_printf ( & s , " Mitigation: RFI Flush " ) ;
2018-03-27 23:01:49 +11:00
if ( thread_priv )
2019-05-02 18:09:07 -03:00
seq_buf_printf ( & s , " , L1D private per thread " ) ;
2018-03-27 23:01:49 +11:00
seq_buf_printf ( & s , " \n " ) ;
return s . len ;
}
2019-05-02 18:09:07 -03:00
if ( thread_priv )
return sprintf ( buf , " Vulnerable: L1D private per thread \n " ) ;
2018-03-27 23:01:49 +11:00
if ( ! security_ftr_enabled ( SEC_FTR_L1D_FLUSH_HV ) & &
! security_ftr_enabled ( SEC_FTR_L1D_FLUSH_PR ) )
return sprintf ( buf , " Not affected \n " ) ;
2018-03-27 23:01:48 +11:00
return sprintf ( buf , " Vulnerable \n " ) ;
}
2019-10-29 12:07:59 -07:00
/* The L1TF status is reported exactly as the Meltdown status. */
ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
{
	ssize_t len = cpu_show_meltdown(dev, attr, buf);

	return len;
}
2018-07-28 09:06:36 +10:00
# endif
2018-03-27 23:01:52 +11:00
ssize_t cpu_show_spectre_v1 ( struct device * dev , struct device_attribute * attr , char * buf )
{
2018-07-09 16:25:21 +10:00
struct seq_buf s ;
seq_buf_init ( & s , buf , PAGE_SIZE - 1 ) ;
2018-03-27 23:01:52 +11:00
2018-07-09 16:25:21 +10:00
if ( security_ftr_enabled ( SEC_FTR_BNDS_CHK_SPEC_BAR ) ) {
if ( barrier_nospec_enabled )
seq_buf_printf ( & s , " Mitigation: __user pointer sanitization " ) ;
else
seq_buf_printf ( & s , " Vulnerable " ) ;
2018-05-28 15:19:14 +02:00
2018-07-09 16:25:21 +10:00
if ( security_ftr_enabled ( SEC_FTR_SPEC_BAR_ORI31 ) )
seq_buf_printf ( & s , " , ori31 speculation barrier enabled " ) ;
seq_buf_printf ( & s , " \n " ) ;
} else
seq_buf_printf ( & s , " Not affected \n " ) ;
return s . len ;
2018-03-27 23:01:52 +11:00
}
2018-03-27 23:01:53 +11:00
ssize_t cpu_show_spectre_v2 ( struct device * dev , struct device_attribute * attr , char * buf )
{
struct seq_buf s ;
2018-07-09 16:25:21 +10:00
bool bcs , ccd ;
2018-03-27 23:01:53 +11:00
seq_buf_init ( & s , buf , PAGE_SIZE - 1 ) ;
bcs = security_ftr_enabled ( SEC_FTR_BCCTRL_SERIALISED ) ;
ccd = security_ftr_enabled ( SEC_FTR_COUNT_CACHE_DISABLED ) ;
2019-03-21 15:24:33 +11:00
if ( bcs | | ccd ) {
2018-03-27 23:01:53 +11:00
seq_buf_printf ( & s , " Mitigation: " ) ;
2019-03-21 15:24:33 +11:00
if ( bcs )
2018-03-27 23:01:53 +11:00
seq_buf_printf ( & s , " Indirect branch serialisation (kernel only) " ) ;
2018-07-24 01:07:54 +10:00
2019-03-21 15:24:33 +11:00
if ( bcs & & ccd )
2018-03-27 23:01:53 +11:00
seq_buf_printf ( & s , " , " ) ;
2019-03-21 15:24:33 +11:00
if ( ccd )
seq_buf_printf ( & s , " Indirect branch cache disabled " ) ;
2019-11-13 21:05:41 +11:00
2020-06-09 17:06:04 +10:00
} else if ( count_cache_flush_type ! = BRANCH_CACHE_FLUSH_NONE ) {
2019-03-21 15:24:33 +11:00
seq_buf_printf ( & s , " Mitigation: Software count cache flush " ) ;
2018-07-24 01:07:54 +10:00
2020-06-09 17:06:04 +10:00
if ( count_cache_flush_type = = BRANCH_CACHE_FLUSH_HW )
2019-03-21 15:24:33 +11:00
seq_buf_printf ( & s , " (hardware accelerated) " ) ;
2019-11-13 21:05:41 +11:00
2018-12-12 16:03:09 +02:00
} else if ( btb_flush_enabled ) {
seq_buf_printf ( & s , " Mitigation: Branch predictor state flush " ) ;
} else {
2018-03-27 23:01:53 +11:00
seq_buf_printf ( & s , " Vulnerable " ) ;
2018-12-12 16:03:09 +02:00
}
2018-03-27 23:01:53 +11:00
2020-06-09 17:06:09 +10:00
if ( bcs | | ccd | | count_cache_flush_type ! = BRANCH_CACHE_FLUSH_NONE ) {
if ( link_stack_flush_type ! = BRANCH_CACHE_FLUSH_NONE )
seq_buf_printf ( & s , " , Software link stack flush " ) ;
if ( link_stack_flush_type = = BRANCH_CACHE_FLUSH_HW )
seq_buf_printf ( & s , " (hardware accelerated) " ) ;
}
2018-03-27 23:01:53 +11:00
seq_buf_printf ( & s , " \n " ) ;
return s . len ;
}
2018-05-22 09:00:00 +10:00
2018-07-28 09:06:33 +10:00
# ifdef CONFIG_PPC_BOOK3S_64
2018-05-22 09:00:00 +10:00
/*
 * Store-forwarding barrier support.
 */

/* Barrier type chosen by setup_stf_barrier(). */
static enum stf_barrier_type stf_enabled_flush_types;
/* Set by handle_no_stf_barrier(); stops setup_stf_barrier() from enabling. */
static bool no_stf_barrier;
/* Last state applied by stf_barrier_enable(). */
static bool stf_barrier;
2018-05-22 09:00:00 +10:00
/*
 * Early-parameter handler: log that the store-forwarding barrier was disabled
 * on the command line and set no_stf_barrier. The argument is ignored (other
 * handlers in this file call this with NULL). Always returns 0.
 */
static int __init handle_no_stf_barrier(char *p)
{
	/* Terminate the message with a newline like the other stf-barrier logs. */
	pr_info(" stf-barrier: disabled on command line. \n ");
	no_stf_barrier = true;

	return 0;
}
early_param(" no_stf_barrier ", handle_no_stf_barrier);
2021-10-06 01:55:24 +05:30
enum stf_barrier_type stf_barrier_type_get ( void )
{
return stf_enabled_flush_types ;
}
2018-05-22 09:00:00 +10:00
/*
 * This is the generic flag used by other architectures.
 *
 * A missing value, or one matching the "auto"/"on" comparisons below, leaves
 * the barrier alone. A value matching the "off" comparison disables it via
 * handle_no_stf_barrier(). Returns 0 for those cases and 1 for anything else.
 */
static int __init handle_ssbd(char *p)
{
	if (!p || strncmp(p, " auto ", 5) == 0 || strncmp(p, " on ", 2) == 0) {
		/* Until firmware tells us, we have the barrier with auto */
		return 0;
	}

	if (strncmp(p, " off ", 3) == 0) {
		handle_no_stf_barrier(NULL);
		return 0;
	}

	/* Unrecognised value. */
	return 1;
}
early_param(" spec_store_bypass_disable ", handle_ssbd);
/*
 * This is the generic flag used by other architectures. It simply forwards to
 * handle_no_stf_barrier(); the argument string is ignored. Always returns 0.
 */
static int __init handle_no_ssbd(char *p)
{
	handle_no_stf_barrier(NULL);

	return 0;
}
early_param(" nospec_store_bypass_disable ", handle_no_ssbd);
/*
 * Apply the store-forwarding barrier fixups for the requested state and
 * record that state in stf_barrier. Enabling uses the type stored in
 * stf_enabled_flush_types; disabling uses STF_BARRIER_NONE.
 */
static void stf_barrier_enable(bool enable)
{
	do_stf_barrier_fixups(enable ? stf_enabled_flush_types : STF_BARRIER_NONE);

	stf_barrier = enable;
}
void setup_stf_barrier ( void )
{
enum stf_barrier_type type ;
2021-05-03 23:02:41 +10:00
bool enable ;
2018-05-22 09:00:00 +10:00
/* Default to fallback in case fw-features are not available */
if ( cpu_has_feature ( CPU_FTR_ARCH_300 ) )
type = STF_BARRIER_EIEIO ;
else if ( cpu_has_feature ( CPU_FTR_ARCH_207S ) )
type = STF_BARRIER_SYNC_ORI ;
else if ( cpu_has_feature ( CPU_FTR_ARCH_206 ) )
type = STF_BARRIER_FALLBACK ;
else
type = STF_BARRIER_NONE ;
enable = security_ftr_enabled ( SEC_FTR_FAVOUR_SECURITY ) & &
2021-05-03 23:02:41 +10:00
security_ftr_enabled ( SEC_FTR_STF_BARRIER ) ;
2018-05-22 09:00:00 +10:00
if ( type = = STF_BARRIER_FALLBACK ) {
pr_info ( " stf-barrier: fallback barrier available \n " ) ;
} else if ( type = = STF_BARRIER_SYNC_ORI ) {
pr_info ( " stf-barrier: hwsync barrier available \n " ) ;
} else if ( type = = STF_BARRIER_EIEIO ) {
pr_info ( " stf-barrier: eieio barrier available \n " ) ;
}
stf_enabled_flush_types = type ;
2019-04-12 15:39:30 -05:00
if ( ! no_stf_barrier & & ! cpu_mitigations_off ( ) )
2018-05-22 09:00:00 +10:00
stf_barrier_enable ( enable ) ;
}
ssize_t cpu_show_spec_store_bypass ( struct device * dev , struct device_attribute * attr , char * buf )
{
if ( stf_barrier & & stf_enabled_flush_types ! = STF_BARRIER_NONE ) {
const char * type ;
switch ( stf_enabled_flush_types ) {
case STF_BARRIER_EIEIO :
type = " eieio " ;
break ;
case STF_BARRIER_SYNC_ORI :
type = " hwsync " ;
break ;
case STF_BARRIER_FALLBACK :
type = " fallback " ;
break ;
default :
type = " unknown " ;
}
return sprintf ( buf , " Mitigation: Kernel entry/exit barrier (%s) \ n " , type) ;
}
if ( ! security_ftr_enabled ( SEC_FTR_L1D_FLUSH_HV ) & &
! security_ftr_enabled ( SEC_FTR_L1D_FLUSH_PR ) )
return sprintf ( buf , " Not affected \n " ) ;
return sprintf ( buf , " Vulnerable \n " ) ;
}
2020-04-02 23:49:29 +11:00
/*
 * Report the speculative-store-bypass state for the prctl interface.
 *
 * Returns PR_SPEC_NOT_AFFECTED when no barrier type is set and PR_SPEC_ENABLE
 * otherwise. @task is not consulted.
 */
static int ssb_prctl_get(struct task_struct *task)
{
	/*
	 * We don't have an explicit signal from firmware that we're
	 * vulnerable or not, we only have certain CPU revisions that
	 * are known to be vulnerable.
	 *
	 * We assume that if we're on another CPU, where the barrier is
	 * NONE, then we are not vulnerable.
	 */
	if (stf_enabled_flush_types == STF_BARRIER_NONE)
		return PR_SPEC_NOT_AFFECTED;

	/*
	 * If we do have a barrier type then we are vulnerable. The
	 * barrier is not a global or per-process mitigation, so the
	 * only value we can report here is PR_SPEC_ENABLE, which
	 * appears as "vulnerable" in /proc.
	 */
	return PR_SPEC_ENABLE;
}
/*
 * Query the state of speculation control @which for @task.
 *
 * Only PR_SPEC_STORE_BYPASS is handled, via ssb_prctl_get(); every other
 * value returns -ENODEV.
 */
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
	if (which == PR_SPEC_STORE_BYPASS)
		return ssb_prctl_get(task);

	return -ENODEV;
}
2018-05-22 09:00:00 +10:00
# ifdef CONFIG_DEBUG_FS
/*
 * Setter for the stf_barrier debugfs attribute.
 *
 * Accepts 0 (disable) or 1 (enable); any other value yields -EINVAL.
 * @data is unused.
 */
static int stf_barrier_set(void *data, u64 val)
{
	bool enable;

	switch (val) {
	case 0:
		enable = false;
		break;
	case 1:
		enable = true;
		break;
	default:
		return -EINVAL;
	}

	/* Only do anything if we're changing state */
	if (enable != stf_barrier)
		stf_barrier_enable(enable);

	return 0;
}
/*
 * Getter for the stf_barrier debugfs attribute: stores 1 in *val when the
 * barrier is enabled and 0 otherwise. @data is unused. Always returns 0.
 */
static int stf_barrier_get(void *data, u64 *val)
{
	if (stf_barrier)
		*val = 1;
	else
		*val = 0;

	return 0;
}
2018-11-29 13:35:18 +00:00
DEFINE_DEBUGFS_ATTRIBUTE ( fops_stf_barrier , stf_barrier_get , stf_barrier_set ,
" %llu \n " ) ;
2018-05-22 09:00:00 +10:00
static __init int stf_barrier_debugfs_init ( void )
{
2021-08-12 18:58:31 +05:30
debugfs_create_file_unsafe ( " stf_barrier " , 0600 , arch_debugfs_dir ,
2018-11-29 13:35:18 +00:00
NULL , & fops_stf_barrier ) ;
2018-05-22 09:00:00 +10:00
return 0 ;
}
device_initcall ( stf_barrier_debugfs_init ) ;
# endif /* CONFIG_DEBUG_FS */
2018-07-24 01:07:54 +10:00
2020-06-09 17:06:07 +10:00
static void update_branch_cache_flush ( void )
2019-11-13 21:05:41 +11:00
{
KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C
Almost all logic is moved to C, by introducing a new in_guest mode for
the P9 path that branches very early in the KVM interrupt handler to P9
exit code.
The main P9 entry and exit assembly is now only about 160 lines of low
level stack setup and register save/restore, plus a bad-interrupt
handler.
There are two motivations for this, the first is just make the code more
maintainable being in C. The second is to reduce the amount of code
running in a special KVM mode, "realmode". In quotes because with radix
it is no longer necessarily real-mode in the MMU, but it still has to be
treated specially because it may be in real-mode, and has various
important registers like PID, DEC, TB, etc set to guest. This is hostile
to the rest of Linux and can't use arbitrary kernel functionality or be
instrumented well.
This initial patch is a reasonably faithful conversion of the asm code,
but it does lack any loop to return quickly back into the guest without
switching out of realmode in the case of unimportant or easily handled
interrupts. As explained in previous changes, handling HV interrupts
very quickly in this low level realmode is not so important for P9
performance, and are important to avoid for security, observability,
debugability reasons.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-15-npiggin@gmail.com
2021-05-28 19:07:34 +10:00
u32 * site , __maybe_unused * site2 ;
2020-10-07 18:06:05 +10:00
2019-11-13 21:05:44 +11:00
# ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
2020-10-07 18:06:05 +10:00
site = & patch__call_kvm_flush_link_stack ;
KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C
Almost all logic is moved to C, by introducing a new in_guest mode for
the P9 path that branches very early in the KVM interrupt handler to P9
exit code.
The main P9 entry and exit assembly is now only about 160 lines of low
level stack setup and register save/restore, plus a bad-interrupt
handler.
There are two motivations for this, the first is just make the code more
maintainable being in C. The second is to reduce the amount of code
running in a special KVM mode, "realmode". In quotes because with radix
it is no longer necessarily real-mode in the MMU, but it still has to be
treated specially because it may be in real-mode, and has various
important registers like PID, DEC, TB, etc set to guest. This is hostile
to the rest of Linux and can't use arbitrary kernel functionality or be
instrumented well.
This initial patch is a reasonably faithful conversion of the asm code,
but it does lack any loop to return quickly back into the guest without
switching out of realmode in the case of unimportant or easily handled
interrupts. As explained in previous changes, handling HV interrupts
very quickly in this low level realmode is not so important for P9
performance, and are important to avoid for security, observability,
debugability reasons.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-15-npiggin@gmail.com
2021-05-28 19:07:34 +10:00
site2 = & patch__call_kvm_flush_link_stack_p9 ;
2020-06-09 17:06:07 +10:00
// This controls the branch from guest_exit_cont to kvm_flush_link_stack
if ( link_stack_flush_type = = BRANCH_CACHE_FLUSH_NONE ) {
2021-05-20 10:23:05 +00:00
patch_instruction_site ( site , ppc_inst ( PPC_RAW_NOP ( ) ) ) ;
2021-06-17 16:51:38 +10:00
patch_instruction_site ( site2 , ppc_inst ( PPC_RAW_NOP ( ) ) ) ;
2020-06-09 17:06:07 +10:00
} else {
2020-06-09 17:06:09 +10:00
// Could use HW flush, but that could also flush count cache
2020-10-07 18:06:05 +10:00
patch_branch_site ( site , ( u64 ) & kvm_flush_link_stack , BRANCH_SET_LINK ) ;
KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C
Almost all logic is moved to C, by introducing a new in_guest mode for
the P9 path that branches very early in the KVM interrupt handler to P9
exit code.
The main P9 entry and exit assembly is now only about 160 lines of low
level stack setup and register save/restore, plus a bad-interrupt
handler.
There are two motivations for this, the first is just make the code more
maintainable being in C. The second is to reduce the amount of code
running in a special KVM mode, "realmode". In quotes because with radix
it is no longer necessarily real-mode in the MMU, but it still has to be
treated specially because it may be in real-mode, and has various
important registers like PID, DEC, TB, etc set to guest. This is hostile
to the rest of Linux and can't use arbitrary kernel functionality or be
instrumented well.
This initial patch is a reasonably faithful conversion of the asm code,
but it does lack any loop to return quickly back into the guest without
switching out of realmode in the case of unimportant or easily handled
interrupts. As explained in previous changes, handling HV interrupts
very quickly in this low level realmode is not so important for P9
performance, and are important to avoid for security, observability,
debugability reasons.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-15-npiggin@gmail.com
2021-05-28 19:07:34 +10:00
patch_branch_site ( site2 , ( u64 ) & kvm_flush_link_stack , BRANCH_SET_LINK ) ;
2018-07-24 01:07:54 +10:00
}
2019-11-13 21:05:44 +11:00
# endif
2020-10-07 18:06:05 +10:00
// Patch out the bcctr first, then nop the rest
site = & patch__call_flush_branch_caches3 ;
2021-05-20 10:23:05 +00:00
patch_instruction_site ( site , ppc_inst ( PPC_RAW_NOP ( ) ) ) ;
2020-10-07 18:06:05 +10:00
site = & patch__call_flush_branch_caches2 ;
2021-05-20 10:23:05 +00:00
patch_instruction_site ( site , ppc_inst ( PPC_RAW_NOP ( ) ) ) ;
2020-10-07 18:06:05 +10:00
site = & patch__call_flush_branch_caches1 ;
2021-05-20 10:23:05 +00:00
patch_instruction_site ( site , ppc_inst ( PPC_RAW_NOP ( ) ) ) ;
2020-10-07 18:06:05 +10:00
2020-06-09 17:06:07 +10:00
// This controls the branch from _switch to flush_branch_caches
if ( count_cache_flush_type = = BRANCH_CACHE_FLUSH_NONE & &
link_stack_flush_type = = BRANCH_CACHE_FLUSH_NONE ) {
2020-10-07 18:06:05 +10:00
// Nothing to be done
2020-06-09 17:06:09 +10:00
} else if ( count_cache_flush_type = = BRANCH_CACHE_FLUSH_HW & &
link_stack_flush_type = = BRANCH_CACHE_FLUSH_HW ) {
2020-10-07 18:06:05 +10:00
// Patch in the bcctr last
site = & patch__call_flush_branch_caches1 ;
patch_instruction_site ( site , ppc_inst ( 0x39207fff ) ) ; // li r9,0x7fff
site = & patch__call_flush_branch_caches2 ;
patch_instruction_site ( site , ppc_inst ( 0x7d2903a6 ) ) ; // mtctr r9
site = & patch__call_flush_branch_caches3 ;
patch_instruction_site ( site , ppc_inst ( PPC_INST_BCCTR_FLUSH ) ) ;
2020-06-09 17:06:07 +10:00
} else {
2020-10-07 18:06:05 +10:00
patch_branch_site ( site , ( u64 ) & flush_branch_caches , BRANCH_SET_LINK ) ;
2020-06-09 17:06:07 +10:00
// If we just need to flush the link stack, early return
if ( count_cache_flush_type = = BRANCH_CACHE_FLUSH_NONE ) {
patch_instruction_site ( & patch__flush_link_stack_return ,
2021-05-20 10:23:05 +00:00
ppc_inst ( PPC_RAW_BLR ( ) ) ) ;
2020-06-09 17:06:07 +10:00
// If we have flush instruction, early return
} else if ( count_cache_flush_type = = BRANCH_CACHE_FLUSH_HW ) {
patch_instruction_site ( & patch__flush_count_cache_return ,
2021-05-20 10:23:05 +00:00
ppc_inst ( PPC_RAW_BLR ( ) ) ) ;
2020-06-09 17:06:07 +10:00
}
}
}
/*
 * Select the flush type for the count cache and for the link stack, log each
 * choice, then re-patch the call sites via update_branch_cache_flush().
 *
 * A flush is only selected when @enable is true and the matching
 * SEC_FTR_FLUSH_* flag is set. The hardware variant is chosen when the
 * corresponding *_FLUSH_ASSIST flag is set, the software variant otherwise.
 */
static void toggle_branch_cache_flush(bool enable)
{
	if (enable && security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
		if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
			count_cache_flush_type = BRANCH_CACHE_FLUSH_HW;
			pr_info(" count-cache-flush: hardware flush enabled. \n ");
		} else {
			count_cache_flush_type = BRANCH_CACHE_FLUSH_SW;
			pr_info(" count-cache-flush: software flush enabled. \n ");
		}
	} else {
		count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
		pr_info(" count-cache-flush: flush disabled. \n ");
	}

	if (enable && security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) {
		if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) {
			link_stack_flush_type = BRANCH_CACHE_FLUSH_HW;
			pr_info(" link-stack-flush: hardware flush enabled. \n ");
		} else {
			link_stack_flush_type = BRANCH_CACHE_FLUSH_SW;
			pr_info(" link-stack-flush: software flush enabled. \n ");
		}
	} else {
		link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
		pr_info(" link-stack-flush: flush disabled. \n ");
	}

	update_branch_cache_flush();
}
void setup_count_cache_flush ( void )
{
2019-05-23 21:46:48 -05:00
bool enable = true ;
if ( no_spectrev2 | | cpu_mitigations_off ( ) ) {
if ( security_ftr_enabled ( SEC_FTR_BCCTRL_SERIALISED ) | |
security_ftr_enabled ( SEC_FTR_COUNT_CACHE_DISABLED ) )
2019-11-13 21:05:41 +11:00
pr_warn ( " Spectre v2 mitigations not fully under software control, can't disable \n " ) ;
2019-05-23 21:46:48 -05:00
enable = false ;
}
2019-11-13 21:05:41 +11:00
/*
* There ' s no firmware feature flag / hypervisor bit to tell us we need to
* flush the link stack on context switch . So we set it here if we see
* either of the Spectre v2 mitigations that aim to protect userspace .
*/
if ( security_ftr_enabled ( SEC_FTR_COUNT_CACHE_DISABLED ) | |
security_ftr_enabled ( SEC_FTR_FLUSH_COUNT_CACHE ) )
security_ftr_set ( SEC_FTR_FLUSH_LINK_STACK ) ;
2020-06-09 17:06:04 +10:00
toggle_branch_cache_flush ( enable ) ;
2018-07-24 01:07:54 +10:00
}
2021-03-26 21:12:01 +11:00
/* L1D flush types recorded by setup_rfi_flush(). */
static enum l1d_flush_type enabled_flush_types;
/* Fallback flush area, allocated once by init_fallback_flush(). */
static void *l1d_flush_fallback_area;

/* Command-line opt-outs, set by the handle_no_*() early-parameter handlers. */
static bool no_rfi_flush;
static bool no_entry_flush;
static bool no_uaccess_flush;

/* Current on/off state of each flush, updated by the *_flush_enable() helpers. */
bool rfi_flush;
static bool entry_flush;
static bool uaccess_flush;

/* Static key toggled together with the uaccess flush. */
DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
EXPORT_SYMBOL(uaccess_flush_key);
/*
 * Early-parameter handler: log that the RFI flush was disabled on the command
 * line and set no_rfi_flush. The argument is ignored (handle_no_pti() calls
 * this with NULL). Always returns 0.
 */
static int __init handle_no_rfi_flush(char *p)
{
	/* Terminate the message with a newline like the other rfi-flush logs. */
	pr_info(" rfi-flush: disabled on command line. \n ");
	no_rfi_flush = true;

	return 0;
}
early_param(" no_rfi_flush ", handle_no_rfi_flush);
/*
 * Early-parameter handler: log that the entry flush was disabled on the
 * command line and set no_entry_flush. The argument string is ignored.
 * Always returns 0.
 */
static int __init handle_no_entry_flush(char *p)
{
	/* Terminate the message with a newline so it forms a complete log line. */
	pr_info(" entry-flush: disabled on command line. \n ");
	no_entry_flush = true;

	return 0;
}
early_param(" no_entry_flush ", handle_no_entry_flush);
/*
 * Early-parameter handler: log that the uaccess flush was disabled on the
 * command line and set no_uaccess_flush. The argument string is ignored.
 * Always returns 0.
 */
static int __init handle_no_uaccess_flush(char *p)
{
	/* Terminate the message with a newline so it forms a complete log line. */
	pr_info(" uaccess-flush: disabled on command line. \n ");
	no_uaccess_flush = true;

	return 0;
}
early_param(" no_uaccess_flush ", handle_no_uaccess_flush);
/*
 * The RFI flush is not KPTI, but because users will see documentation that
 * says to use nopti, that option is hijacked here to also disable the RFI
 * flush. The argument string is ignored. Always returns 0.
 */
static int __init handle_no_pti(char *p)
{
	pr_info(" rfi-flush: disabling due to 'nopti' on command line. \n ");
	handle_no_rfi_flush(NULL);

	return 0;
}
early_param(" nopti ", handle_no_pti);
/*
 * Intentionally empty callback for on_each_cpu().
 *
 * We don't need to do the flush explicitly: just entering and exiting the
 * kernel is sufficient, the RFI exit handlers will do the right thing.
 */
static void do_nothing(void *unused)
{
}
/*
 * Apply the RFI flush fixups for the requested state and record that state in
 * rfi_flush. Enabling uses enabled_flush_types and then runs do_nothing() on
 * every CPU; disabling applies L1D_FLUSH_NONE.
 */
void rfi_flush_enable(bool enable)
{
	if (!enable) {
		do_rfi_flush_fixups(L1D_FLUSH_NONE);
	} else {
		do_rfi_flush_fixups(enabled_flush_types);
		on_each_cpu(do_nothing, NULL, 1);
	}

	rfi_flush = enable;
}
/*
 * Apply the entry flush fixups for the requested state and record that state
 * in entry_flush. Enabling uses enabled_flush_types and then runs do_nothing()
 * on every CPU; disabling applies L1D_FLUSH_NONE.
 */
static void entry_flush_enable(bool enable)
{
	if (!enable) {
		do_entry_flush_fixups(L1D_FLUSH_NONE);
	} else {
		do_entry_flush_fixups(enabled_flush_types);
		on_each_cpu(do_nothing, NULL, 1);
	}

	entry_flush = enable;
}
/*
 * Apply the uaccess flush fixups for the requested state, toggle
 * uaccess_flush_key to match, and record the state in uaccess_flush.
 *
 * Enabling patches first and then enables the key; disabling disables the key
 * first and then patches. That ordering is kept as in the original.
 */
static void uaccess_flush_enable(bool enable)
{
	if (!enable) {
		static_branch_disable(&uaccess_flush_key);
		do_uaccess_flush_fixups(L1D_FLUSH_NONE);
	} else {
		do_uaccess_flush_fixups(enabled_flush_types);
		static_branch_enable(&uaccess_flush_key);
		on_each_cpu(do_nothing, NULL, 1);
	}

	uaccess_flush = enable;
}
/*
 * Allocate the fallback L1D flush area and publish its address and the L1D
 * size in every possible CPU's paca. Does nothing if the area already exists
 * and panics if the allocation fails.
 */
static void __ref init_fallback_flush(void)
{
	u64 l1d_size, area_size, limit;
	int cpu;

	/* Only allocate the fallback flush area once (at boot time). */
	if (l1d_flush_fallback_area)
		return;

	/*
	 * If there is no d-cache-size property in the device tree, l1d_size
	 * could be zero. That leads to the loop in the asm wrapping around to
	 * 2^64-1, and then walking off the end of the fallback area and
	 * eventually causing a page fault which is fatal. Just default to
	 * something vaguely sane.
	 */
	l1d_size = ppc64_caches.l1d.size;
	if (l1d_size == 0)
		l1d_size = 64 * 1024;

	limit = min(ppc64_bolted_size(), ppc64_rma_size);

	/*
	 * Align to L1d size, and size it at 2x L1d size, to catch possible
	 * hardware prefetch runoff. We don't have a recipe for load patterns
	 * to reliably avoid the prefetcher.
	 */
	area_size = l1d_size * 2;
	l1d_flush_fallback_area = memblock_alloc_try_nid(area_size, l1d_size,
							 MEMBLOCK_LOW_LIMIT,
							 limit, NUMA_NO_NODE);
	if (!l1d_flush_fallback_area)
		panic(" %s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa \n ",
		      __func__, area_size, l1d_size, &limit);

	for_each_possible_cpu(cpu) {
		paca_ptrs[cpu]->rfi_flush_fallback_area = l1d_flush_fallback_area;
		paca_ptrs[cpu]->l1d_flush_size = l1d_size;
	}
}
/*
 * Record the available L1D flush types, log each one, and set the RFI flush
 * to @enable unless cpu_mitigations_off() or no_rfi_flush applies.
 *
 * The fallback flush area is initialised when L1D_FLUSH_FALLBACK is among
 * @types.
 */
void setup_rfi_flush(enum l1d_flush_type types, bool enable)
{
	if (types & L1D_FLUSH_FALLBACK) {
		pr_info(" rfi-flush: fallback displacement flush available \n ");
		init_fallback_flush();
	}

	if (types & L1D_FLUSH_ORI)
		pr_info(" rfi-flush: ori type flush available \n ");

	if (types & L1D_FLUSH_MTTRIG)
		pr_info(" rfi-flush: mttrig type flush available \n ");

	enabled_flush_types = types;

	if (cpu_mitigations_off() || no_rfi_flush)
		return;

	rfi_flush_enable(enable);
}
/*
 * Set the entry flush to @enable, unless cpu_mitigations_off() or
 * no_entry_flush applies.
 */
void setup_entry_flush(bool enable)
{
	if (cpu_mitigations_off() || no_entry_flush)
		return;

	entry_flush_enable(enable);
}
/*
 * Set the uaccess flush to @enable, unless cpu_mitigations_off() or
 * no_uaccess_flush applies.
 */
void setup_uaccess_flush(bool enable)
{
	if (cpu_mitigations_off() || no_uaccess_flush)
		return;

	uaccess_flush_enable(enable);
}
2018-07-24 01:07:54 +10:00
# ifdef CONFIG_DEBUG_FS
/*
 * Setter for the count_cache_flush debugfs attribute.
 *
 * Accepts 0 (disable) or 1 (enable) and passes the request on to
 * toggle_branch_cache_flush(); any other value yields -EINVAL.
 * @data is unused.
 */
static int count_cache_flush_set(void *data, u64 val)
{
	if (val > 1)
		return -EINVAL;

	toggle_branch_cache_flush(val == 1);

	return 0;
}
/*
 * Getter for the count_cache_flush debugfs attribute: stores 0 in *val when
 * no count cache flush is selected and 1 otherwise. @data is unused.
 * Always returns 0.
 */
static int count_cache_flush_get(void *data, u64 *val)
{
	*val = (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) ? 0 : 1;

	return 0;
}
2018-11-29 13:35:18 +00:00
DEFINE_DEBUGFS_ATTRIBUTE ( fops_count_cache_flush , count_cache_flush_get ,
count_cache_flush_set , " %llu \n " ) ;
2018-07-24 01:07:54 +10:00
static __init int count_cache_flush_debugfs_init ( void )
{
2018-11-29 13:35:18 +00:00
debugfs_create_file_unsafe ( " count_cache_flush " , 0600 ,
2021-08-12 18:58:31 +05:30
arch_debugfs_dir , NULL ,
2018-11-29 13:35:18 +00:00
& fops_count_cache_flush ) ;
2018-07-24 01:07:54 +10:00
return 0 ;
}
device_initcall ( count_cache_flush_debugfs_init ) ;
2021-03-26 21:12:01 +11:00
/*
 * Setter for the rfi_flush debugfs attribute.
 *
 * Accepts 0 (disable) or 1 (enable); any other value yields -EINVAL.
 * @data is unused.
 */
static int rfi_flush_set(void *data, u64 val)
{
	bool enable;

	switch (val) {
	case 0:
		enable = false;
		break;
	case 1:
		enable = true;
		break;
	default:
		return -EINVAL;
	}

	/* Only do anything if we're changing state */
	if (enable != rfi_flush)
		rfi_flush_enable(enable);

	return 0;
}
/*
 * Getter for the rfi_flush debugfs attribute: stores 1 in *val when the RFI
 * flush is enabled and 0 otherwise. @data is unused. Always returns 0.
 */
static int rfi_flush_get(void *data, u64 *val)
{
	if (rfi_flush)
		*val = 1;
	else
		*val = 0;

	return 0;
}

/* File operations built from rfi_flush_get() / rfi_flush_set(). */
DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, " %llu \n ");
/*
 * Setter for the entry_flush debugfs attribute.
 *
 * Accepts 0 (disable) or 1 (enable); any other value yields -EINVAL.
 * @data is unused.
 */
static int entry_flush_set(void *data, u64 val)
{
	bool enable;

	switch (val) {
	case 0:
		enable = false;
		break;
	case 1:
		enable = true;
		break;
	default:
		return -EINVAL;
	}

	/* Only do anything if we're changing state */
	if (enable != entry_flush)
		entry_flush_enable(enable);

	return 0;
}
/*
 * Getter for the entry_flush debugfs attribute: stores 1 in *val when the
 * entry flush is enabled and 0 otherwise. @data is unused. Always returns 0.
 */
static int entry_flush_get(void *data, u64 *val)
{
	if (entry_flush)
		*val = 1;
	else
		*val = 0;

	return 0;
}

/* File operations built from entry_flush_get() / entry_flush_set(). */
DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, " %llu \n ");
/*
 * Setter for the uaccess_flush debugfs attribute.
 *
 * Accepts 0 (disable) or 1 (enable); any other value yields -EINVAL.
 * @data is unused.
 */
static int uaccess_flush_set(void *data, u64 val)
{
	bool enable;

	switch (val) {
	case 0:
		enable = false;
		break;
	case 1:
		enable = true;
		break;
	default:
		return -EINVAL;
	}

	/* Only do anything if we're changing state */
	if (enable != uaccess_flush)
		uaccess_flush_enable(enable);

	return 0;
}
/*
 * Getter for the uaccess_flush debugfs attribute: stores 1 in *val when the
 * uaccess flush is enabled and 0 otherwise. @data is unused. Always returns 0.
 */
static int uaccess_flush_get(void *data, u64 *val)
{
	if (uaccess_flush)
		*val = 1;
	else
		*val = 0;

	return 0;
}

/* File operations built from uaccess_flush_get() / uaccess_flush_set(). */
DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, " %llu \n ");
static __init int rfi_flush_debugfs_init ( void )
{
2021-08-12 18:58:31 +05:30
debugfs_create_file ( " rfi_flush " , 0600 , arch_debugfs_dir , NULL , & fops_rfi_flush ) ;
debugfs_create_file ( " entry_flush " , 0600 , arch_debugfs_dir , NULL , & fops_entry_flush ) ;
debugfs_create_file ( " uaccess_flush " , 0600 , arch_debugfs_dir , NULL , & fops_uaccess_flush ) ;
2021-03-26 21:12:01 +11:00
return 0 ;
}
device_initcall ( rfi_flush_debugfs_init ) ;
2018-07-24 01:07:54 +10:00
# endif /* CONFIG_DEBUG_FS */
2018-07-28 09:06:33 +10:00
# endif /* CONFIG_PPC_BOOK3S_64 */