2020-09-07 15:15:39 +02:00
// SPDX-License-Identifier: GPL-2.0-only
/*
* AMD Memory Encryption Support
*
* Copyright ( C ) 2019 SUSE
*
* Author : Joerg Roedel < jroedel @ suse . de >
*/
2021-06-22 16:48:25 +02:00
# define pr_fmt(fmt) "SEV: " fmt
2020-09-07 15:15:47 +02:00
2020-09-08 14:38:16 +02:00
# include <linux/sched/debug.h> /* For show_regs() */
2020-09-07 15:15:42 +02:00
# include <linux/percpu-defs.h>
# include <linux/mem_encrypt.h>
2020-09-08 14:38:16 +02:00
# include <linux/printk.h>
2020-09-07 15:15:42 +02:00
# include <linux/mm_types.h>
# include <linux/set_memory.h>
# include <linux/memblock.h>
# include <linux/kernel.h>
2020-09-07 15:15:39 +02:00
# include <linux/mm.h>
2020-09-07 15:15:43 +02:00
# include <asm/cpu_entry_area.h>
2020-09-07 15:16:07 +02:00
# include <asm/stacktrace.h>
2021-04-27 06:16:34 -05:00
# include <asm/sev.h>
2020-09-07 15:15:39 +02:00
# include <asm/insn-eval.h>
# include <asm/fpu/internal.h>
# include <asm/processor.h>
2020-09-07 15:15:47 +02:00
# include <asm/realmode.h>
# include <asm/traps.h>
2020-09-07 15:15:39 +02:00
# include <asm/svm.h>
2020-09-07 15:16:10 +02:00
# include <asm/smp.h>
# include <asm/cpu.h>
2020-09-07 15:15:39 +02:00
2020-09-07 15:15:53 +02:00
/*
 * DR7 value with only bit 10 set. vc_handle_dr7_read() reports it while no
 * per-CPU runtime data exists, and vc_handle_dr7_write() only accepts
 * writes of (at most) this value during that time.
 */
# define DR7_RESET_VALUE 0x400
2020-09-08 14:38:16 +02:00
/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned ( PAGE_SIZE ) ;
/*
 * Pointer to the boot GHCB; sev_es_setup_ghcb() points it at
 * boot_ghcb_page once the protocol negotiation succeeded.
 *
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared.
 */
static struct ghcb __initdata * boot_ghcb ;
2020-09-07 15:15:42 +02:00
/*
 * #VC handler runtime per-CPU data.
 *
 * One instance is allocated page-aligned per possible CPU by
 * alloc_runtime_data(). ghcb_page must stay at a page-aligned offset
 * (checked with BUILD_BUG_ON() in sev_es_init_vc_handling()) because
 * init_ghcb() maps exactly that member decrypted.
 */
struct sev_es_runtime_data {
/* The per-CPU GHCB handed out by __sev_get_ghcb() */
struct ghcb ghcb_page ;
2020-09-07 15:15:43 +02:00
/* Physical storage for the per-CPU IST stack of the #VC handler */
char ist_stack [ EXCEPTION_STKSZ ] __aligned ( PAGE_SIZE ) ;
/*
 * Physical storage for the per-CPU fall-back stack of the #VC handler.
 * The fall-back stack is used when it is not safe to switch back to the
 * interrupted stack in the #VC entry code.
 */
char fallback_stack [ EXCEPTION_STKSZ ] __aligned ( PAGE_SIZE ) ;
2020-09-07 15:15:47 +02:00
/*
 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
 * It is needed when an NMI happens while the #VC handler uses the real
 * GHCB, and the NMI handler itself is causing another #VC exception. In
 * that case the GHCB content of the first handler needs to be backed up
 * and restored.
 */
struct ghcb backup_ghcb ;
/*
 * Mark the per-CPU GHCBs as in-use to detect nested #VC exceptions.
 * There is no need for it to be atomic, because nothing is written to
 * the GHCB between the read and the write of ghcb_active. So it is safe
 * to use it when a nested #VC exception happens before the write.
 *
 * This is necessary for example in the #VC->NMI->#VC case when the NMI
 * happens while the first #VC handler uses the GHCB. When the NMI code
 * raises a second #VC handler it might overwrite the contents of the
 * GHCB written by the first handler. To avoid this the content of the
 * GHCB is saved and restored when the GHCB is detected to be in use
 * already.
 */
bool ghcb_active ;
bool backup_ghcb_active ;
2020-09-07 15:15:53 +02:00
/*
 * Cached DR7 value - write it on DR7 writes and return it on reads.
 * That value will never make it to the real hardware DR7 as debugging
 * is currently unsupported in SEV-ES guests.
 */
unsigned long dr7 ;
2020-09-07 15:15:47 +02:00
} ;
/*
 * State handed from __sev_get_ghcb() to the matching __sev_put_ghcb().
 */
struct ghcb_state {
/*
 * Points to the per-CPU backup GHCB when the GHCB was already in use and
 * its contents had to be saved; NULL when no backup was taken.
 */
struct ghcb * ghcb ;
2020-09-07 15:15:42 +02:00
} ;
/* Per-CPU pointer to the runtime data; assigned in alloc_runtime_data() */
static DEFINE_PER_CPU ( struct sev_es_runtime_data * , runtime_data ) ;
2020-09-07 15:15:44 +02:00
/* Static key switched on by sev_es_init_vc_handling() */
DEFINE_STATIC_KEY_FALSE ( sev_es_enable_key ) ;
2020-09-07 15:15:42 +02:00
2020-09-07 15:15:47 +02:00
/* Needed in vc_early_forward_exception */
void do_early_exception ( struct pt_regs * regs , int trapnr ) ;
2020-09-07 15:15:43 +02:00
/*
 * Install the cpu_entry_area mappings for the #VC stacks of @cpu.
 *
 * The VC and VC2 exception-stack slots of the CPU's entry area are pointed
 * at the ist_stack and fallback_stack storage of that CPU's runtime data.
 */
static void __init setup_vc_stacks(int cpu)
{
	struct sev_es_runtime_data *rt = per_cpu(runtime_data, cpu);
	struct cpu_entry_area *area = get_cpu_entry_area(cpu);
	unsigned long ist_va, fallback_va;
	phys_addr_t ist_pa, fallback_pa;

	/* #VC IST stack */
	ist_va = CEA_ESTACK_BOT(&area->estacks, VC);
	ist_pa = __pa(rt->ist_stack);
	cea_set_pte((void *)ist_va, ist_pa, PAGE_KERNEL);

	/* #VC fall-back stack */
	fallback_va = CEA_ESTACK_BOT(&area->estacks, VC2);
	fallback_pa = __pa(rt->fallback_stack);
	cea_set_pte((void *)fallback_va, fallback_pa, PAGE_KERNEL);
}
2021-03-03 15:17:13 +01:00
/*
 * Tell whether the stack pointer saved in @regs lies within this CPU's
 * #VC IST stack, i.e. in [bottom, top).
 *
 * The saved RSP is only trusted when @regs describe kernel context outside
 * of the SYSCALL gap; in all other cases false is returned.
 */
static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
	unsigned long stack_ptr;

	/*
	 * User-mode RSP is not trusted, and the SYSCALL gap still runs on
	 * the user-mode RSP.
	 */
	if (user_mode(regs) || ip_within_syscall_gap(regs))
		return false;

	stack_ptr = regs->sp;
	if (stack_ptr < __this_cpu_ist_bottom_va(VC))
		return false;

	return stack_ptr < __this_cpu_ist_top_va(VC);
}
/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long saved_ist, adjusted_ist;

	/* Current #VC IST entry; this is what __sev_es_ist_exit() restores */
	saved_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/*
	 * When the NMI hit while running on the #VC IST stack, continue
	 * below the interrupted regs->sp so that the interrupted stack
	 * frame is not overwritten by subsequent #VC exceptions.
	 */
	adjusted_ist = on_vc_stack(regs) ? regs->sp : saved_ist;

	/*
	 * Reserve one more slot and park the old IST value in it so the
	 * adjustment can be unrolled in __sev_es_ist_exit().
	 */
	adjusted_ist -= sizeof(saved_ist);
	*(unsigned long *)adjusted_ist = saved_ist;

	/* Publish the new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], adjusted_ist);
}
/*
 * Undo the #VC IST adjustment made by __sev_es_ist_enter().
 *
 * The current IST entry points at the slot holding the previous IST value.
 * Warn and leave the entry alone if it still equals the top of the #VC
 * IST stack, since then there is nothing to read back.
 */
void noinstr __sev_es_ist_exit(void)
{
	unsigned long cur_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
	unsigned long *saved_slot = (unsigned long *)cur_ist;

	if (!WARN_ON(cur_ist == __this_cpu_ist_top_va(VC))) {
		/* Read back the old IST entry and write it to the TSS */
		this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *saved_slot);
	}
}
2021-06-18 13:54:08 +02:00
/*
* Nothing shall interrupt this code path while holding the per - CPU
* GHCB . The backup GHCB is only for NMIs interrupting this path .
*
* Callers must disable local interrupts around it .
*/
static noinstr struct ghcb * __sev_get_ghcb ( struct ghcb_state * state )
2020-09-07 15:15:47 +02:00
{
struct sev_es_runtime_data * data ;
struct ghcb * ghcb ;
2021-06-18 13:54:08 +02:00
WARN_ON ( ! irqs_disabled ( ) ) ;
2020-09-07 15:15:47 +02:00
data = this_cpu_read ( runtime_data ) ;
ghcb = & data - > ghcb_page ;
if ( unlikely ( data - > ghcb_active ) ) {
/* GHCB is already in use - save its contents */
2021-05-19 15:52:44 +02:00
if ( unlikely ( data - > backup_ghcb_active ) ) {
/*
* Backup - GHCB is also already in use . There is no way
* to continue here so just kill the machine . To make
* panic ( ) work , mark GHCBs inactive so that messages
* can be printed out .
*/
data - > ghcb_active = false ;
data - > backup_ghcb_active = false ;
2021-06-18 13:54:08 +02:00
instrumentation_begin ( ) ;
2021-05-19 15:52:44 +02:00
panic ( " Unable to handle #VC exception! GHCB and Backup GHCB are already in use " ) ;
2021-06-18 13:54:08 +02:00
instrumentation_end ( ) ;
2021-05-19 15:52:44 +02:00
}
2020-09-07 15:15:47 +02:00
/* Mark backup_ghcb active before writing to it */
data - > backup_ghcb_active = true ;
state - > ghcb = & data - > backup_ghcb ;
/* Backup GHCB content */
* state - > ghcb = * ghcb ;
} else {
state - > ghcb = NULL ;
data - > ghcb_active = true ;
}
return ghcb ;
}
2020-09-07 15:16:07 +02:00
/*
 * Needed in vc_early_forward_exception.
 *
 * NOTE(review): an identical do_early_exception() declaration already
 * appears earlier in this file; one of the two looks redundant - confirm
 * before removing.
 */
void do_early_exception ( struct pt_regs * regs , int trapnr ) ;
2020-09-07 15:15:39 +02:00
/* Return the current contents of the SEV-ES GHCB MSR. */
static inline u64 sev_es_rd_ghcb_msr(void)
{
	u64 ghcb_msr = __rdmsr(MSR_AMD64_SEV_ES_GHCB);

	return ghcb_msr;
}
2021-01-06 15:36:21 +01:00
/*
 * Write @val to the SEV-ES GHCB MSR. The 64-bit value is handed to
 * native_wrmsr() as its low and high 32-bit halves.
 */
static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
	u32 lo_half = (u32)val;
	u32 hi_half = (u32)(val >> 32);

	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, lo_half, hi_half);
}
static int vc_fetch_insn_kernel ( struct es_em_ctxt * ctxt ,
unsigned char * buffer )
{
return copy_from_kernel_nofault ( buffer , ( unsigned char * ) ctxt - > regs - > ip , MAX_INSN_SIZE ) ;
}
2021-02-23 11:28:02 +01:00
static enum es_result __vc_decode_user_insn ( struct es_em_ctxt * ctxt )
2020-09-07 15:15:39 +02:00
{
char buffer [ MAX_INSN_SIZE ] ;
2021-06-14 15:53:26 +02:00
int insn_bytes ;
2020-09-07 15:15:39 +02:00
2021-06-14 15:53:26 +02:00
insn_bytes = insn_fetch_from_user_inatomic ( ctxt - > regs , buffer ) ;
2021-06-14 15:53:27 +02:00
if ( insn_bytes = = 0 ) {
/* Nothing could be copied */
2021-02-23 11:28:02 +01:00
ctxt - > fi . vector = X86_TRAP_PF ;
ctxt - > fi . error_code = X86_PF_INSTR | X86_PF_USER ;
ctxt - > fi . cr2 = ctxt - > regs - > ip ;
return ES_EXCEPTION ;
2021-06-14 15:53:27 +02:00
} else if ( insn_bytes = = - EINVAL ) {
/* Effective RIP could not be calculated */
ctxt - > fi . vector = X86_TRAP_GP ;
ctxt - > fi . error_code = 0 ;
ctxt - > fi . cr2 = 0 ;
return ES_EXCEPTION ;
2020-09-07 15:15:39 +02:00
}
2021-06-14 15:53:26 +02:00
if ( ! insn_decode_from_regs ( & ctxt - > insn , ctxt - > regs , buffer , insn_bytes ) )
2021-02-23 11:28:02 +01:00
return ES_DECODE_FAILED ;
2020-11-16 18:21:23 +01:00
if ( ctxt - > insn . immediate . got )
return ES_OK ;
else
return ES_DECODE_FAILED ;
2020-09-07 15:15:39 +02:00
}
2021-02-23 11:28:02 +01:00
static enum es_result __vc_decode_kern_insn ( struct es_em_ctxt * ctxt )
{
char buffer [ MAX_INSN_SIZE ] ;
2020-11-16 18:21:23 +01:00
int res , ret ;
2021-02-23 11:28:02 +01:00
res = vc_fetch_insn_kernel ( ctxt , buffer ) ;
if ( res ) {
ctxt - > fi . vector = X86_TRAP_PF ;
ctxt - > fi . error_code = X86_PF_INSTR ;
ctxt - > fi . cr2 = ctxt - > regs - > ip ;
return ES_EXCEPTION ;
2020-09-07 15:15:39 +02:00
}
2020-11-16 18:21:23 +01:00
ret = insn_decode ( & ctxt - > insn , buffer , MAX_INSN_SIZE , INSN_MODE_64 ) ;
if ( ret < 0 )
return ES_DECODE_FAILED ;
else
return ES_OK ;
2021-02-23 11:28:02 +01:00
}
2020-09-07 15:15:39 +02:00
2021-02-23 11:28:02 +01:00
static enum es_result vc_decode_insn ( struct es_em_ctxt * ctxt )
{
if ( user_mode ( ctxt - > regs ) )
return __vc_decode_user_insn ( ctxt ) ;
else
return __vc_decode_kern_insn ( ctxt ) ;
2020-09-07 15:15:39 +02:00
}
/*
 * Store @size bytes from @buf to @dst on behalf of the #VC handler.
 *
 * @ctxt: emulation context; ctxt->fi receives the fault to raise on failure
 * @dst:  destination address (kernel or user memory)
 * @buf:  source bytes
 * @size: access width, must be 1, 2, 4 or 8
 *
 * Returns ES_OK on success, ES_UNSUPPORTED (after a one-time warning) for
 * any other @size, and ES_EXCEPTION with a write #PF recorded in ctxt->fi
 * when the access faults.
 */
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;

	/*
	 * This function uses __put_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __put_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __put_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_to_user() here because
	 * vc_write_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 *
	 * __put_user() derives the width of the store from the type the
	 * pointer points to, so every case needs a pointer of exactly the
	 * width being written. Going through a 'char __user *' would store
	 * a single byte no matter what @size is.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *target = (u8 __user *)dst;

		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *target = (u16 __user *)dst;

		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *target = (u32 __user *)dst;

		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *target = (u64 __user *)dst;

		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
	}
	default:
		WARN_ONCE(1, " %s: Invalid size: %zu \n ", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}
/*
 * Load @size bytes from @src into @buf on behalf of the #VC handler.
 *
 * @ctxt: emulation context; ctxt->fi receives the fault to raise on failure
 * @src:  source address (kernel or user memory)
 * @buf:  destination buffer
 * @size: access width, must be 1, 2, 4 or 8
 *
 * Returns ES_OK on success, ES_UNSUPPORTED (after a one-time warning) for
 * any other @size, and ES_EXCEPTION with a read #PF recorded in ctxt->fi
 * when the access faults.
 */
static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;

	/*
	 * This function uses __get_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __get_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __get_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_from_user() here because
	 * vc_read_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 *
	 * __get_user() derives the width of the load from the type the
	 * pointer points to, so every case needs a pointer of exactly the
	 * width being read. Going through a 'char __user *' would load a
	 * single byte no matter what @size is.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *s = (u8 __user *)src;

		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *s = (u16 __user *)src;

		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *s = (u32 __user *)src;

		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *s = (u64 __user *)src;

		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	}
	default:
		WARN_ONCE(1, " %s: Invalid size: %zu \n ", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}
2020-10-28 17:46:59 +01:00
/*
 * Translate @vaddr to a physical address by walking the page table that
 * CR3 currently points to.
 *
 * @ghcb:  not used by this function
 * @ctxt:  emulation context; ctxt->fi is filled when no mapping exists
 * @vaddr: virtual address to translate
 * @paddr: receives the physical address on success
 *
 * Returns ES_EXCEPTION (#PF recorded in ctxt->fi) when no PTE is found,
 * ES_UNSUPPORTED when the PTE has the encryption bit set, ES_OK otherwise.
 */
static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	pgd_t *pgd_base = __va(read_cr3_pa());
	unsigned int level;
	pte_t *ptep;

	ptep = lookup_address_in_pgd(&pgd_base[pgd_index(vaddr)], vaddr, &level);
	if (!ptep) {
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.cr2        = vaddr;
		ctxt->fi.error_code = user_mode(ctxt->regs) ? X86_PF_USER : 0;

		return ES_EXCEPTION;
	}

	/* Emulated MMIO to/from encrypted memory not supported */
	if (WARN_ON_ONCE(pte_val(*ptep) & _PAGE_ENC))
		return ES_UNSUPPORTED;

	/* Frame address plus the offset within the (possibly large) page */
	*paddr = ((phys_addr_t)pte_pfn(*ptep) << PAGE_SHIFT) |
		 (vaddr & ~page_level_mask(level));

	return ES_OK;
}
2020-09-07 15:15:39 +02:00
/* Include code shared with pre-decompression boot stage */
2021-04-27 06:16:34 -05:00
# include "sev-shared.c"
2020-09-08 14:38:16 +02:00
2021-06-18 13:54:08 +02:00
static noinstr void __sev_put_ghcb ( struct ghcb_state * state )
2021-05-17 12:42:32 -05:00
{
struct sev_es_runtime_data * data ;
struct ghcb * ghcb ;
2021-06-18 13:54:08 +02:00
WARN_ON ( ! irqs_disabled ( ) ) ;
2021-05-17 12:42:32 -05:00
data = this_cpu_read ( runtime_data ) ;
ghcb = & data - > ghcb_page ;
if ( state - > ghcb ) {
/* Restore GHCB from Backup */
* ghcb = * state - > ghcb ;
data - > backup_ghcb_active = false ;
state - > ghcb = NULL ;
} else {
2021-05-17 12:42:33 -05:00
/*
* Invalidate the GHCB so a VMGEXIT instruction issued
* from userspace won ' t appear to be valid .
*/
vc_ghcb_invalidate ( ghcb ) ;
2021-05-17 12:42:32 -05:00
data - > ghcb_active = false ;
}
}
2020-09-07 15:16:11 +02:00
/*
 * Issue the NMI_COMPLETE VMGEXIT through this CPU's GHCB.
 *
 * The GHCB is claimed, filled with the exit code and zeroed exit-info
 * fields, announced via the GHCB MSR and released after the VMGEXIT.
 */
void noinstr __sev_es_nmi_complete(void)
{
	struct ghcb_state ghcb_st;
	struct ghcb *cpu_ghcb = __sev_get_ghcb(&ghcb_st);

	/* Build the request from a clean GHCB */
	vc_ghcb_invalidate(cpu_ghcb);
	ghcb_set_sw_exit_code(cpu_ghcb, SVM_VMGEXIT_NMI_COMPLETE);
	ghcb_set_sw_exit_info_1(cpu_ghcb, 0);
	ghcb_set_sw_exit_info_2(cpu_ghcb, 0);

	/* Hand the GHCB address to the hypervisor and exit */
	sev_es_wr_ghcb_msr(__pa_nodebug(cpu_ghcb));
	VMGEXIT();

	__sev_put_ghcb(&ghcb_st);
}
2020-09-07 15:16:07 +02:00
static u64 get_jump_table_addr ( void )
{
struct ghcb_state state ;
unsigned long flags ;
struct ghcb * ghcb ;
u64 ret = 0 ;
local_irq_save ( flags ) ;
2021-06-18 13:54:08 +02:00
ghcb = __sev_get_ghcb ( & state ) ;
2020-09-07 15:16:07 +02:00
vc_ghcb_invalidate ( ghcb ) ;
ghcb_set_sw_exit_code ( ghcb , SVM_VMGEXIT_AP_JUMP_TABLE ) ;
ghcb_set_sw_exit_info_1 ( ghcb , SVM_VMGEXIT_GET_AP_JUMP_TABLE ) ;
ghcb_set_sw_exit_info_2 ( ghcb , 0 ) ;
sev_es_wr_ghcb_msr ( __pa ( ghcb ) ) ;
VMGEXIT ( ) ;
if ( ghcb_sw_exit_info_1_is_valid ( ghcb ) & &
ghcb_sw_exit_info_2_is_valid ( ghcb ) )
ret = ghcb - > save . sw_exit_info_2 ;
2021-06-18 13:54:08 +02:00
__sev_put_ghcb ( & state ) ;
2020-09-07 15:16:07 +02:00
local_irq_restore ( flags ) ;
return ret ;
}
int sev_es_setup_ap_jump_table ( struct real_mode_header * rmh )
{
u16 startup_cs , startup_ip ;
phys_addr_t jump_table_pa ;
u64 jump_table_addr ;
u16 __iomem * jump_table ;
jump_table_addr = get_jump_table_addr ( ) ;
/* On UP guests there is no jump table so this is not a failure */
if ( ! jump_table_addr )
return 0 ;
/* Check if AP Jump Table is page-aligned */
if ( jump_table_addr & ~ PAGE_MASK )
return - EINVAL ;
jump_table_pa = jump_table_addr & PAGE_MASK ;
startup_cs = ( u16 ) ( rmh - > trampoline_start > > 4 ) ;
startup_ip = ( u16 ) ( rmh - > sev_es_trampoline_start -
rmh - > trampoline_start ) ;
jump_table = ioremap_encrypted ( jump_table_pa , PAGE_SIZE ) ;
if ( ! jump_table )
return - EIO ;
writew ( startup_ip , & jump_table [ 0 ] ) ;
writew ( startup_cs , & jump_table [ 1 ] ) ;
iounmap ( jump_table ) ;
return 0 ;
}
2020-09-07 15:16:12 +02:00
/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 *
 * Each GHCB page is mapped at its physical address (one page, writable,
 * no-execute) into @pgd. Returns 0 on success or when SEV-ES is not
 * active, 1 as soon as one mapping fails.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
	unsigned long map_flags = _PAGE_NX | _PAGE_RW;
	int cpu;

	if (!sev_es_active())
		return 0;

	for_each_possible_cpu(cpu) {
		struct sev_es_runtime_data *rt = per_cpu(runtime_data, cpu);
		unsigned long ghcb_pa = __pa(&rt->ghcb_page);
		u64 ghcb_pfn = ghcb_pa >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, ghcb_pfn, ghcb_pa, 1, map_flags))
			return 1;
	}

	return 0;
}
2020-09-07 15:15:52 +02:00
static enum es_result vc_handle_msr ( struct ghcb * ghcb , struct es_em_ctxt * ctxt )
{
struct pt_regs * regs = ctxt - > regs ;
enum es_result ret ;
u64 exit_info_1 ;
/* Is it a WRMSR? */
exit_info_1 = ( ctxt - > insn . opcode . bytes [ 1 ] = = 0x30 ) ? 1 : 0 ;
ghcb_set_rcx ( ghcb , regs - > cx ) ;
if ( exit_info_1 ) {
ghcb_set_rax ( ghcb , regs - > ax ) ;
ghcb_set_rdx ( ghcb , regs - > dx ) ;
}
ret = sev_es_ghcb_hv_call ( ghcb , ctxt , SVM_EXIT_MSR , exit_info_1 , 0 ) ;
if ( ( ret = = ES_OK ) & & ( ! exit_info_1 ) ) {
regs - > ax = ghcb - > save . rax ;
regs - > dx = ghcb - > save . rdx ;
}
return ret ;
}
2020-09-08 14:38:16 +02:00
/*
 * This function runs on the first #VC exception after the kernel
 * switched to virtual addresses.
 *
 * Returns false when the protocol negotiation with the hypervisor fails;
 * otherwise the boot GHCB page is cleared and published via boot_ghcb.
 */
static bool __init sev_es_setup_ghcb(void)
{
	/* First make sure the hypervisor talks a supported protocol. */
	bool negotiated = sev_es_negotiate_protocol();

	if (negotiated) {
		/*
		 * Clear the boot_ghcb. The first exception comes in before
		 * the bss section is cleared.
		 */
		memset(&boot_ghcb_page, 0, PAGE_SIZE);

		/* Alright - Make the boot-ghcb public */
		boot_ghcb = &boot_ghcb_page;
	}

	return negotiated;
}
2020-09-07 15:16:10 +02:00
# ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop ( void )
{
struct ghcb_state state ;
struct ghcb * ghcb ;
2021-06-18 13:54:08 +02:00
ghcb = __sev_get_ghcb ( & state ) ;
2020-09-07 15:16:10 +02:00
while ( true ) {
vc_ghcb_invalidate ( ghcb ) ;
ghcb_set_sw_exit_code ( ghcb , SVM_VMGEXIT_AP_HLT_LOOP ) ;
ghcb_set_sw_exit_info_1 ( ghcb , 0 ) ;
ghcb_set_sw_exit_info_2 ( ghcb , 0 ) ;
sev_es_wr_ghcb_msr ( __pa ( ghcb ) ) ;
VMGEXIT ( ) ;
/* Wakeup signal? */
if ( ghcb_sw_exit_info_2_is_valid ( ghcb ) & &
ghcb - > save . sw_exit_info_2 )
break ;
}
2021-06-18 13:54:08 +02:00
__sev_put_ghcb ( & state ) ;
2020-09-07 15:16:10 +02:00
}
/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	/* Common offline handling; IRQs are disabled once this returns */
	play_dead_common();

	/* Halt in the hypervisor until a wakeup arrives */
	sev_es_ap_hlt_loop();

	/*
	 * Getting here means the VCPU was woken up again. Jump to the
	 * CPU startup code to get it back online.
	 */
	start_cpu0();
}
# else /* CONFIG_HOTPLUG_CPU */
# define sev_es_play_dead native_play_dead
# endif /* CONFIG_HOTPLUG_CPU */
# ifdef CONFIG_SMP
/* Install sev_es_play_dead as the play_dead callback in smp_ops. */
static void __init sev_es_setup_play_dead ( void )
{
smp_ops . play_dead = sev_es_play_dead ;
}
# else
/* Without CONFIG_SMP there is nothing to install. */
static inline void sev_es_setup_play_dead ( void ) { }
# endif
2020-09-07 15:15:42 +02:00
/*
 * Allocate the page-aligned #VC runtime data for @cpu from memblock and
 * store it in the CPU's runtime_data pointer. Panics if the allocation
 * fails.
 */
static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *rt = memblock_alloc(sizeof(*rt), PAGE_SIZE);

	if (!rt)
		panic(" Can't allocate SEV-ES runtime data ");

	per_cpu(runtime_data, cpu) = rt;
}
/*
 * Prepare the runtime GHCB of @cpu: map the GHCB page decrypted (panics
 * on failure), clear it and mark both the GHCB and its backup as unused.
 */
static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *rt = per_cpu(runtime_data, cpu);
	struct ghcb *cpu_ghcb = &rt->ghcb_page;

	if (early_set_memory_decrypted((unsigned long)cpu_ghcb,
				       sizeof(rt->ghcb_page)))
		panic(" Can't map GHCBs unencrypted ");

	memset(cpu_ghcb, 0, sizeof(rt->ghcb_page));

	rt->ghcb_active = false;
	rt->backup_ghcb_active = false;
}
/*
 * Set up runtime #VC handling. Does nothing unless SEV-ES is active.
 *
 * Panics when required CPU features are missing. Otherwise enables the
 * SEV-ES static key, sets up runtime data, GHCB and #VC stacks for every
 * possible CPU, installs the play_dead handler and switches
 * initial_vc_handler to the runtime #VC handler.
 */
void __init sev_es_init_vc_handling(void)
{
	int cpu;

	/* init_ghcb() maps exactly the GHCB page, so it must be page-aligned */
	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!sev_es_active())
		return;

	if (!sev_es_check_cpu_features())
		panic(" SEV-ES CPU Features missing ");

	/* Enable SEV-ES special handling */
	static_branch_enable(&sev_es_enable_key);

	/* Per-CPU runtime data, GHCB pages and #VC stacks */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
		setup_vc_stacks(cpu);
	}

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}
2020-09-08 14:38:16 +02:00
/*
 * Forward the exception recorded in ctxt->fi to do_early_exception().
 *
 * For page faults CR2 is loaded with the faulting address first; the
 * error code is passed along in regs->orig_ax.
 */
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	int vector = ctxt->fi.vector;

	if (vector == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(regs, vector);
}
2020-09-07 15:15:50 +02:00
static long * vc_insn_get_reg ( struct es_em_ctxt * ctxt )
{
long * reg_array ;
int offset ;
reg_array = ( long * ) ctxt - > regs ;
offset = insn_get_modrm_reg_off ( & ctxt - > insn , ctxt - > regs ) ;
if ( offset < 0 )
return NULL ;
offset / = sizeof ( long ) ;
return reg_array + offset ;
}
2020-09-07 15:15:53 +02:00
static long * vc_insn_get_rm ( struct es_em_ctxt * ctxt )
{
long * reg_array ;
int offset ;
reg_array = ( long * ) ctxt - > regs ;
offset = insn_get_modrm_rm_off ( & ctxt - > insn , ctxt - > regs ) ;
if ( offset < 0 )
return NULL ;
offset / = sizeof ( long ) ;
return reg_array + offset ;
}
2020-09-07 15:15:50 +02:00
/*
 * Perform one MMIO access of @bytes bytes through the hypervisor.
 *
 * @ghcb:  GHCB used for the call; its shared_buffer carries the data
 * @ctxt:  emulation context with the decoded instruction
 * @bytes: access width
 * @read:  true for an MMIO read, false for a write
 *
 * Returns ES_UNSUPPORTED when the memory operand cannot be resolved, the
 * error from the address translation (with X86_PF_WRITE added to the
 * recorded fault for writes), or the result of the hypervisor call.
 */
static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	unsigned long ghcb_pa = __pa(ghcb);
	enum es_result status;
	phys_addr_t mmio_pa;
	void __user *target;
	u64 exit_code;

	target = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (target == (void __user *)-1L)
		return ES_UNSUPPORTED;

	if (read)
		exit_code = SVM_VMGEXIT_MMIO_READ;
	else
		exit_code = SVM_VMGEXIT_MMIO_WRITE;

	status = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)target, &mmio_pa);
	if (status == ES_EXCEPTION && !read)
		ctxt->fi.error_code |= X86_PF_WRITE;
	if (status != ES_OK)
		return status;

	/* Data travels through the shared buffer inside the GHCB page */
	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	/* exit_info_1 is the MMIO address, exit_info_2 the width (never > 8) */
	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, mmio_pa, bytes);
}
static enum es_result vc_handle_mmio_twobyte_ops ( struct ghcb * ghcb ,
struct es_em_ctxt * ctxt )
{
struct insn * insn = & ctxt - > insn ;
unsigned int bytes = 0 ;
enum es_result ret ;
int sign_byte ;
long * reg_data ;
switch ( insn - > opcode . bytes [ 1 ] ) {
/* MMIO Read w/ zero-extension */
case 0xb6 :
bytes = 1 ;
fallthrough ;
case 0xb7 :
if ( ! bytes )
bytes = 2 ;
ret = vc_do_mmio ( ghcb , ctxt , bytes , true ) ;
if ( ret )
break ;
/* Zero extend based on operand size */
reg_data = vc_insn_get_reg ( ctxt ) ;
if ( ! reg_data )
return ES_DECODE_FAILED ;
memset ( reg_data , 0 , insn - > opnd_bytes ) ;
memcpy ( reg_data , ghcb - > shared_buffer , bytes ) ;
break ;
/* MMIO Read w/ sign-extension */
case 0xbe :
bytes = 1 ;
fallthrough ;
case 0xbf :
if ( ! bytes )
bytes = 2 ;
ret = vc_do_mmio ( ghcb , ctxt , bytes , true ) ;
if ( ret )
break ;
/* Sign extend based on operand size */
reg_data = vc_insn_get_reg ( ctxt ) ;
if ( ! reg_data )
return ES_DECODE_FAILED ;
if ( bytes = = 1 ) {
u8 * val = ( u8 * ) ghcb - > shared_buffer ;
sign_byte = ( * val & 0x80 ) ? 0xff : 0x00 ;
} else {
u16 * val = ( u16 * ) ghcb - > shared_buffer ;
sign_byte = ( * val & 0x8000 ) ? 0xff : 0x00 ;
}
memset ( reg_data , sign_byte , insn - > opnd_bytes ) ;
memcpy ( reg_data , ghcb - > shared_buffer , bytes ) ;
break ;
default :
ret = ES_UNSUPPORTED ;
}
return ret ;
}
2020-09-07 15:15:51 +02:00
/*
* The MOVS instruction has two memory operands , which raises the
* problem that it is not known whether the access to the source or the
* destination caused the # VC exception ( and hence whether an MMIO read
* or write operation needs to be emulated ) .
*
* Instead of playing games with walking page - tables and trying to guess
* whether the source or destination is an MMIO range , split the move
* into two operations , a read and a write with only one memory operand .
* This will cause a nested # VC exception on the MMIO address which can
* then be handled .
*
* This implementation has the benefit that it also supports MOVS where
* source _and_ destination are MMIO regions .
*
* It will slow MOVS on MMIO down a lot , but in SEV - ES guests it is a
* rare operation . If it turns out to be a performance problem the split
* operations can be moved to memcpy_fromio ( ) and memcpy_toio ( ) .
*/
static enum es_result vc_handle_mmio_movs ( struct es_em_ctxt * ctxt ,
unsigned int bytes )
{
unsigned long ds_base , es_base ;
unsigned char * src , * dst ;
unsigned char buffer [ 8 ] ;
enum es_result ret ;
bool rep ;
int off ;
ds_base = insn_get_seg_base ( ctxt - > regs , INAT_SEG_REG_DS ) ;
es_base = insn_get_seg_base ( ctxt - > regs , INAT_SEG_REG_ES ) ;
if ( ds_base = = - 1L | | es_base = = - 1L ) {
ctxt - > fi . vector = X86_TRAP_GP ;
ctxt - > fi . error_code = 0 ;
return ES_EXCEPTION ;
}
src = ds_base + ( unsigned char * ) ctxt - > regs - > si ;
dst = es_base + ( unsigned char * ) ctxt - > regs - > di ;
ret = vc_read_mem ( ctxt , src , buffer , bytes ) ;
if ( ret ! = ES_OK )
return ret ;
ret = vc_write_mem ( ctxt , dst , buffer , bytes ) ;
if ( ret ! = ES_OK )
return ret ;
if ( ctxt - > regs - > flags & X86_EFLAGS_DF )
off = - bytes ;
else
off = bytes ;
ctxt - > regs - > si + = off ;
ctxt - > regs - > di + = off ;
rep = insn_has_rep_prefix ( & ctxt - > insn ) ;
if ( rep )
ctxt - > regs - > cx - = 1 ;
if ( ! rep | | ctxt - > regs - > cx = = 0 )
return ES_OK ;
else
return ES_RETRY ;
}
2020-09-07 15:15:50 +02:00
static enum es_result vc_handle_mmio ( struct ghcb * ghcb ,
struct es_em_ctxt * ctxt )
{
struct insn * insn = & ctxt - > insn ;
unsigned int bytes = 0 ;
enum es_result ret ;
long * reg_data ;
switch ( insn - > opcode . bytes [ 0 ] ) {
/* MMIO Write */
case 0x88 :
bytes = 1 ;
fallthrough ;
case 0x89 :
if ( ! bytes )
bytes = insn - > opnd_bytes ;
reg_data = vc_insn_get_reg ( ctxt ) ;
if ( ! reg_data )
return ES_DECODE_FAILED ;
memcpy ( ghcb - > shared_buffer , reg_data , bytes ) ;
ret = vc_do_mmio ( ghcb , ctxt , bytes , false ) ;
break ;
case 0xc6 :
bytes = 1 ;
fallthrough ;
case 0xc7 :
if ( ! bytes )
bytes = insn - > opnd_bytes ;
memcpy ( ghcb - > shared_buffer , insn - > immediate1 . bytes , bytes ) ;
ret = vc_do_mmio ( ghcb , ctxt , bytes , false ) ;
break ;
/* MMIO Read */
case 0x8a :
bytes = 1 ;
fallthrough ;
case 0x8b :
if ( ! bytes )
bytes = insn - > opnd_bytes ;
ret = vc_do_mmio ( ghcb , ctxt , bytes , true ) ;
if ( ret )
break ;
reg_data = vc_insn_get_reg ( ctxt ) ;
if ( ! reg_data )
return ES_DECODE_FAILED ;
/* Zero-extend for 32-bit operation */
if ( bytes = = 4 )
* reg_data = 0 ;
memcpy ( reg_data , ghcb - > shared_buffer , bytes ) ;
break ;
2020-09-07 15:15:51 +02:00
/* MOVS instruction */
case 0xa4 :
bytes = 1 ;
fallthrough ;
case 0xa5 :
if ( ! bytes )
bytes = insn - > opnd_bytes ;
ret = vc_handle_mmio_movs ( ctxt , bytes ) ;
break ;
2020-09-07 15:15:50 +02:00
/* Two-Byte Opcodes */
case 0x0f :
ret = vc_handle_mmio_twobyte_ops ( ghcb , ctxt ) ;
break ;
default :
ret = ES_UNSUPPORTED ;
}
return ret ;
}
2020-09-07 15:15:53 +02:00
static enum es_result vc_handle_dr7_write ( struct ghcb * ghcb ,
struct es_em_ctxt * ctxt )
{
struct sev_es_runtime_data * data = this_cpu_read ( runtime_data ) ;
long val , * reg = vc_insn_get_rm ( ctxt ) ;
enum es_result ret ;
if ( ! reg )
return ES_DECODE_FAILED ;
val = * reg ;
/* Upper 32 bits must be written as zeroes */
if ( val > > 32 ) {
ctxt - > fi . vector = X86_TRAP_GP ;
ctxt - > fi . error_code = 0 ;
return ES_EXCEPTION ;
}
/* Clear out other reserved bits and set bit 10 */
val = ( val & 0xffff23ffL ) | BIT ( 10 ) ;
/* Early non-zero writes to DR7 are not supported */
if ( ! data & & ( val & ~ DR7_RESET_VALUE ) )
return ES_UNSUPPORTED ;
/* Using a value of 0 for ExitInfo1 means RAX holds the value */
ghcb_set_rax ( ghcb , val ) ;
ret = sev_es_ghcb_hv_call ( ghcb , ctxt , SVM_EXIT_WRITE_DR7 , 0 , 0 ) ;
if ( ret ! = ES_OK )
return ret ;
if ( data )
data - > dr7 = val ;
return ES_OK ;
}
static enum es_result vc_handle_dr7_read ( struct ghcb * ghcb ,
struct es_em_ctxt * ctxt )
{
struct sev_es_runtime_data * data = this_cpu_read ( runtime_data ) ;
long * reg = vc_insn_get_rm ( ctxt ) ;
if ( ! reg )
return ES_DECODE_FAILED ;
if ( data )
* reg = data - > dr7 ;
else
* reg = DR7_RESET_VALUE ;
return ES_OK ;
}
2020-09-07 15:15:54 +02:00
static enum es_result vc_handle_wbinvd ( struct ghcb * ghcb ,
struct es_em_ctxt * ctxt )
{
return sev_es_ghcb_hv_call ( ghcb , ctxt , SVM_EXIT_WBINVD , 0 , 0 ) ;
}
2020-09-07 15:15:56 +02:00
static enum es_result vc_handle_rdpmc ( struct ghcb * ghcb , struct es_em_ctxt * ctxt )
{
enum es_result ret ;
ghcb_set_rcx ( ghcb , ctxt - > regs - > cx ) ;
ret = sev_es_ghcb_hv_call ( ghcb , ctxt , SVM_EXIT_RDPMC , 0 , 0 ) ;
if ( ret ! = ES_OK )
return ret ;
if ( ! ( ghcb_rax_is_valid ( ghcb ) & & ghcb_rdx_is_valid ( ghcb ) ) )
return ES_VMM_ERROR ;
ctxt - > regs - > ax = ghcb - > save . rax ;
ctxt - > regs - > dx = ghcb - > save . rdx ;
return ES_OK ;
}
2020-09-07 15:15:58 +02:00
/*
 * MONITOR/MONITORX is handled as a NOP: nothing is sent to the hypervisor, so
 * no physical address is leaked to it.
 */
static enum es_result vc_handle_monitor(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	return ES_OK;
}
2020-09-07 15:15:59 +02:00
/*
 * MWAIT gets the same treatment as MONITOR/MONITORX: report success without
 * doing anything.
 */
static enum es_result vc_handle_mwait(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	return ES_OK;
}
2020-09-07 15:16:00 +02:00
static enum es_result vc_handle_vmmcall ( struct ghcb * ghcb ,
struct es_em_ctxt * ctxt )
{
enum es_result ret ;
ghcb_set_rax ( ghcb , ctxt - > regs - > ax ) ;
ghcb_set_cpl ( ghcb , user_mode ( ctxt - > regs ) ? 3 : 0 ) ;
2020-09-07 15:16:03 +02:00
if ( x86_platform . hyper . sev_es_hcall_prepare )
x86_platform . hyper . sev_es_hcall_prepare ( ghcb , ctxt - > regs ) ;
2020-09-07 15:16:00 +02:00
ret = sev_es_ghcb_hv_call ( ghcb , ctxt , SVM_EXIT_VMMCALL , 0 , 0 ) ;
if ( ret ! = ES_OK )
return ret ;
if ( ! ghcb_rax_is_valid ( ghcb ) )
return ES_VMM_ERROR ;
ctxt - > regs - > ax = ghcb - > save . rax ;
2020-09-07 15:16:03 +02:00
/*
* Call sev_es_hcall_finish ( ) after regs - > ax is already set .
* This allows the hypervisor handler to overwrite it again if
* necessary .
*/
if ( x86_platform . hyper . sev_es_hcall_finish & &
! x86_platform . hyper . sev_es_hcall_finish ( ghcb , ctxt - > regs ) )
return ES_VMM_ERROR ;
2020-09-07 15:16:00 +02:00
return ES_OK ;
}
2020-09-07 15:16:01 +02:00
static enum es_result vc_handle_trap_ac ( struct ghcb * ghcb ,
struct es_em_ctxt * ctxt )
{
/*
* Calling ecx_alignment_check ( ) directly does not work , because it
* enables IRQs and the GHCB is active . Forward the exception and call
* it later from vc_forward_exception ( ) .
*/
ctxt - > fi . vector = X86_TRAP_AC ;
ctxt - > fi . error_code = 0 ;
return ES_EXCEPTION ;
}
2020-09-08 14:38:16 +02:00
static enum es_result vc_handle_exitcode ( struct es_em_ctxt * ctxt ,
struct ghcb * ghcb ,
unsigned long exit_code )
{
enum es_result result ;
switch ( exit_code ) {
2020-09-07 15:15:53 +02:00
case SVM_EXIT_READ_DR7 :
result = vc_handle_dr7_read ( ghcb , ctxt ) ;
break ;
case SVM_EXIT_WRITE_DR7 :
result = vc_handle_dr7_write ( ghcb , ctxt ) ;
break ;
2020-09-07 15:16:01 +02:00
case SVM_EXIT_EXCP_BASE + X86_TRAP_AC :
result = vc_handle_trap_ac ( ghcb , ctxt ) ;
break ;
2020-09-07 15:15:55 +02:00
case SVM_EXIT_RDTSC :
case SVM_EXIT_RDTSCP :
result = vc_handle_rdtsc ( ghcb , ctxt , exit_code ) ;
break ;
2020-09-07 15:15:56 +02:00
case SVM_EXIT_RDPMC :
result = vc_handle_rdpmc ( ghcb , ctxt ) ;
break ;
2020-09-07 15:15:57 +02:00
case SVM_EXIT_INVD :
pr_err_ratelimited ( " #VC exception for INVD??? Seriously??? \n " ) ;
result = ES_UNSUPPORTED ;
break ;
2020-09-07 15:15:48 +02:00
case SVM_EXIT_CPUID :
result = vc_handle_cpuid ( ghcb , ctxt ) ;
break ;
case SVM_EXIT_IOIO :
result = vc_handle_ioio ( ghcb , ctxt ) ;
break ;
2020-09-07 15:15:52 +02:00
case SVM_EXIT_MSR :
result = vc_handle_msr ( ghcb , ctxt ) ;
break ;
2020-09-07 15:16:00 +02:00
case SVM_EXIT_VMMCALL :
result = vc_handle_vmmcall ( ghcb , ctxt ) ;
break ;
2020-09-07 15:15:54 +02:00
case SVM_EXIT_WBINVD :
result = vc_handle_wbinvd ( ghcb , ctxt ) ;
break ;
2020-09-07 15:15:58 +02:00
case SVM_EXIT_MONITOR :
result = vc_handle_monitor ( ghcb , ctxt ) ;
break ;
2020-09-07 15:15:59 +02:00
case SVM_EXIT_MWAIT :
result = vc_handle_mwait ( ghcb , ctxt ) ;
break ;
2020-09-07 15:15:50 +02:00
case SVM_EXIT_NPF :
result = vc_handle_mmio ( ghcb , ctxt ) ;
break ;
2020-09-08 14:38:16 +02:00
default :
/*
* Unexpected # VC exception
*/
result = ES_UNSUPPORTED ;
}
return result ;
}
2020-09-07 15:15:47 +02:00
static __always_inline void vc_forward_exception ( struct es_em_ctxt * ctxt )
{
long error_code = ctxt - > fi . error_code ;
int trapnr = ctxt - > fi . vector ;
ctxt - > regs - > orig_ax = ctxt - > fi . error_code ;
switch ( trapnr ) {
case X86_TRAP_GP :
exc_general_protection ( ctxt - > regs , error_code ) ;
break ;
case X86_TRAP_UD :
exc_invalid_op ( ctxt - > regs ) ;
break ;
2021-05-19 15:52:45 +02:00
case X86_TRAP_PF :
write_cr2 ( ctxt - > fi . cr2 ) ;
exc_page_fault ( ctxt - > regs , error_code ) ;
break ;
2020-09-07 15:16:01 +02:00
case X86_TRAP_AC :
exc_alignment_check ( ctxt - > regs , error_code ) ;
break ;
2020-09-07 15:15:47 +02:00
default :
pr_emerg ( " Unsupported exception in #VC instruction emulation - can't continue \n " ) ;
BUG ( ) ;
}
}
/*
 * Tell whether @regs is located within this CPU's VC2 IST stack, i.e. in the
 * half-open address range [bottom, top).
 */
static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
{
	unsigned long addr = (unsigned long)regs;

	if (addr < __this_cpu_ist_bottom_va(VC2))
		return false;

	return addr < __this_cpu_ist_top_va(VC2);
}
2021-06-18 13:54:09 +02:00
static bool vc_raw_handle_exception ( struct pt_regs * regs , unsigned long error_code )
2020-09-07 15:15:47 +02:00
{
struct ghcb_state state ;
struct es_em_ctxt ctxt ;
enum es_result result ;
struct ghcb * ghcb ;
2021-06-18 13:54:09 +02:00
bool ret = true ;
2020-09-07 15:15:47 +02:00
2021-06-18 13:54:08 +02:00
ghcb = __sev_get_ghcb ( & state ) ;
2020-09-07 15:15:47 +02:00
vc_ghcb_invalidate ( ghcb ) ;
result = vc_init_em_ctxt ( & ctxt , regs , error_code ) ;
if ( result = = ES_OK )
result = vc_handle_exitcode ( & ctxt , ghcb , error_code ) ;
2021-06-18 13:54:08 +02:00
__sev_put_ghcb ( & state ) ;
2020-09-07 15:15:47 +02:00
/* Done - now check the result */
switch ( result ) {
case ES_OK :
vc_finish_insn ( & ctxt ) ;
break ;
case ES_UNSUPPORTED :
2021-05-19 15:52:47 +02:00
pr_err_ratelimited ( " Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx) \n " ,
2020-09-07 15:15:47 +02:00
error_code , regs - > ip ) ;
2021-06-18 13:54:09 +02:00
ret = false ;
break ;
2020-09-07 15:15:47 +02:00
case ES_VMM_ERROR :
pr_err_ratelimited ( " Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx) \n " ,
error_code , regs - > ip ) ;
2021-06-18 13:54:09 +02:00
ret = false ;
break ;
2020-09-07 15:15:47 +02:00
case ES_DECODE_FAILED :
pr_err_ratelimited ( " Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx) \n " ,
error_code , regs - > ip ) ;
2021-06-18 13:54:09 +02:00
ret = false ;
break ;
2020-09-07 15:15:47 +02:00
case ES_EXCEPTION :
vc_forward_exception ( & ctxt ) ;
break ;
case ES_RETRY :
/* Nothing to do */
break ;
default :
pr_emerg ( " Unknown result in %s():%d \n " , __func__ , result ) ;
/*
* Emulating the instruction which caused the # VC exception
* failed - can ' t continue so print debug information
*/
BUG ( ) ;
}
2021-06-18 13:54:09 +02:00
return ret ;
}
2020-09-07 15:15:47 +02:00
2021-06-18 13:54:09 +02:00
/* True when @error_code is the exit code of an intercepted #DB exception. */
static __always_inline bool vc_is_db(unsigned long error_code)
{
	const unsigned long db_exit_code = SVM_EXIT_EXCP_BASE + X86_TRAP_DB;

	return error_code == db_exit_code;
}
2020-09-07 15:15:47 +02:00
2021-06-18 13:54:09 +02:00
/*
 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
 * and will panic when an error happens.
 *
 * Sequence:
 *  1. Panic if the exception frame lies on the #VC fall-back stack.
 *  2. Hand an intercepted #DB directly to exc_debug() and return.
 *  3. Otherwise run vc_raw_handle_exception() between irqentry_nmi_enter()
 *     and irqentry_nmi_exit(). If it reports failure, dump the registers,
 *     request termination via sev_es_terminate() and panic should that
 *     request return.
 */
DEFINE_IDTENTRY_VC_KERNEL ( exc_vmm_communication )
{
irqentry_state_t irq_state ;
/*
 * With the current implementation it is always possible to switch to a
 * safe stack because #VC exceptions only happen at known places, like
 * intercepted instructions or accesses to MMIO areas/IO ports. They can
 * also happen with code instrumentation when the hypervisor intercepts
 * #DB, but the critical paths are forbidden to be instrumented, so #DB
 * exceptions currently also only happen in safe places.
 *
 * But keep this here in case the noinstr annotations are violated due
 * to a bug elsewhere.
 */
if ( unlikely ( on_vc_fallback_stack ( regs ) ) ) {
instrumentation_begin ( ) ;
panic ( " Can't handle #VC exception from unsupported context \n " ) ;
instrumentation_end ( ) ;
}
2020-09-07 15:15:47 +02:00
2021-06-18 13:54:09 +02:00
/*
 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
 */
if ( vc_is_db ( error_code ) ) {
exc_debug ( regs ) ;
return ;
}
/* Everything from here to irqentry_nmi_exit() runs in NMI context */
irq_state = irqentry_nmi_enter ( regs ) ;
instrumentation_begin ( ) ;
if ( ! vc_raw_handle_exception ( regs , error_code ) ) {
2020-09-07 15:15:47 +02:00
/* Show some debug info */
show_regs ( regs ) ;
/* Ask hypervisor to sev_es_terminate */
sev_es_terminate ( GHCB_SEV_ES_REASON_GENERAL_REQUEST ) ;
/* If that fails and we get here - just panic */
panic ( " Returned from Terminate-Request to Hypervisor \n " ) ;
}
2021-06-18 13:54:09 +02:00
instrumentation_end ( ) ;
irqentry_nmi_exit ( regs , irq_state ) ;
2020-09-07 15:15:47 +02:00
}
2021-06-18 13:54:09 +02:00
/*
 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
 * and will kill the current task with SIGBUS when an error happens.
 *
 * Sequence:
 *  1. Hand an intercepted #DB directly to noist_exc_debug() and return.
 *  2. Otherwise run vc_raw_handle_exception() between
 *     irqentry_enter_from_user_mode() and irqentry_exit_to_user_mode(). If it
 *     reports failure, raise SIGBUS (BUS_OBJERR, address 0) via
 *     force_sig_fault().
 */
DEFINE_IDTENTRY_VC_USER ( exc_vmm_communication )
2020-09-07 15:15:47 +02:00
{
2021-06-18 13:54:09 +02:00
/*
 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
 */
if ( vc_is_db ( error_code ) ) {
noist_exc_debug ( regs ) ;
return ;
}
irqentry_enter_from_user_mode ( regs ) ;
2020-09-07 15:15:47 +02:00
instrumentation_begin ( ) ;
2021-06-18 13:54:09 +02:00
if ( ! vc_raw_handle_exception ( regs , error_code ) ) {
/*
 * Do not kill the machine if user-space triggered the
 * exception. Send SIGBUS instead and let user-space deal with
 * it.
 */
force_sig_fault ( SIGBUS , BUS_OBJERR , ( void __user * ) 0 ) ;
}
instrumentation_end ( ) ;
irqentry_exit_to_user_mode ( regs ) ;
2020-09-07 15:15:47 +02:00
}
2020-09-08 14:38:16 +02:00
/*
 * #VC handler for early boot, working on the boot GHCB.
 *
 * The exit code is taken from regs->orig_ax. If the boot GHCB has not been set
 * up yet this is done first; when that fails, termination is requested via
 * sev_es_terminate().
 *
 * Returns true when the exception was handled. When emulation fails, a
 * message is printed with early_printk(), the registers are dumped and the
 * CPU is halted in an endless loop - the function does not return then.
 */
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
	unsigned long exit_code = regs->orig_ax;
	struct es_em_ctxt em_ctxt;
	enum es_result res;

	/* Do initial setup or terminate the guest */
	if (unlikely(!boot_ghcb && !sev_es_setup_ghcb()))
		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

	vc_ghcb_invalidate(boot_ghcb);

	res = vc_init_em_ctxt(&em_ctxt, regs, exit_code);
	if (res == ES_OK)
		res = vc_handle_exitcode(&em_ctxt, boot_ghcb, exit_code);

	/* Done - now check the result */
	switch (res) {
	case ES_OK:
		vc_finish_insn(&em_ctxt);
		return true;

	case ES_EXCEPTION:
		vc_early_forward_exception(&em_ctxt);
		return true;

	case ES_RETRY:
		/* Nothing to do */
		return true;

	case ES_UNSUPPORTED:
		early_printk(" PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx) \n ",
			     exit_code, regs->ip);
		break;

	case ES_VMM_ERROR:
		early_printk(" PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx) \n ",
			     exit_code, regs->ip);
		break;

	case ES_DECODE_FAILED:
		early_printk(" PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx) \n ",
			     exit_code, regs->ip);
		break;

	default:
		BUG();
		/* Only reached if BUG() were to return */
		return true;
	}

	/* Emulation failed: dump state and stop this CPU for good */
	show_regs(regs);

	for (;;)
		halt();
}