// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"SEV: " fmt
#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/efi.h>
#include <linux/platform_device.h>
#include <linux/io.h>

#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/cpuid.h>
#include <asm/cmdline.h>
#define DR7_RESET_VALUE        0x400

/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT		0xffff
#define AP_INIT_DS_LIMIT		0xffff
#define AP_INIT_LDTR_LIMIT		0xffff
#define AP_INIT_GDTR_LIMIT		0xffff
#define AP_INIT_IDTR_LIMIT		0xffff
#define AP_INIT_TR_LIMIT		0xffff
#define AP_INIT_RFLAGS_DEFAULT		0x2
#define AP_INIT_DR6_DEFAULT		0xffff0ff0
#define AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT		0x1
#define AP_INIT_X87_FTW_DEFAULT		0x5555
#define AP_INIT_X87_FCW_DEFAULT		0x0040
#define AP_INIT_CR0_DEFAULT		0x60000010
#define AP_INIT_MXCSR_DEFAULT		0x1f80
/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb *boot_ghcb __section(".data");

/* Bitmap of SEV features supported by the hypervisor */
static u64 sev_hv_features __ro_after_init;
/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
	struct ghcb ghcb_page;

	/*
	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
	 * It is needed when an NMI happens while the #VC handler uses the real
	 * GHCB, and the NMI handler itself is causing another #VC exception. In
	 * that case the GHCB content of the first handler needs to be backed up
	 * and restored.
	 */
	struct ghcb backup_ghcb;

	/*
	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
	 * There is no need for it to be atomic, because nothing is written to
	 * the GHCB between the read and the write of ghcb_active. So it is safe
	 * to use it when a nested #VC exception happens before the write.
	 *
	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
	 * happens while the first #VC handler uses the GHCB. When the NMI code
	 * raises a second #VC handler it might overwrite the contents of the
	 * GHCB written by the first handler. To avoid this the content of the
	 * GHCB is saved and restored when the GHCB is detected to be in use
	 * already.
	 */
	bool ghcb_active;
	bool backup_ghcb_active;

	/*
	 * Cached DR7 value - write it on DR7 writes and return it on reads.
	 * That value will never make it to the real hardware DR7 as debugging
	 * is currently unsupported in SEV-ES guests.
	 */
	unsigned long dr7;
};

struct ghcb_state {
	struct ghcb *ghcb;
};

static DEFINE_PER_CPU(struct sev_es_runtime_data *, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);

static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);

struct sev_config {
	__u64 debug		: 1,
	      __reserved	: 63;
};

static struct sev_config sev_cfg __read_mostly;
static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
	unsigned long sp = regs->sp;

	/* User-mode RSP is not trusted */
	if (user_mode(regs))
		return false;

	/* SYSCALL gap still has user-mode RSP */
	if (ip_within_syscall_gap(regs))
		return false;

	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}
/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long old_ist, new_ist;

	/* Read old IST entry */
	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/*
	 * If NMI happened while on the #VC IST stack, set the new IST
	 * value below regs->sp, so that the interrupted stack frame is
	 * not overwritten by subsequent #VC exceptions.
	 */
	if (on_vc_stack(regs))
		new_ist = regs->sp;

	/*
	 * Reserve additional 8 bytes and store old IST value so this
	 * adjustment can be unrolled in __sev_es_ist_exit().
	 */
	new_ist -= sizeof(old_ist);
	*(unsigned long *)new_ist = old_ist;

	/* Set new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
	unsigned long ist;

	/* Read IST entry */
	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
		return;

	/* Read back old IST entry and write it to the TSS */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}
/*
 * Nothing shall interrupt this code path while holding the per-CPU
 * GHCB. The backup GHCB is only for NMIs interrupting this path.
 *
 * Callers must disable local interrupts around it.
 */
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active)) {
			/*
			 * Backup-GHCB is also already in use. There is no way
			 * to continue here so just kill the machine. To make
			 * panic() work, mark GHCBs inactive so that messages
			 * can be printed out.
			 */
			data->ghcb_active        = false;
			data->backup_ghcb_active = false;

			instrumentation_begin();
			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
			instrumentation_end();
		}

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}
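/*
 * Raw accessors for the GHCB MSR (MSR_AMD64_SEV_ES_GHCB). The MSR holds the
 * guest-physical address of the GHCB page and is also used for the MSR-based
 * GHCB protocol before a full GHCB is available.
 */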
static inline u64 sev_es_rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
	u32 low, high;

	low  = (u32)(val);
	high = (u32)(val >> 32);

	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}
static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int insn_bytes;

	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
	if (insn_bytes == 0) {
		/* Nothing could be copied */
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
		ctxt->fi.cr2        = ctxt->regs->ip;
		return ES_EXCEPTION;
	} else if (insn_bytes == -EINVAL) {
		/* Effective RIP could not be calculated */
		ctxt->fi.vector     = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		ctxt->fi.cr2        = 0;
		return ES_EXCEPTION;
	}

	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
		return ES_DECODE_FAILED;

	if (ctxt->insn.immediate.got)
		return ES_OK;
	else
		return ES_DECODE_FAILED;
}

static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int res, ret;

	res = vc_fetch_insn_kernel(ctxt, buffer);
	if (res) {
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR;
		ctxt->fi.cr2        = ctxt->regs->ip;
		return ES_EXCEPTION;
	}

	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
	if (ret < 0)
		return ES_DECODE_FAILED;
	else
		return ES_OK;
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	if (user_mode(ctxt->regs))
		return __vc_decode_user_insn(ctxt);
	else
		return __vc_decode_kern_insn(ctxt);
}
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;

	/*
	 * This function uses __put_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __put_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __put_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_to_user() here because
	 * vc_write_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *target = (u8 __user *)dst;

		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *target = (u16 __user *)dst;

		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *target = (u32 __user *)dst;

		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *target = (u64 __user *)dst;

		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}
static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;

	/*
	 * This function uses __get_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __get_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __get_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_from_user() here because
	 * vc_read_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *s = (u8 __user *)src;

		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *s = (u16 __user *)src;

		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *s = (u32 __user *)src;

		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *s = (u64 __user *)src;

		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}
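/*
 * Translate a virtual address to its physical address by walking the page
 * tables in software, so that addresses outside the direct map can also be
 * resolved for MMIO emulation. Encrypted mappings are rejected because
 * emulated MMIO to/from encrypted memory is not supported.
 */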
static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.cr2        = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
		/* Emulated MMIO to/from encrypted memory not supported */
		return ES_UNSUPPORTED;

	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return ES_OK;
}

/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		/*
		 * Invalidate the GHCB so a VMGEXIT instruction issued
		 * from userspace won't appear to be valid.
		 */
		vc_ghcb_invalidate(ghcb);
		data->ghcb_active = false;
	}
}

void noinstr __sev_es_nmi_complete(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
	VMGEXIT();

	__sev_put_ghcb(&state);
}
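/*
 * Locate the SNP secrets page: the Confidential Computing blob passed in
 * boot_params carries its physical address and length.
 */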
static u64 __init get_secrets_page(void)
{
	u64 pa_data = boot_params.cc_blob_address;
	struct cc_blob_sev_info info;
	void *map;

	/*
	 * The CC blob contains the address of the secrets page, check if the
	 * blob is present.
	 */
	if (!pa_data)
		return 0;

	map = early_memremap(pa_data, sizeof(info));
	if (!map) {
		pr_err("Unable to locate SNP secrets page: failed to map the Confidential Computing blob.\n");
		return 0;
	}

	memcpy(&info, map, sizeof(info));
	early_memunmap(map, sizeof(info));

	/* smoke-test the secrets page passed */
	if (!info.secrets_phys || info.secrets_len != PAGE_SIZE)
		return 0;

	return info.secrets_phys;
}

static u64 __init get_snp_jump_table_addr(void)
{
	struct snp_secrets_page_layout *layout;
	void __iomem *mem;
	u64 pa, addr;

	pa = get_secrets_page();
	if (!pa)
		return 0;

	mem = ioremap_encrypted(pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
		return 0;
	}

	layout = (__force struct snp_secrets_page_layout *)mem;

	addr = layout->os_area.ap_jump_table_pa;

	iounmap(mem);

	return addr;
}
static u64 __init get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return get_snp_jump_table_addr();

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}
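/*
 * Validate or rescind validation of a range of 4K pages with the PVALIDATE
 * instruction. A failure here is fatal for an SNP guest, so terminate.
 */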
static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
{
	unsigned long vaddr_end;
	int rc;

	vaddr = vaddr & PAGE_MASK;
	vaddr_end = vaddr + (npages << PAGE_SHIFT);

	while (vaddr < vaddr_end) {
		rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
		if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);

		vaddr = vaddr + PAGE_SIZE;
	}
}
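/*
 * Request a page state change (private <-> shared) from the hypervisor using
 * the GHCB MSR protocol, one 4K page at a time.
 */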
static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op)
{
	unsigned long paddr_end;
	u64 val;

	paddr = paddr & PAGE_MASK;
	paddr_end = paddr + (npages << PAGE_SHIFT);

	while (paddr < paddr_end) {
		/*
		 * Use the MSR protocol because this function can be called before
		 * the GHCB is established.
		 */
		sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
		VMGEXIT();

		val = sev_es_rd_ghcb_msr();

		if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
			 "Wrong PSC response code: 0x%x\n",
			 (unsigned int)GHCB_RESP_CODE(val)))
			goto e_term;

		if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
			 "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
			 op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
			 paddr, GHCB_MSR_PSC_RESP_VAL(val)))
			goto e_term;

		paddr = paddr + PAGE_SIZE;
	}

	return;

e_term:
	sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}
void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
					 unsigned int npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/*
	 * Ask the hypervisor to mark the memory pages as private in the RMP
	 * table.
	 */
	early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE);

	/* Validate the memory pages after they've been added in the RMP table. */
	pvalidate_pages(vaddr, npages, true);
}

void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
					unsigned int npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/* Invalidate the memory pages before they are marked shared in the RMP table. */
	pvalidate_pages(vaddr, npages, false);

	/* Ask hypervisor to mark the memory pages shared in the RMP table. */
	early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED);
}

void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
{
	unsigned long vaddr, npages;

	vaddr = (unsigned long)__va(paddr);
	npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;

	if (op == SNP_PAGE_STATE_PRIVATE)
		early_snp_set_memory_private(vaddr, paddr, npages);
	else if (op == SNP_PAGE_STATE_SHARED)
		early_snp_set_memory_shared(vaddr, paddr, npages);
	else
		WARN(1, "invalid memory op %d\n", op);
}
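/*
 * Issue a Page State Change NAE event for the descriptor placed in the GHCB
 * shared buffer and wait until the hypervisor has processed all entries.
 */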
static int vmgexit_psc(struct snp_psc_desc *desc)
{
	int cur_entry, end_entry, ret = 0;
	struct snp_psc_desc *data;
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	unsigned long flags;
	struct ghcb *ghcb;

	/*
	 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
	 * a per-CPU GHCB.
	 */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);
	if (!ghcb) {
		ret = 1;
		goto out_unlock;
	}

	/* Copy the input desc into GHCB shared buffer */
	data = (struct snp_psc_desc *)ghcb->shared_buffer;
	memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));

	/*
	 * As per the GHCB specification, the hypervisor can resume the guest
	 * before processing all the entries. Check whether all the entries
	 * are processed. If not, then keep retrying. Note, the hypervisor
	 * will update the data memory directly to indicate the status, so
	 * reference the data->hdr everywhere.
	 *
	 * The strategy here is to wait for the hypervisor to change the page
	 * state in the RMP table before guest accesses the memory pages. If the
	 * page state change was not successful, then later memory access will
	 * result in a crash.
	 */
	cur_entry = data->hdr.cur_entry;
	end_entry = data->hdr.end_entry;

	while (data->hdr.cur_entry <= data->hdr.end_entry) {
		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));

		/* This will advance the shared buffer data points to. */
		ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);

		/*
		 * Page State Change VMGEXIT can pass error code through
		 * exit_info_2.
		 */
		if (WARN(ret || ghcb->save.sw_exit_info_2,
			 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
			 ret, ghcb->save.sw_exit_info_2)) {
			ret = 1;
			goto out;
		}

		/* Verify that reserved bit is not set */
		if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
			ret = 1;
			goto out;
		}

		/*
		 * Sanity check that entry processing is not going backwards.
		 * This will happen only if hypervisor is tricking us.
		 */
		if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
			 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
			 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
			ret = 1;
			goto out;
		}
	}

out:
	__sev_put_ghcb(&state);

out_unlock:
	local_irq_restore(flags);

	return ret;
}
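/*
 * Fill a PSC descriptor with one 4K entry per page of the given virtual
 * range and hand it to the hypervisor; on failure the guest is terminated.
 */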
static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
			      unsigned long vaddr_end, int op)
{
	struct psc_hdr *hdr;
	struct psc_entry *e;
	unsigned long pfn;
	int i;

	hdr = &data->hdr;
	e = data->entries;

	memset(data, 0, sizeof(*data));
	i = 0;

	while (vaddr < vaddr_end) {
		if (is_vmalloc_addr((void *)vaddr))
			pfn = vmalloc_to_pfn((void *)vaddr);
		else
			pfn = __pa(vaddr) >> PAGE_SHIFT;

		e->gfn = pfn;
		e->operation = op;
		hdr->end_entry = i;

		/*
		 * Current SNP implementation doesn't keep track of the RMP page
		 * size so use 4K for simplicity.
		 */
		e->pagesize = RMP_PG_SIZE_4K;

		vaddr = vaddr + PAGE_SIZE;
		e++;
		i++;
	}

	if (vmgexit_psc(data))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}

static void set_pages_state(unsigned long vaddr, unsigned int npages, int op)
{
	unsigned long vaddr_end, next_vaddr;
	struct snp_psc_desc *desc;

	desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT);
	if (!desc)
		panic("SNP: failed to allocate memory for PSC descriptor\n");

	vaddr = vaddr & PAGE_MASK;
	vaddr_end = vaddr + (npages << PAGE_SHIFT);

	while (vaddr < vaddr_end) {
		/* Calculate the last vaddr that fits in one struct snp_psc_desc. */
		next_vaddr = min_t(unsigned long, vaddr_end,
				   (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr);

		__set_pages_state(desc, vaddr, next_vaddr, op);

		vaddr = next_vaddr;
	}

	kfree(desc);
}

void snp_set_memory_shared(unsigned long vaddr, unsigned int npages)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	pvalidate_pages(vaddr, npages, false);

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
}

void snp_set_memory_private(unsigned long vaddr, unsigned int npages)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);

	pvalidate_pages(vaddr, npages, true);
}
static int snp_set_vmsa(void *va, bool vmsa)
{
	u64 attrs;

	/*
	 * Running at VMPL0 allows the kernel to change the VMSA bit for a page
	 * using the RMPADJUST instruction. However, for the instruction to
	 * succeed it must target the permissions of a lesser privileged
	 * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
	 * instruction in the AMD64 APM Volume 3).
	 */
	attrs = 1;
	if (vmsa)
		attrs |= RMPADJUST_VMSA_PAGE_BIT;

	return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
}

#define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
#define INIT_CS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
#define INIT_DS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)

#define INIT_LDTR_ATTRIBS	(SVM_SELECTOR_P_MASK | 2)
#define INIT_TR_ATTRIBS		(SVM_SELECTOR_P_MASK | 3)

static void *snp_alloc_vmsa_page(void)
{
	struct page *p;

	/*
	 * Allocate VMSA page to work around the SNP erratum where the CPU will
	 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
	 * collides with the RMP entry of VMSA page. The recommended workaround
	 * is to not use a large page.
	 *
	 * Allocate an 8k page which is also 8k-aligned.
	 */
	p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
	if (!p)
		return NULL;

	split_page(p, 1);

	/* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
	__free_page(p);

	return page_address(p + 1);
}

static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
{
	int err;

	err = snp_set_vmsa(vmsa, false);
	if (err)
		pr_err("clear VMSA page failed (%u), leaking page\n", err);
	else
		free_page((unsigned long)vmsa);
}
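/*
 * Bring up an AP via the SNP AP Creation NAE event: build a fresh VMSA with
 * the documented INIT register state, point CS/RIP at the SEV-ES real-mode
 * trampoline and ask the hypervisor to create the vCPU with it.
 */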
static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip)
{
	struct sev_es_save_area *cur_vmsa, *vmsa;
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u8 sipi_vector;
	int cpu, ret;
	u64 cr4;

	/*
	 * The hypervisor SNP feature support check has happened earlier, just check
	 * the AP_CREATION one here.
	 */
	if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
		return -EOPNOTSUPP;

	/*
	 * Verify the desired start IP against the known trampoline start IP
	 * to catch any future new trampolines that may be introduced that
	 * would require a new protected guest entry point.
	 */
	if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
		      "Unsupported SNP start_ip: %lx\n", start_ip))
		return -EINVAL;

	/* Override start_ip with known protected guest start IP */
	start_ip = real_mode_header->sev_es_trampoline_start;

	/* Find the logical CPU for the APIC ID */
	for_each_present_cpu(cpu) {
		if (arch_match_cpu_phys_id(cpu, apic_id))
			break;
	}
	if (cpu >= nr_cpu_ids)
		return -EINVAL;

	cur_vmsa = per_cpu(sev_vmsa, cpu);

	/*
	 * A new VMSA is created each time because there is no guarantee that
	 * the current VMSA is the kernel's or that the vCPU is not running. If
	 * an attempt was done to use the current VMSA with a running vCPU, a
	 * #VMEXIT of that vCPU would wipe out all of the settings being done
	 * here.
	 */
	vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page();
	if (!vmsa)
		return -ENOMEM;

	/* CR4 should maintain the MCE value */
	cr4 = native_read_cr4() & X86_CR4_MCE;

	/* Set the CS value based on the start_ip converted to a SIPI vector */
	sipi_vector		= (start_ip >> 12);
	vmsa->cs.base		= sipi_vector << 12;
	vmsa->cs.limit		= AP_INIT_CS_LIMIT;
	vmsa->cs.attrib		= INIT_CS_ATTRIBS;
	vmsa->cs.selector	= sipi_vector << 8;

	/* Set the RIP value based on start_ip */
	vmsa->rip		= start_ip & 0xfff;

	/* Set AP INIT defaults as documented in the APM */
	vmsa->ds.limit		= AP_INIT_DS_LIMIT;
	vmsa->ds.attrib		= INIT_DS_ATTRIBS;
	vmsa->es		= vmsa->ds;
	vmsa->fs		= vmsa->ds;
	vmsa->gs		= vmsa->ds;
	vmsa->ss		= vmsa->ds;

	vmsa->gdtr.limit	= AP_INIT_GDTR_LIMIT;
	vmsa->ldtr.limit	= AP_INIT_LDTR_LIMIT;
	vmsa->ldtr.attrib	= INIT_LDTR_ATTRIBS;
	vmsa->idtr.limit	= AP_INIT_IDTR_LIMIT;
	vmsa->tr.limit		= AP_INIT_TR_LIMIT;
	vmsa->tr.attrib		= INIT_TR_ATTRIBS;

	vmsa->cr4		= cr4;
	vmsa->cr0		= AP_INIT_CR0_DEFAULT;
	vmsa->dr7		= DR7_RESET_VALUE;
	vmsa->dr6		= AP_INIT_DR6_DEFAULT;
	vmsa->rflags		= AP_INIT_RFLAGS_DEFAULT;
	vmsa->g_pat		= AP_INIT_GPAT_DEFAULT;
	vmsa->xcr0		= AP_INIT_XCR0_DEFAULT;
	vmsa->mxcsr		= AP_INIT_MXCSR_DEFAULT;
	vmsa->x87_ftw		= AP_INIT_X87_FTW_DEFAULT;
	vmsa->x87_fcw		= AP_INIT_X87_FCW_DEFAULT;

	/* SVME must be set. */
	vmsa->efer		= EFER_SVME;

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl		= 0;
	vmsa->sev_features	= sev_status >> 2;

	/* Switch the page over to a VMSA page now that it is initialized */
	ret = snp_set_vmsa(vmsa, true);
	if (ret) {
		pr_err("set VMSA page failed (%u)\n", ret);
		free_page((unsigned long)vmsa);

		return -EINVAL;
	}

	/* Issue VMGEXIT AP Creation NAE event */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_rax(ghcb, vmsa->sev_features);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP Creation error\n");
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Perform cleanup if there was an error */
	if (ret) {
		snp_cleanup_vmsa(vmsa);
		vmsa = NULL;
	}

	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa);

	/* Record the current VMSA page */
	per_cpu(sev_vmsa, cpu) = vmsa;

	return ret;
}

void snp_set_wakeup_secondary_cpu(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	/*
	 * Always set this override if SNP is enabled. This makes it the
	 * required method to start APs under SNP. If the hypervisor does
	 * not support AP creation, then no APs will be started.
	 */
	apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
}
int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
	u16 startup_cs, startup_ip;
	phys_addr_t jump_table_pa;
	u64 jump_table_addr;
	u16 __iomem *jump_table;

	jump_table_addr = get_jump_table_addr();

	/* On UP guests there is no jump table so this is not a failure */
	if (!jump_table_addr)
		return 0;

	/* Check if AP Jump Table is page-aligned */
	if (jump_table_addr & ~PAGE_MASK)
		return -EINVAL;

	jump_table_pa = jump_table_addr & PAGE_MASK;

	startup_cs = (u16)(rmh->trampoline_start >> 4);
	startup_ip = (u16)(rmh->sev_es_trampoline_start -
			   rmh->trampoline_start);

	jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
	if (!jump_table)
		return -EIO;

	writew(startup_ip, &jump_table[0]);
	writew(startup_cs, &jump_table[1]);

	iounmap(jump_table);

	return 0;
}
/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
	struct sev_es_runtime_data *data;
	unsigned long address, pflags;
	int cpu;
	u64 pfn;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	pflags = _PAGE_NX | _PAGE_RW;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);

		address = __pa(&data->ghcb_page);
		pfn = address >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
			return 1;
	}

	return 0;
}
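/* Emulate RDMSR/WRMSR by forwarding the access to the hypervisor via the GHCB. */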
static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	enum es_result ret;
	u64 exit_info_1;

	/* Is it a WRMSR? */
	exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;

	ghcb_set_rcx(ghcb, regs->cx);
	if (exit_info_1) {
		ghcb_set_rax(ghcb, regs->ax);
		ghcb_set_rdx(ghcb, regs->dx);
	}

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);

	if ((ret == ES_OK) && (!exit_info_1)) {
		regs->ax = ghcb->save.rax;
		regs->dx = ghcb->save.rdx;
	}

	return ret;
}
static void snp_register_per_cpu_ghcb(void)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	snp_register_ghcb_early(__pa(ghcb));
}

void setup_ghcb(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	/* First make sure the hypervisor talks a supported protocol. */
	if (!sev_es_negotiate_protocol())
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

	/*
	 * Check whether the runtime #VC exception handler is active. It uses
	 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
	 *
	 * If SNP is active, register the per-CPU GHCB page so that the runtime
	 * exception handler can use it.
	 */
	if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
			snp_register_per_cpu_ghcb();

		return;
	}

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	/* SNP guest requires that GHCB GPA must be registered. */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		snp_register_ghcb_early(__pa(&boot_ghcb_page));
}
#ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	while (true) {
		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
		ghcb_set_sw_exit_info_1(ghcb, 0);
		ghcb_set_sw_exit_info_2(ghcb, 0);

		sev_es_wr_ghcb_msr(__pa(ghcb));
		VMGEXIT();

		/* Wakeup signal? */
		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
		    ghcb->save.sw_exit_info_2)
			break;
	}

	__sev_put_ghcb(&state);
}

/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	start_cpu0();
}
#else  /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead	native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif
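/* Allocate the per-CPU #VC runtime data, which holds the GHCB and its backup. */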
static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc(sizeof(*data), PAGE_SIZE);
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;
}

static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/*
	 * SNP is supported in v2 of the GHCB spec which mandates support for HV
	 * features.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
		sev_hv_features = get_hv_features();

		if (!(sev_hv_features & GHCB_HV_FT_SNP))
			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
	}

	/* Enable SEV-ES special handling */
	static_branch_enable(&sev_es_enable_key);

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	int trapnr = ctxt->fi.vector;

	if (trapnr == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	ctxt->regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(ctxt->regs, trapnr);
}
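/*
 * Return a pointer into pt_regs for the general-purpose register encoded in
 * the ModRM.rm field of the decoded instruction.
 */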
static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset	  = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}
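/*
 * Perform a single emulated MMIO read or write of up to 8 bytes through the
 * GHCB shared buffer.
 */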
static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	u64 exit_code, exit_info_1, exit_info_2;
	unsigned long ghcb_pa = __pa(ghcb);
	enum es_result res;
	phys_addr_t paddr;
	void __user *ref;

	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
	if (res != ES_OK) {
		if (res == ES_EXCEPTION && !read)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return res;
	}

	exit_info_1 = paddr;
	/* Can never be greater than 8 */
	exit_info_2 = bytes;

	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}
/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
					  unsigned int bytes)
{
	unsigned long ds_base, es_base;
	unsigned char *src, *dst;
	unsigned char buffer[8];
	enum es_result ret;
	bool rep;
	int off;

	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

	if (ds_base == -1L || es_base == -1L) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	src = ds_base + (unsigned char *)ctxt->regs->si;
	dst = es_base + (unsigned char *)ctxt->regs->di;

	ret = vc_read_mem(ctxt, src, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	ret = vc_write_mem(ctxt, dst, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	if (ctxt->regs->flags & X86_EFLAGS_DF)
		off = -bytes;
	else
		off =  bytes;

	ctxt->regs->si += off;
	ctxt->regs->di += off;

	rep = insn_has_rep_prefix(&ctxt->insn);
	if (rep)
		ctxt->regs->cx -= 1;

	if (!rep || ctxt->regs->cx == 0)
		return ES_OK;
	else
		return ES_RETRY;
}
static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	enum insn_mmio_type mmio;
	unsigned int bytes = 0;
	enum es_result ret;
	u8 sign_byte;
	long *reg_data;

	mmio = insn_decode_mmio(insn, &bytes);
	if (mmio == INSN_MMIO_DECODE_FAILED)
		return ES_DECODE_FAILED;

	if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
		reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
		if (!reg_data)
			return ES_DECODE_FAILED;
	}

	switch (mmio) {
	case INSN_MMIO_WRITE:
		memcpy(ghcb->shared_buffer, reg_data, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_WRITE_IMM:
		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_READ:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_ZERO_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		memset(reg_data, 0, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_SIGN_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}

		/* Sign extend based on operand size */
		memset(reg_data, sign_byte, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_MOVS:
		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;
	default:
		ret = ES_UNSUPPORTED;
		break;
	}

	return ret;
}
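/*
 * DR7 accesses are intercepted under SEV-ES. Since hardware debugging is not
 * supported, writes are cached per CPU and reads are served from that cache.
 */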
static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long val, *reg = vc_insn_get_rm(ctxt);
	enum es_result ret;

	if (!reg)
		return ES_DECODE_FAILED;

	val = *reg;

	/* Upper 32 bits must be written as zeroes */
	if (val >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	/* Clear out other reserved bits and set bit 10 */
	val = (val & 0xffff23ffL) | BIT(10);

	/* Early non-zero writes to DR7 are not supported */
	if (!data && (val & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
	ghcb_set_rax(ghcb, val);
	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (data)
		data->dr7 = val;

	return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
					 struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long *reg = vc_insn_get_rm(ctxt);

	if (!reg)
		return ES_DECODE_FAILED;

	if (data)
		*reg = data->dr7;
	else
		*reg = DR7_RESET_VALUE;

	return ES_OK;
}
static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
				       struct es_em_ctxt *ctxt)
{
	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}

static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rcx(ghcb, ctxt->regs->cx);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_monitor(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Treat it as a NOP and do not leak a physical address to the
	 * hypervisor.
	 */
	return ES_OK;
}

static enum es_result vc_handle_mwait(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	/* Treat the same as MONITOR/MONITORX */
	return ES_OK;
}

static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rax(ghcb, ctxt->regs->ax);
	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling exc_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;

	return ES_EXCEPTION;
}
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
		result = vc_handle_trap_ac(ghcb, ctxt);
		break;
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
		break;
	case SVM_EXIT_RDPMC:
		result = vc_handle_rdpmc(ghcb, ctxt);
		break;
	case SVM_EXIT_INVD:
		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
		result = ES_UNSUPPORTED;
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_VMMCALL:
		result = vc_handle_vmmcall(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_MONITOR:
		result = vc_handle_monitor(ghcb, ctxt);
		break;
	case SVM_EXIT_MWAIT:
		result = vc_handle_mwait(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}
static __always_inline void vc_forward_exception ( struct es_em_ctxt * ctxt )
{
long error_code = ctxt - > fi . error_code ;
int trapnr = ctxt - > fi . vector ;
ctxt - > regs - > orig_ax = ctxt - > fi . error_code ;
switch ( trapnr ) {
case X86_TRAP_GP :
exc_general_protection ( ctxt - > regs , error_code ) ;
break ;
case X86_TRAP_UD :
exc_invalid_op ( ctxt - > regs ) ;
break ;
2021-05-19 15:52:45 +02:00
case X86_TRAP_PF :
write_cr2 ( ctxt - > fi . cr2 ) ;
exc_page_fault ( ctxt - > regs , error_code ) ;
break ;
2020-09-07 15:16:01 +02:00
case X86_TRAP_AC :
exc_alignment_check ( ctxt - > regs , error_code ) ;
break ;
2020-09-07 15:15:47 +02:00
default :
pr_emerg ( " Unsupported exception in #VC instruction emulation - can't continue \n " ) ;
BUG ( ) ;
}
}
static __always_inline bool is_vc2_stack(unsigned long sp)
{
	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
{
	unsigned long sp, prev_sp;

	sp	= (unsigned long)regs;
	prev_sp	= regs->sp;

	/*
	 * If the code was already executing on the VC2 stack when the #VC
	 * happened, let it proceed to the normal handling routine. This way the
	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
	 */
	return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
}
static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result result;
	struct ghcb *ghcb;
	bool ret = true;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	result = vc_init_em_ctxt(&ctxt, regs, error_code);

	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, ghcb, error_code);

	__sev_put_ghcb(&state);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_VMM_ERROR:
		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_DECODE_FAILED:
		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		pr_emerg("Unknown result in %s():%d\n", __func__, result);
		/*
		 * Emulating the instruction which caused the #VC exception
		 * failed - can't continue so print debug information
		 */
		BUG();
	}

	return ret;
}

static __always_inline bool vc_is_db(unsigned long error_code)
{
	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
}
/*
 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
 * and will panic when an error happens.
 */
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
{
	irqentry_state_t irq_state;

	/*
	 * With the current implementation it is always possible to switch to a
	 * safe stack because #VC exceptions only happen at known places, like
	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
	 * also happen with code instrumentation when the hypervisor intercepts
	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
	 * exceptions currently also only happen in safe places.
	 *
	 * But keep this here in case the noinstr annotations are violated due
	 * to a bug elsewhere.
	 */
	if (unlikely(vc_from_invalid_context(regs))) {
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();
	}

	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		exc_debug(regs);
		return;
	}

	irq_state = irqentry_nmi_enter(regs);

	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/* Show some debug info */
		show_regs(regs);

		/* Ask hypervisor to sev_es_terminate */
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

		/* If that fails and we get here - just panic */
		panic("Returned from Terminate-Request to Hypervisor\n");
	}

	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);
}
/*
 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
 * and will kill the current task with SIGBUS when an error happens.
 */
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
{
	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
	}

	irqentry_enter_from_user_mode(regs);
	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
		 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	}

	instrumentation_end();
	irqentry_exit_to_user_mode(regs);
}
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
	unsigned long exit_code = regs->orig_ax;
	struct es_em_ctxt ctxt;
	enum es_result result;

	vc_ghcb_invalidate(boot_ghcb);

	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_VMM_ERROR:
		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_DECODE_FAILED:
		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_EXCEPTION:
		vc_early_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		BUG();
	}

	return true;

fail:
	show_regs(regs);

	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
/*
 * Initial set up of SNP relies on information provided by the
 * Confidential Computing blob, which can be passed to the kernel
 * in the following ways, depending on how it is booted:
 *
 * - when booted via the boot/decompress kernel:
 *   - via boot_params
 *
 * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
 *   - via a setup_data entry, as defined by the Linux Boot Protocol
 *
 * Scan for the blob in that order.
 */
static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
{
	struct cc_blob_sev_info *cc_info;

	/* Boot kernel would have passed the CC blob via boot_params. */
	if (bp->cc_blob_address) {
		cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
		goto found_cc_info;
	}

	/*
	 * If kernel was booted directly, without the use of the
	 * boot/decompression kernel, the CC blob may have been passed via
	 * setup_data instead.
	 */
	cc_info = find_cc_blob_setup_data(bp);
	if (!cc_info)
		return NULL;

found_cc_info:
	if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
		snp_abort();

	return cc_info;
}
bool __init snp_init(struct boot_params *bp)
{
	struct cc_blob_sev_info *cc_info;

	if (!bp)
		return false;

	cc_info = find_cc_blob(bp);
	if (!cc_info)
		return false;

	setup_cpuid_table(cc_info);

	/*
	 * The CC blob will be used later to access the secrets page. Cache
	 * it here like the boot kernel does.
	 */
	bp->cc_blob_address = (u32)(unsigned long)cc_info;

	return true;
}

void __init __noreturn snp_abort(void)
{
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
static void dump_cpuid_table(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i = 0;

	pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
		cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);

	for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
			i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
			fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
	}
}
/*
 * It is useful from an auditing/testing perspective to provide an easy way
 * for the guest owner to know that the CPUID table has been initialized as
 * expected, but that initialization happens too early in boot to print any
 * sort of indicator, and there's not really any other good place to do it,
 * so do it here.
 */
static int __init report_cpuid_table(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (!cpuid_table->count)
		return 0;

	pr_info("Using SNP CPUID table, %d entries present.\n",
		cpuid_table->count);

	if (sev_cfg.debug)
		dump_cpuid_table();

	return 0;
}

arch_initcall(report_cpuid_table);
static int __init init_sev_config(char *str)
{
	char *s;

	while ((s = strsep(&str, ","))) {
		if (!strcmp(s, "debug")) {
			sev_cfg.debug = true;
			continue;
		}

		pr_info("SEV command-line option '%s' was not recognized\n", s);
	}

	return 1;
}
__setup("sev=", init_sev_config);
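
/*
 * Usage example (illustrative): booting with
 *
 *	sev=debug
 *
 * sets sev_cfg.debug, which makes report_cpuid_table() above dump the full
 * SNP CPUID table during boot. Unrecognized options are only reported, not
 * rejected, so e.g. "sev=debug,foo" still enables the debug output.
 */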
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	if (!fw_err)
		return -EINVAL;

	/*
	 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
	 * a per-CPU GHCB.
	 */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);
	if (!ghcb) {
		ret = -EIO;
		goto e_restore_irq;
	}

	vc_ghcb_invalidate(ghcb);

	if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
		ghcb_set_rax(ghcb, input->data_gpa);
		ghcb_set_rbx(ghcb, input->data_npages);
	}

	ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
	if (ret)
		goto e_put;

	if (ghcb->save.sw_exit_info_2) {
		/* Number of expected pages are returned in RBX */
		if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
		    ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN)
			input->data_npages = ghcb_get_rbx(ghcb);

		*fw_err = ghcb->save.sw_exit_info_2;

		ret = -EIO;
	}

e_put:
	__sev_put_ghcb(&state);
e_restore_irq:
	local_irq_restore(flags);

	return ret;
}
EXPORT_SYMBOL_GPL(snp_issue_guest_request);
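
/*
 * Illustrative sketch only, kept out of the build: how a caller such as the
 * sev-guest driver might issue a plain (non-extended) guest request. Buffer
 * allocation, page-sharing with the hypervisor and SNP message sequence
 * handling are the caller's responsibility and are glossed over here;
 * "req_page" and "resp_page" are assumed to be suitably prepared shared pages,
 * and example_issue_guest_request() is a hypothetical helper name.
 */
#if 0
static int example_issue_guest_request(void *req_page, void *resp_page)
{
	struct snp_req_data input = {
		.req_gpa  = __pa(req_page),
		.resp_gpa = __pa(resp_page),
	};
	unsigned long fw_err = 0;
	int ret;

	ret = snp_issue_guest_request(SVM_VMGEXIT_GUEST_REQUEST, &input, &fw_err);
	if (ret)
		pr_err("SNP guest request failed: ret=%d fw_err=0x%lx\n", ret, fw_err);

	return ret;
}
#endif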
static struct platform_device sev_guest_device = {
	.name		= "sev-guest",
	.id		= -1,
};

static int __init snp_init_platform_device(void)
{
	struct sev_guest_platform_data data;
	u64 gpa;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	gpa = get_secrets_page();
	if (!gpa)
		return -ENODEV;

	data.secrets_gpa = gpa;
	if (platform_device_add_data(&sev_guest_device, &data, sizeof(data)))
		return -ENODEV;

	if (platform_device_register(&sev_guest_device))
		return -ENODEV;

	pr_info("SNP guest platform device initialized.\n");
	return 0;
}
device_initcall(snp_init_platform_device);
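
/*
 * Illustrative sketch only, kept out of the build: a platform driver bound to
 * the "sev-guest" device registered above can retrieve the secrets page GPA
 * from the attached platform data via dev_get_platdata(). The probe body and
 * function name here are assumptions for the example; the real consumer is
 * the in-tree sev-guest driver.
 */
#if 0
static int example_sev_guest_probe(struct platform_device *pdev)
{
	struct sev_guest_platform_data *data = dev_get_platdata(&pdev->dev);

	if (!data)
		return -ENODEV;

	/* data->secrets_gpa points at the SNP secrets page passed above. */
	return 0;
}
#endif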