// SPDX-License-Identifier: GPL-2.0
/*
 * AMD Encrypted Register State Support
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This file is not compiled stand-alone. It contains code shared
 * between the pre-decompression boot code and the running Linux kernel
 * and is included directly into both code-bases.
 */
#ifndef __BOOT_COMPRESSED
#define error(v)	pr_err(v)
#define has_cpuflag(f)	boot_cpu_has(f)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
#endif
/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
	u32 fn;
	u32 subfn;
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
};
/*
 * Individual entries of the SNP CPUID table, as defined by the SNP
 * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
 */
struct snp_cpuid_fn {
	u32 eax_in;
	u32 ecx_in;
	u64 xcr0_in;
	u64 xss_in;
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
	u64 __reserved;
} __packed;

/*
 * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
 * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
 * of 64 entries per CPUID table.
 */
#define SNP_CPUID_COUNT_MAX 64

struct snp_cpuid_table {
	u32 count;
	u32 __reserved1;
	u64 __reserved2;
	struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
} __packed;
/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;
static bool __init sev_es_check_cpu_features(void)
{
	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
		error("RDRAND instruction not supported - no trusted source of randomness available\n");
		return false;
	}

	return true;
}
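/*
 * Request guest termination through the GHCB MSR protocol. The reason set
 * and reason code are reported to the hypervisor before the vCPU is halted,
 * so this function never returns.
 */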
static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from Hypervisor */
	sev_es_wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}
/*
 * The hypervisor features are available from GHCB version 2 onward.
 */
static u64 get_hv_features(void)
{
	u64 val;

	if (ghcb_version < 2)
		return 0;

	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
		return 0;

	return GHCB_MSR_HV_FT_RESP_VAL(val);
}
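/*
 * Register the GHCB GPA with the hypervisor using the MSR protocol and
 * terminate the guest if the hypervisor does not acknowledge the same GPA.
 */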
static void snp_register_ghcb_early(unsigned long paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* If the response GPA is not ours then abort the guest */
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
	    (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}
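/*
 * Negotiate the GHCB protocol version with the hypervisor via the SEV_INFO
 * MSR exchange and cache the result in ghcb_version.
 */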
static bool sev_es_negotiate_protocol(void)
{
	u64 val;

	/* Do the GHCB protocol version negotiation */
	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

	return true;
}
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
	ghcb->save.sw_exit_code = 0;
	__builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

static bool vc_decoding_needed(unsigned long exit_code)
{
	/* Exceptions don't require to decode the instruction */
	return !(exit_code >= SVM_EXIT_EXCP_BASE &&
		 exit_code <= SVM_EXIT_LAST_EXCP);
}
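/*
 * Set up the instruction emulation context for a #VC exception and decode
 * the faulting instruction when the exit code requires it.
 */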
static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
				      struct pt_regs *regs,
				      unsigned long exit_code)
{
	enum es_result ret = ES_OK;

	memset(ctxt, 0, sizeof(*ctxt));
	ctxt->regs = regs;

	if (vc_decoding_needed(exit_code))
		ret = vc_decode_insn(ctxt);

	return ret;
}
static void vc_finish_insn(struct es_em_ctxt *ctxt)
{
	ctxt->regs->ip += ctxt->insn.length;
}
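/*
 * Inspect the sw_exit_info fields returned by the hypervisor: 0 means the
 * request succeeded, 1 with sane event-injection data means the hypervisor
 * requests that a #GP or #UD be forwarded to the guest, and anything else
 * is treated as a VMM error.
 */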
static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	u32 ret;

	ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
	if (!ret)
		return ES_OK;

	if (ret == 1) {
		u64 info = ghcb->save.sw_exit_info_2;
		unsigned long v = info & SVM_EVTINJ_VEC_MASK;

		/* Check if exception information from hypervisor is sane. */
		if ((info & SVM_EVTINJ_VALID) &&
		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
			ctxt->fi.vector = v;

			if (info & SVM_EVTINJ_VALID_ERR)
				ctxt->fi.error_code = info >> 32;

			return ES_EXCEPTION;
		}
	}

	return ES_VMM_ERROR;
}
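/*
 * Perform a VMGEXIT using the full GHCB page: fill in the exit code and
 * exit info fields, hand the GHCB GPA to the hypervisor and evaluate the
 * response via verify_exception_info().
 */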
static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt,
					  u64 exit_code, u64 exit_info_1,
					  u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	return verify_exception_info(ghcb, ctxt);
}
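/*
 * Fetch a single CPUID register from the hypervisor using the GHCB MSR
 * protocol, which returns one 32-bit register value per request.
 */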
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
		return -EIO;

	*reg = (val >> 32);

	return 0;
}

static int sev_cpuid_hv(struct cpuid_leaf *leaf)
{
	int ret;

	/*
	 * MSR protocol does not support fetching non-zero subfunctions, but is
	 * sufficient to handle current early-boot cases. Should that change,
	 * make sure to report an error rather than ignoring the index and
	 * grabbing random values. If this issue arises in the future, handling
	 * can be added here to use GHCB-page protocol for cases that occur late
	 * enough in boot that GHCB page is available.
	 */
	if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
		return -EINVAL;

	ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);

	return ret;
}
/*
 * This may be called early while still running on the initial identity
 * mapping. Use RIP-relative addressing to obtain the correct address
 * while running with the initial identity mapping as well as the
 * switch-over to kernel virtual addresses later.
 */
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
	void *ptr;

	asm ("lea cpuid_table_copy(%%rip), %0"
	     : "=r" (ptr)
	     : "p" (&cpuid_table_copy));

	return ptr;
}
/*
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
 * and 1 based on the corresponding features enabled by a particular
 * combination of XCR0 and XSS registers so that a guest can look up the
 * version corresponding to the features currently enabled in its XCR0/XSS
 * registers. The only values that differ between these versions/table
 * entries is the enabled XSAVE area size advertised via EBX.
 *
 * While hypervisors may choose to make use of this support, it is more
 * robust/secure for a guest to simply find the entry corresponding to the
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
 *
 * Since base/legacy XSAVE area size is documented as 0x240, use that value
 * directly rather than relying on the base size in the CPUID table.
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	u64 xfeatures_found = 0;
	u32 xsave_size = 0x240;
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
			continue;
		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
			continue;
		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
			continue;

		xfeatures_found |= (BIT_ULL(e->ecx_in));

		if (compacted)
			xsave_size += e->eax;
		else
			xsave_size = max(xsave_size, e->eax + e->ebx);
	}

	/*
	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
	 * entries in the CPUID table were not present. This is not a valid
	 * state to be in.
	 */
	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
		return 0;

	return xsave_size;
}
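/*
 * Look up the requested leaf/subfunction in the SNP CPUID table and, if a
 * matching entry is found, copy its register values into @leaf. Returns
 * false when no matching entry exists.
 */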
static bool
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (e->eax_in != leaf->fn)
			continue;

		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
			continue;

		/*
		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
		 * See the comments above snp_cpuid_calc_xsave_size() for more
		 * details.
		 */
		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
				continue;

		leaf->eax = e->eax;
		leaf->ebx = e->ebx;
		leaf->ecx = e->ecx;
		leaf->edx = e->edx;

		return true;
	}

	return false;
}
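/*
 * Wrapper around sev_cpuid_hv() that terminates the guest if the hypervisor
 * fails to provide the requested CPUID values.
 */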
static void snp_cpuid_hv(struct cpuid_leaf *leaf)
{
	if (sev_cpuid_hv(leaf))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}
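/*
 * Apply fix-ups to CPUID values taken from the SNP CPUID table for leaves
 * that depend on per-CPU state (APIC IDs), control-register state (OSXSAVE,
 * OSPKE) or the currently enabled XSAVE features.
 */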
static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
{
	struct cpuid_leaf leaf_hv = *leaf;

	switch (leaf->fn) {
	case 0x1:
		snp_cpuid_hv(&leaf_hv);

		/* initial APIC ID */
		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));

		/* APIC enabled bit */
		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));

		/* OSXSAVE enabled bit */
		if (native_read_cr4() & X86_CR4_OSXSAVE)
			leaf->ecx |= BIT(27);
		break;
	case 0x7:
		/* OSPKE enabled bit */
		leaf->ecx &= ~BIT(4);
		if (native_read_cr4() & X86_CR4_PKE)
			leaf->ecx |= BIT(4);
		break;
	case 0xB:
		leaf_hv.subfn = 0;
		snp_cpuid_hv(&leaf_hv);

		/* extended APIC ID */
		leaf->edx = leaf_hv.edx;
		break;
	case 0xD: {
		bool compacted = false;
		u64 xcr0 = 1, xss = 0;
		u32 xsave_size;

		if (leaf->subfn != 0 && leaf->subfn != 1)
			return 0;

		if (native_read_cr4() & X86_CR4_OSXSAVE)
			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
		if (leaf->subfn == 1) {
			/* Get XSS value if XSAVES is enabled. */
			if (leaf->eax & BIT(3)) {
				unsigned long lo, hi;

				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
						     : "c" (MSR_IA32_XSS));
				xss = (hi << 32) | lo;
			}

			/*
			 * The PPR and APM aren't clear on what size should be
			 * encoded in 0xD:0x1:EBX when compaction is not enabled
			 * by either XSAVEC (feature bit 1) or XSAVES (feature
			 * bit 3) since SNP-capable hardware has these feature
			 * bits fixed as 1. KVM sets it to 0 in this case, but
			 * to avoid this becoming an issue it's safer to simply
			 * treat this as unsupported for SNP guests.
			 */
			if (!(leaf->eax & (BIT(1) | BIT(3))))
				return -EINVAL;

			compacted = true;
		}

		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
		if (!xsave_size)
			return -EINVAL;

		leaf->ebx = xsave_size;
		}
		break;
	case 0x8000001E:
		snp_cpuid_hv(&leaf_hv);

		/* extended APIC ID */
		leaf->eax = leaf_hv.eax;

		/* compute ID */
		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));

		/* node ID */
		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
		break;
	default:
		/* No fix-ups needed, use values as-is. */
		break;
	}

	return 0;
}
/*
 * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
 * should be treated as fatal by caller.
 */
static int snp_cpuid(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (!cpuid_table->count)
		return -EOPNOTSUPP;

	if (!snp_cpuid_get_validated_func(leaf)) {
		/*
		 * Some hypervisors will avoid keeping track of CPUID entries
		 * where all values are zero, since they can be handled the
		 * same as out-of-range values (all-zero). This is useful here
		 * as well as it allows virtually all guest configurations to
		 * work using a single SNP CPUID table.
		 *
		 * To allow for this, there is a need to distinguish between
		 * out-of-range entries and in-range zero entries, since the
		 * CPUID table entries are only a template that may need to be
		 * augmented with additional values for things like
		 * CPU-specific information during post-processing. So if it's
		 * not in the table, set the values to zero. Then, if they are
		 * within a valid CPUID range, proceed with post-processing
		 * using zeros as the initial values. Otherwise, skip
		 * post-processing and just return zeros immediately.
		 */
		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

		/* Skip post-processing for out-of-range zero leaves. */
		if (!(leaf->fn <= cpuid_std_range_max ||
		      (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
		      (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
			return 0;
	}

	return snp_cpuid_postprocess(leaf);
}
/*
 * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
 * page yet, so it only supports the MSR based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
	unsigned int subfn = lower_bits(regs->cx, 32);
	unsigned int fn = lower_bits(regs->ax, 32);
	struct cpuid_leaf leaf;
	int ret;

	/* Only CPUID is supported via MSR protocol */
	if (exit_code != SVM_EXIT_CPUID)
		goto fail;

	leaf.fn = fn;
	leaf.subfn = subfn;

	ret = snp_cpuid(&leaf);
	if (!ret)
		goto cpuid_done;

	if (ret != -EOPNOTSUPP)
		goto fail;

	if (sev_cpuid_hv(&leaf))
		goto fail;

cpuid_done:
	regs->ax = leaf.eax;
	regs->bx = leaf.ebx;
	regs->cx = leaf.ecx;
	regs->dx = leaf.edx;

	/*
	 * This is a VC handler and the #VC is only raised when SEV-ES is
	 * active, which means SEV must be active too. Do sanity checks on the
	 * CPUID results to make sure the hypervisor does not trick the kernel
	 * into the no-sev path. This could map sensitive data unencrypted and
	 * make it accessible to the hypervisor.
	 *
	 * In particular, check for:
	 *	- Availability of CPUID leaf 0x8000001f
	 *	- SEV CPUID bit.
	 *
	 * The hypervisor might still report the wrong C-bit position, but this
	 * can't be checked here.
	 */
	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
		/* SEV leaf check */
		goto fail;
	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
		/* SEV bit */
		goto fail;

	/* Skip over the CPUID two-byte opcode */
	regs->ip += 2;

	return;

fail:
	/* Terminate the guest */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
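/*
 * Helpers to copy the data for string I/O instructions between guest memory
 * and a bounce buffer, honoring the direction flag via @backwards.
 */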
static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
					  void *src, char *buf,
					  unsigned int data_size,
					  unsigned int count,
					  bool backwards)
{
	int i, b = backwards ? -1 : 1;
	enum es_result ret = ES_OK;

	for (i = 0; i < count; i++) {
		void *s = src + (i * data_size * b);
		char *d = buf + (i * data_size);

		ret = vc_read_mem(ctxt, s, d, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}

static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
					   void *dst, char *buf,
					   unsigned int data_size,
					   unsigned int count,
					   bool backwards)
{
	int i, s = backwards ? -1 : 1;
	enum es_result ret = ES_OK;

	for (i = 0; i < count; i++) {
		void *d = dst + (i * data_size * s);
		char *b = buf + (i * data_size);

		ret = vc_write_mem(ctxt, d, b, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}
#define IOIO_TYPE_STR  BIT(2)
#define IOIO_TYPE_IN   1
#define IOIO_TYPE_INS  (IOIO_TYPE_IN | IOIO_TYPE_STR)
#define IOIO_TYPE_OUT  0
#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)

#define IOIO_REP       BIT(3)

#define IOIO_ADDR_64   BIT(9)
#define IOIO_ADDR_32   BIT(8)
#define IOIO_ADDR_16   BIT(7)

#define IOIO_DATA_32   BIT(6)
#define IOIO_DATA_16   BIT(5)
#define IOIO_DATA_8    BIT(4)

#define IOIO_SEG_ES    (0 << 10)
#define IOIO_SEG_DS    (3 << 10)
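/*
 * Build the SW_EXITINFO1 value for an IOIO #VC exception from the decoded
 * instruction: I/O type, port or immediate, data size, address size and
 * REP prefix.
 */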
static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
	struct insn *insn = &ctxt->insn;
	*exitinfo = 0;

	switch (insn->opcode.bytes[0]) {
	/* INS opcodes */
	case 0x6c:
	case 0x6d:
		*exitinfo |= IOIO_TYPE_INS;
		*exitinfo |= IOIO_SEG_ES;
		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
		break;

	/* OUTS opcodes */
	case 0x6e:
	case 0x6f:
		*exitinfo |= IOIO_TYPE_OUTS;
		*exitinfo |= IOIO_SEG_DS;
		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
		break;

	/* IN immediate opcodes */
	case 0xe4:
	case 0xe5:
		*exitinfo |= IOIO_TYPE_IN;
		*exitinfo |= (u8)insn->immediate.value << 16;
		break;

	/* OUT immediate opcodes */
	case 0xe6:
	case 0xe7:
		*exitinfo |= IOIO_TYPE_OUT;
		*exitinfo |= (u8)insn->immediate.value << 16;
		break;

	/* IN register opcodes */
	case 0xec:
	case 0xed:
		*exitinfo |= IOIO_TYPE_IN;
		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
		break;

	/* OUT register opcodes */
	case 0xee:
	case 0xef:
		*exitinfo |= IOIO_TYPE_OUT;
		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
		break;

	default:
		return ES_DECODE_FAILED;
	}

	switch (insn->opcode.bytes[0]) {
	case 0x6c:
	case 0x6e:
	case 0xe4:
	case 0xe6:
	case 0xec:
	case 0xee:
		/* Single byte opcodes */
		*exitinfo |= IOIO_DATA_8;
		break;
	default:
		/* Length determined by instruction parsing */
		*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
						     : IOIO_DATA_32;
	}

	switch (insn->addr_bytes) {
	case 2:
		*exitinfo |= IOIO_ADDR_16;
		break;
	case 4:
		*exitinfo |= IOIO_ADDR_32;
		break;
	case 8:
		*exitinfo |= IOIO_ADDR_64;
		break;
	}

	if (insn_has_rep_prefix(insn))
		*exitinfo |= IOIO_REP;

	return ES_OK;
}
static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u64 exit_info_1, exit_info_2;
	enum es_result ret;

	ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
	if (ret != ES_OK)
		return ret;

	if (exit_info_1 & IOIO_TYPE_STR) {

		/* (REP) INS/OUTS */

		bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
		unsigned int io_bytes, exit_bytes;
		unsigned int ghcb_count, op_count;
		unsigned long es_base;
		u64 sw_scratch;

		/*
		 * For the string variants with rep prefix the amount of in/out
		 * operations per #VC exception is limited so that the kernel
		 * has a chance to take interrupts and re-schedule while the
		 * instruction is emulated.
		 */
		io_bytes   = (exit_info_1 >> 4) & 0x7;
		ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;

		op_count    = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
		exit_info_2 = min(op_count, ghcb_count);
		exit_bytes  = exit_info_2 * io_bytes;

		es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

		/* Read bytes of OUTS into the shared buffer */
		if (!(exit_info_1 & IOIO_TYPE_IN)) {
			ret = vc_insn_string_read(ctxt,
					       (void *)(es_base + regs->si),
					       ghcb->shared_buffer, io_bytes,
					       exit_info_2, df);
			if (ret)
				return ret;
		}

		/*
		 * Issue a VMGEXIT to the HV to consume the bytes from the
		 * shared buffer or to have it write them into the shared buffer
		 * depending on the instruction: OUTS or INS.
		 */
		sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
		ghcb_set_sw_scratch(ghcb, sw_scratch);
		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
					  exit_info_1, exit_info_2);
		if (ret != ES_OK)
			return ret;

		/* Read bytes from shared buffer into the guest's destination. */
		if (exit_info_1 & IOIO_TYPE_IN) {
			ret = vc_insn_string_write(ctxt,
						   (void *)(es_base + regs->di),
						   ghcb->shared_buffer, io_bytes,
						   exit_info_2, df);
			if (ret)
				return ret;

			if (df)
				regs->di -= exit_bytes;
			else
				regs->di += exit_bytes;
		} else {
			if (df)
				regs->si -= exit_bytes;
			else
				regs->si += exit_bytes;
		}

		if (exit_info_1 & IOIO_REP)
			regs->cx -= exit_info_2;

		ret = regs->cx ? ES_RETRY : ES_OK;
	} else {

		/* IN/OUT into/from rAX */

		int bits = (exit_info_1 & 0x70) >> 1;
		u64 rax = 0;

		if (!(exit_info_1 & IOIO_TYPE_IN))
			rax = lower_bits(regs->ax, bits);

		ghcb_set_rax(ghcb, rax);

		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
		if (ret != ES_OK)
			return ret;

		if (exit_info_1 & IOIO_TYPE_IN) {
			if (!ghcb_rax_is_valid(ghcb))
				return ES_VMM_ERROR;
			regs->ax = lower_bits(ghcb->save.rax, bits);
		}
	}

	return ret;
}
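/*
 * Handle a CPUID #VC using the SNP CPUID table. Returns -EOPNOTSUPP when no
 * table is available so the caller can fall back to asking the hypervisor.
 */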
static int vc_handle_cpuid_snp(struct pt_regs *regs)
{
	struct cpuid_leaf leaf;
	int ret;

	leaf.fn = regs->ax;
	leaf.subfn = regs->cx;
	ret = snp_cpuid(&leaf);
	if (!ret) {
		regs->ax = leaf.eax;
		regs->bx = leaf.ebx;
		regs->cx = leaf.ecx;
		regs->dx = leaf.edx;
	}

	return ret;
}
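/*
 * Handle a CPUID #VC exception: prefer the SNP CPUID table and fall back to
 * a GHCB-based hypervisor call with the guest's XCR0 state supplied.
 */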
static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u32 cr4 = native_read_cr4();
	enum es_result ret;
	int snp_cpuid_ret;

	snp_cpuid_ret = vc_handle_cpuid_snp(regs);
	if (!snp_cpuid_ret)
		return ES_OK;
	if (snp_cpuid_ret != -EOPNOTSUPP)
		return ES_VMM_ERROR;

	ghcb_set_rax(ghcb, regs->ax);
	ghcb_set_rcx(ghcb, regs->cx);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #GP - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	regs->ax = ghcb->save.rax;
	regs->bx = ghcb->save.rbx;
	regs->cx = ghcb->save.rcx;
	regs->dx = ghcb->save.rdx;

	return ES_OK;
}
static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt,
				      unsigned long exit_code)
{
	bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
	enum es_result ret;

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
	      (!rdtscp || ghcb_rcx_is_valid(ghcb))))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;
	if (rdtscp)
		ctxt->regs->cx = ghcb->save.rcx;

	return ES_OK;
}
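/* setup_data entry that carries the physical address of the CC blob. */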
struct cc_setup_data {
	struct setup_data header;
	u32 cc_blob_address;
};

/*
 * Search for a Confidential Computing blob passed in as a setup_data entry
 * via the Linux Boot Protocol.
 */
static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
	struct cc_setup_data *sd = NULL;
	struct setup_data *hdr;

	hdr = (struct setup_data *)bp->hdr.setup_data;

	while (hdr) {
		if (hdr->type == SETUP_CC_BLOB) {
			sd = (struct cc_setup_data *)hdr;
			return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
		}

		hdr = (struct setup_data *)hdr->next;
	}

	return NULL;
}
/*
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
 * pointer that will be used to access it.
 *
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
 * be possible as an alternative, but the approach is brittle since the
 * mapping needs to be updated in sync with all the changes to virtual memory
 * layout and related mapping facilities throughout the boot process.
 */
static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
	const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
	int i;

	if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
	if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table = snp_cpuid_get_table();
	memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));

	/* Initialize CPUID ranges for range-checking. */
	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x0)
			cpuid_std_range_max = fn->eax;
		else if (fn->eax_in == 0x40000000)
			cpuid_hyp_range_max = fn->eax;
		else if (fn->eax_in == 0x80000000)
			cpuid_ext_range_max = fn->eax;
	}
}
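/*
 * Validate or invalidate the pages described by a Page State Change
 * descriptor via PVALIDATE. A 2M size mismatch is retried as individual 4K
 * pages; any remaining failure terminates the guest.
 */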
static void pvalidate_pages(struct snp_psc_desc *desc)
{
	struct psc_entry *e;
	unsigned long vaddr;
	unsigned int size;
	unsigned int i;
	bool validate;
	int rc;

	for (i = 0; i <= desc->hdr.end_entry; i++) {
		e = &desc->entries[i];

		vaddr = (unsigned long)pfn_to_kaddr(e->gfn);
		size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
		validate = e->operation == SNP_PAGE_STATE_PRIVATE;

		rc = pvalidate(vaddr, size, validate);
		if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
			unsigned long vaddr_end = vaddr + PMD_SIZE;

			for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
				rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
				if (rc)
					break;
			}
		}

		if (rc) {
			WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc);
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
		}
	}
}
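/*
 * Issue a Page State Change VMGEXIT through the GHCB shared buffer and
 * retry until the hypervisor has processed every entry, sanity-checking the
 * header it updates along the way.
 */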
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
{
	int cur_entry, end_entry, ret = 0;
	struct snp_psc_desc *data;
	struct es_em_ctxt ctxt;

	vc_ghcb_invalidate(ghcb);

	/* Copy the input desc into GHCB shared buffer */
	data = (struct snp_psc_desc *)ghcb->shared_buffer;
	memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));

	/*
	 * As per the GHCB specification, the hypervisor can resume the guest
	 * before processing all the entries. Check whether all the entries
	 * are processed. If not, then keep retrying. Note, the hypervisor
	 * will update the data memory directly to indicate the status, so
	 * reference the data->hdr everywhere.
	 *
	 * The strategy here is to wait for the hypervisor to change the page
	 * state in the RMP table before the guest accesses the memory pages.
	 * If the page state change was not successful, then later memory
	 * access will result in a crash.
	 */
	cur_entry = data->hdr.cur_entry;
	end_entry = data->hdr.end_entry;

	while (data->hdr.cur_entry <= data->hdr.end_entry) {
		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));

		/* This call advances cur_entry in the shared buffer that data points to. */
		ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);

		/*
		 * Page State Change VMGEXIT can pass error code through
		 * exit_info_2.
		 */
		if (WARN(ret || ghcb->save.sw_exit_info_2,
			 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
			 ret, ghcb->save.sw_exit_info_2)) {
			ret = 1;
			goto out;
		}

		/* Verify that reserved bit is not set */
		if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
			ret = 1;
			goto out;
		}

		/*
		 * Sanity check that entry processing is not going backwards.
		 * This will happen only if hypervisor is tricking us.
		 */
		if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
			 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
			 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
			ret = 1;
			goto out;
		}
	}

out:
	return ret;
}