commit bd7fe98b35
Merge tag 'kvm-x86-svm-6.6' of https://github.com/kvm-x86/linux into HEAD

KVM: x86: SVM changes for 6.6:

- Add support for SEV-ES DebugSwap, i.e. allow SEV-ES guests to use debug
  registers and generate/handle #DBs

- Clean up LBR virtualization code

- Fix a bug where KVM fails to set the target pCPU during an IRTE update

- Fix fatal bugs in SEV-ES intrahost migration

- Fix a bug where the recent (architecturally correct) change to reinject
  #BP and skip INT3 broke SEV guests (can't decode INT3 to skip it)
@@ -438,6 +438,7 @@
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */

/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
@@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_

#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)

#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)

struct vmcb_seg {
u16 selector;
@@ -345,7 +346,7 @@ struct vmcb_save_area
u64 last_excp_from;
u64 last_excp_to;
u8 reserved_0x298[72];
u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
} __packed;

/* Save area definition for SEV-ES and SEV-SNP guests */
@@ -512,7 +513,7 @@ struct ghcb {
} __packed;


#define EXPECTED_VMCB_SAVE_AREA_SIZE 740
#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648
#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
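
The EXPECTED_VMCB_SAVE_AREA_SIZE bump from 740 to 744 tracks spec_ctrl growing from u32 to u64; the expected-size constants exist so that a layout change which isn't accounted for fails at build time. A minimal stand-alone sketch of the same idea (the struct and the size below are toy values, not the real VMCB layout):

#include <assert.h>
#include <stdint.h>

/* Toy "save area": two 64-bit fields, 72 reserved bytes, then spec_ctrl. */
struct toy_save_area {
	uint64_t last_excp_from;
	uint64_t last_excp_to;
	uint8_t  reserved[72];
	uint64_t spec_ctrl;	/* was uint32_t: growing it changes the size */
} __attribute__((packed));

/* Update this constant together with the struct, or the build breaks. */
#define EXPECTED_TOY_SAVE_AREA_SIZE 96

static_assert(sizeof(struct toy_save_area) == EXPECTED_TOY_SAVE_AREA_SIZE,
	      "toy_save_area layout changed without updating its expected size");

int main(void)
{
	return 0;
}
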
@@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
int ret = 0;
unsigned long flags;
struct amd_svm_iommu_ir *ir;
u64 entry;

/**
* In some cases, the existing irte is updated and re-set,
@@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
ir->data = pi->ir_data;

spin_lock_irqsave(&svm->ir_list_lock, flags);

/*
* Update the target pCPU for IOMMU doorbells if the vCPU is running.
* If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
* will update the pCPU info when the vCPU is awakened and/or scheduled in.
* See also avic_vcpu_load().
*/
entry = READ_ONCE(*(svm->avic_physical_id_cache));
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
true, pi->ir_data);

list_add(&ir->node, &svm->ir_list);
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
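
This is the IRTE target-pCPU fix from the summary: svm_ir_list_add() now reads the vCPU's AVIC Physical APIC ID table entry and, if the IsRunning bit is set, points the IOMMU doorbell at the pCPU encoded in the entry's host physical APIC ID field. A stand-alone sketch of that decode step; the bit positions below are illustrative assumptions, not taken from the kernel headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed, illustrative field layout for a physical APIC ID table entry. */
#define IS_RUNNING_BIT		(1ULL << 62)
#define HOST_APIC_ID_MASK	0xFFFULL

/* Report the target pCPU only if the vCPU is currently running. */
static bool irte_target_from_entry(uint64_t entry, uint32_t *host_apic_id)
{
	if (!(entry & IS_RUNNING_BIT))
		return false;	/* blocking/scheduled out: fixed up at next load */

	*host_apic_id = (uint32_t)(entry & HOST_APIC_ID_MASK);
	return true;
}

int main(void)
{
	uint64_t entry = IS_RUNNING_BIT | 0x2a;	/* "running on APIC ID 42" */
	uint32_t id;

	if (irte_target_from_entry(entry, &id))
		printf("retarget IOMMU doorbell to host APIC ID %u\n", id);
	else
		printf("vCPU not running, defer to avic_vcpu_load()\n");
	return 0;
}
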
@@ -986,10 +999,11 @@ static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
int ret = 0;
unsigned long flags;
struct amd_svm_iommu_ir *ir;
struct vcpu_svm *svm = to_svm(vcpu);

lockdep_assert_held(&svm->ir_list_lock);

if (!kvm_arch_has_assigned_device(vcpu->kvm))
return 0;

@@ -997,19 +1011,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
* Here, we go through the per-vcpu ir_list to update all existing
* interrupt remapping table entry targeting this vcpu.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);

if (list_empty(&svm->ir_list))
goto out;
return 0;

list_for_each_entry(ir, &svm->ir_list, node) {
ret = amd_iommu_update_ga(cpu, r, ir->data);
if (ret)
break;
return ret;
}
out:
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
return ret;
return 0;
}

void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1017,6 +1027,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
u64 entry;
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long flags;

lockdep_assert_preemption_disabled();

@@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_vcpu_is_blocking(vcpu))
return;

/*
* Grab the per-vCPU interrupt remapping lock even if the VM doesn't
* _currently_ have assigned devices, as that can change. Holding
* ir_list_lock ensures that either svm_ir_list_add() will consume
* up-to-date entry information, or that this task will wait until
* svm_ir_list_add() completes to set the new target pCPU.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);

entry = READ_ONCE(*(svm->avic_physical_id_cache));
WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);

@@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);

spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long flags;

lockdep_assert_preemption_disabled();

/*
* Note, reading the Physical ID entry outside of ir_list_lock is safe
* as only the pCPU that has loaded (or is loading) the vCPU is allowed
* to modify the entry, and preemption is disabled. I.e. the vCPU
* can't be scheduled out and thus avic_vcpu_{put,load}() can't run
* recursively.
*/
entry = READ_ONCE(*(svm->avic_physical_id_cache));

/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
return;

/*
* Take and hold the per-vCPU interrupt remapping lock while updating
* the Physical ID entry even though the lock doesn't protect against
* multiple writers (see above). Holding ir_list_lock ensures that
* either svm_ir_list_add() will consume up-to-date entry information,
* or that this task will wait until svm_ir_list_add() completes to
* mark the vCPU as not running.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);

avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);

spin_unlock_irqrestore(&svm->ir_list_lock, flags);

}

void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
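
avic_vcpu_load() and avic_vcpu_put() are the only writers of the Physical ID entry (they run on the current pCPU with preemption disabled), but they now take ir_list_lock so that svm_ir_list_add() either observes the final IsRunning/pCPU values or waits for them. A stand-alone sketch of the write side, reusing the illustrative entry layout assumed in the previous snippet (not the kernel's definitions):

#include <stdint.h>
#include <stdio.h>

#define IS_RUNNING_BIT		(1ULL << 62)
#define HOST_APIC_ID_MASK	0xFFFULL

/* "Load": retarget the entry to the new pCPU and mark the vCPU running. */
static void entry_set_running(uint64_t *entry, uint32_t host_apic_id)
{
	uint64_t e = *entry;

	e &= ~HOST_APIC_ID_MASK;
	e |= host_apic_id & HOST_APIC_ID_MASK;
	e |= IS_RUNNING_BIT;
	*entry = e;		/* the kernel uses WRITE_ONCE() here */
}

/* "Put": clear IsRunning so posted interrupts fall back to the GA log. */
static void entry_clear_running(uint64_t *entry)
{
	*entry &= ~IS_RUNNING_BIT;
}

int main(void)
{
	uint64_t entry = 0;

	entry_set_running(&entry, 42);
	printf("loaded:   entry = %#llx\n", (unsigned long long)entry);
	entry_clear_running(&entry);
	printf("unloaded: entry = %#llx\n", (unsigned long long)entry);
	return 0;
}
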
@@ -23,6 +23,7 @@
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
#include <asm/debugreg.h>

#include "mmu.h"
#include "x86.h"
@@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);

/* enable/disable SEV-ES DebugSwap support */
static bool sev_es_debug_swap_enabled = true;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
#else
#define sev_enabled false
#define sev_es_enabled false
#define sev_es_debug_swap_enabled false
#endif /* CONFIG_KVM_AMD_SEV */

static u8 sev_enc_bit;
@@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->xss = svm->vcpu.arch.ia32_xss;
save->dr6 = svm->vcpu.arch.dr6;

if (sev_es_debug_swap_enabled)
save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;

pr_debug("Virtual Machine Save Area (VMSA):\n");
print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);

@@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
struct vcpu_svm *svm = to_svm(vcpu);
int ret;

if (vcpu->guest_debug) {
pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
return -EINVAL;
}

/* Perform some pre-encryption checks against the VMSA */
ret = sev_es_sync_vmsa(svm);
if (ret)
@@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
* Note, the source is not required to have the same number of
* vCPUs as the destination when migrating a vanilla SEV VM.
*/
src_vcpu = kvm_get_vcpu(dst_kvm, i);
src_vcpu = kvm_get_vcpu(src_kvm, i);
src_svm = to_svm(src_vcpu);

/*
@@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void)
bool sev_es_supported = false;
bool sev_supported = false;

if (!sev_enabled || !npt_enabled)
if (!sev_enabled || !npt_enabled || !nrips)
goto out;

/*
@@ -2256,6 +2270,9 @@ out:

sev_enabled = sev_supported;
sev_es_enabled = sev_es_supported;
if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
    !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
sev_es_debug_swap_enabled = false;
#endif
}
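
sev_hardware_setup() force-clears sev_es_debug_swap_enabled unless SEV-ES itself is usable and the CPU reports both DEBUG_SWAP and NO_NESTED_DATA_BP. A stand-alone sketch of that gating, with the module parameter and the cpu_feature_enabled() checks stubbed out as plain booleans (names are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the real module params and CPUID feature checks. */
static bool sev_es_enabled = true;
static bool cpu_has_debug_swap = true;
static bool cpu_has_no_nested_data_bp = false;

/* Default-on knob, force-disabled when the prerequisites are missing. */
static bool sev_es_debug_swap_enabled = true;

static void setup_debug_swap(void)
{
	if (!sev_es_enabled || !cpu_has_debug_swap || !cpu_has_no_nested_data_bp)
		sev_es_debug_swap_enabled = false;
}

int main(void)
{
	setup_debug_swap();
	printf("DebugSwap %s\n", sev_es_debug_swap_enabled ? "enabled" : "disabled");
	return 0;
}
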
@@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_NMI_COMPLETE:
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
++vcpu->stat.nmi_window_exits;
svm->nmi_masked = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
ret = 1;
break;
case SVM_VMGEXIT_AP_HLT_LOOP:
ret = kvm_emulate_ap_reset_hold(vcpu);
@@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)

static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
struct kvm_vcpu *vcpu = &svm->vcpu;

svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
@@ -2952,9 +2973,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
/*
* An SEV-ES guest requires a VMSA area that is separate from the
* VMCB page. Do not include the encryption mask on the VMSA physical
* address since hardware will access it using the guest key.
* address since hardware will access it using the guest key. Note,
* the VMSA will be NULL if this vCPU is the destination for intrahost
* migration, and will be copied later.
*/
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
if (svm->sev_es.vmsa)
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);

/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
svm_set_intercept(svm, TRAP_CR4_WRITE);
svm_set_intercept(svm, TRAP_CR8_WRITE);

/* No support for enable_vmware_backdoor */
clr_exception_intercept(svm, GP_VECTOR);
vmcb->control.intercepts[INTERCEPT_DR] = 0;
if (!sev_es_debug_swap_enabled) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
} else {
/*
* Disable #DB intercept iff DebugSwap is enabled. KVM doesn't
* allow debugging SEV-ES guests, and enables DebugSwap iff
* NO_NESTED_DATA_BP is supported, so there's no reason to
* intercept #DB when DebugSwap is enabled. For simplicity
* with respect to guest debug, intercept #DB for other VMs
* even if NO_NESTED_DATA_BP is supported, i.e. even if the
* guest can't DoS the CPU with infinite #DB vectoring.
*/
clr_exception_intercept(svm, DB_VECTOR);
}

/* Can't intercept XSETBV, HV can't modify XCR0 directly */
svm_clr_intercept(svm, INTERCEPT_XSETBV);
@@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
clr_exception_intercept(svm, UD_VECTOR);

/*
* Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
* KVM can't decrypt guest memory to decode the faulting instruction.
*/
clr_exception_intercept(svm, GP_VECTOR);

if (sev_es_guest(svm->vcpu.kvm))
sev_es_init_vmcb(svm);
}
@@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
/*
* As an SEV-ES guest, hardware will restore the host state on VMEXIT,
* of which one step is to perform a VMLOAD. KVM performs the
* corresponding VMSAVE in svm_prepare_guest_switch for both
* traditional and SEV-ES guests.
* All host state for SEV-ES guests is categorized into three swap types
* based on how it is handled by hardware during a world switch:
*
* A: VMRUN: Host state saved in host save area
*    VMEXIT: Host state loaded from host save area
*
* B: VMRUN: Host state _NOT_ saved in host save area
*    VMEXIT: Host state loaded from host save area
*
* C: VMRUN: Host state _NOT_ saved in host save area
*    VMEXIT: Host state initialized to default(reset) values
*
* Manually save type-B state, i.e. state that is loaded by VMEXIT but
* isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
* by common SVM code).
*/

/* XCR0 is restored on VMEXIT, save the current host value */
hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

/* PKRU is restored on VMEXIT, save the current host value */
hostsa->pkru = read_pkru();

/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
hostsa->xss = host_xss;

/*
* If DebugSwap is enabled, debug registers are loaded but NOT saved by
* the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
* saves and loads debug registers (Type-A).
*/
if (sev_es_debug_swap_enabled) {
hostsa->dr0 = native_get_debugreg(0);
hostsa->dr1 = native_get_debugreg(1);
hostsa->dr2 = native_get_debugreg(2);
hostsa->dr3 = native_get_debugreg(3);
hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
}
}

void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
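
The rewritten comment splits SEV-ES host state into swap types A, B and C; only Type-B state (loaded by VMEXIT but not saved by VMRUN) must be saved manually before entering the guest, and the debug registers move from Type-A to Type-B when DebugSwap is on. A small sketch of that classification; the enum and helpers mirror the comment rather than the VMSA definition:

#include <stdbool.h>
#include <stdio.h>

enum swap_type {
	SWAP_TYPE_A,	/* VMRUN saves it, VMEXIT restores it           */
	SWAP_TYPE_B,	/* VMRUN does NOT save it, VMEXIT restores it   */
	SWAP_TYPE_C,	/* VMRUN does NOT save it, VMEXIT resets it     */
};

/* Only Type-B state needs a manual save into the host save area. */
static bool needs_manual_host_save(enum swap_type type)
{
	return type == SWAP_TYPE_B;
}

/* Debug registers: Type-A without DebugSwap, Type-B with it. */
static enum swap_type debug_reg_swap_type(bool debug_swap_enabled)
{
	return debug_swap_enabled ? SWAP_TYPE_B : SWAP_TYPE_A;
}

int main(void)
{
	for (int on = 0; on <= 1; on++)
		printf("DebugSwap %s -> manually save host DRs: %s\n",
		       on ? "on " : "off",
		       needs_manual_host_save(debug_reg_swap_type(on)) ? "yes" : "no");
	return 0;
}
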
@@ -203,7 +203,7 @@ static int nested = true;
module_param(nested, int, S_IRUGO);

/* enable/disable Next RIP Save */
static int nrips = true;
int nrips = true;
module_param(nrips, int, 0444);

/* enable/disable Virtual VMLOAD VMSAVE */
@@ -365,6 +365,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;

}
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len);

static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
bool commit_side_effects)
@@ -385,6 +387,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
}

if (!svm->next_rip) {
/*
* FIXME: Drop this when kvm_emulate_instruction() does the
* right thing and treats "can't emulate" as outright failure
* for EMULTYPE_SKIP.
*/
if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
return 0;

if (unlikely(!commit_side_effects))
old_rflags = svm->vmcb->save.rflags;

@@ -677,6 +687,39 @@ free_save_area:

}

static void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;

vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);

recalc_intercepts(svm);
}

static void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;

vmcb->control.intercepts[INTERCEPT_DR] = 0;

recalc_intercepts(svm);
}

static int direct_access_msr_slot(u32 msr)
{
u32 i;
@@ -947,50 +990,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}

static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
{
/*
* If the LBR virtualization is disabled, the LBR msrs are always
* kept in the vmcb01 to avoid copying them on nested guest entries.
*
* If nested, and the LBR virtualization is enabled/disabled, the msrs
* are moved between the vmcb01 and vmcb02 as needed.
* If LBR virtualization is disabled, the LBR MSRs are always kept in
* vmcb01. If LBR virtualization is enabled and L1 is running VMs of
* its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
*/
struct vmcb *vmcb =
(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
svm->vmcb : svm->vmcb01.ptr;

switch (index) {
case MSR_IA32_DEBUGCTLMSR:
return vmcb->save.dbgctl;
case MSR_IA32_LASTBRANCHFROMIP:
return vmcb->save.br_from;
case MSR_IA32_LASTBRANCHTOIP:
return vmcb->save.br_to;
case MSR_IA32_LASTINTFROMIP:
return vmcb->save.last_excp_from;
case MSR_IA32_LASTINTTOIP:
return vmcb->save.last_excp_to;
default:
KVM_BUG(false, svm->vcpu.kvm,
"%s: Unknown MSR 0x%x", __func__, index);
return 0;
}
return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
svm->vmcb01.ptr;
}

void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
DEBUGCTLMSR_LBR;

bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
LBR_CTL_ENABLE_MASK);

if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
enable_lbrv = true;
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
(is_guest_mode(vcpu) && svm->lbrv_enabled &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));

if (enable_lbrv == current_enable_lbrv)
return;
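
This is the LBR virtualization cleanup from the summary: the per-MSR switch becomes a single svm_get_lbr_vmcb() helper that picks which VMCB currently holds the LBR MSRs (the active VMCB while LBR virtualization is on, vmcb01 otherwise). A stand-alone sketch of that selection with toy structs; the names and fields below are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_vmcb {
	const char *name;
	uint64_t dbgctl;	/* stands in for the LBR-related save fields */
};

struct toy_vcpu {
	struct toy_vmcb *active;	/* vmcb01 or vmcb02 */
	struct toy_vmcb *vmcb01;
	bool lbr_virt_enabled;		/* LBR_CTL_ENABLE_MASK analogue */
};

/* LBR MSRs live in the active VMCB only while LBR virtualization is on. */
static struct toy_vmcb *lbr_vmcb(struct toy_vcpu *v)
{
	return v->lbr_virt_enabled ? v->active : v->vmcb01;
}

int main(void)
{
	struct toy_vmcb vmcb01 = { "vmcb01", 0 }, vmcb02 = { "vmcb02", 0 };
	struct toy_vcpu v = { .active = &vmcb02, .vmcb01 = &vmcb01 };

	v.lbr_virt_enabled = false;
	lbr_vmcb(&v)->dbgctl = 1;	/* MSR write routed to vmcb01 */
	printf("LBR virt off: accesses go to %s\n", lbr_vmcb(&v)->name);

	v.lbr_virt_enabled = true;
	printf("LBR virt on:  accesses go to %s\n", lbr_vmcb(&v)->name);
	return 0;
}
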
@@ -1201,10 +1218,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
* as VMware does. Don't intercept #GP for SEV guests as KVM can't
* decrypt guest memory to decode the faulting instruction.
* as VMware does.
*/
if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
if (enable_vmware_backdoor)
set_exception_intercept(svm, GP_VECTOR);

svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1949,7 +1965,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

if (vcpu->arch.guest_state_protected)
if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
return;

get_debugreg(vcpu->arch.db[0], 0);
@@ -2510,12 +2526,13 @@ static int iret_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

WARN_ON_ONCE(sev_es_guest(vcpu->kvm));

++vcpu->stat.nmi_window_exits;
svm->awaiting_iret_completion = true;

svm_clr_iret_intercept(svm);
if (!sev_es_guest(vcpu->kvm))
svm->nmi_iret_rip = kvm_rip_read(vcpu);
svm->nmi_iret_rip = kvm_rip_read(vcpu);

kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
@@ -2680,6 +2697,13 @@ static int dr_interception(struct kvm_vcpu *vcpu)
unsigned long val;
int err = 0;

/*
* SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
* for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
*/
if (sev_es_guest(vcpu->kvm))
return 1;

if (vcpu->guest_debug == 0) {
/*
* No more DR vmexits; force a reload of the debug registers
@@ -2802,11 +2826,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->tsc_aux;
break;
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
break;
case MSR_IA32_LASTBRANCHFROMIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
break;
case MSR_IA32_LASTBRANCHTOIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
break;
case MSR_IA32_LASTINTFROMIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
break;
case MSR_IA32_LASTINTTOIP:
msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
break;
case MSR_VM_HSAVE_PA:
msr_info->data = svm->nested.hsave_msr;
@@ -3037,13 +3069,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & DEBUGCTL_RESERVED_BITS)
return 1;

if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
svm->vmcb->save.dbgctl = data;
else
svm->vmcb01.ptr->save.dbgctl = data;

svm_get_lbr_vmcb(svm)->save.dbgctl = data;
svm_update_lbrv(vcpu);

break;
case MSR_VM_HSAVE_PA:
/*
@@ -3769,6 +3796,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
return; /* IRET will cause a vm exit */

/*
* SEV-ES guests are responsible for signaling when a vCPU is ready to
* receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
* KVM can't intercept and single-step IRET to detect when NMIs are
* unblocked (architecturally speaking). See SVM_VMGEXIT_NMI_COMPLETE.
*
* Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
* ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
* supported NAEs in the GHCB protocol.
*/
if (sev_es_guest(vcpu->kvm))
return;

if (!gif_set(svm)) {
if (vgif)
svm_set_intercept(svm, INTERCEPT_STGI);
@@ -3918,12 +3958,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
svm->soft_int_injected = false;

/*
* If we've made progress since setting HF_IRET_MASK, we've
* If we've made progress since setting awaiting_iret_completion, we've
* executed an IRET and can allow NMI injection.
*/
if (svm->awaiting_iret_completion &&
(sev_es_guest(vcpu->kvm) ||
kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
svm->awaiting_iret_completion = false;
svm->nmi_masked = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -4651,16 +4690,25 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
* and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
* decode garbage.
*
* Inject #UD if KVM reached this point without an instruction buffer.
* In practice, this path should never be hit by a well-behaved guest,
* e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
* is still theoretically reachable, e.g. via unaccelerated fault-like
* AVIC access, and needs to be handled by KVM to avoid putting the
* guest into an infinite loop. Injecting #UD is somewhat arbitrary,
* but its the least awful option given lack of insight into the guest.
* If KVM is NOT trying to simply skip an instruction, inject #UD if
* KVM reached this point without an instruction buffer. In practice,
* this path should never be hit by a well-behaved guest, e.g. KVM
* doesn't intercept #UD or #GP for SEV guests, but this path is still
* theoretically reachable, e.g. via unaccelerated fault-like AVIC
* access, and needs to be handled by KVM to avoid putting the guest
* into an infinite loop. Injecting #UD is somewhat arbitrary, but
* its the least awful option given lack of insight into the guest.
*
* If KVM is trying to skip an instruction, simply resume the guest.
* If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
* will attempt to re-inject the INT3/INTO and skip the instruction.
* In that scenario, retrying the INT3/INTO and hoping the guest will
* make forward progress is the only option that has a chance of
* success (and in practice it will work the vast majority of the time).
*/
if (unlikely(!insn)) {
kvm_queue_exception(vcpu, UD_VECTOR);
if (!(emul_type & EMULTYPE_SKIP))
kvm_queue_exception(vcpu, UD_VECTOR);
return false;
}
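
This hunk is the SEV #BP/INT3 fix from the summary: when KVM only wants to skip an instruction it cannot fetch (e.g. while re-injecting an INT3 in an SEV guest), it now resumes the guest instead of injecting #UD. A condensed sketch of that decision; the flag value and names are illustrative:

#include <stdbool.h>
#include <stdio.h>

#define EMULTYPE_SKIP	(1 << 0)	/* illustrative flag value */

enum no_insn_action { INJECT_UD, RESUME_GUEST };

/* No instruction bytes available (encrypted guest memory): pick a fallback. */
static enum no_insn_action handle_missing_insn(int emul_type)
{
	if (emul_type & EMULTYPE_SKIP)
		return RESUME_GUEST;	/* retry the INT3/INTO, guest makes progress */
	return INJECT_UD;		/* least-awful option for a full emulation request */
}

int main(void)
{
	printf("skip request   -> %s\n",
	       handle_missing_insn(EMULTYPE_SKIP) == RESUME_GUEST ? "resume guest" : "#UD");
	printf("full emulation -> %s\n",
	       handle_missing_insn(0) == INJECT_UD ? "#UD" : "resume guest");
	return 0;
}
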
@@ -5112,9 +5160,11 @@ static __init int svm_hardware_setup(void)

svm_adjust_mmio_mask();

nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);

/*
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
* may be modified by svm_adjust_mmio_mask()).
* may be modified by svm_adjust_mmio_mask()), as well as nrips.
*/
sev_hardware_setup();

@@ -5126,11 +5176,6 @@ static __init int svm_hardware_setup(void)
goto err;
}

if (nrips) {
if (!boot_cpu_has(X86_FEATURE_NRIPS))
nrips = false;
}

enable_apicv = avic = avic && avic_hardware_setup();

if (!enable_apicv) {
@@ -33,6 +33,7 @@
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern int nrips;
extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
@@ -406,48 +407,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
return test_bit(bit, (unsigned long *)&control->intercepts);
}

static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;

if (!sev_es_guest(svm->vcpu.kvm)) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
}

vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);

recalc_intercepts(svm);
}

static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;

vmcb->control.intercepts[INTERCEPT_DR] = 0;

/* DR7 access must remain intercepted for an SEV-ES guest */
if (sev_es_guest(svm->vcpu.kvm)) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
}

recalc_intercepts(svm);
}

static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -434,6 +434,7 @@
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */

/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */