Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:

 "ARM:

   - A couple of fixes when handling an exception while a SError has been
     delivered

   - Workaround for Cortex-A510's single-step erratum

  RISC-V:

   - Make CY, TM, and IR counters accessible in VU mode

   - Fix SBI implementation version

  x86:

   - Report deprecation of x87 features in supported CPUID

   - Preparation for fixing an interrupt delivery race on AMD hardware

   - Sparse fix

  All except POWER and s390:

   - Rework guest entry code to correctly mark noinstr areas and fix vtime
     accounting (for x86, this was already mostly correct but not entirely;
     for ARM, MIPS and RISC-V it wasn't)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Use ERR_PTR_USR() to return -EFAULT as a __user pointer
  KVM: x86: Report deprecated x87 features in supported CPUID
  KVM: arm64: Workaround Cortex-A510's single-step and PAC trap errata
  KVM: arm64: Stop handle_exit() from handling HVC twice when an SError occurs
  KVM: arm64: Avoid consuming a stale esr value when SError occur
  RISC-V: KVM: Fix SBI implementation version
  RISC-V: KVM: make CY, TM, and IR counters accessible in VU mode
  kvm/riscv: rework guest entry logic
  kvm/arm64: rework guest entry logic
  kvm/x86: rework guest entry logic
  kvm/mips: rework guest entry logic
  kvm: add guest_state_{enter,exit}_irqoff()
  KVM: x86: Move delivery of non-APICv interrupt into vendor code
  kvm: Move KVM_GET_XSAVE2 IOCTL definition at the end of kvm.h
commit 5fdb26213f
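For orientation before the per-architecture hunks below: the entry rework replaces the old guest_enter_irqoff()/guest_exit_irqoff() calls with paired timing and state helpers. The following is a minimal sketch of the ordering those helpers expect, based on the kernel-doc added to include/linux/kvm_host.h in this merge; arch_do_world_switch() is a hypothetical stand-in for the architecture's real non-instrumentable world switch (__kvm_vcpu_run, __kvm_riscv_switch_to, and so on), not code from this series.

static int noinstr arch_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	int ret;

	guest_state_enter_irqoff();		/* enter the RCU extended quiescent state */
	ret = arch_do_world_switch(vcpu);	/* no instrumentation allowed in here */
	guest_state_exit_irqoff();		/* leave the EQS before any instrumented code runs */

	return ret;
}

static int arch_vcpu_run(struct kvm_vcpu *vcpu)
{
	int ret;

	local_irq_disable();
	guest_timing_enter_irqoff();		/* start charging CPU time to the guest */

	ret = arch_vcpu_enter_exit(vcpu);

	/*
	 * Take any interrupt that arrived while the guest ran before leaving
	 * guest timing, so its tick is accounted as guest time.
	 */
	local_irq_enable();
	local_irq_disable();

	guest_timing_exit_irqoff();		/* stop charging CPU time to the guest */
	local_irq_enable();

	return ret;
}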
@@ -100,6 +100,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A510     | #2051678        | ARM64_ERRATUM_2051678       |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A510     | #2077057        | ARM64_ERRATUM_2077057       |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |
@@ -680,6 +680,22 @@ config ARM64_ERRATUM_2051678

	  If unsure, say Y.

config ARM64_ERRATUM_2077057
	bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2"
	help
	  This option adds the workaround for ARM Cortex-A510 erratum 2077057.
	  Affected Cortex-A510 may corrupt SPSR_EL2 when a step exception is
	  expected, but a Pointer Authentication trap is taken instead. The
	  erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
	  EL1 to cause a return to EL2 with a guest controlled ELR_EL2.

	  This can only happen when EL2 is stepping EL1.

	  When these conditions occur, the SPSR_EL2 value is unchanged from the
	  previous guest entry, and can be restored from the in-memory copy.

	  If unsure, say Y.

config ARM64_ERRATUM_2119858
	bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
	default y
@@ -600,6 +600,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
		CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
	},
#endif
#ifdef CONFIG_ARM64_ERRATUM_2077057
	{
		.desc = "ARM erratum 2077057",
		.capability = ARM64_WORKAROUND_2077057,
		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
	},
#endif
#ifdef CONFIG_ARM64_ERRATUM_2064142
	{
		.desc = "ARM erratum 2064142",
@@ -797,6 +797,24 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
			xfer_to_guest_mode_work_pending();
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	int ret;

	guest_state_enter_irqoff();
	ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
	guest_state_exit_irqoff();

	return ret;
}

/**
 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 * @vcpu:	The VCPU pointer
@@ -881,9 +899,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
		 * Enter the guest
		 */
		trace_kvm_entry(*vcpu_pc(vcpu));
		guest_enter_irqoff();
		guest_timing_enter_irqoff();

		ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
		ret = kvm_arm_vcpu_enter_exit(vcpu);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;
@@ -918,26 +936,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
		kvm_arch_vcpu_ctxsync_fp(vcpu);

		/*
		 * We may have taken a host interrupt in HYP mode (ie
		 * while executing the guest). This interrupt is still
		 * pending, as we haven't serviced it yet!
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * We're now back in SVC mode, with interrupts
		 * disabled. Enabling the interrupts now will have
		 * the effect of taking the interrupt again, in SVC
		 * mode this time.
		 * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
		 * context synchronization event) is necessary to ensure that
		 * pending interrupts are taken.
		 */
		local_irq_enable();
		isb();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		/*
		 * We do local_irq_enable() before calling guest_exit() so
		 * that if a timer interrupt hits while running the guest we
		 * account that tick as being spent in the guest. We enable
		 * preemption after calling guest_exit() so that if we get
		 * preempted we make sure ticks after that is not counted as
		 * guest time.
		 */
		guest_exit();
		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));

		/* Exit types that need handling before we can be preempted */
@@ -228,6 +228,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
	struct kvm_run *run = vcpu->run;

	if (ARM_SERROR_PENDING(exception_index)) {
		/*
		 * The SError is handled by handle_exit_early(). If the guest
		 * survives it will re-execute the original instruction.
		 */
		return 1;
	}

	exception_index = ARM_EXCEPTION_CODE(exception_index);

	switch (exception_index) {
@@ -402,6 +402,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
	return false;
}

static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	/*
	 * Check for the conditions of Cortex-A510's #2077057. When these occur
	 * SPSR_EL2 can't be trusted, but isn't needed either as it is
	 * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
	 * Are we single-stepping the guest, and took a PAC exception from the
	 * active-not-pending state?
	 */
	if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
	    vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
	    *vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
	    ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
		write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);

	vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
}

/*
 * Return true when we were able to fixup the guest exit and should return to
 * the guest, false when we should restore the host state and return to the
@@ -413,7 +431,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
	 * Save PSTATE early so that we can evaluate the vcpu mode
	 * early on.
	 */
	vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
	synchronize_vcpu_pstate(vcpu, exit_code);

	/*
	 * Check whether we want to repaint the state one way or
@@ -424,7 +442,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

	if (ARM_SERROR_PENDING(*exit_code)) {
	if (ARM_SERROR_PENDING(*exit_code) &&
	    ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) {
		u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);

		/*
@@ -55,9 +55,10 @@ WORKAROUND_1418040
WORKAROUND_1463225
WORKAROUND_1508412
WORKAROUND_1542419
WORKAROUND_2064142
WORKAROUND_2038923
WORKAROUND_1902691
WORKAROUND_2038923
WORKAROUND_2064142
WORKAROUND_2077057
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
@@ -414,6 +414,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
	return -ENOIOCTLCMD;
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static int noinstr kvm_mips_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	int ret;

	guest_state_enter_irqoff();
	ret = kvm_mips_callbacks->vcpu_run(vcpu);
	guest_state_exit_irqoff();

	return ret;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int r = -EINTR;
@@ -434,7 +452,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
	lose_fpu(1);

	local_irq_disable();
	guest_enter_irqoff();
	guest_timing_enter_irqoff();
	trace_kvm_enter(vcpu);

	/*
@@ -445,10 +463,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
	 */
	smp_store_mb(vcpu->mode, IN_GUEST_MODE);

	r = kvm_mips_callbacks->vcpu_run(vcpu);
	r = kvm_mips_vcpu_enter_exit(vcpu);

	/*
	 * We must ensure that any pending interrupts are taken before
	 * we exit guest timing so that timer ticks are accounted as
	 * guest time. Transiently unmask interrupts so that any
	 * pending interrupts are taken.
	 *
	 * TODO: is there a barrier which ensures that pending interrupts are
	 * recognised? Currently this just hopes that the CPU takes any pending
	 * interrupts between the enable and disable.
	 */
	local_irq_enable();
	local_irq_disable();

	trace_kvm_out(vcpu);
	guest_exit_irqoff();
	guest_timing_exit_irqoff();
	local_irq_enable();

out:
@@ -1168,7 +1199,7 @@ static void kvm_mips_set_c0_status(void)
/*
 * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
 */
int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;
	u32 cause = vcpu->arch.host_cp0_cause;
@@ -1357,6 +1388,17 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
	return ret;
}

int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
{
	int ret;

	guest_state_exit_irqoff();
	ret = __kvm_mips_handle_exit(vcpu);
	guest_state_enter_irqoff();

	return ret;
}

/* Enable FPU for guest and restore context */
void kvm_own_fpu(struct kvm_vcpu *vcpu)
{
@@ -90,6 +90,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct kvm_cpu_context *cntx;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;

	/* Mark this VCPU never ran */
	vcpu->arch.ran_atleast_once = false;
@@ -106,6 +107,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;

	/* By default, make CY, TM, and IR counters accessible in VU mode */
	reset_csr->scounteren = 0x7;

	/* Setup VCPU timer */
	kvm_riscv_vcpu_timer_init(vcpu);

@@ -699,6 +703,20 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
	csr_write(CSR_HVIP, csr->hvip);
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	guest_state_enter_irqoff();
	__kvm_riscv_switch_to(&vcpu->arch);
	guest_state_exit_irqoff();
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
@@ -790,9 +808,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
			continue;
		}

		guest_enter_irqoff();
		guest_timing_enter_irqoff();

		__kvm_riscv_switch_to(&vcpu->arch);
		kvm_riscv_vcpu_enter_exit(vcpu);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;
@@ -812,25 +830,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We may have taken a host interrupt in VS/VU-mode (i.e.
		 * while executing the guest). This interrupt is still
		 * pending, as we haven't serviced it yet!
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * We're now back in HS-mode with interrupts disabled
		 * so enabling the interrupts now will have the effect
		 * of taking the interrupt again, in HS-mode this time.
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		/*
		 * We do local_irq_enable() before calling guest_exit() so
		 * that if a timer interrupt hits while running the guest
		 * we account that tick as being spent in the guest. We
		 * enable preemption after calling guest_exit() so that if
		 * we get preempted we make sure ticks after that is not
		 * counted as guest time.
		 */
		guest_exit();
		guest_timing_exit_irqoff();

		local_irq_enable();

		preempt_enable();
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/version.h>
#include <asm/csr.h>
#include <asm/sbi.h>
#include <asm/kvm_vcpu_timer.h>
@@ -32,7 +33,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
		*out_val = KVM_SBI_IMPID;
		break;
	case SBI_EXT_BASE_GET_IMP_VERSION:
		*out_val = 0;
		*out_val = LINUX_VERSION_CODE;
		break;
	case SBI_EXT_BASE_PROBE_EXT:
		if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
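With the hunk above, SBI_EXT_BASE_GET_IMP_VERSION now reports LINUX_VERSION_CODE instead of 0. That value is packed as KERNEL_VERSION(major, minor, patch), i.e. (major << 16) | (minor << 8) | patch, so a guest that has already fetched it (the impl_version parameter below is assumed, and the helper is an illustrative sketch rather than code from this series) could decode it like this:

/* Illustrative only: decode the value returned by SBI_EXT_BASE_GET_IMP_VERSION. */
static void report_kvm_sbi_impl_version(unsigned long impl_version)
{
	unsigned long major = (impl_version >> 16) & 0xffff;
	unsigned long minor = (impl_version >> 8) & 0xff;
	unsigned long patch = impl_version & 0xff;

	pr_info("KVM SBI implementation based on Linux %lu.%lu.%lu\n",
		major, minor, patch);
}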
@@ -82,7 +82,7 @@ KVM_X86_OP_NULL(guest_apic_has_interrupt)
KVM_X86_OP(load_eoi_exitmap)
KVM_X86_OP(set_virtual_apic_mode)
KVM_X86_OP_NULL(set_apic_access_page_addr)
KVM_X86_OP(deliver_posted_interrupt)
KVM_X86_OP(deliver_interrupt)
KVM_X86_OP_NULL(sync_pir_to_irr)
KVM_X86_OP(set_tss_addr)
KVM_X86_OP(set_identity_map_addr)
@@ -1410,7 +1410,8 @@ struct kvm_x86_ops {
	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
	void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
	int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
	void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
				  int trig_mode, int vector);
	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
	int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
@@ -554,12 +554,13 @@ void kvm_set_cpu_caps(void)
	);

	kvm_cpu_cap_mask(CPUID_7_0_EBX,
		F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
		F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
	);
		F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) |
		F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) |
		F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) |
		F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) |
		F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) |
		F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) |
		F(AVX512VL));

	kvm_cpu_cap_mask(CPUID_7_ECX,
		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
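The two features newly advertised above (FDP_EXCPTN_ONLY and ZERO_FCS_FDS) tell a guest that the x87 FDP is only updated on unmasked exceptions and that the FPU CS/DS fields are deprecated. A guest-side check might look like the sketch below; the bit positions (CPUID.(EAX=7,ECX=0):EBX bit 6 and bit 13) come from the Intel SDM, not from this diff, and the program is illustrative only.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 7, subleaf 0: structured extended feature flags */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("FDP_EXCPTN_ONLY=%d ZERO_FCS_FDS=%d\n",
	       !!(ebx & (1u << 6)),	/* FDP updated only on x87 exceptions */
	       !!(ebx & (1u << 13)));	/* FPU CS/DS deprecated */
	return 0;
}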
@@ -1096,14 +1096,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
					       apic->regs + APIC_TMR);
		}

		if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
			kvm_lapic_set_irr(vector, apic);
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		} else {
			trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
						   trig_mode, vector);
		}
		static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
						       trig_mode, vector);
		break;

	case APIC_DM_REMRD:
@@ -3291,6 +3291,21 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
}

static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
				  int trig_mode, int vector)
{
	struct kvm_vcpu *vcpu = apic->vcpu;

	if (svm_deliver_avic_intr(vcpu, vector)) {
		kvm_lapic_set_irr(vector, apic);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
	} else {
		trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
					   trig_mode, vector);
	}
}

static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3615,7 +3630,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
	struct vcpu_svm *svm = to_svm(vcpu);
	unsigned long vmcb_pa = svm->current_vmcb->pa;

	kvm_guest_enter_irqoff();
	guest_state_enter_irqoff();

	if (sev_es_guest(vcpu->kvm)) {
		__svm_sev_es_vcpu_run(vmcb_pa);
@@ -3635,7 +3650,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
		vmload(__sme_page_pa(sd->save_area));
	}

	kvm_guest_exit_irqoff();
	guest_state_exit_irqoff();
}

static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -4545,7 +4560,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
	.pmu_ops = &amd_pmu_ops,
	.nested_ops = &svm_nested_ops,

	.deliver_posted_interrupt = svm_deliver_avic_intr,
	.deliver_interrupt = svm_deliver_interrupt,
	.dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
	.update_pi_irte = svm_update_pi_irte,
	.setup_mce = svm_setup_mce,
@@ -4041,6 +4041,21 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
	return 0;
}

static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
				  int trig_mode, int vector)
{
	struct kvm_vcpu *vcpu = apic->vcpu;

	if (vmx_deliver_posted_interrupt(vcpu, vector)) {
		kvm_lapic_set_irr(vector, apic);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
	} else {
		trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
					   trig_mode, vector);
	}
}

/*
 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
 * will not change in the lifetime of the guest.
@@ -6754,7 +6769,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					struct vcpu_vmx *vmx)
{
	kvm_guest_enter_irqoff();
	guest_state_enter_irqoff();

	/* L1D Flush includes CPU buffer clear to mitigate MDS */
	if (static_branch_unlikely(&vmx_l1d_should_flush))
@@ -6770,7 +6785,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,

	vcpu->arch.cr2 = native_read_cr2();

	kvm_guest_exit_irqoff();
	guest_state_exit_irqoff();
}

static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@ -7768,7 +7783,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
	.hwapic_isr_update = vmx_hwapic_isr_update,
	.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
	.sync_pir_to_irr = vmx_sync_pir_to_irr,
	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
	.deliver_interrupt = vmx_deliver_interrupt,
	.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,

	.set_tss_addr = vmx_set_tss_addr,
@@ -90,6 +90,8 @@
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);

#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))

#define emul_to_vcpu(ctxt) \
	((struct kvm_vcpu *)(ctxt)->vcpu)

@@ -4340,7 +4342,7 @@ static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
	void __user *uaddr = (void __user*)(unsigned long)attr->addr;

	if ((u64)(unsigned long)uaddr != attr->addr)
		return ERR_PTR(-EFAULT);
		return ERR_PTR_USR(-EFAULT);
	return uaddr;
}

@@ -10041,6 +10043,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		set_debugreg(0, 7);
	}

	guest_timing_enter_irqoff();

	for (;;) {
		/*
		 * Assert that vCPU vs. VM APICv state is consistent. An APICv
@@ -10125,7 +10129,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
	 * of accounting via context tracking, but the loss of accuracy is
	 * acceptable for all known use cases.
	 */
	vtime_account_guest_exit();
	guest_timing_exit_irqoff();

	if (lapic_in_kernel(vcpu)) {
		s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
@@ -11639,8 +11643,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
	kvm_free_pit(kvm);
}

#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))

/**
 * __x86_set_memory_region: Setup KVM internal memory slot
 *
@@ -10,51 +10,6 @@

void kvm_spurious_fault(void);

static __always_inline void kvm_guest_enter_irqoff(void)
{
	/*
	 * VMENTER enables interrupts (host state), but the kernel state is
	 * interrupts disabled when this is invoked. Also tell RCU about
	 * it. This is the same logic as for exit_to_user_mode().
	 *
	 * This ensures that e.g. latency analysis on the host observes
	 * guest mode as interrupt enabled.
	 *
	 * guest_enter_irqoff() informs context tracking about the
	 * transition to guest mode and if enabled adjusts RCU state
	 * accordingly.
	 */
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	instrumentation_end();

	guest_enter_irqoff();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

static __always_inline void kvm_guest_exit_irqoff(void)
{
	/*
	 * VMEXIT disables interrupts (host state), but tracing and lockdep
	 * have them in state 'on' as recorded before entering guest mode.
	 * Same as enter_from_user_mode().
	 *
	 * context_tracking_guest_exit() restores host context and reinstates
	 * RCU if enabled and required.
	 *
	 * This needs to be done immediately after VM-Exit, before any code
	 * that might contain tracepoints or call out to the greater world,
	 * e.g. before x86_spec_ctrl_restore_host().
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	context_tracking_guest_exit();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	instrumentation_end();
}

#define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \
({ \
	bool failed = (consistency_check); \
@@ -29,7 +29,9 @@
#include <linux/refcount.h>
#include <linux/nospec.h>
#include <linux/notifier.h>
#include <linux/ftrace.h>
#include <linux/hashtable.h>
#include <linux/instrumentation.h>
#include <linux/interval_tree.h>
#include <linux/rbtree.h>
#include <linux/xarray.h>
@@ -368,8 +370,11 @@ struct kvm_vcpu {
	u64 last_used_slot_gen;
};

/* must be called with irqs disabled */
static __always_inline void guest_enter_irqoff(void)
/*
 * Start accounting time towards a guest.
 * Must be called before entering guest context.
 */
static __always_inline void guest_timing_enter_irqoff(void)
{
	/*
	 * This is running in ioctl context so its safe to assume that it's the
@@ -378,7 +383,18 @@ static __always_inline void guest_enter_irqoff(void)
	instrumentation_begin();
	vtime_account_guest_enter();
	instrumentation_end();
}

/*
 * Enter guest context and enter an RCU extended quiescent state.
 *
 * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
 * unsafe to use any code which may directly or indirectly use RCU, tracing
 * (including IRQ flag tracing), or lockdep. All code in this period must be
 * non-instrumentable.
 */
static __always_inline void guest_context_enter_irqoff(void)
{
	/*
	 * KVM does not hold any references to rcu protected data when it
	 * switches CPU into a guest mode. In fact switching to a guest mode
@@ -394,16 +410,79 @@ static __always_inline void guest_enter_irqoff(void)
	}
}

static __always_inline void guest_exit_irqoff(void)
/*
 * Deprecated. Architectures should move to guest_timing_enter_irqoff() and
 * guest_state_enter_irqoff().
 */
static __always_inline void guest_enter_irqoff(void)
{
	guest_timing_enter_irqoff();
	guest_context_enter_irqoff();
}

/**
 * guest_state_enter_irqoff - Fixup state when entering a guest
 *
 * Entry to a guest will enable interrupts, but the kernel state is interrupts
 * disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Tell lockdep that interrupts are enabled
 *
 * Invoked from architecture specific code before entering a guest.
 * Must be called with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke guest_timing_enter_irqoff() before this.
 *
 * Note: this is analogous to exit_to_user_mode().
 */
static __always_inline void guest_state_enter_irqoff(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	instrumentation_end();

	guest_context_enter_irqoff();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

/*
 * Exit guest context and exit an RCU extended quiescent state.
 *
 * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
 * unsafe to use any code which may directly or indirectly use RCU, tracing
 * (including IRQ flag tracing), or lockdep. All code in this period must be
 * non-instrumentable.
 */
static __always_inline void guest_context_exit_irqoff(void)
{
	context_tracking_guest_exit();
}

/*
 * Stop accounting time towards a guest.
 * Must be called after exiting guest context.
 */
static __always_inline void guest_timing_exit_irqoff(void)
{
	instrumentation_begin();
	/* Flush the guest cputime we spent on the guest */
	vtime_account_guest_exit();
	instrumentation_end();
}

/*
 * Deprecated. Architectures should move to guest_state_exit_irqoff() and
 * guest_timing_exit_irqoff().
 */
static __always_inline void guest_exit_irqoff(void)
{
	guest_context_exit_irqoff();
	guest_timing_exit_irqoff();
}

static inline void guest_exit(void)
{
	unsigned long flags;
@@ -413,6 +492,33 @@ static inline void guest_exit(void)
	local_irq_restore(flags);
}

/**
 * guest_state_exit_irqoff - Establish state when returning from guest mode
 *
 * Entry from a guest disables interrupts, but guest mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 *
 * Invoked from architecture specific code after exiting a guest.
 * Must be invoked with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke guest_timing_exit_irqoff() after this.
 *
 * Note: this is analogous to enter_from_user_mode().
 */
static __always_inline void guest_state_exit_irqoff(void)
{
	lockdep_hardirqs_off(CALLER_ADDR0);
	guest_context_exit_irqoff();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	instrumentation_end();
}

static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
	/*
@@ -1624,9 +1624,6 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET	_IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET	_IO(KVMIO, 0xc4)

/* Available with KVM_CAP_XSAVE2 */
#define KVM_GET_XSAVE2		_IOR(KVMIO, 0xcf, struct kvm_xsave)

struct kvm_s390_pv_sec_parm {
	__u64 origin;
	__u64 length;
@@ -2048,4 +2045,7 @@ struct kvm_stats_desc {

#define KVM_GET_STATS_FD	_IO(KVMIO, 0xce)

/* Available with KVM_CAP_XSAVE2 */
#define KVM_GET_XSAVE2		_IOR(KVMIO, 0xcf, struct kvm_xsave)

#endif /* __LINUX_KVM_H */
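For reference, the relocated KVM_GET_XSAVE2 ioctl is meant to be paired with KVM_CAP_XSAVE2: userspace sizes the buffer from the KVM_CHECK_EXTENSION return value (at least sizeof(struct kvm_xsave)) before issuing the vCPU ioctl. The helper below is a rough userspace sketch under those assumptions (error handling trimmed; vm_fd and vcpu_fd are assumed to be an already created VM and vCPU, and headers from a 5.17-era kernel are assumed so that KVM_CAP_XSAVE2 and KVM_GET_XSAVE2 are defined).

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static struct kvm_xsave *get_xsave2(int vm_fd, int vcpu_fd)
{
	/* KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) reports the buffer size KVM expects. */
	int size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE2);
	struct kvm_xsave *xsave;

	if (size < (int)sizeof(struct kvm_xsave))
		size = sizeof(struct kvm_xsave);	/* capability absent: fall back to the legacy size */

	xsave = calloc(1, size);
	if (xsave && ioctl(vcpu_fd, KVM_GET_XSAVE2, xsave) < 0) {
		free(xsave);
		xsave = NULL;
	}
	return xsave;
}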