ARM64:

* Properly reset the SVE/SME flags on vcpu load

* Fix a vgic-v2 regression regarding accessing the pending state of a HW
  interrupt from userspace (and make the code common with vgic-v3)

* Fix access to the idreg range for protected guests

* Ignore 'kvm-arm.mode=protected' when using VHE

* Return an error from kvm_arch_init_vm() on allocation failure

* A bunch of small cleanups (comments, annotations, indentation)

RISC-V:

* Typo fix in arch/riscv/kvm/vmid.c

* Remove broken reference pattern from MAINTAINERS entry

x86-64:

* Fix error in page tables with MKTME enabled

* Dirty page tracking performance test extended to running a nested guest

* Disable APICv/AVIC in cases that it cannot implement correctly
-----BEGIN PGP SIGNATURE-----

iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmKjTIAUHHBib256aW5p
QHJlZGhhdC5jb20ACgkQv/vSX3jHroNhPQgAiIVtp8aepujUM/NhkNyK3SIdLzlS
oZCZiS6bvaecKXi/QvhBU0EBxAEyrovk3lmVuYNd41xI+PDjyaA4SDIl5DnToGUw
bVPNFSYqjpF939vUUKjc0RCdZR4o5g3Od3tvWoHTHviS1a8aAe5o9pcpHpD0D6Mp
Gc/o58nKAOPl3htcFKmjymqo3Y6yvkJU9NB7DCbL8T5mp5pJ959Mw1/LlmBaAzJC
OofrynUm4NjMyAj/mAB1FhHKFyQfjBXLhiVlS0SLiiEA/tn9/OXyVFMKG+n5VkAZ
Q337GMFe2RikEIuMEr3Rc4qbZK3PpxHhaj+6MPRuM0ho/P4yzl2Nyb/OhA==
=h81Q
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "While last week's pull request contained miscellaneous fixes for x86,
  this one covers other architectures, selftests changes, and a bigger
  series for APIC virtualization bugs that were discovered during 5.20
  development. The idea is to base 5.20 development for KVM on top of
  this tag.

  ARM64:

   - Properly reset the SVE/SME flags on vcpu load

   - Fix a vgic-v2 regression regarding accessing the pending state of
     a HW interrupt from userspace (and make the code common with
     vgic-v3)

   - Fix access to the idreg range for protected guests

   - Ignore 'kvm-arm.mode=protected' when using VHE

   - Return an error from kvm_arch_init_vm() on allocation failure

   - A bunch of small cleanups (comments, annotations, indentation)

  RISC-V:

   - Typo fix in arch/riscv/kvm/vmid.c

   - Remove broken reference pattern from MAINTAINERS entry

  x86-64:

   - Fix error in page tables with MKTME enabled

   - Dirty page tracking performance test extended to running a nested
     guest

   - Disable APICv/AVIC in cases that it cannot implement correctly"

[ This merge also fixes a misplaced end parenthesis bug introduced in
  commit 3743c2f02517 ("KVM: x86: inhibit APICv/AVIC on changes to APIC
  ID or APIC base") pointed out by Sean Christopherson ]

Link: https://lore.kernel.org/all/20220610191813.371682-1-seanjc@google.com/

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (34 commits)
  KVM: selftests: Restrict test region to 48-bit physical addresses when using nested
  KVM: selftests: Add option to run dirty_log_perf_test vCPUs in L2
  KVM: selftests: Clean up LIBKVM files in Makefile
  KVM: selftests: Link selftests directly with lib object files
  KVM: selftests: Drop unnecessary rule for STATIC_LIBS
  KVM: selftests: Add a helper to check EPT/VPID capabilities
  KVM: selftests: Move VMX_EPT_VPID_CAP_AD_BITS to vmx.h
  KVM: selftests: Refactor nested_map() to specify target level
  KVM: selftests: Drop stale function parameter comment for nested_map()
  KVM: selftests: Add option to create 2M and 1G EPT mappings
  KVM: selftests: Replace x86_page_size with PG_LEVEL_XX
  KVM: x86: SVM: fix nested PAUSE filtering when L0 intercepts PAUSE
  KVM: x86: SVM: drop preempt-safe wrappers for avic_vcpu_load/put
  KVM: x86: disable preemption around the call to kvm_arch_vcpu_{un|}blocking
  KVM: x86: disable preemption while updating apicv inhibition
  KVM: x86: SVM: fix avic_kick_target_vcpus_fast
  KVM: x86: SVM: remove avic's broken code that updated APIC ID
  KVM: x86: inhibit APICv/AVIC on changes to APIC ID or APIC base
  KVM: x86: document AVIC/APICv inhibit reasons
  KVM: x86/mmu: Set memory encryption "value", not "mask", in shadow PDPTRs
  ...
commit 24625f7d91
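Among the selftests changes pulled in here, the x86 helpers drop enum x86_page_size in favor of the generic PG_LEVEL_* constants (with __virt_pg_map() now taking the target level directly), and dirty_log_perf_test gains a -n option that runs its vCPUs in L2. The fragment below is only a minimal sketch of the reworked mapping API: __virt_pg_map(), PG_LEVEL_2M and the two headers are the selftests symbols touched by this merge, while the wrapper function itself is hypothetical.

/* Sketch only: map a single 2M page in a selftest VM with the new level-based API. */
#include <stdint.h>

#include "kvm_util.h"
#include "processor.h"

static void map_one_2m_page(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
	/* Before this series the call was __virt_pg_map(vm, gva, gpa, X86_PAGE_SIZE_2M). */
	__virt_pg_map(vm, gva, gpa, PG_LEVEL_2M);
}

On the dirty logging side, an invocation along the lines of ./dirty_log_perf_test -n -v 4 -b 1G would then run the usual measurement with the vCPUs bounced into L2, provided nested VMX is available on the host.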
@ -2469,7 +2469,6 @@
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
Not valid if the kernel is running in EL2.

Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
@ -10872,7 +10872,6 @@ F: arch/riscv/include/asm/kvm*
F: arch/riscv/include/uapi/asm/kvm*
F: arch/riscv/kvm/
F: tools/testing/selftests/kvm/*/riscv/
F: tools/testing/selftests/kvm/riscv/

KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com>
@ -362,11 +362,6 @@ struct kvm_vcpu_arch {
|
||||
struct arch_timer_cpu timer_cpu;
|
||||
struct kvm_pmu pmu;
|
||||
|
||||
/*
|
||||
* Anything that is not used directly from assembly code goes
|
||||
* here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Guest registers we preserve during guest debugging.
|
||||
*
|
||||
|
@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void)
|
||||
/*
|
||||
* Code only run in VHE/NVHE hyp context can assume VHE is present or
|
||||
* absent. Otherwise fall back to caps.
|
||||
* This allows the compiler to discard VHE-specific code from the
|
||||
* nVHE object, reducing the number of external symbol references
|
||||
* needed to link.
|
||||
*/
|
||||
if (is_vhe_hyp_code())
|
||||
return true;
|
||||
|
@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
|
||||
#ifdef CONFIG_KVM
|
||||
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
|
||||
{
|
||||
if (kvm_get_mode() != KVM_MODE_PROTECTED)
|
||||
return false;
|
||||
|
||||
if (is_kernel_in_hyp_mode()) {
|
||||
pr_warn("Protected KVM not available with VHE\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return kvm_get_mode() == KVM_MODE_PROTECTED;
|
||||
}
|
||||
#endif /* CONFIG_KVM */
|
||||
|
||||
|
@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid)
|
||||
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
|
||||
struct arch_timer_context *timer;
|
||||
|
||||
if (WARN(!vcpu, "No vcpu context!\n"))
|
||||
return false;
|
||||
|
||||
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
|
||||
timer = vcpu_vtimer(vcpu);
|
||||
else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
|
||||
|
@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
if (ret)
|
||||
goto out_free_stage2_pgd;
|
||||
|
||||
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
|
||||
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_stage2_pgd;
|
||||
}
|
||||
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
|
||||
|
||||
kvm_vgic_early_init(kvm);
|
||||
@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(arg, "protected") == 0) {
|
||||
kvm_mode = KVM_MODE_PROTECTED;
|
||||
if (!is_kernel_in_hyp_mode())
|
||||
kvm_mode = KVM_MODE_PROTECTED;
|
||||
else
|
||||
pr_warn_once("Protected KVM not available with VHE\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
|
||||
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
|
||||
|
||||
vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
|
||||
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
|
||||
|
||||
@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
* operations. Do this for ZA as well for now for simplicity.
|
||||
*/
|
||||
if (system_supports_sme()) {
|
||||
vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
|
||||
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
|
||||
|
||||
|
@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
|
||||
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
|
||||
}
|
||||
|
||||
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
|
||||
{
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
|
||||
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
|
||||
addr, size, &host_s2_pool, owner_id);
|
||||
}
|
||||
|
@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
|
||||
case SYS_ID_AA64MMFR2_EL1:
|
||||
return get_pvm_id_aa64mmfr2(vcpu);
|
||||
default:
|
||||
/*
|
||||
* Should never happen because all cases are covered in
|
||||
* pvm_sys_reg_descs[].
|
||||
*/
|
||||
WARN_ON(1);
|
||||
break;
|
||||
/* Unhandled ID register, RAZ */
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
||||
@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
|
||||
/* Mark the specified system register as an AArch64 feature id register. */
|
||||
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
|
||||
|
||||
/*
|
||||
* sys_reg_desc initialiser for architecturally unallocated cpufeature ID
|
||||
* register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
|
||||
* (1 <= crm < 8, 0 <= Op2 < 8).
|
||||
*/
|
||||
#define ID_UNALLOCATED(crm, op2) { \
|
||||
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
|
||||
.access = pvm_access_id_aarch64, \
|
||||
}
|
||||
|
||||
/* Mark the specified system register as Read-As-Zero/Write-Ignored */
|
||||
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
|
||||
|
||||
@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
|
||||
AARCH32(SYS_MVFR0_EL1),
|
||||
AARCH32(SYS_MVFR1_EL1),
|
||||
AARCH32(SYS_MVFR2_EL1),
|
||||
ID_UNALLOCATED(3,3),
|
||||
AARCH32(SYS_ID_PFR2_EL1),
|
||||
AARCH32(SYS_ID_DFR1_EL1),
|
||||
AARCH32(SYS_ID_MMFR5_EL1),
|
||||
ID_UNALLOCATED(3,7),
|
||||
|
||||
/* AArch64 ID registers */
|
||||
/* CRm=4 */
|
||||
AARCH64(SYS_ID_AA64PFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64PFR1_EL1),
|
||||
ID_UNALLOCATED(4,2),
|
||||
ID_UNALLOCATED(4,3),
|
||||
AARCH64(SYS_ID_AA64ZFR0_EL1),
|
||||
ID_UNALLOCATED(4,5),
|
||||
ID_UNALLOCATED(4,6),
|
||||
ID_UNALLOCATED(4,7),
|
||||
AARCH64(SYS_ID_AA64DFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64DFR1_EL1),
|
||||
ID_UNALLOCATED(5,2),
|
||||
ID_UNALLOCATED(5,3),
|
||||
AARCH64(SYS_ID_AA64AFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64AFR1_EL1),
|
||||
ID_UNALLOCATED(5,6),
|
||||
ID_UNALLOCATED(5,7),
|
||||
AARCH64(SYS_ID_AA64ISAR0_EL1),
|
||||
AARCH64(SYS_ID_AA64ISAR1_EL1),
|
||||
AARCH64(SYS_ID_AA64ISAR2_EL1),
|
||||
ID_UNALLOCATED(6,3),
|
||||
ID_UNALLOCATED(6,4),
|
||||
ID_UNALLOCATED(6,5),
|
||||
ID_UNALLOCATED(6,6),
|
||||
ID_UNALLOCATED(6,7),
|
||||
AARCH64(SYS_ID_AA64MMFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64MMFR1_EL1),
|
||||
AARCH64(SYS_ID_AA64MMFR2_EL1),
|
||||
ID_UNALLOCATED(7,3),
|
||||
ID_UNALLOCATED(7,4),
|
||||
ID_UNALLOCATED(7,5),
|
||||
ID_UNALLOCATED(7,6),
|
||||
ID_UNALLOCATED(7,7),
|
||||
|
||||
/* Scalable Vector Registers are restricted. */
|
||||
|
||||
|
@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_spending,
|
||||
NULL, vgic_uaccess_write_spending, 1,
|
||||
vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
NULL, vgic_uaccess_write_cpending, 1,
|
||||
vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
|
||||
vgic_mmio_read_active, vgic_mmio_write_sactive,
|
||||
|
@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
u32 value = 0;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* pending state of interrupt is latched in pending_latch variable.
|
||||
* Userspace will save and restore pending state and line_level
|
||||
* separately.
|
||||
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
|
||||
* for handling of ISPENDR and ICPENDR.
|
||||
*/
|
||||
for (i = 0; i < len * 8; i++) {
|
||||
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
||||
bool state = irq->pending_latch;
|
||||
|
||||
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
||||
int err;
|
||||
|
||||
err = irq_get_irqchip_state(irq->host_irq,
|
||||
IRQCHIP_STATE_PENDING,
|
||||
&state);
|
||||
WARN_ON(err);
|
||||
}
|
||||
|
||||
if (state)
|
||||
value |= (1U << i);
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_spending,
|
||||
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
|
||||
vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_spending,
|
||||
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
|
||||
vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
|
@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
static unsigned long __read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
bool is_user)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
u32 value = 0;
|
||||
@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
unsigned long flags;
|
||||
bool val;
|
||||
|
||||
/*
|
||||
* When used from userspace with a GICv3 model:
|
||||
*
|
||||
* Pending state of interrupt is latched in pending_latch
|
||||
* variable. Userspace will save and restore pending state
|
||||
* and line_level separately.
|
||||
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
|
||||
* for handling of ISPENDR and ICPENDR.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
||||
int err;
|
||||
@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
IRQCHIP_STATE_PENDING,
|
||||
&val);
|
||||
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
|
||||
} else if (vgic_irq_is_mapped_level(irq)) {
|
||||
} else if (!is_user && vgic_irq_is_mapped_level(irq)) {
|
||||
val = vgic_get_phys_line_level(irq);
|
||||
} else {
|
||||
val = irq_is_pending(irq);
|
||||
switch (vcpu->kvm->arch.vgic.vgic_model) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
if (is_user) {
|
||||
val = irq->pending_latch;
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
default:
|
||||
val = irq_is_pending(irq);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
value |= ((u32)val << i);
|
||||
@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
return value;
|
||||
}
|
||||
|
||||
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
return __read_pending(vcpu, addr, len, false);
|
||||
}
|
||||
|
||||
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
return __read_pending(vcpu, addr, len, true);
|
||||
}
|
||||
|
||||
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
|
||||
{
|
||||
return (vgic_irq_is_sgi(irq->intid) &&
|
||||
|
@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
|
||||
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len);
|
||||
|
||||
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len);
|
||||
|
||||
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val);
|
||||
|
@ -66,7 +66,7 @@ static void flush_context(void)
|
||||
* the next context-switch, we broadcast TLB flush + I-cache
|
||||
* invalidation over the inner shareable domain on rollover.
|
||||
*/
|
||||
kvm_call_hyp(__kvm_flush_vm_context);
|
||||
kvm_call_hyp(__kvm_flush_vm_context);
|
||||
}
|
||||
|
||||
static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)
|
||||
|
@ -97,7 +97,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
|
||||
* We ran out of VMIDs so we increment vmid_version and
|
||||
* start assigning VMIDs from 1.
|
||||
*
|
||||
* This also means existing VMIDs assignement to all Guest
|
||||
* This also means existing VMIDs assignment to all Guest
|
||||
* instances is invalid and we have force VMID re-assignement
|
||||
* for all Guest instances. The Guest instances that were not
|
||||
* running will automatically pick-up new VMIDs because will
|
||||
|
@ -1047,14 +1047,77 @@ struct kvm_x86_msr_filter {
|
||||
};
|
||||
|
||||
enum kvm_apicv_inhibit {
|
||||
|
||||
/********************************************************************/
|
||||
/* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
|
||||
/********************************************************************/
|
||||
|
||||
/*
|
||||
* APIC acceleration is disabled by a module parameter
|
||||
* and/or not supported in hardware.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_DISABLE,
|
||||
|
||||
/*
|
||||
* APIC acceleration is inhibited because AutoEOI feature is
|
||||
* being used by a HyperV guest.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_HYPERV,
|
||||
APICV_INHIBIT_REASON_NESTED,
|
||||
APICV_INHIBIT_REASON_IRQWIN,
|
||||
APICV_INHIBIT_REASON_PIT_REINJ,
|
||||
APICV_INHIBIT_REASON_X2APIC,
|
||||
APICV_INHIBIT_REASON_BLOCKIRQ,
|
||||
|
||||
/*
|
||||
* APIC acceleration is inhibited because the userspace didn't yet
|
||||
* enable the kernel/split irqchip.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_ABSENT,
|
||||
|
||||
/* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
|
||||
* (out of band, debug measure of blocking all interrupts on this vCPU)
|
||||
* was enabled, to avoid AVIC/APICv bypassing it.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_BLOCKIRQ,
|
||||
|
||||
/*
|
||||
* For simplicity, the APIC acceleration is inhibited
|
||||
* first time either APIC ID or APIC base are changed by the guest
|
||||
* from their reset values.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
|
||||
APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,
|
||||
|
||||
/******************************************************/
|
||||
/* INHIBITs that are relevant only to the AMD's AVIC. */
|
||||
/******************************************************/
|
||||
|
||||
/*
|
||||
* AVIC is inhibited on a vCPU because it runs a nested guest.
|
||||
*
|
||||
* This is needed because unlike APICv, the peers of this vCPU
|
||||
* cannot use the doorbell mechanism to signal interrupts via AVIC when
|
||||
* a vCPU runs nested.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_NESTED,
|
||||
|
||||
/*
|
||||
* On SVM, the wait for the IRQ window is implemented with pending vIRQ,
|
||||
* which cannot be injected when the AVIC is enabled, thus AVIC
|
||||
* is inhibited while KVM waits for IRQ window.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_IRQWIN,
|
||||
|
||||
/*
|
||||
* PIT (i8254) 're-inject' mode, relies on EOI intercept,
|
||||
* which AVIC doesn't support for edge triggered interrupts.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_PIT_REINJ,
|
||||
|
||||
/*
|
||||
* AVIC is inhibited because the guest has x2apic in its CPUID.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_X2APIC,
|
||||
|
||||
/*
|
||||
* AVIC is disabled because SEV doesn't support it.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_SEV,
|
||||
};
|
||||
|
||||
|
@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
|
||||
{
|
||||
struct kvm *kvm = apic->vcpu->kvm;
|
||||
|
||||
if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
|
||||
return;
|
||||
|
||||
if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
|
||||
return;
|
||||
|
||||
kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
|
||||
}
|
||||
|
||||
static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
|
||||
|
||||
switch (reg) {
|
||||
case APIC_ID: /* Local APIC ID */
|
||||
if (!apic_x2apic_mode(apic))
|
||||
if (!apic_x2apic_mode(apic)) {
|
||||
kvm_apic_set_xapic_id(apic, val >> 24);
|
||||
else
|
||||
kvm_lapic_xapic_id_updated(apic);
|
||||
} else {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case APIC_TASKPRI:
|
||||
@ -2336,8 +2351,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
MSR_IA32_APICBASE_BASE;
|
||||
|
||||
if ((value & MSR_IA32_APICBASE_ENABLE) &&
|
||||
apic->base_address != APIC_DEFAULT_PHYS_BASE)
|
||||
pr_warn_once("APIC base relocation is unsupported by KVM");
|
||||
apic->base_address != APIC_DEFAULT_PHYS_BASE) {
|
||||
kvm_set_apicv_inhibit(apic->vcpu->kvm,
|
||||
APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
|
||||
@ -2648,6 +2665,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
|
||||
icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
|
||||
__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
|
||||
}
|
||||
} else {
|
||||
kvm_lapic_xapic_id_updated(vcpu->arch.apic);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -3411,7 +3411,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
||||
root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
|
||||
i << 30, PT32_ROOT_LEVEL, true);
|
||||
mmu->pae_root[i] = root | PT_PRESENT_MASK |
|
||||
shadow_me_mask;
|
||||
shadow_me_value;
|
||||
}
|
||||
mmu->root.hpa = __pa(mmu->pae_root);
|
||||
} else {
|
||||
|
@ -291,58 +291,91 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
|
||||
static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
|
||||
u32 icrl, u32 icrh, u32 index)
|
||||
{
|
||||
u32 dest, apic_id;
|
||||
struct kvm_vcpu *vcpu;
|
||||
u32 l1_physical_id, dest;
|
||||
struct kvm_vcpu *target_vcpu;
|
||||
int dest_mode = icrl & APIC_DEST_MASK;
|
||||
int shorthand = icrl & APIC_SHORT_MASK;
|
||||
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
|
||||
u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
|
||||
|
||||
if (shorthand != APIC_DEST_NOSHORT)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* The AVIC incomplete IPI #vmexit info provides index into
|
||||
* the physical APIC ID table, which can be used to derive
|
||||
* guest physical APIC ID.
|
||||
*/
|
||||
if (dest_mode == APIC_DEST_PHYSICAL) {
|
||||
apic_id = index;
|
||||
} else {
|
||||
if (!apic_x2apic_mode(source)) {
|
||||
/* For xAPIC logical mode, the index is for logical APIC table. */
|
||||
apic_id = avic_logical_id_table[index] & 0x1ff;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Assuming vcpu ID is the same as physical apic ID,
|
||||
* and use it to retrieve the target vCPU.
|
||||
*/
|
||||
vcpu = kvm_get_vcpu_by_id(kvm, apic_id);
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
|
||||
if (apic_x2apic_mode(vcpu->arch.apic))
|
||||
if (apic_x2apic_mode(source))
|
||||
dest = icrh;
|
||||
else
|
||||
dest = GET_APIC_DEST_FIELD(icrh);
|
||||
|
||||
/*
|
||||
* Try matching the destination APIC ID with the vCPU.
|
||||
*/
|
||||
if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) {
|
||||
vcpu->arch.apic->irr_pending = true;
|
||||
svm_complete_interrupt_delivery(vcpu,
|
||||
icrl & APIC_MODE_MASK,
|
||||
icrl & APIC_INT_LEVELTRIG,
|
||||
icrl & APIC_VECTOR_MASK);
|
||||
return 0;
|
||||
if (dest_mode == APIC_DEST_PHYSICAL) {
|
||||
/* broadcast destination, use slow path */
|
||||
if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
|
||||
return -EINVAL;
|
||||
if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
|
||||
return -EINVAL;
|
||||
|
||||
l1_physical_id = dest;
|
||||
|
||||
if (WARN_ON_ONCE(l1_physical_id != index))
|
||||
return -EINVAL;
|
||||
|
||||
} else {
|
||||
u32 bitmap, cluster;
|
||||
int logid_index;
|
||||
|
||||
if (apic_x2apic_mode(source)) {
|
||||
/* 16 bit dest mask, 16 bit cluster id */
|
||||
bitmap = dest & 0xFFFF0000;
|
||||
cluster = (dest >> 16) << 4;
|
||||
} else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
|
||||
/* 8 bit dest mask*/
|
||||
bitmap = dest;
|
||||
cluster = 0;
|
||||
} else {
|
||||
/* 4 bit desk mask, 4 bit cluster id */
|
||||
bitmap = dest & 0xF;
|
||||
cluster = (dest >> 4) << 2;
|
||||
}
|
||||
|
||||
if (unlikely(!bitmap))
|
||||
/* guest bug: nobody to send the logical interrupt to */
|
||||
return 0;
|
||||
|
||||
if (!is_power_of_2(bitmap))
|
||||
/* multiple logical destinations, use slow path */
|
||||
return -EINVAL;
|
||||
|
||||
logid_index = cluster + __ffs(bitmap);
|
||||
|
||||
if (apic_x2apic_mode(source)) {
|
||||
l1_physical_id = logid_index;
|
||||
} else {
|
||||
u32 *avic_logical_id_table =
|
||||
page_address(kvm_svm->avic_logical_id_table_page);
|
||||
|
||||
u32 logid_entry = avic_logical_id_table[logid_index];
|
||||
|
||||
if (WARN_ON_ONCE(index != logid_index))
|
||||
return -EINVAL;
|
||||
|
||||
/* guest bug: non existing/reserved logical destination */
|
||||
if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
|
||||
return 0;
|
||||
|
||||
l1_physical_id = logid_entry &
|
||||
AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
|
||||
if (unlikely(!target_vcpu))
|
||||
/* guest bug: non existing vCPU is a target of this IPI*/
|
||||
return 0;
|
||||
|
||||
target_vcpu->arch.apic->irr_pending = true;
|
||||
svm_complete_interrupt_delivery(target_vcpu,
|
||||
icrl & APIC_MODE_MASK,
|
||||
icrl & APIC_INT_LEVELTRIG,
|
||||
icrl & APIC_VECTOR_MASK);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
|
||||
@ -508,35 +541,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 *old, *new;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u32 id = kvm_xapic_id(vcpu->arch.apic);
|
||||
|
||||
if (vcpu->vcpu_id == id)
|
||||
return 0;
|
||||
|
||||
old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
|
||||
new = avic_get_physical_id_entry(vcpu, id);
|
||||
if (!new || !old)
|
||||
return 1;
|
||||
|
||||
/* We need to move physical_id_entry to new offset */
|
||||
*new = *old;
|
||||
*old = 0ULL;
|
||||
to_svm(vcpu)->avic_physical_id_cache = new;
|
||||
|
||||
/*
|
||||
* Also update the guest physical APIC ID in the logical
|
||||
* APIC ID table entry if already setup the LDR.
|
||||
*/
|
||||
if (svm->ldr_reg)
|
||||
avic_handle_ldr_update(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
@ -555,10 +559,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
|
||||
AVIC_UNACCEL_ACCESS_OFFSET_MASK;
|
||||
|
||||
switch (offset) {
|
||||
case APIC_ID:
|
||||
if (avic_handle_apic_id_update(vcpu))
|
||||
return 0;
|
||||
break;
|
||||
case APIC_LDR:
|
||||
if (avic_handle_ldr_update(vcpu))
|
||||
return 0;
|
||||
@ -650,8 +650,6 @@ int avic_init_vcpu(struct vcpu_svm *svm)
|
||||
|
||||
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (avic_handle_apic_id_update(vcpu) != 0)
|
||||
return;
|
||||
avic_handle_dfr_update(vcpu);
|
||||
avic_handle_ldr_update(vcpu);
|
||||
}
|
||||
@ -910,7 +908,9 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
|
||||
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
|
||||
BIT(APICV_INHIBIT_REASON_X2APIC) |
|
||||
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
|
||||
BIT(APICV_INHIBIT_REASON_SEV);
|
||||
BIT(APICV_INHIBIT_REASON_SEV) |
|
||||
BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
|
||||
BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
|
||||
|
||||
return supported & BIT(reason);
|
||||
}
|
||||
@ -946,7 +946,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
u64 entry;
|
||||
int h_physical_id = kvm_cpu_get_apicid(cpu);
|
||||
@ -978,7 +978,7 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
|
||||
}
|
||||
|
||||
void __avic_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 entry;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
@ -997,25 +997,6 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
||||
}
|
||||
|
||||
static void avic_vcpu_load(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
|
||||
WARN_ON(cpu != vcpu->cpu);
|
||||
|
||||
__avic_vcpu_load(vcpu, cpu);
|
||||
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
preempt_disable();
|
||||
|
||||
__avic_vcpu_put(vcpu);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -1042,7 +1023,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
||||
vmcb_mark_dirty(vmcb, VMCB_AVIC);
|
||||
|
||||
if (activated)
|
||||
avic_vcpu_load(vcpu);
|
||||
avic_vcpu_load(vcpu, vcpu->cpu);
|
||||
else
|
||||
avic_vcpu_put(vcpu);
|
||||
|
||||
@ -1075,5 +1056,5 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
|
||||
if (!kvm_vcpu_apicv_active(vcpu))
|
||||
return;
|
||||
|
||||
avic_vcpu_load(vcpu);
|
||||
avic_vcpu_load(vcpu, vcpu->cpu);
|
||||
}
|
||||
|
@ -616,6 +616,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
struct vmcb *vmcb01 = svm->vmcb01.ptr;
|
||||
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
|
||||
u32 pause_count12;
|
||||
u32 pause_thresh12;
|
||||
|
||||
/*
|
||||
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
|
||||
@ -671,27 +673,25 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
|
||||
if (!nested_vmcb_needs_vls_intercept(svm))
|
||||
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
|
||||
pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
|
||||
pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
|
||||
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
|
||||
/* use guest values since host doesn't use them */
|
||||
vmcb02->control.pause_filter_count =
|
||||
svm->pause_filter_enabled ?
|
||||
svm->nested.ctl.pause_filter_count : 0;
|
||||
/* use guest values since host doesn't intercept PAUSE */
|
||||
vmcb02->control.pause_filter_count = pause_count12;
|
||||
vmcb02->control.pause_filter_thresh = pause_thresh12;
|
||||
|
||||
vmcb02->control.pause_filter_thresh =
|
||||
svm->pause_threshold_enabled ?
|
||||
svm->nested.ctl.pause_filter_thresh : 0;
|
||||
|
||||
} else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
|
||||
/* use host values when guest doesn't use them */
|
||||
} else {
|
||||
/* start from host values otherwise */
|
||||
vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
|
||||
vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
|
||||
} else {
|
||||
/*
|
||||
* Intercept every PAUSE otherwise and
|
||||
* ignore both host and guest values
|
||||
*/
|
||||
vmcb02->control.pause_filter_count = 0;
|
||||
vmcb02->control.pause_filter_thresh = 0;
|
||||
|
||||
/* ... but ensure filtering is disabled if so requested. */
|
||||
if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
|
||||
if (!pause_count12)
|
||||
vmcb02->control.pause_filter_count = 0;
|
||||
if (!pause_thresh12)
|
||||
vmcb02->control.pause_filter_thresh = 0;
|
||||
}
|
||||
}
|
||||
|
||||
nested_svm_transition_tlb_flush(vcpu);
|
||||
@ -951,8 +951,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
vmcb12->control.event_inj = svm->nested.ctl.event_inj;
|
||||
vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
|
||||
|
||||
if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count)
|
||||
if (!kvm_pause_in_guest(vcpu->kvm)) {
|
||||
vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
|
||||
vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
|
||||
|
||||
}
|
||||
|
||||
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
|
||||
|
||||
|
@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
int old = control->pause_filter_count;
|
||||
|
||||
if (kvm_pause_in_guest(vcpu->kvm) || !old)
|
||||
if (kvm_pause_in_guest(vcpu->kvm))
|
||||
return;
|
||||
|
||||
control->pause_filter_count = __grow_ple_window(old,
|
||||
@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
int old = control->pause_filter_count;
|
||||
|
||||
if (kvm_pause_in_guest(vcpu->kvm) || !old)
|
||||
if (kvm_pause_in_guest(vcpu->kvm))
|
||||
return;
|
||||
|
||||
control->pause_filter_count =
|
||||
@ -1400,13 +1400,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
indirect_branch_prediction_barrier();
|
||||
}
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
__avic_vcpu_load(vcpu, cpu);
|
||||
avic_vcpu_load(vcpu, cpu);
|
||||
}
|
||||
|
||||
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
__avic_vcpu_put(vcpu);
|
||||
avic_vcpu_put(vcpu);
|
||||
|
||||
svm_prepare_host_switch(vcpu);
|
||||
|
||||
|
@ -610,8 +610,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
|
||||
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
|
||||
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
|
||||
int avic_init_vcpu(struct vcpu_svm *svm);
|
||||
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
|
||||
void __avic_vcpu_put(struct kvm_vcpu *vcpu);
|
||||
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
|
||||
void avic_vcpu_put(struct kvm_vcpu *vcpu);
|
||||
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
|
||||
void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
|
||||
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
|
||||
|
@ -7779,7 +7779,9 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
|
||||
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
|
||||
BIT(APICV_INHIBIT_REASON_ABSENT) |
|
||||
BIT(APICV_INHIBIT_REASON_HYPERV) |
|
||||
BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
|
||||
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
|
||||
BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
|
||||
BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
|
||||
|
||||
return supported & BIT(reason);
|
||||
}
|
||||
|
@ -9853,6 +9853,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
down_read(&vcpu->kvm->arch.apicv_update_lock);
|
||||
preempt_disable();
|
||||
|
||||
activate = kvm_vcpu_apicv_activated(vcpu);
|
||||
|
||||
@ -9873,6 +9874,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
up_read(&vcpu->kvm->arch.apicv_update_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
|
||||
|
@ -37,11 +37,38 @@ ifeq ($(ARCH),riscv)
|
||||
UNAME_M := riscv
|
||||
endif
|
||||
|
||||
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
|
||||
LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
|
||||
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
|
||||
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
|
||||
LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
|
||||
LIBKVM += lib/assert.c
|
||||
LIBKVM += lib/elf.c
|
||||
LIBKVM += lib/guest_modes.c
|
||||
LIBKVM += lib/io.c
|
||||
LIBKVM += lib/kvm_util.c
|
||||
LIBKVM += lib/perf_test_util.c
|
||||
LIBKVM += lib/rbtree.c
|
||||
LIBKVM += lib/sparsebit.c
|
||||
LIBKVM += lib/test_util.c
|
||||
|
||||
LIBKVM_x86_64 += lib/x86_64/apic.c
|
||||
LIBKVM_x86_64 += lib/x86_64/handlers.S
|
||||
LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
|
||||
LIBKVM_x86_64 += lib/x86_64/processor.c
|
||||
LIBKVM_x86_64 += lib/x86_64/svm.c
|
||||
LIBKVM_x86_64 += lib/x86_64/ucall.c
|
||||
LIBKVM_x86_64 += lib/x86_64/vmx.c
|
||||
|
||||
LIBKVM_aarch64 += lib/aarch64/gic.c
|
||||
LIBKVM_aarch64 += lib/aarch64/gic_v3.c
|
||||
LIBKVM_aarch64 += lib/aarch64/handlers.S
|
||||
LIBKVM_aarch64 += lib/aarch64/processor.c
|
||||
LIBKVM_aarch64 += lib/aarch64/spinlock.c
|
||||
LIBKVM_aarch64 += lib/aarch64/ucall.c
|
||||
LIBKVM_aarch64 += lib/aarch64/vgic.c
|
||||
|
||||
LIBKVM_s390x += lib/s390x/diag318_test_handler.c
|
||||
LIBKVM_s390x += lib/s390x/processor.c
|
||||
LIBKVM_s390x += lib/s390x/ucall.c
|
||||
|
||||
LIBKVM_riscv += lib/riscv/processor.c
|
||||
LIBKVM_riscv += lib/riscv/ucall.c
|
||||
|
||||
TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
|
||||
@ -173,12 +200,13 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
|
||||
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
|
||||
include ../lib.mk
|
||||
|
||||
STATIC_LIBS := $(OUTPUT)/libkvm.a
|
||||
LIBKVM_C := $(filter %.c,$(LIBKVM))
|
||||
LIBKVM_S := $(filter %.S,$(LIBKVM))
|
||||
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
|
||||
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
|
||||
EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
|
||||
LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
|
||||
|
||||
EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
|
||||
|
||||
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
|
||||
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
|
||||
@ -187,13 +215,8 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
|
||||
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
|
||||
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
|
||||
|
||||
LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
|
||||
$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
|
||||
$(AR) crs $@ $^
|
||||
|
||||
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
|
||||
all: $(STATIC_LIBS)
|
||||
$(TEST_GEN_PROGS): $(STATIC_LIBS)
|
||||
$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
|
||||
|
||||
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
|
||||
cscope:
|
||||
|
@ -336,8 +336,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
static void help(char *name)
|
||||
{
|
||||
puts("");
|
||||
printf("usage: %s [-h] [-i iterations] [-p offset] [-g]"
|
||||
"[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
|
||||
printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
|
||||
"[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
|
||||
"[-x memslots]\n", name);
|
||||
puts("");
|
||||
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
|
||||
@ -351,6 +351,7 @@ static void help(char *name)
|
||||
printf(" -p: specify guest physical test memory offset\n"
|
||||
" Warning: a low offset can conflict with the loaded test code.\n");
|
||||
guest_modes_help();
|
||||
printf(" -n: Run the vCPUs in nested mode (L2)\n");
|
||||
printf(" -b: specify the size of the memory region which should be\n"
|
||||
" dirtied by each vCPU. e.g. 10M or 3G.\n"
|
||||
" (default: 1G)\n");
|
||||
@ -387,7 +388,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
guest_modes_append_default();
|
||||
|
||||
while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'g':
|
||||
dirty_log_manual_caps = 0;
|
||||
@ -401,6 +402,9 @@ int main(int argc, char *argv[])
|
||||
case 'm':
|
||||
guest_modes_cmdline(optarg);
|
||||
break;
|
||||
case 'n':
|
||||
perf_test_args.nested = true;
|
||||
break;
|
||||
case 'b':
|
||||
guest_percpu_mem_size = parse_size(optarg);
|
||||
break;
|
||||
|
@ -30,10 +30,15 @@ struct perf_test_vcpu_args {
|
||||
|
||||
struct perf_test_args {
|
||||
struct kvm_vm *vm;
|
||||
/* The starting address and size of the guest test region. */
|
||||
uint64_t gpa;
|
||||
uint64_t size;
|
||||
uint64_t guest_page_size;
|
||||
int wr_fract;
|
||||
|
||||
/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
|
||||
bool nested;
|
||||
|
||||
struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
|
||||
};
|
||||
|
||||
@ -49,5 +54,9 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
|
||||
|
||||
void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
|
||||
void perf_test_join_vcpu_threads(int vcpus);
|
||||
void perf_test_guest_code(uint32_t vcpu_id);
|
||||
|
||||
uint64_t perf_test_nested_pages(int nr_vcpus);
|
||||
void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus);
|
||||
|
||||
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
|
||||
|
@ -482,13 +482,23 @@ void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
|
||||
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
|
||||
void vm_xsave_req_perm(int bit);
|
||||
|
||||
enum x86_page_size {
|
||||
X86_PAGE_SIZE_4K = 0,
|
||||
X86_PAGE_SIZE_2M,
|
||||
X86_PAGE_SIZE_1G,
|
||||
enum pg_level {
|
||||
PG_LEVEL_NONE,
|
||||
PG_LEVEL_4K,
|
||||
PG_LEVEL_2M,
|
||||
PG_LEVEL_1G,
|
||||
PG_LEVEL_512G,
|
||||
PG_LEVEL_NUM
|
||||
};
|
||||
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
enum x86_page_size page_size);
|
||||
|
||||
#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
|
||||
#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
|
||||
|
||||
#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
|
||||
#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
|
||||
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
|
||||
|
||||
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
|
||||
|
||||
/*
|
||||
* Basic CPU control in CR0
|
||||
@ -505,9 +515,6 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
|
||||
#define X86_CR0_PG (1UL<<31) /* Paging */
|
||||
|
||||
/* VMX_EPT_VPID_CAP bits */
|
||||
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
|
||||
|
||||
#define XSTATE_XTILE_CFG_BIT 17
|
||||
#define XSTATE_XTILE_DATA_BIT 18
|
||||
|
||||
|
@ -96,6 +96,9 @@
|
||||
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
|
||||
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
|
||||
|
||||
#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
|
||||
#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
|
||||
|
||||
#define EXIT_REASON_FAILED_VMENTRY 0x80000000
|
||||
#define EXIT_REASON_EXCEPTION_NMI 0
|
||||
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
|
||||
@ -606,6 +609,7 @@ bool load_vmcs(struct vmx_pages *vmx);
|
||||
|
||||
bool nested_vmx_supported(void);
|
||||
void nested_vmx_check_supported(void);
|
||||
bool ept_1g_pages_supported(void);
|
||||
|
||||
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
|
||||
uint64_t nested_paddr, uint64_t paddr);
|
||||
@ -613,6 +617,8 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
|
||||
uint64_t nested_paddr, uint64_t paddr, uint64_t size);
|
||||
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
|
||||
uint32_t memslot);
|
||||
void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
|
||||
uint64_t addr, uint64_t size);
|
||||
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
|
||||
uint32_t eptp_memslot);
|
||||
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
|
||||
|
@ -40,7 +40,7 @@ static bool all_vcpu_threads_running;
|
||||
* Continuously write to the first 8 bytes of each page in the
|
||||
* specified region.
|
||||
*/
|
||||
static void guest_code(uint32_t vcpu_id)
|
||||
void perf_test_guest_code(uint32_t vcpu_id)
|
||||
{
|
||||
struct perf_test_args *pta = &perf_test_args;
|
||||
struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
|
||||
@ -108,8 +108,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
|
||||
{
|
||||
struct perf_test_args *pta = &perf_test_args;
|
||||
struct kvm_vm *vm;
|
||||
uint64_t guest_num_pages;
|
||||
uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;
|
||||
uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
|
||||
uint64_t region_end_gfn;
|
||||
int i;
|
||||
|
||||
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
|
||||
@ -134,34 +135,54 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
|
||||
"Guest memory cannot be evenly divided into %d slots.",
|
||||
slots);
|
||||
|
||||
/*
|
||||
* If using nested, allocate extra pages for the nested page tables and
|
||||
* in-memory data structures.
|
||||
*/
|
||||
if (pta->nested)
|
||||
slot0_pages += perf_test_nested_pages(vcpus);
|
||||
|
||||
/*
|
||||
* Pass guest_num_pages to populate the page tables for test memory.
|
||||
* The memory is also added to memslot 0, but that's a benign side
|
||||
* effect as KVM allows aliasing HVAs in meslots.
|
||||
*/
|
||||
vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
|
||||
guest_num_pages, 0, guest_code, NULL);
|
||||
vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0,
|
||||
perf_test_guest_code, NULL);
|
||||
|
||||
pta->vm = vm;
|
||||
|
||||
/* Put the test region at the top guest physical memory. */
|
||||
region_end_gfn = vm_get_max_gfn(vm) + 1;
|
||||
|
||||
#ifdef __x86_64__
|
||||
/*
|
||||
* When running vCPUs in L2, restrict the test region to 48 bits to
|
||||
* avoid needing 5-level page tables to identity map L2.
|
||||
*/
|
||||
if (pta->nested)
|
||||
region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
|
||||
#endif
|
||||
/*
|
||||
* If there should be more memory in the guest test region than there
|
||||
* can be pages in the guest, it will definitely cause problems.
|
||||
*/
|
||||
TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
|
||||
TEST_ASSERT(guest_num_pages < region_end_gfn,
|
||||
"Requested more guest memory than address space allows.\n"
|
||||
" guest pages: %" PRIx64 " max gfn: %" PRIx64
|
||||
" vcpus: %d wss: %" PRIx64 "]\n",
|
||||
guest_num_pages, vm_get_max_gfn(vm), vcpus,
|
||||
guest_num_pages, region_end_gfn - 1, vcpus,
|
||||
vcpu_memory_bytes);
|
||||
|
||||
pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
|
||||
pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
|
||||
pta->gpa = align_down(pta->gpa, backing_src_pagesz);
|
||||
#ifdef __s390x__
|
||||
/* Align to 1M (segment size) */
|
||||
pta->gpa = align_down(pta->gpa, 1 << 20);
|
||||
#endif
|
||||
pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
|
||||
pta->size = guest_num_pages * pta->guest_page_size;
|
||||
pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
|
||||
pta->gpa, pta->gpa + pta->size);
|
||||
|
||||
/* Add extra memory slots for testing */
|
||||
for (i = 0; i < slots; i++) {
|
||||
@ -178,6 +199,11 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
|
||||
|
||||
perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
|
||||
|
||||
if (pta->nested) {
|
||||
pr_info("Configuring vCPUs to run in L2 (nested).\n");
|
||||
perf_test_setup_nested(vm, vcpus);
|
||||
}
|
||||
|
||||
ucall_init(vm, NULL);
|
||||
|
||||
/* Export the shared variables to the guest. */
|
||||
@ -198,6 +224,17 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
|
||||
sync_global_to_guest(vm, perf_test_args);
|
||||
}
|
||||
|
||||
uint64_t __weak perf_test_nested_pages(int nr_vcpus)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
|
||||
{
|
||||
pr_info("%s() not support on this architecture, skipping.\n", __func__);
|
||||
exit(KSFT_SKIP);
|
||||
}
|
||||
|
||||
static void *vcpu_thread_main(void *data)
|
||||
{
|
||||
struct vcpu_thread *vcpu = data;
|
||||
|
tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c (new file, 112 lines)
@ -0,0 +1,112 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* x86_64-specific extensions to perf_test_util.c.
|
||||
*
|
||||
* Copyright (C) 2022, Google, Inc.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/bitops.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "perf_test_util.h"
|
||||
#include "../kvm_util_internal.h"
|
||||
#include "processor.h"
|
||||
#include "vmx.h"
|
||||
|
||||
void perf_test_l2_guest_code(uint64_t vcpu_id)
|
||||
{
|
||||
perf_test_guest_code(vcpu_id);
|
||||
vmcall();
|
||||
}
|
||||
|
||||
extern char perf_test_l2_guest_entry[];
|
||||
__asm__(
|
||||
"perf_test_l2_guest_entry:"
|
||||
" mov (%rsp), %rdi;"
|
||||
" call perf_test_l2_guest_code;"
|
||||
" ud2;"
|
||||
);
|
||||
|
||||
static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
|
||||
{
|
||||
#define L2_GUEST_STACK_SIZE 64
|
||||
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
|
||||
unsigned long *rsp;
|
||||
|
||||
GUEST_ASSERT(vmx->vmcs_gpa);
|
||||
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
|
||||
GUEST_ASSERT(load_vmcs(vmx));
|
||||
GUEST_ASSERT(ept_1g_pages_supported());
|
||||
|
||||
rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
|
||||
*rsp = vcpu_id;
|
||||
prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp);
|
||||
|
||||
GUEST_ASSERT(!vmlaunch());
|
||||
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
uint64_t perf_test_nested_pages(int nr_vcpus)
|
||||
{
|
||||
/*
|
||||
* 513 page tables is enough to identity-map 256 TiB of L2 with 1G
|
||||
* pages and 4-level paging, plus a few pages per-vCPU for data
|
||||
* structures such as the VMCS.
|
||||
*/
|
||||
return 513 + 10 * nr_vcpus;
|
||||
}
|
||||
|
||||
void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
|
||||
{
|
||||
uint64_t start, end;
|
||||
|
||||
prepare_eptp(vmx, vm, 0);
|
||||
|
||||
/*
|
||||
* Identity map the first 4G and the test region with 1G pages so that
|
||||
* KVM can shadow the EPT12 with the maximum huge page size supported
|
||||
* by the backing source.
|
||||
*/
|
||||
nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
|
||||
|
||||
start = align_down(perf_test_args.gpa, PG_SIZE_1G);
|
||||
end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G);
|
||||
nested_identity_map_1g(vmx, vm, start, end - start);
|
||||
}
|
||||
|
||||
void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
|
||||
{
|
||||
struct vmx_pages *vmx, *vmx0 = NULL;
|
||||
struct kvm_regs regs;
|
||||
vm_vaddr_t vmx_gva;
|
||||
int vcpu_id;
|
||||
|
||||
nested_vmx_check_supported();
|
||||
|
||||
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
|
||||
vmx = vcpu_alloc_vmx(vm, &vmx_gva);
|
||||
|
||||
if (vcpu_id == 0) {
|
||||
perf_test_setup_ept(vmx, vm);
|
||||
vmx0 = vmx;
|
||||
} else {
|
||||
/* Share the same EPT table across all vCPUs. */
|
||||
vmx->eptp = vmx0->eptp;
|
||||
vmx->eptp_hva = vmx0->eptp_hva;
|
||||
vmx->eptp_gpa = vmx0->eptp_gpa;
|
||||
}
|
||||
|
||||
/*
|
||||
* Override the vCPU to run perf_test_l1_guest_code() which will
|
||||
* bounce it into L2 before calling perf_test_guest_code().
|
||||
*/
|
||||
vcpu_regs_get(vm, vcpu_id, ®s);
|
||||
regs.rip = (unsigned long) perf_test_l1_guest_code;
|
||||
vcpu_regs_set(vm, vcpu_id, ®s);
|
||||
vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id);
|
||||
}
|
||||
}
|
@@ -158,7 +158,7 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
                           int level)
 {
         uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
-        int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+        int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
 
         return &page_table[index];
 }
@@ -167,14 +167,14 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
                                        uint64_t pt_pfn,
                                        uint64_t vaddr,
                                        uint64_t paddr,
-                                       int level,
-                                       enum x86_page_size page_size)
+                                       int current_level,
+                                       int target_level)
 {
-        uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+        uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
 
         if (!(*pte & PTE_PRESENT_MASK)) {
                 *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
-                if (level == page_size)
+                if (current_level == target_level)
                         *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
                 else
                         *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
@@ -184,20 +184,19 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
                  * a hugepage at this level, and that there isn't a hugepage at
                  * this level.
                  */
-                TEST_ASSERT(level != page_size,
+                TEST_ASSERT(current_level != target_level,
                             "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
-                            page_size, vaddr);
+                            current_level, vaddr);
                 TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
                             "Cannot create page table at level: %u, vaddr: 0x%lx\n",
-                            level, vaddr);
+                            current_level, vaddr);
         }
         return pte;
 }
 
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-                   enum x86_page_size page_size)
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
 {
-        const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+        const uint64_t pg_size = PG_LEVEL_SIZE(level);
         uint64_t *pml4e, *pdpe, *pde;
         uint64_t *pte;
 
@@ -222,20 +221,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
          * early if a hugepage was created.
          */
         pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
-                                      vaddr, paddr, 3, page_size);
+                                      vaddr, paddr, PG_LEVEL_512G, level);
         if (*pml4e & PTE_LARGE_MASK)
                 return;
 
-        pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+        pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
         if (*pdpe & PTE_LARGE_MASK)
                 return;
 
-        pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+        pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
         if (*pde & PTE_LARGE_MASK)
                 return;
 
         /* Fill in page table entry. */
-        pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+        pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
         TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
                     "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
         *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
@@ -243,7 +242,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
-        __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+        __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
 }
 
 static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
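
[ The refactor above replaces the old zero-based enum x86_page_size with one-based PG_LEVEL_* translation levels. The old open-coded size computation, 1ull << ((page_size * 9) + 12), together with the new PG_LEVEL_SHIFT()/PG_LEVEL_SIZE() call sites implies helper definitions along the following lines; this is a sketch derived from the diff, the authoritative definitions live in the selftests' processor.h. ]

/* Sketch of the level helpers implied by the call sites above. */
enum pg_level {
        PG_LEVEL_NONE = 0,
        PG_LEVEL_4K,            /* level 1: shift 12, 4 KiB */
        PG_LEVEL_2M,            /* level 2: shift 21, 2 MiB */
        PG_LEVEL_1G,            /* level 3: shift 30, 1 GiB */
        PG_LEVEL_512G,          /* level 4: shift 39        */
        PG_LEVEL_NUM
};

#define PG_LEVEL_SHIFT(_level)  (((_level) - 1) * 9 + 12)
#define PG_LEVEL_SIZE(_level)   (1ull << PG_LEVEL_SHIFT(_level))

[ With PG_LEVEL_4K == 1 this reproduces the old arithmetic shifted by one: callers that used to pass 0/1/2/3 now pass PG_LEVEL_4K/2M/1G/512G, PG_LEVEL_SIZE(PG_LEVEL_1G) is 1 << 30, and virt_get_pte() still extracts 9 index bits per level. ]
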
@@ -198,6 +198,16 @@ bool load_vmcs(struct vmx_pages *vmx)
         return true;
 }
 
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+        return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+        return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
 /*
  * Initialize the control fields to the most basic settings possible.
  */
@@ -215,7 +225,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
         struct eptPageTablePointer eptp = {
                 .memory_type = VMX_BASIC_MEM_TYPE_WB,
                 .page_walk_length = 3, /* + 1 */
-                .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+                .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
                 .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
         };
 
@@ -392,80 +402,93 @@ void nested_vmx_check_supported(void)
         }
 }
 
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                   uint64_t nested_paddr, uint64_t paddr)
+static void nested_create_pte(struct kvm_vm *vm,
+                              struct eptPageTableEntry *pte,
+                              uint64_t nested_paddr,
+                              uint64_t paddr,
+                              int current_level,
+                              int target_level)
 {
-        uint16_t index[4];
-        struct eptPageTableEntry *pml4e;
+        if (!pte->readable) {
+                pte->writable = true;
+                pte->readable = true;
+                pte->executable = true;
+                pte->page_size = (current_level == target_level);
+                if (pte->page_size)
+                        pte->address = paddr >> vm->page_shift;
+                else
+                        pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+        } else {
+                /*
+                 * Entry already present. Assert that the caller doesn't want
+                 * a hugepage at this level, and that there isn't a hugepage at
+                 * this level.
+                 */
+                TEST_ASSERT(current_level != target_level,
+                            "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
+                            current_level, nested_paddr);
+                TEST_ASSERT(!pte->page_size,
+                            "Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
+                            current_level, nested_paddr);
+        }
+}
+
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                     uint64_t nested_paddr, uint64_t paddr, int target_level)
+{
+        const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+        struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+        uint16_t index;
 
         TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                     "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
-        TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+        TEST_ASSERT((nested_paddr >> 48) == 0,
+                    "Nested physical address 0x%lx requires 5-level paging",
+                    nested_paddr);
+        TEST_ASSERT((nested_paddr % page_size) == 0,
                     "Nested physical address not on page boundary,\n"
-                    "  nested_paddr: 0x%lx vm->page_size: 0x%x",
-                    nested_paddr, vm->page_size);
+                    "  nested_paddr: 0x%lx page_size: 0x%lx",
+                    nested_paddr, page_size);
         TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
                     "Physical address beyond beyond maximum supported,\n"
                     "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
                     paddr, vm->max_gfn, vm->page_size);
-        TEST_ASSERT((paddr % vm->page_size) == 0,
+        TEST_ASSERT((paddr % page_size) == 0,
                     "Physical address not on page boundary,\n"
-                    "  paddr: 0x%lx vm->page_size: 0x%x",
-                    paddr, vm->page_size);
+                    "  paddr: 0x%lx page_size: 0x%lx",
+                    paddr, page_size);
         TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
                     "Physical address beyond beyond maximum supported,\n"
                     "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
                     paddr, vm->max_gfn, vm->page_size);
 
-        index[0] = (nested_paddr >> 12) & 0x1ffu;
-        index[1] = (nested_paddr >> 21) & 0x1ffu;
-        index[2] = (nested_paddr >> 30) & 0x1ffu;
-        index[3] = (nested_paddr >> 39) & 0x1ffu;
+        for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+                index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+                pte = &pt[index];
 
-        /* Allocate page directory pointer table if not present. */
-        pml4e = vmx->eptp_hva;
-        if (!pml4e[index[3]].readable) {
-                pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
-                pml4e[index[3]].writable = true;
-                pml4e[index[3]].readable = true;
-                pml4e[index[3]].executable = true;
+                nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
+
+                if (pte->page_size)
+                        break;
+
+                pt = addr_gpa2hva(vm, pte->address * vm->page_size);
         }
 
-        /* Allocate page directory table if not present. */
-        struct eptPageTableEntry *pdpe;
-        pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
-        if (!pdpe[index[2]].readable) {
-                pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
-                pdpe[index[2]].writable = true;
-                pdpe[index[2]].readable = true;
-                pdpe[index[2]].executable = true;
-        }
-
-        /* Allocate page table if not present. */
-        struct eptPageTableEntry *pde;
-        pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
-        if (!pde[index[1]].readable) {
-                pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
-                pde[index[1]].writable = true;
-                pde[index[1]].readable = true;
-                pde[index[1]].executable = true;
-        }
-
-        /* Fill in page table entry. */
-        struct eptPageTableEntry *pte;
-        pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
-        pte[index[0]].address = paddr >> vm->page_shift;
-        pte[index[0]].writable = true;
-        pte[index[0]].readable = true;
-        pte[index[0]].executable = true;
-
         /*
          * For now mark these as accessed and dirty because the only
          * testcase we have needs that. Can be reconsidered later.
          */
-        pte[index[0]].accessed = true;
-        pte[index[0]].dirty = true;
+        pte->accessed = true;
+        pte->dirty = true;
+
 }
 
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                   uint64_t nested_paddr, uint64_t paddr)
+{
+        __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
+}
+
 /*
@@ -476,7 +499,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
  * nested_paddr - Nested guest physical address to map
  * paddr - VM Physical Address
  * size - The size of the range to map
- * eptp_memslot - Memory region slot for new virtual translation tables
+ * level - The level at which to map the range
  *
  * Output Args: None
  *
@@ -485,22 +508,29 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
  * Within the VM given by vm, creates a nested guest translation for the
  * page range starting at nested_paddr to the page range starting at paddr.
  */
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                  uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+                  int level)
 {
-        size_t page_size = vm->page_size;
+        size_t page_size = PG_LEVEL_SIZE(level);
         size_t npages = size / page_size;
 
         TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
         TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
 
         while (npages--) {
-                nested_pg_map(vmx, vm, nested_paddr, paddr);
+                __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
                 nested_paddr += page_size;
                 paddr += page_size;
         }
 }
 
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+        __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
 /* Prepare an identity extended page table that maps all the
  * physical pages in VM.
  */
@@ -525,6 +555,13 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
         }
 }
 
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+                            uint64_t addr, uint64_t size)
+{
+        __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
                   uint32_t eptp_memslot)
 {
|
||||
#ifdef __x86_64__
|
||||
/* Identity map memory in the guest using 1gb pages. */
|
||||
for (i = 0; i < slot_size; i += size_1gb)
|
||||
__virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G);
|
||||
__virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
|
||||
#else
|
||||
for (i = 0; i < slot_size; i += vm_get_page_size(vm))
|
||||
virt_pg_map(vm, gpa + i, gpa + i);
|
||||
|
@ -35,7 +35,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
/* Map 1gb page without a backing memlot. */
|
||||
__virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
|
||||
__virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G);
|
||||
|
||||
r = _vcpu_run(vm, VCPU_ID);
|
||||
|
||||
|
@ -3328,9 +3328,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->stat.generic.blocking = 1;
|
||||
|
||||
preempt_disable();
|
||||
kvm_arch_vcpu_blocking(vcpu);
|
||||
|
||||
prepare_to_rcuwait(wait);
|
||||
preempt_enable();
|
||||
|
||||
for (;;) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
@ -3340,9 +3342,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
waited = true;
|
||||
schedule();
|
||||
}
|
||||
finish_rcuwait(wait);
|
||||
|
||||
preempt_disable();
|
||||
finish_rcuwait(wait);
|
||||
kvm_arch_vcpu_unblocking(vcpu);
|
||||
preempt_enable();
|
||||
|
||||
vcpu->stat.generic.blocking = 0;
|
||||
|
||||
|