* Properly reset the SVE/SME flags on vcpu load
 
 * Fix a vgic-v2 regression regarding accessing the pending
 state of a HW interrupt from userspace (and make the code
 common with vgic-v3)
 
 * Fix access to the idreg range for protected guests
 
 * Ignore 'kvm-arm.mode=protected' when using VHE
 
 * Return an error from kvm_arch_init_vm() on allocation failure
 
 * A bunch of small cleanups (comments, annotations, indentation)
 
 RISC-V:
 
 * Typo fix in arch/riscv/kvm/vmid.c
 
 * Remove broken reference pattern from MAINTAINERS entry
 
 x86-64:
 
 * Fix error in page tables with MKTME enabled
 
 * Dirty page tracking performance test extended to running a nested
   guest
 
 * Disable APICv/AVIC in cases that it cannot implement correctly
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmKjTIAUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroNhPQgAiIVtp8aepujUM/NhkNyK3SIdLzlS
 oZCZiS6bvaecKXi/QvhBU0EBxAEyrovk3lmVuYNd41xI+PDjyaA4SDIl5DnToGUw
 bVPNFSYqjpF939vUUKjc0RCdZR4o5g3Od3tvWoHTHviS1a8aAe5o9pcpHpD0D6Mp
 Gc/o58nKAOPl3htcFKmjymqo3Y6yvkJU9NB7DCbL8T5mp5pJ959Mw1/LlmBaAzJC
 OofrynUm4NjMyAj/mAB1FhHKFyQfjBXLhiVlS0SLiiEA/tn9/OXyVFMKG+n5VkAZ
 Q337GMFe2RikEIuMEr3Rc4qbZK3PpxHhaj+6MPRuM0ho/P4yzl2Nyb/OhA==
 =h81Q
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "While last week's pull request contained miscellaneous fixes for x86,
  this one covers other architectures, selftests changes, and a bigger
  series for APIC virtualization bugs that were discovered during 5.20
  development. The idea is to base 5.20 development for KVM on top of
  this tag.

  ARM64:

   - Properly reset the SVE/SME flags on vcpu load

   - Fix a vgic-v2 regression regarding accessing the pending state of a
     HW interrupt from userspace (and make the code common with vgic-v3)

   - Fix access to the idreg range for protected guests

   - Ignore 'kvm-arm.mode=protected' when using VHE

   - Return an error from kvm_arch_init_vm() on allocation failure

   - A bunch of small cleanups (comments, annotations, indentation)

  RISC-V:

   - Typo fix in arch/riscv/kvm/vmid.c

   - Remove broken reference pattern from MAINTAINERS entry

  x86-64:

   - Fix error in page tables with MKTME enabled

   - Dirty page tracking performance test extended to running a nested
     guest

   - Disable APICv/AVIC in cases that it cannot implement correctly"

[ This merge also fixes a misplaced end parenthesis bug introduced in
  commit 3743c2f02517 ("KVM: x86: inhibit APICv/AVIC on changes to APIC
  ID or APIC base") pointed out by Sean Christopherson ]

Link: https://lore.kernel.org/all/20220610191813.371682-1-seanjc@google.com/

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (34 commits)
  KVM: selftests: Restrict test region to 48-bit physical addresses when using nested
  KVM: selftests: Add option to run dirty_log_perf_test vCPUs in L2
  KVM: selftests: Clean up LIBKVM files in Makefile
  KVM: selftests: Link selftests directly with lib object files
  KVM: selftests: Drop unnecessary rule for STATIC_LIBS
  KVM: selftests: Add a helper to check EPT/VPID capabilities
  KVM: selftests: Move VMX_EPT_VPID_CAP_AD_BITS to vmx.h
  KVM: selftests: Refactor nested_map() to specify target level
  KVM: selftests: Drop stale function parameter comment for nested_map()
  KVM: selftests: Add option to create 2M and 1G EPT mappings
  KVM: selftests: Replace x86_page_size with PG_LEVEL_XX
  KVM: x86: SVM: fix nested PAUSE filtering when L0 intercepts PAUSE
  KVM: x86: SVM: drop preempt-safe wrappers for avic_vcpu_load/put
  KVM: x86: disable preemption around the call to kvm_arch_vcpu_{un|}blocking
  KVM: x86: disable preemption while updating apicv inhibition
  KVM: x86: SVM: fix avic_kick_target_vcpus_fast
  KVM: x86: SVM: remove avic's broken code that updated APIC ID
  KVM: x86: inhibit APICv/AVIC on changes to APIC ID or APIC base
  KVM: x86: document AVIC/APICv inhibit reasons
  KVM: x86/mmu: Set memory encryption "value", not "mask", in shadow PDPTRs
  ...
This commit is contained in:
Linus Torvalds 2022-06-14 07:57:18 -07:00
commit 24625f7d91
37 changed files with 647 additions and 319 deletions

View File

@ -2469,7 +2469,6 @@
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
Not valid if the kernel is running in EL2.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation

View File

@ -10872,7 +10872,6 @@ F: arch/riscv/include/asm/kvm*
F: arch/riscv/include/uapi/asm/kvm*
F: arch/riscv/kvm/
F: tools/testing/selftests/kvm/*/riscv/
F: tools/testing/selftests/kvm/riscv/
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com>

View File

@ -362,11 +362,6 @@ struct kvm_vcpu_arch {
struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu;
/*
* Anything that is not used directly from assembly code goes
* here.
*/
/*
* Guest registers we preserve during guest debugging.
*

View File

@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void)
/*
* Code only run in VHE/NVHE hyp context can assume VHE is present or
* absent. Otherwise fall back to caps.
* This allows the compiler to discard VHE-specific code from the
* nVHE object, reducing the number of external symbol references
* needed to link.
*/
if (is_vhe_hyp_code())
return true;

View File

@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
#ifdef CONFIG_KVM
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
{
if (kvm_get_mode() != KVM_MODE_PROTECTED)
return false;
if (is_kernel_in_hyp_mode()) {
pr_warn("Protected KVM not available with VHE\n");
return false;
}
return true;
return kvm_get_mode() == KVM_MODE_PROTECTED;
}
#endif /* CONFIG_KVM */

View File

@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid)
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer;
if (WARN(!vcpu, "No vcpu context!\n"))
return false;
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
timer = vcpu_vtimer(vcpu);
else if (vintid == vcpu_ptimer(vcpu)->irq.irq)

View File

@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
ret = -ENOMEM;
goto out_free_stage2_pgd;
}
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
kvm_vgic_early_init(kvm);
@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg)
return -EINVAL;
if (strcmp(arg, "protected") == 0) {
kvm_mode = KVM_MODE_PROTECTED;
if (!is_kernel_in_hyp_mode())
kvm_mode = KVM_MODE_PROTECTED;
else
pr_warn_once("Protected KVM not available with VHE\n");
return 0;
}

View File

@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* operations. Do this for ZA as well for now for simplicity.
*/
if (system_supports_sme()) {
vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;

View File

@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
enum kvm_pgtable_prot prot)
{
hyp_assert_lock_held(&host_kvm.lock);
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
hyp_assert_lock_held(&host_kvm.lock);
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
addr, size, &host_s2_pool, owner_id);
}

View File

@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
case SYS_ID_AA64MMFR2_EL1:
return get_pvm_id_aa64mmfr2(vcpu);
default:
/*
* Should never happen because all cases are covered in
* pvm_sys_reg_descs[].
*/
WARN_ON(1);
break;
/* Unhandled ID register, RAZ */
return 0;
}
return 0;
}
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
/* Mark the specified system register as an AArch64 feature id register. */
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
/*
* sys_reg_desc initialiser for architecturally unallocated cpufeature ID
* register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
* (1 <= crm < 8, 0 <= Op2 < 8).
*/
#define ID_UNALLOCATED(crm, op2) { \
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
.access = pvm_access_id_aarch64, \
}
/* Mark the specified system register as Read-As-Zero/Write-Ignored */
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
AARCH32(SYS_MVFR0_EL1),
AARCH32(SYS_MVFR1_EL1),
AARCH32(SYS_MVFR2_EL1),
ID_UNALLOCATED(3,3),
AARCH32(SYS_ID_PFR2_EL1),
AARCH32(SYS_ID_DFR1_EL1),
AARCH32(SYS_ID_MMFR5_EL1),
ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
/* CRm=4 */
AARCH64(SYS_ID_AA64PFR0_EL1),
AARCH64(SYS_ID_AA64PFR1_EL1),
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
AARCH64(SYS_ID_AA64ZFR0_EL1),
ID_UNALLOCATED(4,5),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
AARCH64(SYS_ID_AA64DFR0_EL1),
AARCH64(SYS_ID_AA64DFR1_EL1),
ID_UNALLOCATED(5,2),
ID_UNALLOCATED(5,3),
AARCH64(SYS_ID_AA64AFR0_EL1),
AARCH64(SYS_ID_AA64AFR1_EL1),
ID_UNALLOCATED(5,6),
ID_UNALLOCATED(5,7),
AARCH64(SYS_ID_AA64ISAR0_EL1),
AARCH64(SYS_ID_AA64ISAR1_EL1),
AARCH64(SYS_ID_AA64ISAR2_EL1),
ID_UNALLOCATED(6,3),
ID_UNALLOCATED(6,4),
ID_UNALLOCATED(6,5),
ID_UNALLOCATED(6,6),
ID_UNALLOCATED(6,7),
AARCH64(SYS_ID_AA64MMFR0_EL1),
AARCH64(SYS_ID_AA64MMFR1_EL1),
AARCH64(SYS_ID_AA64MMFR2_EL1),
ID_UNALLOCATED(7,3),
ID_UNALLOCATED(7,4),
ID_UNALLOCATED(7,5),
ID_UNALLOCATED(7,6),
ID_UNALLOCATED(7,7),
/* Scalable Vector Registers are restricted. */

View File

@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
vgic_mmio_read_pending, vgic_mmio_write_spending,
NULL, vgic_uaccess_write_spending, 1,
vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
NULL, vgic_uaccess_write_cpending, 1,
vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
vgic_mmio_read_active, vgic_mmio_write_sactive,

View File

@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
return 0;
}
static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 value = 0;
int i;
/*
* pending state of interrupt is latched in pending_latch variable.
* Userspace will save and restore pending state and line_level
* separately.
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
* for handling of ISPENDR and ICPENDR.
*/
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
bool state = irq->pending_latch;
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
int err;
err = irq_get_irqchip_state(irq->host_irq,
IRQCHIP_STATE_PENDING,
&state);
WARN_ON(err);
}
if (state)
value |= (1U << i);
vgic_put_irq(vcpu->kvm, irq);
}
return value;
}
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
vgic_mmio_read_pending, vgic_mmio_write_spending,
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_spending,
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_cpending,

View File

@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
return 0;
}
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
static unsigned long __read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 value = 0;
@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
unsigned long flags;
bool val;
/*
* When used from userspace with a GICv3 model:
*
* Pending state of interrupt is latched in pending_latch
* variable. Userspace will save and restore pending state
* and line_level separately.
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
* for handling of ISPENDR and ICPENDR.
*/
raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
int err;
@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
IRQCHIP_STATE_PENDING,
&val);
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
} else if (vgic_irq_is_mapped_level(irq)) {
} else if (!is_user && vgic_irq_is_mapped_level(irq)) {
val = vgic_get_phys_line_level(irq);
} else {
val = irq_is_pending(irq);
switch (vcpu->kvm->arch.vgic.vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
if (is_user) {
val = irq->pending_latch;
break;
}
fallthrough;
default:
val = irq_is_pending(irq);
break;
}
}
value |= ((u32)val << i);
@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
return __read_pending(vcpu, addr, len, false);
}
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
return __read_pending(vcpu, addr, len, true);
}
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
return (vgic_irq_is_sgi(irq->intid) &&

View File

@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);

View File

@ -66,7 +66,7 @@ static void flush_context(void)
* the next context-switch, we broadcast TLB flush + I-cache
* invalidation over the inner shareable domain on rollover.
*/
kvm_call_hyp(__kvm_flush_vm_context);
kvm_call_hyp(__kvm_flush_vm_context);
}
static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)

View File

@ -97,7 +97,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
* We ran out of VMIDs so we increment vmid_version and
* start assigning VMIDs from 1.
*
* This also means existing VMIDs assignement to all Guest
* This also means existing VMIDs assignment to all Guest
* instances is invalid and we have force VMID re-assignement
* for all Guest instances. The Guest instances that were not
* running will automatically pick-up new VMIDs because will

View File

@ -1047,14 +1047,77 @@ struct kvm_x86_msr_filter {
};
enum kvm_apicv_inhibit {
/********************************************************************/
/* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
/********************************************************************/
/*
* APIC acceleration is disabled by a module parameter
* and/or not supported in hardware.
*/
APICV_INHIBIT_REASON_DISABLE,
/*
* APIC acceleration is inhibited because AutoEOI feature is
* being used by a HyperV guest.
*/
APICV_INHIBIT_REASON_HYPERV,
APICV_INHIBIT_REASON_NESTED,
APICV_INHIBIT_REASON_IRQWIN,
APICV_INHIBIT_REASON_PIT_REINJ,
APICV_INHIBIT_REASON_X2APIC,
APICV_INHIBIT_REASON_BLOCKIRQ,
/*
* APIC acceleration is inhibited because the userspace didn't yet
* enable the kernel/split irqchip.
*/
APICV_INHIBIT_REASON_ABSENT,
/* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
* (out of band, debug measure of blocking all interrupts on this vCPU)
* was enabled, to avoid AVIC/APICv bypassing it.
*/
APICV_INHIBIT_REASON_BLOCKIRQ,
/*
* For simplicity, the APIC acceleration is inhibited
* first time either APIC ID or APIC base are changed by the guest
* from their reset values.
*/
APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,
/******************************************************/
/* INHIBITs that are relevant only to the AMD's AVIC. */
/******************************************************/
/*
* AVIC is inhibited on a vCPU because it runs a nested guest.
*
* This is needed because unlike APICv, the peers of this vCPU
* cannot use the doorbell mechanism to signal interrupts via AVIC when
* a vCPU runs nested.
*/
APICV_INHIBIT_REASON_NESTED,
/*
* On SVM, the wait for the IRQ window is implemented with pending vIRQ,
* which cannot be injected when the AVIC is enabled, thus AVIC
* is inhibited while KVM waits for IRQ window.
*/
APICV_INHIBIT_REASON_IRQWIN,
/*
* PIT (i8254) 're-inject' mode, relies on EOI intercept,
* which AVIC doesn't support for edge triggered interrupts.
*/
APICV_INHIBIT_REASON_PIT_REINJ,
/*
* AVIC is inhibited because the guest has x2apic in its CPUID.
*/
APICV_INHIBIT_REASON_X2APIC,
/*
* AVIC is disabled because SEV doesn't support it.
*/
APICV_INHIBIT_REASON_SEV,
};

View File

@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
}
}
static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
{
struct kvm *kvm = apic->vcpu->kvm;
if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
return;
if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
return;
kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
}
static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
int ret = 0;
@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
switch (reg) {
case APIC_ID: /* Local APIC ID */
if (!apic_x2apic_mode(apic))
if (!apic_x2apic_mode(apic)) {
kvm_apic_set_xapic_id(apic, val >> 24);
else
kvm_lapic_xapic_id_updated(apic);
} else {
ret = 1;
}
break;
case APIC_TASKPRI:
@ -2336,8 +2351,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
MSR_IA32_APICBASE_BASE;
if ((value & MSR_IA32_APICBASE_ENABLE) &&
apic->base_address != APIC_DEFAULT_PHYS_BASE)
pr_warn_once("APIC base relocation is unsupported by KVM");
apic->base_address != APIC_DEFAULT_PHYS_BASE) {
kvm_set_apicv_inhibit(apic->vcpu->kvm,
APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
}
}
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
@ -2648,6 +2665,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
}
} else {
kvm_lapic_xapic_id_updated(vcpu->arch.apic);
}
return 0;

View File

@ -3411,7 +3411,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, true);
mmu->pae_root[i] = root | PT_PRESENT_MASK |
shadow_me_mask;
shadow_me_value;
}
mmu->root.hpa = __pa(mmu->pae_root);
} else {

View File

@ -291,58 +291,91 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh, u32 index)
{
u32 dest, apic_id;
struct kvm_vcpu *vcpu;
u32 l1_physical_id, dest;
struct kvm_vcpu *target_vcpu;
int dest_mode = icrl & APIC_DEST_MASK;
int shorthand = icrl & APIC_SHORT_MASK;
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
if (shorthand != APIC_DEST_NOSHORT)
return -EINVAL;
/*
* The AVIC incomplete IPI #vmexit info provides index into
* the physical APIC ID table, which can be used to derive
* guest physical APIC ID.
*/
if (dest_mode == APIC_DEST_PHYSICAL) {
apic_id = index;
} else {
if (!apic_x2apic_mode(source)) {
/* For xAPIC logical mode, the index is for logical APIC table. */
apic_id = avic_logical_id_table[index] & 0x1ff;
} else {
return -EINVAL;
}
}
/*
* Assuming vcpu ID is the same as physical apic ID,
* and use it to retrieve the target vCPU.
*/
vcpu = kvm_get_vcpu_by_id(kvm, apic_id);
if (!vcpu)
return -EINVAL;
if (apic_x2apic_mode(vcpu->arch.apic))
if (apic_x2apic_mode(source))
dest = icrh;
else
dest = GET_APIC_DEST_FIELD(icrh);
/*
* Try matching the destination APIC ID with the vCPU.
*/
if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) {
vcpu->arch.apic->irr_pending = true;
svm_complete_interrupt_delivery(vcpu,
icrl & APIC_MODE_MASK,
icrl & APIC_INT_LEVELTRIG,
icrl & APIC_VECTOR_MASK);
return 0;
if (dest_mode == APIC_DEST_PHYSICAL) {
/* broadcast destination, use slow path */
if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
return -EINVAL;
if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
return -EINVAL;
l1_physical_id = dest;
if (WARN_ON_ONCE(l1_physical_id != index))
return -EINVAL;
} else {
u32 bitmap, cluster;
int logid_index;
if (apic_x2apic_mode(source)) {
/* 16 bit dest mask, 16 bit cluster id */
bitmap = dest & 0xFFFF0000;
cluster = (dest >> 16) << 4;
} else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
/* 8 bit dest mask*/
bitmap = dest;
cluster = 0;
} else {
/* 4 bit desk mask, 4 bit cluster id */
bitmap = dest & 0xF;
cluster = (dest >> 4) << 2;
}
if (unlikely(!bitmap))
/* guest bug: nobody to send the logical interrupt to */
return 0;
if (!is_power_of_2(bitmap))
/* multiple logical destinations, use slow path */
return -EINVAL;
logid_index = cluster + __ffs(bitmap);
if (apic_x2apic_mode(source)) {
l1_physical_id = logid_index;
} else {
u32 *avic_logical_id_table =
page_address(kvm_svm->avic_logical_id_table_page);
u32 logid_entry = avic_logical_id_table[logid_index];
if (WARN_ON_ONCE(index != logid_index))
return -EINVAL;
/* guest bug: non existing/reserved logical destination */
if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
return 0;
l1_physical_id = logid_entry &
AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
}
}
return -EINVAL;
target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
if (unlikely(!target_vcpu))
/* guest bug: non existing vCPU is a target of this IPI*/
return 0;
target_vcpu->arch.apic->irr_pending = true;
svm_complete_interrupt_delivery(target_vcpu,
icrl & APIC_MODE_MASK,
icrl & APIC_INT_LEVELTRIG,
icrl & APIC_VECTOR_MASK);
return 0;
}
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
@ -508,35 +541,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
return ret;
}
static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
{
u64 *old, *new;
struct vcpu_svm *svm = to_svm(vcpu);
u32 id = kvm_xapic_id(vcpu->arch.apic);
if (vcpu->vcpu_id == id)
return 0;
old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
new = avic_get_physical_id_entry(vcpu, id);
if (!new || !old)
return 1;
/* We need to move physical_id_entry to new offset */
*new = *old;
*old = 0ULL;
to_svm(vcpu)->avic_physical_id_cache = new;
/*
* Also update the guest physical APIC ID in the logical
* APIC ID table entry if already setup the LDR.
*/
if (svm->ldr_reg)
avic_handle_ldr_update(vcpu);
return 0;
}
static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -555,10 +559,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
AVIC_UNACCEL_ACCESS_OFFSET_MASK;
switch (offset) {
case APIC_ID:
if (avic_handle_apic_id_update(vcpu))
return 0;
break;
case APIC_LDR:
if (avic_handle_ldr_update(vcpu))
return 0;
@ -650,8 +650,6 @@ int avic_init_vcpu(struct vcpu_svm *svm)
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
{
if (avic_handle_apic_id_update(vcpu) != 0)
return;
avic_handle_dfr_update(vcpu);
avic_handle_ldr_update(vcpu);
}
@ -910,7 +908,9 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
BIT(APICV_INHIBIT_REASON_X2APIC) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
BIT(APICV_INHIBIT_REASON_SEV);
BIT(APICV_INHIBIT_REASON_SEV) |
BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}
@ -946,7 +946,7 @@ out:
return ret;
}
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
int h_physical_id = kvm_cpu_get_apicid(cpu);
@ -978,7 +978,7 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}
void __avic_vcpu_put(struct kvm_vcpu *vcpu)
void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
@ -997,25 +997,6 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
static void avic_vcpu_load(struct kvm_vcpu *vcpu)
{
int cpu = get_cpu();
WARN_ON(cpu != vcpu->cpu);
__avic_vcpu_load(vcpu, cpu);
put_cpu();
}
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
preempt_disable();
__avic_vcpu_put(vcpu);
preempt_enable();
}
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
@ -1042,7 +1023,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
vmcb_mark_dirty(vmcb, VMCB_AVIC);
if (activated)
avic_vcpu_load(vcpu);
avic_vcpu_load(vcpu, vcpu->cpu);
else
avic_vcpu_put(vcpu);
@ -1075,5 +1056,5 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
if (!kvm_vcpu_apicv_active(vcpu))
return;
avic_vcpu_load(vcpu);
avic_vcpu_load(vcpu, vcpu->cpu);
}

View File

@ -616,6 +616,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
u32 pause_count12;
u32 pause_thresh12;
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
@ -671,27 +673,25 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
/* use guest values since host doesn't use them */
vmcb02->control.pause_filter_count =
svm->pause_filter_enabled ?
svm->nested.ctl.pause_filter_count : 0;
/* use guest values since host doesn't intercept PAUSE */
vmcb02->control.pause_filter_count = pause_count12;
vmcb02->control.pause_filter_thresh = pause_thresh12;
vmcb02->control.pause_filter_thresh =
svm->pause_threshold_enabled ?
svm->nested.ctl.pause_filter_thresh : 0;
} else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
/* use host values when guest doesn't use them */
} else {
/* start from host values otherwise */
vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
} else {
/*
* Intercept every PAUSE otherwise and
* ignore both host and guest values
*/
vmcb02->control.pause_filter_count = 0;
vmcb02->control.pause_filter_thresh = 0;
/* ... but ensure filtering is disabled if so requested. */
if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
if (!pause_count12)
vmcb02->control.pause_filter_count = 0;
if (!pause_thresh12)
vmcb02->control.pause_filter_thresh = 0;
}
}
nested_svm_transition_tlb_flush(vcpu);
@ -951,8 +951,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.event_inj = svm->nested.ctl.event_inj;
vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count)
if (!kvm_pause_in_guest(vcpu->kvm)) {
vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
}
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);

View File

@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
if (kvm_pause_in_guest(vcpu->kvm) || !old)
if (kvm_pause_in_guest(vcpu->kvm))
return;
control->pause_filter_count = __grow_ple_window(old,
@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
if (kvm_pause_in_guest(vcpu->kvm) || !old)
if (kvm_pause_in_guest(vcpu->kvm))
return;
control->pause_filter_count =
@ -1400,13 +1400,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
indirect_branch_prediction_barrier();
}
if (kvm_vcpu_apicv_active(vcpu))
__avic_vcpu_load(vcpu, cpu);
avic_vcpu_load(vcpu, cpu);
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_apicv_active(vcpu))
__avic_vcpu_put(vcpu);
avic_vcpu_put(vcpu);
svm_prepare_host_switch(vcpu);

View File

@ -610,8 +610,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void __avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);

View File

@ -7779,7 +7779,9 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_ABSENT) |
BIT(APICV_INHIBIT_REASON_HYPERV) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}

View File

@ -9853,6 +9853,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
return;
down_read(&vcpu->kvm->arch.apicv_update_lock);
preempt_disable();
activate = kvm_vcpu_apicv_activated(vcpu);
@ -9873,6 +9874,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
out:
preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);

View File

@ -37,11 +37,38 @@ ifeq ($(ARCH),riscv)
UNAME_M := riscv
endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
LIBKVM += lib/assert.c
LIBKVM += lib/elf.c
LIBKVM += lib/guest_modes.c
LIBKVM += lib/io.c
LIBKVM += lib/kvm_util.c
LIBKVM += lib/perf_test_util.c
LIBKVM += lib/rbtree.c
LIBKVM += lib/sparsebit.c
LIBKVM += lib/test_util.c
LIBKVM_x86_64 += lib/x86_64/apic.c
LIBKVM_x86_64 += lib/x86_64/handlers.S
LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
LIBKVM_x86_64 += lib/x86_64/processor.c
LIBKVM_x86_64 += lib/x86_64/svm.c
LIBKVM_x86_64 += lib/x86_64/ucall.c
LIBKVM_x86_64 += lib/x86_64/vmx.c
LIBKVM_aarch64 += lib/aarch64/gic.c
LIBKVM_aarch64 += lib/aarch64/gic_v3.c
LIBKVM_aarch64 += lib/aarch64/handlers.S
LIBKVM_aarch64 += lib/aarch64/processor.c
LIBKVM_aarch64 += lib/aarch64/spinlock.c
LIBKVM_aarch64 += lib/aarch64/ucall.c
LIBKVM_aarch64 += lib/aarch64/vgic.c
LIBKVM_s390x += lib/s390x/diag318_test_handler.c
LIBKVM_s390x += lib/s390x/processor.c
LIBKVM_s390x += lib/s390x/ucall.c
LIBKVM_riscv += lib/riscv/processor.c
LIBKVM_riscv += lib/riscv/ucall.c
TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
@ -173,12 +200,13 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
include ../lib.mk
STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_C := $(filter %.c,$(LIBKVM))
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
@ -187,13 +215,8 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
$(AR) crs $@ $^
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
all: $(STATIC_LIBS)
$(TEST_GEN_PROGS): $(STATIC_LIBS)
$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
cscope:

View File

@ -336,8 +336,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-i iterations] [-p offset] [-g]"
"[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
"[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
"[-x memslots]\n", name);
puts("");
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
@ -351,6 +351,7 @@ static void help(char *name)
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
guest_modes_help();
printf(" -n: Run the vCPUs in nested mode (L2)\n");
printf(" -b: specify the size of the memory region which should be\n"
" dirtied by each vCPU. e.g. 10M or 3G.\n"
" (default: 1G)\n");
@ -387,7 +388,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) {
while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) {
switch (opt) {
case 'g':
dirty_log_manual_caps = 0;
@ -401,6 +402,9 @@ int main(int argc, char *argv[])
case 'm':
guest_modes_cmdline(optarg);
break;
case 'n':
perf_test_args.nested = true;
break;
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;

View File

@ -30,10 +30,15 @@ struct perf_test_vcpu_args {
struct perf_test_args {
struct kvm_vm *vm;
/* The starting address and size of the guest test region. */
uint64_t gpa;
uint64_t size;
uint64_t guest_page_size;
int wr_fract;
/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
bool nested;
struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
};
@ -49,5 +54,9 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
void perf_test_join_vcpu_threads(int vcpus);
void perf_test_guest_code(uint32_t vcpu_id);
uint64_t perf_test_nested_pages(int nr_vcpus);
void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus);
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */

View File

@ -482,13 +482,23 @@ void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
void vm_xsave_req_perm(int bit);
enum x86_page_size {
X86_PAGE_SIZE_4K = 0,
X86_PAGE_SIZE_2M,
X86_PAGE_SIZE_1G,
enum pg_level {
PG_LEVEL_NONE,
PG_LEVEL_4K,
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
PG_LEVEL_NUM
};
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
enum x86_page_size page_size);
#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
/*
* Basic CPU control in CR0
@ -505,9 +515,6 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
#define XSTATE_XTILE_CFG_BIT 17
#define XSTATE_XTILE_DATA_BIT 18

View File

@ -96,6 +96,9 @@
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
#define EXIT_REASON_FAILED_VMENTRY 0x80000000
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
@ -606,6 +609,7 @@ bool load_vmcs(struct vmx_pages *vmx);
bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
bool ept_1g_pages_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr);
@ -613,6 +617,8 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t memslot);
void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t addr, uint64_t size);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);

View File

@ -40,7 +40,7 @@ static bool all_vcpu_threads_running;
* Continuously write to the first 8 bytes of each page in the
* specified region.
*/
static void guest_code(uint32_t vcpu_id)
void perf_test_guest_code(uint32_t vcpu_id)
{
struct perf_test_args *pta = &perf_test_args;
struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
@ -108,8 +108,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
{
struct perf_test_args *pta = &perf_test_args;
struct kvm_vm *vm;
uint64_t guest_num_pages;
uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;
uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
uint64_t region_end_gfn;
int i;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
@ -134,34 +135,54 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
"Guest memory cannot be evenly divided into %d slots.",
slots);
/*
* If using nested, allocate extra pages for the nested page tables and
* in-memory data structures.
*/
if (pta->nested)
slot0_pages += perf_test_nested_pages(vcpus);
/*
* Pass guest_num_pages to populate the page tables for test memory.
* The memory is also added to memslot 0, but that's a benign side
* effect as KVM allows aliasing HVAs in meslots.
*/
vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
guest_num_pages, 0, guest_code, NULL);
vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0,
perf_test_guest_code, NULL);
pta->vm = vm;
/* Put the test region at the top guest physical memory. */
region_end_gfn = vm_get_max_gfn(vm) + 1;
#ifdef __x86_64__
/*
* When running vCPUs in L2, restrict the test region to 48 bits to
* avoid needing 5-level page tables to identity map L2.
*/
if (pta->nested)
region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
#endif
/*
* If there should be more memory in the guest test region than there
* can be pages in the guest, it will definitely cause problems.
*/
TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
TEST_ASSERT(guest_num_pages < region_end_gfn,
"Requested more guest memory than address space allows.\n"
" guest pages: %" PRIx64 " max gfn: %" PRIx64
" vcpus: %d wss: %" PRIx64 "]\n",
guest_num_pages, vm_get_max_gfn(vm), vcpus,
guest_num_pages, region_end_gfn - 1, vcpus,
vcpu_memory_bytes);
pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
pta->gpa = align_down(pta->gpa, backing_src_pagesz);
#ifdef __s390x__
/* Align to 1M (segment size) */
pta->gpa = align_down(pta->gpa, 1 << 20);
#endif
pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
pta->size = guest_num_pages * pta->guest_page_size;
pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
pta->gpa, pta->gpa + pta->size);
/* Add extra memory slots for testing */
for (i = 0; i < slots; i++) {
@ -178,6 +199,11 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
if (pta->nested) {
pr_info("Configuring vCPUs to run in L2 (nested).\n");
perf_test_setup_nested(vm, vcpus);
}
ucall_init(vm, NULL);
/* Export the shared variables to the guest. */
@ -198,6 +224,17 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
sync_global_to_guest(vm, perf_test_args);
}
uint64_t __weak perf_test_nested_pages(int nr_vcpus)
{
return 0;
}
void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
{
pr_info("%s() not support on this architecture, skipping.\n", __func__);
exit(KSFT_SKIP);
}
static void *vcpu_thread_main(void *data)
{
struct vcpu_thread *vcpu = data;

View File

@ -0,0 +1,112 @@
// SPDX-License-Identifier: GPL-2.0
/*
* x86_64-specific extensions to perf_test_util.c.
*
* Copyright (C) 2022, Google, Inc.
*/
#include <stdio.h>
#include <stdlib.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include "test_util.h"
#include "kvm_util.h"
#include "perf_test_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
#include "vmx.h"
void perf_test_l2_guest_code(uint64_t vcpu_id)
{
perf_test_guest_code(vcpu_id);
vmcall();
}
extern char perf_test_l2_guest_entry[];
__asm__(
"perf_test_l2_guest_entry:"
" mov (%rsp), %rdi;"
" call perf_test_l2_guest_code;"
" ud2;"
);
static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
unsigned long *rsp;
GUEST_ASSERT(vmx->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
GUEST_ASSERT(load_vmcs(vmx));
GUEST_ASSERT(ept_1g_pages_supported());
rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
*rsp = vcpu_id;
prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp);
GUEST_ASSERT(!vmlaunch());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
GUEST_DONE();
}
uint64_t perf_test_nested_pages(int nr_vcpus)
{
/*
* 513 page tables is enough to identity-map 256 TiB of L2 with 1G
* pages and 4-level paging, plus a few pages per-vCPU for data
* structures such as the VMCS.
*/
return 513 + 10 * nr_vcpus;
}
void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
{
uint64_t start, end;
prepare_eptp(vmx, vm, 0);
/*
* Identity map the first 4G and the test region with 1G pages so that
* KVM can shadow the EPT12 with the maximum huge page size supported
* by the backing source.
*/
nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
start = align_down(perf_test_args.gpa, PG_SIZE_1G);
end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G);
nested_identity_map_1g(vmx, vm, start, end - start);
}
void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
{
struct vmx_pages *vmx, *vmx0 = NULL;
struct kvm_regs regs;
vm_vaddr_t vmx_gva;
int vcpu_id;
nested_vmx_check_supported();
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
vmx = vcpu_alloc_vmx(vm, &vmx_gva);
if (vcpu_id == 0) {
perf_test_setup_ept(vmx, vm);
vmx0 = vmx;
} else {
/* Share the same EPT table across all vCPUs. */
vmx->eptp = vmx0->eptp;
vmx->eptp_hva = vmx0->eptp_hva;
vmx->eptp_gpa = vmx0->eptp_gpa;
}
/*
* Override the vCPU to run perf_test_l1_guest_code() which will
* bounce it into L2 before calling perf_test_guest_code().
*/
vcpu_regs_get(vm, vcpu_id, &regs);
regs.rip = (unsigned long) perf_test_l1_guest_code;
vcpu_regs_set(vm, vcpu_id, &regs);
vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id);
}
}

View File

@ -158,7 +158,7 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
int level)
{
uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
return &page_table[index];
}
@ -167,14 +167,14 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
uint64_t pt_pfn,
uint64_t vaddr,
uint64_t paddr,
int level,
enum x86_page_size page_size)
int current_level,
int target_level)
{
uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
if (!(*pte & PTE_PRESENT_MASK)) {
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
if (level == page_size)
if (current_level == target_level)
*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
else
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
@ -184,20 +184,19 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
* a hugepage at this level, and that there isn't a hugepage at
* this level.
*/
TEST_ASSERT(level != page_size,
TEST_ASSERT(current_level != target_level,
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
page_size, vaddr);
current_level, vaddr);
TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
level, vaddr);
current_level, vaddr);
}
return pte;
}
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
enum x86_page_size page_size)
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
{
const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
const uint64_t pg_size = PG_LEVEL_SIZE(level);
uint64_t *pml4e, *pdpe, *pde;
uint64_t *pte;
@ -222,20 +221,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
* early if a hugepage was created.
*/
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
vaddr, paddr, 3, page_size);
vaddr, paddr, PG_LEVEL_512G, level);
if (*pml4e & PTE_LARGE_MASK)
return;
pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
if (*pdpe & PTE_LARGE_MASK)
return;
pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
if (*pde & PTE_LARGE_MASK)
return;
/* Fill in page table entry. */
pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
@ -243,7 +242,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
__virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,

View File

@ -198,6 +198,16 @@ bool load_vmcs(struct vmx_pages *vmx)
return true;
}
static bool ept_vpid_cap_supported(uint64_t mask)
{
return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
}
bool ept_1g_pages_supported(void)
{
return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
}
/*
* Initialize the control fields to the most basic settings possible.
*/
@ -215,7 +225,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
struct eptPageTablePointer eptp = {
.memory_type = VMX_BASIC_MEM_TYPE_WB,
.page_walk_length = 3, /* + 1 */
.ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
.ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
};
@ -392,80 +402,93 @@ void nested_vmx_check_supported(void)
}
}
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr)
static void nested_create_pte(struct kvm_vm *vm,
struct eptPageTableEntry *pte,
uint64_t nested_paddr,
uint64_t paddr,
int current_level,
int target_level)
{
uint16_t index[4];
struct eptPageTableEntry *pml4e;
if (!pte->readable) {
pte->writable = true;
pte->readable = true;
pte->executable = true;
pte->page_size = (current_level == target_level);
if (pte->page_size)
pte->address = paddr >> vm->page_shift;
else
pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
} else {
/*
* Entry already present. Assert that the caller doesn't want
* a hugepage at this level, and that there isn't a hugepage at
* this level.
*/
TEST_ASSERT(current_level != target_level,
"Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
current_level, nested_paddr);
TEST_ASSERT(!pte->page_size,
"Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
current_level, nested_paddr);
}
}
void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, int target_level)
{
const uint64_t page_size = PG_LEVEL_SIZE(target_level);
struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
uint16_t index;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
TEST_ASSERT((nested_paddr % vm->page_size) == 0,
TEST_ASSERT((nested_paddr >> 48) == 0,
"Nested physical address 0x%lx requires 5-level paging",
nested_paddr);
TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
" nested_paddr: 0x%lx vm->page_size: 0x%x",
nested_paddr, vm->page_size);
" nested_paddr: 0x%lx page_size: 0x%lx",
nested_paddr, page_size);
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
TEST_ASSERT((paddr % vm->page_size) == 0,
TEST_ASSERT((paddr % page_size) == 0,
"Physical address not on page boundary,\n"
" paddr: 0x%lx vm->page_size: 0x%x",
paddr, vm->page_size);
" paddr: 0x%lx page_size: 0x%lx",
paddr, page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
index[0] = (nested_paddr >> 12) & 0x1ffu;
index[1] = (nested_paddr >> 21) & 0x1ffu;
index[2] = (nested_paddr >> 30) & 0x1ffu;
index[3] = (nested_paddr >> 39) & 0x1ffu;
for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
pte = &pt[index];
/* Allocate page directory pointer table if not present. */
pml4e = vmx->eptp_hva;
if (!pml4e[index[3]].readable) {
pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pml4e[index[3]].writable = true;
pml4e[index[3]].readable = true;
pml4e[index[3]].executable = true;
nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
if (pte->page_size)
break;
pt = addr_gpa2hva(vm, pte->address * vm->page_size);
}
/* Allocate page directory table if not present. */
struct eptPageTableEntry *pdpe;
pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
if (!pdpe[index[2]].readable) {
pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pdpe[index[2]].writable = true;
pdpe[index[2]].readable = true;
pdpe[index[2]].executable = true;
}
/* Allocate page table if not present. */
struct eptPageTableEntry *pde;
pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
if (!pde[index[1]].readable) {
pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pde[index[1]].writable = true;
pde[index[1]].readable = true;
pde[index[1]].executable = true;
}
/* Fill in page table entry. */
struct eptPageTableEntry *pte;
pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
pte[index[0]].address = paddr >> vm->page_shift;
pte[index[0]].writable = true;
pte[index[0]].readable = true;
pte[index[0]].executable = true;
/*
* For now mark these as accessed and dirty because the only
* testcase we have needs that. Can be reconsidered later.
*/
pte[index[0]].accessed = true;
pte[index[0]].dirty = true;
pte->accessed = true;
pte->dirty = true;
}
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr)
{
__nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
}
/*
@ -476,7 +499,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* nested_paddr - Nested guest physical address to map
* paddr - VM Physical Address
* size - The size of the range to map
* eptp_memslot - Memory region slot for new virtual translation tables
* level - The level at which to map the range
*
* Output Args: None
*
@ -485,22 +508,29 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* Within the VM given by vm, creates a nested guest translation for the
* page range starting at nested_paddr to the page range starting at paddr.
*/
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint64_t size)
void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint64_t size,
int level)
{
size_t page_size = vm->page_size;
size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
nested_pg_map(vmx, vm, nested_paddr, paddr);
__nested_pg_map(vmx, vm, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}
}
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint64_t size)
{
__nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
}
/* Prepare an identity extended page table that maps all the
* physical pages in VM.
*/
@ -525,6 +555,13 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
}
}
/* Identity map a region with 1GiB Pages. */
void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t addr, uint64_t size)
{
__nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
}
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{

View File

@ -244,7 +244,7 @@ int main(int argc, char *argv[])
#ifdef __x86_64__
/* Identity map memory in the guest using 1gb pages. */
for (i = 0; i < slot_size; i += size_1gb)
__virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G);
__virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
#else
for (i = 0; i < slot_size; i += vm_get_page_size(vm))
virt_pg_map(vm, gpa + i, gpa + i);

View File

@ -35,7 +35,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
run = vcpu_state(vm, VCPU_ID);
/* Map 1gb page without a backing memlot. */
__virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
__virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G);
r = _vcpu_run(vm, VCPU_ID);

View File

@ -3328,9 +3328,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
vcpu->stat.generic.blocking = 1;
preempt_disable();
kvm_arch_vcpu_blocking(vcpu);
prepare_to_rcuwait(wait);
preempt_enable();
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@ -3340,9 +3342,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
waited = true;
schedule();
}
finish_rcuwait(wait);
preempt_disable();
finish_rcuwait(wait);
kvm_arch_vcpu_unblocking(vcpu);
preempt_enable();
vcpu->stat.generic.blocking = 0;