* Fix constant sign extension affecting TCR_EL2 and preventing
 running on ARMv8.7 models due to spurious bits being set
 
 * Fix use of helpers using PSTATE early on exit by always sampling
 it as soon as the exit takes place
 
 * Move pkvm's 32bit handling into a common helper
 
 RISC-V:
 
 * Fix incorrect KVM_MAX_VCPUS value
 
 * Unmap stage2 mapping when deleting/moving a memslot
 
 x86:
 
 * Fix and downgrade BUG_ON due to uninitialized cache
 
 * Many APICv and MOVE_ENC_CONTEXT_FROM fixes
 
 * Correctly emulate TLB flushes around nested vmentry/vmexit
 and when the nested hypervisor uses VPID
 
 * Prevent modifications to CPUID after the VM has run
 
 * Other smaller bugfixes
 
 Generic:
 
 * Memslot handling bugfixes
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmGmHBEUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroOkGgf/RBjt1d7H6Um7tD7oA5QiIHmNY4ko
 K/90OAa8h62rilxpqxkRgLNmphBc5AzcbufVXN4J1hVhw2M+u1ouDxKeHS1GEZTA
 /XdNb0dwK99TpOJkIcuV/NQVIZUxkM00VbIiCoLkX06VuIc1Gie1G4bqzLhWCP8Y
 ts9l/pkfafvfEmjmcjVd7gkDOnEPbT+JPDJcuo/RA7C7Z2L4+8DsFeyfWGqBP647
 J6omUUxD82QRm28OVOK4V7aNALWsAdlaqHrVFAPZywQl7QTWMO0UTcKTdCCB2B4Q
 QnHejFV6pFh55q3/fhe7epy9e2Sw+NOsmWKTEGPbU5nn94R8lyW1GV4ZUQ==
 =Nduu
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM64:

   - Fix constant sign extension affecting TCR_EL2 and preventing
     running on ARMv8.7 models due to spurious bits being set

   - Fix use of helpers using PSTATE early on exit by always sampling it
     as soon as the exit takes place

   - Move pkvm's 32bit handling into a common helper

  RISC-V:

   - Fix incorrect KVM_MAX_VCPUS value

   - Unmap stage2 mapping when deleting/moving a memslot

  x86:

   - Fix and downgrade BUG_ON due to uninitialized cache

   - Many APICv and MOVE_ENC_CONTEXT_FROM fixes

   - Correctly emulate TLB flushes around nested vmentry/vmexit and when
     the nested hypervisor uses VPID

   - Prevent modifications to CPUID after the VM has run

   - Other smaller bugfixes

  Generic:

   - Memslot handling bugfixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (44 commits)
  KVM: fix avic_set_running for preemptable kernels
  KVM: VMX: clear vmx_x86_ops.sync_pir_to_irr if APICv is disabled
  KVM: SEV: accept signals in sev_lock_two_vms
  KVM: SEV: do not take kvm->lock when destroying
  KVM: SEV: Prohibit migration of a VM that has mirrors
  KVM: SEV: Do COPY_ENC_CONTEXT_FROM with both VMs locked
  selftests: sev_migrate_tests: add tests for KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
  KVM: SEV: move mirror status to destination of KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM
  KVM: SEV: initialize regions_list of a mirror VM
  KVM: SEV: cleanup locking for KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM
  KVM: SEV: do not use list_replace_init on an empty list
  KVM: x86: Use a stable condition around all VT-d PI paths
  KVM: x86: check PIR even for vCPUs with disabled APICv
  KVM: VMX: prepare sync_pir_to_irr for running with APICv disabled
  KVM: selftests: page_table_test: fix calculation of guest_test_phys_mem
  KVM: x86/mmu: Handle "default" period when selectively waking kthread
  KVM: MMU: shadow nested paging does not have PKU
  KVM: x86/mmu: Remove spurious TLB flushes in TDP MMU zap collapsible path
  KVM: x86/mmu: Use yield-safe TDP MMU root iter in MMU notifier unmapping
  KVM: X86: Use vcpu->arch.walk_mmu for kvm_mmu_invlpg()
  ...
This commit is contained in:
Linus Torvalds 2021-11-30 09:22:15 -08:00
commit f080815fdb
27 changed files with 623 additions and 352 deletions

View File

@ -91,7 +91,7 @@
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */
#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
@ -276,7 +276,7 @@
#define CPTR_EL2_TFP_SHIFT 10
/* Hyp Coprocessor Trap Register */
#define CPTR_EL2_TCPAC (1 << 31)
#define CPTR_EL2_TCPAC (1U << 31)
#define CPTR_EL2_TAM (1 << 30)
#define CPTR_EL2_TTA (1 << 20)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)

View File

@ -403,6 +403,8 @@ typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
@ -429,6 +431,18 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
*/
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
/*
* Save PSTATE early so that we can evaluate the vcpu mode
* early on.
*/
vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
/*
* Check whether we want to repaint the state one way or
* another.
*/
early_exit_filter(vcpu, exit_code);
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

View File

@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
{
ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
/*
* Guest PSTATE gets saved at guest fixup time in all
* cases. We still need to handle the nVHE host side here.
*/
if (!has_vhe() && ctxt->__hyp_running_vcpu)
ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);

View File

@ -233,7 +233,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
* Returns false if the guest ran in AArch32 when it shouldn't have, and
* thus should exit to the host, or true if a the guest run loop can continue.
*/
static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
@ -248,10 +248,7 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
vcpu->arch.target = -1;
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
return false;
}
return true;
}
/* Switch to the guest for legacy non-VHE systems */
@ -316,9 +313,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
break;
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));

View File

@ -112,6 +112,10 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
return hyp_exit_handlers;
}
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
}
/* Switch to the guest for VHE systems running in EL2 */
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{

View File

@ -12,14 +12,12 @@
#include <linux/types.h>
#include <linux/kvm.h>
#include <linux/kvm_types.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_timer.h>
#ifdef CONFIG_64BIT
#define KVM_MAX_VCPUS (1U << 16)
#else
#define KVM_MAX_VCPUS (1U << 9)
#endif
#define KVM_MAX_VCPUS \
((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1)
#define KVM_HALT_POLL_NS_DEFAULT 500000

View File

@ -453,6 +453,12 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
stage2_unmap_range(kvm, gpa, size, false);
spin_unlock(&kvm->mmu_lock);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,

View File

@ -81,7 +81,6 @@ struct kvm_ioapic {
unsigned long irq_states[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
struct rtc_status rtc_status;
struct delayed_work eoi_inject;

View File

@ -56,7 +56,6 @@ struct kvm_pic {
struct kvm_io_device dev_master;
struct kvm_io_device dev_slave;
struct kvm_io_device dev_elcr;
void (*ack_notifier)(void *opaque, int irq);
unsigned long irq_states[PIC_NUM_PINS];
};

View File

@ -707,7 +707,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
if (apic->vcpu->arch.apicv_active)
if (kvm_x86_ops.sync_pir_to_irr)
highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
else
highest_irr = apic_find_highest_irr(apic);

View File

@ -1582,7 +1582,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
if (is_tdp_mmu_enabled(kvm))
flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
return flush;
}
@ -2173,10 +2173,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
iterator->shadow_addr = root;
iterator->level = vcpu->arch.mmu->shadow_root_level;
if (iterator->level == PT64_ROOT_4LEVEL &&
if (iterator->level >= PT64_ROOT_4LEVEL &&
vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
!vcpu->arch.mmu->direct_map)
--iterator->level;
iterator->level = PT32E_ROOT_LEVEL;
if (iterator->level == PT32E_ROOT_LEVEL) {
/*
@ -4855,7 +4855,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
struct kvm_mmu *context = &vcpu->arch.guest_mmu;
struct kvm_mmu_role_regs regs = {
.cr0 = cr0,
.cr4 = cr4,
.cr4 = cr4 & ~X86_CR4_PKE,
.efer = efer,
};
union kvm_mmu_role new_role;
@ -4919,7 +4919,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->direct_map = false;
update_permission_bitmask(context, true);
update_pkru_bitmask(context);
context->pkru_mask = 0;
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
@ -5025,6 +5025,14 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
/*
* Invalidate all MMU roles to force them to reinitialize as CPUID
* information is factored into reserved bit calculations.
*
* Correctly handling multiple vCPU models with respect to paging and
* physical address properties) in a single VM would require tracking
* all relevant CPUID information in kvm_mmu_page_role. That is very
* undesirable as it would increase the memory requirements for
* gfn_track (see struct kvm_mmu_page_role comments). For now that
* problem is swept under the rug; KVM's CPUID API is horrific and
* it's all but impossible to solve it without introducing a new API.
*/
vcpu->arch.root_mmu.mmu_role.ext.valid = 0;
vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;
@ -5032,24 +5040,10 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
/*
* KVM does not correctly handle changing guest CPUID after KVM_RUN, as
* MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
* tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
* faults due to reusing SPs/SPTEs. Alert userspace, but otherwise
* sweep the problem under the rug.
*
* KVM's horrific CPUID ABI makes the problem all but impossible to
* solve, as correctly handling multiple vCPU models (with respect to
* paging and physical address properties) in a single VM would require
* tracking all relevant CPUID information in kvm_mmu_page_role. That
* is very undesirable as it would double the memory requirements for
* gfn_track (see struct kvm_mmu_page_role comments), and in practice
* no sane VMM mucks with the core vCPU model on the fly.
* Changing guest CPUID after KVM_RUN is forbidden, see the comment in
* kvm_arch_vcpu_ioctl().
*/
if (vcpu->arch.last_vmentry_cpu != -1) {
pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n");
pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n");
}
KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@ -5369,7 +5363,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@ -5854,8 +5848,6 @@ restart:
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot)
{
bool flush = false;
if (kvm_memslots_have_rmaps(kvm)) {
write_lock(&kvm->mmu_lock);
/*
@ -5863,17 +5855,14 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
* logging at a 4k granularity and never creates collapsible
* 2m SPTEs during dirty logging.
*/
flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
if (flush)
if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true))
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
write_unlock(&kvm->mmu_lock);
}
if (is_tdp_mmu_enabled(kvm)) {
read_lock(&kvm->mmu_lock);
flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
read_unlock(&kvm->mmu_lock);
}
}
@ -6182,23 +6171,46 @@ void kvm_mmu_module_exit(void)
mmu_audit_disable();
}
/*
* Calculate the effective recovery period, accounting for '0' meaning "let KVM
* select a halving time of 1 hour". Returns true if recovery is enabled.
*/
static bool calc_nx_huge_pages_recovery_period(uint *period)
{
/*
* Use READ_ONCE to get the params, this may be called outside of the
* param setters, e.g. by the kthread to compute its next timeout.
*/
bool enabled = READ_ONCE(nx_huge_pages);
uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
if (!enabled || !ratio)
return false;
*period = READ_ONCE(nx_huge_pages_recovery_period_ms);
if (!*period) {
/* Make sure the period is not less than one second. */
ratio = min(ratio, 3600u);
*period = 60 * 60 * 1000 / ratio;
}
return true;
}
static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
{
bool was_recovery_enabled, is_recovery_enabled;
uint old_period, new_period;
int err;
was_recovery_enabled = nx_huge_pages_recovery_ratio;
old_period = nx_huge_pages_recovery_period_ms;
was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
err = param_set_uint(val, kp);
if (err)
return err;
is_recovery_enabled = nx_huge_pages_recovery_ratio;
new_period = nx_huge_pages_recovery_period_ms;
is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period);
if (READ_ONCE(nx_huge_pages) && is_recovery_enabled &&
if (is_recovery_enabled &&
(!was_recovery_enabled || old_period > new_period)) {
struct kvm *kvm;
@ -6262,18 +6274,13 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
static long get_nx_lpage_recovery_timeout(u64 start_time)
{
uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
uint period = READ_ONCE(nx_huge_pages_recovery_period_ms);
bool enabled;
uint period;
if (!period && ratio) {
/* Make sure the period is not less than one second. */
ratio = min(ratio, 3600u);
period = 60 * 60 * 1000 / ratio;
}
enabled = calc_nx_huge_pages_recovery_period(&period);
return READ_ONCE(nx_huge_pages) && ratio
? start_time + msecs_to_jiffies(period) - get_jiffies_64()
: MAX_SCHEDULE_TIMEOUT;
return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
: MAX_SCHEDULE_TIMEOUT;
}
static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)

View File

@ -317,9 +317,6 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
int level = sp->role.level;
gfn_t base_gfn = sp->gfn;
u64 old_child_spte;
u64 *sptep;
gfn_t gfn;
int i;
trace_kvm_mmu_prepare_zap_page(sp);
@ -327,8 +324,9 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
tdp_mmu_unlink_page(kvm, sp, shared);
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
sptep = rcu_dereference(pt) + i;
gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
u64 *sptep = rcu_dereference(pt) + i;
gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
u64 old_child_spte;
if (shared) {
/*
@ -374,7 +372,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
shared);
}
kvm_flush_remote_tlbs_with_address(kvm, gfn,
kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
KVM_PAGES_PER_HPAGE(level + 1));
call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
@ -1033,9 +1031,9 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
{
struct kvm_mmu_page *root;
for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
flush |= zap_gfn_range(kvm, root, range->start, range->end,
range->may_block, flush, false);
for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
flush = zap_gfn_range(kvm, root, range->start, range->end,
range->may_block, flush, false);
return flush;
}
@ -1364,10 +1362,9 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
* Clear leaf entries which could be replaced by large mappings, for
* GFNs within the slot.
*/
static bool zap_collapsible_spte_range(struct kvm *kvm,
static void zap_collapsible_spte_range(struct kvm *kvm,
struct kvm_mmu_page *root,
const struct kvm_memory_slot *slot,
bool flush)
const struct kvm_memory_slot *slot)
{
gfn_t start = slot->base_gfn;
gfn_t end = start + slot->npages;
@ -1378,10 +1375,8 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
tdp_root_for_each_pte(iter, root, start, end) {
retry:
if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
flush = false;
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
}
if (!is_shadow_present_pte(iter.old_spte) ||
!is_last_spte(iter.old_spte, iter.level))
@ -1393,6 +1388,7 @@ retry:
pfn, PG_LEVEL_NUM))
continue;
/* Note, a successful atomic zap also does a remote TLB flush. */
if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
/*
* The iter must explicitly re-read the SPTE because
@ -1401,30 +1397,24 @@ retry:
iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
goto retry;
}
flush = true;
}
rcu_read_unlock();
return flush;
}
/*
* Clear non-leaf entries (and free associated page tables) which could
* be replaced by large mappings, for GFNs within the slot.
*/
bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot,
bool flush)
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot)
{
struct kvm_mmu_page *root;
lockdep_assert_held_read(&kvm->mmu_lock);
for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
flush = zap_collapsible_spte_range(kvm, root, slot, flush);
return flush;
zap_collapsible_spte_range(kvm, root, slot);
}
/*

View File

@ -64,9 +64,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn, unsigned long mask,
bool wrprot);
bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot,
bool flush);
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot);
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,

View File

@ -989,16 +989,18 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
struct vcpu_svm *svm = to_svm(vcpu);
int cpu = get_cpu();
WARN_ON(cpu != vcpu->cpu);
svm->avic_is_running = is_run;
if (!kvm_vcpu_apicv_active(vcpu))
return;
if (is_run)
avic_vcpu_load(vcpu, vcpu->cpu);
else
avic_vcpu_put(vcpu);
if (kvm_vcpu_apicv_active(vcpu)) {
if (is_run)
avic_vcpu_load(vcpu, cpu);
else
avic_vcpu_put(vcpu);
}
put_cpu();
}
void svm_vcpu_blocking(struct kvm_vcpu *vcpu)

View File

@ -1543,28 +1543,50 @@ static bool is_cmd_allowed_from_mirror(u32 cmd_id)
return false;
}
static int sev_lock_for_migration(struct kvm *kvm)
static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
int r = -EBUSY;
if (dst_kvm == src_kvm)
return -EINVAL;
/*
* Bail if this VM is already involved in a migration to avoid deadlock
* between two VMs trying to migrate to/from each other.
* Bail if these VMs are already involved in a migration to avoid
* deadlock between two VMs trying to migrate to/from each other.
*/
if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1))
if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
return -EBUSY;
mutex_lock(&kvm->lock);
if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
goto release_dst;
r = -EINTR;
if (mutex_lock_killable(&dst_kvm->lock))
goto release_src;
if (mutex_lock_killable(&src_kvm->lock))
goto unlock_dst;
return 0;
unlock_dst:
mutex_unlock(&dst_kvm->lock);
release_src:
atomic_set_release(&src_sev->migration_in_progress, 0);
release_dst:
atomic_set_release(&dst_sev->migration_in_progress, 0);
return r;
}
static void sev_unlock_after_migration(struct kvm *kvm)
static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
mutex_unlock(&kvm->lock);
atomic_set_release(&sev->migration_in_progress, 0);
mutex_unlock(&dst_kvm->lock);
mutex_unlock(&src_kvm->lock);
atomic_set_release(&dst_sev->migration_in_progress, 0);
atomic_set_release(&src_sev->migration_in_progress, 0);
}
@ -1607,14 +1629,15 @@ static void sev_migrate_from(struct kvm_sev_info *dst,
dst->asid = src->asid;
dst->handle = src->handle;
dst->pages_locked = src->pages_locked;
dst->enc_context_owner = src->enc_context_owner;
src->asid = 0;
src->active = false;
src->handle = 0;
src->pages_locked = 0;
src->enc_context_owner = NULL;
INIT_LIST_HEAD(&dst->regions_list);
list_replace_init(&src->regions_list, &dst->regions_list);
list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
}
static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
@ -1666,15 +1689,6 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
bool charged = false;
int ret;
ret = sev_lock_for_migration(kvm);
if (ret)
return ret;
if (sev_guest(kvm)) {
ret = -EINVAL;
goto out_unlock;
}
source_kvm_file = fget(source_fd);
if (!file_is_kvm(source_kvm_file)) {
ret = -EBADF;
@ -1682,16 +1696,26 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
}
source_kvm = source_kvm_file->private_data;
ret = sev_lock_for_migration(source_kvm);
ret = sev_lock_two_vms(kvm, source_kvm);
if (ret)
goto out_fput;
if (!sev_guest(source_kvm)) {
if (sev_guest(kvm) || !sev_guest(source_kvm)) {
ret = -EINVAL;
goto out_source;
goto out_unlock;
}
src_sev = &to_kvm_svm(source_kvm)->sev_info;
/*
* VMs mirroring src's encryption context rely on it to keep the
* ASID allocated, but below we are clearing src_sev->asid.
*/
if (src_sev->num_mirrored_vms) {
ret = -EBUSY;
goto out_unlock;
}
dst_sev->misc_cg = get_current_misc_cg();
cg_cleanup_sev = dst_sev;
if (dst_sev->misc_cg != src_sev->misc_cg) {
@ -1728,13 +1752,11 @@ out_dst_cgroup:
sev_misc_cg_uncharge(cg_cleanup_sev);
put_misc_cg(cg_cleanup_sev->misc_cg);
cg_cleanup_sev->misc_cg = NULL;
out_source:
sev_unlock_after_migration(source_kvm);
out_unlock:
sev_unlock_two_vms(kvm, source_kvm);
out_fput:
if (source_kvm_file)
fput(source_kvm_file);
out_unlock:
sev_unlock_after_migration(kvm);
return ret;
}
@ -1953,76 +1975,60 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
{
struct file *source_kvm_file;
struct kvm *source_kvm;
struct kvm_sev_info source_sev, *mirror_sev;
struct kvm_sev_info *source_sev, *mirror_sev;
int ret;
source_kvm_file = fget(source_fd);
if (!file_is_kvm(source_kvm_file)) {
ret = -EBADF;
goto e_source_put;
goto e_source_fput;
}
source_kvm = source_kvm_file->private_data;
mutex_lock(&source_kvm->lock);
ret = sev_lock_two_vms(kvm, source_kvm);
if (ret)
goto e_source_fput;
if (!sev_guest(source_kvm)) {
/*
* Mirrors of mirrors should work, but let's not get silly. Also
* disallow out-of-band SEV/SEV-ES init if the target is already an
* SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
* created after SEV/SEV-ES initialization, e.g. to init intercepts.
*/
if (sev_guest(kvm) || !sev_guest(source_kvm) ||
is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
ret = -EINVAL;
goto e_source_unlock;
goto e_unlock;
}
/* Mirrors of mirrors should work, but let's not get silly */
if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) {
ret = -EINVAL;
goto e_source_unlock;
}
memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
sizeof(source_sev));
/*
* The mirror kvm holds an enc_context_owner ref so its asid can't
* disappear until we're done with it
*/
source_sev = &to_kvm_svm(source_kvm)->sev_info;
kvm_get_kvm(source_kvm);
fput(source_kvm_file);
mutex_unlock(&source_kvm->lock);
mutex_lock(&kvm->lock);
/*
* Disallow out-of-band SEV/SEV-ES init if the target is already an
* SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
* created after SEV/SEV-ES initialization, e.g. to init intercepts.
*/
if (sev_guest(kvm) || kvm->created_vcpus) {
ret = -EINVAL;
goto e_mirror_unlock;
}
source_sev->num_mirrored_vms++;
/* Set enc_context_owner and copy its encryption context over */
mirror_sev = &to_kvm_svm(kvm)->sev_info;
mirror_sev->enc_context_owner = source_kvm;
mirror_sev->active = true;
mirror_sev->asid = source_sev.asid;
mirror_sev->fd = source_sev.fd;
mirror_sev->es_active = source_sev.es_active;
mirror_sev->handle = source_sev.handle;
mirror_sev->asid = source_sev->asid;
mirror_sev->fd = source_sev->fd;
mirror_sev->es_active = source_sev->es_active;
mirror_sev->handle = source_sev->handle;
INIT_LIST_HEAD(&mirror_sev->regions_list);
ret = 0;
/*
* Do not copy ap_jump_table. Since the mirror does not share the same
* KVM contexts as the original, and they may have different
* memory-views.
*/
mutex_unlock(&kvm->lock);
return 0;
e_mirror_unlock:
mutex_unlock(&kvm->lock);
kvm_put_kvm(source_kvm);
return ret;
e_source_unlock:
mutex_unlock(&source_kvm->lock);
e_source_put:
e_unlock:
sev_unlock_two_vms(kvm, source_kvm);
e_source_fput:
if (source_kvm_file)
fput(source_kvm_file);
return ret;
@ -2034,17 +2040,24 @@ void sev_vm_destroy(struct kvm *kvm)
struct list_head *head = &sev->regions_list;
struct list_head *pos, *q;
WARN_ON(sev->num_mirrored_vms);
if (!sev_guest(kvm))
return;
/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
if (is_mirroring_enc_context(kvm)) {
kvm_put_kvm(sev->enc_context_owner);
struct kvm *owner_kvm = sev->enc_context_owner;
struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
mutex_lock(&owner_kvm->lock);
if (!WARN_ON(!owner_sev->num_mirrored_vms))
owner_sev->num_mirrored_vms--;
mutex_unlock(&owner_kvm->lock);
kvm_put_kvm(owner_kvm);
return;
}
mutex_lock(&kvm->lock);
/*
* Ensure that all guest tagged cache entries are flushed before
* releasing the pages back to the system for use. CLFLUSH will
@ -2064,8 +2077,6 @@ void sev_vm_destroy(struct kvm *kvm)
}
}
mutex_unlock(&kvm->lock);
sev_unbind_asid(kvm, sev->handle);
sev_asid_free(sev);
}

View File

@ -4651,7 +4651,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
.sync_pir_to_irr = kvm_lapic_find_highest_irr,
.apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,

View File

@ -79,6 +79,7 @@ struct kvm_sev_info {
struct list_head regions_list; /* List of registered regions */
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
struct kvm *enc_context_owner; /* Owner of copied encryption context */
unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
atomic_t migration_in_progress;
};

View File

@ -1162,29 +1162,26 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
WARN_ON(!enable_vpid);
/*
* If VPID is enabled and used by vmc12, but L2 does not have a unique
* TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
* a VPID for L2, flush the current context as the effective ASID is
* common to both L1 and L2.
*
* Defer the flush so that it runs after vmcs02.EPTP has been set by
* KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
* redundant flushes further down the nested pipeline.
*
* If a TLB flush isn't required due to any of the above, and vpid12 is
* changing then the new "virtual" VPID (vpid12) will reuse the same
* "real" VPID (vpid02), and so needs to be flushed. There's no direct
* mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
* all nested vCPUs. Remember, a flush on VM-Enter does not invalidate
* guest-physical mappings, so there is no need to sync the nEPT MMU.
* VPID is enabled and in use by vmcs12. If vpid12 is changing, then
* emulate a guest TLB flush as KVM does not track vpid12 history nor
* is the VPID incorporated into the MMU context. I.e. KVM must assume
* that the new vpid12 has never been used and thus represents a new
* guest ASID that cannot have entries in the TLB.
*/
if (!nested_has_guest_tlb_tag(vcpu)) {
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
} else if (is_vmenter &&
vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
vpid_sync_context(nested_get_vpid02(vcpu));
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
return;
}
/*
* If VPID is enabled, used by vmc12, and vpid12 is not changing but
* does not have a unique TLB tag (ASID), i.e. EPT is disabled and
* KVM was unable to allocate a VPID for L2, flush the current context
* as the effective ASID is common to both L1 and L2.
*/
if (!nested_has_guest_tlb_tag(vcpu))
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
@ -3344,8 +3341,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
};
u32 failed_index;
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
kvm_service_local_tlb_flush_requests(vcpu);
evaluate_pending_interrupts = exec_controls_get(vmx) &
(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
@ -4502,9 +4498,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
(void)nested_get_evmcs_page(vcpu);
}
/* Service the TLB flush request for L2 before switching to L1. */
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
/* Service pending TLB flush requests for L2 before switching to L1. */
kvm_service_local_tlb_flush_requests(vcpu);
/*
* VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
@ -4857,6 +4852,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
if (!vmx->nested.cached_vmcs12)
goto out_cached_vmcs12;
vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
if (!vmx->nested.cached_shadow_vmcs12)
goto out_cached_shadow_vmcs12;
@ -5289,8 +5285,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
struct vmcs_hdr hdr;
if (ghc->gpa != vmptr &&
kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
/*
* Reads from an unbacked page return all 1s,
* which means that the 32 bits located at the

View File

@ -5,6 +5,7 @@
#include <asm/cpu.h>
#include "lapic.h"
#include "irq.h"
#include "posted_intr.h"
#include "trace.h"
#include "vmx.h"
@ -77,13 +78,18 @@ after_clear_sn:
pi_set_on(pi_desc);
}
static bool vmx_can_use_vtd_pi(struct kvm *kvm)
{
return irqchip_in_kernel(kvm) && enable_apicv &&
kvm_arch_has_assigned_device(kvm) &&
irq_remapping_cap(IRQ_POSTING_CAP);
}
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
{
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
!kvm_vcpu_apicv_active(vcpu))
if (!vmx_can_use_vtd_pi(vcpu->kvm))
return;
/* Set SN when the vCPU is preempted */
@ -141,9 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
!kvm_vcpu_apicv_active(vcpu))
if (!vmx_can_use_vtd_pi(vcpu->kvm))
return 0;
WARN_ON(irqs_disabled());
@ -270,9 +274,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
struct vcpu_data vcpu_info;
int idx, ret = 0;
if (!kvm_arch_has_assigned_device(kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
!kvm_vcpu_apicv_active(kvm->vcpus[0]))
if (!vmx_can_use_vtd_pi(kvm))
return 0;
idx = srcu_read_lock(&kvm->irq_srcu);

View File

@ -2918,6 +2918,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
}
}
static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu))
return nested_get_vpid02(vcpu);
return to_vmx(vcpu)->vpid;
}
static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
@ -2930,31 +2937,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
if (enable_ept)
ept_sync_context(construct_eptp(vcpu, root_hpa,
mmu->shadow_root_level));
else if (!is_guest_mode(vcpu))
vpid_sync_context(to_vmx(vcpu)->vpid);
else
vpid_sync_context(nested_get_vpid02(vcpu));
vpid_sync_context(vmx_get_current_vpid(vcpu));
}
static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
{
/*
* vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
* vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
* vmx_flush_tlb_guest() for an explanation of why this is ok.
*/
vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
}
static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
/*
* vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
* or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit
* are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
* vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
* vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are
* required to flush GVA->{G,H}PA mappings from the TLB if vpid is
* disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
* i.e. no explicit INVVPID is necessary.
*/
vpid_sync_context(to_vmx(vcpu)->vpid);
vpid_sync_context(vmx_get_current_vpid(vcpu));
}
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
@ -6262,9 +6267,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
bool max_irr_updated;
bool got_posted_interrupt;
if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
return -EIO;
if (pi_test_on(&vmx->pi_desc)) {
@ -6274,22 +6279,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
* But on x86 this is just a compiler barrier anyway.
*/
smp_mb__after_atomic();
max_irr_updated =
got_posted_interrupt =
kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
/*
* If we are running L2 and L1 has a new pending interrupt
* which can be injected, this may cause a vmexit or it may
* be injected into L2. Either way, this interrupt will be
* processed via KVM_REQ_EVENT, not RVI, because we do not use
* virtual interrupt delivery to inject L1 interrupts into L2.
*/
if (is_guest_mode(vcpu) && max_irr_updated)
kvm_make_request(KVM_REQ_EVENT, vcpu);
} else {
max_irr = kvm_lapic_find_highest_irr(vcpu);
got_posted_interrupt = false;
}
vmx_hwapic_irr_update(vcpu, max_irr);
/*
* Newly recognized interrupts are injected via either virtual interrupt
* delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is
* disabled in two cases:
*
* 1) If L2 is running and the vCPU has a new pending interrupt. If L1
* wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
* VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected
* into L2, but KVM doesn't use virtual interrupt delivery to inject
* interrupts into L2, and so KVM_REQ_EVENT is again needed.
*
* 2) If APICv is disabled for this vCPU, assigned devices may still
* attempt to post interrupts. The posted interrupt vector will cause
* a VM-Exit and the subsequent entry will call sync_pir_to_irr.
*/
if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
vmx_set_rvi(max_irr);
else if (got_posted_interrupt)
kvm_make_request(KVM_REQ_EVENT, vcpu);
return max_irr;
}
@ -7761,10 +7777,10 @@ static __init int hardware_setup(void)
ple_window_shrink = 0;
}
if (!cpu_has_vmx_apicv()) {
if (!cpu_has_vmx_apicv())
enable_apicv = 0;
if (!enable_apicv)
vmx_x86_ops.sync_pir_to_irr = NULL;
}
if (cpu_has_vmx_tsc_scaling()) {
kvm_has_tsc_control = true;

View File

@ -3258,6 +3258,29 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
static_call(kvm_x86_tlb_flush_guest)(vcpu);
}
static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
static_call(kvm_x86_tlb_flush_current)(vcpu);
}
/*
* Service "local" TLB flush requests, which are specific to the current MMU
* context. In addition to the generic event handling in vcpu_enter_guest(),
* TLB flushes that are targeted at an MMU context also need to be serviced
* prior before nested VM-Enter/VM-Exit.
*/
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
{
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
kvm_vcpu_flush_tlb_guest(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
static void record_steal_time(struct kvm_vcpu *vcpu)
{
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
@ -4133,6 +4156,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SGX_ATTRIBUTE:
#endif
case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
case KVM_CAP_SREGS2:
case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
case KVM_CAP_VCPU_ATTRIBUTES:
@ -4448,8 +4472,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
if (vcpu->arch.apicv_active)
static_call(kvm_x86_sync_pir_to_irr)(vcpu);
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
return kvm_apic_get_state(vcpu, s);
}
@ -5124,6 +5147,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
/*
* KVM does not correctly handle changing guest CPUID after KVM_RUN, as
* MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
* tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
* faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with
* the core vCPU model on the fly, so fail.
*/
r = -EINVAL;
if (vcpu->arch.last_vmentry_cpu != -1)
goto out;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@ -5134,6 +5168,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
/*
* KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in
* KVM_SET_CPUID case above.
*/
r = -EINVAL;
if (vcpu->arch.last_vmentry_cpu != -1)
goto out;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@ -9528,8 +9570,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
if (vcpu->arch.apicv_active)
static_call(kvm_x86_sync_pir_to_irr)(vcpu);
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@ -9648,10 +9689,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/* Flushing all ASIDs flushes the current ASID... */
kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
kvm_vcpu_flush_tlb_guest(vcpu);
kvm_service_local_tlb_flush_requests(vcpu);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
@ -9802,10 +9840,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/*
* This handles the case where a posted interrupt was
* notified with kvm_vcpu_kick.
* notified with kvm_vcpu_kick. Assigned devices can
* use the POSTED_INTR_VECTOR even if APICv is disabled,
* so do it even if APICv is disabled on this vCPU.
*/
if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
static_call(kvm_x86_sync_pir_to_irr)(vcpu);
if (kvm_lapic_enabled(vcpu))
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (kvm_vcpu_exit_request(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
@ -9849,8 +9889,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
if (vcpu->arch.apicv_active)
static_call(kvm_x86_sync_pir_to_irr)(vcpu);
if (kvm_lapic_enabled(vcpu))
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (unlikely(kvm_vcpu_exit_request(vcpu))) {
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;

View File

@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu);
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@ -185,12 +186,6 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
}
static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
static_call(kvm_x86_tlb_flush_current)(vcpu);
}
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);

View File

@ -12,6 +12,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include "test_util.h"
@ -40,10 +41,39 @@ int main(int argc, char *argv[])
{
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
/*
* Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
* an arbitrary number for everything else.
*/
int nr_fds_wanted = kvm_max_vcpus + 100;
struct rlimit rl;
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
/*
* Check that we're allowed to open nr_fds_wanted file descriptors and
* try raising the limits if needed.
*/
TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
if (rl.rlim_cur < nr_fds_wanted) {
rl.rlim_cur = nr_fds_wanted;
if (rl.rlim_max < nr_fds_wanted) {
int old_rlim_max = rl.rlim_max;
rl.rlim_max = nr_fds_wanted;
int r = setrlimit(RLIMIT_NOFILE, &rl);
if (r < 0) {
printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
old_rlim_max, nr_fds_wanted);
exit(KSFT_SKIP);
}
} else {
TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
}
}
/*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
* Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID

View File

@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
#ifdef __s390x__
alignment = max(0x100000, alignment);
#endif
guest_test_phys_mem = align_down(guest_test_virt_mem, alignment);
guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
/* Set up the shared data structure test_args */
test_args.vm = vm;

View File

@ -165,10 +165,10 @@ static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
vcpu_set_cpuid(vm, VCPU_ID, cpuid);
}
static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
struct kvm_cpuid2 *best)
static void guest_test_msrs_access(void)
{
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
int stage = 0, r;
struct kvm_cpuid_entry2 feat = {
@ -180,11 +180,34 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
struct kvm_cpuid_entry2 dbg = {
.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
};
struct kvm_enable_cap cap = {0};
run = vcpu_state(vm, VCPU_ID);
struct kvm_cpuid2 *best;
vm_vaddr_t msr_gva;
struct kvm_enable_cap cap = {
.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
.args = {1}
};
struct msr_data *msr;
while (true) {
vm = vm_create_default(VCPU_ID, 0, guest_msr);
msr_gva = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
msr = addr_gva2hva(vm, msr_gva);
vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
vcpu_enable_cap(vm, VCPU_ID, &cap);
vcpu_set_hv_cpuid(vm, VCPU_ID);
best = kvm_get_supported_hv_cpuid();
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
run = vcpu_state(vm, VCPU_ID);
switch (stage) {
case 0:
/*
@ -315,6 +338,7 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
* capability enabled and guest visible CPUID bit unset.
*/
cap.cap = KVM_CAP_HYPERV_SYNIC2;
cap.args[0] = 0;
vcpu_enable_cap(vm, VCPU_ID, &cap);
break;
case 22:
@ -461,9 +485,9 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(uc.args[1] == stage,
"Unexpected stage: %ld (%d expected)\n",
uc.args[1], stage);
TEST_ASSERT(uc.args[1] == 0,
"Unexpected stage: %ld (0 expected)\n",
uc.args[1]);
break;
case UCALL_ABORT:
TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@ -474,13 +498,14 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
}
stage++;
kvm_vm_free(vm);
}
}
static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
void *input, void *output, struct kvm_cpuid2 *best)
static void guest_test_hcalls_access(void)
{
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
int stage = 0, r;
struct kvm_cpuid_entry2 feat = {
@ -493,10 +518,38 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
struct kvm_cpuid_entry2 dbg = {
.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
};
run = vcpu_state(vm, VCPU_ID);
struct kvm_enable_cap cap = {
.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
.args = {1}
};
vm_vaddr_t hcall_page, hcall_params;
struct hcall_data *hcall;
struct kvm_cpuid2 *best;
while (true) {
vm = vm_create_default(VCPU_ID, 0, guest_hcall);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
/* Hypercall input/output */
hcall_page = vm_vaddr_alloc_pages(vm, 2);
hcall = addr_gva2hva(vm, hcall_page);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
hcall_params = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
vcpu_enable_cap(vm, VCPU_ID, &cap);
vcpu_set_hv_cpuid(vm, VCPU_ID);
best = kvm_get_supported_hv_cpuid();
run = vcpu_state(vm, VCPU_ID);
switch (stage) {
case 0:
hcall->control = 0xdeadbeef;
@ -606,9 +659,9 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(uc.args[1] == stage,
"Unexpected stage: %ld (%d expected)\n",
uc.args[1], stage);
TEST_ASSERT(uc.args[1] == 0,
"Unexpected stage: %ld (0 expected)\n",
uc.args[1]);
break;
case UCALL_ABORT:
TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@ -619,66 +672,15 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
}
stage++;
kvm_vm_free(vm);
}
}
int main(void)
{
struct kvm_cpuid2 *best;
struct kvm_vm *vm;
vm_vaddr_t msr_gva, hcall_page, hcall_params;
struct kvm_enable_cap cap = {
.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
.args = {1}
};
/* Test MSRs */
vm = vm_create_default(VCPU_ID, 0, guest_msr);
msr_gva = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
vcpu_enable_cap(vm, VCPU_ID, &cap);
vcpu_set_hv_cpuid(vm, VCPU_ID);
best = kvm_get_supported_hv_cpuid();
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
pr_info("Testing access to Hyper-V specific MSRs\n");
guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
best);
kvm_vm_free(vm);
/* Test hypercalls */
vm = vm_create_default(VCPU_ID, 0, guest_hcall);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
/* Hypercall input/output */
hcall_page = vm_vaddr_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
hcall_params = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
vcpu_enable_cap(vm, VCPU_ID, &cap);
vcpu_set_hv_cpuid(vm, VCPU_ID);
best = kvm_get_supported_hv_cpuid();
guest_test_msrs_access();
pr_info("Testing access to Hyper-V hypercalls\n");
guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
addr_gva2hva(vm, hcall_page),
addr_gva2hva(vm, hcall_page) + getpagesize(),
best);
kvm_vm_free(vm);
guest_test_hcalls_access();
}

View File

@ -54,12 +54,15 @@ static struct kvm_vm *sev_vm_create(bool es)
return vm;
}
static struct kvm_vm *__vm_create(void)
static struct kvm_vm *aux_vm_create(bool with_vcpus)
{
struct kvm_vm *vm;
int i;
vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
if (!with_vcpus)
return vm;
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(vm, i);
@ -89,11 +92,11 @@ static void test_sev_migrate_from(bool es)
{
struct kvm_vm *src_vm;
struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
int i;
int i, ret;
src_vm = sev_vm_create(es);
for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
dst_vms[i] = __vm_create();
dst_vms[i] = aux_vm_create(true);
/* Initial migration from the src to the first dst. */
sev_migrate_from(dst_vms[0]->fd, src_vm->fd);
@ -102,7 +105,10 @@ static void test_sev_migrate_from(bool es)
sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd);
/* Migrate the guest back to the original VM. */
sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
TEST_ASSERT(ret == -1 && errno == EIO,
"VM that was migrated from should be dead. ret %d, errno: %d\n", ret,
errno);
kvm_vm_free(src_vm);
for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
@ -146,6 +152,8 @@ static void test_sev_migrate_locking(void)
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
pthread_join(pt[i], NULL);
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
kvm_vm_free(input[i].vm);
}
static void test_sev_migrate_parameters(void)
@ -157,12 +165,11 @@ static void test_sev_migrate_parameters(void)
sev_vm = sev_vm_create(/* es= */ false);
sev_es_vm = sev_vm_create(/* es= */ true);
vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
vm_no_sev = __vm_create();
vm_no_sev = aux_vm_create(true);
sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
vm_vcpu_add(sev_es_vm_no_vmsa, 1);
ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
@ -191,13 +198,151 @@ static void test_sev_migrate_parameters(void)
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Migrations require SEV enabled. ret %d, errno: %d\n", ret,
errno);
kvm_vm_free(sev_vm);
kvm_vm_free(sev_es_vm);
kvm_vm_free(sev_es_vm_no_vmsa);
kvm_vm_free(vm_no_vcpu);
kvm_vm_free(vm_no_sev);
}
static int __sev_mirror_create(int dst_fd, int src_fd)
{
struct kvm_enable_cap cap = {
.cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM,
.args = { src_fd }
};
return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
}
static void sev_mirror_create(int dst_fd, int src_fd)
{
int ret;
ret = __sev_mirror_create(dst_fd, src_fd);
TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
}
static void test_sev_mirror(bool es)
{
struct kvm_vm *src_vm, *dst_vm;
struct kvm_sev_launch_start start = {
.policy = es ? SEV_POLICY_ES : 0
};
int i;
src_vm = sev_vm_create(es);
dst_vm = aux_vm_create(false);
sev_mirror_create(dst_vm->fd, src_vm->fd);
/* Check that we can complete creation of the mirror VM. */
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(dst_vm, i);
sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start);
if (es)
sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
kvm_vm_free(src_vm);
kvm_vm_free(dst_vm);
}
static void test_sev_mirror_parameters(void)
{
struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
int ret;
sev_vm = sev_vm_create(/* es= */ false);
sev_es_vm = sev_vm_create(/* es= */ true);
vm_with_vcpu = aux_vm_create(true);
vm_no_vcpu = aux_vm_create(false);
ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able copy context to self. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
errno);
ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n",
ret, errno);
kvm_vm_free(sev_vm);
kvm_vm_free(sev_es_vm);
kvm_vm_free(vm_with_vcpu);
kvm_vm_free(vm_no_vcpu);
}
static void test_sev_move_copy(void)
{
struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm;
int ret;
sev_vm = sev_vm_create(/* es= */ false);
dst_vm = aux_vm_create(true);
mirror_vm = aux_vm_create(false);
dst_mirror_vm = aux_vm_create(false);
sev_mirror_create(mirror_vm->fd, sev_vm->fd);
ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
TEST_ASSERT(ret == -1 && errno == EBUSY,
"Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
errno);
/* The mirror itself can be migrated. */
sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd);
ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
TEST_ASSERT(ret == -1 && errno == EBUSY,
"Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
errno);
/*
* mirror_vm is not a mirror anymore, dst_mirror_vm is. Thus,
* the owner can be copied as soon as dst_mirror_vm is gone.
*/
kvm_vm_free(dst_mirror_vm);
sev_migrate_from(dst_vm->fd, sev_vm->fd);
kvm_vm_free(mirror_vm);
kvm_vm_free(dst_vm);
kvm_vm_free(sev_vm);
}
int main(int argc, char *argv[])
{
test_sev_migrate_from(/* es= */ false);
test_sev_migrate_from(/* es= */ true);
test_sev_migrate_locking();
test_sev_migrate_parameters();
if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
test_sev_migrate_from(/* es= */ false);
test_sev_migrate_from(/* es= */ true);
test_sev_migrate_locking();
test_sev_migrate_parameters();
if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
test_sev_move_copy();
}
if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
test_sev_mirror(/* es= */ false);
test_sev_mirror(/* es= */ true);
test_sev_mirror_parameters();
}
return 0;
}

View File

@ -1531,11 +1531,10 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
static int kvm_set_memslot(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
struct kvm_memory_slot *new, int as_id,
enum kvm_mr_change change)
{
struct kvm_memory_slot *slot;
struct kvm_memory_slot *slot, old;
struct kvm_memslots *slots;
int r;
@ -1566,7 +1565,7 @@ static int kvm_set_memslot(struct kvm *kvm,
* Note, the INVALID flag needs to be in the appropriate entry
* in the freshly allocated memslots, not in @old or @new.
*/
slot = id_to_memslot(slots, old->id);
slot = id_to_memslot(slots, new->id);
slot->flags |= KVM_MEMSLOT_INVALID;
/*
@ -1597,6 +1596,26 @@ static int kvm_set_memslot(struct kvm *kvm,
kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
}
/*
* Make a full copy of the old memslot, the pointer will become stale
* when the memslots are re-sorted by update_memslots(), and the old
* memslot needs to be referenced after calling update_memslots(), e.g.
* to free its resources and for arch specific behavior. This needs to
* happen *after* (re)acquiring slots_arch_lock.
*/
slot = id_to_memslot(slots, new->id);
if (slot) {
old = *slot;
} else {
WARN_ON_ONCE(change != KVM_MR_CREATE);
memset(&old, 0, sizeof(old));
old.id = new->id;
old.as_id = as_id;
}
/* Copy the arch-specific data, again after (re)acquiring slots_arch_lock. */
memcpy(&new->arch, &old.arch, sizeof(old.arch));
r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
if (r)
goto out_slots;
@ -1604,14 +1623,18 @@ static int kvm_set_memslot(struct kvm *kvm,
update_memslots(slots, new, change);
slots = install_new_memslots(kvm, as_id, slots);
kvm_arch_commit_memory_region(kvm, mem, old, new, change);
kvm_arch_commit_memory_region(kvm, mem, &old, new, change);
/* Free the old memslot's metadata. Note, this is the full copy!!! */
if (change == KVM_MR_DELETE)
kvm_free_memslot(kvm, &old);
kvfree(slots);
return 0;
out_slots:
if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
slot = id_to_memslot(slots, old->id);
slot = id_to_memslot(slots, new->id);
slot->flags &= ~KVM_MEMSLOT_INVALID;
slots = install_new_memslots(kvm, as_id, slots);
} else {
@ -1626,7 +1649,6 @@ static int kvm_delete_memslot(struct kvm *kvm,
struct kvm_memory_slot *old, int as_id)
{
struct kvm_memory_slot new;
int r;
if (!old->npages)
return -EINVAL;
@ -1639,12 +1661,7 @@ static int kvm_delete_memslot(struct kvm *kvm,
*/
new.as_id = as_id;
r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE);
if (r)
return r;
kvm_free_memslot(kvm, old);
return 0;
return kvm_set_memslot(kvm, mem, &new, as_id, KVM_MR_DELETE);
}
/*
@ -1672,7 +1689,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
id = (u16)mem->slot;
/* General sanity checks */
if (mem->memory_size & (PAGE_SIZE - 1))
if ((mem->memory_size & (PAGE_SIZE - 1)) ||
(mem->memory_size != (unsigned long)mem->memory_size))
return -EINVAL;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
return -EINVAL;
@ -1718,7 +1736,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (!old.npages) {
change = KVM_MR_CREATE;
new.dirty_bitmap = NULL;
memset(&new.arch, 0, sizeof(new.arch));
} else { /* Modify an existing slot. */
if ((new.userspace_addr != old.userspace_addr) ||
(new.npages != old.npages) ||
@ -1732,9 +1749,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
else /* Nothing to change. */
return 0;
/* Copy dirty_bitmap and arch from the current memslot. */
/* Copy dirty_bitmap from the current memslot. */
new.dirty_bitmap = old.dirty_bitmap;
memcpy(&new.arch, &old.arch, sizeof(new.arch));
}
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
@ -1760,7 +1776,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
bitmap_set(new.dirty_bitmap, 0, new.npages);
}
r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
r = kvm_set_memslot(kvm, mem, &new, as_id, change);
if (r)
goto out_bitmap;
@ -2915,7 +2931,8 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
int r;
gpa_t gpa = ghc->gpa + offset;
BUG_ON(len + offset > ghc->len);
if (WARN_ON_ONCE(len + offset > ghc->len))
return -EINVAL;
if (slots->generation != ghc->generation) {
if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
@ -2952,7 +2969,8 @@ int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
int r;
gpa_t gpa = ghc->gpa + offset;
BUG_ON(len + offset > ghc->len);
if (WARN_ON_ONCE(len + offset > ghc->len))
return -EINVAL;
if (slots->generation != ghc->generation) {
if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))