Merge branch 'kvm-late-6.1' into HEAD
x86: * Change tdp_mmu to a read-only parameter * Separate TDP and shadow MMU page fault paths * Enable Hyper-V invariant TSC control selftests: * Use TAP interface for kvm_binary_stats_test and tsc_msrs_test Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
commit
fc471e8310
@ -5343,9 +5343,9 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
|
||||
32 vCPUs in the shared_info page, KVM does not automatically do so
|
||||
and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used
|
||||
explicitly even when the vcpu_info for a given vCPU resides at the
|
||||
"default" location in the shared_info page. This is because KVM is
|
||||
not aware of the Xen CPU id which is used as the index into the
|
||||
vcpu_info[] array, so cannot know the correct default location.
|
||||
"default" location in the shared_info page. This is because KVM may
|
||||
not be aware of the Xen CPU id which is used as the index into the
|
||||
vcpu_info[] array, so may know the correct default location.
|
||||
|
||||
Note that the shared info page may be constantly written to by KVM;
|
||||
it contains the event channel bitmap used to deliver interrupts to
|
||||
@ -5356,23 +5356,29 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
|
||||
any vCPU has been running or any event channel interrupts can be
|
||||
routed to the guest.
|
||||
|
||||
Setting the gfn to KVM_XEN_INVALID_GFN will disable the shared info
|
||||
page.
|
||||
|
||||
KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
|
||||
Sets the exception vector used to deliver Xen event channel upcalls.
|
||||
This is the HVM-wide vector injected directly by the hypervisor
|
||||
(not through the local APIC), typically configured by a guest via
|
||||
HVM_PARAM_CALLBACK_IRQ.
|
||||
HVM_PARAM_CALLBACK_IRQ. This can be disabled again (e.g. for guest
|
||||
SHUTDOWN_soft_reset) by setting it to zero.
|
||||
|
||||
KVM_XEN_ATTR_TYPE_EVTCHN
|
||||
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
|
||||
support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
|
||||
an outbound port number for interception of EVTCHNOP_send requests
|
||||
from the guest. A given sending port number may be directed back
|
||||
to a specified vCPU (by APIC ID) / port / priority on the guest,
|
||||
or to trigger events on an eventfd. The vCPU and priority can be
|
||||
changed by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call,
|
||||
but other fields cannot change for a given sending port. A port
|
||||
mapping is removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags
|
||||
field.
|
||||
from the guest. A given sending port number may be directed back to
|
||||
a specified vCPU (by APIC ID) / port / priority on the guest, or to
|
||||
trigger events on an eventfd. The vCPU and priority can be changed
|
||||
by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call, but but other
|
||||
fields cannot change for a given sending port. A port mapping is
|
||||
removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags field. Passing
|
||||
KVM_XEN_EVTCHN_RESET in the flags field removes all interception of
|
||||
outbound event channels. The values of the flags field are mutually
|
||||
exclusive and cannot be combined as a bitmask.
|
||||
|
||||
KVM_XEN_ATTR_TYPE_XEN_VERSION
|
||||
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
|
||||
@ -5388,7 +5394,7 @@ KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG
|
||||
support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the
|
||||
XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read
|
||||
other vCPUs' vcpu_runstate_info. Xen guests enable this feature via
|
||||
the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
|
||||
the VMASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
|
||||
hypercall.
|
||||
|
||||
4.127 KVM_XEN_HVM_GET_ATTR
|
||||
@ -5446,15 +5452,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
|
||||
As with the shared_info page for the VM, the corresponding page may be
|
||||
dirtied at any time if event channel interrupt delivery is enabled, so
|
||||
userspace should always assume that the page is dirty without relying
|
||||
on dirty logging.
|
||||
on dirty logging. Setting the gpa to KVM_XEN_INVALID_GPA will disable
|
||||
the vcpu_info.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
|
||||
Sets the guest physical address of an additional pvclock structure
|
||||
for a given vCPU. This is typically used for guest vsyscall support.
|
||||
Setting the gpa to KVM_XEN_INVALID_GPA will disable the structure.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
|
||||
Sets the guest physical address of the vcpu_runstate_info for a given
|
||||
vCPU. This is how a Xen guest tracks CPU state such as steal time.
|
||||
Setting the gpa to KVM_XEN_INVALID_GPA will disable the runstate area.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
|
||||
Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
|
||||
@ -5487,7 +5496,8 @@ KVM_XEN_VCPU_ATTR_TYPE_TIMER
|
||||
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
|
||||
support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
|
||||
event channel port/priority for the VIRQ_TIMER of the vCPU, as well
|
||||
as allowing a pending timer to be saved/restored.
|
||||
as allowing a pending timer to be saved/restored. Setting the timer
|
||||
port to zero disables kernel handling of the singleshot timer.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
|
||||
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
|
||||
@ -5495,7 +5505,8 @@ KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
|
||||
per-vCPU local APIC upcall vector, configured by a Xen guest with
|
||||
the HVMOP_set_evtchn_upcall_vector hypercall. This is typically
|
||||
used by Windows guests, and is distinct from the HVM-wide upcall
|
||||
vector configured with HVM_PARAM_CALLBACK_IRQ.
|
||||
vector configured with HVM_PARAM_CALLBACK_IRQ. It is disabled by
|
||||
setting the vector to zero.
|
||||
|
||||
|
||||
4.129 KVM_XEN_VCPU_GET_ATTR
|
||||
@ -6577,11 +6588,6 @@ Please note that the kernel is allowed to use the kvm_run structure as the
|
||||
primary storage for certain register types. Therefore, the kernel may use the
|
||||
values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
|
||||
|
||||
::
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
6. Capabilities that can be enabled on vCPUs
|
||||
============================================
|
||||
|
@ -16,17 +16,26 @@ The acquisition orders for mutexes are as follows:
|
||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||
them together is quite rare.
|
||||
|
||||
- Unlike kvm->slots_lock, kvm->slots_arch_lock is released before
|
||||
synchronize_srcu(&kvm->srcu). Therefore kvm->slots_arch_lock
|
||||
can be taken inside a kvm->srcu read-side critical section,
|
||||
while kvm->slots_lock cannot.
|
||||
|
||||
- kvm->mn_active_invalidate_count ensures that pairs of
|
||||
invalidate_range_start() and invalidate_range_end() callbacks
|
||||
use the same memslots array. kvm->slots_lock and kvm->slots_arch_lock
|
||||
are taken on the waiting side in install_new_memslots, so MMU notifiers
|
||||
must not take either kvm->slots_lock or kvm->slots_arch_lock.
|
||||
|
||||
For SRCU:
|
||||
|
||||
- ``synchronize_srcu(&kvm->srcu)`` is called _inside_
|
||||
the kvm->slots_lock critical section, therefore kvm->slots_lock
|
||||
cannot be taken inside a kvm->srcu read-side critical section.
|
||||
Instead, kvm->slots_arch_lock is released before the call
|
||||
to ``synchronize_srcu()`` and _can_ be taken inside a
|
||||
kvm->srcu read-side critical section.
|
||||
|
||||
- kvm->lock is taken inside kvm->srcu, therefore
|
||||
``synchronize_srcu(&kvm->srcu)`` cannot be called inside
|
||||
a kvm->lock critical section. If you cannot delay the
|
||||
call until after kvm->lock is released, use ``call_srcu``.
|
||||
|
||||
On x86:
|
||||
|
||||
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
|
||||
|
@ -11468,7 +11468,7 @@ F: arch/x86/kvm/hyperv.*
|
||||
F: arch/x86/kvm/kvm_onhyperv.*
|
||||
F: arch/x86/kvm/svm/hyperv.*
|
||||
F: arch/x86/kvm/svm/svm_onhyperv.*
|
||||
F: arch/x86/kvm/vmx/evmcs.*
|
||||
F: arch/x86/kvm/vmx/hyperv.*
|
||||
|
||||
KVM X86 Xen (KVM/Xen)
|
||||
M: David Woodhouse <dwmw2@infradead.org>
|
||||
|
@ -255,6 +255,9 @@ enum hv_isolation_type {
|
||||
/* TSC invariant control */
|
||||
#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
|
||||
|
||||
/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
|
||||
#define HV_EXPOSE_INVARIANT_TSC BIT_ULL(0)
|
||||
|
||||
/* Register name aliases for temporary compatibility */
|
||||
#define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT
|
||||
#define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG
|
||||
|
@ -1088,6 +1088,7 @@ struct kvm_hv {
|
||||
u64 hv_reenlightenment_control;
|
||||
u64 hv_tsc_emulation_control;
|
||||
u64 hv_tsc_emulation_status;
|
||||
u64 hv_invtsc_control;
|
||||
|
||||
/* How many vCPUs have VP index != vCPU index */
|
||||
atomic_t num_mismatched_vp_indexes;
|
||||
@ -1341,21 +1342,12 @@ struct kvm_arch {
|
||||
struct task_struct *nx_huge_page_recovery_thread;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Whether the TDP MMU is enabled for this VM. This contains a
|
||||
* snapshot of the TDP MMU module parameter from when the VM was
|
||||
* created and remains unchanged for the life of the VM. If this is
|
||||
* true, TDP MMU handler functions will run for various MMU
|
||||
* operations.
|
||||
*/
|
||||
bool tdp_mmu_enabled;
|
||||
|
||||
/* The number of TDP MMU pages across all roots. */
|
||||
atomic64_t tdp_mmu_pages;
|
||||
|
||||
/*
|
||||
* List of kvm_mmu_page structs being used as roots.
|
||||
* All kvm_mmu_page structs in the list should have
|
||||
* List of struct kvm_mmu_pages being used as roots.
|
||||
* All struct kvm_mmu_pages in the list should have
|
||||
* tdp_mmu_page set.
|
||||
*
|
||||
* For reads, this list is protected by:
|
||||
|
@ -388,7 +388,7 @@ static void __init ms_hyperv_init_platform(void)
|
||||
* setting of this MSR bit should happen before init_intel()
|
||||
* is called.
|
||||
*/
|
||||
wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
|
||||
wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, HV_EXPOSE_INVARIANT_TSC);
|
||||
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
|
||||
}
|
||||
|
||||
|
@ -701,6 +701,10 @@ void kvm_set_cpu_caps(void)
|
||||
if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64))
|
||||
kvm_cpu_cap_set(X86_FEATURE_GBPAGES);
|
||||
|
||||
kvm_cpu_cap_init_kvm_defined(CPUID_8000_0007_EDX,
|
||||
SF(CONSTANT_TSC)
|
||||
);
|
||||
|
||||
kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
|
||||
F(CLZERO) | F(XSAVEERPTR) |
|
||||
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
|
||||
@ -1151,8 +1155,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
entry->edx &= ~GENMASK(17, 16);
|
||||
break;
|
||||
case 0x80000007: /* Advanced power management */
|
||||
/* invariant TSC is CPUID.80000007H:EDX[8] */
|
||||
entry->edx &= (1 << 8);
|
||||
cpuid_entry_override(entry, CPUID_8000_0007_EDX);
|
||||
|
||||
/* mask against host */
|
||||
entry->edx &= boot_cpu_data.x86_power;
|
||||
entry->eax = entry->ebx = entry->ecx = 0;
|
||||
@ -1482,6 +1486,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
|
||||
if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
|
||||
(data & TSX_CTRL_CPUID_CLEAR))
|
||||
*ebx &= ~(F(RTM) | F(HLE));
|
||||
} else if (function == 0x80000007) {
|
||||
if (kvm_hv_invtsc_suppressed(vcpu))
|
||||
*edx &= ~SF(CONSTANT_TSC);
|
||||
}
|
||||
} else {
|
||||
*eax = *ebx = *ecx = *edx = 0;
|
||||
|
@ -999,6 +999,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
|
||||
case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
|
||||
case HV_X64_MSR_TSC_EMULATION_CONTROL:
|
||||
case HV_X64_MSR_TSC_EMULATION_STATUS:
|
||||
case HV_X64_MSR_TSC_INVARIANT_CONTROL:
|
||||
case HV_X64_MSR_SYNDBG_OPTIONS:
|
||||
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
|
||||
r = true;
|
||||
@ -1283,6 +1284,9 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
|
||||
case HV_X64_MSR_TSC_EMULATION_STATUS:
|
||||
return hv_vcpu->cpuid_cache.features_eax &
|
||||
HV_ACCESS_REENLIGHTENMENT;
|
||||
case HV_X64_MSR_TSC_INVARIANT_CONTROL:
|
||||
return hv_vcpu->cpuid_cache.features_eax &
|
||||
HV_ACCESS_TSC_INVARIANT;
|
||||
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
|
||||
case HV_X64_MSR_CRASH_CTL:
|
||||
return hv_vcpu->cpuid_cache.features_edx &
|
||||
@ -1410,6 +1414,17 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
||||
if (!host)
|
||||
return 1;
|
||||
break;
|
||||
case HV_X64_MSR_TSC_INVARIANT_CONTROL:
|
||||
/* Only bit 0 is supported */
|
||||
if (data & ~HV_EXPOSE_INVARIANT_TSC)
|
||||
return 1;
|
||||
|
||||
/* The feature can't be disabled from the guest */
|
||||
if (!host && hv->hv_invtsc_control && !data)
|
||||
return 1;
|
||||
|
||||
hv->hv_invtsc_control = data;
|
||||
break;
|
||||
case HV_X64_MSR_SYNDBG_OPTIONS:
|
||||
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
|
||||
return syndbg_set_msr(vcpu, msr, data, host);
|
||||
@ -1585,6 +1600,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
|
||||
case HV_X64_MSR_TSC_EMULATION_STATUS:
|
||||
data = hv->hv_tsc_emulation_status;
|
||||
break;
|
||||
case HV_X64_MSR_TSC_INVARIANT_CONTROL:
|
||||
data = hv->hv_invtsc_control;
|
||||
break;
|
||||
case HV_X64_MSR_SYNDBG_OPTIONS:
|
||||
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
|
||||
return syndbg_get_msr(vcpu, msr, pdata, host);
|
||||
@ -1769,6 +1787,7 @@ static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_ba
|
||||
}
|
||||
|
||||
struct kvm_hv_hcall {
|
||||
/* Hypercall input data */
|
||||
u64 param;
|
||||
u64 ingpa;
|
||||
u64 outgpa;
|
||||
@ -1779,12 +1798,21 @@ struct kvm_hv_hcall {
|
||||
bool fast;
|
||||
bool rep;
|
||||
sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
|
||||
|
||||
/*
|
||||
* Current read offset when KVM reads hypercall input data gradually,
|
||||
* either offset in bytes from 'ingpa' for regular hypercalls or the
|
||||
* number of already consumed 'XMM halves' for 'fast' hypercalls.
|
||||
*/
|
||||
union {
|
||||
gpa_t data_offset;
|
||||
int consumed_xmm_halves;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
|
||||
u16 orig_cnt, u16 cnt_cap, u64 *data,
|
||||
int consumed_xmm_halves, gpa_t offset)
|
||||
u16 orig_cnt, u16 cnt_cap, u64 *data)
|
||||
{
|
||||
/*
|
||||
* Preserve the original count when ignoring entries via a "cap", KVM
|
||||
@ -1799,11 +1827,11 @@ static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
|
||||
* Each XMM holds two sparse banks, but do not count halves that
|
||||
* have already been consumed for hypercall parameters.
|
||||
*/
|
||||
if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
|
||||
if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - hc->consumed_xmm_halves)
|
||||
return HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
|
||||
for (i = 0; i < cnt; i++) {
|
||||
j = i + consumed_xmm_halves;
|
||||
j = i + hc->consumed_xmm_halves;
|
||||
if (j % 2)
|
||||
data[i] = sse128_hi(hc->xmm[j / 2]);
|
||||
else
|
||||
@ -1812,27 +1840,24 @@ static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
|
||||
return 0;
|
||||
}
|
||||
|
||||
return kvm_read_guest(kvm, hc->ingpa + offset, data,
|
||||
return kvm_read_guest(kvm, hc->ingpa + hc->data_offset, data,
|
||||
cnt * sizeof(*data));
|
||||
}
|
||||
|
||||
static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
|
||||
u64 *sparse_banks, int consumed_xmm_halves,
|
||||
gpa_t offset)
|
||||
u64 *sparse_banks)
|
||||
{
|
||||
if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS)
|
||||
return -EINVAL;
|
||||
|
||||
/* Cap var_cnt to ignore banks that cannot contain a legal VP index. */
|
||||
return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS,
|
||||
sparse_banks, consumed_xmm_halves, offset);
|
||||
sparse_banks);
|
||||
}
|
||||
|
||||
static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[],
|
||||
int consumed_xmm_halves, gpa_t offset)
|
||||
static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[])
|
||||
{
|
||||
return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt,
|
||||
entries, consumed_xmm_halves, offset);
|
||||
return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, entries);
|
||||
}
|
||||
|
||||
static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu,
|
||||
@ -1926,8 +1951,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
|
||||
struct kvm_vcpu *v;
|
||||
unsigned long i;
|
||||
bool all_cpus;
|
||||
int consumed_xmm_halves = 0;
|
||||
gpa_t data_offset;
|
||||
|
||||
/*
|
||||
* The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS
|
||||
@ -1955,12 +1978,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
|
||||
flush.address_space = hc->ingpa;
|
||||
flush.flags = hc->outgpa;
|
||||
flush.processor_mask = sse128_lo(hc->xmm[0]);
|
||||
consumed_xmm_halves = 1;
|
||||
hc->consumed_xmm_halves = 1;
|
||||
} else {
|
||||
if (unlikely(kvm_read_guest(kvm, hc->ingpa,
|
||||
&flush, sizeof(flush))))
|
||||
return HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
data_offset = sizeof(flush);
|
||||
hc->data_offset = sizeof(flush);
|
||||
}
|
||||
|
||||
trace_kvm_hv_flush_tlb(flush.processor_mask,
|
||||
@ -1985,12 +2008,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
|
||||
flush_ex.flags = hc->outgpa;
|
||||
memcpy(&flush_ex.hv_vp_set,
|
||||
&hc->xmm[0], sizeof(hc->xmm[0]));
|
||||
consumed_xmm_halves = 2;
|
||||
hc->consumed_xmm_halves = 2;
|
||||
} else {
|
||||
if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
|
||||
sizeof(flush_ex))))
|
||||
return HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
data_offset = sizeof(flush_ex);
|
||||
hc->data_offset = sizeof(flush_ex);
|
||||
}
|
||||
|
||||
trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
|
||||
@ -2009,8 +2032,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
|
||||
if (!hc->var_cnt)
|
||||
goto ret_success;
|
||||
|
||||
if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks,
|
||||
consumed_xmm_halves, data_offset))
|
||||
if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
|
||||
return HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
}
|
||||
|
||||
@ -2021,8 +2043,10 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
|
||||
* consumed_xmm_halves to make sure TLB flush entries are read
|
||||
* from the correct offset.
|
||||
*/
|
||||
data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
|
||||
consumed_xmm_halves += hc->var_cnt;
|
||||
if (hc->fast)
|
||||
hc->consumed_xmm_halves += hc->var_cnt;
|
||||
else
|
||||
hc->data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
|
||||
}
|
||||
|
||||
if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
|
||||
@ -2030,8 +2054,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
|
||||
hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) {
|
||||
tlb_flush_entries = NULL;
|
||||
} else {
|
||||
if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries,
|
||||
consumed_xmm_halves, data_offset))
|
||||
if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries))
|
||||
return HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
tlb_flush_entries = __tlb_flush_entries;
|
||||
}
|
||||
@ -2180,9 +2203,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
|
||||
if (!hc->var_cnt)
|
||||
goto ret_success;
|
||||
|
||||
if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 1,
|
||||
offsetof(struct hv_send_ipi_ex,
|
||||
vp_set.bank_contents)))
|
||||
if (!hc->fast)
|
||||
hc->data_offset = offsetof(struct hv_send_ipi_ex,
|
||||
vp_set.bank_contents);
|
||||
else
|
||||
hc->consumed_xmm_halves = 1;
|
||||
|
||||
if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
|
||||
return HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
}
|
||||
|
||||
@ -2724,6 +2751,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
|
||||
ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
|
||||
ent->eax |= HV_ACCESS_FREQUENCY_MSRS;
|
||||
ent->eax |= HV_ACCESS_REENLIGHTENMENT;
|
||||
ent->eax |= HV_ACCESS_TSC_INVARIANT;
|
||||
|
||||
ent->ebx |= HV_POST_MESSAGES;
|
||||
ent->ebx |= HV_SIGNAL_EVENTS;
|
||||
|
@ -136,6 +136,33 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
|
||||
HV_SYNIC_STIMER_COUNT);
|
||||
}
|
||||
|
||||
/*
|
||||
* With HV_ACCESS_TSC_INVARIANT feature, invariant TSC (CPUID.80000007H:EDX[8])
|
||||
* is only observed after HV_X64_MSR_TSC_INVARIANT_CONTROL was written to.
|
||||
*/
|
||||
static inline bool kvm_hv_invtsc_suppressed(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
|
||||
/*
|
||||
* If Hyper-V's invariant TSC control is not exposed to the guest,
|
||||
* the invariant TSC CPUID flag is not suppressed, Windows guests were
|
||||
* observed to be able to handle it correctly. Going forward, VMMs are
|
||||
* encouraged to enable Hyper-V's invariant TSC control when invariant
|
||||
* TSC CPUID flag is set to make KVM's behavior match genuine Hyper-V.
|
||||
*/
|
||||
if (!hv_vcpu ||
|
||||
!(hv_vcpu->cpuid_cache.features_eax & HV_ACCESS_TSC_INVARIANT))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If Hyper-V's invariant TSC control is exposed to the guest, KVM is
|
||||
* responsible for suppressing the invariant TSC CPUID flag if the
|
||||
* Hyper-V control is not enabled.
|
||||
*/
|
||||
return !(to_kvm_hv(vcpu->kvm)->hv_invtsc_control & HV_EXPOSE_INVARIANT_TSC);
|
||||
}
|
||||
|
||||
void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
||||
|
@ -426,8 +426,9 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
|
||||
kvm_set_msi_irq(vcpu->kvm, entry, &irq);
|
||||
|
||||
if (irq.trig_mode &&
|
||||
kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
|
||||
irq.dest_id, irq.dest_mode))
|
||||
(kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
|
||||
irq.dest_id, irq.dest_mode) ||
|
||||
kvm_apic_pending_eoi(vcpu, irq.vector)))
|
||||
__set_bit(irq.vector, ioapic_handled_vectors);
|
||||
}
|
||||
}
|
||||
|
@ -188,11 +188,11 @@ static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
|
||||
|
||||
extern struct static_key_false_deferred apic_hw_disabled;
|
||||
|
||||
static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
|
||||
static inline bool kvm_apic_hw_enabled(struct kvm_lapic *apic)
|
||||
{
|
||||
if (static_branch_unlikely(&apic_hw_disabled.key))
|
||||
return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
|
||||
return MSR_IA32_APICBASE_ENABLE;
|
||||
return true;
|
||||
}
|
||||
|
||||
extern struct static_key_false_deferred apic_sw_disabled;
|
||||
|
@ -230,14 +230,14 @@ static inline bool kvm_shadow_root_allocated(struct kvm *kvm)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
|
||||
extern bool tdp_mmu_enabled;
|
||||
#else
|
||||
static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
|
||||
#define tdp_mmu_enabled false
|
||||
#endif
|
||||
|
||||
static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
|
||||
{
|
||||
return !is_tdp_mmu_enabled(kvm) || kvm_shadow_root_allocated(kvm);
|
||||
return !tdp_mmu_enabled || kvm_shadow_root_allocated(kvm);
|
||||
}
|
||||
|
||||
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
|
||||
|
@ -99,6 +99,13 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
|
||||
*/
|
||||
bool tdp_enabled = false;
|
||||
|
||||
bool __ro_after_init tdp_mmu_allowed;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
bool __read_mostly tdp_mmu_enabled = true;
|
||||
module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0444);
|
||||
#endif
|
||||
|
||||
static int max_huge_page_level __read_mostly;
|
||||
static int tdp_root_level __read_mostly;
|
||||
static int max_tdp_level __read_mostly;
|
||||
@ -609,9 +616,14 @@ static bool mmu_spte_age(u64 *sptep)
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool is_tdp_mmu_active(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct;
|
||||
}
|
||||
|
||||
static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_tdp_mmu(vcpu->arch.mmu)) {
|
||||
if (is_tdp_mmu_active(vcpu)) {
|
||||
kvm_tdp_mmu_walk_lockless_begin();
|
||||
} else {
|
||||
/*
|
||||
@ -630,7 +642,7 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_tdp_mmu(vcpu->arch.mmu)) {
|
||||
if (is_tdp_mmu_active(vcpu)) {
|
||||
kvm_tdp_mmu_walk_lockless_end();
|
||||
} else {
|
||||
/*
|
||||
@ -1279,7 +1291,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||
{
|
||||
struct kvm_rmap_head *rmap_head;
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
|
||||
slot->base_gfn + gfn_offset, mask, true);
|
||||
|
||||
@ -1312,7 +1324,7 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
|
||||
{
|
||||
struct kvm_rmap_head *rmap_head;
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
|
||||
slot->base_gfn + gfn_offset, mask, false);
|
||||
|
||||
@ -1395,7 +1407,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
|
||||
}
|
||||
}
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
write_protected |=
|
||||
kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level);
|
||||
|
||||
@ -1558,7 +1570,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
if (kvm_memslots_have_rmaps(kvm))
|
||||
flush = kvm_handle_gfn_range(kvm, range, kvm_zap_rmap);
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
|
||||
|
||||
return flush;
|
||||
@ -1571,7 +1583,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
if (kvm_memslots_have_rmaps(kvm))
|
||||
flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap);
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);
|
||||
|
||||
return flush;
|
||||
@ -1646,7 +1658,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
if (kvm_memslots_have_rmaps(kvm))
|
||||
young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap);
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
young |= kvm_tdp_mmu_age_gfn_range(kvm, range);
|
||||
|
||||
return young;
|
||||
@ -1659,7 +1671,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
if (kvm_memslots_have_rmaps(kvm))
|
||||
young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap);
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
young |= kvm_tdp_mmu_test_age_gfn(kvm, range);
|
||||
|
||||
return young;
|
||||
@ -1921,7 +1933,7 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
return true;
|
||||
|
||||
/* TDP MMU pages do not use the MMU generation. */
|
||||
return !sp->tdp_mmu_page &&
|
||||
return !is_tdp_mmu_page(sp) &&
|
||||
unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
|
||||
}
|
||||
|
||||
@ -2355,7 +2367,16 @@ static void __link_shadow_page(struct kvm *kvm,
|
||||
|
||||
mmu_page_add_parent_pte(cache, sp, sptep);
|
||||
|
||||
if (sp->unsync_children || sp->unsync)
|
||||
/*
|
||||
* The non-direct sub-pagetable must be updated before linking. For
|
||||
* L1 sp, the pagetable is updated via kvm_sync_page() in
|
||||
* kvm_mmu_find_shadow_page() without write-protecting the gfn,
|
||||
* so sp->unsync can be true or false. For higher level non-direct
|
||||
* sp, the pagetable is updated/synced via mmu_sync_children() in
|
||||
* FNAME(fetch)(), so sp->unsync_children can only be false.
|
||||
* WARN_ON_ONCE() if anything happens unexpectedly.
|
||||
*/
|
||||
if (WARN_ON_ONCE(sp->unsync_children) || sp->unsync)
|
||||
mark_unsync(sptep);
|
||||
}
|
||||
|
||||
@ -3116,11 +3137,11 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
|
||||
!is_large_pte(spte) &&
|
||||
spte_to_child_sp(spte)->nx_huge_page_disallowed) {
|
||||
/*
|
||||
* A small SPTE exists for this pfn, but FNAME(fetch)
|
||||
* and __direct_map would like to create a large PTE
|
||||
* instead: just force them to go down another level,
|
||||
* patching back for them into pfn the next 9 bits of
|
||||
* the address.
|
||||
* A small SPTE exists for this pfn, but FNAME(fetch),
|
||||
* direct_map(), or kvm_tdp_mmu_map() would like to create a
|
||||
* large PTE instead: just force them to go down another level,
|
||||
* patching back for them into pfn the next 9 bits of the
|
||||
* address.
|
||||
*/
|
||||
u64 page_mask = KVM_PAGES_PER_HPAGE(cur_level) -
|
||||
KVM_PAGES_PER_HPAGE(cur_level - 1);
|
||||
@ -3129,7 +3150,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
|
||||
}
|
||||
}
|
||||
|
||||
static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
struct kvm_shadow_walk_iterator it;
|
||||
struct kvm_mmu_page *sp;
|
||||
@ -3173,14 +3194,16 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
|
||||
static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk);
|
||||
unsigned long hva = gfn_to_hva_memslot(slot, gfn);
|
||||
|
||||
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current);
|
||||
}
|
||||
|
||||
static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
|
||||
static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
if (is_sigpending_pfn(pfn)) {
|
||||
if (is_sigpending_pfn(fault->pfn)) {
|
||||
kvm_handle_signal_exit(vcpu);
|
||||
return -EINTR;
|
||||
}
|
||||
@ -3190,43 +3213,43 @@ static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
|
||||
* into the spte otherwise read access on readonly gfn also can
|
||||
* caused mmio page fault and treat it as mmio access.
|
||||
*/
|
||||
if (pfn == KVM_PFN_ERR_RO_FAULT)
|
||||
if (fault->pfn == KVM_PFN_ERR_RO_FAULT)
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
if (pfn == KVM_PFN_ERR_HWPOISON) {
|
||||
kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
|
||||
if (fault->pfn == KVM_PFN_ERR_HWPOISON) {
|
||||
kvm_send_hwpoison_signal(fault->slot, fault->gfn);
|
||||
return RET_PF_RETRY;
|
||||
}
|
||||
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
|
||||
unsigned int access)
|
||||
static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault,
|
||||
unsigned int access)
|
||||
{
|
||||
/* The pfn is invalid, report the error! */
|
||||
if (unlikely(is_error_pfn(fault->pfn)))
|
||||
return kvm_handle_error_pfn(vcpu, fault->gfn, fault->pfn);
|
||||
gva_t gva = fault->is_tdp ? 0 : fault->addr;
|
||||
|
||||
if (unlikely(!fault->slot)) {
|
||||
gva_t gva = fault->is_tdp ? 0 : fault->addr;
|
||||
vcpu_cache_mmio_info(vcpu, gva, fault->gfn,
|
||||
access & shadow_mmio_access_mask);
|
||||
|
||||
vcpu_cache_mmio_info(vcpu, gva, fault->gfn,
|
||||
access & shadow_mmio_access_mask);
|
||||
/*
|
||||
* If MMIO caching is disabled, emulate immediately without
|
||||
* touching the shadow page tables as attempting to install an
|
||||
* MMIO SPTE will just be an expensive nop. Do not cache MMIO
|
||||
* whose gfn is greater than host.MAXPHYADDR, any guest that
|
||||
* generates such gfns is running nested and is being tricked
|
||||
* by L0 userspace (you can observe gfn > L1.MAXPHYADDR if
|
||||
* and only if L1's MAXPHYADDR is inaccurate with respect to
|
||||
* the hardware's).
|
||||
*/
|
||||
if (unlikely(!enable_mmio_caching) ||
|
||||
unlikely(fault->gfn > kvm_mmu_max_gfn()))
|
||||
return RET_PF_EMULATE;
|
||||
}
|
||||
/*
|
||||
* If MMIO caching is disabled, emulate immediately without
|
||||
* touching the shadow page tables as attempting to install an
|
||||
* MMIO SPTE will just be an expensive nop.
|
||||
*/
|
||||
if (unlikely(!enable_mmio_caching))
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
/*
|
||||
* Do not create an MMIO SPTE for a gfn greater than host.MAXPHYADDR,
|
||||
* any guest that generates such gfns is running nested and is being
|
||||
* tricked by L0 userspace (you can observe gfn > L1.MAXPHYADDR if and
|
||||
* only if L1's MAXPHYADDR is inaccurate with respect to the
|
||||
* hardware's).
|
||||
*/
|
||||
if (unlikely(fault->gfn > kvm_mmu_max_gfn()))
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
return RET_PF_CONTINUE;
|
||||
}
|
||||
@ -3350,7 +3373,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
do {
|
||||
u64 new_spte;
|
||||
|
||||
if (is_tdp_mmu(vcpu->arch.mmu))
|
||||
if (tdp_mmu_enabled)
|
||||
sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, fault->addr, &spte);
|
||||
else
|
||||
sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte);
|
||||
@ -3596,7 +3619,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
||||
if (r < 0)
|
||||
goto out_unlock;
|
||||
|
||||
if (is_tdp_mmu_enabled(vcpu->kvm)) {
|
||||
if (tdp_mmu_enabled) {
|
||||
root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
|
||||
mmu->root.hpa = root;
|
||||
} else if (shadow_root_level >= PT64_ROOT_4LEVEL) {
|
||||
@ -4026,7 +4049,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
|
||||
|
||||
walk_shadow_page_lockless_begin(vcpu);
|
||||
|
||||
if (is_tdp_mmu(vcpu->arch.mmu))
|
||||
if (is_tdp_mmu_active(vcpu))
|
||||
leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
|
||||
else
|
||||
leaf = get_walk(vcpu, addr, sptes, &root);
|
||||
@ -4174,7 +4197,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
|
||||
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
|
||||
}
|
||||
|
||||
static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
struct kvm_memory_slot *slot = fault->slot;
|
||||
bool async;
|
||||
@ -4235,12 +4258,33 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
return RET_PF_CONTINUE;
|
||||
}
|
||||
|
||||
static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
|
||||
unsigned int access)
|
||||
{
|
||||
int ret;
|
||||
|
||||
fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
|
||||
smp_rmb();
|
||||
|
||||
ret = __kvm_faultin_pfn(vcpu, fault);
|
||||
if (ret != RET_PF_CONTINUE)
|
||||
return ret;
|
||||
|
||||
if (unlikely(is_error_pfn(fault->pfn)))
|
||||
return kvm_handle_error_pfn(vcpu, fault);
|
||||
|
||||
if (unlikely(!fault->slot))
|
||||
return kvm_handle_noslot_fault(vcpu, fault, access);
|
||||
|
||||
return RET_PF_CONTINUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the page fault is stale and needs to be retried, i.e. if the
|
||||
* root was invalidated by a memslot update or a relevant mmu_notifier fired.
|
||||
*/
|
||||
static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault, int mmu_seq)
|
||||
struct kvm_page_fault *fault)
|
||||
{
|
||||
struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
|
||||
|
||||
@ -4260,19 +4304,13 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
|
||||
return true;
|
||||
|
||||
return fault->slot &&
|
||||
mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
|
||||
mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva);
|
||||
}
|
||||
|
||||
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
|
||||
|
||||
unsigned long mmu_seq;
|
||||
int r;
|
||||
|
||||
fault->gfn = fault->addr >> PAGE_SHIFT;
|
||||
fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
|
||||
|
||||
if (page_fault_handle_page_track(vcpu, fault))
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
@ -4284,41 +4322,24 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
|
||||
smp_rmb();
|
||||
|
||||
r = kvm_faultin_pfn(vcpu, fault);
|
||||
if (r != RET_PF_CONTINUE)
|
||||
return r;
|
||||
|
||||
r = handle_abnormal_pfn(vcpu, fault, ACC_ALL);
|
||||
r = kvm_faultin_pfn(vcpu, fault, ACC_ALL);
|
||||
if (r != RET_PF_CONTINUE)
|
||||
return r;
|
||||
|
||||
r = RET_PF_RETRY;
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
if (is_tdp_mmu_fault)
|
||||
read_lock(&vcpu->kvm->mmu_lock);
|
||||
else
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
if (is_page_fault_stale(vcpu, fault, mmu_seq))
|
||||
if (is_page_fault_stale(vcpu, fault))
|
||||
goto out_unlock;
|
||||
|
||||
if (is_tdp_mmu_fault) {
|
||||
r = kvm_tdp_mmu_map(vcpu, fault);
|
||||
} else {
|
||||
r = make_mmu_pages_available(vcpu);
|
||||
if (r)
|
||||
goto out_unlock;
|
||||
r = __direct_map(vcpu, fault);
|
||||
}
|
||||
r = make_mmu_pages_available(vcpu);
|
||||
if (r)
|
||||
goto out_unlock;
|
||||
|
||||
r = direct_map(vcpu, fault);
|
||||
|
||||
out_unlock:
|
||||
if (is_tdp_mmu_fault)
|
||||
read_unlock(&vcpu->kvm->mmu_lock);
|
||||
else
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(fault->pfn);
|
||||
return r;
|
||||
}
|
||||
@ -4366,6 +4387,42 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (page_fault_handle_page_track(vcpu, fault))
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
r = fast_page_fault(vcpu, fault);
|
||||
if (r != RET_PF_INVALID)
|
||||
return r;
|
||||
|
||||
r = mmu_topup_memory_caches(vcpu, false);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = kvm_faultin_pfn(vcpu, fault, ACC_ALL);
|
||||
if (r != RET_PF_CONTINUE)
|
||||
return r;
|
||||
|
||||
r = RET_PF_RETRY;
|
||||
read_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
if (is_page_fault_stale(vcpu, fault))
|
||||
goto out_unlock;
|
||||
|
||||
r = kvm_tdp_mmu_map(vcpu, fault);
|
||||
|
||||
out_unlock:
|
||||
read_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(fault->pfn);
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
/*
|
||||
@ -4383,13 +4440,18 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
|
||||
for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
|
||||
int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
|
||||
gfn_t base = (fault->addr >> PAGE_SHIFT) & ~(page_num - 1);
|
||||
gfn_t base = fault->gfn & ~(page_num - 1);
|
||||
|
||||
if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (tdp_mmu_enabled)
|
||||
return kvm_tdp_mmu_page_fault(vcpu, fault);
|
||||
#endif
|
||||
|
||||
return direct_page_fault(vcpu, fault);
|
||||
}
|
||||
|
||||
@ -5719,6 +5781,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
|
||||
tdp_root_level = tdp_forced_root_level;
|
||||
max_tdp_level = tdp_max_root_level;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
tdp_mmu_enabled = tdp_mmu_allowed && tdp_enabled;
|
||||
#endif
|
||||
/*
|
||||
* max_huge_page_level reflects KVM's MMU capabilities irrespective
|
||||
* of kernel support, e.g. KVM may be capable of using 1GB pages when
|
||||
@ -5966,7 +6031,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
|
||||
* write and in the same critical section as making the reload request,
|
||||
* e.g. before kvm_zap_obsolete_pages() could drop mmu_lock and yield.
|
||||
*/
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
kvm_tdp_mmu_invalidate_all_roots(kvm);
|
||||
|
||||
/*
|
||||
@ -5991,7 +6056,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
|
||||
* Deferring the zap until the final reference to the root is put would
|
||||
* lead to use-after-free.
|
||||
*/
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
kvm_tdp_mmu_zap_invalidated_roots(kvm);
|
||||
}
|
||||
|
||||
@ -6017,9 +6082,11 @@ int kvm_mmu_init_vm(struct kvm *kvm)
|
||||
INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
|
||||
spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
|
||||
|
||||
r = kvm_mmu_init_tdp_mmu(kvm);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (tdp_mmu_enabled) {
|
||||
r = kvm_mmu_init_tdp_mmu(kvm);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
node->track_write = kvm_mmu_pte_write;
|
||||
node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
|
||||
@ -6049,7 +6116,8 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
|
||||
|
||||
kvm_page_track_unregister_notifier(kvm, node);
|
||||
|
||||
kvm_mmu_uninit_tdp_mmu(kvm);
|
||||
if (tdp_mmu_enabled)
|
||||
kvm_mmu_uninit_tdp_mmu(kvm);
|
||||
|
||||
mmu_free_vm_memory_caches(kvm);
|
||||
}
|
||||
@ -6103,7 +6171,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
|
||||
|
||||
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm)) {
|
||||
if (tdp_mmu_enabled) {
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start,
|
||||
gfn_end, true, flush);
|
||||
@ -6136,7 +6204,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm)) {
|
||||
if (tdp_mmu_enabled) {
|
||||
read_lock(&kvm->mmu_lock);
|
||||
kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
|
||||
read_unlock(&kvm->mmu_lock);
|
||||
@ -6379,7 +6447,7 @@ void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
|
||||
u64 start, u64 end,
|
||||
int target_level)
|
||||
{
|
||||
if (!is_tdp_mmu_enabled(kvm))
|
||||
if (!tdp_mmu_enabled)
|
||||
return;
|
||||
|
||||
if (kvm_memslots_have_rmaps(kvm))
|
||||
@ -6400,7 +6468,7 @@ void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
|
||||
u64 start = memslot->base_gfn;
|
||||
u64 end = start + memslot->npages;
|
||||
|
||||
if (!is_tdp_mmu_enabled(kvm))
|
||||
if (!tdp_mmu_enabled)
|
||||
return;
|
||||
|
||||
if (kvm_memslots_have_rmaps(kvm)) {
|
||||
@ -6483,7 +6551,7 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm)) {
|
||||
if (tdp_mmu_enabled) {
|
||||
read_lock(&kvm->mmu_lock);
|
||||
kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
|
||||
read_unlock(&kvm->mmu_lock);
|
||||
@ -6518,7 +6586,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm)) {
|
||||
if (tdp_mmu_enabled) {
|
||||
read_lock(&kvm->mmu_lock);
|
||||
kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
|
||||
read_unlock(&kvm->mmu_lock);
|
||||
@ -6553,7 +6621,7 @@ restart:
|
||||
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
if (tdp_mmu_enabled)
|
||||
kvm_tdp_mmu_zap_all(kvm);
|
||||
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
@ -6718,6 +6786,13 @@ void __init kvm_mmu_x86_module_init(void)
|
||||
if (nx_huge_pages == -1)
|
||||
__set_nx_huge_pages(get_nx_auto_mode());
|
||||
|
||||
/*
|
||||
* Snapshot userspace's desire to enable the TDP MMU. Whether or not the
|
||||
* TDP MMU is actually enabled is determined in kvm_configure_mmu()
|
||||
* when the vendor module is loaded.
|
||||
*/
|
||||
tdp_mmu_allowed = tdp_mmu_enabled;
|
||||
|
||||
kvm_mmu_spte_module_init();
|
||||
}
|
||||
|
||||
|
@ -199,7 +199,7 @@ struct kvm_page_fault {
|
||||
|
||||
/*
|
||||
* Maximum page size that can be created for this fault; input to
|
||||
* FNAME(fetch), __direct_map and kvm_tdp_mmu_map.
|
||||
* FNAME(fetch), direct_map() and kvm_tdp_mmu_map().
|
||||
*/
|
||||
u8 max_level;
|
||||
|
||||
@ -222,6 +222,7 @@ struct kvm_page_fault {
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
/* Outputs of kvm_faultin_pfn. */
|
||||
unsigned long mmu_seq;
|
||||
kvm_pfn_t pfn;
|
||||
hva_t hva;
|
||||
bool map_writable;
|
||||
@ -279,6 +280,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
};
|
||||
int r;
|
||||
|
||||
if (vcpu->arch.mmu->root_role.direct) {
|
||||
fault.gfn = fault.addr >> PAGE_SHIFT;
|
||||
fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
|
||||
}
|
||||
|
||||
/*
|
||||
* Async #PF "faults", a.k.a. prefetch faults, are not faults from the
|
||||
* guest perspective and have already been counted at the time of the
|
||||
|
@ -791,7 +791,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
{
|
||||
struct guest_walker walker;
|
||||
int r;
|
||||
unsigned long mmu_seq;
|
||||
bool is_self_change_mapping;
|
||||
|
||||
pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
|
||||
@ -838,14 +837,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
else
|
||||
fault->max_level = walker.level;
|
||||
|
||||
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
|
||||
smp_rmb();
|
||||
|
||||
r = kvm_faultin_pfn(vcpu, fault);
|
||||
if (r != RET_PF_CONTINUE)
|
||||
return r;
|
||||
|
||||
r = handle_abnormal_pfn(vcpu, fault, walker.pte_access);
|
||||
r = kvm_faultin_pfn(vcpu, fault, walker.pte_access);
|
||||
if (r != RET_PF_CONTINUE)
|
||||
return r;
|
||||
|
||||
@ -871,7 +863,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
r = RET_PF_RETRY;
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
if (is_page_fault_stale(vcpu, fault, mmu_seq))
|
||||
if (is_page_fault_stale(vcpu, fault))
|
||||
goto out_unlock;
|
||||
|
||||
r = make_mmu_pages_available(vcpu);
|
||||
|
@ -363,7 +363,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
|
||||
* A shadow-present leaf SPTE may be non-writable for 4 possible reasons:
|
||||
*
|
||||
* 1. To intercept writes for dirty logging. KVM write-protects huge pages
|
||||
* so that they can be split be split down into the dirty logging
|
||||
* so that they can be split down into the dirty logging
|
||||
* granularity (4KiB) whenever the guest writes to them. KVM also
|
||||
* write-protects 4KiB pages so that writes can be recorded in the dirty log
|
||||
* (e.g. if not using PML). SPTEs are write-protected for dirty logging
|
||||
|
@ -10,23 +10,15 @@
|
||||
#include <asm/cmpxchg.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
static bool __read_mostly tdp_mmu_enabled = true;
|
||||
module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
|
||||
|
||||
/* Initializes the TDP MMU for the VM, if enabled. */
|
||||
int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
|
||||
{
|
||||
struct workqueue_struct *wq;
|
||||
|
||||
if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
|
||||
return 0;
|
||||
|
||||
wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0);
|
||||
if (!wq)
|
||||
return -ENOMEM;
|
||||
|
||||
/* This should not be changed for the lifetime of the VM. */
|
||||
kvm->arch.tdp_mmu_enabled = true;
|
||||
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
|
||||
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
|
||||
kvm->arch.tdp_mmu_zap_wq = wq;
|
||||
@ -47,9 +39,6 @@ static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
|
||||
|
||||
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
|
||||
{
|
||||
if (!kvm->arch.tdp_mmu_enabled)
|
||||
return;
|
||||
|
||||
/* Also waits for any queued work items. */
|
||||
destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
|
||||
|
||||
@ -144,7 +133,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
|
||||
return;
|
||||
|
||||
WARN_ON(!root->tdp_mmu_page);
|
||||
WARN_ON(!is_tdp_mmu_page(root));
|
||||
|
||||
/*
|
||||
* The root now has refcount=0. It is valid, but readers already
|
||||
@ -1074,7 +1063,9 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
|
||||
int ret = RET_PF_FIXED;
|
||||
bool wrprot = false;
|
||||
|
||||
WARN_ON(sp->role.level != fault->goal_level);
|
||||
if (WARN_ON_ONCE(sp->role.level != fault->goal_level))
|
||||
return RET_PF_RETRY;
|
||||
|
||||
if (unlikely(!fault->slot))
|
||||
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
|
||||
else
|
||||
@ -1173,9 +1164,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
if (fault->nx_huge_page_workaround_enabled)
|
||||
disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
|
||||
|
||||
if (iter.level == fault->goal_level)
|
||||
break;
|
||||
|
||||
/*
|
||||
* If SPTE has been frozen by another thread, just give up and
|
||||
* retry, avoiding unnecessary page table allocation and free.
|
||||
@ -1183,6 +1171,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
if (is_removed_spte(iter.old_spte))
|
||||
goto retry;
|
||||
|
||||
if (iter.level == fault->goal_level)
|
||||
goto map_target_level;
|
||||
|
||||
/* Step down into the lower level page table if it exists. */
|
||||
if (is_shadow_present_pte(iter.old_spte) &&
|
||||
!is_large_pte(iter.old_spte))
|
||||
@ -1203,8 +1194,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
r = tdp_mmu_link_sp(kvm, &iter, sp, true);
|
||||
|
||||
/*
|
||||
* Also force the guest to retry the access if the upper level SPTEs
|
||||
* aren't in place.
|
||||
* Force the guest to retry if installing an upper level SPTE
|
||||
* failed, e.g. because a different task modified the SPTE.
|
||||
*/
|
||||
if (r) {
|
||||
tdp_mmu_free_sp(sp);
|
||||
@ -1214,11 +1205,20 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
if (fault->huge_page_disallowed &&
|
||||
fault->req_level >= iter.level) {
|
||||
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
track_possible_nx_huge_page(kvm, sp);
|
||||
if (sp->nx_huge_page_disallowed)
|
||||
track_possible_nx_huge_page(kvm, sp);
|
||||
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The walk aborted before reaching the target level, e.g. because the
|
||||
* iterator detected an upper level SPTE was frozen during traversal.
|
||||
*/
|
||||
WARN_ON_ONCE(iter.level == fault->goal_level);
|
||||
goto retry;
|
||||
|
||||
map_target_level:
|
||||
ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter);
|
||||
|
||||
retry:
|
||||
|
@ -7,6 +7,9 @@
|
||||
|
||||
#include "spte.h"
|
||||
|
||||
int kvm_mmu_init_tdp_mmu(struct kvm *kvm);
|
||||
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
|
||||
|
||||
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
|
||||
|
||||
__must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
|
||||
@ -68,31 +71,9 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
|
||||
u64 *spte);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
int kvm_mmu_init_tdp_mmu(struct kvm *kvm);
|
||||
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
|
||||
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
|
||||
|
||||
static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
|
||||
{
|
||||
struct kvm_mmu_page *sp;
|
||||
hpa_t hpa = mmu->root.hpa;
|
||||
|
||||
if (WARN_ON(!VALID_PAGE(hpa)))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* A NULL shadow page is legal when shadowing a non-paging guest with
|
||||
* PAE paging, as the MMU will be direct with root_hpa pointing at the
|
||||
* pae_root page, not a shadow page.
|
||||
*/
|
||||
sp = to_shadow_page(hpa);
|
||||
return sp && is_tdp_mmu_page(sp) && sp->root_count;
|
||||
}
|
||||
#else
|
||||
static inline int kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return 0; }
|
||||
static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
|
||||
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
|
||||
static inline bool is_tdp_mmu(struct kvm_mmu *mmu) { return false; }
|
||||
#endif
|
||||
|
||||
#endif /* __KVM_X86_MMU_TDP_MMU_H */
|
||||
|
@ -238,7 +238,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
|
||||
return false;
|
||||
|
||||
/* recalibrate sample period and check if it's accepted by perf core */
|
||||
if (perf_event_period(pmc->perf_event,
|
||||
if (is_sampling_event(pmc->perf_event) &&
|
||||
perf_event_period(pmc->perf_event,
|
||||
get_sample_period(pmc, pmc->counter)))
|
||||
return false;
|
||||
|
||||
|
@ -140,7 +140,8 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
|
||||
|
||||
static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
|
||||
{
|
||||
if (!pmc->perf_event || pmc->is_paused)
|
||||
if (!pmc->perf_event || pmc->is_paused ||
|
||||
!is_sampling_event(pmc->perf_event))
|
||||
return;
|
||||
|
||||
perf_event_period(pmc->perf_event,
|
||||
|
@ -14,6 +14,7 @@
|
||||
enum kvm_only_cpuid_leafs {
|
||||
CPUID_12_EAX = NCAPINTS,
|
||||
CPUID_7_1_EDX,
|
||||
CPUID_8000_0007_EDX,
|
||||
NR_KVM_CPU_CAPS,
|
||||
|
||||
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
|
||||
@ -43,6 +44,9 @@ enum kvm_only_cpuid_leafs {
|
||||
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
|
||||
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
|
||||
|
||||
/* CPUID level 0x80000007 (EDX). */
|
||||
#define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)
|
||||
|
||||
struct cpuid_reg {
|
||||
u32 function;
|
||||
u32 index;
|
||||
@ -68,6 +72,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
|
||||
[CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
|
||||
[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
|
||||
[CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
|
||||
[CPUID_8000_0007_EDX] = {0x80000007, 0, CPUID_EDX},
|
||||
};
|
||||
|
||||
/*
|
||||
@ -100,6 +105,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
|
||||
return KVM_X86_FEATURE_SGX2;
|
||||
else if (x86_feature == X86_FEATURE_SGX_EDECCSSA)
|
||||
return KVM_X86_FEATURE_SGX_EDECCSSA;
|
||||
else if (x86_feature == X86_FEATURE_CONSTANT_TSC)
|
||||
return KVM_X86_FEATURE_CONSTANT_TSC;
|
||||
|
||||
return x86_feature;
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#define pr_fmt(fmt) "kvm/hyper-v: " fmt
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
@ -361,35 +363,43 @@ enum evmcs_revision {
|
||||
enum evmcs_ctrl_type {
|
||||
EVMCS_EXIT_CTRLS,
|
||||
EVMCS_ENTRY_CTRLS,
|
||||
EVMCS_EXEC_CTRL,
|
||||
EVMCS_2NDEXEC,
|
||||
EVMCS_3RDEXEC,
|
||||
EVMCS_PINCTRL,
|
||||
EVMCS_VMFUNC,
|
||||
NR_EVMCS_CTRLS,
|
||||
};
|
||||
|
||||
static const u32 evmcs_unsupported_ctrls[NR_EVMCS_CTRLS][NR_EVMCS_REVISIONS] = {
|
||||
static const u32 evmcs_supported_ctrls[NR_EVMCS_CTRLS][NR_EVMCS_REVISIONS] = {
|
||||
[EVMCS_EXIT_CTRLS] = {
|
||||
[EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL,
|
||||
[EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMEXIT_CTRL,
|
||||
},
|
||||
[EVMCS_ENTRY_CTRLS] = {
|
||||
[EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL,
|
||||
[EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMENTRY_CTRL,
|
||||
},
|
||||
[EVMCS_EXEC_CTRL] = {
|
||||
[EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_EXEC_CTRL,
|
||||
},
|
||||
[EVMCS_2NDEXEC] = {
|
||||
[EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_2NDEXEC,
|
||||
[EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_2NDEXEC & ~SECONDARY_EXEC_TSC_SCALING,
|
||||
},
|
||||
[EVMCS_3RDEXEC] = {
|
||||
[EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_3RDEXEC,
|
||||
},
|
||||
[EVMCS_PINCTRL] = {
|
||||
[EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_PINCTRL,
|
||||
[EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_PINCTRL,
|
||||
},
|
||||
[EVMCS_VMFUNC] = {
|
||||
[EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMFUNC,
|
||||
[EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMFUNC,
|
||||
},
|
||||
};
|
||||
|
||||
static u32 evmcs_get_unsupported_ctls(enum evmcs_ctrl_type ctrl_type)
|
||||
static u32 evmcs_get_supported_ctls(enum evmcs_ctrl_type ctrl_type)
|
||||
{
|
||||
enum evmcs_revision evmcs_rev = EVMCSv1_LEGACY;
|
||||
|
||||
return evmcs_unsupported_ctrls[ctrl_type][evmcs_rev];
|
||||
return evmcs_supported_ctrls[ctrl_type][evmcs_rev];
|
||||
}
|
||||
|
||||
static bool evmcs_has_perf_global_ctrl(struct kvm_vcpu *vcpu)
|
||||
@ -413,7 +423,7 @@ void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *
|
||||
{
|
||||
u32 ctl_low = (u32)*pdata;
|
||||
u32 ctl_high = (u32)(*pdata >> 32);
|
||||
u32 unsupported_ctrls;
|
||||
u32 supported_ctrls;
|
||||
|
||||
/*
|
||||
* Hyper-V 2016 and 2019 try using these features even when eVMCS
|
||||
@ -422,27 +432,31 @@ void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *
|
||||
switch (msr_index) {
|
||||
case MSR_IA32_VMX_EXIT_CTLS:
|
||||
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
|
||||
unsupported_ctrls = evmcs_get_unsupported_ctls(EVMCS_EXIT_CTRLS);
|
||||
supported_ctrls = evmcs_get_supported_ctls(EVMCS_EXIT_CTRLS);
|
||||
if (!evmcs_has_perf_global_ctrl(vcpu))
|
||||
unsupported_ctrls |= VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
ctl_high &= ~unsupported_ctrls;
|
||||
supported_ctrls &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
ctl_high &= supported_ctrls;
|
||||
break;
|
||||
case MSR_IA32_VMX_ENTRY_CTLS:
|
||||
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
|
||||
unsupported_ctrls = evmcs_get_unsupported_ctls(EVMCS_ENTRY_CTRLS);
|
||||
supported_ctrls = evmcs_get_supported_ctls(EVMCS_ENTRY_CTRLS);
|
||||
if (!evmcs_has_perf_global_ctrl(vcpu))
|
||||
unsupported_ctrls |= VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
ctl_high &= ~unsupported_ctrls;
|
||||
supported_ctrls &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
ctl_high &= supported_ctrls;
|
||||
break;
|
||||
case MSR_IA32_VMX_PROCBASED_CTLS:
|
||||
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
|
||||
ctl_high &= evmcs_get_supported_ctls(EVMCS_EXEC_CTRL);
|
||||
break;
|
||||
case MSR_IA32_VMX_PROCBASED_CTLS2:
|
||||
ctl_high &= ~evmcs_get_unsupported_ctls(EVMCS_2NDEXEC);
|
||||
ctl_high &= evmcs_get_supported_ctls(EVMCS_2NDEXEC);
|
||||
break;
|
||||
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
|
||||
case MSR_IA32_VMX_PINBASED_CTLS:
|
||||
ctl_high &= ~evmcs_get_unsupported_ctls(EVMCS_PINCTRL);
|
||||
ctl_high &= evmcs_get_supported_ctls(EVMCS_PINCTRL);
|
||||
break;
|
||||
case MSR_IA32_VMX_VMFUNC:
|
||||
ctl_low &= ~evmcs_get_unsupported_ctls(EVMCS_VMFUNC);
|
||||
ctl_low &= evmcs_get_supported_ctls(EVMCS_VMFUNC);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -452,7 +466,7 @@ void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *
|
||||
static bool nested_evmcs_is_valid_controls(enum evmcs_ctrl_type ctrl_type,
|
||||
u32 val)
|
||||
{
|
||||
return !(val & evmcs_get_unsupported_ctls(ctrl_type));
|
||||
return !(val & ~evmcs_get_supported_ctls(ctrl_type));
|
||||
}
|
||||
|
||||
int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
|
||||
@ -461,6 +475,10 @@ int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
|
||||
vmcs12->pin_based_vm_exec_control)))
|
||||
return -EINVAL;
|
||||
|
||||
if (CC(!nested_evmcs_is_valid_controls(EVMCS_EXEC_CTRL,
|
||||
vmcs12->cpu_based_vm_exec_control)))
|
||||
return -EINVAL;
|
||||
|
||||
if (CC(!nested_evmcs_is_valid_controls(EVMCS_2NDEXEC,
|
||||
vmcs12->secondary_vm_exec_control)))
|
||||
return -EINVAL;
|
||||
@ -488,6 +506,38 @@ int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
/*
|
||||
* KVM on Hyper-V always uses the latest known eVMCSv1 revision, the assumption
|
||||
* is: in case a feature has corresponding fields in eVMCS described and it was
|
||||
* exposed in VMX feature MSRs, KVM is free to use it. Warn if KVM meets a
|
||||
* feature which has no corresponding eVMCS field, this likely means that KVM
|
||||
* needs to be updated.
|
||||
*/
|
||||
#define evmcs_check_vmcs_conf(field, ctrl) \
|
||||
do { \
|
||||
typeof(vmcs_conf->field) unsupported; \
|
||||
\
|
||||
unsupported = vmcs_conf->field & ~EVMCS1_SUPPORTED_ ## ctrl; \
|
||||
if (unsupported) { \
|
||||
pr_warn_once(#field " unsupported with eVMCS: 0x%llx\n",\
|
||||
(u64)unsupported); \
|
||||
vmcs_conf->field &= EVMCS1_SUPPORTED_ ## ctrl; \
|
||||
} \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
|
||||
{
|
||||
evmcs_check_vmcs_conf(cpu_based_exec_ctrl, EXEC_CTRL);
|
||||
evmcs_check_vmcs_conf(pin_based_exec_ctrl, PINCTRL);
|
||||
evmcs_check_vmcs_conf(cpu_based_2nd_exec_ctrl, 2NDEXEC);
|
||||
evmcs_check_vmcs_conf(cpu_based_3rd_exec_ctrl, 3RDEXEC);
|
||||
evmcs_check_vmcs_conf(vmentry_ctrl, VMENTRY_CTRL);
|
||||
evmcs_check_vmcs_conf(vmexit_ctrl, VMEXIT_CTRL);
|
||||
}
|
||||
#endif
|
||||
|
||||
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
|
||||
uint16_t *vmcs_version)
|
||||
{
|
||||
|
@ -48,22 +48,84 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
|
||||
* Currently unsupported in KVM:
|
||||
* GUEST_IA32_RTIT_CTL = 0x00002814,
|
||||
*/
|
||||
#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
|
||||
PIN_BASED_VMX_PREEMPTION_TIMER)
|
||||
#define EVMCS1_UNSUPPORTED_EXEC_CTRL (CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
|
||||
#define EVMCS1_UNSUPPORTED_2NDEXEC \
|
||||
(SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
|
||||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT | \
|
||||
SECONDARY_EXEC_ENABLE_PML | \
|
||||
SECONDARY_EXEC_ENABLE_VMFUNC | \
|
||||
SECONDARY_EXEC_SHADOW_VMCS | \
|
||||
#define EVMCS1_SUPPORTED_PINCTRL \
|
||||
(PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
|
||||
PIN_BASED_EXT_INTR_MASK | \
|
||||
PIN_BASED_NMI_EXITING | \
|
||||
PIN_BASED_VIRTUAL_NMIS)
|
||||
|
||||
#define EVMCS1_SUPPORTED_EXEC_CTRL \
|
||||
(CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
|
||||
CPU_BASED_HLT_EXITING | \
|
||||
CPU_BASED_CR3_LOAD_EXITING | \
|
||||
CPU_BASED_CR3_STORE_EXITING | \
|
||||
CPU_BASED_UNCOND_IO_EXITING | \
|
||||
CPU_BASED_MOV_DR_EXITING | \
|
||||
CPU_BASED_USE_TSC_OFFSETTING | \
|
||||
CPU_BASED_MWAIT_EXITING | \
|
||||
CPU_BASED_MONITOR_EXITING | \
|
||||
CPU_BASED_INVLPG_EXITING | \
|
||||
CPU_BASED_RDPMC_EXITING | \
|
||||
CPU_BASED_INTR_WINDOW_EXITING | \
|
||||
CPU_BASED_CR8_LOAD_EXITING | \
|
||||
CPU_BASED_CR8_STORE_EXITING | \
|
||||
CPU_BASED_RDTSC_EXITING | \
|
||||
CPU_BASED_TPR_SHADOW | \
|
||||
CPU_BASED_USE_IO_BITMAPS | \
|
||||
CPU_BASED_MONITOR_TRAP_FLAG | \
|
||||
CPU_BASED_USE_MSR_BITMAPS | \
|
||||
CPU_BASED_NMI_WINDOW_EXITING | \
|
||||
CPU_BASED_PAUSE_EXITING | \
|
||||
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
|
||||
|
||||
#define EVMCS1_SUPPORTED_2NDEXEC \
|
||||
(SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
|
||||
SECONDARY_EXEC_WBINVD_EXITING | \
|
||||
SECONDARY_EXEC_ENABLE_VPID | \
|
||||
SECONDARY_EXEC_ENABLE_EPT | \
|
||||
SECONDARY_EXEC_UNRESTRICTED_GUEST | \
|
||||
SECONDARY_EXEC_DESC | \
|
||||
SECONDARY_EXEC_ENABLE_RDTSCP | \
|
||||
SECONDARY_EXEC_ENABLE_INVPCID | \
|
||||
SECONDARY_EXEC_XSAVES | \
|
||||
SECONDARY_EXEC_RDSEED_EXITING | \
|
||||
SECONDARY_EXEC_RDRAND_EXITING | \
|
||||
SECONDARY_EXEC_TSC_SCALING | \
|
||||
SECONDARY_EXEC_PAUSE_LOOP_EXITING)
|
||||
#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \
|
||||
(VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
|
||||
#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (0)
|
||||
#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
|
||||
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
|
||||
SECONDARY_EXEC_PT_USE_GPA | \
|
||||
SECONDARY_EXEC_PT_CONCEAL_VMX | \
|
||||
SECONDARY_EXEC_BUS_LOCK_DETECTION | \
|
||||
SECONDARY_EXEC_NOTIFY_VM_EXITING | \
|
||||
SECONDARY_EXEC_ENCLS_EXITING)
|
||||
|
||||
#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
|
||||
|
||||
#define EVMCS1_SUPPORTED_VMEXIT_CTRL \
|
||||
(VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | \
|
||||
VM_EXIT_SAVE_DEBUG_CONTROLS | \
|
||||
VM_EXIT_ACK_INTR_ON_EXIT | \
|
||||
VM_EXIT_HOST_ADDR_SPACE_SIZE | \
|
||||
VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
|
||||
VM_EXIT_SAVE_IA32_PAT | \
|
||||
VM_EXIT_LOAD_IA32_PAT | \
|
||||
VM_EXIT_SAVE_IA32_EFER | \
|
||||
VM_EXIT_LOAD_IA32_EFER | \
|
||||
VM_EXIT_CLEAR_BNDCFGS | \
|
||||
VM_EXIT_PT_CONCEAL_PIP | \
|
||||
VM_EXIT_CLEAR_IA32_RTIT_CTL)
|
||||
|
||||
#define EVMCS1_SUPPORTED_VMENTRY_CTRL \
|
||||
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | \
|
||||
VM_ENTRY_LOAD_DEBUG_CONTROLS | \
|
||||
VM_ENTRY_IA32E_MODE | \
|
||||
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
|
||||
VM_ENTRY_LOAD_IA32_PAT | \
|
||||
VM_ENTRY_LOAD_IA32_EFER | \
|
||||
VM_ENTRY_LOAD_BNDCFGS | \
|
||||
VM_ENTRY_PT_CONCEAL_PIP | \
|
||||
VM_ENTRY_LOAD_IA32_RTIT_CTL)
|
||||
|
||||
#define EVMCS1_SUPPORTED_VMFUNC (0)
|
||||
|
||||
struct evmcs_field {
|
||||
u16 offset;
|
||||
@ -211,6 +273,7 @@ static inline void evmcs_load(u64 phys_addr)
|
||||
vp_ap->enlighten_vmentry = 1;
|
||||
}
|
||||
|
||||
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
|
||||
#else /* !IS_ENABLED(CONFIG_HYPERV) */
|
||||
static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
|
||||
static inline void evmcs_write32(unsigned long field, u32 value) {}
|
||||
|
@ -5296,10 +5296,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
|
||||
if (vmptr == vmx->nested.current_vmptr)
|
||||
nested_release_vmcs12(vcpu);
|
||||
|
||||
kvm_vcpu_write_guest(vcpu,
|
||||
vmptr + offsetof(struct vmcs12,
|
||||
launch_state),
|
||||
&zero, sizeof(zero));
|
||||
/*
|
||||
* Silently ignore memory errors on VMCLEAR, Intel's pseudocode
|
||||
* for VMCLEAR includes a "ensure that data for VMCS referenced
|
||||
* by the operand is in memory" clause that guards writes to
|
||||
* memory, i.e. doing nothing for I/O is architecturally valid.
|
||||
*
|
||||
* FIXME: Suppress failures if and only if no memslot is found,
|
||||
* i.e. exit to userspace if __copy_to_user() fails.
|
||||
*/
|
||||
(void)kvm_vcpu_write_guest(vcpu,
|
||||
vmptr + offsetof(struct vmcs12,
|
||||
launch_state),
|
||||
&zero, sizeof(zero));
|
||||
} else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
|
||||
nested_release_evmcs(vcpu);
|
||||
}
|
||||
@ -6873,7 +6882,8 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
|
||||
SECONDARY_EXEC_ENABLE_INVPCID |
|
||||
SECONDARY_EXEC_RDSEED_EXITING |
|
||||
SECONDARY_EXEC_XSAVES |
|
||||
SECONDARY_EXEC_TSC_SCALING;
|
||||
SECONDARY_EXEC_TSC_SCALING |
|
||||
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
|
||||
|
||||
/*
|
||||
* We can emulate "VMCS shadowing," even if the hardware
|
||||
|
@ -2752,6 +2752,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
vmcs_conf->vmentry_ctrl = _vmentry_control;
|
||||
vmcs_conf->misc = misc_msr;
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
if (enlightened_vmcs)
|
||||
evmcs_sanitize_exec_ctrls(vmcs_conf);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -4459,6 +4464,13 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
|
||||
* controls for features that are/aren't exposed to the guest.
|
||||
*/
|
||||
if (nested) {
|
||||
/*
|
||||
* All features that can be added or removed to VMX MSRs must
|
||||
* be supported in the first place for nested virtualization.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!(vmcs_config.nested.secondary_ctls_high & control)))
|
||||
enabled = false;
|
||||
|
||||
if (enabled)
|
||||
vmx->nested.msrs.secondary_ctls_high |= control;
|
||||
else
|
||||
|
@ -1480,7 +1480,7 @@ static const u32 emulated_msrs_all[] = {
|
||||
HV_X64_MSR_STIMER0_CONFIG,
|
||||
HV_X64_MSR_VP_ASSIST_PAGE,
|
||||
HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
|
||||
HV_X64_MSR_TSC_EMULATION_STATUS,
|
||||
HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL,
|
||||
HV_X64_MSR_SYNDBG_OPTIONS,
|
||||
HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
|
||||
HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
|
||||
@ -3821,6 +3821,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
|
||||
case HV_X64_MSR_TSC_EMULATION_CONTROL:
|
||||
case HV_X64_MSR_TSC_EMULATION_STATUS:
|
||||
case HV_X64_MSR_TSC_INVARIANT_CONTROL:
|
||||
return kvm_hv_set_msr_common(vcpu, msr, data,
|
||||
msr_info->host_initiated);
|
||||
case MSR_IA32_BBL_CR_CTL3:
|
||||
@ -4191,6 +4192,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
|
||||
case HV_X64_MSR_TSC_EMULATION_CONTROL:
|
||||
case HV_X64_MSR_TSC_EMULATION_STATUS:
|
||||
case HV_X64_MSR_TSC_INVARIANT_CONTROL:
|
||||
return kvm_hv_get_msr_common(vcpu,
|
||||
msr_info->index, &msr_info->data,
|
||||
msr_info->host_initiated);
|
||||
@ -13132,6 +13134,9 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
|
||||
struct x86_exception *e)
|
||||
{
|
||||
if (r == X86EMUL_PROPAGATE_FAULT) {
|
||||
if (KVM_BUG_ON(!e, vcpu->kvm))
|
||||
return -EIO;
|
||||
|
||||
kvm_inject_emulated_page_fault(vcpu, e);
|
||||
return 1;
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
|
||||
int ret = 0;
|
||||
int idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
if (gfn == GPA_INVALID) {
|
||||
if (gfn == KVM_XEN_INVALID_GFN) {
|
||||
kvm_gpc_deactivate(gpc);
|
||||
goto out;
|
||||
}
|
||||
@ -659,7 +659,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
|
||||
if (kvm->arch.xen.shinfo_cache.active)
|
||||
data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
|
||||
else
|
||||
data->u.shared_info.gfn = GPA_INVALID;
|
||||
data->u.shared_info.gfn = KVM_XEN_INVALID_GFN;
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
@ -705,7 +705,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
|
||||
offsetof(struct compat_vcpu_info, time));
|
||||
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
if (data->u.gpa == KVM_XEN_INVALID_GPA) {
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
|
||||
r = 0;
|
||||
break;
|
||||
@ -719,7 +719,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
if (data->u.gpa == KVM_XEN_INVALID_GPA) {
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
|
||||
r = 0;
|
||||
break;
|
||||
@ -739,7 +739,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
if (data->u.gpa == KVM_XEN_INVALID_GPA) {
|
||||
r = 0;
|
||||
deactivate_out:
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
|
||||
@ -937,7 +937,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
if (vcpu->arch.xen.vcpu_info_cache.active)
|
||||
data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
|
||||
else
|
||||
data->u.gpa = GPA_INVALID;
|
||||
data->u.gpa = KVM_XEN_INVALID_GPA;
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
@ -945,7 +945,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
if (vcpu->arch.xen.vcpu_time_info_cache.active)
|
||||
data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
|
||||
else
|
||||
data->u.gpa = GPA_INVALID;
|
||||
data->u.gpa = KVM_XEN_INVALID_GPA;
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
@ -1069,6 +1069,7 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
|
||||
u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
|
||||
: kvm->arch.xen_hvm_config.blob_size_32;
|
||||
u8 *page;
|
||||
int ret;
|
||||
|
||||
if (page_num >= blob_size)
|
||||
return 1;
|
||||
@ -1079,10 +1080,10 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
|
||||
if (IS_ERR(page))
|
||||
return PTR_ERR(page);
|
||||
|
||||
if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
|
||||
kfree(page);
|
||||
ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE);
|
||||
kfree(page);
|
||||
if (ret)
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1183,30 +1184,22 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
|
||||
static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
|
||||
u64 param, u64 *r)
|
||||
{
|
||||
int idx, i;
|
||||
struct sched_poll sched_poll;
|
||||
evtchn_port_t port, *ports;
|
||||
gpa_t gpa;
|
||||
struct x86_exception e;
|
||||
int i;
|
||||
|
||||
if (!lapic_in_kernel(vcpu) ||
|
||||
!(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
|
||||
return false;
|
||||
|
||||
idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
if (!gpa) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
|
||||
struct compat_sched_poll sp32;
|
||||
|
||||
/* Sanity check that the compat struct definition is correct */
|
||||
BUILD_BUG_ON(sizeof(sp32) != 16);
|
||||
|
||||
if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) {
|
||||
if (kvm_read_guest_virt(vcpu, param, &sp32, sizeof(sp32), &e)) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
@ -1220,8 +1213,8 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
|
||||
sched_poll.nr_ports = sp32.nr_ports;
|
||||
sched_poll.timeout = sp32.timeout;
|
||||
} else {
|
||||
if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
|
||||
sizeof(sched_poll))) {
|
||||
if (kvm_read_guest_virt(vcpu, param, &sched_poll,
|
||||
sizeof(sched_poll), &e)) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
@ -1243,18 +1236,13 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
|
||||
} else
|
||||
ports = &port;
|
||||
|
||||
for (i = 0; i < sched_poll.nr_ports; i++) {
|
||||
idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
gpa = kvm_mmu_gva_to_gpa_system(vcpu,
|
||||
(gva_t)(sched_poll.ports + i),
|
||||
NULL);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports,
|
||||
sched_poll.nr_ports * sizeof(*ports), &e)) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
|
||||
&ports[i], sizeof(port))) {
|
||||
*r = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
for (i = 0; i < sched_poll.nr_ports; i++) {
|
||||
if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
|
||||
*r = -EINVAL;
|
||||
goto out;
|
||||
@ -1330,9 +1318,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
|
||||
int vcpu_id, u64 param, u64 *r)
|
||||
{
|
||||
struct vcpu_set_singleshot_timer oneshot;
|
||||
struct x86_exception e;
|
||||
s64 delta;
|
||||
gpa_t gpa;
|
||||
int idx;
|
||||
|
||||
if (!kvm_xen_timer_enabled(vcpu))
|
||||
return false;
|
||||
@ -1343,9 +1330,6 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
|
||||
*r = -EINVAL;
|
||||
return true;
|
||||
}
|
||||
idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
|
||||
/*
|
||||
* The only difference for 32-bit compat is the 4 bytes of
|
||||
@ -1363,9 +1347,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
|
||||
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) !=
|
||||
sizeof_field(struct vcpu_set_singleshot_timer, flags));
|
||||
|
||||
if (!gpa ||
|
||||
kvm_vcpu_read_guest(vcpu, gpa, &oneshot, longmode ? sizeof(oneshot) :
|
||||
sizeof(struct compat_vcpu_set_singleshot_timer))) {
|
||||
if (kvm_read_guest_virt(vcpu, param, &oneshot, longmode ? sizeof(oneshot) :
|
||||
sizeof(struct compat_vcpu_set_singleshot_timer), &e)) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
@ -1825,20 +1808,20 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
|
||||
{
|
||||
u32 port = data->u.evtchn.send_port;
|
||||
struct evtchnfd *evtchnfd;
|
||||
int ret;
|
||||
|
||||
if (!port || port >= max_evtchn_port(kvm))
|
||||
return -EINVAL;
|
||||
|
||||
/* Protect writes to evtchnfd as well as the idr lookup. */
|
||||
mutex_lock(&kvm->lock);
|
||||
evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
ret = -ENOENT;
|
||||
if (!evtchnfd)
|
||||
return -ENOENT;
|
||||
goto out_unlock;
|
||||
|
||||
/* For an UPDATE, nothing may change except the priority/vcpu */
|
||||
ret = -EINVAL;
|
||||
if (evtchnfd->type != data->u.evtchn.type)
|
||||
return -EINVAL;
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Port cannot change, and if it's zero that was an eventfd
|
||||
@ -1846,20 +1829,21 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
|
||||
*/
|
||||
if (!evtchnfd->deliver.port.port ||
|
||||
evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port)
|
||||
return -EINVAL;
|
||||
goto out_unlock;
|
||||
|
||||
/* We only support 2 level event channels for now */
|
||||
if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
|
||||
return -EINVAL;
|
||||
goto out_unlock;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
|
||||
if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) {
|
||||
evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
|
||||
evtchnfd->deliver.port.vcpu_idx = -1;
|
||||
}
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1871,12 +1855,9 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
|
||||
{
|
||||
u32 port = data->u.evtchn.send_port;
|
||||
struct eventfd_ctx *eventfd = NULL;
|
||||
struct evtchnfd *evtchnfd = NULL;
|
||||
struct evtchnfd *evtchnfd;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!port || port >= max_evtchn_port(kvm))
|
||||
return -EINVAL;
|
||||
|
||||
evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL);
|
||||
if (!evtchnfd)
|
||||
return -ENOMEM;
|
||||
@ -1952,8 +1933,7 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
|
||||
if (!evtchnfd)
|
||||
return -ENOENT;
|
||||
|
||||
if (kvm)
|
||||
synchronize_srcu(&kvm->srcu);
|
||||
synchronize_srcu(&kvm->srcu);
|
||||
if (!evtchnfd->deliver.port.port)
|
||||
eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
|
||||
kfree(evtchnfd);
|
||||
@ -1962,18 +1942,42 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
|
||||
|
||||
static int kvm_xen_eventfd_reset(struct kvm *kvm)
|
||||
{
|
||||
struct evtchnfd *evtchnfd;
|
||||
struct evtchnfd *evtchnfd, **all_evtchnfds;
|
||||
int i;
|
||||
int n = 0;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
/*
|
||||
* Because synchronize_srcu() cannot be called inside the
|
||||
* critical section, first collect all the evtchnfd objects
|
||||
* in an array as they are removed from evtchn_ports.
|
||||
*/
|
||||
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i)
|
||||
n++;
|
||||
|
||||
all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL);
|
||||
if (!all_evtchnfds) {
|
||||
mutex_unlock(&kvm->lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
n = 0;
|
||||
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
|
||||
all_evtchnfds[n++] = evtchnfd;
|
||||
idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
|
||||
synchronize_srcu(&kvm->srcu);
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
synchronize_srcu(&kvm->srcu);
|
||||
|
||||
while (n--) {
|
||||
evtchnfd = all_evtchnfds[n];
|
||||
if (!evtchnfd->deliver.port.port)
|
||||
eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
|
||||
kfree(evtchnfd);
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
kfree(all_evtchnfds);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -2002,20 +2006,22 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
|
||||
{
|
||||
struct evtchnfd *evtchnfd;
|
||||
struct evtchn_send send;
|
||||
gpa_t gpa;
|
||||
int idx;
|
||||
struct x86_exception e;
|
||||
|
||||
idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
|
||||
if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &send, sizeof(send))) {
|
||||
/* Sanity check: this structure is the same for 32-bit and 64-bit */
|
||||
BUILD_BUG_ON(sizeof(send) != 4);
|
||||
if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The evtchn_ports idr is protected by vcpu->kvm->srcu */
|
||||
/*
|
||||
* evtchnfd is protected by kvm->srcu; the idr lookup instead
|
||||
* is protected by RCU.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port);
|
||||
rcu_read_unlock();
|
||||
if (!evtchnfd)
|
||||
return false;
|
||||
|
||||
|
@ -1767,6 +1767,7 @@ struct kvm_xen_hvm_attr {
|
||||
__u8 runstate_update_flag;
|
||||
struct {
|
||||
__u64 gfn;
|
||||
#define KVM_XEN_INVALID_GFN ((__u64)-1)
|
||||
} shared_info;
|
||||
struct {
|
||||
__u32 send_port;
|
||||
@ -1798,6 +1799,7 @@ struct kvm_xen_hvm_attr {
|
||||
} u;
|
||||
};
|
||||
|
||||
|
||||
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
|
||||
#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
|
||||
#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
|
||||
@ -1823,6 +1825,7 @@ struct kvm_xen_vcpu_attr {
|
||||
__u16 pad[3];
|
||||
union {
|
||||
__u64 gpa;
|
||||
#define KVM_XEN_INVALID_GPA ((__u64)-1)
|
||||
__u64 pad[8];
|
||||
struct {
|
||||
__u64 state;
|
||||
|
91
tools/testing/selftests/kvm/.gitignore
vendored
91
tools/testing/selftests/kvm/.gitignore
vendored
@ -1,86 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
/aarch64/aarch32_id_regs
|
||||
/aarch64/arch_timer
|
||||
/aarch64/debug-exceptions
|
||||
/aarch64/get-reg-list
|
||||
/aarch64/hypercalls
|
||||
/aarch64/page_fault_test
|
||||
/aarch64/psci_test
|
||||
/aarch64/vcpu_width_config
|
||||
/aarch64/vgic_init
|
||||
/aarch64/vgic_irq
|
||||
/s390x/memop
|
||||
/s390x/resets
|
||||
/s390x/sync_regs_test
|
||||
/s390x/tprot
|
||||
/x86_64/amx_test
|
||||
/x86_64/cpuid_test
|
||||
/x86_64/cr4_cpuid_sync_test
|
||||
/x86_64/debug_regs
|
||||
/x86_64/exit_on_emulation_failure_test
|
||||
/x86_64/fix_hypercall_test
|
||||
/x86_64/get_msr_index_features
|
||||
/x86_64/kvm_clock_test
|
||||
/x86_64/kvm_pv_test
|
||||
/x86_64/hyperv_clock
|
||||
/x86_64/hyperv_cpuid
|
||||
/x86_64/hyperv_evmcs
|
||||
/x86_64/hyperv_features
|
||||
/x86_64/hyperv_ipi
|
||||
/x86_64/hyperv_svm_test
|
||||
/x86_64/hyperv_tlb_flush
|
||||
/x86_64/max_vcpuid_cap_test
|
||||
/x86_64/mmio_warning_test
|
||||
/x86_64/monitor_mwait_test
|
||||
/x86_64/nested_exceptions_test
|
||||
/x86_64/nx_huge_pages_test
|
||||
/x86_64/platform_info_test
|
||||
/x86_64/pmu_event_filter_test
|
||||
/x86_64/set_boot_cpu_id
|
||||
/x86_64/set_sregs_test
|
||||
/x86_64/sev_migrate_tests
|
||||
/x86_64/smaller_maxphyaddr_emulation_test
|
||||
/x86_64/smm_test
|
||||
/x86_64/state_test
|
||||
/x86_64/svm_vmcall_test
|
||||
/x86_64/svm_int_ctl_test
|
||||
/x86_64/svm_nested_soft_inject_test
|
||||
/x86_64/svm_nested_shutdown_test
|
||||
/x86_64/sync_regs_test
|
||||
/x86_64/tsc_msrs_test
|
||||
/x86_64/tsc_scaling_sync
|
||||
/x86_64/ucna_injection_test
|
||||
/x86_64/userspace_io_test
|
||||
/x86_64/userspace_msr_exit_test
|
||||
/x86_64/vmx_apic_access_test
|
||||
/x86_64/vmx_close_while_nested_test
|
||||
/x86_64/vmx_dirty_log_test
|
||||
/x86_64/vmx_exception_with_invalid_guest_state
|
||||
/x86_64/vmx_invalid_nested_guest_state
|
||||
/x86_64/vmx_msrs_test
|
||||
/x86_64/vmx_preemption_timer_test
|
||||
/x86_64/vmx_set_nested_state_test
|
||||
/x86_64/vmx_tsc_adjust_test
|
||||
/x86_64/vmx_nested_tsc_scaling_test
|
||||
/x86_64/xapic_ipi_test
|
||||
/x86_64/xapic_state_test
|
||||
/x86_64/xen_shinfo_test
|
||||
/x86_64/xen_vmcall_test
|
||||
/x86_64/xss_msr_test
|
||||
/x86_64/vmx_pmu_caps_test
|
||||
/x86_64/triple_fault_event_test
|
||||
/access_tracking_perf_test
|
||||
/demand_paging_test
|
||||
/dirty_log_test
|
||||
/dirty_log_perf_test
|
||||
/hardware_disable_test
|
||||
/kvm_create_max_vcpus
|
||||
/kvm_page_table_test
|
||||
/max_guest_memory_test
|
||||
/memslot_modification_stress_test
|
||||
/memslot_perf_test
|
||||
/rseq_test
|
||||
/set_memory_region_test
|
||||
/steal_time
|
||||
/kvm_binary_stats_test
|
||||
/system_counter_offset_test
|
||||
*
|
||||
!/**/
|
||||
!*.c
|
||||
!*.h
|
||||
!*.S
|
||||
!*.sh
|
||||
|
@ -7,35 +7,14 @@ top_srcdir = ../../../..
|
||||
include $(top_srcdir)/scripts/subarch.include
|
||||
ARCH ?= $(SUBARCH)
|
||||
|
||||
# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
|
||||
# directories and targets in this Makefile. "uname -m" doesn't map to
|
||||
# arch specific sub-directory names.
|
||||
#
|
||||
# UNAME_M variable to used to run the compiles pointing to the right arch
|
||||
# directories and build the right targets for these supported architectures.
|
||||
#
|
||||
# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable.
|
||||
# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable.
|
||||
#
|
||||
# x86_64 targets are named to include x86_64 as a suffix and directories
|
||||
# for includes are in x86_64 sub-directory. s390x and aarch64 follow the
|
||||
# same convention. "uname -m" doesn't result in the correct mapping for
|
||||
# s390x and aarch64.
|
||||
#
|
||||
# No change necessary for x86_64
|
||||
UNAME_M := $(shell uname -m)
|
||||
|
||||
# Set UNAME_M for arm64 compile/install to work
|
||||
ifeq ($(ARCH),arm64)
|
||||
UNAME_M := aarch64
|
||||
endif
|
||||
# Set UNAME_M s390x compile/install to work
|
||||
ifeq ($(ARCH),s390)
|
||||
UNAME_M := s390x
|
||||
endif
|
||||
# Set UNAME_M riscv compile/install to work
|
||||
ifeq ($(ARCH),riscv)
|
||||
UNAME_M := riscv
|
||||
ifeq ($(ARCH),x86)
|
||||
ARCH_DIR := x86_64
|
||||
else ifeq ($(ARCH),arm64)
|
||||
ARCH_DIR := aarch64
|
||||
else ifeq ($(ARCH),s390)
|
||||
ARCH_DIR := s390x
|
||||
else
|
||||
ARCH_DIR := $(ARCH)
|
||||
endif
|
||||
|
||||
LIBKVM += lib/assert.c
|
||||
@ -196,10 +175,15 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test
|
||||
TEST_GEN_PROGS_riscv += set_memory_region_test
|
||||
TEST_GEN_PROGS_riscv += kvm_binary_stats_test
|
||||
|
||||
TEST_PROGS += $(TEST_PROGS_$(UNAME_M))
|
||||
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
|
||||
TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(UNAME_M))
|
||||
LIBKVM += $(LIBKVM_$(UNAME_M))
|
||||
TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
|
||||
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
|
||||
TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
|
||||
LIBKVM += $(LIBKVM_$(ARCH_DIR))
|
||||
|
||||
# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
|
||||
# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
|
||||
# which causes the environment variable to override the makefile).
|
||||
include ../lib.mk
|
||||
|
||||
INSTALL_HDR_PATH = $(top_srcdir)/usr
|
||||
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
|
||||
@ -210,25 +194,23 @@ else
|
||||
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
|
||||
endif
|
||||
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
|
||||
-Wno-gnu-variable-sized-type-not-at-end \
|
||||
-fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
|
||||
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
|
||||
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
|
||||
-I$(<D) -Iinclude/$(UNAME_M) -I ../rseq -I.. $(EXTRA_CFLAGS) \
|
||||
-I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
|
||||
$(KHDR_INCLUDES)
|
||||
|
||||
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
|
||||
$(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
|
||||
no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \
|
||||
$(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
|
||||
|
||||
# On s390, build the testcases KVM-enabled
|
||||
pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
|
||||
pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \
|
||||
$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
|
||||
|
||||
LDLIBS += -ldl
|
||||
LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
|
||||
|
||||
# After inclusion, $(OUTPUT) is defined and
|
||||
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
|
||||
include ../lib.mk
|
||||
|
||||
LIBKVM_C := $(filter %.c,$(LIBKVM))
|
||||
LIBKVM_S := $(filter %.S,$(LIBKVM))
|
||||
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
|
||||
|
@ -117,7 +117,7 @@ static void guest_cas(void)
|
||||
GUEST_ASSERT(guest_check_lse());
|
||||
asm volatile(".arch_extension lse\n"
|
||||
"casal %0, %1, [%2]\n"
|
||||
:: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory));
|
||||
:: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
|
||||
val = READ_ONCE(*guest_test_memory);
|
||||
GUEST_ASSERT_EQ(val, TEST_DATA);
|
||||
}
|
||||
|
@ -85,61 +85,108 @@
|
||||
#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
|
||||
|
||||
/* HYPERV_CPUID_FEATURES.EAX */
|
||||
#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0)
|
||||
#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
|
||||
#define HV_MSR_SYNIC_AVAILABLE BIT(2)
|
||||
#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
|
||||
#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4)
|
||||
#define HV_MSR_HYPERCALL_AVAILABLE BIT(5)
|
||||
#define HV_MSR_VP_INDEX_AVAILABLE BIT(6)
|
||||
#define HV_MSR_RESET_AVAILABLE BIT(7)
|
||||
#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8)
|
||||
#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
|
||||
#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10)
|
||||
#define HV_ACCESS_FREQUENCY_MSRS BIT(11)
|
||||
#define HV_ACCESS_REENLIGHTENMENT BIT(13)
|
||||
#define HV_ACCESS_TSC_INVARIANT BIT(15)
|
||||
#define HV_MSR_VP_RUNTIME_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
|
||||
#define HV_MSR_TIME_REF_COUNT_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
|
||||
#define HV_MSR_SYNIC_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
|
||||
#define HV_MSR_SYNTIMER_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
|
||||
#define HV_MSR_APIC_ACCESS_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
|
||||
#define HV_MSR_HYPERCALL_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
|
||||
#define HV_MSR_VP_INDEX_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
|
||||
#define HV_MSR_RESET_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
|
||||
#define HV_MSR_STAT_PAGES_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
|
||||
#define HV_MSR_REFERENCE_TSC_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
|
||||
#define HV_MSR_GUEST_IDLE_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
|
||||
#define HV_ACCESS_FREQUENCY_MSRS \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
|
||||
#define HV_ACCESS_REENLIGHTENMENT \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
|
||||
#define HV_ACCESS_TSC_INVARIANT \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
|
||||
|
||||
/* HYPERV_CPUID_FEATURES.EBX */
|
||||
#define HV_CREATE_PARTITIONS BIT(0)
|
||||
#define HV_ACCESS_PARTITION_ID BIT(1)
|
||||
#define HV_ACCESS_MEMORY_POOL BIT(2)
|
||||
#define HV_ADJUST_MESSAGE_BUFFERS BIT(3)
|
||||
#define HV_POST_MESSAGES BIT(4)
|
||||
#define HV_SIGNAL_EVENTS BIT(5)
|
||||
#define HV_CREATE_PORT BIT(6)
|
||||
#define HV_CONNECT_PORT BIT(7)
|
||||
#define HV_ACCESS_STATS BIT(8)
|
||||
#define HV_DEBUGGING BIT(11)
|
||||
#define HV_CPU_MANAGEMENT BIT(12)
|
||||
#define HV_ISOLATION BIT(22)
|
||||
#define HV_CREATE_PARTITIONS \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
|
||||
#define HV_ACCESS_PARTITION_ID \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
|
||||
#define HV_ACCESS_MEMORY_POOL \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
|
||||
#define HV_ADJUST_MESSAGE_BUFFERS \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
|
||||
#define HV_POST_MESSAGES \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
|
||||
#define HV_SIGNAL_EVENTS \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
|
||||
#define HV_CREATE_PORT \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
|
||||
#define HV_CONNECT_PORT \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
|
||||
#define HV_ACCESS_STATS \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
|
||||
#define HV_DEBUGGING \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
|
||||
#define HV_CPU_MANAGEMENT \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
|
||||
#define HV_ISOLATION \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
|
||||
|
||||
/* HYPERV_CPUID_FEATURES.EDX */
|
||||
#define HV_X64_MWAIT_AVAILABLE BIT(0)
|
||||
#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
|
||||
#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
|
||||
#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
|
||||
#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4)
|
||||
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
|
||||
#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
|
||||
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
|
||||
#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11)
|
||||
#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
|
||||
#define HV_X64_MWAIT_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
|
||||
#define HV_X64_GUEST_DEBUGGING_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
|
||||
#define HV_X64_PERF_MONITOR_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
|
||||
#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
|
||||
#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
|
||||
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
|
||||
#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
|
||||
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
|
||||
#define HV_FEATURE_DEBUG_MSRS_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
|
||||
#define HV_STIMER_DIRECT_MODE_AVAILABLE \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
|
||||
|
||||
/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
|
||||
#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0)
|
||||
#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1)
|
||||
#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2)
|
||||
#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3)
|
||||
#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4)
|
||||
#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5)
|
||||
#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
|
||||
#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10)
|
||||
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
|
||||
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
|
||||
#define HV_X64_AS_SWITCH_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
|
||||
#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
|
||||
#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
|
||||
#define HV_X64_APIC_ACCESS_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
|
||||
#define HV_X64_SYSTEM_RESET_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
|
||||
#define HV_X64_RELAXED_TIMING_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
|
||||
#define HV_DEPRECATING_AEOI_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
|
||||
#define HV_X64_CLUSTER_IPI_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
|
||||
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
|
||||
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
|
||||
|
||||
/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
|
||||
#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1)
|
||||
#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
|
||||
|
||||
/* Hypercalls */
|
||||
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
|
||||
@ -288,4 +335,7 @@ struct hyperv_test_pages {
|
||||
struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
|
||||
vm_vaddr_t *p_hv_pages_gva);
|
||||
|
||||
/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
|
||||
#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0)
|
||||
|
||||
#endif /* !SELFTEST_KVM_HYPERV_H */
|
||||
|
@ -137,6 +137,7 @@ struct kvm_x86_cpu_feature {
|
||||
#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
|
||||
#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
|
||||
#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
|
||||
#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
|
||||
#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
|
||||
#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
|
||||
#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "kvm_util.h"
|
||||
#include "asm/kvm.h"
|
||||
#include "linux/kvm.h"
|
||||
#include "kselftest.h"
|
||||
|
||||
static void stats_test(int stats_fd)
|
||||
{
|
||||
@ -51,7 +52,7 @@ static void stats_test(int stats_fd)
|
||||
|
||||
/* Sanity check for other fields in header */
|
||||
if (header.num_desc == 0) {
|
||||
printf("No KVM stats defined!");
|
||||
ksft_print_msg("No KVM stats defined!\n");
|
||||
return;
|
||||
}
|
||||
/*
|
||||
@ -224,9 +225,13 @@ int main(int argc, char *argv[])
|
||||
max_vcpu = DEFAULT_NUM_VCPU;
|
||||
}
|
||||
|
||||
ksft_print_header();
|
||||
|
||||
/* Check the extension for binary stats */
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD));
|
||||
|
||||
ksft_set_plan(max_vm);
|
||||
|
||||
/* Create VMs and VCPUs */
|
||||
vms = malloc(sizeof(vms[0]) * max_vm);
|
||||
TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
|
||||
@ -245,10 +250,12 @@ int main(int argc, char *argv[])
|
||||
vm_stats_test(vms[i]);
|
||||
for (j = 0; j < max_vcpu; ++j)
|
||||
vcpu_stats_test(vcpus[i * max_vcpu + j]);
|
||||
ksft_test_result_pass("vm%i\n", i);
|
||||
}
|
||||
|
||||
for (i = 0; i < max_vm; ++i)
|
||||
kvm_vm_free(vms[i]);
|
||||
free(vms);
|
||||
return 0;
|
||||
|
||||
ksft_finished(); /* Print results and exit() accordingly */
|
||||
}
|
||||
|
@ -14,11 +14,13 @@ static vm_vaddr_t *ucall_exit_mmio_addr;
|
||||
|
||||
void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
|
||||
{
|
||||
virt_pg_map(vm, mmio_gpa, mmio_gpa);
|
||||
vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
|
||||
|
||||
virt_map(vm, mmio_gva, mmio_gpa, 1);
|
||||
|
||||
vm->ucall_mmio_addr = mmio_gpa;
|
||||
|
||||
write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa);
|
||||
write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
|
||||
}
|
||||
|
||||
void ucall_arch_do_ucall(vm_vaddr_t uc)
|
||||
|
@ -186,6 +186,15 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
|
||||
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
|
||||
"Missing new mode params?");
|
||||
|
||||
/*
|
||||
* Initializes vm->vpages_valid to match the canonical VA space of the
|
||||
* architecture.
|
||||
*
|
||||
* The default implementation is valid for architectures which split the
|
||||
* range addressed by a single page table into a low and high region
|
||||
* based on the MSB of the VA. On architectures with this behavior
|
||||
* the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1].
|
||||
*/
|
||||
__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
|
||||
{
|
||||
sparsebit_set_num(vm->vpages_valid,
|
||||
@ -1416,10 +1425,10 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
|
||||
while (npages--) {
|
||||
virt_pg_map(vm, vaddr, paddr);
|
||||
sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
|
||||
|
||||
vaddr += page_size;
|
||||
paddr += page_size;
|
||||
|
||||
sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include "linux/bitmap.h"
|
||||
#include "linux/atomic.h"
|
||||
|
||||
#define GUEST_UCALL_FAILED -1
|
||||
|
||||
struct ucall_header {
|
||||
DECLARE_BITMAP(in_use, KVM_MAX_VCPUS);
|
||||
struct ucall ucalls[KVM_MAX_VCPUS];
|
||||
@ -41,7 +43,8 @@ static struct ucall *ucall_alloc(void)
|
||||
struct ucall *uc;
|
||||
int i;
|
||||
|
||||
GUEST_ASSERT(ucall_pool);
|
||||
if (!ucall_pool)
|
||||
goto ucall_failed;
|
||||
|
||||
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
|
||||
if (!test_and_set_bit(i, ucall_pool->in_use)) {
|
||||
@ -51,7 +54,13 @@ static struct ucall *ucall_alloc(void)
|
||||
}
|
||||
}
|
||||
|
||||
GUEST_ASSERT(0);
|
||||
ucall_failed:
|
||||
/*
|
||||
* If the vCPU cannot grab a ucall structure, make a bare ucall with a
|
||||
* magic value to signal to get_ucall() that things went sideways.
|
||||
* GUEST_ASSERT() depends on ucall_alloc() and so cannot be used here.
|
||||
*/
|
||||
ucall_arch_do_ucall(GUEST_UCALL_FAILED);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -93,6 +102,9 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
|
||||
|
||||
addr = ucall_arch_get_ucall(vcpu);
|
||||
if (addr) {
|
||||
TEST_ASSERT(addr != (void *)GUEST_UCALL_FAILED,
|
||||
"Guest failed to allocate ucall struct");
|
||||
|
||||
memcpy(uc, addr, sizeof(*uc));
|
||||
vcpu_run_complete_io(vcpu);
|
||||
} else {
|
||||
|
@ -1031,7 +1031,7 @@ bool is_amd_cpu(void)
|
||||
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
|
||||
{
|
||||
if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
|
||||
*pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
|
||||
*pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
|
||||
*va_bits = 32;
|
||||
} else {
|
||||
*pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
|
||||
|
@ -265,6 +265,9 @@ static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
|
||||
slots = data->nslots;
|
||||
while (--slots > 1) {
|
||||
pages_per_slot = mempages / slots;
|
||||
if (!pages_per_slot)
|
||||
continue;
|
||||
|
||||
rempages = mempages % pages_per_slot;
|
||||
if (check_slot_pages(host_page_size, guest_page_size,
|
||||
pages_per_slot, rempages))
|
||||
|
@ -13,9 +13,17 @@
|
||||
#include "processor.h"
|
||||
#include "hyperv.h"
|
||||
|
||||
/*
|
||||
* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
|
||||
* but to activate the feature it is sufficient to set it to a non-zero
|
||||
* value. Use BIT(0) for that.
|
||||
*/
|
||||
#define HV_PV_SPINLOCKS_TEST \
|
||||
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
|
||||
|
||||
struct msr_data {
|
||||
uint32_t idx;
|
||||
bool available;
|
||||
bool fault_expected;
|
||||
bool write;
|
||||
u64 write_val;
|
||||
};
|
||||
@ -26,22 +34,46 @@ struct hcall_data {
|
||||
bool ud_expected;
|
||||
};
|
||||
|
||||
static bool is_write_only_msr(uint32_t msr)
|
||||
{
|
||||
return msr == HV_X64_MSR_EOI;
|
||||
}
|
||||
|
||||
static void guest_msr(struct msr_data *msr)
|
||||
{
|
||||
uint64_t ignored;
|
||||
uint8_t vector;
|
||||
uint8_t vector = 0;
|
||||
uint64_t msr_val = 0;
|
||||
|
||||
GUEST_ASSERT(msr->idx);
|
||||
|
||||
if (!msr->write)
|
||||
vector = rdmsr_safe(msr->idx, &ignored);
|
||||
else
|
||||
if (msr->write)
|
||||
vector = wrmsr_safe(msr->idx, msr->write_val);
|
||||
|
||||
if (msr->available)
|
||||
GUEST_ASSERT_2(!vector, msr->idx, vector);
|
||||
if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
|
||||
vector = rdmsr_safe(msr->idx, &msr_val);
|
||||
|
||||
if (msr->fault_expected)
|
||||
GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR);
|
||||
else
|
||||
GUEST_ASSERT_2(vector == GP_VECTOR, msr->idx, vector);
|
||||
GUEST_ASSERT_3(!vector, msr->idx, vector, 0);
|
||||
|
||||
if (vector || is_write_only_msr(msr->idx))
|
||||
goto done;
|
||||
|
||||
if (msr->write)
|
||||
GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx,
|
||||
msr_val, msr->write_val);
|
||||
|
||||
/* Invariant TSC bit appears when TSC invariant control MSR is written to */
|
||||
if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
|
||||
if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
|
||||
GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
|
||||
else
|
||||
GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
|
||||
!!(msr_val & HV_INVARIANT_TSC_EXPOSED));
|
||||
}
|
||||
|
||||
done:
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
@ -89,7 +121,6 @@ static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
|
||||
static void guest_test_msrs_access(void)
|
||||
{
|
||||
struct kvm_cpuid2 *prev_cpuid = NULL;
|
||||
struct kvm_cpuid_entry2 *feat, *dbg;
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_run *run;
|
||||
struct kvm_vm *vm;
|
||||
@ -97,6 +128,7 @@ static void guest_test_msrs_access(void)
|
||||
int stage = 0;
|
||||
vm_vaddr_t msr_gva;
|
||||
struct msr_data *msr;
|
||||
bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
|
||||
|
||||
while (true) {
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
|
||||
@ -116,9 +148,6 @@ static void guest_test_msrs_access(void)
|
||||
vcpu_init_cpuid(vcpu, prev_cpuid);
|
||||
}
|
||||
|
||||
feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
|
||||
dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
|
||||
|
||||
vm_init_descriptor_tables(vm);
|
||||
vcpu_init_descriptor_tables(vcpu);
|
||||
|
||||
@ -134,133 +163,139 @@ static void guest_test_msrs_access(void)
|
||||
* Only available when Hyper-V identification is set
|
||||
*/
|
||||
msr->idx = HV_X64_MSR_GUEST_OS_ID;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 1:
|
||||
msr->idx = HV_X64_MSR_HYPERCALL;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 2:
|
||||
feat->eax |= HV_MSR_HYPERCALL_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
|
||||
/*
|
||||
* HV_X64_MSR_GUEST_OS_ID has to be written first to make
|
||||
* HV_X64_MSR_HYPERCALL available.
|
||||
*/
|
||||
msr->idx = HV_X64_MSR_GUEST_OS_ID;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = HYPERV_LINUX_OS_ID;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 3:
|
||||
msr->idx = HV_X64_MSR_GUEST_OS_ID;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 4:
|
||||
msr->idx = HV_X64_MSR_HYPERCALL;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
case 5:
|
||||
msr->idx = HV_X64_MSR_VP_RUNTIME;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 6:
|
||||
feat->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_VP_RUNTIME;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 7:
|
||||
/* Read only */
|
||||
msr->idx = HV_X64_MSR_VP_RUNTIME;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->available = 0;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
msr->idx = HV_X64_MSR_TIME_REF_COUNT;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 9:
|
||||
feat->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_TIME_REF_COUNT;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 10:
|
||||
/* Read only */
|
||||
msr->idx = HV_X64_MSR_TIME_REF_COUNT;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->available = 0;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
|
||||
case 11:
|
||||
msr->idx = HV_X64_MSR_VP_INDEX;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 12:
|
||||
feat->eax |= HV_MSR_VP_INDEX_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_VP_INDEX;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 13:
|
||||
/* Read only */
|
||||
msr->idx = HV_X64_MSR_VP_INDEX;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->available = 0;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
|
||||
case 14:
|
||||
msr->idx = HV_X64_MSR_RESET;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 15:
|
||||
feat->eax |= HV_MSR_RESET_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_RESET;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 16:
|
||||
msr->idx = HV_X64_MSR_RESET;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
/*
|
||||
* TODO: the test only writes '0' to HV_X64_MSR_RESET
|
||||
* at the moment, writing some other value there will
|
||||
* trigger real vCPU reset and the code is not prepared
|
||||
* to handle it yet.
|
||||
*/
|
||||
msr->write_val = 0;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
case 17:
|
||||
msr->idx = HV_X64_MSR_REFERENCE_TSC;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 18:
|
||||
feat->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_REFERENCE_TSC;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 19:
|
||||
msr->idx = HV_X64_MSR_REFERENCE_TSC;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 0;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
case 20:
|
||||
msr->idx = HV_X64_MSR_EOM;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 21:
|
||||
/*
|
||||
@ -268,149 +303,185 @@ static void guest_test_msrs_access(void)
|
||||
* capability enabled and guest visible CPUID bit unset.
|
||||
*/
|
||||
msr->idx = HV_X64_MSR_EOM;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 22:
|
||||
feat->eax |= HV_MSR_SYNIC_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_EOM;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 23:
|
||||
msr->idx = HV_X64_MSR_EOM;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 0;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
case 24:
|
||||
msr->idx = HV_X64_MSR_STIMER0_CONFIG;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 25:
|
||||
feat->eax |= HV_MSR_SYNTIMER_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_STIMER0_CONFIG;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 26:
|
||||
msr->idx = HV_X64_MSR_STIMER0_CONFIG;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 0;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 27:
|
||||
/* Direct mode test */
|
||||
msr->idx = HV_X64_MSR_STIMER0_CONFIG;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1 << 12;
|
||||
msr->available = 0;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 28:
|
||||
feat->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_STIMER0_CONFIG;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1 << 12;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
case 29:
|
||||
msr->idx = HV_X64_MSR_EOI;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 30:
|
||||
feat->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_EOI;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
case 31:
|
||||
msr->idx = HV_X64_MSR_TSC_FREQUENCY;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 32:
|
||||
feat->eax |= HV_ACCESS_FREQUENCY_MSRS;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
|
||||
msr->idx = HV_X64_MSR_TSC_FREQUENCY;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 33:
|
||||
/* Read only */
|
||||
msr->idx = HV_X64_MSR_TSC_FREQUENCY;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->available = 0;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
|
||||
case 34:
|
||||
msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 35:
|
||||
feat->eax |= HV_ACCESS_REENLIGHTENMENT;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
|
||||
msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 36:
|
||||
msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 37:
|
||||
/* Can only write '0' */
|
||||
msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->available = 0;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
|
||||
case 38:
|
||||
msr->idx = HV_X64_MSR_CRASH_P0;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 39:
|
||||
feat->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
|
||||
msr->idx = HV_X64_MSR_CRASH_P0;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 40:
|
||||
msr->idx = HV_X64_MSR_CRASH_P0;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
case 41:
|
||||
msr->idx = HV_X64_MSR_SYNDBG_STATUS;
|
||||
msr->write = 0;
|
||||
msr->available = 0;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 42:
|
||||
feat->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
|
||||
dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
|
||||
vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
|
||||
msr->idx = HV_X64_MSR_SYNDBG_STATUS;
|
||||
msr->write = 0;
|
||||
msr->available = 1;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 43:
|
||||
msr->idx = HV_X64_MSR_SYNDBG_STATUS;
|
||||
msr->write = 1;
|
||||
msr->write = true;
|
||||
msr->write_val = 0;
|
||||
msr->available = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
case 44:
|
||||
/* MSR is not available when CPUID feature bit is unset */
|
||||
if (!has_invtsc)
|
||||
continue;
|
||||
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
|
||||
msr->write = false;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 45:
|
||||
/* MSR is vailable when CPUID feature bit is set */
|
||||
if (!has_invtsc)
|
||||
continue;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
|
||||
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
|
||||
msr->write = false;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
case 46:
|
||||
/* Writing bits other than 0 is forbidden */
|
||||
if (!has_invtsc)
|
||||
continue;
|
||||
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
|
||||
msr->write = true;
|
||||
msr->write_val = 0xdeadbeef;
|
||||
msr->fault_expected = true;
|
||||
break;
|
||||
case 47:
|
||||
/* Setting bit 0 enables the feature */
|
||||
if (!has_invtsc)
|
||||
continue;
|
||||
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
|
||||
msr->write = true;
|
||||
msr->write_val = 1;
|
||||
msr->fault_expected = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
kvm_vm_free(vm);
|
||||
return;
|
||||
}
|
||||
@ -429,7 +500,7 @@ static void guest_test_msrs_access(void)
|
||||
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT_2(uc, "MSR = %lx, vector = %lx");
|
||||
REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx");
|
||||
return;
|
||||
case UCALL_DONE:
|
||||
break;
|
||||
@ -445,7 +516,6 @@ static void guest_test_msrs_access(void)
|
||||
|
||||
static void guest_test_hcalls_access(void)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *feat, *recomm, *dbg;
|
||||
struct kvm_cpuid2 *prev_cpuid = NULL;
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_run *run;
|
||||
@ -480,15 +550,11 @@ static void guest_test_hcalls_access(void)
|
||||
vcpu_init_cpuid(vcpu, prev_cpuid);
|
||||
}
|
||||
|
||||
feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
|
||||
recomm = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
|
||||
dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
|
||||
|
||||
run = vcpu->run;
|
||||
|
||||
switch (stage) {
|
||||
case 0:
|
||||
feat->eax |= HV_MSR_HYPERCALL_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
|
||||
hcall->control = 0xbeef;
|
||||
hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
|
||||
break;
|
||||
@ -498,7 +564,7 @@ static void guest_test_hcalls_access(void)
|
||||
hcall->expect = HV_STATUS_ACCESS_DENIED;
|
||||
break;
|
||||
case 2:
|
||||
feat->ebx |= HV_POST_MESSAGES;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
|
||||
hcall->control = HVCALL_POST_MESSAGE;
|
||||
hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
break;
|
||||
@ -508,7 +574,7 @@ static void guest_test_hcalls_access(void)
|
||||
hcall->expect = HV_STATUS_ACCESS_DENIED;
|
||||
break;
|
||||
case 4:
|
||||
feat->ebx |= HV_SIGNAL_EVENTS;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
|
||||
hcall->control = HVCALL_SIGNAL_EVENT;
|
||||
hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
break;
|
||||
@ -518,12 +584,12 @@ static void guest_test_hcalls_access(void)
|
||||
hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
|
||||
break;
|
||||
case 6:
|
||||
dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
|
||||
hcall->control = HVCALL_RESET_DEBUG_SESSION;
|
||||
hcall->expect = HV_STATUS_ACCESS_DENIED;
|
||||
break;
|
||||
case 7:
|
||||
feat->ebx |= HV_DEBUGGING;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
|
||||
hcall->control = HVCALL_RESET_DEBUG_SESSION;
|
||||
hcall->expect = HV_STATUS_OPERATION_DENIED;
|
||||
break;
|
||||
@ -533,7 +599,7 @@ static void guest_test_hcalls_access(void)
|
||||
hcall->expect = HV_STATUS_ACCESS_DENIED;
|
||||
break;
|
||||
case 9:
|
||||
recomm->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
|
||||
hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
|
||||
hcall->expect = HV_STATUS_SUCCESS;
|
||||
break;
|
||||
@ -542,7 +608,7 @@ static void guest_test_hcalls_access(void)
|
||||
hcall->expect = HV_STATUS_ACCESS_DENIED;
|
||||
break;
|
||||
case 11:
|
||||
recomm->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
|
||||
hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
|
||||
hcall->expect = HV_STATUS_SUCCESS;
|
||||
break;
|
||||
@ -552,7 +618,7 @@ static void guest_test_hcalls_access(void)
|
||||
hcall->expect = HV_STATUS_ACCESS_DENIED;
|
||||
break;
|
||||
case 13:
|
||||
recomm->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
|
||||
hcall->control = HVCALL_SEND_IPI;
|
||||
hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
break;
|
||||
@ -567,7 +633,7 @@ static void guest_test_hcalls_access(void)
|
||||
hcall->expect = HV_STATUS_ACCESS_DENIED;
|
||||
break;
|
||||
case 16:
|
||||
recomm->ebx = 0xfff;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
|
||||
hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
|
||||
hcall->expect = HV_STATUS_SUCCESS;
|
||||
break;
|
||||
@ -577,7 +643,7 @@ static void guest_test_hcalls_access(void)
|
||||
hcall->ud_expected = true;
|
||||
break;
|
||||
case 18:
|
||||
feat->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
|
||||
vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
|
||||
hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
|
||||
hcall->ud_expected = false;
|
||||
hcall->expect = HV_STATUS_SUCCESS;
|
||||
|
@ -193,8 +193,9 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
|
||||
GUEST_SYNC(stage++);
|
||||
/*
|
||||
* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
|
||||
* Nothing to write anything to XMM regs.
|
||||
*/
|
||||
ipi_ex->vp_set.valid_bank_mask = 0;
|
||||
hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
|
||||
hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
|
||||
IPI_VECTOR, HV_GENERIC_SET_ALL);
|
||||
nop_loop();
|
||||
|
@ -41,8 +41,17 @@ static void guest_int_handler(struct ex_regs *regs)
|
||||
static void l2_guest_code_int(void)
|
||||
{
|
||||
GUEST_ASSERT_1(int_fired == 1, int_fired);
|
||||
vmmcall();
|
||||
ud2();
|
||||
|
||||
/*
|
||||
* Same as the vmmcall() function, but with a ud2 sneaked after the
|
||||
* vmmcall. The caller injects an exception with the return address
|
||||
* increased by 2, so the "pop rbp" must be after the ud2 and we cannot
|
||||
* use vmmcall() directly.
|
||||
*/
|
||||
__asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
|
||||
: : "a"(0xdeadbeef), "c"(0xbeefdead)
|
||||
: "rbx", "rdx", "rsi", "rdi", "r8", "r9",
|
||||
"r10", "r11", "r12", "r13", "r14", "r15");
|
||||
|
||||
GUEST_ASSERT_1(bp_fired == 1, bp_fired);
|
||||
hlt();
|
||||
|
@ -72,11 +72,16 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
|
||||
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_SYNC:
|
||||
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
|
||||
uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
|
||||
stage + 1, (ulong)uc.args[1]);
|
||||
if (!strcmp((const char *)uc.args[0], "hello") &&
|
||||
uc.args[1] == stage + 1)
|
||||
ksft_test_result_pass("stage %d passed\n", stage + 1);
|
||||
else
|
||||
ksft_test_result_fail(
|
||||
"stage %d: Unexpected register values vmexit, got %lx",
|
||||
stage + 1, (ulong)uc.args[1]);
|
||||
return;
|
||||
case UCALL_DONE:
|
||||
ksft_test_result_pass("stage %d passed\n", stage + 1);
|
||||
return;
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
|
||||
@ -92,6 +97,9 @@ int main(void)
|
||||
struct kvm_vm *vm;
|
||||
uint64_t val;
|
||||
|
||||
ksft_print_header();
|
||||
ksft_set_plan(5);
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
|
||||
|
||||
val = 0;
|
||||
@ -149,5 +157,5 @@ int main(void)
|
||||
|
||||
kvm_vm_free(vm);
|
||||
|
||||
return 0;
|
||||
ksft_finished(); /* Print results and exit() accordingly */
|
||||
}
|
||||
|
@ -49,11 +49,6 @@ enum {
|
||||
NUM_VMX_PAGES,
|
||||
};
|
||||
|
||||
struct kvm_single_msr {
|
||||
struct kvm_msrs header;
|
||||
struct kvm_msr_entry entry;
|
||||
} __attribute__((packed));
|
||||
|
||||
/* The virtual machine object. */
|
||||
static struct kvm_vm *vm;
|
||||
|
||||
|
@ -962,6 +962,12 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
done:
|
||||
struct kvm_xen_hvm_attr evt_reset = {
|
||||
.type = KVM_XEN_ATTR_TYPE_EVTCHN,
|
||||
.u.evtchn.flags = KVM_XEN_EVTCHN_RESET,
|
||||
};
|
||||
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
|
||||
|
||||
alarm(0);
|
||||
clock_gettime(CLOCK_REALTIME, &max_ts);
|
||||
|
||||
|
@ -14,14 +14,10 @@
|
||||
#define KVM_MMU_LOCK_INIT(kvm) rwlock_init(&(kvm)->mmu_lock)
|
||||
#define KVM_MMU_LOCK(kvm) write_lock(&(kvm)->mmu_lock)
|
||||
#define KVM_MMU_UNLOCK(kvm) write_unlock(&(kvm)->mmu_lock)
|
||||
#define KVM_MMU_READ_LOCK(kvm) read_lock(&(kvm)->mmu_lock)
|
||||
#define KVM_MMU_READ_UNLOCK(kvm) read_unlock(&(kvm)->mmu_lock)
|
||||
#else
|
||||
#define KVM_MMU_LOCK_INIT(kvm) spin_lock_init(&(kvm)->mmu_lock)
|
||||
#define KVM_MMU_LOCK(kvm) spin_lock(&(kvm)->mmu_lock)
|
||||
#define KVM_MMU_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
|
||||
#define KVM_MMU_READ_LOCK(kvm) spin_lock(&(kvm)->mmu_lock)
|
||||
#define KVM_MMU_READ_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
|
||||
#endif /* KVM_HAVE_MMU_RWLOCK */
|
||||
|
||||
kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
|
||||
|
Loading…
Reference in New Issue
Block a user