KVM x86 changes for 6.6:

- Misc cleanups
 
  - Retry APIC optimized recalculation if a vCPU is added/enabled
 
  - Overhaul emergency reboot code to bring SVM up to par with VMX, tie the
    "emergency disabling" behavior to KVM actually being loaded, and move all of
    the logic within KVM
 
  - Fix user triggerable WARNs in SVM where KVM incorrectly assumes the TSC
    ratio MSR can diverge from the default iff TSC scaling is enabled, and clean
    up related code
 
  - Add a framework to allow "caching" feature flags so that KVM can check if
    the guest can use a feature without needing to search guest CPUID
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCgAwFiEEMHr+pfEFOIzK+KY1YJEiAU0MEvkFAmTueMwSHHNlYW5qY0Bn
 b29nbGUuY29tAAoJEGCRIgFNDBL5hp4P/i/UmIJEJupryUrD/ZXcSjqmupCtv4JS
 Z2o1KIAPbM5GUX4iyF1cnZrI4Ac5zMtULN8Tp3ATOp3AqKy72AqB1Z82e+v6SKis
 KfSXlDFCPFisrwv3Ys7JEu9vIS8oqITHmSBk8OAmElwujdQ5jYLZjwGbCXbM9qas
 yCFGLqD4fjX8XqkZLmXggjT99MPSgiTPoKL592Wq4JR8mY4hyQqJzBepDjb94sT7
 wrsAv1B+BchGDguk0+nOdmHM4emGrZU7fVqi3OFPofSlwAAdkqZObleb422KB058
 5bcpNow+9VH5pzgq8XSAU7DLNgH9aXH0PcVU8ASU6P0D9fceKoOFuL47nnFbwz0t
 vKafcXNWFs8xHE4iyzvAAsZK/X8GR0ngNByPnamATMsjt2tTmsa5BOyAPkIN+GpT
 DzZCIk27SbdGC3lGYlSV+5ob/+sOr6m384DkvSZnU6JiiFLlZiTxURj1/9Zvfka8
 2co2wnf8cJxnKFUThFfuxs9XpKgvhkOE8LauwCSo4MAQM95Pen+NAK960RBWj0xl
 wof5kIGmKbwmMXyg2Sr+EKqe5KRPba22Yi3x24tURAXafKK/AW7T8dgEEXOll7dp
 pKmTPAevwUk9wYIGultjhEBXKYgMOeD2BVoTa5je5h1Da28onrSJ7aLQUixHHs0J
 gLdtzs8M9K9t
 =yGM1
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-misc-6.6' of https://github.com/kvm-x86/linux into HEAD

KVM x86 changes for 6.6:

 - Misc cleanups

 - Retry APIC optimized recalculation if a vCPU is added/enabled

 - Overhaul emergency reboot code to bring SVM up to par with VMX, tie the
   "emergency disabling" behavior to KVM actually being loaded, and move all of
   the logic within KVM

 - Fix user triggerable WARNs in SVM where KVM incorrectly assumes the TSC
   ratio MSR can diverge from the default iff TSC scaling is enabled, and clean
   up related code

 - Add a framework to allow "caching" feature flags so that KVM can check if
   the guest can use a feature without needing to search guest CPUID
This commit is contained in:
Paolo Bonzini 2023-08-31 13:36:33 -04:00
commit 6d5e3c318a
28 changed files with 491 additions and 446 deletions

View File

@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
#endif
#endif
typedef void crash_vmclear_fn(void);
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
extern void kdump_nmi_shootdown_cpus(void);
#endif /* __ASSEMBLY__ */

View File

@ -746,7 +746,6 @@ struct kvm_vcpu_arch {
u64 smi_count;
bool at_instruction_boundary;
bool tpr_access_reporting;
bool xsaves_enabled;
bool xfd_no_write_intercept;
u64 ia32_xss;
u64 microcode_version;
@ -831,6 +830,25 @@ struct kvm_vcpu_arch {
struct kvm_cpuid_entry2 *cpuid_entries;
struct kvm_hypervisor_cpuid kvm_cpuid;
/*
* FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
* when "struct kvm_vcpu_arch" is no longer defined in an
* arch/x86/include/asm header. The max is mostly arbitrary, i.e.
* can be increased as necessary.
*/
#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
/*
* Track whether or not the guest is allowed to use features that are
* governed by KVM, where "governed" means KVM needs to manage state
* and/or explicitly enable the feature in hardware. Typically, but
* not always, governed features can be used by the guest if and only
* if both KVM and userspace want to expose the feature to the guest.
*/
struct {
DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
} governed_features;
u64 reserved_gpa_bits;
int maxphyaddr;
@ -1655,8 +1673,8 @@ struct kvm_x86_ops {
u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
* Retrieve somewhat arbitrary exit information. Intended to

View File

@ -25,7 +25,14 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
typedef void (cpu_emergency_virt_cb)(void);
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
#else
static inline void cpu_emergency_disable_virtualization(void) {}
#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);

View File

@ -1,154 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* CPU virtualization extensions handling
*
* This should carry the code for handling CPU virtualization extensions
* that needs to live in the kernel core.
*
* Author: Eduardo Habkost <ehabkost@redhat.com>
*
* Copyright (C) 2008, Red Hat Inc.
*
* Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
*/
#ifndef _ASM_X86_VIRTEX_H
#define _ASM_X86_VIRTEX_H
#include <asm/processor.h>
#include <asm/vmx.h>
#include <asm/svm.h>
#include <asm/tlbflush.h>
/*
* VMX functions:
*/
static inline int cpu_has_vmx(void)
{
unsigned long ecx = cpuid_ecx(1);
return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
}
/**
* cpu_vmxoff() - Disable VMX on the current CPU
*
* Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
*
* Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
* atomically track post-VMXON state, e.g. this may be called in NMI context.
* Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
* faults are guaranteed to be due to the !post-VMXON check unless the CPU is
* magically in RM, VM86, compat mode, or at CPL>0.
*/
static inline int cpu_vmxoff(void)
{
asm_volatile_goto("1: vmxoff\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "cc", "memory" : fault);
cr4_clear_bits(X86_CR4_VMXE);
return 0;
fault:
cr4_clear_bits(X86_CR4_VMXE);
return -EIO;
}
static inline int cpu_vmx_enabled(void)
{
return __read_cr4() & X86_CR4_VMXE;
}
/** Disable VMX if it is enabled on the current CPU
*
* You shouldn't call this if cpu_has_vmx() returns 0.
*/
static inline void __cpu_emergency_vmxoff(void)
{
if (cpu_vmx_enabled())
cpu_vmxoff();
}
/** Disable VMX if it is supported and enabled on the current CPU
*/
static inline void cpu_emergency_vmxoff(void)
{
if (cpu_has_vmx())
__cpu_emergency_vmxoff();
}
/*
* SVM functions:
*/
/** Check if the CPU has SVM support
*
* You can use the 'msg' arg to get a message describing the problem,
* if the function returns zero. Simply pass NULL if you are not interested
* on the messages; gcc should take care of not generating code for
* the messages on this case.
*/
static inline int cpu_has_svm(const char **msg)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) {
if (msg)
*msg = "not amd or hygon";
return 0;
}
if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
if (msg)
*msg = "can't execute cpuid_8000000a";
return 0;
}
if (!boot_cpu_has(X86_FEATURE_SVM)) {
if (msg)
*msg = "svm not available";
return 0;
}
return 1;
}
/** Disable SVM on the current CPU
*
* You should call this only if cpu_has_svm() returned true.
*/
static inline void cpu_svm_disable(void)
{
uint64_t efer;
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
if (efer & EFER_SVME) {
/*
* Force GIF=1 prior to disabling SVM to ensure INIT and NMI
* aren't blocked, e.g. if a fatal error occurred between CLGI
* and STGI. Note, STGI may #UD if SVM is disabled from NMI
* context between reading EFER and executing STGI. In that
* case, GIF must already be set, otherwise the NMI would have
* been blocked, so just eat the fault.
*/
asm_volatile_goto("1: stgi\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "memory" : fault);
fault:
wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
}
/** Makes sure SVM is disabled, if it is supported on the CPU
*/
static inline void cpu_emergency_svm_disable(void)
{
if (cpu_has_svm(NULL))
cpu_svm_disable();
}
#endif /* _ASM_X86_VIRTEX_H */

View File

@ -71,7 +71,7 @@
#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES)
#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)

View File

@ -48,38 +48,12 @@ struct crash_memmap_data {
unsigned int type;
};
/*
* This is used to VMCLEAR all VMCSs loaded on the
* processor. And when loading kvm_intel module, the
* callback function pointer will be assigned.
*
* protected by rcu.
*/
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
crash_vmclear_fn *do_vmclear_operation = NULL;
rcu_read_lock();
do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
if (do_vmclear_operation)
do_vmclear_operation();
rcu_read_unlock();
}
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
{
crash_save_cpu(regs, cpu);
/*
* VMCLEAR VMCSs loaded on all cpus if needed.
*/
cpu_crash_vmclear_loaded_vmcss();
/*
* Disable Intel PT to stop its logging
*/
@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
/*
* VMCLEAR VMCSs loaded on this cpu if needed.
*/
cpu_crash_vmclear_loaded_vmcss();
cpu_emergency_disable_virtualization();
/*

View File

@ -22,7 +22,6 @@
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
#include <asm/pci_x86.h>
#include <asm/virtext.h>
#include <asm/cpu.h>
#include <asm/nmi.h>
#include <asm/smp.h>
@ -530,9 +529,54 @@ static inline void kb_wait(void)
static inline void nmi_shootdown_cpus_on_restart(void);
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
/* RCU-protected callback to disable virtualization prior to reboot. */
static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
{
if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
return;
rcu_assign_pointer(cpu_emergency_virt_callback, callback);
}
EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback);
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
{
if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback))
return;
rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
/*
* Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
* reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
* GIF=0, i.e. if the crash occurred between CLGI and STGI.
*/
void cpu_emergency_disable_virtualization(void)
{
cpu_emergency_virt_cb *callback;
/*
* IRQs must be disabled as KVM enables virtualization in hardware via
* function call IPIs, i.e. IRQs need to be disabled to guarantee
* virtualization stays disabled.
*/
lockdep_assert_irqs_disabled();
rcu_read_lock();
callback = rcu_dereference(cpu_emergency_virt_callback);
if (callback)
callback();
rcu_read_unlock();
}
static void emergency_reboot_disable_virtualization(void)
{
/* Just make sure we won't change CPUs while doing this */
local_irq_disable();
/*
@ -545,7 +589,7 @@ static void emergency_reboot_disable_virtualization(void)
* Do the NMI shootdown even if virtualization is off on _this_ CPU, as
* other CPUs may have virtualization enabled.
*/
if (cpu_has_vmx() || cpu_has_svm(NULL)) {
if (rcu_access_pointer(cpu_emergency_virt_callback)) {
/* Safely force _this_ CPU out of VMX/SVM operation. */
cpu_emergency_disable_virtualization();
@ -553,7 +597,9 @@ static void emergency_reboot_disable_virtualization(void)
nmi_shootdown_cpus_on_restart();
}
}
#else
static void emergency_reboot_disable_virtualization(void) { }
#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
void __attribute__((weak)) mach_reboot_fixups(void)
{
@ -787,21 +833,9 @@ void machine_crash_shutdown(struct pt_regs *regs)
}
#endif
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
/*
* Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
* reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
* GIF=0, i.e. if the crash occurred between CLGI and STGI.
*/
void cpu_emergency_disable_virtualization(void)
{
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
}
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;

View File

@ -101,7 +101,7 @@ config X86_SGX_KVM
config KVM_AMD
tristate "KVM for AMD processors support"
depends on KVM
depends on KVM && (CPU_SUP_AMD || CPU_SUP_HYGON)
help
Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions.

View File

@ -11,6 +11,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kvm_host.h>
#include "linux/lockdep.h"
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
@ -84,6 +85,18 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
struct kvm_cpuid_entry2 *e;
int i;
/*
* KVM has a semi-arbitrary rule that querying the guest's CPUID model
* with IRQs disabled is disallowed. The CPUID model can legitimately
* have over one hundred entries, i.e. the lookup is slow, and IRQs are
* typically disabled in KVM only when KVM is in a performance critical
* path, e.g. the core VM-Enter/VM-Exit run loop. Nothing will break
* if this rule is violated, this assertion is purely to flag potential
* performance issues. If this fires, consider moving the lookup out
* of the hotpath, e.g. by caching information during CPUID updates.
*/
lockdep_assert_irqs_enabled();
for (i = 0; i < nent; i++) {
e = &entries[i];
@ -312,6 +325,27 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
bool allow_gbpages;
BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
bitmap_zero(vcpu->arch.governed_features.enabled,
KVM_MAX_NR_GOVERNED_FEATURES);
/*
* If TDP is enabled, let the guest use GBPAGES if they're supported in
* hardware. The hardware page walker doesn't let KVM disable GBPAGES,
* i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
* walk for performance and complexity reasons. Not to mention KVM
* _can't_ solve the problem because GVA->GPA walks aren't visible to
* KVM once a TDP translation is installed. Mimic hardware behavior so
* that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
* If TDP is disabled, honor *only* guest CPUID as KVM has full control
* and can install smaller shadow pages if the host lacks 1GiB support.
*/
allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
if (allow_gbpages)
kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
best = kvm_find_cpuid_entry(vcpu, 1);
if (best && apic) {
@ -647,7 +681,8 @@ void kvm_set_cpu_caps(void)
);
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI)
F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
F(AMX_COMPLEX)
);
kvm_cpu_cap_mask(CPUID_D_1_EAX,
@ -1154,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
case 0x80000005:
/* Pass host L1 cache and TLB info. */
break;
case 0x80000006:
/* Drop reserved bits, pass host L2 cache and TLB info. */
entry->edx &= ~GENMASK(17, 16);

View File

@ -232,4 +232,50 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
}
enum kvm_governed_features {
#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
#include "governed_features.h"
KVM_NR_GOVERNED_FEATURES
};
static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
{
switch (x86_feature) {
#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
#include "governed_features.h"
default:
return -1;
}
}
static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
{
return kvm_governed_feature_index(x86_feature) >= 0;
}
static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
__set_bit(kvm_governed_feature_index(x86_feature),
vcpu->arch.governed_features.enabled);
}
static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
kvm_governed_feature_set(vcpu, x86_feature);
}
static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
return test_bit(kvm_governed_feature_index(x86_feature),
vcpu->arch.governed_features.enabled);
}
#endif

View File

@ -1799,13 +1799,11 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
op->addr.mem,
&op->val,
op->bytes);
break;
case OP_MEM_STR:
return segmented_write(ctxt,
op->addr.mem,
op->data,
op->bytes * op->count);
break;
case OP_XMM:
kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
break;

View File

@ -0,0 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE)
BUILD_BUG()
#endif
#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x)
KVM_GOVERNED_X86_FEATURE(GBPAGES)
KVM_GOVERNED_X86_FEATURE(XSAVES)
KVM_GOVERNED_X86_FEATURE(VMX)
KVM_GOVERNED_X86_FEATURE(NRIPS)
KVM_GOVERNED_X86_FEATURE(TSCRATEMSR)
KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD)
KVM_GOVERNED_X86_FEATURE(LBRV)
KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
KVM_GOVERNED_X86_FEATURE(VGIF)
KVM_GOVERNED_X86_FEATURE(VNMI)
#undef KVM_GOVERNED_X86_FEATURE
#undef KVM_GOVERNED_FEATURE

View File

@ -1293,7 +1293,6 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
case HV_X64_MSR_VP_ASSIST_PAGE:
return hv_vcpu->cpuid_cache.features_eax &
HV_MSR_APIC_ACCESS_AVAILABLE;
break;
case HV_X64_MSR_TSC_FREQUENCY:
case HV_X64_MSR_APIC_FREQUENCY:
return hv_vcpu->cpuid_cache.features_eax &

View File

@ -213,7 +213,6 @@ struct x86_emulate_ops {
bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool exact_only);
bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);

View File

@ -376,7 +376,8 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
struct kvm_vcpu *vcpu;
unsigned long i;
u32 max_id = 255; /* enough space for any xAPIC ID */
bool xapic_id_mismatch = false;
bool xapic_id_mismatch;
int r;
/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
@ -386,9 +387,14 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
"Dirty APIC map without an in-kernel local APIC");
mutex_lock(&kvm->arch.apic_map_lock);
retry:
/*
* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
* (if clean) or the APIC registers (if dirty).
* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
* or the APIC registers (if dirty). Note, on retry the map may have
* not yet been marked dirty by whatever task changed a vCPU's x2APIC
* ID, i.e. the map may still show up as in-progress. In that case
* this task still needs to retry and complete its calculation.
*/
if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
@ -397,6 +403,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
return;
}
/*
* Reset the mismatch flag between attempts so that KVM does the right
* thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
* keep max_id strictly increasing. Disallowing max_id from shrinking
* ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
* with the highest x2APIC ID is toggling its APIC on and off.
*/
xapic_id_mismatch = false;
kvm_for_each_vcpu(i, vcpu, kvm)
if (kvm_apic_present(vcpu))
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
@ -415,9 +430,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
if (!kvm_apic_present(vcpu))
continue;
if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
if (r) {
kvfree(new);
new = NULL;
if (r == -E2BIG) {
cond_resched();
goto retry;
}
goto out;
}

View File

@ -4804,28 +4804,13 @@ static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
}
}
static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu)
{
/*
* If TDP is enabled, let the guest use GBPAGES if they're supported in
* hardware. The hardware page walker doesn't let KVM disable GBPAGES,
* i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
* walk for performance and complexity reasons. Not to mention KVM
* _can't_ solve the problem because GVA->GPA walks aren't visible to
* KVM once a TDP translation is installed. Mimic hardware behavior so
* that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
*/
return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
}
static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
struct kvm_mmu *context)
{
__reset_rsvds_bits_mask(&context->guest_rsvd_check,
vcpu->arch.reserved_gpa_bits,
context->cpu_role.base.level, is_efer_nx(context),
guest_can_use_gbpages(vcpu),
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
is_cr4_pse(context),
guest_cpuid_is_amd_or_hygon(vcpu));
}
@ -4902,7 +4887,8 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
context->root_role.level,
context->root_role.efer_nx,
guest_can_use_gbpages(vcpu), is_pse, is_amd);
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
is_pse, is_amd);
if (!shadow_me_mask)
return;
@ -6844,7 +6830,7 @@ static void mmu_destroy_caches(void)
static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
{
if (nx_hugepage_mitigation_hard_disabled)
return sprintf(buffer, "never\n");
return sysfs_emit(buffer, "never\n");
return param_get_bool(buffer, kp);
}

View File

@ -43,6 +43,7 @@ enum kvm_only_cpuid_leafs {
/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
/* CPUID level 0x80000007 (EDX). */

View File

@ -107,7 +107,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
{
if (!svm->v_vmload_vmsave_enabled)
if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
return true;
if (!nested_npt_enabled(svm))
@ -552,6 +552,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
bool new_vmcb12 = false;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
struct kvm_vcpu *vcpu = &svm->vcpu;
nested_vmcb02_compute_g_pat(svm);
@ -577,18 +578,18 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
vmcb_mark_dirty(vmcb02, VMCB_DT);
}
kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
svm_set_efer(&svm->vcpu, svm->nested.save.efer);
svm_set_efer(vcpu, svm->nested.save.efer);
svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
svm_set_cr0(vcpu, svm->nested.save.cr0);
svm_set_cr4(vcpu, svm->nested.save.cr4);
svm->vcpu.arch.cr2 = vmcb12->save.cr2;
kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
kvm_rax_write(vcpu, vmcb12->save.rax);
kvm_rsp_write(vcpu, vmcb12->save.rsp);
kvm_rip_write(vcpu, vmcb12->save.rip);
/* In case we don't even reach vcpu_run, the fields are not updated */
vmcb02->save.rax = vmcb12->save.rax;
@ -602,7 +603,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
vmcb_mark_dirty(vmcb02, VMCB_DR);
}
if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
/*
* Reserved bits of DEBUGCTL are ignored. Be consistent with
* svm_set_msr's definition of reserved bits.
@ -658,7 +660,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
*/
if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
else
int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
@ -695,10 +698,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
WARN_ON(!svm->tsc_scaling_enabled);
if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
nested_svm_update_tsc_ratio_msr(vcpu);
}
vmcb02->control.int_ctl =
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
@ -717,7 +719,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* what a nrips=0 CPU would do (L1 is responsible for advancing RIP
* prior to injecting the event).
*/
if (svm->nrips_enabled)
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
vmcb02->control.next_rip = svm->nested.ctl.next_rip;
else if (boot_cpu_has(X86_FEATURE_NRIPS))
vmcb02->control.next_rip = vmcb12_rip;
@ -727,7 +729,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
svm->soft_int_injected = true;
svm->soft_int_csbase = vmcb12_csbase;
svm->soft_int_old_rip = vmcb12_rip;
if (svm->nrips_enabled)
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
svm->soft_int_next_rip = svm->nested.ctl.next_rip;
else
svm->soft_int_next_rip = vmcb12_rip;
@ -735,15 +737,21 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.virt_ext = vmcb01->control.virt_ext &
LBR_CTL_ENABLE_MASK;
if (svm->lbrv_enabled)
if (guest_can_use(vcpu, X86_FEATURE_LBRV))
vmcb02->control.virt_ext |=
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
pause_count12 = svm->nested.ctl.pause_filter_count;
else
pause_count12 = 0;
if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
else
pause_thresh12 = 0;
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
/* use guest values since host doesn't intercept PAUSE */
vmcb02->control.pause_filter_count = pause_count12;
@ -1027,7 +1035,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (vmcb12->control.exit_code != SVM_EXIT_ERR)
nested_save_pending_event_to_vmcb12(svm, vmcb12);
if (svm->nrips_enabled)
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
vmcb12->control.next_rip = vmcb02->control.next_rip;
vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
@ -1066,7 +1074,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (!nested_exit_on_intr(svm))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
svm_copy_lbrs(vmcb12, vmcb02);
svm_update_lbrv(vcpu);
} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
@ -1101,10 +1110,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
}
if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
WARN_ON(!svm->tsc_scaling_enabled);
if (kvm_caps.has_tsc_control &&
vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
svm_write_tsc_multiplier(vcpu);
}
svm->nested.ctl.nested_cr3 = 0;
@ -1537,7 +1546,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
vcpu->arch.tsc_scaling_ratio =
kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
svm->tsc_ratio_msr);
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
svm_write_tsc_multiplier(vcpu);
}
/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */

View File

@ -39,10 +39,9 @@
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
#include <asm/traps.h>
#include <asm/reboot.h>
#include <asm/fpu/api.h>
#include <asm/virtext.h>
#include <trace/events/ipi.h>
#include "trace.h"
@ -527,14 +526,21 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu)
vcpu->arch.osvw.status |= 1;
}
static bool kvm_is_svm_supported(void)
static bool __kvm_is_svm_supported(void)
{
int cpu = raw_smp_processor_id();
const char *msg;
int cpu = smp_processor_id();
struct cpuinfo_x86 *c = &cpu_data(cpu);
u64 vm_cr;
if (!cpu_has_svm(&msg)) {
pr_err("SVM not supported by CPU %d, %s\n", cpu, msg);
if (c->x86_vendor != X86_VENDOR_AMD &&
c->x86_vendor != X86_VENDOR_HYGON) {
pr_err("CPU %d isn't AMD or Hygon\n", cpu);
return false;
}
if (!cpu_has(c, X86_FEATURE_SVM)) {
pr_err("SVM not supported by CPU %d\n", cpu);
return false;
}
@ -552,25 +558,55 @@ static bool kvm_is_svm_supported(void)
return true;
}
static bool kvm_is_svm_supported(void)
{
bool supported;
migrate_disable();
supported = __kvm_is_svm_supported();
migrate_enable();
return supported;
}
static int svm_check_processor_compat(void)
{
if (!kvm_is_svm_supported())
if (!__kvm_is_svm_supported())
return -EIO;
return 0;
}
void __svm_write_tsc_multiplier(u64 multiplier)
static void __svm_write_tsc_multiplier(u64 multiplier)
{
preempt_disable();
if (multiplier == __this_cpu_read(current_tsc_ratio))
goto out;
return;
wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
__this_cpu_write(current_tsc_ratio, multiplier);
out:
preempt_enable();
}
static inline void kvm_cpu_svm_disable(void)
{
uint64_t efer;
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
if (efer & EFER_SVME) {
/*
* Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
* NMI aren't blocked.
*/
stgi();
wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
}
static void svm_emergency_disable(void)
{
kvm_rebooting = true;
kvm_cpu_svm_disable();
}
static void svm_hardware_disable(void)
@ -579,7 +615,7 @@ static void svm_hardware_disable(void)
if (tsc_scaling)
__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
cpu_svm_disable();
kvm_cpu_svm_disable();
amd_pmu_disable_virt();
}
@ -1006,7 +1042,7 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
(is_guest_mode(vcpu) && svm->lbrv_enabled &&
(is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
if (enable_lbrv == current_enable_lbrv)
@ -1118,21 +1154,23 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
return svm->tsc_ratio_msr;
}
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
svm->vmcb->control.tsc_offset = offset;
svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
{
__svm_write_tsc_multiplier(multiplier);
preempt_disable();
if (to_svm(vcpu)->guest_state_loaded)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
preempt_enable();
}
/* Evaluate instruction intercepts that depend on guest CPUID features. */
static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm)
@ -1173,8 +1211,6 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
svm->v_vmload_vmsave_enabled = false;
} else {
/*
* If hardware supports Virtual VMLOAD VMSAVE then enable it
@ -2788,7 +2824,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_AMD64_TSC_RATIO:
if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
if (!msr_info->host_initiated &&
!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
return 1;
msr_info->data = svm->tsc_ratio_msr;
break;
@ -2938,7 +2975,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
if (!svm->tsc_scaling_enabled) {
if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
if (!msr->host_initiated)
return 1;
@ -2960,7 +2997,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->tsc_ratio_msr = data;
if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
is_guest_mode(vcpu))
nested_svm_update_tsc_ratio_msr(vcpu);
break;
@ -4248,28 +4286,37 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_cpuid_entry2 *best;
vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
boot_cpu_has(X86_FEATURE_XSAVE) &&
boot_cpu_has(X86_FEATURE_XSAVES);
/*
* SVM doesn't provide a way to disable just XSAVES in the guest, KVM
* can only disable all variants of by disallowing CR4.OSXSAVE from
* being set. As a result, if the host has XSAVE and XSAVES, and the
* guest has XSAVE enabled, the guest can execute XSAVES without
* faulting. Treat XSAVES as enabled in this case regardless of
* whether it's advertised to the guest so that KVM context switches
* XSS on VM-Enter/VM-Exit. Failure to do so would effectively give
* the guest read/write access to the host's XSS.
*/
if (boot_cpu_has(X86_FEATURE_XSAVE) &&
boot_cpu_has(X86_FEATURE_XSAVES) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
/* Update nrips enabled cache */
svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
/*
* Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
* VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
* SVM on Intel is bonkers and extremely unlikely to work).
*/
if (!guest_cpuid_is_intel(vcpu))
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
svm_recalc_instruction_intercepts(vcpu, svm);
@ -5258,6 +5305,13 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
.pmu_ops = &amd_pmu_ops,
};
static void __svm_exit(void)
{
kvm_x86_vendor_exit();
cpu_emergency_unregister_virt_callback(svm_emergency_disable);
}
static int __init svm_init(void)
{
int r;
@ -5271,6 +5325,8 @@ static int __init svm_init(void)
if (r)
return r;
cpu_emergency_register_virt_callback(svm_emergency_disable);
/*
* Common KVM initialization _must_ come last, after this, /dev/kvm is
* exposed to userspace!
@ -5283,14 +5339,14 @@ static int __init svm_init(void)
return 0;
err_kvm_init:
kvm_x86_vendor_exit();
__svm_exit();
return r;
}
static void __exit svm_exit(void)
{
kvm_exit();
kvm_x86_vendor_exit();
__svm_exit();
}
module_init(svm_init)

View File

@ -22,6 +22,7 @@
#include <asm/svm.h>
#include <asm/sev-common.h>
#include "cpuid.h"
#include "kvm_cache_regs.h"
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
@ -261,16 +262,6 @@ struct vcpu_svm {
unsigned long soft_int_next_rip;
bool soft_int_injected;
/* optional nested SVM features that are enabled for this guest */
bool nrips_enabled : 1;
bool tsc_scaling_enabled : 1;
bool v_vmload_vmsave_enabled : 1;
bool lbrv_enabled : 1;
bool pause_filter_enabled : 1;
bool pause_threshold_enabled : 1;
bool vgif_enabled : 1;
bool vnmi_enabled : 1;
u32 ldr_reg;
u32 dfr_reg;
struct page *avic_backing_page;
@ -452,7 +443,8 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
{
return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
}
static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
@ -503,7 +495,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
{
return svm->vnmi_enabled &&
return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
(svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
}
@ -619,7 +611,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
void __svm_write_tsc_multiplier(u64 multiplier);
void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
struct vmcb_control_area *control);
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,

View File

@ -252,7 +252,7 @@ static inline bool cpu_has_vmx_pml(void)
static inline bool cpu_has_vmx_xsaves(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_XSAVES;
SECONDARY_EXEC_ENABLE_XSAVES;
}
static inline bool cpu_has_vmx_waitpkg(void)

View File

@ -78,7 +78,7 @@
SECONDARY_EXEC_DESC | \
SECONDARY_EXEC_ENABLE_RDTSCP | \
SECONDARY_EXEC_ENABLE_INVPCID | \
SECONDARY_EXEC_XSAVES | \
SECONDARY_EXEC_ENABLE_XSAVES | \
SECONDARY_EXEC_RDSEED_EXITING | \
SECONDARY_EXEC_RDRAND_EXITING | \
SECONDARY_EXEC_TSC_SCALING | \

View File

@ -2307,7 +2307,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_ENABLE_RDTSCP |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_XSAVES |
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@ -6331,7 +6331,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
* If if it were, XSS would have to be checked against
* the XSS exit bitmap in vmcs12.
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
case EXIT_REASON_UMWAIT:
case EXIT_REASON_TPAUSE:
return nested_cpu_has2(vmcs12,
@ -6426,7 +6426,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
if (nested_vmx_allowed(vcpu) &&
if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
@ -6567,7 +6567,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
return -EINVAL;
} else {
if (!nested_vmx_allowed(vcpu))
if (!guest_can_use(vcpu, X86_FEATURE_VMX))
return -EINVAL;
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
@ -6601,7 +6601,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
(!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
(!guest_can_use(vcpu, X86_FEATURE_VMX) ||
!vmx->nested.enlightened_vmcs_enabled))
return -EINVAL;
vmx_leave_nested(vcpu);
@ -6874,7 +6875,7 @@ static void nested_vmx_setup_secondary_ctls(u32 ept_caps,
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_ENABLE_VMFUNC |
SECONDARY_EXEC_RDSEED_EXITING |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_XSAVES |
SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;

View File

@ -168,7 +168,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
}
static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)

View File

@ -41,13 +41,12 @@
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
#include <asm/kexec.h>
#include <asm/reboot.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/mshyperv.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include <asm/vmx.h>
#include "capabilities.h"
@ -237,9 +236,6 @@ static const struct {
#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;
/* Control for disabling CPU Fill buffer clear */
static bool __read_mostly vmx_fb_clear_ctrl_available;
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
struct page *page;
@ -255,14 +251,9 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
return 0;
}
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
u64 msr;
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
return 0;
}
if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
return 0;
}
/* If set to auto use the default l1tf mitigation method */
@ -366,22 +357,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
{
if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
return sprintf(s, "???\n");
return sysfs_emit(s, "???\n");
return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
static void vmx_setup_fb_clear_ctrl(void)
{
u64 msr;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_TAA)) {
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
if (msr & ARCH_CAP_FB_CLEAR_CTRL)
vmx_fb_clear_ctrl_available = true;
}
return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
@ -409,7 +387,9 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_TAA);
/*
* If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@ -754,17 +734,51 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
return ret;
}
#ifdef CONFIG_KEXEC_CORE
static void crash_vmclear_local_loaded_vmcss(void)
/*
* Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
*
* Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
* atomically track post-VMXON state, e.g. this may be called in NMI context.
* Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
* faults are guaranteed to be due to the !post-VMXON check unless the CPU is
* magically in RM, VM86, compat mode, or at CPL>0.
*/
static int kvm_cpu_vmxoff(void)
{
asm_volatile_goto("1: vmxoff\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "cc", "memory" : fault);
cr4_clear_bits(X86_CR4_VMXE);
return 0;
fault:
cr4_clear_bits(X86_CR4_VMXE);
return -EIO;
}
static void vmx_emergency_disable(void)
{
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v;
kvm_rebooting = true;
/*
* Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
* set in task context. If this races with VMX is disabled by an NMI,
* VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
* kvm_rebooting set.
*/
if (!(__read_cr4() & X86_CR4_VMXE))
return;
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
loaded_vmcss_on_cpu_link)
vmcs_clear(v->vmcs);
kvm_cpu_vmxoff();
}
#endif /* CONFIG_KEXEC_CORE */
static void __loaded_vmcs_clear(void *arg)
{
@ -1899,25 +1913,14 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
return kvm_caps.default_tsc_scaling_ratio;
}
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
{
vmcs_write64(TSC_OFFSET, offset);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
}
static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
{
vmcs_write64(TSC_MULTIPLIER, multiplier);
}
/*
* nested_vmx_allowed() checks whether a guest should be allowed to use VMX
* instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
* all guests if the "nested" module option is off, and can also be disabled
* for a single guest by disabling its VMX cpuid bit.
*/
bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
{
return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
}
/*
@ -2047,7 +2050,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
break;
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested_vmx_allowed(vcpu))
if (!guest_can_use(vcpu, X86_FEATURE_VMX))
return 1;
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data))
@ -2355,7 +2358,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!msr_info->host_initiated)
return 1; /* they are read-only */
if (!nested_vmx_allowed(vcpu))
if (!guest_can_use(vcpu, X86_FEATURE_VMX))
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_RTIT_CTL:
@ -2729,11 +2732,11 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
return 0;
}
static bool kvm_is_vmx_supported(void)
static bool __kvm_is_vmx_supported(void)
{
int cpu = raw_smp_processor_id();
int cpu = smp_processor_id();
if (!cpu_has_vmx()) {
if (!(cpuid_ecx(1) & feature_bit(VMX))) {
pr_err("VMX not supported by CPU %d\n", cpu);
return false;
}
@ -2747,13 +2750,24 @@ static bool kvm_is_vmx_supported(void)
return true;
}
static bool kvm_is_vmx_supported(void)
{
bool supported;
migrate_disable();
supported = __kvm_is_vmx_supported();
migrate_enable();
return supported;
}
static int vmx_check_processor_compat(void)
{
int cpu = raw_smp_processor_id();
struct vmcs_config vmcs_conf;
struct vmx_capability vmx_cap;
if (!kvm_is_vmx_supported())
if (!__kvm_is_vmx_supported())
return -EIO;
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
@ -2833,7 +2847,7 @@ static void vmx_hardware_disable(void)
{
vmclear_local_loaded_vmcss();
if (cpu_vmxoff())
if (kvm_cpu_vmxoff())
kvm_spurious_fault();
hv_reset_evmcs();
@ -4546,16 +4560,19 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
* based on a single guest CPUID bit, with a dedicated feature bit. This also
* verifies that the control is actually supported by KVM and hardware.
*/
#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
({ \
bool __enabled; \
\
if (cpu_has_vmx_##name()) { \
__enabled = guest_cpuid_has(&(vmx)->vcpu, \
X86_FEATURE_##feat_name); \
vmx_adjust_secondary_exec_control(vmx, exec_control, \
SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
} \
#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
({ \
struct kvm_vcpu *__vcpu = &(vmx)->vcpu; \
bool __enabled; \
\
if (cpu_has_vmx_##name()) { \
if (kvm_is_governed_feature(X86_FEATURE_##feat_name)) \
__enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name); \
else \
__enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name); \
vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
__enabled, exiting); \
} \
})
/* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
@ -4615,19 +4632,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
if (cpu_has_vmx_xsaves()) {
/* Exposing XSAVES only when XSAVE is exposed */
bool xsaves_enabled =
boot_cpu_has(X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
vcpu->arch.xsaves_enabled = xsaves_enabled;
vmx_adjust_secondary_exec_control(vmx, &exec_control,
SECONDARY_EXEC_XSAVES,
xsaves_enabled, false);
}
vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES);
/*
* RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
@ -4646,6 +4651,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
SECONDARY_EXEC_ENABLE_RDTSCP,
rdpid_or_rdtscp_enabled, false);
}
vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
@ -7747,8 +7753,16 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
/* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
vcpu->arch.xsaves_enabled = false;
/*
* XSAVES is effectively enabled if and only if XSAVE is also exposed
* to the guest. XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
* set if and only if XSAVE is supported.
*/
if (boot_cpu_has(X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
vmx_setup_uret_msrs(vmx);
@ -7756,7 +7770,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmcs_set_secondary_exec_control(vmx,
vmx_secondary_exec_control(vmx));
if (nested_vmx_allowed(vcpu))
if (guest_can_use(vcpu, X86_FEATURE_VMX))
vmx->msr_ia32_feature_control_valid_bits |=
FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
@ -7765,7 +7779,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
if (nested_vmx_allowed(vcpu))
if (guest_can_use(vcpu, X86_FEATURE_VMX))
nested_vmx_cr_fixed1_bits_update(vcpu);
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@ -8618,10 +8632,8 @@ static void __vmx_exit(void)
{
allow_smaller_maxphyaddr = false;
#ifdef CONFIG_KEXEC_CORE
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
vmx_cleanup_l1d_flush();
}
@ -8662,18 +8674,14 @@ static int __init vmx_init(void)
if (r)
goto err_l1d_flush;
vmx_setup_fb_clear_ctrl();
for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
pi_init_cpu(cpu);
}
#ifdef CONFIG_KEXEC_CORE
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
crash_vmclear_local_loaded_vmcss);
#endif
cpu_emergency_register_virt_callback(vmx_emergency_disable);
vmx_check_vmcs12_offsets();
/*

View File

@ -374,7 +374,6 @@ struct kvm_vmx {
u64 *pid_table;
};
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
struct loaded_vmcs *buddy);
int allocate_vpid(void);
@ -562,7 +561,7 @@ static inline u8 vmx_get_rvi(void)
SECONDARY_EXEC_APIC_REGISTER_VIRT | \
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
SECONDARY_EXEC_SHADOW_VMCS | \
SECONDARY_EXEC_XSAVES | \
SECONDARY_EXEC_ENABLE_XSAVES | \
SECONDARY_EXEC_RDSEED_EXITING | \
SECONDARY_EXEC_RDRAND_EXITING | \
SECONDARY_EXEC_ENABLE_PML | \

View File

@ -237,6 +237,9 @@ EXPORT_SYMBOL_GPL(enable_apicv);
u64 __read_mostly host_xss;
EXPORT_SYMBOL_GPL(host_xss);
u64 __read_mostly host_arch_capabilities;
EXPORT_SYMBOL_GPL(host_arch_capabilities);
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@ -1021,7 +1024,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
if (vcpu->arch.xsaves_enabled &&
if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != host_xss)
wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
@ -1052,7 +1055,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
if (vcpu->arch.xsaves_enabled &&
if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != host_xss)
wrmsrl(MSR_IA32_XSS, host_xss);
}
@ -1620,12 +1623,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
static u64 kvm_get_arch_capabilities(void)
{
u64 data = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
data &= KVM_SUPPORTED_ARCH_CAP;
}
u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
/*
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@ -2631,7 +2629,7 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
else
vcpu->arch.tsc_offset = l1_offset;
static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
static_call(kvm_x86_write_tsc_offset)(vcpu);
}
static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
@ -2647,8 +2645,7 @@ static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multipli
vcpu->arch.tsc_scaling_ratio = l1_multiplier;
if (kvm_caps.has_tsc_control)
static_call(kvm_x86_write_tsc_multiplier)(
vcpu, vcpu->arch.tsc_scaling_ratio);
static_call(kvm_x86_write_tsc_multiplier)(vcpu);
}
static inline bool kvm_check_tsc_unstable(void)
@ -4665,7 +4662,6 @@ static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
return 0;
default:
return -ENXIO;
break;
}
}
@ -6532,7 +6528,7 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
struct kvm_msr_filter_range *user_range)
{
unsigned long *bitmap = NULL;
unsigned long *bitmap;
size_t bitmap_size;
if (!user_range->nmsrs)
@ -8245,11 +8241,6 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
}
static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
{
return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
}
static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
{
return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
@ -8351,7 +8342,6 @@ static const struct x86_emulate_ops emulate_ops = {
.fix_hypercall = emulator_fix_hypercall,
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
.guest_has_long_mode = emulator_guest_has_long_mode,
.guest_has_movbe = emulator_guest_has_movbe,
.guest_has_fxsr = emulator_guest_has_fxsr,
.guest_has_rdpid = emulator_guest_has_rdpid,
@ -9172,7 +9162,7 @@ static int kvmclock_cpu_down_prep(unsigned int cpu)
static void tsc_khz_changed(void *data)
{
struct cpufreq_freqs *freq = data;
unsigned long khz = 0;
unsigned long khz;
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
@ -9512,6 +9502,9 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_init_pmu_capability(ops->pmu_ops);
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
r = ops->hardware_setup();
if (r != 0)
goto out_mmu_exit;
@ -11111,12 +11104,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
r = -EINTR;
goto out;
}
/*
* It should be impossible for the hypervisor timer to be in
* use before KVM has ever run the vCPU.
*/
WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
/*
* Don't bother switching APIC timer emulation from the
* hypervisor timer to the software timer, the only way for the
* APIC timer to be active is if userspace stuffed vCPU state,
* i.e. put the vCPU into a nonsensical state. Only an INIT
* will transition the vCPU out of UNINITIALIZED (without more
* state stuffing from userspace), which will reset the local
* APIC and thus cancel the timer or drop the IRQ (if the timer
* already expired).
*/
kvm_vcpu_srcu_read_unlock(vcpu);
kvm_vcpu_block(vcpu);
kvm_vcpu_srcu_read_lock(vcpu);

View File

@ -323,6 +323,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
extern u64 host_xcr0;
extern u64 host_xss;
extern u64 host_arch_capabilities;
extern struct kvm_caps kvm_caps;