KVM: x86: Add Intel PT virtualization work mode

Intel Processor Trace virtualization can be work in one
of 2 possible modes:

a. System-Wide mode (default):
   When the host configures Intel PT to collect trace packets
   of the entire system, it can leave the relevant VMX controls
   clear to allow VMX-specific packets to provide information
   across VMX transitions.
   KVM guest will not aware this feature in this mode and both
   host and KVM guest trace will output to host buffer.

b. Host-Guest mode:
   Host can configure trace-packet generation while in
   VMX non-root operation for guests and root operation
   for native executing normally.
   Intel PT will be exposed to KVM guest in this mode, and
   the trace output to respective buffer of host and guest.
   In this mode, tht status of PT will be saved and disabled
   before VM-entry and restored after VM-exit if trace
   a virtual machine.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Chao Peng 2018-10-24 16:05:10 +08:00 committed by Paolo Bonzini
parent e0018afec5
commit f99e3daf94
5 changed files with 51 additions and 3 deletions

View File

@ -807,6 +807,7 @@
#define VMX_BASIC_INOUT 0x0040000000000000LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU
/* MSR_IA32_VMX_MISC bits */ /* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */ /* AMD-V MSRs */

View File

@ -77,7 +77,9 @@
#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 #define SECONDARY_EXEC_ENCLS_EXITING 0x00008000
#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 #define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000
#define SECONDARY_EXEC_XSAVES 0x00100000 #define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_PT_USE_GPA 0x01000000
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000
@ -99,6 +101,8 @@
#define VM_EXIT_LOAD_IA32_EFER 0x00200000 #define VM_EXIT_LOAD_IA32_EFER 0x00200000
#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000 #define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_PT_CONCEAL_PIP 0x01000000
#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@ -110,6 +114,8 @@
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 #define VM_ENTRY_LOAD_IA32_PAT 0x00004000
#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 #define VM_ENTRY_LOAD_IA32_EFER 0x00008000
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000 #define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000
#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
@ -241,6 +247,8 @@ enum vmcs_field {
GUEST_PDPTR3_HIGH = 0x00002811, GUEST_PDPTR3_HIGH = 0x00002811,
GUEST_BNDCFGS = 0x00002812, GUEST_BNDCFGS = 0x00002812,
GUEST_BNDCFGS_HIGH = 0x00002813, GUEST_BNDCFGS_HIGH = 0x00002813,
GUEST_IA32_RTIT_CTL = 0x00002814,
GUEST_IA32_RTIT_CTL_HIGH = 0x00002815,
HOST_IA32_PAT = 0x00002c00, HOST_IA32_PAT = 0x00002c00,
HOST_IA32_PAT_HIGH = 0x00002c01, HOST_IA32_PAT_HIGH = 0x00002c01,
HOST_IA32_EFER = 0x00002c02, HOST_IA32_EFER = 0x00002c02,

View File

@ -10,6 +10,10 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits; extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml; extern bool __read_mostly enable_pml;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
#define PT_MODE_HOST_GUEST 1
struct nested_vmx_msrs { struct nested_vmx_msrs {
/* /*
@ -325,4 +329,15 @@ static inline bool cpu_has_vmx_invvpid_global(void)
return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
} }
static inline bool cpu_has_vmx_intel_pt(void)
{
u64 vmx_msr;
rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
return (vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT) &&
(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) &&
(vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_RTIT_CTL) &&
(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL);
}
#endif /* __KVM_X86_VMX_CAPS_H */ #endif /* __KVM_X86_VMX_CAPS_H */

View File

@ -169,6 +169,10 @@ module_param(ple_window_shrink, uint, 0444);
static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, uint, 0444); module_param(ple_window_max, uint, 0444);
/* Default is SYSTEM mode, 1 for host-guest mode */
int __read_mostly pt_mode = PT_MODE_SYSTEM;
module_param(pt_mode, int, S_IRUGO);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex); static DEFINE_MUTEX(vmx_l1d_flush_mutex);
@ -1975,6 +1979,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_RDRAND_EXITING |
SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_ENABLE_PML |
SECONDARY_EXEC_TSC_SCALING | SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_PT_USE_GPA |
SECONDARY_EXEC_PT_CONCEAL_VMX |
SECONDARY_EXEC_ENABLE_VMFUNC | SECONDARY_EXEC_ENABLE_VMFUNC |
SECONDARY_EXEC_ENCLS_EXITING; SECONDARY_EXEC_ENCLS_EXITING;
if (adjust_vmx_controls(min2, opt2, if (adjust_vmx_controls(min2, opt2,
@ -2023,7 +2029,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_LOAD_IA32_EFER |
VM_EXIT_CLEAR_BNDCFGS; VM_EXIT_CLEAR_BNDCFGS |
VM_EXIT_PT_CONCEAL_PIP |
VM_EXIT_CLEAR_IA32_RTIT_CTL;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0) &_vmexit_control) < 0)
return -EIO; return -EIO;
@ -2045,7 +2053,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_PAT |
VM_ENTRY_LOAD_IA32_EFER | VM_ENTRY_LOAD_IA32_EFER |
VM_ENTRY_LOAD_BNDCFGS; VM_ENTRY_LOAD_BNDCFGS |
VM_ENTRY_PT_CONCEAL_PIP |
VM_ENTRY_LOAD_IA32_RTIT_CTL;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0) &_vmentry_control) < 0)
return -EIO; return -EIO;
@ -3567,6 +3577,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
if (pt_mode == PT_MODE_SYSTEM)
exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
if (!cpu_need_virtualize_apic_accesses(vcpu)) if (!cpu_need_virtualize_apic_accesses(vcpu))
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
if (vmx->vpid == 0) if (vmx->vpid == 0)
@ -7248,6 +7260,11 @@ static __init int hardware_setup(void)
kvm_mce_cap_supported |= MCG_LMCE_P; kvm_mce_cap_supported |= MCG_LMCE_P;
if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
return -EINVAL;
if (!enable_ept || !cpu_has_vmx_intel_pt())
pt_mode = PT_MODE_SYSTEM;
if (nested) { if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested, nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
vmx_capability.ept, enable_apicv); vmx_capability.ept, enable_apicv);

View File

@ -5,6 +5,7 @@
#include <linux/kvm_host.h> #include <linux/kvm_host.h>
#include <asm/kvm.h> #include <asm/kvm.h>
#include <asm/intel_pt.h>
#include "capabilities.h" #include "capabilities.h"
#include "ops.h" #include "ops.h"
@ -421,13 +422,19 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
static inline u32 vmx_vmentry_ctrl(void) static inline u32 vmx_vmentry_ctrl(void)
{ {
u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
if (pt_mode == PT_MODE_SYSTEM)
vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL);
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
return vmcs_config.vmentry_ctrl & return vmentry_ctrl &
~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
} }
static inline u32 vmx_vmexit_ctrl(void) static inline u32 vmx_vmexit_ctrl(void)
{ {
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
if (pt_mode == PT_MODE_SYSTEM)
vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL);
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
return vmcs_config.vmexit_ctrl & return vmcs_config.vmexit_ctrl &
~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);