2019-06-04 10:11:32 +02:00
// SPDX-License-Identifier: GPL-2.0-only
2015-06-19 15:45:05 +02:00
/*
 * KVM PMU support for AMD
 *
 * Copyright 2015, Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *   Wei Huang <wei@redhat.com>
 *
 * Implementation is based on pmu_intel.c file
 */
# include <linux/types.h>
# include <linux/kvm_host.h>
# include <linux/perf_event.h>
# include "x86.h"
# include "cpuid.h"
# include "lapic.h"
# include "pmu.h"
2021-11-17 16:03:04 +08:00
# include "svm.h"
2015-06-19 15:45:05 +02:00
2018-02-05 13:24:52 -06:00
/* Kind of per-counter MSR being addressed. */
enum pmu_type {
	PMU_TYPE_COUNTER = 0,	/* counter value MSR */
	PMU_TYPE_EVNTSEL = 1,	/* event select MSR */
};
/*
 * Position of a counter in the gp_counters array, as produced by
 * msr_to_index(). INDEX_ERROR marks an MSR that maps to no counter.
 */
enum index {
	INDEX_ZERO  = 0,
	INDEX_ONE   = 1,
	INDEX_TWO   = 2,
	INDEX_THREE = 3,
	INDEX_FOUR  = 4,
	INDEX_FIVE  = 5,
	INDEX_ERROR = 6,
};
/*
 * Return the first MSR of the bank that holds registers of kind @type.
 *
 * When the guest CPUID has X86_FEATURE_PERFCTR_CORE the MSR_F15H_* bank is
 * used, otherwise the MSR_K7_* bank. Any @type other than PMU_TYPE_COUNTER
 * selects the event select (control) base.
 */
static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
{
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
	bool want_counter = (type == PMU_TYPE_COUNTER);

	if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
		return want_counter ? MSR_K7_PERFCTR0 : MSR_K7_EVNTSEL0;

	return want_counter ? MSR_F15H_PERF_CTR : MSR_F15H_PERF_CTL;
}
/*
 * Translate a counter or event select MSR number into the position of the
 * counter it belongs to. Both the MSR_K7_* and MSR_F15H_* names of a counter
 * map to the same position; only the MSR_F15H_* bank has positions four and
 * five. Any other MSR yields INDEX_ERROR.
 */
static enum index msr_to_index(u32 msr)
{
	enum index idx;

	switch (msr) {
	case MSR_K7_PERFCTR0:
	case MSR_K7_EVNTSEL0:
	case MSR_F15H_PERF_CTR0:
	case MSR_F15H_PERF_CTL0:
		idx = INDEX_ZERO;
		break;
	case MSR_K7_PERFCTR1:
	case MSR_K7_EVNTSEL1:
	case MSR_F15H_PERF_CTR1:
	case MSR_F15H_PERF_CTL1:
		idx = INDEX_ONE;
		break;
	case MSR_K7_PERFCTR2:
	case MSR_K7_EVNTSEL2:
	case MSR_F15H_PERF_CTR2:
	case MSR_F15H_PERF_CTL2:
		idx = INDEX_TWO;
		break;
	case MSR_K7_PERFCTR3:
	case MSR_K7_EVNTSEL3:
	case MSR_F15H_PERF_CTR3:
	case MSR_F15H_PERF_CTL3:
		idx = INDEX_THREE;
		break;
	case MSR_F15H_PERF_CTR4:
	case MSR_F15H_PERF_CTL4:
		idx = INDEX_FOUR;
		break;
	case MSR_F15H_PERF_CTR5:
	case MSR_F15H_PERF_CTL5:
		idx = INDEX_FIVE;
		break;
	default:
		idx = INDEX_ERROR;
		break;
	}

	return idx;
}
/*
 * Look up the general-purpose counter addressed by @msr, provided that @msr
 * is a register of kind @type.
 *
 * Returns NULL when:
 *  - the PMU is not enabled for the VM (kvm->arch.enable_pmu is false),
 *  - @msr is an MSR_F15H_* register but the guest CPUID lacks
 *    X86_FEATURE_PERFCTR_CORE,
 *  - @msr is a counter register while @type asks for an event select, or
 *    the other way round,
 *  - @msr is not one of the MSRs handled here.
 */
static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
					     enum pmu_type type)
{
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
	enum pmu_type msr_kind;
	bool needs_core = false;

	if (!vcpu->kvm->arch.enable_pmu)
		return NULL;

	/* Classify the MSR first; the checks are applied after the switch. */
	switch (msr) {
	case MSR_F15H_PERF_CTL0:
	case MSR_F15H_PERF_CTL1:
	case MSR_F15H_PERF_CTL2:
	case MSR_F15H_PERF_CTL3:
	case MSR_F15H_PERF_CTL4:
	case MSR_F15H_PERF_CTL5:
		needs_core = true;
		fallthrough;
	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
		msr_kind = PMU_TYPE_EVNTSEL;
		break;
	case MSR_F15H_PERF_CTR0:
	case MSR_F15H_PERF_CTR1:
	case MSR_F15H_PERF_CTR2:
	case MSR_F15H_PERF_CTR3:
	case MSR_F15H_PERF_CTR4:
	case MSR_F15H_PERF_CTR5:
		needs_core = true;
		fallthrough;
	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
		msr_kind = PMU_TYPE_COUNTER;
		break;
	default:
		return NULL;
	}

	if (needs_core && !guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
		return NULL;

	if (type != msr_kind)
		return NULL;

	return &pmu->gp_counters[msr_to_index(msr)];
}
2022-05-18 21:25:12 +08:00
/*
 * hw_event_available callback: no event is filtered out here, so every PMC
 * is reported as having its event available (@pmc is not inspected).
 */
static bool amd_hw_event_available(struct kvm_pmc *pmc)
{
	return true;
}
2015-06-12 01:34:55 -04:00
/*
 * Check whether a PMC is enabled by comparing it against the global_ctrl
 * bits. Because AMD CPUs don't have a global_ctrl MSR, all PMCs are treated
 * as enabled: this always returns true and @pmc is not inspected.
 */
static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
{
	return true;
}
/*
 * Map a contiguous counter index to its kvm_pmc by converting the index to
 * the matching counter MSR and looking that MSR up.
 *
 * Returns whatever get_gp_pmc_amd() returns for that MSR, i.e. NULL when the
 * resulting MSR is not a valid counter register.
 */
static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
	unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
	int stride = 1;

	/*
	 * The idx is contiguous. The MSRs are not: with
	 * X86_FEATURE_PERFCTR_CORE the counter MSRs are interleaved with the
	 * event select MSRs, so consecutive counters are two MSRs apart.
	 */
	if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
		stride = 2;

	return get_gp_pmc_amd(pmu, base + pmc_idx * stride, PMU_TYPE_COUNTER);
}
2021-11-05 13:20:58 -07:00
static bool amd_is_valid_rdpmc_ecx ( struct kvm_vcpu * vcpu , unsigned int idx )
2015-06-19 15:45:05 +02:00
{
2015-06-12 01:34:55 -04:00
struct kvm_pmu * pmu = vcpu_to_pmu ( vcpu ) ;
idx & = ~ ( 3u < < 30 ) ;
2021-11-05 13:20:58 -07:00
return idx < pmu - > nr_arch_gp_counters ;
2015-06-19 15:45:05 +02:00
}
/* idx is the ECX register of RDPMC instruction */
2019-10-27 18:52:40 +08:00
static struct kvm_pmc * amd_rdpmc_ecx_to_pmc ( struct kvm_vcpu * vcpu ,
unsigned int idx , u64 * mask )
2015-06-19 15:45:05 +02:00
{
2015-06-12 01:34:55 -04:00
struct kvm_pmu * pmu = vcpu_to_pmu ( vcpu ) ;
struct kvm_pmc * counters ;
idx & = ~ ( 3u < < 30 ) ;
if ( idx > = pmu - > nr_arch_gp_counters )
return NULL ;
counters = pmu - > gp_counters ;
return & counters [ idx ] ;
2015-06-19 15:45:05 +02:00
}
2022-06-11 00:57:52 +00:00
/*
 * is_valid_msr callback. It never claims an MSR itself: all MSRs refer to
 * exactly one PMC, so msr_idx_to_pmc is enough to recognise them.
 */
static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	return false;
}
/*
 * Find the counter that @msr belongs to, whether @msr is the counter
 * register itself or its event select register.
 *
 * Returns NULL when get_gp_pmc_amd() rejects @msr for both kinds.
 */
static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);

	/* Not a counter register: retry as an event select register. */
	if (!pmc)
		pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);

	return pmc;
}
2020-05-29 15:43:44 +08:00
static int amd_pmu_get_msr ( struct kvm_vcpu * vcpu , struct msr_data * msr_info )
2015-06-19 15:45:05 +02:00
{
2015-06-12 01:34:55 -04:00
struct kvm_pmu * pmu = vcpu_to_pmu ( vcpu ) ;
struct kvm_pmc * pmc ;
2020-05-29 15:43:44 +08:00
u32 msr = msr_info - > index ;
2015-06-12 01:34:55 -04:00
2018-02-05 13:24:52 -06:00
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd ( pmu , msr , PMU_TYPE_COUNTER ) ;
2015-06-12 01:34:55 -04:00
if ( pmc ) {
2020-05-29 15:43:44 +08:00
msr_info - > data = pmc_read_counter ( pmc ) ;
2015-06-12 01:34:55 -04:00
return 0 ;
}
2018-02-05 13:24:52 -06:00
/* MSR_EVNTSELn */
pmc = get_gp_pmc_amd ( pmu , msr , PMU_TYPE_EVNTSEL ) ;
2015-06-12 01:34:55 -04:00
if ( pmc ) {
2020-05-29 15:43:44 +08:00
msr_info - > data = pmc - > eventsel ;
2015-06-12 01:34:55 -04:00
return 0 ;
}
2015-06-19 15:45:05 +02:00
return 1 ;
}
static int amd_pmu_set_msr ( struct kvm_vcpu * vcpu , struct msr_data * msr_info )
{
2015-06-12 01:34:55 -04:00
struct kvm_pmu * pmu = vcpu_to_pmu ( vcpu ) ;
struct kvm_pmc * pmc ;
u32 msr = msr_info - > index ;
u64 data = msr_info - > data ;
2018-02-05 13:24:52 -06:00
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd ( pmu , msr , PMU_TYPE_COUNTER ) ;
2015-06-12 01:34:55 -04:00
if ( pmc ) {
pmc - > counter + = data - pmc_read_counter ( pmc ) ;
2022-04-09 09:52:26 +08:00
pmc_update_sample_period ( pmc ) ;
2015-06-12 01:34:55 -04:00
return 0 ;
}
2018-02-05 13:24:52 -06:00
/* MSR_EVNTSELn */
pmc = get_gp_pmc_amd ( pmu , msr , PMU_TYPE_EVNTSEL ) ;
2015-06-12 01:34:55 -04:00
if ( pmc ) {
KVM: x86/svm: Clear reserved bits written to PerfEvtSeln MSRs
AMD EPYC CPUs never raise a #GP for a WRMSR to a PerfEvtSeln MSR. Some
reserved bits are cleared, and some are not. Specifically, on
Zen3/Milan, bits 19 and 42 are not cleared.
When emulating such a WRMSR, KVM should not synthesize a #GP,
regardless of which bits are set. However, undocumented bits should
not be passed through to the hardware MSR. So, rather than checking
for reserved bits and synthesizing a #GP, just clear the reserved
bits.
This may seem pedantic, but since KVM currently does not support the
"Host/Guest Only" bits (41:40), it is necessary to clear these bits
rather than synthesizing #GP, because some popular guests (e.g Linux)
will set the "Host Only" bit even on CPUs that don't support
EFER.SVME, and they don't expect a #GP.
For example,
root@Ubuntu1804:~# perf stat -e r26 -a sleep 1
Performance counter stats for 'system wide':
0 r26
1.001070977 seconds time elapsed
Feb 23 03:59:58 Ubuntu1804 kernel: [ 405.379957] unchecked MSR access error: WRMSR to 0xc0010200 (tried to write 0x0000020000130026) at rIP: 0xffffffff9b276a28 (native_write_msr+0x8/0x30)
Feb 23 03:59:58 Ubuntu1804 kernel: [ 405.379958] Call Trace:
Feb 23 03:59:58 Ubuntu1804 kernel: [ 405.379963] amd_pmu_disable_event+0x27/0x90
Fixes: ca724305a2b0 ("KVM: x86/vPMU: Implement AMD vPMU code for KVM")
Reported-by: Lotus Fenn <lotusf@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Like Xu <likexu@tencent.com>
Reviewed-by: David Dunn <daviddunn@google.com>
Message-Id: <20220226234131.2167175-1-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-26 15:41:31 -08:00
data & = ~ pmu - > reserved_bits ;
2022-05-18 21:25:06 +08:00
if ( data ! = pmc - > eventsel ) {
pmc - > eventsel = data ;
2022-05-25 05:28:56 -04:00
reprogram_counter ( pmc ) ;
2022-05-18 21:25:06 +08:00
}
KVM: x86/svm: Clear reserved bits written to PerfEvtSeln MSRs
AMD EPYC CPUs never raise a #GP for a WRMSR to a PerfEvtSeln MSR. Some
reserved bits are cleared, and some are not. Specifically, on
Zen3/Milan, bits 19 and 42 are not cleared.
When emulating such a WRMSR, KVM should not synthesize a #GP,
regardless of which bits are set. However, undocumented bits should
not be passed through to the hardware MSR. So, rather than checking
for reserved bits and synthesizing a #GP, just clear the reserved
bits.
This may seem pedantic, but since KVM currently does not support the
"Host/Guest Only" bits (41:40), it is necessary to clear these bits
rather than synthesizing #GP, because some popular guests (e.g Linux)
will set the "Host Only" bit even on CPUs that don't support
EFER.SVME, and they don't expect a #GP.
For example,
root@Ubuntu1804:~# perf stat -e r26 -a sleep 1
Performance counter stats for 'system wide':
0 r26
1.001070977 seconds time elapsed
Feb 23 03:59:58 Ubuntu1804 kernel: [ 405.379957] unchecked MSR access error: WRMSR to 0xc0010200 (tried to write 0x0000020000130026) at rIP: 0xffffffff9b276a28 (native_write_msr+0x8/0x30)
Feb 23 03:59:58 Ubuntu1804 kernel: [ 405.379958] Call Trace:
Feb 23 03:59:58 Ubuntu1804 kernel: [ 405.379963] amd_pmu_disable_event+0x27/0x90
Fixes: ca724305a2b0 ("KVM: x86/vPMU: Implement AMD vPMU code for KVM")
Reported-by: Lotus Fenn <lotusf@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Like Xu <likexu@tencent.com>
Reviewed-by: David Dunn <daviddunn@google.com>
Message-Id: <20220226234131.2167175-1-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-26 15:41:31 -08:00
return 0 ;
2015-06-12 01:34:55 -04:00
}
2015-06-19 15:45:05 +02:00
return 1 ;
}
static void amd_pmu_refresh ( struct kvm_vcpu * vcpu )
{
2015-06-12 01:34:55 -04:00
struct kvm_pmu * pmu = vcpu_to_pmu ( vcpu ) ;
2018-02-05 13:24:52 -06:00
if ( guest_cpuid_has ( vcpu , X86_FEATURE_PERFCTR_CORE ) )
pmu - > nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE ;
else
pmu - > nr_arch_gp_counters = AMD64_NUM_COUNTERS ;
2015-06-12 01:34:55 -04:00
pmu - > counter_bitmask [ KVM_PMC_GP ] = ( ( u64 ) 1 < < 48 ) - 1 ;
2021-11-18 21:03:20 +08:00
pmu - > reserved_bits = 0xfffffff000280000ull ;
2022-03-07 17:24:52 -08:00
pmu - > raw_event_mask = AMD64_RAW_EVENT_MASK ;
2019-05-08 19:02:48 +02:00
pmu - > version = 1 ;
2015-06-12 01:34:55 -04:00
/* not applicable to AMD; but clean them to prevent any fall out */
pmu - > counter_bitmask [ KVM_PMC_FIXED ] = 0 ;
pmu - > nr_arch_fixed_counters = 0 ;
pmu - > global_status = 0 ;
2019-10-27 18:52:43 +08:00
bitmap_set ( pmu - > all_valid_pmc_idx , 0 , pmu - > nr_arch_gp_counters ) ;
2015-06-19 15:45:05 +02:00
}
static void amd_pmu_init ( struct kvm_vcpu * vcpu )
{
2015-06-12 01:34:55 -04:00
struct kvm_pmu * pmu = vcpu_to_pmu ( vcpu ) ;
int i ;
2018-02-05 13:24:52 -06:00
BUILD_BUG_ON ( AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC ) ;
for ( i = 0 ; i < AMD64_NUM_COUNTERS_CORE ; i + + ) {
2015-06-12 01:34:55 -04:00
pmu - > gp_counters [ i ] . type = KVM_PMC_GP ;
pmu - > gp_counters [ i ] . vcpu = vcpu ;
pmu - > gp_counters [ i ] . idx = i ;
KVM: x86/vPMU: Reuse perf_event to avoid unnecessary pmc_reprogram_counter
The perf_event_create_kernel_counter() in the pmc_reprogram_counter() is
a heavyweight and high-frequency operation, especially when host disables
the watchdog (maximum 21000000 ns) which leads to an unacceptable latency
of the guest NMI handler. It limits the use of vPMUs in the guest.
When a vPMC is fully enabled, the legacy reprogram_*_counter() would stop
and release its existing perf_event (if any) every time EVEN in most cases
almost the same requested perf_event will be created and configured again.
For each vPMC, if the reuqested config ('u64 eventsel' for gp and 'u8 ctrl'
for fixed) is the same as its current config AND a new sample period based
on pmc->counter is accepted by host perf interface, the current event could
be reused safely as a new created one does. Otherwise, do release the
undesirable perf_event and reprogram a new one as usual.
It's light-weight to call pmc_pause_counter (disable, read and reset event)
and pmc_resume_counter (recalibrate period and re-enable event) as guest
expects instead of release-and-create again on any condition. Compared to
use the filterable event->attr or hw.config, a new 'u64 current_config'
field is added to save the last original programed config for each vPMC.
Based on this implementation, the number of calls to pmc_reprogram_counter
is reduced by ~82.5% for a gp sampling event and ~99.9% for a fixed event.
In the usage of multiplexing perf sampling mode, the average latency of the
guest NMI handler is reduced from 104923 ns to 48393 ns (~2.16x speed up).
If host disables watchdog, the minimum latecy of guest NMI handler could be
speed up at ~3413x (from 20407603 to 5979 ns) and at ~786x in the average.
Suggested-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2019-10-27 18:52:42 +08:00
pmu - > gp_counters [ i ] . current_config = 0 ;
2015-06-12 01:34:55 -04:00
}
2015-06-19 15:45:05 +02:00
}
static void amd_pmu_reset ( struct kvm_vcpu * vcpu )
{
2015-06-12 01:34:55 -04:00
struct kvm_pmu * pmu = vcpu_to_pmu ( vcpu ) ;
int i ;
2018-02-05 13:24:52 -06:00
for ( i = 0 ; i < AMD64_NUM_COUNTERS_CORE ; i + + ) {
2015-06-12 01:34:55 -04:00
struct kvm_pmc * pmc = & pmu - > gp_counters [ i ] ;
pmc_stop_counter ( pmc ) ;
pmc - > counter = pmc - > eventsel = 0 ;
}
2015-06-19 15:45:05 +02:00
}
2022-03-29 23:50:53 +00:00
/* AMD implementations of the kvm_pmu_ops callbacks, all defined in this file. */
struct kvm_pmu_ops amd_pmu_ops __initdata = {
	/* Per-counter queries. */
	.hw_event_available = amd_hw_event_available,
	.pmc_is_enabled = amd_pmc_is_enabled,

	/* Counter lookup by index, RDPMC ECX value, or MSR. */
	.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
	.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
	.msr_idx_to_pmc = amd_msr_idx_to_pmc,

	/* Validation and MSR access. */
	.is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
	.is_valid_msr = amd_is_valid_msr,
	.get_msr = amd_pmu_get_msr,
	.set_msr = amd_pmu_set_msr,

	/* vCPU PMU lifecycle. */
	.refresh = amd_pmu_refresh,
	.init = amd_pmu_init,
	.reset = amd_pmu_reset,
};