KVM x86 PMU changes for 6.8:
- Fix a variety of bugs where KVM fails to stop/reset counters and other state
  prior to refreshing the vPMU model.

- Fix a double-overflow PMU bug by tracking emulated counter events using a
  dedicated field instead of snapshotting the "previous" counter. If a hardware
  PMC overflow is recognized in the same VM-Exit in which KVM manually bumps an
  event count, KVM would pend PMIs for both the hardware-triggered overflow and
  the KVM-triggered overflow.

Merge tag 'kvm-x86-pmu-6.8' of https://github.com/kvm-x86/linux into HEAD
commit 01edb1cfbd
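To make the double-overflow fix above concrete, here is a minimal, self-contained sketch in C. This is not KVM code: the toy_pmc struct, toy_incr_counter() and toy_pause_counter() are invented stand-ins for the kvm_pmc, kvm_pmu_incr_counter() and pmc_pause_counter() changes in the diff below. The idea is that emulated events accumulate in a dedicated field and are folded into the counter only when the counter is paused, so an overflow caused by emulated events is detected exactly once and is never double-counted against an overflow that hardware (via perf) has already signaled.

/*
 * Illustrative only: a toy PMC model (not KVM's struct kvm_pmc) showing why
 * a dedicated emulated_counter field avoids pending two PMIs for one event.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_pmc {
        uint64_t counter;           /* count already consumed from "hardware" */
        uint64_t emulated_counter;  /* emulated events not yet overflow-checked */
        uint64_t bitmask;           /* e.g. (1ULL << 48) - 1 for a 48-bit PMC */
};

/* Emulated instruction/branch/etc.: just bump the dedicated field. */
static void toy_incr_counter(struct toy_pmc *pmc)
{
        pmc->emulated_counter++;
}

/*
 * Fold emulated events into the counter and report whether *they* caused an
 * overflow.  Hardware-triggered overflow is assumed to have been signaled by
 * perf already, so it is deliberately not re-detected here.
 */
static bool toy_pause_counter(struct toy_pmc *pmc)
{
        uint64_t prev = pmc->counter & pmc->bitmask;

        pmc->counter = (pmc->counter + pmc->emulated_counter) & pmc->bitmask;
        pmc->emulated_counter = 0;

        /* Wrapping below the pre-fold value means the emulated events overflowed. */
        return pmc->counter < prev;
}

int main(void)
{
        struct toy_pmc pmc = {
                .counter = (1ULL << 48) - 1,    /* one event away from wrapping */
                .bitmask = (1ULL << 48) - 1,
        };

        toy_incr_counter(&pmc);
        printf("emulate overflow PMI: %s\n", toy_pause_counter(&pmc) ? "yes" : "no");
        return 0;
}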
@@ -22,7 +22,7 @@ KVM_X86_PMU_OP(get_msr)
 KVM_X86_PMU_OP(set_msr)
 KVM_X86_PMU_OP(refresh)
 KVM_X86_PMU_OP(init)
-KVM_X86_PMU_OP(reset)
+KVM_X86_PMU_OP_OPTIONAL(reset)
 KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
 KVM_X86_PMU_OP_OPTIONAL(cleanup)
 
@@ -500,8 +500,23 @@ struct kvm_pmc {
         u8 idx;
         bool is_paused;
         bool intr;
+        /*
+         * Base value of the PMC counter, relative to the *consumed* count in
+         * the associated perf_event.  This value includes counter updates from
+         * the perf_event and emulated_count since the last time the counter
+         * was reprogrammed, but it is *not* the current value as seen by the
+         * guest or userspace.
+         *
+         * The count is relative to the associated perf_event so that KVM
+         * doesn't need to reprogram the perf_event every time the guest writes
+         * to the counter.
+         */
         u64 counter;
-        u64 prev_counter;
+        /*
+         * PMC events triggered by KVM emulation that haven't been fully
+         * processed, i.e. haven't undergone overflow detection.
+         */
+        u64 emulated_counter;
         u64 eventsel;
         struct perf_event *perf_event;
         struct kvm_vcpu *vcpu;
@@ -127,9 +127,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
         struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 
         /*
-         * Ignore overflow events for counters that are scheduled to be
-         * reprogrammed, e.g. if a PMI for the previous event races with KVM's
-         * handling of a related guest WRMSR.
+         * Ignore asynchronous overflow events for counters that are scheduled
+         * to be reprogrammed, e.g. if a PMI for the previous event races with
+         * KVM's handling of a related guest WRMSR.
          */
         if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi))
                 return;
@@ -161,6 +161,15 @@ static u64 pmc_get_pebs_precise_level(struct kvm_pmc *pmc)
         return 1;
 }
 
+static u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
+{
+        u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
+
+        if (!sample_period)
+                sample_period = pmc_bitmask(pmc) + 1;
+        return sample_period;
+}
+
 static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
                                  bool exclude_user, bool exclude_kernel,
                                  bool intr)
@@ -215,17 +224,30 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
         return 0;
 }
 
-static void pmc_pause_counter(struct kvm_pmc *pmc)
+static bool pmc_pause_counter(struct kvm_pmc *pmc)
 {
         u64 counter = pmc->counter;
-
-        if (!pmc->perf_event || pmc->is_paused)
-                return;
+        u64 prev_counter;
 
         /* update counter, reset event value to avoid redundant accumulation */
-        counter += perf_event_pause(pmc->perf_event, true);
+        if (pmc->perf_event && !pmc->is_paused)
+                counter += perf_event_pause(pmc->perf_event, true);
+
+        /*
+         * Snapshot the previous counter *after* accumulating state from perf.
+         * If overflow already happened, hardware (via perf) is responsible for
+         * generating a PMI.  KVM just needs to detect overflow on emulated
+         * counter events that haven't yet been processed.
+         */
+        prev_counter = counter & pmc_bitmask(pmc);
+
+        counter += pmc->emulated_counter;
         pmc->counter = counter & pmc_bitmask(pmc);
+
+        pmc->emulated_counter = 0;
         pmc->is_paused = true;
+
+        return pmc->counter < prev_counter;
 }
 
 static bool pmc_resume_counter(struct kvm_pmc *pmc)
@@ -250,6 +272,51 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
         return true;
 }
 
+static void pmc_release_perf_event(struct kvm_pmc *pmc)
+{
+        if (pmc->perf_event) {
+                perf_event_release_kernel(pmc->perf_event);
+                pmc->perf_event = NULL;
+                pmc->current_config = 0;
+                pmc_to_pmu(pmc)->event_count--;
+        }
+}
+
+static void pmc_stop_counter(struct kvm_pmc *pmc)
+{
+        if (pmc->perf_event) {
+                pmc->counter = pmc_read_counter(pmc);
+                pmc_release_perf_event(pmc);
+        }
+}
+
+static void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+        if (!pmc->perf_event || pmc->is_paused ||
+            !is_sampling_event(pmc->perf_event))
+                return;
+
+        perf_event_period(pmc->perf_event,
+                          get_sample_period(pmc, pmc->counter));
+}
+
+void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+        /*
+         * Drop any unconsumed accumulated counts, the WRMSR is a write, not a
+         * read-modify-write.  Adjust the counter value so that its value is
+         * relative to the current count, as reading the current count from
+         * perf is faster than pausing and repgrogramming the event in order to
+         * reset it to '0'.  Note, this very sneakily offsets the accumulated
+         * emulated count too, by using pmc_read_counter()!
+         */
+        pmc->emulated_counter = 0;
+        pmc->counter += val - pmc_read_counter(pmc);
+        pmc->counter &= pmc_bitmask(pmc);
+        pmc_update_sample_period(pmc);
+}
+EXPORT_SYMBOL_GPL(pmc_write_counter);
+
 static int filter_cmp(const void *pa, const void *pb, u64 mask)
 {
         u64 a = *(u64 *)pa & mask;
@@ -383,14 +450,15 @@ static void reprogram_counter(struct kvm_pmc *pmc)
         struct kvm_pmu *pmu = pmc_to_pmu(pmc);
         u64 eventsel = pmc->eventsel;
         u64 new_config = eventsel;
+        bool emulate_overflow;
         u8 fixed_ctr_ctrl;
 
-        pmc_pause_counter(pmc);
+        emulate_overflow = pmc_pause_counter(pmc);
 
         if (!pmc_event_is_allowed(pmc))
                 goto reprogram_complete;
 
-        if (pmc->counter < pmc->prev_counter)
+        if (emulate_overflow)
                 __kvm_perf_overflow(pmc, false);
 
         if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
@@ -430,7 +498,6 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 reprogram_complete:
         clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
-        pmc->prev_counter = 0;
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
@@ -639,32 +706,60 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         return 0;
 }
 
-/* refresh PMU settings. This function generally is called when underlying
- * settings are changed (such as changes of PMU CPUID by guest VMs), which
- * should rarely happen.
+static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
+{
+        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+        struct kvm_pmc *pmc;
+        int i;
+
+        pmu->need_cleanup = false;
+
+        bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
+
+        for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+                pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+                if (!pmc)
+                        continue;
+
+                pmc_stop_counter(pmc);
+                pmc->counter = 0;
+                pmc->emulated_counter = 0;
+
+                if (pmc_is_gp(pmc))
+                        pmc->eventsel = 0;
+        }
+
+        pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
+
+        static_call_cond(kvm_x86_pmu_reset)(vcpu);
+}
+
+
+/*
+ * Refresh the PMU configuration for the vCPU, e.g. if userspace changes CPUID
+ * and/or PERF_CAPABILITIES.
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
         if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
                 return;
 
+        /*
+         * Stop/release all existing counters/events before realizing the new
+         * vPMU model.
+         */
+        kvm_pmu_reset(vcpu);
+
         bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
         static_call(kvm_x86_pmu_refresh)(vcpu);
 }
 
-void kvm_pmu_reset(struct kvm_vcpu *vcpu)
-{
-        static_call(kvm_x86_pmu_reset)(vcpu);
-}
-
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
 {
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
         memset(pmu, 0, sizeof(*pmu));
         static_call(kvm_x86_pmu_init)(vcpu);
-        pmu->event_count = 0;
-        pmu->need_cleanup = false;
         kvm_pmu_refresh(vcpu);
 }
 
@@ -700,8 +795,7 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 
 static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
-        pmc->prev_counter = pmc->counter;
-        pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
+        pmc->emulated_counter++;
         kvm_pmu_request_counter_reprogram(pmc);
 }
 
@@ -66,7 +66,8 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
 {
         u64 counter, enabled, running;
 
-        counter = pmc->counter;
+        counter = pmc->counter + pmc->emulated_counter;
+
         if (pmc->perf_event && !pmc->is_paused)
                 counter += perf_event_read_value(pmc->perf_event,
                                                  &enabled, &running);
@@ -74,29 +75,7 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
         return counter & pmc_bitmask(pmc);
 }
 
-static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
-{
-        pmc->counter += val - pmc_read_counter(pmc);
-        pmc->counter &= pmc_bitmask(pmc);
-}
-
-static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
-{
-        if (pmc->perf_event) {
-                perf_event_release_kernel(pmc->perf_event);
-                pmc->perf_event = NULL;
-                pmc->current_config = 0;
-                pmc_to_pmu(pmc)->event_count--;
-        }
-}
-
-static inline void pmc_stop_counter(struct kvm_pmc *pmc)
-{
-        if (pmc->perf_event) {
-                pmc->counter = pmc_read_counter(pmc);
-                pmc_release_perf_event(pmc);
-        }
-}
+void pmc_write_counter(struct kvm_pmc *pmc, u64 val);
 
 static inline bool pmc_is_gp(struct kvm_pmc *pmc)
 {
@@ -146,25 +125,6 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
         return NULL;
 }
 
-static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
-{
-        u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
-
-        if (!sample_period)
-                sample_period = pmc_bitmask(pmc) + 1;
-        return sample_period;
-}
-
-static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
-{
-        if (!pmc->perf_event || pmc->is_paused ||
-            !is_sampling_event(pmc->perf_event))
-                return;
-
-        perf_event_period(pmc->perf_event,
-                          get_sample_period(pmc, pmc->counter));
-}
-
 static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
 {
         struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -261,7 +221,6 @@ bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
-void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
@@ -161,7 +161,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
         if (pmc) {
                 pmc_write_counter(pmc, data);
-                pmc_update_sample_period(pmc);
                 return 0;
         }
         /* MSR_EVNTSELn */
@@ -233,21 +232,6 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
         }
 }
 
-static void amd_pmu_reset(struct kvm_vcpu *vcpu)
-{
-        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-        int i;
-
-        for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
-                struct kvm_pmc *pmc = &pmu->gp_counters[i];
-
-                pmc_stop_counter(pmc);
-                pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
-        }
-
-        pmu->global_ctrl = pmu->global_status = 0;
-}
-
 struct kvm_pmu_ops amd_pmu_ops __initdata = {
         .hw_event_available = amd_hw_event_available,
         .pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
@@ -259,7 +243,6 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = {
         .set_msr = amd_pmu_set_msr,
         .refresh = amd_pmu_refresh,
         .init = amd_pmu_init,
-        .reset = amd_pmu_reset,
         .EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT,
         .MAX_NR_GP_COUNTERS = KVM_AMD_PMC_MAX_GENERIC,
         .MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS,
@@ -437,11 +437,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                             !(msr & MSR_PMC_FULL_WIDTH_BIT))
                                 data = (s64)(s32)data;
                         pmc_write_counter(pmc, data);
-                        pmc_update_sample_period(pmc);
                         break;
                 } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                         pmc_write_counter(pmc, data);
-                        pmc_update_sample_period(pmc);
                         break;
                 } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                         reserved_bits = pmu->reserved_bits;
@@ -632,26 +630,6 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 
 static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 {
-        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-        struct kvm_pmc *pmc = NULL;
-        int i;
-
-        for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
-                pmc = &pmu->gp_counters[i];
-
-                pmc_stop_counter(pmc);
-                pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
-        }
-
-        for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
-                pmc = &pmu->fixed_counters[i];
-
-                pmc_stop_counter(pmc);
-                pmc->counter = pmc->prev_counter = 0;
-        }
-
-        pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
-
         intel_pmu_release_guest_lbr_event(vcpu);
 }
 
@@ -12252,7 +12252,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
         }
 
         if (!init_event) {
-                kvm_pmu_reset(vcpu);
                 vcpu->arch.smbase = 0x30000;
 
                 vcpu->arch.msr_misc_features_enables = 0;