Pull x86 apic updates from Thomas Gleixner: "Updates for the x86 APIC interrupt handling and APIC timer: - Fix a long standing issue with spurious interrupts which was caused by the big vector management rework a few years ago. Robert Hodaszi provided finally enough debug data and an excellent initial failure analysis which allowed to understand the underlying issues. This contains a change to the core interrupt management code which is required to handle this correctly for the APIC/IO_APIC. The core changes are NOOPs for most architectures except ARM64. ARM64 is not impacted by the change as confirmed by Marc Zyngier. - Newer systems allow to disable the PIT clock for power saving causing panic in the timer interrupt delivery check of the IO/APIC when the HPET timer is not enabled either. While the clock could be turned on this would cause an endless whack a mole game to chase the proper register in each affected chipset. These systems provide the relevant frequencies for TSC, CPU and the local APIC timer via CPUID and/or MSRs, which allows to avoid the PIT/HPET based calibration. As the calibration code is the only usage of the legacy timers on modern systems and is skipped anyway when the frequencies are known already, there is no point in setting up the PIT and actually checking for the interrupt delivery via IO/APIC. To achieve this on a wide variety of platforms, the CPUID/MSR based frequency readout has been made more robust, which also allowed to remove quite some workarounds which turned out to be no longer required.
Thanks to Daniel Drake for analysis, patches and verification" * 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/irq: Seperate unused system vectors from spurious entry again x86/irq: Handle spurious interrupt after shutdown gracefully x86/ioapic: Implement irq_get_irqchip_state() callback genirq: Add optional hardware synchronization for shutdown genirq: Fix misleading synchronize_irq() documentation genirq: Delay deactivation in free_irq() x86/timer: Skip PIT initialization on modern chipsets x86/apic: Use non-atomic operations when possible x86/apic: Make apic_bsp_setup() static x86/tsc: Set LAPIC timer period to crystal clock frequency x86/apic: Rename 'lapic_timer_frequency' to 'lapic_timer_period' x86/tsc: Use CPUID.0x16 to calculate missing crystal frequency
350 lines
8.8 KiB
C
350 lines
8.8 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* HyperV Detection code.
|
|
*
|
|
* Copyright (C) 2010, Novell, Inc.
|
|
* Author : K. Y. Srinivasan <ksrinivasan@novell.com>
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/time.h>
|
|
#include <linux/clocksource.h>
|
|
#include <linux/init.h>
|
|
#include <linux/export.h>
|
|
#include <linux/hardirq.h>
|
|
#include <linux/efi.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/i8253.h>
|
|
#include <linux/random.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/hypervisor.h>
|
|
#include <asm/hyperv-tlfs.h>
|
|
#include <asm/mshyperv.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/irq_regs.h>
|
|
#include <asm/i8259.h>
|
|
#include <asm/apic.h>
|
|
#include <asm/timer.h>
|
|
#include <asm/reboot.h>
|
|
#include <asm/nmi.h>
|
|
|
|
/*
 * Global Hyper-V guest state. The features, misc_features, hints,
 * max_vp_index, max_lp_index and nested_features members are filled in
 * from CPUID by ms_hyperv_init_platform(). Exported (GPL-only) so that
 * modules can read it.
 */
struct ms_hyperv_info ms_hyperv;
|
|
EXPORT_SYMBOL_GPL(ms_hyperv);
|
|
|
|
#if IS_ENABLED(CONFIG_HYPERV)
|
|
/*
 * Optional callbacks registered through the hv_setup_*() helpers and
 * cleared again (set to NULL) by the matching hv_remove_*() helpers.
 * Every call site checks for NULL before invoking them.
 */
/* Invoked from hyperv_vector_handler(). */
static void (*vmbus_handler)(void);
|
|
/* Invoked from hv_stimer0_vector_handler(). */
static void (*hv_stimer0_handler)(void);
|
|
/* Invoked from hv_machine_shutdown() while a kexec is in progress. */
static void (*hv_kexec_handler)(void);
|
|
/* Invoked from hv_machine_crash_shutdown() with the crash registers. */
static void (*hv_crash_handler)(struct pt_regs *regs);
|
|
|
|
/*
 * Interrupt entry for the Hyper-V hypervisor callback vector.
 *
 * Installs @regs as the current IRQ register set for the duration of the
 * handler, bumps the irq_hv_callback_count statistic and forwards the
 * interrupt to the registered VMbus handler, if there is one. The
 * previously active register set is restored before returning.
 */
__visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs)
{
	struct pt_regs *prev_regs;

	prev_regs = set_irq_regs(regs);
	entering_irq();

	inc_irq_stat(irq_hv_callback_count);

	if (vmbus_handler != NULL)
		vmbus_handler();

	/*
	 * The APIC is acked explicitly only when the
	 * HV_DEPRECATING_AEOI_RECOMMENDED hint is set.
	 */
	if ((ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED) != 0)
		ack_APIC_irq();

	exiting_irq();
	set_irq_regs(prev_regs);
}
|
|
|
|
void hv_setup_vmbus_irq(void (*handler)(void))
|
|
{
|
|
vmbus_handler = handler;
|
|
}
|
|
|
|
void hv_remove_vmbus_irq(void)
|
|
{
|
|
/* We have no way to deallocate the interrupt gate */
|
|
vmbus_handler = NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
|
|
EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
|
|
|
|
/*
|
|
* Routines to do per-architecture handling of stimer0
|
|
* interrupts when in Direct Mode
|
|
*/
|
|
|
|
__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
|
|
entering_irq();
|
|
inc_irq_stat(hyperv_stimer0_count);
|
|
if (hv_stimer0_handler)
|
|
hv_stimer0_handler();
|
|
add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
|
|
ack_APIC_irq();
|
|
|
|
exiting_irq();
|
|
set_irq_regs(old_regs);
|
|
}
|
|
|
|
int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
|
|
{
|
|
*vector = HYPERV_STIMER0_VECTOR;
|
|
*irq = -1; /* Unused on x86/x64 */
|
|
hv_stimer0_handler = handler;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq);
|
|
|
|
void hv_remove_stimer0_irq(int irq)
|
|
{
|
|
/* We have no way to deallocate the interrupt gate */
|
|
hv_stimer0_handler = NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq);
|
|
|
|
void hv_setup_kexec_handler(void (*handler)(void))
|
|
{
|
|
hv_kexec_handler = handler;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
|
|
|
|
void hv_remove_kexec_handler(void)
|
|
{
|
|
hv_kexec_handler = NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
|
|
|
|
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
|
|
{
|
|
hv_crash_handler = handler;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
|
|
|
|
void hv_remove_crash_handler(void)
|
|
{
|
|
hv_crash_handler = NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
|
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
|
static void hv_machine_shutdown(void)
|
|
{
|
|
if (kexec_in_progress && hv_kexec_handler)
|
|
hv_kexec_handler();
|
|
native_machine_shutdown();
|
|
}
|
|
|
|
static void hv_machine_crash_shutdown(struct pt_regs *regs)
|
|
{
|
|
if (hv_crash_handler)
|
|
hv_crash_handler(regs);
|
|
native_machine_crash_shutdown(regs);
|
|
}
|
|
#endif /* CONFIG_KEXEC_CORE */
|
|
#endif /* CONFIG_HYPERV */
|
|
|
|
/*
 * Detect whether the kernel is running as a Hyper-V guest.
 *
 * Returns HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS when the CPU reports a
 * hypervisor, the maximum leaf returned in EAX lies within
 * [HYPERV_CPUID_MIN, HYPERV_CPUID_MAX] and the 12-byte vendor signature
 * reads "Microsoft Hv". Returns 0 otherwise.
 */
static uint32_t __init ms_hyperv_platform(void)
{
	u32 max_leaf;
	u32 vendor[3];

	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return 0;

	cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
	      &max_leaf, &vendor[0], &vendor[1], &vendor[2]);

	if (max_leaf < HYPERV_CPUID_MIN || max_leaf > HYPERV_CPUID_MAX)
		return 0;

	if (memcmp("Microsoft Hv", vendor, 12) != 0)
		return 0;

	return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
}
|
|
|
|
/*
 * NMI reason callback that always reports 0, i.e. no reason bits set.
 * ms_hyperv_init_platform() installs it as x86_platform.get_nmi_reason
 * when the guest was booted via EFI.
 */
static unsigned char hv_get_nmi_reason(void)
{
	return 0;
}
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
/*
|
|
* Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
|
|
* it dificult to process CHANNELMSG_UNLOAD in case of crash. Handle
|
|
* unknown NMI on the first CPU which gets it.
|
|
*/
|
|
static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
|
|
{
|
|
static atomic_t nmi_cpu = ATOMIC_INIT(-1);
|
|
|
|
if (!unknown_nmi_panic)
|
|
return NMI_DONE;
|
|
|
|
if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
|
|
return NMI_HANDLED;
|
|
|
|
return NMI_DONE;
|
|
}
|
|
#endif
|
|
|
|
static unsigned long hv_get_tsc_khz(void)
|
|
{
|
|
unsigned long freq;
|
|
|
|
rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
|
|
|
|
return freq / 1000;
|
|
}
|
|
|
|
#if defined(CONFIG_SMP) && IS_ENABLED(CONFIG_HYPERV)
|
|
/*
 * smp_ops.smp_prepare_boot_cpu replacement: run the native boot CPU
 * preparation and then, on 64-bit kernels built with paravirt
 * spinlocks, call hv_init_spinlocks().
 */
static void __init hv_smp_prepare_boot_cpu(void)
{
	native_smp_prepare_boot_cpu();

#if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_SPINLOCKS)
	hv_init_spinlocks();
#endif
}
|
|
#endif
|
|
|
|
/*
 * Platform initialization hook for Hyper-V guests.
 *
 * Caches the CPUID-advertised features, hints and limits in ms_hyperv,
 * logs the host version, hooks up TSC/LAPIC frequency sources when the
 * frequency MSRs are usable, registers the unknown-NMI handler, and
 * installs the Hyper-V specific shutdown, interrupt and SMP hooks that
 * the kernel configuration enables.
 */
static void __init ms_hyperv_init_platform(void)
{
	int ver_eax;
	int ver_ebx;
	int ver_ecx;
	int ver_edx;
	bool freq_msrs;

	/* Cache the feature and hint bits advertised through CPUID. */
	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
	ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);

	pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
		ms_hyperv.features, ms_hyperv.hints);

	ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
	ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);

	pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
		 ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);

	/* Log the host version if the version leaf is implemented. */
	if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >=
	    HYPERV_CPUID_VERSION) {
		ver_eax = cpuid_eax(HYPERV_CPUID_VERSION);
		ver_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
		ver_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
		ver_edx = cpuid_edx(HYPERV_CPUID_VERSION);

		pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n",
			ver_eax, ver_ebx >> 16,
			ver_ebx & 0xFFFF, ver_ecx,
			ver_edx >> 24, ver_edx & 0xFFFFFF);
	}

	/*
	 * The frequency MSRs may be used only when both the access bit
	 * and the availability bit are set.
	 */
	freq_msrs = (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS) &&
		    (ms_hyperv.misc_features &
		     HV_FEATURE_FREQUENCY_MSRS_AVAILABLE);

	if (freq_msrs) {
		x86_platform.calibrate_tsc = hv_get_tsc_khz;
		x86_platform.calibrate_cpu = hv_get_tsc_khz;
	}

	if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED)
		ms_hyperv.nested_features =
			cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);

#ifdef CONFIG_X86_LOCAL_APIC
	if (freq_msrs) {
		/* Derive the LAPIC timer period from the APIC frequency. */
		u64 apic_freq;

		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, apic_freq);
		lapic_timer_period = div_u64(apic_freq, HZ);
		pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
			lapic_timer_period);
	}

	register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
			     "hv_nmi_unknown");
#endif

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
	machine_ops.shutdown = hv_machine_shutdown;
	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
	mark_tsc_unstable("running on Hyper-V");

	/*
	 * Generation 2 instances don't support reading the NMI status
	 * from port 0x61, so substitute a callback that reports nothing.
	 */
	if (efi_enabled(EFI_BOOT))
		x86_platform.get_nmi_reason = hv_get_nmi_reason;

	/*
	 * Hyper-V VMs have a PIT emulation quirk: zeroing the counter
	 * register during PIT shutdown restarts the PIT, so it keeps
	 * interrupting @18.2 HZ. Clearing i8253_clear_counter_on_shutdown
	 * tells pit_shutdown() not to zero the counter, so that the PIT
	 * really is shut down. Generation 2 VMs don't have a PIT, and
	 * setting this value has no effect there.
	 */
	i8253_clear_counter_on_shutdown = false;

#if IS_ENABLED(CONFIG_HYPERV)
	/* Get control back once the APIC has been initialized. */
	x86_platform.apic_post_init = hyperv_init;
	hyperv_setup_mmu_ops();

	/* IDT entry for the hypervisor callback */
	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);

	/* IDT entry for reenlightenment notifications */
	if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT)
		alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
				hyperv_reenlightenment_vector);

	/* IDT entry for stimer0 */
	if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE)
		alloc_intr_gate(HYPERV_STIMER0_VECTOR,
				hv_stimer0_callback_vector);

# ifdef CONFIG_SMP
	smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
# endif

	/*
	 * Hyper-V doesn't provide irq remapping for IO-APIC. To enable
	 * x2apic, set the x2apic destination mode to physical mode when
	 * x2apic is available; the Hyper-V IOMMU driver makes sure cpus
	 * assigned with IO-APIC irqs have 8-bit APIC id.
	 */
# ifdef CONFIG_X86_X2APIC
	if (x2apic_supported())
		x2apic_phys = 1;
# endif

#endif
}
|
|
|
|
const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
|
|
.name = "Microsoft Hyper-V",
|
|
.detect = ms_hyperv_platform,
|
|
.type = X86_HYPER_MS_HYPERV,
|
|
.init.init_platform = ms_hyperv_init_platform,
|
|
};
|