b18d62891a
Pull x86 APIC updates from Thomas Gleixner: "This update provides a major overhaul of the APIC initialization and vector allocation code: - Unification of the APIC and interrupt mode setup which was scattered all over the place and was hard to follow. This also disentangles the timer setup from the APIC initialization which brings a clear separation of functionality. Great detective work from Dou Liyang! - Refactoring of the x86 vector allocation mechanism. The existing code was based on nested loops and rather convoluted APIC callbacks which had a horrible worst case behaviour and tried to serve all different use cases in one go. This led to quite odd hacks when supporting the new managed interrupt facility for multiqueue devices and made it more or less impossible to deal with the vector space exhaustion which was a major roadblock for server hibernation. Aside from that, the code dealing with cpu hotplug and the system vectors was disconnected from the actual vector management and allocation code, which made it hard to follow and maintain. Utilizing the new bitmap matrix allocator core mechanism, the new allocator and management code consolidates the handling of system vectors, legacy vectors, cpu hotplug mechanisms and the actual allocation which needs to be aware of system and legacy vectors and hotplug constraints into a single consistent entity. This has one visible change: The support for multi CPU targets of interrupts, which is only available on a certain subset of CPUs/APIC variants, has been removed in favour of single interrupt targets. A proper analysis of the multi CPU target feature revealed that there is no real advantage as the vast majority of interrupts end up on the CPU with the lowest APIC id in the set of target CPUs anyway. That change was agreed on by the relevant folks and made it possible to simplify the implementation significantly and to replace rather fragile constructs like the vector cleanup IPI with straightforward and solid code. 
Furthermore this made it possible to cleanly separate the allocation details for legacy, normal and managed interrupts: * Legacy interrupts are no longer wasting 16 vectors unconditionally * Managed interrupts now have a guaranteed vector reservation, but the actual vector assignment happens when the interrupt is requested. It's guaranteed not to fail. * Normal interrupts no longer allocate vectors unconditionally when the interrupt is set up (IO/APIC init or MSI(X) enable). The mechanism has been switched to a best-effort reservation mode. The actual allocation happens when the interrupt is requested. Contrary to managed interrupts the request can fail due to vector space exhaustion, but drivers must handle a failure of request_irq() anyway. When the interrupt is freed, the vector is handed back as well. This solves a long-standing problem with large unconditional vector allocations for a certain class of enterprise devices which prevented server hibernation due to vector space exhaustion when the unused allocated vectors had to be migrated to CPU0 while unplugging all non-boot CPUs. 
The code has been equipped with trace points and detailed debugfs information to aid analysis of the vector space" * 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits) x86/vector/msi: Select CONFIG_GENERIC_IRQ_RESERVATION_MODE PCI/MSI: Set MSI_FLAG_MUST_REACTIVATE in core code genirq: Add config option for reservation mode x86/vector: Use correct per cpu variable in free_moved_vector() x86/apic/vector: Ignore set_affinity call for inactive interrupts x86/apic: Fix spelling mistake: "symmectic" -> "symmetric" x86/apic: Use dead_cpu instead of current CPU when cleaning up ACPI/init: Invoke early ACPI initialization earlier x86/vector: Respect affinity mask in irq descriptor x86/irq: Simplify hotplug vector accounting x86/vector: Switch IOAPIC to global reservation mode x86/vector/msi: Switch to global reservation mode x86/vector: Handle managed interrupts proper x86/io_apic: Reevaluate vector configuration on activate() iommu/amd: Reevaluate vector configuration on activate() iommu/vt-d: Reevaluate vector configuration on activate() x86/apic/msi: Force reactivation of interrupts at startup time x86/vector: Untangle internal state from irq_cfg x86/vector: Compile SMP only code conditionally x86/apic: Remove unused callbacks ...
370 lines
9.5 KiB
C
370 lines
9.5 KiB
C
/*
|
|
* Interrupt descriptor table related code
|
|
*
|
|
* This file is licensed under the GPL V2
|
|
*/
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <asm/traps.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/desc.h>
|
|
|
|
struct idt_data {
|
|
unsigned int vector;
|
|
unsigned int segment;
|
|
struct idt_bits bits;
|
|
const void *addr;
|
|
};
|
|
|
|
/* Values for the dpl field of a gate */
#define DPL0		0x0
#define DPL3		0x3

/* Value for the ist field of gates which run on the default stack */
#define DEFAULT_STACK	0

/*
 * Generic struct idt_data initializer. All gates built with it are
 * marked present (p = 1).
 */
#define G(_vector, _addr, _ist, _type, _dpl, _segment)	\
	{						\
		.vector		= _vector,		\
		.segment	= _segment,		\
		.addr		= _addr,		\
		.bits.ist	= _ist,			\
		.bits.type	= _type,		\
		.bits.dpl	= _dpl,			\
		.bits.p		= 1,			\
	}
|
|
|
|
/* Interrupt gate: DPL0, default stack */
#define INTG(_vector, _addr)				\
	G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL0, __KERNEL_CS)

/* System interrupt gate: DPL3, default stack */
#define SYSG(_vector, _addr)				\
	G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)

/* Interrupt gate with interrupt stack: DPL0, stack selected by @_ist */
#define ISTG(_vector, _addr, _ist)			\
	G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)

/* System interrupt gate with interrupt stack: DPL3, stack selected by @_ist */
#define SISTG(_vector, _addr, _ist)			\
	G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS)
|
|
|
|
/*
 * Task gate: carries no handler address. The segment value is the GDT
 * index @_gdt shifted left by 3. @_gdt is parenthesized so that an
 * argument expression with lower precedence than '<<' is shifted as a
 * whole.
 */
#define TSKG(_vector, _gdt)				\
	G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, (_gdt) << 3)
|
|
|
|
/*
|
|
* Early traps running on the DEFAULT_STACK because the other interrupt
|
|
* stacks work only after cpu_init().
|
|
*/
|
|
static const __initdata struct idt_data early_idts[] = {
|
|
INTG(X86_TRAP_DB, debug),
|
|
SYSG(X86_TRAP_BP, int3),
|
|
#ifdef CONFIG_X86_32
|
|
INTG(X86_TRAP_PF, page_fault),
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
* The default IDT entries which are set up in trap_init() before
|
|
* cpu_init() is invoked. Interrupt stacks cannot be used at that point and
|
|
* the traps which use them are reinitialized with IST after cpu_init() has
|
|
* set up TSS.
|
|
*/
|
|
static const __initdata struct idt_data def_idts[] = {
|
|
INTG(X86_TRAP_DE, divide_error),
|
|
INTG(X86_TRAP_NMI, nmi),
|
|
INTG(X86_TRAP_BR, bounds),
|
|
INTG(X86_TRAP_UD, invalid_op),
|
|
INTG(X86_TRAP_NM, device_not_available),
|
|
INTG(X86_TRAP_OLD_MF, coprocessor_segment_overrun),
|
|
INTG(X86_TRAP_TS, invalid_TSS),
|
|
INTG(X86_TRAP_NP, segment_not_present),
|
|
INTG(X86_TRAP_SS, stack_segment),
|
|
INTG(X86_TRAP_GP, general_protection),
|
|
INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug),
|
|
INTG(X86_TRAP_MF, coprocessor_error),
|
|
INTG(X86_TRAP_AC, alignment_check),
|
|
INTG(X86_TRAP_XF, simd_coprocessor_error),
|
|
|
|
#ifdef CONFIG_X86_32
|
|
TSKG(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS),
|
|
#else
|
|
INTG(X86_TRAP_DF, double_fault),
|
|
#endif
|
|
INTG(X86_TRAP_DB, debug),
|
|
|
|
#ifdef CONFIG_X86_MCE
|
|
INTG(X86_TRAP_MC, &machine_check),
|
|
#endif
|
|
|
|
SYSG(X86_TRAP_OF, overflow),
|
|
#if defined(CONFIG_IA32_EMULATION)
|
|
SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat),
|
|
#elif defined(CONFIG_X86_32)
|
|
SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32),
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
* The APIC and SMP idt entries
|
|
*/
|
|
static const __initdata struct idt_data apic_idts[] = {
|
|
#ifdef CONFIG_SMP
|
|
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
|
|
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
|
|
INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt),
|
|
INTG(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt),
|
|
INTG(REBOOT_VECTOR, reboot_interrupt),
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_THERMAL_VECTOR
|
|
INTG(THERMAL_APIC_VECTOR, thermal_interrupt),
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_MCE_THRESHOLD
|
|
INTG(THRESHOLD_APIC_VECTOR, threshold_interrupt),
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_MCE_AMD
|
|
INTG(DEFERRED_ERROR_VECTOR, deferred_error_interrupt),
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
INTG(LOCAL_TIMER_VECTOR, apic_timer_interrupt),
|
|
INTG(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi),
|
|
# ifdef CONFIG_HAVE_KVM
|
|
INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
|
|
INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
|
|
INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
|
|
# endif
|
|
# ifdef CONFIG_IRQ_WORK
|
|
INTG(IRQ_WORK_VECTOR, irq_work_interrupt),
|
|
# endif
|
|
INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt),
|
|
INTG(ERROR_APIC_VECTOR, error_interrupt),
|
|
#endif
|
|
};
|
|
|
|
#ifdef CONFIG_X86_64
|
|
/*
|
|
* Early traps running on the DEFAULT_STACK because the other interrupt
|
|
* stacks work only after cpu_init().
|
|
*/
|
|
static const __initdata struct idt_data early_pf_idts[] = {
|
|
INTG(X86_TRAP_PF, page_fault),
|
|
};
|
|
|
|
/*
|
|
* Override for the debug_idt. Same as the default, but with interrupt
|
|
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
|
|
*/
|
|
static const __initdata struct idt_data dbg_idts[] = {
|
|
INTG(X86_TRAP_DB, debug),
|
|
INTG(X86_TRAP_BP, int3),
|
|
};
|
|
#endif
|
|
|
|
/* Must be page-aligned because the real IDT is used in a fixmap. */
|
|
gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
|
|
|
|
struct desc_ptr idt_descr __ro_after_init = {
|
|
.size = (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1,
|
|
.address = (unsigned long) idt_table,
|
|
};
|
|
|
|
#ifdef CONFIG_X86_64
|
|
/* No need to be aligned, but done to keep all IDTs defined the same way. */
|
|
gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
|
|
|
|
/*
|
|
* The exceptions which use Interrupt stacks. They are setup after
|
|
* cpu_init() when the TSS has been initialized.
|
|
*/
|
|
static const __initdata struct idt_data ist_idts[] = {
|
|
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
|
|
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
|
|
SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
|
|
ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK),
|
|
#ifdef CONFIG_X86_MCE
|
|
ISTG(X86_TRAP_MC, &machine_check, MCE_STACK),
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
* Override for the debug_idt. Same as the default, but with interrupt
|
|
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
|
|
*/
|
|
const struct desc_ptr debug_idt_descr = {
|
|
.size = IDT_ENTRIES * 16 - 1,
|
|
.address = (unsigned long) debug_idt_table,
|
|
};
|
|
#endif
|
|
|
|
/*
 * Fill in the gate descriptor @gate from the table entry @d: the handler
 * address is split over the offset fields, segment and attribute bits
 * are copied.
 */
static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
{
	const unsigned long handler = (unsigned long) d->addr;

	gate->bits		= d->bits;
	gate->segment		= (u16) d->segment;
	gate->offset_low	= (u16) handler;
	gate->offset_middle	= (u16) (handler >> 16);
#ifdef CONFIG_X86_64
	/* 64-bit gates carry the upper address half and a reserved word */
	gate->offset_high	= (u32) (handler >> 32);
	gate->reserved		= 0;
#endif
}
|
|
|
|
static void
|
|
idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys)
|
|
{
|
|
gate_desc desc;
|
|
|
|
for (; size > 0; t++, size--) {
|
|
idt_init_desc(&desc, t);
|
|
write_idt_entry(idt, t->vector, &desc);
|
|
if (sys)
|
|
set_bit(t->vector, system_vectors);
|
|
}
|
|
}
|
|
|
|
static void set_intr_gate(unsigned int n, const void *addr)
|
|
{
|
|
struct idt_data data;
|
|
|
|
BUG_ON(n > 0xFF);
|
|
|
|
memset(&data, 0, sizeof(data));
|
|
data.vector = n;
|
|
data.addr = addr;
|
|
data.segment = __KERNEL_CS;
|
|
data.bits.type = GATE_INTERRUPT;
|
|
data.bits.p = 1;
|
|
|
|
idt_setup_from_table(idt_table, &data, 1, false);
|
|
}
|
|
|
|
/**
|
|
* idt_setup_early_traps - Initialize the idt table with early traps
|
|
*
|
|
* On X8664 these traps do not use interrupt stacks as they can't work
|
|
* before cpu_init() is invoked and sets up TSS. The IST variants are
|
|
* installed after that.
|
|
*/
|
|
void __init idt_setup_early_traps(void)
|
|
{
|
|
idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts),
|
|
true);
|
|
load_idt(&idt_descr);
|
|
}
|
|
|
|
/**
|
|
* idt_setup_traps - Initialize the idt table with default traps
|
|
*/
|
|
void __init idt_setup_traps(void)
|
|
{
|
|
idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_64
|
|
/**
|
|
* idt_setup_early_pf - Initialize the idt table with early pagefault handler
|
|
*
|
|
* On X8664 this does not use interrupt stacks as they can't work before
|
|
* cpu_init() is invoked and sets up TSS. The IST variant is installed
|
|
* after that.
|
|
*
|
|
* FIXME: Why is 32bit and 64bit installing the PF handler at different
|
|
* places in the early setup code?
|
|
*/
|
|
void __init idt_setup_early_pf(void)
|
|
{
|
|
idt_setup_from_table(idt_table, early_pf_idts,
|
|
ARRAY_SIZE(early_pf_idts), true);
|
|
}
|
|
|
|
/**
|
|
* idt_setup_ist_traps - Initialize the idt table with traps using IST
|
|
*/
|
|
void __init idt_setup_ist_traps(void)
|
|
{
|
|
idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true);
|
|
}
|
|
|
|
/**
|
|
* idt_setup_debugidt_traps - Initialize the debug idt table with debug traps
|
|
*/
|
|
void __init idt_setup_debugidt_traps(void)
|
|
{
|
|
memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
|
|
|
|
idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts), false);
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates
|
|
*/
|
|
void __init idt_setup_apic_and_irq_gates(void)
|
|
{
|
|
int i = FIRST_EXTERNAL_VECTOR;
|
|
void *entry;
|
|
|
|
idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);
|
|
|
|
for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) {
|
|
entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
|
|
set_intr_gate(i, entry);
|
|
}
|
|
|
|
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
set_bit(i, system_vectors);
|
|
set_intr_gate(i, spurious_interrupt);
|
|
#else
|
|
entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
|
|
set_intr_gate(i, entry);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/**
|
|
* idt_setup_early_handler - Initializes the idt table with early handlers
|
|
*/
|
|
void __init idt_setup_early_handler(void)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
|
|
set_intr_gate(i, early_idt_handler_array[i]);
|
|
#ifdef CONFIG_X86_32
|
|
for ( ; i < NR_VECTORS; i++)
|
|
set_intr_gate(i, early_ignore_irq);
|
|
#endif
|
|
load_idt(&idt_descr);
|
|
}
|
|
|
|
/**
|
|
* idt_invalidate - Invalidate interrupt descriptor table
|
|
* @addr: The virtual address of the 'invalid' IDT
|
|
*/
|
|
void idt_invalidate(void *addr)
|
|
{
|
|
struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 };
|
|
|
|
load_idt(&idt);
|
|
}
|
|
|
|
/*
 * Replace the handler of vector @n with @addr. Only vectors which are
 * already marked in system_vectors may be updated; anything else
 * triggers a one-time warning and is ignored.
 */
void __init update_intr_gate(unsigned int n, const void *addr)
{
	if (!WARN_ON_ONCE(!test_bit(n, system_vectors)))
		set_intr_gate(n, addr);
}
|
|
|
|
void alloc_intr_gate(unsigned int n, const void *addr)
|
|
{
|
|
BUG_ON(n < FIRST_SYSTEM_VECTOR);
|
|
if (!test_and_set_bit(n, system_vectors))
|
|
set_intr_gate(n, addr);
|
|
}
|