da9803dfd3
called SEV by also encrypting the guest register state, making the registers inaccessible to the hypervisor by en-/decrypting them on world switches. Thus, it adds additional protection to Linux guests against exfiltration, control flow and rollback attacks. With SEV-ES, the guest is in full control of what registers the hypervisor can access. This is provided by a guest-host exchange mechanism based on a new exception vector called VMM Communication Exception (#VC), a new instruction called VMGEXIT and a shared Guest-Host Communication Block which is a decrypted page shared between the guest and the hypervisor. Intercepts to the hypervisor become #VC exceptions in an SEV-ES guest so in order for that exception mechanism to work, the early x86 init code needed to be made able to handle exceptions, which, in itself, brings a bunch of very nice cleanups and improvements to the early boot code like an early page fault handler, allowing for on-demand building of the identity mapping. With that, !KASLR configurations do not use the EFI page table anymore but switch to a kernel-controlled one. The main part of this series adds the support for that new exchange mechanism. The goal has been to keep this as much as possibly separate from the core x86 code by concentrating the machinery in two SEV-ES-specific files: arch/x86/kernel/sev-es-shared.c arch/x86/kernel/sev-es.c Other interaction with core x86 code has been kept at minimum and behind static keys to minimize the performance impact on !SEV-ES setups. Work by Joerg Roedel and Thomas Lendacky and others. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAl+FiKYACgkQEsHwGGHe VUqS5BAAlh5mKwtxXMyFyAIHa5tpsgDjbecFzy1UVmZyxN0JHLlM3NLmb+K52drY PiWjNNMi/cFMFazkuLFHuY0poBWrZml8zRS/mExKgUJC6EtguS9FQnRE9xjDBoWQ gOTSGJWEzT5wnFqo8qHwlC2CDCSF1hfL8ks3cUFW2tCWus4F9pyaMSGfFqD224rg Lh/8+arDMSIKE4uH0cm7iSuyNpbobId0l5JNDfCEFDYRigQZ6pZsQ9pbmbEpncs4 rmjDvBA5eHDlNMXq0ukqyrjxWTX4ZLBOBvuLhpyssSXnnu2T+Tcxg09+ZSTyJAe0 LyC9Wfo0v78JASXMAdeH9b1d1mRYNMqjvnBItNQoqweoqUXWz7kvgxCOp6b/G4xp cX5YhB6BprBW2DXL45frMRT/zX77UkEKYc5+0IBegV2xfnhRsjqQAQaWLIksyEaX nz9/C6+1Sr2IAv271yykeJtY6gtlRjg/usTlYpev+K0ghvGvTmuilEiTltjHrso1 XAMbfWHQGSd61LNXofvx/GLNfGBisS6dHVHwtkayinSjXNdWxI6w9fhbWVjQ+y2V hOF05lmzaJSG5kPLrsFHFqm2YcxOmsWkYYDBHvtmBkMZSf5B+9xxDv97Uy9NETcr eSYk//TEkKQqVazfCQS/9LSm0MllqKbwNO25sl0Tw2k6PnheO2g= =toqi -----END PGP SIGNATURE----- Merge tag 'x86_seves_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 SEV-ES support from Borislav Petkov: "SEV-ES enhances the current guest memory encryption support called SEV by also encrypting the guest register state, making the registers inaccessible to the hypervisor by en-/decrypting them on world switches. Thus, it adds additional protection to Linux guests against exfiltration, control flow and rollback attacks. With SEV-ES, the guest is in full control of what registers the hypervisor can access. This is provided by a guest-host exchange mechanism based on a new exception vector called VMM Communication Exception (#VC), a new instruction called VMGEXIT and a shared Guest-Host Communication Block which is a decrypted page shared between the guest and the hypervisor. Intercepts to the hypervisor become #VC exceptions in an SEV-ES guest so in order for that exception mechanism to work, the early x86 init code needed to be made able to handle exceptions, which, in itself, brings a bunch of very nice cleanups and improvements to the early boot code like an early page fault handler, allowing for on-demand building of the identity mapping. With that, !KASLR configurations do not use the EFI page table anymore but switch to a kernel-controlled one. The main part of this series adds the support for that new exchange mechanism. The goal has been to keep this as much as possibly separate from the core x86 code by concentrating the machinery in two SEV-ES-specific files: arch/x86/kernel/sev-es-shared.c arch/x86/kernel/sev-es.c Other interaction with core x86 code has been kept at minimum and behind static keys to minimize the performance impact on !SEV-ES setups. Work by Joerg Roedel and Thomas Lendacky and others" * tag 'x86_seves_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (73 commits) x86/sev-es: Use GHCB accessor for setting the MMIO scratch buffer x86/sev-es: Check required CPU features for SEV-ES x86/efi: Add GHCB mappings when SEV-ES is active x86/sev-es: Handle NMI State x86/sev-es: Support CPU offline/online x86/head/64: Don't call verify_cpu() on starting APs x86/smpboot: Load TSS and getcpu GDT entry before loading IDT x86/realmode: Setup AP jump table x86/realmode: Add SEV-ES specific trampoline entry point x86/vmware: Add VMware-specific handling for VMMCALL under SEV-ES x86/kvm: Add KVM-specific VMMCALL handling under SEV-ES x86/paravirt: Allow hypervisor-specific VMMCALL handling under SEV-ES x86/sev-es: Handle #DB Events x86/sev-es: Handle #AC Events x86/sev-es: Handle VMMCALL Events x86/sev-es: Handle MWAIT/MWAITX Events x86/sev-es: Handle MONITOR/MONITORX Events x86/sev-es: Handle INVD Events x86/sev-es: Handle RDPMC Events x86/sev-es: Handle RDTSC(P) Events ...
354 lines
9.5 KiB
C
354 lines
9.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Interrupt descriptor table related code
|
|
*/
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <asm/cpu_entry_area.h>
|
|
#include <asm/set_memory.h>
|
|
#include <asm/traps.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/hw_irq.h>
|
|
|
|
#define DPL0 0x0
|
|
#define DPL3 0x3
|
|
|
|
#define DEFAULT_STACK 0
|
|
|
|
#define G(_vector, _addr, _ist, _type, _dpl, _segment) \
|
|
{ \
|
|
.vector = _vector, \
|
|
.bits.ist = _ist, \
|
|
.bits.type = _type, \
|
|
.bits.dpl = _dpl, \
|
|
.bits.p = 1, \
|
|
.addr = _addr, \
|
|
.segment = _segment, \
|
|
}
|
|
|
|
/* Interrupt gate */
|
|
#define INTG(_vector, _addr) \
|
|
G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL0, __KERNEL_CS)
|
|
|
|
/* System interrupt gate */
|
|
#define SYSG(_vector, _addr) \
|
|
G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
|
|
|
|
/*
|
|
* Interrupt gate with interrupt stack. The _ist index is the index in
|
|
* the tss.ist[] array, but for the descriptor it needs to start at 1.
|
|
*/
|
|
#define ISTG(_vector, _addr, _ist) \
|
|
G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS)
|
|
|
|
/* Task gate */
|
|
#define TSKG(_vector, _gdt) \
|
|
G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3)
|
|
|
|
#define IDT_TABLE_SIZE (IDT_ENTRIES * sizeof(gate_desc))
|
|
|
|
static bool idt_setup_done __initdata;
|
|
|
|
/*
|
|
* Early traps running on the DEFAULT_STACK because the other interrupt
|
|
* stacks work only after cpu_init().
|
|
*/
|
|
static const __initconst struct idt_data early_idts[] = {
|
|
INTG(X86_TRAP_DB, asm_exc_debug),
|
|
SYSG(X86_TRAP_BP, asm_exc_int3),
|
|
|
|
#ifdef CONFIG_X86_32
|
|
/*
|
|
* Not possible on 64-bit. See idt_setup_early_pf() for details.
|
|
*/
|
|
INTG(X86_TRAP_PF, asm_exc_page_fault),
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
* The default IDT entries which are set up in trap_init() before
|
|
* cpu_init() is invoked. Interrupt stacks cannot be used at that point and
|
|
* the traps which use them are reinitialized with IST after cpu_init() has
|
|
* set up TSS.
|
|
*/
|
|
static const __initconst struct idt_data def_idts[] = {
|
|
INTG(X86_TRAP_DE, asm_exc_divide_error),
|
|
INTG(X86_TRAP_NMI, asm_exc_nmi),
|
|
INTG(X86_TRAP_BR, asm_exc_bounds),
|
|
INTG(X86_TRAP_UD, asm_exc_invalid_op),
|
|
INTG(X86_TRAP_NM, asm_exc_device_not_available),
|
|
INTG(X86_TRAP_OLD_MF, asm_exc_coproc_segment_overrun),
|
|
INTG(X86_TRAP_TS, asm_exc_invalid_tss),
|
|
INTG(X86_TRAP_NP, asm_exc_segment_not_present),
|
|
INTG(X86_TRAP_SS, asm_exc_stack_segment),
|
|
INTG(X86_TRAP_GP, asm_exc_general_protection),
|
|
INTG(X86_TRAP_SPURIOUS, asm_exc_spurious_interrupt_bug),
|
|
INTG(X86_TRAP_MF, asm_exc_coprocessor_error),
|
|
INTG(X86_TRAP_AC, asm_exc_alignment_check),
|
|
INTG(X86_TRAP_XF, asm_exc_simd_coprocessor_error),
|
|
|
|
#ifdef CONFIG_X86_32
|
|
TSKG(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS),
|
|
#else
|
|
INTG(X86_TRAP_DF, asm_exc_double_fault),
|
|
#endif
|
|
INTG(X86_TRAP_DB, asm_exc_debug),
|
|
|
|
#ifdef CONFIG_X86_MCE
|
|
INTG(X86_TRAP_MC, asm_exc_machine_check),
|
|
#endif
|
|
|
|
SYSG(X86_TRAP_OF, asm_exc_overflow),
|
|
#if defined(CONFIG_IA32_EMULATION)
|
|
SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat),
|
|
#elif defined(CONFIG_X86_32)
|
|
SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32),
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
* The APIC and SMP idt entries
|
|
*/
|
|
static const __initconst struct idt_data apic_idts[] = {
|
|
#ifdef CONFIG_SMP
|
|
INTG(RESCHEDULE_VECTOR, asm_sysvec_reschedule_ipi),
|
|
INTG(CALL_FUNCTION_VECTOR, asm_sysvec_call_function),
|
|
INTG(CALL_FUNCTION_SINGLE_VECTOR, asm_sysvec_call_function_single),
|
|
INTG(IRQ_MOVE_CLEANUP_VECTOR, asm_sysvec_irq_move_cleanup),
|
|
INTG(REBOOT_VECTOR, asm_sysvec_reboot),
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_THERMAL_VECTOR
|
|
INTG(THERMAL_APIC_VECTOR, asm_sysvec_thermal),
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_MCE_THRESHOLD
|
|
INTG(THRESHOLD_APIC_VECTOR, asm_sysvec_threshold),
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_MCE_AMD
|
|
INTG(DEFERRED_ERROR_VECTOR, asm_sysvec_deferred_error),
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt),
|
|
INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi),
|
|
# ifdef CONFIG_HAVE_KVM
|
|
INTG(POSTED_INTR_VECTOR, asm_sysvec_kvm_posted_intr_ipi),
|
|
INTG(POSTED_INTR_WAKEUP_VECTOR, asm_sysvec_kvm_posted_intr_wakeup_ipi),
|
|
INTG(POSTED_INTR_NESTED_VECTOR, asm_sysvec_kvm_posted_intr_nested_ipi),
|
|
# endif
|
|
# ifdef CONFIG_IRQ_WORK
|
|
INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work),
|
|
# endif
|
|
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
|
|
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
|
|
#endif
|
|
};
|
|
|
|
/* Must be page-aligned because the real IDT is used in the cpu entry area */
|
|
static gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
|
|
|
|
static struct desc_ptr idt_descr __ro_after_init = {
|
|
.size = IDT_TABLE_SIZE - 1,
|
|
.address = (unsigned long) idt_table,
|
|
};
|
|
|
|
void load_current_idt(void)
|
|
{
|
|
lockdep_assert_irqs_disabled();
|
|
load_idt(&idt_descr);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_F00F_BUG
|
|
bool idt_is_f00f_address(unsigned long address)
|
|
{
|
|
return ((address - idt_descr.address) >> 3) == 6;
|
|
}
|
|
#endif
|
|
|
|
static __init void
|
|
idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys)
|
|
{
|
|
gate_desc desc;
|
|
|
|
for (; size > 0; t++, size--) {
|
|
idt_init_desc(&desc, t);
|
|
write_idt_entry(idt, t->vector, &desc);
|
|
if (sys)
|
|
set_bit(t->vector, system_vectors);
|
|
}
|
|
}
|
|
|
|
static __init void set_intr_gate(unsigned int n, const void *addr)
|
|
{
|
|
struct idt_data data;
|
|
|
|
init_idt_data(&data, n, addr);
|
|
|
|
idt_setup_from_table(idt_table, &data, 1, false);
|
|
}
|
|
|
|
/**
|
|
* idt_setup_early_traps - Initialize the idt table with early traps
|
|
*
|
|
* On X8664 these traps do not use interrupt stacks as they can't work
|
|
* before cpu_init() is invoked and sets up TSS. The IST variants are
|
|
* installed after that.
|
|
*/
|
|
void __init idt_setup_early_traps(void)
|
|
{
|
|
idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts),
|
|
true);
|
|
load_idt(&idt_descr);
|
|
}
|
|
|
|
/**
|
|
* idt_setup_traps - Initialize the idt table with default traps
|
|
*/
|
|
void __init idt_setup_traps(void)
|
|
{
|
|
idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_64
|
|
/*
|
|
* Early traps running on the DEFAULT_STACK because the other interrupt
|
|
* stacks work only after cpu_init().
|
|
*/
|
|
static const __initconst struct idt_data early_pf_idts[] = {
|
|
INTG(X86_TRAP_PF, asm_exc_page_fault),
|
|
};
|
|
|
|
/*
|
|
* The exceptions which use Interrupt stacks. They are setup after
|
|
* cpu_init() when the TSS has been initialized.
|
|
*/
|
|
static const __initconst struct idt_data ist_idts[] = {
|
|
ISTG(X86_TRAP_DB, asm_exc_debug, IST_INDEX_DB),
|
|
ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI),
|
|
ISTG(X86_TRAP_DF, asm_exc_double_fault, IST_INDEX_DF),
|
|
#ifdef CONFIG_X86_MCE
|
|
ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE),
|
|
#endif
|
|
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
|
ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC),
|
|
#endif
|
|
};
|
|
|
|
/**
|
|
* idt_setup_early_pf - Initialize the idt table with early pagefault handler
|
|
*
|
|
* On X8664 this does not use interrupt stacks as they can't work before
|
|
* cpu_init() is invoked and sets up TSS. The IST variant is installed
|
|
* after that.
|
|
*
|
|
* Note, that X86_64 cannot install the real #PF handler in
|
|
* idt_setup_early_traps() because the memory intialization needs the #PF
|
|
* handler from the early_idt_handler_array to initialize the early page
|
|
* tables.
|
|
*/
|
|
void __init idt_setup_early_pf(void)
|
|
{
|
|
idt_setup_from_table(idt_table, early_pf_idts,
|
|
ARRAY_SIZE(early_pf_idts), true);
|
|
}
|
|
|
|
/**
|
|
* idt_setup_ist_traps - Initialize the idt table with traps using IST
|
|
*/
|
|
void __init idt_setup_ist_traps(void)
|
|
{
|
|
idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true);
|
|
}
|
|
#endif
|
|
|
|
static void __init idt_map_in_cea(void)
|
|
{
|
|
/*
|
|
* Set the IDT descriptor to a fixed read-only location in the cpu
|
|
* entry area, so that the "sidt" instruction will not leak the
|
|
* location of the kernel, and to defend the IDT against arbitrary
|
|
* memory write vulnerabilities.
|
|
*/
|
|
cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
|
|
PAGE_KERNEL_RO);
|
|
idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
|
|
}
|
|
|
|
/**
|
|
* idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates
|
|
*/
|
|
void __init idt_setup_apic_and_irq_gates(void)
|
|
{
|
|
int i = FIRST_EXTERNAL_VECTOR;
|
|
void *entry;
|
|
|
|
idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);
|
|
|
|
for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) {
|
|
entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
|
|
set_intr_gate(i, entry);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
|
|
/*
|
|
* Don't set the non assigned system vectors in the
|
|
* system_vectors bitmap. Otherwise they show up in
|
|
* /proc/interrupts.
|
|
*/
|
|
entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
|
|
set_intr_gate(i, entry);
|
|
}
|
|
#endif
|
|
/* Map IDT into CPU entry area and reload it. */
|
|
idt_map_in_cea();
|
|
load_idt(&idt_descr);
|
|
|
|
/* Make the IDT table read only */
|
|
set_memory_ro((unsigned long)&idt_table, 1);
|
|
|
|
idt_setup_done = true;
|
|
}
|
|
|
|
/**
|
|
* idt_setup_early_handler - Initializes the idt table with early handlers
|
|
*/
|
|
void __init idt_setup_early_handler(void)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
|
|
set_intr_gate(i, early_idt_handler_array[i]);
|
|
#ifdef CONFIG_X86_32
|
|
for ( ; i < NR_VECTORS; i++)
|
|
set_intr_gate(i, early_ignore_irq);
|
|
#endif
|
|
load_idt(&idt_descr);
|
|
}
|
|
|
|
/**
|
|
* idt_invalidate - Invalidate interrupt descriptor table
|
|
* @addr: The virtual address of the 'invalid' IDT
|
|
*/
|
|
void idt_invalidate(void *addr)
|
|
{
|
|
struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 };
|
|
|
|
load_idt(&idt);
|
|
}
|
|
|
|
void __init alloc_intr_gate(unsigned int n, const void *addr)
|
|
{
|
|
if (WARN_ON(n < FIRST_SYSTEM_VECTOR))
|
|
return;
|
|
|
|
if (WARN_ON(idt_setup_done))
|
|
return;
|
|
|
|
if (!WARN_ON(test_and_set_bit(n, system_vectors)))
|
|
set_intr_gate(n, addr);
|
|
}
|