5868f3651f
Add support to the early boot code to use Secure Memory Encryption (SME). Since the kernel has been loaded into memory in a decrypted state, encrypt the kernel in place and update the early pagetables with the memory encryption mask so that new pagetable entries will use memory encryption. The routines to set the encryption mask and perform the encryption are stub routines for now with functionality to be added in a later patch. Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alexander Potapenko <glider@google.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Brijesh Singh <brijesh.singh@amd.com> Cc: Dave Young <dyoung@redhat.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Larry Woodman <lwoodman@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Matt Fleming <matt@codeblueprint.co.uk> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Toshimitsu Kani <toshi.kani@hpe.com> Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/e52ad781f085224bf835b3caff9aa3aee6febccb.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
350 lines
9.3 KiB
C
350 lines
9.3 KiB
C
/*
|
|
* prepare to run common code
|
|
*
|
|
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
|
|
*/
|
|
|
|
#define DISABLE_BRANCH_PROFILING
|
|
#include <linux/init.h>
|
|
#include <linux/linkage.h>
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/string.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/start_kernel.h>
|
|
#include <linux/io.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/mem_encrypt.h>
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/kdebug.h>
|
|
#include <asm/e820/api.h>
|
|
#include <asm/bios_ebda.h>
|
|
#include <asm/bootparam_utils.h>
|
|
#include <asm/microcode.h>
|
|
#include <asm/kasan.h>
|
|
|
|
/*
|
|
* Manage page tables very early on.
|
|
*/
|
|
extern pgd_t early_top_pgt[PTRS_PER_PGD];
|
|
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
|
|
static unsigned int __initdata next_early_pgt;
|
|
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
|
|
|
|
#define __head __section(.head.text)
|
|
|
|
/*
 * Translate @ptr into the matching address inside the image loaded at
 * @physaddr: take its byte offset from _text and add that to @physaddr.
 */
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
	unsigned long offset = (unsigned long)ptr - (unsigned long)_text;

	return (void *)(physaddr + offset);
}
|
|
|
|
/*
 * Adjust the early page tables for the address the kernel was actually
 * loaded at and build an identity mapping for the switchover.
 *
 * @physaddr: physical address at which the kernel image (_text) was loaded.
 *
 * Spins forever if @physaddr has bits set at or above MAX_PHYSMEM_BITS or
 * if the load delta is not 2M aligned.  Every page-table symbol is
 * translated with fixup_pointer() before it is dereferenced.
 *
 * Returns the SME encryption mask (if SME is active) to be used as a
 * modifier for the initial pgdir entry programmed into CR3.
 */
unsigned long __head __startup_64(unsigned long physaddr)
{
	unsigned long load_delta, *p;
	unsigned long pgtable_flags;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	int i;

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_PAGE_MASK)
		for (;;);

	/* Activate Secure Memory Encryption (SME) if supported and enabled */
	sme_enable();

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = fixup_pointer(&early_top_pgt, physaddr);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
		p4d[511] += load_delta;
	}

	pud = fixup_pointer(&level3_kernel_pgt, physaddr);
	pud[510] += load_delta;
	pud[511] += load_delta;

	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
	pmd[506] += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 *
	 * Each level gets two consecutive entries pointing at the same
	 * lower-level table.  Below the pgd the second index is wrapped
	 * modulo the table size, so an image that straddles the end of a
	 * table's range lands back at the start of that same table rather
	 * than past the end of the page.
	 */

	pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
	pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
	pgtable_flags = _KERNPG_TABLE + sme_get_me_mask();

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);

		/*
		 * NOTE(review): pgd[i + 1] is left unwrapped; this assumes the
		 * MAX_PHYSMEM_BITS check above keeps i well below
		 * PTRS_PER_PGD - 1 -- confirm.
		 */
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
		p4d[i] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		/* NOTE(review): same unwrapped pgd[i + 1] assumption as above. */
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
	pud[i] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
		/*
		 * Add the loop offset first and wrap afterwards: wrapping
		 * only the base index would let base + i run off the end
		 * of the single pmd page.
		 */
		unsigned long idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses.  Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 */

	pmd = fixup_pointer(level2_kernel_pgt, physaddr);
	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;
	}

	/*
	 * Fixup phys_base - remove the memory encryption mask to obtain
	 * the true physical address.
	 */
	p = fixup_pointer(&phys_base, physaddr);
	*p += load_delta - sme_get_me_mask();

	/* Encrypt the kernel (if SME is active) */
	sme_encrypt_kernel();

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}
|
|
|
|
/*
 * Hand back the SME encryption mask (if SME is active); the caller uses it
 * as a modifier for the initial pgdir entry programmed into CR3.
 */
unsigned long __startup_secondary_64(void)
{
	unsigned long me_mask = sme_get_me_mask();

	return me_mask;
}
|
|
|
|
/* Wipe all early page tables except for the kernel symbol map */
|
|
static void __init reset_early_page_tables(void)
|
|
{
|
|
memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
|
|
next_early_pgt = 0;
|
|
write_cr3(__pa_nodebug(early_top_pgt));
|
|
}
|
|
|
|
/* Create a new PMD entry */
|
|
int __init early_make_pgtable(unsigned long address)
|
|
{
|
|
unsigned long physaddr = address - __PAGE_OFFSET;
|
|
pgdval_t pgd, *pgd_p;
|
|
p4dval_t p4d, *p4d_p;
|
|
pudval_t pud, *pud_p;
|
|
pmdval_t pmd, *pmd_p;
|
|
|
|
/* Invalid address or early pgt is done ? */
|
|
if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
|
|
return -1;
|
|
|
|
again:
|
|
pgd_p = &early_top_pgt[pgd_index(address)].pgd;
|
|
pgd = *pgd_p;
|
|
|
|
/*
|
|
* The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
|
|
* critical -- __PAGE_OFFSET would point us back into the dynamic
|
|
* range and we might end up looping forever...
|
|
*/
|
|
if (!IS_ENABLED(CONFIG_X86_5LEVEL))
|
|
p4d_p = pgd_p;
|
|
else if (pgd)
|
|
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
|
|
else {
|
|
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
|
|
reset_early_page_tables();
|
|
goto again;
|
|
}
|
|
|
|
p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
|
|
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
|
|
*pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
|
|
}
|
|
p4d_p += p4d_index(address);
|
|
p4d = *p4d_p;
|
|
|
|
if (p4d)
|
|
pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
|
|
else {
|
|
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
|
|
reset_early_page_tables();
|
|
goto again;
|
|
}
|
|
|
|
pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
|
|
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
|
|
*p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
|
|
}
|
|
pud_p += pud_index(address);
|
|
pud = *pud_p;
|
|
|
|
if (pud)
|
|
pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
|
|
else {
|
|
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
|
|
reset_early_page_tables();
|
|
goto again;
|
|
}
|
|
|
|
pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
|
|
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
|
|
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
|
|
}
|
|
pmd = (physaddr & PMD_MASK) + early_pmd_flags;
|
|
pmd_p[pmd_index(address)] = pmd;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Don't add a printk in there. printk relies on the PDA which is not initialized
|
|
yet. */
|
|
static void __init clear_bss(void)
|
|
{
|
|
memset(__bss_start, 0,
|
|
(unsigned long) __bss_stop - (unsigned long) __bss_start);
|
|
}
|
|
|
|
static unsigned long get_cmd_line_ptr(void)
|
|
{
|
|
unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
|
|
|
|
cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;
|
|
|
|
return cmd_line_ptr;
|
|
}
|
|
|
|
/*
 * Copy the boot parameter block at @real_mode_data into boot_params,
 * sanitize it, and, if a command-line pointer is present, copy
 * COMMAND_LINE_SIZE bytes from it into boot_command_line.
 */
static void __init copy_bootdata(char *real_mode_data)
{
	unsigned long cmd_line_ptr;

	memcpy(&boot_params, real_mode_data, sizeof(boot_params));
	sanitize_boot_params(&boot_params);

	cmd_line_ptr = get_cmd_line_ptr();
	if (!cmd_line_ptr)
		return;

	memcpy(boot_command_line, __va(cmd_line_ptr), COMMAND_LINE_SIZE);
}
|
|
|
|
/*
 * C-level early setup: reset the early page tables, clear .bss, install
 * the early exception handlers, copy the boot data and load microcode,
 * then continue in x86_64_start_reservations().
 *
 * @real_mode_data: address of the boot parameter block; it is converted
 *                  with __va() before being read here.
 */
asmlinkage __visible void __init x86_64_start_kernel(char *real_mode_data)
{
	int vector;

	/*
	 * Build-time sanity checks on the kernel image and module
	 * area mappings. (these are purely build-time and produce no code)
	 */
	BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
	BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
	BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
	BUILD_BUG_ON(MODULES_VADDR <= __START_KERNEL);
	BUILD_BUG_ON(((MODULES_END - 1) & PGDIR_MASK) !=
		     (__START_KERNEL & PGDIR_MASK));
	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

	cr4_init_shadow();

	/* Kill off the identity-map trampoline */
	reset_early_page_tables();

	clear_bss();

	clear_page(init_top_pgt);

	kasan_early_init();

	/* Point every exception vector at its early handler, then load the IDT. */
	for (vector = 0; vector < NUM_EXCEPTION_VECTORS; vector++) {
		set_intr_gate(vector, early_idt_handler_array[vector]);
	}
	load_idt((const struct desc_ptr *)&idt_descr);

	copy_bootdata(__va(real_mode_data));

	/*
	 * Load microcode early on BSP.
	 */
	load_ucode_bsp();

	/* set init_top_pgt kernel high mapping */
	init_top_pgt[511] = early_top_pgt[511];

	x86_64_start_reservations(real_mode_data);
}
|
|
|
|
/*
 * Make sure the boot data has been copied, apply platform quirks and any
 * sub-architecture specific early setup, then enter start_kernel().
 *
 * @real_mode_data: address of the boot parameter block; only read (via
 *                  __va()) if boot_params has not been filled in yet.
 */
void __init x86_64_start_reservations(char *real_mode_data)
{
	/* version is always not zero if it is copied */
	if (boot_params.hdr.version == 0)
		copy_bootdata(__va(real_mode_data));

	x86_early_init_platform_quirks();

	/* Intel MID is the only sub-architecture given early setup here. */
	if (boot_params.hdr.hardware_subarch == X86_SUBARCH_INTEL_MID)
		x86_intel_mid_early_setup();

	start_kernel();
}
|