b9d05200bc
The boot data and command line data are present in memory in a decrypted state and are copied early in the boot process. The early page fault support will map these areas as encrypted, so before attempting to copy them, add decrypted mappings so the data is accessed properly when copied. For the initrd, encrypt this data in place. Since the future mapping of the initrd area will be mapped as encrypted the data will be accessed properly. Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alexander Potapenko <glider@google.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Brijesh Singh <brijesh.singh@amd.com> Cc: Dave Young <dyoung@redhat.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Larry Woodman <lwoodman@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Matt Fleming <matt@codeblueprint.co.uk> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Toshimitsu Kani <toshi.kani@hpe.com> Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/bb0d430b41efefd45ee515aaf0979dcfda8b6a44.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
379 lines
10 KiB
C
379 lines
10 KiB
C
/*
|
|
* prepare to run common code
|
|
*
|
|
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
|
|
*/
|
|
|
|
#define DISABLE_BRANCH_PROFILING
|
|
#include <linux/init.h>
|
|
#include <linux/linkage.h>
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/string.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/start_kernel.h>
|
|
#include <linux/io.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/mem_encrypt.h>
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/kdebug.h>
|
|
#include <asm/e820/api.h>
|
|
#include <asm/bios_ebda.h>
|
|
#include <asm/bootparam_utils.h>
|
|
#include <asm/microcode.h>
|
|
#include <asm/kasan.h>
|
|
|
|
/*
|
|
* Manage page tables very early on.
|
|
*/
|
|
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
|
|
static unsigned int __initdata next_early_pgt;
|
|
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
|
|
|
|
#define __head __section(.head.text)
|
|
|
|
/*
 * Translate @ptr into the address of the same object relative to @physaddr:
 * the offset of @ptr from _text is preserved, with _text rebased to @physaddr.
 */
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
	unsigned long offset_from_text = (unsigned long)ptr - (unsigned long)_text;

	return (void *)(physaddr + offset_from_text);
}
|
|
|
|
/*
 * Adjust the early page tables for the physical address the kernel image
 * was loaded at, build the identity mapping used for the switchover, fix up
 * phys_base and (if SME is active) encrypt the kernel.
 *
 * @physaddr: physical address corresponding to _text.
 *
 * Spins forever if @physaddr does not fit in MAX_PHYSMEM_BITS or if the load
 * offset is not 2M aligned.
 *
 * Returns the SME encryption mask, to be used as a modifier for the initial
 * pgdir entry programmed into CR3.
 *
 * NOTE(review): every global touched here is reached through fixup_pointer();
 * presumably this code runs before the kernel's link-time virtual addresses
 * are usable -- confirm against the assembly caller.
 */
unsigned long __head __startup_64(unsigned long physaddr)
{
	unsigned long load_delta, *p;
	unsigned long pgtable_flags;
	unsigned long idx;
	unsigned int *next_pgt_ptr;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	int i;

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_PAGE_MASK)
		for (;;);

	/* Activate Secure Memory Encryption (SME) if supported and enabled */
	sme_enable();

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = fixup_pointer(&early_top_pgt, physaddr);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
		p4d[511] += load_delta;
	}

	pud = fixup_pointer(&level3_kernel_pgt, physaddr);
	pud[510] += load_delta;
	pud[511] += load_delta;

	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
	pmd[506] += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	/*
	 * Access the allocation cursor through fixup_pointer(), the same way
	 * as every other global object used in this function.
	 */
	next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
	pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
	pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	/*
	 * Two consecutive slots are populated at each level.  The table index
	 * is reduced modulo the table size only AFTER adding the slot offset,
	 * so that the second slot wraps to entry 0 instead of being written
	 * one entry past the end of the table when the first slot is the last
	 * entry.
	 */
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		idx = physaddr >> P4D_SHIFT;
		p4d[(idx + 0) % PTRS_PER_P4D] = (p4dval_t)pud + pgtable_flags;
		p4d[(idx + 1) % PTRS_PER_P4D] = (p4dval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	idx = physaddr >> PUD_SHIFT;
	pud[(idx + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(idx + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

	/*
	 * One large-page entry per PMD_SIZE chunk of the image.  The index
	 * wraps within the PMD table so an image that crosses the table
	 * boundary never writes past the end of the table.
	 */
	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
		idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 */

	pmd = fixup_pointer(level2_kernel_pgt, physaddr);
	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;
	}

	/*
	 * Fixup phys_base - remove the memory encryption mask to obtain
	 * the true physical address.
	 */
	p = fixup_pointer(&phys_base, physaddr);
	*p += load_delta - sme_get_me_mask();

	/* Encrypt the kernel (if SME is active) */
	sme_encrypt_kernel();

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}
|
|
|
|
/*
 * Return the SME encryption mask (if SME is active) to be used as a
 * modifier for the initial pgdir entry programmed into CR3.
 */
unsigned long __startup_secondary_64(void)
{
	unsigned long me_mask = sme_get_me_mask();

	return me_mask;
}
|
|
|
|
/* Wipe all early page tables except for the kernel symbol map */
|
|
static void __init reset_early_page_tables(void)
|
|
{
|
|
memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
|
|
next_early_pgt = 0;
|
|
write_cr3(__sme_pa_nodebug(early_top_pgt));
|
|
}
|
|
|
|
/* Create a new PMD entry */
|
|
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
|
|
{
|
|
unsigned long physaddr = address - __PAGE_OFFSET;
|
|
pgdval_t pgd, *pgd_p;
|
|
p4dval_t p4d, *p4d_p;
|
|
pudval_t pud, *pud_p;
|
|
pmdval_t *pmd_p;
|
|
|
|
/* Invalid address or early pgt is done ? */
|
|
if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
|
|
return -1;
|
|
|
|
again:
|
|
pgd_p = &early_top_pgt[pgd_index(address)].pgd;
|
|
pgd = *pgd_p;
|
|
|
|
/*
|
|
* The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
|
|
* critical -- __PAGE_OFFSET would point us back into the dynamic
|
|
* range and we might end up looping forever...
|
|
*/
|
|
if (!IS_ENABLED(CONFIG_X86_5LEVEL))
|
|
p4d_p = pgd_p;
|
|
else if (pgd)
|
|
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
|
|
else {
|
|
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
|
|
reset_early_page_tables();
|
|
goto again;
|
|
}
|
|
|
|
p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
|
|
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
|
|
*pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
|
|
}
|
|
p4d_p += p4d_index(address);
|
|
p4d = *p4d_p;
|
|
|
|
if (p4d)
|
|
pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
|
|
else {
|
|
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
|
|
reset_early_page_tables();
|
|
goto again;
|
|
}
|
|
|
|
pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
|
|
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
|
|
*p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
|
|
}
|
|
pud_p += pud_index(address);
|
|
pud = *pud_p;
|
|
|
|
if (pud)
|
|
pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
|
|
else {
|
|
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
|
|
reset_early_page_tables();
|
|
goto again;
|
|
}
|
|
|
|
pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
|
|
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
|
|
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
|
|
}
|
|
pmd_p[pmd_index(address)] = pmd;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Build a PMD entry for @address from its PMD-aligned offset above
 * __PAGE_OFFSET plus early_pmd_flags, and install it with
 * __early_make_pgtable().  Returns that function's result.
 */
int __init early_make_pgtable(unsigned long address)
{
	pmdval_t entry = ((address - __PAGE_OFFSET) & PMD_MASK) + early_pmd_flags;

	return __early_make_pgtable(address, entry);
}
|
|
|
|
/* Don't add a printk in there. printk relies on the PDA which is not initialized
|
|
yet. */
|
|
static void __init clear_bss(void)
|
|
{
|
|
memset(__bss_start, 0,
|
|
(unsigned long) __bss_stop - (unsigned long) __bss_start);
|
|
}
|
|
|
|
static unsigned long get_cmd_line_ptr(void)
|
|
{
|
|
unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
|
|
|
|
cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;
|
|
|
|
return cmd_line_ptr;
|
|
}
|
|
|
|
/*
 * Copy the boot parameters at @real_mode_data into boot_params and, when a
 * command line pointer is present, copy COMMAND_LINE_SIZE bytes of command
 * line into boot_command_line.
 */
static void __init copy_bootdata(char *real_mode_data)
{
	unsigned long cmdline_pa;

	/*
	 * If SME is active, this will create decrypted mappings of the
	 * boot data in advance of the copy operations.
	 */
	sme_map_bootdata(real_mode_data);

	memcpy(&boot_params, real_mode_data, sizeof(boot_params));
	sanitize_boot_params(&boot_params);

	cmdline_pa = get_cmd_line_ptr();
	if (cmdline_pa != 0) {
		char *cmdline_va = __va(cmdline_pa);

		memcpy(boot_command_line, cmdline_va, COMMAND_LINE_SIZE);
	}

	/*
	 * The old boot data is no longer needed and won't be reserved,
	 * freeing up that memory for use by the system. If SME is active,
	 * we need to remove the mappings that were created so that the
	 * memory doesn't remain mapped as decrypted.
	 */
	sme_unmap_bootdata(real_mode_data);
}
|
|
|
|
/*
 * Early C setup for the boot CPU: reset the early page tables, clear the
 * BSS, initialise SME and KASAN early state, install the early exception
 * handlers, copy the boot data, load microcode, and continue in
 * x86_64_start_reservations().
 *
 * @real_mode_data: address of the boot data; it is passed through __va()
 *                  before being read.
 */
asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
{
	int vector;

	/*
	 * Build-time sanity checks on the kernel image and module
	 * area mappings. (these are purely build-time and produce no code)
	 */
	BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
	BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
	BUILD_BUG_ON(__START_KERNEL_map & ~PMD_MASK);
	BUILD_BUG_ON(MODULES_VADDR & ~PMD_MASK);
	BUILD_BUG_ON(MODULES_VADDR <= __START_KERNEL);
	BUILD_BUG_ON(((MODULES_END - 1) & PGDIR_MASK) !=
		     (__START_KERNEL & PGDIR_MASK));
	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

	cr4_init_shadow();

	/* Kill off the identity-map trampoline */
	reset_early_page_tables();

	clear_bss();

	clear_page(init_top_pgt);

	/*
	 * SME support may update early_pmd_flags to include the memory
	 * encryption mask, so it needs to be called before anything
	 * that may generate a page fault.
	 */
	sme_early_init();

	kasan_early_init();

	/* Install the early handler for every exception vector. */
	for (vector = 0; vector < NUM_EXCEPTION_VECTORS; vector++) {
		set_intr_gate(vector, early_idt_handler_array[vector]);
	}
	load_idt((const struct desc_ptr *)&idt_descr);

	copy_bootdata(__va(real_mode_data));

	/*
	 * Load microcode early on BSP.
	 */
	load_ucode_bsp();

	/* set init_top_pgt kernel high mapping*/
	init_top_pgt[511] = early_top_pgt[511];

	x86_64_start_reservations(real_mode_data);
}
|
|
|
|
/*
 * Make sure the boot data has been copied, apply early platform quirks and
 * sub-architecture setup, then enter start_kernel().
 */
void __init x86_64_start_reservations(char *real_mode_data)
{
	/* version is always not zero if it is copied */
	if (boot_params.hdr.version == 0)
		copy_bootdata(__va(real_mode_data));

	x86_early_init_platform_quirks();

	/* Only the Intel MID sub-architecture needs extra early setup here. */
	if (boot_params.hdr.hardware_subarch == X86_SUBARCH_INTEL_MID)
		x86_intel_mid_early_setup();

	start_kernel();
}
|