linux/drivers/firmware/efi/efi-init.c
Pingfan Liu b398123bff efi: apply memblock cap after memblock_add()
On arm64, during kdump kernel saves vmcore, it runs into the following bug:
...
[   15.148919] usercopy: Kernel memory exposure attempt detected from SLUB object 'kmem_cache_node' (offset 0, size 4096)!
[   15.159707] ------------[ cut here ]------------
[   15.164311] kernel BUG at mm/usercopy.c:99!
[   15.168482] Internal error: Oops - BUG: 0 [#1] SMP
[   15.173261] Modules linked in: xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce sbsa_gwdt ast i2c_algo_bit drm_vram_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec drm_ttm_helper ttm drm nvme nvme_core xgene_hwmon i2c_designware_platform i2c_designware_core dm_mirror dm_region_hash dm_log dm_mod overlay squashfs zstd_decompress loop
[   15.206186] CPU: 0 PID: 542 Comm: cp Not tainted 5.16.0-rc4 #1
[   15.212006] Hardware name: GIGABYTE R272-P30-JG/MP32-AR0-JG, BIOS F12 (SCP: 1.5.20210426) 05/13/2021
[   15.221125] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   15.228073] pc : usercopy_abort+0x9c/0xa0
[   15.232074] lr : usercopy_abort+0x9c/0xa0
[   15.236070] sp : ffff8000121abba0
[   15.239371] x29: ffff8000121abbb0 x28: 0000000000003000 x27: 0000000000000000
[   15.246494] x26: 0000000080000400 x25: 0000ffff885c7000 x24: 0000000000000000
[   15.253617] x23: 000007ff80400000 x22: ffff07ff80401000 x21: 0000000000000001
[   15.260739] x20: 0000000000001000 x19: ffff07ff80400000 x18: ffffffffffffffff
[   15.267861] x17: 656a626f2042554c x16: 53206d6f72662064 x15: 6574636574656420
[   15.274983] x14: 74706d6574746120 x13: 2129363930342065 x12: 7a6973202c302074
[   15.282105] x11: ffffc8b041d1b148 x10: 00000000ffff8000 x9 : ffffc8b04012812c
[   15.289228] x8 : 00000000ffff7fff x7 : ffffc8b041d1b148 x6 : 0000000000000000
[   15.296349] x5 : 0000000000000000 x4 : 0000000000007fff x3 : 0000000000000000
[   15.303471] x2 : 0000000000000000 x1 : ffff07ff8c064800 x0 : 000000000000006b
[   15.310593] Call trace:
[   15.313027]  usercopy_abort+0x9c/0xa0
[   15.316677]  __check_heap_object+0xd4/0xf0
[   15.320762]  __check_object_size.part.0+0x160/0x1e0
[   15.325628]  __check_object_size+0x2c/0x40
[   15.329711]  copy_oldmem_page+0x7c/0x140
[   15.333623]  read_from_oldmem.part.0+0xfc/0x1c0
[   15.338142]  __read_vmcore.constprop.0+0x23c/0x350
[   15.342920]  read_vmcore+0x28/0x34
[   15.346309]  proc_reg_read+0xb4/0xf0
[   15.349871]  vfs_read+0xb8/0x1f0
[   15.353088]  ksys_read+0x74/0x100
[   15.356390]  __arm64_sys_read+0x28/0x34
...

This bug introduced by commit b261dba2fdb2 ("arm64: kdump: Remove custom
linux,usable-memory-range handling"), which moves
memblock_cap_memory_range() to fdt, but it breaches the rules that
memblock_cap_memory_range() should come after memblock_add() etc as said
in commit e888fa7bb882 ("memblock: Check memory add/cap ordering").

As a consequence, the virtual address set up by copy_oldmem_page() does
not bail out from the test of virt_addr_valid() in check_heap_object(),
and finally hits the BUG_ON().

Since memblock allocator has no idea about when the memblock is fully
populated, while efi_init() is aware, so tackling this issue by calling the
interface early_init_dt_check_for_usable_mem_range() exposed by of/fdt.

Fixes: b261dba2fdb2 ("arm64: kdump: Remove custom linux,usable-memory-range handling")
Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Zhen Lei <thunder.leizhen@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Nick Terrell <terrelln@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
To: devicetree@vger.kernel.org
To: linux-efi@vger.kernel.org
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211215021348.8766-1-kernelfans@gmail.com
2021-12-22 09:28:26 -04:00

283 lines
7.5 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Extensible Firmware Interface
*
* Based on Extensible Firmware Interface Specification version 2.4
*
* Copyright (C) 2013 - 2015 Linaro Ltd.
*/
#define pr_fmt(fmt) "efi: " fmt
#include <linux/efi.h>
#include <linux/fwnode.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/mm_types.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
#include <linux/platform_device.h>
#include <linux/screen_info.h>
#include <asm/efi.h>
static int __init is_memory(efi_memory_desc_t *md)
{
if (md->attribute & (EFI_MEMORY_WB|EFI_MEMORY_WT|EFI_MEMORY_WC))
return 1;
return 0;
}
/*
* Translate a EFI virtual address into a physical address: this is necessary,
* as some data members of the EFI system table are virtually remapped after
* SetVirtualAddressMap() has been called.
*/
static phys_addr_t __init efi_to_phys(unsigned long addr)
{
efi_memory_desc_t *md;
for_each_efi_memory_desc(md) {
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
if (md->virt_addr == 0)
/* no virtual mapping has been installed by the stub */
break;
if (md->virt_addr <= addr &&
(addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
return md->phys_addr + addr - md->virt_addr;
}
return addr;
}
static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR;
static __initdata unsigned long cpu_state_table = EFI_INVALID_TABLE_ADDR;
static const efi_config_table_type_t arch_tables[] __initconst = {
{LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table},
{LINUX_EFI_ARM_CPU_STATE_TABLE_GUID, &cpu_state_table},
{}
};
static void __init init_screen_info(void)
{
struct screen_info *si;
if (IS_ENABLED(CONFIG_ARM) &&
screen_info_table != EFI_INVALID_TABLE_ADDR) {
si = early_memremap_ro(screen_info_table, sizeof(*si));
if (!si) {
pr_err("Could not map screen_info config table\n");
return;
}
screen_info = *si;
early_memunmap(si, sizeof(*si));
/* dummycon on ARM needs non-zero values for columns/lines */
screen_info.orig_video_cols = 80;
screen_info.orig_video_lines = 25;
}
if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI &&
memblock_is_map_memory(screen_info.lfb_base))
memblock_mark_nomap(screen_info.lfb_base, screen_info.lfb_size);
}
static int __init uefi_init(u64 efi_system_table)
{
efi_config_table_t *config_tables;
efi_system_table_t *systab;
size_t table_size;
int retval;
systab = early_memremap_ro(efi_system_table, sizeof(efi_system_table_t));
if (systab == NULL) {
pr_warn("Unable to map EFI system table.\n");
return -ENOMEM;
}
set_bit(EFI_BOOT, &efi.flags);
if (IS_ENABLED(CONFIG_64BIT))
set_bit(EFI_64BIT, &efi.flags);
retval = efi_systab_check_header(&systab->hdr, 2);
if (retval)
goto out;
efi.runtime = systab->runtime;
efi.runtime_version = systab->hdr.revision;
efi_systab_report_header(&systab->hdr, efi_to_phys(systab->fw_vendor));
table_size = sizeof(efi_config_table_t) * systab->nr_tables;
config_tables = early_memremap_ro(efi_to_phys(systab->tables),
table_size);
if (config_tables == NULL) {
pr_warn("Unable to map EFI config table array.\n");
retval = -ENOMEM;
goto out;
}
retval = efi_config_parse_tables(config_tables, systab->nr_tables,
IS_ENABLED(CONFIG_ARM) ? arch_tables
: NULL);
early_memunmap(config_tables, table_size);
out:
early_memunmap(systab, sizeof(efi_system_table_t));
return retval;
}
/*
* Return true for regions that can be used as System RAM.
*/
static __init int is_usable_memory(efi_memory_desc_t *md)
{
switch (md->type) {
case EFI_LOADER_CODE:
case EFI_LOADER_DATA:
case EFI_ACPI_RECLAIM_MEMORY:
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
case EFI_PERSISTENT_MEMORY:
/*
* Special purpose memory is 'soft reserved', which means it
* is set aside initially, but can be hotplugged back in or
* be assigned to the dax driver after boot.
*/
if (efi_soft_reserve_enabled() &&
(md->attribute & EFI_MEMORY_SP))
return false;
/*
* According to the spec, these regions are no longer reserved
* after calling ExitBootServices(). However, we can only use
* them as System RAM if they can be mapped writeback cacheable.
*/
return (md->attribute & EFI_MEMORY_WB);
default:
break;
}
return false;
}
static __init void reserve_regions(void)
{
efi_memory_desc_t *md;
u64 paddr, npages, size;
if (efi_enabled(EFI_DBG))
pr_info("Processing EFI memory map:\n");
/*
* Discard memblocks discovered so far: if there are any at this
* point, they originate from memory nodes in the DT, and UEFI
* uses its own memory map instead.
*/
memblock_dump_all();
memblock_remove(0, PHYS_ADDR_MAX);
for_each_efi_memory_desc(md) {
paddr = md->phys_addr;
npages = md->num_pages;
if (efi_enabled(EFI_DBG)) {
char buf[64];
pr_info(" 0x%012llx-0x%012llx %s\n",
paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
efi_md_typeattr_format(buf, sizeof(buf), md));
}
memrange_efi_to_native(&paddr, &npages);
size = npages << PAGE_SHIFT;
if (is_memory(md)) {
early_init_dt_add_memory_arch(paddr, size);
if (!is_usable_memory(md))
memblock_mark_nomap(paddr, size);
/* keep ACPI reclaim memory intact for kexec etc. */
if (md->type == EFI_ACPI_RECLAIM_MEMORY)
memblock_reserve(paddr, size);
}
}
}
void __init efi_init(void)
{
struct efi_memory_map_data data;
u64 efi_system_table;
/* Grab UEFI information placed in FDT by stub */
efi_system_table = efi_get_fdt_params(&data);
if (!efi_system_table)
return;
if (efi_memmap_init_early(&data) < 0) {
/*
* If we are booting via UEFI, the UEFI memory map is the only
* description of memory we have, so there is little point in
* proceeding if we cannot access it.
*/
panic("Unable to map EFI memory map.\n");
}
WARN(efi.memmap.desc_version != 1,
"Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
efi.memmap.desc_version);
if (uefi_init(efi_system_table) < 0) {
efi_memmap_unmap();
return;
}
reserve_regions();
/*
* For memblock manipulation, the cap should come after the memblock_add().
* And now, memblock is fully populated, it is time to do capping.
*/
early_init_dt_check_for_usable_mem_range();
efi_esrt_init();
efi_mokvar_table_init();
memblock_reserve(data.phys_map & PAGE_MASK,
PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK)));
init_screen_info();
#ifdef CONFIG_ARM
/* ARM does not permit early mappings to persist across paging_init() */
efi_memmap_unmap();
if (cpu_state_table != EFI_INVALID_TABLE_ADDR) {
struct efi_arm_entry_state *state;
bool dump_state = true;
state = early_memremap_ro(cpu_state_table,
sizeof(struct efi_arm_entry_state));
if (state == NULL) {
pr_warn("Unable to map CPU entry state table.\n");
return;
}
if ((state->sctlr_before_ebs & 1) == 0)
pr_warn(FW_BUG "EFI stub was entered with MMU and Dcache disabled, please fix your firmware!\n");
else if ((state->sctlr_after_ebs & 1) == 0)
pr_warn(FW_BUG "ExitBootServices() returned with MMU and Dcache disabled, please fix your firmware!\n");
else
dump_state = false;
if (dump_state || efi_enabled(EFI_DBG)) {
pr_info("CPSR at EFI stub entry : 0x%08x\n", state->cpsr_before_ebs);
pr_info("SCTLR at EFI stub entry : 0x%08x\n", state->sctlr_before_ebs);
pr_info("CPSR after ExitBootServices() : 0x%08x\n", state->cpsr_after_ebs);
pr_info("SCTLR after ExitBootServices(): 0x%08x\n", state->sctlr_after_ebs);
}
early_memunmap(state, sizeof(struct efi_arm_entry_state));
}
#endif
}