Merge branch 'acpi-mm'

* acpi-mm:
  ACPI: HMAT: use %u instead of %d to print u32 values
  ACPI: NUMA: HMAT: fix a section mismatch
  ACPI: HMAT: don't mix pxm and nid when setting memory target processor_pxm
  ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
  ACPI: NUMA: HMAT: Register HMAT at device_initcall level
  device-dax: Add a driver for "hmem" devices
  dax: Fix alloc_dax_region() compile warning
  lib: Uplevel the pmem "region" ida to a global allocator
  x86/efi: Add efi_fake_mem support for EFI_MEMORY_SP
  arm/efi: EFI soft reservation to memblock
  x86/efi: EFI soft reservation to E820 enumeration
  efi: Common enable/disable infrastructure for EFI soft reservation
  x86/efi: Push EFI_MEMMAP check into leaf routines
  efi: Enumerate EFI_MEMORY_SP
  ACPI: NUMA: Establish a new drivers/acpi/numa/ directory
This commit is contained in:
Rafael J. Wysocki 2019-11-26 10:31:02 +01:00
commit 782b59711e
45 changed files with 644 additions and 100 deletions

View File

@ -1168,7 +1168,8 @@
Format: {"off" | "on" | "skip[mbr]"} Format: {"off" | "on" | "skip[mbr]"}
efi= [EFI] efi= [EFI]
Format: { "old_map", "nochunk", "noruntime", "debug" } Format: { "old_map", "nochunk", "noruntime", "debug",
"nosoftreserve" }
old_map [X86-64]: switch to the old ioremap-based EFI old_map [X86-64]: switch to the old ioremap-based EFI
runtime services mapping. 32-bit still uses this one by runtime services mapping. 32-bit still uses this one by
default. default.
@ -1177,6 +1178,12 @@
firmware implementations. firmware implementations.
noruntime : disable EFI runtime services support noruntime : disable EFI runtime services support
debug: enable misc debug output debug: enable misc debug output
nosoftreserve: The EFI_MEMORY_SP (Specific Purpose)
attribute may cause the kernel to reserve the
memory range for a memory mapping driver to
claim. Specify efi=nosoftreserve to disable this
reservation and treat the memory by its base type
(i.e. EFI_CONVENTIONAL_MEMORY / "System RAM").
efi_no_storage_paranoia [EFI; X86] efi_no_storage_paranoia [EFI; X86]
Using this parameter you can use more than 50% of Using this parameter you can use more than 50% of
@ -1189,15 +1196,21 @@
updating original EFI memory map. updating original EFI memory map.
Region of memory which aa attribute is added to is Region of memory which aa attribute is added to is
from ss to ss+nn. from ss to ss+nn.
If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000 If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000
is specified, EFI_MEMORY_MORE_RELIABLE(0x10000) is specified, EFI_MEMORY_MORE_RELIABLE(0x10000)
attribute is added to range 0x100000000-0x180000000 and attribute is added to range 0x100000000-0x180000000 and
0x10a0000000-0x1120000000. 0x10a0000000-0x1120000000.
If efi_fake_mem=8G@9G:0x40000 is specified, the
EFI_MEMORY_SP(0x40000) attribute is added to
range 0x240000000-0x43fffffff.
Using this parameter you can do debugging of EFI memmap Using this parameter you can do debugging of EFI memmap
related feature. For example, you can do debugging of related features. For example, you can do debugging of
Address Range Mirroring feature even if your box Address Range Mirroring feature even if your box
doesn't support it. doesn't support it, or mark specific memory as
"soft reserved".
efivar_ssdt= [EFI; X86] Name of an EFI variable that contains an SSDT efivar_ssdt= [EFI; X86] Name of an EFI variable that contains an SSDT
that is to be dynamically loaded by Linux. If there are that is to be dynamically loaded by Linux. If there are

View File

@ -1061,6 +1061,8 @@ int arch_add_memory(int nid, u64 start, u64 size,
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); size, PAGE_KERNEL, __pgd_pgtable_alloc, flags);
memblock_clear_nomap(start, size);
return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
restrictions); restrictions);
} }

View File

@ -554,7 +554,11 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s
case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA: case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY: case EFI_CONVENTIONAL_MEMORY:
e820_type = E820_TYPE_RAM; if (efi_soft_reserve_enabled() &&
(d->attribute & EFI_MEMORY_SP))
e820_type = E820_TYPE_SOFT_RESERVED;
else
e820_type = E820_TYPE_RAM;
break; break;
case EFI_ACPI_MEMORY_NVS: case EFI_ACPI_MEMORY_NVS:

View File

@ -132,8 +132,14 @@ char *skip_spaces(const char *str)
#include "../../../../lib/ctype.c" #include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c" #include "../../../../lib/cmdline.c"
enum parse_mode {
PARSE_MEMMAP,
PARSE_EFI,
};
static int static int
parse_memmap(char *p, unsigned long long *start, unsigned long long *size) parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
enum parse_mode mode)
{ {
char *oldp; char *oldp;
@ -156,8 +162,29 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
*start = memparse(p + 1, &p); *start = memparse(p + 1, &p);
return 0; return 0;
case '@': case '@':
/* memmap=nn@ss specifies usable region, should be skipped */ if (mode == PARSE_MEMMAP) {
*size = 0; /*
* memmap=nn@ss specifies usable region, should
* be skipped
*/
*size = 0;
} else {
unsigned long long flags;
/*
* efi_fake_mem=nn@ss:attr the attr specifies
* flags that might imply a soft-reservation.
*/
*start = memparse(p + 1, &p);
if (p && *p == ':') {
p++;
if (kstrtoull(p, 0, &flags) < 0)
*size = 0;
else if (flags & EFI_MEMORY_SP)
return 0;
}
*size = 0;
}
/* Fall through */ /* Fall through */
default: default:
/* /*
@ -172,7 +199,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
return -EINVAL; return -EINVAL;
} }
static void mem_avoid_memmap(char *str) static void mem_avoid_memmap(enum parse_mode mode, char *str)
{ {
static int i; static int i;
@ -187,7 +214,7 @@ static void mem_avoid_memmap(char *str)
if (k) if (k)
*k++ = 0; *k++ = 0;
rc = parse_memmap(str, &start, &size); rc = parse_memmap(str, &start, &size, mode);
if (rc < 0) if (rc < 0)
break; break;
str = k; str = k;
@ -238,7 +265,6 @@ static void parse_gb_huge_pages(char *param, char *val)
} }
} }
static void handle_mem_options(void) static void handle_mem_options(void)
{ {
char *args = (char *)get_cmd_line_ptr(); char *args = (char *)get_cmd_line_ptr();
@ -271,7 +297,7 @@ static void handle_mem_options(void)
} }
if (!strcmp(param, "memmap")) { if (!strcmp(param, "memmap")) {
mem_avoid_memmap(val); mem_avoid_memmap(PARSE_MEMMAP, val);
} else if (strstr(param, "hugepages")) { } else if (strstr(param, "hugepages")) {
parse_gb_huge_pages(param, val); parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) { } else if (!strcmp(param, "mem")) {
@ -284,6 +310,8 @@ static void handle_mem_options(void)
goto out; goto out;
mem_limit = mem_size; mem_limit = mem_size;
} else if (!strcmp(param, "efi_fake_mem")) {
mem_avoid_memmap(PARSE_EFI, val);
} }
} }
@ -760,6 +788,10 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
if (md->type != EFI_CONVENTIONAL_MEMORY) if (md->type != EFI_CONVENTIONAL_MEMORY)
continue; continue;
if (efi_soft_reserve_enabled() &&
(md->attribute & EFI_MEMORY_SP))
continue;
if (efi_mirror_found && if (efi_mirror_found &&
!(md->attribute & EFI_MEMORY_MORE_RELIABLE)) !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
continue; continue;

View File

@ -28,6 +28,14 @@ enum e820_type {
*/ */
E820_TYPE_PRAM = 12, E820_TYPE_PRAM = 12,
/*
* Special-purpose memory is indicated to the system via the
* EFI_MEMORY_SP attribute. Define an e820 translation of this
* memory type for the purpose of reserving this range and
* marking it with the IORES_DESC_SOFT_RESERVED designation.
*/
E820_TYPE_SOFT_RESERVED = 0xefffffff,
/* /*
* Reserved RAM used by the kernel itself if * Reserved RAM used by the kernel itself if
* CONFIG_INTEL_TXT=y is enabled, memory of this type * CONFIG_INTEL_TXT=y is enabled, memory of this type

View File

@ -140,7 +140,6 @@ extern void efi_delete_dummy_variable(void);
extern void efi_switch_mm(struct mm_struct *mm); extern void efi_switch_mm(struct mm_struct *mm);
extern void efi_recover_from_page_fault(unsigned long phys_addr); extern void efi_recover_from_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void); extern void efi_free_boot_services(void);
extern void efi_reserve_boot_services(void);
struct efi_setup_data { struct efi_setup_data {
u64 fw_vendor; u64 fw_vendor;
@ -244,6 +243,8 @@ static inline bool efi_is_64bit(void)
extern bool efi_reboot_required(void); extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr); extern bool efi_is_table_address(unsigned long phys_addr);
extern void efi_find_mirror(void);
extern void efi_reserve_boot_services(void);
#else #else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
static inline bool efi_reboot_required(void) static inline bool efi_reboot_required(void)
@ -254,6 +255,20 @@ static inline bool efi_is_table_address(unsigned long phys_addr)
{ {
return false; return false;
} }
/* Empty stand-in used in the !CONFIG_EFI branch of this header. */
static inline void efi_find_mirror(void)
{
}
/* Empty stand-in used in the !CONFIG_EFI branch of this header. */
static inline void efi_reserve_boot_services(void)
{
}
#endif /* CONFIG_EFI */ #endif /* CONFIG_EFI */
/*
 * efi_fake_memmap_early() is only declared extern when
 * CONFIG_EFI_FAKE_MEMMAP is set; otherwise an empty inline stub takes its
 * place so that callers do not need their own #ifdef.
 */
#ifdef CONFIG_EFI_FAKE_MEMMAP
extern void __init efi_fake_memmap_early(void);
#else
static inline void efi_fake_memmap_early(void)
{
}
#endif
#endif /* _ASM_X86_EFI_H */ #endif /* _ASM_X86_EFI_H */

View File

@ -190,6 +190,7 @@ static void __init e820_print_type(enum e820_type type)
case E820_TYPE_RAM: /* Fall through: */ case E820_TYPE_RAM: /* Fall through: */
case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break; case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break;
case E820_TYPE_RESERVED: pr_cont("reserved"); break; case E820_TYPE_RESERVED: pr_cont("reserved"); break;
case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved"); break;
case E820_TYPE_ACPI: pr_cont("ACPI data"); break; case E820_TYPE_ACPI: pr_cont("ACPI data"); break;
case E820_TYPE_NVS: pr_cont("ACPI NVS"); break; case E820_TYPE_NVS: pr_cont("ACPI NVS"); break;
case E820_TYPE_UNUSABLE: pr_cont("unusable"); break; case E820_TYPE_UNUSABLE: pr_cont("unusable"); break;
@ -1037,6 +1038,7 @@ static const char *__init e820_type_to_string(struct e820_entry *entry)
case E820_TYPE_PRAM: return "Persistent Memory (legacy)"; case E820_TYPE_PRAM: return "Persistent Memory (legacy)";
case E820_TYPE_PMEM: return "Persistent Memory"; case E820_TYPE_PMEM: return "Persistent Memory";
case E820_TYPE_RESERVED: return "Reserved"; case E820_TYPE_RESERVED: return "Reserved";
case E820_TYPE_SOFT_RESERVED: return "Soft Reserved";
default: return "Unknown E820 type"; default: return "Unknown E820 type";
} }
} }
@ -1052,6 +1054,7 @@ static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry)
case E820_TYPE_PRAM: /* Fall-through: */ case E820_TYPE_PRAM: /* Fall-through: */
case E820_TYPE_PMEM: /* Fall-through: */ case E820_TYPE_PMEM: /* Fall-through: */
case E820_TYPE_RESERVED: /* Fall-through: */ case E820_TYPE_RESERVED: /* Fall-through: */
case E820_TYPE_SOFT_RESERVED: /* Fall-through: */
default: return IORESOURCE_MEM; default: return IORESOURCE_MEM;
} }
} }
@ -1064,6 +1067,7 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY;
case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; case E820_TYPE_RESERVED: return IORES_DESC_RESERVED;
case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED;
case E820_TYPE_RESERVED_KERN: /* Fall-through: */ case E820_TYPE_RESERVED_KERN: /* Fall-through: */
case E820_TYPE_RAM: /* Fall-through: */ case E820_TYPE_RAM: /* Fall-through: */
case E820_TYPE_UNUSABLE: /* Fall-through: */ case E820_TYPE_UNUSABLE: /* Fall-through: */
@ -1078,11 +1082,12 @@ static bool __init do_mark_busy(enum e820_type type, struct resource *res)
return true; return true;
/* /*
* Treat persistent memory like device memory, i.e. reserve it * Treat persistent memory and other special memory ranges like
* for exclusive use of a driver * device memory, i.e. reserve it for exclusive use of a driver
*/ */
switch (type) { switch (type) {
case E820_TYPE_RESERVED: case E820_TYPE_RESERVED:
case E820_TYPE_SOFT_RESERVED:
case E820_TYPE_PRAM: case E820_TYPE_PRAM:
case E820_TYPE_PMEM: case E820_TYPE_PMEM:
return false; return false;
@ -1285,6 +1290,9 @@ void __init e820__memblock_setup(void)
if (end != (resource_size_t)end) if (end != (resource_size_t)end)
continue; continue;
if (entry->type == E820_TYPE_SOFT_RESERVED)
memblock_reserve(entry->addr, entry->size);
if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
continue; continue;

View File

@ -1122,17 +1122,15 @@ void __init setup_arch(char **cmdline_p)
reserve_bios_regions(); reserve_bios_regions();
if (efi_enabled(EFI_MEMMAP)) { efi_fake_memmap();
efi_fake_memmap(); efi_find_mirror();
efi_find_mirror(); efi_esrt_init();
efi_esrt_init();
/* /*
* The EFI specification says that boot service code won't be * The EFI specification says that boot service code won't be
* called after ExitBootServices(). This is, in fact, a lie. * called after ExitBootServices(). This is, in fact, a lie.
*/ */
efi_reserve_boot_services(); efi_reserve_boot_services();
}
/* preallocate 4k for mptable mpc */ /* preallocate 4k for mptable mpc */
e820__memblock_alloc_reserved_mpc_new(); e820__memblock_alloc_reserved_mpc_new();

View File

@ -128,6 +128,9 @@ void __init efi_find_mirror(void)
efi_memory_desc_t *md; efi_memory_desc_t *md;
u64 mirror_size = 0, total_size = 0; u64 mirror_size = 0, total_size = 0;
if (!efi_enabled(EFI_MEMMAP))
return;
for_each_efi_memory_desc(md) { for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr; unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@ -145,14 +148,18 @@ void __init efi_find_mirror(void)
/* /*
* Tell the kernel about the EFI memory map. This might include * Tell the kernel about the EFI memory map. This might include
* more than the max 128 entries that can fit in the e820 legacy * more than the max 128 entries that can fit in the passed in e820
* (zeropage) memory map. * legacy (zeropage) memory map, but the kernel's e820 table can hold
* E820_MAX_ENTRIES.
*/ */
static void __init do_add_efi_memmap(void) static void __init do_add_efi_memmap(void)
{ {
efi_memory_desc_t *md; efi_memory_desc_t *md;
if (!efi_enabled(EFI_MEMMAP))
return;
for_each_efi_memory_desc(md) { for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr; unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@ -164,7 +171,10 @@ static void __init do_add_efi_memmap(void)
case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA: case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY: case EFI_CONVENTIONAL_MEMORY:
if (md->attribute & EFI_MEMORY_WB) if (efi_soft_reserve_enabled()
&& (md->attribute & EFI_MEMORY_SP))
e820_type = E820_TYPE_SOFT_RESERVED;
else if (md->attribute & EFI_MEMORY_WB)
e820_type = E820_TYPE_RAM; e820_type = E820_TYPE_RAM;
else else
e820_type = E820_TYPE_RESERVED; e820_type = E820_TYPE_RESERVED;
@ -190,11 +200,36 @@ static void __init do_add_efi_memmap(void)
e820_type = E820_TYPE_RESERVED; e820_type = E820_TYPE_RESERVED;
break; break;
} }
e820__range_add(start, size, e820_type); e820__range_add(start, size, e820_type);
} }
e820__update_table(e820_table); e820__update_table(e820_table);
} }
/*
* Given add_efi_memmap defaults to 0 and there is no alternative
* e820 mechanism for soft-reserved memory, import the full EFI memory
* map if soft reservations are present and enabled. Otherwise, the
* mechanism to disable the kernel's consideration of EFI_MEMORY_SP is
* the efi=nosoftreserve option.
*/
static bool do_efi_soft_reserve(void)
{
efi_memory_desc_t *md;
if (!efi_enabled(EFI_MEMMAP))
return false;
if (!efi_soft_reserve_enabled())
return false;
for_each_efi_memory_desc(md)
if (md->type == EFI_CONVENTIONAL_MEMORY &&
(md->attribute & EFI_MEMORY_SP))
return true;
return false;
}
int __init efi_memblock_x86_reserve_range(void) int __init efi_memblock_x86_reserve_range(void)
{ {
struct efi_info *e = &boot_params.efi_info; struct efi_info *e = &boot_params.efi_info;
@ -224,9 +259,11 @@ int __init efi_memblock_x86_reserve_range(void)
if (rv) if (rv)
return rv; return rv;
if (add_efi_memmap) if (add_efi_memmap || do_efi_soft_reserve())
do_add_efi_memmap(); do_add_efi_memmap();
efi_fake_memmap_early();
WARN(efi.memmap.desc_version != 1, WARN(efi.memmap.desc_version != 1,
"Unexpected EFI_MEMORY_DESCRIPTOR version %ld", "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
efi.memmap.desc_version); efi.memmap.desc_version);
@ -778,6 +815,15 @@ static bool should_map_region(efi_memory_desc_t *md)
if (IS_ENABLED(CONFIG_X86_32)) if (IS_ENABLED(CONFIG_X86_32))
return false; return false;
/*
* EFI specific purpose memory may be reserved by default
* depending on kernel config and boot options.
*/
if (md->type == EFI_CONVENTIONAL_MEMORY &&
efi_soft_reserve_enabled() &&
(md->attribute & EFI_MEMORY_SP))
return false;
/* /*
* Map all of RAM so that we can access arguments in the 1:1 * Map all of RAM so that we can access arguments in the 1:1
* mapping when making EFI runtime calls. * mapping when making EFI runtime calls.

View File

@ -320,6 +320,9 @@ void __init efi_reserve_boot_services(void)
{ {
efi_memory_desc_t *md; efi_memory_desc_t *md;
if (!efi_enabled(EFI_MEMMAP))
return;
for_each_efi_memory_desc(md) { for_each_efi_memory_desc(md) {
u64 start = md->phys_addr; u64 start = md->phys_addr;
u64 size = md->num_pages << EFI_PAGE_SHIFT; u64 size = md->num_pages << EFI_PAGE_SHIFT;

View File

@ -319,12 +319,6 @@ config ACPI_THERMAL
To compile this driver as a module, choose M here: To compile this driver as a module, choose M here:
the module will be called thermal. the module will be called thermal.
config ACPI_NUMA
bool "NUMA support"
depends on NUMA
depends on (X86 || IA64 || ARM64)
default y if IA64 || ARM64
config ACPI_CUSTOM_DSDT_FILE config ACPI_CUSTOM_DSDT_FILE
string "Custom DSDT Table file to include" string "Custom DSDT Table file to include"
default "" default ""
@ -473,8 +467,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
If you are unsure what to do, do not enable this option. If you are unsure what to do, do not enable this option.
source "drivers/acpi/nfit/Kconfig" source "drivers/acpi/nfit/Kconfig"
source "drivers/acpi/hmat/Kconfig" source "drivers/acpi/numa/Kconfig"
source "drivers/acpi/apei/Kconfig" source "drivers/acpi/apei/Kconfig"
source "drivers/acpi/dptf/Kconfig" source "drivers/acpi/dptf/Kconfig"

View File

@ -55,7 +55,6 @@ acpi-$(CONFIG_X86) += acpi_cmos_rtc.o
acpi-$(CONFIG_X86) += x86/apple.o acpi-$(CONFIG_X86) += x86/apple.o
acpi-$(CONFIG_X86) += x86/utils.o acpi-$(CONFIG_X86) += x86/utils.o
acpi-$(CONFIG_DEBUG_FS) += debugfs.o acpi-$(CONFIG_DEBUG_FS) += debugfs.o
acpi-$(CONFIG_ACPI_NUMA) += numa.o
acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
acpi-y += acpi_lpat.o acpi-y += acpi_lpat.o
acpi-$(CONFIG_ACPI_LPIT) += acpi_lpit.o acpi-$(CONFIG_ACPI_LPIT) += acpi_lpit.o
@ -80,7 +79,7 @@ obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
obj-$(CONFIG_ACPI) += container.o obj-$(CONFIG_ACPI) += container.o
obj-$(CONFIG_ACPI_THERMAL) += thermal.o obj-$(CONFIG_ACPI_THERMAL) += thermal.o
obj-$(CONFIG_ACPI_NFIT) += nfit/ obj-$(CONFIG_ACPI_NFIT) += nfit/
obj-$(CONFIG_ACPI_HMAT) += hmat/ obj-$(CONFIG_ACPI_NUMA) += numa/
obj-$(CONFIG_ACPI) += acpi_memhotplug.o obj-$(CONFIG_ACPI) += acpi_memhotplug.o
obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
obj-$(CONFIG_ACPI_BATTERY) += battery.o obj-$(CONFIG_ACPI_BATTERY) += battery.o

View File

@ -1,2 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_ACPI_HMAT) := hmat.o

View File

@ -1,8 +1,15 @@
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
config ACPI_NUMA
bool "NUMA support"
depends on NUMA
depends on (X86 || IA64 || ARM64)
default y if IA64 || ARM64
config ACPI_HMAT config ACPI_HMAT
bool "ACPI Heterogeneous Memory Attribute Table Support" bool "ACPI Heterogeneous Memory Attribute Table Support"
depends on ACPI_NUMA depends on ACPI_NUMA
select HMEM_REPORTING select HMEM_REPORTING
select MEMREGION
help help
If set, this option has the kernel parse and report the If set, this option has the kernel parse and report the
platform's ACPI HMAT (Heterogeneous Memory Attributes Table), platform's ACPI HMAT (Heterogeneous Memory Attributes Table),

View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_ACPI_HMAT) += hmat.o

View File

@ -8,12 +8,18 @@
* the applicable attributes with the node's interfaces. * the applicable attributes with the node's interfaces.
*/ */
#define pr_fmt(fmt) "acpi/hmat: " fmt
#define dev_fmt(fmt) "acpi/hmat: " fmt
#include <linux/acpi.h> #include <linux/acpi.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/mm.h>
#include <linux/platform_device.h>
#include <linux/list_sort.h> #include <linux/list_sort.h>
#include <linux/memregion.h>
#include <linux/memory.h> #include <linux/memory.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/node.h> #include <linux/node.h>
@ -49,6 +55,7 @@ struct memory_target {
struct list_head node; struct list_head node;
unsigned int memory_pxm; unsigned int memory_pxm;
unsigned int processor_pxm; unsigned int processor_pxm;
struct resource memregions;
struct node_hmem_attrs hmem_attrs; struct node_hmem_attrs hmem_attrs;
struct list_head caches; struct list_head caches;
struct node_cache_attrs cache_attrs; struct node_cache_attrs cache_attrs;
@ -104,22 +111,36 @@ static __init void alloc_memory_initiator(unsigned int cpu_pxm)
list_add_tail(&initiator->node, &initiators); list_add_tail(&initiator->node, &initiators);
} }
static __init void alloc_memory_target(unsigned int mem_pxm) static __init void alloc_memory_target(unsigned int mem_pxm,
resource_size_t start, resource_size_t len)
{ {
struct memory_target *target; struct memory_target *target;
target = find_mem_target(mem_pxm); target = find_mem_target(mem_pxm);
if (target) if (!target) {
return; target = kzalloc(sizeof(*target), GFP_KERNEL);
if (!target)
return;
target->memory_pxm = mem_pxm;
target->processor_pxm = PXM_INVAL;
target->memregions = (struct resource) {
.name = "ACPI mem",
.start = 0,
.end = -1,
.flags = IORESOURCE_MEM,
};
list_add_tail(&target->node, &targets);
INIT_LIST_HEAD(&target->caches);
}
target = kzalloc(sizeof(*target), GFP_KERNEL); /*
if (!target) * There are potentially multiple ranges per PXM, so record each
return; * in the per-target memregions resource tree.
*/
target->memory_pxm = mem_pxm; if (!__request_region(&target->memregions, start, len, "memory target",
target->processor_pxm = PXM_INVAL; IORESOURCE_MEM))
list_add_tail(&target->node, &targets); pr_warn("failed to reserve %#llx - %#llx in pxm: %d\n",
INIT_LIST_HEAD(&target->caches); start, start + len, mem_pxm);
} }
static __init const char *hmat_data_type(u8 type) static __init const char *hmat_data_type(u8 type)
@ -272,7 +293,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
u8 type, mem_hier; u8 type, mem_hier;
if (hmat_loc->header.length < sizeof(*hmat_loc)) { if (hmat_loc->header.length < sizeof(*hmat_loc)) {
pr_notice("HMAT: Unexpected locality header length: %d\n", pr_notice("HMAT: Unexpected locality header length: %u\n",
hmat_loc->header.length); hmat_loc->header.length);
return -EINVAL; return -EINVAL;
} }
@ -284,12 +305,12 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds + total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds +
sizeof(*inits) * ipds + sizeof(*targs) * tpds; sizeof(*inits) * ipds + sizeof(*targs) * tpds;
if (hmat_loc->header.length < total_size) { if (hmat_loc->header.length < total_size) {
pr_notice("HMAT: Unexpected locality header length:%d, minimum required:%d\n", pr_notice("HMAT: Unexpected locality header length:%u, minimum required:%u\n",
hmat_loc->header.length, total_size); hmat_loc->header.length, total_size);
return -EINVAL; return -EINVAL;
} }
pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%d Target Domains:%d Base:%lld\n", pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n",
hmat_loc->flags, hmat_data_type(type), ipds, tpds, hmat_loc->flags, hmat_data_type(type), ipds, tpds,
hmat_loc->entry_base_unit); hmat_loc->entry_base_unit);
@ -302,7 +323,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
value = hmat_normalize(entries[init * tpds + targ], value = hmat_normalize(entries[init * tpds + targ],
hmat_loc->entry_base_unit, hmat_loc->entry_base_unit,
type); type);
pr_info(" Initiator-Target[%d-%d]:%d%s\n", pr_info(" Initiator-Target[%u-%u]:%u%s\n",
inits[init], targs[targ], value, inits[init], targs[targ], value,
hmat_data_type_suffix(type)); hmat_data_type_suffix(type));
@ -329,13 +350,13 @@ static __init int hmat_parse_cache(union acpi_subtable_headers *header,
u32 attrs; u32 attrs;
if (cache->header.length < sizeof(*cache)) { if (cache->header.length < sizeof(*cache)) {
pr_notice("HMAT: Unexpected cache header length: %d\n", pr_notice("HMAT: Unexpected cache header length: %u\n",
cache->header.length); cache->header.length);
return -EINVAL; return -EINVAL;
} }
attrs = cache->cache_attributes; attrs = cache->cache_attributes;
pr_info("HMAT: Cache: Domain:%d Size:%llu Attrs:%08x SMBIOS Handles:%d\n", pr_info("HMAT: Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
cache->memory_PD, cache->cache_size, attrs, cache->memory_PD, cache->cache_size, attrs,
cache->number_of_SMBIOShandles); cache->number_of_SMBIOShandles);
@ -390,17 +411,17 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
struct memory_target *target = NULL; struct memory_target *target = NULL;
if (p->header.length != sizeof(*p)) { if (p->header.length != sizeof(*p)) {
pr_notice("HMAT: Unexpected address range header length: %d\n", pr_notice("HMAT: Unexpected address range header length: %u\n",
p->header.length); p->header.length);
return -EINVAL; return -EINVAL;
} }
if (hmat_revision == 1) if (hmat_revision == 1)
pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%d Memory Domain:%d\n", pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n",
p->reserved3, p->reserved4, p->flags, p->processor_PD, p->reserved3, p->reserved4, p->flags, p->processor_PD,
p->memory_PD); p->memory_PD);
else else
pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n", pr_info("HMAT: Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n",
p->flags, p->processor_PD, p->memory_PD); p->flags, p->processor_PD, p->memory_PD);
if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) { if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
@ -417,7 +438,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
pr_debug("HMAT: Invalid Processor Domain\n"); pr_debug("HMAT: Invalid Processor Domain\n");
return -EINVAL; return -EINVAL;
} }
target->processor_pxm = p_node; target->processor_pxm = p->processor_PD;
} }
return 0; return 0;
@ -452,7 +473,7 @@ static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header,
return -EINVAL; return -EINVAL;
if (!(ma->flags & ACPI_SRAT_MEM_ENABLED)) if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
return 0; return 0;
alloc_memory_target(ma->proximity_domain); alloc_memory_target(ma->proximity_domain, ma->base_address, ma->length);
return 0; return 0;
} }
@ -613,10 +634,91 @@ static void hmat_register_target_perf(struct memory_target *target)
node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0); node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
} }
/*
 * Register an "hmem" platform device for one memory range of @target.
 *
 * @target: memory target whose memory_pxm selects the device's NUMA node
 *          and the target_node passed along as platform data
 * @r:      range taken from the target's memregions resource tree
 *
 * Nothing is registered unless region_intersects() reports that the range
 * intersects IORES_DESC_SOFT_RESERVED memory. Every failure is logged and
 * unwinds whatever was acquired before it: the memregion id and, once it
 * has been allocated, the platform device reference.
 */
static void hmat_register_target_device(struct memory_target *target,
		struct resource *r)
{
	/* define a clean / non-busy resource for the platform device */
	struct resource res = {
		.start = r->start,
		.end = r->end,
		.flags = IORESOURCE_MEM,
	};
	struct platform_device *pdev;
	struct memregion_info info;
	int rc, id;

	rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
			IORES_DESC_SOFT_RESERVED);
	if (rc != REGION_INTERSECTS)
		return;

	id = memregion_alloc(GFP_KERNEL);
	if (id < 0) {
		pr_err("memregion allocation failure for %pr\n", &res);
		return;
	}

	pdev = platform_device_alloc("hmem", id);
	if (!pdev) {
		pr_err("hmem device allocation failure for %pr\n", &res);
		goto out_free_id;
	}

	pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm);
	info = (struct memregion_info) {
		.target_node = acpi_map_pxm_to_node(target->memory_pxm),
	};
	rc = platform_device_add_data(pdev, &info, sizeof(info));
	if (rc < 0) {
		pr_err("hmem memregion_info allocation failure for %pr\n", &res);
		/* pdev is already allocated here, so it must be put as well */
		goto out_put_pdev;
	}

	rc = platform_device_add_resources(pdev, &res, 1);
	if (rc < 0) {
		pr_err("hmem resource allocation failure for %pr\n", &res);
		goto out_put_pdev;
	}

	rc = platform_device_add(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "device add failed for %pr\n", &res);
		goto out_put_pdev;
	}

	return;

out_put_pdev:
	/* drop the reference obtained from platform_device_alloc() */
	put_device(&pdev->dev);
out_free_id:
	memregion_free(id);
}
/*
 * Offer every range recorded under @target's memregions resource to
 * hmat_register_target_device().
 */
static void hmat_register_target_devices(struct memory_target *target)
{
	struct resource *range;

	/* With no driver built to consume them, creating devices is pointless. */
	if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
		return;

	range = target->memregions.child;
	while (range) {
		hmat_register_target_device(target, range);
		range = range->sibling;
	}
}
static void hmat_register_target(struct memory_target *target) static void hmat_register_target(struct memory_target *target)
{ {
int nid = pxm_to_node(target->memory_pxm); int nid = pxm_to_node(target->memory_pxm);
/*
* Devices may belong to either an offline or online
* node, so unconditionally add them.
*/
hmat_register_target_devices(target);
/* /*
* Skip offline nodes. This can happen when memory * Skip offline nodes. This can happen when memory
* marked EFI_MEMORY_SP, "specific purpose", is applied * marked EFI_MEMORY_SP, "specific purpose", is applied
@ -677,11 +779,21 @@ static __init void hmat_free_structures(void)
struct target_cache *tcache, *cnext; struct target_cache *tcache, *cnext;
list_for_each_entry_safe(target, tnext, &targets, node) { list_for_each_entry_safe(target, tnext, &targets, node) {
struct resource *res, *res_next;
list_for_each_entry_safe(tcache, cnext, &target->caches, node) { list_for_each_entry_safe(tcache, cnext, &target->caches, node) {
list_del(&tcache->node); list_del(&tcache->node);
kfree(tcache); kfree(tcache);
} }
list_del(&target->node); list_del(&target->node);
res = target->memregions.child;
while (res) {
res_next = res->sibling;
__release_region(&target->memregions, res->start,
resource_size(res));
res = res_next;
}
kfree(target); kfree(target);
} }
@ -748,4 +860,4 @@ out_put:
acpi_put_table(tbl); acpi_put_table(tbl);
return 0; return 0;
} }
subsys_initcall(hmat_init); device_initcall(hmat_init);

View File

@ -32,19 +32,36 @@ config DEV_DAX_PMEM
Say M if unsure Say M if unsure
config DEV_DAX_HMEM
tristate "HMEM DAX: direct access to 'specific purpose' memory"
depends on EFI_SOFT_RESERVE
default DEV_DAX
help
EFI 2.8 platforms, and others, may advertise 'specific purpose'
memory. For example, a high bandwidth memory pool. The
indication from platform firmware is meant to reserve the
memory from typical usage by default. This driver creates
device-dax instances for these memory ranges, and that also
enables the possibility to assign them to the DEV_DAX_KMEM
driver to override the reservation and add them to kernel
"System RAM" pool.
Say M if unsure.
config DEV_DAX_KMEM config DEV_DAX_KMEM
tristate "KMEM DAX: volatile-use of persistent memory" tristate "KMEM DAX: volatile-use of persistent memory"
default DEV_DAX default DEV_DAX
depends on DEV_DAX depends on DEV_DAX
depends on MEMORY_HOTPLUG # for add_memory() and friends depends on MEMORY_HOTPLUG # for add_memory() and friends
help help
Support access to persistent memory as if it were RAM. This Support access to persistent, or other performance
allows easier use of persistent memory by unmodified differentiated memory as if it were System RAM. This allows
applications. easier use of persistent memory by unmodified applications, or
adds core kernel memory services to heterogeneous memory types
(HMEM) marked "reserved" by platform firmware.
To use this feature, a DAX device must be unbound from the To use this feature, a DAX device must be unbound from the
device_dax driver (PMEM DAX) and bound to this kmem driver device_dax driver and bound to this kmem driver on each boot.
on each boot.
Say N if unsure. Say N if unsure.

View File

@ -2,9 +2,11 @@
obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DAX) += dax.o
obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
dax-y := super.o dax-y := super.o
dax-y += bus.o dax-y += bus.o
device_dax-y := device.o device_dax-y := device.o
dax_hmem-y := hmem.o
obj-y += pmem/ obj-y += pmem/

View File

@ -227,7 +227,7 @@ static void dax_region_unregister(void *region)
struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, int target_node, unsigned int align, struct resource *res, int target_node, unsigned int align,
unsigned long pfn_flags) unsigned long long pfn_flags)
{ {
struct dax_region *dax_region; struct dax_region *dax_region;

View File

@ -11,7 +11,7 @@ struct dax_region;
void dax_region_put(struct dax_region *dax_region); void dax_region_put(struct dax_region *dax_region);
struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, int target_node, unsigned int align, struct resource *res, int target_node, unsigned int align,
unsigned long flags); unsigned long long flags);
enum dev_dax_subsys { enum dev_dax_subsys {
DEV_DAX_BUS, DEV_DAX_BUS,

View File

@ -32,7 +32,7 @@ struct dax_region {
struct device *dev; struct device *dev;
unsigned int align; unsigned int align;
struct resource res; struct resource res;
unsigned long pfn_flags; unsigned long long pfn_flags;
}; };
/** /**

56
drivers/dax/hmem.c Normal file
View File

@ -0,0 +1,56 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/platform_device.h>
#include <linux/memregion.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include "bus.h"
/*
 * dax_hmem_probe() - create a device-dax instance for an "hmem" platform device
 * @pdev: platform device providing one IORESOURCE_MEM resource and a
 *        struct memregion_info as platform data
 *
 * Allocates a dax_region spanning the device's first memory resource and
 * creates a single dev_dax child (id 0) covering it.
 *
 * Return: 0 on success, -ENOMEM when the memory resource is missing or the
 * dax_region cannot be allocated, otherwise the error reported by
 * devm_create_dev_dax().
 */
static int dax_hmem_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct dev_pagemap pgmap = { };
	struct dax_region *dax_region;
	struct memregion_info *mri;
	struct dev_dax *dev_dax;
	struct resource *res;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENOMEM;

	/* assumes the registrant always attaches memregion_info platform data */
	mri = dev->platform_data;
	memcpy(&pgmap.res, res, sizeof(*res));

	dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node,
			PMD_SIZE, PFN_DEV|PFN_MAP);
	if (!dax_region)
		return -ENOMEM;

	dev_dax = devm_create_dev_dax(dax_region, 0, &pgmap);

	/*
	 * The reference handed back by alloc_dax_region() belongs to this
	 * function. A successfully created dev_dax owns the lifetime of the
	 * dax_region from here on, and on failure nobody else will drop our
	 * reference, so release it on both paths instead of leaking it when
	 * devm_create_dev_dax() fails.
	 * NOTE(review): confirm against the kref handling in
	 * alloc_dax_region() / devm_create_dev_dax().
	 */
	dax_region_put(dax_region);

	if (IS_ERR(dev_dax))
		return PTR_ERR(dev_dax);

	return 0;
}
/*
 * dax_hmem_remove() - platform driver remove callback
 * @pdev: platform device being unbound (unused)
 *
 * Performs no explicit work and always returns 0.
 */
static int dax_hmem_remove(struct platform_device *pdev)
{
/* devm handles teardown of everything set up in dax_hmem_probe() */
return 0;
}
/*
 * Platform driver for devices named "hmem": dax_hmem_probe() sets up the
 * device-dax instance and dax_hmem_remove() is the (empty) unbind hook.
 */
static struct platform_driver dax_hmem_driver = {
.probe = dax_hmem_probe,
.remove = dax_hmem_remove,
.driver = {
.name = "hmem",
},
};
/* Module boilerplate for dax_hmem_driver via the module_platform_driver() helper. */
module_platform_driver(dax_hmem_driver);
/* Alias pattern covering platform devices whose name starts with "hmem". */
MODULE_ALIAS("platform:hmem*");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");

View File

@ -75,6 +75,27 @@ config EFI_MAX_FAKE_MEM
Ranges can be set up to this value using comma-separated list. Ranges can be set up to this value using comma-separated list.
The default value is 8. The default value is 8.
config EFI_SOFT_RESERVE
bool "Reserve EFI Specific Purpose Memory"
depends on EFI && EFI_STUB && ACPI_HMAT
default ACPI_HMAT
help
On systems that have mixed performance classes of memory EFI
may indicate specific purpose memory with an attribute (See
EFI_MEMORY_SP in UEFI 2.8). A memory range tagged with this
attribute may have unique performance characteristics compared
to the system's general purpose "System RAM" pool. On the
expectation that such memory has application specific usage,
and its base EFI memory type is "conventional" answer Y to
arrange for the kernel to reserve it as a "Soft Reserved"
resource, and set aside for direct-access (device-dax) by
default. The memory range can later be optionally assigned to
the page allocator by system administrator policy via the
device-dax kmem facility. Say N to have the kernel treat this
memory as "System RAM" by default.
If unsure, say Y.
config EFI_PARAMS_FROM_FDT config EFI_PARAMS_FROM_FDT
bool bool
help help

View File

@ -20,13 +20,16 @@ obj-$(CONFIG_UEFI_CPER) += cper.o
obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o
obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o
obj-$(CONFIG_EFI_STUB) += libstub/ obj-$(CONFIG_EFI_STUB) += libstub/
obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_map.o
obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o
obj-$(CONFIG_EFI_TEST) += test/ obj-$(CONFIG_EFI_TEST) += test/
obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o
obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o
obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o
fake_map-y += fake_mem.o
fake_map-$(CONFIG_X86) += x86_fake_mem.o
arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o
obj-$(CONFIG_ARM) += $(arm-obj-y) obj-$(CONFIG_ARM) += $(arm-obj-y)
obj-$(CONFIG_ARM64) += $(arm-obj-y) obj-$(CONFIG_ARM64) += $(arm-obj-y)

View File

@ -163,6 +163,15 @@ static __init int is_usable_memory(efi_memory_desc_t *md)
case EFI_BOOT_SERVICES_DATA: case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY: case EFI_CONVENTIONAL_MEMORY:
case EFI_PERSISTENT_MEMORY: case EFI_PERSISTENT_MEMORY:
/*
* Special purpose memory is 'soft reserved', which means it
* is set aside initially, but can be hotplugged back in or
* be assigned to the dax driver after boot.
*/
if (efi_soft_reserve_enabled() &&
(md->attribute & EFI_MEMORY_SP))
return false;
/* /*
* According to the spec, these regions are no longer reserved * According to the spec, these regions are no longer reserved
* after calling ExitBootServices(). However, we can only use * after calling ExitBootServices(). However, we can only use

View File

@ -121,6 +121,30 @@ static int __init arm_enable_runtime_services(void)
return 0; return 0;
} }
if (efi_soft_reserve_enabled()) {
efi_memory_desc_t *md;
for_each_efi_memory_desc(md) {
int md_size = md->num_pages << EFI_PAGE_SHIFT;
struct resource *res;
if (!(md->attribute & EFI_MEMORY_SP))
continue;
res = kzalloc(sizeof(*res), GFP_KERNEL);
if (WARN_ON(!res))
break;
res->start = md->phys_addr;
res->end = md->phys_addr + md_size - 1;
res->name = "Soft Reserved";
res->flags = IORESOURCE_MEM;
res->desc = IORES_DESC_SOFT_RESERVED;
insert_resource(&iomem_resource, res);
}
}
if (efi_runtime_disabled()) { if (efi_runtime_disabled()) {
pr_info("EFI runtime services will be disabled.\n"); pr_info("EFI runtime services will be disabled.\n");
return 0; return 0;

View File

@ -81,6 +81,11 @@ bool efi_runtime_disabled(void)
return disable_runtime; return disable_runtime;
} }
/*
 * Runtime half of efi_soft_reserve_enabled(): soft reservation is on unless
 * the EFI_MEM_NO_SOFT_RESERVE flag has been set (efi=nosoftreserve).
 */
bool __pure __efi_soft_reserve_enabled(void)
{
	if (efi_enabled(EFI_MEM_NO_SOFT_RESERVE))
		return false;

	return true;
}
static int __init parse_efi_cmdline(char *str) static int __init parse_efi_cmdline(char *str)
{ {
if (!str) { if (!str) {
@ -94,6 +99,9 @@ static int __init parse_efi_cmdline(char *str)
if (parse_option_str(str, "noruntime")) if (parse_option_str(str, "noruntime"))
disable_runtime = true; disable_runtime = true;
if (parse_option_str(str, "nosoftreserve"))
set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
return 0; return 0;
} }
early_param("efi", parse_efi_cmdline); early_param("efi", parse_efi_cmdline);
@ -842,15 +850,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size,
if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
EFI_MEMORY_NV | EFI_MEMORY_NV | EFI_MEMORY_SP |
EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
snprintf(pos, size, "|attr=0x%016llx]", snprintf(pos, size, "|attr=0x%016llx]",
(unsigned long long)attr); (unsigned long long)attr);
else else
snprintf(pos, size, snprintf(pos, size,
"|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
attr & EFI_MEMORY_RUNTIME ? "RUN" : "", attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
attr & EFI_MEMORY_SP ? "SP" : "",
attr & EFI_MEMORY_NV ? "NV" : "", attr & EFI_MEMORY_NV ? "NV" : "",
attr & EFI_MEMORY_XP ? "XP" : "", attr & EFI_MEMORY_XP ? "XP" : "",
attr & EFI_MEMORY_RP ? "RP" : "", attr & EFI_MEMORY_RP ? "RP" : "",

View File

@ -246,6 +246,9 @@ void __init efi_esrt_init(void)
int rc; int rc;
phys_addr_t end; phys_addr_t end;
if (!efi_enabled(EFI_MEMMAP))
return;
pr_debug("esrt-init: loading.\n"); pr_debug("esrt-init: loading.\n");
if (!esrt_table_exists()) if (!esrt_table_exists())
return; return;

View File

@ -17,12 +17,10 @@
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/sort.h> #include <linux/sort.h>
#include <asm/efi.h> #include "fake_mem.h"
#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM];
int nr_fake_mem;
static struct efi_mem_range fake_mems[EFI_MAX_FAKEMEM];
static int nr_fake_mem;
static int __init cmp_fake_mem(const void *x1, const void *x2) static int __init cmp_fake_mem(const void *x1, const void *x2)
{ {
@ -44,13 +42,13 @@ void __init efi_fake_memmap(void)
void *new_memmap; void *new_memmap;
int i; int i;
if (!nr_fake_mem) if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem)
return; return;
/* count up the number of EFI memory descriptor */ /* count up the number of EFI memory descriptor */
for (i = 0; i < nr_fake_mem; i++) { for (i = 0; i < nr_fake_mem; i++) {
for_each_efi_memory_desc(md) { for_each_efi_memory_desc(md) {
struct range *r = &fake_mems[i].range; struct range *r = &efi_fake_mems[i].range;
new_nr_map += efi_memmap_split_count(md, r); new_nr_map += efi_memmap_split_count(md, r);
} }
@ -70,7 +68,7 @@ void __init efi_fake_memmap(void)
} }
for (i = 0; i < nr_fake_mem; i++) for (i = 0; i < nr_fake_mem; i++)
efi_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]); efi_memmap_insert(&efi.memmap, new_memmap, &efi_fake_mems[i]);
/* swap into new EFI memmap */ /* swap into new EFI memmap */
early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map); early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map);
@ -104,22 +102,22 @@ static int __init setup_fake_mem(char *p)
if (nr_fake_mem >= EFI_MAX_FAKEMEM) if (nr_fake_mem >= EFI_MAX_FAKEMEM)
break; break;
fake_mems[nr_fake_mem].range.start = start; efi_fake_mems[nr_fake_mem].range.start = start;
fake_mems[nr_fake_mem].range.end = start + mem_size - 1; efi_fake_mems[nr_fake_mem].range.end = start + mem_size - 1;
fake_mems[nr_fake_mem].attribute = attribute; efi_fake_mems[nr_fake_mem].attribute = attribute;
nr_fake_mem++; nr_fake_mem++;
if (*p == ',') if (*p == ',')
p++; p++;
} }
sort(fake_mems, nr_fake_mem, sizeof(struct efi_mem_range), sort(efi_fake_mems, nr_fake_mem, sizeof(struct efi_mem_range),
cmp_fake_mem, NULL); cmp_fake_mem, NULL);
for (i = 0; i < nr_fake_mem; i++) for (i = 0; i < nr_fake_mem; i++)
pr_info("efi_fake_mem: add attr=0x%016llx to [mem 0x%016llx-0x%016llx]", pr_info("efi_fake_mem: add attr=0x%016llx to [mem 0x%016llx-0x%016llx]",
fake_mems[i].attribute, fake_mems[i].range.start, efi_fake_mems[i].attribute, efi_fake_mems[i].range.start,
fake_mems[i].range.end); efi_fake_mems[i].range.end);
return *p == '\0' ? 0 : -EINVAL; return *p == '\0' ? 0 : -EINVAL;
} }

View File

@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __EFI_FAKE_MEM_H__
#define __EFI_FAKE_MEM_H__
#include <asm/efi.h>
/* Capacity of efi_fake_mems[], taken from the CONFIG_EFI_MAX_FAKE_MEM setting. */
#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM
/*
 * Ranges and attributes recorded by setup_fake_mem(), sorted there with
 * cmp_fake_mem(); shared between fake_mem.c and x86_fake_mem.c.
 */
extern struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM];
/* Number of valid entries in efi_fake_mems[]. */
extern int nr_fake_mem;
#endif /* __EFI_FAKE_MEM_H__ */

View File

@ -146,6 +146,11 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
continue; continue;
case EFI_CONVENTIONAL_MEMORY: case EFI_CONVENTIONAL_MEMORY:
/* Skip soft reserved conventional memory */
if (efi_soft_reserve_enabled() &&
(desc->attribute & EFI_MEMORY_SP))
continue;
/* /*
* Reserve the intersection between this entry and the * Reserve the intersection between this entry and the
* region. * region.

View File

@ -32,6 +32,7 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
static int __section(.data) __nokaslr; static int __section(.data) __nokaslr;
static int __section(.data) __quiet; static int __section(.data) __quiet;
static int __section(.data) __novamap; static int __section(.data) __novamap;
static bool __section(.data) efi_nosoftreserve;
int __pure nokaslr(void) int __pure nokaslr(void)
{ {
@ -45,6 +46,10 @@ int __pure novamap(void)
{ {
return __novamap; return __novamap;
} }
/*
 * Stub-side runtime check: soft reservation stays enabled unless
 * efi_parse_options() saw "nosoftreserve" and set efi_nosoftreserve.
 */
bool __pure __efi_soft_reserve_enabled(void)
{
	if (efi_nosoftreserve)
		return false;

	return true;
}
#define EFI_MMAP_NR_SLACK_SLOTS 8 #define EFI_MMAP_NR_SLACK_SLOTS 8
@ -211,6 +216,10 @@ again:
if (desc->type != EFI_CONVENTIONAL_MEMORY) if (desc->type != EFI_CONVENTIONAL_MEMORY)
continue; continue;
if (efi_soft_reserve_enabled() &&
(desc->attribute & EFI_MEMORY_SP))
continue;
if (desc->num_pages < nr_pages) if (desc->num_pages < nr_pages)
continue; continue;
@ -305,6 +314,10 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
if (desc->type != EFI_CONVENTIONAL_MEMORY) if (desc->type != EFI_CONVENTIONAL_MEMORY)
continue; continue;
if (efi_soft_reserve_enabled() &&
(desc->attribute & EFI_MEMORY_SP))
continue;
if (desc->num_pages < nr_pages) if (desc->num_pages < nr_pages)
continue; continue;
@ -484,6 +497,12 @@ efi_status_t efi_parse_options(char const *cmdline)
__novamap = 1; __novamap = 1;
} }
if (IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) &&
!strncmp(str, "nosoftreserve", 7)) {
str += strlen("nosoftreserve");
efi_nosoftreserve = 1;
}
/* Group words together, delimited by "," */ /* Group words together, delimited by "," */
while (*str && *str != ' ' && *str != ',') while (*str && *str != ' ' && *str != ',')
str++; str++;

View File

@ -46,6 +46,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
if (md->type != EFI_CONVENTIONAL_MEMORY) if (md->type != EFI_CONVENTIONAL_MEMORY)
return 0; return 0;
if (efi_soft_reserve_enabled() &&
(md->attribute & EFI_MEMORY_SP))
return 0;
region_end = min((u64)ULONG_MAX, md->phys_addr + md->num_pages*EFI_PAGE_SIZE - 1); region_end = min((u64)ULONG_MAX, md->phys_addr + md->num_pages*EFI_PAGE_SIZE - 1);
first_slot = round_up(md->phys_addr, align); first_slot = round_up(md->phys_addr, align);

View File

@ -0,0 +1,69 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights reserved. */
#include <linux/efi.h>
#include <asm/e820/api.h>
#include "fake_mem.h"
/*
 * efi_fake_memmap_early() - apply EFI_MEMORY_SP fake-memory requests to e820
 *
 * For every efi_fake_mem range that carries EFI_MEMORY_SP, retype the part
 * that overlaps EFI_CONVENTIONAL_MEMORY descriptors from E820_TYPE_RAM to
 * E820_TYPE_SOFT_RESERVED.
 */
void __init efi_fake_memmap_early(void)
{
	int idx;

	/*
	 * With soft reservation disabled the later efi_fake_memmap() pass can
	 * deal with every request; likewise there is nothing to do without an
	 * EFI memory map or without any fake ranges.
	 */
	if (!efi_soft_reserve_enabled() ||
	    !efi_enabled(EFI_MEMMAP) || !nr_fake_mem)
		return;

	/*
	 * efi_fake_memmap() performs memblock allocations and therefore has
	 * to run after e820__memblock_setup(). A range tagged EFI_MEMORY_SP,
	 * however, may need to be marked reserved before that point, so the
	 * e820 table is updated directly here for any EFI_MEMORY_SP request
	 * that lands on an EFI_CONVENTIONAL_MEMORY descriptor.
	 */
	for (idx = 0; idx < nr_fake_mem; idx++) {
		struct efi_mem_range *fake = &efi_fake_mems[idx];
		efi_memory_desc_t *md;
		u64 fake_start, fake_end;

		if (!(fake->attribute & EFI_MEMORY_SP))
			continue;

		fake_start = fake->range.start;
		fake_end = fake->range.end;

		for_each_efi_memory_desc(md) {
			u64 lo, hi;

			if (md->type != EFI_CONVENTIONAL_MEMORY)
				continue;

			lo = md->phys_addr;
			hi = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;

			/* skip descriptors the fake range does not touch */
			if (fake_start > hi || fake_end < lo)
				continue;

			/*
			 * Clamp the update to this descriptor so that parts
			 * of the fake range covering
			 * !EFI_CONVENTIONAL_MEMORY are left alone.
			 */
			lo = max(lo, fake_start);
			hi = min(hi, fake_end);
			if (hi <= lo)
				continue;

			e820__range_update(lo, hi - lo + 1, E820_TYPE_RAM,
					E820_TYPE_SOFT_RESERVED);
			e820__update_table(e820_table);
		}
	}
}

View File

@ -4,6 +4,7 @@ menuconfig LIBNVDIMM
depends on PHYS_ADDR_T_64BIT depends on PHYS_ADDR_T_64BIT
depends on HAS_IOMEM depends on HAS_IOMEM
depends on BLK_DEV depends on BLK_DEV
select MEMREGION
help help
Generic support for non-volatile memory devices including Generic support for non-volatile memory devices including
ACPI-6-NFIT defined resources. On platforms that define an ACPI-6-NFIT defined resources. On platforms that define an

View File

@ -455,7 +455,6 @@ static __exit void libnvdimm_exit(void)
nd_region_exit(); nd_region_exit();
nvdimm_exit(); nvdimm_exit();
nvdimm_bus_exit(); nvdimm_bus_exit();
nd_region_devs_exit();
nvdimm_devs_exit(); nvdimm_devs_exit();
} }

View File

@ -114,7 +114,6 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
int __init nvdimm_bus_init(void); int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void); void nvdimm_bus_exit(void);
void nvdimm_devs_exit(void); void nvdimm_devs_exit(void);
void nd_region_devs_exit(void);
struct nd_region; struct nd_region;
void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev); void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev);
void nd_region_create_ns_seed(struct nd_region *nd_region); void nd_region_create_ns_seed(struct nd_region *nd_region);

View File

@ -3,6 +3,7 @@
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved. * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*/ */
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <linux/memregion.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/slab.h> #include <linux/slab.h>
@ -19,7 +20,6 @@
*/ */
#include <linux/io-64-nonatomic-hi-lo.h> #include <linux/io-64-nonatomic-hi-lo.h>
static DEFINE_IDA(region_ida);
static DEFINE_PER_CPU(int, flush_idx); static DEFINE_PER_CPU(int, flush_idx);
static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
@ -133,7 +133,7 @@ static void nd_region_release(struct device *dev)
put_device(&nvdimm->dev); put_device(&nvdimm->dev);
} }
free_percpu(nd_region->lane); free_percpu(nd_region->lane);
ida_simple_remove(&region_ida, nd_region->id); memregion_free(nd_region->id);
if (is_nd_blk(dev)) if (is_nd_blk(dev))
kfree(to_nd_blk_region(dev)); kfree(to_nd_blk_region(dev));
else else
@ -985,7 +985,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
if (!region_buf) if (!region_buf)
return NULL; return NULL;
nd_region->id = ida_simple_get(&region_ida, 0, 0, GFP_KERNEL); nd_region->id = memregion_alloc(GFP_KERNEL);
if (nd_region->id < 0) if (nd_region->id < 0)
goto err_id; goto err_id;
@ -1044,7 +1044,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
return nd_region; return nd_region;
err_percpu: err_percpu:
ida_simple_remove(&region_ida, nd_region->id); memregion_free(nd_region->id);
err_id: err_id:
kfree(region_buf); kfree(region_buf);
return NULL; return NULL;
@ -1216,8 +1216,3 @@ int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict); return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
} }
void __exit nd_region_devs_exit(void)
{
ida_destroy(&region_ida);
}

View File

@ -112,6 +112,7 @@ typedef struct {
#define EFI_MEMORY_MORE_RELIABLE \ #define EFI_MEMORY_MORE_RELIABLE \
((u64)0x0000000000010000ULL) /* higher reliability */ ((u64)0x0000000000010000ULL) /* higher reliability */
#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ #define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */
#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */
#define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ #define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */
#define EFI_MEMORY_DESCRIPTOR_VERSION 1 #define EFI_MEMORY_DESCRIPTOR_VERSION 1
@ -1044,7 +1045,6 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos
extern efi_status_t efi_query_variable_store(u32 attributes, extern efi_status_t efi_query_variable_store(u32 attributes,
unsigned long size, unsigned long size,
bool nonblocking); bool nonblocking);
extern void efi_find_mirror(void);
#else #else
static inline efi_status_t efi_query_variable_store(u32 attributes, static inline efi_status_t efi_query_variable_store(u32 attributes,
@ -1202,6 +1202,7 @@ extern int __init efi_setup_pcdp_console(char *);
#define EFI_DBG 8 /* Print additional debug info at runtime */ #define EFI_DBG 8 /* Print additional debug info at runtime */
#define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ #define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */
#define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */
#define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */
#ifdef CONFIG_EFI #ifdef CONFIG_EFI
/* /*
@ -1212,6 +1213,14 @@ static inline bool efi_enabled(int feature)
return test_bit(feature, &efi.flags) != 0; return test_bit(feature, &efi.flags) != 0;
} }
extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
bool __pure __efi_soft_reserve_enabled(void);

/*
 * Soft reservation is active only when CONFIG_EFI_SOFT_RESERVE is built in
 * and the runtime check __efi_soft_reserve_enabled() agrees; the runtime
 * helper is not consulted when the config option is off.
 */
static inline bool __pure efi_soft_reserve_enabled(void)
{
	if (!IS_ENABLED(CONFIG_EFI_SOFT_RESERVE))
		return false;

	return __efi_soft_reserve_enabled();
}
#else #else
static inline bool efi_enabled(int feature) static inline bool efi_enabled(int feature)
{ {
@ -1225,6 +1234,11 @@ efi_capsule_pending(int *reset_type)
{ {
return false; return false;
} }
/* Stub for builds without CONFIG_EFI: soft reservation is never enabled. */
static inline bool efi_soft_reserve_enabled(void)
{
return false;
}
#endif #endif
extern int efi_status_to_err(efi_status_t status); extern int efi_status_to_err(efi_status_t status);

View File

@ -134,6 +134,7 @@ enum {
IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5,
IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6,
IORES_DESC_RESERVED = 7, IORES_DESC_RESERVED = 7,
IORES_DESC_SOFT_RESERVED = 8,
}; };
/* /*

23
include/linux/memregion.h Normal file
View File

@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MEMREGION_H_
#define _MEMREGION_H_
#include <linux/types.h>
#include <linux/errno.h>
/*
 * Platform data attached to a memory-region device; the hmem dax driver
 * reads it in dax_hmem_probe().
 */
struct memregion_info {
/* node id that dax_hmem_probe() passes to alloc_dax_region() as target_node */
int target_node;
};
#ifdef CONFIG_MEMREGION
int memregion_alloc(gfp_t gfp);
void memregion_free(int id);
#else
/* Stub for !CONFIG_MEMREGION: no id can be allocated, always fails with -ENOMEM. */
static inline int memregion_alloc(gfp_t gfp)
{
return -ENOMEM;
}
/*
 * Stub for !CONFIG_MEMREGION: nothing was allocated, so nothing to free.
 * Must be static inline like the memregion_alloc() stub; a plain definition
 * in a header is emitted by every file that includes it and breaks the link
 * with multiple-definition errors.
 */
static inline void memregion_free(int id)
{
}
#endif
#endif /* _MEMREGION_H_ */

View File

@ -605,6 +605,9 @@ config ARCH_NO_SG_CHAIN
config ARCH_HAS_PMEM_API config ARCH_HAS_PMEM_API
bool bool
config MEMREGION
bool
# use memcpy to implement user copies for nommu architectures # use memcpy to implement user copies for nommu architectures
config UACCESS_MEMCPY config UACCESS_MEMCPY
bool bool

View File

@ -212,6 +212,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_SG_SPLIT) += sg_split.o
obj-$(CONFIG_SG_POOL) += sg_pool.o obj-$(CONFIG_SG_POOL) += sg_pool.o
obj-$(CONFIG_MEMREGION) += memregion.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
obj-$(CONFIG_IRQ_POLL) += irq_poll.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o

18
lib/memregion.c Normal file
View File

@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-only
/* identifiers for device / performance-differentiated memory regions */
#include <linux/idr.h>
#include <linux/types.h>
static DEFINE_IDA(memregion_ids);
/*
 * memregion_alloc() - take an id from the global memregion_ids allocator
 * @gfp: allocation flags forwarded to ida_alloc()
 *
 * Return: whatever ida_alloc() returns; callers treat a negative value as
 * failure.
 */
int memregion_alloc(gfp_t gfp)
{
	int id = ida_alloc(&memregion_ids, gfp);

	return id;
}
EXPORT_SYMBOL(memregion_alloc);
/*
 * memregion_free() - return an id to the global memregion_ids allocator
 * @id: id previously handed out by memregion_alloc()
 */
void memregion_free(int id)
{
ida_free(&memregion_ids, id);
}
EXPORT_SYMBOL(memregion_free);