s390: unify identity mapping limits handling

Currently we have to consider too many different values which
in the end only affect identity mapping size. These are:
1. max_physmem_end - end of physical memory online or standby.
   Always >= end of the last online memory block (get_mem_detect_end()).
2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
   kernel is able to support.
3. "mem=" kernel command line option which limits physical memory usage.
4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as
   crash kernel.
5. "hsa" size which is a memory limit when the kernel is executed during
   zfcp/nvme dump.

Throughout kernel startup and run time we juggle all those values at once,
but that does not bring any amusement, only confusion and complexity.

Unify all those values into the single one we should really care about,
that is, our identity mapping size.

Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
This commit is contained in:
Vasily Gorbik 2020-10-19 11:01:33 +02:00 committed by Heiko Carstens
parent 1e632eaa0f
commit 73045a08cf
10 changed files with 109 additions and 80 deletions

View File

@ -2,20 +2,23 @@
#ifndef BOOT_BOOT_H #ifndef BOOT_BOOT_H
#define BOOT_BOOT_H #define BOOT_BOOT_H
#include <linux/types.h>
void startup_kernel(void); void startup_kernel(void);
void detect_memory(void); unsigned long detect_memory(void);
bool is_ipl_block_dump(void);
void store_ipl_parmblock(void); void store_ipl_parmblock(void);
void setup_boot_command_line(void); void setup_boot_command_line(void);
void parse_boot_command_line(void); void parse_boot_command_line(void);
void setup_memory_end(void);
void verify_facilities(void); void verify_facilities(void);
void print_missing_facilities(void); void print_missing_facilities(void);
void print_pgm_check_info(void); void print_pgm_check_info(void);
unsigned long get_random_base(unsigned long safe_addr); unsigned long get_random_base(unsigned long safe_addr);
extern int kaslr_enabled;
extern int vmalloc_size_set;
extern const char kernel_version[]; extern const char kernel_version[];
extern unsigned long memory_limit;
extern int vmalloc_size_set;
extern int kaslr_enabled;
unsigned long read_ipl_report(unsigned long safe_offset); unsigned long read_ipl_report(unsigned long safe_offset);

View File

@ -17,10 +17,9 @@ int __bootdata_preserved(ipl_block_valid);
unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE; unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE;
unsigned long __bootdata(memory_end);
int __bootdata(memory_end_set);
int __bootdata(noexec_disabled); int __bootdata(noexec_disabled);
unsigned long memory_limit;
int vmalloc_size_set; int vmalloc_size_set;
int kaslr_enabled; int kaslr_enabled;
@ -58,6 +57,17 @@ void store_ipl_parmblock(void)
ipl_block_valid = 1; ipl_block_valid = 1;
} }
/*
 * Tell whether the stored IPL parameter block requests a dump:
 * either an FCP block carrying the FCP dump option, or an NVMe block
 * carrying the NVMe dump option. The option field of a given block
 * type is only looked at once the block type has matched.
 */
bool is_ipl_block_dump(void)
{
	return (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
		ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) ||
	       (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME &&
		ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP);
}
static size_t scpdata_length(const u8 *buf, size_t count) static size_t scpdata_length(const u8 *buf, size_t count)
{ {
while (count) { while (count) {
@ -238,10 +248,8 @@ void parse_boot_command_line(void)
while (*args) { while (*args) {
args = next_arg(args, &param, &val); args = next_arg(args, &param, &val);
if (!strcmp(param, "mem") && val) { if (!strcmp(param, "mem") && val)
memory_end = round_down(memparse(val, NULL), PAGE_SIZE); memory_limit = round_down(memparse(val, NULL), PAGE_SIZE);
memory_end_set = 1;
}
if (!strcmp(param, "vmalloc") && val) { if (!strcmp(param, "vmalloc") && val) {
vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE);
@ -282,27 +290,3 @@ void parse_boot_command_line(void)
#endif #endif
} }
} }
/*
 * Tell whether the stored IPL parameter block requests a dump:
 * either an FCP block carrying the FCP dump option, or an NVMe block
 * carrying the NVMe dump option. The option field of a given block
 * type is only looked at once the block type has matched.
 */
static inline bool is_ipl_block_dump(void)
{
	return (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
		ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) ||
	       (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME &&
		ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP);
}
/*
 * With CONFIG_CRASH_DUMP: switch KASLR off when OLDMEM_BASE is set or
 * when a valid IPL block requests a dump. In the dump case the HSA size
 * is additionally read straight into memory_end, and memory_end_set is
 * raised if that read succeeded with a non-zero size.
 * Without CONFIG_CRASH_DUMP this function does nothing.
 */
void setup_memory_end(void)
{
#ifdef CONFIG_CRASH_DUMP
	if (OLDMEM_BASE) {
		kaslr_enabled = 0;
		return;
	}
	if (!ipl_block_valid || !is_ipl_block_dump())
		return;

	kaslr_enabled = 0;
	/* a non-zero return means the HSA size could not be obtained */
	if (sclp_early_get_hsa_size(&memory_end))
		return;
	if (memory_end)
		memory_end_set = 1;
#endif
}

View File

@ -177,8 +177,7 @@ unsigned long get_random_base(unsigned long safe_addr)
unsigned long kasan_needs; unsigned long kasan_needs;
int i; int i;
if (memory_end_set) memory_limit = min(memory_limit, ident_map_size);
memory_limit = min(memory_limit, memory_end);
/* /*
* Avoid putting kernel in the end of physical memory * Avoid putting kernel in the end of physical memory

View File

@ -8,7 +8,6 @@
#include "compressed/decompressor.h" #include "compressed/decompressor.h"
#include "boot.h" #include "boot.h"
unsigned long __bootdata(max_physmem_end);
struct mem_detect_info __bootdata(mem_detect); struct mem_detect_info __bootdata(mem_detect);
/* up to 256 storage elements, 1020 subincrements each */ /* up to 256 storage elements, 1020 subincrements each */
@ -149,27 +148,29 @@ static void search_mem_end(void)
add_mem_detect_block(0, (offset + 1) << 20); add_mem_detect_block(0, (offset + 1) << 20);
} }
/*
 * Side-by-side diff of detect_memory(): left column is the old version,
 * right column the new one.
 *
 * Both versions try the detection sources in the same order and record the
 * one that succeeded in mem_detect.info_source: sclp_early_read_storage_info(),
 * then diag260(), then a single block [0, max_physmem_end) when the size
 * filled in by sclp_early_get_memsize() is non-zero, and finally
 * search_mem_end().
 *
 * The new version returns max_physmem_end to the caller instead of leaving
 * it in a global; on the search_mem_end() path it returns
 * get_mem_detect_end().
 *
 * NOTE(review): in the new version max_physmem_end is a local without an
 * initializer and the return value of sclp_early_get_memsize() is ignored.
 * If that call can fail without storing a value, the later
 * "if (max_physmem_end)" test and the early returns use an indeterminate
 * value -- confirm, and consider initializing the local to 0.
 */
void detect_memory(void) unsigned long detect_memory(void)
{ {
unsigned long max_physmem_end;
sclp_early_get_memsize(&max_physmem_end); sclp_early_get_memsize(&max_physmem_end);
if (!sclp_early_read_storage_info()) { if (!sclp_early_read_storage_info()) {
mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO;
return; return max_physmem_end;
} }
if (!diag260()) { if (!diag260()) {
mem_detect.info_source = MEM_DETECT_DIAG260; mem_detect.info_source = MEM_DETECT_DIAG260;
return; return max_physmem_end;
} }
if (max_physmem_end) { if (max_physmem_end) {
add_mem_detect_block(0, max_physmem_end); add_mem_detect_block(0, max_physmem_end);
mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO;
return; return max_physmem_end;
} }
search_mem_end(); search_mem_end();
mem_detect.info_source = MEM_DETECT_BIN_SEARCH; mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
max_physmem_end = get_mem_detect_end(); return get_mem_detect_end();
} }

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/string.h> #include <linux/string.h>
#include <linux/elf.h> #include <linux/elf.h>
#include <asm/boot_data.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/cpu_mf.h> #include <asm/cpu_mf.h>
#include <asm/setup.h> #include <asm/setup.h>
@ -14,6 +15,7 @@
extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata(ident_map_size);
/* /*
* Some code and data needs to stay below 2 GB, even when the kernel would be * Some code and data needs to stay below 2 GB, even when the kernel would be
@ -127,6 +129,46 @@ static void handle_relocs(unsigned long offset)
} }
} }
/*
* Merge information from several sources into a single ident_map_size value.
* "ident_map_size" represents the upper limit of physical memory we may ever
* reach. It might not be all online memory, but also include standby (offline)
* memory. "ident_map_size" could be lower than actual standby or even online
* memory present, due to limiting factors. We should never go above this limit.
* It is the size of our identity mapping.
*
* Consider the following factors:
* 1. max_physmem_end - end of physical memory online or standby.
* Always >= end of the last online memory block (get_mem_detect_end()).
* 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
* kernel is able to support.
* 3. "mem=" kernel command line option which limits physical memory usage.
* 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as
* crash kernel.
* 5. "hsa" size which is a memory limit when the kernel is executed during
* zfcp/nvme dump.
*/
static void setup_ident_map_size(unsigned long max_physmem_end)
{
	unsigned long limit = max_physmem_end;
	unsigned long hsa_size;

	/* a memory_limit of 0 means no limit was requested */
	if (memory_limit)
		limit = min(limit, memory_limit);
	/* never exceed what the kernel can address physically */
	limit = min(limit, 1UL << MAX_PHYSMEM_BITS);
	ident_map_size = limit;

#ifdef CONFIG_CRASH_DUMP
	if (OLDMEM_BASE) {
		/* OLDMEM_BASE set: no KASLR, and stay within OLDMEM_SIZE */
		kaslr_enabled = 0;
		ident_map_size = min(ident_map_size, OLDMEM_SIZE);
		return;
	}
	if (!ipl_block_valid || !is_ipl_block_dump())
		return;

	/* dump IPL: no KASLR, and stay within the HSA size if it is known */
	kaslr_enabled = 0;
	if (sclp_early_get_hsa_size(&hsa_size))
		return;
	if (hsa_size)
		ident_map_size = min(ident_map_size, hsa_size);
#endif
}
/* /*
* This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
*/ */
@ -145,8 +187,7 @@ static void setup_vmalloc_size(void)
if (vmalloc_size_set) if (vmalloc_size_set)
return; return;
size = (memory_end ?: max_physmem_end) >> 3; size = round_up(ident_map_size / 8, _SEGMENT_SIZE);
size = round_up(size, _SEGMENT_SIZE);
vmalloc_size = max(size, vmalloc_size); vmalloc_size = max(size, vmalloc_size);
} }
@ -165,8 +206,7 @@ void startup_kernel(void)
sclp_early_read_info(); sclp_early_read_info();
setup_boot_command_line(); setup_boot_command_line();
parse_boot_command_line(); parse_boot_command_line();
setup_memory_end(); setup_ident_map_size(detect_memory());
detect_memory();
setup_vmalloc_size(); setup_vmalloc_size();
random_lma = __kaslr_offset = 0; random_lma = __kaslr_offset = 0;

View File

@ -86,10 +86,8 @@ extern unsigned int zlib_dfltcc_support;
#define ZLIB_DFLTCC_FULL_DEBUG 4 #define ZLIB_DFLTCC_FULL_DEBUG 4
extern int noexec_disabled; extern int noexec_disabled;
extern int memory_end_set; extern unsigned long ident_map_size;
extern unsigned long memory_end;
extern unsigned long vmalloc_size; extern unsigned long vmalloc_size;
extern unsigned long max_physmem_end;
/* The Write Back bit position in the physaddr is given by the SLPC PCI */ /* The Write Back bit position in the physaddr is given by the SLPC PCI */
extern unsigned long mio_wb_bit_mask; extern unsigned long mio_wb_bit_mask;

View File

@ -94,10 +94,8 @@ char elf_platform[ELF_PLATFORM_SIZE];
unsigned long int_hwcap = 0; unsigned long int_hwcap = 0;
int __bootdata(noexec_disabled); int __bootdata(noexec_disabled);
int __bootdata(memory_end_set); unsigned long __bootdata(ident_map_size);
unsigned long __bootdata(memory_end);
unsigned long __bootdata(vmalloc_size); unsigned long __bootdata(vmalloc_size);
unsigned long __bootdata(max_physmem_end);
struct mem_detect_info __bootdata(mem_detect); struct mem_detect_info __bootdata(mem_detect);
struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
@ -557,12 +555,12 @@ static void __init setup_resources(void)
#endif #endif
} }
static void __init setup_memory_end(void) static void __init setup_ident_map_size(void)
{ {
unsigned long vmax, tmp; unsigned long vmax, tmp;
/* Choose kernel address space layout: 3 or 4 levels. */ /* Choose kernel address space layout: 3 or 4 levels. */
tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; tmp = ident_map_size / PAGE_SIZE;
tmp = tmp * (sizeof(struct page) + PAGE_SIZE); tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
vmax = _REGION2_SIZE; /* 3-level kernel page table */ vmax = _REGION2_SIZE; /* 3-level kernel page table */
@ -589,22 +587,22 @@ static void __init setup_memory_end(void)
tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
vmemmap = (struct page *) tmp; vmemmap = (struct page *) tmp;
/* Take care that memory_end is set and <= vmemmap */ /* Take care that ident_map_size <= vmemmap */
memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap); ident_map_size = min(ident_map_size, (unsigned long)vmemmap);
#ifdef CONFIG_KASAN #ifdef CONFIG_KASAN
memory_end = min(memory_end, KASAN_SHADOW_START); ident_map_size = min(ident_map_size, KASAN_SHADOW_START);
#endif #endif
vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page); vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
#ifdef CONFIG_KASAN #ifdef CONFIG_KASAN
/* move vmemmap above kasan shadow only if stands in a way */ /* move vmemmap above kasan shadow only if stands in a way */
if (KASAN_SHADOW_END > (unsigned long)vmemmap && if (KASAN_SHADOW_END > (unsigned long)vmemmap &&
(unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START) (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START)
vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
#endif #endif
max_pfn = max_low_pfn = PFN_DOWN(memory_end); max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
memblock_remove(memory_end, ULONG_MAX); memblock_remove(ident_map_size, ULONG_MAX);
pr_notice("The maximum memory size is %luMB\n", memory_end >> 20); pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
} }
#ifdef CONFIG_CRASH_DUMP #ifdef CONFIG_CRASH_DUMP
@ -634,12 +632,11 @@ static struct notifier_block kdump_mem_nb = {
#endif #endif
/* /*
* Make sure that the area behind memory_end is protected * Make sure that the area above identity mapping is protected
*/ */
/*
 * Side-by-side diff: left column is the old reserve_memory_end(), right
 * column the new reserve_above_ident_map().
 * The old version called memblock_reserve(memory_end, ULONG_MAX) only when
 * memory_end_set was non-zero; the new version always calls
 * memblock_reserve(ident_map_size, ULONG_MAX).
 */
static void __init reserve_memory_end(void) static void __init reserve_above_ident_map(void)
{ {
if (memory_end_set) memblock_reserve(ident_map_size, ULONG_MAX);
memblock_reserve(memory_end, ULONG_MAX);
} }
/* /*
@ -676,7 +673,7 @@ static void __init reserve_crashkernel(void)
phys_addr_t low, high; phys_addr_t low, high;
int rc; int rc;
rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
&crash_base); &crash_base);
crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
@ -1130,7 +1127,7 @@ void __init setup_arch(char **cmdline_p)
setup_control_program_code(); setup_control_program_code();
/* Do some memory reservations *before* memory is added to memblock */ /* Do some memory reservations *before* memory is added to memblock */
reserve_memory_end(); reserve_above_ident_map();
reserve_oldmem(); reserve_oldmem();
reserve_kernel(); reserve_kernel();
reserve_initrd(); reserve_initrd();
@ -1145,9 +1142,9 @@ void __init setup_arch(char **cmdline_p)
remove_oldmem(); remove_oldmem();
setup_uv(); setup_uv();
setup_memory_end(); setup_ident_map_size();
setup_memory(); setup_memory();
dma_contiguous_reserve(memory_end); dma_contiguous_reserve(ident_map_size);
vmcp_cma_reserve(); vmcp_cma_reserve();
check_initrd(); check_initrd();

View File

@ -255,7 +255,7 @@ static int pt_dump_init(void)
*/ */
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31); max_addr = 1UL << (max_addr * 11 + 31);
address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end; address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[MODULES_NR].start_address = MODULES_VADDR;
address_markers[MODULES_END_NR].start_address = MODULES_END; address_markers[MODULES_END_NR].start_address = MODULES_END;
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;

View File

@ -289,12 +289,19 @@ void __init kasan_early_init(void)
memsize = get_mem_detect_end(); memsize = get_mem_detect_end();
if (!memsize) if (!memsize)
kasan_early_panic("cannot detect physical memory size\n"); kasan_early_panic("cannot detect physical memory size\n");
/* respect mem= cmdline parameter */ /*
if (memory_end_set && memsize > memory_end) * Kasan currently supports standby memory but only if it follows
memsize = memory_end; * online memory (default allocation), i.e. no memory holes.
if (IS_ENABLED(CONFIG_CRASH_DUMP) && OLDMEM_BASE) * - memsize represents end of online memory
memsize = min(memsize, OLDMEM_SIZE); * - ident_map_size represents online + standby and memory limits
memsize = min(memsize, KASAN_SHADOW_START); * accounted.
* Kasan maps "memsize" right away.
* [0, memsize] - as identity mapping
* [__sha(0), __sha(memsize)] - shadow memory for identity mapping
* The rest [memsize, ident_map_size] if memsize < ident_map_size
* could be mapped/unmapped dynamically later during memory hotplug.
*/
memsize = min(memsize, ident_map_size);
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
@ -377,7 +384,7 @@ void __init kasan_early_init(void)
POPULATE_SHALLOW); POPULATE_SHALLOW);
} }
/* populate kasan shadow for untracked memory */ /* populate kasan shadow for untracked memory */
kasan_early_pgtable_populate(__sha(max_physmem_end), __sha(untracked_mem_end), kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_mem_end),
POPULATE_ZERO_SHADOW); POPULATE_ZERO_SHADOW);
kasan_early_pgtable_populate(__sha(kasan_vmax), __sha(vmax_unlimited), kasan_early_pgtable_populate(__sha(kasan_vmax), __sha(vmax_unlimited),
POPULATE_ZERO_SHADOW); POPULATE_ZERO_SHADOW);

View File

@ -401,10 +401,10 @@ static void __init add_memory_merged(u16 rn)
goto skip_add; goto skip_add;
if (start + size > VMEM_MAX_PHYS) if (start + size > VMEM_MAX_PHYS)
size = VMEM_MAX_PHYS - start; size = VMEM_MAX_PHYS - start;
if (memory_end_set && (start >= memory_end)) if (start >= ident_map_size)
goto skip_add; goto skip_add;
if (memory_end_set && (start + size > memory_end)) if (start + size > ident_map_size)
size = memory_end - start; size = ident_map_size - start;
block_size = memory_block_size_bytes(); block_size = memory_block_size_bytes();
align_to_block_size(&start, &size, block_size); align_to_block_size(&start, &size, block_size);
if (!size) if (!size)