Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:
 "Misc updates:

   - fix e820 error handling

   - convert page table setup code from assembly to C

   - fix kexec environment bug

   - ... plus small cleanups"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/kconfig: Remove misleading note regarding hibernation and KASLR
  x86/boot: Fix KASLR and memmap= collision
  x86/e820/32: Fix e820_search_gap() error handling on x86-32
  x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C
  x86/e820: Make e820_search_gap() static and remove unused variables
This commit is contained in:
Linus Torvalds 2017-02-20 14:04:37 -08:00
commit 292d386743
9 changed files with 262 additions and 137 deletions

View File

@ -1994,10 +1994,6 @@ config RANDOMIZE_BASE
theoretically possible, but the implementations are further
limited due to memory layouts.
If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot
time. To enable it, boot with "kaslr" on the kernel command
line (which will also disable hibernation).
If unsure, say N.
# Relocation on x86 needs some additional build support

View File

@ -333,6 +333,7 @@ size_t strnlen(const char *s, size_t maxlen);
unsigned int atou(const char *s);
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
size_t strlen(const char *s);
char *strchr(const char *s, int c);
/* tty.c */
void puts(const char *);

View File

@ -11,6 +11,7 @@
*/
#include "misc.h"
#include "error.h"
#include "../boot.h"
#include <generated/compile.h>
#include <linux/module.h>
@ -52,15 +53,22 @@ static unsigned long get_boot_seed(void)
#include "../../lib/kaslr.c"
struct mem_vector {
unsigned long start;
unsigned long size;
unsigned long long start;
unsigned long long size;
};
/* Only supporting at most 4 unusable memmap regions with kaslr */
#define MAX_MEMMAP_REGIONS 4
static bool memmap_too_large;
enum mem_avoid_index {
MEM_AVOID_ZO_RANGE = 0,
MEM_AVOID_INITRD,
MEM_AVOID_CMDLINE,
MEM_AVOID_BOOTPARAMS,
MEM_AVOID_MEMMAP_BEGIN,
MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
MEM_AVOID_MAX,
};
@ -77,6 +85,123 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
return true;
}
/**
* _memparse - Parse a string with mem suffixes into a number
* @ptr: Where parse begins
* @retptr: (output) Optional pointer to next char after parse completes
*
* Parses a string into a number. The number stored at @ptr is
* potentially suffixed with K, M, G, T, P, E.
*/
static unsigned long long _memparse(const char *ptr, char **retptr)
{
char *endptr; /* Local pointer to end of parsed string */
unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
switch (*endptr) {
case 'E':
case 'e':
ret <<= 10;
case 'P':
case 'p':
ret <<= 10;
case 'T':
case 't':
ret <<= 10;
case 'G':
case 'g':
ret <<= 10;
case 'M':
case 'm':
ret <<= 10;
case 'K':
case 'k':
ret <<= 10;
endptr++;
default:
break;
}
if (retptr)
*retptr = endptr;
return ret;
}
static int
parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
{
char *oldp;
if (!p)
return -EINVAL;
/* We don't care about this option here */
if (!strncmp(p, "exactmap", 8))
return -EINVAL;
oldp = p;
*size = _memparse(p, &p);
if (p == oldp)
return -EINVAL;
switch (*p) {
case '@':
/* Skip this region, usable */
*start = 0;
*size = 0;
return 0;
case '#':
case '$':
case '!':
*start = _memparse(p + 1, &p);
return 0;
}
return -EINVAL;
}
static void mem_avoid_memmap(void)
{
char arg[128];
int rc;
int i;
char *str;
/* See if we have any memmap areas */
rc = cmdline_find_option("memmap", arg, sizeof(arg));
if (rc <= 0)
return;
i = 0;
str = arg;
while (str && (i < MAX_MEMMAP_REGIONS)) {
int rc;
unsigned long long start, size;
char *k = strchr(str, ',');
if (k)
*k++ = 0;
rc = parse_memmap(str, &start, &size);
if (rc < 0)
break;
str = k;
/* A usable region that should not be skipped */
if (size == 0)
continue;
mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
i++;
}
/* More than 4 memmaps, fail kaslr */
if ((i >= MAX_MEMMAP_REGIONS) && str)
memmap_too_large = true;
}
/*
* In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
* The mem_avoid array is used to store the ranges that need to be avoided
@ -197,6 +322,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* We don't need to set a mapping for setup_data. */
/* Mark the memmap regions we need to avoid */
mem_avoid_memmap();
#ifdef CONFIG_X86_VERBOSE_BOOTUP
/* Make sure video RAM can be used. */
add_identity_map(0, PMD_SIZE);
@ -379,6 +507,12 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
int i;
unsigned long addr;
/* Check if we had too many memmaps. */
if (memmap_too_large) {
debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n");
return 0;
}
/* Make sure minimum is aligned. */
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
@ -456,7 +590,7 @@ void choose_random_location(unsigned long input,
/* Walk e820 and find a random address. */
random_addr = find_random_phys_addr(min_addr, output_size);
if (!random_addr) {
warn("KASLR disabled: could not find suitable E820 region!");
warn("Physical KASLR disabled: no suitable memory region!");
} else {
/* Update the new physical address location. */
if (*output != random_addr) {

View File

@ -156,3 +156,16 @@ char *strstr(const char *s1, const char *s2)
}
return NULL;
}
/**
* strchr - Find the first occurrence of the character c in the string s.
* @s: the string to be searched
* @c: the character to search for
*/
char *strchr(const char *s, int c)
{
while (*s != (char)c)
if (*s++ == '\0')
return NULL;
return (char *)s;
}

View File

@ -30,8 +30,6 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
int checktype);
extern void update_e820(void);
extern void e820_setup_gap(void);
extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
unsigned long start_addr, unsigned long long end_addr);
struct setup_data;
extern void parse_e820_ext(u64 phys_addr, u32 data_len);

View File

@ -27,6 +27,7 @@ struct vm_area_struct;
extern pgd_t swapper_pg_dir[1024];
extern pgd_t initial_page_table[1024];
extern pmd_t initial_pg_pmd[];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
@ -75,4 +76,35 @@ do { \
#define kern_addr_valid(kaddr) (0)
#endif
/*
* This is how much memory in addition to the memory covered up to
* and including _end we need mapped initially.
* We need:
* (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
* (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
*
* Modulo rounding, each megabyte assigned here requires a kilobyte of
* memory, which is currently unreclaimed.
*
* This should be a multiple of a page.
*
* KERNEL_IMAGE_SIZE should be greater than pa(_end)
* and small than max_low_pfn, otherwise will waste some page table entries
*/
#if PTRS_PER_PMD > 1
#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
#else
#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
#endif
/*
* Number of possible pages in the lowmem region.
*
* We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
* gas warning about overflowing shift count when gas has been compiled
* with only a host target support using a 32-bit type for internal
* representation.
*/
#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
#endif /* _ASM_X86_PGTABLE_32_H */

View File

@ -580,24 +580,19 @@ static void __init update_e820_saved(void)
}
#define MAX_GAP_END 0x100000000ull
/*
* Search for a gap in the e820 memory space from start_addr to end_addr.
* Search for a gap in the e820 memory space from 0 to MAX_GAP_END.
*/
__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
unsigned long start_addr, unsigned long long end_addr)
static int __init e820_search_gap(unsigned long *gapstart,
unsigned long *gapsize)
{
unsigned long long last;
unsigned long long last = MAX_GAP_END;
int i = e820->nr_map;
int found = 0;
last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
while (--i >= 0) {
unsigned long long start = e820->map[i].addr;
unsigned long long end = start + e820->map[i].size;
if (end < start_addr)
continue;
/*
* Since "last" is at most 4GB, we know we'll
* fit in 32 bits if this condition is true
@ -628,18 +623,19 @@ __init void e820_setup_gap(void)
unsigned long gapstart, gapsize;
int found;
gapstart = 0x10000000;
gapsize = 0x400000;
found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
found = e820_search_gap(&gapstart, &gapsize);
#ifdef CONFIG_X86_64
if (!found) {
#ifdef CONFIG_X86_64
gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
printk(KERN_ERR
"e820: cannot find a gap in the 32bit address range\n"
"e820: PCI devices with unassigned 32bit BARs may break!\n");
}
#else
gapstart = 0x10000000;
#endif
}
/*
* e820_reserve_resources_late protect stolen RAM already

View File

@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void)
start_kernel();
}
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond __brk_base. The variable
* _brk_end is set up to point to the first "safe" location.
* Mappings are created both at virtual address 0 (identity mapping)
* and PAGE_OFFSET for up to _end.
*
* In PAE mode initial_page_table is statically defined to contain
* enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
* entries). The identity mapping is handled by pointing two PGD entries
* to the first kernel PMD. Note the upper half of each PMD or PTE are
* always zero at this stage.
*/
void __init mk_early_pgtbl_32(void)
{
#ifdef __pa
#undef __pa
#endif
#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
pte_t pte, *ptep;
int i;
unsigned long *ptr;
/* Enough space to fit pagetables for the low memory linear map */
const unsigned long limit = __pa(_end) +
(PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
#ifdef CONFIG_X86_PAE
pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd);
#define SET_PL2(pl2, val) { (pl2).pmd = (val); }
#else
pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table);
#define SET_PL2(pl2, val) { (pl2).pgd = (val); }
#endif
ptep = (pte_t *)__pa(__brk_base);
pte.pte = PTE_IDENT_ATTR;
while ((pte.pte & PTE_PFN_MASK) < limit) {
SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR);
*pl2p = pl2;
#ifndef CONFIG_X86_PAE
/* Kernel PDE entry */
*(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2;
#endif
for (i = 0; i < PTRS_PER_PTE; i++) {
*ptep = pte;
pte.pte += PAGE_SIZE;
ptep++;
}
pl2p++;
}
ptr = (unsigned long *)__pa(&max_pfn_mapped);
/* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */
*ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
ptr = (unsigned long *)__pa(&_brk_end);
*ptr = (unsigned long)ptep + PAGE_OFFSET;
}

View File

@ -24,6 +24,7 @@
#include <asm/nops.h>
#include <asm/bootparam.h>
#include <asm/export.h>
#include <asm/pgtable_32.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
@ -41,43 +42,9 @@
#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
/*
* This is how much memory in addition to the memory covered up to
* and including _end we need mapped initially.
* We need:
* (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
* (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
*
* Modulo rounding, each megabyte assigned here requires a kilobyte of
* memory, which is currently unreclaimed.
*
* This should be a multiple of a page.
*
* KERNEL_IMAGE_SIZE should be greater than pa(_end)
* and small than max_low_pfn, otherwise will waste some page table entries
*/
#if PTRS_PER_PMD > 1
#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
#else
#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
#endif
#define SIZEOF_PTREGS 17*4
/*
* Number of possible pages in the lowmem region.
*
* We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
* gas warning about overflowing shift count when gas has been compiled
* with only a host target support using a 32-bit type for internal
* representation.
*/
LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
/* Enough space to fit pagetables for the low memory linear map */
MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
/*
* Worst-case size of the kernel mapping we need to make:
* a relocatable kernel can live anywhere in lowmem, so we need to be able
@ -160,90 +127,15 @@ ENTRY(startup_32)
call load_ucode_bsp
#endif
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond __brk_base. The variable
* _brk_end is set up to point to the first "safe" location.
* Mappings are created both at virtual address 0 (identity mapping)
* and PAGE_OFFSET for up to _end.
*/
/* Create early pagetables. */
call mk_early_pgtbl_32
/* Do early initialization of the fixmap area */
movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
#ifdef CONFIG_X86_PAE
/*
* In PAE mode initial_page_table is statically defined to contain
* enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
* entries). The identity mapping is handled by pointing two PGD entries
* to the first kernel PMD.
*
* Note the upper half of each PMD or PTE are always zero at this stage.
*/
#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
xorl %ebx,%ebx /* %ebx is kept at zero */
movl $pa(__brk_base), %edi
movl $pa(initial_pg_pmd), %edx
movl $PTE_IDENT_ATTR, %eax
10:
leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
movl %ecx,(%edx) /* Store PMD entry */
/* Upper half already zero */
addl $8,%edx
movl $512,%ecx
11:
stosl
xchgl %eax,%ebx
stosl
xchgl %eax,%ebx
addl $0x1000,%eax
loop 11b
/*
* End condition: we must map up to the end + MAPPING_BEYOND_END.
*/
movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
1:
addl $__PAGE_OFFSET, %edi
movl %edi, pa(_brk_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
#else /* Not PAE */
page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(__brk_base), %edi
movl $pa(initial_page_table), %edx
movl $PTE_IDENT_ATTR, %eax
10:
leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
movl %ecx,(%edx) /* Store identity PDE entry */
movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
addl $4,%edx
movl $1024, %ecx
11:
stosl
addl $0x1000,%eax
loop 11b
/*
* End condition: we must map up to the end + MAPPING_BEYOND_END.
*/
movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
addl $__PAGE_OFFSET, %edi
movl %edi, pa(_brk_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
#else
movl %eax,pa(initial_page_table+0xffc)
#endif
@ -666,6 +558,7 @@ ENTRY(setup_once_ref)
__PAGE_ALIGNED_BSS
.align PAGE_SIZE
#ifdef CONFIG_X86_PAE
.globl initial_pg_pmd
initial_pg_pmd:
.fill 1024*KPMDS,4,0
#else