Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 boot updates from Ingo Molnar:
 "The main changes:

   - add initial commits to randomize kernel memory section virtual
     addresses, enabled via a new kernel option: RANDOMIZE_MEMORY
     (Thomas Garnier, Kees Cook, Baoquan He, Yinghai Lu)

   - enhance KASLR (RANDOMIZE_BASE) physical memory randomization (Kees Cook)

   - EBDA/BIOS region boot quirk cleanups (Andy Lutomirski, Ingo Molnar)

   - misc cleanups/fixes"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/boot: Simplify EBDA-vs-BIOS reservation logic
  x86/boot: Clarify what x86_legacy_features.reserve_bios_regions does
  x86/boot: Reorganize and clean up the BIOS area reservation code
  x86/mm: Do not reference phys addr beyond kernel
  x86/mm: Add memory hotplug support for KASLR memory randomization
  x86/mm: Enable KASLR for vmalloc memory regions
  x86/mm: Enable KASLR for physical mapping memory regions
  x86/mm: Implement ASLR for kernel memory regions
  x86/mm: Separate variable for trampoline PGD
  x86/mm: Add PUD VA support for physical mapping
  x86/mm: Update physical mapping variable names
  x86/mm: Refactor KASLR entropy functions
  x86/KASLR: Fix boot crash with certain memory configurations
  x86/boot/64: Add forgotten end of function marker
  x86/KASLR: Allow randomization below the load address
  x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G
  x86/KASLR: Randomize virtual address separately
  x86/KASLR: Clarify identity map interface
  x86/boot: Refuse to build with data relocations
  x86/KASLR, x86/power: Remove x86 hibernation restrictions
commit 77cd3d0c43
@@ -1803,12 +1803,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

	js=		[HW,JOY] Analog joystick
			See Documentation/input/joystick.txt.

	kaslr/nokaslr	[X86]
			Enable/disable kernel and module base offset ASLR
			(Address Space Layout Randomization) if built into
			the kernel. When CONFIG_HIBERNATION is selected,
			kASLR is disabled by default. When kASLR is enabled,
			hibernation will be disabled.
	nokaslr		[KNL]
			When CONFIG_RANDOMIZE_BASE is set, this disables
			kernel and module base offset ASLR (Address Space
			Layout Randomization).

	keepinitrd	[HW,ARM]
@@ -39,4 +39,8 @@ memory window (this size is arbitrary, it can be raised later if needed).
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.

Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.

-Andi Kleen, Jul 2004
@@ -1929,21 +1929,26 @@ config RANDOMIZE_BASE
	  attempts relying on knowledge of the location of kernel
	  code internals.

	  The kernel physical and virtual address can be randomized
	  from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that
	  using RANDOMIZE_BASE reduces the memory space available to
	  kernel modules from 1.5GB to 1GB.)
	  On 64-bit, the kernel physical and virtual addresses are
	  randomized separately. The physical address will be anywhere
	  between 16MB and the top of physical memory (up to 64TB). The
	  virtual address will be randomized from 16MB up to 1GB (9 bits
	  of entropy). Note that this also reduces the memory space
	  available to kernel modules from 1.5GB to 1GB.

	  On 32-bit, the kernel physical and virtual addresses are
	  randomized together. They will be randomized from 16MB up to
	  512MB (8 bits of entropy).

	  Entropy is generated using the RDRAND instruction if it is
	  supported. If RDTSC is supported, its value is mixed into
	  the entropy pool as well. If neither RDRAND nor RDTSC are
	  supported, then entropy is read from the i8254 timer.
	  Since the kernel is built using 2GB addressing, and
	  PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of
	  entropy is theoretically possible. Currently, with the
	  default value for PHYSICAL_ALIGN and due to page table
	  layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits.
	  supported, then entropy is read from the i8254 timer. The
	  usable entropy is limited by the kernel being built using
	  2GB addressing, and that PHYSICAL_ALIGN must be at a
	  minimum of 2MB. As a result, only 10 bits of entropy are
	  theoretically possible, but the implementations are further
	  limited due to memory layouts.

	  If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot
	  time. To enable it, boot with "kaslr" on the kernel command
@@ -1983,6 +1988,38 @@ config PHYSICAL_ALIGN

	  Don't change this unless you know what you are doing.

config RANDOMIZE_MEMORY
	bool "Randomize the kernel memory sections"
	depends on X86_64
	depends on RANDOMIZE_BASE
	default RANDOMIZE_BASE
	---help---
	  Randomizes the base virtual address of kernel memory sections
	  (physical memory mapping, vmalloc & vmemmap). This security feature
	  makes exploits relying on predictable memory locations less reliable.

	  The order of allocations remains unchanged. Entropy is generated in
	  the same way as RANDOMIZE_BASE. The current implementation, in the
	  optimal configuration, provides on average 30,000 different possible
	  virtual addresses for each memory section.

	  If unsure, say N.

config RANDOMIZE_MEMORY_PHYSICAL_PADDING
	hex "Physical memory mapping padding" if EXPERT
	depends on RANDOMIZE_MEMORY
	default "0xa" if MEMORY_HOTPLUG
	default "0x0"
	range 0x1 0x40 if MEMORY_HOTPLUG
	range 0x0 0x40
	---help---
	  Define the padding in terabytes added to the existing physical
	  memory size during kernel memory randomization. It is useful
	  for memory hotplug support but reduces the entropy available for
	  address randomization.

	  If unsure, leave at the default value.

config HOTPLUG_CPU
	bool "Support for hot-pluggable CPUs"
	depends on SMP
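
How the padding option is meant to be read can be sketched with made-up numbers (the detected-memory value below is an assumption, not from the commit):

#include <stdio.h>

int main(void)
{
	unsigned long memory_tb  = 4;	/* detected RAM, in TB (example) */
	unsigned long padding_tb = 0xa;	/* CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING */
	unsigned long region_tb  = memory_tb + padding_tb;

	/* Memory hotplugged later, up to region_tb, still lands inside
	 * the randomized direct mapping region sized at boot. */
	printf("physical mapping region sized for %lu TB\n", region_tb);
	return 0;
}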
@@ -85,7 +85,25 @@ vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
	$(objtree)/drivers/firmware/efi/libstub/lib.a
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o

# The compressed kernel is built with -fPIC/-fPIE so that a boot loader
# can place it anywhere in memory and it will still run. However, since
# it is executed as-is without any ELF relocation processing performed
# (and has already had all relocation sections stripped from the binary),
# none of the code can use data relocations (e.g. static assignments of
# pointer values), since they will be meaningless at runtime. This check
# will refuse to link the vmlinux if any of these relocations are found.
quiet_cmd_check_data_rel = DATAREL $@
define cmd_check_data_rel
	for obj in $(filter %.o,$^); do \
		readelf -S $$obj | grep -qF .rel.local && { \
			echo "error: $$obj has data relocations!" >&2; \
			exit 1; \
		} || true; \
	done
endef

$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
	$(call if_changed,check_data_rel)
	$(call if_changed,ld)

OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
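
To see the kind of construct this check catches, consider a minimal C sketch (not from the commit) of a static pointer initializer, which forces the toolchain to emit a data relocation:

static int value;
static int *value_ptr = &value;	/* pointer stored in .data: needs a data
				 * relocation, meaningless once the blob is
				 * copied to a random address */

/* The position-independent alternative: assign at runtime. */
static int *safe_ptr;
void init_ptrs(void)
{
	safe_ptr = &value;	/* computed via PC-relative code, no reloc */
}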
@@ -12,10 +12,6 @@
#include "misc.h"
#include "error.h"

#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820.h>

#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
@@ -26,26 +22,6 @@
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;

#define I8254_PORT_CONTROL	0x43
#define I8254_PORT_COUNTER0	0x40
#define I8254_CMD_READBACK	0xC0
#define I8254_SELECT_COUNTER0	0x02
#define I8254_STATUS_NOTREADY	0x40
static inline u16 i8254(void)
{
	u16 status, timer;

	do {
		outb(I8254_PORT_CONTROL,
		     I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
		status = inb(I8254_PORT_COUNTER0);
		timer  = inb(I8254_PORT_COUNTER0);
		timer |= inb(I8254_PORT_COUNTER0) << 8;
	} while (status & I8254_STATUS_NOTREADY);

	return timer;
}

static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
@@ -62,7 +38,7 @@ static unsigned long rotate_xor(unsigned long hash, const void *area,
}

/* Attempt to create a simple but unpredictable starting entropy. */
static unsigned long get_random_boot(void)
static unsigned long get_boot_seed(void)
{
	unsigned long hash = 0;

@@ -72,50 +48,8 @@ static unsigned long get_random_boot(void)
	return hash;
}

static unsigned long get_random_long(const char *purpose)
{
#ifdef CONFIG_X86_64
	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
#else
	const unsigned long mix_const = 0x3f39e593UL;
#endif
	unsigned long raw, random = get_random_boot();
	bool use_i8254 = true;

	debug_putstr(purpose);
	debug_putstr(" KASLR using");

	if (has_cpuflag(X86_FEATURE_RDRAND)) {
		debug_putstr(" RDRAND");
		if (rdrand_long(&raw)) {
			random ^= raw;
			use_i8254 = false;
		}
	}

	if (has_cpuflag(X86_FEATURE_TSC)) {
		debug_putstr(" RDTSC");
		raw = rdtsc();

		random ^= raw;
		use_i8254 = false;
	}

	if (use_i8254) {
		debug_putstr(" i8254");
		random ^= i8254();
	}

	/* Circular multiply for better bit diffusion */
	asm("mul %3"
	    : "=a" (random), "=d" (raw)
	    : "a" (random), "rm" (mix_const));
	random += raw;

	debug_putstr("...\n");

	return random;
}
#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"
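
The bodies of rotate_xor() and get_boot_seed() are elided by the hunks above. For orientation, a rotate-and-XOR mixer of this general shape (a sketch, not necessarily the verbatim kernel code) folds a memory area into a running hash:

#include <stddef.h>

static unsigned long rotate_xor_sketch(unsigned long hash, const void *area,
				       size_t size)
{
	const unsigned long *ptr = area;
	size_t i;

	for (i = 0; i < size / sizeof(hash); i++) {
		/* Rotate by an odd number of bits, then XOR in a word. */
		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
		hash ^= ptr[i];
	}
	return hash;
}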
struct mem_vector {
	unsigned long start;
@@ -132,17 +66,6 @@ enum mem_avoid_index {

static struct mem_vector mem_avoid[MEM_AVOID_MAX];

static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
{
	/* Item at least partially before region. */
	if (item->start < region->start)
		return false;
	/* Item at least partially after region. */
	if (item->start + item->size > region->start + region->size)
		return false;
	return true;
}

static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
	/* Item one is entirely before item two. */
@@ -296,6 +219,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
		if (mem_overlaps(img, &mem_avoid[i]) &&
		    mem_avoid[i].start < earliest) {
			*overlap = mem_avoid[i];
			earliest = overlap->start;
			is_overlapping = true;
		}
	}
@@ -310,6 +234,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,

		if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
			*overlap = avoid;
			earliest = overlap->start;
			is_overlapping = true;
		}

@@ -319,8 +244,6 @@ static bool mem_avoid_overlap(struct mem_vector *img,
	return is_overlapping;
}
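
The body of mem_overlaps() is elided by the hunk above; for orientation, the usual shape of such a half-open interval test (a sketch, not necessarily the verbatim kernel code):

#include <stdbool.h>

static bool overlaps(unsigned long a_start, unsigned long a_size,
		     unsigned long b_start, unsigned long b_size)
{
	if (a_start + a_size <= b_start)	/* a ends before b starts */
		return false;
	if (b_start + b_size <= a_start)	/* b ends before a starts */
		return false;
	return true;
}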
static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN];

struct slot_area {
	unsigned long addr;
	int num;
@@ -351,36 +274,44 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
	}
}

static void slots_append(unsigned long addr)
{
	/* Overflowing the slots list should be impossible. */
	if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN)
		return;

	slots[slot_max++] = addr;
}

static unsigned long slots_fetch_random(void)
{
	unsigned long slot;
	int i;

	/* Handle case of no slots stored. */
	if (slot_max == 0)
		return 0;

	return slots[get_random_long("Physical") % slot_max];
	slot = kaslr_get_random_long("Physical") % slot_max;

	for (i = 0; i < slot_area_index; i++) {
		if (slot >= slot_areas[i].num) {
			slot -= slot_areas[i].num;
			continue;
		}
		return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
	}

	if (i == slot_area_index)
		debug_putstr("slots_fetch_random() failed!?\n");
	return 0;
}
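
The slot_area walk above can be exercised outside the kernel; a small illustrative harness (all values made up) maps a random index to an (area, offset) pair the same way:

#include <stdio.h>

struct slot_area { unsigned long addr; int num; };

int main(void)
{
	struct slot_area areas[] = { { 0x1000000, 3 }, { 0x8000000, 5 } };
	int slot_max = 8;			/* 3 + 5 slots in total */
	unsigned long align = 0x200000;		/* CONFIG_PHYSICAL_ALIGN */
	unsigned long slot = 6 % slot_max;	/* pretend random draw */
	int i;

	for (i = 0; i < 2; i++) {
		if (slot >= (unsigned long)areas[i].num) {
			slot -= areas[i].num;	/* skip this area entirely */
			continue;
		}
		/* prints 0x8600000: area 1, third 2MB slot */
		printf("chosen: 0x%lx\n", areas[i].addr + slot * align);
		break;
	}
	return 0;
}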
static void process_e820_entry(struct e820entry *entry,
			       unsigned long minimum,
			       unsigned long image_size)
{
	struct mem_vector region, img, overlap;
	struct mem_vector region, overlap;
	struct slot_area slot_area;
	unsigned long start_orig;

	/* Skip non-RAM entries. */
	if (entry->type != E820_RAM)
		return;

	/* Ignore entries entirely above our maximum. */
	if (entry->addr >= KERNEL_IMAGE_SIZE)
	/* On 32-bit, ignore entries entirely above our maximum. */
	if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE)
		return;

	/* Ignore entries entirely below our minimum. */
@@ -390,31 +321,55 @@ static void process_e820_entry(struct e820entry *entry,
	region.start = entry->addr;
	region.size = entry->size;

	/* Potentially raise address to minimum location. */
	if (region.start < minimum)
		region.start = minimum;
	/* Give up if slot area array is full. */
	while (slot_area_index < MAX_SLOT_AREA) {
		start_orig = region.start;

	/* Potentially raise address to meet alignment requirements. */
	region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
		/* Potentially raise address to minimum location. */
		if (region.start < minimum)
			region.start = minimum;

	/* Did we raise the address above the bounds of this e820 region? */
	if (region.start > entry->addr + entry->size)
		return;
		/* Potentially raise address to meet alignment needs. */
		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);

	/* Reduce size by any delta from the original address. */
	region.size -= region.start - entry->addr;
		/* Did we raise the address above this e820 region? */
		if (region.start > entry->addr + entry->size)
			return;

	/* Reduce maximum size to fit end of image within maximum limit. */
	if (region.start + region.size > KERNEL_IMAGE_SIZE)
		region.size = KERNEL_IMAGE_SIZE - region.start;
		/* Reduce size by any delta from the original address. */
		region.size -= region.start - start_orig;

	/* Walk each aligned slot and check for avoided areas. */
	for (img.start = region.start, img.size = image_size ;
	     mem_contains(&region, &img) ;
	     img.start += CONFIG_PHYSICAL_ALIGN) {
		if (mem_avoid_overlap(&img, &overlap))
			continue;
		slots_append(img.start);
		/* On 32-bit, reduce region size to fit within max size. */
		if (IS_ENABLED(CONFIG_X86_32) &&
		    region.start + region.size > KERNEL_IMAGE_SIZE)
			region.size = KERNEL_IMAGE_SIZE - region.start;

		/* Return if region can't contain decompressed kernel */
		if (region.size < image_size)
			return;

		/* If nothing overlaps, store the region and return. */
		if (!mem_avoid_overlap(&region, &overlap)) {
			store_slot_info(&region, image_size);
			return;
		}

		/* Store beginning of region if holds at least image_size. */
		if (overlap.start > region.start + image_size) {
			struct mem_vector beginning;

			beginning.start = region.start;
			beginning.size = overlap.start - region.start;
			store_slot_info(&beginning, image_size);
		}

		/* Return if overlap extends to or past end of region. */
		if (overlap.start + overlap.size >= region.start + region.size)
			return;

		/* Clip off the overlapping region and start over. */
		region.size -= overlap.start - region.start + overlap.size;
		region.start = overlap.start + overlap.size;
	}
}
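
The clip-and-retry loop above is easier to see on toy numbers; a sketch (hypothetical region and avoided range, not from the commit) of one iteration:

#include <stdio.h>

int main(void)
{
	unsigned long r_start = 0x1000, r_size = 0x9000;	/* candidate region */
	unsigned long a_start = 0x4000, a_size = 0x1000;	/* avoided range */
	unsigned long image = 0x2000;				/* kernel image size */

	if (a_start > r_start + image)	/* the piece before the overlap fits */
		printf("slot area: 0x%lx..0x%lx\n", r_start, a_start);

	/* Clip and continue after the avoided range. */
	r_size -= a_start - r_start + a_size;
	r_start = a_start + a_size;
	if (r_size >= image)
		printf("slot area: 0x%lx..0x%lx\n", r_start, r_start + r_size);
	return 0;
}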
@@ -431,6 +386,10 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
	for (i = 0; i < boot_params->e820_entries; i++) {
		process_e820_entry(&boot_params->e820_map[i], minimum,
				   image_size);
		if (slot_area_index == MAX_SLOT_AREA) {
			debug_putstr("Aborted e820 scan (slot_areas full)!\n");
			break;
		}
	}

	return slots_fetch_random();
@@ -454,7 +413,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
	slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
		 CONFIG_PHYSICAL_ALIGN + 1;

	random_addr = get_random_long("Virtual") % slots;
	random_addr = kaslr_get_random_long("Virtual") % slots;

	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}
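
Plugging assumed example values into the virtual slot formula above (KERNEL_IMAGE_SIZE = 1GB, CONFIG_PHYSICAL_ALIGN = 2MB, a 16MB image at a 16MB minimum — illustrative, not taken from a real boot):

#include <stdio.h>

int main(void)
{
	unsigned long image_size = 16UL << 20;		/* example kernel image */
	unsigned long minimum    = 16UL << 20;		/* LOAD_PHYSICAL_ADDR */
	unsigned long kernel_image_size = 1UL << 30;	/* KERNEL_IMAGE_SIZE */
	unsigned long align = 2UL << 20;		/* CONFIG_PHYSICAL_ALIGN */
	unsigned long slots =
		(kernel_image_size - minimum - image_size) / align + 1;

	printf("%lu virtual slots (~9 bits of entropy)\n", slots);	/* 497 */
	return 0;
}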
@@ -463,48 +422,54 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
 * Since this function examines addresses much more numerically,
 * it takes the input and output pointers as 'unsigned long'.
 */
unsigned char *choose_random_location(unsigned long input,
				      unsigned long input_size,
				      unsigned long output,
				      unsigned long output_size)
void choose_random_location(unsigned long input,
			    unsigned long input_size,
			    unsigned long *output,
			    unsigned long output_size,
			    unsigned long *virt_addr)
{
	unsigned long choice = output;
	unsigned long random_addr;
	unsigned long random_addr, min_addr;

	/* By default, keep output position unchanged. */
	*virt_addr = *output;

#ifdef CONFIG_HIBERNATION
	if (!cmdline_find_option_bool("kaslr")) {
		warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected).");
		goto out;
	}
#else
	if (cmdline_find_option_bool("nokaslr")) {
		warn("KASLR disabled: 'nokaslr' on cmdline.");
		goto out;
		return;
	}
#endif

	boot_params->hdr.loadflags |= KASLR_FLAG;

	/* Prepare to add new identity pagetables on demand. */
	initialize_identity_maps();

	/* Record the various known unsafe memory ranges. */
	mem_avoid_init(input, input_size, output);
	mem_avoid_init(input, input_size, *output);

	/*
	 * Low end of the randomization range should be the
	 * smaller of 512M or the initial kernel image
	 * location:
	 */
	min_addr = min(*output, 512UL << 20);

	/* Walk e820 and find a random address. */
	random_addr = find_random_phys_addr(output, output_size);
	random_addr = find_random_phys_addr(min_addr, output_size);
	if (!random_addr) {
		warn("KASLR disabled: could not find suitable E820 region!");
		goto out;
	} else {
		/* Update the new physical address location. */
		if (*output != random_addr) {
			add_identity_map(random_addr, output_size);
			*output = random_addr;
		}
	}

	/* Always enforce the minimum. */
	if (random_addr < choice)
		goto out;

	choice = random_addr;

	add_identity_map(choice, output_size);

	/* This actually loads the identity pagetable on x86_64. */
	finalize_identity_maps();
out:
	return (unsigned char *)choice;

	/* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
	if (IS_ENABLED(CONFIG_X86_64))
		random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
	*virt_addr = random_addr;
}
@@ -170,7 +170,8 @@ void __puthex(unsigned long value)
}

#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len)
static void handle_relocations(void *output, unsigned long output_len,
			       unsigned long virt_addr)
{
	int *reloc;
	unsigned long delta, map, ptr;
@@ -182,11 +183,6 @@ static void handle_relocations(void *output, unsigned long output_len)
	 * and where it was actually loaded.
	 */
	delta = min_addr - LOAD_PHYSICAL_ADDR;
	if (!delta) {
		debug_putstr("No relocation needed... ");
		return;
	}
	debug_putstr("Performing relocations... ");

	/*
	 * The kernel contains a table of relocation addresses. Those
@@ -197,6 +193,20 @@ static void handle_relocations(void *output, unsigned long output_len)
	 */
	map = delta - __START_KERNEL_map;

	/*
	 * 32-bit always performs relocations. 64-bit relocations are only
	 * needed if KASLR has chosen a different starting address offset
	 * from __START_KERNEL_map.
	 */
	if (IS_ENABLED(CONFIG_X86_64))
		delta = virt_addr - LOAD_PHYSICAL_ADDR;

	if (!delta) {
		debug_putstr("No relocation needed... ");
		return;
	}
	debug_putstr("Performing relocations... ");

	/*
	 * Process relocations: 32 bit relocations first then 64 bit after.
	 * Three sets of binary relocations are added to the end of the kernel
@@ -250,7 +260,8 @@ static void handle_relocations(void *output, unsigned long output_len)
#endif
}
#else
static inline void handle_relocations(void *output, unsigned long output_len)
static inline void handle_relocations(void *output, unsigned long output_len,
				      unsigned long virt_addr)
{ }
#endif
@@ -327,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
				  unsigned long output_len)
{
	const unsigned long kernel_total_size = VO__end - VO__text;
	unsigned char *output_orig = output;
	unsigned long virt_addr = (unsigned long)output;

	/* Retain x86 boot parameters pointer passed from startup_32/64. */
	boot_params = rmode;
@@ -366,13 +377,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
	 * the entire decompressed kernel plus relocation table, or the
	 * entire decompressed kernel plus .bss and .brk sections.
	 */
	output = choose_random_location((unsigned long)input_data, input_len,
					(unsigned long)output,
					max(output_len, kernel_total_size));
	choose_random_location((unsigned long)input_data, input_len,
			       (unsigned long *)&output,
			       max(output_len, kernel_total_size),
			       &virt_addr);

	/* Validate memory location choices. */
	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
		error("Destination address inappropriately aligned");
		error("Destination physical address inappropriately aligned");
	if (virt_addr & (MIN_KERNEL_ALIGN - 1))
		error("Destination virtual address inappropriately aligned");
#ifdef CONFIG_X86_64
	if (heap > 0x3fffffffffffUL)
		error("Destination address too large");
@@ -382,19 +396,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#endif
#ifndef CONFIG_RELOCATABLE
	if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
		error("Wrong destination address");
		error("Destination address does not match LOAD_PHYSICAL_ADDR");
	if ((unsigned long)output != virt_addr)
		error("Destination virtual address changed when not relocatable");
#endif

	debug_putstr("\nDecompressing Linux... ");
	__decompress(input_data, input_len, NULL, NULL, output, output_len,
		     NULL, error);
	parse_elf(output);
	/*
	 * 32-bit always performs relocations. 64-bit relocations are only
	 * needed if kASLR has chosen a different load address.
	 */
	if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
		handle_relocations(output, output_len);
	handle_relocations(output, output_len, virt_addr);
	debug_putstr("done.\nBooting the kernel.\n");
	return output;
}
@@ -67,28 +67,33 @@ int cmdline_find_option_bool(const char *option);

#if CONFIG_RANDOMIZE_BASE
/* kaslr.c */
unsigned char *choose_random_location(unsigned long input_ptr,
				      unsigned long input_size,
				      unsigned long output_ptr,
				      unsigned long output_size);
void choose_random_location(unsigned long input,
			    unsigned long input_size,
			    unsigned long *output,
			    unsigned long output_size,
			    unsigned long *virt_addr);
/* cpuflags.c */
bool has_cpuflag(int flag);
#else
static inline
unsigned char *choose_random_location(unsigned long input_ptr,
				      unsigned long input_size,
				      unsigned long output_ptr,
				      unsigned long output_size)
static inline void choose_random_location(unsigned long input,
					  unsigned long input_size,
					  unsigned long *output,
					  unsigned long output_size,
					  unsigned long *virt_addr)
{
	return (unsigned char *)output_ptr;
	/* No change from existing output location. */
	*virt_addr = *output;
}
#endif

#ifdef CONFIG_X86_64
void initialize_identity_maps(void);
void add_identity_map(unsigned long start, unsigned long size);
void finalize_identity_maps(void);
extern unsigned char _pgtable[];
#else
static inline void initialize_identity_maps(void)
{ }
static inline void add_identity_map(unsigned long start, unsigned long size)
{ }
static inline void finalize_identity_maps(void)
@@ -2,6 +2,9 @@
 * This code is used on x86_64 to create page table identity mappings on
 * demand by building up a new set of page tables (or appending to the
 * existing ones), and then switching over to them when ready.
 *
 * Copyright (C) 2015-2016  Yinghai Lu
 * Copyright (C) 2016  Kees Cook
 */

/*
@@ -17,6 +20,9 @@
/* These actually do the work of building the kernel identity maps. */
#include <asm/init.h>
#include <asm/pgtable.h>
/* Use the static base for this part of the boot process */
#undef __PAGE_OFFSET
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"

/* Used by pgtable.h asm code to force instruction serialization. */
@@ -59,9 +65,21 @@ static struct alloc_pgt_data pgt_data;
/* The top level page table entry pointer. */
static unsigned long level4p;

/*
 * Mapping information structure passed to kernel_ident_mapping_init().
 * Due to relocation, pointers must be assigned at run time not build time.
 */
static struct x86_mapping_info mapping_info = {
	.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
};

/* Locates and clears a region for a new top level page table. */
static void prepare_level4(void)
void initialize_identity_maps(void)
{
	/* Init mapping_info with run-time function/buffer pointers. */
	mapping_info.alloc_pgt_page = alloc_pgt_page;
	mapping_info.context = &pgt_data;

	/*
	 * It should be impossible for this not to already be true,
	 * but since calling this a second time would rewind the other
@@ -96,17 +114,8 @@ static void prepare_level4(void)
 */
void add_identity_map(unsigned long start, unsigned long size)
{
	struct x86_mapping_info mapping_info = {
		.alloc_pgt_page	= alloc_pgt_page,
		.context	= &pgt_data,
		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
	};
	unsigned long end = start + size;

	/* Make sure we have a top level page table ready to use. */
	if (!level4p)
		prepare_level4();

	/* Align boundary to 2M. */
	start = round_down(start, PMD_SIZE);
	end = round_up(end, PMD_SIZE);
@@ -17,7 +17,7 @@ static inline unsigned int get_bios_ebda(void)
	return address;	/* 0 means none */
}

void reserve_ebda_region(void);
void reserve_bios_regions(void);

#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
/*
new file: arch/x86/include/asm/kaslr.h (15 lines)
@@ -0,0 +1,15 @@
#ifndef _ASM_KASLR_H_
#define _ASM_KASLR_H_

unsigned long kaslr_get_random_long(const char *purpose);

#ifdef CONFIG_RANDOMIZE_MEMORY
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;

void kernel_randomize_memory(void);
#else
static inline void kernel_randomize_memory(void) { }
#endif /* CONFIG_RANDOMIZE_MEMORY */

#endif
@@ -1,6 +1,10 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H

#ifndef __ASSEMBLY__
#include <asm/kaslr.h>
#endif

#ifdef CONFIG_KASAN
#define KASAN_STACK_ORDER 1
#else
@@ -32,7 +36,12 @@
 * hypervisor to fit.  Choosing 16 slots here is arbitrary, but it's
 * what Xen requires.
 */
#define __PAGE_OFFSET		_AC(0xffff880000000000, UL)
#define __PAGE_OFFSET_BASE	_AC(0xffff880000000000, UL)
#ifdef CONFIG_RANDOMIZE_MEMORY
#define __PAGE_OFFSET		page_offset_base
#else
#define __PAGE_OFFSET		__PAGE_OFFSET_BASE
#endif /* CONFIG_RANDOMIZE_MEMORY */

#define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
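
The point of the new indirection is that __va()-style translations expand through __PAGE_OFFSET; routed through a variable, every phys-to-virt translation follows the base chosen at boot. A simplified sketch outside the kernel (names and values illustrative):

unsigned long page_offset_base = 0xffff880000000000UL;	/* re-set early at boot
							 * by the randomizer */

static inline void *va_sketch(unsigned long paddr)
{
	/* What __va() reduces to once __PAGE_OFFSET is a variable. */
	return (void *)(paddr + page_offset_base);
}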
@@ -736,6 +736,23 @@ extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);

#ifdef CONFIG_X86_64
/* Realmode trampoline initialization. */
extern pgd_t trampoline_pgd_entry;
static inline void __meminit init_trampoline_default(void)
{
	/* Default trampoline pgd value */
	trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)];
}
# ifdef CONFIG_RANDOMIZE_MEMORY
void __meminit init_trampoline(void);
# else
# define init_trampoline init_trampoline_default
# endif
#else
static inline void init_trampoline(void) { }
#endif

/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
{
@@ -5,6 +5,7 @@

#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <asm/kaslr.h>

/*
 * These are used to make use of C type-checking..
@@ -53,10 +54,16 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_MASK	(~(PGDIR_SIZE - 1))

/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#define VMALLOC_START	_AC(0xffffc90000000000, UL)
#define VMALLOC_END	_AC(0xffffe8ffffffffff, UL)
#define VMEMMAP_START	_AC(0xffffea0000000000, UL)
#define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#define VMALLOC_SIZE_TB	_AC(32, UL)
#define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
#define VMEMMAP_START	_AC(0xffffea0000000000, UL)
#ifdef CONFIG_RANDOMIZE_MEMORY
#define VMALLOC_START	vmalloc_base
#else
#define VMALLOC_START	__VMALLOC_BASE
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
#define MODULES_VADDR	(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
#define MODULES_END	_AC(0xffffffffff000000, UL)
#define MODULES_LEN	(MODULES_END - MODULES_VADDR)
@@ -168,14 +168,15 @@ struct x86_legacy_devices {
 * struct x86_legacy_features - legacy x86 features
 *
 * @rtc: this device has a CMOS real-time clock present
 * @ebda_search: it's safe to search for the EBDA signature in the hardware's
 * 	low RAM
 * @reserve_bios_regions: boot code will search for the EBDA address and the
 * 	start of the 640k - 1M BIOS region.  If false, the platform must
 * 	ensure that its memory map correctly reserves sub-1MB regions as needed.
 * @devices: legacy x86 devices, refer to struct x86_legacy_devices
 * 	documentation for further details.
 */
struct x86_legacy_features {
	int rtc;
	int ebda_search;
	int reserve_bios_regions;
	struct x86_legacy_devices devices;
};
@@ -6,66 +6,92 @@
#include <asm/bios_ebda.h>

/*
 * This function reserves all conventional PC system BIOS related
 * firmware memory areas (some of which are data, some of which
 * are code), that must not be used by the kernel as available
 * RAM.
 *
 * The BIOS places the EBDA/XBDA at the top of conventional
 * memory, and usually decreases the reported amount of
 * conventional memory (int 0x12) too. This also contains a
 * workaround for Dell systems that neglect to reserve EBDA.
 * The same workaround also avoids a problem with the AMD768MPX
 * chipset: reserve a page before VGA to prevent PCI prefetch
 * into it (errata #56). Usually the page is reserved anyways,
 * unless you have no PS/2 mouse plugged in.
 * conventional memory (int 0x12) too.
 *
 * This functions is deliberately very conservative. Losing
 * memory in the bottom megabyte is rarely a problem, as long
 * as we have enough memory to install the trampoline. Using
 * memory that is in use by the BIOS or by some DMA device
 * the BIOS didn't shut down *is* a big problem.
 * This means that as a first approximation on most systems we can
 * guess the reserved BIOS area by looking at the low BIOS RAM size
 * value and assume that everything above that value (up to 1MB) is
 * reserved.
 *
 * But life in firmware country is not that simple:
 *
 * - This code also contains a quirk for Dell systems that neglect
 *   to reserve the EBDA area in the 'RAM size' value ...
 *
 * - The same quirk also avoids a problem with the AMD768MPX
 *   chipset: reserve a page before VGA to prevent PCI prefetch
 *   into it (errata #56). (Usually the page is reserved anyways,
 *   unless you have no PS/2 mouse plugged in.)
 *
 * - Plus paravirt systems don't have a reliable value in the
 *   'BIOS RAM size' pointer we can rely on, so we must quirk
 *   them too.
 *
 * Due to those various problems this function is deliberately
 * very conservative and tries to err on the side of reserving
 * too much, to not risk reserving too little.
 *
 * Losing a small amount of memory in the bottom megabyte is
 * rarely a problem, as long as we have enough memory to install
 * the SMP bootup trampoline which *must* be in this area.
 *
 * Using memory that is in use by the BIOS or by some DMA device
 * the BIOS didn't shut down *is* a big problem to the kernel,
 * obviously.
 */

#define BIOS_LOWMEM_KILOBYTES	0x413
#define LOWMEM_CAP		0x9f000U	/* Absolute maximum */
#define INSANE_CUTOFF		0x20000U	/* Less than this = insane */
#define BIOS_RAM_SIZE_KB_PTR	0x413

void __init reserve_ebda_region(void)
#define BIOS_START_MIN		0x20000U	/* 128K, less than this is insane */
#define BIOS_START_MAX		0x9f000U	/* 640K, absolute maximum */

void __init reserve_bios_regions(void)
{
	unsigned int lowmem, ebda_addr;
	unsigned int bios_start, ebda_start;

	/*
	 * To determine the position of the EBDA and the
	 * end of conventional memory, we need to look at
	 * the BIOS data area. In a paravirtual environment
	 * that area is absent. We'll just have to assume
	 * that the paravirt case can handle memory setup
	 * correctly, without our help.
	 * NOTE: In a paravirtual environment the BIOS reserved
	 * area is absent. We'll just have to assume that the
	 * paravirt case can handle memory setup correctly,
	 * without our help.
	 */
	if (!x86_platform.legacy.ebda_search)
	if (!x86_platform.legacy.reserve_bios_regions)
		return;

	/* end of low (conventional) memory */
	lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
	lowmem <<= 10;

	/* start of EBDA area */
	ebda_addr = get_bios_ebda();
	/*
	 * BIOS RAM size is encoded in kilobytes, convert it
	 * to bytes to get a first guess at where the BIOS
	 * firmware area starts:
	 */
	bios_start = *(unsigned short *)__va(BIOS_RAM_SIZE_KB_PTR);
	bios_start <<= 10;

	/*
	 * Note: some old Dells seem to need 4k EBDA without
	 * reporting so, so just consider the memory above 0x9f000
	 * to be off limits (bugzilla 2990).
	 * If bios_start is less than 128K, assume it is bogus
	 * and bump it up to 640K.  Similarly, if bios_start is above 640K,
	 * don't trust it.
	 */
	if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
		bios_start = BIOS_START_MAX;

	/* If the EBDA address is below 128K, assume it is bogus */
	if (ebda_addr < INSANE_CUTOFF)
		ebda_addr = LOWMEM_CAP;
	/* Get the start address of the EBDA page: */
	ebda_start = get_bios_ebda();

	/* If lowmem is less than 128K, assume it is bogus */
	if (lowmem < INSANE_CUTOFF)
		lowmem = LOWMEM_CAP;
	/*
	 * If the EBDA start address is sane and is below the BIOS region,
	 * then also reserve everything from the EBDA start address up to
	 * the BIOS region.
	 */
	if (ebda_start >= BIOS_START_MIN && ebda_start < bios_start)
		bios_start = ebda_start;

	/* Use the lower of the lowmem and EBDA markers as the cutoff */
	lowmem = min(lowmem, ebda_addr);
	lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */

	/* reserve all memory between lowmem and the 1MB mark */
	memblock_reserve(lowmem, 0x100000 - lowmem);
	/* Reserve all memory between bios_start and the 1MB mark: */
	memblock_reserve(bios_start, 0x100000 - bios_start);
}
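
A worked example of the clamping above, with hypothetical firmware values (not taken from real hardware):

#include <stdio.h>

int main(void)
{
	unsigned int bios_start = 639 * 1024;	/* 0x9fc00 from the 0x413 word */
	unsigned int ebda_start = 0x9fc00;	/* EBDA pointer from the BDA */

	if (bios_start < 0x20000U || bios_start > 0x9f000U)
		bios_start = 0x9f000U;		/* distrust insane values */
	if (ebda_start >= 0x20000U && ebda_start < bios_start)
		bios_start = ebda_start;	/* extend down to the EBDA */

	printf("reserving 0x%x..0x100000\n", bios_start);	/* 0x9f000 */
	return 0;
}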
@@ -26,7 +26,7 @@ static void __init i386_default_early_setup(void)
	x86_init.resources.reserve_resources = i386_reserve_resources;
	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;

	reserve_ebda_region();
	reserve_bios_regions();
}

asmlinkage __visible void __init i386_start_kernel(void)
@@ -183,7 +183,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
	copy_bootdata(__va(real_mode_data));

	x86_early_init_platform_quirks();
	reserve_ebda_region();
	reserve_bios_regions();

	switch (boot_params.hdr.hardware_subarch) {
	case X86_SUBARCH_INTEL_MID:
@@ -38,7 +38,7 @@

#define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)

@@ -299,6 +299,7 @@ ENTRY(secondary_startup_64)
	pushq	$__KERNEL_CS	# set correct cs
	pushq	%rax		# target address in negative space
	lretq
ENDPROC(secondary_startup_64)

#include "verify_cpu.S"
@@ -7,12 +7,12 @@
void __init x86_early_init_platform_quirks(void)
{
	x86_platform.legacy.rtc = 1;
	x86_platform.legacy.ebda_search = 0;
	x86_platform.legacy.reserve_bios_regions = 0;
	x86_platform.legacy.devices.pnpbios = 1;

	switch (boot_params.hdr.hardware_subarch) {
	case X86_SUBARCH_PC:
		x86_platform.legacy.ebda_search = 1;
		x86_platform.legacy.reserve_bios_regions = 1;
		break;
	case X86_SUBARCH_XEN:
	case X86_SUBARCH_LGUEST:
@@ -113,6 +113,7 @@
#include <asm/prom.h>
#include <asm/microcode.h>
#include <asm/mmu_context.h>
#include <asm/kaslr.h>

/*
 * max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -942,6 +943,8 @@ void __init setup_arch(char **cmdline_p)

	x86_init.oem.arch_setup();

	kernel_randomize_memory();

	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
	setup_memory_map();
	parse_setup_data();
@@ -24,6 +24,7 @@ lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o

obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
new file: arch/x86/lib/kaslr.c (90 lines)
@@ -0,0 +1,90 @@
/*
 * Entropy functions used on early boot for KASLR base and memory
 * randomization. The base randomization is done in the compressed
 * kernel and memory randomization is done early when the regular
 * kernel starts. This file is included in the compressed kernel and
 * normally linked in the regular.
 */
#include <asm/kaslr.h>
#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820.h>
#include <asm/io.h>

/*
 * When built for the regular kernel, several functions need to be stubbed out
 * or changed to their regular kernel equivalent.
 */
#ifndef KASLR_COMPRESSED_BOOT
#include <asm/cpufeature.h>
#include <asm/setup.h>

#define debug_putstr(v) early_printk(v)
#define has_cpuflag(f) boot_cpu_has(f)
#define get_boot_seed() kaslr_offset()
#endif

#define I8254_PORT_CONTROL	0x43
#define I8254_PORT_COUNTER0	0x40
#define I8254_CMD_READBACK	0xC0
#define I8254_SELECT_COUNTER0	0x02
#define I8254_STATUS_NOTREADY	0x40
static inline u16 i8254(void)
{
	u16 status, timer;

	do {
		outb(I8254_PORT_CONTROL,
		     I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
		status = inb(I8254_PORT_COUNTER0);
		timer  = inb(I8254_PORT_COUNTER0);
		timer |= inb(I8254_PORT_COUNTER0) << 8;
	} while (status & I8254_STATUS_NOTREADY);

	return timer;
}

unsigned long kaslr_get_random_long(const char *purpose)
{
#ifdef CONFIG_X86_64
	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
#else
	const unsigned long mix_const = 0x3f39e593UL;
#endif
	unsigned long raw, random = get_boot_seed();
	bool use_i8254 = true;

	debug_putstr(purpose);
	debug_putstr(" KASLR using");

	if (has_cpuflag(X86_FEATURE_RDRAND)) {
		debug_putstr(" RDRAND");
		if (rdrand_long(&raw)) {
			random ^= raw;
			use_i8254 = false;
		}
	}

	if (has_cpuflag(X86_FEATURE_TSC)) {
		debug_putstr(" RDTSC");
		raw = rdtsc();

		random ^= raw;
		use_i8254 = false;
	}

	if (use_i8254) {
		debug_putstr(" i8254");
		random ^= i8254();
	}

	/* Circular multiply for better bit diffusion */
	asm("mul %3"
	    : "=a" (random), "=d" (raw)
	    : "a" (random), "rm" (mix_const));
	random += raw;

	debug_putstr("...\n");

	return random;
}
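
The inline "mul" above computes the full 128-bit product of the 64-bit seed and mix_const (low half in RAX, high half in RDX) and then folds the high half back in with "random += raw". The same diffusion step in portable-ish C, as a sketch using the __int128 compiler extension rather than the kernel's asm:

/* Equivalent of: asm("mul %3") followed by random += raw (x86_64 case). */
unsigned long circular_multiply(unsigned long random, unsigned long mix_const)
{
	unsigned __int128 product = (unsigned __int128)random * mix_const;

	/* low half plus high half of the full product */
	return (unsigned long)product + (unsigned long)(product >> 64);
}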
@@ -37,4 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o

obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY)	+= kaslr.o
@@ -72,9 +72,9 @@ static struct addr_marker address_markers[] = {
	{ 0, "User Space" },
#ifdef CONFIG_X86_64
	{ 0x8000000000000000UL, "Kernel Space" },
	{ PAGE_OFFSET,		"Low Kernel Mapping" },
	{ VMALLOC_START,	"vmalloc() Area" },
	{ VMEMMAP_START,	"Vmemmap" },
	{ 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
	{ 0/* VMALLOC_START */, "vmalloc() Area" },
	{ 0/* VMEMMAP_START */, "Vmemmap" },
# ifdef CONFIG_X86_ESPFIX64
	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
# endif
@@ -434,8 +434,16 @@ void ptdump_walk_pgd_level_checkwx(void)

static int __init pt_dump_init(void)
{
	/*
	 * Various markers are not compile-time constants, so assign them
	 * here.
	 */
#ifdef CONFIG_X86_64
	address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
	address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
#endif
#ifdef CONFIG_X86_32
	/* Not a compile-time constant on x86-32 */
	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
@@ -17,6 +17,7 @@
#include <asm/proto.h>
#include <asm/dma.h>		/* for MAX_DMA_PFN */
#include <asm/microcode.h>
#include <asm/kaslr.h>

/*
 * We need to define the tracepoints somewhere, and tlb.c
@@ -590,6 +591,9 @@ void __init init_mem_mapping(void)
	/* the ISA range is always mapped regardless of memory holes */
	init_memory_mapping(0, ISA_END_ADDRESS);

	/* Init the trampoline, possibly with KASLR memory offset */
	init_trampoline();

	/*
	 * If the allocation is in bottom-up direction, we setup direct mapping
	 * in bottom-up, otherwise we setup direct mapping in top-down.
@ -328,22 +328,30 @@ void __init cleanup_highmap(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Create PTE level page table mapping for physical addresses.
|
||||
* It returns the last physical address mapped.
|
||||
*/
|
||||
static unsigned long __meminit
|
||||
phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
|
||||
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
|
||||
pgprot_t prot)
|
||||
{
|
||||
unsigned long pages = 0, next;
|
||||
unsigned long last_map_addr = end;
|
||||
unsigned long pages = 0, paddr_next;
|
||||
unsigned long paddr_last = paddr_end;
|
||||
pte_t *pte;
|
||||
int i;
|
||||
|
||||
pte_t *pte = pte_page + pte_index(addr);
|
||||
pte = pte_page + pte_index(paddr);
|
||||
i = pte_index(paddr);
|
||||
|
||||
for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
|
||||
next = (addr & PAGE_MASK) + PAGE_SIZE;
|
||||
if (addr >= end) {
|
||||
for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
|
||||
paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
|
||||
if (paddr >= paddr_end) {
|
||||
if (!after_bootmem &&
|
||||
!e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
|
||||
!e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
|
||||
!e820_any_mapped(paddr & PAGE_MASK, paddr_next,
|
||||
E820_RAM) &&
|
||||
!e820_any_mapped(paddr & PAGE_MASK, paddr_next,
|
||||
E820_RESERVED_KERN))
|
||||
set_pte(pte, __pte(0));
|
||||
continue;
|
||||
}
|
||||
@ -361,37 +369,44 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
|
||||
}
|
||||
|
||||
if (0)
|
||||
printk(" pte=%p addr=%lx pte=%016lx\n",
|
||||
pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
|
||||
pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
|
||||
pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
|
||||
pages++;
|
||||
set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
|
||||
last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
|
||||
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
|
||||
paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
|
||||
}
|
||||
|
||||
update_page_count(PG_LEVEL_4K, pages);
|
||||
|
||||
return last_map_addr;
|
||||
return paddr_last;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create PMD level page table mapping for physical addresses. The virtual
|
||||
* and physical address have to be aligned at this level.
|
||||
* It returns the last physical address mapped.
|
||||
*/
|
||||
static unsigned long __meminit
|
||||
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
|
||||
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
|
||||
unsigned long page_size_mask, pgprot_t prot)
|
||||
{
|
||||
unsigned long pages = 0, next;
|
||||
unsigned long last_map_addr = end;
|
||||
unsigned long pages = 0, paddr_next;
|
||||
unsigned long paddr_last = paddr_end;
|
||||
|
||||
int i = pmd_index(address);
|
||||
int i = pmd_index(paddr);
|
||||
|
||||
for (; i < PTRS_PER_PMD; i++, address = next) {
|
||||
pmd_t *pmd = pmd_page + pmd_index(address);
|
||||
for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
|
||||
pmd_t *pmd = pmd_page + pmd_index(paddr);
|
||||
pte_t *pte;
|
||||
pgprot_t new_prot = prot;
|
||||
|
||||
next = (address & PMD_MASK) + PMD_SIZE;
|
||||
if (address >= end) {
|
||||
paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
|
||||
if (paddr >= paddr_end) {
|
||||
if (!after_bootmem &&
|
||||
!e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
|
||||
!e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
|
||||
!e820_any_mapped(paddr & PMD_MASK, paddr_next,
|
||||
E820_RAM) &&
|
||||
!e820_any_mapped(paddr & PMD_MASK, paddr_next,
|
||||
E820_RESERVED_KERN))
|
||||
set_pmd(pmd, __pmd(0));
|
||||
continue;
|
||||
}
|
||||
@ -400,8 +415,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
|
||||
if (!pmd_large(*pmd)) {
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pte = (pte_t *)pmd_page_vaddr(*pmd);
|
||||
last_map_addr = phys_pte_init(pte, address,
|
||||
end, prot);
|
||||
paddr_last = phys_pte_init(pte, paddr,
|
||||
paddr_end, prot);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
continue;
|
||||
}
|
||||
@ -420,7 +435,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
|
||||
if (page_size_mask & (1 << PG_LEVEL_2M)) {
|
||||
if (!after_bootmem)
|
||||
pages++;
|
||||
last_map_addr = next;
|
||||
paddr_last = paddr_next;
|
||||
continue;
|
||||
}
|
||||
new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
|
||||
@ -430,42 +445,54 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
|
||||
pages++;
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
set_pte((pte_t *)pmd,
|
||||
pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
|
||||
pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
|
||||
__pgprot(pgprot_val(prot) | _PAGE_PSE)));
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
last_map_addr = next;
|
||||
paddr_last = paddr_next;
|
||||
continue;
|
||||
}
|
||||
|
||||
pte = alloc_low_page();
|
||||
last_map_addr = phys_pte_init(pte, address, end, new_prot);
|
||||
paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pmd_populate_kernel(&init_mm, pmd, pte);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
update_page_count(PG_LEVEL_2M, pages);
|
||||
return last_map_addr;
|
||||
return paddr_last;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create PUD level page table mapping for physical addresses. The virtual
|
||||
* and physical address do not have to be aligned at this level. KASLR can
|
||||
* randomize virtual addresses up to this level.
|
||||
* It returns the last physical address mapped.
|
||||
*/
|
||||
static unsigned long __meminit
|
||||
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
|
||||
unsigned long page_size_mask)
|
||||
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
||||
unsigned long page_size_mask)
|
||||
{
|
||||
unsigned long pages = 0, next;
|
||||
unsigned long last_map_addr = end;
|
||||
int i = pud_index(addr);
|
||||
unsigned long pages = 0, paddr_next;
|
||||
unsigned long paddr_last = paddr_end;
|
||||
unsigned long vaddr = (unsigned long)__va(paddr);
|
||||
int i = pud_index(vaddr);
|
||||
|
||||
for (; i < PTRS_PER_PUD; i++, addr = next) {
|
||||
pud_t *pud = pud_page + pud_index(addr);
for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
pud_t *pud;
pmd_t *pmd;
pgprot_t prot = PAGE_KERNEL;

next = (addr & PUD_MASK) + PUD_SIZE;
if (addr >= end) {
vaddr = (unsigned long)__va(paddr);
pud = pud_page + pud_index(vaddr);
paddr_next = (paddr & PUD_MASK) + PUD_SIZE;

if (paddr >= paddr_end) {
if (!after_bootmem &&
!e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
!e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
!e820_any_mapped(paddr & PUD_MASK, paddr_next,
E820_RAM) &&
!e820_any_mapped(paddr & PUD_MASK, paddr_next,
E820_RESERVED_KERN))
set_pud(pud, __pud(0));
continue;
}
@ -473,8 +500,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (!pud_none(*pud)) {
if (!pud_large(*pud)) {
pmd = pmd_offset(pud, 0);
last_map_addr = phys_pmd_init(pmd, addr, end,
page_size_mask, prot);
paddr_last = phys_pmd_init(pmd, paddr,
paddr_end,
page_size_mask,
prot);
__flush_tlb_all();
continue;
}
@ -493,7 +522,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (page_size_mask & (1 << PG_LEVEL_1G)) {
if (!after_bootmem)
pages++;
last_map_addr = next;
paddr_last = paddr_next;
continue;
}
prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
@ -503,16 +532,16 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pud,
pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
PAGE_KERNEL_LARGE));
spin_unlock(&init_mm.page_table_lock);
last_map_addr = next;
paddr_last = paddr_next;
continue;
}

pmd = alloc_low_page();
last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
prot);
paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
page_size_mask, prot);

spin_lock(&init_mm.page_table_lock);
pud_populate(&init_mm, pud, pmd);
@ -522,38 +551,44 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,

update_page_count(PG_LEVEL_1G, pages);

return last_map_addr;
return paddr_last;
}

/*
 * Create page table mapping for the physical memory for specific physical
 * addresses. The virtual and physical addresses have to be aligned down to
 * PMD level. It returns the last physical address mapped.
 */
unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask)
{
bool pgd_changed = false;
unsigned long next, last_map_addr = end;
unsigned long addr;
unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;

start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
addr = start;
paddr_last = paddr_end;
vaddr = (unsigned long)__va(paddr_start);
vaddr_end = (unsigned long)__va(paddr_end);
vaddr_start = vaddr;

for (; start < end; start = next) {
pgd_t *pgd = pgd_offset_k(start);
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
pgd_t *pgd = pgd_offset_k(vaddr);
pud_t *pud;

next = (start & PGDIR_MASK) + PGDIR_SIZE;
vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;

if (pgd_val(*pgd)) {
pud = (pud_t *)pgd_page_vaddr(*pgd);
last_map_addr = phys_pud_init(pud, __pa(start),
__pa(end), page_size_mask);
paddr_last = phys_pud_init(pud, __pa(vaddr),
__pa(vaddr_end),
page_size_mask);
continue;
}

pud = alloc_low_page();
last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
page_size_mask);
paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
page_size_mask);

spin_lock(&init_mm.page_table_lock);
pgd_populate(&init_mm, pgd, pud);
@ -562,11 +597,11 @@ kernel_physical_mapping_init(unsigned long start,
}

if (pgd_changed)
sync_global_pgds(addr, end - 1, 0);
sync_global_pgds(vaddr_start, vaddr_end - 1, 0);

__flush_tlb_all();

return last_map_addr;
return paddr_last;
}

#ifndef CONFIG_NUMA
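The rename from addr/next to paddr/paddr_next makes the loop's unit of work explicit: each iteration rounds the current physical address down to a PUD boundary and advances by one PUD-sized step (1 GiB with 4-level paging). A minimal userspace sketch of that stride pattern, assuming the standard x86-64 constants; the start and end addresses are toy inputs, not kernel code:

/*
 * Userspace sketch of the PUD-stride walk used in phys_pud_init() and
 * kernel_physical_mapping_init(). PUD_SHIFT is the standard x86-64
 * 4-level paging value; the addresses below are toy inputs.
 */
#include <stdio.h>

#define PUD_SHIFT 30                        /* 1 GiB per PUD entry */
#define PUD_SIZE  (1UL << PUD_SHIFT)
#define PUD_MASK  (~(PUD_SIZE - 1))

int main(void)
{
        unsigned long paddr = 0x3b000000UL;        /* unaligned start */
        unsigned long paddr_end = 0x140000000UL;   /* 5 GiB */
        unsigned long paddr_next;

        for (; paddr < paddr_end; paddr = paddr_next) {
                /* Round down to the PUD boundary, then step one PUD. */
                paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
                printf("map [%#lx - %#lx)\n", paddr,
                       paddr_next < paddr_end ? paddr_next : paddr_end);
        }
        return 0;
}

Note how an unaligned start address only shortens the first chunk; every later iteration lands exactly on a PUD boundary, which is what lets the kernel code decide per-entry whether a 1G page can be used.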
arch/x86/mm/kaslr.c (new file, 172 lines)
@ -0,0 +1,172 @@
/*
 * This file implements KASLR memory randomization for x86_64. It randomizes
 * the virtual address space of kernel memory regions (physical memory
 * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates
 * exploits relying on predictable kernel addresses.
 *
 * Entropy is generated using the KASLR early boot functions now shared in
 * the lib directory (originally written by Kees Cook). Randomization is
 * done on PGD & PUD page table levels to increase possible addresses. The
 * physical memory mapping code was adapted to support PUD level virtual
 * addresses. With the best configuration, this implementation provides on
 * average 30,000 possible virtual addresses for each memory region. An
 * additional low memory page is used to ensure each CPU can start with a
 * PGD aligned virtual address (for realmode).
 *
 * The order of each memory region is not changed. The feature looks at
 * the available space for the regions based on different configuration
 * options and randomizes the base and space between each. The size of the
 * physical memory mapping is the available physical memory.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/random.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/kaslr.h>

#include "mm_internal.h"

#define TB_SHIFT 40

/*
 * Virtual address start and end range for randomization. The end changes
 * based on configuration to have the highest amount of space for
 * randomization. It increases the possible random position for each
 * randomized region.
 *
 * You need to add an #ifdef entry if you introduce a new memory region
 * compatible with KASLR. Your entry must be in logical order with memory
 * layout. For example, ESPFIX is before EFI because its virtual address is
 * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory()
 * to ensure that this order is correct and won't be changed.
 */
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = VMEMMAP_START;

/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base = __VMALLOC_BASE;
EXPORT_SYMBOL(vmalloc_base);

/*
 * Memory regions randomized by KASLR (except modules that use a separate
 * logic earlier during boot). The list is ordered based on virtual
 * addresses. This order is kept after randomization.
 */
static __initdata struct kaslr_memory_region {
unsigned long *base;
unsigned long size_tb;
} kaslr_regions[] = {
{ &page_offset_base, 64 /* Maximum */ },
{ &vmalloc_base, VMALLOC_SIZE_TB },
};

/* Get size in bytes used by the memory region */
static inline unsigned long get_padding(struct kaslr_memory_region *region)
{
return (region->size_tb << TB_SHIFT);
}

/*
 * Apply no randomization if KASLR was disabled at boot or if KASAN
 * is enabled. KASAN shadow mappings rely on regions being PGD aligned.
 */
static inline bool kaslr_memory_enabled(void)
{
return kaslr_enabled() && !config_enabled(CONFIG_KASAN);
}

/* Initialize base and padding for each memory region randomized with KASLR */
void __init kernel_randomize_memory(void)
{
size_t i;
unsigned long vaddr = vaddr_start;
unsigned long rand, memory_tb;
struct rnd_state rand_state;
unsigned long remain_entropy;

if (!kaslr_memory_enabled())
return;

/*
 * Update the physical memory mapping size to the available memory and
 * add padding if needed (especially for memory hotplug support).
 */
BUG_ON(kaslr_regions[0].base != &page_offset_base);
memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;

/* Adapt physical memory region size based on available memory */
if (memory_tb < kaslr_regions[0].size_tb)
kaslr_regions[0].size_tb = memory_tb;

/* Calculate entropy available between regions */
remain_entropy = vaddr_end - vaddr_start;
for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
remain_entropy -= get_padding(&kaslr_regions[i]);

prandom_seed_state(&rand_state, kaslr_get_random_long("Memory"));

for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) {
unsigned long entropy;

/*
 * Select a random virtual address using the extra entropy
 * available.
 */
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
entropy = (rand % (entropy + 1)) & PUD_MASK;
vaddr += entropy;
*kaslr_regions[i].base = vaddr;

/*
 * Jump the region and add a minimum padding based on
 * randomization alignment.
 */
vaddr += get_padding(&kaslr_regions[i]);
vaddr = round_up(vaddr + 1, PUD_SIZE);
remain_entropy -= entropy;
}
}

/*
 * Create PGD aligned trampoline table to allow real mode initialization
 * of additional CPUs. It consumes only one low memory page.
 */
void __meminit init_trampoline(void)
{
unsigned long paddr, paddr_next;
pgd_t *pgd;
pud_t *pud_page, *pud_page_tramp;
int i;

if (!kaslr_memory_enabled()) {
init_trampoline_default();
return;
}

pud_page_tramp = alloc_low_page();

paddr = 0;
pgd = pgd_offset_k((unsigned long)__va(paddr));
pud_page = (pud_t *) pgd_page_vaddr(*pgd);

for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) {
pud_t *pud, *pud_tramp;
unsigned long vaddr = (unsigned long)__va(paddr);

pud_tramp = pud_page_tramp + pud_index(paddr);
pud = pud_page + pud_index(vaddr);
paddr_next = (paddr & PUD_MASK) + PUD_SIZE;

*pud_tramp = *pud;
}

set_pgd(&trampoline_pgd_entry,
__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
}
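For the new file above: kernel_randomize_memory() spreads the leftover virtual address space by giving each remaining region an even share of the remaining entropy, drawing a PUD-aligned random offset within that share, and carrying the unspent remainder forward. A hedged userspace simulation of that loop follows; the window and region sizes are toy values, and libc rand() stands in for the kernel's seeded prandom state:

/*
 * Userspace simulation of the entropy-splitting loop in
 * kernel_randomize_memory(). All sizes are toy values and rand()
 * replaces the kernel's seeded prandom state.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define PUD_SHIFT 30
#define PUD_SIZE  (1UL << PUD_SHIFT)
#define PUD_MASK  (~(PUD_SIZE - 1))

int main(void)
{
        unsigned long region_size[] = { 8UL << 30, 4UL << 30 }; /* bytes */
        unsigned long base[2];
        unsigned long vaddr = 0x100000000UL;       /* toy window start */
        unsigned long vaddr_end = 0x1000000000UL;  /* toy window end */
        unsigned long remain = vaddr_end - vaddr;
        int i, n = 2;

        srand((unsigned)time(NULL));
        for (i = 0; i < n; i++)
                remain -= region_size[i];

        for (i = 0; i < n; i++) {
                /* Even share of what is left, as a PUD-aligned offset. */
                unsigned long entropy = remain / (n - i);
                unsigned long r = ((unsigned long)rand() << 31) ^ rand();

                entropy = (r % (entropy + 1)) & PUD_MASK;
                vaddr += entropy;
                base[i] = vaddr;
                /* Skip the region plus at least one PUD of padding. */
                vaddr += region_size[i];
                vaddr = (vaddr + PUD_SIZE) & PUD_MASK; /* round_up(v + 1, PUD_SIZE) */
                remain -= entropy;
        }

        for (i = 0; i < n; i++)
                printf("region %d base: %#lx\n", i, base[i]);
        return 0;
}

Because every region consumes only the entropy it was granted, whatever an earlier region leaves unused is redistributed to the later ones, and the ordering of the regions can never change.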
@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void)

static inline unsigned long highmap_end_pfn(void)
{
return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
/* Do not reference physical address outside the kernel. */
return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
}

#endif
@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end)
return addr >= start && addr < end;
}

static inline int
within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
{
return addr >= start && addr <= end;
}

/*
 * Flushing functions
 */
@ -1299,7 +1306,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
 * to touch the high mapped kernel as well:
 */
if (!within(vaddr, (unsigned long)_text, _brk_end) &&
within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
within_inclusive(cpa->pfn, highmap_start_pfn(),
highmap_end_pfn())) {
unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
__START_KERNEL_map - phys_base;
alias_cpa = *cpa;
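The pageattr.c change pairs two fixes: highmap_end_pfn() now derives the pfn from the last byte of the rounded-up region instead of the first byte past it, and the caller switches to an inclusive check so the accepted pfn range is unchanged. A small sketch showing that both predicate/end-value pairs accept the same last pfn while the new form never names an address beyond the kernel; the addresses are toys and __pa_symbol() is reduced to plain arithmetic:

/*
 * Toy check that the inclusive-range fix accepts the same pfn set as
 * before while never computing an address past the kernel image.
 * PMD_SIZE/PAGE_SHIFT are standard x86-64 values; _brk_end is a toy.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PMD_SIZE   (1UL << 21)

static int within(unsigned long addr, unsigned long start, unsigned long end)
{
        return addr >= start && addr < end;
}

static int within_inclusive(unsigned long addr, unsigned long start,
                            unsigned long end)
{
        return addr >= start && addr <= end;
}

static unsigned long roundup(unsigned long x, unsigned long a)
{
        return (x + a - 1) / a * a;
}

int main(void)
{
        unsigned long brk_end = 0x1312345;  /* toy, not PMD aligned */
        unsigned long old_end = roundup(brk_end, PMD_SIZE) >> PAGE_SHIFT;
        unsigned long new_end = (roundup(brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
        unsigned long last_pfn = old_end - 1;  /* last pfn of the mapping */

        /* Same answer either way; only the end value itself differs. */
        printf("exclusive vs old end: %d\n", within(last_pfn, 0, old_end));
        printf("inclusive vs new end: %d\n",
               within_inclusive(last_pfn, 0, new_end));
        printf("old end pfn %#lx is one past new end pfn %#lx\n",
               old_end, new_end);
        return 0;
}

The off-by-one matters once KASLR can place the kernel near the top of physical memory, where "one byte past the image" may not be a valid physical address at all.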
@ -8,6 +8,9 @@
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;

/* Hold the pgd entry used on booting additional CPUs */
pgd_t trampoline_pgd_entry;

void __init reserve_real_mode(void)
{
phys_addr_t mem;
@ -84,7 +87,7 @@ void __init setup_real_mode(void)
*trampoline_cr4_features = __read_cr4();

trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
trampoline_pgd[511] = init_level4_pgt[511].pgd;
#endif
}
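Context for the realmode change: before this series the trampoline copied the direct map's PGD entry via pgd_index(__PAGE_OFFSET); with a randomized direct-map base the right slot is whatever init_trampoline() prepared in trampoline_pgd_entry, which is guaranteed PGD aligned. For orientation, a sketch of how pgd_index() falls out of the 4-level layout; PGDIR_SHIFT and PTRS_PER_PGD are the standard x86-64 values, and 0xffff880000000000 is the default non-randomized base:

/*
 * How pgd_index() picks the PGD slot in x86-64 4-level paging. The
 * default (non-randomized) direct-map base lands in slot 272, which is
 * why a fixed pgd_index(__PAGE_OFFSET) copy breaks once the base moves.
 */
#include <stdio.h>

#define PGDIR_SHIFT  39
#define PTRS_PER_PGD 512

static unsigned long pgd_index(unsigned long va)
{
        return (va >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

int main(void)
{
        printf("pgd_index(__PAGE_OFFSET) = %lu\n",
               pgd_index(0xffff880000000000UL));
        printf("pgd_index(0)             = %lu\n", pgd_index(0UL));
        return 0;
}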
@ -1154,11 +1154,6 @@ static int __init nohibernate_setup(char *str)
return 1;
}

static int __init kaslr_nohibernate_setup(char *str)
{
return nohibernate_setup(str);
}

static int __init page_poison_nohibernate_setup(char *str)
{
#ifdef CONFIG_PAGE_POISONING_ZERO
@ -1182,5 +1177,4 @@ __setup("hibernate=", hibernate_setup);
__setup("resumewait", resumewait_setup);
__setup("resumedelay=", resumedelay_setup);
__setup("nohibernate", nohibernate_setup);
__setup("kaslr", kaslr_nohibernate_setup);
__setup("page_poison=", page_poison_nohibernate_setup);
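The hibernate.c hunk deletes the alias that made booting with "kaslr" behave like "nohibernate"; the two features can now coexist, so only the real "nohibernate" handler stays registered. A userspace sketch of the removed aliasing pattern, with a simple token scan standing in for the kernel's __setup() machinery:

/*
 * Userspace sketch of the boot-parameter aliasing removed above:
 * "kaslr" used to be treated exactly like "nohibernate". The token
 * scan is a stand-in for the kernel's __setup() registration.
 */
#include <stdio.h>
#include <string.h>

static int hibernation_available = 1;

static void nohibernate_setup(void)
{
        hibernation_available = 0;
}

static void parse_cmdline(const char *cmdline)
{
        char buf[256];
        char *tok;

        strncpy(buf, cmdline, sizeof(buf) - 1);
        buf[sizeof(buf) - 1] = '\0';
        for (tok = strtok(buf, " "); tok; tok = strtok(NULL, " ")) {
                if (!strcmp(tok, "nohibernate"))
                        nohibernate_setup();
                else if (!strcmp(tok, "kaslr"))   /* the removed alias */
                        nohibernate_setup();
        }
}

int main(void)
{
        parse_cmdline("root=/dev/sda1 kaslr quiet");
        printf("hibernation available: %d\n", hibernation_available);
        return 0;
}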