diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index 2a641ba7be3b..55a55f30eed8 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit):: 0000000000000000 0000ffffffffffff 256TB user ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB modules - ffff800008000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff80007fffffff 2GB modules + ffff800080000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support): 0000000000000000 000fffffffffffff 4PB user fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB modules - ffff800008000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff80007fffffff 2GB modules + ffff800080000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 215efc3bbbcf..6e0e5722f229 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -46,7 +46,7 @@ #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) -#define MODULES_VSIZE (SZ_128M) +#define MODULES_VSIZE (SZ_2G) #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT))) #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) #define PCI_IO_END (VMEMMAP_START - SZ_8M) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f64636c2fd05..dd851297596e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -7,6 +7,8 @@ * Author: Will Deacon */ +#define pr_fmt(fmt) "Modules: " fmt + #include #include #include @@ -24,72 +26,119 @@ #include #include -static u64 __ro_after_init module_alloc_base = (u64)_etext - MODULES_VSIZE; +static u64 module_direct_base __ro_after_init = 0; +static u64 module_plt_base __ro_after_init = 0; -#ifdef CONFIG_RANDOMIZE_BASE -static int __init kaslr_module_init(void) +/* + * Choose a random page-aligned base address for a window of 'size' bytes which + * entirely contains the interval [start, end - 1]. + */ +static u64 __init random_bounding_box(u64 size, u64 start, u64 end) { - u64 module_range; - u32 seed; + u64 max_pgoff, pgoff; - if (!kaslr_enabled()) + if ((end - start) >= size) return 0; - seed = get_random_u32(); + max_pgoff = (size - (end - start)) / PAGE_SIZE; + pgoff = get_random_u32_inclusive(0, max_pgoff); - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - /* - * Randomize the module region over a 2 GB window covering the - * kernel. This reduces the risk of modules leaking information - * about the address of the kernel itself, but results in - * branches between modules and the core kernel that are - * resolved via PLTs. (Branches between modules will be - * resolved normally.) - */ - module_range = SZ_2G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); + return start - pgoff * PAGE_SIZE; +} + +/* + * Modules may directly reference data and text anywhere within the kernel + * image and other modules. References using PREL32 relocations have a +/-2G + * range, and so we need to ensure that the entire kernel image and all modules + * fall within a 2G window such that these are always within range. + * + * Modules may directly branch to functions and code within the kernel text, + * and to functions and code within other modules. These branches will use + * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure + * that the entire kernel text and all module text falls within a 128M window + * such that these are always within range. With PLTs, we can expand this to a + * 2G window. + * + * We chose the 128M region to surround the entire kernel image (rather than + * just the text) as using the same bounds for the 128M and 2G regions ensures + * by construction that we never select a 128M region that is not a subset of + * the 2G region. For very large and unusual kernel configurations this means + * we may fall back to PLTs where they could have been avoided, but this keeps + * the logic significantly simpler. + */ +static int __init module_init_limits(void) +{ + u64 kernel_end = (u64)_end; + u64 kernel_start = (u64)_text; + u64 kernel_size = kernel_end - kernel_start; + + /* + * The default modules region is placed immediately below the kernel + * image, and is large enough to use the full 2G relocation range. + */ + BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); + BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); + + if (!kaslr_enabled()) { + if (kernel_size < SZ_128M) + module_direct_base = kernel_end - SZ_128M; + if (kernel_size < SZ_2G) + module_plt_base = kernel_end - SZ_2G; } else { - /* - * Randomize the module region by setting module_alloc_base to - * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, - * _stext) . This guarantees that the resulting region still - * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers unless this region is exhausted - * and we fall back to a larger 2GB window in module_alloc() - * when ARM64_MODULE_PLTS is enabled. - */ - module_range = MODULES_VSIZE - (u64)(_etext - _stext); + u64 min = kernel_start; + u64 max = kernel_end; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); + } else { + module_direct_base = random_bounding_box(SZ_128M, min, max); + if (module_direct_base) { + min = module_direct_base; + max = module_direct_base + SZ_128M; + } + } + + module_plt_base = random_bounding_box(SZ_2G, min, max); } - /* use the lower 21 bits to randomize the base of the module region */ - module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; - module_alloc_base &= PAGE_MASK; + pr_info("%llu pages in range for non-PLT usage", + module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); + pr_info("%llu pages in range for PLT usage", + module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); return 0; } -subsys_initcall(kaslr_module_init) -#endif +subsys_initcall(module_init_limits); void *module_alloc(unsigned long size) { - u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; - void *p; + void *p = NULL; /* * Where possible, prefer to allocate within direct branch range of the - * kernel such that no PLTs are necessary. This may fail, so we pass - * __GFP_NOWARN to silence the resulting warning. + * kernel such that no PLTs are necessary. */ - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + if (module_direct_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_direct_base, + module_direct_base + SZ_128M, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } + + if (!p && module_plt_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_plt_base, + module_plt_base + SZ_2G, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } if (!p) { - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + pr_warn_ratelimited("%s: unable to allocate memory\n", + __func__); } if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {