b569bab78d
On x86, 5-level paging enables a 56-bit userspace virtual address space. Not all user space is ready to handle wide addresses. It's known that at least some JIT compilers use higher bits in pointers to encode their information. It collides with valid pointers with 5-level paging and leads to crashes. To mitigate this, we are not going to allocate virtual address space above 47-bit by default. But userspace can ask for allocation from the full address space by specifying a hint address (with or without MAP_FIXED) above 47-bits. If the hint address is set above 47-bit, but MAP_FIXED is not specified, we try to look for an unmapped area at the specified address. If it's already occupied, we look for an unmapped area in the *full* address space, rather than from the 47-bit window. A high hint address would only affect the allocation in question, but not any future mmap()s. Specifying a high hint address on an older kernel or on a machine without 5-level paging support is safe. The hint will be ignored and the kernel will fall back to allocation from the 47-bit address space. This approach helps to easily make an application's memory allocator aware of the large address space without manually tracking allocated virtual address space. The patch puts all machinery in place, but does not yet allow userspace to have mappings above 47-bit -- TASK_SIZE_MAX has to be raised to get the effect. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-arch@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170716225954.74185-7-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
178 lines
4.6 KiB
C
178 lines
4.6 KiB
C
/*
 * Flexible mmap layout support
 *
 * Based on code by Ingo Molnar and Andi Kleen, copyrighted
 * as follows:
 *
 * Copyright 2003-2009 Red Hat Inc.
 * All Rights Reserved.
 * Copyright 2005 Andi Kleen, SUSE Labs.
 * Copyright 2007 Jiri Kosina, SUSE Labs.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
|
|
|
|
#include <linux/personality.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/random.h>
|
|
#include <linux/limits.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/compat.h>
|
|
#include <asm/elf.h>
|
|
|
|
/*
 * Per-boot mmap alignment policy.  Initialized here with .flags = -1;
 * presumably that acts as a "not yet configured" sentinel until CPU
 * setup code fills it in -- TODO confirm against the flag's consumers.
 */
struct va_alignment __read_mostly va_align = {
	.flags = -1,
};
|
|
|
|
/* Size of the user virtual address space for a 32-bit (compat) task. */
unsigned long task_size_32bit(void)
{
	return IA32_PAGE_OFFSET;
}
|
|
|
|
unsigned long task_size_64bit(int full_addr_space)
|
|
{
|
|
return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
|
|
}
|
|
|
|
static unsigned long stack_maxrandom_size(unsigned long task_size)
|
|
{
|
|
unsigned long max = 0;
|
|
if ((current->flags & PF_RANDOMIZE) &&
|
|
!(current->personality & ADDR_NO_RANDOMIZE)) {
|
|
max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit());
|
|
max <<= PAGE_SHIFT;
|
|
}
|
|
|
|
return max;
|
|
}
|
|
|
|
/*
 * Entropy (in bits) used for mmap base randomization.  With CONFIG_COMPAT,
 * 32-bit (compat) syscalls get their own tunable; otherwise both widths
 * share mmap_rnd_bits.
 */
#ifdef CONFIG_COMPAT
# define mmap32_rnd_bits  mmap_rnd_compat_bits
# define mmap64_rnd_bits  mmap_rnd_bits
#else
# define mmap32_rnd_bits  mmap_rnd_bits
# define mmap64_rnd_bits  mmap_rnd_bits
#endif

/* Minimum gap kept between the stack and the top of the mmap area. */
#define SIZE_128M    (128 * 1024 * 1024UL)
|
|
|
|
static int mmap_is_legacy(void)
|
|
{
|
|
if (current->personality & ADDR_COMPAT_LAYOUT)
|
|
return 1;
|
|
|
|
return sysctl_legacy_va_layout;
|
|
}
|
|
|
|
static unsigned long arch_rnd(unsigned int rndbits)
|
|
{
|
|
return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
|
|
}
|
|
|
|
unsigned long arch_mmap_rnd(void)
|
|
{
|
|
if (!(current->flags & PF_RANDOMIZE))
|
|
return 0;
|
|
return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
|
|
}
|
|
|
|
static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
|
|
{
|
|
unsigned long gap = rlimit(RLIMIT_STACK);
|
|
unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
|
|
unsigned long gap_min, gap_max;
|
|
|
|
/* Values close to RLIM_INFINITY can overflow. */
|
|
if (gap + pad > gap)
|
|
gap += pad;
|
|
|
|
/*
|
|
* Top of mmap area (just below the process stack).
|
|
* Leave an at least ~128 MB hole with possible stack randomization.
|
|
*/
|
|
gap_min = SIZE_128M;
|
|
gap_max = (task_size / 6) * 5;
|
|
|
|
if (gap < gap_min)
|
|
gap = gap_min;
|
|
else if (gap > gap_max)
|
|
gap = gap_max;
|
|
|
|
return PAGE_ALIGN(task_size - gap - rnd);
|
|
}
|
|
|
|
/*
 * Bottom of the mmap area for the legacy (bottom-up) layout: the
 * architecture's default unmapped base for @task_size, shifted up by
 * the random offset @rnd.
 */
static unsigned long mmap_legacy_base(unsigned long rnd,
				      unsigned long task_size)
{
	return __TASK_UNMAPPED_BASE(task_size) + rnd;
}
|
|
|
|
/*
 * Called very early during the creation of a new process VM image.
 * Fills in both layout bases for the given @task_size: *legacy_base is
 * always the bottom-up base; *base is whichever base the task's layout
 * policy selects (legacy or top-down).
 */
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
		unsigned long random_factor, unsigned long task_size)
{
	*legacy_base = mmap_legacy_base(random_factor, task_size);

	*base = mmap_is_legacy() ? *legacy_base :
				   mmap_base(random_factor, task_size);
}
|
|
|
|
void arch_pick_mmap_layout(struct mm_struct *mm)
|
|
{
|
|
if (mmap_is_legacy())
|
|
mm->get_unmapped_area = arch_get_unmapped_area;
|
|
else
|
|
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
|
|
|
|
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
|
|
arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
|
|
|
|
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
|
|
/*
|
|
* The mmap syscall mapping base decision depends solely on the
|
|
* syscall type (64-bit or compat). This applies for 64bit
|
|
* applications and 32bit applications. The 64bit syscall uses
|
|
* mmap_base, the compat syscall uses mmap_compat_base.
|
|
*/
|
|
arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
|
|
arch_rnd(mmap32_rnd_bits), task_size_32bit());
|
|
#endif
|
|
}
|
|
|
|
unsigned long get_mmap_base(int is_legacy)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
|
|
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
|
|
if (in_compat_syscall()) {
|
|
return is_legacy ? mm->mmap_compat_legacy_base
|
|
: mm->mmap_compat_base;
|
|
}
|
|
#endif
|
|
return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
|
|
}
|
|
|
|
const char *arch_vma_name(struct vm_area_struct *vma)
|
|
{
|
|
if (vma->vm_flags & VM_MPX)
|
|
return "[mpx]";
|
|
return NULL;
|
|
}
|