linux/arch/x86/mm/mmap.c
Kirill A. Shutemov b569bab78d x86/mm: Prepare to expose larger address space to userspace
On x86, 5-level paging enables 56-bit userspace virtual address space.
Not all user space is ready to handle wide addresses. It's known that
at least some JIT compilers use higher bits in pointers to encode their
information. It collides with valid pointers with 5-level paging and
leads to crashes.

To mitigate this, we are not going to allocate virtual address space
above 47-bit by default.

But userspace can ask for allocation from full address space by
specifying hint address (with or without MAP_FIXED) above 47-bits.

If hint address set above 47-bit, but MAP_FIXED is not specified, we try
to look for unmapped area by specified address. If it's already
occupied, we look for unmapped area in *full* address space, rather than
from 47-bit window.

A high hint address would only affect the allocation in question, but not
any future mmap()s.

Specifying high hint address on older kernel or on machine without 5-level
paging support is safe. The hint will be ignored and kernel will fall back
to allocation from 47-bit address space.

This approach helps to easily make application's memory allocator aware
about large address space without manually tracking allocated virtual
address space.

The patch puts all machinery in place, but not yet allows userspace to have
mappings above 47-bit -- TASK_SIZE_MAX has to be raised to get the effect.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170716225954.74185-7-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-07-21 10:05:18 +02:00

178 lines
4.6 KiB
C

/*
* Flexible mmap layout support
*
* Based on code by Ingo Molnar and Andi Kleen, copyrighted
* as follows:
*
* Copyright 2003-2009 Red Hat Inc.
* All Rights Reserved.
* Copyright 2005 Andi Kleen, SUSE Labs.
* Copyright 2007 Jiri Kosina, SUSE Labs.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/limits.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/compat.h>
#include <asm/elf.h>
struct va_alignment __read_mostly va_align = {
.flags = -1,
};
unsigned long task_size_32bit(void)
{
return IA32_PAGE_OFFSET;
}
unsigned long task_size_64bit(int full_addr_space)
{
return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
}
static unsigned long stack_maxrandom_size(unsigned long task_size)
{
unsigned long max = 0;
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit());
max <<= PAGE_SHIFT;
}
return max;
}
#ifdef CONFIG_COMPAT
# define mmap32_rnd_bits mmap_rnd_compat_bits
# define mmap64_rnd_bits mmap_rnd_bits
#else
# define mmap32_rnd_bits mmap_rnd_bits
# define mmap64_rnd_bits mmap_rnd_bits
#endif
#define SIZE_128M (128 * 1024 * 1024UL)
static int mmap_is_legacy(void)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
return sysctl_legacy_va_layout;
}
static unsigned long arch_rnd(unsigned int rndbits)
{
return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
}
unsigned long arch_mmap_rnd(void)
{
if (!(current->flags & PF_RANDOMIZE))
return 0;
return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}
static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
{
unsigned long gap = rlimit(RLIMIT_STACK);
unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
unsigned long gap_min, gap_max;
/* Values close to RLIM_INFINITY can overflow. */
if (gap + pad > gap)
gap += pad;
/*
* Top of mmap area (just below the process stack).
* Leave an at least ~128 MB hole with possible stack randomization.
*/
gap_min = SIZE_128M;
gap_max = (task_size / 6) * 5;
if (gap < gap_min)
gap = gap_min;
else if (gap > gap_max)
gap = gap_max;
return PAGE_ALIGN(task_size - gap - rnd);
}
static unsigned long mmap_legacy_base(unsigned long rnd,
unsigned long task_size)
{
return __TASK_UNMAPPED_BASE(task_size) + rnd;
}
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
unsigned long random_factor, unsigned long task_size)
{
*legacy_base = mmap_legacy_base(random_factor, task_size);
if (mmap_is_legacy())
*base = *legacy_base;
else
*base = mmap_base(random_factor, task_size);
}
void arch_pick_mmap_layout(struct mm_struct *mm)
{
if (mmap_is_legacy())
mm->get_unmapped_area = arch_get_unmapped_area;
else
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
/*
* The mmap syscall mapping base decision depends solely on the
* syscall type (64-bit or compat). This applies for 64bit
* applications and 32bit applications. The 64bit syscall uses
* mmap_base, the compat syscall uses mmap_compat_base.
*/
arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
arch_rnd(mmap32_rnd_bits), task_size_32bit());
#endif
}
unsigned long get_mmap_base(int is_legacy)
{
struct mm_struct *mm = current->mm;
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
if (in_compat_syscall()) {
return is_legacy ? mm->mmap_compat_legacy_base
: mm->mmap_compat_base;
}
#endif
return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
}
const char *arch_vma_name(struct vm_area_struct *vma)
{
if (vma->vm_flags & VM_MPX)
return "[mpx]";
return NULL;
}