x86/mm: Prepare to expose larger address space to userspace
On x86, 5-level paging enables a 56-bit userspace virtual address space. Not all userspace is ready to handle wide addresses. It's known that at least some JIT compilers use the higher bits in pointers to encode their own information. This collides with valid pointers under 5-level paging and leads to crashes. To mitigate this, we are not going to allocate virtual address space above 47-bit by default. But userspace can ask for allocation from the full address space by specifying a hint address (with or without MAP_FIXED) above 47-bit. If the hint address is set above 47-bit, but MAP_FIXED is not specified, we try to look for an unmapped area at the specified address. If it's already occupied, we look for an unmapped area in the *full* address space, rather than in the 47-bit window. A high hint address only affects the allocation in question, not any future mmap()s. Specifying a high hint address on an older kernel or on a machine without 5-level paging support is safe: the hint will be ignored and the kernel will fall back to allocation from the 47-bit address space. This approach makes it easy to make an application's memory allocator aware of the large address space without manually tracking allocated virtual address space. The patch puts all the machinery in place, but does not yet allow userspace to have mappings above 47-bit -- TASK_SIZE_MAX has to be raised to get the effect. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-arch@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170716225954.74185-7-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
44b04912fa
commit
b569bab78d
@ -305,7 +305,7 @@ static inline int mmap_is_ia32(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern unsigned long task_size_32bit(void);
|
extern unsigned long task_size_32bit(void);
|
||||||
extern unsigned long task_size_64bit(void);
|
extern unsigned long task_size_64bit(int full_addr_space);
|
||||||
extern unsigned long get_mmap_base(int is_legacy);
|
extern unsigned long get_mmap_base(int is_legacy);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
|
@ -808,6 +808,7 @@ static inline void spin_lock_prefetch(const void *x)
|
|||||||
*/
|
*/
|
||||||
#define IA32_PAGE_OFFSET PAGE_OFFSET
|
#define IA32_PAGE_OFFSET PAGE_OFFSET
|
||||||
#define TASK_SIZE PAGE_OFFSET
|
#define TASK_SIZE PAGE_OFFSET
|
||||||
|
#define TASK_SIZE_LOW TASK_SIZE
|
||||||
#define TASK_SIZE_MAX TASK_SIZE
|
#define TASK_SIZE_MAX TASK_SIZE
|
||||||
#define DEFAULT_MAP_WINDOW TASK_SIZE
|
#define DEFAULT_MAP_WINDOW TASK_SIZE
|
||||||
#define STACK_TOP TASK_SIZE
|
#define STACK_TOP TASK_SIZE
|
||||||
@ -859,12 +860,14 @@ static inline void spin_lock_prefetch(const void *x)
|
|||||||
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
|
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
|
||||||
0xc0000000 : 0xFFFFe000)
|
0xc0000000 : 0xFFFFe000)
|
||||||
|
|
||||||
|
#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
|
||||||
|
IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
|
||||||
#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
|
#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
|
||||||
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
|
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
|
||||||
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
|
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
|
||||||
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
|
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
|
||||||
|
|
||||||
#define STACK_TOP TASK_SIZE
|
#define STACK_TOP TASK_SIZE_LOW
|
||||||
#define STACK_TOP_MAX TASK_SIZE_MAX
|
#define STACK_TOP_MAX TASK_SIZE_MAX
|
||||||
|
|
||||||
#define INIT_THREAD { \
|
#define INIT_THREAD { \
|
||||||
@ -885,7 +888,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
|
|||||||
* space during mmap's.
|
* space during mmap's.
|
||||||
*/
|
*/
|
||||||
#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
|
#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
|
||||||
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
|
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
|
||||||
|
|
||||||
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
|
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
|
||||||
|
|
||||||
|
@ -101,8 +101,8 @@ out:
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void find_start_end(unsigned long flags, unsigned long *begin,
|
static void find_start_end(unsigned long addr, unsigned long flags,
|
||||||
unsigned long *end)
|
unsigned long *begin, unsigned long *end)
|
||||||
{
|
{
|
||||||
if (!in_compat_syscall() && (flags & MAP_32BIT)) {
|
if (!in_compat_syscall() && (flags & MAP_32BIT)) {
|
||||||
/* This is usually used needed to map code in small
|
/* This is usually used needed to map code in small
|
||||||
@ -121,7 +121,10 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
|
|||||||
}
|
}
|
||||||
|
|
||||||
*begin = get_mmap_base(1);
|
*begin = get_mmap_base(1);
|
||||||
*end = in_compat_syscall() ? task_size_32bit() : task_size_64bit();
|
if (in_compat_syscall())
|
||||||
|
*end = task_size_32bit();
|
||||||
|
else
|
||||||
|
*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long
|
unsigned long
|
||||||
@ -140,7 +143,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
|
|||||||
if (flags & MAP_FIXED)
|
if (flags & MAP_FIXED)
|
||||||
return addr;
|
return addr;
|
||||||
|
|
||||||
find_start_end(flags, &begin, &end);
|
find_start_end(addr, flags, &begin, &end);
|
||||||
|
|
||||||
if (len > end)
|
if (len > end)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
@ -204,6 +207,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
|||||||
info.length = len;
|
info.length = len;
|
||||||
info.low_limit = PAGE_SIZE;
|
info.low_limit = PAGE_SIZE;
|
||||||
info.high_limit = get_mmap_base(0);
|
info.high_limit = get_mmap_base(0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
|
||||||
|
* in the full address space.
|
||||||
|
*
|
||||||
|
* !in_compat_syscall() check to avoid high addresses for x32.
|
||||||
|
*/
|
||||||
|
if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
|
||||||
|
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
|
||||||
|
|
||||||
info.align_mask = 0;
|
info.align_mask = 0;
|
||||||
info.align_offset = pgoff << PAGE_SHIFT;
|
info.align_offset = pgoff << PAGE_SHIFT;
|
||||||
if (filp) {
|
if (filp) {
|
||||||
|
@ -86,25 +86,38 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
|
|||||||
info.flags = 0;
|
info.flags = 0;
|
||||||
info.length = len;
|
info.length = len;
|
||||||
info.low_limit = get_mmap_base(1);
|
info.low_limit = get_mmap_base(1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
|
||||||
|
* in the full address space.
|
||||||
|
*/
|
||||||
info.high_limit = in_compat_syscall() ?
|
info.high_limit = in_compat_syscall() ?
|
||||||
task_size_32bit() : task_size_64bit();
|
task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
|
||||||
|
|
||||||
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
|
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
|
||||||
info.align_offset = 0;
|
info.align_offset = 0;
|
||||||
return vm_unmapped_area(&info);
|
return vm_unmapped_area(&info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
|
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
|
||||||
unsigned long addr0, unsigned long len,
|
unsigned long addr, unsigned long len,
|
||||||
unsigned long pgoff, unsigned long flags)
|
unsigned long pgoff, unsigned long flags)
|
||||||
{
|
{
|
||||||
struct hstate *h = hstate_file(file);
|
struct hstate *h = hstate_file(file);
|
||||||
struct vm_unmapped_area_info info;
|
struct vm_unmapped_area_info info;
|
||||||
unsigned long addr;
|
|
||||||
|
|
||||||
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
||||||
info.length = len;
|
info.length = len;
|
||||||
info.low_limit = PAGE_SIZE;
|
info.low_limit = PAGE_SIZE;
|
||||||
info.high_limit = get_mmap_base(0);
|
info.high_limit = get_mmap_base(0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
|
||||||
|
* in the full address space.
|
||||||
|
*/
|
||||||
|
if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
|
||||||
|
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
|
||||||
|
|
||||||
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
|
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
|
||||||
info.align_offset = 0;
|
info.align_offset = 0;
|
||||||
addr = vm_unmapped_area(&info);
|
addr = vm_unmapped_area(&info);
|
||||||
@ -119,7 +132,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
|
|||||||
VM_BUG_ON(addr != -ENOMEM);
|
VM_BUG_ON(addr != -ENOMEM);
|
||||||
info.flags = 0;
|
info.flags = 0;
|
||||||
info.low_limit = TASK_UNMAPPED_BASE;
|
info.low_limit = TASK_UNMAPPED_BASE;
|
||||||
info.high_limit = TASK_SIZE;
|
info.high_limit = TASK_SIZE_LOW;
|
||||||
addr = vm_unmapped_area(&info);
|
addr = vm_unmapped_area(&info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,9 +42,9 @@ unsigned long task_size_32bit(void)
|
|||||||
return IA32_PAGE_OFFSET;
|
return IA32_PAGE_OFFSET;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long task_size_64bit(void)
|
unsigned long task_size_64bit(int full_addr_space)
|
||||||
{
|
{
|
||||||
return TASK_SIZE_MAX;
|
return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long stack_maxrandom_size(unsigned long task_size)
|
static unsigned long stack_maxrandom_size(unsigned long task_size)
|
||||||
@ -142,7 +142,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
|
|||||||
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
|
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
|
||||||
|
|
||||||
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
|
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
|
||||||
arch_rnd(mmap64_rnd_bits), task_size_64bit());
|
arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
|
||||||
|
|
||||||
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
|
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
|
||||||
/*
|
/*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user