a846446b19
The result of in_compat_syscall() can be pictured as: x86 platform: --------------------------------------------------- | Arch\syscall | 64-bit | ia32 | x32 | |-------------------------------------------------| | x86_64 | false | true | true | |-------------------------------------------------| | i686 | | <true> | | --------------------------------------------------- Other platforms: ------------------------------------------- | Arch\syscall | 64-bit | compat | |-----------------------------------------| | 64-bit | false | true | |-----------------------------------------| | 32-bit(?) | | <false> | ------------------------------------------- As seen, the result of in_compat_syscall() on generic 32-bit platform differs from i686. There is no reason for in_compat_syscall() == true on native i686. It also easy to misread code if the result on native 32-bit platform differs between arches. Because of that non arch-specific code has many places with: if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall()) in different variations. It looks-like the only non-x86 code which uses in_compat_syscall() not under CONFIG_COMPAT guard is in amd/amdkfd. But according to the commit a18069c132cb ("amdkfd: Disable support for 32-bit user processes"), it actually should be disabled on native i686. Rename in_compat_syscall() to in_32bit_syscall() for x86-specific code and make in_compat_syscall() false under !CONFIG_COMPAT. A follow on patch will clean up generic users which were forced to check IS_ENABLED(CONFIG_COMPAT) with in_compat_syscall(). Signed-off-by: Dmitry Safonov <dima@arista.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Andy Lutomirski <luto@kernel.org> Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: John Stultz <john.stultz@linaro.org> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Steffen Klassert <steffen.klassert@secunet.com> Cc: Stephen Boyd <sboyd@kernel.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: linux-efi@vger.kernel.org Cc: netdev@vger.kernel.org Link: https://lkml.kernel.org/r/20181012134253.23266-2-dima@arista.com
246 lines
6.1 KiB
C
246 lines
6.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/compat.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/sem.h>
|
|
#include <linux/msg.h>
|
|
#include <linux/shm.h>
|
|
#include <linux/stat.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/file.h>
|
|
#include <linux/utsname.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/random.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/elf.h>
|
|
|
|
#include <asm/elf.h>
|
|
#include <asm/ia32.h>
|
|
#include <asm/syscalls.h>
|
|
#include <asm/mpx.h>
|
|
|
|
/*
|
|
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
|
|
*/
|
|
static unsigned long get_align_mask(void)
|
|
{
|
|
/* handle 32- and 64-bit case with a single conditional */
|
|
if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
|
|
return 0;
|
|
|
|
if (!(current->flags & PF_RANDOMIZE))
|
|
return 0;
|
|
|
|
return va_align.mask;
|
|
}
|
|
|
|
/*
|
|
* To avoid aliasing in the I$ on AMD F15h, the bits defined by the
|
|
* va_align.bits, [12:upper_bit), are set to a random value instead of
|
|
* zeroing them. This random value is computed once per boot. This form
|
|
* of ASLR is known as "per-boot ASLR".
|
|
*
|
|
* To achieve this, the random value is added to the info.align_offset
|
|
* value before calling vm_unmapped_area() or ORed directly to the
|
|
* address.
|
|
*/
|
|
static unsigned long get_align_bits(void)
|
|
{
|
|
return va_align.bits & get_align_mask();
|
|
}
|
|
|
|
unsigned long align_vdso_addr(unsigned long addr)
|
|
{
|
|
unsigned long align_mask = get_align_mask();
|
|
addr = (addr + align_mask) & ~align_mask;
|
|
return addr | get_align_bits();
|
|
}
|
|
|
|
static int __init control_va_addr_alignment(char *str)
|
|
{
|
|
/* guard against enabling this on other CPU families */
|
|
if (va_align.flags < 0)
|
|
return 1;
|
|
|
|
if (*str == 0)
|
|
return 1;
|
|
|
|
if (*str == '=')
|
|
str++;
|
|
|
|
if (!strcmp(str, "32"))
|
|
va_align.flags = ALIGN_VA_32;
|
|
else if (!strcmp(str, "64"))
|
|
va_align.flags = ALIGN_VA_64;
|
|
else if (!strcmp(str, "off"))
|
|
va_align.flags = 0;
|
|
else if (!strcmp(str, "on"))
|
|
va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
|
|
else
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
__setup("align_va_addr", control_va_addr_alignment);
|
|
|
|
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
|
|
unsigned long, prot, unsigned long, flags,
|
|
unsigned long, fd, unsigned long, off)
|
|
{
|
|
long error;
|
|
error = -EINVAL;
|
|
if (off & ~PAGE_MASK)
|
|
goto out;
|
|
|
|
error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
|
|
out:
|
|
return error;
|
|
}
|
|
|
|
static void find_start_end(unsigned long addr, unsigned long flags,
|
|
unsigned long *begin, unsigned long *end)
|
|
{
|
|
if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
|
|
/* This is usually used needed to map code in small
|
|
model, so it needs to be in the first 31bit. Limit
|
|
it to that. This means we need to move the
|
|
unmapped base down for this case. This can give
|
|
conflicts with the heap, but we assume that glibc
|
|
malloc knows how to fall back to mmap. Give it 1GB
|
|
of playground for now. -AK */
|
|
*begin = 0x40000000;
|
|
*end = 0x80000000;
|
|
if (current->flags & PF_RANDOMIZE) {
|
|
*begin = randomize_page(*begin, 0x02000000);
|
|
}
|
|
return;
|
|
}
|
|
|
|
*begin = get_mmap_base(1);
|
|
if (in_32bit_syscall())
|
|
*end = task_size_32bit();
|
|
else
|
|
*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
|
|
}
|
|
|
|
unsigned long
|
|
arch_get_unmapped_area(struct file *filp, unsigned long addr,
|
|
unsigned long len, unsigned long pgoff, unsigned long flags)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
struct vm_area_struct *vma;
|
|
struct vm_unmapped_area_info info;
|
|
unsigned long begin, end;
|
|
|
|
addr = mpx_unmapped_area_check(addr, len, flags);
|
|
if (IS_ERR_VALUE(addr))
|
|
return addr;
|
|
|
|
if (flags & MAP_FIXED)
|
|
return addr;
|
|
|
|
find_start_end(addr, flags, &begin, &end);
|
|
|
|
if (len > end)
|
|
return -ENOMEM;
|
|
|
|
if (addr) {
|
|
addr = PAGE_ALIGN(addr);
|
|
vma = find_vma(mm, addr);
|
|
if (end - len >= addr &&
|
|
(!vma || addr + len <= vm_start_gap(vma)))
|
|
return addr;
|
|
}
|
|
|
|
info.flags = 0;
|
|
info.length = len;
|
|
info.low_limit = begin;
|
|
info.high_limit = end;
|
|
info.align_mask = 0;
|
|
info.align_offset = pgoff << PAGE_SHIFT;
|
|
if (filp) {
|
|
info.align_mask = get_align_mask();
|
|
info.align_offset += get_align_bits();
|
|
}
|
|
return vm_unmapped_area(&info);
|
|
}
|
|
|
|
unsigned long
|
|
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
|
const unsigned long len, const unsigned long pgoff,
|
|
const unsigned long flags)
|
|
{
|
|
struct vm_area_struct *vma;
|
|
struct mm_struct *mm = current->mm;
|
|
unsigned long addr = addr0;
|
|
struct vm_unmapped_area_info info;
|
|
|
|
addr = mpx_unmapped_area_check(addr, len, flags);
|
|
if (IS_ERR_VALUE(addr))
|
|
return addr;
|
|
|
|
/* requested length too big for entire address space */
|
|
if (len > TASK_SIZE)
|
|
return -ENOMEM;
|
|
|
|
/* No address checking. See comment at mmap_address_hint_valid() */
|
|
if (flags & MAP_FIXED)
|
|
return addr;
|
|
|
|
/* for MAP_32BIT mappings we force the legacy mmap base */
|
|
if (!in_32bit_syscall() && (flags & MAP_32BIT))
|
|
goto bottomup;
|
|
|
|
/* requesting a specific address */
|
|
if (addr) {
|
|
addr &= PAGE_MASK;
|
|
if (!mmap_address_hint_valid(addr, len))
|
|
goto get_unmapped_area;
|
|
|
|
vma = find_vma(mm, addr);
|
|
if (!vma || addr + len <= vm_start_gap(vma))
|
|
return addr;
|
|
}
|
|
get_unmapped_area:
|
|
|
|
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
|
info.length = len;
|
|
info.low_limit = PAGE_SIZE;
|
|
info.high_limit = get_mmap_base(0);
|
|
|
|
/*
|
|
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
|
|
* in the full address space.
|
|
*
|
|
* !in_32bit_syscall() check to avoid high addresses for x32
|
|
* (and make it no op on native i386).
|
|
*/
|
|
if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
|
|
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
|
|
|
|
info.align_mask = 0;
|
|
info.align_offset = pgoff << PAGE_SHIFT;
|
|
if (filp) {
|
|
info.align_mask = get_align_mask();
|
|
info.align_offset += get_align_bits();
|
|
}
|
|
addr = vm_unmapped_area(&info);
|
|
if (!(addr & ~PAGE_MASK))
|
|
return addr;
|
|
VM_BUG_ON(addr != -ENOMEM);
|
|
|
|
bottomup:
|
|
/*
|
|
* A failed mmap() very likely causes application failure,
|
|
* so fall back to the bottom-up function here. This scenario
|
|
* can happen with large stack limits and large mmap()
|
|
* allocations.
|
|
*/
|
|
return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
|
|
}
|