Dmitry Safonov 739586951b arm64/vdso: Support mremap() for vDSO
vDSO VMA address is saved in mm_context for the purpose of using
restorer from vDSO page to return to userspace after signal handling.

In Checkpoint Restore in Userspace (CRIU) project we place vDSO VMA
on restore back to the place where it was on the dump.
With the exception for x86 (where there is API to map vDSO with
arch_prctl()), we move vDSO inherited from CRIU task to restoree
position by mremap().

CRIU does support arm64 architecture, but kernel doesn't update
context.vdso pointer after mremap(). Which results in translation
fault after signal handling on restored application:
https://github.com/xemul/criu/issues/288

Make vDSO code track the VMA address by supplying .mremap() fops
the same way it's done for x86 and arm32 by:
commit b059a453b1cf ("x86/vdso: Add mremap hook to vm_special_mapping")
commit 280e87e98c09 ("ARM: 8683/1: ARM32: Support mremap() for sigpage/vDSO").

Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-08-09 12:16:28 +01:00

257 lines
6.5 KiB
C

/*
* VDSO implementation for AArch64 and vector page setup for AArch32.
*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#include <linux/cache.h>
#include <linux/clocksource.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
extern char vdso_start[], vdso_end[];
static unsigned long vdso_pages __ro_after_init;
/*
* The vDSO data page.
*/
static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
#ifdef CONFIG_COMPAT
/*
* Create and map the vectors page for AArch32 tasks.
*/
static struct page *vectors_page[1] __ro_after_init;
static int __init alloc_vectors_page(void)
{
extern char __kuser_helper_start[], __kuser_helper_end[];
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
int kuser_sz = __kuser_helper_end - __kuser_helper_start;
int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
unsigned long vpage;
vpage = get_zeroed_page(GFP_ATOMIC);
if (!vpage)
return -ENOMEM;
/* kuser helpers */
memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start,
kuser_sz);
/* sigreturn code */
memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET,
__aarch32_sigret_code_start, sigret_sz);
flush_icache_range(vpage, vpage + PAGE_SIZE);
vectors_page[0] = virt_to_page(vpage);
return 0;
}
arch_initcall(alloc_vectors_page);
int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long addr = AARCH32_VECTORS_BASE;
static const struct vm_special_mapping spec = {
.name = "[vectors]",
.pages = vectors_page,
};
void *ret;
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
current->mm->context.vdso = (void *)addr;
/* Map vectors page at the high address. */
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC,
&spec);
up_write(&mm->mmap_sem);
return PTR_ERR_OR_ZERO(ret);
}
#endif /* CONFIG_COMPAT */
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
unsigned long vdso_size = vdso_end - vdso_start;
if (vdso_size != new_size)
return -EINVAL;
current->mm->context.vdso = (void *)new_vma->vm_start;
return 0;
}
static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
{
.name = "[vvar]",
},
{
.name = "[vdso]",
.mremap = vdso_mremap,
},
};
static int __init vdso_init(void)
{
int i;
struct page **vdso_pagelist;
unsigned long pfn;
if (memcmp(vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data);
/* Allocate the vDSO pagelist, plus a page for the data. */
vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
GFP_KERNEL);
if (vdso_pagelist == NULL)
return -ENOMEM;
/* Grab the vDSO data page. */
vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
/* Grab the vDSO code pages. */
pfn = sym_to_pfn(vdso_start);
for (i = 0; i < vdso_pages; i++)
vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
vdso_spec[0].pages = &vdso_pagelist[0];
vdso_spec[1].pages = &vdso_pagelist[1];
return 0;
}
arch_initcall(vdso_init);
int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
void *ret;
vdso_text_len = vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
vdso_mapping_len = vdso_text_len + PAGE_SIZE;
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
ret = ERR_PTR(vdso_base);
goto up_fail;
}
ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
VM_READ|VM_MAYREAD,
&vdso_spec[0]);
if (IS_ERR(ret))
goto up_fail;
vdso_base += PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
&vdso_spec[1]);
if (IS_ERR(ret))
goto up_fail;
up_write(&mm->mmap_sem);
return 0;
up_fail:
mm->context.vdso = NULL;
up_write(&mm->mmap_sem);
return PTR_ERR(ret);
}
/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
*/
void update_vsyscall(struct timekeeper *tk)
{
u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
++vdso_data->tb_seq_count;
smp_wmb();
vdso_data->use_syscall = use_syscall;
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
tk->tkr_mono.shift;
vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->raw_time_sec = tk->raw_sec;
vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
/* tkr_mono.shift == tkr_raw.shift */
vdso_data->cs_shift = tk->tkr_mono.shift;
}
smp_wmb();
++vdso_data->tb_seq_count;
}
void update_vsyscall_tz(void)
{
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
}