Add a vDSO function __vdso_riscv_hwprobe, which can sit in front of the riscv_hwprobe syscall and answer common queries. We stash a copy of static answers for the "all CPUs" case in the vDSO data page. This data is private to the vDSO, so we can decide later to change what's stored there or under what conditions we defer to the syscall. Currently all data can be discovered at boot, so the vDSO function answers all queries when the cpumask is set to the "all CPUs" hint. There's also a boolean in the data that lets the vDSO function know that all CPUs are the same. In that case, the vDSO will also answer queries for arbitrary CPU masks in addition to the "all CPUs" hint. Signed-off-by: Evan Green <evan@rivosinc.com> Link: https://lore.kernel.org/r/20230407231103.2622178-7-evan@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
288 lines
6.4 KiB
C
288 lines
6.4 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
|
|
* <benh@kernel.crashing.org>
|
|
* Copyright (C) 2012 ARM Limited
|
|
* Copyright (C) 2015 Regents of the University of California
|
|
*/
|
|
|
|
#include <linux/elf.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/binfmts.h>
|
|
#include <linux/err.h>
|
|
#include <asm/page.h>
|
|
#include <asm/vdso.h>
|
|
#include <linux/time_namespace.h>
|
|
#include <vdso/datapage.h>
|
|
|
|
/*
 * Page offsets inside the vvar mapping, as matched against vmf->pgoff in
 * vvar_fault(). VVAR_NR_PAGES is the number of pages in the mapping.
 */
enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET = 0,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};
|
|
|
|
/* Indices into the vm_special_mapping arrays defined in this file. */
enum rv_vdso_map {
	RV_VDSO_MAP_VVAR = 0,
	RV_VDSO_MAP_VDSO,
};
|
|
|
|
/* Size in bytes of the vvar mapping: VVAR_NR_PAGES pages. */
#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT)
|
|
|
|
/*
|
|
* The vDSO data page.
|
|
*/
|
|
static union {
|
|
struct vdso_data data;
|
|
u8 page[PAGE_SIZE];
|
|
} vdso_data_store __page_aligned_data;
|
|
struct vdso_data *vdso_data = &vdso_data_store.data;
|
|
|
|
/*
 * Description of one vDSO image and the special mappings used to map it.
 *
 * name:            image name ("vdso" or "compat_vdso" in this file)
 * vdso_code_start: start of the vDSO image
 * vdso_code_end:   end of the vDSO image
 * vdso_pages:      image length in pages, filled in by __vdso_init()
 * dm:              data mapping
 * cm:              code mapping
 */
struct __vdso_info {
	const char *name;
	const char *vdso_code_start;
	const char *vdso_code_end;
	unsigned long vdso_pages;
	struct vm_special_mapping *dm;
	struct vm_special_mapping *cm;
};
|
|
|
|
/*
 * Tentative definitions: vdso_join_timens() refers to these objects before
 * their initialised definitions appear further down in this file.
 */
static struct __vdso_info vdso_info;
#ifdef CONFIG_COMPAT
static struct __vdso_info compat_vdso_info;
#endif
|
|
|
|
static int vdso_mremap(const struct vm_special_mapping *sm,
|
|
struct vm_area_struct *new_vma)
|
|
{
|
|
current->mm->context.vdso = (void *)new_vma->vm_start;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Boot-time setup for one vDSO image: check the ELF magic, compute how many
 * pages the image covers and build the struct page array used by its code
 * mapping. Panics if the image does not start with the ELF magic or if the
 * page array cannot be allocated.
 */
static void __init __vdso_init(struct __vdso_info *vdso_info)
{
	struct page **pages;
	unsigned long first_pfn;
	unsigned int n;

	if (memcmp(vdso_info->vdso_code_start, "\177ELF", 4) != 0)
		panic("vDSO is not a valid ELF object!\n");

	vdso_info->vdso_pages =
		(vdso_info->vdso_code_end - vdso_info->vdso_code_start) >> PAGE_SHIFT;

	pages = kcalloc(vdso_info->vdso_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		panic("vDSO kcalloc failed!\n");

	/* Pages are taken at consecutive pfns starting at the image start. */
	first_pfn = sym_to_pfn(vdso_info->vdso_code_start);
	for (n = 0; n < vdso_info->vdso_pages; n++)
		pages[n] = pfn_to_page(first_pfn + n);

	vdso_info->cm->pages = pages;
}
|
|
|
|
#ifdef CONFIG_TIME_NS
|
|
/* Return @vvar_page viewed as a pointer to struct vdso_data. */
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	struct vdso_data *data = vvar_page;

	return data;
}
|
|
|
|
/*
|
|
* The vvar mapping contains data for a specific time namespace, so when a task
|
|
* changes namespace we must unmap its vvar data for the old namespace.
|
|
* Subsequent faults will map in data for the new namespace.
|
|
*
|
|
* For more details see timens_setup_vdso_data().
|
|
*/
|
|
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
|
|
{
|
|
struct mm_struct *mm = task->mm;
|
|
struct vm_area_struct *vma;
|
|
VMA_ITERATOR(vmi, mm, 0);
|
|
|
|
mmap_read_lock(mm);
|
|
|
|
for_each_vma(vmi, vma) {
|
|
if (vma_is_special_mapping(vma, vdso_info.dm))
|
|
zap_vma_pages(vma);
|
|
#ifdef CONFIG_COMPAT
|
|
if (vma_is_special_mapping(vma, compat_vdso_info.dm))
|
|
zap_vma_pages(vma);
|
|
#endif
|
|
}
|
|
|
|
mmap_read_unlock(mm);
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
|
|
struct vm_area_struct *vma, struct vm_fault *vmf)
|
|
{
|
|
struct page *timens_page = find_timens_vvar_page(vma);
|
|
unsigned long pfn;
|
|
|
|
switch (vmf->pgoff) {
|
|
case VVAR_DATA_PAGE_OFFSET:
|
|
if (timens_page)
|
|
pfn = page_to_pfn(timens_page);
|
|
else
|
|
pfn = sym_to_pfn(vdso_data);
|
|
break;
|
|
#ifdef CONFIG_TIME_NS
|
|
case VVAR_TIMENS_PAGE_OFFSET:
|
|
/*
|
|
* If a task belongs to a time namespace then a namespace
|
|
* specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
|
|
* the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
|
|
* offset.
|
|
* See also the comment near timens_setup_vdso_data().
|
|
*/
|
|
if (!timens_page)
|
|
return VM_FAULT_SIGBUS;
|
|
pfn = sym_to_pfn(vdso_data);
|
|
break;
|
|
#endif /* CONFIG_TIME_NS */
|
|
default:
|
|
return VM_FAULT_SIGBUS;
|
|
}
|
|
|
|
return vmf_insert_pfn(vma, vmf->address, pfn);
|
|
}
|
|
|
|
/*
 * Special mappings of the native vDSO, indexed by enum rv_vdso_map: the
 * "[vvar]" entry is served by vvar_fault(), the "[vdso]" entry tracks moves
 * through vdso_mremap().
 */
static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
	[RV_VDSO_MAP_VVAR] = {
		.name	= "[vvar]",
		.fault	= vvar_fault,
	},
	[RV_VDSO_MAP_VDSO] = {
		.name	= "[vdso]",
		.mremap	= vdso_mremap,
	},
};
|
|
|
|
/*
 * Native vDSO: image bounds plus its data and code mappings. vdso_pages and
 * the code mapping's page array are filled in by __vdso_init().
 */
static struct __vdso_info vdso_info __ro_after_init = {
	.name			= "vdso",
	.vdso_code_start	= vdso_start,
	.vdso_code_end		= vdso_end,
	.dm			= &rv_vdso_maps[RV_VDSO_MAP_VVAR],
	.cm			= &rv_vdso_maps[RV_VDSO_MAP_VDSO],
};
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
/*
 * Special mappings of the compat vDSO, indexed by enum rv_vdso_map. The
 * names and hooks are the same as for the native vDSO.
 */
static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = {
	[RV_VDSO_MAP_VVAR] = {
		.name	= "[vvar]",
		.fault	= vvar_fault,
	},
	[RV_VDSO_MAP_VDSO] = {
		.name	= "[vdso]",
		.mremap	= vdso_mremap,
	},
};
|
|
|
|
/*
 * Compat vDSO: image bounds plus its data and code mappings. vdso_pages and
 * the code mapping's page array are filled in by __vdso_init().
 */
static struct __vdso_info compat_vdso_info __ro_after_init = {
	.name			= "compat_vdso",
	.vdso_code_start	= compat_vdso_start,
	.vdso_code_end		= compat_vdso_end,
	.dm			= &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR],
	.cm			= &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO],
};
|
|
#endif
|
|
|
|
/*
 * Initcall that prepares the native vDSO image and, when CONFIG_COMPAT is
 * set, the compat image as well. Always returns 0.
 */
static int __init vdso_init(void)
{
	__vdso_init(&vdso_info);
#ifdef CONFIG_COMPAT
	__vdso_init(&compat_vdso_info);
#endif

	return 0;
}
arch_initcall(vdso_init);
|
|
|
|
static int __setup_additional_pages(struct mm_struct *mm,
|
|
struct linux_binprm *bprm,
|
|
int uses_interp,
|
|
struct __vdso_info *vdso_info)
|
|
{
|
|
unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
|
|
void *ret;
|
|
|
|
BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
|
|
|
|
vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT;
|
|
/* Be sure to map the data page */
|
|
vdso_mapping_len = vdso_text_len + VVAR_SIZE;
|
|
|
|
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
|
|
if (IS_ERR_VALUE(vdso_base)) {
|
|
ret = ERR_PTR(vdso_base);
|
|
goto up_fail;
|
|
}
|
|
|
|
ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
|
|
(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info->dm);
|
|
if (IS_ERR(ret))
|
|
goto up_fail;
|
|
|
|
vdso_base += VVAR_SIZE;
|
|
mm->context.vdso = (void *)vdso_base;
|
|
|
|
ret =
|
|
_install_special_mapping(mm, vdso_base, vdso_text_len,
|
|
(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
|
|
vdso_info->cm);
|
|
|
|
if (IS_ERR(ret))
|
|
goto up_fail;
|
|
|
|
return 0;
|
|
|
|
up_fail:
|
|
mm->context.vdso = NULL;
|
|
return PTR_ERR(ret);
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
|
|
int uses_interp)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
int ret;
|
|
|
|
if (mmap_write_lock_killable(mm))
|
|
return -EINTR;
|
|
|
|
ret = __setup_additional_pages(mm, bprm, uses_interp,
|
|
&compat_vdso_info);
|
|
mmap_write_unlock(mm);
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
int ret;
|
|
|
|
if (mmap_write_lock_killable(mm))
|
|
return -EINTR;
|
|
|
|
ret = __setup_additional_pages(mm, bprm, uses_interp, &vdso_info);
|
|
mmap_write_unlock(mm);
|
|
|
|
return ret;
|
|
}
|