s390/mm: rework memcpy_real() to avoid DAT-off mode
Function memcpy_real() is a universal data mover that does not require DAT mode to be able to read from a physical address. Its advantage is the ability to read from any address, even those for which no kernel virtual mapping exists. Although memcpy_real() is interrupt-safe, there are no handlers that make use of this function. Compiler instrumentation has to be disabled and a separate no-DAT stack has to be used to allow execution of the function once DAT mode is disabled. Rework memcpy_real() to overcome these shortcomings. As a result, data copying (which is primarily reading out a crashed system's memory by a user process) is executed on a regular stack with interrupts enabled. Also, use of the memcpy_real_buf swap buffer becomes unnecessary and the swapping is eliminated. The above is achieved by using a fixed virtual address range that spans a single page and remapping that page repeatedly when memcpy_real() is called for a particular physical address. Reviewed-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com> Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
This commit is contained in:
parent
14a3a26242
commit
2f0e8aae26
@ -17,6 +17,7 @@
|
||||
|
||||
unsigned long __bootdata_preserved(__kaslr_offset);
|
||||
unsigned long __bootdata_preserved(__abs_lowcore);
|
||||
unsigned long __bootdata_preserved(__memcpy_real_area);
|
||||
unsigned long __bootdata(__amode31_base);
|
||||
unsigned long __bootdata_preserved(VMALLOC_START);
|
||||
unsigned long __bootdata_preserved(VMALLOC_END);
|
||||
@ -182,7 +183,9 @@ static void setup_kernel_memory_layout(void)
|
||||
/* force vmalloc and modules below kasan shadow */
|
||||
vmax = min(vmax, KASAN_SHADOW_START);
|
||||
#endif
|
||||
__abs_lowcore = round_down(vmax - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore));
|
||||
__memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE);
|
||||
__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
|
||||
sizeof(struct lowcore));
|
||||
MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
|
||||
MODULES_VADDR = MODULES_END - MODULES_LEN;
|
||||
VMALLOC_END = MODULES_VADDR;
|
||||
|
14
arch/s390/include/asm/maccess.h
Normal file
14
arch/s390/include/asm/maccess.h
Normal file
@ -0,0 +1,14 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __ASM_S390_MACCESS_H
|
||||
#define __ASM_S390_MACCESS_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct iov_iter;
|
||||
|
||||
extern unsigned long __memcpy_real_area;
|
||||
void memcpy_real_init(void);
|
||||
size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
|
||||
int memcpy_real(void *dest, unsigned long src, size_t count);
|
||||
|
||||
#endif /* __ASM_S390_MACCESS_H */
|
@ -1780,6 +1780,7 @@ extern void vmem_remove_mapping(unsigned long start, unsigned long size);
|
||||
extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
|
||||
extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
|
||||
extern void vmem_unmap_4k_page(unsigned long addr);
|
||||
extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);
|
||||
extern int s390_enable_sie(void);
|
||||
extern int s390_enable_skey(void);
|
||||
extern void s390_reset_cmma(struct mm_struct *mm);
|
||||
|
@ -306,8 +306,6 @@ static __always_inline void __noreturn disabled_wait(void)
|
||||
|
||||
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
|
||||
|
||||
extern int memcpy_real(void *, unsigned long, size_t);
|
||||
|
||||
extern int s390_isolate_bp(void);
|
||||
extern int s390_isolate_bp_guest(void);
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <asm/elf.h>
|
||||
#include <asm/ipl.h>
|
||||
#include <asm/sclp.h>
|
||||
#include <asm/maccess.h>
|
||||
|
||||
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
|
||||
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
|
||||
@ -53,8 +54,6 @@ struct save_area {
|
||||
};
|
||||
|
||||
static LIST_HEAD(dump_save_areas);
|
||||
static DEFINE_MUTEX(memcpy_real_mutex);
|
||||
static char memcpy_real_buf[PAGE_SIZE];
|
||||
|
||||
/*
|
||||
* Allocate a save area
|
||||
@ -116,26 +115,6 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
|
||||
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
|
||||
}
|
||||
|
||||
static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count)
|
||||
{
|
||||
size_t len, copied, res = 0;
|
||||
|
||||
mutex_lock(&memcpy_real_mutex);
|
||||
while (count) {
|
||||
len = min(PAGE_SIZE, count);
|
||||
if (memcpy_real(memcpy_real_buf, src, len))
|
||||
break;
|
||||
copied = copy_to_iter(memcpy_real_buf, len, iter);
|
||||
count -= copied;
|
||||
src += copied;
|
||||
res += copied;
|
||||
if (copied < len)
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&memcpy_real_mutex);
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
|
||||
{
|
||||
size_t len, copied, res = 0;
|
||||
@ -156,7 +135,7 @@ size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
|
||||
} else {
|
||||
len = count;
|
||||
}
|
||||
copied = copy_to_iter_real(iter, src, len);
|
||||
copied = memcpy_real_iter(iter, src, len);
|
||||
}
|
||||
count -= copied;
|
||||
src += copied;
|
||||
|
@ -74,6 +74,7 @@
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/mem_detect.h>
|
||||
#include <asm/maccess.h>
|
||||
#include <asm/uv.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include "entry.h"
|
||||
@ -1050,7 +1051,7 @@ void __init setup_arch(char **cmdline_p)
|
||||
* Create kernel page tables and switch to virtual addressing.
|
||||
*/
|
||||
paging_init();
|
||||
|
||||
memcpy_real_init();
|
||||
/*
|
||||
* After paging_init created the kernel page table, the new PSWs
|
||||
* in lowcore can now run with DAT enabled.
|
||||
|
@ -12,12 +12,17 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/uio.h>
|
||||
#include <asm/asm-extable.h>
|
||||
#include <asm/ctl_reg.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/abs_lowcore.h>
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
unsigned long __bootdata_preserved(__memcpy_real_area);
|
||||
static __ro_after_init pte_t *memcpy_real_ptep;
|
||||
static DEFINE_MUTEX(memcpy_real_mutex);
|
||||
|
||||
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
|
||||
{
|
||||
unsigned long aligned, offset, count;
|
||||
@ -77,75 +82,55 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
|
||||
return dst;
|
||||
}
|
||||
|
||||
static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
|
||||
void __init memcpy_real_init(void)
|
||||
{
|
||||
union register_pair _dst, _src;
|
||||
int rc = -EFAULT;
|
||||
|
||||
_dst.even = (unsigned long) dest;
|
||||
_dst.odd = (unsigned long) count;
|
||||
_src.even = (unsigned long) src;
|
||||
_src.odd = (unsigned long) count;
|
||||
asm volatile (
|
||||
"0: mvcle %[dst],%[src],0\n"
|
||||
"1: jo 0b\n"
|
||||
" lhi %[rc],0\n"
|
||||
"2:\n"
|
||||
EX_TABLE(1b,2b)
|
||||
: [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair)
|
||||
: : "cc", "memory");
|
||||
return rc;
|
||||
memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
|
||||
if (!memcpy_real_ptep)
|
||||
panic("Couldn't setup memcpy real area");
|
||||
}
|
||||
|
||||
static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
|
||||
unsigned long src,
|
||||
unsigned long count)
|
||||
size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
|
||||
{
|
||||
int irqs_disabled, rc;
|
||||
unsigned long flags;
|
||||
size_t len, copied, res = 0;
|
||||
unsigned long phys, offset;
|
||||
void *chunk;
|
||||
pte_t pte;
|
||||
|
||||
if (!count)
|
||||
return 0;
|
||||
flags = arch_local_irq_save();
|
||||
irqs_disabled = arch_irqs_disabled_flags(flags);
|
||||
if (!irqs_disabled)
|
||||
trace_hardirqs_off();
|
||||
__arch_local_irq_stnsm(0xf8); // disable DAT
|
||||
rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
|
||||
if (flags & PSW_MASK_DAT)
|
||||
__arch_local_irq_stosm(0x04); // enable DAT
|
||||
if (!irqs_disabled)
|
||||
trace_hardirqs_on();
|
||||
__arch_local_irq_ssm(flags);
|
||||
return rc;
|
||||
while (count) {
|
||||
phys = src & PAGE_MASK;
|
||||
offset = src & ~PAGE_MASK;
|
||||
chunk = (void *)(__memcpy_real_area + offset);
|
||||
len = min(count, PAGE_SIZE - offset);
|
||||
pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
|
||||
|
||||
mutex_lock(&memcpy_real_mutex);
|
||||
if (pte_val(pte) != pte_val(*memcpy_real_ptep)) {
|
||||
__ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL);
|
||||
set_pte(memcpy_real_ptep, pte);
|
||||
}
|
||||
copied = copy_to_iter(chunk, len, iter);
|
||||
mutex_unlock(&memcpy_real_mutex);
|
||||
|
||||
count -= copied;
|
||||
src += copied;
|
||||
res += copied;
|
||||
if (copied < len)
|
||||
break;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy memory in real mode (kernel to kernel)
|
||||
*/
|
||||
int memcpy_real(void *dest, unsigned long src, size_t count)
|
||||
{
|
||||
unsigned long _dest = (unsigned long)dest;
|
||||
unsigned long _src = (unsigned long)src;
|
||||
unsigned long _count = (unsigned long)count;
|
||||
int rc;
|
||||
struct iov_iter iter;
|
||||
struct kvec kvec;
|
||||
|
||||
if (S390_lowcore.nodat_stack != 0) {
|
||||
preempt_disable();
|
||||
rc = call_on_stack(3, S390_lowcore.nodat_stack,
|
||||
unsigned long, _memcpy_real,
|
||||
unsigned long, _dest,
|
||||
unsigned long, _src,
|
||||
unsigned long, _count);
|
||||
preempt_enable();
|
||||
return rc;
|
||||
}
|
||||
/*
|
||||
* This is a really early memcpy_real call, the stacks are
|
||||
* not set up yet. Just call _memcpy_real on the early boot
|
||||
* stack
|
||||
*/
|
||||
return _memcpy_real(_dest, _src, _count);
|
||||
kvec.iov_base = dest;
|
||||
kvec.iov_len = count;
|
||||
iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
|
||||
if (memcpy_real_iter(&iter, src, count) < count)
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -567,7 +567,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
|
||||
* while traversing is an error, since the function is expected to be
|
||||
* called against virtual regions reserved for 4KB mappings only.
|
||||
*/
|
||||
static pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
|
||||
pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
|
||||
{
|
||||
pte_t *ptep = NULL;
|
||||
pgd_t *pgd;
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <asm/checksum.h>
|
||||
#include <asm/os_info.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/maccess.h>
|
||||
#include "sclp.h"
|
||||
|
||||
#define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x)
|
||||
|
Loading…
x
Reference in New Issue
Block a user