Revert "s390/smp: rework absolute lowcore access"

This reverts commit 7d06fed77b7d8fc9f6cc41b4e3f2823d32532ad8.

The reverted commit introduced vmem_mutex locking in the
vmem_map_4k_page() function, which is called from
smp_reinit_ipl_cpu() with interrupts disabled. Although that is a
pre-SMP early initcall - no other CPUs are running in parallel and no
other code takes vmem_mutex at this boot stage - it still needs to be fixed.

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
This commit is contained in:
Alexander Gordeev 2022-08-06 09:24:07 +02:00
parent 3fb39cb7c5
commit 5e441f61f5
14 changed files with 83 additions and 294 deletions

View File

@ -10,13 +10,11 @@
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
@ -182,8 +180,7 @@ static void setup_kernel_memory_layout(void)
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
__abs_lowcore = round_down(vmax - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore));
MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;

View File

@ -1,17 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_ABS_LOWCORE_H
#define _ASM_S390_ABS_LOWCORE_H
#include <asm/lowcore.h>
/* Size of a region holding one struct lowcore for each of NR_CPUS CPUs */
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
extern unsigned long __abs_lowcore;
extern bool abs_lowcore_mapped;
/* Every get_abs_lowcore() must be paired with a put_abs_lowcore() */
struct lowcore *get_abs_lowcore(unsigned long *flags);
void put_abs_lowcore(struct lowcore *lc, unsigned long flags);
/* Map/unmap the lowcore of @cpu in the region starting at __abs_lowcore */
int abs_lowcore_map(int cpu, struct lowcore *lc);
void abs_lowcore_unmap(int cpu);
#endif /* _ASM_S390_ABS_LOWCORE_H */

View File

@ -1781,8 +1781,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
extern void vmem_unmap_4k_page(unsigned long addr);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);

View File

@ -307,6 +307,21 @@ static __always_inline void __noreturn disabled_wait(void)
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
extern int memcpy_real(void *, unsigned long, size_t);
extern void memcpy_absolute(void *, void *, size_t);
/*
 * Store the value x into the given member of struct lowcore.
 *
 * x is first converted to the member's type in a temporary; the temporary
 * is then copied with memcpy_absolute() to __va() of the member's offset
 * within struct lowcore.
 */
#define put_abs_lowcore(member, x) do { \
unsigned long __abs_address = offsetof(struct lowcore, member); \
__typeof__(((struct lowcore *)0)->member) __tmp = (x); \
\
memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \
} while (0)
/*
 * Read the given member of struct lowcore into x.
 *
 * memcpy_absolute() copies sizeof(member) bytes from __va() of the member's
 * offset within struct lowcore to the address of x. x must be an lvalue,
 * because its address is assigned to a pointer to the member's type.
 */
#define get_abs_lowcore(x, member) do { \
unsigned long __abs_address = offsetof(struct lowcore, member); \
__typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \
\
memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \
} while (0)
extern int s390_isolate_bp(void);
extern int s390_isolate_bp_guest(void);

View File

@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
obj-y += smp.o text_amode31.o stacktrace.o
extra-y += head64.o vmlinux.lds

View File

@ -1,88 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/pgtable.h>
#include <asm/pgtable.h>
#include <asm/abs_lowcore.h>
/* Flag bits handed from get_abs_lowcore() to put_abs_lowcore() via @flags */
/* The lowcore was obtained on the unmapped path (lowcore_ptr[0]) */
#define ABS_LOWCORE_UNMAPPED 1
/* cr0.lap was set, so CR0 bit 28 was cleared and must be set again */
#define ABS_LOWCORE_LAP_ON 2
/* Interrupts were enabled before being disabled and must be re-enabled */
#define ABS_LOWCORE_IRQS_ON 4
/* Base address of the region in which abs_lowcore_map() maps each CPU's lowcore */
unsigned long __bootdata_preserved(__abs_lowcore);
/* Selects the mapped path in get_abs_lowcore() and put_abs_lowcore() */
bool __ro_after_init abs_lowcore_mapped;
/*
 * Map the LC_PAGES pages of lowcore @lc at the slot reserved for @cpu in
 * the region starting at __abs_lowcore.
 *
 * Returns 0 on success. If mapping any page fails, the pages mapped so far
 * are unmapped again (last mapped first) and the error code returned by
 * vmem_map_4k_page() is passed on to the caller.
 */
int abs_lowcore_map(int cpu, struct lowcore *lc)
{
	unsigned long virt_base = __abs_lowcore + (cpu * sizeof(struct lowcore));
	unsigned long phys_base = __pa(lc);
	int mapped, err;

	for (mapped = 0; mapped < LC_PAGES; mapped++) {
		err = vmem_map_4k_page(virt_base + mapped * PAGE_SIZE,
				       phys_base + mapped * PAGE_SIZE,
				       PAGE_KERNEL);
		if (err)
			goto unwind;
	}
	return 0;

unwind:
	/* Roll back only the pages that were mapped successfully */
	while (mapped-- > 0)
		vmem_unmap_4k_page(virt_base + mapped * PAGE_SIZE);
	return err;
}
/*
 * Unmap the LC_PAGES pages of the slot reserved for @cpu in the region
 * starting at __abs_lowcore, in ascending address order.
 */
void abs_lowcore_unmap(int cpu)
{
	unsigned long virt_base = __abs_lowcore + (cpu * sizeof(struct lowcore));
	int page = 0;

	while (page < LC_PAGES) {
		vmem_unmap_4k_page(virt_base + page * PAGE_SIZE);
		page++;
	}
}
/*
 * Obtain a lowcore pointer for the current CPU and record in *flags what
 * put_abs_lowcore() has to undo.
 *
 * Mapped path (abs_lowcore_mapped is true): returns the current CPU's entry
 * in the struct lowcore array at __abs_lowcore; *flags stays 0.
 *
 * Unmapped path: only allowed on CPU 0 (panics otherwise). Interrupts are
 * disabled, CR0 bit 28 is cleared if cr0.lap was set, and lowcore_ptr[0] is
 * returned. *flags records ABS_LOWCORE_UNMAPPED plus whichever of
 * ABS_LOWCORE_IRQS_ON / ABS_LOWCORE_LAP_ON applies.
 *
 * On both paths the function returns after get_cpu() without a matching
 * put_cpu(); the put_cpu() is issued by put_abs_lowcore().
 */
struct lowcore *get_abs_lowcore(unsigned long *flags)
{
unsigned long irq_flags;
union ctlreg0 cr0;
int cpu;
*flags = 0;
cpu = get_cpu();
if (abs_lowcore_mapped) {
return ((struct lowcore *)__abs_lowcore) + cpu;
} else {
if (cpu != 0)
panic("Invalid unmapped absolute lowcore access\n");
local_irq_save(irq_flags);
/* Remember whether interrupts were enabled before local_irq_save() */
if (!irqs_disabled_flags(irq_flags))
*flags |= ABS_LOWCORE_IRQS_ON;
/* Read CR0 to find out whether bit 28 has to be cleared */
__ctl_store(cr0.val, 0, 0);
if (cr0.lap) {
*flags |= ABS_LOWCORE_LAP_ON;
__ctl_clear_bit(0, 28);
}
*flags |= ABS_LOWCORE_UNMAPPED;
return lowcore_ptr[0];
}
}
/*
 * Release a lowcore pointer obtained with get_abs_lowcore().
 *
 * @lc is not used by this function. @flags must be the value stored by the
 * matching get_abs_lowcore() call:
 * - mapped path: @flags must be 0, otherwise the function panics;
 * - unmapped path: must run on CPU 0 with ABS_LOWCORE_UNMAPPED set in
 *   @flags (panics otherwise); CR0 bit 28 is set again and interrupts are
 *   re-enabled if the corresponding flag bits are present.
 *
 * Ends with put_cpu().
 */
void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
{
if (abs_lowcore_mapped) {
if (flags)
panic("Invalid mapped absolute lowcore release\n");
} else {
if (smp_processor_id() != 0)
panic("Invalid mapped absolute lowcore access\n");
if (!(flags & ABS_LOWCORE_UNMAPPED))
panic("Invalid unmapped absolute lowcore release\n");
/* Undo the CR0 bit 28 change before interrupts are re-enabled */
if (flags & ABS_LOWCORE_LAP_ON)
__ctl_set_bit(0, 28);
if (flags & ABS_LOWCORE_IRQS_ON)
local_irq_enable();
}
put_cpu();
}

View File

@ -29,7 +29,6 @@
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
@ -1643,16 +1642,12 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
unsigned long ipib = (unsigned long) reipl_block_actual;
struct lowcore *abs_lc;
unsigned long flags;
unsigned int csum;
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
abs_lc = get_abs_lowcore(&flags);
abs_lc->ipib = ipib;
abs_lc->ipib_checksum = csum;
put_abs_lowcore(abs_lc, flags);
put_abs_lowcore(ipib, ipib);
put_abs_lowcore(ipib_checksum, csum);
dump_run(trigger);
}

View File

@ -21,7 +21,6 @@
#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
#include <asm/stacktrace.h>
@ -223,18 +222,13 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
struct lowcore *abs_lc;
unsigned long flags;
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
abs_lc = get_abs_lowcore(&flags);
abs_lc->vmcore_info = paddr_vmcoreinfo_note();
put_abs_lowcore(abs_lc, flags);
put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
}
void machine_shutdown(void)

View File

@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/checksum.h>
#include <asm/abs_lowcore.h>
#include <asm/lowcore.h>
#include <asm/os_info.h>
#include <asm/asm-offsets.h>
@ -57,16 +57,13 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
*/
void __init os_info_init(void)
{
struct lowcore *abs_lc;
unsigned long flags;
void *ptr = &os_info;
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
os_info.csum = os_info_csum(&os_info);
abs_lc = get_abs_lowcore(&flags);
abs_lc->os_info = __pa(&os_info);
put_abs_lowcore(abs_lc, flags);
put_abs_lowcore(os_info, __pa(ptr));
}
#ifdef CONFIG_CRASH_DUMP

View File

@ -58,7 +58,7 @@
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/abs_lowcore.h>
#include <asm/lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
@ -411,9 +411,8 @@ void __init arch_call_rest_init(void)
static void __init setup_lowcore_dat_off(void)
{
unsigned long int_psw_mask = PSW_KERNEL_BITS;
struct lowcore *abs_lc, *lc;
unsigned long mcck_stack;
unsigned long flags;
struct lowcore *lc;
if (IS_ENABLED(CONFIG_KASAN))
int_psw_mask |= PSW_MASK_DAT;
@ -475,13 +474,11 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_data = 0;
lc->restart_source = -1U;
abs_lc = get_abs_lowcore(&flags);
abs_lc->restart_stack = lc->restart_stack;
abs_lc->restart_fn = lc->restart_fn;
abs_lc->restart_data = lc->restart_data;
abs_lc->restart_source = lc->restart_source;
abs_lc->restart_psw = lc->restart_psw;
put_abs_lowcore(abs_lc, flags);
put_abs_lowcore(restart_stack, lc->restart_stack);
put_abs_lowcore(restart_fn, lc->restart_fn);
put_abs_lowcore(restart_data, lc->restart_data);
put_abs_lowcore(restart_source, lc->restart_source);
put_abs_lowcore(restart_psw, lc->restart_psw);
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
@ -502,8 +499,8 @@ static void __init setup_lowcore_dat_off(void)
static void __init setup_lowcore_dat_on(void)
{
struct lowcore *abs_lc;
unsigned long flags;
struct lowcore *lc = lowcore_ptr[0];
int cr;
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
@ -512,15 +509,10 @@ static void __init setup_lowcore_dat_on(void)
S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
__ctl_set_bit(0, 28);
__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
abs_lc = get_abs_lowcore(&flags);
abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
abs_lc->program_new_psw = S390_lowcore.program_new_psw;
memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
sizeof(abs_lc->cregs_save_area));
put_abs_lowcore(abs_lc, flags);
if (abs_lowcore_map(0, lowcore_ptr[0]))
panic("Couldn't setup absolute lowcore");
abs_lowcore_mapped = true;
put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
put_abs_lowcore(program_new_psw, lc->program_new_psw);
for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
}
static struct resource code_resource = {

View File

@ -45,7 +45,7 @@
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
#include <asm/abs_lowcore.h>
#include <asm/lowcore.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/os_info.h>
@ -212,14 +212,10 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->preempt_count = PREEMPT_DISABLED;
if (nmi_alloc_mcesa(&lc->mcesad))
goto out;
if (abs_lowcore_map(cpu, lc))
goto out_mcesa;
lowcore_ptr[cpu] = lc;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
return 0;
out_mcesa:
nmi_free_mcesa(&lc->mcesad);
out:
stack_free(mcck_stack);
stack_free(async_stack);
@ -241,7 +237,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[cpu] = NULL;
abs_lowcore_unmap(cpu);
nmi_free_mcesa(&lc->mcesad);
stack_free(async_stack);
stack_free(mcck_stack);
@ -320,12 +315,9 @@ static void pcpu_delegate(struct pcpu *pcpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
struct lowcore *lc, *abs_lc;
unsigned int source_cpu;
unsigned long flags;
struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
unsigned int source_cpu = stap();
lc = lowcore_ptr[pcpu - pcpu_devices];
source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
@ -340,12 +332,10 @@ static void pcpu_delegate(struct pcpu *pcpu,
lc->restart_data = (unsigned long)data;
lc->restart_source = source_cpu;
} else {
abs_lc = get_abs_lowcore(&flags);
abs_lc->restart_stack = stack;
abs_lc->restart_fn = (unsigned long)func;
abs_lc->restart_data = (unsigned long)data;
abs_lc->restart_source = source_cpu;
put_abs_lowcore(abs_lc, flags);
put_abs_lowcore(restart_stack, stack);
put_abs_lowcore(restart_fn, (unsigned long)func);
put_abs_lowcore(restart_data, (unsigned long)data);
put_abs_lowcore(restart_source, source_cpu);
}
__bpon();
asm volatile(
@ -591,8 +581,6 @@ static DEFINE_SPINLOCK(ctl_lock);
void smp_ctl_set_clear_bit(int cr, int bit, bool set)
{
struct ec_creg_mask_parms parms = { .cr = cr, };
struct lowcore *abs_lc;
unsigned long flags;
u64 ctlreg;
if (set) {
@ -603,11 +591,9 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
parms.andval = ~(1UL << bit);
}
spin_lock(&ctl_lock);
abs_lc = get_abs_lowcore(&flags);
ctlreg = abs_lc->cregs_save_area[cr];
get_abs_lowcore(ctlreg, cregs_save_area[cr]);
ctlreg = (ctlreg & parms.andval) | parms.orval;
abs_lc->cregs_save_area[cr] = ctlreg;
put_abs_lowcore(abs_lc, flags);
put_abs_lowcore(cregs_save_area[cr], ctlreg);
spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
@ -1295,8 +1281,6 @@ static int __init smp_reinit_ipl_cpu(void)
__ctl_clear_bit(0, 28); /* disable lowcore protection */
S390_lowcore.mcesad = mcesad;
__ctl_load(cr0, 0, 0);
if (abs_lowcore_map(0, lc))
panic("Couldn't remap absolute lowcore");
lowcore_ptr[0] = lc;
local_mcck_enable();
local_irq_restore(flags);

View File

@ -38,7 +38,7 @@
#include <asm/kfence.h>
#include <asm/ptdump.h>
#include <asm/dma.h>
#include <asm/abs_lowcore.h>
#include <asm/lowcore.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>

View File

@ -15,7 +15,6 @@
#include <asm/asm-extable.h>
#include <asm/ctl_reg.h>
#include <asm/io.h>
#include <asm/abs_lowcore.h>
#include <asm/stacktrace.h>
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
@ -149,20 +148,46 @@ int memcpy_real(void *dest, unsigned long src, size_t count)
}
/*
* Find CPU that owns swapped prefix page
* Copy memory in absolute mode (kernel to kernel)
*/
static int get_swapped_owner(phys_addr_t addr)
void memcpy_absolute(void *dest, void *src, size_t count)
{
unsigned long cr0, flags, prefix;
/* Interrupts stay disabled for the whole copy */
flags = arch_local_irq_save();
/* Save CR0 so that bit 28 can be restored after the copy */
__ctl_store(cr0, 0, 0);
__ctl_clear_bit(0, 28); /* disable lowcore protection */
prefix = store_prefix();
if (prefix) {
/*
 * A non-zero prefix is active: copy with the prefix temporarily set
 * to 0 and machine checks disabled, then restore both in reverse order.
 */
local_mcck_disable();
set_prefix(0);
memcpy(dest, src, count);
set_prefix(prefix);
local_mcck_enable();
} else {
/* Prefix is already 0: a plain copy is sufficient */
memcpy(dest, src, count);
}
/* Restore the saved CR0 value, then the saved interrupt state */
__ctl_load(cr0, 0, 0);
arch_local_irq_restore(flags);
}
/*
* Check if physical address is within prefix or zero page
*/
static int is_swapped(phys_addr_t addr)
{
phys_addr_t lc;
int cpu;
if (addr < sizeof(struct lowcore))
return 1;
for_each_online_cpu(cpu) {
lc = virt_to_phys(lowcore_ptr[cpu]);
if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
continue;
return cpu;
return 1;
}
return -1;
return 0;
}
/*
@ -175,35 +200,17 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
{
void *ptr = phys_to_virt(addr);
void *bounce = ptr;
struct lowcore *abs_lc;
unsigned long flags;
unsigned long size;
int this_cpu, cpu;
cpus_read_lock();
this_cpu = get_cpu();
if (addr >= sizeof(struct lowcore)) {
cpu = get_swapped_owner(addr);
if (cpu < 0)
goto out;
}
bounce = (void *)__get_free_page(GFP_ATOMIC);
if (!bounce)
goto out;
preempt_disable();
if (is_swapped(addr)) {
size = PAGE_SIZE - (addr & ~PAGE_MASK);
if (addr < sizeof(struct lowcore)) {
abs_lc = get_abs_lowcore(&flags);
ptr = (void *)abs_lc + addr;
memcpy(bounce, ptr, size);
put_abs_lowcore(abs_lc, flags);
} else if (cpu == this_cpu) {
ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
memcpy(bounce, ptr, size);
} else {
memcpy(bounce, ptr, size);
bounce = (void *) __get_free_page(GFP_ATOMIC);
if (bounce)
memcpy_absolute(bounce, ptr, size);
}
out:
put_cpu();
preempt_enable();
cpus_read_unlock();
return bounce;
}

View File

@ -560,91 +560,6 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
return ret;
}
/*
 * Allocate new or return existing page-table entry, but do not map it
 * to any physical address. If missing, allocate segment- and region-
 * table entries along. Meeting a large segment- or region-table entry
 * while traversing is an error, since the function is expected to be
 * called against virtual regions reserved for 4KB mappings only.
 *
 * Returns the PTE pointer for @addr in the kernel page tables, or NULL if
 * a table allocation failed or a large entry was encountered.
 */
static pte_t *vmem_get_alloc_pte(unsigned long addr)
{
pte_t *ptep = NULL;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
/* Walk down from the kernel pgd, populating each missing level */
pgd = pgd_offset_k(addr);
if (pgd_none(*pgd)) {
p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!p4d)
goto out;
pgd_populate(&init_mm, pgd, p4d);
}
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d)) {
pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!pud)
goto out;
p4d_populate(&init_mm, p4d, pud);
}
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pmd)
goto out;
pud_populate(&init_mm, pud, pmd);
} else if (WARN_ON_ONCE(pud_large(*pud))) {
/* A large pud entry is not expected here: warn once and fail */
goto out;
}
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
pte = vmem_pte_alloc();
if (!pte)
goto out;
pmd_populate(&init_mm, pmd, pte);
} else if (WARN_ON_ONCE(pmd_large(*pmd))) {
/* A large pmd entry is not expected here: warn once and fail */
goto out;
}
ptep = pte_offset_kernel(pmd, addr);
out:
/* ptep is still NULL on every failure path */
return ptep;
}
/*
 * Map the single page at virtual address @addr to physical address @phys
 * with protection @prot in the kernel page tables.
 *
 * Returns 0 on success, -EINVAL if @addr is not page aligned, or -ENOMEM
 * if vmem_get_alloc_pte() could not provide a page-table entry.
 *
 * Takes vmem_mutex, so the caller must be allowed to take a mutex.
 */
int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
{
pte_t *ptep, pte;
int rc = 0;
if (!IS_ALIGNED(addr, PAGE_SIZE))
return -EINVAL;
mutex_lock(&vmem_mutex);
ptep = vmem_get_alloc_pte(addr);
if (!ptep) {
rc = -ENOMEM;
goto out;
}
/*
 * __ptep_ipte() is issued on the entry before the new PTE is installed;
 * presumably this invalidates any previous translation - TODO confirm.
 */
__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
pte = mk_pte_phys(phys, prot);
set_pte(ptep, pte);
out:
mutex_unlock(&vmem_mutex);
return rc;
}
/*
 * Remove the kernel mapping of the single page at virtual address @addr:
 * look up its PTE, issue __ptep_ipte() on it and clear the entry.
 *
 * Takes vmem_mutex, so the caller must be allowed to take a mutex.
 *
 * NOTE(review): the pointer returned by virt_to_kpte() is used without a
 * check, so @addr is presumably expected to have been mapped with
 * vmem_map_4k_page() before - confirm against callers.
 */
void vmem_unmap_4k_page(unsigned long addr)
{
pte_t *ptep;
mutex_lock(&vmem_mutex);
ptep = virt_to_kpte(addr);
__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
pte_clear(&init_mm, addr, ptep);
mutex_unlock(&vmem_mutex);
}
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug