more s390 updates for 6.7 merge window
- Get rid of the s390-specific use of two PTEs per 4KB page with complex
  half-used page tracking. Using a full 4KB page for each 2KB PTE table
  increases the memory footprint of page tables but drastically
  simplifies mm code, removing a common blocker for common code changes
  and adaptations

- Simplify and rework "cmma no-dat" handling. This is a follow-up to
  recent fixes which prevent potential incorrect guest TLB flushes

- Add perf user stack unwinding, as well as USER_STACKTRACE support for
  user space built with the -mbackchain compile option

- Add a few missing conversions from tlb_remove_table to
  tlb_remove_ptdesc

- Fix crypto cards vanishing in a secure execution environment due to
  asynchronous errors

- Avoid reporting crypto cards or queues in check-stop state as online

- Fix null-ptr dereference in AP bus code triggered by an early config
  change via SCLP

- A couple of stability improvements in AP queue interrupt handling

-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEE3QHqV+H2a8xAv27vjYWKoQLXFBgFAmVL7MMACgkQjYWKoQLX
FBiQ0gf9GQDlZiMXpwTTgPWuaj8lD5D25k+mXdH7JNooguswTtWwQfjt4prVmMic
AmLT+FNlSzGf4ZHTjaOSc6MW1g4ILxe6Uu7WScGnUgSpfoXAc2k8ECll1DFIA7nA
eqezMPD28Kl9OJn/5jRAwg5bn9m3Rsh48Kdp+re6eiZ75o+JXpyXtXpHvWkxYpQc
IUiAuBRV8a7NdovlfJmgDPhn3x0cwyHxyEMv/smOZCMujMH+6JNN0ob7emL+NQ/R
oNbDwkolCb8+rHcMomFGvqzUk9cT9ZGlQZe1bXUtgODeKDCEbI/Xzs5ulS0rQrGT
ItXK4vYBLd9vJZnZjSp5ISiq8RDXzQ==
=KFS4
-----END PGP SIGNATURE-----

Merge tag 's390-6.7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull more s390 updates from Vasily Gorbik:

- Get rid of the s390-specific use of two PTEs per 4KB page with complex
  half-used page tracking. Using a full 4KB page for each 2KB PTE table
  increases the memory footprint of page tables but drastically
  simplifies mm code, removing a common blocker for common code changes
  and adaptations

- Simplify and rework "cmma no-dat" handling. This is a follow-up to
  recent fixes which prevent potential incorrect guest TLB flushes

- Add perf user stack unwinding, as well as USER_STACKTRACE support for
  user space built with the -mbackchain compile option

- Add a few missing conversions from tlb_remove_table to
  tlb_remove_ptdesc

- Fix crypto cards vanishing in a secure execution environment due to
  asynchronous errors

- Avoid reporting crypto cards or queues in check-stop state as online

- Fix null-ptr dereference in AP bus code triggered by an early config
  change via SCLP

- A couple of stability improvements in AP queue interrupt handling

* tag 's390-6.7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/mm: make pte_free_tlb() similar to pXd_free_tlb()
  s390/mm: use compound page order to distinguish page tables
  s390/mm: use full 4KB page for 2KB PTE
  s390/cmma: rework no-dat handling
  s390/cmma: move arch_set_page_dat() to header file
  s390/cmma: move set_page_stable() and friends to header file
  s390/cmma: move parsing of cmma kernel parameter to early boot code
  s390/cmma: cleanup inline assemblies
  s390/ap: fix vanishing crypto cards in SE environment
  s390/zcrypt: don't report online if card or queue is in check-stop state
  s390: add USER_STACKTRACE support
  s390/perf: implement perf_callchain_user()
  s390/ap: fix AP bus crash on early config change callback invocation
  s390/ap: re-enable interrupt for AP queues
  s390/ap: rework to use irq info from ap queue status
  s390/mm: add missing conversion to use ptdescs
commit 1995a53670
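For context on the first item in the pull message: an s390 PTE table holds 256 eight-byte entries, i.e. 2KB — half of a 4KB page — which is why the old allocator packed two tables into one page and tracked the halves. A minimal sketch of that arithmetic (illustrative only; the constant names mirror the kernel's):

	/* Illustrative sketch: why an s390 PTE table is half a page. */
	#include <assert.h>

	#define PTRS_PER_PTE	256	/* entries per s390 PTE table */
	#define PTE_SIZE	8	/* bytes per pte_t on 64-bit s390 */
	#define PAGE_SIZE	4096

	int main(void)
	{
		unsigned long table_size = PTRS_PER_PTE * PTE_SIZE;	/* 2048 bytes */

		/*
		 * Old scheme: two 2KB tables per 4KB page, tracked via bits in
		 * page->_refcount. New scheme: one table per 4KB page; half the
		 * page stays unused, the fragment bookkeeping disappears.
		 */
		assert(2 * table_size == PAGE_SIZE);
		return 0;
	}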
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
@@ -236,6 +236,7 @@ config S390
 	select THREAD_INFO_IN_TASK
 	select TRACE_IRQFLAGS_SUPPORT
 	select TTY
+	select USER_STACKTRACE_SUPPORT
 	select VIRT_CPU_ACCOUNTING
 	select ZONE_DMA
 	# Note: keep the above list sorted alphabetically
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
@@ -3,6 +3,7 @@
 #include <linux/init.h>
 #include <linux/ctype.h>
 #include <linux/pgtable.h>
+#include <asm/page-states.h>
 #include <asm/ebcdic.h>
 #include <asm/sclp.h>
 #include <asm/sections.h>
@@ -24,6 +25,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
 struct ipl_parameter_block __bootdata_preserved(ipl_block);
 int __bootdata_preserved(ipl_block_valid);
 int __bootdata_preserved(__kaslr_enabled);
+int __bootdata_preserved(cmma_flag) = 1;
 
 unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE;
 unsigned long memory_limit;
@@ -295,6 +297,12 @@ void parse_boot_command_line(void)
 		if (!strcmp(param, "nokaslr"))
 			__kaslr_enabled = 0;
 
+		if (!strcmp(param, "cmma")) {
+			rc = kstrtobool(val, &enabled);
+			if (!rc && !enabled)
+				cmma_flag = 0;
+		}
+
 #if IS_ENABLED(CONFIG_KVM)
 		if (!strcmp(param, "prot_virt")) {
 			rc = kstrtobool(val, &enabled);
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/string.h>
 #include <linux/elf.h>
+#include <asm/page-states.h>
 #include <asm/boot_data.h>
 #include <asm/sections.h>
 #include <asm/maccess.h>
@@ -57,6 +58,48 @@ static void detect_facilities(void)
 		machine.has_nx = 1;
 }
 
+static int cmma_test_essa(void)
+{
+	unsigned long reg1, reg2, tmp = 0;
+	int rc = 1;
+	psw_t old;
+
+	/* Test ESSA_GET_STATE */
+	asm volatile(
+		"	mvc	0(16,%[psw_old]),0(%[psw_pgm])\n"
+		"	epsw	%[reg1],%[reg2]\n"
+		"	st	%[reg1],0(%[psw_pgm])\n"
+		"	st	%[reg2],4(%[psw_pgm])\n"
+		"	larl	%[reg1],1f\n"
+		"	stg	%[reg1],8(%[psw_pgm])\n"
+		"	.insn	rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
+		"	la	%[rc],0\n"
+		"1:	mvc	0(16,%[psw_pgm]),0(%[psw_old])\n"
+		: [reg1] "=&d" (reg1),
+		  [reg2] "=&a" (reg2),
+		  [rc] "+&d" (rc),
+		  [tmp] "=&d" (tmp),
+		  "+Q" (S390_lowcore.program_new_psw),
+		  "=Q" (old)
+		: [psw_old] "a" (&old),
+		  [psw_pgm] "a" (&S390_lowcore.program_new_psw),
+		  [cmd] "i" (ESSA_GET_STATE)
+		: "cc", "memory");
+	return rc;
+}
+
+static void cmma_init(void)
+{
+	if (!cmma_flag)
+		return;
+	if (cmma_test_essa()) {
+		cmma_flag = 0;
+		return;
+	}
+	if (test_facility(147))
+		cmma_flag = 2;
+}
+
 static void setup_lpp(void)
 {
 	S390_lowcore.current_pid = 0;
@@ -306,6 +349,7 @@ void startup_kernel(void)
 	setup_boot_command_line();
 	parse_boot_command_line();
 	detect_facilities();
+	cmma_init();
 	sanitize_prot_virt_host();
 	max_physmem_end = detect_max_physmem_end();
 	setup_ident_map_size(max_physmem_end);
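cmma_init() above leaves cmma_flag in one of three states that the rest of the series keys off. A hedged summary sketch (the enum and its names are illustrative; the kernel keeps cmma_flag as a plain int):

	/* Illustrative only: the cmma_flag values as used by this series. */
	enum cmma_mode {
		CMMA_OFF      = 0,	/* "cmma=off", or ESSA faulted in cmma_test_essa() */
		CMMA_DAT_ONLY = 1,	/* ESSA available, facility 147 (no-dat) absent */
		CMMA_NO_DAT   = 2,	/* ESSA available and test_facility(147) succeeded */
	};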
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
@@ -2,6 +2,7 @@
 #include <linux/sched/task.h>
 #include <linux/pgtable.h>
 #include <linux/kasan.h>
+#include <asm/page-states.h>
 #include <asm/pgalloc.h>
 #include <asm/facility.h>
 #include <asm/sections.h>
@@ -70,6 +71,10 @@ static void kasan_populate_shadow(void)
 	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
 	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
 	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
+	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
+	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
+	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
+	__arch_set_page_dat(kasan_early_shadow_pte, 1);
 
 	/*
 	 * Current memory layout:
@@ -223,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val)
 
 	table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
 	crst_table_init(table, val);
+	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
 	return table;
 }
 
@@ -238,6 +244,7 @@ static pte_t *boot_pte_alloc(void)
 	if (!pte_leftover) {
 		pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
 		pte = pte_leftover + _PAGE_TABLE_SIZE;
+		__arch_set_page_dat(pte, 1);
 	} else {
 		pte = pte_leftover;
 		pte_leftover = NULL;
@@ -418,6 +425,14 @@ void setup_vmem(unsigned long asce_limit)
 	unsigned long asce_bits;
 	int i;
 
+	/*
+	 * Mark whole memory as no-dat. This must be done before any
+	 * page tables are allocated, or kernel image builtin pages
+	 * are marked as dat tables.
+	 */
+	for_each_physmem_online_range(i, &start, &end)
+		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);
+
 	if (asce_limit == _REGION1_SIZE) {
 		asce_type = _REGION2_ENTRY_EMPTY;
 		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
@@ -429,6 +444,8 @@ void setup_vmem(unsigned long asce_limit)
 
 	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
 	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
+	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
 
 	/*
	 * To allow prefixing the lowcore must be mapped with 4KB pages.
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
@@ -11,7 +11,6 @@ typedef struct {
 	cpumask_t cpu_attach_mask;
 	atomic_t flush_count;
 	unsigned int flush_mm;
-	struct list_head pgtable_list;
 	struct list_head gmap_list;
 	unsigned long gmap_asce;
 	unsigned long asce;
@@ -39,7 +38,6 @@ typedef struct {
 
 #define INIT_MM_CONTEXT(name)						\
 	.context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock),	\
-	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
 	.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
 
 #endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
@@ -22,7 +22,6 @@ static inline int init_new_context(struct task_struct *tsk,
 	unsigned long asce_type, init_entry;
 
 	spin_lock_init(&mm->context.lock);
-	INIT_LIST_HEAD(&mm->context.pgtable_list);
 	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.flush_count, 0);
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
@@ -7,6 +7,9 @@
 #ifndef PAGE_STATES_H
 #define PAGE_STATES_H
 
+#include <asm/sections.h>
+#include <asm/page.h>
+
 #define ESSA_GET_STATE			0
 #define ESSA_SET_STABLE			1
 #define ESSA_SET_UNUSED			2
@@ -18,4 +21,60 @@
 
 #define ESSA_MAX	ESSA_SET_STABLE_NODAT
 
+extern int __bootdata_preserved(cmma_flag);
+
+static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd)
+{
+	unsigned long rc;
+
+	asm volatile(
+		"	.insn	rrf,0xb9ab0000,%[rc],%[paddr],%[cmd],0"
+		: [rc] "=d" (rc)
+		: [paddr] "d" (paddr),
+		  [cmd] "i" (cmd));
+	return rc;
+}
+
+static __always_inline void __set_page_state(void *addr, unsigned long num_pages, unsigned char cmd)
+{
+	unsigned long paddr = __pa(addr) & PAGE_MASK;
+
+	while (num_pages--) {
+		essa(paddr, cmd);
+		paddr += PAGE_SIZE;
+	}
+}
+
+static inline void __set_page_unused(void *addr, unsigned long num_pages)
+{
+	__set_page_state(addr, num_pages, ESSA_SET_UNUSED);
+}
+
+static inline void __set_page_stable_dat(void *addr, unsigned long num_pages)
+{
+	__set_page_state(addr, num_pages, ESSA_SET_STABLE);
+}
+
+static inline void __set_page_stable_nodat(void *addr, unsigned long num_pages)
+{
+	__set_page_state(addr, num_pages, ESSA_SET_STABLE_NODAT);
+}
+
+static inline void __arch_set_page_nodat(void *addr, unsigned long num_pages)
+{
+	if (!cmma_flag)
+		return;
+	if (cmma_flag < 2)
+		__set_page_stable_dat(addr, num_pages);
+	else
+		__set_page_stable_nodat(addr, num_pages);
+}
+
+static inline void __arch_set_page_dat(void *addr, unsigned long num_pages)
+{
+	if (!cmma_flag)
+		return;
+	__set_page_stable_dat(addr, num_pages);
+}
+
 #endif
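The essa() wrapper above issues one ESSA instruction (opcode 0xb9ab) per 4KB frame; __set_page_state() loops it over a physically contiguous range, and the __arch_* helpers pick the state based on cmma_flag. A hedged usage sketch for marking a freshly allocated 4KB table (the function name is hypothetical):

	/*
	 * Hypothetical caller: mark one new 4KB page table as stable/DAT.
	 * A no-op when cmma_flag == 0; otherwise one ESSA per page.
	 */
	static void example_mark_new_table(void *table)
	{
		__arch_set_page_dat(table, 1);	/* second argument counts 4KB pages */
	}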
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
@@ -164,7 +164,6 @@ static inline int page_reset_referenced(unsigned long addr)
 struct page;
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
-void arch_set_page_dat(struct page *page, int order);
 
 static inline int devmem_is_allowed(unsigned long pfn)
 {
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
@@ -25,7 +25,6 @@ void crst_table_free(struct mm_struct *, unsigned long *);
 unsigned long *page_table_alloc(struct mm_struct *);
 struct page *page_table_alloc_pgste(struct mm_struct *mm);
 void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
 void page_table_free_pgste(struct page *page);
 extern int page_table_allocate_pgste;
 
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
@@ -125,9 +125,6 @@ static inline void vmcp_cma_reserve(void) { }
 
 void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
 
-void cmma_init(void);
-void cmma_init_nodat(void);
-
 extern void (*_machine_restart)(char *command);
 extern void (*_machine_halt)(void);
 extern void (*_machine_power_off)(void);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
@@ -6,6 +6,13 @@
 #include <linux/ptrace.h>
 #include <asm/switch_to.h>
 
+struct stack_frame_user {
+	unsigned long back_chain;
+	unsigned long empty1[5];
+	unsigned long gprs[10];
+	unsigned long empty2[4];
+};
+
 enum stack_type {
 	STACK_TYPE_UNKNOWN,
 	STACK_TYPE_TASK,
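struct stack_frame_user mirrors the s390 ELF ABI register save area that -mbackchain code maintains: the back chain pointer sits at offset 0 and gprs[] holds r6-r15, so the saved return address (r14) is gprs[8], 112 bytes into the frame — exactly the two fields the new unwinders read. A small offset check (illustrative, user-space compilable):

	#include <stddef.h>

	struct stack_frame_user {
		unsigned long back_chain;
		unsigned long empty1[5];
		unsigned long gprs[10];	/* r6..r15 */
		unsigned long empty2[4];
	};

	/* gprs[8] is r14: 8 + 5*8 + 8*8 == 112 bytes from the frame start. */
	_Static_assert(offsetof(struct stack_frame_user, gprs[8]) == 112,
		       "saved r14 expected at offset 112");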
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
@@ -69,12 +69,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
 	tlb->cleared_pmds = 1;
-	/*
-	 * page_table_free_rcu takes care of the allocation bit masks
-	 * of the 2K table fragments in the 4K page table page,
-	 * then calls tlb_remove_table.
-	 */
-	page_table_free_rcu(tlb, (unsigned long *) pte, address);
+	if (mm_alloc_pgste(tlb->mm))
+		gmap_unlink(tlb->mm, (unsigned long *)pte, address);
+	tlb_remove_ptdesc(tlb, pte);
 }
 
 /*
@@ -112,7 +109,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 	__tlb_adjust_range(tlb, address, PAGE_SIZE);
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
-	tlb_remove_table(tlb, p4d);
+	tlb_remove_ptdesc(tlb, p4d);
 }
 
 /*
@@ -130,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
 	tlb->cleared_p4ds = 1;
-	tlb_remove_table(tlb, pud);
+	tlb_remove_ptdesc(tlb, pud);
 }
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
@@ -46,6 +46,7 @@ decompressor_handled_param(vmalloc);
 decompressor_handled_param(dfltcc);
 decompressor_handled_param(facilities);
 decompressor_handled_param(nokaslr);
+decompressor_handled_param(cmma);
 #if IS_ENABLED(CONFIG_KVM)
 decompressor_handled_param(prot_virt);
 #endif
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
@@ -15,7 +15,10 @@
 #include <linux/export.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
 #include <linux/sysfs.h>
+#include <asm/stacktrace.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 #include <asm/lowcore.h>
@@ -212,6 +215,44 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 	}
 }
 
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+			 struct pt_regs *regs)
+{
+	struct stack_frame_user __user *sf;
+	unsigned long ip, sp;
+	bool first = true;
+
+	if (is_compat_task())
+		return;
+	perf_callchain_store(entry, instruction_pointer(regs));
+	sf = (void __user *)user_stack_pointer(regs);
+	pagefault_disable();
+	while (entry->nr < entry->max_stack) {
+		if (__get_user(sp, &sf->back_chain))
+			break;
+		if (__get_user(ip, &sf->gprs[8]))
+			break;
+		if (ip & 0x1) {
+			/*
+			 * If the instruction address is invalid, and this
+			 * is the first stack frame, assume r14 has not
+			 * been written to the stack yet. Otherwise exit.
+			 */
+			if (first && !(regs->gprs[14] & 0x1))
+				ip = regs->gprs[14];
+			else
+				break;
+		}
+		perf_callchain_store(entry, ip);
+		/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+		if (!sp || sp & 0x7)
+			break;
+		sf = (void __user *)sp;
+		first = false;
+	}
+	pagefault_enable();
+}
+
 /* Perf definitions for PMU event attributes in sysfs */
 ssize_t cpumf_events_sysfs_show(struct device *dev,
 				struct device_attribute *attr, char *page)
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
@@ -6,9 +6,12 @@
  */
 
 #include <linux/stacktrace.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
 #include <asm/kprobes.h>
+#include <asm/ptrace.h>
 
 void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 		     struct task_struct *task, struct pt_regs *regs)
@@ -58,3 +61,43 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
 			return -EINVAL;
 	return 0;
 }
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+			  const struct pt_regs *regs)
+{
+	struct stack_frame_user __user *sf;
+	unsigned long ip, sp;
+	bool first = true;
+
+	if (is_compat_task())
+		return;
+	if (!consume_entry(cookie, instruction_pointer(regs)))
+		return;
+	sf = (void __user *)user_stack_pointer(regs);
+	pagefault_disable();
+	while (1) {
+		if (__get_user(sp, &sf->back_chain))
+			break;
+		if (__get_user(ip, &sf->gprs[8]))
+			break;
+		if (ip & 0x1) {
+			/*
+			 * If the instruction address is invalid, and this
+			 * is the first stack frame, assume r14 has not
+			 * been written to the stack yet. Otherwise exit.
+			 */
+			if (first && !(regs->gprs[14] & 0x1))
+				ip = regs->gprs[14];
+			else
+				break;
+		}
+		if (!consume_entry(cookie, ip))
+			break;
+		/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+		if (!sp || sp & 0x7)
+			break;
+		sf = (void __user *)sp;
+		first = false;
+	}
+	pagefault_enable();
+}
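Both walkers only work when user code actually writes the back chain, which on s390 is opt-in at compile time. A hedged usage note (the commands are the conventional ones, not taken from this series):

	/*
	 * To get user call graphs out of arch_stack_walk_user() /
	 * perf_callchain_user(), build the profiled binary (and ideally its
	 * libraries) with the back chain enabled, e.g.:
	 *
	 *	gcc -mbackchain -O2 -o app app.c
	 *	perf record --call-graph fp ./app
	 *
	 * Without -mbackchain the first back_chain load yields 0 and the walk
	 * stops after the initial PC, which is the intended graceful fallback.
	 */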
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
@@ -18,7 +18,7 @@
 #include <linux/ksm.h>
 #include <linux/mman.h>
 #include <linux/pgtable.h>
-
+#include <asm/page-states.h>
 #include <asm/pgalloc.h>
 #include <asm/gmap.h>
 #include <asm/page.h>
@@ -33,7 +33,7 @@ static struct page *gmap_alloc_crst(void)
 	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
 	if (!page)
 		return NULL;
-	arch_set_page_dat(page, CRST_ALLOC_ORDER);
+	__arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
 	return page;
 }
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
@@ -164,14 +164,10 @@ void __init mem_init(void)
 
 	pv_init();
 	kfence_split_mapping();
-	/* Setup guest page hinting */
-	cmma_init();
 
 	/* this will put all low memory onto the freelists */
 	memblock_free_all();
 	setup_zero_pages();	/* Setup zeroed pages. */
-
-	cmma_init_nodat();
 }
 
 void free_initmem(void)
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
@@ -7,212 +7,18 @@
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
 #include <linux/mm.h>
-#include <linux/memblock.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <asm/asm-extable.h>
-#include <asm/facility.h>
 #include <asm/page-states.h>
 #include <asm/sections.h>
 #include <asm/page.h>
 
-static int cmma_flag = 1;
-
-static int __init cmma(char *str)
-{
-	bool enabled;
-
-	if (!kstrtobool(str, &enabled))
-		cmma_flag = enabled;
-	return 1;
-}
-__setup("cmma=", cmma);
-
-static inline int cmma_test_essa(void)
-{
-	unsigned long tmp = 0;
-	int rc = -EOPNOTSUPP;
-
-	/* test ESSA_GET_STATE */
-	asm volatile(
-		"	.insn	rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
-		"0:	la	%[rc],0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: [rc] "+&d" (rc), [tmp] "+&d" (tmp)
-		: [cmd] "i" (ESSA_GET_STATE));
-	return rc;
-}
-
-void __init cmma_init(void)
-{
-	if (!cmma_flag)
-		return;
-	if (cmma_test_essa()) {
-		cmma_flag = 0;
-		return;
-	}
-	if (test_facility(147))
-		cmma_flag = 2;
-}
-
-static inline void set_page_unused(struct page *page, int order)
-{
-	int i, rc;
-
-	for (i = 0; i < (1 << order); i++)
-		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
-			     : "=&d" (rc)
-			     : "a" (page_to_phys(page + i)),
-			       "i" (ESSA_SET_UNUSED));
-}
-
-static inline void set_page_stable_dat(struct page *page, int order)
-{
-	int i, rc;
-
-	for (i = 0; i < (1 << order); i++)
-		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
-			     : "=&d" (rc)
-			     : "a" (page_to_phys(page + i)),
-			       "i" (ESSA_SET_STABLE));
-}
-
-static inline void set_page_stable_nodat(struct page *page, int order)
-{
-	int i, rc;
-
-	for (i = 0; i < (1 << order); i++)
-		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
-			     : "=&d" (rc)
-			     : "a" (page_to_phys(page + i)),
-			       "i" (ESSA_SET_STABLE_NODAT));
-}
-
-static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	struct page *page;
-	pmd_t *pmd;
-
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none(*pmd) || pmd_large(*pmd))
-			continue;
-		page = phys_to_page(pmd_val(*pmd));
-		set_bit(PG_arch_1, &page->flags);
-	} while (pmd++, addr = next, addr != end);
-}
-
-static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	struct page *page;
-	pud_t *pud;
-	int i;
-
-	pud = pud_offset(p4d, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		if (pud_none(*pud) || pud_large(*pud))
-			continue;
-		if (!pud_folded(*pud)) {
-			page = phys_to_page(pud_val(*pud));
-			for (i = 0; i < 4; i++)
-				set_bit(PG_arch_1, &page[i].flags);
-		}
-		mark_kernel_pmd(pud, addr, next);
-	} while (pud++, addr = next, addr != end);
-}
-
-static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	struct page *page;
-	p4d_t *p4d;
-	int i;
-
-	p4d = p4d_offset(pgd, addr);
-	do {
-		next = p4d_addr_end(addr, end);
-		if (p4d_none(*p4d))
-			continue;
-		if (!p4d_folded(*p4d)) {
-			page = phys_to_page(p4d_val(*p4d));
-			for (i = 0; i < 4; i++)
-				set_bit(PG_arch_1, &page[i].flags);
-		}
-		mark_kernel_pud(p4d, addr, next);
-	} while (p4d++, addr = next, addr != end);
-}
-
-static void mark_kernel_pgd(void)
-{
-	unsigned long addr, next, max_addr;
-	struct page *page;
-	pgd_t *pgd;
-	int i;
-
-	addr = 0;
-	/*
-	 * Figure out maximum virtual address accessible with the
-	 * kernel ASCE. This is required to keep the page table walker
-	 * from accessing non-existent entries.
-	 */
-	max_addr = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2;
-	max_addr = 1UL << (max_addr * 11 + 31);
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, max_addr);
-		if (pgd_none(*pgd))
-			continue;
-		if (!pgd_folded(*pgd)) {
-			page = phys_to_page(pgd_val(*pgd));
-			for (i = 0; i < 4; i++)
-				set_bit(PG_arch_1, &page[i].flags);
-		}
-		mark_kernel_p4d(pgd, addr, next);
-	} while (pgd++, addr = next, addr != max_addr);
-}
-
-void __init cmma_init_nodat(void)
-{
-	struct page *page;
-	unsigned long start, end, ix;
-	int i;
-
-	if (cmma_flag < 2)
-		return;
-	/* Mark pages used in kernel page tables */
-	mark_kernel_pgd();
-	page = virt_to_page(&swapper_pg_dir);
-	for (i = 0; i < 4; i++)
-		set_bit(PG_arch_1, &page[i].flags);
-	page = virt_to_page(&invalid_pg_dir);
-	for (i = 0; i < 4; i++)
-		set_bit(PG_arch_1, &page[i].flags);
-
-	/* Set all kernel pages not used for page tables to stable/no-dat */
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
-		page = pfn_to_page(start);
-		for (ix = start; ix < end; ix++, page++) {
-			if (__test_and_clear_bit(PG_arch_1, &page->flags))
-				continue;	/* skip page table pages */
-			if (!list_empty(&page->lru))
-				continue;	/* skip free pages */
-			set_page_stable_nodat(page, 0);
-		}
-	}
-}
+int __bootdata_preserved(cmma_flag);
 
 void arch_free_page(struct page *page, int order)
 {
 	if (!cmma_flag)
 		return;
-	set_page_unused(page, order);
+	__set_page_unused(page_to_virt(page), 1UL << order);
 }
 
 void arch_alloc_page(struct page *page, int order)
@@ -220,14 +26,7 @@ void arch_alloc_page(struct page *page, int order)
 	if (!cmma_flag)
 		return;
 	if (cmma_flag < 2)
-		set_page_stable_dat(page, order);
+		__set_page_stable_dat(page_to_virt(page), 1UL << order);
 	else
-		set_page_stable_nodat(page, order);
-}
-
-void arch_set_page_dat(struct page *page, int order)
-{
-	if (!cmma_flag)
-		return;
-	set_page_stable_dat(page, order);
+		__set_page_stable_nodat(page_to_virt(page), 1UL << order);
 }
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <asm/mmu_context.h>
+#include <asm/page-states.h>
 #include <asm/pgalloc.h>
 #include <asm/gmap.h>
 #include <asm/tlb.h>
@@ -43,11 +44,13 @@ __initcall(page_table_register_sysctl);
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
 	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
+	unsigned long *table;
 
 	if (!ptdesc)
 		return NULL;
-	arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER);
-	return (unsigned long *) ptdesc_to_virt(ptdesc);
+	table = ptdesc_to_virt(ptdesc);
+	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
+	return table;
 }
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
@@ -130,11 +133,6 @@ err_p4d:
 	return -ENOMEM;
 }
 
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
-	return atomic_fetch_xor(bits, v) ^ bits;
-}
-
 #ifdef CONFIG_PGSTE
 
 struct page *page_table_alloc_pgste(struct mm_struct *mm)
@@ -145,7 +143,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
 	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
 	if (ptdesc) {
 		table = (u64 *)ptdesc_to_virt(ptdesc);
-		arch_set_page_dat(virt_to_page(table), 0);
+		__arch_set_page_dat(table, 1);
 		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
 		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	}
@@ -159,125 +157,11 @@ void page_table_free_pgste(struct page *page)
 
 #endif /* CONFIG_PGSTE */
 
-/*
- * A 2KB-pgtable is either upper or lower half of a normal page.
- * The second half of the page may be unused or used as another
- * 2KB-pgtable.
- *
- * Whenever possible the parent page for a new 2KB-pgtable is picked
- * from the list of partially allocated pages mm_context_t::pgtable_list.
- * In case the list is empty a new parent page is allocated and added to
- * the list.
- *
- * When a parent page gets fully allocated it contains 2KB-pgtables in both
- * upper and lower halves and is removed from mm_context_t::pgtable_list.
- *
- * When 2KB-pgtable is freed from to fully allocated parent page that
- * page turns partially allocated and added to mm_context_t::pgtable_list.
- *
- * If 2KB-pgtable is freed from the partially allocated parent page that
- * page turns unused and gets removed from mm_context_t::pgtable_list.
- * Furthermore, the unused parent page is released.
- *
- * As follows from the above, no unallocated or fully allocated parent
- * pages are contained in mm_context_t::pgtable_list.
- *
- * The upper byte (bits 24-31) of the parent page _refcount is used
- * for tracking contained 2KB-pgtables and has the following format:
- *
- *   PP  AA
- * 01234567    upper byte (bits 24-31) of struct page::_refcount
- *   ||  ||
- *   ||  |+--- upper 2KB-pgtable is allocated
- *   ||  +---- lower 2KB-pgtable is allocated
- *   |+------- upper 2KB-pgtable is pending for removal
- *   +-------- lower 2KB-pgtable is pending for removal
- *
- * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
- * using _refcount is possible).
- *
- * When 2KB-pgtable is allocated the corresponding AA bit is set to 1.
- * The parent page is either:
- *   - added to mm_context_t::pgtable_list in case the second half of the
- *     parent page is still unallocated;
- *   - removed from mm_context_t::pgtable_list in case both hales of the
- *     parent page are allocated;
- * These operations are protected with mm_context_t::lock.
- *
- * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0
- * and the corresponding PP bit is set to 1 in a single atomic operation.
- * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually
- * exclusive and may never be both set to 1!
- * The parent page is either:
- *   - added to mm_context_t::pgtable_list in case the second half of the
- *     parent page is still allocated;
- *   - removed from mm_context_t::pgtable_list in case the second half of
- *     the parent page is unallocated;
- * These operations are protected with mm_context_t::lock.
- *
- * It is important to understand that mm_context_t::lock only protects
- * mm_context_t::pgtable_list and AA bits, but not the parent page itself
- * and PP bits.
- *
- * Releasing the parent page happens whenever the PP bit turns from 1 to 0,
- * while both AA bits and the second PP bit are already unset. Then the
- * parent page does not contain any 2KB-pgtable fragment anymore, and it has
- * also been removed from mm_context_t::pgtable_list. It is safe to release
- * the page therefore.
- *
- * PGSTE memory spaces use full 4KB-pgtables and do not need most of the
- * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
- * while the PP bits are never used, nor such a page is added to or removed
- * from mm_context_t::pgtable_list.
- *
- * pte_free_defer() overrides those rules: it takes the page off pgtable_list,
- * and prevents both 2K fragments from being reused. pte_free_defer() has to
- * guarantee that its pgtable cannot be reused before the RCU grace period
- * has elapsed (which page_table_free_rcu() does not actually guarantee).
- * But for simplicity, because page->rcu_head overlays page->lru, and because
- * the RCU callback might not be called before the mm_context_t has been freed,
- * pte_free_defer() in this implementation prevents both fragments from being
- * reused, and delays making the call to RCU until both fragments are freed.
- */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
-	unsigned long *table;
 	struct ptdesc *ptdesc;
-	unsigned int mask, bit;
+	unsigned long *table;
 
-	/* Try to get a fragment of a 4K page as a 2K page table */
-	if (!mm_alloc_pgste(mm)) {
-		table = NULL;
-		spin_lock_bh(&mm->context.lock);
-		if (!list_empty(&mm->context.pgtable_list)) {
-			ptdesc = list_first_entry(&mm->context.pgtable_list,
-						  struct ptdesc, pt_list);
-			mask = atomic_read(&ptdesc->_refcount) >> 24;
-			/*
-			 * The pending removal bits must also be checked.
-			 * Failure to do so might lead to an impossible
-			 * value of (i.e 0x13 or 0x23) written to _refcount.
-			 * Such values violate the assumption that pending and
-			 * allocation bits are mutually exclusive, and the rest
-			 * of the code unrails as result. That could lead to
-			 * a whole bunch of races and corruptions.
-			 */
-			mask = (mask | (mask >> 4)) & 0x03U;
-			if (mask != 0x03U) {
-				table = (unsigned long *) ptdesc_to_virt(ptdesc);
-				bit = mask & 1;		/* =1 -> second 2K */
-				if (bit)
-					table += PTRS_PER_PTE;
-				atomic_xor_bits(&ptdesc->_refcount,
-							0x01U << (bit + 24));
-				list_del_init(&ptdesc->pt_list);
-			}
-		}
-		spin_unlock_bh(&mm->context.lock);
-		if (table)
-			return table;
-	}
 	/* Allocate a fresh page */
 	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
 	if (!ptdesc)
 		return NULL;
@@ -285,177 +169,57 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		pagetable_free(ptdesc);
 		return NULL;
 	}
-	arch_set_page_dat(ptdesc_page(ptdesc), 0);
-	/* Initialize page table */
-	table = (unsigned long *) ptdesc_to_virt(ptdesc);
-	if (mm_alloc_pgste(mm)) {
-		/* Return 4K page table with PGSTEs */
-		INIT_LIST_HEAD(&ptdesc->pt_list);
-		atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
-		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
-		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
-	} else {
-		/* Return the first 2K fragment of the page */
-		atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24);
-		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
-		spin_lock_bh(&mm->context.lock);
-		list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
-		spin_unlock_bh(&mm->context.lock);
-	}
+	table = ptdesc_to_virt(ptdesc);
+	__arch_set_page_dat(table, 1);
+	/* pt_list is used by gmap only */
+	INIT_LIST_HEAD(&ptdesc->pt_list);
+	memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+	memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	return table;
 }
 
-static void page_table_release_check(struct page *page, void *table,
-				     unsigned int half, unsigned int mask)
+static void pagetable_pte_dtor_free(struct ptdesc *ptdesc)
 {
-	char msg[128];
-
-	if (!IS_ENABLED(CONFIG_DEBUG_VM))
-		return;
-	if (!mask && list_empty(&page->lru))
-		return;
-	snprintf(msg, sizeof(msg),
-		 "Invalid pgtable %p release half 0x%02x mask 0x%02x",
-		 table, half, mask);
-	dump_page(page, msg);
-}
-
-static void pte_free_now(struct rcu_head *head)
-{
-	struct ptdesc *ptdesc;
-
-	ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
 	pagetable_pte_dtor(ptdesc);
 	pagetable_free(ptdesc);
 }
 
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	unsigned int mask, bit, half;
 	struct ptdesc *ptdesc = virt_to_ptdesc(table);
 
-	if (!mm_alloc_pgste(mm)) {
-		/* Free 2K page table fragment of a 4K page */
-		bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
-		spin_lock_bh(&mm->context.lock);
-		/*
-		 * Mark the page for delayed release. The actual release
-		 * will happen outside of the critical section from this
-		 * function or from __tlb_remove_table()
-		 */
-		mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
-		mask >>= 24;
-		if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
-			/*
-			 * Other half is allocated, and neither half has had
-			 * its free deferred: add page to head of list, to make
-			 * this freed half available for immediate reuse.
-			 */
-			list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
-		} else {
-			/* If page is on list, now remove it. */
-			list_del_init(&ptdesc->pt_list);
-		}
-		spin_unlock_bh(&mm->context.lock);
-		mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24));
-		mask >>= 24;
-		if (mask != 0x00U)
-			return;
-		half = 0x01U << bit;
-	} else {
-		half = 0x03U;
-		mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
-		mask >>= 24;
-	}
-
-	page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
-	if (folio_test_clear_active(ptdesc_folio(ptdesc)))
-		call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
-	else
-		pte_free_now(&ptdesc->pt_rcu_head);
+	pagetable_pte_dtor_free(ptdesc);
 }
 
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
-			 unsigned long vmaddr)
+void __tlb_remove_table(void *table)
 {
-	struct mm_struct *mm;
-	unsigned int bit, mask;
 	struct ptdesc *ptdesc = virt_to_ptdesc(table);
+	struct page *page = ptdesc_page(ptdesc);
 
-	mm = tlb->mm;
-	if (mm_alloc_pgste(mm)) {
-		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) ((unsigned long)table | 0x03U);
-		tlb_remove_ptdesc(tlb, table);
-		return;
-	}
-	bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
-	spin_lock_bh(&mm->context.lock);
-	/*
-	 * Mark the page for delayed release. The actual release will happen
-	 * outside of the critical section from __tlb_remove_table() or from
-	 * page_table_free()
-	 */
-	mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
-	mask >>= 24;
-	if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
-		/*
-		 * Other half is allocated, and neither half has had
-		 * its free deferred: add page to end of list, to make
-		 * this freed half available for reuse once its pending
-		 * bit has been cleared by __tlb_remove_table().
-		 */
-		list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list);
-	} else {
-		/* If page is on list, now remove it. */
-		list_del_init(&ptdesc->pt_list);
-	}
-	spin_unlock_bh(&mm->context.lock);
-	table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
-	tlb_remove_table(tlb, table);
-}
-
-void __tlb_remove_table(void *_table)
-{
-	unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
-	void *table = (void *)((unsigned long) _table ^ mask);
-	struct ptdesc *ptdesc = virt_to_ptdesc(table);
-
-	switch (half) {
-	case 0x00U:	/* pmd, pud, or p4d */
+	if (compound_order(page) == CRST_ALLOC_ORDER) {
+		/* pmd, pud, or p4d */
 		pagetable_free(ptdesc);
 		return;
-	case 0x01U:	/* lower 2K of a 4K page table */
-	case 0x02U:	/* higher 2K of a 4K page table */
-		mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24));
-		mask >>= 24;
-		if (mask != 0x00U)
-			return;
-		break;
-	case 0x03U:	/* 4K page table with pgstes */
-		mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
-		mask >>= 24;
-		break;
 	}
-
-	page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
-	if (folio_test_clear_active(ptdesc_folio(ptdesc)))
-		call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
-	else
-		pte_free_now(&ptdesc->pt_rcu_head);
+	pagetable_pte_dtor_free(ptdesc);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pte_free_now(struct rcu_head *head)
+{
+	struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
+
+	pagetable_pte_dtor_free(ptdesc);
+}
+
 void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
 {
-	struct page *page;
+	struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
 
-	page = virt_to_page(pgtable);
-	SetPageActive(page);
-	page_table_free(mm, (unsigned long *)pgtable);
+	call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
 	/*
-	 * page_table_free() does not do the pgste gmap_unlink() which
-	 * page_table_free_rcu() does: warn us if pgste ever reaches here.
+	 * THPs are not allowed for KVM guests. Warn if pgste ever reaches here.
+	 * Turn to the generic pte_free_defer() version once gmap is removed.
 	 */
 	WARN_ON_ONCE(mm_has_pgste(mm));
 }
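With one PTE table per 4KB page, allocation and free in pgalloc.c become symmetric: no pgtable_list, no PP/AA bits in _refcount. A condensed sketch of the resulting lifecycle (simplified hypothetical wrappers; error handling follows the real code above):

	/* Condensed sketch of the new one-table-per-page lifecycle. */
	unsigned long *sketch_pte_alloc(void)
	{
		struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);	/* order 0 */

		if (!ptdesc)
			return NULL;
		if (!pagetable_pte_ctor(ptdesc)) {
			pagetable_free(ptdesc);
			return NULL;
		}
		return ptdesc_to_virt(ptdesc);	/* one table, one page, no fragments */
	}

	void sketch_pte_free(unsigned long *table)
	{
		struct ptdesc *ptdesc = virt_to_ptdesc(table);

		pagetable_pte_dtor(ptdesc);	/* same pair page_table_free() uses */
		pagetable_free(ptdesc);
	}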
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
@@ -50,8 +50,7 @@ void *vmem_crst_alloc(unsigned long val)
 	if (!table)
 		return NULL;
 	crst_table_init(table, val);
-	if (slab_is_available())
-		arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER);
+	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
 	return table;
 }
 
@@ -67,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void)
 	if (!pte)
 		return NULL;
 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+	__arch_set_page_dat(pte, 1);
 	return pte;
 }
 
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
@@ -352,7 +352,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain);
 /*
  * ap_queue_info(): Check and get AP queue info.
  * Returns: 1 if APQN exists and info is filled,
- *	    0 if APQN seems to exit but there is no info
+ *	    0 if APQN seems to exist but there is no info
 *	      available (eg. caused by an asynch pending error)
 *	    -1 invalid APQN, TAPQ error or AP queue status which
 *	       indicates there is no APQN.
@@ -373,36 +373,33 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac,
 	/* call TAPQ on this APQN */
 	status = ap_test_queue(qid, ap_apft_available(), &tapq_info);
 
-	/* handle pending async error with return 'no info available' */
-	if (status.async)
-		return 0;
-
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 	case AP_RESPONSE_DECONFIGURED:
 	case AP_RESPONSE_CHECKSTOPPED:
 	case AP_RESPONSE_BUSY:
-		/*
-		 * According to the architecture in all these cases the
-		 * info should be filled. All bits 0 is not possible as
-		 * there is at least one of the mode bits set.
-		 */
-		if (WARN_ON_ONCE(!tapq_info.value))
-			return 0;
-		*q_type = tapq_info.at;
-		*q_fac = tapq_info.fac;
-		*q_depth = tapq_info.qd;
-		*q_ml = tapq_info.ml;
-		*q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
-		*q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED;
-		return 1;
+		/* For all these RCs the tapq info should be available */
+		break;
 	default:
-		/*
-		 * A response code which indicates, there is no info available.
-		 */
-		return -1;
+		/* On a pending async error the info should be available */
+		if (!status.async)
+			return -1;
+		break;
 	}
 
+	/* There should be at least one of the mode bits set */
+	if (WARN_ON_ONCE(!tapq_info.value))
+		return 0;
+
+	*q_type = tapq_info.at;
+	*q_fac = tapq_info.fac;
+	*q_depth = tapq_info.qd;
+	*q_ml = tapq_info.ml;
+	*q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
+	*q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED;
+
+	return 1;
 }
 
 void ap_wait(enum ap_sm_wait wait)
@@ -1022,6 +1019,10 @@ EXPORT_SYMBOL(ap_driver_unregister);
 
 void ap_bus_force_rescan(void)
 {
+	/* Only trigger AP bus scans after the initial scan is done */
+	if (atomic64_read(&ap_scan_bus_count) <= 0)
+		return;
+
 	/* processing a asynchronous bus rescan */
 	del_timer(&ap_config_timer);
 	queue_work(system_long_wq, &ap_scan_work);
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
@@ -206,7 +206,6 @@ struct ap_queue {
 	bool config;			/* configured state */
 	bool chkstop;			/* checkstop state */
 	ap_qid_t qid;			/* AP queue id. */
-	bool interrupt;			/* indicate if interrupts are enabled */
 	bool se_bound;			/* SE bound state */
 	unsigned int assoc_idx;		/* SE association index */
 	int queue_count;		/* # messages currently on AP queue. */
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
@@ -200,13 +200,13 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
 			return AP_SM_WAIT_AGAIN;
 		}
 		aq->sm_state = AP_SM_STATE_IDLE;
-		return AP_SM_WAIT_NONE;
+		break;
 	case AP_RESPONSE_NO_PENDING_REPLY:
 		if (aq->queue_count > 0)
-			return aq->interrupt ?
+			return status.irq_enabled ?
 				AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT;
 		aq->sm_state = AP_SM_STATE_IDLE;
-		return AP_SM_WAIT_NONE;
+		break;
 	default:
 		aq->dev_state = AP_DEV_STATE_ERROR;
 		aq->last_err_rc = status.response_code;
@@ -215,6 +215,16 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
 			AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
 		return AP_SM_WAIT_NONE;
 	}
+	/* Check and maybe enable irq support (again) on this queue */
+	if (!status.irq_enabled && status.queue_empty) {
+		void *lsi_ptr = ap_airq_ptr();
+
+		if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0) {
+			aq->sm_state = AP_SM_STATE_SETIRQ_WAIT;
+			return AP_SM_WAIT_AGAIN;
+		}
+	}
+	return AP_SM_WAIT_NONE;
 }
 
 /**
@@ -254,7 +264,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
 		fallthrough;
 	case AP_RESPONSE_Q_FULL:
 		aq->sm_state = AP_SM_STATE_QUEUE_FULL;
-		return aq->interrupt ?
+		return status.irq_enabled ?
 			AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 		aq->sm_state = AP_SM_STATE_RESET_WAIT;
@@ -307,7 +317,6 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq)
 	case AP_RESPONSE_NORMAL:
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 		aq->sm_state = AP_SM_STATE_RESET_WAIT;
-		aq->interrupt = false;
 		aq->rapq_fbit = 0;
 		aq->se_bound = false;
 		return AP_SM_WAIT_LOW_TIMEOUT;
@@ -383,7 +392,6 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq)
 
 	if (status.irq_enabled == 1) {
 		/* Irqs are now enabled */
-		aq->interrupt = true;
 		aq->sm_state = (aq->queue_count > 0) ?
 			AP_SM_STATE_WORKING : AP_SM_STATE_IDLE;
 	}
@@ -626,16 +634,21 @@ static ssize_t interrupt_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
 	struct ap_queue *aq = to_ap_queue(dev);
+	struct ap_queue_status status;
 	int rc = 0;
 
 	spin_lock_bh(&aq->lock);
-	if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT)
+	if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) {
 		rc = sysfs_emit(buf, "Enable Interrupt pending.\n");
-	else if (aq->interrupt)
-		rc = sysfs_emit(buf, "Interrupts enabled.\n");
-	else
-		rc = sysfs_emit(buf, "Interrupts disabled.\n");
+	} else {
+		status = ap_tapq(aq->qid, NULL);
+		if (status.irq_enabled)
+			rc = sysfs_emit(buf, "Interrupts enabled.\n");
+		else
+			rc = sysfs_emit(buf, "Interrupts disabled.\n");
+	}
 	spin_unlock_bh(&aq->lock);
 
 	return rc;
 }
@@ -1032,7 +1045,6 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
 	if (ap_sb_available() && is_prot_virt_guest())
 		aq->ap_dev.device.groups = ap_queue_dev_sb_attr_groups;
 	aq->qid = qid;
-	aq->interrupt = false;
 	spin_lock_init(&aq->lock);
 	INIT_LIST_HEAD(&aq->pendingq);
 	INIT_LIST_HEAD(&aq->requestq);
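The deleted aq->interrupt flag was a cached copy of hardware state that could go stale across queue resets; the rework instead reads the authoritative bit from the TAPQ status word each time. Sketch of the pattern (field and helper names as they appear in the diff; the wrapper function is hypothetical):

	/* Sketch: query the live TAPQ status instead of caching a bool. */
	static bool sketch_queue_irqs_enabled(struct ap_queue *aq)
	{
		struct ap_queue_status status = ap_tapq(aq->qid, NULL);

		return status.irq_enabled;	/* authoritative, cannot go stale */
	}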
diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c
@@ -52,7 +52,7 @@ static ssize_t online_show(struct device *dev,
 {
 	struct zcrypt_card *zc = dev_get_drvdata(dev);
 	struct ap_card *ac = to_ap_card(dev);
-	int online = ac->config && zc->online ? 1 : 0;
+	int online = ac->config && !ac->chkstop && zc->online ? 1 : 0;
 
 	return sysfs_emit(buf, "%d\n", online);
 }
@@ -70,7 +70,7 @@ static ssize_t online_store(struct device *dev,
 	if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
 		return -EINVAL;
 
-	if (online && !ac->config)
+	if (online && (!ac->config || ac->chkstop))
 		return -ENODEV;
 
 	zc->online = online;
diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c
@@ -42,7 +42,7 @@ static ssize_t online_show(struct device *dev,
 {
 	struct zcrypt_queue *zq = dev_get_drvdata(dev);
 	struct ap_queue *aq = to_ap_queue(dev);
-	int online = aq->config && zq->online ? 1 : 0;
+	int online = aq->config && !aq->chkstop && zq->online ? 1 : 0;
 
 	return sysfs_emit(buf, "%d\n", online);
 }
@@ -59,7 +59,8 @@ static ssize_t online_store(struct device *dev,
 	if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
 		return -EINVAL;
 
-	if (online && (!aq->config || !aq->card->config))
+	if (online && (!aq->config || !aq->card->config ||
+		       aq->chkstop || aq->card->chkstop))
 		return -ENODEV;
 	if (online && !zc->online)
 		return -EINVAL;