linux/arch/s390/mm/pageattr.c
Linus Torvalds 4a0fc73da9 more s390 updates for 6.6 merge window
- Couple of virtual vs physical address confusion fixes
 
 - Rework locking in dcssblk driver to address a lockdep warning
 
 - Remove support for "noexec" kernel command line option since there
   is no use case where it would make sense
 
 - Simplify kernel mapping setup and get rid of quite a bit of code
 
 - Add architecture specific __set_memory_yy() functions which allow to
   modify kernel mappings. Unlike the set_memory_xx() variants they
   take void pointer start and end parameters, which allows to use them
   without the usual casts, and also to use them on areas larger than
   8TB.
   Note that the set_memory_xx() family comes with an int num_pages
   parameter which overflows with 8TB. This could be addressed by
   changing the num_pages parameter to unsigned long, however requires
   to change all architectures, since the module code expects an int
   parameter (see module_set_memory()).
   This was indeed an issue since for debug_pagealloc() we call
   set_memory_4k() on the whole identity mapping. Therefore address
   this for now with the __set_memory_yy() variant, and address common
   code later
 
 - Use dev_set_name() and also fix memory leak in zcrypt driver error
   handling
 
 - Remove unused lsi_mask from airq_struct
 
 - Add warning for invalid kernel mapping requests
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEECMNfWEw3SLnmiLkZIg7DeRspbsIFAmT5sYkACgkQIg7DeRsp
 bsJQHg//SFJOcVr2uViorvmt+IyiW78H75y8+AC6qoxZ3fuJeuyW2zz/resRsFRO
 UDCjkV2QYT885ADFUMcunf7LQnJYpSFVjAB6WEhgFr+bFdMWtX1crjHsMHkpcobR
 Fo4IFurG5NOMIdunPwKiO/dLU8pNGTm1T430uyLZH3ApVrmJ8stgOczusqTO/h3V
 tjbC5HAm9BBwNyOd2lnZSi8pbUMcfGE2DkRYhFQup1N9NT0BC4S6B8vTsD7NVYVZ
 WbJzqSl3CnTUJxZFX287YDW6cSaPCLAUfCl7r5FJZUoGvUtOVLlqcOjIIm/72epv
 YygMI8RdM0KHuzOd8XTNDbBnyYWTmWnZypCQVgNezY1GiO5nfTCkwYLZZagcc0Lo
 6fx1B7xBbNao9/pURa6wJ9dXsx+NmD52DjM806hP1H+qoL/YP8dLqV6Qh2C1PFzI
 VncrnG2mnxjwry5WoOyl19SKqXDgmDylnFuHQHKA9mdCaqZWNkqovOo/6qE6OdWI
 Q3TinPwTug9vmgwBuQgua/2R1owy7G9mJwwDW9AtZXVzlMG6k06deKIYrZjLxf6Z
 hOtAu+Xy5IeoXOaaOFU92RsRKX481OTG77rWLREC+orAT1dPrfnguPLlQ7KeFbkR
 foduTTHcDPOLiIVL1RbWQC9AAw/i7IjN04Jp6KFlQTZqLATyPbM=
 =+iSZ
 -----END PGP SIGNATURE-----

Merge tag 's390-6.6-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull more s390 updates from Heiko Carstens:

 - A couple of virtual vs physical address confusion fixes

 - Rework locking in dcssblk driver to address a lockdep warning

 - Remove support for "noexec" kernel command line option since there is
   no use case where it would make sense

 - Simplify kernel mapping setup and get rid of quite a bit of code

 - Add architecture specific __set_memory_yy() functions which allow us
   to modify kernel mappings. Unlike the set_memory_xx() variants they
   take void pointer start and end parameters, which allows using them
   without the usual casts, and also to use them on areas larger than
   8TB.

   Note that the set_memory_xx() family comes with an int num_pages
   parameter which overflows with 8TB. This could be addressed by
   changing the num_pages parameter to unsigned long, however requires
   to change all architectures, since the module code expects an int
   parameter (see module_set_memory()).

   This was indeed an issue since for debug_pagealloc() we call
   set_memory_4k() on the whole identity mapping. Therefore address this
   for now with the __set_memory_yy() variant, and address common code
   later

 - Use dev_set_name() and also fix memory leak in zcrypt driver error
   handling

 - Remove unused lsi_mask from airq_struct

 - Add warning for invalid kernel mapping requests

* tag 's390-6.6-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/vmem: do not silently ignore mapping limit
  s390/zcrypt: utilize dev_set_name() ability to use a formatted string
  s390/zcrypt: don't leak memory if dev_set_name() fails
  s390/mm: fix MAX_DMA_ADDRESS physical vs virtual confusion
  s390/airq: remove lsi_mask from airq_struct
  s390/mm: use __set_memory() variants where useful
  s390/set_memory: add __set_memory() variant
  s390/set_memory: generate all set_memory() functions
  s390/mm: improve description of mapping permissions of prefix pages
  s390/amode31: change type of __samode31, __eamode31, etc
  s390/mm: simplify kernel mapping setup
  s390: remove "noexec" option
  s390/vmem: fix virtual vs physical address confusion
  s390/dcssblk: fix lockdep warning
  s390/monreader: fix virtual vs physical address confusion
2023-09-07 10:52:13 -07:00

453 lines
11 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2011
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgalloc.h>
#include <asm/kfence.h>
#include <asm/page.h>
#include <asm/set_memory.h>
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
: [addr] "+a" (addr) : [skey] "d" (skey));
return addr;
}
void __storage_key_init_range(unsigned long start, unsigned long end)
{
unsigned long boundary, size;
while (start < end) {
if (MACHINE_HAS_EDAT1) {
/* set storage keys for a 1MB frame */
size = 1UL << 20;
boundary = (start + size) & ~(size - 1);
if (boundary <= end) {
do {
start = sske_frame(start, PAGE_DEFAULT_KEY);
} while (start < boundary);
continue;
}
}
page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
start += PAGE_SIZE;
}
}
#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
void arch_report_meminfo(struct seq_file *m)
{
seq_printf(m, "DirectMap4k: %8lu kB\n",
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
seq_printf(m, "DirectMap1M: %8lu kB\n",
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
seq_printf(m, "DirectMap2G: %8lu kB\n",
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
}
#endif /* CONFIG_PROC_FS */
static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
unsigned long dtt)
{
unsigned long *table, mask;
mask = 0;
if (MACHINE_HAS_EDAT2) {
switch (dtt) {
case CRDTE_DTT_REGION3:
mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
break;
case CRDTE_DTT_SEGMENT:
mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
break;
case CRDTE_DTT_PAGE:
mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
break;
}
table = (unsigned long *)((unsigned long)old & mask);
crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
} else if (MACHINE_HAS_IDTE) {
cspg(old, *old, new);
} else {
csp((unsigned int *)old + 1, *old, new);
}
}
static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
unsigned long flags)
{
pte_t *ptep, new;
if (flags == SET_MEMORY_4K)
return 0;
ptep = pte_offset_kernel(pmdp, addr);
do {
new = *ptep;
if (pte_none(new))
return -EINVAL;
if (flags & SET_MEMORY_RO)
new = pte_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pte_mkwrite_novma(pte_mkdirty(new));
if (flags & SET_MEMORY_NX)
new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
else if (flags & SET_MEMORY_X)
new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
if (flags & SET_MEMORY_INV) {
new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
} else if (flags & SET_MEMORY_DEF) {
new = __pte(pte_val(new) & PAGE_MASK);
new = set_pte_bit(new, PAGE_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
}
pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
ptep++;
addr += PAGE_SIZE;
cond_resched();
} while (addr < end);
return 0;
}
static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
{
unsigned long pte_addr, prot;
pte_t *pt_dir, *ptep;
pmd_t new;
int i, ro, nx;
pt_dir = vmem_pte_alloc();
if (!pt_dir)
return -ENOMEM;
pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
if (!nx)
prot &= ~_PAGE_NOEXEC;
ptep = pt_dir;
for (i = 0; i < PTRS_PER_PTE; i++) {
set_pte(ptep, __pte(pte_addr | prot));
pte_addr += PAGE_SIZE;
ptep++;
}
new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
update_page_count(PG_DIRECT_MAP_1M, -1);
return 0;
}
static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
unsigned long flags)
{
pmd_t new = *pmdp;
if (flags & SET_MEMORY_RO)
new = pmd_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pmd_mkwrite_novma(pmd_mkdirty(new));
if (flags & SET_MEMORY_NX)
new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
else if (flags & SET_MEMORY_X)
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
if (flags & SET_MEMORY_INV) {
new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
} else if (flags & SET_MEMORY_DEF) {
new = __pmd(pmd_val(new) & PMD_MASK);
new = set_pmd_bit(new, SEGMENT_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}
static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
int need_split;
pmd_t *pmdp;
int rc = 0;
pmdp = pmd_offset(pudp, addr);
do {
if (pmd_none(*pmdp))
return -EINVAL;
next = pmd_addr_end(addr, end);
if (pmd_large(*pmdp)) {
need_split = !!(flags & SET_MEMORY_4K);
need_split |= !!(addr & ~PMD_MASK);
need_split |= !!(addr + PMD_SIZE > next);
if (need_split) {
rc = split_pmd_page(pmdp, addr);
if (rc)
return rc;
continue;
}
modify_pmd_page(pmdp, addr, flags);
} else {
rc = walk_pte_level(pmdp, addr, next, flags);
if (rc)
return rc;
}
pmdp++;
addr = next;
cond_resched();
} while (addr < end);
return rc;
}
static int split_pud_page(pud_t *pudp, unsigned long addr)
{
unsigned long pmd_addr, prot;
pmd_t *pm_dir, *pmdp;
pud_t new;
int i, ro, nx;
pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
return -ENOMEM;
pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
if (!nx)
prot &= ~_SEGMENT_ENTRY_NOEXEC;
pmdp = pm_dir;
for (i = 0; i < PTRS_PER_PMD; i++) {
set_pmd(pmdp, __pmd(pmd_addr | prot));
pmd_addr += PMD_SIZE;
pmdp++;
}
new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
update_page_count(PG_DIRECT_MAP_2G, -1);
return 0;
}
static void modify_pud_page(pud_t *pudp, unsigned long addr,
unsigned long flags)
{
pud_t new = *pudp;
if (flags & SET_MEMORY_RO)
new = pud_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pud_mkwrite(pud_mkdirty(new));
if (flags & SET_MEMORY_NX)
new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
else if (flags & SET_MEMORY_X)
new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
if (flags & SET_MEMORY_INV) {
new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
} else if (flags & SET_MEMORY_DEF) {
new = __pud(pud_val(new) & PUD_MASK);
new = set_pud_bit(new, REGION3_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}
static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
int need_split;
pud_t *pudp;
int rc = 0;
pudp = pud_offset(p4d, addr);
do {
if (pud_none(*pudp))
return -EINVAL;
next = pud_addr_end(addr, end);
if (pud_large(*pudp)) {
need_split = !!(flags & SET_MEMORY_4K);
need_split |= !!(addr & ~PUD_MASK);
need_split |= !!(addr + PUD_SIZE > next);
if (need_split) {
rc = split_pud_page(pudp, addr);
if (rc)
break;
continue;
}
modify_pud_page(pudp, addr, flags);
} else {
rc = walk_pmd_level(pudp, addr, next, flags);
}
pudp++;
addr = next;
cond_resched();
} while (addr < end && !rc);
return rc;
}
static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
p4d_t *p4dp;
int rc = 0;
p4dp = p4d_offset(pgd, addr);
do {
if (p4d_none(*p4dp))
return -EINVAL;
next = p4d_addr_end(addr, end);
rc = walk_pud_level(p4dp, addr, next, flags);
p4dp++;
addr = next;
cond_resched();
} while (addr < end && !rc);
return rc;
}
DEFINE_MUTEX(cpa_mutex);
static int change_page_attr(unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
int rc = -EINVAL;
pgd_t *pgdp;
pgdp = pgd_offset_k(addr);
do {
if (pgd_none(*pgdp))
break;
next = pgd_addr_end(addr, end);
rc = walk_p4d_level(pgdp, addr, next, flags);
if (rc)
break;
cond_resched();
} while (pgdp++, addr = next, addr < end && !rc);
return rc;
}
static int change_page_attr_alias(unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long alias, offset, va_start, va_end;
struct vm_struct *area;
int rc = 0;
/*
* Changes to read-only permissions on kernel VA mappings are also
* applied to the kernel direct mapping. Execute permissions are
* intentionally not transferred to keep all allocated pages within
* the direct mapping non-executable.
*/
flags &= SET_MEMORY_RO | SET_MEMORY_RW;
if (!flags)
return 0;
area = NULL;
while (addr < end) {
if (!area)
area = find_vm_area((void *)addr);
if (!area || !(area->flags & VM_ALLOC))
return 0;
va_start = (unsigned long)area->addr;
va_end = va_start + area->nr_pages * PAGE_SIZE;
offset = (addr - va_start) >> PAGE_SHIFT;
alias = (unsigned long)page_address(area->pages[offset]);
rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
if (rc)
break;
addr += PAGE_SIZE;
if (addr >= va_end)
area = NULL;
}
return rc;
}
int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
{
unsigned long end;
int rc;
if (!MACHINE_HAS_NX)
flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
if (!flags)
return 0;
if (!numpages)
return 0;
addr &= PAGE_MASK;
end = addr + numpages * PAGE_SIZE;
mutex_lock(&cpa_mutex);
rc = change_page_attr(addr, end, flags);
if (rc)
goto out;
rc = change_page_attr_alias(addr, end, flags);
out:
mutex_unlock(&cpa_mutex);
return rc;
}
int set_direct_map_invalid_noflush(struct page *page)
{
return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
}
int set_direct_map_default_noflush(struct page *page)
{
return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
}
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
static void ipte_range(pte_t *pte, unsigned long address, int nr)
{
int i;
if (test_facility(13)) {
__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
return;
}
for (i = 0; i < nr; i++) {
__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
address += PAGE_SIZE;
pte++;
}
}
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
unsigned long address;
pte_t *ptep, pte;
int nr, i, j;
for (i = 0; i < numpages;) {
address = (unsigned long)page_to_virt(page + i);
ptep = virt_to_kpte(address);
nr = (unsigned long)ptep >> ilog2(sizeof(long));
nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
nr = min(numpages - i, nr);
if (enable) {
for (j = 0; j < nr; j++) {
pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
set_pte(ptep, pte);
address += PAGE_SIZE;
ptep++;
}
} else {
ipte_range(ptep, address, nr);
}
i += nr;
}
}
#endif /* CONFIG_DEBUG_PAGEALLOC */