2019-05-29 07:18:00 -07:00
/* SPDX-License-Identifier: GPL-2.0-only */
2017-07-10 18:06:09 -07:00
/*
* Copyright ( C ) 2012 Regents of the University of California
*/
# ifndef _ASM_RISCV_PGTABLE_H
# define _ASM_RISCV_PGTABLE_H
# include <linux/mmzone.h>
2019-10-25 08:30:03 +00:00
# include <linux/sizes.h>
2017-07-10 18:06:09 -07:00
# include <asm/pgtable-bits.h>
2021-04-11 12:41:44 -04:00
# ifndef CONFIG_MMU
# define KERNEL_LINK_ADDR PAGE_OFFSET
2022-02-25 13:39:48 +01:00
# define KERN_VIRT_SIZE (UL(-1))
2021-04-11 12:41:44 -04:00
# else
2017-07-10 18:06:09 -07:00
2021-04-11 12:41:44 -04:00
# define ADDRESS_SPACE_END (UL(-1))
2017-07-10 18:06:09 -07:00
2021-04-11 12:41:44 -04:00
# ifdef CONFIG_64BIT
/* Leave 2GB for kernel and BPF at the end of the address space */
# define KERNEL_LINK_ADDR (ADDRESS_SPACE_END - SZ_2G + 1)
# else
# define KERNEL_LINK_ADDR PAGE_OFFSET
# endif
2020-02-24 11:34:36 -08:00
2021-12-06 11:46:45 +01:00
/* Number of entries in the page global directory */
# define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
/* Number of entries in the page table */
# define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
/*
* Half of the kernel address space ( half of the entries of the page global
* directory ) is for the direct mapping .
*/
# define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
2020-02-24 11:34:36 -08:00
# define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
2021-11-18 14:45:39 +01:00
# define VMALLOC_END PAGE_OFFSET
2020-02-24 11:34:36 -08:00
# define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
# define BPF_JIT_REGION_SIZE (SZ_128M)
2021-04-11 12:41:44 -04:00
# ifdef CONFIG_64BIT
riscv: Ensure BPF_JIT_REGION_START aligned with PMD size
Andreas reported commit fc8504765ec5 ("riscv: bpf: Avoid breaking W^X")
breaks booting with one kind of defconfig, I reproduced a kernel panic
with the defconfig:
[ 0.138553] Unable to handle kernel paging request at virtual address ffffffff81201220
[ 0.139159] Oops [#1]
[ 0.139303] Modules linked in:
[ 0.139601] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5-default+ #1
[ 0.139934] Hardware name: riscv-virtio,qemu (DT)
[ 0.140193] epc : __memset+0xc4/0xfc
[ 0.140416] ra : skb_flow_dissector_init+0x1e/0x82
[ 0.140609] epc : ffffffff8029806c ra : ffffffff8033be78 sp : ffffffe001647da0
[ 0.140878] gp : ffffffff81134b08 tp : ffffffe001654380 t0 : ffffffff81201158
[ 0.141156] t1 : 0000000000000002 t2 : 0000000000000154 s0 : ffffffe001647dd0
[ 0.141424] s1 : ffffffff80a43250 a0 : ffffffff81201220 a1 : 0000000000000000
[ 0.141654] a2 : 000000000000003c a3 : ffffffff81201258 a4 : 0000000000000064
[ 0.141893] a5 : ffffffff8029806c a6 : 0000000000000040 a7 : ffffffffffffffff
[ 0.142126] s2 : ffffffff81201220 s3 : 0000000000000009 s4 : ffffffff81135088
[ 0.142353] s5 : ffffffff81135038 s6 : ffffffff8080ce80 s7 : ffffffff80800438
[ 0.142584] s8 : ffffffff80bc6578 s9 : 0000000000000008 s10: ffffffff806000ac
[ 0.142810] s11: 0000000000000000 t3 : fffffffffffffffc t4 : 0000000000000000
[ 0.143042] t5 : 0000000000000155 t6 : 00000000000003ff
[ 0.143220] status: 0000000000000120 badaddr: ffffffff81201220 cause: 000000000000000f
[ 0.143560] [<ffffffff8029806c>] __memset+0xc4/0xfc
[ 0.143859] [<ffffffff8061e984>] init_default_flow_dissectors+0x22/0x60
[ 0.144092] [<ffffffff800010fc>] do_one_initcall+0x3e/0x168
[ 0.144278] [<ffffffff80600df0>] kernel_init_freeable+0x1c8/0x224
[ 0.144479] [<ffffffff804868a8>] kernel_init+0x12/0x110
[ 0.144658] [<ffffffff800022de>] ret_from_exception+0x0/0xc
[ 0.145124] ---[ end trace f1e9643daa46d591 ]---
After some investigation, I think I found the root cause: commit
2bfc6cd81bd ("move kernel mapping outside of linear mapping") moves
BPF JIT region after the kernel:
| #define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end)
The &_end is unlikely aligned with PMD size, so the front bpf jit
region sits with part of kernel .data section in one PMD size mapping.
But kernel is mapped in PMD SIZE, when bpf_jit_binary_lock_ro() is
called to make the first bpf jit prog ROX, we will make part of kernel
.data section RO too, so when we write to, for example memset the
.data section, MMU will trigger a store page fault.
To fix the issue, we need to ensure the BPF JIT region is PMD size
aligned. This patch achieves this goal by restoring the BPF JIT region
to its original position, i.e. the 128MB before the kernel .text section.
The modification to kasan_init.c is inspired by Alexandre.
Fixes: fc8504765ec5 ("riscv: bpf: Avoid breaking W^X")
Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
2021-06-18 22:09:13 +08:00
# define BPF_JIT_REGION_START (BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE)
# define BPF_JIT_REGION_END (MODULES_END)
2021-04-11 12:41:44 -04:00
# else
2020-02-24 11:34:36 -08:00
# define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
# define BPF_JIT_REGION_END (VMALLOC_END)
2021-04-11 12:41:44 -04:00
# endif
/* Modules always live before the kernel */
# ifdef CONFIG_64BIT
2021-12-06 11:46:45 +01:00
/* This is used to define the end of the KASAN shadow region */
# define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G)
# define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
# define MODULES_END (PFN_ALIGN((unsigned long)&_start))
2021-04-11 12:41:44 -04:00
# endif
2020-02-24 11:34:36 -08:00
/*
* Roughly size the vmemmap space to be large enough to fit enough
* struct pages to map half the virtual address space . Then
* position vmemmap directly below the VMALLOC region .
*/
2021-12-06 11:46:48 +01:00
# ifdef CONFIG_64BIT
2022-01-27 10:48:41 +08:00
# define VA_BITS (pgtable_l5_enabled ? \
57 : ( pgtable_l4_enabled ? 48 : 39 ) )
2021-12-06 11:46:48 +01:00
# else
# define VA_BITS 32
# endif
2020-02-24 11:34:36 -08:00
# define VMEMMAP_SHIFT \
2021-12-06 11:46:48 +01:00
( VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT )
2020-02-24 11:34:36 -08:00
# define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
2021-11-18 14:45:39 +01:00
# define VMEMMAP_END VMALLOC_START
2020-02-24 11:34:36 -08:00
# define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
/*
* Define vmemmap for pfn_to_page & page_to_pfn calls . Needed if kernel
* is configured with CONFIG_SPARSEMEM_VMEMMAP enabled .
*/
# define vmemmap ((struct page *)VMEMMAP_START)
# define PCI_IO_SIZE SZ_16M
# define PCI_IO_END VMEMMAP_START
# define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
# define FIXADDR_TOP PCI_IO_START
# ifdef CONFIG_64BIT
# define FIXADDR_SIZE PMD_SIZE
# else
# define FIXADDR_SIZE PGDIR_SIZE
# endif
# define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
2021-04-13 02:35:14 -04:00
2021-04-28 14:45:12 -07:00
# endif
2021-04-13 02:35:14 -04:00
# ifdef CONFIG_XIP_KERNEL
2021-10-11 11:14:14 +02:00
# define XIP_OFFSET SZ_32M
# define XIP_OFFSET_MASK (SZ_32M - 1)
2021-06-04 13:49:48 +02:00
# else
# define XIP_OFFSET 0
2020-02-24 11:34:36 -08:00
# endif
2021-04-11 12:41:44 -04:00
# ifndef __ASSEMBLY__
# include <asm/page.h>
# include <asm/tlbflush.h>
# include <linux/mm_types.h>
2022-05-11 21:29:17 +02:00
# define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT)
2017-07-10 18:06:09 -07:00
# ifdef CONFIG_64BIT
# include <asm/pgtable-64.h>
# else
# include <asm/pgtable-32.h>
# endif /* CONFIG_64BIT */
2022-05-12 20:23:06 -07:00
# include <linux/page_table_check.h>
2021-04-28 14:45:12 -07:00
#ifdef CONFIG_XIP_KERNEL
/*
 * XIP_FIXUP() - rebase an address for an execute-in-place kernel.
 *
 * Addresses outside [CONFIG_XIP_PHYS_ADDR, CONFIG_XIP_PHYS_ADDR + 2 * XIP_OFFSET)
 * are passed through untouched (as a uintptr_t).  Addresses inside that
 * window are moved so that they are relative to CONFIG_PHYS_RAM_BASE,
 * minus XIP_OFFSET.
 */
#define XIP_FIXUP(addr) ({							\
	uintptr_t __a = (uintptr_t)(addr);					\
	(__a < CONFIG_XIP_PHYS_ADDR ||						\
	 __a >= CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ?			\
		__a :								\
		__a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET;	\
	})
#else
/* Without XIP the address needs no translation. */
#define XIP_FIXUP(addr)		(addr)
#endif /* CONFIG_XIP_KERNEL */
2021-12-06 11:46:51 +01:00
/*
 * Table of page-table helper callbacks, one get/alloc pair per level.
 *
 * Each get_*_virt hook takes a physical address and returns a pointer to
 * that level's entry type; each alloc_* hook takes a virtual address and
 * returns a physical address.  The pmd/pud/p4d pairs are only present when
 * the PMD level is not folded.
 * NOTE(review): the exact contract of each hook is defined by its
 * implementers, which are not visible in this header.
 */
struct pt_alloc_ops {
	pte_t *(*get_pte_virt)(phys_addr_t pa);
	phys_addr_t (*alloc_pte)(uintptr_t va);
#ifndef __PAGETABLE_PMD_FOLDED
	pmd_t *(*get_pmd_virt)(phys_addr_t pa);
	phys_addr_t (*alloc_pmd)(uintptr_t va);
	pud_t *(*get_pud_virt)(phys_addr_t pa);
	phys_addr_t (*alloc_pud)(uintptr_t va);
	p4d_t *(*get_p4d_virt)(phys_addr_t pa);
	phys_addr_t (*alloc_p4d)(uintptr_t va);
#endif
};
2022-01-19 19:23:41 -08:00
extern struct pt_alloc_ops pt_ops __initdata ;
2017-07-10 18:06:09 -07:00
2019-10-28 13:10:41 +01:00
# ifdef CONFIG_MMU
2017-07-10 18:06:09 -07:00
/* Number of PGD entries that a user-mode program can use */
# define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
/* Page protection bits */
# define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
riscv/mm: Adjust PAGE_PROT_NONE to comply with THP semantics
This is a preparation for enabling THP migration.
As the commit b65399f6111b("arm64/mm: Change THP helpers
to comply with generic MM semantics") mentioned, pmd_present()
and pmd_trans_huge() are expected to behave in the following
manner:
-------------------------------------------------------------------------
| PMD states | pmd_present | pmd_trans_huge |
-------------------------------------------------------------------------
| Mapped | Yes | Yes |
-------------------------------------------------------------------------
| Splitting | Yes | Yes |
-------------------------------------------------------------------------
| Migration/Swap | No | No |
-------------------------------------------------------------------------
At present the PROT_NONE bit reuses the READ bit could not comply with
above semantics with two problems:
1. When splitting a PMD THP, PMD is first invalidated with
pmdp_invalidate()->pmd_mkinvalid(), which clears the PRESENT bit
and PROT_NONE bit/READ bit, if the PMD is read-only, then the PAGE_LEAF
property is also cleared, which results in pmd_present() return false.
2. When migrating, the swap entry only clear the PRESENT bit
and PROT_NONE bit/READ bit, the W/X bit may be set, so _PAGE_LEAF may be
true which results in pmd_present() return true.
Solution:
Adjust PROT_NONE bit from READ to GLOBAL bit can satisfy the above rules:
1. GLOBAL bit has no other meanings, not like the R/W/X bit, which is
also relative with _PAGE_LEAF property.
2. GLOBAL bit is at bit 5, making swap entry start from bit 6, bit 0-5
are zero, which means the PRESENT, PROT_NONE, and PAGE_LEAF are
all false, then the pmd_present() and pmd_trans_huge() return false when
in migration/swap.
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2021-11-23 22:06:37 +08:00
# define PAGE_NONE __pgprot(_PAGE_PROT_NONE | _PAGE_READ)
2017-07-10 18:06:09 -07:00
# define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ)
# define PAGE_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
# define PAGE_EXEC __pgprot(_PAGE_BASE | _PAGE_EXEC)
# define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
# define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \
_PAGE_EXEC | _PAGE_WRITE )
# define PAGE_COPY PAGE_READ
# define PAGE_COPY_EXEC PAGE_EXEC
# define PAGE_COPY_READ_EXEC PAGE_READ_EXEC
# define PAGE_SHARED PAGE_WRITE
# define PAGE_SHARED_EXEC PAGE_WRITE_EXEC
# define _PAGE_KERNEL (_PAGE_READ \
| _PAGE_WRITE \
| _PAGE_PRESENT \
| _PAGE_ACCESSED \
2021-05-26 05:49:20 +00:00
| _PAGE_DIRTY \
| _PAGE_GLOBAL )
2017-07-10 18:06:09 -07:00
# define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
2020-09-17 15:37:15 -07:00
# define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
# define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)
# define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \
| _PAGE_EXEC )
2017-07-10 18:06:09 -07:00
2019-06-28 13:36:21 -07:00
# define PAGE_TABLE __pgprot(_PAGE_TABLE)
2022-05-11 21:29:18 +02:00
# define _PAGE_IOREMAP ((_PAGE_KERNEL & ~_PAGE_MTMASK) | _PAGE_IO)
# define PAGE_KERNEL_IO __pgprot(_PAGE_IOREMAP)
2019-08-13 11:27:56 +02:00
2017-07-10 18:06:09 -07:00
extern pgd_t swapper_pg_dir [ ] ;
2021-04-30 16:28:50 +08:00
# ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * THP flavour of pmd_present(): returns the bits of @pmd selected by
 * _PAGE_PRESENT | _PAGE_PROT_NONE | _PAGE_LEAF (non-zero means present).
 */
static inline int pmd_present(pmd_t pmd)
{
	/*
	 * _PAGE_LEAF is part of the mask because split_huge_page()
	 * temporarily clears the present bit while splitting a THP; during
	 * that window pmd_present() and pmd_trans_huge() must still report
	 * true.
	 */
	const unsigned long present_mask =
		_PAGE_PRESENT | _PAGE_PROT_NONE | _PAGE_LEAF;

	return pmd_val(pmd) & present_mask;
}
# else
2017-07-10 18:06:09 -07:00
/*
 * Returns the bits of @pmd selected by _PAGE_PRESENT | _PAGE_PROT_NONE;
 * a non-zero result means the entry counts as present.
 */
static inline int pmd_present(pmd_t pmd)
{
	const unsigned long present_mask = _PAGE_PRESENT | _PAGE_PROT_NONE;

	return pmd_val(pmd) & present_mask;
}
2021-04-30 16:28:50 +08:00
# endif
2017-07-10 18:06:09 -07:00
/* Returns 1 when the raw value of @pmd is zero (empty entry), else 0. */
static inline int pmd_none(pmd_t pmd)
{
	return !pmd_val(pmd);
}
/*
 * Returns 1 when @pmd is either not present or has any _PAGE_LEAF bit set,
 * 0 otherwise.
 */
static inline int pmd_bad(pmd_t pmd)
{
	if (!pmd_present(pmd))
		return 1;

	return (pmd_val(pmd) & _PAGE_LEAF) != 0;
}
2020-02-03 17:35:28 -08:00
#define pmd_leaf	pmd_leaf
/* Returns 1 when @pmd is present and has any _PAGE_LEAF bit set, else 0. */
static inline int pmd_leaf(pmd_t pmd)
{
	if (!pmd_present(pmd))
		return 0;

	return (pmd_val(pmd) & _PAGE_LEAF) != 0;
}
2017-07-10 18:06:09 -07:00
/* Store @pmd into the slot pointed to by @pmdp with a plain assignment. */
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	*pmdp = pmd;
}
/* Overwrite the entry at @pmdp with an all-zero PMD via set_pmd(). */
static inline void pmd_clear(pmd_t *pmdp)
{
	const pmd_t empty = __pmd(0);

	set_pmd(pmdp, empty);
}
/*
 * Build a PGD entry from @pfn and @prot.
 *
 * The protection value is first run through ALT_THEAD_PMA() (presumably a
 * T-HEAD specific fixup of the attribute bits -- confirm against its
 * definition), then OR-ed with @pfn shifted by _PAGE_PFN_SHIFT.
 */
static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
{
	unsigned long attrs = pgprot_val(prot);

	ALT_THEAD_PMA(attrs);

	return __pgd(attrs | (pfn << _PAGE_PFN_SHIFT));
}
2019-06-28 13:36:21 -07:00
/* Extract the page frame number stored in @pgd. */
static inline unsigned long _pgd_pfn(pgd_t pgd)
{
	const unsigned long entry = pgd_val(pgd);

	return __page_val_to_pfn(entry);
}
2017-07-10 18:06:09 -07:00
/* Return the struct page for the page frame number stored in @pmd. */
static inline struct page *pmd_page(pmd_t pmd)
{
	const unsigned long pfn = __page_val_to_pfn(pmd_val(pmd));

	return pfn_to_page(pfn);
}
/*
 * Return, as an unsigned long, the kernel virtual address that
 * pfn_to_virt() yields for the page frame number stored in @pmd.
 */
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
	const unsigned long pfn = __page_val_to_pfn(pmd_val(pmd));

	return (unsigned long)pfn_to_virt(pfn);
}
2020-11-18 16:38:28 -08:00
/* Reinterpret @pmd as a pte_t carrying the same raw value. */
static inline pte_t pmd_pte(pmd_t pmd)
{
	pte_t as_pte = __pte(pmd_val(pmd));

	return as_pte;
}
2021-04-17 00:37:22 +08:00
/* Reinterpret @pud as a pte_t carrying the same raw value. */
static inline pte_t pud_pte(pud_t pud)
{
	pte_t as_pte = __pte(pud_val(pud));

	return as_pte;
}
2017-07-10 18:06:09 -07:00
/* Extract the page frame number (PFN) stored in a page table entry. */
static inline unsigned long pte_pfn(pte_t pte)
{
	const unsigned long entry = pte_val(pte);

	return __page_val_to_pfn(entry);
}
# define pte_page(x) pfn_to_page(pte_pfn(x))
/*
 * Build a page table entry from @pfn and @prot.
 *
 * The protection value is first run through ALT_THEAD_PMA() (presumably a
 * T-HEAD specific fixup of the attribute bits -- confirm against its
 * definition), then OR-ed with @pfn shifted by _PAGE_PFN_SHIFT.
 */
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
	unsigned long attrs = pgprot_val(prot);

	ALT_THEAD_PMA(attrs);

	return __pte(attrs | (pfn << _PAGE_PFN_SHIFT));
}
2019-10-23 11:23:01 +08:00
# define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
2017-07-10 18:06:09 -07:00
/*
 * Returns the bits of @pte selected by _PAGE_PRESENT | _PAGE_PROT_NONE;
 * a non-zero result means the entry counts as present.
 */
static inline int pte_present(pte_t pte)
{
	const unsigned long present_mask = _PAGE_PRESENT | _PAGE_PROT_NONE;

	return pte_val(pte) & present_mask;
}
/* Returns 1 when the raw value of @pte is zero (empty entry), else 0. */
static inline int pte_none(pte_t pte)
{
	return !pte_val(pte);
}
/* Returns the _PAGE_WRITE bit of @pte (non-zero when writable). */
static inline int pte_write(pte_t pte)
{
	const unsigned long write_bit = _PAGE_WRITE;

	return pte_val(pte) & write_bit;
}
2017-10-25 14:30:32 -07:00
/* Returns the _PAGE_EXEC bit of @pte (non-zero when executable). */
static inline int pte_exec(pte_t pte)
{
	const unsigned long exec_bit = _PAGE_EXEC;

	return pte_val(pte) & exec_bit;
}
2022-05-12 20:23:06 -07:00
/* Returns the _PAGE_USER bit of @pte (non-zero when set). */
static inline int pte_user(pte_t pte)
{
	const unsigned long user_bit = _PAGE_USER;

	return pte_val(pte) & user_bit;
}
2017-07-10 18:06:09 -07:00
/* Returns 1 when @pte is present and has any _PAGE_LEAF bit set, else 0. */
static inline int pte_huge(pte_t pte)
{
	if (!pte_present(pte))
		return 0;

	return (pte_val(pte) & _PAGE_LEAF) != 0;
}
/* Returns the _PAGE_DIRTY bit of @pte (non-zero when dirty). */
static inline int pte_dirty(pte_t pte)
{
	const unsigned long dirty_bit = _PAGE_DIRTY;

	return pte_val(pte) & dirty_bit;
}
/* Returns the _PAGE_ACCESSED bit of @pte (non-zero when accessed). */
static inline int pte_young(pte_t pte)
{
	const unsigned long accessed_bit = _PAGE_ACCESSED;

	return pte_val(pte) & accessed_bit;
}
/* Returns the _PAGE_SPECIAL bit of @pte (non-zero when set). */
static inline int pte_special(pte_t pte)
{
	const unsigned long special_bit = _PAGE_SPECIAL;

	return pte_val(pte) & special_bit;
}
/* static inline pte_t pte_rdprotect(pte_t pte) */
/* Return a copy of @pte with _PAGE_WRITE cleared. */
static inline pte_t pte_wrprotect(pte_t pte)
{
	pte_t readonly = __pte(pte_val(pte) & ~(_PAGE_WRITE));

	return readonly;
}
/* static inline pte_t pte_mkread(pte_t pte) */
/* Return a copy of @pte with _PAGE_WRITE set. */
static inline pte_t pte_mkwrite(pte_t pte)
{
	pte_t writable = __pte(pte_val(pte) | _PAGE_WRITE);

	return writable;
}
/* static inline pte_t pte_mkexec(pte_t pte) */
/* Return a copy of @pte with _PAGE_DIRTY set. */
static inline pte_t pte_mkdirty(pte_t pte)
{
	pte_t dirtied = __pte(pte_val(pte) | _PAGE_DIRTY);

	return dirtied;
}
/* Return a copy of @pte with _PAGE_DIRTY cleared. */
static inline pte_t pte_mkclean(pte_t pte)
{
	pte_t cleaned = __pte(pte_val(pte) & ~(_PAGE_DIRTY));

	return cleaned;
}
/* Return a copy of @pte with _PAGE_ACCESSED set. */
static inline pte_t pte_mkyoung(pte_t pte)
{
	pte_t accessed = __pte(pte_val(pte) | _PAGE_ACCESSED);

	return accessed;
}
/* Return a copy of @pte with _PAGE_ACCESSED cleared. */
static inline pte_t pte_mkold(pte_t pte)
{
	pte_t aged = __pte(pte_val(pte) & ~(_PAGE_ACCESSED));

	return aged;
}
/* Return a copy of @pte with _PAGE_SPECIAL set. */
static inline pte_t pte_mkspecial(pte_t pte)
{
	pte_t special = __pte(pte_val(pte) | _PAGE_SPECIAL);

	return special;
}
2019-05-26 08:50:38 -04:00
/* Identity operation: @pte is returned unmodified. */
static inline pte_t pte_mkhuge(pte_t pte)
{
	return pte;
}
2020-11-18 16:38:28 -08:00
# ifdef CONFIG_NUMA_BALANCING
/*
 * See the comment in include/linux/pgtable.h
*/
/*
 * Returns 1 when, of the two bits _PAGE_PRESENT and _PAGE_PROT_NONE,
 * exactly _PAGE_PROT_NONE is set in @pte; 0 otherwise.
 */
static inline int pte_protnone(pte_t pte)
{
	const unsigned long state =
		pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE);

	return state == _PAGE_PROT_NONE;
}
/* PMD counterpart of pte_protnone(): applies it to @pmd viewed as a PTE. */
static inline int pmd_protnone(pmd_t pmd)
{
	const pte_t as_pte = pmd_pte(pmd);

	return pte_protnone(as_pte);
}
# endif
2017-07-10 18:06:09 -07:00
/*
 * Replace the protection bits of @pte with @newprot.
 *
 * Only the bits of @pte covered by _PAGE_CHG_MASK are carried over; the
 * new protection value is passed through ALT_THEAD_PMA() before being
 * merged in.
 */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	unsigned long prot_bits = pgprot_val(newprot);

	ALT_THEAD_PMA(prot_bits);

	return __pte(prot_bits | (pte_val(pte) & _PAGE_CHG_MASK));
}
/*
 * Log a "bad pgd" message through pr_err(), including the source file and
 * line of the call site and the raw value of the pgd entry @e.
 */
# define pgd_ERROR(e) \
pr_err ( " %s:%d: bad pgd " PTE_FMT " . \n " , __FILE__ , __LINE__ , pgd_val ( e ) )
/*
 * Commit a page-table change to the MMU by flushing the local TLB entry
 * for @address.  @vma and @ptep are not used.
 */
static inline void update_mmu_cache(struct vm_area_struct *vma,
	unsigned long address, pte_t *ptep)
{
	/*
	 * Generic code assumes TLBs never cache invalid entries.  On
	 * RISC-V, however, SFENCE.VMA is an ordering constraint rather
	 * than a cache flush, so it is required even after an invalid
	 * entry has been written.  flush_tlb_fix_spurious_fault alone
	 * would be sufficient, but the additional traps cost performance,
	 * hence the eager SFENCE.VMA here.
	 */
	local_flush_tlb_page(address);
}
2021-04-30 16:28:50 +08:00
/* PMD variant: forwards to update_mmu_cache() with @pmdp cast to a pte_t *. */
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp)
{
	update_mmu_cache(vma, address, (pte_t *)pmdp);
}
2017-07-10 18:06:09 -07:00
#define __HAVE_ARCH_PTE_SAME
/* Returns 1 when @pte_a and @pte_b hold identical raw values, else 0. */
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return !(pte_val(pte_a) ^ pte_val(pte_b));
}
2017-10-25 14:30:32 -07:00
/*
 * Some architectures must perform extra work whenever a PTE inside a page
 * table is modified directly; this hook is the place for it.  Here it is
 * a plain store of @pteval into *@ptep.
 */
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}
void flush_icache_pte ( pte_t pte ) ;
2022-05-12 20:23:06 -07:00
/*
 * Install @pteval at @ptep.  When the new entry is both present and
 * executable, flush_icache_pte() is called on it before the store.
 * @mm and @addr are not used.
 */
static inline void __set_pte_at(struct mm_struct *mm,
	unsigned long addr, pte_t *ptep, pte_t pteval)
{
	const int needs_icache_flush =
		pte_present(pteval) && pte_exec(pteval);

	if (needs_icache_flush)
		flush_icache_pte(pteval);

	set_pte(ptep, pteval);
}
2022-05-12 20:23:06 -07:00
/*
 * Install @pteval at @ptep for @addr in @mm: first notify the page table
 * checker via page_table_check_pte_set(), then perform the actual update
 * with __set_pte_at().
 */
static inline void set_pte_at(struct mm_struct *mm,
	unsigned long addr, pte_t *ptep, pte_t pteval)
{
	page_table_check_pte_set(mm, addr, ptep, pteval);
	__set_pte_at(mm, addr, ptep, pteval);
}
2017-10-25 14:30:32 -07:00
/*
 * Write an all-zero PTE to @ptep.  This goes through __set_pte_at()
 * directly, so page_table_check_pte_set() is not invoked.
 */
static inline void pte_clear(struct mm_struct *mm,
	unsigned long addr, pte_t *ptep)
{
	const pte_t empty = __pte(0);

	__set_pte_at(mm, addr, ptep, empty);
}
2017-07-10 18:06:09 -07:00
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
/*
 * Make *@ptep equal to @entry, writing it through set_pte_at() only when
 * the two differ.  @dirty is not used.  Always returns true.
 */
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pte_t *ptep,
					pte_t entry, int dirty)
{
	const int changed = !pte_same(*ptep, entry);

	if (changed)
		set_pte_at(vma->vm_mm, address, ptep, entry);

	/*
	 * Returning true unconditionally means update_mmu_cache() always
	 * runs, which covers both a modified PTE and a spurious fault.
	 */
	return true;
}
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
/*
 * Atomically exchange the PTE at @ptep with zero, report the removed
 * entry to page_table_check_pte_clear(), and return it.
 */
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	pte_t old = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));

	page_table_check_pte_clear(mm, address, old);

	return old;
}
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
/*
 * Clear the accessed bit of the PTE at @ptep.  Returns 0 without touching
 * the entry when the bit is already clear; otherwise returns the result of
 * test_and_clear_bit() on _PAGE_ACCESSED_OFFSET.  @vma and @address are
 * not used.
 */
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	if (pte_young(*ptep))
		return test_and_clear_bit(_PAGE_ACCESSED_OFFSET,
					  &pte_val(*ptep));

	return 0;
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
/*
 * Clear _PAGE_WRITE in the PTE at @ptep using atomic_long_and().
 * @mm and @address are not used.
 */
static inline void ptep_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pte_t *ptep)
{
	atomic_long_t *entry = (atomic_long_t *)ptep;

	atomic_long_and(~(unsigned long)_PAGE_WRITE, entry);
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
/*
 * Clear the accessed bit of the PTE at @ptep without flushing the TLB;
 * simply defers to ptep_test_and_clear_young().
 */
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pte_t *ptep)
{
	/*
	 * Rationale (taken from x86, equally valid for RISC-V):
	 *
	 * Skipping the TLB flush after clearing the accessed bit cannot
	 * corrupt data.  The worst outcome is inaccurate page aging and
	 * the mistaken reclaim of a hot page, which should be rare.
	 *
	 * The flush is therefore omitted as an optimization: a later
	 * context switch or VM operation will flush the entry anyway.
	 * Should that take a long time, the delay is harmless because
	 * there is then no real memory pressure for swapout to react to.
	 */
	return ptep_test_and_clear_young(vma, address, ptep);
}
2022-05-11 21:29:18 +02:00
#define pgprot_noncached pgprot_noncached
/*
 * Return @_prot with the bits under _PAGE_MTMASK cleared and _PAGE_IO set.
 */
static inline pgprot_t pgprot_noncached(pgprot_t _prot)
{
	unsigned long bits = pgprot_val(_prot);

	bits = (bits & ~_PAGE_MTMASK) | _PAGE_IO;

	return __pgprot(bits);
}
#define pgprot_writecombine pgprot_writecombine
/*
 * Return @_prot with the bits under _PAGE_MTMASK cleared and
 * _PAGE_NOCACHE set.
 */
static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
{
	unsigned long bits = pgprot_val(_prot);

	bits = (bits & ~_PAGE_MTMASK) | _PAGE_NOCACHE;

	return __pgprot(bits);
}
2021-04-30 16:28:50 +08:00
/*
* THP functions
*/
/* Reinterpret @pte as a pmd_t carrying the same raw value. */
static inline pmd_t pte_pmd(pte_t pte)
{
	pmd_t as_pmd = __pmd(pte_val(pte));

	return as_pmd;
}
/* Identity operation: @pmd is returned unmodified. */
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
	return pmd;
}
/* Return a copy of @pmd with _PAGE_PRESENT and _PAGE_PROT_NONE cleared. */
static inline pmd_t pmd_mkinvalid(pmd_t pmd)
{
	pmd_t invalid = __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT | _PAGE_PROT_NONE));

	return invalid;
}
2022-07-11 09:29:51 +05:30
# define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT)
2021-04-30 16:28:50 +08:00
static inline unsigned long pmd_pfn ( pmd_t pmd )
{
return ( ( __pmd_to_phys ( pmd ) & PMD_MASK ) > > PAGE_SHIFT ) ;
}
2022-07-11 09:29:51 +05:30
# define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT)
2022-05-12 20:23:06 -07:00
static inline unsigned long pud_pfn ( pud_t pud )
{
return ( ( __pud_to_phys ( pud ) & PUD_MASK ) > > PAGE_SHIFT ) ;
}
2021-04-30 16:28:50 +08:00
/* PMD counterpart of pte_modify(): applies it to @pmd viewed as a PTE. */
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	const pte_t updated = pte_modify(pmd_pte(pmd), newprot);

	return pte_pmd(updated);
}
#define pmd_write pmd_write
/* PMD counterpart of pte_write(): applies it to @pmd viewed as a PTE. */
static inline int pmd_write(pmd_t pmd)
{
	const pte_t as_pte = pmd_pte(pmd);

	return pte_write(as_pte);
}
/* PMD counterpart of pte_dirty(): applies it to @pmd viewed as a PTE. */
static inline int pmd_dirty(pmd_t pmd)
{
	const pte_t as_pte = pmd_pte(pmd);

	return pte_dirty(as_pte);
}
/* PMD counterpart of pte_young(): applies it to @pmd viewed as a PTE. */
static inline int pmd_young(pmd_t pmd)
{
	const pte_t as_pte = pmd_pte(pmd);

	return pte_young(as_pte);
}
2022-05-12 20:23:06 -07:00
/* PMD counterpart of pte_user(): applies it to @pmd viewed as a PTE. */
static inline int pmd_user(pmd_t pmd)
{
	const pte_t as_pte = pmd_pte(pmd);

	return pte_user(as_pte);
}
2021-04-30 16:28:50 +08:00
/* PMD counterpart of pte_mkold(): applies it to @pmd viewed as a PTE. */
static inline pmd_t pmd_mkold(pmd_t pmd)
{
	const pte_t aged = pte_mkold(pmd_pte(pmd));

	return pte_pmd(aged);
}
/* PMD counterpart of pte_mkyoung(): applies it to @pmd viewed as a PTE. */
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
	const pte_t accessed = pte_mkyoung(pmd_pte(pmd));

	return pte_pmd(accessed);
}
/* PMD counterpart of pte_mkwrite(): applies it to @pmd viewed as a PTE. */
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
	const pte_t writable = pte_mkwrite(pmd_pte(pmd));

	return pte_pmd(writable);
}
/* PMD counterpart of pte_wrprotect(): applies it to @pmd viewed as a PTE. */
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
	const pte_t readonly = pte_wrprotect(pmd_pte(pmd));

	return pte_pmd(readonly);
}
/* PMD counterpart of pte_mkclean(): applies it to @pmd viewed as a PTE. */
static inline pmd_t pmd_mkclean(pmd_t pmd)
{
	const pte_t cleaned = pte_mkclean(pmd_pte(pmd));

	return pte_pmd(cleaned);
}
/* PMD counterpart of pte_mkdirty(): applies it to @pmd viewed as a PTE. */
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
	const pte_t dirtied = pte_mkdirty(pmd_pte(pmd));

	return pte_pmd(dirtied);
}
/*
 * Install @pmd at @pmdp for @addr in @mm.
 *
 * The page table checker is notified through page_table_check_pmd_set()
 * first; the entry is then written with __set_pte_at(), treating the PMD
 * slot and value as a PTE.
 */
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
				pmd_t *pmdp, pmd_t pmd)
{
	page_table_check_pmd_set(mm, addr, pmdp, pmd);
	/*
	 * Plain call, not "return <expr>": ISO C does not allow a return
	 * statement with an expression in a function returning void.
	 */
	__set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
}
2021-04-17 00:37:22 +08:00
/*
 * Install @pud at @pudp for @addr in @mm.
 *
 * The page table checker is notified through page_table_check_pud_set()
 * first; the entry is then written with __set_pte_at(), treating the PUD
 * slot and value as a PTE.
 */
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
				pud_t *pudp, pud_t pud)
{
	page_table_check_pud_set(mm, addr, pudp, pud);
	/*
	 * Plain call, not "return <expr>": ISO C does not allow a return
	 * statement with an expression in a function returning void.
	 */
	__set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
}
# ifdef CONFIG_PAGE_TABLE_CHECK
/* True when @pte is present and has _PAGE_USER set. */
static inline bool pte_user_accessible_page(pte_t pte)
{
	if (!pte_present(pte))
		return false;

	return pte_user(pte);
}
/* True when @pmd is a leaf entry and has _PAGE_USER set. */
static inline bool pmd_user_accessible_page(pmd_t pmd)
{
	if (!pmd_leaf(pmd))
		return false;

	return pmd_user(pmd);
}
2022-05-12 20:23:06 -07:00
/* True when both pud_leaf() and pud_user() hold for @pud. */
static inline bool pud_user_accessible_page(pud_t pud)
{
	if (!pud_leaf(pud))
		return false;

	return pud_user(pud);
}
# endif
2021-04-30 16:28:50 +08:00
# ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* A PMD is reported as a transparent huge page exactly when it is a leaf. */
static inline int pmd_trans_huge(pmd_t pmd)
{
	const int is_leaf = pmd_leaf(pmd);

	return is_leaf;
}
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
/*
 * PMD counterpart of ptep_set_access_flags(): the PMD slot and the new
 * entry are handed over as their PTE views.
 */
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	pte_t *slot = (pte_t *)pmdp;
	const pte_t new_entry = pmd_pte(entry);

	return ptep_set_access_flags(vma, address, slot, new_entry, dirty);
}
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
/*
 * PMD counterpart of ptep_test_and_clear_young(): operates on the PMD
 * slot viewed as a PTE slot.
 */
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp)
{
	pte_t *slot = (pte_t *)pmdp;

	return ptep_test_and_clear_young(vma, address, slot);
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
/*
 * Atomically exchange the PMD at @pmdp with zero, report the removed
 * entry to page_table_check_pmd_clear(), and return it.
 */
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					unsigned long address, pmd_t *pmdp)
{
	pmd_t old = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));

	page_table_check_pmd_clear(mm, address, old);

	return old;
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
/*
 * PMD counterpart of ptep_set_wrprotect(): operates on the PMD slot
 * viewed as a PTE slot.
 */
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
					unsigned long address, pmd_t *pmdp)
{
	pte_t *slot = (pte_t *)pmdp;

	ptep_set_wrprotect(mm, address, slot);
}
#define pmdp_establish pmdp_establish
/*
 * Atomically replace the PMD at @pmdp with @pmd and return the previous
 * entry.  page_table_check_pmd_set() is notified before the exchange.
 */
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
				unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	pmd_t previous;

	page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
	previous = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));

	return previous;
}
# endif /* CONFIG_TRANSPARENT_HUGEPAGE */
2017-07-10 18:06:09 -07:00
/*
* Encode and decode a swap entry
*
* Format of swap PTE :
* bit 0 : _PAGE_PRESENT ( zero )
riscv/mm: Adjust PAGE_PROT_NONE to comply with THP semantics
This is a preparation for enabling THP migration.
As the commit b65399f6111b("arm64/mm: Change THP helpers
to comply with generic MM semantics") mentioned, pmd_present()
and pmd_trans_huge() are expected to behave in the following
manner:
-------------------------------------------------------------------------
| PMD states | pmd_present | pmd_trans_huge |
-------------------------------------------------------------------------
| Mapped | Yes | Yes |
-------------------------------------------------------------------------
| Splitting | Yes | Yes |
-------------------------------------------------------------------------
| Migration/Swap | No | No |
-------------------------------------------------------------------------
At present the PROT_NONE bit reuses the READ bit could not comply with
above semantics with two problems:
1. When splitting a PMD THP, PMD is first invalidated with
pmdp_invalidate()->pmd_mkinvalid(), which clears the PRESENT bit
and PROT_NONE bit/READ bit, if the PMD is read-only, then the PAGE_LEAF
property is also cleared, which results in pmd_present() return false.
2. When migrating, the swap entry only clear the PRESENT bit
and PROT_NONE bit/READ bit, the W/X bit may be set, so _PAGE_LEAF may be
true which results in pmd_present() return true.
Solution:
Adjust PROT_NONE bit from READ to GLOBAL bit can satisfy the above rules:
1. GLOBAL bit has no other meanings, not like the R/W/X bit, which is
also relative with _PAGE_LEAF property.
2. GLOBAL bit is at bit 5, making swap entry start from bit 6, bit 0-5
are zero, which means the PRESENT, PROT_NONE, and PAGE_LEAF are
all false, then the pmd_present() and pmd_trans_huge() return false when
in migration/swap.
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2021-11-23 22:06:37 +08:00
* bit 1 to 3 : _PAGE_LEAF ( zero )
* bit 5 : _PAGE_PROT_NONE ( zero )
* bits 6 to 10 : swap type
 *	bits 11 to XLEN-1:	swap offset
2017-07-10 18:06:09 -07:00
*/
riscv/mm: Adjust PAGE_PROT_NONE to comply with THP semantics
This is a preparation for enabling THP migration.
As the commit b65399f6111b("arm64/mm: Change THP helpers
to comply with generic MM semantics") mentioned, pmd_present()
and pmd_trans_huge() are expected to behave in the following
manner:
-------------------------------------------------------------------------
| PMD states | pmd_present | pmd_trans_huge |
-------------------------------------------------------------------------
| Mapped | Yes | Yes |
-------------------------------------------------------------------------
| Splitting | Yes | Yes |
-------------------------------------------------------------------------
| Migration/Swap | No | No |
-------------------------------------------------------------------------
At present the PROT_NONE bit reuses the READ bit could not comply with
above semantics with two problems:
1. When splitting a PMD THP, PMD is first invalidated with
pmdp_invalidate()->pmd_mkinvalid(), which clears the PRESENT bit
and PROT_NONE bit/READ bit, if the PMD is read-only, then the PAGE_LEAF
property is also cleared, which results in pmd_present() return false.
2. When migrating, the swap entry only clear the PRESENT bit
and PROT_NONE bit/READ bit, the W/X bit may be set, so _PAGE_LEAF may be
true which results in pmd_present() return true.
Solution:
Adjust PROT_NONE bit from READ to GLOBAL bit can satisfy the above rules:
1. GLOBAL bit has no other meanings, not like the R/W/X bit, which is
also relative with _PAGE_LEAF property.
2. GLOBAL bit is at bit 5, making swap entry start from bit 6, bit 0-5
are zero, which means the PRESENT, PROT_NONE, and PAGE_LEAF are
all false, then the pmd_present() and pmd_trans_huge() return false when
in migration/swap.
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2021-11-23 22:06:37 +08:00
/*
 * Swap entry field layout: the swap type occupies __SWP_TYPE_BITS (5) bits
 * starting at bit __SWP_TYPE_SHIFT (6), i.e. bits 6-10; the swap offset
 * starts directly above it at bit __SWP_OFFSET_SHIFT (11).
 */
#define __SWP_TYPE_SHIFT	6
#define __SWP_TYPE_BITS		5
#define __SWP_TYPE_MASK		((1UL << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)

/* Build-time check that the swap type field can hold MAX_SWAPFILES_SHIFT bits. */
#define MAX_SWAPFILES_CHECK()	\
	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

/* Field extractors for a swp_entry_t. */
#define __swp_type(x)	(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x)	((x).val >> __SWP_OFFSET_SHIFT)

/*
 * Build a swp_entry_t from @type and @offset.  @type is masked to
 * __SWP_TYPE_BITS so that an out-of-range value cannot spill into the
 * offset field.
 */
#define __swp_entry(type, offset) ((swp_entry_t) \
	{ (((type) & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT) | \
	  ((offset) << __SWP_OFFSET_SHIFT) })

/* Raw-value conversions between a PTE and a swap entry. */
#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
2021-11-23 22:06:38 +08:00
# ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
# define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
# define __swp_entry_to_pmd(swp) __pmd((swp).val)
# endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
2020-03-12 10:58:35 +08:00
/*
* In the RV64 Linux scheme , we give the user half of the virtual - address space
* and give the kernel the other ( upper ) half .
*/
# ifdef CONFIG_64BIT
2021-12-06 11:46:48 +01:00
# define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE)
2020-03-12 10:58:35 +08:00
# else
# define KERN_VIRT_START FIXADDR_START
# endif
2017-07-10 18:06:09 -07:00
/*
2019-08-19 05:14:23 +00:00
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32 .
2017-07-10 18:06:09 -07:00
* Note that PGDIR_SIZE must evenly divide TASK_SIZE .
2021-12-06 11:46:53 +01:00
* Task size is :
* - 0x9fc00000 ( ~ 2.5 GB ) for RV32 .
* - 0x4000000000 ( 256 GB ) for RV64 using SV39 mmu
* - 0x800000000000 ( 128 TB ) for RV64 using SV48 mmu
*
* Note that PGDIR_SIZE must evenly divide TASK_SIZE since " RISC-V
* Instruction Set Manual Volume II : Privileged Architecture " states that
* " load and store effective addresses, which are 64bits, must have bits
* 63 – 48 all equal to bit 47 , or else a page - fault exception will occur . "
2017-07-10 18:06:09 -07:00
*/
# ifdef CONFIG_64BIT
2022-04-05 15:13:04 +08:00
# define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2)
# define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
# ifdef CONFIG_COMPAT
# define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE)
# define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64 )
# else
# define TASK_SIZE TASK_SIZE_64
# endif
2017-07-10 18:06:09 -07:00
# else
2021-12-06 11:46:51 +01:00
# define TASK_SIZE FIXADDR_START
# define TASK_SIZE_MIN TASK_SIZE
2017-07-10 18:06:09 -07:00
# endif
2019-10-28 13:10:41 +01:00
# else /* CONFIG_MMU */
2020-05-11 10:19:54 +08:00
# define PAGE_SHARED __pgprot(0)
2019-10-28 13:10:41 +01:00
# define PAGE_KERNEL __pgprot(0)
# define swapper_pg_dir NULL
2020-06-01 21:51:57 -07:00
# define TASK_SIZE 0xffffffffUL
2019-10-28 13:10:41 +01:00
# define VMALLOC_START 0
2020-06-01 21:51:57 -07:00
# define VMALLOC_END TASK_SIZE
2019-10-28 13:10:41 +01:00
# endif /* !CONFIG_MMU */
# define kern_addr_valid(addr) (1) /* FIXME */
2021-04-11 12:41:44 -04:00
extern char _start [ ] ;
2021-04-13 02:35:14 -04:00
extern void * _dtb_early_va ;
extern uintptr_t _dtb_early_pa ;
# if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_MMU)
# define dtb_early_va (*(void **)XIP_FIXUP(&_dtb_early_va))
# define dtb_early_pa (*(uintptr_t *)XIP_FIXUP(&_dtb_early_pa))
# else
# define dtb_early_va _dtb_early_va
# define dtb_early_pa _dtb_early_pa
# endif /* CONFIG_XIP_KERNEL */
2021-12-06 11:46:51 +01:00
extern u64 satp_mode ;
extern bool pgtable_l4_enabled ;
2021-04-13 02:35:14 -04:00
2019-10-28 13:10:41 +01:00
void paging_init ( void ) ;
2020-11-18 16:38:27 -08:00
void misc_mem_init ( void ) ;
2019-10-28 13:10:41 +01:00
/*
* ZERO_PAGE is a global shared page that is always zero ,
* used for zero - mapped memory areas , etc .
*/
extern unsigned long empty_zero_page [ PAGE_SIZE / sizeof ( unsigned long ) ] ;
# define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
2017-07-10 18:06:09 -07:00
# endif /* !__ASSEMBLY__ */
# endif /* _ASM_RISCV_PGTABLE_H */