linux/arch/riscv/include/asm/pgtable.h

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2012 Regents of the University of California
 */

#ifndef _ASM_RISCV_PGTABLE_H
#define _ASM_RISCV_PGTABLE_H

#include <linux/mmzone.h>
#include <linux/sizes.h>

#include <asm/pgtable-bits.h>

#ifndef CONFIG_MMU
#define KERNEL_LINK_ADDR	PAGE_OFFSET
#define KERN_VIRT_SIZE		(UL(-1))
#else

#define ADDRESS_SPACE_END	(UL(-1))

#ifdef CONFIG_64BIT
/* Leave 2GB for kernel and BPF at the end of the address space */
#define KERNEL_LINK_ADDR	(ADDRESS_SPACE_END - SZ_2G + 1)
#else
#define KERNEL_LINK_ADDR	PAGE_OFFSET
#endif

/* Number of entries in the page global directory */
#define PTRS_PER_PGD    (PAGE_SIZE / sizeof(pgd_t))
/* Number of entries in the page table */
#define PTRS_PER_PTE    (PAGE_SIZE / sizeof(pte_t))

/*
 * Half of the kernel address space (half of the entries of the page global
 * directory) is for the direct mapping.
 */
#define KERN_VIRT_SIZE          ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)

#define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END      PAGE_OFFSET
#define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)

#define BPF_JIT_REGION_SIZE	(SZ_128M)
#ifdef CONFIG_64BIT
#define BPF_JIT_REGION_START	(BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END	(MODULES_END)
#else
#define BPF_JIT_REGION_START	(PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END	(VMALLOC_END)
#endif

/* Modules always live before the kernel */
#ifdef CONFIG_64BIT
/* This is used to define the end of the KASAN shadow region */
#define MODULES_LOWEST_VADDR	(KERNEL_LINK_ADDR - SZ_2G)
#define MODULES_VADDR		(PFN_ALIGN((unsigned long)&_end) - SZ_2G)
#define MODULES_END		(PFN_ALIGN((unsigned long)&_start))
#endif

/*
 * Roughly size the vmemmap space to be large enough to fit enough
 * struct pages to map half the virtual address space. Then
 * position vmemmap directly below the VMALLOC region.
 */
#ifdef CONFIG_64BIT
#define VA_BITS		(pgtable_l5_enabled ? \
				57 : (pgtable_l4_enabled ? 48 : 39))
#else
#define VA_BITS		32
#endif

#define VMEMMAP_SHIFT \
	(VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE	BIT(VMEMMAP_SHIFT)
#define VMEMMAP_END	VMALLOC_START
#define VMEMMAP_START	(VMALLOC_START - VMEMMAP_SIZE)

/*
 * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
 * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
 */
#define vmemmap		((struct page *)VMEMMAP_START)

#define PCI_IO_SIZE      SZ_16M
#define PCI_IO_END       VMEMMAP_START
#define PCI_IO_START     (PCI_IO_END - PCI_IO_SIZE)

#define FIXADDR_TOP      PCI_IO_START
#ifdef CONFIG_64BIT
#define FIXADDR_SIZE     PMD_SIZE
#else
#define FIXADDR_SIZE     PGDIR_SIZE
#endif
#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)

#endif

#ifdef CONFIG_XIP_KERNEL
#define XIP_OFFSET		SZ_32M
#define XIP_OFFSET_MASK		(SZ_32M - 1)
#else
#define XIP_OFFSET		0
#endif

#ifndef __ASSEMBLY__

#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>

#define __page_val_to_pfn(_val)  (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT)

#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
#else
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */

#include <linux/page_table_check.h>

#ifdef CONFIG_XIP_KERNEL
#define XIP_FIXUP(addr) ({							\
	uintptr_t __a = (uintptr_t)(addr);					\
	(__a >= CONFIG_XIP_PHYS_ADDR && \
	 __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ?	\
		__a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\
		__a;								\
	})
#else
#define XIP_FIXUP(addr)		(addr)
#endif /* CONFIG_XIP_KERNEL */

struct pt_alloc_ops {
	pte_t *(*get_pte_virt)(phys_addr_t pa);
	phys_addr_t (*alloc_pte)(uintptr_t va);
#ifndef __PAGETABLE_PMD_FOLDED
	pmd_t *(*get_pmd_virt)(phys_addr_t pa);
	phys_addr_t (*alloc_pmd)(uintptr_t va);
	pud_t *(*get_pud_virt)(phys_addr_t pa);
	phys_addr_t (*alloc_pud)(uintptr_t va);
	p4d_t *(*get_p4d_virt)(phys_addr_t pa);
	phys_addr_t (*alloc_p4d)(uintptr_t va);
#endif
};

extern struct pt_alloc_ops pt_ops __initdata;

#ifdef CONFIG_MMU
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD   (TASK_SIZE / PGDIR_SIZE)

/* Page protection bits */
#define _PAGE_BASE	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)

#define PAGE_NONE		__pgprot(_PAGE_PROT_NONE | _PAGE_READ)
#define PAGE_READ		__pgprot(_PAGE_BASE | _PAGE_READ)
#define PAGE_WRITE		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
#define PAGE_EXEC		__pgprot(_PAGE_BASE | _PAGE_EXEC)
#define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
#define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
					 _PAGE_EXEC | _PAGE_WRITE)

#define PAGE_COPY		PAGE_READ
#define PAGE_COPY_EXEC		PAGE_EXEC
#define PAGE_COPY_READ_EXEC	PAGE_READ_EXEC
#define PAGE_SHARED		PAGE_WRITE
#define PAGE_SHARED_EXEC	PAGE_WRITE_EXEC

#define _PAGE_KERNEL		(_PAGE_READ \
				| _PAGE_WRITE \
				| _PAGE_PRESENT \
				| _PAGE_ACCESSED \
				| _PAGE_DIRTY \
				| _PAGE_GLOBAL)

#define PAGE_KERNEL		__pgprot(_PAGE_KERNEL)
#define PAGE_KERNEL_READ	__pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL | _PAGE_EXEC)
#define PAGE_KERNEL_READ_EXEC	__pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \
					 | _PAGE_EXEC)

#define PAGE_TABLE		__pgprot(_PAGE_TABLE)

#define _PAGE_IOREMAP	((_PAGE_KERNEL & ~_PAGE_MTMASK) | _PAGE_IO)
#define PAGE_KERNEL_IO		__pgprot(_PAGE_IOREMAP)

extern pgd_t swapper_pg_dir[];

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_present(pmd_t pmd)
{
	/*
	 * Checking for _PAGE_LEAF is needed too because:
	 * When splitting a THP, split_huge_page() will temporarily clear
	 * the present bit, in this situation, pmd_present() and
	 * pmd_trans_huge() still needs to return true.
	 */
	return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE | _PAGE_LEAF));
}
#else
static inline int pmd_present(pmd_t pmd)
{
	return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
}
#endif

static inline int pmd_none(pmd_t pmd)
{
	return (pmd_val(pmd) == 0);
}

static inline int pmd_bad(pmd_t pmd)
{
	return !pmd_present(pmd) || (pmd_val(pmd) & _PAGE_LEAF);
}

#define pmd_leaf	pmd_leaf
static inline int pmd_leaf(pmd_t pmd)
{
	return pmd_present(pmd) && (pmd_val(pmd) & _PAGE_LEAF);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	*pmdp = pmd;
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}

static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
{
	unsigned long prot_val = pgprot_val(prot);

	ALT_THEAD_PMA(prot_val);

	return __pgd((pfn << _PAGE_PFN_SHIFT) | prot_val);
}

static inline unsigned long _pgd_pfn(pgd_t pgd)
{
	return __page_val_to_pfn(pgd_val(pgd));
}

static inline struct page *pmd_page(pmd_t pmd)
{
	return pfn_to_page(__page_val_to_pfn(pmd_val(pmd)));
}

static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
	return (unsigned long)pfn_to_virt(__page_val_to_pfn(pmd_val(pmd)));
}

static inline pte_t pmd_pte(pmd_t pmd)
{
	return __pte(pmd_val(pmd));
}

static inline pte_t pud_pte(pud_t pud)
{
	return __pte(pud_val(pud));
}

/* Yields the page frame number (PFN) of a page table entry */
static inline unsigned long pte_pfn(pte_t pte)
{
	return __page_val_to_pfn(pte_val(pte));
}

#define pte_page(x)     pfn_to_page(pte_pfn(x))

/* Constructs a page table entry */
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
	unsigned long prot_val = pgprot_val(prot);

	ALT_THEAD_PMA(prot_val);

	return __pte((pfn << _PAGE_PFN_SHIFT) | prot_val);
}

#define mk_pte(page, prot)       pfn_pte(page_to_pfn(page), prot)

static inline int pte_present(pte_t pte)
{
	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
}

static inline int pte_none(pte_t pte)
{
	return (pte_val(pte) == 0);
}

static inline int pte_write(pte_t pte)
{
	return pte_val(pte) & _PAGE_WRITE;
}

static inline int pte_exec(pte_t pte)
{
	return pte_val(pte) & _PAGE_EXEC;
}

static inline int pte_user(pte_t pte)
{
	return pte_val(pte) & _PAGE_USER;
}

static inline int pte_huge(pte_t pte)
{
	return pte_present(pte) && (pte_val(pte) & _PAGE_LEAF);
}

static inline int pte_dirty(pte_t pte)
{
	return pte_val(pte) & _PAGE_DIRTY;
}

static inline int pte_young(pte_t pte)
{
	return pte_val(pte) & _PAGE_ACCESSED;
}

static inline int pte_special(pte_t pte)
{
	return pte_val(pte) & _PAGE_SPECIAL;
}

/* static inline pte_t pte_rdprotect(pte_t pte) */

static inline pte_t pte_wrprotect(pte_t pte)
{
	return __pte(pte_val(pte) & ~(_PAGE_WRITE));
}

/* static inline pte_t pte_mkread(pte_t pte) */

static inline pte_t pte_mkwrite(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_WRITE);
}

/* static inline pte_t pte_mkexec(pte_t pte) */

static inline pte_t pte_mkdirty(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_DIRTY);
}

static inline pte_t pte_mkclean(pte_t pte)
{
	return __pte(pte_val(pte) & ~(_PAGE_DIRTY));
}

static inline pte_t pte_mkyoung(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_ACCESSED);
}

static inline pte_t pte_mkold(pte_t pte)
{
	return __pte(pte_val(pte) & ~(_PAGE_ACCESSED));
}

static inline pte_t pte_mkspecial(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_SPECIAL);
}

static inline pte_t pte_mkhuge(pte_t pte)
{
	return pte;
}

#ifdef CONFIG_NUMA_BALANCING
/*
 * See the comment in include/asm-generic/pgtable.h
 */
static inline int pte_protnone(pte_t pte)
{
	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) == _PAGE_PROT_NONE;
}

static inline int pmd_protnone(pmd_t pmd)
{
	return pte_protnone(pmd_pte(pmd));
}
#endif

/* Modify page protection bits */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	unsigned long newprot_val = pgprot_val(newprot);

	ALT_THEAD_PMA(newprot_val);

	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | newprot_val);
}

#define pgd_ERROR(e) \
	pr_err("%s:%d: bad pgd " PTE_FMT ".\n", __FILE__, __LINE__, pgd_val(e))


/* Commit new configuration to MMU hardware */
static inline void update_mmu_cache(struct vm_area_struct *vma,
	unsigned long address, pte_t *ptep)
{
	/*
	 * The kernel assumes that TLBs don't cache invalid entries, but
	 * in RISC-V, SFENCE.VMA specifies an ordering constraint, not a
	 * cache flush; it is necessary even after writing invalid entries.
	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
	 */
	local_flush_tlb_page(address);
}

static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp)
{
	pte_t *ptep = (pte_t *)pmdp;

	update_mmu_cache(vma, address, ptep);
}

#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}

/*
 * Certain architectures need to do special things when PTEs within
 * a page table are directly modified.  Thus, the following hook is
 * made available.
 */
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

void flush_icache_pte(pte_t pte);

static inline void __set_pte_at(struct mm_struct *mm,
	unsigned long addr, pte_t *ptep, pte_t pteval)
{
	if (pte_present(pteval) && pte_exec(pteval))
		flush_icache_pte(pteval);

	set_pte(ptep, pteval);
}

static inline void set_pte_at(struct mm_struct *mm,
	unsigned long addr, pte_t *ptep, pte_t pteval)
{
	page_table_check_pte_set(mm, addr, ptep, pteval);
	__set_pte_at(mm, addr, ptep, pteval);
}

static inline void pte_clear(struct mm_struct *mm,
	unsigned long addr, pte_t *ptep)
{
	__set_pte_at(mm, addr, ptep, __pte(0));
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pte_t *ptep,
					pte_t entry, int dirty)
{
	if (!pte_same(*ptep, entry))
		set_pte_at(vma->vm_mm, address, ptep, entry);
	/*
	 * update_mmu_cache will unconditionally execute, handling both
	 * the case that the PTE changed and the spurious fault case.
	 */
	return true;
}

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));

	page_table_check_pte_clear(mm, address, pte);

	return pte;
}

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	if (!pte_young(*ptep))
		return 0;
	return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pte_t *ptep)
{
	atomic_long_and(~(unsigned long)_PAGE_WRITE, (atomic_long_t *)ptep);
}

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pte_t *ptep)
{
	/*
	 * This comment is borrowed from x86, but applies equally to RISC-V:
	 *
	 * Clearing the accessed bit without a TLB flush
	 * doesn't cause data corruption. [ It could cause incorrect
	 * page aging and the (mistaken) reclaim of hot pages, but the
	 * chance of that should be relatively low. ]
	 *
	 * So as a performance optimization don't flush the TLB when
	 * clearing the accessed bit, it will eventually be flushed by
	 * a context switch or a VM operation anyway. [ In the rare
	 * event of it not getting flushed for a long time the delay
	 * shouldn't really matter because there's no real memory
	 * pressure for swapout to react to. ]
	 */
	return ptep_test_and_clear_young(vma, address, ptep);
}

#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t _prot)
{
	unsigned long prot = pgprot_val(_prot);

	prot &= ~_PAGE_MTMASK;
	prot |= _PAGE_IO;

	return __pgprot(prot);
}

#define pgprot_writecombine pgprot_writecombine
static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
{
	unsigned long prot = pgprot_val(_prot);

	prot &= ~_PAGE_MTMASK;
	prot |= _PAGE_NOCACHE;

	return __pgprot(prot);
}

/*
 * THP functions
 */
static inline pmd_t pte_pmd(pte_t pte)
{
	return __pmd(pte_val(pte));
}

static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
	return pmd;
}

static inline pmd_t pmd_mkinvalid(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
}

#define __pmd_to_phys(pmd)  (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT)

static inline unsigned long pmd_pfn(pmd_t pmd)
{
	return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
}

#define __pud_to_phys(pud)  (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT)

static inline unsigned long pud_pfn(pud_t pud)
{
	return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT);
}

static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}

#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
	return pte_write(pmd_pte(pmd));
}

static inline int pmd_dirty(pmd_t pmd)
{
	return pte_dirty(pmd_pte(pmd));
}

static inline int pmd_young(pmd_t pmd)
{
	return pte_young(pmd_pte(pmd));
}

static inline int pmd_user(pmd_t pmd)
{
	return pte_user(pmd_pte(pmd));
}

static inline pmd_t pmd_mkold(pmd_t pmd)
{
	return pte_pmd(pte_mkold(pmd_pte(pmd)));
}

static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
	return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
}

static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
	return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
}

static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
	return pte_pmd(pte_wrprotect(pmd_pte(pmd)));
}

static inline pmd_t pmd_mkclean(pmd_t pmd)
{
	return pte_pmd(pte_mkclean(pmd_pte(pmd)));
}

static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
	return pte_pmd(pte_mkdirty(pmd_pte(pmd)));
}

static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
				pmd_t *pmdp, pmd_t pmd)
{
	page_table_check_pmd_set(mm, addr, pmdp, pmd);
	return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
}

static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
				pud_t *pudp, pud_t pud)
{
	page_table_check_pud_set(mm, addr, pudp, pud);
	return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
}

#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{
	return pte_present(pte) && pte_user(pte);
}

static inline bool pmd_user_accessible_page(pmd_t pmd)
{
	return pmd_leaf(pmd) && pmd_user(pmd);
}

static inline bool pud_user_accessible_page(pud_t pud)
{
	return pud_leaf(pud) && pud_user(pud);
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return pmd_leaf(pmd);
}

#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
}

#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp)
{
	return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}

#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					unsigned long address, pmd_t *pmdp)
{
	pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));

	page_table_check_pmd_clear(mm, address, pmd);

	return pmd;
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
					unsigned long address, pmd_t *pmdp)
{
	ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}

#define pmdp_establish pmdp_establish
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
				unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
	return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Encode and decode a swap entry
 *
 * Format of swap PTE:
 *	bit            0:	_PAGE_PRESENT (zero)
 *	bit       1 to 3:       _PAGE_LEAF (zero)
 *	bit            5:	_PAGE_PROT_NONE (zero)
 *	bits      6 to 10:	swap type
 *	bits 10 to XLEN-1:	swap offset
 */
#define __SWP_TYPE_SHIFT	6
#define __SWP_TYPE_BITS		5
#define __SWP_TYPE_MASK		((1UL << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)

#define MAX_SWAPFILES_CHECK()	\
	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

#define __swp_type(x)	(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x)	((x).val >> __SWP_OFFSET_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) \
	{ ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })

#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x)	((pte_t) { (x).val })

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
#define __swp_entry_to_pmd(swp) __pmd((swp).val)
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */

/*
 * In the RV64 Linux scheme, we give the user half of the virtual-address space
 * and give the kernel the other (upper) half.
 */
#ifdef CONFIG_64BIT
#define KERN_VIRT_START	(-(BIT(VA_BITS)) + TASK_SIZE)
#else
#define KERN_VIRT_START	FIXADDR_START
#endif

/*
 * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
 * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
 * Task size is:
 * -     0x9fc00000 (~2.5GB) for RV32.
 * -   0x4000000000 ( 256GB) for RV64 using SV39 mmu
 * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
 *
 * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
 * Instruction Set Manual Volume II: Privileged Architecture" states that
 * "load and store effective addresses, which are 64bits, must have bits
 * 63–48 all equal to bit 47, or else a page-fault exception will occur."
 */
#ifdef CONFIG_64BIT
#define TASK_SIZE_64	(PGDIR_SIZE * PTRS_PER_PGD / 2)
#define TASK_SIZE_MIN	(PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)

#ifdef CONFIG_COMPAT
#define TASK_SIZE_32	(_AC(0x80000000, UL) - PAGE_SIZE)
#define TASK_SIZE	(test_thread_flag(TIF_32BIT) ? \
			 TASK_SIZE_32 : TASK_SIZE_64)
#else
#define TASK_SIZE	TASK_SIZE_64
#endif

#else
#define TASK_SIZE	FIXADDR_START
#define TASK_SIZE_MIN	TASK_SIZE
#endif

#else /* CONFIG_MMU */

#define PAGE_SHARED		__pgprot(0)
#define PAGE_KERNEL		__pgprot(0)
#define swapper_pg_dir		NULL
#define TASK_SIZE		0xffffffffUL
#define VMALLOC_START		0
#define VMALLOC_END		TASK_SIZE

#endif /* !CONFIG_MMU */

#define kern_addr_valid(addr)   (1) /* FIXME */

extern char _start[];
extern void *_dtb_early_va;
extern uintptr_t _dtb_early_pa;
#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_MMU)
#define dtb_early_va	(*(void **)XIP_FIXUP(&_dtb_early_va))
#define dtb_early_pa	(*(uintptr_t *)XIP_FIXUP(&_dtb_early_pa))
#else
#define dtb_early_va	_dtb_early_va
#define dtb_early_pa	_dtb_early_pa
#endif /* CONFIG_XIP_KERNEL */
extern u64 satp_mode;
extern bool pgtable_l4_enabled;

void paging_init(void);
void misc_mem_init(void);

/*
 * ZERO_PAGE is a global shared page that is always zero,
 * used for zero-mapped memory areas, etc.
 */
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_RISCV_PGTABLE_H */
-												treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 286

Based on 1 normalized pattern(s):

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license as published by
  the free software foundation version 2 this program is distributed
  in the hope that it will be useful but without any warranty without
  even the implied warranty of merchantability or fitness for a
  particular purpose see the gnu general public license for more
  details

extracted by the scancode license scanner the SPDX license identifier

  GPL-2.0-only

has been chosen to replace the boilerplate/reference in 97 file(s).

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Allison Randal <allison@lohutok.net>
Reviewed-by: Alexios Zavras <alexios.zavras@intel.com>
Cc: linux-spdx@vger.kernel.org
Link: https://lkml.kernel.org/r/20190529141901.025053186@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

											
										
										
											2019-05-29 07:18:00 -07:00
+								/* SPDX-License-Identifier: GPL-2.0-only */
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								/*
 								 * Copyright (C) 2012 Regents of the University of California
 								 */
 								#ifndef _ASM_RISCV_PGTABLE_H
 								#define _ASM_RISCV_PGTABLE_H
 								#include <linux/mmzone.h>
-												RISC-V: Add PCIe I/O BAR memory mapping

For legacy I/O BARs (non-MMIO BARs) to work correctly on RISC-V Linux,
we need to establish a reserved memory region for them, so that drivers
that wish to use the legacy I/O BARs can issue reads and writes against
a memory region that is mapped to the host PCIe controller's I/O BAR
mapping.

Signed-off-by: Yash Shah <yash.shah@sifive.com>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-25 08:30:03 +00:00
+								#include <linux/sizes.h>
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
 								#include <asm/pgtable-bits.h>
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#ifndef CONFIG_MMU
 								#define KERNEL_LINK_ADDR	PAGE_OFFSET
-												riscv: Fix is_linear_mapping with recent move of KASAN region

The KASAN region was recently moved between the linear mapping and the
kernel mapping, is_linear_mapping used to check the validity of an
address by using the start of the kernel mapping, which is now wrong.

Fix this by using the maximum size of the physical memory.

Fixes: f7ae02333d13 ("riscv: Move KASAN mapping next to the kernel mapping")
Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-02-25 13:39:48 +01:00
+								#define KERN_VIRT_SIZE		(UL(-1))
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#else
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#define ADDRESS_SPACE_END	(UL(-1))
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#ifdef CONFIG_64BIT
 								/* Leave 2GB for kernel and BPF at the end of the address space */
 								#define KERNEL_LINK_ADDR	(ADDRESS_SPACE_END - SZ_2G + 1)
 								#else
 								#define KERNEL_LINK_ADDR	PAGE_OFFSET
 								#endif
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
-												riscv: Move KASAN mapping next to the kernel mapping

Now that KASAN_SHADOW_OFFSET is defined at compile time as a config,
this value must remain constant whatever the size of the virtual address
space, which is only possible by pushing this region at the end of the
address space next to the kernel mapping.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:45 +01:00
+								/* Number of entries in the page global directory */
 								#define PTRS_PER_PGD    (PAGE_SIZE / sizeof(pgd_t))
 								/* Number of entries in the page table */
 								#define PTRS_PER_PTE    (PAGE_SIZE / sizeof(pte_t))
 								/*
 								 * Half of the kernel address space (half of the entries of the page global
 								 * directory) is for the direct mapping.
 								 */
 								#define KERN_VIRT_SIZE          ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
+								#define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
-												riscv: Make vmalloc/vmemmap end equal to the start of the next region

We used to define VMALLOC_END equal to the start of the next region
*minus one* which is inconsistent with the use of this define in the
core code (for example, see the definitions of VMALLOC_TOTAL and
is_vmalloc_addr).

And then make the definition of VMEMMAP_END consistent with VMALLOC_END
and all other regions actually.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-11-18 14:45:39 +01:00
+								#define VMALLOC_END      PAGE_OFFSET
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
+								#define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
 								#define BPF_JIT_REGION_SIZE	(SZ_128M)
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#ifdef CONFIG_64BIT
-												riscv: Ensure BPF_JIT_REGION_START aligned with PMD size

Andreas reported commit fc8504765ec5 ("riscv: bpf: Avoid breaking W^X")
breaks booting with one kind of defconfig, I reproduced a kernel panic
with the defconfig:

[    0.138553] Unable to handle kernel paging request at virtual address ffffffff81201220
[    0.139159] Oops [#1]
[    0.139303] Modules linked in:
[    0.139601] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5-default+ #1
[    0.139934] Hardware name: riscv-virtio,qemu (DT)
[    0.140193] epc : __memset+0xc4/0xfc
[    0.140416]  ra : skb_flow_dissector_init+0x1e/0x82
[    0.140609] epc : ffffffff8029806c ra : ffffffff8033be78 sp : ffffffe001647da0
[    0.140878]  gp : ffffffff81134b08 tp : ffffffe001654380 t0 : ffffffff81201158
[    0.141156]  t1 : 0000000000000002 t2 : 0000000000000154 s0 : ffffffe001647dd0
[    0.141424]  s1 : ffffffff80a43250 a0 : ffffffff81201220 a1 : 0000000000000000
[    0.141654]  a2 : 000000000000003c a3 : ffffffff81201258 a4 : 0000000000000064
[    0.141893]  a5 : ffffffff8029806c a6 : 0000000000000040 a7 : ffffffffffffffff
[    0.142126]  s2 : ffffffff81201220 s3 : 0000000000000009 s4 : ffffffff81135088
[    0.142353]  s5 : ffffffff81135038 s6 : ffffffff8080ce80 s7 : ffffffff80800438
[    0.142584]  s8 : ffffffff80bc6578 s9 : 0000000000000008 s10: ffffffff806000ac
[    0.142810]  s11: 0000000000000000 t3 : fffffffffffffffc t4 : 0000000000000000
[    0.143042]  t5 : 0000000000000155 t6 : 00000000000003ff
[    0.143220] status: 0000000000000120 badaddr: ffffffff81201220 cause: 000000000000000f
[    0.143560] [<ffffffff8029806c>] __memset+0xc4/0xfc
[    0.143859] [<ffffffff8061e984>] init_default_flow_dissectors+0x22/0x60
[    0.144092] [<ffffffff800010fc>] do_one_initcall+0x3e/0x168
[    0.144278] [<ffffffff80600df0>] kernel_init_freeable+0x1c8/0x224
[    0.144479] [<ffffffff804868a8>] kernel_init+0x12/0x110
[    0.144658] [<ffffffff800022de>] ret_from_exception+0x0/0xc
[    0.145124] ---[ end trace f1e9643daa46d591 ]---

After some investigation, I think I found the root cause: commit
2bfc6cd81bd ("move kernel mapping outside of linear mapping") moves
BPF JIT region after the kernel:

| #define BPF_JIT_REGION_START	PFN_ALIGN((unsigned long)&_end)

The &_end is unlikely aligned with PMD size, so the front bpf jit
region sits with part of kernel .data section in one PMD size mapping.
But kernel is mapped in PMD SIZE, when bpf_jit_binary_lock_ro() is
called to make the first bpf jit prog ROX, we will make part of kernel
.data section RO too, so when we write to, for example memset the
.data section, MMU will trigger a store page fault.

To fix the issue, we need to ensure the BPF JIT region is PMD size
aligned. This patch acchieve this goal by restoring the BPF JIT region
to original position, I.E the 128MB before kernel .text section. The
modification to kasan_init.c is inspired by Alexandre.

Fixes: fc8504765ec5 ("riscv: bpf: Avoid breaking W^X")
Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-06-18 22:09:13 +08:00
+								#define BPF_JIT_REGION_START	(BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE)
 								#define BPF_JIT_REGION_END	(MODULES_END)
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#else
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
+								#define BPF_JIT_REGION_START	(PAGE_OFFSET - BPF_JIT_REGION_SIZE)
 								#define BPF_JIT_REGION_END	(VMALLOC_END)
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#endif
 								/* Modules always live before the kernel */
 								#ifdef CONFIG_64BIT
-												riscv: Move KASAN mapping next to the kernel mapping

Now that KASAN_SHADOW_OFFSET is defined at compile time as a config,
this value must remain constant whatever the size of the virtual address
space, which is only possible by pushing this region at the end of the
address space next to the kernel mapping.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:45 +01:00
+								/* This is used to define the end of the KASAN shadow region */
 								#define MODULES_LOWEST_VADDR	(KERNEL_LINK_ADDR - SZ_2G)
 								#define MODULES_VADDR		(PFN_ALIGN((unsigned long)&_end) - SZ_2G)
 								#define MODULES_END		(PFN_ALIGN((unsigned long)&_start))
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#endif
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
 								/*
 								 * Roughly size the vmemmap space to be large enough to fit enough
 								 * struct pages to map half the virtual address space. Then
 								 * position vmemmap directly below the VMALLOC region.
 								 */
-												riscv: Allow to dynamically define VA_BITS

With 4-level page table folding at runtime, we don't know at compile time
the size of the virtual address space so we must set VA_BITS dynamically
so that sparsemem reserves the right amount of memory for struct pages.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:48 +01:00
+								#ifdef CONFIG_64BIT
-												riscv: mm: Control p4d's folding by pgtable_l5_enabled

To determine pgtable level at boot time, we can not use helper functions
in include/asm-generic/pgtable-nop4d.h and must implement these
functions. This patch uses pgtable_l5_enabled variable instead of
including pgtable-nop4d.h to controle p4d's folding, and implements
corresponding helper functions.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-01-27 10:48:41 +08:00
+								#define VA_BITS		(pgtable_l5_enabled ? \
 : (pgtable_l4_enabled ? 48 : 39))
-												riscv: Allow to dynamically define VA_BITS

With 4-level page table folding at runtime, we don't know at compile time
the size of the virtual address space so we must set VA_BITS dynamically
so that sparsemem reserves the right amount of memory for struct pages.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:48 +01:00
+								#else
 								#define VA_BITS		32
 								#endif
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
+								#define VMEMMAP_SHIFT \
-												riscv: Allow to dynamically define VA_BITS

With 4-level page table folding at runtime, we don't know at compile time
the size of the virtual address space so we must set VA_BITS dynamically
so that sparsemem reserves the right amount of memory for struct pages.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:48 +01:00
+									(VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
+								#define VMEMMAP_SIZE	BIT(VMEMMAP_SHIFT)
-												riscv: Make vmalloc/vmemmap end equal to the start of the next region

We used to define VMALLOC_END equal to the start of the next region
*minus one* which is inconsistent with the use of this define in the
core code (for example, see the definitions of VMALLOC_TOTAL and
is_vmalloc_addr).

And then make the definition of VMEMMAP_END consistent with VMALLOC_END
and all other regions actually.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-11-18 14:45:39 +01:00
+								#define VMEMMAP_END	VMALLOC_START
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
+								#define VMEMMAP_START	(VMALLOC_START - VMEMMAP_SIZE)
 								/*
 								 * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
 								 * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
 								 */
 								#define vmemmap		((struct page *)VMEMMAP_START)
 								#define PCI_IO_SIZE      SZ_16M
 								#define PCI_IO_END       VMEMMAP_START
 								#define PCI_IO_START     (PCI_IO_END - PCI_IO_SIZE)
 								#define FIXADDR_TOP      PCI_IO_START
 								#ifdef CONFIG_64BIT
 								#define FIXADDR_SIZE     PMD_SIZE
 								#else
 								#define FIXADDR_SIZE     PGDIR_SIZE
 								#endif
 								#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
-												RISC-V: enable XIP

Introduce XIP (eXecute In Place) support for RISC-V platforms.
It allows code to be executed directly from non-volatile storage
directly addressable by the CPU, such as QSPI NOR flash which can
be found on many RISC-V platforms. This makes way for significant
optimization of RAM footprint. The XIP kernel is not compressed
since it has to run directly from flash, so it will occupy more
space on the non-volatile storage. The physical flash address used
to link the kernel object files and for storing it has to be known
at compile time and is represented by a Kconfig option.

XIP on RISC-V will for the time being only work on MMU-enabled
kernels.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
[Alex: Rebase on top of "Move kernel mapping outside the linear mapping" ]
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: disable XIP for allyesconfig]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-13 02:35:14 -04:00
-												RISC-V: Always define XIP_FIXUP

XIP depends on MMU, but XIP_FIXUP is used throughout the kernel in
order to avoid excessive ifdefs.  This just makes sure to always define
XIP_FIXUP, which will fix MMU=n builds.  XIP_OFFSET is used by assembly
but XIP_FIXUP is C-only, so they're split.

Fixes: 44c922572952 ("RISC-V: enable XIP")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
Tested-by: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-28 14:45:12 -07:00
+								#endif
-												RISC-V: enable XIP

Introduce XIP (eXecute In Place) support for RISC-V platforms.
It allows code to be executed directly from non-volatile storage
directly addressable by the CPU, such as QSPI NOR flash which can
be found on many RISC-V platforms. This makes way for significant
optimization of RAM footprint. The XIP kernel is not compressed
since it has to run directly from flash, so it will occupy more
space on the non-volatile storage. The physical flash address used
to link the kernel object files and for storing it has to be known
at compile time and is represented by a Kconfig option.

XIP on RISC-V will for the time being only work on MMU-enabled
kernels.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
[Alex: Rebase on top of "Move kernel mapping outside the linear mapping" ]
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: disable XIP for allyesconfig]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-13 02:35:14 -04:00
+								#ifdef CONFIG_XIP_KERNEL
-												riscv: remove .text section size limitation for XIP

Currently there's a limit of 8MB for the .text section of a RISC-V
image in the XIP case. This breaks compilation of many automatic
builds and is generally inconvenient. This patch removes that
limitation and optimizes XIP image file size at the same time.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-10-11 11:14:14 +02:00
+								#define XIP_OFFSET		SZ_32M
 								#define XIP_OFFSET_MASK		(SZ_32M - 1)
-												riscv: Simplify xip and !xip kernel address conversion macros

To simplify the kernel address conversion code, make the same definition of
kernel_mapping_pa_to_va and kernel_mapping_va_to_pa compatible for both xip
and !xip kernel by defining XIP_OFFSET to 0 in !xip kernel.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-06-04 13:49:48 +02:00
+								#else
 								#define XIP_OFFSET		0
-												RISC-V: Move all address space definition macros to one place

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the
following compilation error.

---------------------------------------------------------------
./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’:
./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared
(first use in this function); did you mean ‘mem_map’?
 #define __pfn_to_page(pfn) (vmemmap + (pfn))
                             ^~~~~~~
./include/asm-generic/memory_model.h:82:21: note: in expansion of
macro ‘__pfn_to_page’

 #define pfn_to_page __pfn_to_page
                     ^~~~~~~~~~~~~
./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro
‘pfn_to_page’
  return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
---------------------------------------------------------------

Fix the compliation errors by moving all the address space definition
macros before including pgtable-64.h.

Fixes: 8ad8b72721d0 (riscv: Add KASAN support)

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-02-24 11:34:36 -08:00
+								#endif
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								#ifndef __ASSEMBLY__
 								#include <asm/page.h>
 								#include <asm/tlbflush.h>
 								#include <linux/mm_types.h>
-												riscv: Fix accessing pfn bits in PTEs for non-32bit variants

On rv32 the PFN part of PTEs is defined to use bits [xlen-1:10]
while on rv64 it is defined to use bits [53:10], leaving [63:54]
as reserved.

With upcoming optional extensions like svpbmt these previously
reserved bits will get used so simply right-shifting the PTE
to get the PFN won't be enough.

So introduce a _PAGE_PFN_MASK constant to mask the correct bits
for both rv32 and rv64 before shifting.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
Link: https://lore.kernel.org/r/20220511192921.2223629-9-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:17 +02:00
+								#define __page_val_to_pfn(_val)  (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT)
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								#ifdef CONFIG_64BIT
 								#include <asm/pgtable-64.h>
 								#else
 								#include <asm/pgtable-32.h>
 								#endif /* CONFIG_64BIT */
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+								#include <linux/page_table_check.h>
-												RISC-V: Always define XIP_FIXUP

XIP depends on MMU, but XIP_FIXUP is used throughout the kernel in
order to avoid excessive ifdefs.  This just makes sure to always define
XIP_FIXUP, which will fix MMU=n builds.  XIP_OFFSET is used by assembly
but XIP_FIXUP is C-only, so they're split.

Fixes: 44c922572952 ("RISC-V: enable XIP")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
Tested-by: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-28 14:45:12 -07:00
+								#ifdef CONFIG_XIP_KERNEL
 								#define XIP_FIXUP(addr) ({							\
 									uintptr_t __a = (uintptr_t)(addr);					\
-												riscv: remove .text section size limitation for XIP

Currently there's a limit of 8MB for the .text section of a RISC-V
image in the XIP case. This breaks compilation of many automatic
builds and is generally inconvenient. This patch removes that
limitation and optimizes XIP image file size at the same time.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-10-11 11:14:14 +02:00
+									(__a >= CONFIG_XIP_PHYS_ADDR && \
 									 __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ?	\
-												RISC-V: Always define XIP_FIXUP

XIP depends on MMU, but XIP_FIXUP is used throughout the kernel in
order to avoid excessive ifdefs.  This just makes sure to always define
XIP_FIXUP, which will fix MMU=n builds.  XIP_OFFSET is used by assembly
but XIP_FIXUP is C-only, so they're split.

Fixes: 44c922572952 ("RISC-V: enable XIP")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
Tested-by: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-28 14:45:12 -07:00
+										__a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\
 										__a;								\
 									})
 								#else
 								#define XIP_FIXUP(addr)		(addr)
 								#endif /* CONFIG_XIP_KERNEL */
-												riscv: Implement sv48 support

By adding a new 4th level of page table, give the possibility to 64bit
kernel to address 2^48 bytes of virtual address: in practice, that offers
128TB of virtual address space to userspace and allows up to 64TB of
physical memory.

If the underlying hardware does not support sv48, we will automatically
fallback to a standard 3-level page table by folding the new PUD level into
PGDIR level. In order to detect HW capabilities at runtime, we
use SATP feature that ignores writes with an unsupported mode.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:51 +01:00
+								struct pt_alloc_ops {
 									pte_t *(*get_pte_virt)(phys_addr_t pa);
 									phys_addr_t (*alloc_pte)(uintptr_t va);
 								#ifndef __PAGETABLE_PMD_FOLDED
 									pmd_t *(*get_pmd_virt)(phys_addr_t pa);
 									phys_addr_t (*alloc_pmd)(uintptr_t va);
 									pud_t *(*get_pud_virt)(phys_addr_t pa);
 									phys_addr_t (*alloc_pud)(uintptr_t va);
-												riscv: mm: Prepare pt_ops helper functions for sv57

This patch prepare some pt_ops helper functions which will be used in
creating sv57 mappings during boot time.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-01-27 10:48:42 +08:00
+									p4d_t *(*get_p4d_virt)(phys_addr_t pa);
 									phys_addr_t (*alloc_p4d)(uintptr_t va);
-												riscv: Implement sv48 support

By adding a new 4th level of page table, give the possibility to 64bit
kernel to address 2^48 bytes of virtual address: in practice, that offers
128TB of virtual address space to userspace and allows up to 64TB of
physical memory.

If the underlying hardware does not support sv48, we will automatically
fallback to a standard 3-level page table by folding the new PUD level into
PGDIR level. In order to detect HW capabilities at runtime, we
use SATP feature that ignores writes with an unsupported mode.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:51 +01:00
+								#endif
 								};
-												RISC-V: Introduce sv48 support without relocatable kernel

This patchset allows to have a single kernel for sv39 and sv48 without
being relocatable.

The idea comes from Arnd Bergmann who suggested to do the same as x86,
that is mapping the kernel to the end of the address space, which allows
the kernel to be linked at the same address for both sv39 and sv48 and
then does not require to be relocated at runtime.

This implements sv48 support at runtime. The kernel will try to boot
with 4-level page table and will fallback to 3-level if the HW does not
support it. Folding the 4th level into a 3-level page table has almost
no cost at runtime.

Note that kasan region had to be moved to the end of the address space
since its location must be known at compile-time and then be valid for
both sv39 and sv48 (and sv57 that is coming).

* riscv-sv48-v3:
  riscv: Explicit comment about user virtual address space size
  riscv: Use pgtable_l4_enabled to output mmu_type in cpuinfo
  riscv: Implement sv48 support
  asm-generic: Prepare for riscv use of pud_alloc_one and pud_free
  riscv: Allow to dynamically define VA_BITS
  riscv: Introduce functions to switch pt_ops
  riscv: Split early kasan mapping to prepare sv48 introduction
  riscv: Move KASAN mapping next to the kernel mapping
  riscv: Get rid of MAXPHYSMEM configs

Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-01-19 19:23:41 -08:00
+								extern struct pt_alloc_ops pt_ops __initdata;
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
-												riscv: add nommu support

The kernel runs in M-mode without using page tables, and thus can't run
bare metal without help from additional firmware.

Most of the patch is just stubbing out code not needed without page
tables, but there is an interesting detail in the signals implementation:

 - The normal RISC-V syscall ABI only implements rt_sigreturn as VDSO
   entry point, but the ELF VDSO is not supported for nommu Linux.
   We instead copy the code to call the syscall onto the stack.

In addition to enabling the nommu code a new defconfig for a small
kernel image that can run in nommu mode on qemu is also provided, to run
a kernel in qemu you can use the following command line:

qemu-system-riscv64 -smp 2 -m 64 -machine virt -nographic \
	-kernel arch/riscv/boot/loader \
	-drive file=rootfs.ext2,format=raw,id=hd0 \
	-device virtio-blk-device,drive=hd0

Contains contributions from Damien Le Moal <Damien.LeMoal@wdc.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anup Patel <anup@brainfault.org>
[paul.walmsley@sifive.com: updated to apply; add CONFIG_MMU guards
 around PCI_IOBASE definition to fix build issues; fixed checkpatch
 issues; move the PCI_IO_* and VMEMMAP address space macros along
 with the others; resolve sparse warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-28 13:10:41 +01:00
+								#ifdef CONFIG_MMU
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								/* Number of PGD entries that a user-mode program can use */
 								#define USER_PTRS_PER_PGD   (TASK_SIZE / PGDIR_SIZE)
 								/* Page protection bits */
 								#define _PAGE_BASE	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
-												riscv/mm: Adjust PAGE_PROT_NONE to comply with THP semantics

This is a preparation for enabling THP migration.
As the commit b65399f6111b("arm64/mm: Change THP helpers
to comply with generic MM semantics") mentioned, pmd_present()
and pmd_trans_huge() are expected to behave in the following
manner:
-------------------------------------------------------------------------
|	PMD states	|	pmd_present	|	pmd_trans_huge	|
-------------------------------------------------------------------------
|	Mapped		|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Splitting	|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Migration/Swap	|	No		|	No		|
-------------------------------------------------------------------------

At present the PROT_NONE bit reuses the READ bit could not comply with
above semantics with two problems:
1. When splitting a PMD THP, PMD is first invalidated with
pmdp_invalidate()->pmd_mkinvalid(), which clears the PRESENT bit
and PROT_NONE bit/READ bit, if the PMD is read-only, then the PAGE_LEAF
property is also cleared, which results in pmd_present() return false.
2. When migrating, the swap entry only clear the PRESENT bit
and PROT_NONE bit/READ bit, the W/X bit may be set, so _PAGE_LEAF may be
true which results in pmd_present() return true.

Solution:
Adjust PROT_NONE bit from READ to GLOBAL bit can satisfy the above rules:
1. GLOBAL bit has no other meanings, not like the R/W/X bit, which is
also relative with _PAGE_LEAF property.
2. GLOBAL bit is at bit 5, making swap entry start from bit 6, bit 0-5
are zero, which means the PRESENT, PROT_NONE, and PAGE_LEAF are
all false, then the pmd_present() and pmd_trans_huge() return false when
in migration/swap.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-11-23 22:06:37 +08:00
+								#define PAGE_NONE		__pgprot(_PAGE_PROT_NONE | _PAGE_READ)
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								#define PAGE_READ		__pgprot(_PAGE_BASE | _PAGE_READ)
 								#define PAGE_WRITE		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
 								#define PAGE_EXEC		__pgprot(_PAGE_BASE | _PAGE_EXEC)
 								#define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 								#define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
 													 _PAGE_EXEC | _PAGE_WRITE)
 								#define PAGE_COPY		PAGE_READ
 								#define PAGE_COPY_EXEC		PAGE_EXEC
 								#define PAGE_COPY_READ_EXEC	PAGE_READ_EXEC
 								#define PAGE_SHARED		PAGE_WRITE
 								#define PAGE_SHARED_EXEC	PAGE_WRITE_EXEC
 								#define _PAGE_KERNEL		(_PAGE_READ \
 												| _PAGE_WRITE \
 												| _PAGE_PRESENT \
 												| _PAGE_ACCESSED \
-												riscv: Use global mappings for kernel pages

We map kernel pages into all addresses spages, so they can be marked as
global.  This allows hardware to avoid flushing the kernel mappings when
moving between address spaces.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[Palmer: commit text]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-05-26 05:49:20 +00:00
+												| _PAGE_DIRTY \
 												| _PAGE_GLOBAL)
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
 								#define PAGE_KERNEL		__pgprot(_PAGE_KERNEL)
-												RISC-V: Add EFI runtime services

This patch adds EFI runtime service support for RISC-V.

Signed-off-by: Atish Patra <atish.patra@wdc.com>
[ardb: - Remove the page check]
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-09-17 15:37:15 -07:00
+								#define PAGE_KERNEL_READ	__pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
 								#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL | _PAGE_EXEC)
 								#define PAGE_KERNEL_READ_EXEC	__pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \
 													 | _PAGE_EXEC)
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
-												RISC-V: Setup initial page tables in two stages

Currently, the setup_vm() does initial page table setup in one-shot
very early before enabling MMU. Due to this, the setup_vm() has to map
all possible kernel virtual addresses since it does not know size and
location of RAM. This means we have kernel mappings for non-existent
RAM and any buggy driver (or kernel) code doing out-of-bound access
to RAM will not fault and cause underterministic behaviour.

Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
516 KB) of memory for initial page tables which is never freed. The
memory required for initial page tables will further increase if
we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)

This patch implements two-staged initial page table setup, as follows:
1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
a early page table (i.e. early_pg_dir). The early_pg_dir will be used
only by boot HART so it can be freed as-part of init memory free-up.
2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
banks in the final page table (i.e. swapper_pg_dir). The boot HART
will start using swapper_pg_dir at the end of setup_vm_final(). All
non-boot HARTs directly use the swapper_pg_dir created by boot HART.

We have following advantages with this new approach:
1. Kernel mappings for non-existent RAM don't exists anymore.
2. Memory consumed by initial page tables is now indpendent of the
chosen PAGE_OFFSET.
3. Memory consumed by initial page tables on RV64 system is 2 pages
(i.e. 8 KB) which has significantly reduced and these pages will be
freed as-part of the init memory free-up.

The patch also provides a foundation for implementing strict kernel
mappings where we protect kernel text and rodata using PTE permissions.

Suggested-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Anup Patel <anup.patel@wdc.com>
[paul.walmsley@sifive.com: updated to apply; fixed a checkpatch warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-06-28 13:36:21 -07:00
+								#define PAGE_TABLE		__pgprot(_PAGE_TABLE)
-												riscv: add RISC-V Svpbmt extension support

Svpbmt (the S should be capitalized) is the
"Supervisor-mode: page-based memory types" extension
that specifies attributes for cacheability, idempotency
and ordering.

The relevant settings are done in special bits in PTEs:

Here is the svpbmt PTE format:
| 63 | 62-61 | 60-8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
  N     MT     RSW    D   A   G   U   X   W   R   V
        ^

Of the Reserved bits [63:54] in a leaf PTE, the high bit is already
allocated (as the N bit), so bits [62:61] are used as the MT (aka
MemType) field. This field specifies one of three memory types that
are close equivalents (or equivalent in effect) to the three main x86
and ARMv8 memory types - as shown in the following table.

RISC-V
Encoding &
MemType     RISC-V Description
----------  ------------------------------------------------
00 - PMA    Normal Cacheable, No change to implied PMA memory type
01 - NC     Non-cacheable, idempotent, weakly-ordered Main Memory
10 - IO     Non-cacheable, non-idempotent, strongly-ordered I/O memory
11 - Rsvd   Reserved for future standard use

As the extension will not be present on all implementations,
implement a method to handle cpufeatures via alternatives
to not incur runtime penalties on cpu variants not supporting
specific extensions and patch relevant code parts at runtime.

Co-developed-by: Wei Fu <wefu@redhat.com>
Signed-off-by: Wei Fu <wefu@redhat.com>
Co-developed-by: Liu Shaohua <liush@allwinnertech.com>
Signed-off-by: Liu Shaohua <liush@allwinnertech.com>
Co-developed-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Guo Ren <guoren@kernel.org>
[moved to use the alternatives mechanism]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
Link: https://lore.kernel.org/r/20220511192921.2223629-10-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:18 +02:00
+								#define _PAGE_IOREMAP	((_PAGE_KERNEL & ~_PAGE_MTMASK) | _PAGE_IO)
 								#define PAGE_KERNEL_IO		__pgprot(_PAGE_IOREMAP)
-												riscv: use the generic ioremap code

Use the generic ioremap code instead of providing a local version.
Note that this relies on the asm-generic no-op definition of
pgprot_noncached.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Paul Walmsley <paul.walmsley@sifive.com>
Tested-by: Paul Walmsley <paul.walmsley@sifive.com> # rv32, rv64 boot
Acked-by: Paul Walmsley <paul.walmsley@sifive.com> # arch/riscv

											
										
										
											2019-08-13 11:27:56 +02:00
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								extern pgd_t swapper_pg_dir[];
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+								#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 								static inline int pmd_present(pmd_t pmd)
 								{
 									/*
 									 * Checking for _PAGE_LEAF is needed too because:
 									 * When splitting a THP, split_huge_page() will temporarily clear
 									 * the present bit, in this situation, pmd_present() and
 									 * pmd_trans_huge() still needs to return true.
 									 */
 									return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE | _PAGE_LEAF));
 								}
 								#else
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								static inline int pmd_present(pmd_t pmd)
 								{
-												riscv: Add pte bit to distinguish swap from invalid

Previously, invalid PTEs and swap PTEs had the same binary
representation, causing errors when attempting to unmap PROT_NONE
mappings, including implicit unmap on exit.

Typical error:

swap_info_get: Bad swap file entry 40000000007a9879
BUG: Bad page map in process a.out  pte:3d4c3cc0 pmd:3e521401

Cc: stable@vger.kernel.org
Signed-off-by: Stefan O'Rear <sorear2@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>

											
										
										
											2018-12-16 13:03:36 -05:00
+									return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+								#endif
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
 								static inline int pmd_none(pmd_t pmd)
 								{
 									return (pmd_val(pmd) == 0);
 								}
 								static inline int pmd_bad(pmd_t pmd)
 								{
-												riscv: mm: make pmd_bad() check leaf condition

In the definition in Documentation/vm/arch_pgtable_helpers.rst,
pmd_bad() means test a non-table mapped PMD, so it should also
return true when it is a leaf page.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:48 +08:00
+									return !pmd_present(pmd) || (pmd_val(pmd) & _PAGE_LEAF);
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
-												riscv: mm: add p?d_leaf() definitions

walk_page_range() is going to be allowed to walk page tables other than
those of user space.  For this it needs to know when it has reached a
'leaf' entry in the page tables.  This information is provided by the
p?d_leaf() functions/macros.

For riscv a page is a leaf page when it has a read, write or execute bit
set on it.

Link: http://lkml.kernel.org/r/20191218162402.45610-8-steven.price@arm.com
Signed-off-by: Steven Price <steven.price@arm.com>
Reviewed-by: Alexandre Ghiti <alex@ghiti.fr>
Reviewed-by: Zong Li <zong.li@sifive.com>
Acked-by: Paul Walmsley <paul.walmsley@sifive.com>	[arch/riscv]
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Hogan <jhogan@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Liang, Kan" <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

											
										
										
											2020-02-03 17:35:28 -08:00
+								#define pmd_leaf	pmd_leaf
 								static inline int pmd_leaf(pmd_t pmd)
 								{
-												riscv: mm: add _PAGE_LEAF macro

In riscv, a page table entry is leaf when any bit of read, write,
or execute bit is set. So add a macro:_PAGE_LEAF instead of
(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC), which is frequently used
to determine if it is a leaf page. This make code easier to read,
without any functional change.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:47 +08:00
+									return pmd_present(pmd) && (pmd_val(pmd) & _PAGE_LEAF);
-												riscv: mm: add p?d_leaf() definitions

walk_page_range() is going to be allowed to walk page tables other than
those of user space.  For this it needs to know when it has reached a
'leaf' entry in the page tables.  This information is provided by the
p?d_leaf() functions/macros.

For riscv a page is a leaf page when it has a read, write or execute bit
set on it.

Link: http://lkml.kernel.org/r/20191218162402.45610-8-steven.price@arm.com
Signed-off-by: Steven Price <steven.price@arm.com>
Reviewed-by: Alexandre Ghiti <alex@ghiti.fr>
Reviewed-by: Zong Li <zong.li@sifive.com>
Acked-by: Paul Walmsley <paul.walmsley@sifive.com>	[arch/riscv]
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Hogan <jhogan@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Liang, Kan" <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

											
										
										
											2020-02-03 17:35:28 -08:00
+								}
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 								{
 									*pmdp = pmd;
 								}
 								static inline void pmd_clear(pmd_t *pmdp)
 								{
 									set_pmd(pmdp, __pmd(0));
 								}
 								static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
 								{
-												riscv: add memory-type errata for T-Head

Some current cpus based on T-Head cores implement memory-types
way different than described in the svpbmt spec even going
so far as using PTE bits marked as reserved.

Add the T-Head vendor-id and necessary errata code to
replace the affected instructions.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20220511192921.2223629-13-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:21 +02:00
+									unsigned long prot_val = pgprot_val(prot);
 									ALT_THEAD_PMA(prot_val);
 									return __pgd((pfn << _PAGE_PFN_SHIFT) | prot_val);
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
-												RISC-V: Setup initial page tables in two stages

Currently, the setup_vm() does initial page table setup in one-shot
very early before enabling MMU. Due to this, the setup_vm() has to map
all possible kernel virtual addresses since it does not know size and
location of RAM. This means we have kernel mappings for non-existent
RAM and any buggy driver (or kernel) code doing out-of-bound access
to RAM will not fault and cause underterministic behaviour.

Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
516 KB) of memory for initial page tables which is never freed. The
memory required for initial page tables will further increase if
we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)

This patch implements two-staged initial page table setup, as follows:
1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
a early page table (i.e. early_pg_dir). The early_pg_dir will be used
only by boot HART so it can be freed as-part of init memory free-up.
2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
banks in the final page table (i.e. swapper_pg_dir). The boot HART
will start using swapper_pg_dir at the end of setup_vm_final(). All
non-boot HARTs directly use the swapper_pg_dir created by boot HART.

We have following advantages with this new approach:
1. Kernel mappings for non-existent RAM don't exists anymore.
2. Memory consumed by initial page tables is now indpendent of the
chosen PAGE_OFFSET.
3. Memory consumed by initial page tables on RV64 system is 2 pages
(i.e. 8 KB) which has significantly reduced and these pages will be
freed as-part of the init memory free-up.

The patch also provides a foundation for implementing strict kernel
mappings where we protect kernel text and rodata using PTE permissions.

Suggested-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Anup Patel <anup.patel@wdc.com>
[paul.walmsley@sifive.com: updated to apply; fixed a checkpatch warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-06-28 13:36:21 -07:00
+								static inline unsigned long _pgd_pfn(pgd_t pgd)
 								{
-												riscv: Fix missing PAGE_PFN_MASK

There are a bunch of functions that use the PFN from a page table entry
that end up with the svpbmt upper-bits because they are missing the newly
introduced PAGE_PFN_MASK which leads to wrong addresses conversions and
then crash: fix this by adding this mask.

Fixes: 100631b48ded ("riscv: Fix accessing pfn bits in PTEs for non-32bit variants")
Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Anup Patel <anup@brainfault.org>

											
										
										
											2022-07-11 09:29:51 +05:30
+									return __page_val_to_pfn(pgd_val(pgd));
-												RISC-V: Setup initial page tables in two stages

Currently, the setup_vm() does initial page table setup in one-shot
very early before enabling MMU. Due to this, the setup_vm() has to map
all possible kernel virtual addresses since it does not know size and
location of RAM. This means we have kernel mappings for non-existent
RAM and any buggy driver (or kernel) code doing out-of-bound access
to RAM will not fault and cause underterministic behaviour.

Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
516 KB) of memory for initial page tables which is never freed. The
memory required for initial page tables will further increase if
we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)

This patch implements two-staged initial page table setup, as follows:
1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
a early page table (i.e. early_pg_dir). The early_pg_dir will be used
only by boot HART so it can be freed as-part of init memory free-up.
2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
banks in the final page table (i.e. swapper_pg_dir). The boot HART
will start using swapper_pg_dir at the end of setup_vm_final(). All
non-boot HARTs directly use the swapper_pg_dir created by boot HART.

We have following advantages with this new approach:
1. Kernel mappings for non-existent RAM don't exists anymore.
2. Memory consumed by initial page tables is now indpendent of the
chosen PAGE_OFFSET.
3. Memory consumed by initial page tables on RV64 system is 2 pages
(i.e. 8 KB) which has significantly reduced and these pages will be
freed as-part of the init memory free-up.

The patch also provides a foundation for implementing strict kernel
mappings where we protect kernel text and rodata using PTE permissions.

Suggested-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Anup Patel <anup.patel@wdc.com>
[paul.walmsley@sifive.com: updated to apply; fixed a checkpatch warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-06-28 13:36:21 -07:00
+								}
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								static inline struct page *pmd_page(pmd_t pmd)
 								{
-												riscv: Fix accessing pfn bits in PTEs for non-32bit variants

On rv32 the PFN part of PTEs is defined to use bits [xlen-1:10]
while on rv64 it is defined to use bits [53:10], leaving [63:54]
as reserved.

With upcoming optional extensions like svpbmt these previously
reserved bits will get used so simply right-shifting the PTE
to get the PFN won't be enough.

So introduce a _PAGE_PFN_MASK constant to mask the correct bits
for both rv32 and rv64 before shifting.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
Link: https://lore.kernel.org/r/20220511192921.2223629-9-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:17 +02:00
+									return pfn_to_page(__page_val_to_pfn(pmd_val(pmd)));
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
 								static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 								{
-												riscv: Fix accessing pfn bits in PTEs for non-32bit variants

On rv32 the PFN part of PTEs is defined to use bits [xlen-1:10]
while on rv64 it is defined to use bits [53:10], leaving [63:54]
as reserved.

With upcoming optional extensions like svpbmt these previously
reserved bits will get used so simply right-shifting the PTE
to get the PFN won't be enough.

So introduce a _PAGE_PFN_MASK constant to mask the correct bits
for both rv32 and rv64 before shifting.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
Link: https://lore.kernel.org/r/20220511192921.2223629-9-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:17 +02:00
+									return (unsigned long)pfn_to_virt(__page_val_to_pfn(pmd_val(pmd)));
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
-												riscv: Add support pte_protnone and pmd_protnone if CONFIG_NUMA_BALANCING

These two functions are used to distinguish between PROT_NONENUMA
protections and hinting fault protections.

Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-11-18 16:38:28 -08:00
+								static inline pte_t pmd_pte(pmd_t pmd)
 								{
 									return __pte(pmd_val(pmd));
 								}
-												riscv: mremap speedup - enable HAVE_MOVE_PUD and HAVE_MOVE_PMD

HAVE_MOVE_PUD enables remapping pages at the PUD level if both the source
and destination addresses are PUD-aligned.
HAVE_MOVE_PMD does similar speedup on the PMD level.

With HAVE_MOVE_PUD enabled, there is about a 143x improvement on qemu
With HAVE_MOVE_PMD enabled, there is about a 5x improvement on qemu

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-17 00:37:22 +08:00
+								static inline pte_t pud_pte(pud_t pud)
 								{
 									return __pte(pud_val(pud));
 								}
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								/* Yields the page frame number (PFN) of a page table entry */
 								static inline unsigned long pte_pfn(pte_t pte)
 								{
-												riscv: Fix accessing pfn bits in PTEs for non-32bit variants

On rv32 the PFN part of PTEs is defined to use bits [xlen-1:10]
while on rv64 it is defined to use bits [53:10], leaving [63:54]
as reserved.

With upcoming optional extensions like svpbmt these previously
reserved bits will get used so simply right-shifting the PTE
to get the PFN won't be enough.

So introduce a _PAGE_PFN_MASK constant to mask the correct bits
for both rv32 and rv64 before shifting.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
Link: https://lore.kernel.org/r/20220511192921.2223629-9-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:17 +02:00
+									return __page_val_to_pfn(pte_val(pte));
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
 								#define pte_page(x)     pfn_to_page(pte_pfn(x))
 								/* Constructs a page table entry */
 								static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 								{
-												riscv: add memory-type errata for T-Head

Some current cpus based on T-Head cores implement memory-types
way different than described in the svpbmt spec even going
so far as using PTE bits marked as reserved.

Add the T-Head vendor-id and necessary errata code to
replace the affected instructions.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20220511192921.2223629-13-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:21 +02:00
+									unsigned long prot_val = pgprot_val(prot);
 									ALT_THEAD_PMA(prot_val);
 									return __pte((pfn << _PAGE_PFN_SHIFT) | prot_val);
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
-												riscv: Fix implicit declaration of 'page_to_section'

With CONFIG_SPARSEMEM and !CONFIG_SPARSEMEM_VMEMMAP,

arch/riscv/include/asm/pgtable.h: In function ‘mk_pte’:
include/asm-generic/memory_model.h:64:14: error: implicit declaration of function ‘page_to_section’; did you mean ‘present_section’? [-Werror=implicit-function-declaration]
  int __sec = page_to_section(__pg);   \
              ^~~~~~~~~~~~~~~

Fixed by changing mk_pte() from inline function to macro.

Cc: Logan Gunthorpe <logang@deltatee.com>
Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem")
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
[paul.walmsley@sifive.com: fixed checkpatch errors]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-23 11:23:01 +08:00
+								#define mk_pte(page, prot)       pfn_pte(page_to_pfn(page), prot)
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
 								static inline int pte_present(pte_t pte)
 								{
-												riscv: Add pte bit to distinguish swap from invalid

Previously, invalid PTEs and swap PTEs had the same binary
representation, causing errors when attempting to unmap PROT_NONE
mappings, including implicit unmap on exit.

Typical error:

swap_info_get: Bad swap file entry 40000000007a9879
BUG: Bad page map in process a.out  pte:3d4c3cc0 pmd:3e521401

Cc: stable@vger.kernel.org
Signed-off-by: Stefan O'Rear <sorear2@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>

											
										
										
											2018-12-16 13:03:36 -05:00
+									return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
 								static inline int pte_none(pte_t pte)
 								{
 									return (pte_val(pte) == 0);
 								}
 								static inline int pte_write(pte_t pte)
 								{
 									return pte_val(pte) & _PAGE_WRITE;
 								}
-												RISC-V: Flush I$ when making a dirty page executable

The RISC-V ISA allows for instruction caches that are not coherent WRT
stores, even on a single hart.  As a result, we need to explicitly flush
the instruction cache whenever marking a dirty page as executable in
order to preserve the correct system behavior.

Local instruction caches aren't that scary (our implementations actually
flush the cache, but RISC-V is defined to allow higher-performance
implementations to exist), but RISC-V defines no way to perform an
instruction cache shootdown.  When explicitly asked to do so we can
shoot down remote instruction caches via an IPI, but this is a bit on
the slow side.

Instead of requiring an IPI to all harts whenever marking a page as
executable, we simply flush the currently running harts.  In order to
maintain correct behavior, we additionally mark every other hart as
needing a deferred instruction cache which will be taken before anything
runs on it.

Signed-off-by: Andrew Waterman <andrew@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>

											
										
										
											2017-10-25 14:30:32 -07:00
+								static inline int pte_exec(pte_t pte)
 								{
 									return pte_val(pte) & _PAGE_EXEC;
 								}
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+								static inline int pte_user(pte_t pte)
 								{
 									return pte_val(pte) & _PAGE_USER;
 								}
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								static inline int pte_huge(pte_t pte)
 								{
-												riscv: mm: add _PAGE_LEAF macro

In riscv, a page table entry is leaf when any bit of read, write,
or execute bit is set. So add a macro:_PAGE_LEAF instead of
(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC), which is frequently used
to determine if it is a leaf page. This make code easier to read,
without any functional change.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:47 +08:00
+									return pte_present(pte) && (pte_val(pte) & _PAGE_LEAF);
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
 								static inline int pte_dirty(pte_t pte)
 								{
 									return pte_val(pte) & _PAGE_DIRTY;
 								}
 								static inline int pte_young(pte_t pte)
 								{
 									return pte_val(pte) & _PAGE_ACCESSED;
 								}
 								static inline int pte_special(pte_t pte)
 								{
 									return pte_val(pte) & _PAGE_SPECIAL;
 								}
 								/* static inline pte_t pte_rdprotect(pte_t pte) */
 								static inline pte_t pte_wrprotect(pte_t pte)
 								{
 									return __pte(pte_val(pte) & ~(_PAGE_WRITE));
 								}
 								/* static inline pte_t pte_mkread(pte_t pte) */
 								static inline pte_t pte_mkwrite(pte_t pte)
 								{
 									return __pte(pte_val(pte) | _PAGE_WRITE);
 								}
 								/* static inline pte_t pte_mkexec(pte_t pte) */
 								static inline pte_t pte_mkdirty(pte_t pte)
 								{
 									return __pte(pte_val(pte) | _PAGE_DIRTY);
 								}
 								static inline pte_t pte_mkclean(pte_t pte)
 								{
 									return __pte(pte_val(pte) & ~(_PAGE_DIRTY));
 								}
 								static inline pte_t pte_mkyoung(pte_t pte)
 								{
 									return __pte(pte_val(pte) | _PAGE_ACCESSED);
 								}
 								static inline pte_t pte_mkold(pte_t pte)
 								{
 									return __pte(pte_val(pte) & ~(_PAGE_ACCESSED));
 								}
 								static inline pte_t pte_mkspecial(pte_t pte)
 								{
 									return __pte(pte_val(pte) | _PAGE_SPECIAL);
 								}
-												riscv: Introduce huge page support for 32/64bit kernel

This patch implements both 4MB huge page support for 32bit kernel
and 2MB/1GB huge pages support for 64bit kernel.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-05-26 08:50:38 -04:00
+								static inline pte_t pte_mkhuge(pte_t pte)
 								{
 									return pte;
 								}
-												riscv: Add support pte_protnone and pmd_protnone if CONFIG_NUMA_BALANCING

These two functions are used to distinguish between PROT_NONENUMA
protections and hinting fault protections.

Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-11-18 16:38:28 -08:00
+								#ifdef CONFIG_NUMA_BALANCING
 								/*
 								 * See the comment in include/asm-generic/pgtable.h
 								 */
 								static inline int pte_protnone(pte_t pte)
 								{
 									return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) == _PAGE_PROT_NONE;
 								}
 								static inline int pmd_protnone(pmd_t pmd)
 								{
 									return pte_protnone(pmd_pte(pmd));
 								}
 								#endif
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								/* Modify page protection bits */
 								static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 								{
-												riscv: add memory-type errata for T-Head

Some current cpus based on T-Head cores implement memory-types
way different than described in the svpbmt spec even going
so far as using PTE bits marked as reserved.

Add the T-Head vendor-id and necessary errata code to
replace the affected instructions.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20220511192921.2223629-13-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:21 +02:00
+									unsigned long newprot_val = pgprot_val(newprot);
 									ALT_THEAD_PMA(newprot_val);
 									return __pte((pte_val(pte) & _PAGE_CHG_MASK) | newprot_val);
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
 								#define pgd_ERROR(e) \
 									pr_err("%s:%d: bad pgd " PTE_FMT ".\n", __FILE__, __LINE__, pgd_val(e))
 								/* Commit new configuration to MMU hardware */
 								static inline void update_mmu_cache(struct vm_area_struct *vma,
 									unsigned long address, pte_t *ptep)
 								{
 									/*
 									 * The kernel assumes that TLBs don't cache invalid entries, but
 									 * in RISC-V, SFENCE.VMA specifies an ordering constraint, not a
 									 * cache flush; it is necessary even after writing invalid entries.
 									 * Relying on flush_tlb_fix_spurious_fault would suffice, but
 									 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
 									 */
 									local_flush_tlb_page(address);
 								}
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+								static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 										unsigned long address, pmd_t *pmdp)
 								{
 									pte_t *ptep = (pte_t *)pmdp;
 									update_mmu_cache(vma, address, ptep);
 								}
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								#define __HAVE_ARCH_PTE_SAME
 								static inline int pte_same(pte_t pte_a, pte_t pte_b)
 								{
 									return pte_val(pte_a) == pte_val(pte_b);
 								}
-												RISC-V: Flush I$ when making a dirty page executable

The RISC-V ISA allows for instruction caches that are not coherent WRT
stores, even on a single hart.  As a result, we need to explicitly flush
the instruction cache whenever marking a dirty page as executable in
order to preserve the correct system behavior.

Local instruction caches aren't that scary (our implementations actually
flush the cache, but RISC-V is defined to allow higher-performance
implementations to exist), but RISC-V defines no way to perform an
instruction cache shootdown.  When explicitly asked to do so we can
shoot down remote instruction caches via an IPI, but this is a bit on
the slow side.

Instead of requiring an IPI to all harts whenever marking a page as
executable, we simply flush the currently running harts.  In order to
maintain correct behavior, we additionally mark every other hart as
needing a deferred instruction cache which will be taken before anything
runs on it.

Signed-off-by: Andrew Waterman <andrew@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>

											
										
										
											2017-10-25 14:30:32 -07:00
+								/*
 								 * Certain architectures need to do special things when PTEs within
 								 * a page table are directly modified.  Thus, the following hook is
 								 * made available.
 								 */
 								static inline void set_pte(pte_t *ptep, pte_t pteval)
 								{
 									*ptep = pteval;
 								}
 								void flush_icache_pte(pte_t pte);
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+								static inline void __set_pte_at(struct mm_struct *mm,
-												RISC-V: Flush I$ when making a dirty page executable

The RISC-V ISA allows for instruction caches that are not coherent WRT
stores, even on a single hart.  As a result, we need to explicitly flush
the instruction cache whenever marking a dirty page as executable in
order to preserve the correct system behavior.

Local instruction caches aren't that scary (our implementations actually
flush the cache, but RISC-V is defined to allow higher-performance
implementations to exist), but RISC-V defines no way to perform an
instruction cache shootdown.  When explicitly asked to do so we can
shoot down remote instruction caches via an IPI, but this is a bit on
the slow side.

Instead of requiring an IPI to all harts whenever marking a page as
executable, we simply flush the currently running harts.  In order to
maintain correct behavior, we additionally mark every other hart as
needing a deferred instruction cache which will be taken before anything
runs on it.

Signed-off-by: Andrew Waterman <andrew@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>

											
										
										
											2017-10-25 14:30:32 -07:00
+									unsigned long addr, pte_t *ptep, pte_t pteval)
 								{
 									if (pte_present(pteval) && pte_exec(pteval))
 										flush_icache_pte(pteval);
 									set_pte(ptep, pteval);
 								}
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+								static inline void set_pte_at(struct mm_struct *mm,
 									unsigned long addr, pte_t *ptep, pte_t pteval)
 								{
 									page_table_check_pte_set(mm, addr, ptep, pteval);
 									__set_pte_at(mm, addr, ptep, pteval);
 								}
-												RISC-V: Flush I$ when making a dirty page executable

The RISC-V ISA allows for instruction caches that are not coherent WRT
stores, even on a single hart.  As a result, we need to explicitly flush
the instruction cache whenever marking a dirty page as executable in
order to preserve the correct system behavior.

Local instruction caches aren't that scary (our implementations actually
flush the cache, but RISC-V is defined to allow higher-performance
implementations to exist), but RISC-V defines no way to perform an
instruction cache shootdown.  When explicitly asked to do so we can
shoot down remote instruction caches via an IPI, but this is a bit on
the slow side.

Instead of requiring an IPI to all harts whenever marking a page as
executable, we simply flush the currently running harts.  In order to
maintain correct behavior, we additionally mark every other hart as
needing a deferred instruction cache which will be taken before anything
runs on it.

Signed-off-by: Andrew Waterman <andrew@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>

											
										
										
											2017-10-25 14:30:32 -07:00
+								static inline void pte_clear(struct mm_struct *mm,
 									unsigned long addr, pte_t *ptep)
 								{
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+									__set_pte_at(mm, addr, ptep, __pte(0));
-												RISC-V: Flush I$ when making a dirty page executable

The RISC-V ISA allows for instruction caches that are not coherent WRT
stores, even on a single hart.  As a result, we need to explicitly flush
the instruction cache whenever marking a dirty page as executable in
order to preserve the correct system behavior.

Local instruction caches aren't that scary (our implementations actually
flush the cache, but RISC-V is defined to allow higher-performance
implementations to exist), but RISC-V defines no way to perform an
instruction cache shootdown.  When explicitly asked to do so we can
shoot down remote instruction caches via an IPI, but this is a bit on
the slow side.

Instead of requiring an IPI to all harts whenever marking a page as
executable, we simply flush the currently running harts.  In order to
maintain correct behavior, we additionally mark every other hart as
needing a deferred instruction cache which will be taken before anything
runs on it.

Signed-off-by: Andrew Waterman <andrew@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>

											
										
										
											2017-10-25 14:30:32 -07:00
+								}
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 								static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 													unsigned long address, pte_t *ptep,
 													pte_t entry, int dirty)
 								{
 									if (!pte_same(*ptep, entry))
 										set_pte_at(vma->vm_mm, address, ptep, entry);
 									/*
 									 * update_mmu_cache will unconditionally execute, handling both
 									 * the case that the PTE changed and the spurious fault case.
 									 */
 									return true;
 								}
 								#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 								static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 												       unsigned long address, pte_t *ptep)
 								{
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+									pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
 									page_table_check_pte_clear(mm, address, pte);
 									return pte;
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								}
 								#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 								static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 													    unsigned long address,
 													    pte_t *ptep)
 								{
 									if (!pte_young(*ptep))
 										return 0;
 									return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
 								}
 								#define __HAVE_ARCH_PTEP_SET_WRPROTECT
 								static inline void ptep_set_wrprotect(struct mm_struct *mm,
 												      unsigned long address, pte_t *ptep)
 								{
 									atomic_long_and(~(unsigned long)_PAGE_WRITE, (atomic_long_t *)ptep);
 								}
 								#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 								static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 													 unsigned long address, pte_t *ptep)
 								{
 									/*
 									 * This comment is borrowed from x86, but applies equally to RISC-V:
 									 *
 									 * Clearing the accessed bit without a TLB flush
 									 * doesn't cause data corruption. [ It could cause incorrect
 									 * page aging and the (mistaken) reclaim of hot pages, but the
 									 * chance of that should be relatively low. ]
 									 *
 									 * So as a performance optimization don't flush the TLB when
 									 * clearing the accessed bit, it will eventually be flushed by
 									 * a context switch or a VM operation anyway. [ In the rare
 									 * event of it not getting flushed for a long time the delay
 									 * shouldn't really matter because there's no real memory
 									 * pressure for swapout to react to. ]
 									 */
 									return ptep_test_and_clear_young(vma, address, ptep);
 								}
-												riscv: add RISC-V Svpbmt extension support

Svpbmt (the S should be capitalized) is the
"Supervisor-mode: page-based memory types" extension
that specifies attributes for cacheability, idempotency
and ordering.

The relevant settings are done in special bits in PTEs:

Here is the svpbmt PTE format:
| 63 | 62-61 | 60-8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
  N     MT     RSW    D   A   G   U   X   W   R   V
        ^

Of the Reserved bits [63:54] in a leaf PTE, the high bit is already
allocated (as the N bit), so bits [62:61] are used as the MT (aka
MemType) field. This field specifies one of three memory types that
are close equivalents (or equivalent in effect) to the three main x86
and ARMv8 memory types - as shown in the following table.

RISC-V
Encoding &
MemType     RISC-V Description
----------  ------------------------------------------------
00 - PMA    Normal Cacheable, No change to implied PMA memory type
01 - NC     Non-cacheable, idempotent, weakly-ordered Main Memory
10 - IO     Non-cacheable, non-idempotent, strongly-ordered I/O memory
11 - Rsvd   Reserved for future standard use

As the extension will not be present on all implementations,
implement a method to handle cpufeatures via alternatives
to not incur runtime penalties on cpu variants not supporting
specific extensions and patch relevant code parts at runtime.

Co-developed-by: Wei Fu <wefu@redhat.com>
Signed-off-by: Wei Fu <wefu@redhat.com>
Co-developed-by: Liu Shaohua <liush@allwinnertech.com>
Signed-off-by: Liu Shaohua <liush@allwinnertech.com>
Co-developed-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Guo Ren <guoren@kernel.org>
[moved to use the alternatives mechanism]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
Link: https://lore.kernel.org/r/20220511192921.2223629-10-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-05-11 21:29:18 +02:00
+								#define pgprot_noncached pgprot_noncached
 								static inline pgprot_t pgprot_noncached(pgprot_t _prot)
 								{
 									unsigned long prot = pgprot_val(_prot);
 									prot &= ~_PAGE_MTMASK;
 									prot |= _PAGE_IO;
 									return __pgprot(prot);
 								}
 								#define pgprot_writecombine pgprot_writecombine
 								static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
 								{
 									unsigned long prot = pgprot_val(_prot);
 									prot &= ~_PAGE_MTMASK;
 									prot |= _PAGE_NOCACHE;
 									return __pgprot(prot);
 								}
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+								/*
 								 * THP functions
 								 */
 								static inline pmd_t pte_pmd(pte_t pte)
 								{
 									return __pmd(pte_val(pte));
 								}
 								static inline pmd_t pmd_mkhuge(pmd_t pmd)
 								{
 									return pmd;
 								}
 								static inline pmd_t pmd_mkinvalid(pmd_t pmd)
 								{
 									return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
 								}
-												riscv: Fix missing PAGE_PFN_MASK

There are a bunch of functions that use the PFN from a page table entry
that end up with the svpbmt upper-bits because they are missing the newly
introduced PAGE_PFN_MASK which leads to wrong addresses conversions and
then crash: fix this by adding this mask.

Fixes: 100631b48ded ("riscv: Fix accessing pfn bits in PTEs for non-32bit variants")
Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Anup Patel <anup@brainfault.org>

											
										
										
											2022-07-11 09:29:51 +05:30
+								#define __pmd_to_phys(pmd)  (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT)
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
 								static inline unsigned long pmd_pfn(pmd_t pmd)
 								{
 									return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
 								}
-												riscv: Fix missing PAGE_PFN_MASK

There are a bunch of functions that use the PFN from a page table entry
that end up with the svpbmt upper-bits because they are missing the newly
introduced PAGE_PFN_MASK which leads to wrong addresses conversions and
then crash: fix this by adding this mask.

Fixes: 100631b48ded ("riscv: Fix accessing pfn bits in PTEs for non-32bit variants")
Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Anup Patel <anup@brainfault.org>

											
										
										
											2022-07-11 09:29:51 +05:30
+								#define __pud_to_phys(pud)  (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT)
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
 								static inline unsigned long pud_pfn(pud_t pud)
 								{
 									return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT);
 								}
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+								static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 								{
 									return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
 								}
 								#define pmd_write pmd_write
 								static inline int pmd_write(pmd_t pmd)
 								{
 									return pte_write(pmd_pte(pmd));
 								}
 								static inline int pmd_dirty(pmd_t pmd)
 								{
 									return pte_dirty(pmd_pte(pmd));
 								}
 								static inline int pmd_young(pmd_t pmd)
 								{
 									return pte_young(pmd_pte(pmd));
 								}
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+								static inline int pmd_user(pmd_t pmd)
 								{
 									return pte_user(pmd_pte(pmd));
 								}
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+								static inline pmd_t pmd_mkold(pmd_t pmd)
 								{
 									return pte_pmd(pte_mkold(pmd_pte(pmd)));
 								}
 								static inline pmd_t pmd_mkyoung(pmd_t pmd)
 								{
 									return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
 								}
 								static inline pmd_t pmd_mkwrite(pmd_t pmd)
 								{
 									return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
 								}
 								static inline pmd_t pmd_wrprotect(pmd_t pmd)
 								{
 									return pte_pmd(pte_wrprotect(pmd_pte(pmd)));
 								}
 								static inline pmd_t pmd_mkclean(pmd_t pmd)
 								{
 									return pte_pmd(pte_mkclean(pmd_pte(pmd)));
 								}
 								static inline pmd_t pmd_mkdirty(pmd_t pmd)
 								{
 									return pte_pmd(pte_mkdirty(pmd_pte(pmd)));
 								}
 								static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 												pmd_t *pmdp, pmd_t pmd)
 								{
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+									page_table_check_pmd_set(mm, addr, pmdp, pmd);
 									return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
 								}
-												riscv: mremap speedup - enable HAVE_MOVE_PUD and HAVE_MOVE_PMD

HAVE_MOVE_PUD enables remapping pages at the PUD level if both the source
and destination addresses are PUD-aligned.
HAVE_MOVE_PMD does similar speedup on the PMD level.

With HAVE_MOVE_PUD enabled, there is about a 143x improvement on qemu
With HAVE_MOVE_PMD enabled, there is about a 5x improvement on qemu

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-17 00:37:22 +08:00
+								static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
 												pud_t *pudp, pud_t pud)
 								{
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+									page_table_check_pud_set(mm, addr, pudp, pud);
 									return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
 								}
 								#ifdef CONFIG_PAGE_TABLE_CHECK
 								static inline bool pte_user_accessible_page(pte_t pte)
 								{
 									return pte_present(pte) && pte_user(pte);
 								}
 								static inline bool pmd_user_accessible_page(pmd_t pmd)
 								{
 									return pmd_leaf(pmd) && pmd_user(pmd);
-												riscv: mremap speedup - enable HAVE_MOVE_PUD and HAVE_MOVE_PMD

HAVE_MOVE_PUD enables remapping pages at the PUD level if both the source
and destination addresses are PUD-aligned.
HAVE_MOVE_PMD does similar speedup on the PMD level.

With HAVE_MOVE_PUD enabled, there is about a 143x improvement on qemu
With HAVE_MOVE_PMD enabled, there is about a 5x improvement on qemu

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-17 00:37:22 +08:00
+								}
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+								static inline bool pud_user_accessible_page(pud_t pud)
 								{
 									return pud_leaf(pud) && pud_user(pud);
 								}
 								#endif
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+								#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 								static inline int pmd_trans_huge(pmd_t pmd)
 								{
 									return pmd_leaf(pmd);
 								}
 								#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
 								static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 													unsigned long address, pmd_t *pmdp,
 													pmd_t entry, int dirty)
 								{
 									return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
 								}
 								#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 								static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 													unsigned long address, pmd_t *pmdp)
 								{
 									return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
 								}
 								#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 								static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 													unsigned long address, pmd_t *pmdp)
 								{
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+									pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));
 									page_table_check_pmd_clear(mm, address, pmd);
 									return pmd;
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+								}
 								#define __HAVE_ARCH_PMDP_SET_WRPROTECT
 								static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 													unsigned long address, pmd_t *pmdp)
 								{
 									ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
 								}
 								#define pmdp_establish pmdp_establish
 								static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 												unsigned long address, pmd_t *pmdp, pmd_t pmd)
 								{
-												riscv/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK

As commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check"), enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on riscv.

Add additional page table check stubs for page table helpers, these stubs
can be used to check the existing page table entries.

Link: https://lkml.kernel.org/r/20220507110114.4128854-7-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

											
										
										
											2022-05-12 20:23:06 -07:00
+									page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
-												riscv: mm: add THP support on 64-bit

Bring Transparent HugePage support to riscv. A
transparent huge page is always represented as a pmd.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-30 16:28:50 +08:00
+									return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
 								}
 								#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								/*
 								 * Encode and decode a swap entry
 								 *
 								 * Format of swap PTE:
 								 *	bit            0:	_PAGE_PRESENT (zero)
-												riscv/mm: Adjust PAGE_PROT_NONE to comply with THP semantics

This is a preparation for enabling THP migration.
As the commit b65399f6111b("arm64/mm: Change THP helpers
to comply with generic MM semantics") mentioned, pmd_present()
and pmd_trans_huge() are expected to behave in the following
manner:
-------------------------------------------------------------------------
|	PMD states	|	pmd_present	|	pmd_trans_huge	|
-------------------------------------------------------------------------
|	Mapped		|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Splitting	|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Migration/Swap	|	No		|	No		|
-------------------------------------------------------------------------

At present the PROT_NONE bit reuses the READ bit could not comply with
above semantics with two problems:
1. When splitting a PMD THP, PMD is first invalidated with
pmdp_invalidate()->pmd_mkinvalid(), which clears the PRESENT bit
and PROT_NONE bit/READ bit, if the PMD is read-only, then the PAGE_LEAF
property is also cleared, which results in pmd_present() return false.
2. When migrating, the swap entry only clear the PRESENT bit
and PROT_NONE bit/READ bit, the W/X bit may be set, so _PAGE_LEAF may be
true which results in pmd_present() return true.

Solution:
Adjust PROT_NONE bit from READ to GLOBAL bit can satisfy the above rules:
1. GLOBAL bit has no other meanings, not like the R/W/X bit, which is
also relative with _PAGE_LEAF property.
2. GLOBAL bit is at bit 5, making swap entry start from bit 6, bit 0-5
are zero, which means the PRESENT, PROT_NONE, and PAGE_LEAF are
all false, then the pmd_present() and pmd_trans_huge() return false when
in migration/swap.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-11-23 22:06:37 +08:00
+								 *	bit       1 to 3:       _PAGE_LEAF (zero)
 								 *	bit            5:	_PAGE_PROT_NONE (zero)
 								 *	bits      6 to 10:	swap type
 								 *	bits 10 to XLEN-1:	swap offset
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								 */
-												riscv/mm: Adjust PAGE_PROT_NONE to comply with THP semantics

This is a preparation for enabling THP migration.
As the commit b65399f6111b("arm64/mm: Change THP helpers
to comply with generic MM semantics") mentioned, pmd_present()
and pmd_trans_huge() are expected to behave in the following
manner:
-------------------------------------------------------------------------
|	PMD states	|	pmd_present	|	pmd_trans_huge	|
-------------------------------------------------------------------------
|	Mapped		|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Splitting	|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Migration/Swap	|	No		|	No		|
-------------------------------------------------------------------------

At present the PROT_NONE bit reuses the READ bit could not comply with
above semantics with two problems:
1. When splitting a PMD THP, PMD is first invalidated with
pmdp_invalidate()->pmd_mkinvalid(), which clears the PRESENT bit
and PROT_NONE bit/READ bit, if the PMD is read-only, then the PAGE_LEAF
property is also cleared, which results in pmd_present() return false.
2. When migrating, the swap entry only clear the PRESENT bit
and PROT_NONE bit/READ bit, the W/X bit may be set, so _PAGE_LEAF may be
true which results in pmd_present() return true.

Solution:
Adjust PROT_NONE bit from READ to GLOBAL bit can satisfy the above rules:
1. GLOBAL bit has no other meanings, not like the R/W/X bit, which is
also relative with _PAGE_LEAF property.
2. GLOBAL bit is at bit 5, making swap entry start from bit 6, bit 0-5
are zero, which means the PRESENT, PROT_NONE, and PAGE_LEAF are
all false, then the pmd_present() and pmd_trans_huge() return false when
in migration/swap.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-11-23 22:06:37 +08:00
+								#define __SWP_TYPE_SHIFT	6
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								#define __SWP_TYPE_BITS		5
 								#define __SWP_TYPE_MASK		((1UL << __SWP_TYPE_BITS) - 1)
 								#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
 								#define MAX_SWAPFILES_CHECK()	\
 									BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
 								#define __swp_type(x)	(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
 								#define __swp_offset(x)	((x).val >> __SWP_OFFSET_SHIFT)
 								#define __swp_entry(type, offset) ((swp_entry_t) \
 									{ ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
 								#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 								#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
-												riscv/mm: Enable THP migration

Add two THP helpers required to create PMD migration swap entries,
and enable THP migration via ARCH_ENABLE_THP_MIGRATION. This can
reduce time of THP migration without splitting and guarantee the
migrated pages are still contiguous.

Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-11-23 22:06:38 +08:00
+								#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
 								#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
 								#define __swp_entry_to_pmd(swp) __pmd((swp).val)
 								#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
-												riscv: Add support to dump the kernel page tables

In a similar manner to arm64, x86, powerpc, etc., it can traverse all
page tables, and dump the page table layout with the memory types and
permissions.

Add a debugfs file at /sys/kernel/debug/kernel_page_tables to export
the page table layout to userspace.

Signed-off-by: Zong Li <zong.li@sifive.com>
Tested-by: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-03-12 10:58:35 +08:00
+								/*
 								 * In the RV64 Linux scheme, we give the user half of the virtual-address space
 								 * and give the kernel the other (upper) half.
 								 */
 								#ifdef CONFIG_64BIT
-												riscv: Allow to dynamically define VA_BITS

With 4-level page table folding at runtime, we don't know at compile time
the size of the virtual address space so we must set VA_BITS dynamically
so that sparsemem reserves the right amount of memory for struct pages.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:48 +01:00
+								#define KERN_VIRT_START	(-(BIT(VA_BITS)) + TASK_SIZE)
-												riscv: Add support to dump the kernel page tables

In a similar manner to arm64, x86, powerpc, etc., it can traverse all
page tables, and dump the page table layout with the memory types and
permissions.

Add a debugfs file at /sys/kernel/debug/kernel_page_tables to export
the page table layout to userspace.

Signed-off-by: Zong Li <zong.li@sifive.com>
Tested-by: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-03-12 10:58:35 +08:00
+								#else
 								#define KERN_VIRT_START	FIXADDR_START
 								#endif
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								/*
-												RISC-V: Fix FIXMAP area corruption on RV32 systems

Currently, various virtual memory areas of Linux RISC-V are organized
in increasing order of their virtual addresses is as follows:
1. User space area (This is lowest area and starts at 0x0)
2. FIXMAP area
3. VMALLOC area
4. Kernel area (This is highest area and starts at PAGE_OFFSET)

The maximum size of user space aread is represented by TASK_SIZE.

On RV32 systems, TASK_SIZE is defined as VMALLOC_START which causes the
user space area to overlap the FIXMAP area. This allows user space apps
to potentially corrupt the FIXMAP area and kernel OF APIs will crash
whenever they access corrupted FDT in the FIXMAP area.

On RV64 systems, TASK_SIZE is set to fixed 256GB and no other areas
happen to overlap so we don't see any FIXMAP area corruptions.

This patch fixes FIXMAP area corruption on RV32 systems by setting
TASK_SIZE to FIXADDR_START. We also move FIXADDR_TOP, FIXADDR_SIZE,
and FIXADDR_START defines to asm/pgtable.h so that we can avoid cyclic
header includes.

Signed-off-by: Anup Patel <anup.patel@wdc.com>
Tested-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-08-19 05:14:23 +00:00
+								 * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								 * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
-												riscv: Explicit comment about user virtual address space size

Define precisely the size of the user accessible virtual space size
for sv32/39/48 mmu types and explain why the whole virtual address
space is split into 2 equal chunks between kernel and user space.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:53 +01:00
+								 * Task size is:
 								 * -     0x9fc00000 (~2.5GB) for RV32.
 								 * -   0x4000000000 ( 256GB) for RV64 using SV39 mmu
 								 * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
 								 *
 								 * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
 								 * Instruction Set Manual Volume II: Privileged Architecture" states that
 								 * "load and store effective addresses, which are 64bits, must have bits
 								 * 63–48 all equal to bit 47, or else a page-fault exception will occur."
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								 */
 								#ifdef CONFIG_64BIT
-												riscv: compat: Support TASK_SIZE for compat mode

Make TASK_SIZE from const to dynamic detect TIF_32BIT flag
function. Refer to arm64 to implement DEFAULT_MAP_WINDOW_64 for
efi-stub.

Limit 32-bit compatible process in 0-2GB virtual address range
(which is enough for real scenarios), because it could avoid
address sign extend problem when 32-bit enter 64-bit and ease
software design.

The standard 32-bit TASK_SIZE is 0x9dc00000:FIXADDR_START, and
compared to a compatible 32-bit, it increases 476MB for the
application's virtual address.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20220405071314.3225832-11-guoren@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2022-04-05 15:13:04 +08:00
+								#define TASK_SIZE_64	(PGDIR_SIZE * PTRS_PER_PGD / 2)
 								#define TASK_SIZE_MIN	(PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
 								#ifdef CONFIG_COMPAT
 								#define TASK_SIZE_32	(_AC(0x80000000, UL) - PAGE_SIZE)
 								#define TASK_SIZE	(test_thread_flag(TIF_32BIT) ? \
 											 TASK_SIZE_32 : TASK_SIZE_64)
 								#else
 								#define TASK_SIZE	TASK_SIZE_64
 								#endif
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								#else
-												riscv: Implement sv48 support

By adding a new 4th level of page table, give the possibility to 64bit
kernel to address 2^48 bytes of virtual address: in practice, that offers
128TB of virtual address space to userspace and allows up to 64TB of
physical memory.

If the underlying hardware does not support sv48, we will automatically
fallback to a standard 3-level page table by folding the new PUD level into
PGDIR level. In order to detect HW capabilities at runtime, we
use SATP feature that ignores writes with an unsupported mode.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:51 +01:00
+								#define TASK_SIZE	FIXADDR_START
 								#define TASK_SIZE_MIN	TASK_SIZE
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								#endif
-												riscv: add nommu support

The kernel runs in M-mode without using page tables, and thus can't run
bare metal without help from additional firmware.

Most of the patch is just stubbing out code not needed without page
tables, but there is an interesting detail in the signals implementation:

 - The normal RISC-V syscall ABI only implements rt_sigreturn as VDSO
   entry point, but the ELF VDSO is not supported for nommu Linux.
   We instead copy the code to call the syscall onto the stack.

In addition to enabling the nommu code a new defconfig for a small
kernel image that can run in nommu mode on qemu is also provided, to run
a kernel in qemu you can use the following command line:

qemu-system-riscv64 -smp 2 -m 64 -machine virt -nographic \
	-kernel arch/riscv/boot/loader \
	-drive file=rootfs.ext2,format=raw,id=hd0 \
	-device virtio-blk-device,drive=hd0

Contains contributions from Damien Le Moal <Damien.LeMoal@wdc.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anup Patel <anup@brainfault.org>
[paul.walmsley@sifive.com: updated to apply; add CONFIG_MMU guards
 around PCI_IOBASE definition to fix build issues; fixed checkpatch
 issues; move the PCI_IO_* and VMEMMAP address space macros along
 with the others; resolve sparse warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-28 13:10:41 +01:00
+								#else /* CONFIG_MMU */
-												riscv: Add pgprot_writecombine/device and PAGE_SHARED defination if NOMMU

Some drivers use PAGE_SHARED, pgprot_writecombine()/pgprot_device(),
add the defination to fix build error if NOMMU.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-05-11 10:19:54 +08:00
+								#define PAGE_SHARED		__pgprot(0)
-												riscv: add nommu support

The kernel runs in M-mode without using page tables, and thus can't run
bare metal without help from additional firmware.

Most of the patch is just stubbing out code not needed without page
tables, but there is an interesting detail in the signals implementation:

 - The normal RISC-V syscall ABI only implements rt_sigreturn as VDSO
   entry point, but the ELF VDSO is not supported for nommu Linux.
   We instead copy the code to call the syscall onto the stack.

In addition to enabling the nommu code a new defconfig for a small
kernel image that can run in nommu mode on qemu is also provided, to run
a kernel in qemu you can use the following command line:

qemu-system-riscv64 -smp 2 -m 64 -machine virt -nographic \
	-kernel arch/riscv/boot/loader \
	-drive file=rootfs.ext2,format=raw,id=hd0 \
	-device virtio-blk-device,drive=hd0

Contains contributions from Damien Le Moal <Damien.LeMoal@wdc.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anup Patel <anup@brainfault.org>
[paul.walmsley@sifive.com: updated to apply; add CONFIG_MMU guards
 around PCI_IOBASE definition to fix build issues; fixed checkpatch
 issues; move the PCI_IO_* and VMEMMAP address space macros along
 with the others; resolve sparse warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-28 13:10:41 +01:00
+								#define PAGE_KERNEL		__pgprot(0)
 								#define swapper_pg_dir		NULL
-												mm: switch the test_vmalloc module to use __vmalloc_node

No need to export the very low-level __vmalloc_node_range when the test
module can use a slightly higher level variant.

[akpm@linux-foundation.org: add missing `node' arg]
[akpm@linux-foundation.org: fix riscv nommu build]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Kelley <mikelley@microsoft.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Link: http://lkml.kernel.org/r/20200414131348.444715-26-hch@lst.de
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

											
										
										
											2020-06-01 21:51:57 -07:00
+								#define TASK_SIZE		0xffffffffUL
-												riscv: add nommu support

The kernel runs in M-mode without using page tables, and thus can't run
bare metal without help from additional firmware.

Most of the patch is just stubbing out code not needed without page
tables, but there is an interesting detail in the signals implementation:

 - The normal RISC-V syscall ABI only implements rt_sigreturn as VDSO
   entry point, but the ELF VDSO is not supported for nommu Linux.
   We instead copy the code to call the syscall onto the stack.

In addition to enabling the nommu code a new defconfig for a small
kernel image that can run in nommu mode on qemu is also provided, to run
a kernel in qemu you can use the following command line:

qemu-system-riscv64 -smp 2 -m 64 -machine virt -nographic \
	-kernel arch/riscv/boot/loader \
	-drive file=rootfs.ext2,format=raw,id=hd0 \
	-device virtio-blk-device,drive=hd0

Contains contributions from Damien Le Moal <Damien.LeMoal@wdc.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anup Patel <anup@brainfault.org>
[paul.walmsley@sifive.com: updated to apply; add CONFIG_MMU guards
 around PCI_IOBASE definition to fix build issues; fixed checkpatch
 issues; move the PCI_IO_* and VMEMMAP address space macros along
 with the others; resolve sparse warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-28 13:10:41 +01:00
+								#define VMALLOC_START		0
-												mm: switch the test_vmalloc module to use __vmalloc_node

No need to export the very low-level __vmalloc_node_range when the test
module can use a slightly higher level variant.

[akpm@linux-foundation.org: add missing `node' arg]
[akpm@linux-foundation.org: fix riscv nommu build]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Kelley <mikelley@microsoft.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Link: http://lkml.kernel.org/r/20200414131348.444715-26-hch@lst.de
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

											
										
										
											2020-06-01 21:51:57 -07:00
+								#define VMALLOC_END		TASK_SIZE
-												riscv: add nommu support

The kernel runs in M-mode without using page tables, and thus can't run
bare metal without help from additional firmware.

Most of the patch is just stubbing out code not needed without page
tables, but there is an interesting detail in the signals implementation:

 - The normal RISC-V syscall ABI only implements rt_sigreturn as VDSO
   entry point, but the ELF VDSO is not supported for nommu Linux.
   We instead copy the code to call the syscall onto the stack.

In addition to enabling the nommu code a new defconfig for a small
kernel image that can run in nommu mode on qemu is also provided, to run
a kernel in qemu you can use the following command line:

qemu-system-riscv64 -smp 2 -m 64 -machine virt -nographic \
	-kernel arch/riscv/boot/loader \
	-drive file=rootfs.ext2,format=raw,id=hd0 \
	-device virtio-blk-device,drive=hd0

Contains contributions from Damien Le Moal <Damien.LeMoal@wdc.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anup Patel <anup@brainfault.org>
[paul.walmsley@sifive.com: updated to apply; add CONFIG_MMU guards
 around PCI_IOBASE definition to fix build issues; fixed checkpatch
 issues; move the PCI_IO_* and VMEMMAP address space macros along
 with the others; resolve sparse warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-28 13:10:41 +01:00
 								#endif /* !CONFIG_MMU */
 								#define kern_addr_valid(addr)   (1) /* FIXME */
-												riscv: Move kernel mapping outside of linear mapping

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-11 12:41:44 -04:00
+								extern char _start[];
-												RISC-V: enable XIP

Introduce XIP (eXecute In Place) support for RISC-V platforms.
It allows code to be executed directly from non-volatile storage
directly addressable by the CPU, such as QSPI NOR flash which can
be found on many RISC-V platforms. This makes way for significant
optimization of RAM footprint. The XIP kernel is not compressed
since it has to run directly from flash, so it will occupy more
space on the non-volatile storage. The physical flash address used
to link the kernel object files and for storing it has to be known
at compile time and is represented by a Kconfig option.

XIP on RISC-V will for the time being only work on MMU-enabled
kernels.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
[Alex: Rebase on top of "Move kernel mapping outside the linear mapping" ]
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: disable XIP for allyesconfig]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-13 02:35:14 -04:00
+								extern void *_dtb_early_va;
 								extern uintptr_t _dtb_early_pa;
 								#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_MMU)
 								#define dtb_early_va	(*(void **)XIP_FIXUP(&_dtb_early_va))
 								#define dtb_early_pa	(*(uintptr_t *)XIP_FIXUP(&_dtb_early_pa))
 								#else
 								#define dtb_early_va	_dtb_early_va
 								#define dtb_early_pa	_dtb_early_pa
 								#endif /* CONFIG_XIP_KERNEL */
-												riscv: Implement sv48 support

By adding a new 4th level of page table, give the possibility to 64bit
kernel to address 2^48 bytes of virtual address: in practice, that offers
128TB of virtual address space to userspace and allows up to 64TB of
physical memory.

If the underlying hardware does not support sv48, we will automatically
fallback to a standard 3-level page table by folding the new PUD level into
PGDIR level. In order to detect HW capabilities at runtime, we
use SATP feature that ignores writes with an unsupported mode.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

											
										
										
											2021-12-06 11:46:51 +01:00
+								extern u64 satp_mode;
 								extern bool pgtable_l4_enabled;
-												RISC-V: enable XIP

Introduce XIP (eXecute In Place) support for RISC-V platforms.
It allows code to be executed directly from non-volatile storage
directly addressable by the CPU, such as QSPI NOR flash which can
be found on many RISC-V platforms. This makes way for significant
optimization of RAM footprint. The XIP kernel is not compressed
since it has to run directly from flash, so it will occupy more
space on the non-volatile storage. The physical flash address used
to link the kernel object files and for storing it has to be known
at compile time and is represented by a Kconfig option.

XIP on RISC-V will for the time being only work on MMU-enabled
kernels.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
[Alex: Rebase on top of "Move kernel mapping outside the linear mapping" ]
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: disable XIP for allyesconfig]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2021-04-13 02:35:14 -04:00
-												riscv: add nommu support

The kernel runs in M-mode without using page tables, and thus can't run
bare metal without help from additional firmware.

Most of the patch is just stubbing out code not needed without page
tables, but there is an interesting detail in the signals implementation:

 - The normal RISC-V syscall ABI only implements rt_sigreturn as VDSO
   entry point, but the ELF VDSO is not supported for nommu Linux.
   We instead copy the code to call the syscall onto the stack.

In addition to enabling the nommu code a new defconfig for a small
kernel image that can run in nommu mode on qemu is also provided, to run
a kernel in qemu you can use the following command line:

qemu-system-riscv64 -smp 2 -m 64 -machine virt -nographic \
	-kernel arch/riscv/boot/loader \
	-drive file=rootfs.ext2,format=raw,id=hd0 \
	-device virtio-blk-device,drive=hd0

Contains contributions from Damien Le Moal <Damien.LeMoal@wdc.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anup Patel <anup@brainfault.org>
[paul.walmsley@sifive.com: updated to apply; add CONFIG_MMU guards
 around PCI_IOBASE definition to fix build issues; fixed checkpatch
 issues; move the PCI_IO_* and VMEMMAP address space macros along
 with the others; resolve sparse warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-28 13:10:41 +01:00
+								void paging_init(void);
-												riscv: Separate memory init from paging init

Currently, we perform some memory init functions in paging init. But,
that will be an issue for NUMA support where DT needs to be flattened
before numa initialization and memblock_present can only be called
after numa initialization.

Move memory initialization related functions to a separate function.

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Greentime Hu <greentime.hu@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>

											
										
										
											2020-11-18 16:38:27 -08:00
+								void misc_mem_init(void);
-												riscv: add nommu support

The kernel runs in M-mode without using page tables, and thus can't run
bare metal without help from additional firmware.

Most of the patch is just stubbing out code not needed without page
tables, but there is an interesting detail in the signals implementation:

 - The normal RISC-V syscall ABI only implements rt_sigreturn as VDSO
   entry point, but the ELF VDSO is not supported for nommu Linux.
   We instead copy the code to call the syscall onto the stack.

In addition to enabling the nommu code a new defconfig for a small
kernel image that can run in nommu mode on qemu is also provided, to run
a kernel in qemu you can use the following command line:

qemu-system-riscv64 -smp 2 -m 64 -machine virt -nographic \
	-kernel arch/riscv/boot/loader \
	-drive file=rootfs.ext2,format=raw,id=hd0 \
	-device virtio-blk-device,drive=hd0

Contains contributions from Damien Le Moal <Damien.LeMoal@wdc.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anup Patel <anup@brainfault.org>
[paul.walmsley@sifive.com: updated to apply; add CONFIG_MMU guards
 around PCI_IOBASE definition to fix build issues; fixed checkpatch
 issues; move the PCI_IO_* and VMEMMAP address space macros along
 with the others; resolve sparse warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
											
										
										
											2019-10-28 13:10:41 +01:00
 								/*
 								 * ZERO_PAGE is a global shared page that is always zero,
 								 * used for zero-mapped memory areas, etc.
 								 */
 								extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 								#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-												RISC-V: Paging and MMU

This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

											
										
										
											2017-07-10 18:06:09 -07:00
+								#endif /* !__ASSEMBLY__ */
 								#endif /* _ASM_RISCV_PGTABLE_H */