powerpc/64s: use contiguous PMD/PUD instead of HUGEPD
On book3s/64, the only user of hugepd is hash in 4k mode. All other setups (hash-64, radix-4, radix-64) use leaf PMD/PUD. Rework hash-4k to use contiguous PMD and PUD instead. In that setup there are only two huge page sizes: 16M and 16G. 16M sits at PMD level and 16G at PUD level. pte_update() doesn't know the page size, so let's use the same trick as hpte_need_flush() to get the page size from segment properties. That's not the most efficient way, but let's do that until callers of pte_update() provide the page size instead of just a huge flag. Link: https://lkml.kernel.org/r/7448f60a9b3efd396595f4f735d1e0babc5ae379.1719928057.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Peter Xu <peterx@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
7c44202e36
commit
57fb15c32f
@ -74,21 +74,6 @@
|
||||
#define remap_4k_pfn(vma, addr, pfn, prot) \
|
||||
remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
/*
 * Tell whether @hpd looks like a hugepd directory pointer: the value must
 * have _PAGE_PRESENT set, _PAGE_PTE clear and a non-zero HUGEPD_SHIFT_MASK
 * field. Returns non-zero when all three hold, zero otherwise.
 */
static inline int hash__hugepd_ok(hugepd_t hpd)
{
	unsigned long val = hpd_val(hpd);

	/* A leaf PTE is never a directory pointer */
	if (val & _PAGE_PTE)
		return false;

	/* An entry that is not present cannot be one either */
	if (!(val & _PAGE_PRESENT))
		return false;

	/* What remains is a hugepd only if the shift field is populated */
	return (val & HUGEPD_SHIFT_MASK) != 0;
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 4K PTE format is different from 64K PTE format. Saving the hash_slot is just
|
||||
* a matter of returning the PTE bits that need to be modified. On 64K PTE,
|
||||
|
@ -4,6 +4,7 @@
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#include <asm/asm-const.h>
|
||||
#include <asm/book3s/64/slice.h>
|
||||
|
||||
/*
|
||||
* Common bits between 4K and 64K pages in a linux-style PTE.
|
||||
@ -161,14 +162,10 @@ extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned long pte, int huge);
|
||||
unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags);
|
||||
/* Atomic PTE updates */
|
||||
static inline unsigned long hash__pte_update(struct mm_struct *mm,
|
||||
unsigned long addr,
|
||||
pte_t *ptep, unsigned long clr,
|
||||
unsigned long set,
|
||||
int huge)
|
||||
static inline unsigned long hash__pte_update_one(pte_t *ptep, unsigned long clr,
|
||||
unsigned long set)
|
||||
{
|
||||
__be64 old_be, tmp_be;
|
||||
unsigned long old;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: ldarx %0,0,%3 # pte_update\n\
|
||||
@ -182,11 +179,40 @@ static inline unsigned long hash__pte_update(struct mm_struct *mm,
|
||||
: "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep),
|
||||
"r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
|
||||
: "cc" );
|
||||
|
||||
return be64_to_cpu(old_be);
|
||||
}
|
||||
|
||||
static inline unsigned long hash__pte_update(struct mm_struct *mm,
|
||||
unsigned long addr,
|
||||
pte_t *ptep, unsigned long clr,
|
||||
unsigned long set,
|
||||
int huge)
|
||||
{
|
||||
unsigned long old;
|
||||
|
||||
old = hash__pte_update_one(ptep, clr, set);
|
||||
|
||||
if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && huge) {
|
||||
unsigned int psize = get_slice_psize(mm, addr);
|
||||
int nb, i;
|
||||
|
||||
if (psize == MMU_PAGE_16M)
|
||||
nb = SZ_16M / PMD_SIZE;
|
||||
else if (psize == MMU_PAGE_16G)
|
||||
nb = SZ_16G / PUD_SIZE;
|
||||
else
|
||||
nb = 1;
|
||||
|
||||
WARN_ON_ONCE(nb == 1); /* Should never happen */
|
||||
|
||||
for (i = 1; i < nb; i++)
|
||||
hash__pte_update_one(ptep + i, clr, set);
|
||||
}
|
||||
/* huge pages use the old page table lock */
|
||||
if (!huge)
|
||||
assert_pte_locked(mm, addr);
|
||||
|
||||
old = be64_to_cpu(old_be);
|
||||
if (old & H_PAGE_HASHPTE)
|
||||
hpte_need_flush(mm, addr, ptep, old, huge);
|
||||
|
||||
|
@ -49,9 +49,6 @@ static inline bool gigantic_page_runtime_supported(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* hugepd entry valid bit */
|
||||
#define HUGEPD_VAL_BITS (0x8000000000000000UL)
|
||||
|
||||
#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
|
||||
extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep);
|
||||
@ -60,29 +57,7 @@ extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
|
||||
extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep,
|
||||
pte_t old_pte, pte_t new_pte);
|
||||
/*
|
||||
* This should work for other subarchs too. But right now we use the
|
||||
* new format only for 64bit book3s
|
||||
*/
|
||||
/*
 * Return the pte_t pointer obtained by applying __va() to the address bits
 * (HUGEPD_ADDR_MASK) of @hpd. Triggers BUG_ON() when @hpd fails hugepd_ok().
 */
static inline pte_t *hugepd_page(hugepd_t hpd)
{
	unsigned long table_addr;

	BUG_ON(!hugepd_ok(hpd));

	/* Only four bits are available to encode the MMU page size */
	BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);

	table_addr = hpd_val(hpd) & HUGEPD_ADDR_MASK;

	return __va(table_addr);
}
|
||||
|
||||
/* Extract the MMU page size index kept in the HUGEPD_SHIFT_MASK field of @hpd. */
static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
{
	unsigned long size_field = hpd_val(hpd) & HUGEPD_SHIFT_MASK;

	/* The index is stored shifted left by two bits */
	return size_field >> 2;
}
|
||||
|
||||
/* Convert the MMU page size index encoded in @hpd to a shift via mmu_psize_to_shift(). */
static inline unsigned int hugepd_shift(hugepd_t hpd)
{
	unsigned int psize = hugepd_mmu_psize(hpd);

	return mmu_psize_to_shift(psize);
}
|
||||
static inline void flush_hugetlb_page(struct vm_area_struct *vma,
|
||||
unsigned long vmaddr)
|
||||
{
|
||||
@ -90,19 +65,6 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
|
||||
return radix__flush_hugetlb_page(vma, vmaddr);
|
||||
}
|
||||
|
||||
/*
 * Return the entry for @addr inside the table referenced by @hpd.
 * @pdshift selects how many low bits of @addr are kept; those bits are then
 * shifted right by hugepd_shift(@hpd) to form the index into the table.
 */
static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
				    unsigned int pdshift)
{
	unsigned long low_mask = (1UL << pdshift) - 1;
	unsigned long slot = (addr & low_mask) >> hugepd_shift(hpd);

	return hugepd_page(hpd) + slot;
}
|
||||
|
||||
/*
 * Store a hugepd entry in *@hpdp: the __pa() of @new, OR-ed with
 * HUGEPD_VAL_BITS and with the MMU page size index for @pshift shifted
 * left by two bits.
 */
static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
{
	unsigned long entry = __pa(new) | HUGEPD_VAL_BITS;

	entry |= shift_to_mmu_psize(pshift) << 2;

	*hpdp = __hugepd(entry);
}
|
||||
|
||||
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
|
||||
|
||||
static inline int check_and_get_huge_psize(int shift)
|
||||
|
@ -1,47 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H
|
||||
#define _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H
|
||||
/*
|
||||
* hash 4k can't share hugetlb and also doesn't support THP
|
||||
*/
|
||||
#ifndef __ASSEMBLY__
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
/*
|
||||
* With radix, we have hugepage ptes in the pud and pmd entries. We don't
|
||||
* need to setup hugepage directory for them. Our pte and page directory format
|
||||
* enable us to have this enabled.
|
||||
*/
|
||||
/*
 * Returns 0 when radix is enabled; otherwise defers to hash__hugepd_ok().
 */
static inline int hugepd_ok(hugepd_t hpd)
{
	return radix_enabled() ? 0 : hash__hugepd_ok(hpd);
}
|
||||
#define is_hugepd(hpd) (hugepd_ok(hpd))
|
||||
|
||||
/*
|
||||
* 16M and 16G huge page directory tables are allocated from slab cache
|
||||
*
|
||||
*/
|
||||
#define H_16M_CACHE_INDEX (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE - 24)
|
||||
#define H_16G_CACHE_INDEX \
|
||||
(PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + H_PUD_INDEX_SIZE - 34)
|
||||
|
||||
static inline int get_hugepd_cache_index(int index)
|
||||
{
|
||||
switch (index) {
|
||||
case H_16M_CACHE_INDEX:
|
||||
return HTLB_16M_INDEX;
|
||||
case H_16G_CACHE_INDEX:
|
||||
return HTLB_16G_INDEX;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
/* should not reach */
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */
|
@ -5,26 +5,6 @@
|
||||
#ifndef __ASSEMBLY__
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
|
||||
/*
|
||||
* With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
|
||||
* need to setup hugepage directory for them. Our pte and page directory format
|
||||
* enable us to have this enabled.
|
||||
*/
|
||||
/* Stub for this configuration: unconditionally reports "not a hugepd". */
static inline int hugepd_ok(hugepd_t hpd)
{
	return 0;
}
|
||||
|
||||
#define is_hugepd(pdep) 0
|
||||
|
||||
/*
|
||||
* This should never get called
|
||||
*/
|
||||
/*
 * Placeholder that must have no callers in this configuration; its body is
 * only BUILD_BUG(), which is presumably meant to fail the build if a call
 * to it is ever emitted.
 */
static __always_inline int get_hugepd_cache_index(int index)
{
	BUILD_BUG();
}
|
||||
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
|
||||
|
@ -274,6 +274,24 @@ static inline bool pud_leaf(pud_t pud)
|
||||
{
|
||||
return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
|
||||
}
|
||||
|
||||
#define pmd_leaf_size pmd_leaf_size
|
||||
/*
 * Size mapped by a leaf PMD. With 4k pages and radix disabled (hash-4k) the
 * result is SZ_16M; in every other case it is PMD_SIZE.
 */
static inline unsigned long pmd_leaf_size(pmd_t pmd)
{
	/* Anything that is not hash-4k maps exactly one PMD */
	if (!IS_ENABLED(CONFIG_PPC_4K_PAGES) || radix_enabled())
		return PMD_SIZE;

	return SZ_16M;
}
|
||||
|
||||
#define pud_leaf_size pud_leaf_size
|
||||
/*
 * Size mapped by a leaf PUD. With 4k pages and radix disabled (hash-4k) the
 * result is SZ_16G; in every other case it is PUD_SIZE.
 */
static inline unsigned long pud_leaf_size(pud_t pud)
{
	/* Anything that is not hash-4k maps exactly one PUD */
	if (!IS_ENABLED(CONFIG_PPC_4K_PAGES) || radix_enabled())
		return PUD_SIZE;

	return SZ_16G;
}
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#include <asm/book3s/64/hash.h>
|
||||
@ -285,11 +303,9 @@ static inline bool pud_leaf(pud_t pud)
|
||||
#define MAX_PHYSMEM_BITS R_MAX_PHYSMEM_BITS
|
||||
#endif
|
||||
|
||||
|
||||
/* hash 4k can't share hugetlb and also doesn't support THP */
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
#include <asm/book3s/64/pgtable-64k.h>
|
||||
#else
|
||||
#include <asm/book3s/64/pgtable-4k.h>
|
||||
#endif
|
||||
|
||||
#include <asm/barrier.h>
|
||||
|
@ -37,6 +37,10 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
|
||||
unsigned long ceiling);
|
||||
#endif
|
||||
|
||||
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
|
||||
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
||||
pte_t pte, unsigned long sz);
|
||||
|
||||
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
|
||||
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
|
||||
unsigned long addr, pte_t *ptep)
|
||||
|
@ -2,10 +2,6 @@
|
||||
#ifndef _ASM_POWERPC_NOHASH_HUGETLB_E500_H
|
||||
#define _ASM_POWERPC_NOHASH_HUGETLB_E500_H
|
||||
|
||||
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
|
||||
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
||||
pte_t pte, unsigned long sz);
|
||||
|
||||
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
|
||||
|
||||
static inline int check_and_get_huge_psize(int shift)
|
||||
|
@ -269,14 +269,6 @@ static inline const void *pfn_to_kaddr(unsigned long pfn)
|
||||
#define is_kernel_addr(x) ((x) >= TASK_SIZE)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/*
|
||||
* Book3S 64 stores real addresses in the hugepd entries to
|
||||
* avoid overlaps with _PAGE_PRESENT and _PAGE_PTE.
|
||||
*/
|
||||
#define HUGEPD_ADDR_MASK (0x0ffffffffffffffful & ~HUGEPD_SHIFT_MASK)
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
/*
|
||||
* Some number of bits at the level of the page table that points to
|
||||
* a hugepte are used to encode the size. This masks those bits.
|
||||
|
@ -1233,10 +1233,6 @@ void __init hash__early_init_mmu(void)
|
||||
__pmd_table_size = H_PMD_TABLE_SIZE;
|
||||
__pud_table_size = H_PUD_TABLE_SIZE;
|
||||
__pgd_table_size = H_PGD_TABLE_SIZE;
|
||||
/*
|
||||
* 4k uses the hugepd format, so for hash set them to
|
||||
* zero
|
||||
*/
|
||||
__pmd_val_bits = HASH_PMD_VAL_BITS;
|
||||
__pud_val_bits = HASH_PUD_VAL_BITS;
|
||||
__pgd_val_bits = HASH_PGD_VAL_BITS;
|
||||
@ -1546,6 +1542,13 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled()) {
|
||||
if (hugeshift == PMD_SHIFT && psize == MMU_PAGE_16M)
|
||||
hugeshift = mmu_psize_defs[MMU_PAGE_16M].shift;
|
||||
if (hugeshift == PUD_SHIFT && psize == MMU_PAGE_16G)
|
||||
hugeshift = mmu_psize_defs[MMU_PAGE_16G].shift;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add _PAGE_PRESENT to the required access perm. If there are parallel
|
||||
* updates to the pte that can possibly clear _PAGE_PTE, catch that too.
|
||||
|
@ -53,6 +53,16 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
/* If PTE permissions don't match, take page fault */
|
||||
if (unlikely(!check_pte_access(access, old_pte)))
|
||||
return 1;
|
||||
/*
|
||||
* If hash-4k, hugepages use several contiguous PxD entries
|
||||
* so bail out and let mm make the page young or dirty
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_PPC_4K_PAGES)) {
|
||||
if (!(old_pte & _PAGE_ACCESSED))
|
||||
return 1;
|
||||
if ((access & _PAGE_WRITE) && !(old_pte & _PAGE_DIRTY))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to lock the PTE, add ACCESSED and DIRTY if it was
|
||||
|
@ -461,18 +461,6 @@ static inline void pgtable_free(void *table, int index)
|
||||
case PUD_INDEX:
|
||||
__pud_free(table);
|
||||
break;
|
||||
#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
|
||||
/* 16M hugepd directory at pud level */
|
||||
case HTLB_16M_INDEX:
|
||||
BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
|
||||
kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
|
||||
break;
|
||||
/* 16G hugepd directory at the pgd level */
|
||||
case HTLB_16G_INDEX:
|
||||
BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
|
||||
kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
|
||||
break;
|
||||
#endif
|
||||
/* We don't free pgd table via RCU callback */
|
||||
default:
|
||||
BUG();
|
||||
|
@ -592,40 +592,14 @@ static int __init hugetlbpage_init(void)
|
||||
|
||||
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
|
||||
unsigned shift;
|
||||
unsigned pdshift;
|
||||
|
||||
if (!mmu_psize_defs[psize].shift)
|
||||
continue;
|
||||
|
||||
shift = mmu_psize_to_shift(psize);
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
if (shift > PGDIR_SHIFT)
|
||||
continue;
|
||||
else if (shift > PUD_SHIFT)
|
||||
pdshift = PGDIR_SHIFT;
|
||||
else if (shift > PMD_SHIFT)
|
||||
pdshift = PUD_SHIFT;
|
||||
else
|
||||
pdshift = PMD_SHIFT;
|
||||
#else
|
||||
if (shift < PUD_SHIFT)
|
||||
pdshift = PMD_SHIFT;
|
||||
else if (shift < PGDIR_SHIFT)
|
||||
pdshift = PUD_SHIFT;
|
||||
else
|
||||
pdshift = PGDIR_SHIFT;
|
||||
#endif
|
||||
|
||||
if (add_huge_page_size(1ULL << shift) < 0)
|
||||
continue;
|
||||
/*
|
||||
* if we have pdshift and shift value same, we don't
|
||||
* use pgt cache for hugepd.
|
||||
*/
|
||||
if (pdshift > shift) {
|
||||
pgtable_cache_add(pdshift - shift);
|
||||
}
|
||||
|
||||
configured = true;
|
||||
}
|
||||
|
@ -331,7 +331,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
||||
__set_huge_pte_at(pmdp, ptep, pte_val(pte));
|
||||
}
|
||||
}
|
||||
#elif defined(CONFIG_PPC_E500)
|
||||
#else
|
||||
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
||||
pte_t pte, unsigned long sz)
|
||||
{
|
||||
|
@ -98,7 +98,6 @@ config PPC_BOOK3S_64
|
||||
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
|
||||
select ARCH_ENABLE_SPLIT_PMD_PTLOCK
|
||||
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
||||
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
|
||||
select ARCH_SUPPORTS_HUGETLBFS
|
||||
select ARCH_SUPPORTS_NUMA_BALANCING
|
||||
select HAVE_MOVE_PMD
|
||||
|
Loading…
x
Reference in New Issue
Block a user