a2beb5f1ef
Here are the last pieces of page fault accounting that were still done outside handle_mm_fault() where we still have regs==NULL when calling handle_mm_fault(): arch/powerpc/mm/copro_fault.c: copro_handle_mm_fault arch/sparc/mm/fault_32.c: force_user_fault arch/um/kernel/trap.c: handle_page_fault mm/gup.c: faultin_page fixup_user_fault mm/hmm.c: hmm_vma_fault mm/ksm.c: break_ksm Some of them have the issue of duplicated accounting for page fault retries. Some of them didn't do the accounting at all. This patch cleans all these up by letting handle_mm_fault() do per-task page fault accounting even if regs==NULL (though we'll still skip the perf event accounting). With that, we can safely remove all the outliers now. There's another functional change in that now we account the page faults to the caller of gup, rather than the task_struct that was passed into the gup code. More information on this can be found at [1]. After this patch, the things below should never be touched again outside handle_mm_fault(): - task_struct.[maj|min]_flt - PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] [1] https://lore.kernel.org/lkml/CAHk-=wj_V2Tps2QrMn20_W0OJF9xqNh52XSGA42s-ZJ8Y+GyKw@mail.gmail.com/ Signed-off-by: Peter Xu <peterx@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Albert Ou <aou@eecs.berkeley.edu> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Cain <bcain@codeaurora.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Chris Zankel <chris@zankel.net> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David S. 
Miller <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Greentime Hu <green.hu@gmail.com> Cc: Guo Ren <guoren@kernel.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jonas Bonn <jonas@southpole.se> Cc: Ley Foon Tan <ley.foon.tan@intel.com> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Nick Hu <nickhu@andestech.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Richard Henderson <rth@twiddle.net> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vincent Chen <deanbo422@gmail.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Will Deacon <will@kernel.org> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Link: http://lkml.kernel.org/r/20200707225021.200906-25-peterx@redhat.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
149 lines
3.5 KiB
C
149 lines
3.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* CoProcessor (SPU/AFU) mm fault handler
|
|
*
|
|
* (C) Copyright IBM Deutschland Entwicklung GmbH 2007
|
|
*
|
|
* Author: Arnd Bergmann <arndb@de.ibm.com>
|
|
* Author: Jeremy Kerr <jk@ozlabs.org>
|
|
*/
|
|
#include <linux/sched.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/export.h>
|
|
#include <asm/reg.h>
|
|
#include <asm/copro.h>
|
|
#include <asm/spu.h>
|
|
#include <misc/cxl-base.h>
|
|
|
|
/*
|
|
* This ought to be kept in sync with the powerpc specific do_page_fault
|
|
* function. Currently, there are a few corner cases that we haven't had
|
|
* to handle fortunately.
|
|
*/
|
|
int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
|
|
unsigned long dsisr, vm_fault_t *flt)
|
|
{
|
|
struct vm_area_struct *vma;
|
|
unsigned long is_write;
|
|
int ret;
|
|
|
|
if (mm == NULL)
|
|
return -EFAULT;
|
|
|
|
if (mm->pgd == NULL)
|
|
return -EFAULT;
|
|
|
|
mmap_read_lock(mm);
|
|
ret = -EFAULT;
|
|
vma = find_vma(mm, ea);
|
|
if (!vma)
|
|
goto out_unlock;
|
|
|
|
if (ea < vma->vm_start) {
|
|
if (!(vma->vm_flags & VM_GROWSDOWN))
|
|
goto out_unlock;
|
|
if (expand_stack(vma, ea))
|
|
goto out_unlock;
|
|
}
|
|
|
|
is_write = dsisr & DSISR_ISSTORE;
|
|
if (is_write) {
|
|
if (!(vma->vm_flags & VM_WRITE))
|
|
goto out_unlock;
|
|
} else {
|
|
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
|
|
goto out_unlock;
|
|
/*
|
|
* PROT_NONE is covered by the VMA check above.
|
|
* and hash should get a NOHPTE fault instead of
|
|
* a PROTFAULT in case fixup is needed for things
|
|
* like autonuma.
|
|
*/
|
|
if (!radix_enabled())
|
|
WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
|
|
}
|
|
|
|
ret = 0;
|
|
*flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
|
|
if (unlikely(*flt & VM_FAULT_ERROR)) {
|
|
if (*flt & VM_FAULT_OOM) {
|
|
ret = -ENOMEM;
|
|
goto out_unlock;
|
|
} else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
|
|
ret = -EFAULT;
|
|
goto out_unlock;
|
|
}
|
|
BUG();
|
|
}
|
|
|
|
out_unlock:
|
|
mmap_read_unlock(mm);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
|
|
|
|
/*
 * Compute the SLB entry (esid/vsid pair) covering effective address @ea
 * and store it in @slb.
 *
 * The region @ea belongs to selects the page size; user addresses take
 * their segment size and VSID from @mm, while the vmalloc, IO and linear
 * map regions all use the kernel segment size and a kernel VSID.
 *
 * Returns 0 with @slb filled in. Returns 1, leaving @slb untouched, when
 * @ea is in no recognised region, when a user address is given without
 * an mm, or when the computed VSID is zero.
 */
int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
{
	u64 vsid, vsidkey, esid_mask;
	int psize, ssize;
	int user_region = 0;

	/* Pick the page size per region; the rest is derived below. */
	switch (get_region_id(ea)) {
	case USER_REGION_ID:
		pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
		if (!mm)
			return 1;
		user_region = 1;
		psize = get_slice_psize(mm, ea);
		break;
	case VMALLOC_REGION_ID:
		pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
		psize = mmu_vmalloc_psize;
		break;
	case IO_REGION_ID:
		pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea);
		psize = mmu_io_psize;
		break;
	case LINEAR_MAP_REGION_ID:
		pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea);
		psize = mmu_linear_psize;
		break;
	default:
		pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
		return 1;
	}

	if (user_region) {
		ssize = user_segment_size(ea);
		vsid = get_user_vsid(&mm->context, ea, ssize);
		vsidkey = SLB_VSID_USER;
	} else {
		/* All three kernel regions share segment size and key. */
		ssize = mmu_kernel_ssize;
		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
		vsidkey = SLB_VSID_KERNEL;
	}

	/* Bad address */
	if (!vsid)
		return 1;

	vsid <<= slb_vsid_shift(ssize);
	vsid |= vsidkey | mmu_psize_defs[psize].sllp;

	/* 1T segments need their own VSID flag and a different ESID mask. */
	if (ssize == MMU_SEGSIZE_1T) {
		vsid |= SLB_VSID_B_1T;
		esid_mask = ESID_MASK_1T;
	} else {
		esid_mask = ESID_MASK;
	}

	slb->esid = (ea & esid_mask) | SLB_ESID_V;
	slb->vsid = vsid;

	return 0;
}
EXPORT_SYMBOL_GPL(copro_calculate_slb);
|
|
|
|
/*
 * Hand @mm to every coprocessor SLB flush hook: spu_flush_all_slbs()
 * when the kernel is built with CONFIG_SPU_BASE, and cxl_slbia()
 * unconditionally.
 */
void copro_flush_all_slbs(struct mm_struct *mm)
{
#if defined(CONFIG_SPU_BASE)
	spu_flush_all_slbs(mm);
#endif
	cxl_slbia(mm);
}
EXPORT_SYMBOL_GPL(copro_flush_all_slbs);
|