Merge branch 'fixes' into next

Merge our fixes branch which has a number of important fixes, notably
the fix for initrd corruption, as well as the fixes for scv vs ptrace.
This commit is contained in:
Michael Ellerman 2021-06-16 00:14:55 +10:00
commit a4785e93aa
16 changed files with 137 additions and 109 deletions

View File

@ -109,6 +109,16 @@ auxiliary vector.
scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC.
ptrace
------
When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains
the system call type that can be used to distinguish between sc and scv 0
system calls, and the different register conventions can be accounted for.
If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was
performed with the sc instruction, if it is 0x3000 then the system call was
performed with the scv 0 instruction.
vsyscall
========

View File

@ -50,7 +50,7 @@ l_yes:
1098: nop; \
.pushsection __jump_table, "aw"; \
.long 1098b - ., LABEL - .; \
FTR_ENTRY_LONG KEY; \
FTR_ENTRY_LONG KEY - .; \
.popsection
#endif

View File

@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
pgd_t *pgdir = init_mm.pgd;
return __find_linux_pte(pgdir, ea, NULL, hshift);
}
/*
* Convert a kernel vmap virtual address (vmalloc or ioremap space) to a
* physical address, without taking locks. This can be used in real-mode.
*/
static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr)
{
pte_t *ptep;
phys_addr_t pa;
int hugepage_shift;
/*
* init_mm does not free page tables, and does not do THP. It may
* have huge pages from huge vmalloc / ioremap etc.
*/
ptep = find_init_mm_pte(addr, &hugepage_shift);
if (WARN_ON(!ptep))
return 0;
pa = PFN_PHYS(pte_pfn(*ptep));
if (!hugepage_shift)
hugepage_shift = PAGE_SHIFT;
pa |= addr & ((1ul << hugepage_shift) - 1);
return pa;
}
/*
* This is what we should always use. Any other lockless page table lookup needs
* careful audit against THP split.

View File

@ -19,6 +19,7 @@
#ifndef _ASM_POWERPC_PTRACE_H
#define _ASM_POWERPC_PTRACE_H
#include <linux/err.h>
#include <uapi/asm/ptrace.h>
#include <asm/asm-const.h>
@ -153,25 +154,6 @@ extern unsigned long profile_pc(struct pt_regs *regs);
long do_syscall_trace_enter(struct pt_regs *regs);
void do_syscall_trace_leave(struct pt_regs *regs);
#define kernel_stack_pointer(regs) ((regs)->gpr[1])
static inline int is_syscall_success(struct pt_regs *regs)
{
return !(regs->ccr & 0x10000000);
}
static inline long regs_return_value(struct pt_regs *regs)
{
if (is_syscall_success(regs))
return regs->gpr[3];
else
return -regs->gpr[3];
}
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
regs->gpr[3] = rc;
}
#ifdef __powerpc64__
#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
#else
@ -236,6 +218,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs)
regs->trap |= 0x1;
}
#define kernel_stack_pointer(regs) ((regs)->gpr[1])
static inline int is_syscall_success(struct pt_regs *regs)
{
if (trap_is_scv(regs))
return !IS_ERR_VALUE((unsigned long)regs->gpr[3]);
else
return !(regs->ccr & 0x10000000);
}
static inline long regs_return_value(struct pt_regs *regs)
{
if (trap_is_scv(regs))
return regs->gpr[3];
if (is_syscall_success(regs))
return regs->gpr[3];
else
return -regs->gpr[3];
}
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
regs->gpr[3] = rc;
}
#define arch_has_single_step() (1)
#define arch_has_block_step() (true)
#define ARCH_HAS_USER_SINGLE_STEP_REPORT

View File

@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task,
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
/*
* If the system call failed,
* regs->gpr[3] contains a positive ERRORCODE.
*/
return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
if (trap_is_scv(regs)) {
unsigned long error = regs->gpr[3];
return IS_ERR_VALUE(error) ? error : 0;
} else {
/*
* If the system call failed,
* regs->gpr[3] contains a positive ERRORCODE.
*/
return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
}
}
static inline long syscall_get_return_value(struct task_struct *task,
@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
/*
* In the general case it's not obvious that we must deal with CCR
* here, as the syscall exit path will also do that for us. However
* there are some places, eg. the signal code, which check ccr to
* decide if the value in r3 is actually an error.
*/
if (error) {
regs->ccr |= 0x10000000L;
regs->gpr[3] = error;
if (trap_is_scv(regs)) {
regs->gpr[3] = (long) error ?: val;
} else {
regs->ccr &= ~0x10000000L;
regs->gpr[3] = val;
/*
* In the general case it's not obvious that we must deal with
* CCR here, as the syscall exit path will also do that for us.
* However there are some places, eg. the signal code, which
* check ccr to decide if the value in r3 is actually an error.
*/
if (error) {
regs->ccr |= 0x10000000L;
regs->gpr[3] = error;
} else {
regs->ccr &= ~0x10000000L;
regs->gpr[3] = val;
}
}
}

View File

@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
*/
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
pte_t *ptep;
unsigned long pa;
int hugepage_shift;
/*
* We won't find hugepages here(this is iomem). Hence we are not
* worried about _PAGE_SPLITTING/collapse. Also we will not hit
* page table free, because of init_mm.
*/
ptep = find_init_mm_pte(token, &hugepage_shift);
if (!ptep)
return token;
pa = pte_pfn(*ptep);
/* On radix we can do hugepage mappings for io, so handle that */
if (!hugepage_shift)
hugepage_shift = PAGE_SHIFT;
pa <<= PAGE_SHIFT;
pa |= token & ((1ul << hugepage_shift) - 1);
return pa;
return ppc_find_vmap_phys(token);
}
/*

View File

@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
#ifdef CONFIG_PPC_INDIRECT_MMIO
struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
{
unsigned hugepage_shift;
struct iowa_bus *bus;
int token;
@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
bus = &iowa_busses[token - 1];
else {
unsigned long vaddr, paddr;
pte_t *ptep;
vaddr = (unsigned long)PCI_FIX_ADDR(addr);
if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
return NULL;
/*
* We won't find huge pages here (iomem). Also can't hit
* a page table free due to init_mm
*/
ptep = find_init_mm_pte(vaddr, &hugepage_shift);
if (ptep == NULL)
paddr = 0;
else {
WARN_ON(hugepage_shift);
paddr = pte_pfn(*ptep) << PAGE_SHIFT;
}
paddr = ppc_find_vmap_phys(vaddr);
bus = iowa_pci_find(vaddr, paddr);
if (bus == NULL)

View File

@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
unsigned int order;
unsigned int nio_pages, io_order;
struct page *page;
size_t size_io = size;
size = PAGE_ALIGN(size);
order = get_order(size);
@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
size_io = IOMMU_PAGE_ALIGN(size_io, tbl);
nio_pages = size_io >> tbl->it_page_shift;
io_order = get_iommu_order(size_io, tbl);
nio_pages = size >> tbl->it_page_shift;
io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
if (mapping == DMA_MAPPING_ERROR) {
@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
if (tbl) {
size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
unsigned int nio_pages = size_io >> tbl->it_page_shift;
unsigned int nio_pages;
size = PAGE_ALIGN(size);
nio_pages = size >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));

View File

@ -108,7 +108,6 @@ int arch_prepare_kprobe(struct kprobe *p)
int ret = 0;
struct kprobe *prev;
struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr);
struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1));
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
@ -116,7 +115,8 @@ int arch_prepare_kprobe(struct kprobe *p)
} else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
ret = -EINVAL;
} else if (ppc_inst_prefixed(prefix)) {
} else if ((unsigned long)p->addr & ~PAGE_MASK &&
ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) {
printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}

View File

@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr)
apply_feature_fixups();
setup_feature_keys();
early_ioremap_setup();
/* Initialize the hash table or TLB handling */
early_init_mmu();
early_ioremap_setup();
/*
* After firmware and early platform setup code has set things up,
* we note the SPR values for configurable control/performance

View File

@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
user_write_access_end();
/* Save the siginfo outside of the unsafe block. */
if (copy_siginfo_to_user(&frame->info, &ksig->info))
goto badframe;
/* Make sure signal handler doesn't get spurious FP exceptions */
tsk->thread.fp_state.fpscr = 0;
@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
regs->nip = (unsigned long) &frame->tramp[0];
}
/* Save the siginfo outside of the unsafe block. */
if (copy_siginfo_to_user(&frame->info, &ksig->info))
goto badframe;
/* Allocate a dummy caller frame for the signal handler. */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);

View File

@ -4455,7 +4455,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
mtspr(SPRN_EBBRR, ebb_regs[1]);
mtspr(SPRN_BESCR, ebb_regs[2]);
mtspr(SPRN_TAR, user_tar);
mtspr(SPRN_FSCR, current->thread.fscr);
}
mtspr(SPRN_VRSAVE, user_vrsave);

View File

@ -23,20 +23,9 @@
#include <asm/pte-walk.h>
/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
static void *real_vmalloc_addr(void *addr)
{
unsigned long addr = (unsigned long) x;
pte_t *p;
/*
* assume we don't have huge pages in vmalloc space...
* So don't worry about THP collapse/split. Called
* Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
*/
p = find_init_mm_pte(addr, NULL);
if (!p || !pte_present(*p))
return NULL;
addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
return __va(addr);
return __va(ppc_find_vmap_phys((unsigned long)addr));
}
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */

View File

@ -59,6 +59,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_UAMOR (SFS-88)
#define STACK_SLOT_DAWR1 (SFS-96)
#define STACK_SLOT_DAWRX1 (SFS-104)
#define STACK_SLOT_FSCR (SFS-112)
/* the following is used by the P9 short path */
#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
@ -686,6 +687,8 @@ BEGIN_FTR_SECTION
std r6, STACK_SLOT_DAWR0(r1)
std r7, STACK_SLOT_DAWRX0(r1)
std r8, STACK_SLOT_IAMR(r1)
mfspr r5, SPRN_FSCR
std r5, STACK_SLOT_FSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
mfspr r6, SPRN_DAWR1
@ -1663,6 +1666,10 @@ FTR_SECTION_ELSE
ld r7, STACK_SLOT_HFSCR(r1)
mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_FSCR(r1)
mtspr SPRN_FSCR, r5
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.

View File

@ -20,6 +20,7 @@
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/kasan.h>
#include <asm/sparsemem.h>
#include <asm/svm.h>
#include <mm/mmu_decl.h>

View File

@ -1753,16 +1753,25 @@ TEST_F(TRACE_poke, getpid_runs_normally)
# define SYSCALL_RET_SET(_regs, _val) \
do { \
typeof(_val) _result = (_val); \
/* \
* A syscall error is signaled by CR0 SO bit \
* and the code is stored as a positive value. \
*/ \
if (_result < 0) { \
SYSCALL_RET(_regs) = -_result; \
(_regs).ccr |= 0x10000000; \
} else { \
if ((_regs.trap & 0xfff0) == 0x3000) { \
/* \
* scv 0 system call uses -ve result \
* for error, so no need to adjust. \
*/ \
SYSCALL_RET(_regs) = _result; \
(_regs).ccr &= ~0x10000000; \
} else { \
/* \
* A syscall error is signaled by the \
* CR0 SO bit and the code is stored as \
* a positive value. \
*/ \
if (_result < 0) { \
SYSCALL_RET(_regs) = -_result; \
(_regs).ccr |= 0x10000000; \
} else { \
SYSCALL_RET(_regs) = _result; \
(_regs).ccr &= ~0x10000000; \
} \
} \
} while (0)
# define SYSCALL_RET_SET_ON_PTRACE_EXIT