Christophe Leroy 0d76914a4c powerpc/inst: Optimise copy_inst_from_kernel_nofault()
copy_inst_from_kernel_nofault() uses copy_from_kernel_nofault() to
copy one or two 32-bit words. This means calling an out-of-line
function which itself calls back copy_from_kernel_nofault_allowed()
then performs a generic copy with loops.

Rewrite copy_inst_from_kernel_nofault() to do everything at a
single place and use __get_kernel_nofault() directly to perform
single accesses without loops.

Although the generic function uses pagefault_disable(), it is not
required on powerpc because do_page_fault() bails earlier when a
kernel mode fault happens on a kernel address.

As the function has now become very small, inline it.

With this change, on an 8xx the time spent in the loop in
ftrace_replace_code() is reduced by 23% at function tracer activation
and 27% at nop tracer activation.
The overall time to activate function tracer (measured with shell
command 'time') is 570ms before the patch and 470ms after the patch.

Even vmlinux size is reduced (by 152 instructions).

Before the patch:

	00000018 <copy_inst_from_kernel_nofault>:
	  18:	94 21 ff e0 	stwu    r1,-32(r1)
	  1c:	7c 08 02 a6 	mflr    r0
	  20:	38 a0 00 04 	li      r5,4
	  24:	93 e1 00 1c 	stw     r31,28(r1)
	  28:	7c 7f 1b 78 	mr      r31,r3
	  2c:	38 61 00 08 	addi    r3,r1,8
	  30:	90 01 00 24 	stw     r0,36(r1)
	  34:	48 00 00 01 	bl      34 <copy_inst_from_kernel_nofault+0x1c>
				34: R_PPC_REL24	copy_from_kernel_nofault
	  38:	2c 03 00 00 	cmpwi   r3,0
	  3c:	40 82 00 0c 	bne     48 <copy_inst_from_kernel_nofault+0x30>
	  40:	81 21 00 08 	lwz     r9,8(r1)
	  44:	91 3f 00 00 	stw     r9,0(r31)
	  48:	80 01 00 24 	lwz     r0,36(r1)
	  4c:	83 e1 00 1c 	lwz     r31,28(r1)
	  50:	38 21 00 20 	addi    r1,r1,32
	  54:	7c 08 03 a6 	mtlr    r0
	  58:	4e 80 00 20 	blr

After the patch (before inlining):

	00000018 <copy_inst_from_kernel_nofault>:
	  18:	3d 20 b0 00 	lis     r9,-20480
	  1c:	7c 04 48 40 	cmplw   r4,r9
	  20:	7c 69 1b 78 	mr      r9,r3
	  24:	41 80 00 14 	blt     38 <copy_inst_from_kernel_nofault+0x20>
	  28:	81 44 00 00 	lwz     r10,0(r4)
	  2c:	38 60 00 00 	li      r3,0
	  30:	91 49 00 00 	stw     r10,0(r9)
	  34:	4e 80 00 20 	blr

	  38:	38 60 ff de 	li      r3,-34
	  3c:	4e 80 00 20 	blr
	  40:	38 60 ff f2 	li      r3,-14
	  44:	4e 80 00 20 	blr

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
[mpe: Add clang workaround, with version check as suggested by Nathan]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/0d5b12183d5176dd702d29ad94c39c384e51c78f.1638208156.git.christophe.leroy@csgroup.eu
2021-12-09 22:41:21 +11:00

177 lines
4.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_INST_H
#define _ASM_POWERPC_INST_H
#include <asm/ppc-opcode.h>
#include <asm/reg.h>
#include <asm/disassemble.h>
#include <asm/uaccess.h>
/*
 * Fetch one instruction from user space into @dest, using @gu_op as the
 * per-word accessor (see get_user_instr() and __get_user_instr()).
 *
 * The first word is always read. When CONFIG_PPC64 is enabled and the top
 * six bits of that word equal OP_PREFIX, the next word is read as the
 * suffix and a prefixed instruction is built; otherwise a single-word
 * instruction is built. @dest is written only if every gu_op() call
 * returned 0.
 *
 * Evaluates to 0 on success, otherwise to the non-zero value returned by
 * the gu_op() call that failed.
 *
 * @ptr is parenthesised before being cast so that the cast applies to the
 * whole argument expression rather than to its first operand only.
 *
 * NOTE(review): @ptr appears twice in the expansion (once as the argument
 * of __chk_user_ptr()); avoid arguments with side effects unless
 * __chk_user_ptr() is known not to evaluate its argument.
 */
#define ___get_user_instr(gu_op, dest, ptr)				\
({									\
	long __gui_ret;							\
	u32 __user *__gui_ptr = (u32 __user *)(ptr);			\
	ppc_inst_t __gui_inst;						\
	unsigned int __prefix, __suffix;				\
									\
	__chk_user_ptr(ptr);						\
	__gui_ret = gu_op(__prefix, __gui_ptr);				\
	if (__gui_ret == 0) {						\
		if (IS_ENABLED(CONFIG_PPC64) &&				\
		    (__prefix >> 26) == OP_PREFIX) {			\
			__gui_ret = gu_op(__suffix, __gui_ptr + 1);	\
			__gui_inst = ppc_inst_prefix(__prefix, __suffix); \
		} else {						\
			__gui_inst = ppc_inst(__prefix);		\
		}							\
		if (__gui_ret == 0)					\
			(dest) = __gui_inst;				\
	}								\
	__gui_ret;							\
})
/* Read the instruction at user pointer @uptr into @inst, one get_user() per word. */
#define get_user_instr(inst, uptr) \
	___get_user_instr(get_user, inst, uptr)
/* Same as get_user_instr(), but each word is fetched with __get_user(). */
#define __get_user_instr(inst, uptr) \
	___get_user_instr(__get_user, inst, uptr)
/*
* Instruction data type for POWER
*/
#if defined(CONFIG_PPC64) || defined(__CHECKER__)
static inline u32 ppc_inst_val(ppc_inst_t x)
{
return x.val;
}
#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
#else
static inline u32 ppc_inst_val(ppc_inst_t x)
{
return x;
}
#define ppc_inst(x) (x)
#endif
/* Return the top six bits (bits 31..26) of the first word of @x. */
static inline int ppc_inst_primary_opcode(ppc_inst_t x)
{
	u32 word = ppc_inst_val(x);

	return word >> 26;
}
#ifdef CONFIG_PPC64
/* Build a two-word instruction from first word @x and suffix word @y. */
#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })

/* Return the suffix word stored in @x. */
static inline u32 ppc_inst_suffix(ppc_inst_t x)
{
	return x.suffix;
}
#else
/*
 * Without CONFIG_PPC64 no suffix is kept: @y is evaluated and discarded and
 * the result is ppc_inst(@x). @y is parenthesised so that the (void) cast
 * covers the whole argument expression.
 */
#define ppc_inst_prefix(x, y) ((void)(y), ppc_inst(x))

/* Without CONFIG_PPC64 there is no suffix; always returns 0. */
static inline u32 ppc_inst_suffix(ppc_inst_t x)
{
	return 0;
}
#endif /* CONFIG_PPC64 */
/*
 * Decode the instruction stored at @ptr.
 *
 * With CONFIG_PPC64, a first word whose top six bits equal OP_PREFIX causes
 * the following word to be read as the suffix. In every other case only the
 * word at @ptr is used.
 */
static inline ppc_inst_t ppc_inst_read(const u32 *ptr)
{
	if (!IS_ENABLED(CONFIG_PPC64) || (ptr[0] >> 26) != OP_PREFIX)
		return ppc_inst(ptr[0]);

	return ppc_inst_prefix(ptr[0], ptr[1]);
}
/*
 * Tell whether @x is a prefixed instruction: only possible with
 * CONFIG_PPC64, and then only when its primary opcode is OP_PREFIX.
 */
static inline bool ppc_inst_prefixed(ppc_inst_t x)
{
	if (!IS_ENABLED(CONFIG_PPC64))
		return false;

	return ppc_inst_primary_opcode(x) == OP_PREFIX;
}
/*
 * Return @x with swab32() applied to its first word and to its suffix,
 * reassembled through ppc_inst_prefix().
 */
static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x)
{
	u32 swapped_val = swab32(ppc_inst_val(x));
	u32 swapped_suffix = swab32(ppc_inst_suffix(x));

	return ppc_inst_prefix(swapped_val, swapped_suffix);
}
/*
 * Compare two instructions.
 *
 * The first words must match. The suffixes are compared only when @x is a
 * prefixed instruction; for a non-prefixed @x they are ignored.
 */
static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y)
{
	bool same_first_word = ppc_inst_val(x) == ppc_inst_val(y);

	if (!same_first_word)
		return false;

	return !ppc_inst_prefixed(x) ||
	       ppc_inst_suffix(x) == ppc_inst_suffix(y);
}
/* Length of @x in bytes: 8 for a prefixed instruction, 4 otherwise. */
static inline int ppc_inst_len(ppc_inst_t x)
{
	if (ppc_inst_prefixed(x))
		return 8;

	return 4;
}
/*
 * Work out where the following instruction would start if the instruction
 * stored at @value were placed at @location.
 *
 * Only @value is dereferenced (to determine the instruction length);
 * @location is used purely for address arithmetic.
 */
static inline u32 *ppc_inst_next(u32 *location, u32 *value)
{
	int len = ppc_inst_len(ppc_inst_read(value));

	return (void *)location + len;
}
/*
 * Pack instruction @x into an unsigned long.
 *
 * With CONFIG_PPC32 only the first word is returned. Otherwise both words
 * are combined into 64 bits: with CONFIG_CPU_LITTLE_ENDIAN the suffix is
 * placed in the upper 32 bits, else the first word is.
 */
static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x)
{
	u64 first, second;

	if (IS_ENABLED(CONFIG_PPC32))
		return ppc_inst_val(x);

	first = ppc_inst_val(x);
	second = ppc_inst_suffix(x);

	if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
		return (second << 32) | first;

	return (first << 32) | second;
}
/* Room for two 8-digit hex words, the separating space and the NUL. */
#define PPC_INST_STR_LEN sizeof("00000000 00000000")

/*
 * Format @x into @str as zero-padded hex: "<val> <suffix>" for a prefixed
 * instruction, "<val>" alone otherwise. Returns @str.
 */
static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x)
{
	if (!ppc_inst_prefixed(x)) {
		sprintf(str, "%08x", ppc_inst_val(x));
		return str;
	}

	sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x));
	return str;
}
/*
 * Format instruction @x as hex text and evaluate to a char * pointing at
 * the result (see __ppc_inst_as_str() for the format).
 *
 * The buffer is a compound literal, so inside a function it stays valid
 * until the end of the block that contains the macro invocation. An array
 * declared inside a ({ ... }) statement expression would instead go out of
 * scope at the closing brace, leaving the resulting pointer dangling.
 */
#define ppc_inst_as_str(x) \
	__ppc_inst_as_str((char [PPC_INST_STR_LEN]){ 0 }, (x))
/*
 * Read the instruction located at kernel address @src into @inst.
 *
 * One u32 is read with __get_kernel_nofault(). When CONFIG_PPC64 is enabled
 * and get_op() of that word is OP_PREFIX, the following u32 is read as well
 * and a prefixed instruction is stored; otherwise a single-word instruction
 * is stored.
 *
 * Return: 0 on success (with @inst written), -ERANGE when @src does not
 * satisfy is_kernel_addr(), or -EFAULT when control reaches the Efault
 * label. That label is handed to __get_kernel_nofault(), presumably as its
 * branch target when the access faults.
 */
static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
{
unsigned int val, suffix;
/* Refuse anything that is not a kernel address before reading from it. */
if (unlikely(!is_kernel_addr((unsigned long)src)))
return -ERANGE;
/* See https://github.com/ClangBuiltLinux/linux/issues/1521 */
/* Clang older than 14 only: give both locals a defined value up front. */
#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 140000
val = suffix = 0;
#endif
/* First (or only) word of the instruction. */
__get_kernel_nofault(&val, src, u32, Efault);
if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
/* Prefixed instruction: the suffix is the next u32 after @src. */
__get_kernel_nofault(&suffix, src + 1, u32, Efault);
*inst = ppc_inst_prefix(val, suffix);
} else {
*inst = ppc_inst(val);
}
return 0;
Efault:
/* Reached only through the label passed to __get_kernel_nofault(). */
return -EFAULT;
}
#endif /* _ASM_POWERPC_INST_H */