This commit replaces arm64's support for FTRACE_WITH_REGS with support for FTRACE_WITH_ARGS. This removes some overhead and complexity, and removes some latent issues with inconsistent presentation of struct pt_regs (which can only be reliably saved/restored at exception boundaries). FTRACE_WITH_REGS has been supported on arm64 since commit: 3b23e4991fb66f6d ("arm64: implement ftrace with regs") As noted in the commit message, the major reasons for implementing FTRACE_WITH_REGS were: (1) To make it possible to use the ftrace graph tracer with pointer authentication, where it's necessary to snapshot/manipulate the LR before it is signed by the instrumented function. (2) To make it possible to implement LIVEPATCH in future, where we need to hook function entry before an instrumented function manipulates the stack or argument registers. Practically speaking, we need to preserve the argument/return registers, PC, LR, and SP. Neither of these needs a struct pt_regs; both only require the set of registers which are live at function call/return boundaries. Our calling convention is defined by "Procedure Call Standard for the Arm® 64-bit Architecture (AArch64)" (AKA "AAPCS64"), which can currently be found at: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst Per AAPCS64, all function call argument and return values are held in the following GPRs: * X0 - X7 : parameter / result registers * X8 : indirect result location register * SP : stack pointer (AKA SP) Additionally, at function call boundaries, the following GPRs hold context/return information: * X29 : frame pointer (AKA FP) * X30 : link register (AKA LR) ... 
and for ftrace we need to capture the instrumented address: * PC : program counter No other GPRs are relevant, as none of the other registers hold parameters or return values: * X9 - X17 : temporaries, may be clobbered * X18 : shadow call stack pointer (or temporary) * X19 - X28 : callee saved This patch implements FTRACE_WITH_ARGS for arm64, only saving/restoring the minimal set of registers necessary. This is always sufficient to manipulate control flow (e.g. for live-patching) or to manipulate function arguments and return values. This reduces the necessary stack usage from 336 bytes for pt_regs down to 112 bytes for ftrace_regs + 32 bytes for two frame records, freeing up 192 bytes. This could be reduced further with changes to the unwinder. As there is no longer a need to save different sets of registers for different features, we no longer need distinct `ftrace_caller` and `ftrace_regs_caller` trampolines. This allows the trampoline assembly to be simpler, and simplifies code which previously had to handle the two trampolines. I've tested this with the ftrace selftests, where there are no unexpected failures. Co-developed-by: Florent Revest <revest@chromium.org> Signed-off-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Florent Revest <revest@chromium.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Will Deacon <will@kernel.org> Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org> Link: https://lore.kernel.org/r/20221103170520.931305-5-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org>
187 lines
4.6 KiB
C
187 lines
4.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* arch/arm64/include/asm/ftrace.h
|
|
*
|
|
* Copyright (C) 2013 Linaro Limited
|
|
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
|
*/
|
|
#ifndef __ASM_FTRACE_H
|
|
#define __ASM_FTRACE_H
|
|
|
|
#include <asm/insn.h>
|
|
|
|
#define HAVE_FUNCTION_GRAPH_FP_TEST
|
|
|
|
/*
|
|
* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
|
|
* "return address pointer" which can be used to uniquely identify a return
|
|
* address which has been overwritten.
|
|
*
|
|
* On arm64 we use the address of the caller's frame record, which remains the
|
|
* same for the lifetime of the instrumented function, unlike the return
|
|
* address in the LR.
|
|
*/
|
|
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
|
|
|
|
/*
 * With DYNAMIC_FTRACE_WITH_ARGS the architecture advertises
 * ARCH_SUPPORTS_FTRACE_OPS and defines no MCOUNT_ADDR. Without it,
 * MCOUNT_ADDR is the address of _mcount().
 *
 * NOTE(review): ARCH_SUPPORTS_FTRACE_OPS presumably tells the core ftrace
 * code that the trampoline hands the ftrace_ops/ftrace_regs to callbacks
 * directly - confirm against include/linux/ftrace.h.
 */
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#else
#define MCOUNT_ADDR		((unsigned long)_mcount)
#endif
|
|
|
|
/* The BL at the callsite's adjusted rec->ip */
|
|
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
|
|
|
|
#define FTRACE_PLT_IDX 0
|
|
#define NR_FTRACE_PLTS 1
|
|
|
|
/*
|
|
* Currently, gcc tends to save the link register after the local variables
|
|
* on the stack. This causes the max stack tracer to report the function
|
|
* frame sizes for the wrong functions. By defining
|
|
* ARCH_FTRACE_SHIFT_STACK_TRACER, it will tell the stack tracer to expect
|
|
* to find the return address on the stack after the local variables have
|
|
* been set up.
|
|
*
|
|
* Note, this may change in the future, and we will need to deal with that
|
|
* if it were to happen.
|
|
*/
|
|
#define ARCH_FTRACE_SHIFT_STACK_TRACER 1
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/compat.h>
|
|
|
|
extern void _mcount(unsigned long);
|
|
extern void *return_address(unsigned int);
|
|
|
|
/* Architecture-specific ftrace record data; arm64 has nothing to store. */
struct dyn_arch_ftrace {
	/* No extra data needed for arm64 */
};
|
|
|
|
extern unsigned long ftrace_graph_call;
|
|
|
|
extern void return_to_handler(void);
|
|
|
|
static inline unsigned long ftrace_call_adjust(unsigned long addr)
|
|
{
|
|
/*
|
|
* Adjust addr to point at the BL in the callsite.
|
|
* See ftrace_init_nop() for the callsite sequence.
|
|
*/
|
|
if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
|
|
return addr + AARCH64_INSN_SIZE;
|
|
/*
|
|
* addr is the address of the mcount call instruction.
|
|
* recordmcount does the necessary offset calculation.
|
|
*/
|
|
return addr;
|
|
}
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
|
|
struct dyn_ftrace;
|
|
struct ftrace_ops;
|
|
|
|
/*
 * No struct pt_regs is captured with DYNAMIC_FTRACE_WITH_ARGS, so there is
 * never one to hand back: this always expands to NULL and does not evaluate
 * its argument.
 */
#define arch_ftrace_get_regs(regs) NULL
|
|
|
|
/*
 * Register state made available to ftrace callbacks with
 * DYNAMIC_FTRACE_WITH_ARGS.
 *
 * Only registers that are live at a function call/return boundary are
 * held here, rather than a full struct pt_regs.
 */
struct ftrace_regs {
	/* x0 - x8 */
	unsigned long regs[9];
	/*
	 * Not accessed by any helper in this header.
	 * NOTE(review): presumably padding that keeps the fp/lr pair below
	 * at an even slot - confirm against the trampoline assembly.
	 */
	unsigned long __unused;

	/* x29 (frame pointer) and x30 (link register) */
	unsigned long fp;
	unsigned long lr;

	/* Stack pointer and the instrumented address */
	unsigned long sp;
	unsigned long pc;
};
|
|
|
|
static __always_inline unsigned long
|
|
ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
|
|
{
|
|
return fregs->pc;
|
|
}
|
|
|
|
static __always_inline void
|
|
ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
|
|
unsigned long pc)
|
|
{
|
|
fregs->pc = pc;
|
|
}
|
|
|
|
static __always_inline unsigned long
|
|
ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs)
|
|
{
|
|
return fregs->sp;
|
|
}
|
|
|
|
static __always_inline unsigned long
|
|
ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n)
|
|
{
|
|
if (n < 8)
|
|
return fregs->regs[n];
|
|
return 0;
|
|
}
|
|
|
|
static __always_inline unsigned long
|
|
ftrace_regs_get_return_value(const struct ftrace_regs *fregs)
|
|
{
|
|
return fregs->regs[0];
|
|
}
|
|
|
|
static __always_inline void
|
|
ftrace_regs_set_return_value(struct ftrace_regs *fregs,
|
|
unsigned long ret)
|
|
{
|
|
fregs->regs[0] = ret;
|
|
}
|
|
|
|
static __always_inline void
|
|
ftrace_override_function_with_return(struct ftrace_regs *fregs)
|
|
{
|
|
fregs->pc = fregs->lr;
|
|
}
|
|
|
|
/*
 * Prototypes for helpers implemented outside this header.
 *
 * Each "#define name name" makes the identifier visible to #ifdef/#ifndef
 * tests without changing what it expands to.
 * NOTE(review): presumably this is how generic ftrace code detects that the
 * architecture supplies its own version - confirm in include/linux/ftrace.h.
 */
int ftrace_regs_query_register_offset(const char *name);

int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop

void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
		       struct ftrace_ops *op, struct ftrace_regs *fregs);
#define ftrace_graph_func ftrace_graph_func
|
|
#endif
|
|
|
|
#define ftrace_return_address(n) return_address(n)
|
|
|
|
/*
|
|
* Because AArch32 mode does not share the same syscall table with AArch64,
|
|
* tracing compat syscalls may result in reporting bogus syscalls or even
|
|
* hang-up, so just do not trace them.
|
|
* See kernel/trace/trace_syscalls.c
|
|
*
|
|
* x86 code says:
|
|
* If the user really wants these, then they should use the
|
|
* raw syscall tracepoints with filtering.
|
|
*/
|
|
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
|
|
/*
 * Tell the syscall tracer whether the current syscall is a compat one.
 *
 * @regs: unused; the result comes solely from is_compat_task().
 */
static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
{
	return is_compat_task();
}
|
|
|
|
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
|
|
|
|
/*
 * Compare a syscall symbol against a syscall name, ignoring the symbol's
 * leading "__arm64_"-sized prefix.
 *
 * @sym:  NUL-terminated symbol name, expected to start with "__arm64_".
 * @name: NUL-terminated name to compare the remainder of @sym against.
 *
 * Returns true when the text after the first 8 characters of @sym equals
 * @name, and false otherwise (including when @sym is shorter than that).
 */
static inline bool arch_syscall_match_sym_name(const char *sym,
					       const char *name)
{
	/*
	 * Since all syscall functions have __arm64_ prefix, we must skip it.
	 * However, as we described above, we decided to ignore compat
	 * syscalls, so we don't care about __arm64_compat_ prefix here.
	 */
	const size_t prefix_len = sizeof("__arm64_") - 1;

	/*
	 * A symbol shorter than the prefix cannot match, and skipping
	 * prefix_len bytes of it would step past its terminating NUL.
	 */
	if (strlen(sym) < prefix_len)
		return false;

	return !strcmp(sym + prefix_len, name);
}
|
|
#endif /* ifndef __ASSEMBLY__ */
|
|
|
|
#endif /* __ASM_FTRACE_H */
|