diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 70ab8d807032..5c9a8d3362cd 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -91,8 +91,8 @@ config ARM select HAVE_EXIT_THREAD select HAVE_FAST_GUP if ARM_LPAE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG - select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !(THUMB2_KERNEL && CC_IS_CLANG) + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 98436702e0c7..b79dc7fa89bf 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -65,7 +65,7 @@ config UNWINDER_FRAME_POINTER config UNWINDER_ARM bool "ARM EABI stack unwinder" - depends on AEABI && !FUNCTION_GRAPH_TRACER + depends on AEABI # https://github.com/ClangBuiltLinux/linux/issues/732 depends on !LD_IS_LLD || LLD_VERSION >= 110000 select ARM_UNWIND diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index e68fb879e4f9..d27782331556 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -446,15 +446,10 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) * however some exceptions may exist. Caveat emptor. * * - The clobber list is dictated by the call to v7_flush_dcache_*. - * fp is preserved to the stack explicitly prior disabling the cache - * since adding it to the clobber list is incompatible with having - * CONFIG_FRAME_POINTER=y. ip is saved as well if ever r12-clobbering - * trampoline are inserted by the linker and to keep sp 64-bit aligned. */ #define v7_exit_coherency_flush(level) \ asm volatile( \ ".arch armv7-a \n\t" \ - "stmfd sp!, {fp, ip} \n\t" \ "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \ "bic r0, r0, #"__stringify(CR_C)" \n\t" \ "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \ @@ -464,10 +459,9 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \ "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \ "isb \n\t" \ - "dsb \n\t" \ - "ldmfd sp!, {fp, ip}" \ - : : : "r0","r1","r2","r3","r4","r5","r6","r7", \ - "r9","r10","lr","memory" ) + "dsb" \ + : : : "r0","r1","r2","r3","r4","r5","r6", \ + "r9","r10","ip","lr","memory" ) void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index a4dbac07e4ef..5358aad67831 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -2,6 +2,8 @@ #ifndef _ASM_ARM_FTRACE #define _ASM_ARM_FTRACE +#define HAVE_FUNCTION_GRAPH_FP_TEST + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif @@ -33,26 +35,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #ifndef __ASSEMBLY__ -#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) -/* - * return_address uses walk_stackframe to do it's work. If both - * CONFIG_FRAME_POINTER=y and CONFIG_ARM_UNWIND=y walk_stackframe uses unwind - * information. For this to work in the function tracer many functions would - * have to be marked with __notrace. So for now just depend on - * !CONFIG_ARM_UNWIND. - */ - void *return_address(unsigned int); -#else - -static inline void *return_address(unsigned int level) -{ - return NULL; -} - -#endif - #define ftrace_return_address(n) return_address(n) #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index d87d60532b86..e56503fd9447 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -14,6 +14,9 @@ struct stackframe { unsigned long sp; unsigned long lr; unsigned long pc; + + /* address of the LR value on the stack */ + unsigned long *lr_addr; #ifdef CONFIG_KRETPROBES struct llist_node *kr_cur; struct task_struct *tsk; diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ae295a3bcfef..5cebb8d5a1d6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -10,6 +10,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_patch.o = -pg +CFLAGS_REMOVE_unwind.o = -pg endif CFLAGS_REMOVE_return_address.o = -pg @@ -24,10 +25,7 @@ obj-y := elf.o entry-common.o irq.o opcodes.o \ KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_traps.o := n -ifneq ($(CONFIG_ARM_UNWIND),y) -obj-$(CONFIG_FRAME_POINTER) += return_address.o -endif - +obj-y += return_address.o obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index a74289ebc803..3e7bcaca5e07 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -22,12 +22,9 @@ * mcount can be thought of as a function called in the middle of a subroutine * call. As such, it needs to be transparent for both the caller and the * callee: the original lr needs to be restored when leaving mcount, and no - * registers should be clobbered. (In the __gnu_mcount_nc implementation, we - * clobber the ip register. This is OK because the ARM calling convention - * allows it to be clobbered in subroutines and doesn't use it to hold - * parameters.) + * registers should be clobbered. * - * When using dynamic ftrace, we patch out the mcount call by a "pop {lr}" + * When using dynamic ftrace, we patch out the mcount call by a "add sp, #4" * instead of the __gnu_mcount_nc call (see arch/arm/kernel/ftrace.c). */ @@ -38,23 +35,20 @@ .macro __mcount suffix mcount_enter - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub + ldr_va r2, ftrace_trace_function + badr r0, .Lftrace_stub cmp r0, r2 bne 1f #ifdef CONFIG_FUNCTION_GRAPH_TRACER - ldr r1, =ftrace_graph_return - ldr r2, [r1] - cmp r0, r2 - bne ftrace_graph_caller\suffix + ldr_va r2, ftrace_graph_return + cmp r0, r2 + bne ftrace_graph_caller\suffix - ldr r1, =ftrace_graph_entry - ldr r2, [r1] - ldr r0, =ftrace_graph_entry_stub - cmp r0, r2 - bne ftrace_graph_caller\suffix + ldr_va r2, ftrace_graph_entry + mov_l r0, ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix #endif mcount_exit @@ -70,29 +64,27 @@ .macro __ftrace_regs_caller - sub sp, sp, #8 @ space for PC and CPSR OLD_R0, + str lr, [sp, #-8]! @ store LR as PC and make space for CPSR/OLD_R0, @ OLD_R0 will overwrite previous LR - add ip, sp, #12 @ move in IP the value of SP as it was - @ before the push {lr} of the mcount mechanism - - str lr, [sp, #0] @ store LR instead of PC - - ldr lr, [sp, #8] @ get previous LR + ldr lr, [sp, #8] @ get previous LR str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR - stmdb sp!, {ip, lr} - stmdb sp!, {r0-r11, lr} + str lr, [sp, #-4]! @ store previous LR as LR + + add lr, sp, #16 @ move in LR the value of SP as it was + @ before the push {lr} of the mcount mechanism + + push {r0-r11, ip, lr} @ stack content at this point: @ 0 4 48 52 56 60 64 68 72 - @ R0 | R1 | ... | LR | SP + 4 | previous LR | LR | PSR | OLD_R0 | + @ R0 | R1 | ... | IP | SP + 4 | previous LR | LR | PSR | OLD_R0 | - mov r3, sp @ struct pt_regs* + mov r3, sp @ struct pt_regs* - ldr r2, =function_trace_op - ldr r2, [r2] @ pointer to the current + ldr_va r2, function_trace_op @ pointer to the current @ function tracing op ldr r1, [sp, #S_LR] @ lr of instrumented func @@ -108,35 +100,37 @@ ftrace_regs_call: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_regs_call ftrace_graph_regs_call: - mov r0, r0 +ARM( mov r0, r0 ) +THUMB( nop.w ) #endif @ pop saved regs - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return + pop {r0-r11, ip, lr} @ restore r0 through r12 + ldr lr, [sp], #4 @ restore LR + ldr pc, [sp], #12 .endm #ifdef CONFIG_FUNCTION_GRAPH_TRACER .macro __ftrace_graph_regs_caller - sub r0, fp, #4 @ lr of instrumented routine (parent) +#ifdef CONFIG_UNWINDER_FRAME_POINTER + sub r0, fp, #4 @ lr of instrumented routine (parent) +#else + add r0, sp, #S_LR +#endif @ called from __ftrace_regs_caller - ldr r1, [sp, #S_PC] @ instrumented routine (func) + ldr r1, [sp, #S_PC] @ instrumented routine (func) mcount_adjust_addr r1, r1 - mov r2, fp @ frame pointer + mov r2, fpreg @ frame pointer + add r3, sp, #PT_REGS_SIZE bl prepare_ftrace_return @ pop registers saved in ftrace_regs_caller - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return + pop {r0-r11, ip, lr} @ restore r0 through r12 + ldr lr, [sp], #4 @ restore LR + ldr pc, [sp], #12 .endm #endif @@ -149,8 +143,7 @@ ftrace_graph_regs_call: mcount_adjust_addr r0, lr @ instrumented function #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - ldr r2, =function_trace_op - ldr r2, [r2] @ pointer to the current + ldr_va r2, function_trace_op @ pointer to the current @ function tracing op mov r3, #0 @ regs is NULL #endif @@ -162,14 +155,19 @@ ftrace_call\suffix: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call\suffix ftrace_graph_call\suffix: - mov r0, r0 +ARM( mov r0, r0 ) +THUMB( nop.w ) #endif mcount_exit .endm .macro __ftrace_graph_caller +#ifdef CONFIG_UNWINDER_FRAME_POINTER sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#else + add r0, sp, #20 +#endif #ifdef CONFIG_DYNAMIC_FTRACE @ called from __ftrace_caller, saved in mcount_enter ldr r1, [sp, #16] @ instrumented routine (func) @@ -178,7 +176,8 @@ ftrace_graph_call\suffix: @ called from __mcount, untouched in lr mcount_adjust_addr r1, lr @ instrumented routine (func) #endif - mov r2, fp @ frame pointer + mov r2, fpreg @ frame pointer + add r3, sp, #24 bl prepare_ftrace_return mcount_exit .endm @@ -202,16 +201,17 @@ ftrace_graph_call\suffix: .endm .macro mcount_exit - ldmia sp!, {r0-r3, ip, lr} - ret ip + ldmia sp!, {r0-r3} + ldr lr, [sp, #4] + ldr pc, [sp], #8 .endm ENTRY(__gnu_mcount_nc) UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE - mov ip, lr - ldmia sp!, {lr} - ret ip + push {lr} + ldr lr, [sp, #4] + ldr pc, [sp], #8 #else __mcount #endif @@ -256,17 +256,33 @@ ENDPROC(ftrace_graph_regs_caller) .purgem mcount_exit #ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl return_to_handler -return_to_handler: +ENTRY(return_to_handler) stmdb sp!, {r0-r3} - mov r0, fp @ frame pointer + add r0, sp, #16 @ sp at exit of instrumented routine bl ftrace_return_to_handler mov lr, r0 @ r0 has real ret addr ldmia sp!, {r0-r3} ret lr +ENDPROC(return_to_handler) #endif ENTRY(ftrace_stub) .Lftrace_stub: ret lr ENDPROC(ftrace_stub) + +#ifdef CONFIG_DYNAMIC_FTRACE + + __INIT + + .macro init_tramp, dst:req +ENTRY(\dst\()_from_init) + ldr pc, =\dst +ENDPROC(\dst\()_from_init) + .endm + + init_tramp ftrace_caller +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + init_tramp ftrace_regs_caller +#endif +#endif diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index a006585e1c09..83cc068586bc 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -22,12 +22,24 @@ #include #include #include +#include #include +/* + * The compiler emitted profiling hook consists of + * + * PUSH {LR} + * BL __gnu_mcount_nc + * + * To turn this combined sequence into a NOP, we need to restore the value of + * SP before the PUSH. Let's use an ADD rather than a POP into LR, as LR is not + * modified anyway, and reloading LR from memory is highly likely to be less + * efficient. + */ #ifdef CONFIG_THUMB2_KERNEL -#define NOP 0xf85deb04 /* pop.w {lr} */ +#define NOP 0xf10d0d04 /* add.w sp, sp, #4 */ #else -#define NOP 0xe8bd4000 /* pop {lr} */ +#define NOP 0xe28dd004 /* add sp, sp, #4 */ #endif #ifdef CONFIG_DYNAMIC_FTRACE @@ -51,9 +63,20 @@ static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) return NOP; } -static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) +void ftrace_caller_from_init(void); +void ftrace_regs_caller_from_init(void); + +static unsigned long __ref adjust_address(struct dyn_ftrace *rec, + unsigned long addr) { - return addr; + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE) || + system_state >= SYSTEM_FREEING_INITMEM || + likely(!is_kernel_inittext(rec->ip))) + return addr; + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) || + addr == (unsigned long)&ftrace_caller) + return (unsigned long)&ftrace_caller_from_init; + return (unsigned long)&ftrace_regs_caller_from_init; } int ftrace_arch_code_modify_prepare(void) @@ -189,15 +212,23 @@ int ftrace_make_nop(struct module *mod, #endif new = ftrace_nop_replace(rec); - ret = ftrace_modify_code(ip, old, new, true); + /* + * Locations in .init.text may call __gnu_mcount_mc via a linker + * emitted veneer if they are too far away from its implementation, and + * so validation may fail spuriously in such cases. Let's work around + * this by omitting those from validation. + */ + ret = ftrace_modify_code(ip, old, new, !is_kernel_inittext(ip)); return ret; } #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER +asmlinkage void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, - unsigned long frame_pointer) + unsigned long frame_pointer, + unsigned long stack_pointer) { unsigned long return_hooker = (unsigned long) &return_to_handler; unsigned long old; @@ -205,6 +236,23 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; + if (IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER)) { + /* FP points one word below parent's top of stack */ + frame_pointer += 4; + } else { + struct stackframe frame = { + .fp = frame_pointer, + .sp = stack_pointer, + .lr = self_addr, + .pc = self_addr, + }; + if (unwind_frame(&frame) < 0) + return; + if (frame.lr != self_addr) + parent = frame.lr_addr; + frame_pointer = frame.sp; + } + old = *parent; *parent = return_hooker; @@ -225,7 +273,7 @@ static int __ftrace_modify_caller(unsigned long *callsite, unsigned long caller_fn = (unsigned long) func; unsigned long pc = (unsigned long) callsite; unsigned long branch = arm_gen_branch(pc, caller_fn); - unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long nop = arm_gen_nop(); unsigned long old = enable ? nop : branch; unsigned long new = enable ? branch : nop; diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index c5ea328c428d..b4e468a7674b 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -55,6 +55,7 @@ struct unwind_ctrl_block { const unsigned long *insn; /* pointer to the current instructions word */ unsigned long sp_low; /* lowest value of sp allowed */ unsigned long sp_high; /* highest value of sp allowed */ + unsigned long *lr_addr; /* address of LR value on the stack */ /* * 1 : check for stack overflow for each register pop. * 0 : save overhead if there is plenty of stack remaining. @@ -239,6 +240,8 @@ static int unwind_pop_register(struct unwind_ctrl_block *ctrl, * from being tracked by KASAN. */ ctrl->vrs[reg] = READ_ONCE_NOCHECK(*(*vsp)); + if (reg == 14) + ctrl->lr_addr = *vsp; (*vsp)++; return URC_OK; } @@ -395,9 +398,6 @@ int unwind_frame(struct stackframe *frame) pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__, frame->pc, frame->lr, frame->sp); - if (!kernel_text_address(frame->pc)) - return -URC_FAILURE; - idx = unwind_find_idx(frame->pc); if (!idx) { pr_warn("unwind: Index not found %08lx\n", frame->pc); @@ -476,6 +476,7 @@ int unwind_frame(struct stackframe *frame) frame->lr = ctrl.vrs[LR]; frame->pc = ctrl.vrs[PC]; frame->sp_low = ctrl.sp_low; + frame->lr_addr = ctrl.lr_addr; return URC_OK; } diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile index 7baa8c9427d5..b2394ddb0558 100644 --- a/arch/arm/mach-bcm/Makefile +++ b/arch/arm/mach-bcm/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_ARCH_BCM_MOBILE_L2_CACHE) += kona_l2_cache.o # Support for secure monitor traps obj-$(CONFIG_ARCH_BCM_MOBILE_SMC) += bcm_kona_smc.o +CFLAGS_REMOVE_bcm_kona_smc.o += $(CC_FLAGS_FTRACE) # BCM2835 ifeq ($(CONFIG_ARCH_BCM2835),y) diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index cd861c57d5ad..fd0dbeb93357 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -35,7 +35,6 @@ static bool secure_firmware __ro_after_init; */ #define exynos_v7_exit_coherency_flush(level) \ asm volatile( \ - "stmfd sp!, {fp, ip}\n\t"\ "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR\n\t" \ "bic r0, r0, #"__stringify(CR_C)"\n\t" \ "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR\n\t" \ @@ -50,11 +49,10 @@ static bool secure_firmware __ro_after_init; "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR\n\t" \ "isb\n\t" \ "dsb\n\t" \ - "ldmfd sp!, {fp, ip}" \ : \ : "Ir" (pmu_base_addr + S5P_INFORM0) \ - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ - "r9", "r10", "lr", "memory") + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", \ + "r9", "r10", "ip", "lr", "memory") static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster) { diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 830bbfb26ca5..7c9499b728c4 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -90,7 +90,7 @@ ENDPROC(v7_flush_icache_all) * * Flush the D-cache up to the Level of Unification Inner Shareable * - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + * Corrupted registers: r0-r6, r9-r10 */ ENTRY(v7_flush_dcache_louis) @@ -117,7 +117,7 @@ ENDPROC(v7_flush_dcache_louis) * * Flush the whole D-cache. * - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + * Corrupted registers: r0-r6, r9-r10 * * - mm - mm_struct describing address space */ @@ -149,22 +149,22 @@ flush_levels: movw r4, #0x3ff ands r4, r4, r1, lsr #3 @ find maximum number on the way size clz r5, r4 @ find bit position of way size increment - movw r7, #0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the index size + movw r6, #0x7fff + and r1, r6, r1, lsr #13 @ extract max number of the index size + mov r6, #1 + movne r4, r4, lsl r5 @ # of ways shifted into bits [31:...] + movne r6, r6, lsl r5 @ 1 shifted left by same amount loop1: - mov r9, r7 @ create working copy of max index + mov r9, r1 @ create working copy of max index loop2: - ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 - THUMB( lsl r6, r4, r5 ) - THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 - ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 - THUMB( lsl r6, r9, r2 ) - THUMB( orr r11, r11, r6 ) @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way + mov r5, r9, lsl r2 @ factor set number into r5 + orr r5, r5, r4 @ factor way number into r5 + orr r5, r5, r10 @ factor cache level into r5 + mcr p15, 0, r5, c7, c14, 2 @ clean & invalidate by set/way subs r9, r9, #1 @ decrement the index bge loop2 - subs r4, r4, #1 @ decrement the way - bge loop1 + subs r4, r4, r6 @ decrement the way + bcs loop1 skip: add r10, r10, #2 @ increment cache number cmp r3, r10 @@ -192,14 +192,12 @@ ENDPROC(v7_flush_dcache_all) * */ ENTRY(v7_flush_kern_cache_all) - ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + stmfd sp!, {r4-r6, r9-r10, lr} bl v7_flush_dcache_all mov r0, #0 ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate - ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + ldmfd sp!, {r4-r6, r9-r10, lr} ret lr ENDPROC(v7_flush_kern_cache_all) @@ -210,14 +208,12 @@ ENDPROC(v7_flush_kern_cache_all) * Invalidate the I-cache to the point of unification. */ ENTRY(v7_flush_kern_cache_louis) - ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + stmfd sp!, {r4-r6, r9-r10, lr} bl v7_flush_dcache_louis mov r0, #0 ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate - ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + ldmfd sp!, {r4-r6, r9-r10, lr} ret lr ENDPROC(v7_flush_kern_cache_louis) diff --git a/arch/arm/probes/kprobes/actions-common.c b/arch/arm/probes/kprobes/actions-common.c index 836aebe596cd..79171344dbeb 100644 --- a/arch/arm/probes/kprobes/actions-common.c +++ b/arch/arm/probes/kprobes/actions-common.c @@ -84,7 +84,8 @@ emulate_generic_r0_12_noflags(probes_opcode_t insn, register void *rfn asm("lr") = asi->insn_fn; __asm__ __volatile__ ( - "stmdb sp!, {%[regs], r11} \n\t" +ARM( "stmdb sp!, {%[regs], r11} \n\t" ) +THUMB( "stmdb sp!, {%[regs], r7} \n\t" ) "ldmia %[regs], {r0-r12} \n\t" #if __LINUX_ARM_ARCH__ >= 6 "blx %[fn] \n\t" @@ -96,10 +97,11 @@ emulate_generic_r0_12_noflags(probes_opcode_t insn, #endif "ldr lr, [sp], #4 \n\t" /* lr = regs */ "stmia lr, {r0-r12} \n\t" - "ldr r11, [sp], #4 \n\t" +ARM( "ldr r11, [sp], #4 \n\t" ) +THUMB( "ldr r7, [sp], #4 \n\t" ) : [regs] "=r" (rregs), [fn] "=r" (rfn) : "0" (rregs), "1" (rfn) - : "r0", "r2", "r3", "r4", "r5", "r6", "r7", + : "r0", "r2", "r3", "r4", "r5", "r6", ARM("r7") THUMB("r11"), "r8", "r9", "r10", "r12", "memory", "cc" ); } diff --git a/arch/arm/probes/kprobes/actions-thumb.c b/arch/arm/probes/kprobes/actions-thumb.c index 7884fcb81c26..51624fc263fc 100644 --- a/arch/arm/probes/kprobes/actions-thumb.c +++ b/arch/arm/probes/kprobes/actions-thumb.c @@ -447,14 +447,16 @@ t16_emulate_loregs(probes_opcode_t insn, __asm__ __volatile__ ( "msr cpsr_fs, %[oldcpsr] \n\t" + "mov r11, r7 \n\t" "ldmia %[regs], {r0-r7} \n\t" "blx %[fn] \n\t" "stmia %[regs], {r0-r7} \n\t" + "mov r7, r11 \n\t" "mrs %[newcpsr], cpsr \n\t" : [newcpsr] "=r" (newcpsr) : [oldcpsr] "r" (oldcpsr), [regs] "r" (regs), [fn] "r" (asi->insn_fn) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r11", "lr", "memory", "cc" ); @@ -524,14 +526,16 @@ t16_emulate_push(probes_opcode_t insn, struct arch_probes_insn *asi, struct pt_regs *regs) { __asm__ __volatile__ ( + "mov r11, r7 \n\t" "ldr r9, [%[regs], #13*4] \n\t" "ldr r8, [%[regs], #14*4] \n\t" "ldmia %[regs], {r0-r7} \n\t" "blx %[fn] \n\t" "str r9, [%[regs], #13*4] \n\t" + "mov r7, r11 \n\t" : : [regs] "r" (regs), [fn] "r" (asi->insn_fn) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r11", "lr", "memory", "cc" ); } @@ -558,14 +562,16 @@ t16_emulate_pop_nopc(probes_opcode_t insn, struct arch_probes_insn *asi, struct pt_regs *regs) { __asm__ __volatile__ ( + "mov r11, r7 \n\t" "ldr r9, [%[regs], #13*4] \n\t" "ldmia %[regs], {r0-r7} \n\t" "blx %[fn] \n\t" "stmia %[regs], {r0-r7} \n\t" "str r9, [%[regs], #13*4] \n\t" + "mov r7, r11 \n\t" : : [regs] "r" (regs), [fn] "r" (asi->insn_fn) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r9", + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r9", "r11", "lr", "memory", "cc" ); } @@ -577,14 +583,16 @@ t16_emulate_pop_pc(probes_opcode_t insn, register unsigned long pc asm("r8"); __asm__ __volatile__ ( + "mov r11, r7 \n\t" "ldr r9, [%[regs], #13*4] \n\t" "ldmia %[regs], {r0-r7} \n\t" "blx %[fn] \n\t" "stmia %[regs], {r0-r7} \n\t" "str r9, [%[regs], #13*4] \n\t" + "mov r7, r11 \n\t" : "=r" (pc) : [regs] "r" (regs), [fn] "r" (asi->insn_fn) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r9", + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r9", "r11", "lr", "memory", "cc" );