2008-11-12 20:11:47 +09:00
/*
2009-06-11 09:26:43 +01:00
* Copyright ( C ) 2008 Matt Fleming < matt @ console - pimps . org >
2008-12-08 12:02:28 +09:00
* Copyright ( C ) 2008 Paul Mundt < lethal @ linux - sh . org >
2008-11-12 20:11:47 +09:00
*
* Code for replacing ftrace calls with jumps .
*
* Copyright ( C ) 2007 - 2008 Steven Rostedt < srostedt @ redhat . com >
*
* Thanks goes to Ingo Molnar , for suggesting the idea .
* Mathieu Desnoyers , for suggesting postponing the modifications .
* Arjan van de Ven , for keeping me straight , and explaining to me
* the dangers of modifying code on the run .
*/
# include <linux/uaccess.h>
# include <linux/ftrace.h>
# include <linux/string.h>
# include <linux/init.h>
# include <linux/io.h>
2009-07-11 00:29:03 +00:00
# include <linux/kernel.h>
2008-11-12 20:11:47 +09:00
# include <asm/ftrace.h>
# include <asm/cacheflush.h>
2009-07-06 20:16:33 +09:00
# include <asm/unistd.h>
# include <trace/syscall.h>
2008-11-12 20:11:47 +09:00
2009-07-11 00:29:03 +00:00
# ifdef CONFIG_DYNAMIC_FTRACE
2008-11-12 20:11:47 +09:00
/*
 * Scratch buffer handed out by ftrace_call_replace(); holds the address
 * to be placed in the memory table of an mcount call site.
 */
static unsigned char ftrace_replaced_code[MCOUNT_INSN_SIZE];

/*
 * Scratch buffer handed out by ftrace_nop_replace().
 *
 * It is compared and written MCOUNT_INSN_SIZE bytes at a time by
 * ftrace_modify_code()/ftrace_mod_code(), so it is sized with the same
 * constant as ftrace_replaced_code rather than a bare literal.
 */
static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
/*
* If we ' re trying to nop out a call to a function , we instead
* place a call to the address after the memory table .
*
* 8 c011060 < a > :
* 8 c011060 : 02 d1 mov . l 8 c01106c < a + 0xc > , r1
* 8 c011062 : 22 4f sts . l pr , @ - r15
* 8 c011064 : 02 c7 mova 8 c011070 < a + 0x10 > , r0
* 8 c011066 : 2 b 41 jmp @ r1
* 8 c011068 : 2 a 40 lds r0 , pr
* 8 c01106a : 09 00 nop
* 8 c01106c : 68 24 . word 0x2468 < - - - ip
* 8 c01106e : 1 d 8 c . word 0x8c1d
* 8 c011070 : 26 4f lds . l @ r15 + , pr < - - - ip + MCOUNT_INSN_SIZE
*
* We write 0x8c011070 to 0x8c01106c so that on entry to a ( ) we branch
* past the _mcount call and continue executing code like normal .
*/
static unsigned char * ftrace_nop_replace ( unsigned long ip )
2008-11-12 20:11:47 +09:00
{
2009-06-10 22:07:53 +01:00
__raw_writel ( ip + MCOUNT_INSN_SIZE , ftrace_nop ) ;
2008-11-12 20:11:47 +09:00
return ftrace_nop ;
}
2009-06-10 22:07:53 +01:00
static unsigned char * ftrace_call_replace ( unsigned long ip , unsigned long addr )
2008-11-12 20:11:47 +09:00
{
/* Place the address in the memory table. */
2009-06-10 22:07:53 +01:00
__raw_writel ( addr , ftrace_replaced_code ) ;
2008-11-12 20:11:47 +09:00
/*
* No locking needed , this must be called via kstop_machine
* which in essence is like running on a uniprocessor machine .
*/
return ftrace_replaced_code ;
}
2009-10-13 16:52:50 +09:00
/*
* Modifying code must take extra care . On an SMP machine , if
* the code being modified is also being executed on another CPU
* that CPU will have undefined results and possibly take a GPF .
* We use kstop_machine to stop other CPUS from executing code .
* But this does not stop NMIs from happening . We still need
* to protect against that . We separate out the modification of
* the code to take care of this .
*
* Two buffers are added : An IP buffer and a " code " buffer .
*
* 1 ) Put the instruction pointer into the IP buffer
* and the new code into the " code " buffer .
* 2 ) Wait for any running NMIs to finish and set a flag that says
* we are modifying code , it is done in an atomic operation .
* 3 ) Write the code
* 4 ) clear the flag .
* 5 ) Wait for any running NMIs to finish .
*
* If an NMI is executed , the first thing it does is to call
* " ftrace_nmi_enter " . This will check if the flag is set to write
* and if it is , it will write what is in the IP and " code " buffers .
*
* The trick is , it does not matter if everyone is writing the same
* content to the code location . Also , if a CPU is executing code
* it is OK to write to that code location if the contents being written
* are the same as what exists .
*/
/*
 * State shared between the code patcher (do_ftrace_mod_code) and the NMI
 * entry/exit hooks (ftrace_nmi_enter / ftrace_nmi_exit).
 *
 * NOTE(review): (1 << 31) shifts into the sign bit of a signed int;
 * confirm this is acceptable for the compilers in use.
 */
# define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
/*
 * Incremented in ftrace_nmi_enter() and decremented in ftrace_nmi_exit();
 * the patcher additionally sets/clears MOD_CODE_WRITE_FLAG in it.
 */
static atomic_t nmi_running = ATOMIC_INIT ( 0 ) ;
static int mod_code_status ; /* holds return value of text write */
static void * mod_code_ip ; /* holds the IP to write to */
static void * mod_code_newcode ; /* holds the text to write to the IP */
/* Bumped each time the patcher had to spin waiting on nmi_running. */
static unsigned nmi_wait_count ;
/* Bumped each time ftrace_nmi_enter() performed the text write itself. */
static atomic_t nmi_update_count = ATOMIC_INIT ( 0 ) ;
/*
 * Format the NMI patching statistics into @buf (at most @size bytes).
 *
 * Emits nmi_wait_count followed by nmi_update_count and returns whatever
 * snprintf() returns.
 */
int ftrace_arch_read_dyn_info(char *buf, int size)
{
	return snprintf(buf, size, " %u %u ",
			nmi_wait_count,
			atomic_read(&nmi_update_count));
}
/*
 * Clear MOD_CODE_WRITE_FLAG in nmi_running while leaving the rest of the
 * value untouched.
 *
 * Uses a cmpxchg retry loop: each failed exchange yields the value that
 * was actually found, and the loop ends as soon as a value without the
 * flag is observed.
 */
static void clear_mod_flag(void)
{
	int seen = atomic_read(&nmi_running);

	while (seen & MOD_CODE_WRITE_FLAG)
		seen = atomic_cmpxchg(&nmi_running, seen,
				      seen & ~MOD_CODE_WRITE_FLAG);
}
/*
 * Write MCOUNT_INSN_SIZE bytes from mod_code_newcode to mod_code_ip and
 * record the result in mod_code_status.
 *
 * Several CPUs may run this at once and all store to mod_code_status (and
 * to the code itself); they write identical data, so either every writer
 * fails or every writer succeeds.
 */
static void ftrace_mod_code(void)
{
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);
	if (!mod_code_status)
		return;

	/* The write failed: drop the flag so no new writers try again. */
	clear_mod_flag();
}
/*
 * NMI entry hook.
 *
 * Increments nmi_running.  If the incremented value has
 * MOD_CODE_WRITE_FLAG set, a code modification is in flight, so this
 * handler performs the same write itself via ftrace_mod_code() and counts
 * it in nmi_update_count.
 */
void ftrace_nmi_enter ( void )
{
if ( atomic_inc_return ( & nmi_running ) & MOD_CODE_WRITE_FLAG ) {
/* Read barrier before reading mod_code_ip / mod_code_newcode. */
smp_rmb ( ) ;
ftrace_mod_code ( ) ;
atomic_inc ( & nmi_update_count ) ;
}
/* Must have previous changes seen before executions */
smp_mb ( ) ;
}
/*
 * NMI exit hook: undoes the nmi_running increment made by
 * ftrace_nmi_enter(), after a full memory barrier.
 */
void ftrace_nmi_exit ( void )
{
/* Finish all executions before clearing nmi_running */
smp_mb ( ) ;
atomic_dec ( & nmi_running ) ;
}
/*
 * Atomically switch nmi_running from 0 to MOD_CODE_WRITE_FLAG, spinning
 * with cpu_relax() for as long as the value is non-zero.
 *
 * nmi_wait_count is bumped once if at least one retry was needed.
 */
static void wait_for_nmi_and_set_mod_flag(void)
{
	int had_to_wait = 0;

	while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) {
		had_to_wait = 1;
		cpu_relax();
	}

	if (had_to_wait)
		nmi_wait_count++;
}
/*
 * Spin with cpu_relax() until nmi_running reads as zero.
 *
 * nmi_wait_count is bumped once if the first read was non-zero.
 */
static void wait_for_nmi(void)
{
	int had_to_wait = 0;

	while (atomic_read(&nmi_running)) {
		had_to_wait = 1;
		cpu_relax();
	}

	if (had_to_wait)
		nmi_wait_count++;
}
/*
 * Write @new_code to @ip while cooperating with NMI handlers.
 *
 * Sequence: publish the target and new text in mod_code_ip /
 * mod_code_newcode, wait for nmi_running to reach zero and set
 * MOD_CODE_WRITE_FLAG, perform the write, clear the flag, then wait for
 * nmi_running to drain again.  Full barriers separate each step.
 *
 * Returns mod_code_status, i.e. the result stored by ftrace_mod_code()
 * (zero on success).
 */
static int
do_ftrace_mod_code ( unsigned long ip , void * new_code )
{
/* Publish what to write and where, for any NMI that joins in. */
mod_code_ip = ( void * ) ip ;
mod_code_newcode = new_code ;
/* The buffers need to be visible before we let NMIs write them */
smp_mb ( ) ;
wait_for_nmi_and_set_mod_flag ( ) ;
/* Make sure all running NMIs have finished before we write the code */
smp_mb ( ) ;
ftrace_mod_code ( ) ;
/* Make sure the write happens before clearing the bit */
smp_mb ( ) ;
clear_mod_flag ( ) ;
/* Let NMIs that entered while the flag was set finish. */
wait_for_nmi ( ) ;
return mod_code_status ;
}
2009-06-10 22:07:53 +01:00
static int ftrace_modify_code ( unsigned long ip , unsigned char * old_code ,
2008-11-12 20:11:47 +09:00
unsigned char * new_code )
{
unsigned char replaced [ MCOUNT_INSN_SIZE ] ;
/*
* Note : Due to modules and __init , code can
* disappear and change , we need to protect against faulting
* as well as code changing . We do this by using the
* probe_kernel_ * functions .
*
* No real locking needed , this code is run through
* kstop_machine , or before SMP starts .
*/
/* read the text we want to modify */
if ( probe_kernel_read ( replaced , ( void * ) ip , MCOUNT_INSN_SIZE ) )
return - EFAULT ;
/* Make sure it is what we expect it to be */
if ( memcmp ( replaced , old_code , MCOUNT_INSN_SIZE ) ! = 0 )
return - EINVAL ;
/* replace the text with the new text */
2009-10-13 16:52:50 +09:00
if ( do_ftrace_mod_code ( ip , new_code ) )
2008-11-12 20:11:47 +09:00
return - EPERM ;
flush_icache_range ( ip , ip + MCOUNT_INSN_SIZE ) ;
return 0 ;
}
/*
 * Point the ftrace_call site at @func.
 *
 * The patched location is &ftrace_call + MCOUNT_INSN_OFFSET.  Its current
 * contents are snapshotted as the "expected old" image, so the comparison
 * in ftrace_modify_code() is against whatever is there now.
 *
 * Returns the result of ftrace_modify_code().
 */
int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long site = (unsigned long)(&ftrace_call) + MCOUNT_INSN_OFFSET;
	unsigned char current_image[MCOUNT_INSN_SIZE];
	unsigned char *call_image;

	memcpy(current_image, (unsigned char *)site, MCOUNT_INSN_SIZE);
	call_image = ftrace_call_replace(site, (unsigned long)func);

	return ftrace_modify_code(site, current_image, call_image);
}
2008-12-08 12:02:28 +09:00
int ftrace_make_nop ( struct module * mod ,
struct dyn_ftrace * rec , unsigned long addr )
{
unsigned char * new , * old ;
unsigned long ip = rec - > ip ;
old = ftrace_call_replace ( ip , addr ) ;
2009-06-10 22:07:53 +01:00
new = ftrace_nop_replace ( ip ) ;
2008-12-08 12:02:28 +09:00
return ftrace_modify_code ( rec - > ip , old , new ) ;
}
int ftrace_make_call ( struct dyn_ftrace * rec , unsigned long addr )
{
unsigned char * new , * old ;
unsigned long ip = rec - > ip ;
2009-06-10 22:07:53 +01:00
old = ftrace_nop_replace ( ip ) ;
2008-12-08 12:02:28 +09:00
new = ftrace_call_replace ( ip , addr ) ;
return ftrace_modify_code ( rec - > ip , old , new ) ;
}
2008-11-12 20:11:47 +09:00
/*
 * Arch hook for dynamic ftrace initialisation.
 *
 * Stores 0 at the location @data points to and returns 0.
 */
int __init ftrace_dyn_arch_init(void *data)
{
	unsigned long status_addr = (unsigned long)data;

	/* The return code is returned via data */
	__raw_writel(0, status_addr);

	return 0;
}
2009-07-11 00:29:03 +00:00
# endif /* CONFIG_DYNAMIC_FTRACE */
# ifdef CONFIG_FUNCTION_GRAPH_TRACER
# ifdef CONFIG_DYNAMIC_FTRACE
/*
 * Symbol defined outside this file; only its address is used here, as the
 * base of the location patched by the graph-caller enable/disable hooks.
 */
extern void ftrace_graph_call ( void ) ;
static int ftrace_mod ( unsigned long ip , unsigned long old_addr ,
unsigned long new_addr )
{
unsigned char code [ MCOUNT_INSN_SIZE ] ;
if ( probe_kernel_read ( code , ( void * ) ip , MCOUNT_INSN_SIZE ) )
return - EFAULT ;
if ( old_addr ! = __raw_readl ( ( unsigned long * ) code ) )
return - EINVAL ;
__raw_writel ( new_addr , ip ) ;
return 0 ;
}
/*
 * Redirect the ftrace_graph_call site from skip_trace to
 * ftrace_graph_caller.
 *
 * Returns the result of ftrace_mod().
 */
int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long site = (unsigned long)(&ftrace_graph_call) + GRAPH_INSN_OFFSET;

	return ftrace_mod(site,
			  (unsigned long)(&skip_trace),
			  (unsigned long)(&ftrace_graph_caller));
}
/*
 * Redirect the ftrace_graph_call site from ftrace_graph_caller back to
 * skip_trace.
 *
 * Returns the result of ftrace_mod().
 */
int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long site = (unsigned long)(&ftrace_graph_call) + GRAPH_INSN_OFFSET;

	return ftrace_mod(site,
			  (unsigned long)(&ftrace_graph_caller),
			  (unsigned long)(&skip_trace));
}
# endif /* CONFIG_DYNAMIC_FTRACE */
/*
* Hook the return address and push it in the stack of return addrs
* in the current thread info .
*
* This is the main routine for the function graph tracer . The function
* graph tracer essentially works like this :
*
* parent is the stack address containing self_addr ' s return address .
* We pull the real return address out of parent and store it in
* current ' s ret_stack . Then , we replace the return address on the stack
* with the address of return_to_handler . self_addr is the function that
* called mcount .
*
* When self_addr returns , it will jump to return_to_handler which calls
* ftrace_return_to_handler . ftrace_return_to_handler will pull the real
* return address off of current ' s ret_stack and jump to it .
*/
/*
 * Redirect a traced function's return through return_to_handler.
 *
 * @parent:    location holding the return address to be hooked
 * @self_addr: address recorded as the traced function (trace.func)
 *
 * Does nothing if current->tracing_graph_pause is non-zero.  Otherwise the
 * word at @parent is swapped for the address of return_to_handler, the
 * original value is pushed with ftrace_push_return_trace(), and the entry
 * callback is invoked.  The original return address is put back if the
 * push reports -EBUSY or the entry callback returns zero.
 */
void prepare_ftrace_return ( unsigned long * parent , unsigned long self_addr )
{
unsigned long old ;
int faulted , err ;
struct ftrace_graph_ent trace ;
unsigned long return_hooker = ( unsigned long ) & return_to_handler ;
if ( unlikely ( atomic_read ( & current - > tracing_graph_pause ) ) )
return ;
/*
 * Protect against faults, even though they should not happen: this
 * tool is too intrusive to go without such protection.
 *
 * Label 1 loads *parent into "old"; label 2 stores return_hooker to
 * *parent and then sets "faulted" to 0.  Both accesses have __ex_table
 * entries pointing at the fixup code (label 4), which jumps back to
 * label 3 and sets "faulted" to 1 in the instruction following the jmp
 * (presumably the jump's delay slot on SH -- confirm).
 *
 * NOTE(review): the asm stores to *parent but declares no "memory"
 * clobber -- confirm this is intentional.
 */
__asm__ __volatile__ (
" 1: \n \t "
" mov.l @%2, %0 \n \t "
" 2: \n \t "
" mov.l %3, @%2 \n \t "
" mov #0, %1 \n \t "
" 3: \n \t "
" .section .fixup, \" ax \" \n \t "
" 4: \n \t "
" mov.l 5f, %0 \n \t "
" jmp @%0 \n \t "
" mov #1, %1 \n \t "
" .balign 4 \n \t "
" 5: .long 3b \n \t "
" .previous \n \t "
" .section __ex_table, \" a \" \n \t "
" .long 1b, 4b \n \t "
" .long 2b, 4b \n \t "
" .previous \n \t "
: " =&r " ( old ) , " =r " ( faulted )
: " r " ( parent ) , " r " ( return_hooker )
) ;
/* A fault while hooking the return address stops graph tracing. */
if ( unlikely ( faulted ) ) {
ftrace_graph_stop ( ) ;
WARN_ON ( 1 ) ;
return ;
}
/* Record the real return address; trace.depth is filled in by the callee. */
err = ftrace_push_return_trace ( old , self_addr , & trace . depth , 0 ) ;
if ( err = = - EBUSY ) {
/* Could not record it: restore the original return address. */
__raw_writel ( old , parent ) ;
return ;
}
trace . func = self_addr ;
/* Only trace if the calling function expects to */
if ( ! ftrace_graph_entry ( & trace ) ) {
/* Not traced: undo the push and restore the original return address. */
current - > curr_ret_stack - - ;
__raw_writel ( old , parent ) ;
}
}
# endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2009-07-06 20:16:33 +09:00
# ifdef CONFIG_FTRACE_SYSCALLS
/*
 * Bounds of the syscall metadata records; find_syscall_meta() walks
 * [__start_syscalls_metadata, __stop_syscalls_metadata) as an array of
 * struct syscall_metadata.
 */
extern unsigned long __start_syscalls_metadata [ ] ;
extern unsigned long __stop_syscalls_metadata [ ] ;
/* Indexed by syscall number in arch_init_ftrace_syscalls(). */
extern unsigned long * sys_call_table ;
/*
 * Per-syscall metadata pointers, FTRACE_SYSCALL_MAX entries, allocated
 * and filled by arch_init_ftrace_syscalls().  NULL until then; individual
 * entries are NULL when no metadata was found.
 */
static struct syscall_metadata * * syscalls_metadata ;
static struct syscall_metadata * find_syscall_meta ( unsigned long * syscall )
{
struct syscall_metadata * start ;
struct syscall_metadata * stop ;
char str [ KSYM_SYMBOL_LEN ] ;
start = ( struct syscall_metadata * ) __start_syscalls_metadata ;
stop = ( struct syscall_metadata * ) __stop_syscalls_metadata ;
kallsyms_lookup ( ( unsigned long ) syscall , NULL , NULL , NULL , str ) ;
for ( ; start < stop ; start + + ) {
if ( start - > name & & ! strcmp ( start - > name , str ) )
return start ;
}
return NULL ;
}
struct syscall_metadata * syscall_nr_to_meta ( int nr )
{
if ( ! syscalls_metadata | | nr > = FTRACE_SYSCALL_MAX | | nr < 0 )
return NULL ;
return syscalls_metadata [ nr ] ;
}
2009-10-13 12:42:48 +09:00
int syscall_name_to_nr ( char * name )
{
int i ;
if ( ! syscalls_metadata )
return - 1 ;
for ( i = 0 ; i < NR_syscalls ; i + + )
if ( syscalls_metadata [ i ] )
if ( ! strcmp ( syscalls_metadata [ i ] - > name , name ) )
return i ;
return - 1 ;
}
void set_syscall_enter_id ( int num , int id )
{
syscalls_metadata [ num ] - > enter_id = id ;
}
void set_syscall_exit_id ( int num , int id )
{
syscalls_metadata [ num ] - > exit_id = id ;
}
/*
 * Allocate the syscall metadata table (FTRACE_SYSCALL_MAX zeroed entries)
 * and fill each slot with the metadata found for the corresponding
 * sys_call_table entry; slots without a match stay NULL.
 *
 * Returns 0 on success, or -ENOMEM (after a WARN) if allocation fails.
 */
static int __init arch_init_ftrace_syscalls(void)
{
	unsigned long **table = &sys_call_table;
	int nr;

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
				    FTRACE_SYSCALL_MAX, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (nr = 0; nr < FTRACE_SYSCALL_MAX; nr++)
		syscalls_metadata[nr] = find_syscall_meta(table[nr]);

	return 0;
}
arch_initcall(arch_init_ftrace_syscalls);
2009-07-06 20:16:33 +09:00
# endif /* CONFIG_FTRACE_SYSCALLS */