2005-04-17 02:20:36 +04:00
/*
* Kernel Probes ( KProbes )
* arch / x86_64 / kernel / kprobes . c
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place - Suite 330 , Boston , MA 02111 - 1307 , USA .
*
* Copyright ( C ) IBM Corporation , 2002 , 2004
*
* 2002 - Oct Created by Vamsi Krishna S < vamsi_krishna @ in . ibm . com > Kernel
* Probes initial implementation ( includes contributions from
* Rusty Russell ) .
* 2004 - July Suparna Bhattacharya < suparna @ in . ibm . com > added jumper probes
* interface to access function arguments .
* 2004 - Oct Jim Keniston < kenistoj @ us . ibm . com > and Prasanna S Panchamukhi
* < prasanna @ in . ibm . com > adapted for x86_64
* 2005 - Mar Roland McGrath < roland @ redhat . com >
* Fixed to handle % rip - relative addressing mode correctly .
[PATCH] x86_64 specific function return probes
The following patch adds the x86_64 architecture specific implementation
for function return probes.
Function return probes is a mechanism built on top of kprobes that allows
a caller to register a handler to be called when a given function exits.
For example, to instrument the return path of sys_mkdir:
static int sys_mkdir_exit(struct kretprobe_instance *i, struct pt_regs *regs)
{
printk("sys_mkdir exited\n");
return 0;
}
static struct kretprobe return_probe = {
.handler = sys_mkdir_exit,
};
<inside setup function>
return_probe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("sys_mkdir");
if (register_kretprobe(&return_probe)) {
printk(KERN_DEBUG "Unable to register return probe!\n");
/* do error path */
}
<inside cleanup function>
unregister_kretprobe(&return_probe);
The way this works is that:
* At system initialization time, kernel/kprobes.c installs a kprobe
on a function called kretprobe_trampoline() that is implemented in
the arch/x86_64/kernel/kprobes.c (More on this later)
* When a return probe is registered using register_kretprobe(),
kernel/kprobes.c will install a kprobe on the first instruction of the
targeted function with the pre handler set to arch_prepare_kretprobe()
which is implemented in arch/x86_64/kernel/kprobes.c.
* arch_prepare_kretprobe() will prepare a kretprobe instance that stores:
- nodes for hanging this instance in an empty or free list
- a pointer to the return probe
- the original return address
- a pointer to the stack address
With all this stowed away, arch_prepare_kretprobe() then sets the return
address for the targeted function to a special trampoline function called
kretprobe_trampoline() implemented in arch/x86_64/kernel/kprobes.c
* The kprobe completes as normal, with control passing back to the target
function that executes as normal, and eventually returns to our trampoline
function.
* Since a kprobe was installed on kretprobe_trampoline() during system
initialization, control passes back to kprobes via the architecture
specific function trampoline_probe_handler() which will lookup the
instance in an hlist maintained by kernel/kprobes.c, and then call
the handler function.
* When trampoline_probe_handler() is done, the kprobes infrastructure
single steps the original instruction (in this case just a nop), and
then calls trampoline_post_handler(). trampoline_post_handler() then
looks up the instance again, puts the instance back on the free list,
and then makes a long jump back to the original return instruction.
So to recap, to instrument the exit path of a function this implementation
will cause four interruptions:
- A breakpoint at the very beginning of the function allowing us to
switch out the return address
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
- A breakpoint in the trampoline function where our instrumented function
returned to
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 11:09:23 +04:00
* 2005 - May Rusty Lynch < rusty . lynch @ intel . com >
* Added function return probes functionality
2005-04-17 02:20:36 +04:00
*/
# include <linux/kprobes.h>
# include <linux/ptrace.h>
# include <linux/string.h>
# include <linux/slab.h>
# include <linux/preempt.h>
2006-03-26 13:38:23 +04:00
# include <linux/module.h>
2007-05-08 11:27:03 +04:00
# include <linux/kdebug.h>
2005-06-28 02:17:01 +04:00
2005-06-23 11:09:25 +04:00
# include <asm/cacheflush.h>
2005-04-17 02:20:36 +04:00
# include <asm/pgtable.h>
2006-03-26 13:38:23 +04:00
# include <asm/uaccess.h>
2005-04-17 02:20:36 +04:00
/* Forward declaration; the definition is not part of this section of the file. */
void jprobe_return_end ( void ) ;
2006-01-10 07:52:44 +03:00
/* Forward declaration: arch_prepare_kprobe() calls this before its definition appears. */
static void __kprobes arch_copy_kprobe ( struct kprobe * p ) ;
2005-04-17 02:20:36 +04:00
2005-11-07 12:00:12 +03:00
/*
 * Per-CPU state: a pointer to the probe currently being handled on this CPU
 * (initially NULL) and the control block holding its status and saved flags.
 */
DEFINE_PER_CPU ( struct kprobe * , current_kprobe ) = NULL ;
DEFINE_PER_CPU ( struct kprobe_ctlblk , kprobe_ctlblk ) ;
2005-04-17 02:20:36 +04:00
/*
* returns non - zero if opcode modifies the interrupt flag .
*/
2006-04-19 09:22:00 +04:00
static __always_inline int is_IF_modifier(kprobe_opcode_t *insn)
{
	kprobe_opcode_t first = *insn;

	/* cli, sti, iret/iretd and popf/popfd all rewrite the interrupt flag. */
	if (first == 0xfa || first == 0xfb || first == 0xcf || first == 0x9d)
		return 1;

	/* Anything outside 0x40-0x4f is not a prefix we look behind. */
	if (first < 0x40 || first > 0x4f)
		return 0;

	/* A 0x40-0x4f prefix byte followed by 0xcf (iret) also counts. */
	return insn[1] == 0xcf;
}
2005-09-07 02:19:28 +04:00
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
	/*
	 * On x86_64 the instruction copy must live on a special executable
	 * page, so ask for an instruction slot instead of using ordinary
	 * storage.
	 */
	p->ainsn.insn = get_insn_slot();
	if (p->ainsn.insn) {
		/* Slot obtained: copy the probed instruction into it. */
		arch_copy_kprobe(p);
		return 0;
	}

	/* No slot was available. */
	return -ENOMEM;
}
/*
* Determine if the instruction uses the % rip - relative addressing mode .
* If it does , return the address of the 32 - bit displacement word .
* If not , return null .
*/
2006-04-19 09:22:00 +04:00
static s32 __kprobes * is_riprel ( u8 * insn )
2005-04-17 02:20:36 +04:00
{
/*
 * W() packs sixteen 0/1 flags (one per low opcode nibble, b0..bf) into bits
 * 0..15 of a word and shifts the result left by (row % 64), so that four
 * consecutive rows OR-ed together fill one 64-bit table element.
 */
# define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
( ( ( b0 # # UL < < 0x0 ) | ( b1 # # UL < < 0x1 ) | ( b2 # # UL < < 0x2 ) | ( b3 # # UL < < 0x3 ) | \
( b4 # # UL < < 0x4 ) | ( b5 # # UL < < 0x5 ) | ( b6 # # UL < < 0x6 ) | ( b7 # # UL < < 0x7 ) | \
( b8 # # UL < < 0x8 ) | ( b9 # # UL < < 0x9 ) | ( ba # # UL < < 0xa ) | ( bb # # UL < < 0xb ) | \
( bc # # UL < < 0xc ) | ( bd # # UL < < 0xd ) | ( be # # UL < < 0xe ) | ( bf # # UL < < 0xf ) ) \
< < ( row % 64 ) )
/*
 * Bitmap over the 256 one-byte opcodes: a set bit means the opcode is
 * followed by a ModRM byte. Looked up with test_bit() further down.
 */
static const u64 onebyte_has_modrm [ 256 / 64 ] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ------------------------------- */
W ( 0x00 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 ) | /* 00 */
W ( 0x10 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 ) | /* 10 */
W ( 0x20 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 ) | /* 20 */
W ( 0x30 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 ) , /* 30 */
W ( 0x40 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* 40 */
W ( 0x50 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* 50 */
W ( 0x60 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1 , 0 , 0 , 0 , 0 ) | /* 60 */
W ( 0x70 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) , /* 70 */
W ( 0x80 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* 80 */
W ( 0x90 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* 90 */
W ( 0xa0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* a0 */
W ( 0xb0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) , /* b0 */
W ( 0xc0 , 1 , 1 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* c0 */
W ( 0xd0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* d0 */
W ( 0xe0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* e0 */
W ( 0xf0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 ) /* f0 */
/* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
} ;
/*
 * Same layout as the table above, but indexed by the second byte of a
 * two-byte (0x0f-prefixed) opcode.
 */
static const u64 twobyte_has_modrm [ 256 / 64 ] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ------------------------------- */
W ( 0x00 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1 ) | /* 0f */
W ( 0x10 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* 1f */
W ( 0x20 , 1 , 1 , 1 , 1 , 1 , 0 , 1 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* 2f */
W ( 0x30 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) , /* 3f */
W ( 0x40 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* 4f */
W ( 0x50 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* 5f */
W ( 0x60 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* 6f */
W ( 0x70 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 ) , /* 7f */
W ( 0x80 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* 8f */
W ( 0x90 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* 9f */
W ( 0xa0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 ) | /* af */
W ( 0xb0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 ) , /* bf */
W ( 0xc0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) | /* cf */
W ( 0xd0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* df */
W ( 0xe0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ) | /* ef */
W ( 0xf0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 ) /* ff */
/* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
} ;
# undef W
/* Non-zero when the decoded opcode is followed by a ModRM byte. */
int need_modrm ;
/* Skip legacy instruction prefixes. */
/* Each listed prefix byte advances insn by one; the first other byte ends the loop. */
while ( 1 ) {
switch ( * insn ) {
case 0x66 :
case 0x67 :
case 0x2e :
case 0x3e :
case 0x26 :
case 0x64 :
case 0x65 :
case 0x36 :
case 0xf0 :
case 0xf3 :
case 0xf2 :
+ + insn ;
continue ;
}
break ;
}
/* Skip REX instruction prefix. */
/* At most one byte in the range 0x40-0x4f is skipped here. */
if ( ( * insn & 0xf0 ) = = 0x40 )
+ + insn ;
if ( * insn = = 0x0f ) { /* Two-byte opcode. */
+ + insn ;
need_modrm = test_bit ( * insn , twobyte_has_modrm ) ;
} else { /* One-byte opcode. */
need_modrm = test_bit ( * insn , onebyte_has_modrm ) ;
}
if ( need_modrm ) {
/* Step from the opcode byte to the ModRM byte that follows it. */
u8 modrm = * + + insn ;
/* Mask 0xc7 keeps the two top bits and three low bits of ModRM; 0x05 selects the form below. */
if ( ( modrm & 0xc7 ) = = 0x05 ) { /* %rip+disp32 addressing mode */
/* Displacement follows ModRM byte. */
return ( s32 * ) + + insn ;
}
}
/* No %rip-relative addressing mode here. */
return NULL ;
}
2006-01-10 07:52:44 +03:00
static void __kprobes arch_copy_kprobe(struct kprobe *p)
{
	s32 *disp32;

	/* Take a private copy of the probed instruction into the slot. */
	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);

	disp32 = is_riprel(p->ainsn.insn);
	if (disp32 != NULL) {
		/*
		 * The copy addresses memory relative to %rip, but it will
		 * execute from the slot rather than from p->addr. Shift the
		 * displacement by the distance between the two locations so
		 * the copy resolves to the same target. The corrected value
		 * must still fit a signed 32-bit field, because the CPU
		 * sign-extends it to 64 bits before adding it to %rip.
		 */
		s64 adjusted = (u8 *)p->addr + *disp32 - (u8 *)p->ainsn.insn;

		/* Sanity check: the value must survive the round trip through s32. */
		BUG_ON((s64)(s32)adjusted != adjusted);
		*disp32 = adjusted;
	}

	/* Remember the original opcode found at the probe address. */
	p->opcode = *p->addr;
}
2005-09-07 02:19:28 +04:00
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
	unsigned long start = (unsigned long)p->addr;

	/* Plant the breakpoint at the probe address, then flush that range. */
	*p->addr = BREAKPOINT_INSTRUCTION;
	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
}
2005-09-07 02:19:28 +04:00
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
	unsigned long start = (unsigned long)p->addr;

	/* Put back the opcode saved in p->opcode, then flush that range. */
	*p->addr = p->opcode;
	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
}
2006-01-10 07:52:46 +03:00
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
	/* The instruction slot is released while holding kprobe_mutex. */
	mutex_lock(&kprobe_mutex);
	free_insn_slot(p->ainsn.insn, 0);
	mutex_unlock(&kprobe_mutex);
}
2006-04-19 09:22:00 +04:00
static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	/*
	 * Stash the state of the probe that is currently running in
	 * kcb->prev_kprobe so restore_previous_kprobe() can bring it back.
	 */
	kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
	kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags;
	kcb->prev_kprobe.status = kcb->kprobe_status;
	kcb->prev_kprobe.kp = kprobe_running();
}
2006-04-19 09:22:00 +04:00
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	/*
	 * Reinstate the probe state that save_previous_kprobe() stored in
	 * kcb->prev_kprobe, including this CPU's current_kprobe pointer.
	 */
	kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
	kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags;
	kcb->kprobe_status = kcb->prev_kprobe.status;
	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
}
2006-04-19 09:22:00 +04:00
static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
					 struct kprobe_ctlblk *kcb)
{
	/* Make @p the probe being handled on this CPU. */
	__get_cpu_var(current_kprobe) = p;

	/* Record the TF and IF bits as they were when the probe was hit. */
	kcb->kprobe_old_rflags = (regs->eflags & (TF_MASK | IF_MASK));
	kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags;

	/*
	 * If the probed instruction itself rewrites the interrupt flag, do
	 * not carry the pre-probe IF value in the saved flags.
	 */
	if (is_IF_modifier(p->ainsn.insn))
		kcb->kprobe_saved_rflags &= ~IF_MASK;
}
2005-09-07 02:19:28 +04:00
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
	/* Turn on single-stepping (TF) and mask interrupts (IF) for the step. */
	regs->eflags = (regs->eflags | TF_MASK) & ~IF_MASK;

	/*
	 * An instruction that was itself an int3 is single-stepped in place;
	 * any other instruction is stepped from its copy in the slot.
	 */
	regs->rip = (p->opcode == BREAKPOINT_INSTRUCTION)
			? (unsigned long)p->addr
			: (unsigned long)p->ainsn.insn;
}
2005-11-07 12:00:14 +03:00
/* Called with kretprobe_lock held */
2005-09-07 02:19:28 +04:00
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
				      struct pt_regs *regs)
{
	/* regs->rsp is used as the address of the stack word holding the return address. */
	unsigned long *ret_slot = (unsigned long *)regs->rsp;
	struct kretprobe_instance *ri = get_free_rp_inst(rp);

	if (ri == NULL) {
		/* No free instance: count the miss and leave the stack untouched. */
		rp->nmissed++;
		return;
	}

	ri->rp = rp;
	ri->task = current;
	ri->ret_addr = (kprobe_opcode_t *)*ret_slot;

	/* Replace the return addr with trampoline addr */
	*ret_slot = (unsigned long)&kretprobe_trampoline;

	add_rp_inst(ri);
}
2005-09-07 02:19:28 +04:00
int __kprobes kprobe_handler ( struct pt_regs * regs )
2005-04-17 02:20:36 +04:00
{
/*
 * Breakpoint entry point for kprobes. Returns non-zero when the trap was
 * dealt with here and 0 when it is not one of ours.
 * Every path that leaves through the no_kprobe label re-enables
 * preemption; the paths that "return 1" directly leave it disabled.
 * NOTE(review): presumably preemption is re-enabled later by code outside
 * this view - confirm against the single-step/post handlers.
 */
struct kprobe * p ;
int ret = 0 ;
/* rip minus one opcode: the address of the breakpoint that just trapped. */
kprobe_opcode_t * addr = ( kprobe_opcode_t * ) ( regs - > rip - sizeof ( kprobe_opcode_t ) ) ;
2005-11-07 12:00:14 +03:00
struct kprobe_ctlblk * kcb ;
/*
* We don ' t want to be preempted for the entire
* duration of kprobe processing
*/
preempt_disable ( ) ;
kcb = get_kprobe_ctlblk ( ) ;
2005-04-17 02:20:36 +04:00
/* Check we're not actually recursing */
if ( kprobe_running ( ) ) {
p = get_kprobe ( addr ) ;
if ( p ) {
/*
 * A probe is registered at addr while another one is already being
 * handled on this CPU. Three cases follow, chosen by the status
 * recorded in the control block.
 */
2005-11-07 12:00:12 +03:00
if ( kcb - > kprobe_status = = KPROBE_HIT_SS & &
2005-09-07 02:19:35 +04:00
* p - > ainsn . insn = = BREAKPOINT_INSTRUCTION ) {
2005-04-17 02:20:36 +04:00
/* Stop single-stepping, put the saved flag bits back and report "not handled". */
regs - > eflags & = ~ TF_MASK ;
2005-11-07 12:00:12 +03:00
regs - > eflags | = kcb - > kprobe_saved_rflags ;
2005-04-17 02:20:36 +04:00
goto no_kprobe ;
2005-11-07 12:00:12 +03:00
} else if ( kcb - > kprobe_status = = KPROBE_HIT_SSDONE ) {
2005-06-23 11:09:37 +04:00
/* TODO: Provide re-entrancy from
* post_kprobes_handler ( ) and avoid exception
* stack corruption while single - stepping on
* the instruction of the new probe .
*/
/* Disarm the new probe and resume at its address; falls through to no_kprobe below with ret = 1. */
arch_disarm_kprobe ( p ) ;
regs - > rip = ( unsigned long ) p - > addr ;
2005-11-07 12:00:12 +03:00
reset_current_kprobe ( ) ;
2005-06-23 11:09:37 +04:00
ret = 1 ;
} else {
/* We have reentered the kprobe_handler(), since
* another probe was hit while within the
* handler . We here save the original kprobe
* variables and just single step on instruction
* of the new probe without calling any user
* handlers .
*/
2005-11-07 12:00:12 +03:00
save_previous_kprobe ( kcb ) ;
set_current_kprobe ( p , regs , kcb ) ;
2005-12-12 11:37:34 +03:00
kprobes_inc_nmissed_count ( p ) ;
2005-06-23 11:09:37 +04:00
prepare_singlestep ( p , regs ) ;
2005-11-07 12:00:12 +03:00
kcb - > kprobe_status = KPROBE_REENTER ;
2005-06-23 11:09:37 +04:00
/* Direct return: this path does not pass through no_kprobe. */
return 1 ;
2005-04-17 02:20:36 +04:00
}
} else {
2006-01-11 23:17:42 +03:00
if ( * addr ! = BREAKPOINT_INSTRUCTION ) {
/* The breakpoint instruction was removed by
* another cpu right after we hit , no further
* handling of this interrupt is appropriate
*/
regs - > rip = ( unsigned long ) addr ;
ret = 1 ;
goto no_kprobe ;
}
2005-11-07 12:00:12 +03:00
/* No probe at addr: offer the trap to the running probe's break_handler, if it has one. */
p = __get_cpu_var ( current_kprobe ) ;
2005-04-17 02:20:36 +04:00
if ( p - > break_handler & & p - > break_handler ( p , regs ) ) {
goto ss_probe ;
}
}
goto no_kprobe ;
}
/* No probe was running on this CPU: look up the probe registered at addr. */
p = get_kprobe ( addr ) ;
if ( ! p ) {
if ( * addr ! = BREAKPOINT_INSTRUCTION ) {
/*
* The breakpoint instruction was removed right
* after we hit it . Another cpu has removed
* either a probepoint or a debugger breakpoint
* at this address . In either case , no further
* handling of this interrupt is appropriate .
2005-09-07 02:19:34 +04:00
* Back up over the ( now missing ) int3 and run
* the original instruction .
2005-04-17 02:20:36 +04:00
*/
2005-09-07 02:19:34 +04:00
regs - > rip = ( unsigned long ) addr ;
2005-04-17 02:20:36 +04:00
ret = 1 ;
}
/* Not one of ours: let kernel handle it */
goto no_kprobe ;
}
2005-11-07 12:00:12 +03:00
set_current_kprobe ( p , regs , kcb ) ;
kcb - > kprobe_status = KPROBE_HIT_ACTIVE ;
2005-04-17 02:20:36 +04:00
/* A non-zero return from the pre_handler skips the single-step setup below. */
if ( p - > pre_handler & & p - > pre_handler ( p , regs ) )
/* handler has already set things up, so skip ss setup */
return 1 ;
/* Common exit that arranges a single-step of the probed instruction. */
ss_probe :
prepare_singlestep ( p , regs ) ;
2005-11-07 12:00:12 +03:00
kcb - > kprobe_status = KPROBE_HIT_SS ;
2005-04-17 02:20:36 +04:00
return 1 ;
/* Common exit that undoes the preempt_disable() taken on entry. */
no_kprobe :
2005-11-07 12:00:14 +03:00
preempt_enable_no_resched ( ) ;
2005-04-17 02:20:36 +04:00
return ret ;
}
[PATCH] x86_64 specific function return probes
The following patch adds the x86_64 architecture specific implementation
for function return probes.
Function return probes is a mechanism built on top of kprobes that allows
a caller to register a handler to be called when a given function exits.
For example, to instrument the return path of sys_mkdir:
static int sys_mkdir_exit(struct kretprobe_instance *i, struct pt_regs *regs)
{
printk("sys_mkdir exited\n");
return 0;
}
static struct kretprobe return_probe = {
.handler = sys_mkdir_exit,
};
<inside setup function>
return_probe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("sys_mkdir");
if (register_kretprobe(&return_probe)) {
printk(KERN_DEBUG "Unable to register return probe!\n");
/* do error path */
}
<inside cleanup function>
unregister_kretprobe(&return_probe);
The way this works is that:
* At system initialization time, kernel/kprobes.c installs a kprobe
on a function called kretprobe_trampoline() that is implemented in
the arch/x86_64/kernel/kprobes.c (More on this later)
* When a return probe is registered using register_kretprobe(),
kernel/kprobes.c will install a kprobe on the first instruction of the
targeted function with the pre handler set to arch_prepare_kretprobe()
which is implemented in arch/x86_64/kernel/kprobes.c.
* arch_prepare_kretprobe() will prepare a kretprobe instance that stores:
- nodes for hanging this instance in an empty or free list
- a pointer to the return probe
- the original return address
- a pointer to the stack address
With all this stowed away, arch_prepare_kretprobe() then sets the return
address for the targeted function to a special trampoline function called
kretprobe_trampoline() implemented in arch/x86_64/kernel/kprobes.c
* The kprobe completes as normal, with control passing back to the target
function that executes as normal, and eventually returns to our trampoline
function.
* Since a kprobe was installed on kretprobe_trampoline() during system
initialization, control passes back to kprobes via the architecture
specific function trampoline_probe_handler() which will lookup the
instance in an hlist maintained by kernel/kprobes.c, and then call
the handler function.
* When trampoline_probe_handler() is done, the kprobes infrastructure
single steps the original instruction (in this case just a nop), and
then calls trampoline_post_handler(). trampoline_post_handler() then
looks up the instance again, puts the instance back on the free list,
and then makes a long jump back to the original return instruction.
So to recap, to instrument the exit path of a function this implementation
will cause four interruptions:
- A breakpoint at the very beginning of the function allowing us to
switch out the return address
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
- A breakpoint in the trampoline function where our instrumented function
returned to
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 11:09:23 +04:00
/*
 * Holder for the kretprobe_trampoline label used by function-return probes.
 *
 * The inline asm emits a global symbol, kretprobe_trampoline, followed by a
 * single nop.  trampoline_p (defined later in this file) names that address
 * as its probe point with trampoline_probe_handler() as the pre-handler, and
 * arch_init_kprobes() registers it.  When a retprobed function returns, this
 * probe is hit and trampoline_probe_handler() runs, calling the kretprobe's
 * handler.
 */
void kretprobe_trampoline_holder ( void )
{
asm volatile ( " .global kretprobe_trampoline \n "
" kretprobe_trampoline: \n "
" nop \n " ) ;
}
/*
* Called when we hit the probe point at kretprobe_trampoline
*/
2005-09-07 02:19:28 +04:00
int __kprobes trampoline_probe_handler ( struct kprobe * p , struct pt_regs * regs )
[PATCH] x86_64 specific function return probes
The following patch adds the x86_64 architecture specific implementation
for function return probes.
Function return probes is a mechanism built on top of kprobes that allows
a caller to register a handler to be called when a given function exits.
For example, to instrument the return path of sys_mkdir:
static int sys_mkdir_exit(struct kretprobe_instance *i, struct pt_regs *regs)
{
printk("sys_mkdir exited\n");
return 0;
}
static struct kretprobe return_probe = {
.handler = sys_mkdir_exit,
};
<inside setup function>
return_probe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("sys_mkdir");
if (register_kretprobe(&return_probe)) {
printk(KERN_DEBUG "Unable to register return probe!\n");
/* do error path */
}
<inside cleanup function>
unregister_kretprobe(&return_probe);
The way this works is that:
* At system initialization time, kernel/kprobes.c installs a kprobe
on a function called kretprobe_trampoline() that is implemented in
the arch/x86_64/kernel/kprobes.c (More on this later)
* When a return probe is registered using register_kretprobe(),
kernel/kprobes.c will install a kprobe on the first instruction of the
targeted function with the pre handler set to arch_prepare_kretprobe()
which is implemented in arch/x86_64/kernel/kprobes.c.
* arch_prepare_kretprobe() will prepare a kretprobe instance that stores:
- nodes for hanging this instance in an empty or free list
- a pointer to the return probe
- the original return address
- a pointer to the stack address
With all this stowed away, arch_prepare_kretprobe() then sets the return
address for the targeted function to a special trampoline function called
kretprobe_trampoline() implemented in arch/x86_64/kernel/kprobes.c
* The kprobe completes as normal, with control passing back to the target
function that executes as normal, and eventually returns to our trampoline
function.
* Since a kprobe was installed on kretprobe_trampoline() during system
initialization, control passes back to kprobes via the architecture
specific function trampoline_probe_handler() which will lookup the
instance in an hlist maintained by kernel/kprobes.c, and then call
the handler function.
* When trampoline_probe_handler() is done, the kprobes infrastructure
single steps the original instruction (in this case just a top), and
then calls trampoline_post_handler(). trampoline_post_handler() then
looks up the instance again, puts the instance back on the free list,
and then makes a long jump back to the original return instruction.
So to recap, to instrument the exit path of a function this implementation
will cause four interruptions:
- A breakpoint at the very beginning of the function allowing us to
switch out the return address
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
- A breakpoint in the trampoline function where our instrumented function
returned to
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 11:09:23 +04:00
{
2006-10-02 13:17:33 +04:00
struct kretprobe_instance * ri = NULL ;
2006-10-02 13:17:35 +04:00
struct hlist_head * head , empty_rp ;
2006-10-02 13:17:33 +04:00
struct hlist_node * node , * tmp ;
2005-11-07 12:00:14 +03:00
unsigned long flags , orig_ret_address = 0 ;
2005-06-28 02:17:10 +04:00
unsigned long trampoline_address = ( unsigned long ) & kretprobe_trampoline ;
[PATCH] x86_64 specific function return probes
The following patch adds the x86_64 architecture specific implementation
for function return probes.
Function return probes is a mechanism built on top of kprobes that allows
a caller to register a handler to be called when a given function exits.
For example, to instrument the return path of sys_mkdir:
static int sys_mkdir_exit(struct kretprobe_instance *i, struct pt_regs *regs)
{
printk("sys_mkdir exited\n");
return 0;
}
static struct kretprobe return_probe = {
.handler = sys_mkdir_exit,
};
<inside setup function>
return_probe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("sys_mkdir");
if (register_kretprobe(&return_probe)) {
printk(KERN_DEBUG "Unable to register return probe!\n");
/* do error path */
}
<inside cleanup function>
unregister_kretprobe(&return_probe);
The way this works is that:
* At system initialization time, kernel/kprobes.c installs a kprobe
on a function called kretprobe_trampoline() that is implemented in
the arch/x86_64/kernel/kprobes.c (More on this later)
* When a return probe is registered using register_kretprobe(),
kernel/kprobes.c will install a kprobe on the first instruction of the
targeted function with the pre handler set to arch_prepare_kretprobe()
which is implemented in arch/x86_64/kernel/kprobes.c.
* arch_prepare_kretprobe() will prepare a kretprobe instance that stores:
- nodes for hanging this instance in an empty or free list
- a pointer to the return probe
- the original return address
- a pointer to the stack address
With all this stowed away, arch_prepare_kretprobe() then sets the return
address for the targeted function to a special trampoline function called
kretprobe_trampoline() implemented in arch/x86_64/kernel/kprobes.c
* The kprobe completes as normal, with control passing back to the target
function that executes as normal, and eventually returns to our trampoline
function.
* Since a kprobe was installed on kretprobe_trampoline() during system
initialization, control passes back to kprobes via the architecture
specific function trampoline_probe_handler() which will lookup the
instance in an hlist maintained by kernel/kprobes.c, and then call
the handler function.
* When trampoline_probe_handler() is done, the kprobes infrastructure
single steps the original instruction (in this case just a top), and
then calls trampoline_post_handler(). trampoline_post_handler() then
looks up the instance again, puts the instance back on the free list,
and then makes a long jump back to the original return instruction.
So to recap, to instrument the exit path of a function this implementation
will cause four interruptions:
- A breakpoint at the very beginning of the function allowing us to
switch out the return address
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
- A breakpoint in the trampoline function where our instrumented function
returned to
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 11:09:23 +04:00
2006-10-02 13:17:35 +04:00
INIT_HLIST_HEAD ( & empty_rp ) ;
2005-11-07 12:00:14 +03:00
spin_lock_irqsave ( & kretprobe_lock , flags ) ;
2006-10-02 13:17:33 +04:00
head = kretprobe_inst_table_head ( current ) ;
[PATCH] x86_64 specific function return probes
The following patch adds the x86_64 architecture specific implementation
for function return probes.
Function return probes is a mechanism built on top of kprobes that allows
a caller to register a handler to be called when a given function exits.
For example, to instrument the return path of sys_mkdir:
static int sys_mkdir_exit(struct kretprobe_instance *i, struct pt_regs *regs)
{
printk("sys_mkdir exited\n");
return 0;
}
static struct kretprobe return_probe = {
.handler = sys_mkdir_exit,
};
<inside setup function>
return_probe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("sys_mkdir");
if (register_kretprobe(&return_probe)) {
printk(KERN_DEBUG "Unable to register return probe!\n");
/* do error path */
}
<inside cleanup function>
unregister_kretprobe(&return_probe);
The way this works is that:
* At system initialization time, kernel/kprobes.c installs a kprobe
on a function called kretprobe_trampoline() that is implemented in
the arch/x86_64/kernel/kprobes.c (More on this later)
* When a return probe is registered using register_kretprobe(),
kernel/kprobes.c will install a kprobe on the first instruction of the
targeted function with the pre handler set to arch_prepare_kretprobe()
which is implemented in arch/x86_64/kernel/kprobes.c.
* arch_prepare_kretprobe() will prepare a kretprobe instance that stores:
- nodes for hanging this instance in an empty or free list
- a pointer to the return probe
- the original return address
- a pointer to the stack address
With all this stowed away, arch_prepare_kretprobe() then sets the return
address for the targeted function to a special trampoline function called
kretprobe_trampoline() implemented in arch/x86_64/kernel/kprobes.c
* The kprobe completes as normal, with control passing back to the target
function that executes as normal, and eventually returns to our trampoline
function.
* Since a kprobe was installed on kretprobe_trampoline() during system
initialization, control passes back to kprobes via the architecture
specific function trampoline_probe_handler() which will lookup the
instance in an hlist maintained by kernel/kprobes.c, and then call
the handler function.
* When trampoline_probe_handler() is done, the kprobes infrastructure
single steps the original instruction (in this case just a top), and
then calls trampoline_post_handler(). trampoline_post_handler() then
looks up the instance again, puts the instance back on the free list,
and then makes a long jump back to the original return instruction.
So to recap, to instrument the exit path of a function this implementation
will cause four interruptions:
- A breakpoint at the very beginning of the function allowing us to
switch out the return address
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
- A breakpoint in the trampoline function where our instrumented function
returned to
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 11:09:23 +04:00
2005-06-28 02:17:10 +04:00
/*
* It is possible to have multiple instances associated with a given
* task either because an multiple functions in the call path
* have a return probe installed on them , and / or more then one return
* return probe was registered for a target function .
*
* We can handle this because :
* - instances are always inserted at the head of the list
* - when multiple return probes are registered for the same
2006-10-02 13:17:33 +04:00
* function , the first instance ' s ret_addr will point to the
2005-06-28 02:17:10 +04:00
* real return address , and all the rest will point to
* kretprobe_trampoline
*/
hlist_for_each_entry_safe ( ri , node , tmp , head , hlist ) {
2006-10-02 13:17:33 +04:00
if ( ri - > task ! = current )
2005-06-28 02:17:10 +04:00
/* another task is sharing our hash bucket */
2006-10-02 13:17:33 +04:00
continue ;
2005-06-28 02:17:10 +04:00
if ( ri - > rp & & ri - > rp - > handler )
ri - > rp - > handler ( ri , regs ) ;
orig_ret_address = ( unsigned long ) ri - > ret_addr ;
2006-10-02 13:17:35 +04:00
recycle_rp_inst ( ri , & empty_rp ) ;
2005-06-28 02:17:10 +04:00
if ( orig_ret_address ! = trampoline_address )
/*
* This is the real return address . Any other
* instances associated with this task are for
* other calls deeper on the call stack
*/
break ;
[PATCH] x86_64 specific function return probes
The following patch adds the x86_64 architecture specific implementation
for function return probes.
Function return probes is a mechanism built on top of kprobes that allows
a caller to register a handler to be called when a given function exits.
For example, to instrument the return path of sys_mkdir:
static int sys_mkdir_exit(struct kretprobe_instance *i, struct pt_regs *regs)
{
printk("sys_mkdir exited\n");
return 0;
}
static struct kretprobe return_probe = {
.handler = sys_mkdir_exit,
};
<inside setup function>
return_probe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("sys_mkdir");
if (register_kretprobe(&return_probe)) {
printk(KERN_DEBUG "Unable to register return probe!\n");
/* do error path */
}
<inside cleanup function>
unregister_kretprobe(&return_probe);
The way this works is that:
* At system initialization time, kernel/kprobes.c installs a kprobe
on a function called kretprobe_trampoline() that is implemented in
the arch/x86_64/kernel/kprobes.c (More on this later)
* When a return probe is registered using register_kretprobe(),
kernel/kprobes.c will install a kprobe on the first instruction of the
targeted function with the pre handler set to arch_prepare_kretprobe()
which is implemented in arch/x86_64/kernel/kprobes.c.
* arch_prepare_kretprobe() will prepare a kretprobe instance that stores:
- nodes for hanging this instance in an empty or free list
- a pointer to the return probe
- the original return address
- a pointer to the stack address
With all this stowed away, arch_prepare_kretprobe() then sets the return
address for the targeted function to a special trampoline function called
kretprobe_trampoline() implemented in arch/x86_64/kernel/kprobes.c
* The kprobe completes as normal, with control passing back to the target
function that executes as normal, and eventually returns to our trampoline
function.
* Since a kprobe was installed on kretprobe_trampoline() during system
initialization, control passes back to kprobes via the architecture
specific function trampoline_probe_handler() which will lookup the
instance in an hlist maintained by kernel/kprobes.c, and then call
the handler function.
* When trampoline_probe_handler() is done, the kprobes infrastructure
single steps the original instruction (in this case just a top), and
then calls trampoline_post_handler(). trampoline_post_handler() then
looks up the instance again, puts the instance back on the free list,
and then makes a long jump back to the original return instruction.
So to recap, to instrument the exit path of a function this implementation
will cause four interruptions:
- A breakpoint at the very beginning of the function allowing us to
switch out the return address
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
- A breakpoint in the trampoline function where our instrumented function
returned to
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 11:09:23 +04:00
}
2005-06-28 02:17:10 +04:00
BUG_ON ( ! orig_ret_address | | ( orig_ret_address = = trampoline_address ) ) ;
regs - > rip = orig_ret_address ;
2005-11-07 12:00:12 +03:00
reset_current_kprobe ( ) ;
2005-11-07 12:00:14 +03:00
spin_unlock_irqrestore ( & kretprobe_lock , flags ) ;
2005-06-28 02:17:10 +04:00
preempt_enable_no_resched ( ) ;
2006-10-02 13:17:35 +04:00
hlist_for_each_entry_safe ( ri , node , tmp , & empty_rp , hlist ) {
hlist_del ( & ri - > hlist ) ;
kfree ( ri ) ;
}
2006-10-02 13:17:33 +04:00
/*
* By returning a non - zero value , we are telling
* kprobe_handler ( ) that we don ' t want the post_handler
2005-11-07 12:00:14 +03:00
* to run ( and have re - enabled preemption )
2006-10-02 13:17:33 +04:00
*/
return 1 ;
[PATCH] x86_64 specific function return probes
The following patch adds the x86_64 architecture specific implementation
for function return probes.
Function return probes is a mechanism built on top of kprobes that allows
a caller to register a handler to be called when a given function exits.
For example, to instrument the return path of sys_mkdir:
static int sys_mkdir_exit(struct kretprobe_instance *i, struct pt_regs *regs)
{
printk("sys_mkdir exited\n");
return 0;
}
static struct kretprobe return_probe = {
.handler = sys_mkdir_exit,
};
<inside setup function>
return_probe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("sys_mkdir");
if (register_kretprobe(&return_probe)) {
printk(KERN_DEBUG "Unable to register return probe!\n");
/* do error path */
}
<inside cleanup function>
unregister_kretprobe(&return_probe);
The way this works is that:
* At system initialization time, kernel/kprobes.c installs a kprobe
on a function called kretprobe_trampoline() that is implemented in
the arch/x86_64/kernel/kprobes.c (More on this later)
* When a return probe is registered using register_kretprobe(),
kernel/kprobes.c will install a kprobe on the first instruction of the
targeted function with the pre handler set to arch_prepare_kretprobe()
which is implemented in arch/x86_64/kernel/kprobes.c.
* arch_prepare_kretprobe() will prepare a kretprobe instance that stores:
- nodes for hanging this instance in an empty or free list
- a pointer to the return probe
- the original return address
- a pointer to the stack address
With all this stowed away, arch_prepare_kretprobe() then sets the return
address for the targeted function to a special trampoline function called
kretprobe_trampoline() implemented in arch/x86_64/kernel/kprobes.c
* The kprobe completes as normal, with control passing back to the target
function that executes as normal, and eventually returns to our trampoline
function.
* Since a kprobe was installed on kretprobe_trampoline() during system
initialization, control passes back to kprobes via the architecture
specific function trampoline_probe_handler() which will lookup the
instance in an hlist maintained by kernel/kprobes.c, and then call
the handler function.
* When trampoline_probe_handler() is done, the kprobes infrastructure
single steps the original instruction (in this case just a top), and
then calls trampoline_post_handler(). trampoline_post_handler() then
looks up the instance again, puts the instance back on the free list,
and then makes a long jump back to the original return instruction.
So to recap, to instrument the exit path of a function this implementation
will cause four interruptions:
- A breakpoint at the very beginning of the function allowing us to
switch out the return address
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
- A breakpoint in the trampoline function where our instrumented function
returned to
- A single step interruption to execute the original instruction that
we replaced with the break instruction (normal kprobe flow)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 11:09:23 +04:00
}
2005-04-17 02:20:36 +04:00
/*
* Called after single - stepping . p - > addr is the address of the
* instruction whose first byte has been replaced by the " int 3 "
* instruction . To avoid the SMP problems that can occur when we
* temporarily put back the original opcode to single - step , we
* single - stepped a copy of the instruction . The address of this
* copy is p - > ainsn . insn .
*
* This function prepares to return from the post - single - step
* interrupt . We have to fix up the stack as follows :
*
* 0 ) Except in the case of absolute or indirect jump or call instructions ,
* the new rip is relative to the copied instruction . We need to make
* it relative to the original instruction .
*
* 1 ) If the single - stepped instruction was pushfl , then the TF and IF
* flags are set in the just - pushed eflags , and may need to be cleared .
*
* 2 ) If the single - stepped instruction was a call , the return address
* that is atop the stack is the address following the copied instruction .
* We need to make it the address following the original instruction .
*/
2005-11-07 12:00:12 +03:00
static void __kprobes resume_execution ( struct kprobe * p ,
struct pt_regs * regs , struct kprobe_ctlblk * kcb )
2005-04-17 02:20:36 +04:00
{
unsigned long * tos = ( unsigned long * ) regs - > rsp ;
unsigned long next_rip = 0 ;
unsigned long copy_rip = ( unsigned long ) p - > ainsn . insn ;
unsigned long orig_rip = ( unsigned long ) p - > addr ;
kprobe_opcode_t * insn = p - > ainsn . insn ;
/*skip the REX prefix*/
if ( * insn > = 0x40 & & * insn < = 0x4f )
insn + + ;
switch ( * insn ) {
case 0x9c : /* pushfl */
* tos & = ~ ( TF_MASK | IF_MASK ) ;
2005-11-07 12:00:12 +03:00
* tos | = kcb - > kprobe_old_rflags ;
2005-04-17 02:20:36 +04:00
break ;
2005-05-06 03:15:40 +04:00
case 0xc3 : /* ret/lret */
case 0xcb :
case 0xc2 :
case 0xca :
regs - > eflags & = ~ TF_MASK ;
/* rip is already adjusted, no more changes required*/
return ;
2005-04-17 02:20:36 +04:00
case 0xe8 : /* call relative - Fix return addr */
* tos = orig_rip + ( * tos - copy_rip ) ;
break ;
case 0xff :
2006-05-21 02:00:21 +04:00
if ( ( insn [ 1 ] & 0x30 ) = = 0x10 ) {
2005-04-17 02:20:36 +04:00
/* call absolute, indirect */
/* Fix return addr; rip is correct. */
next_rip = regs - > rip ;
* tos = orig_rip + ( * tos - copy_rip ) ;
2006-05-21 02:00:21 +04:00
} else if ( ( ( insn [ 1 ] & 0x31 ) = = 0x20 ) | | /* jmp near, absolute indirect */
( ( insn [ 1 ] & 0x31 ) = = 0x21 ) ) { /* jmp far, absolute indirect */
2005-04-17 02:20:36 +04:00
/* rip is correct. */
next_rip = regs - > rip ;
}
break ;
case 0xea : /* jmp absolute -- rip is correct */
next_rip = regs - > rip ;
break ;
default :
break ;
}
regs - > eflags & = ~ TF_MASK ;
if ( next_rip ) {
regs - > rip = next_rip ;
} else {
regs - > rip = orig_rip + ( regs - > rip - copy_rip ) ;
}
}
2005-09-07 02:19:28 +04:00
/*
 * Debug-trap side of a kprobe hit: runs after the probed instruction copy
 * has been single-stepped (invoked from kprobe_exceptions_notify() on
 * DIE_DEBUG).
 *
 * Calls the probe's post_handler unless this is a re-entered probe, fixes up
 * the register state via resume_execution(), restores the saved flags, and
 * then either restores the previous kprobe (re-entry) or resets the current
 * one.
 *
 * Returns 0 when no kprobe is running, or when TF is still set in
 * regs->eflags afterwards; returns 1 otherwise.
 */
int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
	struct kprobe *active = kprobe_running();
	struct kprobe_ctlblk *ctl = get_kprobe_ctlblk();

	if (!active)
		return 0;

	if (active->post_handler && (ctl->kprobe_status != KPROBE_REENTER)) {
		ctl->kprobe_status = KPROBE_HIT_SSDONE;
		active->post_handler(active, regs, 0);
	}

	resume_execution(active, regs, ctl);
	regs->eflags |= ctl->kprobe_saved_rflags;

	/* Restore the original saved kprobes variables and continue. */
	if (ctl->kprobe_status == KPROBE_REENTER)
		restore_previous_kprobe(ctl);
	else
		reset_current_kprobe();

	preempt_enable_no_resched();

	/*
	 * If somebody else is single-stepping across a probe point, eflags
	 * will have TF set; in that case let do_debug continue its remaining
	 * processing as if this were not a probe hit.
	 */
	return (regs->eflags & TF_MASK) ? 0 : 1;
}
2005-09-07 02:19:28 +04:00
/*
 * Handle a fault (trap number @trapnr) taken while a kprobe is active.
 *
 * Returns 1 when the fault was dealt with here (by the probe's own
 * fault_handler or by an exception-table fixup), 0 when the normal fault
 * path should carry on.
 */
int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
	struct kprobe *probe = kprobe_running();
	struct kprobe_ctlblk *ctl = get_kprobe_ctlblk();
	const struct exception_table_entry *entry;

	if (ctl->kprobe_status == KPROBE_HIT_SS ||
	    ctl->kprobe_status == KPROBE_REENTER) {
		/*
		 * The instruction being single-stepped caused the fault.
		 * Point rip back at the probe address, drop (or unwind) the
		 * current kprobe and let the page fault handler continue as
		 * for a normal page fault.
		 */
		regs->rip = (unsigned long)probe->addr;
		regs->eflags |= ctl->kprobe_old_rflags;

		if (ctl->kprobe_status == KPROBE_REENTER)
			restore_previous_kprobe(ctl);
		else
			reset_current_kprobe();

		preempt_enable_no_resched();
		return 0;
	}

	if (ctl->kprobe_status != KPROBE_HIT_ACTIVE &&
	    ctl->kprobe_status != KPROBE_HIT_SSDONE)
		return 0;

	/*
	 * Count the miss for accounting; npre/npostfault counters could
	 * also be used for accounting these specific fault cases.
	 */
	kprobes_inc_nmissed_count(probe);

	/*
	 * Instructions in the pre/post handler caused the fault, which can
	 * happen if the handler touches user space via copy_from_user(),
	 * get_user() etc.  Give the user-specified handler the first chance
	 * to fix it.
	 */
	if (probe->fault_handler && probe->fault_handler(probe, regs, trapnr))
		return 1;

	/* The user-specified handler returned zero (or is absent): try a fixup. */
	entry = search_exception_tables(regs->rip);
	if (entry) {
		regs->rip = entry->fixup;
		return 1;
	}

	/* No fixup available either; let do_page_fault() fix it. */
	return 0;
}
/*
* Wrapper routine for handling exceptions .
*/
2005-09-07 02:19:28 +04:00
int __kprobes kprobe_exceptions_notify ( struct notifier_block * self ,
unsigned long val , void * data )
2005-04-17 02:20:36 +04:00
{
struct die_args * args = ( struct die_args * ) data ;
2005-11-07 12:00:07 +03:00
int ret = NOTIFY_DONE ;
2006-03-26 13:38:21 +04:00
if ( args - > regs & & user_mode ( args - > regs ) )
return ret ;
2005-04-17 02:20:36 +04:00
switch ( val ) {
case DIE_INT3 :
if ( kprobe_handler ( args - > regs ) )
2005-11-07 12:00:07 +03:00
ret = NOTIFY_STOP ;
2005-04-17 02:20:36 +04:00
break ;
case DIE_DEBUG :
if ( post_kprobe_handler ( args - > regs ) )
2005-11-07 12:00:07 +03:00
ret = NOTIFY_STOP ;
2005-04-17 02:20:36 +04:00
break ;
case DIE_GPF :
case DIE_PAGE_FAULT :
2005-11-07 12:00:14 +03:00
/* kprobe_running() needs smp_processor_id() */
preempt_disable ( ) ;
2005-04-17 02:20:36 +04:00
if ( kprobe_running ( ) & &
kprobe_fault_handler ( args - > regs , args - > trapnr ) )
2005-11-07 12:00:07 +03:00
ret = NOTIFY_STOP ;
2005-11-07 12:00:14 +03:00
preempt_enable ( ) ;
2005-04-17 02:20:36 +04:00
break ;
default :
break ;
}
2005-11-07 12:00:07 +03:00
return ret ;
2005-04-17 02:20:36 +04:00
}
2005-09-07 02:19:28 +04:00
/*
 * Pre-handler of a jprobe.
 *
 * Saves the full register set, the stack pointer and a copy of the top of
 * the stack in the kprobe control block, then clears IF in the saved eflags
 * and redirects rip to the jprobe's entry function.
 *
 * Always returns 1.
 */
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe_ctlblk *ctl = get_kprobe_ctlblk();
	struct jprobe *jprobe = container_of(p, struct jprobe, kp);
	unsigned long sp;

	ctl->jprobe_saved_regs = *regs;
	ctl->jprobe_saved_rsp = (long *)regs->rsp;
	sp = (unsigned long)ctl->jprobe_saved_rsp;

	/*
	 * As Linus pointed out, gcc assumes that the callee owns the
	 * argument space and could overwrite it, e.g. through tailcall
	 * optimization.  To be absolutely safe, also save (and later
	 * restore) enough stack bytes to cover the argument area.
	 */
	memcpy(ctl->jprobes_stack, (kprobe_opcode_t *)sp, MIN_STACK_SIZE(sp));

	regs->eflags &= ~IF_MASK;
	regs->rip = (unsigned long)jprobe->entry;

	return 1;
}
2005-09-07 02:19:28 +04:00
/*
 * Hand control back to kprobes from a jprobe.
 *
 * The asm receives kcb->jprobe_saved_rsp in rbx (the "b" input constraint),
 * exchanges rbx with rsp, and then executes int3.  The global label
 * jprobe_return_end marks the end of this sequence; longjmp_break_handler()
 * uses jprobe_return and jprobe_return_end to recognise this breakpoint.
 */
void __kprobes jprobe_return ( void )
2005-04-17 02:20:36 +04:00
{
2005-11-07 12:00:12 +03:00
struct kprobe_ctlblk * kcb = get_kprobe_ctlblk ( ) ;
2005-04-17 02:20:36 +04:00
/* Swap in the stack pointer saved by setjmp_pre_handler(), then trap. */
asm volatile ( " xchg %%rbx,%%rsp \n "
" int3 \n "
" .globl jprobe_return_end \n "
" jprobe_return_end: \n "
" nop \n " : : " b "
2005-11-07 12:00:12 +03:00
( kcb - > jprobe_saved_rsp ) : " memory " ) ;
2005-04-17 02:20:36 +04:00
}
2005-09-07 02:19:28 +04:00
/*
 * Break handler of a jprobe: undoes what setjmp_pre_handler() set up.
 *
 * If the breakpoint that fired is the int3 inside jprobe_return() (its
 * address lies strictly between jprobe_return and jprobe_return_end), the
 * registers and the saved stack bytes are restored from the kprobe control
 * block, preemption is re-enabled and 1 is returned.  Any other breakpoint
 * is not ours and 0 is returned.
 *
 * If the current stack pointer does not match the one saved on entry, both
 * register sets are dumped and the kernel BUG()s.
 */
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	u8 *addr = (u8 *)(regs->rip - 1);	/* address of the int3 itself */
	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp);
	struct jprobe *jp = container_of(p, struct jprobe, kp);

	if ((addr <= (u8 *)jprobe_return) || (addr >= (u8 *)jprobe_return_end))
		return 0;

	if ((long *)regs->rsp != kcb->jprobe_saved_rsp) {
		/*
		 * jprobe_saved_rsp holds the saved stack pointer value, not
		 * the address of a pt_regs member, so container_of() on it
		 * would yield a bogus pt_regs.  The registers captured by
		 * setjmp_pre_handler() live in jprobe_saved_regs.
		 */
		struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;

		printk(" current rsp %p does not match saved rsp %p \n ",
		       (long *)regs->rsp, kcb->jprobe_saved_rsp);
		printk(" Saved registers for jprobe %p \n ", jp);
		show_registers(saved_regs);
		printk(" Current registers \n ");
		show_registers(regs);
		BUG();
	}

	*regs = kcb->jprobe_saved_regs;
	memcpy((kprobe_opcode_t *)stack_addr, kcb->jprobes_stack,
	       MIN_STACK_SIZE(stack_addr));
	preempt_enable_no_resched();
	return 1;
}
2005-06-28 02:17:10 +04:00
/*
 * The kprobe placed on kretprobe_trampoline; its pre-handler is
 * trampoline_probe_handler().  Registered by arch_init_kprobes().
 */
static struct kprobe trampoline_p = {
. addr = ( kprobe_opcode_t * ) & kretprobe_trampoline ,
. pre_handler = trampoline_probe_handler
} ;
2005-07-06 05:54:50 +04:00
/*
 * Architecture-specific kprobes initialisation: registers trampoline_p, the
 * kprobe on kretprobe_trampoline, and returns register_kprobe()'s result.
 */
int __init arch_init_kprobes ( void )
2005-06-28 02:17:10 +04:00
{
return register_kprobe ( & trampoline_p ) ;
}