/*
 * Support for MMIO probes.
 * Borrows a lot of code from kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */
#include <linux/version.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <asm/io.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>

#include "kmmio.h"

#define KMMIO_HASH_BITS 6
#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS)
#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
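
/*
 * Illustration (editor's note, not in the original source): with
 * KMMIO_PAGE_HASH_BITS = 4 the fault page table below has 16 buckets,
 * and a faulting page address is mapped to its bucket with
 * hash_long(page & PAGE_MASK, KMMIO_PAGE_HASH_BITS), as done in
 * get_kmmio_fault_page() and add_kmmio_fault_page().
 */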

struct kmmio_context {
	struct kmmio_fault_page *fpage;
	struct kmmio_probe *probe;
	unsigned long saved_flags;
	int active;
};

static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
						unsigned long address);
static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
								void *args);

static DEFINE_SPINLOCK(kmmio_lock);

/* These are protected by kmmio_lock */
unsigned int kmmio_count;
static unsigned int handler_registered;
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

static struct notifier_block nb_die = {
	.notifier_call = kmmio_die_notifier
};

int init_kmmio(void)
{
	int i;

	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
		INIT_LIST_HEAD(&kmmio_page_table[i]);

	register_die_notifier(&nb_die);
	return 0;
}

void cleanup_kmmio(void)
{
	/*
	 * Assume the following have already been cleaned by calling
	 * unregister_kmmio_probe() appropriately:
	 * kmmio_page_table, kmmio_probes
	 */
	if (handler_registered) {
		if (mmiotrace_unregister_pf(&kmmio_page_fault))
			BUG();
		synchronize_rcu();
	}
	unregister_die_notifier(&nb_die);
}

/*
 * This is basically a dynamic stabbing problem:
 * Could use the existing prio tree code or
 * possible better implementations:
 * The Interval Skip List: A Data Structure for Finding All Intervals That
 * Overlap a Point (might be simple)
 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
 */
/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
	struct kmmio_probe *p;

	list_for_each_entry(p, &kmmio_probes, list) {
		if (addr >= p->addr && addr <= (p->addr + p->len))
			return p;
	}
	return NULL;
}

static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
	struct list_head *head, *tmp;

	page &= PAGE_MASK;
	head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
	list_for_each(tmp, head) {
		struct kmmio_fault_page *p
			= list_entry(tmp, struct kmmio_fault_page, list);
		if (p->page == page)
			return p;
	}
	return NULL;
}

static void arm_kmmio_fault_page(unsigned long page, int *page_level)
{
	unsigned long address = page & PAGE_MASK;
	int level;
	pte_t *pte = lookup_address(address, &level);

	if (!pte) {
		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
						__FUNCTION__, page);
		return;
	}

	if (level == PG_LEVEL_2M) {
		pmd_t *pmd = (pmd_t *)pte;
		set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT));
	} else {
		/* PG_LEVEL_4K */
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	}

	if (page_level)
		*page_level = level;

	__flush_tlb_one(page);
}

static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
{
	unsigned long address = page & PAGE_MASK;
	int level;
	pte_t *pte = lookup_address(address, &level);

	if (!pte) {
		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
						__FUNCTION__, page);
		return;
	}

	if (level == PG_LEVEL_2M) {
		pmd_t *pmd = (pmd_t *)pte;
		set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT));
	} else {
		/* PG_LEVEL_4K */
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	}

	if (page_level)
		*page_level = level;

	__flush_tlb_one(page);
}

/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
 */
static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

	/*
	 * Preemption is now disabled to prevent process switch during
	 * single stepping. We can only handle one active kmmio trace
	 * per cpu, so ensure that we finish it before something else
	 * gets to run.
	 *
	 * XXX what if an interrupt occurs between returning from
	 * do_page_fault() and entering the single-step exception handler?
	 * And that interrupt triggers a kmmio trap?
	 */
	preempt_disable();

	/* interrupts disabled and CPU-local data => atomicity guaranteed. */
	if (ctx->active) {
		/*
		 * This avoids a deadlock with kmmio_lock.
		 * If this page fault really was due to a kmmio trap,
		 * all hell breaks loose.
		 */
		printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, "
					"for address %lu. Ignoring.\n",
					smp_processor_id(), addr);
		goto no_kmmio;
	}
	ctx->active++;

	/*
	 * Acquire the kmmio lock to prevent changes affecting
	 * get_kmmio_fault_page() and get_kmmio_probe(), since we save their
	 * returned pointers.
	 * The lock is released in post_kmmio_handler().
	 * XXX: could/should get_kmmio_*() be using RCU instead of spinlock?
	 */
	spin_lock(&kmmio_lock);

	ctx->fpage = get_kmmio_fault_page(addr);
	if (!ctx->fpage) {
		/* this page fault is not caused by kmmio */
		goto no_kmmio_locked;
	}

	ctx->probe = get_kmmio_probe(addr);
	ctx->saved_flags = (regs->flags & (TF_MASK | IF_MASK));

	if (ctx->probe && ctx->probe->pre_handler)
		ctx->probe->pre_handler(ctx->probe, regs, addr);

	regs->flags |= TF_MASK;
	regs->flags &= ~IF_MASK;

	/* We hold lock, now we set present bit in PTE and single step. */
	disarm_kmmio_fault_page(ctx->fpage->page, NULL);

	put_cpu_var(kmmio_ctx);
	return 1;

no_kmmio_locked:
	spin_unlock(&kmmio_lock);
	ctx->active--;
no_kmmio:
	preempt_enable_no_resched();
	put_cpu_var(kmmio_ctx);
	/* page fault not handled by kmmio */
	return 0;
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * And we hold the kmmio lock.
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
	int ret = 0;
	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

	if (!ctx->active)
		goto out;

	if (ctx->probe && ctx->probe->post_handler)
		ctx->probe->post_handler(ctx->probe, condition, regs);

	arm_kmmio_fault_page(ctx->fpage->page, NULL);

	regs->flags &= ~TF_MASK;
	regs->flags |= ctx->saved_flags;

	/* These were acquired in kmmio_handler(). */
	ctx->active--;
	spin_unlock(&kmmio_lock);
	preempt_enable_no_resched();

	/*
	 * if somebody else is singlestepping across a probe point, flags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (!(regs->flags & TF_MASK))
		ret = 1;

out:
	put_cpu_var(kmmio_ctx);
	return ret;
}

static int add_kmmio_fault_page(unsigned long page)
{
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	f = get_kmmio_fault_page(page);
	if (f) {
		f->count++;
		return 0;
	}

	f = kmalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return -1;

	f->count = 1;
	f->page = page;
	list_add(&f->list,
		 &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]);

	arm_kmmio_fault_page(f->page, NULL);

	return 0;
}

static void release_kmmio_fault_page(unsigned long page)
{
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	f = get_kmmio_fault_page(page);
	if (!f)
		return;

	f->count--;
	if (!f->count) {
		disarm_kmmio_fault_page(f->page, NULL);
		list_del(&f->list);
	}
}

int register_kmmio_probe(struct kmmio_probe *p)
{
	int ret = 0;
	unsigned long size = 0;

	spin_lock_irq(&kmmio_lock);
	kmmio_count++;
	if (get_kmmio_probe(p->addr)) {
		ret = -EEXIST;
		goto out;
	}
	list_add(&p->list, &kmmio_probes);
	/*printk("adding fault pages...\n");*/
	while (size < p->len) {
		if (add_kmmio_fault_page(p->addr + size))
			printk(KERN_ERR "mmio: Unable to set page fault.\n");
		size += PAGE_SIZE;
	}

	if (!handler_registered) {
		if (mmiotrace_register_pf(&kmmio_page_fault))
			printk(KERN_ERR "mmiotrace: Cannot register page "
					"fault handler.\n");
		else
			handler_registered++;
	}

out:
	spin_unlock_irq(&kmmio_lock);
	/*
	 * XXX: What should I do here?
	 * Here was a call to global_flush_tlb(), but it does not exist
	 * anymore.
	 */
	return ret;
}

void unregister_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long size = 0;

	spin_lock_irq(&kmmio_lock);
	while (size < p->len) {
		release_kmmio_fault_page(p->addr + size);
		size += PAGE_SIZE;
	}
	list_del(&p->list);
	kmmio_count--;
	spin_unlock_irq(&kmmio_lock);
}
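
/*
 * Hypothetical usage sketch (editor's note, not in the original file):
 * how a caller might probe an ioremapped MMIO region. The kmmio_probe
 * fields and handler signatures are inferred from how this file uses them
 * (addr, len, pre_handler, post_handler); the authoritative declarations
 * live in kmmio.h and may differ.
 *
 *	static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
 *						unsigned long addr)
 *	{
 *		printk(KERN_INFO "mmio fault at 0x%08lx\n", addr);
 *	}
 *
 *	static void my_post(struct kmmio_probe *p, unsigned long condition,
 *						struct pt_regs *regs)
 *	{
 *		printk(KERN_INFO "mmio access single-stepped\n");
 *	}
 *
 *	static struct kmmio_probe my_probe = {
 *		.len = PAGE_SIZE,
 *		.pre_handler = my_pre,
 *		.post_handler = my_post,
 *	};
 *
 *	my_probe.addr = (unsigned long)ioremap(phys_addr, PAGE_SIZE);
 *	if (register_kmmio_probe(&my_probe))
 *		printk(KERN_ERR "could not register kmmio probe\n");
 *	...
 *	unregister_kmmio_probe(&my_probe);
 */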

/*
 * According to 2.6.20, mainly x86_64 arch:
 * This is being called from do_page_fault(), via the page fault notifier
 * chain. The chain is called for both user space faults and kernel space
 * faults (address >= TASK_SIZE64), except not on faults serviced by
 * vmalloc_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * The page fault hook functionality has put us inside an RCU read lock.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
						unsigned long address)
{
	if (is_kmmio_active())
		if (kmmio_handler(regs, address) == 1)
			return -1;
	return 0;
}

static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
								void *args)
{
	struct die_args *arg = args;

	if (val == DIE_DEBUG)
		if (post_kmmio_handler(arg->err, arg->regs) == 1)
			return NOTIFY_STOP;

	return NOTIFY_DONE;
}