2006-03-23 20:00:26 +01:00
/*
2006-09-04 15:41:16 +02:00
* Copyright ( C ) 2006 Jens Axboe < axboe @ kernel . dk >
2006-03-23 20:00:26 +01:00
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 51 Franklin St , Fifth Floor , Boston , MA 02110 - 1301 USA
*
*/
# include <linux/kernel.h>
# include <linux/blkdev.h>
# include <linux/blktrace_api.h>
# include <linux/percpu.h>
# include <linux/init.h>
# include <linux/mutex.h>
# include <linux/debugfs.h>
2006-12-01 10:39:12 +01:00
# include <linux/time.h>
2006-03-23 20:00:26 +01:00
# include <asm/uaccess.h>
/*
 * Trace generation counter.  It is incremented each time a trace is started
 * and compared against tsk->btrace_seq to decide whether a process note
 * still has to be emitted for the current task.
 */
static unsigned int blktrace_seq __read_mostly = 1 ;
2006-12-01 10:39:12 +01:00
/*
* Send out a notify message .
*/
2006-12-04 09:30:58 +01:00
static void trace_note ( struct blk_trace * bt , pid_t pid , int action ,
const void * data , size_t len )
2006-12-01 10:39:12 +01:00
{
struct blk_io_trace * t ;
t = relay_reserve ( bt - > rchan , sizeof ( * t ) + len ) ;
2006-12-04 09:27:41 +01:00
if ( t ) {
const int cpu = smp_processor_id ( ) ;
t - > magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION ;
2008-01-11 13:35:54 +01:00
t - > time = ktime_to_ns ( ktime_get ( ) ) ;
2006-12-04 09:27:41 +01:00
t - > device = bt - > dev ;
t - > action = action ;
t - > pid = pid ;
t - > cpu = cpu ;
t - > pdu_len = len ;
memcpy ( ( void * ) t + sizeof ( * t ) , data , len ) ;
}
2006-12-01 10:39:12 +01:00
}
2006-03-23 20:00:26 +01:00
/*
* Send out a notify for this process , if we haven ' t done so since a trace
* started
*/
static void trace_note_tsk ( struct blk_trace * bt , struct task_struct * tsk )
{
2006-12-04 09:30:58 +01:00
tsk - > btrace_seq = blktrace_seq ;
trace_note ( bt , tsk - > pid , BLK_TN_PROCESS , tsk - > comm , sizeof ( tsk - > comm ) ) ;
2006-12-01 10:39:12 +01:00
}
2006-03-23 20:00:26 +01:00
2006-12-01 10:39:12 +01:00
static void trace_note_time ( struct blk_trace * bt )
{
struct timespec now ;
unsigned long flags ;
u32 words [ 2 ] ;
getnstimeofday ( & now ) ;
words [ 0 ] = now . tv_sec ;
words [ 1 ] = now . tv_nsec ;
local_irq_save ( flags ) ;
trace_note ( bt , 0 , BLK_TN_TIMESTAMP , words , sizeof ( words ) ) ;
local_irq_restore ( flags ) ;
2006-03-23 20:00:26 +01:00
}
/*
 * Apply the per-trace filters to one event.
 *
 * Returns 0 if the event should be logged and 1 if it must be skipped.
 * An event is logged only when its action bits intersect the trace's
 * action mask, its sector lies inside [start_lba, end_lba], and, if the
 * trace is restricted to a pid, that pid matches.
 */
static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
			 pid_t pid)
{
	const int action_wanted =
		((bt->act_mask << BLK_TC_SHIFT) & what) != 0;
	const int sector_in_range =
		sector >= bt->start_lba && sector <= bt->end_lba;
	const int pid_wanted = !bt->pid || pid == bt->pid;

	return !(action_wanted && sector_in_range && pid_wanted);
}
/*
 * Action bit for each data direction; looked up with (rw & WRITE).
 */
static u32 ddir_act[2] __read_mostly = {
	BLK_TC_ACT(BLK_TC_READ),
	BLK_TC_ACT(BLK_TC_WRITE),
};
/*
* Bio action bits of interest
*/
2006-07-23 02:12:01 +02:00
static u32 bio_act [ 9 ] __read_mostly = { 0 , BLK_TC_ACT ( BLK_TC_BARRIER ) , BLK_TC_ACT ( BLK_TC_SYNC ) , 0 , BLK_TC_ACT ( BLK_TC_AHEAD ) , 0 , 0 , 0 , BLK_TC_ACT ( BLK_TC_META ) } ;
2006-03-23 20:00:26 +01:00
/*
 * Map one bio flag of @rw onto a distinct bio_act[] index: the flag bit is
 * extracted as 0/1 and moved to its own position (barrier -> 1, sync -> 2,
 * ahead -> 4, meta -> 8).  A clear flag always yields index 0.
 *
 * More flags can be added by giving each the next free bit position and
 * growing bio_act[] accordingly.
 */
#define trace_barrier_bit(rw)	\
	((((rw) >> BIO_RW_BARRIER) & 1) << 0)
#define trace_sync_bit(rw)	\
	((((rw) >> BIO_RW_SYNC) & 1) << 1)
#define trace_ahead_bit(rw)	\
	((((rw) >> BIO_RW_AHEAD) & 1) << 2)
#define trace_meta_bit(rw)	\
	((((rw) >> BIO_RW_META) & 1) << 3)
2006-03-23 20:00:26 +01:00
/*
* The worker for the various blk_add_trace * ( ) types . Fills out a
* blk_io_trace structure and places it in a per - cpu subbuffer .
*/
void __blk_add_trace ( struct blk_trace * bt , sector_t sector , int bytes ,
int rw , u32 what , int error , int pdu_len , void * pdu_data )
{
struct task_struct * tsk = current ;
struct blk_io_trace * t ;
unsigned long flags ;
unsigned long * sequence ;
pid_t pid ;
int cpu ;
if ( unlikely ( bt - > trace_state ! = Blktrace_running ) )
return ;
what | = ddir_act [ rw & WRITE ] ;
what | = bio_act [ trace_barrier_bit ( rw ) ] ;
what | = bio_act [ trace_sync_bit ( rw ) ] ;
2006-07-06 10:03:28 +02:00
what | = bio_act [ trace_ahead_bit ( rw ) ] ;
2006-07-23 02:12:01 +02:00
what | = bio_act [ trace_meta_bit ( rw ) ] ;
2006-03-23 20:00:26 +01:00
pid = tsk - > pid ;
if ( unlikely ( act_log_check ( bt , what , sector , pid ) ) )
return ;
/*
* A word about the locking here - we disable interrupts to reserve
* some space in the relay per - cpu buffer , to prevent an irq
* from coming in and stepping on our toes . Once reserved , it ' s
* enough to get preemption disabled to prevent read of this data
* before we are through filling it . get_cpu ( ) / put_cpu ( ) does this
* for us
*/
local_irq_save ( flags ) ;
if ( unlikely ( tsk - > btrace_seq ! = blktrace_seq ) )
trace_note_tsk ( bt , tsk ) ;
t = relay_reserve ( bt - > rchan , sizeof ( * t ) + pdu_len ) ;
if ( t ) {
cpu = smp_processor_id ( ) ;
sequence = per_cpu_ptr ( bt - > sequence , cpu ) ;
t - > magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION ;
t - > sequence = + + ( * sequence ) ;
2008-01-11 13:35:54 +01:00
t - > time = ktime_to_ns ( ktime_get ( ) ) ;
2006-03-23 20:00:26 +01:00
t - > sector = sector ;
t - > bytes = bytes ;
t - > action = what ;
t - > pid = pid ;
t - > device = bt - > dev ;
t - > cpu = cpu ;
t - > error = error ;
t - > pdu_len = pdu_len ;
if ( pdu_len )
memcpy ( ( void * ) t + sizeof ( * t ) , pdu_data , pdu_len ) ;
}
local_irq_restore ( flags ) ;
}
EXPORT_SYMBOL_GPL ( __blk_add_trace ) ;
/*
 * Dentry of the shared "block" debugfs directory; NULL while it does not
 * exist.  Created on demand in blk_create_tree().
 */
static struct dentry * blk_tree_root ;
2008-01-11 13:37:01 +01:00
/* Held while the debugfs root directory and its user count are changed. */
static DEFINE_MUTEX ( blk_tree_mutex ) ;
2006-03-23 20:00:26 +01:00
/*
 * Number of per-device directories currently living under the debugfs root;
 * the root is removed again when this drops to zero.
 */
static unsigned int root_users ;
/*
 * Remove the debugfs root directory, if there is one, and forget its dentry.
 */
static inline void blk_remove_root(void)
{
	if (!blk_tree_root)
		return;

	debugfs_remove(blk_tree_root);
	blk_tree_root = NULL;
}
/*
 * Remove the per-device directory @dir and drop its reference on the
 * debugfs root; the root itself goes away with its last user.
 */
static void blk_remove_tree(struct dentry *dir)
{
	mutex_lock(&blk_tree_mutex);

	debugfs_remove(dir);

	root_users--;
	if (!root_users)
		blk_remove_root();

	mutex_unlock(&blk_tree_mutex);
}
static struct dentry * blk_create_tree ( const char * blk_name )
{
struct dentry * dir = NULL ;
2007-11-21 12:25:41 +01:00
int created = 0 ;
2006-03-23 20:00:26 +01:00
mutex_lock ( & blk_tree_mutex ) ;
if ( ! blk_tree_root ) {
blk_tree_root = debugfs_create_dir ( " block " , NULL ) ;
if ( ! blk_tree_root )
goto err ;
2007-11-21 12:25:41 +01:00
created = 1 ;
2006-03-23 20:00:26 +01:00
}
dir = debugfs_create_dir ( blk_name , blk_tree_root ) ;
if ( dir )
root_users + + ;
2007-11-21 12:25:41 +01:00
else {
/* Delete root only if we created it */
if ( created )
blk_remove_root ( ) ;
}
2006-03-23 20:00:26 +01:00
err :
mutex_unlock ( & blk_tree_mutex ) ;
return dir ;
}
static void blk_trace_cleanup ( struct blk_trace * bt )
{
relay_close ( bt - > rchan ) ;
debugfs_remove ( bt - > dropped_file ) ;
blk_remove_tree ( bt - > dir ) ;
free_percpu ( bt - > sequence ) ;
kfree ( bt ) ;
}
2008-01-11 10:09:43 +01:00
int blk_trace_remove ( struct request_queue * q )
2006-03-23 20:00:26 +01:00
{
struct blk_trace * bt ;
bt = xchg ( & q - > blk_trace , NULL ) ;
if ( ! bt )
return - EINVAL ;
if ( bt - > trace_state = = Blktrace_setup | |
bt - > trace_state = = Blktrace_stopped )
blk_trace_cleanup ( bt ) ;
return 0 ;
}
2008-01-11 10:09:43 +01:00
EXPORT_SYMBOL_GPL ( blk_trace_remove ) ;
2006-03-23 20:00:26 +01:00
static int blk_dropped_open ( struct inode * inode , struct file * filp )
{
2006-09-27 01:50:46 -07:00
filp - > private_data = inode - > i_private ;
2006-03-23 20:00:26 +01:00
return 0 ;
}
/*
 * read() for the "dropped" file: report the trace's dropped counter as an
 * unsigned decimal number followed by a newline.
 */
static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	struct blk_trace *bt = filp->private_data;
	char text[16];
	size_t text_len;

	snprintf(text, sizeof(text), "%u\n", atomic_read(&bt->dropped));
	text_len = strlen(text);

	return simple_read_from_buffer(buffer, count, ppos, text, text_len);
}
2007-02-12 00:55:32 -08:00
static const struct file_operations blk_dropped_fops = {
2006-03-23 20:00:26 +01:00
. owner = THIS_MODULE ,
. open = blk_dropped_open ,
. read = blk_dropped_read ,
} ;
/*
 * Relay subbuf_start hook.  Counts how often a full buffer was hit, so the
 * user space application can tell how many events were lost.
 *
 * Returns 1 while the buffer is not full; when it is full, the trace's
 * dropped counter is incremented and 0 is returned.
 */
static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
				     void *prev_subbuf, size_t prev_padding)
{
	struct blk_trace *bt;

	if (relay_buf_full(buf)) {
		bt = buf->chan->private_data;
		atomic_inc(&bt->dropped);
		return 0;
	}

	return 1;
}
/*
 * Relay remove_buf_file hook: remove the debugfs entry @dentry.
 * Always returns 0.
 */
static int blk_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
/*
 * Relay create_buf_file hook: create the debugfs file @filename under
 * @parent with @buf as its private data and the relay file operations.
 * @is_global is left untouched.
 */
static struct dentry *blk_create_buf_file_callback(const char *filename,
						   struct dentry *parent,
						   int mode,
						   struct rchan_buf *buf,
						   int *is_global)
{
	struct dentry *buf_file;

	buf_file = debugfs_create_file(filename, mode, parent, buf,
				       &relay_file_operations);
	return buf_file;
}
/* Relay channel callbacks used for every trace. */
static struct rchan_callbacks blk_relay_callbacks = {
	.create_buf_file	= blk_create_buf_file_callback,
	.remove_buf_file	= blk_remove_buf_file_callback,
	.subbuf_start		= blk_subbuf_start_callback,
};
/*
* Setup everything required to start tracing
*/
2008-01-11 10:09:43 +01:00
int do_blk_trace_setup ( struct request_queue * q , char * name , dev_t dev ,
2007-10-09 13:23:53 +02:00
struct blk_user_trace_setup * buts )
2006-03-23 20:00:26 +01:00
{
struct blk_trace * old_bt , * bt = NULL ;
struct dentry * dir = NULL ;
int ret , i ;
2007-10-09 13:23:53 +02:00
if ( ! buts - > buf_size | | ! buts - > buf_nr )
2006-03-23 20:00:26 +01:00
return - EINVAL ;
2008-01-11 10:09:43 +01:00
strcpy ( buts - > name , name ) ;
2006-03-23 20:00:26 +01:00
/*
* some device names have larger paths - convert the slashes
* to underscores for this to work as expected
*/
2007-10-09 13:23:53 +02:00
for ( i = 0 ; i < strlen ( buts - > name ) ; i + + )
if ( buts - > name [ i ] = = ' / ' )
buts - > name [ i ] = ' _ ' ;
2006-03-23 20:00:26 +01:00
ret = - ENOMEM ;
bt = kzalloc ( sizeof ( * bt ) , GFP_KERNEL ) ;
if ( ! bt )
goto err ;
bt - > sequence = alloc_percpu ( unsigned long ) ;
if ( ! bt - > sequence )
goto err ;
ret = - ENOENT ;
2007-10-09 13:23:53 +02:00
dir = blk_create_tree ( buts - > name ) ;
2006-03-23 20:00:26 +01:00
if ( ! dir )
goto err ;
bt - > dir = dir ;
2008-01-11 10:09:43 +01:00
bt - > dev = dev ;
2006-03-23 20:00:26 +01:00
atomic_set ( & bt - > dropped , 0 ) ;
ret = - EIO ;
bt - > dropped_file = debugfs_create_file ( " dropped " , 0444 , dir , bt , & blk_dropped_fops ) ;
if ( ! bt - > dropped_file )
goto err ;
2007-10-09 13:23:53 +02:00
bt - > rchan = relay_open ( " trace " , dir , buts - > buf_size ,
buts - > buf_nr , & blk_relay_callbacks , bt ) ;
2006-03-23 20:00:26 +01:00
if ( ! bt - > rchan )
goto err ;
2007-10-09 13:23:53 +02:00
bt - > act_mask = buts - > act_mask ;
2006-03-23 20:00:26 +01:00
if ( ! bt - > act_mask )
bt - > act_mask = ( u16 ) - 1 ;
2007-10-09 13:23:53 +02:00
bt - > start_lba = buts - > start_lba ;
bt - > end_lba = buts - > end_lba ;
2006-03-23 20:00:26 +01:00
if ( ! bt - > end_lba )
bt - > end_lba = - 1ULL ;
2007-10-09 13:23:53 +02:00
bt - > pid = buts - > pid ;
2006-03-23 20:00:26 +01:00
bt - > trace_state = Blktrace_setup ;
ret = - EBUSY ;
old_bt = xchg ( & q - > blk_trace , bt ) ;
if ( old_bt ) {
( void ) xchg ( & q - > blk_trace , old_bt ) ;
goto err ;
}
return 0 ;
err :
if ( dir )
blk_remove_tree ( dir ) ;
if ( bt ) {
if ( bt - > dropped_file )
debugfs_remove ( bt - > dropped_file ) ;
2006-12-06 20:32:37 -08:00
free_percpu ( bt - > sequence ) ;
2006-03-23 20:00:26 +01:00
if ( bt - > rchan )
relay_close ( bt - > rchan ) ;
kfree ( bt ) ;
}
return ret ;
}
2007-10-09 13:23:53 +02:00
2008-01-11 10:09:43 +01:00
/*
 * Set up tracing on @q from a user space request.
 *
 * The blk_user_trace_setup structure is read from @arg, handed to
 * do_blk_trace_setup(), and written back to @arg afterwards so that user
 * space sees the name filled in by the setup.
 *
 * Returns 0 on success, -EFAULT if @arg cannot be read or written, or the
 * error returned by do_blk_trace_setup().  If writing the result back
 * fails, the freshly created trace is removed again so that no trace is
 * left attached to the queue behind a failed call.
 */
int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
		    char __user *arg)
{
	struct blk_user_trace_setup buts;
	int ret;

	if (copy_from_user(&buts, arg, sizeof(buts)))
		return -EFAULT;

	ret = do_blk_trace_setup(q, name, dev, &buts);
	if (ret)
		return ret;

	if (copy_to_user(arg, &buts, sizeof(buts))) {
		/* the caller sees a failure: undo the setup */
		blk_trace_remove(q);
		return -EFAULT;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(blk_trace_setup);
2006-03-23 20:00:26 +01:00
2008-01-11 10:09:43 +01:00
int blk_trace_startstop ( struct request_queue * q , int start )
2006-03-23 20:00:26 +01:00
{
struct blk_trace * bt ;
int ret ;
if ( ( bt = q - > blk_trace ) = = NULL )
return - EINVAL ;
/*
* For starting a trace , we can transition from a setup or stopped
* trace . For stopping a trace , the state must be running
*/
ret = - EINVAL ;
if ( start ) {
if ( bt - > trace_state = = Blktrace_setup | |
bt - > trace_state = = Blktrace_stopped ) {
blktrace_seq + + ;
smp_mb ( ) ;
bt - > trace_state = Blktrace_running ;
2006-12-01 10:39:12 +01:00
trace_note_time ( bt ) ;
2006-03-23 20:00:26 +01:00
ret = 0 ;
}
} else {
if ( bt - > trace_state = = Blktrace_running ) {
bt - > trace_state = Blktrace_stopped ;
relay_flush ( bt - > rchan ) ;
ret = 0 ;
}
}
return ret ;
}
2008-01-11 10:09:43 +01:00
EXPORT_SYMBOL_GPL ( blk_trace_startstop ) ;
2006-03-23 20:00:26 +01:00
/**
 * blk_trace_ioctl: - handle the ioctls associated with tracing
 * @bdev:	the block device
 * @cmd:	the ioctl cmd
 * @arg:	the argument data, if any
 *
 * Dispatches BLKTRACESETUP, BLKTRACESTART, BLKTRACESTOP and
 * BLKTRACETEARDOWN while holding the device's bd_mutex.  Returns -ENXIO if
 * the device has no request queue, -ENOTTY for any other command, and the
 * result of the invoked handler otherwise.
 **/
int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
{
	struct request_queue *q = bdev_get_queue(bdev);
	char name[BDEVNAME_SIZE];
	int ret;

	if (!q)
		return -ENXIO;

	mutex_lock(&bdev->bd_mutex);

	switch (cmd) {
	case BLKTRACESETUP:
		bdevname(bdev, name);
		ret = blk_trace_setup(q, name, bdev->bd_dev, arg);
		break;
	case BLKTRACESTART:
		ret = blk_trace_startstop(q, 1);
		break;
	case BLKTRACESTOP:
		ret = blk_trace_startstop(q, 0);
		break;
	case BLKTRACETEARDOWN:
		ret = blk_trace_remove(q);
		break;
	default:
		ret = -ENOTTY;
		break;
	}

	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
/**
* blk_trace_shutdown : - stop and cleanup trace structures
* @ q : the request queue associated with the device
*
* */
2007-07-24 09:28:11 +02:00
void blk_trace_shutdown ( struct request_queue * q )
2006-03-23 20:00:26 +01:00
{
2006-09-29 01:59:40 -07:00
if ( q - > blk_trace ) {
blk_trace_startstop ( q , 0 ) ;
blk_trace_remove ( q ) ;
}
2006-03-23 20:00:26 +01:00
}