2013-10-25 14:52:25 +04:00
# include <linux/module.h>
2013-12-21 03:11:01 +04:00
2013-10-25 14:52:25 +04:00
# include <linux/moduleparam.h>
# include <linux/sched.h>
# include <linux/fs.h>
# include <linux/blkdev.h>
# include <linux/init.h>
# include <linux/slab.h>
# include <linux/blk-mq.h>
# include <linux/hrtimer.h>
/*
 * One in-flight command. Commands for the bio and request queue modes live
 * in the nullb_queue cmds[] array and are handed out by tag; in multiqueue
 * mode the command is taken from rq->special instead.
 */
struct nullb_cmd {
/* Initialised by setup_commands(); not otherwise used in this file. */
struct list_head list ;
/* Links the command into a per-cpu completion_queue (timer completion). */
struct llist_node ll_list ;
/* Not referenced by the code visible in this file. */
struct call_single_data csd ;
/* Request being served (request and multiqueue modes). */
struct request * rq ;
/* Bio being served (bio mode). */
struct bio * bio ;
/* Index of this command in nq->cmds[] and of its bit in nq->tag_map. */
unsigned int tag ;
/* Queue this command belongs to. */
struct nullb_queue * nq ;
} ;
/*
 * One submission queue: a fixed pool of commands plus the bitmap that
 * tracks which of them are currently in use.
 */
struct nullb_queue {
/* One bit per command; a set bit means the tag is allocated. */
unsigned long * tag_map ;
/* Tasks sleeping in alloc_cmd() until a tag is released by put_tag(). */
wait_queue_head_t wait ;
/* Number of entries in cmds[] and number of valid bits in tag_map. */
unsigned int queue_depth ;
/* Command pool, indexed by tag. */
struct nullb_cmd * cmds ;
} ;
/* Per-device state for one null block device. */
struct nullb {
/* Entry in the global nullb_list. */
struct list_head list ;
/* Device number; used as first_minor and in the disk name. */
unsigned int index ;
struct request_queue * q ;
struct gendisk * disk ;
/* Not referenced by the code visible in this file. */
struct hrtimer timer ;
/* Depth copied into every nullb_queue by null_init_queue(). */
unsigned int queue_depth ;
/* Queue lock handed to blk_init_queue_node() in request mode. */
spinlock_t lock ;
/* Array of submission queues allocated by setup_queues(). */
struct nullb_queue * queues ;
/* Number of entries of queues[] that have been initialised. */
unsigned int nr_queues ;
} ;
/* All registered devices; modified with 'lock' held. */
static LIST_HEAD ( nullb_list ) ;
/* Protects nullb_list and nullb_indexes; initialised in null_init(). */
static struct mutex lock ;
/* Block major number returned by register_blkdev(). */
static int null_major ;
/* Next device index to hand out in null_add_dev(). */
static int nullb_indexes ;
/*
 * Commands waiting for their simulated completion delay, together with the
 * timer that completes them (see null_cmd_end_timer() and
 * null_cmd_timer_expired()).
 */
struct completion_queue {
/* Lock-less list of pending nullb_cmd entries, linked via ll_list. */
struct llist_head list ;
/* Fires null_cmd_timer_expired() to drain 'list'. */
struct hrtimer timer ;
} ;
/*
 * One completion queue per CPU. These are per-cpu for now; they will need
 * to be configured by the complete_queues parameter and appropriately
 * mapped.
 */
static DEFINE_PER_CPU ( struct completion_queue , completion_queues ) ;
/* Values accepted by the 'irqmode' module parameter. */
enum {
	NULL_IRQ_NONE		= 0,	/* complete inline, in the submitter */
	NULL_IRQ_SOFTIRQ	= 1,	/* complete through the block softirq */
	NULL_IRQ_TIMER		= 2,	/* complete from an hrtimer callback */
};
2013-10-25 14:52:25 +04:00
2014-02-10 15:24:40 +04:00
/* Values accepted by the 'queue_mode' module parameter. */
enum {
	NULL_Q_BIO	= 0,	/* bio based: make_request function */
	NULL_Q_RQ	= 1,	/* request based: request_fn */
	NULL_Q_MQ	= 2,	/* multiqueue: blk-mq */
};
2013-12-18 16:41:43 +04:00
static int submit_queues ;
2013-10-25 14:52:25 +04:00
module_param ( submit_queues , int , S_IRUGO ) ;
MODULE_PARM_DESC ( submit_queues , " Number of submission queues " ) ;
static int home_node = NUMA_NO_NODE ;
module_param ( home_node , int , S_IRUGO ) ;
MODULE_PARM_DESC ( home_node , " Home node for the device " ) ;
static int queue_mode = NULL_Q_MQ ;
module_param ( queue_mode , int , S_IRUGO ) ;
MODULE_PARM_DESC ( use_mq , " Use blk-mq interface (0=bio,1=rq,2=multiqueue) " ) ;
static int gb = 250 ;
module_param ( gb , int , S_IRUGO ) ;
MODULE_PARM_DESC ( gb , " Size in GB " ) ;
static int bs = 512 ;
module_param ( bs , int , S_IRUGO ) ;
MODULE_PARM_DESC ( bs , " Block size (in bytes) " ) ;
static int nr_devices = 2 ;
module_param ( nr_devices , int , S_IRUGO ) ;
MODULE_PARM_DESC ( nr_devices , " Number of devices to register " ) ;
static int irqmode = NULL_IRQ_SOFTIRQ ;
module_param ( irqmode , int , S_IRUGO ) ;
MODULE_PARM_DESC ( irqmode , " IRQ completion handler. 0-none, 1-softirq, 2-timer " ) ;
static int completion_nsec = 10000 ;
module_param ( completion_nsec , int , S_IRUGO ) ;
MODULE_PARM_DESC ( completion_nsec , " Time in ns to complete a request in hardware. Default: 10,000ns " ) ;
static int hw_queue_depth = 64 ;
module_param ( hw_queue_depth , int , S_IRUGO ) ;
MODULE_PARM_DESC ( hw_queue_depth , " Queue depth for each hardware queue. Default: 64 " ) ;
2013-12-21 03:11:00 +04:00
static bool use_per_node_hctx = false ;
2013-10-25 14:52:25 +04:00
module_param ( use_per_node_hctx , bool , S_IRUGO ) ;
2013-12-21 03:11:00 +04:00
MODULE_PARM_DESC ( use_per_node_hctx , " Use per-node allocation for hardware context queues. Default: false " ) ;
2013-10-25 14:52:25 +04:00
static void put_tag ( struct nullb_queue * nq , unsigned int tag )
{
clear_bit_unlock ( tag , nq - > tag_map ) ;
if ( waitqueue_active ( & nq - > wait ) )
wake_up ( & nq - > wait ) ;
}
static unsigned int get_tag ( struct nullb_queue * nq )
{
unsigned int tag ;
do {
tag = find_first_zero_bit ( nq - > tag_map , nq - > queue_depth ) ;
if ( tag > = nq - > queue_depth )
return - 1U ;
} while ( test_and_set_bit_lock ( tag , nq - > tag_map ) ) ;
return tag ;
}
static void free_cmd ( struct nullb_cmd * cmd )
{
put_tag ( cmd - > nq , cmd - > tag ) ;
}
static struct nullb_cmd * __alloc_cmd ( struct nullb_queue * nq )
{
struct nullb_cmd * cmd ;
unsigned int tag ;
tag = get_tag ( nq ) ;
if ( tag ! = - 1U ) {
cmd = & nq - > cmds [ tag ] ;
cmd - > tag = tag ;
cmd - > nq = nq ;
return cmd ;
}
return NULL ;
}
static struct nullb_cmd * alloc_cmd ( struct nullb_queue * nq , int can_wait )
{
struct nullb_cmd * cmd ;
DEFINE_WAIT ( wait ) ;
cmd = __alloc_cmd ( nq ) ;
if ( cmd | | ! can_wait )
return cmd ;
do {
prepare_to_wait ( & nq - > wait , & wait , TASK_UNINTERRUPTIBLE ) ;
cmd = __alloc_cmd ( nq ) ;
if ( cmd )
break ;
io_schedule ( ) ;
} while ( 1 ) ;
finish_wait ( & nq - > wait , & wait ) ;
return cmd ;
}
static void end_cmd ( struct nullb_cmd * cmd )
{
2014-02-10 15:24:40 +04:00
switch ( queue_mode ) {
case NULL_Q_MQ :
blk_mq_end_io ( cmd - > rq , 0 ) ;
return ;
case NULL_Q_RQ :
INIT_LIST_HEAD ( & cmd - > rq - > queuelist ) ;
blk_end_request_all ( cmd - > rq , 0 ) ;
break ;
case NULL_Q_BIO :
2013-10-25 14:52:25 +04:00
bio_endio ( cmd - > bio , 0 ) ;
2014-02-10 15:24:40 +04:00
break ;
}
2013-10-25 14:52:25 +04:00
2014-02-10 15:24:40 +04:00
free_cmd ( cmd ) ;
2013-10-25 14:52:25 +04:00
}
static enum hrtimer_restart null_cmd_timer_expired ( struct hrtimer * timer )
{
struct completion_queue * cq ;
struct llist_node * entry ;
struct nullb_cmd * cmd ;
cq = & per_cpu ( completion_queues , smp_processor_id ( ) ) ;
while ( ( entry = llist_del_all ( & cq - > list ) ) ! = NULL ) {
2014-02-06 20:33:17 +04:00
entry = llist_reverse_order ( entry ) ;
2013-10-25 14:52:25 +04:00
do {
cmd = container_of ( entry , struct nullb_cmd , ll_list ) ;
end_cmd ( cmd ) ;
entry = entry - > next ;
} while ( entry ) ;
}
return HRTIMER_NORESTART ;
}
static void null_cmd_end_timer ( struct nullb_cmd * cmd )
{
struct completion_queue * cq = & per_cpu ( completion_queues , get_cpu ( ) ) ;
cmd - > ll_list . next = NULL ;
if ( llist_add ( & cmd - > ll_list , & cq - > list ) ) {
ktime_t kt = ktime_set ( 0 , completion_nsec ) ;
hrtimer_start ( & cq - > timer , kt , HRTIMER_MODE_REL ) ;
}
put_cpu ( ) ;
}
static void null_softirq_done_fn ( struct request * rq )
{
2014-02-10 15:24:40 +04:00
end_cmd ( rq - > special ) ;
2013-10-25 14:52:25 +04:00
}
static inline void null_handle_cmd ( struct nullb_cmd * cmd )
{
/* Complete IO by inline, softirq or timer */
switch ( irqmode ) {
case NULL_IRQ_SOFTIRQ :
2014-02-10 15:24:40 +04:00
switch ( queue_mode ) {
case NULL_Q_MQ :
blk_mq_complete_request ( cmd - > rq ) ;
break ;
case NULL_Q_RQ :
blk_complete_request ( cmd - > rq ) ;
break ;
case NULL_Q_BIO :
/*
* XXX : no proper submitting cpu information available .
*/
end_cmd ( cmd ) ;
break ;
}
break ;
case NULL_IRQ_NONE :
2013-10-25 14:52:25 +04:00
end_cmd ( cmd ) ;
break ;
case NULL_IRQ_TIMER :
null_cmd_end_timer ( cmd ) ;
break ;
}
}
static struct nullb_queue * nullb_to_queue ( struct nullb * nullb )
{
int index = 0 ;
if ( nullb - > nr_queues ! = 1 )
index = raw_smp_processor_id ( ) / ( ( nr_cpu_ids + nullb - > nr_queues - 1 ) / nullb - > nr_queues ) ;
return & nullb - > queues [ index ] ;
}
static void null_queue_bio ( struct request_queue * q , struct bio * bio )
{
struct nullb * nullb = q - > queuedata ;
struct nullb_queue * nq = nullb_to_queue ( nullb ) ;
struct nullb_cmd * cmd ;
cmd = alloc_cmd ( nq , 1 ) ;
cmd - > bio = bio ;
null_handle_cmd ( cmd ) ;
}
static int null_rq_prep_fn ( struct request_queue * q , struct request * req )
{
struct nullb * nullb = q - > queuedata ;
struct nullb_queue * nq = nullb_to_queue ( nullb ) ;
struct nullb_cmd * cmd ;
cmd = alloc_cmd ( nq , 0 ) ;
if ( cmd ) {
cmd - > rq = req ;
req - > special = cmd ;
return BLKPREP_OK ;
}
return BLKPREP_DEFER ;
}
static void null_request_fn ( struct request_queue * q )
{
struct request * rq ;
while ( ( rq = blk_fetch_request ( q ) ) ! = NULL ) {
struct nullb_cmd * cmd = rq - > special ;
spin_unlock_irq ( q - > queue_lock ) ;
null_handle_cmd ( cmd ) ;
spin_lock_irq ( q - > queue_lock ) ;
}
}
static int null_queue_rq ( struct blk_mq_hw_ctx * hctx , struct request * rq )
{
struct nullb_cmd * cmd = rq - > special ;
cmd - > rq = rq ;
cmd - > nq = hctx - > driver_data ;
null_handle_cmd ( cmd ) ;
return BLK_MQ_RQ_QUEUE_OK ;
}
static struct blk_mq_hw_ctx * null_alloc_hctx ( struct blk_mq_reg * reg , unsigned int hctx_index )
{
2013-12-21 03:11:01 +04:00
int b_size = DIV_ROUND_UP ( reg - > nr_hw_queues , nr_online_nodes ) ;
int tip = ( reg - > nr_hw_queues % nr_online_nodes ) ;
int node = 0 , i , n ;
/*
* Split submit queues evenly wrt to the number of nodes . If uneven ,
* fill the first buckets with one extra , until the rest is filled with
* no extra .
*/
for ( i = 0 , n = 1 ; i < hctx_index ; i + + , n + + ) {
if ( n % b_size = = 0 ) {
n = 0 ;
node + + ;
tip - - ;
if ( ! tip )
b_size = reg - > nr_hw_queues / nr_online_nodes ;
}
}
/*
* A node might not be online , therefore map the relative node id to the
* real node id .
*/
for_each_online_node ( n ) {
if ( ! node )
break ;
node - - ;
}
return kzalloc_node ( sizeof ( struct blk_mq_hw_ctx ) , GFP_KERNEL , n ) ;
2013-10-25 14:52:25 +04:00
}
/*
 * null_free_hctx - free a hardware context allocated by null_alloc_hctx().
 * @hctx:       context to free
 * @hctx_index: unused
 */
static void null_free_hctx ( struct blk_mq_hw_ctx * hctx , unsigned int hctx_index )
{
kfree ( hctx ) ;
}
2013-12-18 16:41:43 +04:00
static void null_init_queue ( struct nullb * nullb , struct nullb_queue * nq )
{
BUG_ON ( ! nullb ) ;
BUG_ON ( ! nq ) ;
init_waitqueue_head ( & nq - > wait ) ;
nq - > queue_depth = nullb - > queue_depth ;
}
2013-10-25 14:52:25 +04:00
static int null_init_hctx ( struct blk_mq_hw_ctx * hctx , void * data ,
unsigned int index )
{
struct nullb * nullb = data ;
struct nullb_queue * nq = & nullb - > queues [ index ] ;
hctx - > driver_data = nq ;
2013-12-18 16:41:43 +04:00
null_init_queue ( nullb , nq ) ;
nullb - > nr_queues + + ;
2013-10-25 14:52:25 +04:00
return 0 ;
}
static struct blk_mq_ops null_mq_ops = {
. queue_rq = null_queue_rq ,
. map_queue = blk_mq_map_queue ,
. init_hctx = null_init_hctx ,
2014-02-10 15:24:40 +04:00
. complete = null_softirq_done_fn ,
2013-10-25 14:52:25 +04:00
} ;
/*
 * blk-mq registration template. numa_node, queue_depth and nr_hw_queues
 * are overwritten from the module parameters in null_add_dev().
 */
static struct blk_mq_reg null_mq_reg = {
. ops = & null_mq_ops ,
. queue_depth = 64 ,
/* Size of the per-request driver data. */
. cmd_size = sizeof ( struct nullb_cmd ) ,
. flags = BLK_MQ_F_SHOULD_MERGE ,
} ;
static void null_del_dev ( struct nullb * nullb )
{
list_del_init ( & nullb - > list ) ;
del_gendisk ( nullb - > disk ) ;
2013-12-26 17:31:37 +04:00
blk_cleanup_queue ( nullb - > q ) ;
2013-10-25 14:52:25 +04:00
put_disk ( nullb - > disk ) ;
kfree ( nullb ) ;
}
/* Block device open callback: nothing to do, always succeeds. */
static int null_open ( struct block_device * bdev , fmode_t mode )
{
return 0 ;
}
/* Block device release callback: nothing to do. */
static void null_release ( struct gendisk * disk , fmode_t mode )
{
}
/* Block device operations installed on every disk by null_add_dev(). */
static const struct block_device_operations null_fops = {
. owner = THIS_MODULE ,
. open = null_open ,
. release = null_release ,
} ;
static int setup_commands ( struct nullb_queue * nq )
{
struct nullb_cmd * cmd ;
int i , tag_size ;
nq - > cmds = kzalloc ( nq - > queue_depth * sizeof ( * cmd ) , GFP_KERNEL ) ;
if ( ! nq - > cmds )
2013-12-18 16:41:43 +04:00
return - ENOMEM ;
2013-10-25 14:52:25 +04:00
tag_size = ALIGN ( nq - > queue_depth , BITS_PER_LONG ) / BITS_PER_LONG ;
nq - > tag_map = kzalloc ( tag_size * sizeof ( unsigned long ) , GFP_KERNEL ) ;
if ( ! nq - > tag_map ) {
kfree ( nq - > cmds ) ;
2013-12-18 16:41:43 +04:00
return - ENOMEM ;
2013-10-25 14:52:25 +04:00
}
for ( i = 0 ; i < nq - > queue_depth ; i + + ) {
cmd = & nq - > cmds [ i ] ;
INIT_LIST_HEAD ( & cmd - > list ) ;
cmd - > ll_list . next = NULL ;
cmd - > tag = - 1U ;
}
return 0 ;
}
static void cleanup_queue ( struct nullb_queue * nq )
{
kfree ( nq - > tag_map ) ;
kfree ( nq - > cmds ) ;
}
static void cleanup_queues ( struct nullb * nullb )
{
int i ;
for ( i = 0 ; i < nullb - > nr_queues ; i + + )
cleanup_queue ( & nullb - > queues [ i ] ) ;
kfree ( nullb - > queues ) ;
}
static int setup_queues ( struct nullb * nullb )
{
2013-12-18 16:41:43 +04:00
nullb - > queues = kzalloc ( submit_queues * sizeof ( struct nullb_queue ) ,
GFP_KERNEL ) ;
2013-10-25 14:52:25 +04:00
if ( ! nullb - > queues )
2013-12-18 16:41:43 +04:00
return - ENOMEM ;
2013-10-25 14:52:25 +04:00
nullb - > nr_queues = 0 ;
nullb - > queue_depth = hw_queue_depth ;
2013-12-18 16:41:43 +04:00
return 0 ;
}
static int init_driver_queues ( struct nullb * nullb )
{
struct nullb_queue * nq ;
int i , ret = 0 ;
2013-10-25 14:52:25 +04:00
for ( i = 0 ; i < submit_queues ; i + + ) {
nq = & nullb - > queues [ i ] ;
2013-12-18 16:41:43 +04:00
null_init_queue ( nullb , nq ) ;
ret = setup_commands ( nq ) ;
if ( ret )
goto err_queue ;
2013-10-25 14:52:25 +04:00
nullb - > nr_queues + + ;
}
2013-12-18 16:41:43 +04:00
return 0 ;
err_queue :
2013-10-25 14:52:25 +04:00
cleanup_queues ( nullb ) ;
2013-12-18 16:41:43 +04:00
return ret ;
2013-10-25 14:52:25 +04:00
}
static int null_add_dev ( void )
{
struct gendisk * disk ;
struct nullb * nullb ;
sector_t size ;
nullb = kzalloc_node ( sizeof ( * nullb ) , GFP_KERNEL , home_node ) ;
if ( ! nullb )
return - ENOMEM ;
spin_lock_init ( & nullb - > lock ) ;
2013-12-10 19:50:38 +04:00
if ( queue_mode = = NULL_Q_MQ & & use_per_node_hctx )
submit_queues = nr_online_nodes ;
2013-10-25 14:52:25 +04:00
if ( setup_queues ( nullb ) )
goto err ;
if ( queue_mode = = NULL_Q_MQ ) {
null_mq_reg . numa_node = home_node ;
null_mq_reg . queue_depth = hw_queue_depth ;
2013-12-10 19:50:38 +04:00
null_mq_reg . nr_hw_queues = submit_queues ;
2013-10-25 14:52:25 +04:00
if ( use_per_node_hctx ) {
null_mq_reg . ops - > alloc_hctx = null_alloc_hctx ;
null_mq_reg . ops - > free_hctx = null_free_hctx ;
} else {
null_mq_reg . ops - > alloc_hctx = blk_mq_alloc_single_hw_queue ;
null_mq_reg . ops - > free_hctx = blk_mq_free_single_hw_queue ;
}
nullb - > q = blk_mq_init_queue ( & null_mq_reg , nullb ) ;
} else if ( queue_mode = = NULL_Q_BIO ) {
nullb - > q = blk_alloc_queue_node ( GFP_KERNEL , home_node ) ;
blk_queue_make_request ( nullb - > q , null_queue_bio ) ;
2013-12-18 16:41:43 +04:00
init_driver_queues ( nullb ) ;
2013-10-25 14:52:25 +04:00
} else {
nullb - > q = blk_init_queue_node ( null_request_fn , & nullb - > lock , home_node ) ;
blk_queue_prep_rq ( nullb - > q , null_rq_prep_fn ) ;
if ( nullb - > q )
blk_queue_softirq_done ( nullb - > q , null_softirq_done_fn ) ;
2013-12-18 16:41:43 +04:00
init_driver_queues ( nullb ) ;
2013-10-25 14:52:25 +04:00
}
if ( ! nullb - > q )
goto queue_fail ;
nullb - > q - > queuedata = nullb ;
queue_flag_set_unlocked ( QUEUE_FLAG_NONROT , nullb - > q ) ;
disk = nullb - > disk = alloc_disk_node ( 1 , home_node ) ;
if ( ! disk ) {
queue_fail :
2013-12-26 17:31:37 +04:00
blk_cleanup_queue ( nullb - > q ) ;
2013-10-25 14:52:25 +04:00
cleanup_queues ( nullb ) ;
err :
kfree ( nullb ) ;
return - ENOMEM ;
}
mutex_lock ( & lock ) ;
list_add_tail ( & nullb - > list , & nullb_list ) ;
nullb - > index = nullb_indexes + + ;
mutex_unlock ( & lock ) ;
blk_queue_logical_block_size ( nullb - > q , bs ) ;
blk_queue_physical_block_size ( nullb - > q , bs ) ;
size = gb * 1024 * 1024 * 1024ULL ;
sector_div ( size , bs ) ;
set_capacity ( disk , size ) ;
disk - > flags | = GENHD_FL_EXT_DEVT ;
disk - > major = null_major ;
disk - > first_minor = nullb - > index ;
disk - > fops = & null_fops ;
disk - > private_data = nullb ;
disk - > queue = nullb - > q ;
sprintf ( disk - > disk_name , " nullb%d " , nullb - > index ) ;
add_disk ( disk ) ;
return 0 ;
}
/*
 * null_init - module entry point.
 *
 * Sanitises the module parameters, initialises the per-cpu completion
 * queues, registers the block major and creates nr_devices devices.
 *
 * Returns 0 on success, the error from register_blkdev(), or -EINVAL when
 * a device could not be created. In the latter case every device created
 * so far is removed again before the major is unregistered.
 */
static int __init null_init(void)
{
	struct nullb *nullb;
	unsigned int cpu;
	int dev;

	if (bs > PAGE_SIZE) {
		pr_warn(" null_blk: invalid block size \n ");
		pr_warn(" null_blk: defaults block size to %lu \n ", PAGE_SIZE);
		bs = PAGE_SIZE;
	}

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
		if (submit_queues < nr_online_nodes) {
			pr_warn(" null_blk: submit_queues param is set to %u. ",
				nr_online_nodes);
			submit_queues = nr_online_nodes;
		}
	} else if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;

	mutex_init(&lock);

	/* Initialize a separate list for each CPU for issuing softirqs */
	for_each_possible_cpu(cpu) {
		struct completion_queue *cq = &per_cpu(completion_queues, cpu);

		init_llist_head(&cq->list);

		if (irqmode != NULL_IRQ_TIMER)
			continue;

		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cq->timer.function = null_cmd_timer_expired;
	}

	null_major = register_blkdev(0, " nullb ");
	if (null_major < 0)
		return null_major;

	/*
	 * Signed counter: with an unsigned one a negative nr_devices would
	 * be converted to a huge count instead of creating no devices.
	 */
	for (dev = 0; dev < nr_devices; dev++) {
		if (null_add_dev())
			goto err_dev;
	}

	pr_info(" null: module loaded \n ");
	return 0;

err_dev:
	/* Undo the devices that were already registered. */
	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	mutex_unlock(&lock);

	unregister_blkdev(null_major, " nullb ");
	return -EINVAL;
}
/*
 * null_exit - module exit point.
 *
 * Unregisters the block major and removes every device on nullb_list while
 * holding the list mutex.
 */
static void __exit null_exit(void)
{
	unregister_blkdev(null_major, " nullb ");

	mutex_lock(&lock);
	for (;;) {
		struct nullb *victim;

		if (list_empty(&nullb_list))
			break;

		/* null_del_dev() unlinks the entry, so always take the head. */
		victim = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(victim);
	}
	mutex_unlock(&lock);
}
/* Module entry/exit registration and metadata. */
module_init ( null_init ) ;
module_exit ( null_exit ) ;
MODULE_AUTHOR ( " Jens Axboe <jaxboe@fusionio.com> " ) ;
MODULE_LICENSE ( " GPL " ) ;