2007-10-22 11:03:38 +10:00
//#define DEBUG
# include <linux/spinlock.h>
# include <linux/blkdev.h>
# include <linux/hdreg.h>
# include <linux/virtio.h>
# include <linux/virtio_blk.h>
2007-10-24 13:21:21 +02:00
# include <linux/scatterlist.h>
2008-01-31 15:53:53 +01:00
/* Each disk gets 1 << PART_BITS minors (see alloc_disk() and index_to_minor()). */
#define PART_BITS 4

/* Block major obtained from register_blkdev() at module init. */
static int major;
/* Index of the next disk to be probed; selects its name and first minor. */
static int index;
2008-01-31 15:53:53 +01:00
2007-10-22 11:03:38 +10:00
struct virtio_blk
{
spinlock_t lock ;
struct virtio_device * vdev ;
struct virtqueue * vq ;
/* The disk structure for the kernel. */
struct gendisk * disk ;
/* Request tracking. */
struct list_head reqs ;
mempool_t * pool ;
2008-12-30 09:26:05 -06:00
/* What host tells us, plus 2 for header & tailer. */
unsigned int sg_elems ;
2007-10-22 11:03:38 +10:00
/* Scatterlist: can be too big for stack. */
2008-12-30 09:26:05 -06:00
struct scatterlist sg [ /*sg_elems*/ ] ;
2007-10-22 11:03:38 +10:00
} ;
struct virtblk_req
{
struct list_head list ;
struct request * req ;
struct virtio_blk_outhdr out_hdr ;
2009-05-18 14:41:30 +02:00
struct virtio_scsi_inhdr in_hdr ;
2008-05-02 21:50:45 -05:00
u8 status ;
2007-10-22 11:03:38 +10:00
} ;
2008-02-04 23:49:57 -05:00
static void blk_done ( struct virtqueue * vq )
2007-10-22 11:03:38 +10:00
{
struct virtio_blk * vblk = vq - > vdev - > priv ;
struct virtblk_req * vbr ;
unsigned int len ;
unsigned long flags ;
spin_lock_irqsave ( & vblk - > lock , flags ) ;
while ( ( vbr = vblk - > vq - > vq_ops - > get_buf ( vblk - > vq , & len ) ) ! = NULL ) {
2008-10-01 10:11:20 -04:00
int error ;
2009-05-18 14:41:30 +02:00
2008-05-02 21:50:45 -05:00
switch ( vbr - > status ) {
2007-10-22 11:03:38 +10:00
case VIRTIO_BLK_S_OK :
2008-10-01 10:11:20 -04:00
error = 0 ;
2007-10-22 11:03:38 +10:00
break ;
case VIRTIO_BLK_S_UNSUPP :
2008-10-01 10:11:20 -04:00
error = - ENOTTY ;
2007-10-22 11:03:38 +10:00
break ;
default :
2008-10-01 10:11:20 -04:00
error = - EIO ;
2007-10-22 11:03:38 +10:00
break ;
}
2009-05-18 14:41:30 +02:00
if ( blk_pc_request ( vbr - > req ) ) {
vbr - > req - > resid_len = vbr - > in_hdr . residual ;
vbr - > req - > sense_len = vbr - > in_hdr . sense_len ;
vbr - > req - > errors = vbr - > in_hdr . errors ;
}
2009-04-23 11:05:19 +09:00
__blk_end_request_all ( vbr - > req , error ) ;
2007-10-22 11:03:38 +10:00
list_del ( & vbr - > list ) ;
mempool_free ( vbr , vblk - > pool ) ;
}
/* In case queue is stopped waiting for more buffers. */
blk_start_queue ( vblk - > disk - > queue ) ;
spin_unlock_irqrestore ( & vblk - > lock , flags ) ;
}
static bool do_req ( struct request_queue * q , struct virtio_blk * vblk ,
struct request * req )
{
2009-05-18 14:41:30 +02:00
unsigned long num , out = 0 , in = 0 ;
2007-10-22 11:03:38 +10:00
struct virtblk_req * vbr ;
vbr = mempool_alloc ( vblk - > pool , GFP_ATOMIC ) ;
if ( ! vbr )
/* When another request finishes we'll try again. */
return false ;
vbr - > req = req ;
if ( blk_fs_request ( vbr - > req ) ) {
vbr - > out_hdr . type = 0 ;
2009-05-07 22:24:39 +09:00
vbr - > out_hdr . sector = blk_rq_pos ( vbr - > req ) ;
2008-08-14 09:59:13 +02:00
vbr - > out_hdr . ioprio = req_get_ioprio ( vbr - > req ) ;
2007-10-22 11:03:38 +10:00
} else if ( blk_pc_request ( vbr - > req ) ) {
vbr - > out_hdr . type = VIRTIO_BLK_T_SCSI_CMD ;
vbr - > out_hdr . sector = 0 ;
2008-08-14 09:59:13 +02:00
vbr - > out_hdr . ioprio = req_get_ioprio ( vbr - > req ) ;
2007-10-22 11:03:38 +10:00
} else {
/* We don't put anything else in the queue. */
BUG ( ) ;
}
if ( blk_barrier_rq ( vbr - > req ) )
vbr - > out_hdr . type | = VIRTIO_BLK_T_BARRIER ;
2009-05-18 14:41:30 +02:00
sg_set_buf ( & vblk - > sg [ out + + ] , & vbr - > out_hdr , sizeof ( vbr - > out_hdr ) ) ;
2007-10-22 11:03:38 +10:00
2009-05-18 14:41:30 +02:00
/*
* If this is a packet command we need a couple of additional headers .
* Behind the normal outhdr we put a segment with the scsi command
* block , and before the normal inhdr we put the sense data and the
* inhdr with additional status information before the normal inhdr .
*/
if ( blk_pc_request ( vbr - > req ) )
sg_set_buf ( & vblk - > sg [ out + + ] , vbr - > req - > cmd , vbr - > req - > cmd_len ) ;
num = blk_rq_map_sg ( q , vbr - > req , vblk - > sg + out ) ;
if ( blk_pc_request ( vbr - > req ) ) {
sg_set_buf ( & vblk - > sg [ num + out + in + + ] , vbr - > req - > sense , 96 ) ;
sg_set_buf ( & vblk - > sg [ num + out + in + + ] , & vbr - > in_hdr ,
sizeof ( vbr - > in_hdr ) ) ;
}
sg_set_buf ( & vblk - > sg [ num + out + in + + ] , & vbr - > status ,
sizeof ( vbr - > status ) ) ;
if ( num ) {
if ( rq_data_dir ( vbr - > req ) = = WRITE ) {
vbr - > out_hdr . type | = VIRTIO_BLK_T_OUT ;
out + = num ;
} else {
vbr - > out_hdr . type | = VIRTIO_BLK_T_IN ;
in + = num ;
}
2007-10-22 11:03:38 +10:00
}
if ( vblk - > vq - > vq_ops - > add_buf ( vblk - > vq , vblk - > sg , out , in , vbr ) ) {
mempool_free ( vbr , vblk - > pool ) ;
return false ;
}
list_add_tail ( & vbr - > list , & vblk - > reqs ) ;
return true ;
}
static void do_virtblk_request ( struct request_queue * q )
{
2009-05-18 14:38:28 +02:00
struct virtio_blk * vblk = q - > queuedata ;
2007-10-22 11:03:38 +10:00
struct request * req ;
unsigned int issued = 0 ;
2009-05-08 11:54:16 +09:00
while ( ( req = blk_peek_request ( q ) ) ! = NULL ) {
2008-12-30 09:26:05 -06:00
BUG_ON ( req - > nr_phys_segments + 2 > vblk - > sg_elems ) ;
2007-10-22 11:03:38 +10:00
/* If this request fails, stop queue and wait for something to
finish to restart it . */
if ( ! do_req ( q , vblk , req ) ) {
blk_stop_queue ( q ) ;
break ;
}
2009-05-08 11:54:16 +09:00
blk_start_request ( req ) ;
2007-10-22 11:03:38 +10:00
issued + + ;
}
if ( issued )
vblk - > vq - > vq_ops - > kick ( vblk - > vq ) ;
}
2009-06-09 14:41:40 +02:00
/* return ATA identify data
*/
static int virtblk_identify ( struct gendisk * disk , void * argp )
{
struct virtio_blk * vblk = disk - > private_data ;
void * opaque ;
int err = - ENOMEM ;
opaque = kmalloc ( VIRTIO_BLK_ID_BYTES , GFP_KERNEL ) ;
if ( ! opaque )
goto out ;
err = virtio_config_buf ( vblk - > vdev , VIRTIO_BLK_F_IDENTIFY ,
offsetof ( struct virtio_blk_config , identify ) , opaque ,
VIRTIO_BLK_ID_BYTES ) ;
if ( err )
goto out_kfree ;
if ( copy_to_user ( argp , opaque , VIRTIO_BLK_ID_BYTES ) )
err = - EFAULT ;
out_kfree :
kfree ( opaque ) ;
out :
return err ;
}
2008-03-02 10:22:33 -05:00
/*
 * ioctl entry point for the disk.
 *
 * HDIO_GET_IDENTITY is answered from the device config space; everything
 * else is forwarded to the generic SCSI ioctl handler, but only when the
 * host offers VIRTIO_BLK_F_SCSI. Otherwise -ENOTTY is returned.
 */
static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
			 unsigned cmd, unsigned long data)
{
	struct gendisk *disk = bdev->bd_disk;
	void __user *uarg = (void __user *)data;
	struct virtio_blk *vblk;

	if (cmd == HDIO_GET_IDENTITY)
		return virtblk_identify(disk, uarg);

	/*
	 * Only allow the generic SCSI ioctls if the host can support it.
	 */
	vblk = disk->private_data;
	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
		return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, uarg);

	return -ENOTTY;
}
2008-01-23 17:56:50 +01:00
/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo ( struct block_device * bd , struct hd_geometry * geo )
{
2008-04-16 13:56:37 -05:00
struct virtio_blk * vblk = bd - > bd_disk - > private_data ;
struct virtio_blk_geometry vgeo ;
int err ;
/* see if the host passed in geometry config */
err = virtio_config_val ( vblk - > vdev , VIRTIO_BLK_F_GEOMETRY ,
offsetof ( struct virtio_blk_config , geometry ) ,
& vgeo ) ;
if ( ! err ) {
geo - > heads = vgeo . heads ;
geo - > sectors = vgeo . sectors ;
geo - > cylinders = vgeo . cylinders ;
} else {
/* some standard values, similar to sd */
geo - > heads = 1 < < 6 ;
geo - > sectors = 1 < < 5 ;
geo - > cylinders = get_capacity ( bd - > bd_disk ) > > 11 ;
}
2008-01-23 17:56:50 +01:00
return 0 ;
}
2009-09-21 17:01:13 -07:00
static const struct block_device_operations virtblk_fops = {
2008-03-02 10:22:33 -05:00
. locked_ioctl = virtblk_ioctl ,
2008-01-23 17:56:50 +01:00
. owner = THIS_MODULE ,
. getgeo = virtblk_getgeo ,
2007-10-22 11:03:38 +10:00
} ;
2008-02-01 09:05:00 +01:00
static int index_to_minor ( int index )
{
return index < < PART_BITS ;
}
2009-05-18 03:39:09 -04:00
/*
 * Probe one virtio block device: allocate the driver state, find the request
 * virtqueue, create the gendisk and request queue, apply the limits the host
 * advertises in config space, and publish the disk.
 *
 * Returns 0 on success, -ENOSPC when no minor numbers are left, -ENOMEM on
 * allocation failure, or the error from virtio_find_single_vq().
 */
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	int err;
	u64 cap;
	u32 v;
	u32 blk_size, sg_elems;

	if (index_to_minor(index) >= 1 << MINORBITS)
		return -ENOSPC;

	/* We need to know how many segments before we allocate. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
				offsetof(struct virtio_blk_config, seg_max),
				&sg_elems);

	/*
	 * We need at least one SG element, whatever the host says: with
	 * seg_max == 0 the array would only hold the two header/tailer
	 * entries and any request carrying data would not fit.
	 */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need an extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&vblk->reqs);
	spin_lock_init(&vblk->lock);
	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;
	sg_init_table(vblk->sg, vblk->sg_elems);

	/* We expect one virtqueue, for output. */
	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
	if (IS_ERR(vblk->vq)) {
		err = PTR_ERR(vblk->vq);
		goto out_free_vblk;
	}

	vblk->pool = mempool_create_kmalloc_pool(1, sizeof(struct virtblk_req));
	if (!vblk->pool) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_mempool;
	}

	vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
	if (!vblk->disk->queue) {
		err = -ENOMEM;
		goto out_put_disk;
	}

	vblk->disk->queue->queuedata = vblk;
	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, vblk->disk->queue);

	/* Name the disk vda..vdz, vdaa..vdzz, vdaaa.. depending on index. */
	if (index < 26) {
		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
	} else if (index < (26 + 1) * 26) {
		sprintf(vblk->disk->disk_name, "vd%c%c",
			'a' + index / 26 - 1, 'a' + index % 26);
	} else {
		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
		const unsigned int m2 = (index / 26 - 1) % 26;
		const unsigned int m3 = index % 26;
		sprintf(vblk->disk->disk_name, "vd%c%c%c",
			'a' + m1, 'a' + m2, 'a' + m3);
	}

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->driverfs_dev = &vdev->dev;
	index++;

	/* If barriers are supported, tell block layer that queue is ordered */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &cap, sizeof(cap));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)cap);
		cap = (sector_t)-1;
	}
	set_capacity(vblk->disk, cap);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_phys_segments(vblk->disk->queue, vblk->sg_elems - 2);
	blk_queue_max_hw_segments(vblk->disk->queue, vblk->sg_elems - 2);

	/* No need to bounce any requests */
	blk_queue_bounce_limit(vblk->disk->queue, BLK_BOUNCE_ANY);

	/* No real sector limit. */
	blk_queue_max_sectors(vblk->disk->queue, -1U);

	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
				offsetof(struct virtio_blk_config, size_max),
				&v);
	if (!err)
		blk_queue_max_segment_size(vblk->disk->queue, v);
	else
		blk_queue_max_segment_size(vblk->disk->queue, -1U);

	/* Host can optionally specify the block size of the device */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
				offsetof(struct virtio_blk_config, blk_size),
				&blk_size);
	if (!err)
		blk_queue_logical_block_size(vblk->disk->queue, blk_size);

	add_disk(vblk->disk);
	return 0;

	/* Unwind in reverse order of acquisition. */
out_put_disk:
	put_disk(vblk->disk);
out_mempool:
	mempool_destroy(vblk->pool);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out:
	return err;
}
2009-05-18 03:39:09 -04:00
/*
 * Tear down one virtio block device: reset the device, unpublish and release
 * the disk and its queue, then free the request pool, virtqueues and state.
 */
static void __devexit virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	struct gendisk *disk = vblk->disk;

	/* Nothing should be pending. */
	BUG_ON(!list_empty(&vblk->reqs));

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	del_gendisk(disk);
	blk_cleanup_queue(disk->queue);
	put_disk(disk);

	mempool_destroy(vblk->pool);
	vdev->config->del_vqs(vdev);
	kfree(vblk);
}
/* Devices this driver binds to: any virtio block device; zero-terminated. */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
2008-05-02 21:50:50 -05:00
static unsigned int features [ ] = {
VIRTIO_BLK_F_BARRIER , VIRTIO_BLK_F_SEG_MAX , VIRTIO_BLK_F_SIZE_MAX ,
2008-05-29 11:08:26 +02:00
VIRTIO_BLK_F_GEOMETRY , VIRTIO_BLK_F_RO , VIRTIO_BLK_F_BLK_SIZE ,
2009-06-09 14:41:40 +02:00
VIRTIO_BLK_F_SCSI , VIRTIO_BLK_F_IDENTIFY
2008-05-02 21:50:50 -05:00
} ;
virtio_blk: mark virtio_blk with __refdata to kill spurious section mismatch
The variable virtio_blk references the function virtblk_probe() (which
is in the .devinit section) and also references the function
virtblk_remove() (which is in the .devexit section). So virtio_blk
simultaneously refers to both the .devinit and .devexit sections. To avoid
this mix-up, we mark virtio_blk as __refdata.
We were warned by the following warning:
LD drivers/block/built-in.o
WARNING: drivers/block/built-in.o(.data+0xc8dc): Section mismatch in
reference from the variable virtio_blk to the function
.devinit.text:virtblk_probe()
The variable virtio_blk references
the function __devinit virtblk_probe()
If the reference is valid then annotate the
variable with __init* or __refdata (see linux/init.h) or name the variable:
*driver, *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console,
WARNING: drivers/block/built-in.o(.data+0xc8e0): Section mismatch in
reference from the variable virtio_blk to the function
.devexit.text:virtblk_remove()
The variable virtio_blk references
the function __devexit virtblk_remove()
If the reference is valid then annotate the
variable with __exit* (see linux/init.h) or name the variable:
*driver, *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console,
Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2009-07-17 20:13:22 +06:00
/*
 * virtio_blk causes a spurious section mismatch warning because it refers
 * to both a __devinit and a __devexit function at the same time.
 * Use __refdata to avoid this warning.
 */
static struct virtio_driver __refdata virtio_blk = {
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.feature_table		= features,
	.feature_table_size	= ARRAY_SIZE(features),
	.probe			= virtblk_probe,
	.remove			= __devexit_p(virtblk_remove),
};
/*
 * Module init: grab a dynamic block major, then register the virtio driver.
 *
 * Returns 0 on success or a negative errno. If driver registration fails
 * the block major is released again so it is not leaked.
 */
static int __init init(void)
{
	int error;

	major = register_blkdev(0, "virtblk");
	if (major < 0)
		return major;

	error = register_virtio_driver(&virtio_blk);
	if (error)
		unregister_blkdev(major, "virtblk");

	return error;
}
/*
 * Module exit: undo init() in reverse order. The driver is unregistered
 * first so the major is only released after the driver that uses it is gone.
 */
static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_blk);
	unregister_blkdev(major, "virtblk");
}
/* Module entry/exit points and metadata. */
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");