2007-07-18 05:37:06 +04:00
/*
* blkfront . c
*
* XenLinux virtual block device driver .
*
* Copyright ( c ) 2003 - 2004 , Keir Fraser & Steve Hand
* Modifications by Mark A . Williamson are ( c ) Intel Research Cambridge
* Copyright ( c ) 2004 , Christian Limpach
* Copyright ( c ) 2004 , Andrew Warfield
* Copyright ( c ) 2005 , Christopher Clark
* Copyright ( c ) 2005 , XenSource Ltd
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation ; or , when distributed
* separately from the Linux kernel or incorporated into other
* software packages , subject to the following license :
*
* Permission is hereby granted , free of charge , to any person obtaining a copy
* of this source file ( the " Software " ) , to deal in the Software without
* restriction , including without limitation the rights to use , copy , modify ,
* merge , publish , distribute , sublicense , and / or sell copies of the Software ,
* and to permit persons to whom the Software is furnished to do so , subject to
* the following conditions :
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software .
*
* THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND , EXPRESS OR
* IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY ,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT . IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER
* LIABILITY , WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING
* FROM , OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE .
*/
# include <linux/interrupt.h>
# include <linux/blkdev.h>
2008-02-22 00:03:45 +03:00
# include <linux/hdreg.h>
2008-06-17 12:47:08 +04:00
# include <linux/cdrom.h>
2007-07-18 05:37:06 +04:00
# include <linux/module.h>
2009-02-24 10:10:09 +03:00
# include <linux/scatterlist.h>
2007-07-18 05:37:06 +04:00
# include <xen/xenbus.h>
# include <xen/grant_table.h>
# include <xen/events.h>
# include <xen/page.h>
# include <xen/interface/grant_table.h>
# include <xen/interface/io/blkif.h>
2008-04-02 21:54:02 +04:00
# include <xen/interface/io/protocols.h>
2007-07-18 05:37:06 +04:00
# include <asm/xen/hypervisor.h>
/*
 * Connection state of one frontend instance relative to its backend.
 * Stored in blkfront_info.connected.
 */
enum blkif_state {
	/* Set at probe time and by blkif_free() when not suspending. */
	BLKIF_STATE_DISCONNECTED = 0,
	/* Set by blkfront_connect() and blkif_recover(); I/O may be queued. */
	BLKIF_STATE_CONNECTED = 1,
	/* Set by blkif_free() when called with a non-zero 'suspend'. */
	BLKIF_STATE_SUSPENDED = 2,
};
/*
 * Private bookkeeping kept for each ring slot (one entry per request id).
 * While a slot is unused, req.id doubles as the link to the next free slot
 * (see get_id_from_freelist() / add_id_to_freelist()).
 */
struct blk_shadow {
/* Copy of the request as written to the ring, kept so it can be reissued. */
struct blkif_request req ;
/* The owning struct request pointer stored as an integer; 0 when unused. */
unsigned long request ;
/* Frame number recorded for each segment of the request. */
unsigned long frame [ BLKIF_MAX_SEGMENTS_PER_REQUEST ] ;
} ;
/* Forward declaration: the table is defined after the handlers it lists. */
static struct block_device_operations xlvbd_block_fops ;
/*
 * Ring size computed by __RING_SIZE for a blkif shared ring of PAGE_SIZE
 * bytes. Also used as the number of entries in blkfront_info.shadow[].
 */
# define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
/*
* We have one of these per vbd , whether ide , scsi or ' other ' . They
* hang in private_data off the gendisk structure . We may end up
* putting all kinds of interesting stuff here : - )
*/
struct blkfront_info
{
struct xenbus_device * xbdev ;
struct gendisk * gd ;
int vdevice ;
blkif_vdev_t handle ;
enum blkif_state connected ;
int ring_ref ;
struct blkif_front_ring ring ;
2009-02-24 10:10:09 +03:00
struct scatterlist sg [ BLKIF_MAX_SEGMENTS_PER_REQUEST ] ;
2007-07-18 05:37:06 +04:00
unsigned int evtchn , irq ;
struct request_queue * rq ;
struct work_struct work ;
struct gnttab_free_callback callback ;
struct blk_shadow shadow [ BLK_RING_SIZE ] ;
unsigned long shadow_free ;
int feature_barrier ;
2008-04-02 21:54:04 +04:00
int is_ready ;
2007-07-18 05:37:06 +04:00
/**
* The number of people holding this device open . We won ' t allow a
* hot - unplug unless this is 0.
*/
int users ;
} ;
/*
 * Single file-wide lock: handed to blk_init_queue() as the queue lock and
 * taken around ring and connection-state updates throughout this file.
 */
static DEFINE_SPINLOCK ( blkif_io_lock ) ;
/* Segments in flight if every ring slot carried a maximally sized request. */
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)

/* Value held in ring_ref while no grant exists for the shared ring. */
#define GRANT_INVALID_REF	0

/* Minor numbers per disk in the classic and the extended numbering. */
#define PARTS_PER_DISK		16
#define PARTS_PER_EXT_DISK	256

/* Split a classic (non-extended) virtual device number. */
#define BLKIF_MAJOR(dev)	((dev) >> 8)
#define BLKIF_MINOR(dev)	((dev) & 0xff)

/* Bit EXT_SHIFT marks an extended virtual device number. */
#define EXT_SHIFT		28
#define EXTENDED		(1 << EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev)	((dev) & (EXTENDED))
#define BLKIF_MINOR_EXT(dev)	((dev) & (~EXTENDED))

#define DEV_NAME	"xvd"	/* name in /dev */
2007-07-18 05:37:06 +04:00
static int get_id_from_freelist ( struct blkfront_info * info )
{
unsigned long free = info - > shadow_free ;
2009-05-22 11:25:32 +04:00
BUG_ON ( free > = BLK_RING_SIZE ) ;
2007-07-18 05:37:06 +04:00
info - > shadow_free = info - > shadow [ free ] . req . id ;
info - > shadow [ free ] . req . id = 0x0fffffee ; /* debug */
return free ;
}
static void add_id_to_freelist ( struct blkfront_info * info ,
unsigned long id )
{
info - > shadow [ id ] . req . id = info - > shadow_free ;
info - > shadow [ id ] . request = 0 ;
info - > shadow_free = id ;
}
static void blkif_restart_queue_callback ( void * arg )
{
struct blkfront_info * info = ( struct blkfront_info * ) arg ;
schedule_work ( & info - > work ) ;
}
2008-04-29 11:59:47 +04:00
static int blkif_getgeo ( struct block_device * bd , struct hd_geometry * hg )
2008-02-22 00:03:45 +03:00
{
/* We don't have real geometry info, but let's at least return
values consistent with the size of the device */
sector_t nsect = get_capacity ( bd - > bd_disk ) ;
sector_t cylinders = nsect ;
hg - > heads = 0xff ;
hg - > sectors = 0x3f ;
sector_div ( cylinders , hg - > heads * hg - > sectors ) ;
hg - > cylinders = cylinders ;
if ( ( sector_t ) ( hg - > cylinders + 1 ) * hg - > heads * hg - > sectors < nsect )
hg - > cylinders = 0xffff ;
return 0 ;
}
2008-03-02 18:23:47 +03:00
/*
 * ioctl handler for the virtual disk.
 *
 * CDROMMULTISESSION: zero-fills the user's struct cdrom_multisession and
 *	returns 0, or -EFAULT if a byte cannot be written.
 * CDROM_GET_CAPABILITY: returns 0 if the disk is flagged GENHD_FL_CD,
 *	otherwise -EINVAL.
 * Anything else: -EINVAL.
 */
static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
		       unsigned command, unsigned long argument)
{
	struct blkfront_info *info = bdev->bd_disk->private_data;
	int i;

	dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
		command, (long)argument);

	if (command == CDROMMULTISESSION) {
		dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
		for (i = 0; i < sizeof(struct cdrom_multisession); i++) {
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		}
		return 0;
	}

	if (command == CDROM_GET_CAPABILITY)
		return (info->gd->flags & GENHD_FL_CD) ? 0 : -EINVAL;

	return -EINVAL; /* same return as native Linux */
}
2007-07-18 05:37:06 +04:00
/*
* blkif_queue_request
*
* request block io
*
* id : for guest use only .
* operation : BLKIF_OP_ { READ , WRITE , PROBE }
* buffer : buffer to read / write into . this should be a
* virtual address in the guest os .
*/
static int blkif_queue_request ( struct request * req )
{
struct blkfront_info * info = req - > rq_disk - > private_data ;
unsigned long buffer_mfn ;
struct blkif_request * ring_req ;
unsigned long id ;
unsigned int fsect , lsect ;
2009-02-24 10:10:09 +03:00
int i , ref ;
2007-07-18 05:37:06 +04:00
grant_ref_t gref_head ;
2009-02-24 10:10:09 +03:00
struct scatterlist * sg ;
2007-07-18 05:37:06 +04:00
if ( unlikely ( info - > connected ! = BLKIF_STATE_CONNECTED ) )
return 1 ;
if ( gnttab_alloc_grant_references (
BLKIF_MAX_SEGMENTS_PER_REQUEST , & gref_head ) < 0 ) {
gnttab_request_free_callback (
& info - > callback ,
blkif_restart_queue_callback ,
info ,
BLKIF_MAX_SEGMENTS_PER_REQUEST ) ;
return 1 ;
}
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST ( & info - > ring , info - > ring . req_prod_pvt ) ;
id = get_id_from_freelist ( info ) ;
info - > shadow [ id ] . request = ( unsigned long ) req ;
ring_req - > id = id ;
2009-05-07 17:24:39 +04:00
ring_req - > sector_number = ( blkif_sector_t ) blk_rq_pos ( req ) ;
2007-07-18 05:37:06 +04:00
ring_req - > handle = info - > handle ;
ring_req - > operation = rq_data_dir ( req ) ?
BLKIF_OP_WRITE : BLKIF_OP_READ ;
if ( blk_barrier_rq ( req ) )
ring_req - > operation = BLKIF_OP_WRITE_BARRIER ;
2009-02-24 10:10:09 +03:00
ring_req - > nr_segments = blk_rq_map_sg ( req - > q , req , info - > sg ) ;
BUG_ON ( ring_req - > nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST ) ;
for_each_sg ( info - > sg , sg , ring_req - > nr_segments , i ) {
buffer_mfn = pfn_to_mfn ( page_to_pfn ( sg_page ( sg ) ) ) ;
fsect = sg - > offset > > 9 ;
lsect = fsect + ( sg - > length > > 9 ) - 1 ;
2007-08-16 15:43:12 +04:00
/* install a grant reference. */
ref = gnttab_claim_grant_reference ( & gref_head ) ;
BUG_ON ( ref = = - ENOSPC ) ;
gnttab_grant_foreign_access_ref (
2007-07-18 05:37:06 +04:00
ref ,
info - > xbdev - > otherend_id ,
buffer_mfn ,
rq_data_dir ( req ) ) ;
2009-02-24 10:10:09 +03:00
info - > shadow [ id ] . frame [ i ] = mfn_to_pfn ( buffer_mfn ) ;
ring_req - > seg [ i ] =
2007-07-18 05:37:06 +04:00
( struct blkif_request_segment ) {
. gref = ref ,
. first_sect = fsect ,
. last_sect = lsect } ;
}
info - > ring . req_prod_pvt + + ;
/* Keep a private copy so we can reissue requests when recovering. */
info - > shadow [ id ] . req = * ring_req ;
gnttab_free_grant_references ( gref_head ) ;
return 0 ;
}
static inline void flush_requests ( struct blkfront_info * info )
{
int notify ;
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY ( & info - > ring , notify ) ;
if ( notify )
notify_remote_via_irq ( info - > irq ) ;
}
/*
* do_blkif_request
* read a block ; request is in a request queue
*/
2007-07-24 11:28:11 +04:00
static void do_blkif_request ( struct request_queue * rq )
2007-07-18 05:37:06 +04:00
{
struct blkfront_info * info = NULL ;
struct request * req ;
int queued ;
pr_debug ( " Entered do_blkif_request \n " ) ;
queued = 0 ;
2009-05-08 06:54:16 +04:00
while ( ( req = blk_peek_request ( rq ) ) ! = NULL ) {
2007-07-18 05:37:06 +04:00
info = req - > rq_disk - > private_data ;
if ( RING_FULL ( & info - > ring ) )
goto wait ;
2009-05-08 06:54:16 +04:00
blk_start_request ( req ) ;
2009-05-08 06:54:15 +04:00
if ( ! blk_fs_request ( req ) ) {
__blk_end_request_all ( req , - EIO ) ;
continue ;
}
2007-07-18 05:37:06 +04:00
pr_debug ( " do_blk_req %p: cmd %p, sec %lx, "
2009-05-07 17:24:39 +04:00
" (%u/%u) buffer:%p [%s] \n " ,
req , req - > cmd , ( unsigned long ) blk_rq_pos ( req ) ,
blk_rq_cur_sectors ( req ) , blk_rq_sectors ( req ) ,
req - > buffer , rq_data_dir ( req ) ? " write " : " read " ) ;
2007-07-18 05:37:06 +04:00
if ( blkif_queue_request ( req ) ) {
blk_requeue_request ( rq , req ) ;
wait :
/* Avoid pointless unplugs. */
blk_stop_queue ( rq ) ;
break ;
}
queued + + ;
}
if ( queued ! = 0 )
flush_requests ( info ) ;
}
/*
 * Create and configure the request queue for 'gd'.
 *
 * The limits are chosen so that any merged request fits in a single ring
 * slot.  Returns 0 on success or -1 if the queue cannot be allocated.
 */
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
	struct request_queue *q;

	q = blk_init_queue(do_blkif_request, &blkif_io_lock);
	if (!q)
		return -1;

	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, q);

	/* Hard sector size and max sectors impersonate the equiv. hardware. */
	blk_queue_logical_block_size(q, sector_size);
	blk_queue_max_sectors(q, 512);

	/* Each segment in a request is up to an aligned page in size. */
	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
	blk_queue_max_segment_size(q, PAGE_SIZE);

	/* Ensure a merged request will fit in a single I/O ring slot. */
	blk_queue_max_phys_segments(q, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	blk_queue_max_hw_segments(q, BLKIF_MAX_SEGMENTS_PER_REQUEST);

	/* Make sure buffer addresses are sector-aligned. */
	blk_queue_dma_alignment(q, 511);

	/* Make sure we don't use bounce buffers. */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

	gd->queue = q;

	return 0;
}
static int xlvbd_barrier ( struct blkfront_info * info )
{
int err ;
err = blk_queue_ordered ( info - > rq ,
info - > feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE ,
NULL ) ;
if ( err )
return err ;
printk ( KERN_INFO " blkfront: %s: barriers %s \n " ,
info - > gd - > disk_name ,
info - > feature_barrier ? " enabled " : " disabled " ) ;
return 0 ;
}
2008-09-18 01:30:32 +04:00
/*
 * Allocate and initialise the gendisk and request queue for a device.
 *
 * @capacity:    capacity passed to set_capacity()
 * @info:        device state; info->gd and info->rq must still be NULL
 * @vdisk_info:  VDISK_* flags reported by the backend
 * @sector_size: logical block size for the queue
 *
 * The disk is named "xvd" plus one or two letters selected by the disk
 * index, followed by the partition number when the minor does not start a
 * whole disk.  Returns 0 on success or -ENODEV on any failure.  The disk is
 * not added here; the caller does that.
 */
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
			       struct blkfront_info *info,
			       u16 vdisk_info, u16 sector_size)
{
	struct gendisk *disk;
	unsigned int disk_idx;
	int minor;
	int nr_parts;
	int nr_minors;

	BUG_ON(info->gd != NULL);
	BUG_ON(info->rq != NULL);

	if ((info->vdevice >> EXT_SHIFT) > 1) {
		/* this is above the extended range; something is wrong */
		printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
		return -ENODEV;
	}

	if (VDEV_IS_EXTENDED(info->vdevice)) {
		minor = BLKIF_MINOR_EXT(info->vdevice);
		nr_parts = PARTS_PER_EXT_DISK;
	} else {
		minor = BLKIF_MINOR(info->vdevice);
		nr_parts = PARTS_PER_DISK;
	}

	/* A minor on a disk boundary gets the full partition range. */
	nr_minors = ((minor % nr_parts) == 0) ? nr_parts : 1;

	disk = alloc_disk(nr_minors);
	if (disk == NULL)
		return -ENODEV;

	disk_idx = minor / nr_parts;

	if (disk_idx < 26) {
		/* Single-letter disk name. */
		if (nr_minors > 1)
			sprintf(disk->disk_name, "%s%c", DEV_NAME,
				'a' + disk_idx);
		else
			sprintf(disk->disk_name, "%s%c%d", DEV_NAME,
				'a' + disk_idx,
				minor & (nr_parts - 1));
	} else {
		/* Two-letter disk name. */
		if (nr_minors > 1)
			sprintf(disk->disk_name, "%s%c%c", DEV_NAME,
				'a' + ((disk_idx / 26) - 1),
				'a' + (disk_idx % 26));
		else
			sprintf(disk->disk_name, "%s%c%c%d", DEV_NAME,
				'a' + ((disk_idx / 26) - 1),
				'a' + (disk_idx % 26),
				minor & (nr_parts - 1));
	}

	disk->major = XENVBD_MAJOR;
	disk->first_minor = minor;
	disk->fops = &xlvbd_block_fops;
	disk->private_data = info;
	disk->driverfs_dev = &(info->xbdev->dev);
	set_capacity(disk, capacity);

	if (xlvbd_init_blk_queue(disk, sector_size)) {
		del_gendisk(disk);
		return -ENODEV;
	}

	info->rq = disk->queue;
	info->gd = disk;

	if (info->feature_barrier)
		xlvbd_barrier(info);

	if (vdisk_info & VDISK_READONLY)
		set_disk_ro(disk, 1);

	if (vdisk_info & VDISK_REMOVABLE)
		disk->flags |= GENHD_FL_REMOVABLE;

	if (vdisk_info & VDISK_CDROM)
		disk->flags |= GENHD_FL_CD;

	return 0;
}
static void kick_pending_request_queues ( struct blkfront_info * info )
{
if ( ! RING_FULL ( & info - > ring ) ) {
/* Re-enable calldowns. */
blk_start_queue ( info - > rq ) ;
/* Kick things off immediately. */
do_blkif_request ( info - > rq ) ;
}
}
static void blkif_restart_queue ( struct work_struct * work )
{
struct blkfront_info * info = container_of ( work , struct blkfront_info , work ) ;
spin_lock_irq ( & blkif_io_lock ) ;
if ( info - > connected = = BLKIF_STATE_CONNECTED )
kick_pending_request_queues ( info ) ;
spin_unlock_irq ( & blkif_io_lock ) ;
}
static void blkif_free ( struct blkfront_info * info , int suspend )
{
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq ( & blkif_io_lock ) ;
info - > connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED ;
/* No more blkif_request(). */
if ( info - > rq )
blk_stop_queue ( info - > rq ) ;
/* No more gnttab callback work. */
gnttab_cancel_free_callback ( & info - > callback ) ;
spin_unlock_irq ( & blkif_io_lock ) ;
/* Flush gnttab callback work. Must be done with no locks held. */
flush_scheduled_work ( ) ;
/* Free resources associated with old device channel. */
if ( info - > ring_ref ! = GRANT_INVALID_REF ) {
gnttab_end_foreign_access ( info - > ring_ref , 0 ,
( unsigned long ) info - > ring . sring ) ;
info - > ring_ref = GRANT_INVALID_REF ;
info - > ring . sring = NULL ;
}
if ( info - > irq )
unbind_from_irqhandler ( info - > irq , info ) ;
info - > evtchn = info - > irq = 0 ;
}
static void blkif_completion ( struct blk_shadow * s )
{
int i ;
for ( i = 0 ; i < s - > req . nr_segments ; i + + )
gnttab_end_foreign_access ( s - > req . seg [ i ] . gref , 0 , 0UL ) ;
}
static irqreturn_t blkif_interrupt ( int irq , void * dev_id )
{
struct request * req ;
struct blkif_response * bret ;
RING_IDX i , rp ;
unsigned long flags ;
struct blkfront_info * info = ( struct blkfront_info * ) dev_id ;
2007-12-12 01:47:36 +03:00
int error ;
2007-07-18 05:37:06 +04:00
spin_lock_irqsave ( & blkif_io_lock , flags ) ;
if ( unlikely ( info - > connected ! = BLKIF_STATE_CONNECTED ) ) {
spin_unlock_irqrestore ( & blkif_io_lock , flags ) ;
return IRQ_HANDLED ;
}
again :
rp = info - > ring . sring - > rsp_prod ;
rmb ( ) ; /* Ensure we see queued responses up to 'rp'. */
for ( i = info - > ring . rsp_cons ; i ! = rp ; i + + ) {
unsigned long id ;
bret = RING_GET_RESPONSE ( & info - > ring , i ) ;
id = bret - > id ;
req = ( struct request * ) info - > shadow [ id ] . request ;
blkif_completion ( & info - > shadow [ id ] ) ;
add_id_to_freelist ( info , id ) ;
2007-12-12 01:47:36 +03:00
error = ( bret - > status = = BLKIF_RSP_OKAY ) ? 0 : - EIO ;
2007-07-18 05:37:06 +04:00
switch ( bret - > operation ) {
case BLKIF_OP_WRITE_BARRIER :
if ( unlikely ( bret - > status = = BLKIF_RSP_EOPNOTSUPP ) ) {
printk ( KERN_WARNING " blkfront: %s: write barrier op failed \n " ,
info - > gd - > disk_name ) ;
2007-12-12 01:47:36 +03:00
error = - EOPNOTSUPP ;
2007-07-18 05:37:06 +04:00
info - > feature_barrier = 0 ;
xlvbd_barrier ( info ) ;
}
/* fall through */
case BLKIF_OP_READ :
case BLKIF_OP_WRITE :
if ( unlikely ( bret - > status ! = BLKIF_RSP_OKAY ) )
dev_dbg ( & info - > xbdev - > dev , " Bad return from blkdev data "
" request: %x \n " , bret - > status ) ;
2009-04-23 06:05:19 +04:00
__blk_end_request_all ( req , error ) ;
2007-07-18 05:37:06 +04:00
break ;
default :
BUG ( ) ;
}
}
info - > ring . rsp_cons = i ;
if ( i ! = info - > ring . req_prod_pvt ) {
int more_to_do ;
RING_FINAL_CHECK_FOR_RESPONSES ( & info - > ring , more_to_do ) ;
if ( more_to_do )
goto again ;
} else
info - > ring . sring - > rsp_event = i + 1 ;
kick_pending_request_queues ( info ) ;
spin_unlock_irqrestore ( & blkif_io_lock , flags ) ;
return IRQ_HANDLED ;
}
static int setup_blkring ( struct xenbus_device * dev ,
struct blkfront_info * info )
{
struct blkif_sring * sring ;
int err ;
info - > ring_ref = GRANT_INVALID_REF ;
2008-06-17 12:47:08 +04:00
sring = ( struct blkif_sring * ) __get_free_page ( GFP_NOIO | __GFP_HIGH ) ;
2007-07-18 05:37:06 +04:00
if ( ! sring ) {
xenbus_dev_fatal ( dev , - ENOMEM , " allocating shared ring " ) ;
return - ENOMEM ;
}
SHARED_RING_INIT ( sring ) ;
FRONT_RING_INIT ( & info - > ring , sring , PAGE_SIZE ) ;
2009-02-24 10:10:09 +03:00
sg_init_table ( info - > sg , BLKIF_MAX_SEGMENTS_PER_REQUEST ) ;
2007-07-18 05:37:06 +04:00
err = xenbus_grant_ring ( dev , virt_to_mfn ( info - > ring . sring ) ) ;
if ( err < 0 ) {
free_page ( ( unsigned long ) sring ) ;
info - > ring . sring = NULL ;
goto fail ;
}
info - > ring_ref = err ;
err = xenbus_alloc_evtchn ( dev , & info - > evtchn ) ;
if ( err )
goto fail ;
err = bind_evtchn_to_irqhandler ( info - > evtchn ,
blkif_interrupt ,
IRQF_SAMPLE_RANDOM , " blkif " , info ) ;
if ( err < = 0 ) {
xenbus_dev_fatal ( dev , err ,
" bind_evtchn_to_irqhandler failed " ) ;
goto fail ;
}
info - > irq = err ;
return 0 ;
fail :
blkif_free ( info , 0 ) ;
return err ;
}
/* Common code used when first setting up, and when resuming. */
static int talk_to_backend ( struct xenbus_device * dev ,
struct blkfront_info * info )
{
const char * message = NULL ;
struct xenbus_transaction xbt ;
int err ;
/* Create shared ring, alloc event channel. */
err = setup_blkring ( dev , info ) ;
if ( err )
goto out ;
again :
err = xenbus_transaction_start ( & xbt ) ;
if ( err ) {
xenbus_dev_fatal ( dev , err , " starting transaction " ) ;
goto destroy_blkring ;
}
err = xenbus_printf ( xbt , dev - > nodename ,
" ring-ref " , " %u " , info - > ring_ref ) ;
if ( err ) {
message = " writing ring-ref " ;
goto abort_transaction ;
}
err = xenbus_printf ( xbt , dev - > nodename ,
" event-channel " , " %u " , info - > evtchn ) ;
if ( err ) {
message = " writing event-channel " ;
goto abort_transaction ;
}
2008-04-02 21:54:02 +04:00
err = xenbus_printf ( xbt , dev - > nodename , " protocol " , " %s " ,
XEN_IO_PROTO_ABI_NATIVE ) ;
if ( err ) {
message = " writing protocol " ;
goto abort_transaction ;
}
2007-07-18 05:37:06 +04:00
err = xenbus_transaction_end ( xbt , 0 ) ;
if ( err ) {
if ( err = = - EAGAIN )
goto again ;
xenbus_dev_fatal ( dev , err , " completing transaction " ) ;
goto destroy_blkring ;
}
xenbus_switch_state ( dev , XenbusStateInitialised ) ;
return 0 ;
abort_transaction :
xenbus_transaction_end ( xbt , 1 ) ;
if ( message )
xenbus_dev_fatal ( dev , err , " %s " , message ) ;
destroy_blkring :
blkif_free ( info , 0 ) ;
out :
return err ;
}
/**
* Entry point to this code when a new device is created . Allocate the basic
* structures and the ring buffer for communication with the backend , and
* inform the backend of the appropriate details for those . Switch to
* Initialised state .
*/
static int blkfront_probe ( struct xenbus_device * dev ,
const struct xenbus_device_id * id )
{
int err , vdevice , i ;
struct blkfront_info * info ;
/* FIXME: Use dynamic device id if this is not set. */
err = xenbus_scanf ( XBT_NIL , dev - > nodename ,
" virtual-device " , " %i " , & vdevice ) ;
if ( err ! = 1 ) {
2008-09-18 01:30:32 +04:00
/* go looking in the extended area instead */
err = xenbus_scanf ( XBT_NIL , dev - > nodename , " virtual-device-ext " ,
" %i " , & vdevice ) ;
if ( err ! = 1 ) {
xenbus_dev_fatal ( dev , err , " reading virtual-device " ) ;
return err ;
}
2007-07-18 05:37:06 +04:00
}
info = kzalloc ( sizeof ( * info ) , GFP_KERNEL ) ;
if ( ! info ) {
xenbus_dev_fatal ( dev , - ENOMEM , " allocating info structure " ) ;
return - ENOMEM ;
}
info - > xbdev = dev ;
info - > vdevice = vdevice ;
info - > connected = BLKIF_STATE_DISCONNECTED ;
INIT_WORK ( & info - > work , blkif_restart_queue ) ;
for ( i = 0 ; i < BLK_RING_SIZE ; i + + )
info - > shadow [ i ] . req . id = i + 1 ;
info - > shadow [ BLK_RING_SIZE - 1 ] . req . id = 0x0fffffff ;
/* Front end dir is a number, which is used as the id. */
info - > handle = simple_strtoul ( strrchr ( dev - > nodename , ' / ' ) + 1 , NULL , 0 ) ;
2009-05-01 01:43:31 +04:00
dev_set_drvdata ( & dev - > dev , info ) ;
2007-07-18 05:37:06 +04:00
err = talk_to_backend ( dev , info ) ;
if ( err ) {
kfree ( info ) ;
2009-05-01 01:43:31 +04:00
dev_set_drvdata ( & dev - > dev , NULL ) ;
2007-07-18 05:37:06 +04:00
return err ;
}
return 0 ;
}
static int blkif_recover ( struct blkfront_info * info )
{
int i ;
struct blkif_request * req ;
struct blk_shadow * copy ;
int j ;
/* Stage 1: Make a safe copy of the shadow state. */
2008-06-17 12:47:08 +04:00
copy = kmalloc ( sizeof ( info - > shadow ) ,
GFP_NOIO | __GFP_REPEAT | __GFP_HIGH ) ;
2007-07-18 05:37:06 +04:00
if ( ! copy )
return - ENOMEM ;
memcpy ( copy , info - > shadow , sizeof ( info - > shadow ) ) ;
/* Stage 2: Set up free list. */
memset ( & info - > shadow , 0 , sizeof ( info - > shadow ) ) ;
for ( i = 0 ; i < BLK_RING_SIZE ; i + + )
info - > shadow [ i ] . req . id = i + 1 ;
info - > shadow_free = info - > ring . req_prod_pvt ;
info - > shadow [ BLK_RING_SIZE - 1 ] . req . id = 0x0fffffff ;
/* Stage 3: Find pending requests and requeue them. */
for ( i = 0 ; i < BLK_RING_SIZE ; i + + ) {
/* Not in use? */
if ( copy [ i ] . request = = 0 )
continue ;
/* Grab a request slot and copy shadow state into it. */
req = RING_GET_REQUEST ( & info - > ring , info - > ring . req_prod_pvt ) ;
* req = copy [ i ] . req ;
/* We get a new request id, and must reset the shadow state. */
req - > id = get_id_from_freelist ( info ) ;
memcpy ( & info - > shadow [ req - > id ] , & copy [ i ] , sizeof ( copy [ i ] ) ) ;
/* Rewrite any grant references invalidated by susp/resume. */
for ( j = 0 ; j < req - > nr_segments ; j + + )
gnttab_grant_foreign_access_ref (
req - > seg [ j ] . gref ,
info - > xbdev - > otherend_id ,
pfn_to_mfn ( info - > shadow [ req - > id ] . frame [ j ] ) ,
rq_data_dir (
( struct request * )
info - > shadow [ req - > id ] . request ) ) ;
info - > shadow [ req - > id ] . req = * req ;
info - > ring . req_prod_pvt + + ;
}
kfree ( copy ) ;
xenbus_switch_state ( info - > xbdev , XenbusStateConnected ) ;
spin_lock_irq ( & blkif_io_lock ) ;
/* Now safe for us to use the shared ring */
info - > connected = BLKIF_STATE_CONNECTED ;
/* Send off requeued requests */
flush_requests ( info ) ;
/* Kick any other new requests queued since we resumed */
kick_pending_request_queues ( info ) ;
spin_unlock_irq ( & blkif_io_lock ) ;
return 0 ;
}
/**
* We are reconnecting to the backend , due to a suspend / resume , or a backend
* driver restart . We tear down our blkif structure and recreate it , but
* leave the device - layer structures intact so that this is transparent to the
* rest of the kernel .
*/
static int blkfront_resume ( struct xenbus_device * dev )
{
2009-05-01 01:43:31 +04:00
struct blkfront_info * info = dev_get_drvdata ( & dev - > dev ) ;
2007-07-18 05:37:06 +04:00
int err ;
dev_dbg ( & dev - > dev , " blkfront_resume: %s \n " , dev - > nodename ) ;
blkif_free ( info , info - > connected = = BLKIF_STATE_CONNECTED ) ;
err = talk_to_backend ( dev , info ) ;
if ( info - > connected = = BLKIF_STATE_SUSPENDED & & ! err )
err = blkif_recover ( info ) ;
return err ;
}
/*
* Invoked when the backend is finally ' ready ' ( and has told produced
* the details about the physical device - # sectors , size , etc ) .
*/
static void blkfront_connect ( struct blkfront_info * info )
{
unsigned long long sectors ;
unsigned long sector_size ;
unsigned int binfo ;
int err ;
if ( ( info - > connected = = BLKIF_STATE_CONNECTED ) | |
( info - > connected = = BLKIF_STATE_SUSPENDED ) )
return ;
dev_dbg ( & info - > xbdev - > dev , " %s:%s. \n " ,
__func__ , info - > xbdev - > otherend ) ;
err = xenbus_gather ( XBT_NIL , info - > xbdev - > otherend ,
" sectors " , " %llu " , & sectors ,
" info " , " %u " , & binfo ,
" sector-size " , " %lu " , & sector_size ,
NULL ) ;
if ( err ) {
xenbus_dev_fatal ( info - > xbdev , err ,
" reading backend fields at %s " ,
info - > xbdev - > otherend ) ;
return ;
}
err = xenbus_gather ( XBT_NIL , info - > xbdev - > otherend ,
" feature-barrier " , " %lu " , & info - > feature_barrier ,
NULL ) ;
if ( err )
info - > feature_barrier = 0 ;
2008-09-18 01:30:32 +04:00
err = xlvbd_alloc_gendisk ( sectors , info , binfo , sector_size ) ;
2007-07-18 05:37:06 +04:00
if ( err ) {
xenbus_dev_fatal ( info - > xbdev , err , " xlvbd_add at %s " ,
info - > xbdev - > otherend ) ;
return ;
}
xenbus_switch_state ( info - > xbdev , XenbusStateConnected ) ;
/* Kick pending requests. */
spin_lock_irq ( & blkif_io_lock ) ;
info - > connected = BLKIF_STATE_CONNECTED ;
kick_pending_request_queues ( info ) ;
spin_unlock_irq ( & blkif_io_lock ) ;
add_disk ( info - > gd ) ;
2008-04-02 21:54:04 +04:00
info - > is_ready = 1 ;
2007-07-18 05:37:06 +04:00
}
/**
* Handle the change of state of the backend to Closing . We must delete our
* device - layer structures now , to ensure that writes are flushed through to
* the backend . Once is this done , we can switch to Closed in
* acknowledgement .
*/
static void blkfront_closing ( struct xenbus_device * dev )
{
2009-05-01 01:43:31 +04:00
struct blkfront_info * info = dev_get_drvdata ( & dev - > dev ) ;
2007-07-18 05:37:06 +04:00
unsigned long flags ;
dev_dbg ( & dev - > dev , " blkfront_closing: %s removed \n " , dev - > nodename ) ;
if ( info - > rq = = NULL )
goto out ;
spin_lock_irqsave ( & blkif_io_lock , flags ) ;
/* No more blkif_request(). */
blk_stop_queue ( info - > rq ) ;
/* No more gnttab callback work. */
gnttab_cancel_free_callback ( & info - > callback ) ;
spin_unlock_irqrestore ( & blkif_io_lock , flags ) ;
/* Flush gnttab callback work. Must be done with no locks held. */
flush_scheduled_work ( ) ;
blk_cleanup_queue ( info - > rq ) ;
info - > rq = NULL ;
2009-05-19 10:27:42 +04:00
del_gendisk ( info - > gd ) ;
2007-07-18 05:37:06 +04:00
out :
xenbus_frontend_closed ( dev ) ;
}
/**
* Callback received when the backend ' s state changes .
*/
static void backend_changed ( struct xenbus_device * dev ,
enum xenbus_state backend_state )
{
2009-05-01 01:43:31 +04:00
struct blkfront_info * info = dev_get_drvdata ( & dev - > dev ) ;
2007-07-18 05:37:06 +04:00
struct block_device * bd ;
dev_dbg ( & dev - > dev , " blkfront:backend_changed. \n " ) ;
switch ( backend_state ) {
case XenbusStateInitialising :
case XenbusStateInitWait :
case XenbusStateInitialised :
case XenbusStateUnknown :
case XenbusStateClosed :
break ;
case XenbusStateConnected :
blkfront_connect ( info ) ;
break ;
case XenbusStateClosing :
2009-05-19 10:25:48 +04:00
if ( info - > gd = = NULL ) {
xenbus_frontend_closed ( dev ) ;
break ;
}
2008-04-02 21:54:03 +04:00
bd = bdget_disk ( info - > gd , 0 ) ;
2007-07-18 05:37:06 +04:00
if ( bd = = NULL )
xenbus_dev_fatal ( dev , - ENODEV , " bdget failed " ) ;
mutex_lock ( & bd - > bd_mutex ) ;
if ( info - > users > 0 )
xenbus_dev_error ( dev , - EBUSY ,
" Device in use; refusing to close " ) ;
else
blkfront_closing ( dev ) ;
mutex_unlock ( & bd - > bd_mutex ) ;
bdput ( bd ) ;
break ;
}
}
static int blkfront_remove ( struct xenbus_device * dev )
{
2009-05-01 01:43:31 +04:00
struct blkfront_info * info = dev_get_drvdata ( & dev - > dev ) ;
2007-07-18 05:37:06 +04:00
dev_dbg ( & dev - > dev , " blkfront_remove: %s removed \n " , dev - > nodename ) ;
blkif_free ( info , 0 ) ;
kfree ( info ) ;
return 0 ;
}
2008-04-02 21:54:04 +04:00
static int blkfront_is_ready ( struct xenbus_device * dev )
{
2009-05-01 01:43:31 +04:00
struct blkfront_info * info = dev_get_drvdata ( & dev - > dev ) ;
2008-04-02 21:54:04 +04:00
return info - > is_ready ;
}
2008-03-02 18:23:47 +03:00
/* Block-device open: count one more user of the device.  Always returns 0. */
static int blkif_open(struct block_device *bdev, fmode_t mode)
{
	struct blkfront_info *info = bdev->bd_disk->private_data;

	++info->users;

	return 0;
}
2008-03-02 18:23:47 +03:00
/*
 * Block-device release: drop one user.  When the last user goes away and
 * the backend is waiting in the Closing state, complete the close that was
 * refused while the device was still in use.  Always returns 0.
 */
static int blkif_release(struct gendisk *disk, fmode_t mode)
{
	struct blkfront_info *info = disk->private_data;
	struct xenbus_device *dev;
	enum xenbus_state backend_state;

	info->users--;
	if (info->users != 0)
		return 0;

	/*
	 * Check whether we have been instructed to close.  We will have
	 * ignored this request initially, as the device was still mounted.
	 */
	dev = info->xbdev;
	backend_state = xenbus_read_driver_state(dev->otherend);

	if (backend_state == XenbusStateClosing && info->is_ready)
		blkfront_closing(dev);

	return 0;
}
static struct block_device_operations xlvbd_block_fops =
{
. owner = THIS_MODULE ,
2008-03-02 18:23:47 +03:00
. open = blkif_open ,
. release = blkif_release ,
2008-02-22 00:03:45 +03:00
. getgeo = blkif_getgeo ,
2008-03-02 18:23:47 +03:00
. locked_ioctl = blkif_ioctl ,
2007-07-18 05:37:06 +04:00
} ;
/* Device types handled by this driver; the empty entry ends the table. */
static struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};
static struct xenbus_driver blkfront = {
. name = " vbd " ,
. owner = THIS_MODULE ,
. ids = blkfront_ids ,
. probe = blkfront_probe ,
. remove = blkfront_remove ,
. resume = blkfront_resume ,
. otherend_changed = backend_changed ,
2008-04-02 21:54:04 +04:00
. is_ready = blkfront_is_ready ,
2007-07-18 05:37:06 +04:00
} ;
/*
 * Module init: claim the block major and register the xenbus frontend
 * driver.
 *
 * Returns -ENODEV when not running as a Xen domain or when the major cannot
 * be obtained, otherwise the result of xenbus_register_frontend().  If that
 * registration fails the block major is released again so that nothing
 * stays registered after a failed load.
 */
static int __init xlblk_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
		printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
		       XENVBD_MAJOR, DEV_NAME);
		return -ENODEV;
	}

	ret = xenbus_register_frontend(&blkfront);
	if (ret) {
		/* Undo the major registration; init is about to fail. */
		unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
		return ret;
	}

	return 0;
}
module_init(xlblk_init);
2008-06-17 12:47:08 +04:00
/*
 * Module exit: undo xlblk_init() in reverse order by unregistering the
 * xenbus driver and then releasing the block major claimed at init time.
 */
static void __exit xlblk_exit(void)
{
	xenbus_unregister_driver(&blkfront);
	unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
}
module_exit(xlblk_exit);
/* Module metadata and the aliases under which the module can be requested. */
MODULE_DESCRIPTION("Xen virtual block device frontend");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
MODULE_ALIAS("xen:vbd");
MODULE_ALIAS("xenblk");