/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-list.h"
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/blktrace_api.h>

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;
/*
 * One of these is allocated per bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	struct bio *bio;
	atomic_t io_count;
	unsigned long start_time;
};

/*
 * One of these is allocated per target within a bio.  Hopefully
 * this will be simplified out one day.
 */
struct target_io {
	struct dm_io *io;
	struct dm_target *ti;
	union map_info info;
};

union map_info *dm_get_mapinfo(struct bio *bio)
{
	if (bio && bio->bi_private)
		return &((struct target_io *)bio->bi_private)->info;
	return NULL;
}
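
/*
 * Illustrative only (not part of the driver): a minimal sketch of how a
 * target module might use this helper from its end_io hook to recover
 * the per-io state it stashed at map time.  The field used here is an
 * assumption about what the target stored:
 *
 *	union map_info *info = dm_get_mapinfo(bio);
 *	if (info)
 *		my_state = info->ptr;
 */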
/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2

struct mapped_device {
	struct rw_semaphore io_lock;
	struct semaphore suspend_lock;
	rwlock_t map_lock;
	atomic_t holders;

	unsigned long flags;

	request_queue_t *queue;
	struct gendisk *disk;

	void *interface_ptr;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending;
	wait_queue_head_t wait;
	struct bio_list deferred;

	/*
	 * The current mapping.
	 */
	struct dm_table *map;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;
	mempool_t *tio_pool;

	/*
	 * Event handling.
	 */
	atomic_t event_nr;
	wait_queue_head_t eventq;

	/*
	 * freeze/thaw support requires holding onto a super block
	 */
	struct super_block *frozen_sb;
	struct block_device *suspended_bdev;
};

#define MIN_IOS 256
static kmem_cache_t *_io_cache;
static kmem_cache_t *_tio_cache;

static struct bio_set *dm_set;
static int __init local_init(void)
{
	int r;

	dm_set = bioset_create(16, 16, 4);
	if (!dm_set)
		return -ENOMEM;

	/* allocate a slab for the dm_ios */
	_io_cache = kmem_cache_create("dm_io",
				      sizeof(struct dm_io), 0, 0, NULL, NULL);
	if (!_io_cache)
		return -ENOMEM;

	/* allocate a slab for the target ios */
	_tio_cache = kmem_cache_create("dm_tio", sizeof(struct target_io),
				       0, 0, NULL, NULL);
	if (!_tio_cache) {
		kmem_cache_destroy(_io_cache);
		return -ENOMEM;
	}

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0) {
		kmem_cache_destroy(_tio_cache);
		kmem_cache_destroy(_io_cache);
		return r;
	}

	if (!_major)
		_major = r;

	return 0;
}

static void local_exit(void)
{
	kmem_cache_destroy(_tio_cache);
	kmem_cache_destroy(_io_cache);
	bioset_free(dm_set);

	if (unregister_blkdev(_major, _name) < 0)
		DMERR("unregister_blkdev failed");

	_major = 0;

	DMINFO("cleaned up");
}
static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_interface_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_interface_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

 bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();
}
/*
 * Block device functions
 */
static int dm_blk_open(struct inode *inode, struct file *file)
{
	struct mapped_device *md;

	md = inode->i_bdev->bd_disk->private_data;
	dm_get(md);
	return 0;
}

static int dm_blk_close(struct inode *inode, struct file *file)
{
	struct mapped_device *md;

	md = inode->i_bdev->bd_disk->private_data;
	dm_put(md);
	return 0;
}

static inline struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static inline void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static inline struct target_io *alloc_tio(struct mapped_device *md)
{
	return mempool_alloc(md->tio_pool, GFP_NOIO);
}

static inline void free_tio(struct mapped_device *md, struct target_io *tio)
{
	mempool_free(tio, md->tio_pool);
}
static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;

	io->start_time = jiffies;

	preempt_disable();
	disk_round_stats(dm_disk(md));
	preempt_enable();
	dm_disk(md)->in_flight = atomic_inc_return(&md->pending);
}

static int end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending;
	int rw = bio_data_dir(bio);

	preempt_disable();
	disk_round_stats(dm_disk(md));
	preempt_enable();
	dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);

	disk_stat_add(dm_disk(md), ticks[rw], duration);

	return !pending;
}
/*
 * Add the bio to the list of deferred io.
 */
static int queue_io(struct mapped_device *md, struct bio *bio)
{
	down_write(&md->io_lock);

	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
		up_write(&md->io_lock);
		return 1;
	}

	bio_list_add(&md->deferred, bio);

	up_write(&md->io_lock);
	return 0;		/* deferred successfully */
}

/*
 * Everyone (including functions in this file) should use this
 * function to access the md->map field, and make sure they call
 * dm_table_put() when finished.
 */
struct dm_table *dm_get_table(struct mapped_device *md)
{
	struct dm_table *t;

	read_lock(&md->map_lock);
	t = md->map;
	if (t)
		dm_table_get(t);
	read_unlock(&md->map_lock);

	return t;
}
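
/*
 * Illustrative only: a minimal sketch of the expected access pattern.
 * The reference taken by dm_get_table() pins the mapping, so every
 * successful call must be paired with dm_table_put():
 *
 *	struct dm_table *t = dm_get_table(md);
 *	if (t) {
 *		... use t, e.g. dm_table_get_size(t) ...
 *		dm_table_put(t);
 *	}
 */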
/*-----------------------------------------------------------------
 * CRUD START:
 *   A more elegant solution is in the works that uses the queue
 *   merge fn; unfortunately there are a couple of changes to
 *   the block layer that I want to make for this.  So in the
 *   interests of getting something for people to use I give
 *   you this clearly demarcated crap.
 *---------------------------------------------------------------*/

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */
static void dec_pending(struct dm_io *io, int error)
{
	if (error)
		io->error = error;

	if (atomic_dec_and_test(&io->io_count)) {
		if (end_io_acct(io))
			/* nudge anyone waiting on suspend queue */
			wake_up(&io->md->wait);

		blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE);

		bio_endio(io->bio, io->bio->bi_size, io->error);
		free_io(io->md, io);
	}
}
static int clone_endio(struct bio *bio, unsigned int done, int error)
{
	int r = 0;
	struct target_io *tio = bio->bi_private;
	struct dm_io *io = tio->io;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (bio->bi_size)
		return 1;

	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
		error = -EIO;

	if (endio) {
		r = endio(tio->ti, bio, error, &tio->info);
		if (r < 0)
			error = r;
		else if (r > 0)
			/* the target wants another shot at the io */
			return 1;
	}

	free_tio(io->md, tio);
	dec_pending(io, error);
	bio_put(bio);
	return r;
}

static sector_t max_io_len(struct mapped_device *md,
			   sector_t sector, struct dm_target *ti)
{
	sector_t offset = sector - ti->begin;
	sector_t len = ti->len - offset;

	/*
	 * Does the target need to split even further?
	 */
	if (ti->split_io) {
		sector_t boundary;
		boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
			   - offset;

		if (len > boundary)
			len = boundary;
	}

	return len;
}
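
/*
 * Worked example of the boundary arithmetic above (illustrative,
 * assuming split_io is a power of two, which the mask requires):
 * with ti->split_io = 8 and offset = 13, (13 + 8) & ~7 = 16, so
 * boundary = 16 - 13 = 3 sectors, i.e. the io is clipped to end
 * exactly on the next 8-sector boundary.
 */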
static void __map_bio(struct dm_target *ti, struct bio *clone,
		      struct target_io *tio)
{
	int r;
	sector_t sector;

	/*
	 * Sanity checks.
	 */
	BUG_ON(!clone->bi_size);

	clone->bi_end_io = clone_endio;
	clone->bi_private = tio;

	/*
	 * Map the clone.  If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&tio->io->io_count);
	sector = clone->bi_sector;
	r = ti->type->map(ti, clone, &tio->info);
	if (r > 0) {
		/* the bio has been remapped so dispatch it */

		blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
				    tio->io->bio->bi_bdev->bd_dev, sector,
				    clone->bi_sector);

		generic_make_request(clone);
	} else if (r < 0) {
		/* error the io and bail out */
		struct dm_io *io = tio->io;
		free_tio(tio->io->md, tio);
		dec_pending(io, r);
		bio_put(clone);
	}
}

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	sector_t sector_count;
	unsigned short idx;
};

static void dm_bio_destructor(struct bio *bio)
{
	bio_free(bio, dm_set);
}
/*
 * Creates a little bio that just does part of a bvec.
 */
static struct bio *split_bvec(struct bio *bio, sector_t sector,
			      unsigned short idx, unsigned int offset,
			      unsigned int len)
{
	struct bio *clone;
	struct bio_vec *bv = bio->bi_io_vec + idx;

	clone = bio_alloc_bioset(GFP_NOIO, 1, dm_set);
	clone->bi_destructor = dm_bio_destructor;
	*clone->bi_io_vec = *bv;

	clone->bi_sector = sector;
	clone->bi_bdev = bio->bi_bdev;
	clone->bi_rw = bio->bi_rw;
	clone->bi_vcnt = 1;
	clone->bi_size = to_bytes(len);
	clone->bi_io_vec->bv_offset = offset;
	clone->bi_io_vec->bv_len = clone->bi_size;

	return clone;
}

/*
 * Creates a bio that consists of a range of complete bvecs.
 */
static struct bio *clone_bio(struct bio *bio, sector_t sector,
			     unsigned short idx, unsigned short bv_count,
			     unsigned int len)
{
	struct bio *clone;

	clone = bio_clone(bio, GFP_NOIO);
	clone->bi_sector = sector;
	clone->bi_idx = idx;
	clone->bi_vcnt = idx + bv_count;
	clone->bi_size = to_bytes(len);
	clone->bi_flags &= ~(1 << BIO_SEG_VALID);

	return clone;
}
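
/*
 * Note on the vcnt arithmetic above (illustrative): with bi_idx = idx
 * and bi_vcnt = idx + bv_count, exactly bv_count bvecs starting at
 * index idx are active in the clone; e.g. idx = 2, bv_count = 3 walks
 * bi_io_vec[2..4].
 */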
static void __clone_and_map(struct clone_info *ci)
{
	struct bio *clone, *bio = ci->bio;
	struct dm_target *ti = dm_table_find_target(ci->map, ci->sector);
	sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti);
	struct target_io *tio;

	/*
	 * Allocate a target io object.
	 */
	tio = alloc_tio(ci->md);
	tio->io = ci->io;
	tio->ti = ti;
	memset(&tio->info, 0, sizeof(tio->info));

	if (ci->sector_count <= max) {
		/*
		 * Optimise for the simple case where we can do all of
		 * the remaining io with a single clone.
		 */
		clone = clone_bio(bio, ci->sector, ci->idx,
				  bio->bi_vcnt - ci->idx, ci->sector_count);
		__map_bio(ti, clone, tio);
		ci->sector_count = 0;

	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
		/*
		 * There are some bvecs that don't span targets.
		 * Do as many of these as possible.
		 */
		int i;
		sector_t remaining = max;
		sector_t bv_len;

		for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
			bv_len = to_sector(bio->bi_io_vec[i].bv_len);

			if (bv_len > remaining)
				break;

			remaining -= bv_len;
			len += bv_len;
		}

		clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len);
		__map_bio(ti, clone, tio);

		ci->sector += len;
		ci->sector_count -= len;
		ci->idx = i;

	} else {
		/*
		 * Handle a bvec that must be split between two or more targets.
		 */
		struct bio_vec *bv = bio->bi_io_vec + ci->idx;
		sector_t remaining = to_sector(bv->bv_len);
		unsigned int offset = 0;

		do {
			if (offset) {
				ti = dm_table_find_target(ci->map, ci->sector);
				max = max_io_len(ci->md, ci->sector, ti);

				tio = alloc_tio(ci->md);
				tio->io = ci->io;
				tio->ti = ti;
				memset(&tio->info, 0, sizeof(tio->info));
			}

			len = min(remaining, max);

			clone = split_bvec(bio, ci->sector, ci->idx,
					   bv->bv_offset + offset, len);

			__map_bio(ti, clone, tio);

			ci->sector += len;
			ci->sector_count -= len;
			offset += to_bytes(len);
		} while (remaining -= len);

		ci->idx++;
	}
}
/*
 * Split the bio into several clones.
 */
static void __split_bio(struct mapped_device *md, struct bio *bio)
{
	struct clone_info ci;

	ci.map = dm_get_table(md);
	if (!ci.map) {
		bio_io_error(bio, bio->bi_size);
		return;
	}

	ci.md = md;
	ci.bio = bio;
	ci.io = alloc_io(md);
	ci.io->error = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->bio = bio;
	ci.io->md = md;
	ci.sector = bio->bi_sector;
	ci.sector_count = bio_sectors(bio);
	ci.idx = bio->bi_idx;

	start_io_acct(ci.io);
	while (ci.sector_count)
		__clone_and_map(&ci);

	/* drop the extra reference count */
	dec_pending(ci.io, 0);
	dm_table_put(ci.map);
}

/*-----------------------------------------------------------------
 * CRUD END
 *---------------------------------------------------------------*/
/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static int dm_request(request_queue_t *q, struct bio *bio)
{
	int r;
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;

	down_read(&md->io_lock);

	disk_stat_inc(dm_disk(md), ios[rw]);
	disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));

	/*
	 * If we're suspended we have to queue
	 * this io for later.
	 */
	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
		up_read(&md->io_lock);

		if (bio_rw(bio) == READA) {
			bio_io_error(bio, bio->bi_size);
			return 0;
		}

		r = queue_io(md, bio);
		if (r < 0) {
			bio_io_error(bio, bio->bi_size);
			return 0;

		} else if (r == 0)
			return 0;	/* deferred successfully */

		/*
		 * We're in a while loop, because someone could suspend
		 * before we get to the following read lock.
		 */
		down_read(&md->io_lock);
	}

	__split_bio(md, bio);
	up_read(&md->io_lock);
	return 0;
}
static int dm_flush_all(request_queue_t *q, struct gendisk *disk,
			sector_t *error_sector)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_table(md);
	int ret = -ENXIO;

	if (map) {
		ret = dm_table_flush_all(map);
		dm_table_put(map);
	}

	return ret;
}

static void dm_unplug_all(request_queue_t *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_table(md);

	if (map) {
		dm_table_unplug_all(map);
		dm_table_put(map);
	}
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r;
	struct mapped_device *md = (struct mapped_device *) congested_data;
	struct dm_table *map = dm_get_table(md);

	if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
		r = bdi_bits;
	else
		r = dm_table_any_congested(map, bdi_bits);

	dm_table_put(map);
	return r;
}
/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
static DECLARE_MUTEX(_minor_lock);
static DEFINE_IDR(_minor_idr);

static void free_minor(unsigned int minor)
{
	down(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	up(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(struct mapped_device *md, unsigned int minor)
{
	int r, m;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	down(&_minor_lock);

	if (idr_find(&_minor_idr, minor)) {
		r = -EBUSY;
		goto out;
	}

	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
	if (!r) {
		r = -ENOMEM;
		goto out;
	}

	r = idr_get_new_above(&_minor_idr, md, minor, &m);
	if (r)
		goto out;

	if (m != minor) {
		idr_remove(&_minor_idr, m);
		r = -EBUSY;
		goto out;
	}

out:
	up(&_minor_lock);
	return r;
}

static int next_free_minor(struct mapped_device *md, unsigned int *minor)
{
	int r;
	unsigned int m;

	down(&_minor_lock);

	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
	if (!r) {
		r = -ENOMEM;
		goto out;
	}

	r = idr_get_new(&_minor_idr, md, &m);
	if (r)
		goto out;

	if (m >= (1 << MINORBITS)) {
		idr_remove(&_minor_idr, m);
		r = -ENOSPC;
		goto out;
	}

	*minor = m;

out:
	up(&_minor_lock);
	return r;
}
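
/*
 * Note on the two-step allocation above (illustrative): idr_pre_get()
 * preallocates memory up front so that the idr_get_new*() call, made
 * while _minor_lock is held, does not normally fail for lack of
 * memory.  A minimal sketch of the same idiom for a hypothetical idr
 * "ids" and id "id":
 *
 *	if (!idr_pre_get(&ids, GFP_KERNEL))
 *		return -ENOMEM;
 *	r = idr_get_new(&ids, ptr, &id);
 */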
static struct block_device_operations dm_blk_dops;

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
{
	int r;
	struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);

	if (!md) {
		DMWARN("unable to allocate device, out of memory.");
		return NULL;
	}

	/* get a minor number for the dev */
	r = persistent ? specific_minor(md, minor) : next_free_minor(md, &minor);
	if (r < 0)
		goto bad1;

	memset(md, 0, sizeof(*md));
	init_rwsem(&md->io_lock);
	init_MUTEX(&md->suspend_lock);
	rwlock_init(&md->map_lock);
	atomic_set(&md->holders, 1);
	atomic_set(&md->event_nr, 0);

	md->queue = blk_alloc_queue(GFP_KERNEL);
	if (!md->queue)
		goto bad1_free_minor;

	md->queue->queuedata = md;
	md->queue->backing_dev_info.congested_fn = dm_any_congested;
	md->queue->backing_dev_info.congested_data = md;
	blk_queue_make_request(md->queue, dm_request);
	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
	md->queue->unplug_fn = dm_unplug_all;
	md->queue->issue_flush_fn = dm_flush_all;

	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
	if (!md->io_pool)
		goto bad2;

	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
	if (!md->tio_pool)
		goto bad3;

	md->disk = alloc_disk(1);
	if (!md->disk)
		goto bad4;

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);
	add_disk(md->disk);

	atomic_set(&md->pending, 0);
	init_waitqueue_head(&md->wait);
	init_waitqueue_head(&md->eventq);

	return md;

 bad4:
	mempool_destroy(md->tio_pool);
 bad3:
	mempool_destroy(md->io_pool);
 bad2:
	blk_cleanup_queue(md->queue);
 bad1_free_minor:
	free_minor(minor);
 bad1:
	kfree(md);
	return NULL;
}
static void free_dev(struct mapped_device *md)
{
	unsigned int minor = md->disk->first_minor;

	if (md->suspended_bdev) {
		thaw_bdev(md->suspended_bdev, NULL);
		bdput(md->suspended_bdev);
	}
	mempool_destroy(md->tio_pool);
	mempool_destroy(md->io_pool);
	del_gendisk(md->disk);
	free_minor(minor);
	put_disk(md->disk);
	blk_cleanup_queue(md->queue);
	kfree(md);
}
/*
 * Bind a table to the device.
 */
static void event_callback(void *context)
{
	struct mapped_device *md = (struct mapped_device *) context;

	atomic_inc(&md->event_nr);
	wake_up(&md->eventq);
}

static void __set_size(struct mapped_device *md, sector_t size)
{
	set_capacity(md->disk, size);

	mutex_lock(&md->suspended_bdev->bd_inode->i_mutex);
	i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
	mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex);
}

static int __bind(struct mapped_device *md, struct dm_table *t)
{
	request_queue_t *q = md->queue;
	sector_t size;

	size = dm_table_get_size(t);
	__set_size(md, size);
	if (size == 0)
		return 0;

	dm_table_get(t);
	dm_table_event_callback(t, event_callback, md);

	write_lock(&md->map_lock);
	md->map = t;
	dm_table_set_restrictions(t, q);
	write_unlock(&md->map_lock);

	return 0;
}

static void __unbind(struct mapped_device *md)
{
	struct dm_table *map = md->map;

	if (!map)
		return;

	dm_table_event_callback(map, NULL, NULL);
	write_lock(&md->map_lock);
	md->map = NULL;
	write_unlock(&md->map_lock);
	dm_table_put(map);
}
/*
 * Constructor for a new device.
 */
static int create_aux(unsigned int minor, int persistent,
		      struct mapped_device **result)
{
	struct mapped_device *md;

	md = alloc_dev(minor, persistent);
	if (!md)
		return -ENXIO;

	*result = md;
	return 0;
}

int dm_create(struct mapped_device **result)
{
	return create_aux(0, 0, result);
}

int dm_create_with_minor(unsigned int minor, struct mapped_device **result)
{
	return create_aux(minor, 1, result);
}

static struct mapped_device *dm_find_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	down(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (!md || (dm_disk(md)->first_minor != minor))
		md = NULL;

	up(&_minor_lock);

	return md;
}

struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md = dm_find_md(dev);

	if (md)
		dm_get(md);

	return md;
}

void *dm_get_mdptr(dev_t dev)
{
	struct mapped_device *md;
	void *mdptr = NULL;

	md = dm_find_md(dev);
	if (md)
		mdptr = md->interface_ptr;
	return mdptr;
}

void dm_set_mdptr(struct mapped_device *md, void *ptr)
{
	md->interface_ptr = ptr;
}

void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
}

void dm_put(struct mapped_device *md)
{
	struct dm_table *map = dm_get_table(md);

	if (atomic_dec_and_test(&md->holders)) {
		if (!dm_suspended(md)) {
			dm_table_presuspend_targets(map);
			dm_table_postsuspend_targets(map);
		}
		__unbind(md);
		free_dev(md);
	}

	dm_table_put(map);
}
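
/*
 * Reference-counting sketch (illustrative): every holder of an md
 * pairs dm_get() with dm_put(); the last dm_put() tears the device
 * down via __unbind() + free_dev().  dm_blk_open()/dm_blk_close()
 * above follow exactly this pattern.
 */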
/*
 * Process the deferred bios
 */
static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
{
	struct bio *n;

	while (c) {
		n = c->bi_next;
		c->bi_next = NULL;
		__split_bio(md, c);
		c = n;
	}
}

/*
 * Swap in a new table (destroying old one).
 */
int dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	int r = -EINVAL;

	down(&md->suspend_lock);

	/* device must be suspended */
	if (!dm_suspended(md))
		goto out;

	__unbind(md);
	r = __bind(md, table);

out:
	up(&md->suspend_lock);
	return r;
}
/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(md->frozen_sb);

	md->frozen_sb = freeze_bdev(md->suspended_bdev);
	if (IS_ERR(md->frozen_sb)) {
		r = PTR_ERR(md->frozen_sb);
		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);

	/* don't bdput right now, we don't want the bdev
	 * to go away while it is locked.
	 */
	return 0;
}

static void unlock_fs(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	thaw_bdev(md->suspended_bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}
/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_swap_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
int dm_suspend(struct mapped_device *md, int do_lockfs)
{
	struct dm_table *map = NULL;
	DECLARE_WAITQUEUE(wait, current);
	int r = -EINVAL;

	down(&md->suspend_lock);

	if (dm_suspended(md))
		goto out;

	map = dm_get_table(md);

	/* This does not get reverted if there's an error later. */
	dm_table_presuspend_targets(map);

	md->suspended_bdev = bdget_disk(md->disk, 0);
	if (!md->suspended_bdev) {
		DMWARN("bdget failed in dm_suspend");
		r = -ENOMEM;
		goto out;
	}

	/* Flush I/O to the device. */
	if (do_lockfs) {
		r = lock_fs(md);
		if (r)
			goto out;
	}

	/*
	 * First we set the BLOCK_IO flag so no more ios will be mapped.
	 */
	down_write(&md->io_lock);
	set_bit(DMF_BLOCK_IO, &md->flags);

	add_wait_queue(&md->wait, &wait);
	up_write(&md->io_lock);

	/* unplug */
	if (map)
		dm_table_unplug_all(map);

	/*
	 * Then we wait for the already mapped ios to
	 * complete.
	 */
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (!atomic_read(&md->pending) || signal_pending(current))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	down_write(&md->io_lock);
	remove_wait_queue(&md->wait, &wait);

	/* were we interrupted? */
	r = -EINTR;
	if (atomic_read(&md->pending)) {
		up_write(&md->io_lock);
		unlock_fs(md);
		clear_bit(DMF_BLOCK_IO, &md->flags);
		goto out;
	}
	up_write(&md->io_lock);

	dm_table_postsuspend_targets(map);

	set_bit(DMF_SUSPENDED, &md->flags);

	r = 0;

out:
	if (r && md->suspended_bdev) {
		bdput(md->suspended_bdev);
		md->suspended_bdev = NULL;
	}

	dm_table_put(map);
	up(&md->suspend_lock);
	return r;
}
int dm_resume(struct mapped_device *md)
{
	int r = -EINVAL;
	struct bio *def;
	struct dm_table *map = NULL;

	down(&md->suspend_lock);
	if (!dm_suspended(md))
		goto out;

	map = dm_get_table(md);
	if (!map || !dm_table_get_size(map))
		goto out;

	dm_table_resume_targets(map);

	down_write(&md->io_lock);
	clear_bit(DMF_BLOCK_IO, &md->flags);

	def = bio_list_get(&md->deferred);
	__flush_deferred_io(md, def);
	up_write(&md->io_lock);

	unlock_fs(md);

	bdput(md->suspended_bdev);
	md->suspended_bdev = NULL;

	clear_bit(DMF_SUSPENDED, &md->flags);

	dm_table_unplug_all(map);

	r = 0;

out:
	dm_table_put(map);
	up(&md->suspend_lock);
	return r;
}
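
/*
 * Illustrative only: the table-swap sequence an in-kernel caller is
 * expected to follow (a sketch, assuming a hypothetical "new_table"
 * already constructed, for a device that is currently live):
 *
 *	r = dm_suspend(md, 1);
 *	if (!r)
 *		r = dm_swap_table(md, new_table);
 *	if (!r)
 *		r = dm_resume(md);
 *
 * dm_suspend() blocks new io and waits out in-flight io; dm_resume()
 * replays any bios deferred while the device was suspended.
 */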
/*-----------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------*/
uint32_t dm_get_event_nr(struct mapped_device *md)
{
	return atomic_read(&md->event_nr);
}

int dm_wait_event(struct mapped_device *md, int event_nr)
{
	return wait_event_interruptible(md->eventq,
			(event_nr != atomic_read(&md->event_nr)));
}

/*
 * The gendisk is only valid as long as you have a reference
 * count on 'md'.
 */
struct gendisk *dm_disk(struct mapped_device *md)
{
	return md->disk;
}

int dm_suspended(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
}

static struct block_device_operations dm_blk_dops = {
	.open = dm_blk_open,
	.release = dm_blk_close,
	.owner = THIS_MODULE
};

EXPORT_SYMBOL(dm_get_mapinfo);

/*
 * module hooks
 */
module_init(dm_init);
module_exit(dm_exit);

module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");