2005-04-16 15:20:36 -07:00
/*
 * dm-snapshot.c
 *
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */
# include <linux/blkdev.h>
# include <linux/ctype.h>
# include <linux/device-mapper.h>
# include <linux/fs.h>
# include <linux/init.h>
# include <linux/kdev_t.h>
# include <linux/list.h>
# include <linux/mempool.h>
# include <linux/module.h>
# include <linux/slab.h>
# include <linux/vmalloc.h>
# include "dm-snap.h"
# include "dm-bio-list.h"
# include "kcopyd.h"
2006-06-26 00:27:35 -07:00
/*
 * Message prefix for this module.
 * NOTE(review): presumably consumed by the DM logging macros such as
 * DMERR() - confirm against device-mapper.h.
 */
#define DM_MSG_PREFIX "snapshots"

/*
 * Percentage step at which waiting users are woken up.
 */
#define WAKE_UP_PERCENT 5

/*
 * Priority used for snapshot copy jobs handed to kcopyd.
 */
#define SNAPSHOT_COPY_PRIORITY 2

/*
 * Number of pages every snapshot reserves for io; handed to
 * kcopyd_client_create() when a snapshot is constructed.
 */
#define SNAPSHOT_PAGES 256
2006-12-08 02:41:13 -08:00
/* Module-wide workqueue: created in dm_snapshot_init(), flushed by snapshot_dtr(). */
static struct workqueue_struct *ksnapd;

/* Work handler attached to each snapshot's queued_bios_work (defined below). */
static void flush_queued_bios(struct work_struct *work);
2006-10-03 01:15:30 -07:00
2005-04-16 15:20:36 -07:00
struct pending_exception {
struct exception e ;
/*
* Origin buffers waiting for this to complete are held
* in a bio list
*/
struct bio_list origin_bios ;
struct bio_list snapshot_bios ;
2006-03-27 01:17:42 -08:00
/*
* Short - term queue of pending exceptions prior to submission .
*/
struct list_head list ;
2005-04-16 15:20:36 -07:00
/*
2006-03-27 01:17:44 -08:00
* The primary pending_exception is the one that holds
2006-10-03 01:15:30 -07:00
* the ref_count and the list of origin_bios for a
2006-03-27 01:17:44 -08:00
* group of pending_exceptions . It is always last to get freed .
* These fields get set up when writing to the origin .
2005-04-16 15:20:36 -07:00
*/
2006-03-27 01:17:44 -08:00
struct pending_exception * primary_pe ;
/*
* Number of pending_exceptions processing this chunk .
* When this drops to zero we must complete the origin bios .
* If incrementing or decrementing this , hold pe - > snap - > lock for
* the sibling concerned and not pe - > primary_pe - > snap - > lock unless
* they are the same .
*/
2006-10-03 01:15:30 -07:00
atomic_t ref_count ;
2005-04-16 15:20:36 -07:00
/* Pointer back to snapshot context */
struct dm_snapshot * snap ;
/*
* 1 indicates the exception has already been sent to
* kcopyd .
*/
int started ;
} ;
/*
 * Hash table mapping origin volumes to lists of snapshots, and
 * a lock to protect it (see _origins and _origins_lock below).
 */
2006-12-06 20:33:20 -08:00
static struct kmem_cache * exception_cache ;
static struct kmem_cache * pending_cache ;
2005-04-16 15:20:36 -07:00
static mempool_t * pending_pool ;
/*
 * Per-origin bookkeeping.  One of these exists for every registered
 * origin device and lives in the _origins hash table.
 */
struct origin {
	/* The origin block device itself. */
	struct block_device *bdev;

	/* Link in the _origins hash bucket. */
	struct list_head hash_list;

	/* All snapshots currently registered against this origin. */
	struct list_head snapshots;
};
/*
 * Number of buckets in the origin hash table.  If this matches the
 * size of the minors list the hash should be nearly perfect.
 * ORIGIN_MASK selects a bucket and must stay ORIGIN_HASH_SIZE - 1.
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK (ORIGIN_HASH_SIZE - 1)

/* Bucket array, allocated by init_origin_hash(). */
static struct list_head *_origins;

/* Protects _origins and every origin's snapshot list. */
static struct rw_semaphore _origins_lock;
static int init_origin_hash ( void )
{
int i ;
_origins = kmalloc ( ORIGIN_HASH_SIZE * sizeof ( struct list_head ) ,
GFP_KERNEL ) ;
if ( ! _origins ) {
2006-06-26 00:27:35 -07:00
DMERR ( " unable to allocate memory " ) ;
2005-04-16 15:20:36 -07:00
return - ENOMEM ;
}
for ( i = 0 ; i < ORIGIN_HASH_SIZE ; i + + )
INIT_LIST_HEAD ( _origins + i ) ;
init_rwsem ( & _origins_lock ) ;
return 0 ;
}
/* Release the bucket array allocated by init_origin_hash(). */
static void exit_origin_hash(void)
{
	kfree(_origins);
}
static inline unsigned int origin_hash ( struct block_device * bdev )
{
return bdev - > bd_dev & ORIGIN_MASK ;
}
static struct origin * __lookup_origin ( struct block_device * origin )
{
struct list_head * ol ;
struct origin * o ;
ol = & _origins [ origin_hash ( origin ) ] ;
list_for_each_entry ( o , ol , hash_list )
if ( bdev_equal ( o - > bdev , origin ) )
return o ;
return NULL ;
}
static void __insert_origin ( struct origin * o )
{
struct list_head * sl = & _origins [ origin_hash ( o - > bdev ) ] ;
list_add_tail ( & o - > hash_list , sl ) ;
}
/*
* Make a note of the snapshot and its origin so we can look it
* up when the origin has a write on it .
*/
static int register_snapshot ( struct dm_snapshot * snap )
{
struct origin * o ;
struct block_device * bdev = snap - > origin - > bdev ;
down_write ( & _origins_lock ) ;
o = __lookup_origin ( bdev ) ;
if ( ! o ) {
/* New origin */
o = kmalloc ( sizeof ( * o ) , GFP_KERNEL ) ;
if ( ! o ) {
up_write ( & _origins_lock ) ;
return - ENOMEM ;
}
/* Initialise the struct */
INIT_LIST_HEAD ( & o - > snapshots ) ;
o - > bdev = bdev ;
__insert_origin ( o ) ;
}
list_add_tail ( & snap - > list , & o - > snapshots ) ;
up_write ( & _origins_lock ) ;
return 0 ;
}
static void unregister_snapshot ( struct dm_snapshot * s )
{
struct origin * o ;
down_write ( & _origins_lock ) ;
o = __lookup_origin ( s - > origin - > bdev ) ;
list_del ( & s - > list ) ;
if ( list_empty ( & o - > snapshots ) ) {
list_del ( & o - > hash_list ) ;
kfree ( o ) ;
}
up_write ( & _origins_lock ) ;
}
/*
* Implementation of the exception hash tables .
*/
/*
 * Set up an exception hash table with 'size' buckets.  hash_mask is
 * derived as size - 1, so 'size' is expected to be a power of two.
 *
 * Returns 0 on success or -ENOMEM if the bucket array cannot be allocated.
 */
static int init_exception_table(struct exception_table *et, uint32_t size)
{
	struct list_head *bucket, *end;

	et->hash_mask = size - 1;
	et->table = dm_vcalloc(size, sizeof(struct list_head));
	if (et->table == NULL)
		return -ENOMEM;

	end = et->table + size;
	for (bucket = et->table; bucket != end; bucket++)
		INIT_LIST_HEAD(bucket);

	return 0;
}
2006-12-06 20:33:20 -08:00
static void exit_exception_table ( struct exception_table * et , struct kmem_cache * mem )
2005-04-16 15:20:36 -07:00
{
struct list_head * slot ;
struct exception * ex , * next ;
int i , size ;
size = et - > hash_mask + 1 ;
for ( i = 0 ; i < size ; i + + ) {
slot = et - > table + i ;
list_for_each_entry_safe ( ex , next , slot , hash_list )
kmem_cache_free ( mem , ex ) ;
}
vfree ( et - > table ) ;
}
/* Bucket index of a chunk: the chunk number masked by the table's hash_mask. */
static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
{
	uint32_t bucket = chunk & et->hash_mask;

	return bucket;
}
static void insert_exception ( struct exception_table * eh , struct exception * e )
{
struct list_head * l = & eh - > table [ exception_hash ( eh , e - > old_chunk ) ] ;
list_add ( & e - > hash_list , l ) ;
}
static inline void remove_exception ( struct exception * e )
{
list_del ( & e - > hash_list ) ;
}
/*
 * Look up the exception recorded for a chunk.
 * Returns the exception whose old_chunk matches, or NULL if the chunk
 * has not been remapped in this table.
 */
static struct exception *lookup_exception(struct exception_table *et,
					  chunk_t chunk)
{
	struct list_head *bucket = &et->table[exception_hash(et, chunk)];
	struct exception *cur;

	list_for_each_entry(cur, bucket, hash_list) {
		if (cur->old_chunk == chunk)
			return cur;
	}

	return NULL;
}
/*
 * Allocate a struct exception from exception_cache.  A GFP_NOIO
 * attempt is made first, with one GFP_ATOMIC retry if that fails.
 * May return NULL.
 */
static inline struct exception *alloc_exception(void)
{
	struct exception *e = kmem_cache_alloc(exception_cache, GFP_NOIO);

	if (e)
		return e;

	return kmem_cache_alloc(exception_cache, GFP_ATOMIC);
}
/* Give an exception back to exception_cache. */
static inline void free_exception(struct exception *e)
{
	kmem_cache_free(exception_cache, e);
}
/* Take a pending_exception from pending_pool, allocating with GFP_NOIO. */
static inline struct pending_exception *alloc_pending_exception(void)
{
	struct pending_exception *pe = mempool_alloc(pending_pool, GFP_NOIO);

	return pe;
}
/* Return a pending_exception to pending_pool. */
static inline void free_pending_exception(struct pending_exception *pe)
{
	mempool_free(pe, pending_pool);
}
/*
 * Record a completed exception mapping chunk 'old' to chunk 'new' in
 * the snapshot's table of completed exceptions.
 *
 * Returns 0 on success or -ENOMEM if no exception could be allocated.
 */
int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
{
	struct exception *ex = alloc_exception();

	if (ex == NULL)
		return -ENOMEM;

	ex->old_chunk = old;
	ex->new_chunk = new;
	insert_exception(&s->complete, ex);

	return 0;
}
/*
 * Hard coded magic: the largest number of hash buckets we are willing
 * to allocate, i.e. as many list heads as fit in a fixed 2MB.
 */
static int calc_max_buckets(void)
{
	return (2UL * 1024 * 1024) / sizeof(struct list_head);
}
/*
 * Round a number down to a power of 2 by stripping the lowest set bit
 * until only one bit remains.  0 is returned unchanged.
 */
static inline uint32_t round_down(uint32_t n)
{
	uint32_t rest;

	/* n & (n - 1) is n with its lowest set bit cleared. */
	for (rest = n & (n - 1); rest; rest = n & (n - 1))
		n = rest;

	return n;
}
/*
* Allocate room for a suitable hash table .
*/
static int init_hash_tables ( struct dm_snapshot * s )
{
sector_t hash_size , cow_dev_size , origin_dev_size , max_buckets ;
/*
* Calculate based on the size of the original volume or
* the COW volume . . .
*/
cow_dev_size = get_dev_size ( s - > cow - > bdev ) ;
origin_dev_size = get_dev_size ( s - > origin - > bdev ) ;
max_buckets = calc_max_buckets ( ) ;
hash_size = min ( origin_dev_size , cow_dev_size ) > > s - > chunk_shift ;
hash_size = min ( hash_size , max_buckets ) ;
/* Round it down to a power of 2 */
hash_size = round_down ( hash_size ) ;
if ( init_exception_table ( & s - > complete , hash_size ) )
return - ENOMEM ;
/*
* Allocate hash table for in - flight exceptions
* Make this smaller than the real hash table
*/
hash_size > > = 3 ;
if ( hash_size < 64 )
hash_size = 64 ;
if ( init_exception_table ( & s - > pending , hash_size ) ) {
exit_exception_table ( & s - > complete , exception_cache ) ;
return - ENOMEM ;
}
return 0 ;
}
/*
 * Round n up to the next multiple of 'size'.  'size' must be a
 * power of 2.
 */
static inline ulong round_up(ulong n, ulong size)
{
	ulong mask = size - 1;

	return (n + mask) & ~mask;
}
2006-10-03 01:15:25 -07:00
static int set_chunk_size ( struct dm_snapshot * s , const char * chunk_size_arg ,
char * * error )
{
unsigned long chunk_size ;
char * value ;
chunk_size = simple_strtoul ( chunk_size_arg , & value , 10 ) ;
if ( * chunk_size_arg = = ' \0 ' | | * value ! = ' \0 ' ) {
* error = " Invalid chunk size " ;
return - EINVAL ;
}
if ( ! chunk_size ) {
s - > chunk_size = s - > chunk_mask = s - > chunk_shift = 0 ;
return 0 ;
}
/*
* Chunk size must be multiple of page size . Silently
* round up if it ' s not .
*/
chunk_size = round_up ( chunk_size , PAGE_SIZE > > 9 ) ;
/* Check chunk_size is a power of 2 */
if ( chunk_size & ( chunk_size - 1 ) ) {
* error = " Chunk size is not a power of 2 " ;
return - EINVAL ;
}
/* Validate the chunk size against the device block size */
if ( chunk_size % ( bdev_hardsect_size ( s - > cow - > bdev ) > > 9 ) ) {
* error = " Chunk size is not a multiple of device blocksize " ;
return - EINVAL ;
}
s - > chunk_size = chunk_size ;
s - > chunk_mask = chunk_size - 1 ;
s - > chunk_shift = ffs ( chunk_size ) - 1 ;
return 0 ;
}
2005-04-16 15:20:36 -07:00
/*
* Construct a snapshot mapping : < origin_dev > < COW - dev > < p / n > < chunk - size >
*/
static int snapshot_ctr ( struct dm_target * ti , unsigned int argc , char * * argv )
{
struct dm_snapshot * s ;
int r = - EINVAL ;
char persistent ;
char * origin_path ;
char * cow_path ;
2006-10-03 01:15:25 -07:00
if ( argc ! = 4 ) {
2006-06-26 00:27:35 -07:00
ti - > error = " requires exactly 4 arguments " ;
2005-04-16 15:20:36 -07:00
r = - EINVAL ;
goto bad1 ;
}
origin_path = argv [ 0 ] ;
cow_path = argv [ 1 ] ;
persistent = toupper ( * argv [ 2 ] ) ;
if ( persistent ! = ' P ' & & persistent ! = ' N ' ) {
ti - > error = " Persistent flag is not P or N " ;
r = - EINVAL ;
goto bad1 ;
}
s = kmalloc ( sizeof ( * s ) , GFP_KERNEL ) ;
if ( s = = NULL ) {
ti - > error = " Cannot allocate snapshot context private "
" structure " ;
r = - ENOMEM ;
goto bad1 ;
}
r = dm_get_device ( ti , origin_path , 0 , ti - > len , FMODE_READ , & s - > origin ) ;
if ( r ) {
ti - > error = " Cannot get origin device " ;
goto bad2 ;
}
r = dm_get_device ( ti , cow_path , 0 , 0 ,
FMODE_READ | FMODE_WRITE , & s - > cow ) ;
if ( r ) {
dm_put_device ( ti , s - > origin ) ;
ti - > error = " Cannot get COW device " ;
goto bad2 ;
}
2006-10-03 01:15:25 -07:00
r = set_chunk_size ( s , argv [ 3 ] , & ti - > error ) ;
if ( r )
2005-04-16 15:20:36 -07:00
goto bad3 ;
s - > type = persistent ;
s - > valid = 1 ;
2006-02-01 03:04:50 -08:00
s - > active = 0 ;
2005-04-16 15:20:36 -07:00
s - > last_percent = 0 ;
init_rwsem ( & s - > lock ) ;
2006-10-03 01:15:30 -07:00
spin_lock_init ( & s - > pe_lock ) ;
2005-04-16 15:20:36 -07:00
s - > table = ti - > table ;
/* Allocate hash table for COW data */
if ( init_hash_tables ( s ) ) {
ti - > error = " Unable to allocate hash table space " ;
r = - ENOMEM ;
goto bad3 ;
}
s - > store . snap = s ;
if ( persistent = = ' P ' )
2006-10-03 01:15:25 -07:00
r = dm_create_persistent ( & s - > store ) ;
2005-04-16 15:20:36 -07:00
else
2006-10-03 01:15:25 -07:00
r = dm_create_transient ( & s - > store ) ;
2005-04-16 15:20:36 -07:00
if ( r ) {
ti - > error = " Couldn't create exception store " ;
r = - EINVAL ;
goto bad4 ;
}
r = kcopyd_client_create ( SNAPSHOT_PAGES , & s - > kcopyd_client ) ;
if ( r ) {
ti - > error = " Could not create kcopyd client " ;
goto bad5 ;
}
2006-02-01 03:04:50 -08:00
/* Metadata must only be loaded into one table at once */
2006-10-03 01:15:25 -07:00
r = s - > store . read_metadata ( & s - > store ) ;
if ( r ) {
ti - > error = " Failed to read snapshot metadata " ;
goto bad6 ;
}
2006-02-01 03:04:50 -08:00
2006-10-03 01:15:30 -07:00
bio_list_init ( & s - > queued_bios ) ;
2006-11-22 14:57:56 +00:00
INIT_WORK ( & s - > queued_bios_work , flush_queued_bios ) ;
2006-10-03 01:15:30 -07:00
2005-04-16 15:20:36 -07:00
/* Add snapshot to the list of snapshots for this origin */
2006-02-01 03:04:50 -08:00
/* Exceptions aren't triggered till snapshot_resume() is called */
2005-04-16 15:20:36 -07:00
if ( register_snapshot ( s ) ) {
r = - EINVAL ;
ti - > error = " Cannot register snapshot origin " ;
goto bad6 ;
}
ti - > private = s ;
2006-06-26 00:27:18 -07:00
ti - > split_io = s - > chunk_size ;
2005-04-16 15:20:36 -07:00
return 0 ;
bad6 :
kcopyd_client_destroy ( s - > kcopyd_client ) ;
bad5 :
s - > store . destroy ( & s - > store ) ;
bad4 :
exit_exception_table ( & s - > pending , pending_cache ) ;
exit_exception_table ( & s - > complete , exception_cache ) ;
bad3 :
dm_put_device ( ti , s - > cow ) ;
dm_put_device ( ti , s - > origin ) ;
bad2 :
kfree ( s ) ;
bad1 :
return r ;
}
2006-12-08 02:41:11 -08:00
static void __free_exceptions ( struct dm_snapshot * s )
{
kcopyd_client_destroy ( s - > kcopyd_client ) ;
s - > kcopyd_client = NULL ;
exit_exception_table ( & s - > pending , pending_cache ) ;
exit_exception_table ( & s - > complete , exception_cache ) ;
s - > store . destroy ( & s - > store ) ;
}
2005-04-16 15:20:36 -07:00
static void snapshot_dtr ( struct dm_target * ti )
{
struct dm_snapshot * s = ( struct dm_snapshot * ) ti - > private ;
2006-10-03 01:15:30 -07:00
flush_workqueue ( ksnapd ) ;
2006-03-27 01:17:50 -08:00
/* Prevent further origin writes from using this snapshot. */
/* After this returns there can be no new kcopyd jobs. */
2005-04-16 15:20:36 -07:00
unregister_snapshot ( s ) ;
2006-12-08 02:41:11 -08:00
__free_exceptions ( s ) ;
2005-04-16 15:20:36 -07:00
dm_put_device ( ti , s - > origin ) ;
dm_put_device ( ti , s - > cow ) ;
2006-03-27 01:17:50 -08:00
2005-04-16 15:20:36 -07:00
kfree ( s ) ;
}
/*
* Flush a list of buffers .
*/
static void flush_bios ( struct bio * bio )
{
struct bio * n ;
while ( bio ) {
n = bio - > bi_next ;
bio - > bi_next = NULL ;
generic_make_request ( bio ) ;
bio = n ;
}
}
2006-11-22 14:57:56 +00:00
static void flush_queued_bios ( struct work_struct * work )
2006-10-03 01:15:30 -07:00
{
2006-11-22 14:57:56 +00:00
struct dm_snapshot * s =
container_of ( work , struct dm_snapshot , queued_bios_work ) ;
2006-10-03 01:15:30 -07:00
struct bio * queued_bios ;
unsigned long flags ;
spin_lock_irqsave ( & s - > pe_lock , flags ) ;
queued_bios = bio_list_get ( & s - > queued_bios ) ;
spin_unlock_irqrestore ( & s - > pe_lock , flags ) ;
flush_bios ( queued_bios ) ;
}
2005-04-16 15:20:36 -07:00
/*
* Error a list of buffers .
*/
static void error_bios ( struct bio * bio )
{
struct bio * n ;
while ( bio ) {
n = bio - > bi_next ;
bio - > bi_next = NULL ;
bio_io_error ( bio , bio - > bi_size ) ;
bio = n ;
}
}
2006-10-03 01:15:31 -07:00
static void __invalidate_snapshot ( struct dm_snapshot * s , int err )
2006-03-27 01:17:45 -08:00
{
if ( ! s - > valid )
return ;
if ( err = = - EIO )
DMERR ( " Invalidating snapshot: Error reading/writing. " ) ;
else if ( err = = - ENOMEM )
DMERR ( " Invalidating snapshot: Unable to allocate exception. " ) ;
if ( s - > store . drop_snapshot )
s - > store . drop_snapshot ( & s - > store ) ;
s - > valid = 0 ;
dm_table_event ( s - > table ) ;
}
2006-10-03 01:15:30 -07:00
static void get_pending_exception ( struct pending_exception * pe )
{
atomic_inc ( & pe - > ref_count ) ;
}
static struct bio * put_pending_exception ( struct pending_exception * pe )
{
struct pending_exception * primary_pe ;
struct bio * origin_bios = NULL ;
primary_pe = pe - > primary_pe ;
/*
* If this pe is involved in a write to the origin and
* it is the last sibling to complete then release
* the bios for the original write to the origin .
*/
if ( primary_pe & &
atomic_dec_and_test ( & primary_pe - > ref_count ) )
origin_bios = bio_list_get ( & primary_pe - > origin_bios ) ;
/*
* Free the pe if it ' s not linked to an origin write or if
* it ' s not itself a primary pe .
*/
if ( ! primary_pe | | primary_pe ! = pe )
free_pending_exception ( pe ) ;
/*
* Free the primary pe if nothing references it .
*/
if ( primary_pe & & ! atomic_read ( & primary_pe - > ref_count ) )
free_pending_exception ( primary_pe ) ;
return origin_bios ;
}
2005-04-16 15:20:36 -07:00
static void pending_complete ( struct pending_exception * pe , int success )
{
struct exception * e ;
struct dm_snapshot * s = pe - > snap ;
2006-10-03 01:15:29 -07:00
struct bio * origin_bios = NULL ;
struct bio * snapshot_bios = NULL ;
int error = 0 ;
2005-04-16 15:20:36 -07:00
2006-03-27 01:17:45 -08:00
if ( ! success ) {
/* Read/write error - snapshot is unusable */
2005-04-16 15:20:36 -07:00
down_write ( & s - > lock ) ;
2006-10-03 01:15:31 -07:00
__invalidate_snapshot ( s , - EIO ) ;
2006-10-03 01:15:29 -07:00
error = 1 ;
2006-03-27 01:17:45 -08:00
goto out ;
}
e = alloc_exception ( ) ;
if ( ! e ) {
2005-04-16 15:20:36 -07:00
down_write ( & s - > lock ) ;
2006-10-03 01:15:31 -07:00
__invalidate_snapshot ( s , - ENOMEM ) ;
2006-10-03 01:15:29 -07:00
error = 1 ;
2006-03-27 01:17:45 -08:00
goto out ;
}
* e = pe - > e ;
2005-04-16 15:20:36 -07:00
2006-03-27 01:17:45 -08:00
down_write ( & s - > lock ) ;
if ( ! s - > valid ) {
free_exception ( e ) ;
2006-10-03 01:15:29 -07:00
error = 1 ;
2006-03-27 01:17:45 -08:00
goto out ;
2005-04-16 15:20:36 -07:00
}
2006-10-03 01:15:29 -07:00
/*
* Add a proper exception , and remove the
* in - flight exception from the list .
*/
2006-03-27 01:17:45 -08:00
insert_exception ( & s - > complete , e ) ;
2005-04-16 15:20:36 -07:00
out :
2006-10-03 01:15:31 -07:00
remove_exception ( & pe - > e ) ;
2006-10-03 01:15:29 -07:00
snapshot_bios = bio_list_get ( & pe - > snapshot_bios ) ;
2006-10-03 01:15:30 -07:00
origin_bios = put_pending_exception ( pe ) ;
2005-04-16 15:20:36 -07:00
2006-10-03 01:15:29 -07:00
up_write ( & s - > lock ) ;
/* Submit any pending write bios */
if ( error )
error_bios ( snapshot_bios ) ;
else
flush_bios ( snapshot_bios ) ;
flush_bios ( origin_bios ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * Completion callback passed to the store's commit_exception();
 * 'context' is the pending exception being committed.
 */
static void commit_callback(void *context, int success)
{
	pending_complete((struct pending_exception *) context, success);
}
/*
* Called when the copy I / O has finished . kcopyd actually runs
* this code so don ' t block .
*/
static void copy_callback ( int read_err , unsigned int write_err , void * context )
{
struct pending_exception * pe = ( struct pending_exception * ) context ;
struct dm_snapshot * s = pe - > snap ;
if ( read_err | | write_err )
pending_complete ( pe , 0 ) ;
else
/* Update the metadata if we are persistent */
s - > store . commit_exception ( & s - > store , & pe - > e , commit_callback ,
pe ) ;
}
/*
* Dispatches the copy operation to kcopyd .
*/
2006-01-14 13:20:43 -08:00
static void start_copy ( struct pending_exception * pe )
2005-04-16 15:20:36 -07:00
{
struct dm_snapshot * s = pe - > snap ;
struct io_region src , dest ;
struct block_device * bdev = s - > origin - > bdev ;
sector_t dev_size ;
dev_size = get_dev_size ( bdev ) ;
src . bdev = bdev ;
src . sector = chunk_to_sector ( s , pe - > e . old_chunk ) ;
src . count = min ( s - > chunk_size , dev_size - src . sector ) ;
dest . bdev = s - > cow - > bdev ;
dest . sector = chunk_to_sector ( s , pe - > e . new_chunk ) ;
dest . count = src . count ;
/* Hand over to kcopyd */
kcopyd_copy ( s - > kcopyd_client ,
& src , 1 , & dest , 0 , copy_callback , pe ) ;
}
/*
* Looks to see if this snapshot already has a pending exception
* for this chunk , otherwise it allocates a new one and inserts
* it into the pending table .
*
* NOTE : a write lock must be held on snap - > lock before calling
* this .
*/
static struct pending_exception *
__find_pending_exception ( struct dm_snapshot * s , struct bio * bio )
{
struct exception * e ;
struct pending_exception * pe ;
chunk_t chunk = sector_to_chunk ( s , bio - > bi_sector ) ;
/*
* Is there a pending exception for this already ?
*/
e = lookup_exception ( & s - > pending , chunk ) ;
if ( e ) {
/* cast the exception to a pending exception */
pe = container_of ( e , struct pending_exception , e ) ;
2006-03-27 01:17:45 -08:00
goto out ;
}
2005-04-16 15:20:36 -07:00
2006-03-27 01:17:45 -08:00
/*
* Create a new pending exception , we don ' t want
* to hold the lock while we do this .
*/
up_write ( & s - > lock ) ;
pe = alloc_pending_exception ( ) ;
down_write ( & s - > lock ) ;
2005-04-16 15:20:36 -07:00
2006-03-27 01:17:45 -08:00
if ( ! s - > valid ) {
free_pending_exception ( pe ) ;
return NULL ;
}
2005-04-16 15:20:36 -07:00
2006-03-27 01:17:45 -08:00
e = lookup_exception ( & s - > pending , chunk ) ;
if ( e ) {
free_pending_exception ( pe ) ;
pe = container_of ( e , struct pending_exception , e ) ;
goto out ;
2005-04-16 15:20:36 -07:00
}
2006-03-27 01:17:45 -08:00
pe - > e . old_chunk = chunk ;
bio_list_init ( & pe - > origin_bios ) ;
bio_list_init ( & pe - > snapshot_bios ) ;
pe - > primary_pe = NULL ;
2006-10-03 01:15:30 -07:00
atomic_set ( & pe - > ref_count , 0 ) ;
2006-03-27 01:17:45 -08:00
pe - > snap = s ;
pe - > started = 0 ;
if ( s - > store . prepare_exception ( & s - > store , & pe - > e ) ) {
free_pending_exception ( pe ) ;
return NULL ;
}
2006-10-03 01:15:30 -07:00
get_pending_exception ( pe ) ;
2006-03-27 01:17:45 -08:00
insert_exception ( & s - > pending , & pe - > e ) ;
out :
2005-04-16 15:20:36 -07:00
return pe ;
}
static inline void remap_exception ( struct dm_snapshot * s , struct exception * e ,
struct bio * bio )
{
bio - > bi_bdev = s - > cow - > bdev ;
bio - > bi_sector = chunk_to_sector ( s , e - > new_chunk ) +
( bio - > bi_sector & s - > chunk_mask ) ;
}
static int snapshot_map ( struct dm_target * ti , struct bio * bio ,
union map_info * map_context )
{
struct exception * e ;
struct dm_snapshot * s = ( struct dm_snapshot * ) ti - > private ;
2006-12-08 02:41:06 -08:00
int r = DM_MAPIO_REMAPPED ;
2005-04-16 15:20:36 -07:00
chunk_t chunk ;
2006-03-27 01:17:45 -08:00
struct pending_exception * pe = NULL ;
2005-04-16 15:20:36 -07:00
chunk = sector_to_chunk ( s , bio - > bi_sector ) ;
/* Full snapshots are not usable */
2006-03-27 01:17:45 -08:00
/* To get here the table must be live so s->active is always set. */
2005-04-16 15:20:36 -07:00
if ( ! s - > valid )
2005-07-12 15:53:01 -07:00
return - EIO ;
2005-04-16 15:20:36 -07:00
2006-02-01 03:04:55 -08:00
if ( unlikely ( bio_barrier ( bio ) ) )
return - EOPNOTSUPP ;
2006-10-03 01:15:28 -07:00
/* FIXME: should only take write lock if we need
* to copy an exception */
down_write ( & s - > lock ) ;
if ( ! s - > valid ) {
r = - EIO ;
goto out_unlock ;
}
/* If the block is already remapped - use that, else remap it */
e = lookup_exception ( & s - > complete , chunk ) ;
if ( e ) {
remap_exception ( s , e , bio ) ;
goto out_unlock ;
}
2005-04-16 15:20:36 -07:00
/*
* Write to snapshot - higher level takes care of RW / RO
* flags so we should only get this if we are
* writeable .
*/
if ( bio_rw ( bio ) = = WRITE ) {
2006-03-27 01:17:45 -08:00
pe = __find_pending_exception ( s , bio ) ;
if ( ! pe ) {
2006-10-03 01:15:31 -07:00
__invalidate_snapshot ( s , - ENOMEM ) ;
2006-03-27 01:17:45 -08:00
r = - EIO ;
goto out_unlock ;
2005-04-16 15:20:36 -07:00
}
2006-03-27 01:17:45 -08:00
remap_exception ( s , & pe - > e , bio ) ;
bio_list_add ( & pe - > snapshot_bios , bio ) ;
2006-12-08 02:41:06 -08:00
r = DM_MAPIO_SUBMITTED ;
2006-10-03 01:15:28 -07:00
2006-03-27 01:17:45 -08:00
if ( ! pe - > started ) {
/* this is protected by snap->lock */
pe - > started = 1 ;
2006-10-03 01:15:28 -07:00
up_write ( & s - > lock ) ;
2006-03-27 01:17:45 -08:00
start_copy ( pe ) ;
2006-10-03 01:15:28 -07:00
goto out ;
}
} else
2005-04-16 15:20:36 -07:00
/*
* FIXME : this read path scares me because we
* always use the origin when we have a pending
* exception . However I can ' t think of a
* situation where this is wrong - ejt .
*/
2006-10-03 01:15:28 -07:00
bio - > bi_bdev = s - > origin - > bdev ;
2005-04-16 15:20:36 -07:00
2006-10-03 01:15:28 -07:00
out_unlock :
up_write ( & s - > lock ) ;
out :
2005-04-16 15:20:36 -07:00
return r ;
}
static void snapshot_resume ( struct dm_target * ti )
{
struct dm_snapshot * s = ( struct dm_snapshot * ) ti - > private ;
2006-02-01 03:04:50 -08:00
down_write ( & s - > lock ) ;
s - > active = 1 ;
up_write ( & s - > lock ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * Status hook for the snapshot target.
 *
 * STATUSTYPE_INFO:  "Invalid" for an invalid snapshot, the store's
 *                   numerator/denominator fullness if it provides
 *                   fraction_full, otherwise "Unknown".
 * STATUSTYPE_TABLE: origin name, COW name, type character and chunk size.
 *
 * Always returns 0.
 */
static int snapshot_status(struct dm_target *ti, status_type_t type,
			   char *result, unsigned int maxlen)
{
	struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		if (!snap->valid) {
			snprintf(result, maxlen, " Invalid ");
		} else if (snap->store.fraction_full) {
			sector_t numerator, denominator;

			snap->store.fraction_full(&snap->store,
						  &numerator,
						  &denominator);
			snprintf(result, maxlen, " %llu/%llu ",
				 (unsigned long long) numerator,
				 (unsigned long long) denominator);
		} else {
			snprintf(result, maxlen, " Unknown ");
		}
		break;

	case STATUSTYPE_TABLE:
		/*
		 * kdevname returns a static pointer so we need
		 * to make private copies if the output is to
		 * make sense.
		 */
		snprintf(result, maxlen, " %s %s %c %llu ",
			 snap->origin->name, snap->cow->name,
			 snap->type,
			 (unsigned long long) snap->chunk_size);
		break;
	}

	return 0;
}
/*-----------------------------------------------------------------
* Origin methods
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int __origin_write ( struct list_head * snapshots , struct bio * bio )
{
2006-12-08 02:41:06 -08:00
int r = DM_MAPIO_REMAPPED , first = 0 ;
2005-04-16 15:20:36 -07:00
struct dm_snapshot * snap ;
struct exception * e ;
2006-03-27 01:17:44 -08:00
struct pending_exception * pe , * next_pe , * primary_pe = NULL ;
2005-04-16 15:20:36 -07:00
chunk_t chunk ;
2006-03-27 01:17:42 -08:00
LIST_HEAD ( pe_queue ) ;
2005-04-16 15:20:36 -07:00
/* Do all the snapshots on this origin */
list_for_each_entry ( snap , snapshots , list ) {
2006-03-27 01:17:45 -08:00
down_write ( & snap - > lock ) ;
2006-02-01 03:04:50 -08:00
/* Only deal with valid and active snapshots */
if ( ! snap - > valid | | ! snap - > active )
2006-03-27 01:17:45 -08:00
goto next_snapshot ;
2005-04-16 15:20:36 -07:00
2005-07-12 15:53:05 -07:00
/* Nothing to do if writing beyond end of snapshot */
if ( bio - > bi_sector > = dm_table_get_size ( snap - > table ) )
2006-03-27 01:17:45 -08:00
goto next_snapshot ;
2005-04-16 15:20:36 -07:00
/*
* Remember , different snapshots can have
* different chunk sizes .
*/
chunk = sector_to_chunk ( snap , bio - > bi_sector ) ;
/*
* Check exception table to see if block
* is already remapped in this snapshot
* and trigger an exception if not .
2006-03-27 01:17:44 -08:00
*
2006-10-03 01:15:30 -07:00
* ref_count is initialised to 1 so pending_complete ( )
2006-03-27 01:17:44 -08:00
* won ' t destroy the primary_pe while we ' re inside this loop .
2005-04-16 15:20:36 -07:00
*/
e = lookup_exception ( & snap - > complete , chunk ) ;
2006-03-27 01:17:45 -08:00
if ( e )
goto next_snapshot ;
pe = __find_pending_exception ( snap , bio ) ;
if ( ! pe ) {
2006-10-03 01:15:31 -07:00
__invalidate_snapshot ( snap , - ENOMEM ) ;
2006-03-27 01:17:45 -08:00
goto next_snapshot ;
}
if ( ! primary_pe ) {
/*
* Either every pe here has same
* primary_pe or none has one yet .
*/
if ( pe - > primary_pe )
primary_pe = pe - > primary_pe ;
else {
primary_pe = pe ;
first = 1 ;
2005-04-16 15:20:36 -07:00
}
2006-03-27 01:17:45 -08:00
bio_list_add ( & primary_pe - > origin_bios , bio ) ;
2006-12-08 02:41:06 -08:00
r = DM_MAPIO_SUBMITTED ;
2006-03-27 01:17:45 -08:00
}
if ( ! pe - > primary_pe ) {
pe - > primary_pe = primary_pe ;
2006-10-03 01:15:30 -07:00
get_pending_exception ( primary_pe ) ;
2006-03-27 01:17:45 -08:00
}
if ( ! pe - > started ) {
pe - > started = 1 ;
list_add_tail ( & pe - > list , & pe_queue ) ;
2005-04-16 15:20:36 -07:00
}
2006-03-27 01:17:45 -08:00
next_snapshot :
2005-04-16 15:20:36 -07:00
up_write ( & snap - > lock ) ;
}
2006-03-27 01:17:44 -08:00
if ( ! primary_pe )
2006-10-03 01:15:30 -07:00
return r ;
2006-03-27 01:17:44 -08:00
/*
* If this is the first time we ' re processing this chunk and
2006-10-03 01:15:30 -07:00
* ref_count is now 1 it means all the pending exceptions
2006-03-27 01:17:44 -08:00
* got completed while we were in the loop above , so it falls to
* us here to remove the primary_pe and submit any origin_bios .
*/
2006-10-03 01:15:30 -07:00
if ( first & & atomic_dec_and_test ( & primary_pe - > ref_count ) ) {
2006-03-27 01:17:44 -08:00
flush_bios ( bio_list_get ( & primary_pe - > origin_bios ) ) ;
free_pending_exception ( primary_pe ) ;
/* If we got here, pe_queue is necessarily empty. */
2006-10-03 01:15:30 -07:00
return r ;
2006-03-27 01:17:44 -08:00
}
2005-04-16 15:20:36 -07:00
/*
* Now that we have a complete pe list we can start the copying .
*/
2006-03-27 01:17:42 -08:00
list_for_each_entry_safe ( pe , next_pe , & pe_queue , list )
start_copy ( pe ) ;
2005-04-16 15:20:36 -07:00
return r ;
}
/*
* Called on a write from the origin driver .
*/
static int do_origin ( struct dm_dev * origin , struct bio * bio )
{
struct origin * o ;
2006-12-08 02:41:06 -08:00
int r = DM_MAPIO_REMAPPED ;
2005-04-16 15:20:36 -07:00
down_read ( & _origins_lock ) ;
o = __lookup_origin ( origin - > bdev ) ;
if ( o )
r = __origin_write ( & o - > snapshots , bio ) ;
up_read ( & _origins_lock ) ;
return r ;
}
/*
* Origin : maps a linear range of a device , with hooks for snapshotting .
*/
/*
* Construct an origin mapping : < dev_path >
* The context for an origin is merely a ' struct dm_dev * '
* pointing to the real device .
*/
static int origin_ctr ( struct dm_target * ti , unsigned int argc , char * * argv )
{
int r ;
struct dm_dev * dev ;
if ( argc ! = 1 ) {
2006-06-26 00:27:35 -07:00
ti - > error = " origin: incorrect number of arguments " ;
2005-04-16 15:20:36 -07:00
return - EINVAL ;
}
r = dm_get_device ( ti , argv [ 0 ] , 0 , ti - > len ,
dm_table_get_mode ( ti - > table ) , & dev ) ;
if ( r ) {
ti - > error = " Cannot get target device " ;
return r ;
}
ti - > private = dev ;
return 0 ;
}
static void origin_dtr ( struct dm_target * ti )
{
struct dm_dev * dev = ( struct dm_dev * ) ti - > private ;
dm_put_device ( ti , dev ) ;
}
static int origin_map ( struct dm_target * ti , struct bio * bio ,
union map_info * map_context )
{
struct dm_dev * dev = ( struct dm_dev * ) ti - > private ;
bio - > bi_bdev = dev - > bdev ;
2006-02-01 03:04:55 -08:00
if ( unlikely ( bio_barrier ( bio ) ) )
return - EOPNOTSUPP ;
2005-04-16 15:20:36 -07:00
/* Only tell snapshots if this is a write */
2006-12-08 02:41:06 -08:00
return ( bio_rw ( bio ) = = WRITE ) ? do_origin ( dev , bio ) : DM_MAPIO_REMAPPED ;
2005-04-16 15:20:36 -07:00
}
/*
 * Smaller of l and r, where 0 means "not set": if one argument is 0 the
 * other is returned, and 0 is returned only when both are 0.
 *
 * Every argument and the whole expansion are parenthesized so the macro
 * can be used inside larger expressions and with compound arguments.
 * Arguments may be evaluated more than once - do not pass expressions
 * with side effects.
 */
#define min_not_zero(l, r) \
	(((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r))))
/*
* Set the target " split_io " field to the minimum of all the snapshots '
* chunk sizes .
*/
static void origin_resume ( struct dm_target * ti )
{
struct dm_dev * dev = ( struct dm_dev * ) ti - > private ;
struct dm_snapshot * snap ;
struct origin * o ;
chunk_t chunk_size = 0 ;
down_read ( & _origins_lock ) ;
o = __lookup_origin ( dev - > bdev ) ;
if ( o )
list_for_each_entry ( snap , & o - > snapshots , list )
chunk_size = min_not_zero ( chunk_size , snap - > chunk_size ) ;
up_read ( & _origins_lock ) ;
ti - > split_io = chunk_size ;
}
/*
 * Status hook for the origin target: INFO is an empty string, TABLE is
 * the name of the underlying device.  Always returns 0.
 */
static int origin_status(struct dm_target *ti, status_type_t type, char *result,
			 unsigned int maxlen)
{
	struct dm_dev *dev = (struct dm_dev *) ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		snprintf(result, maxlen, " %s ", dev->name);
		break;
	}

	return 0;
}
static struct target_type origin_target = {
. name = " snapshot-origin " ,
2006-10-03 01:15:25 -07:00
. version = { 1 , 5 , 0 } ,
2005-04-16 15:20:36 -07:00
. module = THIS_MODULE ,
. ctr = origin_ctr ,
. dtr = origin_dtr ,
. map = origin_map ,
. resume = origin_resume ,
. status = origin_status ,
} ;
static struct target_type snapshot_target = {
. name = " snapshot " ,
2006-10-03 01:15:25 -07:00
. version = { 1 , 5 , 0 } ,
2005-04-16 15:20:36 -07:00
. module = THIS_MODULE ,
. ctr = snapshot_ctr ,
. dtr = snapshot_dtr ,
. map = snapshot_map ,
. resume = snapshot_resume ,
. status = snapshot_status ,
} ;
/*
 * Module init: register both targets, then set up the origin hash, the
 * two slab caches, the pending-exception mempool and the ksnapd
 * workqueue.  On failure everything set up so far is undone in reverse
 * order and the error is returned.
 */
static int __init dm_snapshot_init(void)
{
	int r;

	r = dm_register_target(&snapshot_target);
	if (r) {
		DMERR(" snapshot target register failed %d ", r);
		return r;
	}

	r = dm_register_target(&origin_target);
	if (r < 0) {
		DMERR(" Origin target register failed %d ", r);
		goto unregister_snapshot_target;
	}

	r = init_origin_hash();
	if (r) {
		DMERR(" init_origin_hash failed. ");
		goto unregister_origin_target;
	}

	exception_cache = kmem_cache_create(" dm-snapshot-ex ",
					    sizeof(struct exception),
					    __alignof__(struct exception),
					    0, NULL, NULL);
	if (exception_cache == NULL) {
		DMERR(" Couldn't create exception cache. ");
		r = -ENOMEM;
		goto free_origin_hash;
	}

	pending_cache =
	    kmem_cache_create(" dm-snapshot-in ",
			      sizeof(struct pending_exception),
			      __alignof__(struct pending_exception),
			      0, NULL, NULL);
	if (pending_cache == NULL) {
		DMERR(" Couldn't create pending cache. ");
		r = -ENOMEM;
		goto destroy_exception_cache;
	}

	pending_pool = mempool_create_slab_pool(128, pending_cache);
	if (pending_pool == NULL) {
		DMERR(" Couldn't create pending pool. ");
		r = -ENOMEM;
		goto destroy_pending_cache;
	}

	ksnapd = create_singlethread_workqueue(" ksnapd ");
	if (ksnapd == NULL) {
		DMERR(" Failed to create ksnapd workqueue. ");
		r = -ENOMEM;
		goto destroy_pending_pool;
	}

	return 0;

destroy_pending_pool:
	mempool_destroy(pending_pool);
destroy_pending_cache:
	kmem_cache_destroy(pending_cache);
destroy_exception_cache:
	kmem_cache_destroy(exception_cache);
free_origin_hash:
	exit_origin_hash();
unregister_origin_target:
	dm_unregister_target(&origin_target);
unregister_snapshot_target:
	dm_unregister_target(&snapshot_target);
	return r;
}
/*
 * Module exit: destroy the workqueue, unregister both targets (logging
 * any failure), then free the origin hash, the mempool and both caches.
 */
static void __exit dm_snapshot_exit(void)
{
	int err;

	destroy_workqueue(ksnapd);

	err = dm_unregister_target(&snapshot_target);
	if (err)
		DMERR(" snapshot unregister failed %d ", err);

	err = dm_unregister_target(&origin_target);
	if (err)
		DMERR(" origin unregister failed %d ", err);

	exit_origin_hash();

	/* The pool sits on top of pending_cache, so it goes first. */
	mempool_destroy(pending_pool);
	kmem_cache_destroy(pending_cache);
	kmem_cache_destroy(exception_cache);
}
/* Module hooks */
/* Entry points run when the module is loaded and unloaded. */
module_init ( dm_snapshot_init ) ;
module_exit ( dm_snapshot_exit ) ;
/* Module metadata: description, author and licence. */
MODULE_DESCRIPTION ( DM_NAME " snapshot target " ) ;
MODULE_AUTHOR ( " Joe Thornber " ) ;
MODULE_LICENSE ( " GPL " ) ;