2009-01-06 06:05:17 +03:00
/*
* Copyright ( C ) 2001 - 2002 Sistina Software ( UK ) Limited .
* Copyright ( C ) 2006 - 2008 Red Hat GmbH
*
* This file is released under the GPL .
*/
# include "dm-exception-store.h"
2015-10-09 01:05:41 +03:00
# include <linux/ctype.h>
2009-01-06 06:05:17 +03:00
# include <linux/mm.h>
# include <linux/pagemap.h>
# include <linux/vmalloc.h>
2011-05-27 23:50:58 +04:00
# include <linux/export.h>
2009-01-06 06:05:17 +03:00
# include <linux/slab.h>
# include <linux/dm-io.h>
2014-01-14 04:12:36 +04:00
# include "dm-bufio.h"
2009-01-06 06:05:17 +03:00
# define DM_MSG_PREFIX "persistent snapshot"
# define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
2014-01-14 04:13:05 +04:00
# define DM_PREFETCH_CHUNKS 12
2009-01-06 06:05:17 +03:00
/*-----------------------------------------------------------------
 * Persistent snapshots: by persistent we mean that the snapshot
 * will survive a reboot.
 *---------------------------------------------------------------*/
/*
* We need to store a record of which parts of the origin have
* been copied to the snapshot device . The snapshot code
* requires that we copy exception chunks to chunk aligned areas
* of the COW store . It makes sense therefore , to store the
* metadata in chunk size blocks .
*
* There is no backward or forward compatibility implemented ,
* snapshots with different disk versions than the kernel will
* not be usable . It is expected that " lvcreate " will blank out
* the start of a fresh COW device before calling the snapshot
* constructor .
*
* The first chunk of the COW device just contains the header .
* After this there is a chunk filled with exception metadata ,
* followed by as many exception chunks as can fit in the
* metadata areas .
*
* All on disk structures are in little - endian format . The end
* of the exceptions info is indicated by an exception with a
* new_chunk of 0 , which is invalid since it would point to the
* header chunk .
*/
/*
 * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
 */
# define SNAP_MAGIC 0x70416e53
/*
* The on - disk version of the metadata .
*/
# define SNAPSHOT_DISK_VERSION 1
2009-12-11 02:52:29 +03:00
# define NUM_SNAPSHOT_HDR_CHUNKS 1
2009-01-06 06:05:17 +03:00
struct disk_header {
2011-08-02 15:32:01 +04:00
__le32 magic ;
2009-01-06 06:05:17 +03:00
/*
* Is this snapshot valid . There is no way of recovering
* an invalid snapshot .
*/
2011-08-02 15:32:01 +04:00
__le32 valid ;
2009-01-06 06:05:17 +03:00
/*
* Simple , incrementing version . no backward
* compatibility .
*/
2011-08-02 15:32:01 +04:00
__le32 version ;
2009-01-06 06:05:17 +03:00
/* In sectors */
2011-08-02 15:32:01 +04:00
__le32 chunk_size ;
} __packed ;
2009-01-06 06:05:17 +03:00
struct disk_exception {
2011-08-02 15:32:01 +04:00
__le64 old_chunk ;
__le64 new_chunk ;
} __packed ;
/* One exception record in CPU byte order, as used by the in-core code. */
struct core_exception {
	uint64_t old_chunk;
	uint64_t new_chunk;
};
/*
 * A completion callback queued by persistent_commit_exception() together
 * with the opaque context pointer that is handed back to it.
 */
struct commit_callback {
	void (*callback)(void *, int success);
	void *context;
};
/*
* The top level structure for a persistent exception store .
*/
struct pstore {
2009-04-02 22:55:33 +04:00
struct dm_exception_store * store ;
2009-01-06 06:05:17 +03:00
int version ;
int valid ;
uint32_t exceptions_per_area ;
/*
* Now that we have an asynchronous kcopyd there is no
* need for large chunk sizes , so it wont hurt to have a
* whole chunks worth of metadata in memory at once .
*/
void * area ;
/*
* An area of zeros used to clear the next area .
*/
void * zero_area ;
2009-09-04 23:40:39 +04:00
/*
* An area used for header . The header can be written
* concurrently with metadata ( when invalidating the snapshot ) ,
* so it needs a separate buffer .
*/
void * header_area ;
2009-01-06 06:05:17 +03:00
/*
* Used to keep track of which metadata area the data in
* ' chunk ' refers to .
*/
chunk_t current_area ;
/*
* The next free chunk for an exception .
2009-12-11 02:52:29 +03:00
*
* When creating exceptions , all the chunks here and above are
* free . It holds the next chunk to be allocated . On rare
* occasions ( e . g . after a system crash ) holes can be left in
* the exception store because chunks can be committed out of
* order .
*
* When merging exceptions , it does not necessarily mean all the
* chunks here and above are free . It holds the value it would
* have held if all chunks had been committed in order of
* allocation . Consequently the value may occasionally be
* slightly too low , but since it ' s only used for ' status ' and
* it can never reach its minimum value too early this doesn ' t
* matter .
2009-01-06 06:05:17 +03:00
*/
2009-12-11 02:52:29 +03:00
2009-01-06 06:05:17 +03:00
chunk_t next_free ;
/*
* The index of next free exception in the current
* metadata area .
*/
uint32_t current_committed ;
atomic_t pending_count ;
uint32_t callback_count ;
struct commit_callback * callbacks ;
struct dm_io_client * io_client ;
struct workqueue_struct * metadata_wq ;
} ;
static int alloc_area ( struct pstore * ps )
{
int r = - ENOMEM ;
size_t len ;
2009-04-02 22:55:33 +04:00
len = ps - > store - > chunk_size < < SECTOR_SHIFT ;
2009-01-06 06:05:17 +03:00
/*
* Allocate the chunk_size block of memory that will hold
* a single metadata area .
*/
ps - > area = vmalloc ( len ) ;
if ( ! ps - > area )
2009-09-04 23:40:39 +04:00
goto err_area ;
2009-01-06 06:05:17 +03:00
2011-08-02 15:32:02 +04:00
ps - > zero_area = vzalloc ( len ) ;
2009-09-04 23:40:39 +04:00
if ( ! ps - > zero_area )
goto err_zero_area ;
2009-01-06 06:05:17 +03:00
2009-09-04 23:40:39 +04:00
ps - > header_area = vmalloc ( len ) ;
if ( ! ps - > header_area )
goto err_header_area ;
2009-01-06 06:05:17 +03:00
return 0 ;
2009-09-04 23:40:39 +04:00
err_header_area :
vfree ( ps - > zero_area ) ;
err_zero_area :
vfree ( ps - > area ) ;
err_area :
return r ;
2009-01-06 06:05:17 +03:00
}
static void free_area ( struct pstore * ps )
{
2015-02-02 16:38:29 +03:00
vfree ( ps - > area ) ;
2009-01-06 06:05:17 +03:00
ps - > area = NULL ;
2015-02-02 16:38:29 +03:00
vfree ( ps - > zero_area ) ;
2009-01-06 06:05:17 +03:00
ps - > zero_area = NULL ;
2015-02-02 16:38:29 +03:00
vfree ( ps - > header_area ) ;
2009-09-04 23:40:39 +04:00
ps - > header_area = NULL ;
2009-01-06 06:05:17 +03:00
}
/*
 * A synchronous I/O request handed to the metadata workqueue; do_metadata()
 * stores the dm_io() return value in 'result'.
 */
struct mdata_req {
	struct dm_io_region *where;
	struct dm_io_request *io_req;
	struct work_struct work;
	int result;
};
static void do_metadata ( struct work_struct * work )
{
struct mdata_req * req = container_of ( work , struct mdata_req , work ) ;
req - > result = dm_io ( req - > io_req , 1 , req - > where , NULL ) ;
}
/*
* Read or write a chunk aligned and sized block of data from a device .
*/
2016-06-05 22:32:04 +03:00
/*
 * Transfer one chunk between @area and chunk number @chunk of the COW
 * device, using operation @op with flags @op_flags.
 *
 * When @metadata is non-zero the dm_io() call is made from the store's
 * metadata workqueue and waited for; otherwise it is made directly.
 * Returns the dm_io() result.
 */
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
		    int op_flags, int metadata)
{
	struct dm_io_region region = {
		.bdev = dm_snap_cow(ps->store->snap)->bdev,
		.sector = ps->store->chunk_size * chunk,
		.count = ps->store->chunk_size,
	};
	struct dm_io_request request = {
		.bi_op = op,
		.bi_op_flags = op_flags,
		.mem.type = DM_IO_VMA,
		.mem.ptr.vma = area,
		.client = ps->io_client,
		.notify.fn = NULL,
	};
	struct mdata_req deferred;

	if (metadata) {
		deferred.where = &region;
		deferred.io_req = &request;

		/*
		 * Issue the synchronous I/O from a different thread
		 * to avoid generic_make_request recursion.
		 */
		INIT_WORK_ONSTACK(&deferred.work, do_metadata);
		queue_work(ps->metadata_wq, &deferred.work);
		flush_workqueue(ps->metadata_wq);
		destroy_work_on_stack(&deferred.work);

		return deferred.result;
	}

	return dm_io(&request, 1, &region, NULL);
}
/*
* Convert a metadata area index to a chunk index .
*/
/*
 * Chunk number of metadata area @area: the header chunks come first, then
 * each area occupies one metadata chunk plus exceptions_per_area data chunks.
 */
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
	chunk_t offset = (ps->exceptions_per_area + 1) * area;

	return NUM_SNAPSHOT_HDR_CHUNKS + offset;
}
2013-10-16 06:17:47 +04:00
static void skip_metadata ( struct pstore * ps )
{
uint32_t stride = ps - > exceptions_per_area + 1 ;
chunk_t next_free = ps - > next_free ;
if ( sector_div ( next_free , stride ) = = NUM_SNAPSHOT_HDR_CHUNKS )
ps - > next_free + + ;
}
2009-01-06 06:05:17 +03:00
/*
 * Read or write a metadata area, remembering to skip the first
 * chunk, which holds the header.
 */
2016-06-05 22:32:04 +03:00
static int area_io ( struct pstore * ps , int op , int op_flags )
2009-01-06 06:05:17 +03:00
{
int r ;
chunk_t chunk ;
chunk = area_location ( ps , ps - > current_area ) ;
2016-06-05 22:32:04 +03:00
r = chunk_io ( ps , ps - > area , chunk , op , op_flags , 0 ) ;
2009-01-06 06:05:17 +03:00
if ( r )
return r ;
return 0 ;
}
static void zero_memory_area ( struct pstore * ps )
{
2009-04-02 22:55:33 +04:00
memset ( ps - > area , 0 , ps - > store - > chunk_size < < SECTOR_SHIFT ) ;
2009-01-06 06:05:17 +03:00
}
/* Write the zero buffer over metadata area @area on disk. */
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
	chunk_t where = area_location(ps, area);

	return chunk_io(ps, ps->zero_area, where, REQ_OP_WRITE, 0, 0);
}
static int read_header ( struct pstore * ps , int * new_snapshot )
{
int r ;
struct disk_header * dh ;
2009-10-17 02:18:17 +04:00
unsigned chunk_size ;
2009-01-06 06:05:17 +03:00
int chunk_size_supplied = 1 ;
2009-09-04 23:40:43 +04:00
char * chunk_err ;
2009-01-06 06:05:17 +03:00
/*
2009-10-17 02:18:17 +04:00
* Use default chunk size ( or logical_block_size , if larger )
* if none supplied
2009-01-06 06:05:17 +03:00
*/
2009-04-02 22:55:33 +04:00
if ( ! ps - > store - > chunk_size ) {
ps - > store - > chunk_size = max ( DM_CHUNK_SIZE_DEFAULT_SECTORS ,
2009-12-11 02:52:12 +03:00
bdev_logical_block_size ( dm_snap_cow ( ps - > store - > snap ) - >
bdev ) > > 9 ) ;
2009-04-02 22:55:33 +04:00
ps - > store - > chunk_mask = ps - > store - > chunk_size - 1 ;
2015-10-02 18:21:24 +03:00
ps - > store - > chunk_shift = __ffs ( ps - > store - > chunk_size ) ;
2009-01-06 06:05:17 +03:00
chunk_size_supplied = 0 ;
}
2011-05-29 16:03:09 +04:00
ps - > io_client = dm_io_client_create ( ) ;
2009-01-06 06:05:17 +03:00
if ( IS_ERR ( ps - > io_client ) )
return PTR_ERR ( ps - > io_client ) ;
r = alloc_area ( ps ) ;
if ( r )
return r ;
2016-06-05 22:32:04 +03:00
r = chunk_io ( ps , ps - > header_area , 0 , REQ_OP_READ , 0 , 1 ) ;
2009-01-06 06:05:17 +03:00
if ( r )
goto bad ;
2009-09-04 23:40:39 +04:00
dh = ps - > header_area ;
2009-01-06 06:05:17 +03:00
if ( le32_to_cpu ( dh - > magic ) = = 0 ) {
* new_snapshot = 1 ;
return 0 ;
}
if ( le32_to_cpu ( dh - > magic ) ! = SNAP_MAGIC ) {
DMWARN ( " Invalid or corrupt snapshot " ) ;
r = - ENXIO ;
goto bad ;
}
* new_snapshot = 0 ;
ps - > valid = le32_to_cpu ( dh - > valid ) ;
ps - > version = le32_to_cpu ( dh - > version ) ;
chunk_size = le32_to_cpu ( dh - > chunk_size ) ;
2009-09-04 23:40:43 +04:00
if ( ps - > store - > chunk_size = = chunk_size )
2009-01-06 06:05:17 +03:00
return 0 ;
2009-09-04 23:40:43 +04:00
if ( chunk_size_supplied )
2009-10-17 02:18:17 +04:00
DMWARN ( " chunk size %u in device metadata overrides "
" table chunk size of %u. " ,
chunk_size , ps - > store - > chunk_size ) ;
2009-01-06 06:05:17 +03:00
/* We had a bogus chunk_size. Fix stuff up. */
free_area ( ps ) ;
2009-09-04 23:40:43 +04:00
r = dm_exception_store_set_chunk_size ( ps - > store , chunk_size ,
& chunk_err ) ;
if ( r ) {
2009-10-17 02:18:17 +04:00
DMERR ( " invalid on-disk chunk size %u: %s. " ,
chunk_size , chunk_err ) ;
2009-09-04 23:40:43 +04:00
return r ;
}
2009-01-06 06:05:17 +03:00
r = alloc_area ( ps ) ;
return r ;
bad :
free_area ( ps ) ;
return r ;
}
static int write_header ( struct pstore * ps )
{
struct disk_header * dh ;
2009-09-04 23:40:39 +04:00
memset ( ps - > header_area , 0 , ps - > store - > chunk_size < < SECTOR_SHIFT ) ;
2009-01-06 06:05:17 +03:00
2009-09-04 23:40:39 +04:00
dh = ps - > header_area ;
2009-01-06 06:05:17 +03:00
dh - > magic = cpu_to_le32 ( SNAP_MAGIC ) ;
dh - > valid = cpu_to_le32 ( ps - > valid ) ;
dh - > version = cpu_to_le32 ( ps - > version ) ;
2009-04-02 22:55:33 +04:00
dh - > chunk_size = cpu_to_le32 ( ps - > store - > chunk_size ) ;
2009-01-06 06:05:17 +03:00
2016-06-05 22:32:04 +03:00
return chunk_io ( ps , ps - > header_area , 0 , REQ_OP_WRITE , 0 , 1 ) ;
2009-01-06 06:05:17 +03:00
}
/*
* Access functions for the disk exceptions , these do the endian conversions .
*/
2014-01-14 04:14:04 +04:00
/*
 * Return a pointer to slot @index of the on-disk exception table held in
 * @ps_area.  An index outside the area is a BUG.
 */
static struct disk_exception *get_exception(struct pstore *ps, void *ps_area,
					    uint32_t index)
{
	struct disk_exception *table = ps_area;

	BUG_ON(index >= ps->exceptions_per_area);

	return &table[index];
}
2014-01-14 04:14:04 +04:00
static void read_exception ( struct pstore * ps , void * ps_area ,
2011-08-02 15:32:01 +04:00
uint32_t index , struct core_exception * result )
2009-01-06 06:05:17 +03:00
{
2014-01-14 04:14:04 +04:00
struct disk_exception * de = get_exception ( ps , ps_area , index ) ;
2009-01-06 06:05:17 +03:00
/* copy it */
2011-08-02 15:32:01 +04:00
result - > old_chunk = le64_to_cpu ( de - > old_chunk ) ;
result - > new_chunk = le64_to_cpu ( de - > new_chunk ) ;
2009-01-06 06:05:17 +03:00
}
static void write_exception ( struct pstore * ps ,
2011-08-02 15:32:01 +04:00
uint32_t index , struct core_exception * e )
2009-01-06 06:05:17 +03:00
{
2014-01-14 04:14:04 +04:00
struct disk_exception * de = get_exception ( ps , ps - > area , index ) ;
2009-01-06 06:05:17 +03:00
/* copy it */
2011-08-02 15:32:01 +04:00
de - > old_chunk = cpu_to_le64 ( e - > old_chunk ) ;
de - > new_chunk = cpu_to_le64 ( e - > new_chunk ) ;
2009-01-06 06:05:17 +03:00
}
2009-12-11 02:52:29 +03:00
/* Zero slot @index of the in-memory area. */
static void clear_exception(struct pstore *ps, uint32_t index)
{
	struct disk_exception *slot = get_exception(ps, ps->area, index);

	slot->old_chunk = 0;
	slot->new_chunk = 0;
}
2009-01-06 06:05:17 +03:00
/*
* Registers the exceptions that are present in the current area .
* ' full ' is filled in to indicate if the area has been
* filled .
*/
2014-01-14 04:14:04 +04:00
static int insert_exceptions ( struct pstore * ps , void * ps_area ,
2009-01-06 06:05:19 +03:00
int ( * callback ) ( void * callback_context ,
chunk_t old , chunk_t new ) ,
void * callback_context ,
int * full )
2009-01-06 06:05:17 +03:00
{
int r ;
unsigned int i ;
2011-08-02 15:32:01 +04:00
struct core_exception e ;
2009-01-06 06:05:17 +03:00
/* presume the area is full */
* full = 1 ;
for ( i = 0 ; i < ps - > exceptions_per_area ; i + + ) {
2014-01-14 04:14:04 +04:00
read_exception ( ps , ps_area , i , & e ) ;
2009-01-06 06:05:17 +03:00
/*
* If the new_chunk is pointing at the start of
* the COW device , where the first metadata area
* is we know that we ' ve hit the end of the
* exceptions . Therefore the area is not full .
*/
2011-08-02 15:32:01 +04:00
if ( e . new_chunk = = 0LL ) {
2009-01-06 06:05:17 +03:00
ps - > current_committed = i ;
* full = 0 ;
break ;
}
/*
* Keep track of the start of the free chunks .
*/
2011-08-02 15:32:01 +04:00
if ( ps - > next_free < = e . new_chunk )
ps - > next_free = e . new_chunk + 1 ;
2009-01-06 06:05:17 +03:00
/*
* Otherwise we add the exception to the snapshot .
*/
2011-08-02 15:32:01 +04:00
r = callback ( callback_context , e . old_chunk , e . new_chunk ) ;
2009-01-06 06:05:17 +03:00
if ( r )
return r ;
}
return 0 ;
}
2009-01-06 06:05:19 +03:00
/*
 * Load every metadata area from disk, feeding each exception to @callback,
 * until a partially filled area is found.  That last area is copied into
 * ps->area and ps->current_area is left pointing at it.
 *
 * Returns 0 on success, or an error from dm-bufio or from @callback.
 */
static int read_exceptions(struct pstore *ps,
			   int (*callback)(void *callback_context, chunk_t old,
					   chunk_t new),
			   void *callback_context)
{
	int r, full = 1;
	struct dm_bufio_client *bufio;
	chunk_t readahead = 0;

	bufio = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
				       ps->store->chunk_size << SECTOR_SHIFT,
				       1, 0, NULL, NULL);
	if (IS_ERR(bufio))
		return PTR_ERR(bufio);

	/* Setup for one current buffer + desired readahead buffers. */
	dm_bufio_set_minimum_buffers(bufio, 1 + DM_PREFETCH_CHUNKS);

	/*
	 * Keep reading chunks and inserting exceptions until
	 * we find a partially full area.
	 */
	ps->current_area = 0;
	while (full) {
		struct dm_buffer *buf;
		void *data;
		chunk_t chunk;

		if (unlikely(readahead < ps->current_area))
			readahead = ps->current_area;

		if (DM_PREFETCH_CHUNKS) {
			do {
				chunk_t pf_chunk = area_location(ps, readahead);

				if (unlikely(pf_chunk >=
					     dm_bufio_get_device_size(bufio)))
					break;

				dm_bufio_prefetch(bufio, pf_chunk, 1);
				readahead++;
				/* Stop if the area counter wrapped around. */
				if (unlikely(!readahead))
					break;
			} while (readahead <=
				 ps->current_area + DM_PREFETCH_CHUNKS);
		}

		chunk = area_location(ps, ps->current_area);

		data = dm_bufio_read(bufio, chunk, &buf);
		if (IS_ERR(data)) {
			r = PTR_ERR(data);
			goto ret_destroy_bufio;
		}

		r = insert_exceptions(ps, data, callback, callback_context,
				      &full);

		/* Keep a copy of the last, partially filled area. */
		if (!full)
			memcpy(ps->area, data,
			       ps->store->chunk_size << SECTOR_SHIFT);

		dm_bufio_release(buf);
		dm_bufio_forget(bufio, chunk);

		if (unlikely(r))
			goto ret_destroy_bufio;

		ps->current_area++;
	}

	/* The loop stepped one past the partially filled area. */
	ps->current_area--;

	skip_metadata(ps);

	r = 0;

ret_destroy_bufio:
	dm_bufio_client_destroy(bufio);

	return r;
}
static struct pstore * get_info ( struct dm_exception_store * store )
{
return ( struct pstore * ) store - > context ;
}
2009-12-11 02:52:11 +03:00
/*
 * Report space usage of the COW device, in sectors: chunks up to
 * next_free, the device size, and the chunks occupied by metadata.
 */
static void persistent_usage(struct dm_exception_store *store,
			     sector_t *total_sectors,
			     sector_t *sectors_allocated,
			     sector_t *metadata_sectors)
{
	struct pstore *ps = get_info(store);

	*sectors_allocated = ps->next_free * store->chunk_size;

	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);

	/*
	 * First chunk is the fixed header.
	 * Then there are (ps->current_area + 1) metadata chunks, each one
	 * separated from the next by ps->exceptions_per_area data chunks.
	 */
	*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
			    store->chunk_size;
}
2009-04-02 22:55:31 +04:00
static void persistent_dtr ( struct dm_exception_store * store )
2009-01-06 06:05:17 +03:00
{
struct pstore * ps = get_info ( store ) ;
destroy_workqueue ( ps - > metadata_wq ) ;
2009-04-02 22:55:35 +04:00
/* Created in read_header */
if ( ps - > io_client )
dm_io_client_destroy ( ps - > io_client ) ;
2009-01-06 06:05:17 +03:00
free_area ( ps ) ;
2009-04-02 22:55:35 +04:00
/* Allocated in persistent_read_metadata */
2015-02-02 16:38:29 +03:00
vfree ( ps - > callbacks ) ;
2009-04-02 22:55:35 +04:00
2009-01-06 06:05:17 +03:00
kfree ( ps ) ;
}
2009-01-06 06:05:19 +03:00
/*
 * Read the header and, for an existing snapshot, all stored exceptions.
 *
 * A fresh COW device gets a new header and a zeroed first metadata area.
 * For an existing one the version is checked and each exception is passed
 * to @callback.
 *
 * Returns 0 on success, 1 if the metadata is readable but the snapshot is
 * marked invalid, or a negative errno.
 */
static int persistent_read_metadata(struct dm_exception_store *store,
				    int (*callback)(void *callback_context,
						    chunk_t old, chunk_t new),
				    void *callback_context)
{
	int r, uninitialized_var(new_snapshot);
	struct pstore *ps = get_info(store);

	/* Read the snapshot header. */
	r = read_header(ps, &new_snapshot);
	if (r)
		return r;

	/* Now we know correct chunk_size, complete the initialisation. */
	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
				  sizeof(struct disk_exception);
	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				   sizeof(*ps->callbacks));
	if (!ps->callbacks)
		return -ENOMEM;

	if (!new_snapshot) {
		/* Sanity checks. */
		if (ps->version != SNAPSHOT_DISK_VERSION) {
			DMWARN(" unable to handle snapshot disk version %d ",
			       ps->version);
			return -EINVAL;
		}

		/* Metadata are valid, but snapshot is invalidated */
		if (!ps->valid)
			return 1;

		/* Read the metadata. */
		return read_exceptions(ps, callback, callback_context);
	}

	/* We need to set up a new snapshot. */
	r = write_header(ps);
	if (r) {
		DMWARN(" write_header failed ");
		return r;
	}

	ps->current_area = 0;
	zero_memory_area(ps);
	r = zero_disk_area(ps, 0);
	if (r)
		DMWARN(" zero_disk_area(0) failed ");
	return r;
}
2009-01-06 06:05:19 +03:00
static int persistent_prepare_exception ( struct dm_exception_store * store ,
2009-12-11 02:52:10 +03:00
struct dm_exception * e )
2009-01-06 06:05:17 +03:00
{
struct pstore * ps = get_info ( store ) ;
2009-12-11 02:52:12 +03:00
sector_t size = get_dev_size ( dm_snap_cow ( store - > snap ) - > bdev ) ;
2009-01-06 06:05:17 +03:00
/* Is there enough room ? */
2009-04-02 22:55:32 +04:00
if ( size < ( ( ps - > next_free + 1 ) * store - > chunk_size ) )
2009-01-06 06:05:17 +03:00
return - ENOSPC ;
e - > new_chunk = ps - > next_free ;
/*
* Move onto the next free pending , making sure to take
* into account the location of the metadata chunks .
*/
2013-10-16 06:17:47 +04:00
ps - > next_free + + ;
skip_metadata ( ps ) ;
2009-01-06 06:05:17 +03:00
atomic_inc ( & ps - > pending_count ) ;
return 0 ;
}
2009-01-06 06:05:19 +03:00
static void persistent_commit_exception ( struct dm_exception_store * store ,
2016-01-09 03:07:55 +03:00
struct dm_exception * e , int valid ,
2009-01-06 06:05:19 +03:00
void ( * callback ) ( void * , int success ) ,
void * callback_context )
2009-01-06 06:05:17 +03:00
{
unsigned int i ;
struct pstore * ps = get_info ( store ) ;
2011-08-02 15:32:01 +04:00
struct core_exception ce ;
2009-01-06 06:05:17 +03:00
struct commit_callback * cb ;
2016-01-09 03:07:55 +03:00
if ( ! valid )
ps - > valid = 0 ;
2011-08-02 15:32:01 +04:00
ce . old_chunk = e - > old_chunk ;
ce . new_chunk = e - > new_chunk ;
write_exception ( ps , ps - > current_committed + + , & ce ) ;
2009-01-06 06:05:17 +03:00
/*
* Add the callback to the back of the array . This code
* is the only place where the callback array is
* manipulated , and we know that it will never be called
* multiple times concurrently .
*/
cb = ps - > callbacks + ps - > callback_count + + ;
cb - > callback = callback ;
cb - > context = callback_context ;
/*
* If there are exceptions in flight and we have not yet
* filled this metadata area there ' s nothing more to do .
*/
if ( ! atomic_dec_and_test ( & ps - > pending_count ) & &
( ps - > current_committed ! = ps - > exceptions_per_area ) )
return ;
/*
* If we completely filled the current area , then wipe the next one .
*/
if ( ( ps - > current_committed = = ps - > exceptions_per_area ) & &
2011-08-02 15:32:03 +04:00
zero_disk_area ( ps , ps - > current_area + 1 ) )
2009-01-06 06:05:17 +03:00
ps - > valid = 0 ;
/*
* Commit exceptions to disk .
*/
2017-05-31 10:44:32 +03:00
if ( ps - > valid & & area_io ( ps , REQ_OP_WRITE ,
REQ_PREFLUSH | REQ_FUA | REQ_SYNC ) )
2009-01-06 06:05:17 +03:00
ps - > valid = 0 ;
/*
* Advance to the next area if this one is full .
*/
if ( ps - > current_committed = = ps - > exceptions_per_area ) {
ps - > current_committed = 0 ;
ps - > current_area + + ;
zero_memory_area ( ps ) ;
}
for ( i = 0 ; i < ps - > callback_count ; i + + ) {
cb = ps - > callbacks + i ;
cb - > callback ( cb - > context , ps - > valid ) ;
}
ps - > callback_count = 0 ;
}
2009-12-11 02:52:29 +03:00
/*
 * Find the last committed exception and how many exceptions before it are
 * consecutive in both old and new chunk numbers.
 *
 * Stores the last exception in *last_old_chunk / *last_new_chunk.
 * Returns the run length (at least 1), 0 when nothing is left to merge, or
 * a negative errno if reading the preceding area fails.
 */
static int persistent_prepare_merge(struct dm_exception_store *store,
				    chunk_t *last_old_chunk,
				    chunk_t *last_new_chunk)
{
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	int nr_consecutive;
	int r;

	/* When current area is empty, move back to preceding area. */
	if (!ps->current_committed) {
		/* Have we finished? */
		if (!ps->current_area)
			return 0;

		ps->current_area--;
		r = area_io(ps, REQ_OP_READ, 0);
		if (r < 0)
			return r;
		ps->current_committed = ps->exceptions_per_area;
	}

	read_exception(ps, ps->area, ps->current_committed - 1, &ce);
	*last_old_chunk = ce.old_chunk;
	*last_new_chunk = ce.new_chunk;

	/*
	 * Find number of consecutive chunks within the current area,
	 * working backwards.
	 */
	nr_consecutive = 1;
	while (nr_consecutive < ps->current_committed) {
		read_exception(ps, ps->area,
			       ps->current_committed - 1 - nr_consecutive, &ce);
		if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
		    ce.new_chunk != *last_new_chunk - nr_consecutive)
			break;
		nr_consecutive++;
	}

	return nr_consecutive;
}
static int persistent_commit_merge ( struct dm_exception_store * store ,
int nr_merged )
{
int r , i ;
struct pstore * ps = get_info ( store ) ;
BUG_ON ( nr_merged > ps - > current_committed ) ;
for ( i = 0 ; i < nr_merged ; i + + )
clear_exception ( ps , ps - > current_committed - 1 - i ) ;
2016-11-01 16:40:10 +03:00
r = area_io ( ps , REQ_OP_WRITE , REQ_PREFLUSH | REQ_FUA ) ;
2009-12-11 02:52:29 +03:00
if ( r < 0 )
return r ;
ps - > current_committed - = nr_merged ;
/*
* At this stage , only persistent_usage ( ) uses ps - > next_free , so
* we make no attempt to keep ps - > next_free strictly accurate
* as exceptions may have been committed out - of - order originally .
* Once a snapshot has become merging , we set it to the value it
* would have held had all the exceptions been committed in order .
*
* ps - > current_area does not get reduced by prepare_merge ( ) until
* after commit_merge ( ) has removed the nr_merged previous exceptions .
*/
2010-08-12 07:13:59 +04:00
ps - > next_free = area_location ( ps , ps - > current_area ) +
ps - > current_committed + 1 ;
2009-12-11 02:52:29 +03:00
return 0 ;
}
2009-01-06 06:05:19 +03:00
static void persistent_drop_snapshot ( struct dm_exception_store * store )
2009-01-06 06:05:17 +03:00
{
struct pstore * ps = get_info ( store ) ;
ps - > valid = 0 ;
if ( write_header ( ps ) )
DMWARN ( " write header failed " ) ;
}
2015-10-09 01:05:41 +03:00
static int persistent_ctr ( struct dm_exception_store * store , char * options )
2009-01-06 06:05:17 +03:00
{
struct pstore * ps ;
2015-10-13 11:03:08 +03:00
int r ;
2009-01-06 06:05:17 +03:00
/* allocate the pstore */
2009-04-02 22:55:35 +04:00
ps = kzalloc ( sizeof ( * ps ) , GFP_KERNEL ) ;
2009-01-06 06:05:17 +03:00
if ( ! ps )
return - ENOMEM ;
2009-04-02 22:55:33 +04:00
ps - > store = store ;
2009-01-06 06:05:17 +03:00
ps - > valid = 1 ;
ps - > version = SNAPSHOT_DISK_VERSION ;
ps - > area = NULL ;
2009-09-04 23:40:39 +04:00
ps - > zero_area = NULL ;
ps - > header_area = NULL ;
2009-12-11 02:52:29 +03:00
ps - > next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1 ; /* header and 1st area */
2009-01-06 06:05:17 +03:00
ps - > current_committed = 0 ;
ps - > callback_count = 0 ;
atomic_set ( & ps - > pending_count , 0 ) ;
ps - > callbacks = NULL ;
2011-01-13 22:59:59 +03:00
ps - > metadata_wq = alloc_workqueue ( " ksnaphd " , WQ_MEM_RECLAIM , 0 ) ;
2009-01-06 06:05:17 +03:00
if ( ! ps - > metadata_wq ) {
DMERR ( " couldn't start header metadata update thread " ) ;
2015-10-13 11:03:08 +03:00
r = - ENOMEM ;
goto err_workqueue ;
2009-01-06 06:05:17 +03:00
}
2015-10-09 01:05:41 +03:00
if ( options ) {
char overflow = toupper ( options [ 0 ] ) ;
if ( overflow = = ' O ' )
store - > userspace_supports_overflow = true ;
else {
DMERR ( " Unsupported persistent store option: %s " , options ) ;
2015-10-13 11:03:08 +03:00
r = - EINVAL ;
goto err_options ;
2015-10-09 01:05:41 +03:00
}
}
2009-01-06 06:05:17 +03:00
store - > context = ps ;
return 0 ;
2015-10-13 11:03:08 +03:00
err_options :
destroy_workqueue ( ps - > metadata_wq ) ;
err_workqueue :
kfree ( ps ) ;
return r ;
2009-01-06 06:05:17 +03:00
}
2009-04-02 22:55:35 +04:00
/*
 * Emit status text into @result.  Only STATUSTYPE_TABLE produces output:
 * the store type string and the chunk size.  Returns the number of
 * characters emitted.
 */
static unsigned persistent_status(struct dm_exception_store *store,
				  status_type_t status, char *result,
				  unsigned maxlen)
{
	/* DMEMIT() accumulates into 'sz', bounded by 'maxlen'. */
	unsigned sz = 0;

	if (status == STATUSTYPE_TABLE)
		DMEMIT(" %s %llu ", store->userspace_supports_overflow ? " PO " : " P ",
		       (unsigned long long)store->chunk_size);

	return sz;
}
static struct dm_exception_store_type _persistent_type = {
. name = " persistent " ,
. module = THIS_MODULE ,
. ctr = persistent_ctr ,
. dtr = persistent_dtr ,
. read_metadata = persistent_read_metadata ,
. prepare_exception = persistent_prepare_exception ,
. commit_exception = persistent_commit_exception ,
2009-12-11 02:52:29 +03:00
. prepare_merge = persistent_prepare_merge ,
. commit_merge = persistent_commit_merge ,
2009-04-02 22:55:31 +04:00
. drop_snapshot = persistent_drop_snapshot ,
2009-12-11 02:52:11 +03:00
. usage = persistent_usage ,
2009-04-02 22:55:31 +04:00
. status = persistent_status ,
} ;
static struct dm_exception_store_type _persistent_compat_type = {
. name = " P " ,
. module = THIS_MODULE ,
. ctr = persistent_ctr ,
. dtr = persistent_dtr ,
. read_metadata = persistent_read_metadata ,
. prepare_exception = persistent_prepare_exception ,
. commit_exception = persistent_commit_exception ,
2009-12-11 02:52:29 +03:00
. prepare_merge = persistent_prepare_merge ,
. commit_merge = persistent_commit_merge ,
2009-04-02 22:55:31 +04:00
. drop_snapshot = persistent_drop_snapshot ,
2009-12-11 02:52:11 +03:00
. usage = persistent_usage ,
2009-04-02 22:55:31 +04:00
. status = persistent_status ,
} ;
2009-01-06 06:05:17 +03:00
/*
 * Register both persistent exception store types.  If the second
 * registration fails the first is undone.  Returns 0 or the registration
 * error.
 */
int dm_persistent_snapshot_init(void)
{
	int r = dm_exception_store_type_register(&_persistent_type);

	if (r) {
		DMERR(" Unable to register persistent exception store type ");
		return r;
	}

	r = dm_exception_store_type_register(&_persistent_compat_type);
	if (!r)
		return 0;

	DMERR(" Unable to register old-style persistent exception "
	      " store type ");
	dm_exception_store_type_unregister(&_persistent_type);
	return r;
}
void dm_persistent_snapshot_exit ( void )
{
2009-04-02 22:55:31 +04:00
dm_exception_store_type_unregister ( & _persistent_type ) ;
dm_exception_store_type_unregister ( & _persistent_compat_type ) ;
2009-01-06 06:05:17 +03:00
}