/*
 * Copyright (C) 2011-2012 Red Hat UK.
 *
 * This file is released under the GPL.
 */

#include "dm-thin-metadata.h"
#include "dm-bio-prison.h"
#include "dm.h"

#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/log2.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/rbtree.h>

#define DM_MSG_PREFIX "thin"

/*
 * Tunable constants
 */
#define ENDIO_HOOK_POOL_SIZE 1024
#define MAPPING_POOL_SIZE 1024
#define COMMIT_PERIOD HZ
#define NO_SPACE_TIMEOUT_SECS 60

static unsigned no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
		"A percentage of time allocated for copy on write");

/*
 * The block size of the device holding pool data must be
 * between 64KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

/*
 * Device id is restricted to 24 bits.
 */
#define MAX_DEV_ID ((1 << 24) - 1)

/*
 * How do we handle breaking sharing of data blocks?
 * =================================================
 *
 * We use a standard copy-on-write btree to store the mappings for the
 * devices (note I'm talking about copy-on-write of the metadata here, not
 * the data).  When you take an internal snapshot you clone the root node
 * of the origin btree.  After this there is no concept of an origin or a
 * snapshot.  They are just two device trees that happen to point to the
 * same data blocks.
 *
 * When we get a write in we decide if it's to a shared data block using
 * some timestamp magic.  If it is, we have to break sharing.
 *
 * Let's say we write to a shared block in what was the origin.  The
 * steps are:
 *
 * i) plug io further to this physical block. (see bio_prison code).
 *
 * ii) quiesce any read io to that shared data block.  Obviously
 * including all devices that share this block.  (see dm_deferred_set code)
 *
 * iii) copy the data block to a newly allocated block.  This step can be
 * missed out if the io covers the block. (schedule_copy).
 *
 * iv) insert the new mapping into the origin's btree
 * (process_prepared_mapping).  This act of inserting breaks some
 * sharing of btree nodes between the two devices.  Breaking sharing only
 * affects the btree of that specific device.  Btrees for the other
 * devices that share the block never change.  The btree for the origin
 * device as it was after the last commit is untouched, ie. we're using
 * persistent data structures in the functional programming sense.
 *
 * v) unplug io to this physical block, including the io that triggered
 * the breaking of sharing.
 *
 * Steps (ii) and (iii) occur in parallel.
 *
 * The metadata _doesn't_ need to be committed before the io continues.  We
 * get away with this because the io is always written to a _new_ block.
 * If there's a crash, then:
 *
 * - The origin mapping will point to the old origin block (the shared
 * one).  This will contain the data as it was before the io that triggered
 * the breaking of sharing came in.
 *
 * - The snap mapping still points to the old block.  As it would after
 * the commit.
 *
 * The downside of this scheme is the timestamp magic isn't perfect, and
 * will continue to think that data block in the snapshot device is shared
 * even after the write to the origin has broken sharing.  I suspect data
 * blocks will typically be shared by many different devices, so we're
 * breaking sharing n + 1 times, rather than n, where n is the number of
 * devices that reference this data block.  At the moment I think the
 * benefits far, far outweigh the disadvantages.
 */

/*----------------------------------------------------------------*/

/*
 * Key building.
 */
enum lock_space {
	VIRTUAL,
	PHYSICAL
};
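
/*
 * build_key() describes a range of blocks, [b, e), in either the thin
 * device's virtual address space or the pool's physical data space, for
 * locking in the bio prison.
 */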
static void build_key(struct dm_thin_device *td, enum lock_space ls,
		      dm_block_t b, dm_block_t e, struct dm_cell_key *key)
{
	key->virtual = (ls == VIRTUAL);
	key->dev = dm_thin_dev_id(td);
	key->block_begin = b;
	key->block_end = e;
}

static void build_data_key(struct dm_thin_device *td, dm_block_t b,
			   struct dm_cell_key *key)
{
	build_key(td, PHYSICAL, b, b + 1llu, key);
}

static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
			      struct dm_cell_key *key)
{
	build_key(td, VIRTUAL, b, b + 1llu, key);
}

/*----------------------------------------------------------------*/

#define THROTTLE_THRESHOLD (1 * HZ)
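
/*
 * Worker-thread throttle: if a single pass of the worker runs for longer
 * than THROTTLE_THRESHOLD it takes this rw_semaphore for writing, holding
 * off new submitters (which take it for reading via throttle_lock()) until
 * throttle_work_complete() releases it at the end of the pass.
 */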
struct throttle {
	struct rw_semaphore lock;
	unsigned long threshold;
	bool throttle_applied;
};

static void throttle_init(struct throttle *t)
{
	init_rwsem(&t->lock);
	t->throttle_applied = false;
}

static void throttle_work_start(struct throttle *t)
{
	t->threshold = jiffies + THROTTLE_THRESHOLD;
}

static void throttle_work_update(struct throttle *t)
{
	if (!t->throttle_applied && jiffies > t->threshold) {
		down_write(&t->lock);
		t->throttle_applied = true;
	}
}

static void throttle_work_complete(struct throttle *t)
{
	if (t->throttle_applied) {
		t->throttle_applied = false;
		up_write(&t->lock);
	}
}

static void throttle_lock(struct throttle *t)
{
	down_read(&t->lock);
}

static void throttle_unlock(struct throttle *t)
{
	up_read(&t->lock);
}

/*----------------------------------------------------------------*/

/*
 * A pool device ties together a metadata device and a data device.  It
 * also provides the interface for creating and destroying internal
 * devices.
 */
struct dm_thin_new_mapping;

/*
 * The pool runs in 4 modes.  Ordered in degraded order for comparisons.
 */
enum pool_mode {
	PM_WRITE,		/* metadata may be changed */
	PM_OUT_OF_DATA_SPACE,	/* metadata may be changed, though data may not be allocated */
	PM_READ_ONLY,		/* metadata may not be changed */
	PM_FAIL,		/* all I/O fails */
};

struct pool_features {
	enum pool_mode mode;

	bool zero_new_blocks:1;
	bool discard_enabled:1;
	bool discard_passdown:1;
	bool error_if_no_space:1;
};

struct thin_c;
typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);
typedef void (*process_cell_fn)(struct thin_c *tc, struct dm_bio_prison_cell *cell);
typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);

#define CELL_SORT_ARRAY_SIZE 8192

struct pool {
	struct list_head list;
	struct dm_target *ti;	/* Only set if a pool target is bound */

	struct mapped_device *pool_md;
	struct block_device *md_dev;
	struct dm_pool_metadata *pmd;

	dm_block_t low_water_blocks;
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	struct pool_features pf;
	bool low_water_triggered:1;	/* A dm event has been sent */
	bool suspended:1;
	bool out_of_data_space:1;

	struct dm_bio_prison *prison;
	struct dm_kcopyd_client *copier;

	struct workqueue_struct *wq;
	struct throttle throttle;
	struct work_struct worker;
	struct delayed_work waker;
	struct delayed_work no_space_timeout;

	unsigned long last_commit_jiffies;
	unsigned ref_count;

	spinlock_t lock;
	struct bio_list deferred_flush_bios;
	struct list_head prepared_mappings;
	struct list_head prepared_discards;
	struct list_head active_thins;

	struct dm_deferred_set *shared_read_ds;
	struct dm_deferred_set *all_io_ds;

	struct dm_thin_new_mapping *next_mapping;
	mempool_t *mapping_pool;

	process_bio_fn process_bio;
	process_bio_fn process_discard;

	process_cell_fn process_cell;
	process_cell_fn process_discard_cell;

	process_mapping_fn process_prepared_mapping;
	process_mapping_fn process_prepared_discard;

	struct dm_bio_prison_cell **cell_sort_array;
};

static enum pool_mode get_pool_mode(struct pool *pool);
static void metadata_operation_failed(struct pool *pool, const char *op, int r);

/*
 * Target context for a pool.
 */
struct pool_c {
	struct dm_target *ti;
	struct pool *pool;
	struct dm_dev *data_dev;
	struct dm_dev *metadata_dev;
	struct dm_target_callbacks callbacks;

	dm_block_t low_water_blocks;
	struct pool_features requested_pf; /* Features requested during table load */
	struct pool_features adjusted_pf;  /* Features used after adjusting for constituent devices */
};

/*
 * Target context for a thin.
 */
struct thin_c {
	struct list_head list;
	struct dm_dev *pool_dev;
	struct dm_dev *origin_dev;
	sector_t origin_size;
	dm_thin_id dev_id;

	struct pool *pool;
	struct dm_thin_device *td;
	struct mapped_device *thin_md;

	bool requeue_mode:1;
	spinlock_t lock;
	struct list_head deferred_cells;
	struct bio_list deferred_bio_list;
	struct bio_list retry_on_resume_list;
	struct rb_root sort_bio_list; /* sorted list of deferred bios */

	/*
	 * Ensures the thin is not destroyed until the worker has finished
	 * iterating the active_thins list.
	 */
	atomic_t refcount;
	struct completion can_destroy;
};

/*----------------------------------------------------------------*/

/**
 * __blkdev_issue_discard_async - queue a discard with async completion
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_IFL_* flags to control behaviour
 * @parent_bio: parent discard bio that all sub discards get chained to
 *
 * Description:
 *    Asynchronously issue a discard request for the sectors in question.
 */
static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sector,
					sector_t nr_sects, gfp_t gfp_mask, unsigned long flags,
					struct bio *parent_bio)
{
	struct request_queue *q = bdev_get_queue(bdev);
	int type = REQ_WRITE | REQ_DISCARD;
	struct bio *bio;

	if (!q || !nr_sects)
		return -ENXIO;

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secdiscard(q))
			return -EOPNOTSUPP;
		type |= REQ_SECURE;
	}

	/*
	 * Required bio_put occurs in bio_endio thanks to bio_chain below
	 */
	bio = bio_alloc(gfp_mask, 1);
	if (!bio)
		return -ENOMEM;

	bio_chain(bio, parent_bio);

	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = bdev;
	bio->bi_iter.bi_size = nr_sects << 9;

	submit_bio(type, bio);

	return 0;
}

static bool block_size_is_power_of_two(struct pool *pool)
{
	return pool->sectors_per_block_shift >= 0;
}

static sector_t block_to_sectors(struct pool *pool, dm_block_t b)
{
	return block_size_is_power_of_two(pool) ?
		(b << pool->sectors_per_block_shift) :
		(b * pool->sectors_per_block);
}
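
/*
 * Issue an asynchronous discard covering the data blocks [data_b, data_e),
 * chained to @parent_bio so the parent cannot complete before it does.
 */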
static int issue_discard(struct thin_c *tc, dm_block_t data_b, dm_block_t data_e,
			 struct bio *parent_bio)
{
	sector_t s = block_to_sectors(tc->pool, data_b);
	sector_t len = block_to_sectors(tc->pool, data_e - data_b);

	return __blkdev_issue_discard_async(tc->pool_dev->bdev, s, len,
					    GFP_NOWAIT, 0, parent_bio);
}

/*----------------------------------------------------------------*/

/*
 * wake_worker() is used when new work is queued and when pool_resume is
 * ready to continue deferred IO processing.
 */
static void wake_worker(struct pool *pool)
{
	queue_work(pool->wq, &pool->worker);
}

/*----------------------------------------------------------------*/

static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_bio_prison_cell *cell_prealloc;

	/*
	 * Allocate a cell from the prison's mempool.
	 * This might block but it can't fail.
	 */
	cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO);

	r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result);
	if (r)
		/*
		 * We reused an old cell; we can get rid of
		 * the new one.
		 */
		dm_bio_prison_free_cell(pool->prison, cell_prealloc);

	return r;
}

static void cell_release(struct pool *pool,
			 struct dm_bio_prison_cell *cell,
			 struct bio_list *bios)
{
	dm_cell_release(pool->prison, cell, bios);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_visit_release(struct pool *pool,
			       void (*fn)(void *, struct dm_bio_prison_cell *),
			       void *context,
			       struct dm_bio_prison_cell *cell)
{
	dm_cell_visit_release(pool->prison, fn, context, cell);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_release_no_holder(struct pool *pool,
				   struct dm_bio_prison_cell *cell,
				   struct bio_list *bios)
{
	dm_cell_release_no_holder(pool->prison, cell, bios);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_error_with_code(struct pool *pool,
				 struct dm_bio_prison_cell *cell, int error_code)
{
	dm_cell_error(pool->prison, cell, error_code);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static int get_pool_io_error_code(struct pool *pool)
{
	return pool->out_of_data_space ? -ENOSPC : -EIO;
}

static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	int error = get_pool_io_error_code(pool);

	cell_error_with_code(pool, cell, error);
}

static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(pool, cell, 0);
}

static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
}

/*----------------------------------------------------------------*/

/*
 * A global list of pools that uses a struct mapped_device as a key.
 */
static struct dm_thin_pool_table {
	struct mutex mutex;
	struct list_head pools;
} dm_thin_pool_table;

static void pool_table_init(void)
{
	mutex_init(&dm_thin_pool_table.mutex);
	INIT_LIST_HEAD(&dm_thin_pool_table.pools);
}

static void __pool_table_insert(struct pool *pool)
{
	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
	list_add(&pool->list, &dm_thin_pool_table.pools);
}

static void __pool_table_remove(struct pool *pool)
{
	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
	list_del(&pool->list);
}

static struct pool *__pool_table_lookup(struct mapped_device *md)
{
	struct pool *pool = NULL, *tmp;

	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));

	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
		if (tmp->pool_md == md) {
			pool = tmp;
			break;
		}
	}

	return pool;
}

static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev)
{
	struct pool *pool = NULL, *tmp;

	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));

	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
		if (tmp->md_dev == md_dev) {
			pool = tmp;
			break;
		}
	}

	return pool;
}

/*----------------------------------------------------------------*/

struct dm_thin_endio_hook {
	struct thin_c *tc;
	struct dm_deferred_entry *shared_read_entry;
	struct dm_deferred_entry *all_io_entry;
	struct dm_thin_new_mapping *overwrite_mapping;
	struct rb_node rb_node;
	struct dm_bio_prison_cell *cell;
};

static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
{
	bio_list_merge(bios, master);
	bio_list_init(master);
}

static void error_bio_list(struct bio_list *bios, int error)
{
	struct bio *bio;

	while ((bio = bio_list_pop(bios))) {
		bio->bi_error = error;
		bio_endio(bio);
	}
}

static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
{
	struct bio_list bios;
	unsigned long flags;

	bio_list_init(&bios);

	spin_lock_irqsave(&tc->lock, flags);
	__merge_bio_list(&bios, master);
	spin_unlock_irqrestore(&tc->lock, flags);

	error_bio_list(&bios, error);
}

static void requeue_deferred_cells(struct thin_c *tc)
{
	struct pool *pool = tc->pool;
	unsigned long flags;
	struct list_head cells;
	struct dm_bio_prison_cell *cell, *tmp;

	INIT_LIST_HEAD(&cells);

	spin_lock_irqsave(&tc->lock, flags);
	list_splice_init(&tc->deferred_cells, &cells);
	spin_unlock_irqrestore(&tc->lock, flags);

	list_for_each_entry_safe(cell, tmp, &cells, user_list)
		cell_requeue(pool, cell);
}

static void requeue_io(struct thin_c *tc)
{
	struct bio_list bios;
	unsigned long flags;

	bio_list_init(&bios);

	spin_lock_irqsave(&tc->lock, flags);
	__merge_bio_list(&bios, &tc->deferred_bio_list);
	__merge_bio_list(&bios, &tc->retry_on_resume_list);
	spin_unlock_irqrestore(&tc->lock, flags);

	error_bio_list(&bios, DM_ENDIO_REQUEUE);
	requeue_deferred_cells(tc);
}

static void error_retry_list_with_code(struct pool *pool, int error)
{
	struct thin_c *tc;

	rcu_read_lock();
	list_for_each_entry_rcu(tc, &pool->active_thins, list)
		error_thin_bio_list(tc, &tc->retry_on_resume_list, error);
	rcu_read_unlock();
}

static void error_retry_list(struct pool *pool)
{
	int error = get_pool_io_error_code(pool);

	return error_retry_list_with_code(pool, error);
}

/*
 * This section of code contains the logic for processing a thin device's IO.
 * Much of the code depends on pool object resources (lists, workqueues, etc)
 * but most is exclusively called from the thin target rather than the thin-pool
 * target.
 */
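
/*
 * Map a bio's starting sector to the pool block it falls in.  For example,
 * with sectors_per_block = 128 (64KiB blocks) a bio starting at sector
 * 1000 lands in block 7 (1000 >> 7, or equivalently 1000 / 128).
 */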
static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (block_size_is_power_of_two(pool))
		block_nr >>= pool->sectors_per_block_shift;
	else
		(void) sector_div(block_nr, pool->sectors_per_block);

	return block_nr;
}

/*
 * Returns the _complete_ blocks that this bio covers.
 */
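/*
 * E.g. with 128-sector blocks, a bio spanning sectors [100, 900) only
 * fully covers blocks 1 to 6, so *begin = 1 and *end = 7.
 */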
static void get_bio_block_range(struct thin_c *tc, struct bio *bio,
				dm_block_t *begin, dm_block_t *end)
{
	struct pool *pool = tc->pool;
	sector_t b = bio->bi_iter.bi_sector;
	sector_t e = b + (bio->bi_iter.bi_size >> SECTOR_SHIFT);

	b += pool->sectors_per_block - 1ull; /* so we round up */

	if (block_size_is_power_of_two(pool)) {
		b >>= pool->sectors_per_block_shift;
		e >>= pool->sectors_per_block_shift;
	} else {
		(void) sector_div(b, pool->sectors_per_block);
		(void) sector_div(e, pool->sectors_per_block);
	}

	if (e < b)
		/* Can happen if the bio is within a single block. */
		e = b;

	*begin = b;
	*end = e;
}

static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
{
	struct pool *pool = tc->pool;
	sector_t bi_sector = bio->bi_iter.bi_sector;

	bio->bi_bdev = tc->pool_dev->bdev;
	if (block_size_is_power_of_two(pool))
		bio->bi_iter.bi_sector =
			(block << pool->sectors_per_block_shift) |
			(bi_sector & (pool->sectors_per_block - 1));
	else
		bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
				 sector_div(bi_sector, pool->sectors_per_block);
}

static void remap_to_origin(struct thin_c *tc, struct bio *bio)
{
	bio->bi_bdev = tc->origin_dev->bdev;
}

static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
{
	return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) &&
		dm_thin_changed_this_transaction(tc->td);
}

static void inc_all_io_entry(struct pool *pool, struct bio *bio)
{
	struct dm_thin_endio_hook *h;

	if (bio->bi_rw & REQ_DISCARD)
		return;

	h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds);
}

static void issue(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;
	unsigned long flags;

	if (!bio_triggers_commit(tc, bio)) {
		generic_make_request(bio);
		return;
	}

	/*
	 * Complete bio with an error if earlier I/O caused changes to
	 * the metadata that can't be committed e.g, due to I/O errors
	 * on the metadata device.
	 */
	if (dm_thin_aborted_changes(tc->td)) {
		bio_io_error(bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in process_deferred_bios().
	 */
	spin_lock_irqsave(&pool->lock, flags);
	bio_list_add(&pool->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
{
	remap_to_origin(tc, bio);
	issue(tc, bio);
}

static void remap_and_issue(struct thin_c *tc, struct bio *bio,
			    dm_block_t block)
{
	remap(tc, bio, block);
	issue(tc, bio);
}

/*----------------------------------------------------------------*/

/*
 * Bio endio functions.
 */
struct dm_thin_new_mapping {
	struct list_head list;

	bool pass_discard:1;
	bool maybe_shared:1;

	/*
	 * Track quiescing, copying and zeroing preparation actions.  When this
	 * counter hits zero the block is prepared and can be inserted into the
	 * btree.
	 */
	atomic_t prepare_actions;

	int err;
	struct thin_c *tc;
	dm_block_t virt_begin, virt_end;
	dm_block_t data_block;
	struct dm_bio_prison_cell *cell;

	/*
	 * If the bio covers the whole area of a block then we can avoid
	 * zeroing or copying.  Instead this bio is hooked.  The bio will
	 * still be in the cell, so care has to be taken to avoid issuing
	 * the bio twice.
	 */
	struct bio *bio;
	bio_end_io_t *saved_bi_end_io;
};

static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
	struct pool *pool = m->tc->pool;

	if (atomic_dec_and_test(&m->prepare_actions)) {
		list_add_tail(&m->list, &pool->prepared_mappings);
		wake_worker(pool);
	}
}

static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
	unsigned long flags;
	struct pool *pool = m->tc->pool;

	spin_lock_irqsave(&pool->lock, flags);
	__complete_mapping_preparation(m);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	struct dm_thin_new_mapping *m = context;

	m->err = read_err || write_err ? -EIO : 0;
	complete_mapping_preparation(m);
}

static void overwrite_endio(struct bio *bio)
{
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	struct dm_thin_new_mapping *m = h->overwrite_mapping;

	bio->bi_end_io = m->saved_bi_end_io;

	m->err = bio->bi_error;
	complete_mapping_preparation(m);
}

/*----------------------------------------------------------------*/

/*
 * Workqueue.
 */

/*
 * Prepared mapping jobs.
 */

/*
 * This sends the bios in the cell, except the original holder, back
 * to the deferred_bios list.
 */
static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
	struct pool *pool = tc->pool;
	unsigned long flags;

	spin_lock_irqsave(&tc->lock, flags);
	cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
	spin_unlock_irqrestore(&tc->lock, flags);

	wake_worker(pool);
}

static void thin_defer_bio(struct thin_c *tc, struct bio *bio);

struct remap_info {
	struct thin_c *tc;
	struct bio_list defer_bios;
	struct bio_list issue_bios;
};

static void __inc_remap_and_issue_cell(void *context,
				       struct dm_bio_prison_cell *cell)
{
	struct remap_info *info = context;
	struct bio *bio;

	while ((bio = bio_list_pop(&cell->bios))) {
		if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA))
			bio_list_add(&info->defer_bios, bio);
		else {
			inc_all_io_entry(info->tc->pool, bio);

			/*
			 * We can't issue the bios with the bio prison lock
			 * held, so we add them to a list to issue on
			 * return from this function.
			 */
			bio_list_add(&info->issue_bios, bio);
		}
	}
}
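
/*
 * Take the deferred-set references for every bio held in @cell, then remap
 * and issue them to @block.  Discards and flush/FUA bios can't be remapped
 * here, so they are handed back to the worker via thin_defer_bio().
 */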
static void inc_remap_and_issue_cell(struct thin_c *tc,
				     struct dm_bio_prison_cell *cell,
				     dm_block_t block)
{
	struct bio *bio;
	struct remap_info info;

	info.tc = tc;
	bio_list_init(&info.defer_bios);
	bio_list_init(&info.issue_bios);

	/*
	 * We have to be careful to inc any bios we're about to issue
	 * before the cell is released, and avoid a race with new bios
	 * being added to the cell.
	 */
	cell_visit_release(tc->pool, __inc_remap_and_issue_cell,
			   &info, cell);

	while ((bio = bio_list_pop(&info.defer_bios)))
		thin_defer_bio(tc, bio);

	while ((bio = bio_list_pop(&info.issue_bios)))
		remap_and_issue(info.tc, bio, block);
}

static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
{
	cell_error(m->tc->pool, m->cell);
	list_del(&m->list);
	mempool_free(m, m->tc->pool->mapping_pool);
}
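
/*
 * A new mapping has finished its copy/zero preparation: insert it into the
 * metadata btree and release the bios that were held in its cell.
 */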
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
	struct thin_c *tc = m->tc;
	struct pool *pool = tc->pool;
	struct bio *bio = m->bio;
	int r;

	if (m->err) {
		cell_error(pool, m->cell);
		goto out;
	}

	/*
	 * Commit the prepared block into the mapping btree.
	 * Any I/O for this block arriving after this point will get
	 * remapped to it directly.
	 */
	r = dm_thin_insert_block(tc->td, m->virt_begin, m->data_block);
	if (r) {
		metadata_operation_failed(pool, "dm_thin_insert_block", r);
		cell_error(pool, m->cell);
		goto out;
	}

	/*
	 * Release any bios held while the block was being provisioned.
	 * If we are processing a write bio that completely covers the block,
	 * we already processed it so can ignore it now when processing
	 * the bios in the cell.
	 */
	if (bio) {
		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
		bio_endio(bio);
	} else {
		inc_all_io_entry(tc->pool, m->cell->holder);
		remap_and_issue(tc, m->cell->holder, m->data_block);
		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
	}

out:
	list_del(&m->list);
	mempool_free(m, pool->mapping_pool);
}

/*----------------------------------------------------------------*/

static void free_discard_mapping(struct dm_thin_new_mapping *m)
{
	struct thin_c *tc = m->tc;

	if (m->cell)
		cell_defer_no_holder(tc, m->cell);
	mempool_free(m, tc->pool->mapping_pool);
}

static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
{
	bio_io_error(m->bio);
	free_discard_mapping(m);
}

static void process_prepared_discard_success(struct dm_thin_new_mapping *m)
{
	bio_endio(m->bio);
	free_discard_mapping(m);
}

static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)
{
	int r;
	struct thin_c *tc = m->tc;

	r = dm_thin_remove_range(tc->td, m->cell->key.block_begin, m->cell->key.block_end);
	if (r) {
		metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
		bio_io_error(m->bio);
	} else
		bio_endio(m->bio);

	cell_defer_no_holder(tc, m->cell);
	mempool_free(m, tc->pool->mapping_pool);
}

static int passdown_double_checking_shared_status(struct dm_thin_new_mapping *m)
{
	/*
	 * We've already unmapped this range of blocks, but before we
	 * passdown we have to check that these blocks are now unused.
	 */
	int r;
	bool used = true;
	struct thin_c *tc = m->tc;
	struct pool *pool = tc->pool;
	dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;

	while (b != end) {
		/* find start of unmapped run */
		for (; b < end; b++) {
			r = dm_pool_block_is_used(pool->pmd, b, &used);
			if (r)
				return r;

			if (!used)
				break;
		}

		if (b == end)
			break;

		/* find end of run */
		for (e = b + 1; e != end; e++) {
			r = dm_pool_block_is_used(pool->pmd, e, &used);
			if (r)
				return r;

			if (used)
				break;
		}

		r = issue_discard(tc, b, e, m->bio);
		if (r)
			return r;

		b = e;
	}

	return 0;
}

static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
{
	int r;
	struct thin_c *tc = m->tc;
	struct pool *pool = tc->pool;

	r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end);
	if (r)
		metadata_operation_failed(pool, "dm_thin_remove_range", r);
	else if (m->maybe_shared)
		r = passdown_double_checking_shared_status(m);
	else
		r = issue_discard(tc, m->data_block, m->data_block + (m->virt_end - m->virt_begin), m->bio);

	/*
	 * Even if r is set, there could be sub discards in flight that we
	 * need to wait for.
	 */
	m->bio->bi_error = r;
	bio_endio(m->bio);
	cell_defer_no_holder(tc, m->cell);
	mempool_free(m, pool->mapping_pool);
}

static void process_prepared(struct pool *pool, struct list_head *head,
			     process_mapping_fn *fn)
{
	unsigned long flags;
	struct list_head maps;
	struct dm_thin_new_mapping *m, *tmp;

	INIT_LIST_HEAD(&maps);
	spin_lock_irqsave(&pool->lock, flags);
	list_splice_init(head, &maps);
	spin_unlock_irqrestore(&pool->lock, flags);

	list_for_each_entry_safe(m, tmp, &maps, list)
		(*fn)(m);
}

/*
 * Deferred bio jobs.
 */
static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
	return bio->bi_iter.bi_size ==
		(pool->sectors_per_block << SECTOR_SHIFT);
}

static int io_overwrites_block(struct pool *pool, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		io_overlaps_block(pool, bio);
}

static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
			       bio_end_io_t *fn)
{
	*save = bio->bi_end_io;
	bio->bi_end_io = fn;
}

static int ensure_next_mapping(struct pool *pool)
{
	if (pool->next_mapping)
		return 0;

	pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC);

	return pool->next_mapping ? 0 : -ENOMEM;
}

static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
{
	struct dm_thin_new_mapping *m = pool->next_mapping;

	BUG_ON(!pool->next_mapping);

	memset(m, 0, sizeof(struct dm_thin_new_mapping));
	INIT_LIST_HEAD(&m->list);
	m->bio = NULL;

	pool->next_mapping = NULL;

	return m;
}

static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
		    sector_t begin, sector_t end)
{
	int r;
	struct dm_io_region to;

	to.bdev = tc->pool_dev->bdev;
	to.sector = begin;
	to.count = end - begin;

	r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
	if (r < 0) {
		DMERR_LIMIT("dm_kcopyd_zero() failed");
		copy_complete(1, 1, m);
	}
}
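
/*
 * The bio completely overwrites the block being provisioned, so there is
 * nothing to copy or zero: hook the bio's endio so the mapping is only
 * committed once the data has actually hit the disk, then issue it
 * straight to the new data block.
 */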
static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
				      dm_block_t data_begin,
				      struct dm_thin_new_mapping *m)
{
	struct pool *pool = tc->pool;
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));

	h->overwrite_mapping = m;
	m->bio = bio;
	save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
	inc_all_io_entry(pool, bio);
	remap_and_issue(tc, bio, data_begin);
}

/*
 * A partial copy also needs to zero the uncopied region.
 */
static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
			  struct dm_dev *origin, dm_block_t data_origin,
			  dm_block_t data_dest,
			  struct dm_bio_prison_cell *cell, struct bio *bio,
			  sector_t len)
{
	int r;
	struct pool *pool = tc->pool;
	struct dm_thin_new_mapping *m = get_next_mapping(pool);

	m->tc = tc;
	m->virt_begin = virt_block;
	m->virt_end = virt_block + 1u;
	m->data_block = data_dest;
	m->cell = cell;

	/*
	 * quiesce action + copy action + an extra reference held for the
	 * duration of this function (we may need to inc later for a
	 * partial zero).
	 */
	atomic_set(&m->prepare_actions, 3);

	if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
		complete_mapping_preparation(m); /* already quiesced */

	/*
	 * IO to pool_dev remaps to the pool target's data_dev.
	 *
	 * If the whole block of data is being overwritten, we can issue the
	 * bio immediately. Otherwise we use kcopyd to clone the data first.
	 */
	if (io_overwrites_block(pool, bio))
		remap_and_issue_overwrite(tc, bio, data_dest, m);
	else {
		struct dm_io_region from, to;

		from.bdev = origin->bdev;
		from.sector = data_origin * pool->sectors_per_block;
		from.count = len;

		to.bdev = tc->pool_dev->bdev;
		to.sector = data_dest * pool->sectors_per_block;
		to.count = len;

		r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
				   0, copy_complete, m);
		if (r < 0) {
			DMERR_LIMIT("dm_kcopyd_copy() failed");
			copy_complete(1, 1, m);

			/*
			 * We allow the zero to be issued, to simplify the
			 * error path.  Otherwise we'd need to start
			 * worrying about decrementing the prepare_actions
			 * counter.
			 */
		}

		/*
		 * Do we need to zero a tail region?
		 */
		if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
			atomic_inc(&m->prepare_actions);
			ll_zero(tc, m,
				data_dest * pool->sectors_per_block + len,
				(data_dest + 1) * pool->sectors_per_block);
		}
	}

	complete_mapping_preparation(m); /* drop our ref */
}

static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
				   dm_block_t data_origin, dm_block_t data_dest,
				   struct dm_bio_prison_cell *cell, struct bio *bio)
{
	schedule_copy(tc, virt_block, tc->pool_dev,
		      data_origin, data_dest, cell, bio,
		      tc->pool->sectors_per_block);
}

static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
			  dm_block_t data_block, struct dm_bio_prison_cell *cell,
			  struct bio *bio)
{
	struct pool *pool = tc->pool;
	struct dm_thin_new_mapping *m = get_next_mapping(pool);

	atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
	m->tc = tc;
	m->virt_begin = virt_block;
	m->virt_end = virt_block + 1u;
	m->data_block = data_block;
	m->cell = cell;

	/*
	 * If the whole block of data is being overwritten or we are not
	 * zeroing pre-existing data, we can issue the bio immediately.
	 * Otherwise we use kcopyd to zero the data first.
	 */
	if (pool->pf.zero_new_blocks) {
		if (io_overwrites_block(pool, bio))
			remap_and_issue_overwrite(tc, bio, data_block, m);
		else
			ll_zero(tc, m, data_block * pool->sectors_per_block,
				(data_block + 1) * pool->sectors_per_block);
	} else
		process_prepared_mapping(m);
}

static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
				   dm_block_t data_dest,
				   struct dm_bio_prison_cell *cell, struct bio *bio)
{
	struct pool *pool = tc->pool;
	sector_t virt_block_begin = virt_block * pool->sectors_per_block;
	sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;

	if (virt_block_end <= tc->origin_size)
		schedule_copy(tc, virt_block, tc->origin_dev,
			      virt_block, data_dest, cell, bio,
			      pool->sectors_per_block);

	else if (virt_block_begin < tc->origin_size)
		schedule_copy(tc, virt_block, tc->origin_dev,
			      virt_block, data_dest, cell, bio,
			      tc->origin_size - virt_block_begin);

	else
		schedule_zero(tc, virt_block, data_dest, cell, bio);
}

static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);

static void check_for_space(struct pool *pool)
{
	int r;
	dm_block_t nr_free;

	if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE)
		return;

	r = dm_pool_get_free_block_count(pool->pmd, &nr_free);
	if (r)
		return;

	if (nr_free)
		set_pool_mode(pool, PM_WRITE);
}

/*
 * A non-zero return indicates read_only or fail_io mode.
 * Many callers don't care about the return value.
 */
static int commit(struct pool *pool)
{
	int r;

	if (get_pool_mode(pool) >= PM_READ_ONLY)
		return -EINVAL;

	r = dm_pool_commit_metadata(pool->pmd);
	if (r)
		metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
	else
		check_for_space(pool);

	return r;
}

static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
{
	unsigned long flags;

	if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
		DMWARN("%s: reached low water mark for data device: sending event.",
		       dm_device_name(pool->pool_md));
		spin_lock_irqsave(&pool->lock, flags);
		pool->low_water_triggered = true;
		spin_unlock_irqrestore(&pool->lock, flags);
		dm_table_event(pool->ti->table);
	}
}
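
/*
 * Allocate a new data block for @tc.  If the pool looks full we first
 * commit outstanding metadata, which may free some blocks; if it is still
 * full, the pool is switched to PM_OUT_OF_DATA_SPACE and -ENOSPC is
 * returned so callers can queue or error the bio as configured.
 */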
static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
{
	int r;
	dm_block_t free_blocks;
	struct pool *pool = tc->pool;

	if (WARN_ON(get_pool_mode(pool) != PM_WRITE))
		return -EINVAL;

	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
	if (r) {
		metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
		return r;
	}

	check_low_water_mark(pool, free_blocks);

	if (!free_blocks) {
		/*
		 * Try to commit to see if that will free up some
		 * more space.
		 */
		r = commit(pool);
		if (r)
			return r;

		r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
		if (r) {
			metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
			return r;
		}

		if (!free_blocks) {
			set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
			return -ENOSPC;
		}
	}

	r = dm_pool_alloc_data_block(pool->pmd, result);
	if (r) {
		metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
		return r;
	}

	return 0;
}

/*
 * If we have run out of space, queue bios until the device is
 * resumed, presumably after having been reloaded with more space.
 */
static void retry_on_resume(struct bio *bio)
{
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	struct thin_c *tc = h->tc;
	unsigned long flags;

	spin_lock_irqsave(&tc->lock, flags);
	bio_list_add(&tc->retry_on_resume_list, bio);
	spin_unlock_irqrestore(&tc->lock, flags);
}

static int should_error_unserviceable_bio(struct pool *pool)
{
	enum pool_mode m = get_pool_mode(pool);

	switch (m) {
	case PM_WRITE:
		/* Shouldn't get here */
		DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
		return -EIO;

	case PM_OUT_OF_DATA_SPACE:
		return pool->pf.error_if_no_space ? -ENOSPC : 0;

	case PM_READ_ONLY:
	case PM_FAIL:
		return -EIO;
	default:
		/* Shouldn't get here */
		DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
		return -EIO;
	}
}

static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
{
	int error = should_error_unserviceable_bio(pool);

	if (error) {
		bio->bi_error = error;
		bio_endio(bio);
	} else
		retry_on_resume(bio);
}

static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	struct bio *bio;
	struct bio_list bios;
	int error;

	error = should_error_unserviceable_bio(pool);
	if (error) {
		cell_error_with_code(pool, cell, error);
		return;
	}

	bio_list_init(&bios);
	cell_release(pool, cell, &bios);

	while ((bio = bio_list_pop(&bios)))
		retry_on_resume(bio);
}

static void process_discard_cell_no_passdown(struct thin_c *tc,
					     struct dm_bio_prison_cell *virt_cell)
{
	struct pool *pool = tc->pool;
	struct dm_thin_new_mapping *m = get_next_mapping(pool);

	/*
	 * We don't need to lock the data blocks, since there's no
	 * passdown.  We only lock data blocks for allocation and breaking sharing.
	 */
	m->tc = tc;
	m->virt_begin = virt_cell->key.block_begin;
	m->virt_end = virt_cell->key.block_end;
	m->cell = virt_cell;
	m->bio = virt_cell->holder;

	if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
		pool->process_prepared_discard(m);
}

/*
 * __bio_inc_remaining() is used to defer the parent bio's end_io until
 * we _know_ all chained sub range discard bios have completed.
 */
static inline void __bio_inc_remaining(struct bio *bio)
{
	bio->bi_flags |= (1 << BIO_CHAIN);
	smp_mb__before_atomic();
	atomic_inc(&bio->__bi_remaining);
}
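
/*
 * Walk the virtual range [begin, end), look up each mapped run and queue a
 * passdown discard mapping for it.  Each run locks its own data-block cell,
 * and the parent bio takes an extra bi_remaining reference per run so it
 * cannot complete before its chained sub-discards do.
 */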
static void break_up_discard_bio(struct thin_c *tc, dm_block_t begin, dm_block_t end,
				 struct bio *bio)
{
	struct pool *pool = tc->pool;
	int r;
	bool maybe_shared;
	struct dm_cell_key data_key;
	struct dm_bio_prison_cell *data_cell;
	struct dm_thin_new_mapping *m;
	dm_block_t virt_begin, virt_end, data_begin;

	while (begin != end) {
		r = ensure_next_mapping(pool);
		if (r)
			/* we did our best */
			return;

		r = dm_thin_find_mapped_range(tc->td, begin, end, &virt_begin, &virt_end,
					      &data_begin, &maybe_shared);
		if (r)
			/*
			 * Silently fail, letting any mappings we've
			 * created complete.
			 */
			break;

		build_key(tc->td, PHYSICAL, data_begin, data_begin + (virt_end - virt_begin), &data_key);
		if (bio_detain(tc->pool, &data_key, NULL, &data_cell)) {
			/* contention, we'll give up with this range */
			begin = virt_end;
			continue;
		}

		/*
		 * IO may still be going to the destination block.  We must
		 * quiesce before we can do the removal.
		 */
		m = get_next_mapping(pool);
		m->tc = tc;
		m->maybe_shared = maybe_shared;
		m->virt_begin = virt_begin;
		m->virt_end = virt_end;
		m->data_block = data_begin;
		m->cell = data_cell;
		m->bio = bio;

		/*
		 * The parent bio must not complete before sub discard bios are
		 * chained to it (see __blkdev_issue_discard_async's bio_chain)!
		 *
		 * This per-mapping bi_remaining increment is paired with
		 * the implicit decrement that occurs via bio_endio() in
		 * process_prepared_discard_{passdown,no_passdown}.
		 */
		__bio_inc_remaining(bio);
		if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
			pool->process_prepared_discard(m);

		begin = virt_end;
	}
}

static void process_discard_cell_passdown(struct thin_c *tc, struct dm_bio_prison_cell *virt_cell)
{
	struct bio *bio = virt_cell->holder;
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));

	/*
	 * The virt_cell will only get freed once the origin bio completes.
	 * This means it will remain locked while all the individual
	 * passdown bios are in flight.
	 */
	h->cell = virt_cell;
	break_up_discard_bio(tc, virt_cell->key.block_begin, virt_cell->key.block_end, bio);

	/*
	 * We complete the bio now, knowing that the bi_remaining field
	 * will prevent completion until the sub range discards have
	 * completed.
	 */
	bio_endio(bio);
}

static void process_discard_bio(struct thin_c *tc, struct bio *bio)
{
	dm_block_t begin, end;
	struct dm_cell_key virt_key;
	struct dm_bio_prison_cell *virt_cell;

	get_bio_block_range(tc, bio, &begin, &end);
	if (begin == end) {
		/*
		 * The discard covers less than a block.
		 */
		bio_endio(bio);
		return;
	}

	build_key(tc->td, VIRTUAL, begin, end, &virt_key);
	if (bio_detain(tc->pool, &virt_key, bio, &virt_cell))
		/*
		 * Potential starvation issue: We're relying on the
		 * fs/application being well behaved, and not trying to
		 * send IO to a region at the same time as discarding it.
		 * If they do this persistently then it's possible this
		 * cell will never be granted.
		 */
		return;

	tc->pool->process_discard_cell(tc, virt_cell);
}
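
/*
 * Break sharing of a data block: allocate a fresh block and schedule a copy
 * of the shared block's contents into it.  On -ENOSPC the bios held in
 * @cell are queued to be retried once the pool has been resized.
 */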
2011-11-01 00:21:18 +04:00
static void break_sharing ( struct thin_c * tc , struct bio * bio , dm_block_t block ,
2012-10-13 00:02:10 +04:00
struct dm_cell_key * key ,
2011-11-01 00:21:18 +04:00
struct dm_thin_lookup_result * lookup_result ,
2012-06-03 03:30:00 +04:00
struct dm_bio_prison_cell * cell )
2011-11-01 00:21:18 +04:00
{
int r ;
dm_block_t data_block ;
2013-08-22 01:40:11 +04:00
struct pool * pool = tc - > pool ;
2011-11-01 00:21:18 +04:00
r = alloc_data_block ( tc , & data_block ) ;
switch ( r ) {
case 0 :
2012-03-28 21:41:28 +04:00
schedule_internal_copy ( tc , block , lookup_result - > block ,
data_block , cell , bio ) ;
2011-11-01 00:21:18 +04:00
break ;
case - ENOSPC :
2013-12-06 01:03:33 +04:00
retry_bios_on_resume ( pool , cell ) ;
2011-11-01 00:21:18 +04:00
break ;
default :
2012-12-22 00:23:34 +04:00
DMERR_LIMIT ( " %s: alloc_data_block() failed: error = %d " ,
__func__ , r ) ;
2013-08-22 01:40:11 +04:00
cell_error ( pool , cell ) ;
2011-11-01 00:21:18 +04:00
break ;
}
}
2014-10-15 17:46:58 +04:00
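/*
 * Called via cell_visit_release(): splits the bios held in a shared
 * cell into those that must go back through the deferred path (writes,
 * discards, flushes) and plain reads, which take a shared_read_entry
 * and can be remapped and issued directly.
 */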
static void __remap_and_issue_shared_cell ( void * context ,
struct dm_bio_prison_cell * cell )
{
struct remap_info * info = context ;
struct bio * bio ;
while ( ( bio = bio_list_pop ( & cell - > bios ) ) ) {
if ( ( bio_data_dir ( bio ) = = WRITE ) | |
( bio - > bi_rw & ( REQ_DISCARD | REQ_FLUSH | REQ_FUA ) ) )
bio_list_add ( & info - > defer_bios , bio ) ;
else {
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
h - > shared_read_entry = dm_deferred_entry_inc ( info - > tc - > pool - > shared_read_ds ) ;
inc_all_io_entry ( info - > tc - > pool , bio ) ;
bio_list_add ( & info - > issue_bios , bio ) ;
}
}
}
static void remap_and_issue_shared_cell ( struct thin_c * tc ,
struct dm_bio_prison_cell * cell ,
dm_block_t block )
{
struct bio * bio ;
struct remap_info info ;
info . tc = tc ;
bio_list_init ( & info . defer_bios ) ;
bio_list_init ( & info . issue_bios ) ;
cell_visit_release ( tc - > pool , __remap_and_issue_shared_cell ,
& info , cell ) ;
while ( ( bio = bio_list_pop ( & info . defer_bios ) ) )
thin_defer_bio ( tc , bio ) ;
while ( ( bio = bio_list_pop ( & info . issue_bios ) ) )
remap_and_issue ( tc , bio , block ) ;
}
2011-11-01 00:21:18 +04:00
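/*
 * Handle a bio that maps to a shared data block: writes with data
 * break the sharing via break_sharing(), anything else takes a
 * shared_read_entry and is remapped to the existing block.
 */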
static void process_shared_bio ( struct thin_c * tc , struct bio * bio ,
dm_block_t block ,
2014-10-15 17:46:58 +04:00
struct dm_thin_lookup_result * lookup_result ,
struct dm_bio_prison_cell * virt_cell )
2011-11-01 00:21:18 +04:00
{
2014-10-15 17:46:58 +04:00
struct dm_bio_prison_cell * data_cell ;
2011-11-01 00:21:18 +04:00
struct pool * pool = tc - > pool ;
2012-10-13 00:02:10 +04:00
struct dm_cell_key key ;
2011-11-01 00:21:18 +04:00
/*
* If cell is already occupied , then sharing is already in the process
* of being broken so we have nothing further to do here .
*/
build_data_key ( tc - > td , lookup_result - > block , & key ) ;
2014-10-15 17:46:58 +04:00
if ( bio_detain ( pool , & key , bio , & data_cell ) ) {
cell_defer_no_holder ( tc , virt_cell ) ;
2011-11-01 00:21:18 +04:00
return ;
2014-10-15 17:46:58 +04:00
}
2011-11-01 00:21:18 +04:00
2014-10-15 17:46:58 +04:00
if ( bio_data_dir ( bio ) = = WRITE & & bio - > bi_iter . bi_size ) {
break_sharing ( tc , bio , block , & key , lookup_result , data_cell ) ;
cell_defer_no_holder ( tc , virt_cell ) ;
} else {
2012-12-22 00:23:40 +04:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2011-11-01 00:21:18 +04:00
2012-10-13 00:02:10 +04:00
h - > shared_read_entry = dm_deferred_entry_inc ( pool - > shared_read_ds ) ;
2012-12-22 00:23:31 +04:00
inc_all_io_entry ( pool , bio ) ;
2011-11-01 00:21:18 +04:00
remap_and_issue ( tc , bio , lookup_result - > block ) ;
2014-10-15 17:46:58 +04:00
remap_and_issue_shared_cell ( tc , data_cell , lookup_result - > block ) ;
remap_and_issue_shared_cell ( tc , virt_cell , lookup_result - > block ) ;
2011-11-01 00:21:18 +04:00
}
}
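/*
 * Provision a new data block for an unmapped virtual block.  Empty
 * (flush) bios are remapped immediately, reads are zero-filled and
 * completed, and writes allocate a block that is then zeroed or copied
 * from the external origin.
 */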
static void provision_block ( struct thin_c * tc , struct bio * bio , dm_block_t block ,
2012-06-03 03:30:00 +04:00
struct dm_bio_prison_cell * cell )
2011-11-01 00:21:18 +04:00
{
int r ;
dm_block_t data_block ;
2013-03-02 02:45:50 +04:00
struct pool * pool = tc - > pool ;
2011-11-01 00:21:18 +04:00
/*
* Remap empty bios ( flushes ) immediately , without provisioning .
*/
2013-10-12 02:44:27 +04:00
if ( ! bio - > bi_iter . bi_size ) {
2013-03-02 02:45:50 +04:00
inc_all_io_entry ( pool , bio ) ;
2012-12-22 00:23:33 +04:00
cell_defer_no_holder ( tc , cell ) ;
2012-12-22 00:23:31 +04:00
2011-11-01 00:21:18 +04:00
remap_and_issue ( tc , bio , 0 ) ;
return ;
}
/*
* Fill read bios with zeroes and complete them immediately .
*/
if ( bio_data_dir ( bio ) = = READ ) {
zero_fill_bio ( bio ) ;
2012-12-22 00:23:33 +04:00
cell_defer_no_holder ( tc , cell ) ;
2015-07-20 16:29:37 +03:00
bio_endio ( bio ) ;
2011-11-01 00:21:18 +04:00
return ;
}
r = alloc_data_block ( tc , & data_block ) ;
switch ( r ) {
case 0 :
2012-03-28 21:41:28 +04:00
if ( tc - > origin_dev )
schedule_external_copy ( tc , block , data_block , cell , bio ) ;
else
schedule_zero ( tc , block , data_block , cell , bio ) ;
2011-11-01 00:21:18 +04:00
break ;
case - ENOSPC :
2013-12-06 01:03:33 +04:00
retry_bios_on_resume ( pool , cell ) ;
2011-11-01 00:21:18 +04:00
break ;
default :
2012-12-22 00:23:34 +04:00
DMERR_LIMIT ( " %s: alloc_data_block() failed: error = %d " ,
__func__ , r ) ;
2013-03-02 02:45:50 +04:00
cell_error ( pool , cell ) ;
2011-11-01 00:21:18 +04:00
break ;
}
}
2014-10-10 16:43:14 +04:00
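/*
 * Main handler for a deferred cell while the pool is in write mode:
 * look up the virtual block and either remap (exclusive mapping),
 * break sharing (shared mapping) or provision a new block (-ENODATA).
 */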
static void process_cell ( struct thin_c * tc , struct dm_bio_prison_cell * cell )
2011-11-01 00:21:18 +04:00
{
int r ;
2013-03-02 02:45:50 +04:00
struct pool * pool = tc - > pool ;
2014-10-10 16:43:14 +04:00
struct bio * bio = cell - > holder ;
2011-11-01 00:21:18 +04:00
dm_block_t block = get_bio_block ( tc , bio ) ;
struct dm_thin_lookup_result lookup_result ;
2014-10-10 16:43:14 +04:00
if ( tc - > requeue_mode ) {
cell_requeue ( pool , cell ) ;
2011-11-01 00:21:18 +04:00
return ;
2014-10-10 16:43:14 +04:00
}
2011-11-01 00:21:18 +04:00
r = dm_thin_find_block ( tc - > td , block , 1 , & lookup_result ) ;
switch ( r ) {
case 0 :
2014-10-15 17:46:58 +04:00
if ( lookup_result . shared )
process_shared_bio ( tc , bio , block , & lookup_result , cell ) ;
else {
2013-03-02 02:45:50 +04:00
inc_all_io_entry ( pool , bio ) ;
2011-11-01 00:21:18 +04:00
remap_and_issue ( tc , bio , lookup_result . block ) ;
2014-10-10 16:43:14 +04:00
inc_remap_and_issue_cell ( tc , cell , lookup_result . block ) ;
2012-12-22 00:23:31 +04:00
}
2011-11-01 00:21:18 +04:00
break ;
case - ENODATA :
2012-03-28 21:41:28 +04:00
if ( bio_data_dir ( bio ) = = READ & & tc - > origin_dev ) {
2013-03-02 02:45:50 +04:00
inc_all_io_entry ( pool , bio ) ;
2012-12-22 00:23:33 +04:00
cell_defer_no_holder ( tc , cell ) ;
2012-12-22 00:23:31 +04:00
2014-06-13 17:47:24 +04:00
if ( bio_end_sector ( bio ) < = tc - > origin_size )
remap_to_origin_and_issue ( tc , bio ) ;
else if ( bio - > bi_iter . bi_sector < tc - > origin_size ) {
zero_fill_bio ( bio ) ;
bio - > bi_iter . bi_size = ( tc - > origin_size - bio - > bi_iter . bi_sector ) < < SECTOR_SHIFT ;
remap_to_origin_and_issue ( tc , bio ) ;
} else {
zero_fill_bio ( bio ) ;
2015-07-20 16:29:37 +03:00
bio_endio ( bio ) ;
2014-06-13 17:47:24 +04:00
}
2012-03-28 21:41:28 +04:00
} else
provision_block ( tc , bio , block , cell ) ;
2011-11-01 00:21:18 +04:00
break ;
default :
2012-12-22 00:23:34 +04:00
DMERR_LIMIT ( " %s: dm_thin_find_block() failed: error = %d " ,
__func__ , r ) ;
2012-12-22 00:23:33 +04:00
cell_defer_no_holder ( tc , cell ) ;
2011-11-01 00:21:18 +04:00
bio_io_error ( bio ) ;
break ;
}
}
2014-10-10 16:43:14 +04:00
static void process_bio ( struct thin_c * tc , struct bio * bio )
{
struct pool * pool = tc - > pool ;
dm_block_t block = get_bio_block ( tc , bio ) ;
struct dm_bio_prison_cell * cell ;
struct dm_cell_key key ;
/*
* If cell is already occupied , then the block is already
* being provisioned so we have nothing further to do here .
*/
build_virtual_key ( tc - > td , block , & key ) ;
if ( bio_detain ( pool , & key , bio , & cell ) )
return ;
process_cell ( tc , cell ) ;
}
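/*
 * Common helper for the read-only and out-of-data-space modes: no new
 * blocks can be allocated, so writes that would need provisioning or
 * sharing to be broken are handed to handle_unserviceable_bio().
 */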
static void __process_bio_read_only ( struct thin_c * tc , struct bio * bio ,
struct dm_bio_prison_cell * cell )
2012-07-27 18:08:16 +04:00
{
int r ;
int rw = bio_data_dir ( bio ) ;
dm_block_t block = get_bio_block ( tc , bio ) ;
struct dm_thin_lookup_result lookup_result ;
r = dm_thin_find_block ( tc - > td , block , 1 , & lookup_result ) ;
switch ( r ) {
case 0 :
2014-10-10 16:43:14 +04:00
if ( lookup_result . shared & & ( rw = = WRITE ) & & bio - > bi_iter . bi_size ) {
2013-12-06 00:47:24 +04:00
handle_unserviceable_bio ( tc - > pool , bio ) ;
2014-10-10 16:43:14 +04:00
if ( cell )
cell_defer_no_holder ( tc , cell ) ;
} else {
2012-12-22 00:23:31 +04:00
inc_all_io_entry ( tc - > pool , bio ) ;
2012-07-27 18:08:16 +04:00
remap_and_issue ( tc , bio , lookup_result . block ) ;
2014-10-10 16:43:14 +04:00
if ( cell )
inc_remap_and_issue_cell ( tc , cell , lookup_result . block ) ;
2012-12-22 00:23:31 +04:00
}
2012-07-27 18:08:16 +04:00
break ;
case - ENODATA :
2014-10-10 16:43:14 +04:00
if ( cell )
cell_defer_no_holder ( tc , cell ) ;
2012-07-27 18:08:16 +04:00
if ( rw ! = READ ) {
2013-12-06 00:47:24 +04:00
handle_unserviceable_bio ( tc - > pool , bio ) ;
2012-07-27 18:08:16 +04:00
break ;
}
if ( tc - > origin_dev ) {
2012-12-22 00:23:31 +04:00
inc_all_io_entry ( tc - > pool , bio ) ;
2012-07-27 18:08:16 +04:00
remap_to_origin_and_issue ( tc , bio ) ;
break ;
}
zero_fill_bio ( bio ) ;
2015-07-20 16:29:37 +03:00
bio_endio ( bio ) ;
2012-07-27 18:08:16 +04:00
break ;
default :
2012-12-22 00:23:34 +04:00
DMERR_LIMIT ( " %s: dm_thin_find_block() failed: error = %d " ,
__func__ , r ) ;
2014-10-10 16:43:14 +04:00
if ( cell )
cell_defer_no_holder ( tc , cell ) ;
2012-07-27 18:08:16 +04:00
bio_io_error ( bio ) ;
break ;
}
}
2014-10-10 16:43:14 +04:00
static void process_bio_read_only ( struct thin_c * tc , struct bio * bio )
{
__process_bio_read_only ( tc , bio , NULL ) ;
}
static void process_cell_read_only ( struct thin_c * tc , struct dm_bio_prison_cell * cell )
{
__process_bio_read_only ( tc , cell - > holder , cell ) ;
}
2014-03-03 20:03:26 +04:00
static void process_bio_success ( struct thin_c * tc , struct bio * bio )
{
2015-07-20 16:29:37 +03:00
bio_endio ( bio ) ;
2014-03-03 20:03:26 +04:00
}
2012-07-27 18:08:16 +04:00
static void process_bio_fail ( struct thin_c * tc , struct bio * bio )
{
bio_io_error ( bio ) ;
}
2014-10-10 16:43:14 +04:00
static void process_cell_success ( struct thin_c * tc , struct dm_bio_prison_cell * cell )
{
cell_success ( tc - > pool , cell ) ;
}
static void process_cell_fail ( struct thin_c * tc , struct dm_bio_prison_cell * cell )
{
cell_error ( tc - > pool , cell ) ;
}
2013-05-10 17:37:21 +04:00
/*
* FIXME : should we also commit due to size of transaction , measured in
* metadata blocks ?
*/
2012-03-28 21:41:27 +04:00
static int need_commit_due_to_time ( struct pool * pool )
{
2014-05-23 00:42:37 +04:00
return ! time_in_range ( jiffies , pool - > last_commit_jiffies ,
pool - > last_commit_jiffies + COMMIT_PERIOD ) ;
2012-03-28 21:41:27 +04:00
}
2014-03-22 02:33:41 +04:00
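/*
 * Deferred bios are sorted by bi_sector (using an rb-tree of their
 * per-bio endio hooks) before submission, giving the data device a
 * more sequential IO pattern.
 */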
# define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node)
# define thin_bio(pbd) dm_bio_from_per_bio_data((pbd), sizeof(struct dm_thin_endio_hook))
static void __thin_bio_rb_add ( struct thin_c * tc , struct bio * bio )
{
struct rb_node * * rbp , * parent ;
struct dm_thin_endio_hook * pbd ;
sector_t bi_sector = bio - > bi_iter . bi_sector ;
rbp = & tc - > sort_bio_list . rb_node ;
parent = NULL ;
while ( * rbp ) {
parent = * rbp ;
pbd = thin_pbd ( parent ) ;
if ( bi_sector < thin_bio ( pbd ) - > bi_iter . bi_sector )
rbp = & ( * rbp ) - > rb_left ;
else
rbp = & ( * rbp ) - > rb_right ;
}
pbd = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
rb_link_node ( & pbd - > rb_node , parent , rbp ) ;
rb_insert_color ( & pbd - > rb_node , & tc - > sort_bio_list ) ;
}
static void __extract_sorted_bios ( struct thin_c * tc )
{
struct rb_node * node ;
struct dm_thin_endio_hook * pbd ;
struct bio * bio ;
for ( node = rb_first ( & tc - > sort_bio_list ) ; node ; node = rb_next ( node ) ) {
pbd = thin_pbd ( node ) ;
bio = thin_bio ( pbd ) ;
bio_list_add ( & tc - > deferred_bio_list , bio ) ;
rb_erase ( & pbd - > rb_node , & tc - > sort_bio_list ) ;
}
WARN_ON ( ! RB_EMPTY_ROOT ( & tc - > sort_bio_list ) ) ;
}
static void __sort_thin_deferred_bios ( struct thin_c * tc )
{
struct bio * bio ;
struct bio_list bios ;
bio_list_init ( & bios ) ;
bio_list_merge ( & bios , & tc - > deferred_bio_list ) ;
bio_list_init ( & tc - > deferred_bio_list ) ;
/* Sort deferred_bio_list using rb-tree */
while ( ( bio = bio_list_pop ( & bios ) ) )
__thin_bio_rb_add ( tc , bio ) ;
/*
* Transfer the sorted bios in sort_bio_list back to
* deferred_bio_list to allow lockless submission of
* all bios .
*/
__extract_sorted_bios ( tc ) ;
}
2014-03-21 05:17:14 +04:00
static void process_thin_deferred_bios ( struct thin_c * tc )
2011-11-01 00:21:18 +04:00
{
2014-03-21 05:17:14 +04:00
struct pool * pool = tc - > pool ;
2011-11-01 00:21:18 +04:00
unsigned long flags ;
struct bio * bio ;
struct bio_list bios ;
2014-03-22 02:33:41 +04:00
struct blk_plug plug ;
2014-10-06 18:28:30 +04:00
unsigned count = 0 ;
2011-11-01 00:21:18 +04:00
2014-03-21 05:17:14 +04:00
if ( tc - > requeue_mode ) {
2014-10-19 15:52:44 +04:00
error_thin_bio_list ( tc , & tc - > deferred_bio_list , DM_ENDIO_REQUEUE ) ;
2014-03-21 05:17:14 +04:00
return ;
}
2011-11-01 00:21:18 +04:00
bio_list_init ( & bios ) ;
2014-03-21 05:17:14 +04:00
spin_lock_irqsave ( & tc - > lock , flags ) ;
2014-03-22 02:33:41 +04:00
if ( bio_list_empty ( & tc - > deferred_bio_list ) ) {
spin_unlock_irqrestore ( & tc - > lock , flags ) ;
return ;
}
__sort_thin_deferred_bios ( tc ) ;
2014-03-21 05:17:14 +04:00
bio_list_merge ( & bios , & tc - > deferred_bio_list ) ;
bio_list_init ( & tc - > deferred_bio_list ) ;
2014-03-22 02:33:41 +04:00
2014-03-21 05:17:14 +04:00
spin_unlock_irqrestore ( & tc - > lock , flags ) ;
2011-11-01 00:21:18 +04:00
2014-03-22 02:33:41 +04:00
blk_start_plug ( & plug ) ;
2011-11-01 00:21:18 +04:00
while ( ( bio = bio_list_pop ( & bios ) ) ) {
/*
* If we ' ve got no free new_mapping structs , and processing
* this bio might require one , we pause until there are some
* prepared mappings to process .
*/
if ( ensure_next_mapping ( pool ) ) {
2014-03-21 05:17:14 +04:00
spin_lock_irqsave ( & tc - > lock , flags ) ;
bio_list_add ( & tc - > deferred_bio_list , bio ) ;
bio_list_merge ( & tc - > deferred_bio_list , & bios ) ;
spin_unlock_irqrestore ( & tc - > lock , flags ) ;
2011-11-01 00:21:18 +04:00
break ;
}
2012-03-28 21:41:28 +04:00
if ( bio - > bi_rw & REQ_DISCARD )
2012-07-27 18:08:16 +04:00
pool - > process_discard ( tc , bio ) ;
2012-03-28 21:41:28 +04:00
else
2012-07-27 18:08:16 +04:00
pool - > process_bio ( tc , bio ) ;
2014-10-06 18:28:30 +04:00
if ( ( count + + & 127 ) = = 0 ) {
2014-10-06 18:45:59 +04:00
throttle_work_update ( & pool - > throttle ) ;
2014-10-06 18:28:30 +04:00
dm_pool_issue_prefetches ( pool - > pmd ) ;
}
2011-11-01 00:21:18 +04:00
}
2014-03-22 02:33:41 +04:00
blk_finish_plug ( & plug ) ;
2014-03-21 05:17:14 +04:00
}
2014-10-10 19:42:10 +04:00
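/*
 * Deferred cells are processed in batches: sort_cells() pulls up to
 * CELL_SORT_ARRAY_SIZE cells off the list and orders them by the
 * holder bio's starting sector.
 */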
static int cmp_cells ( const void * lhs , const void * rhs )
{
struct dm_bio_prison_cell * lhs_cell = * ( ( struct dm_bio_prison_cell * * ) lhs ) ;
struct dm_bio_prison_cell * rhs_cell = * ( ( struct dm_bio_prison_cell * * ) rhs ) ;
BUG_ON ( ! lhs_cell - > holder ) ;
BUG_ON ( ! rhs_cell - > holder ) ;
if ( lhs_cell - > holder - > bi_iter . bi_sector < rhs_cell - > holder - > bi_iter . bi_sector )
return - 1 ;
if ( lhs_cell - > holder - > bi_iter . bi_sector > rhs_cell - > holder - > bi_iter . bi_sector )
return 1 ;
return 0 ;
}
static unsigned sort_cells ( struct pool * pool , struct list_head * cells )
{
unsigned count = 0 ;
struct dm_bio_prison_cell * cell , * tmp ;
list_for_each_entry_safe ( cell , tmp , cells , user_list ) {
if ( count > = CELL_SORT_ARRAY_SIZE )
break ;
pool - > cell_sort_array [ count + + ] = cell ;
list_del ( & cell - > user_list ) ;
}
sort ( pool - > cell_sort_array , count , sizeof ( cell ) , cmp_cells , NULL ) ;
return count ;
}
2014-10-10 16:43:14 +04:00
static void process_thin_deferred_cells ( struct thin_c * tc )
{
struct pool * pool = tc - > pool ;
unsigned long flags ;
struct list_head cells ;
2014-10-10 19:42:10 +04:00
struct dm_bio_prison_cell * cell ;
unsigned i , j , count ;
2014-10-10 16:43:14 +04:00
INIT_LIST_HEAD ( & cells ) ;
spin_lock_irqsave ( & tc - > lock , flags ) ;
list_splice_init ( & tc - > deferred_cells , & cells ) ;
spin_unlock_irqrestore ( & tc - > lock , flags ) ;
if ( list_empty ( & cells ) )
return ;
2014-10-10 19:42:10 +04:00
do {
count = sort_cells ( tc - > pool , & cells ) ;
2014-10-10 16:43:14 +04:00
2014-10-10 19:42:10 +04:00
for ( i = 0 ; i < count ; i + + ) {
cell = pool - > cell_sort_array [ i ] ;
BUG_ON ( ! cell - > holder ) ;
2014-10-10 16:43:14 +04:00
2014-10-10 19:42:10 +04:00
/*
* If we ' ve got no free new_mapping structs , and processing
* this bio might require one , we pause until there are some
* prepared mappings to process .
*/
if ( ensure_next_mapping ( pool ) ) {
for ( j = i ; j < count ; j + + )
list_add ( & pool - > cell_sort_array [ j ] - > user_list , & cells ) ;
spin_lock_irqsave ( & tc - > lock , flags ) ;
list_splice ( & cells , & tc - > deferred_cells ) ;
spin_unlock_irqrestore ( & tc - > lock , flags ) ;
return ;
}
if ( cell - > holder - > bi_rw & REQ_DISCARD )
pool - > process_discard_cell ( tc , cell ) ;
else
pool - > process_cell ( tc , cell ) ;
}
} while ( ! list_empty ( & cells ) ) ;
2014-10-10 16:43:14 +04:00
}
2014-04-08 14:29:01 +04:00
static void thin_get ( struct thin_c * tc ) ;
static void thin_put ( struct thin_c * tc ) ;
/*
* We can ' t hold rcu_read_lock ( ) around code that can block . So we
* find a thin with the rcu lock held ; bump a refcount ; then drop
* the lock .
*/
static struct thin_c * get_first_thin ( struct pool * pool )
{
struct thin_c * tc = NULL ;
rcu_read_lock ( ) ;
if ( ! list_empty ( & pool - > active_thins ) ) {
tc = list_entry_rcu ( pool - > active_thins . next , struct thin_c , list ) ;
thin_get ( tc ) ;
}
rcu_read_unlock ( ) ;
return tc ;
}
static struct thin_c * get_next_thin ( struct pool * pool , struct thin_c * tc )
{
struct thin_c * old_tc = tc ;
rcu_read_lock ( ) ;
list_for_each_entry_continue_rcu ( tc , & pool - > active_thins , list ) {
thin_get ( tc ) ;
thin_put ( old_tc ) ;
rcu_read_unlock ( ) ;
return tc ;
}
thin_put ( old_tc ) ;
rcu_read_unlock ( ) ;
return NULL ;
}
2014-03-21 05:17:14 +04:00
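/*
 * Walk every active thin device, handling its deferred cells and bios.
 * Then, if there are deferred flush bios, or the transaction is dirty
 * and the commit period has elapsed, commit the metadata before
 * issuing those flushes.
 */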
static void process_deferred_bios ( struct pool * pool )
{
unsigned long flags ;
struct bio * bio ;
struct bio_list bios ;
struct thin_c * tc ;
2014-04-08 14:29:01 +04:00
tc = get_first_thin ( pool ) ;
while ( tc ) {
2014-10-10 16:43:14 +04:00
process_thin_deferred_cells ( tc ) ;
2014-03-21 05:17:14 +04:00
process_thin_deferred_bios ( tc ) ;
2014-04-08 14:29:01 +04:00
tc = get_next_thin ( pool , tc ) ;
}
2011-11-01 00:21:18 +04:00
/*
* If there are any deferred flush bios , we must commit
* the metadata before issuing them .
*/
bio_list_init ( & bios ) ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio_list_merge ( & bios , & pool - > deferred_flush_bios ) ;
bio_list_init ( & pool - > deferred_flush_bios ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2014-02-06 15:08:56 +04:00
if ( bio_list_empty ( & bios ) & &
! ( dm_pool_changed_this_transaction ( pool - > pmd ) & & need_commit_due_to_time ( pool ) ) )
2011-11-01 00:21:18 +04:00
return ;
2013-12-05 00:05:36 +04:00
if ( commit ( pool ) ) {
2011-11-01 00:21:18 +04:00
while ( ( bio = bio_list_pop ( & bios ) ) )
bio_io_error ( bio ) ;
return ;
}
2012-03-28 21:41:27 +04:00
pool - > last_commit_jiffies = jiffies ;
2011-11-01 00:21:18 +04:00
while ( ( bio = bio_list_pop ( & bios ) ) )
generic_make_request ( bio ) ;
}
static void do_worker ( struct work_struct * ws )
{
struct pool * pool = container_of ( ws , struct pool , worker ) ;
2014-10-06 18:45:59 +04:00
throttle_work_start ( & pool - > throttle ) ;
2014-10-06 18:28:30 +04:00
dm_pool_issue_prefetches ( pool - > pmd ) ;
2014-10-06 18:45:59 +04:00
throttle_work_update ( & pool - > throttle ) ;
2012-07-27 18:08:16 +04:00
process_prepared ( pool , & pool - > prepared_mappings , & pool - > process_prepared_mapping ) ;
2014-10-06 18:45:59 +04:00
throttle_work_update ( & pool - > throttle ) ;
2012-07-27 18:08:16 +04:00
process_prepared ( pool , & pool - > prepared_discards , & pool - > process_prepared_discard ) ;
2014-10-06 18:45:59 +04:00
throttle_work_update ( & pool - > throttle ) ;
2011-11-01 00:21:18 +04:00
process_deferred_bios ( pool ) ;
2014-10-06 18:45:59 +04:00
throttle_work_complete ( & pool - > throttle ) ;
2011-11-01 00:21:18 +04:00
}
2012-03-28 21:41:27 +04:00
/*
* We want to commit periodically so that not too much
* unwritten data builds up .
*/
static void do_waker ( struct work_struct * ws )
{
struct pool * pool = container_of ( to_delayed_work ( ws ) , struct pool , waker ) ;
wake_worker ( pool ) ;
queue_delayed_work ( pool - > wq , & pool - > waker , COMMIT_PERIOD ) ;
}
2015-07-15 23:52:04 +03:00
static void notify_of_pool_mode_change_to_oods ( struct pool * pool ) ;
2014-05-09 18:59:38 +04:00
/*
* We ' re holding onto IO to allow userland time to react . After the
* timeout either the pool will have been resized ( and thus back in
2015-07-15 23:52:04 +03:00
* PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE with error_if_no_space.
2014-05-09 18:59:38 +04:00
*/
static void do_no_space_timeout ( struct work_struct * ws )
{
struct pool * pool = container_of ( to_delayed_work ( ws ) , struct pool ,
no_space_timeout ) ;
2015-07-15 23:52:04 +03:00
if ( get_pool_mode ( pool ) = = PM_OUT_OF_DATA_SPACE & & ! pool - > pf . error_if_no_space ) {
pool - > pf . error_if_no_space = true ;
notify_of_pool_mode_change_to_oods ( pool ) ;
2015-07-21 20:20:46 +03:00
error_retry_list_with_code ( pool , - ENOSPC ) ;
2015-07-15 23:52:04 +03:00
}
2014-05-09 18:59:38 +04:00
}
2011-11-01 00:21:18 +04:00
/*----------------------------------------------------------------*/
2014-05-14 00:14:14 +04:00
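/*
 * pool_work/noflush_work let a caller run a function on the pool's
 * workqueue and wait for it to complete; noflush_work() is used to
 * flip a thin device's requeue_mode from worker context.
 */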
struct pool_work {
2014-03-03 19:52:28 +04:00
struct work_struct worker ;
2014-05-14 00:14:14 +04:00
struct completion complete ;
} ;
static struct pool_work * to_pool_work ( struct work_struct * ws )
{
return container_of ( ws , struct pool_work , worker ) ;
}
static void pool_work_complete ( struct pool_work * pw )
{
complete ( & pw - > complete ) ;
}
2014-03-03 19:52:28 +04:00
2014-05-14 00:14:14 +04:00
static void pool_work_wait ( struct pool_work * pw , struct pool * pool ,
void ( * fn ) ( struct work_struct * ) )
{
INIT_WORK_ONSTACK ( & pw - > worker , fn ) ;
init_completion ( & pw - > complete ) ;
queue_work ( pool - > wq , & pw - > worker ) ;
wait_for_completion ( & pw - > complete ) ;
}
/*----------------------------------------------------------------*/
struct noflush_work {
struct pool_work pw ;
struct thin_c * tc ;
2014-03-03 19:52:28 +04:00
} ;
2014-05-14 00:14:14 +04:00
static struct noflush_work * to_noflush ( struct work_struct * ws )
2014-03-03 19:52:28 +04:00
{
2014-05-14 00:14:14 +04:00
return container_of ( to_pool_work ( ws ) , struct noflush_work , pw ) ;
2014-03-03 19:52:28 +04:00
}
static void do_noflush_start ( struct work_struct * ws )
{
2014-05-14 00:14:14 +04:00
struct noflush_work * w = to_noflush ( ws ) ;
2014-03-03 19:52:28 +04:00
w - > tc - > requeue_mode = true ;
requeue_io ( w - > tc ) ;
2014-05-14 00:14:14 +04:00
pool_work_complete ( & w - > pw ) ;
2014-03-03 19:52:28 +04:00
}
static void do_noflush_stop ( struct work_struct * ws )
{
2014-05-14 00:14:14 +04:00
struct noflush_work * w = to_noflush ( ws ) ;
2014-03-03 19:52:28 +04:00
w - > tc - > requeue_mode = false ;
2014-05-14 00:14:14 +04:00
pool_work_complete ( & w - > pw ) ;
2014-03-03 19:52:28 +04:00
}
static void noflush_work ( struct thin_c * tc , void ( * fn ) ( struct work_struct * ) )
{
struct noflush_work w ;
w . tc = tc ;
2014-05-14 00:14:14 +04:00
pool_work_wait ( & w . pw , tc - > pool , fn ) ;
2014-03-03 19:52:28 +04:00
}
/*----------------------------------------------------------------*/
2012-07-27 18:08:16 +04:00
static enum pool_mode get_pool_mode ( struct pool * pool )
{
return pool - > pf . mode ;
}
2014-03-03 20:03:26 +04:00
static void notify_of_pool_mode_change ( struct pool * pool , const char * new_mode )
{
dm_table_event ( pool - > ti - > table ) ;
DMINFO ( " %s: switching pool to %s mode " ,
dm_device_name ( pool - > pool_md ) , new_mode ) ;
}
2015-07-15 23:52:04 +03:00
static void notify_of_pool_mode_change_to_oods ( struct pool * pool )
{
if ( ! pool - > pf . error_if_no_space )
notify_of_pool_mode_change ( pool , " out-of-data-space (queue IO) " ) ;
else
notify_of_pool_mode_change ( pool , " out-of-data-space (error IO) " ) ;
}
2015-04-16 14:58:35 +03:00
static bool passdown_enabled ( struct pool_c * pt )
{
return pt - > adjusted_pf . discard_passdown ;
}
static void set_discard_callbacks ( struct pool * pool )
{
struct pool_c * pt = pool - > ti - > private ;
if ( passdown_enabled ( pt ) ) {
pool - > process_discard_cell = process_discard_cell_passdown ;
pool - > process_prepared_discard = process_prepared_discard_passdown ;
} else {
pool - > process_discard_cell = process_discard_cell_no_passdown ;
pool - > process_prepared_discard = process_prepared_discard_no_passdown ;
}
}
2013-12-20 23:27:28 +04:00
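/*
 * Central mode transition: installs the process_* callbacks for the
 * new mode.  A pool in PM_FAIL never leaves that mode, and we refuse
 * to enter PM_WRITE while the metadata needs_check flag is set.
 */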
static void set_pool_mode ( struct pool * pool , enum pool_mode new_mode )
2012-07-27 18:08:16 +04:00
{
2014-02-15 03:10:55 +04:00
struct pool_c * pt = pool - > ti - > private ;
2014-02-14 20:58:41 +04:00
bool needs_check = dm_pool_metadata_needs_check ( pool - > pmd ) ;
enum pool_mode old_mode = get_pool_mode ( pool ) ;
2014-05-20 21:38:33 +04:00
unsigned long no_space_timeout = ACCESS_ONCE ( no_space_timeout_secs ) * HZ ;
2014-02-14 20:58:41 +04:00
/*
* Never allow the pool to transition to PM_WRITE mode if user
* intervention is required to verify metadata and data consistency .
*/
if ( new_mode = = PM_WRITE & & needs_check ) {
DMERR ( " %s: unable to switch pool to write mode until repaired. " ,
dm_device_name ( pool - > pool_md ) ) ;
if ( old_mode ! = new_mode )
new_mode = old_mode ;
else
new_mode = PM_READ_ONLY ;
}
/*
* If we were in PM_FAIL mode , rollback of metadata failed . We ' re
* not going to recover without a thin_repair . So we never let the
* pool move out of the old mode .
*/
if ( old_mode = = PM_FAIL )
new_mode = old_mode ;
2012-07-27 18:08:16 +04:00
2013-12-20 23:27:28 +04:00
switch ( new_mode ) {
2012-07-27 18:08:16 +04:00
case PM_FAIL :
2013-12-20 23:27:28 +04:00
if ( old_mode ! = new_mode )
2014-03-03 20:03:26 +04:00
notify_of_pool_mode_change ( pool , " failure " ) ;
2013-12-05 01:30:01 +04:00
dm_pool_metadata_read_only ( pool - > pmd ) ;
2012-07-27 18:08:16 +04:00
pool - > process_bio = process_bio_fail ;
pool - > process_discard = process_bio_fail ;
2014-10-10 16:43:14 +04:00
pool - > process_cell = process_cell_fail ;
pool - > process_discard_cell = process_cell_fail ;
2012-07-27 18:08:16 +04:00
pool - > process_prepared_mapping = process_prepared_mapping_fail ;
pool - > process_prepared_discard = process_prepared_discard_fail ;
2014-03-03 20:03:26 +04:00
error_retry_list ( pool ) ;
2012-07-27 18:08:16 +04:00
break ;
case PM_READ_ONLY :
2013-12-20 23:27:28 +04:00
if ( old_mode ! = new_mode )
2014-03-03 20:03:26 +04:00
notify_of_pool_mode_change ( pool , " read-only " ) ;
dm_pool_metadata_read_only ( pool - > pmd ) ;
pool - > process_bio = process_bio_read_only ;
pool - > process_discard = process_bio_success ;
2014-10-10 16:43:14 +04:00
pool - > process_cell = process_cell_read_only ;
pool - > process_discard_cell = process_cell_success ;
2014-03-03 20:03:26 +04:00
pool - > process_prepared_mapping = process_prepared_mapping_fail ;
2015-04-16 14:58:35 +03:00
pool - > process_prepared_discard = process_prepared_discard_success ;
2014-03-03 20:03:26 +04:00
error_retry_list ( pool ) ;
break ;
case PM_OUT_OF_DATA_SPACE :
/*
* Ideally we ' d never hit this state ; the low water mark
* would trigger userland to extend the pool before we
* completely run out of data space . However , many small
* IOs to unprovisioned space can consume data space at an
* alarming rate . Adjust your low water mark if you ' re
* frequently seeing this mode .
*/
if ( old_mode ! = new_mode )
2015-07-15 23:52:04 +03:00
notify_of_pool_mode_change_to_oods ( pool ) ;
2016-03-10 19:31:35 +03:00
pool - > out_of_data_space = true ;
2014-03-03 20:03:26 +04:00
pool - > process_bio = process_bio_read_only ;
2014-10-10 16:43:14 +04:00
pool - > process_discard = process_discard_bio ;
pool - > process_cell = process_cell_read_only ;
2014-03-03 20:03:26 +04:00
pool - > process_prepared_mapping = process_prepared_mapping ;
2015-04-16 14:58:35 +03:00
set_discard_callbacks ( pool ) ;
2014-05-09 18:59:38 +04:00
2014-05-20 21:38:33 +04:00
if ( ! pool - > pf . error_if_no_space & & no_space_timeout )
queue_delayed_work ( pool - > wq , & pool - > no_space_timeout , no_space_timeout ) ;
2012-07-27 18:08:16 +04:00
break ;
case PM_WRITE :
2013-12-20 23:27:28 +04:00
if ( old_mode ! = new_mode )
2014-03-03 20:03:26 +04:00
notify_of_pool_mode_change ( pool , " write " ) ;
2016-03-10 19:31:35 +03:00
pool - > out_of_data_space = false ;
2015-11-06 18:53:01 +03:00
pool - > pf . error_if_no_space = pt - > requested_pf . error_if_no_space ;
2013-12-05 01:58:19 +04:00
dm_pool_metadata_read_write ( pool - > pmd ) ;
2012-07-27 18:08:16 +04:00
pool - > process_bio = process_bio ;
2014-10-10 16:43:14 +04:00
pool - > process_discard = process_discard_bio ;
pool - > process_cell = process_cell ;
2012-07-27 18:08:16 +04:00
pool - > process_prepared_mapping = process_prepared_mapping ;
2015-04-16 14:58:35 +03:00
set_discard_callbacks ( pool ) ;
2012-07-27 18:08:16 +04:00
break ;
}
2013-12-20 23:27:28 +04:00
pool - > pf . mode = new_mode ;
2014-02-15 03:10:55 +04:00
/*
* The pool mode may have changed , sync it so bind_control_target ( )
* doesn ' t cause an unexpected mode transition on resume .
*/
pt - > adjusted_pf . mode = new_mode ;
2012-07-27 18:08:16 +04:00
}
2014-02-14 20:58:41 +04:00
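/*
 * abort_transaction() throws away the current metadata transaction and
 * sets the needs_check flag, failing the pool if either step cannot be
 * done.  metadata_operation_failed() wraps this and drops the pool to
 * read-only mode.
 */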
static void abort_transaction ( struct pool * pool )
2013-12-05 04:51:33 +04:00
{
2014-02-14 20:58:41 +04:00
const char * dev_name = dm_device_name ( pool - > pool_md ) ;
DMERR_LIMIT ( " %s: aborting current metadata transaction " , dev_name ) ;
if ( dm_pool_abort_metadata ( pool - > pmd ) ) {
DMERR ( " %s: failed to abort metadata transaction " , dev_name ) ;
set_pool_mode ( pool , PM_FAIL ) ;
}
if ( dm_pool_metadata_set_needs_check ( pool - > pmd ) ) {
DMERR ( " %s: failed to set 'needs_check' flag in metadata " , dev_name ) ;
set_pool_mode ( pool , PM_FAIL ) ;
}
}
2013-12-06 01:03:33 +04:00
2014-02-14 20:58:41 +04:00
static void metadata_operation_failed ( struct pool * pool , const char * op , int r )
{
2013-12-05 04:51:33 +04:00
DMERR_LIMIT ( " %s: metadata operation '%s' failed: error = %d " ,
dm_device_name ( pool - > pool_md ) , op , r ) ;
2014-02-14 20:58:41 +04:00
abort_transaction ( pool ) ;
2013-12-05 04:51:33 +04:00
set_pool_mode ( pool , PM_READ_ONLY ) ;
}
2012-07-27 18:08:16 +04:00
/*----------------------------------------------------------------*/
2011-11-01 00:21:18 +04:00
/*
* Mapping functions .
*/
/*
* Called only while mapping a thin bio to hand it over to the workqueue .
*/
static void thin_defer_bio ( struct thin_c * tc , struct bio * bio )
{
unsigned long flags ;
struct pool * pool = tc - > pool ;
2014-03-21 05:17:14 +04:00
spin_lock_irqsave ( & tc - > lock , flags ) ;
bio_list_add ( & tc - > deferred_bio_list , bio ) ;
spin_unlock_irqrestore ( & tc - > lock , flags ) ;
2011-11-01 00:21:18 +04:00
wake_worker ( pool ) ;
}
2014-10-06 18:45:59 +04:00
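/*
 * Like thin_defer_bio(), but taken under the pool's throttle so that
 * an overloaded worker (see throttle_work_update()) can briefly hold
 * back new submissions.
 */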
static void thin_defer_bio_with_throttle ( struct thin_c * tc , struct bio * bio )
{
struct pool * pool = tc - > pool ;
throttle_lock ( & pool - > throttle ) ;
thin_defer_bio ( tc , bio ) ;
throttle_unlock ( & pool - > throttle ) ;
}
2014-10-10 16:43:14 +04:00
static void thin_defer_cell ( struct thin_c * tc , struct dm_bio_prison_cell * cell )
{
unsigned long flags ;
struct pool * pool = tc - > pool ;
throttle_lock ( & pool - > throttle ) ;
spin_lock_irqsave ( & tc - > lock , flags ) ;
list_add_tail ( & cell - > user_list , & tc - > deferred_cells ) ;
spin_unlock_irqrestore ( & tc - > lock , flags ) ;
throttle_unlock ( & pool - > throttle ) ;
wake_worker ( pool ) ;
}
2012-12-22 00:23:40 +04:00
static void thin_hook_bio ( struct thin_c * tc , struct bio * bio )
2012-03-28 21:41:28 +04:00
{
2012-12-22 00:23:40 +04:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-03-28 21:41:28 +04:00
h - > tc = tc ;
h - > shared_read_entry = NULL ;
2012-12-22 00:23:31 +04:00
h - > all_io_entry = NULL ;
2012-03-28 21:41:28 +04:00
h - > overwrite_mapping = NULL ;
2015-04-16 14:58:35 +03:00
h - > cell = NULL ;
2012-03-28 21:41:28 +04:00
}
2011-11-01 00:21:18 +04:00
/*
* Non - blocking function called from the thin target ' s map function .
*/
2012-12-22 00:23:41 +04:00
static int thin_bio_map ( struct dm_target * ti , struct bio * bio )
2011-11-01 00:21:18 +04:00
{
int r ;
struct thin_c * tc = ti - > private ;
dm_block_t block = get_bio_block ( tc , bio ) ;
struct dm_thin_device * td = tc - > td ;
struct dm_thin_lookup_result result ;
2014-10-10 16:43:14 +04:00
struct dm_bio_prison_cell * virt_cell , * data_cell ;
2012-12-22 00:23:31 +04:00
struct dm_cell_key key ;
2011-11-01 00:21:18 +04:00
2012-12-22 00:23:40 +04:00
thin_hook_bio ( tc , bio ) ;
2012-07-27 18:08:16 +04:00
2014-03-03 19:52:28 +04:00
if ( tc - > requeue_mode ) {
2015-07-20 16:29:37 +03:00
bio - > bi_error = DM_ENDIO_REQUEUE ;
bio_endio ( bio ) ;
2014-03-03 19:52:28 +04:00
return DM_MAPIO_SUBMITTED ;
}
2012-07-27 18:08:16 +04:00
if ( get_pool_mode ( tc - > pool ) = = PM_FAIL ) {
bio_io_error ( bio ) ;
return DM_MAPIO_SUBMITTED ;
}
2012-03-28 21:41:28 +04:00
if ( bio - > bi_rw & ( REQ_DISCARD | REQ_FLUSH | REQ_FUA ) ) {
2014-10-06 18:45:59 +04:00
thin_defer_bio_with_throttle ( tc , bio ) ;
2011-11-01 00:21:18 +04:00
return DM_MAPIO_SUBMITTED ;
}
2014-10-10 12:41:09 +04:00
/*
* We must hold the virtual cell before doing the lookup , otherwise
* there ' s a race with discard .
*/
build_virtual_key ( tc - > td , block , & key ) ;
2014-10-10 16:43:14 +04:00
if ( bio_detain ( tc - > pool , & key , bio , & virt_cell ) )
2014-10-10 12:41:09 +04:00
return DM_MAPIO_SUBMITTED ;
2011-11-01 00:21:18 +04:00
r = dm_thin_find_block ( td , block , 0 , & result ) ;
/*
* Note that we defer readahead too .
*/
switch ( r ) {
case 0 :
if ( unlikely ( result . shared ) ) {
/*
* We have a race condition here between the
* result . shared value returned by the lookup and
* snapshot creation , which may cause new
* sharing .
*
* To avoid this, always quiesce the origin before
* taking the snap.  You want to do this anyway to
* ensure a consistent application view
* (i.e. lockfs).
*
* More distant ancestors are irrelevant . The
* shared flag will be set in their case .
*/
2014-10-10 16:43:14 +04:00
thin_defer_cell ( tc , virt_cell ) ;
2012-12-22 00:23:31 +04:00
return DM_MAPIO_SUBMITTED ;
2011-11-01 00:21:18 +04:00
}
2012-12-22 00:23:31 +04:00
build_data_key ( tc - > td , result . block , & key ) ;
2014-10-10 16:43:14 +04:00
if ( bio_detain ( tc - > pool , & key , bio , & data_cell ) ) {
cell_defer_no_holder ( tc , virt_cell ) ;
2012-12-22 00:23:31 +04:00
return DM_MAPIO_SUBMITTED ;
}
inc_all_io_entry ( tc - > pool , bio ) ;
2014-10-10 16:43:14 +04:00
cell_defer_no_holder ( tc , data_cell ) ;
cell_defer_no_holder ( tc , virt_cell ) ;
2012-12-22 00:23:31 +04:00
remap ( tc , bio , result . block ) ;
return DM_MAPIO_REMAPPED ;
2011-11-01 00:21:18 +04:00
case - ENODATA :
2012-07-27 18:08:16 +04:00
case - EWOULDBLOCK :
2014-10-10 16:43:14 +04:00
thin_defer_cell ( tc , virt_cell ) ;
2012-12-22 00:23:33 +04:00
return DM_MAPIO_SUBMITTED ;
2012-07-27 18:08:16 +04:00
default :
/*
 * Must always call bio_io_error on failure.
 * dm_thin_find_block can fail with -EINVAL if the
 * pool is switched to fail-io mode.
 */
bio_io_error ( bio ) ;
2014-10-10 16:43:14 +04:00
cell_defer_no_holder ( tc , virt_cell ) ;
2012-12-22 00:23:33 +04:00
return DM_MAPIO_SUBMITTED ;
2011-11-01 00:21:18 +04:00
}
}
static int pool_is_congested ( struct dm_target_callbacks * cb , int bdi_bits )
{
struct pool_c * pt = container_of ( cb , struct pool_c , callbacks ) ;
2014-03-20 16:36:47 +04:00
struct request_queue * q ;
2011-11-01 00:21:18 +04:00
2014-03-20 16:36:47 +04:00
if ( get_pool_mode ( pt - > pool ) = = PM_OUT_OF_DATA_SPACE )
return 1 ;
2011-11-01 00:21:18 +04:00
2014-03-20 16:36:47 +04:00
q = bdev_get_queue ( pt - > data_dev - > bdev ) ;
return bdi_congested ( & q - > backing_dev_info , bdi_bits ) ;
2011-11-01 00:21:18 +04:00
}
2014-03-21 05:17:14 +04:00
static void requeue_bios ( struct pool * pool )
2011-11-01 00:21:18 +04:00
{
2014-03-21 05:17:14 +04:00
unsigned long flags ;
struct thin_c * tc ;
rcu_read_lock ( ) ;
list_for_each_entry_rcu ( tc , & pool - > active_thins , list ) {
spin_lock_irqsave ( & tc - > lock , flags ) ;
bio_list_merge ( & tc - > deferred_bio_list , & tc - > retry_on_resume_list ) ;
bio_list_init ( & tc - > retry_on_resume_list ) ;
spin_unlock_irqrestore ( & tc - > lock , flags ) ;
}
rcu_read_unlock ( ) ;
2011-11-01 00:21:18 +04:00
}
/*----------------------------------------------------------------
* Binding of control targets to a pool object
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2012-09-27 02:45:46 +04:00
static bool data_dev_supports_discard ( struct pool_c * pt )
{
struct request_queue * q = bdev_get_queue ( pt - > data_dev - > bdev ) ;
return q & & blk_queue_discard ( q ) ;
}
2013-03-20 21:21:25 +04:00
static bool is_factor ( sector_t block_size , uint32_t n )
{
return ! sector_div ( block_size , n ) ;
}
2012-09-27 02:45:46 +04:00
/*
* If discard_passdown was enabled, verify that the data device
2012-09-27 02:45:47 +04:00
* supports discards . Disable discard_passdown if not .
2012-09-27 02:45:46 +04:00
*/
2012-09-27 02:45:47 +04:00
static void disable_passdown_if_not_supported ( struct pool_c * pt )
2012-09-27 02:45:46 +04:00
{
2012-09-27 02:45:47 +04:00
struct pool * pool = pt - > pool ;
struct block_device * data_bdev = pt - > data_dev - > bdev ;
struct queue_limits * data_limits = & bdev_get_queue ( data_bdev ) - > limits ;
const char * reason = NULL ;
2012-09-27 02:45:46 +04:00
char buf [ BDEVNAME_SIZE ] ;
2012-09-27 02:45:47 +04:00
if ( ! pt - > adjusted_pf . discard_passdown )
2012-09-27 02:45:46 +04:00
return ;
2012-09-27 02:45:47 +04:00
if ( ! data_dev_supports_discard ( pt ) )
reason = " discard unsupported " ;
else if ( data_limits - > max_discard_sectors < pool - > sectors_per_block )
reason = " max discard sectors smaller than a block " ;
2012-09-27 02:45:46 +04:00
2012-09-27 02:45:47 +04:00
if ( reason ) {
DMWARN ( " Data device (%s) %s: Disabling discard passdown. " , bdevname ( data_bdev , buf ) , reason ) ;
pt - > adjusted_pf . discard_passdown = false ;
}
2012-09-27 02:45:46 +04:00
}
2011-11-01 00:21:18 +04:00
static int bind_control_target ( struct pool * pool , struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
2012-07-27 18:08:16 +04:00
/*
2013-12-05 01:58:19 +04:00
* We want to make sure that a pool in PM_FAIL mode is never upgraded .
2012-07-27 18:08:16 +04:00
*/
2014-02-14 20:58:41 +04:00
enum pool_mode old_mode = get_pool_mode ( pool ) ;
2012-09-27 02:45:47 +04:00
enum pool_mode new_mode = pt - > adjusted_pf . mode ;
2012-07-27 18:08:16 +04:00
2013-12-20 23:27:28 +04:00
/*
 * Don't change the pool's mode until set_pool_mode() below.
 * Otherwise the pool's process_* function pointers may
 * not match the desired pool mode.
 */
pt - > adjusted_pf . mode = old_mode ;
pool - > ti = ti ;
pool - > pf = pt - > adjusted_pf ;
pool - > low_water_blocks = pt - > low_water_blocks ;
2012-09-27 02:45:46 +04:00
set_pool_mode ( pool , new_mode ) ;
2012-05-19 04:01:01 +04:00
2011-11-01 00:21:18 +04:00
return 0 ;
}
static void unbind_control_target ( struct pool * pool , struct dm_target * ti )
{
if ( pool - > ti = = ti )
pool - > ti = NULL ;
}
/*----------------------------------------------------------------
* Pool creation
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2012-03-28 21:41:29 +04:00
/* Initialize pool features. */
static void pool_features_init ( struct pool_features * pf )
{
2012-07-27 18:08:16 +04:00
pf - > mode = PM_WRITE ;
2012-09-27 02:45:46 +04:00
pf - > zero_new_blocks = true ;
pf - > discard_enabled = true ;
pf - > discard_passdown = true ;
2013-12-07 01:21:43 +04:00
pf - > error_if_no_space = false ;
2012-03-28 21:41:29 +04:00
}
2011-11-01 00:21:18 +04:00
static void __pool_destroy ( struct pool * pool )
{
__pool_table_remove ( pool ) ;
2015-07-03 12:22:42 +03:00
vfree ( pool - > cell_sort_array ) ;
2011-11-01 00:21:18 +04:00
if ( dm_pool_metadata_close ( pool - > pmd ) < 0 )
DMWARN ( " %s: dm_pool_metadata_close() failed. " , __func__ ) ;
2012-10-13 00:02:10 +04:00
dm_bio_prison_destroy ( pool - > prison ) ;
2011-11-01 00:21:18 +04:00
dm_kcopyd_client_destroy ( pool - > copier ) ;
if ( pool - > wq )
destroy_workqueue ( pool - > wq ) ;
if ( pool - > next_mapping )
mempool_free ( pool - > next_mapping , pool - > mapping_pool ) ;
mempool_destroy ( pool - > mapping_pool ) ;
2012-10-13 00:02:10 +04:00
dm_deferred_set_destroy ( pool - > shared_read_ds ) ;
dm_deferred_set_destroy ( pool - > all_io_ds ) ;
2011-11-01 00:21:18 +04:00
kfree ( pool ) ;
}
2012-06-03 03:30:00 +04:00
static struct kmem_cache * _new_mapping_cache ;
2011-11-01 00:21:18 +04:00
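/*
 * Allocate a pool object together with its supporting machinery: bio
 * prison, kcopyd client, ordered workqueue, deferred sets, mapping
 * mempool and cell sort array.  The error paths unwind these in
 * reverse order of creation.
 */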
static struct pool * pool_create ( struct mapped_device * pool_md ,
struct block_device * metadata_dev ,
2012-07-27 18:08:16 +04:00
unsigned long block_size ,
int read_only , char * * error )
2011-11-01 00:21:18 +04:00
{
int r ;
void * err_p ;
struct pool * pool ;
struct dm_pool_metadata * pmd ;
2012-07-27 18:08:16 +04:00
bool format_device = read_only ? false : true ;
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:16 +04:00
pmd = dm_pool_metadata_open ( metadata_dev , block_size , format_device ) ;
2011-11-01 00:21:18 +04:00
if ( IS_ERR ( pmd ) ) {
* error = " Error creating metadata object " ;
return ( struct pool * ) pmd ;
}
pool = kmalloc ( sizeof ( * pool ) , GFP_KERNEL ) ;
if ( ! pool ) {
* error = " Error allocating memory for pool " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_pool ;
}
pool - > pmd = pmd ;
pool - > sectors_per_block = block_size ;
2012-07-27 18:08:03 +04:00
if ( block_size & ( block_size - 1 ) )
pool - > sectors_per_block_shift = - 1 ;
else
pool - > sectors_per_block_shift = __ffs ( block_size ) ;
2011-11-01 00:21:18 +04:00
pool - > low_water_blocks = 0 ;
2012-03-28 21:41:29 +04:00
pool_features_init ( & pool - > pf ) ;
2014-10-07 00:30:06 +04:00
pool - > prison = dm_bio_prison_create ( ) ;
2011-11-01 00:21:18 +04:00
if ( ! pool - > prison ) {
* error = " Error creating pool's bio prison " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_prison ;
}
2013-03-02 02:45:49 +04:00
pool - > copier = dm_kcopyd_client_create ( & dm_kcopyd_throttle ) ;
2011-11-01 00:21:18 +04:00
if ( IS_ERR ( pool - > copier ) ) {
r = PTR_ERR ( pool - > copier ) ;
* error = " Error creating pool's kcopyd client " ;
err_p = ERR_PTR ( r ) ;
goto bad_kcopyd_client ;
}
/*
* Create a single-threaded workqueue that will service all devices
* that use this metadata.
*/
pool - > wq = alloc_ordered_workqueue ( " dm- " DM_MSG_PREFIX , WQ_MEM_RECLAIM ) ;
if ( ! pool - > wq ) {
* error = " Error creating pool's workqueue " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_wq ;
}
2014-10-06 18:45:59 +04:00
throttle_init ( & pool - > throttle ) ;
2011-11-01 00:21:18 +04:00
INIT_WORK ( & pool - > worker , do_worker ) ;
2012-03-28 21:41:27 +04:00
INIT_DELAYED_WORK ( & pool - > waker , do_waker ) ;
2014-05-09 18:59:38 +04:00
INIT_DELAYED_WORK ( & pool - > no_space_timeout , do_no_space_timeout ) ;
2011-11-01 00:21:18 +04:00
spin_lock_init ( & pool - > lock ) ;
bio_list_init ( & pool - > deferred_flush_bios ) ;
INIT_LIST_HEAD ( & pool - > prepared_mappings ) ;
2012-03-28 21:41:28 +04:00
INIT_LIST_HEAD ( & pool - > prepared_discards ) ;
2014-03-21 05:17:14 +04:00
INIT_LIST_HEAD ( & pool - > active_thins ) ;
2013-12-05 05:16:12 +04:00
pool - > low_water_triggered = false ;
2014-11-07 23:09:46 +03:00
pool - > suspended = true ;
2016-03-10 19:31:35 +03:00
pool - > out_of_data_space = false ;
2012-10-13 00:02:10 +04:00
pool - > shared_read_ds = dm_deferred_set_create ( ) ;
if ( ! pool - > shared_read_ds ) {
* error = " Error creating pool's shared read deferred set " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_shared_read_ds ;
}
pool - > all_io_ds = dm_deferred_set_create ( ) ;
if ( ! pool - > all_io_ds ) {
* error = " Error creating pool's all io deferred set " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_all_io_ds ;
}
2011-11-01 00:21:18 +04:00
pool - > next_mapping = NULL ;
2012-06-03 03:30:00 +04:00
pool - > mapping_pool = mempool_create_slab_pool ( MAPPING_POOL_SIZE ,
_new_mapping_cache ) ;
2011-11-01 00:21:18 +04:00
if ( ! pool - > mapping_pool ) {
* error = " Error creating pool's mapping mempool " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_mapping_pool ;
}
2015-07-03 12:22:42 +03:00
pool - > cell_sort_array = vmalloc ( sizeof ( * pool - > cell_sort_array ) * CELL_SORT_ARRAY_SIZE ) ;
if ( ! pool - > cell_sort_array ) {
* error = " Error allocating cell sort array " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_sort_array ;
}
2011-11-01 00:21:18 +04:00
pool - > ref_count = 1 ;
2012-03-28 21:41:27 +04:00
pool - > last_commit_jiffies = jiffies ;
2011-11-01 00:21:18 +04:00
pool - > pool_md = pool_md ;
pool - > md_dev = metadata_dev ;
__pool_table_insert ( pool ) ;
return pool ;
2015-07-03 12:22:42 +03:00
bad_sort_array :
mempool_destroy ( pool - > mapping_pool ) ;
2011-11-01 00:21:18 +04:00
bad_mapping_pool :
2012-10-13 00:02:10 +04:00
dm_deferred_set_destroy ( pool - > all_io_ds ) ;
bad_all_io_ds :
dm_deferred_set_destroy ( pool - > shared_read_ds ) ;
bad_shared_read_ds :
2011-11-01 00:21:18 +04:00
destroy_workqueue ( pool - > wq ) ;
bad_wq :
dm_kcopyd_client_destroy ( pool - > copier ) ;
bad_kcopyd_client :
2012-10-13 00:02:10 +04:00
dm_bio_prison_destroy ( pool - > prison ) ;
2011-11-01 00:21:18 +04:00
bad_prison :
kfree ( pool ) ;
bad_pool :
if ( dm_pool_metadata_close ( pmd ) )
DMWARN ( " %s: dm_pool_metadata_close() failed. " , __func__ ) ;
return err_p ;
}
static void __pool_inc ( struct pool * pool )
{
BUG_ON ( ! mutex_is_locked ( & dm_thin_pool_table . mutex ) ) ;
pool - > ref_count + + ;
}
static void __pool_dec ( struct pool * pool )
{
BUG_ON ( ! mutex_is_locked ( & dm_thin_pool_table . mutex ) ) ;
BUG_ON ( ! pool - > ref_count ) ;
if ( ! - - pool - > ref_count )
__pool_destroy ( pool ) ;
}
static struct pool * __pool_find ( struct mapped_device * pool_md ,
struct block_device * metadata_dev ,
2012-07-27 18:08:16 +04:00
unsigned long block_size , int read_only ,
char * * error , int * created )
2011-11-01 00:21:18 +04:00
{
struct pool * pool = __pool_table_lookup_metadata_dev ( metadata_dev ) ;
if ( pool ) {
2012-07-27 18:07:59 +04:00
if ( pool - > pool_md ! = pool_md ) {
* error = " metadata device already in use by a pool " ;
2011-11-01 00:21:18 +04:00
return ERR_PTR ( - EBUSY ) ;
2012-07-27 18:07:59 +04:00
}
2011-11-01 00:21:18 +04:00
__pool_inc ( pool ) ;
} else {
pool = __pool_table_lookup ( pool_md ) ;
if ( pool ) {
2012-07-27 18:07:59 +04:00
if ( pool - > md_dev ! = metadata_dev ) {
* error = " different pool cannot replace a pool " ;
2011-11-01 00:21:18 +04:00
return ERR_PTR ( - EINVAL ) ;
2012-07-27 18:07:59 +04:00
}
2011-11-01 00:21:18 +04:00
__pool_inc ( pool ) ;
2012-03-28 21:41:29 +04:00
} else {
2012-07-27 18:08:16 +04:00
pool = pool_create ( pool_md , metadata_dev , block_size , read_only , error ) ;
2012-03-28 21:41:29 +04:00
* created = 1 ;
}
2011-11-01 00:21:18 +04:00
}
return pool ;
}
/*----------------------------------------------------------------
* Pool target methods
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void pool_dtr ( struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
mutex_lock ( & dm_thin_pool_table . mutex ) ;
unbind_control_target ( pt - > pool , ti ) ;
__pool_dec ( pt - > pool ) ;
dm_put_device ( ti , pt - > metadata_dev ) ;
dm_put_device ( ti , pt - > data_dev ) ;
kfree ( pt ) ;
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
}
static int parse_pool_features ( struct dm_arg_set * as , struct pool_features * pf ,
struct dm_target * ti )
{
int r ;
unsigned argc ;
const char * arg_name ;
static struct dm_arg _args [ ] = {
2014-01-16 04:07:58 +04:00
{ 0 , 4 , " Invalid number of pool feature arguments " } ,
2011-11-01 00:21:18 +04:00
} ;
/*
* No feature arguments supplied .
*/
if ( ! as - > argc )
return 0 ;
r = dm_read_arg_group ( _args , as , & argc , & ti - > error ) ;
if ( r )
return - EINVAL ;
while ( argc & & ! r ) {
arg_name = dm_shift_arg ( as ) ;
argc - - ;
2012-07-27 18:08:16 +04:00
if ( ! strcasecmp ( arg_name , " skip_block_zeroing " ) )
2012-09-27 02:45:46 +04:00
pf - > zero_new_blocks = false ;
2012-07-27 18:08:16 +04:00
else if ( ! strcasecmp ( arg_name , " ignore_discard " ) )
2012-09-27 02:45:46 +04:00
pf - > discard_enabled = false ;
2012-07-27 18:08:16 +04:00
else if ( ! strcasecmp ( arg_name , " no_discard_passdown " ) )
2012-09-27 02:45:46 +04:00
pf - > discard_passdown = false ;
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:16 +04:00
else if ( ! strcasecmp ( arg_name , " read_only " ) )
pf - > mode = PM_READ_ONLY ;
2013-12-07 01:21:43 +04:00
else if ( ! strcasecmp ( arg_name , " error_if_no_space " ) )
pf - > error_if_no_space = true ;
2012-07-27 18:08:16 +04:00
else {
ti - > error = " Unrecognised pool feature requested " ;
r = - EINVAL ;
break ;
}
2011-11-01 00:21:18 +04:00
}
return r ;
}
2013-05-10 17:37:21 +04:00
static void metadata_low_callback ( void * context )
{
struct pool * pool = context ;
DMWARN ( " %s: reached low water mark for metadata device: sending event. " ,
dm_device_name ( pool - > pool_md ) ) ;
dm_table_event ( pool - > ti - > table ) ;
}
2014-02-13 08:58:15 +04:00
static sector_t get_dev_size ( struct block_device * bdev )
{
return i_size_read ( bdev - > bd_inode ) > > SECTOR_SHIFT ;
}
static void warn_if_metadata_device_too_big ( struct block_device * bdev )
2013-05-10 17:37:18 +04:00
{
2014-02-13 08:58:15 +04:00
sector_t metadata_dev_size = get_dev_size ( bdev ) ;
2013-05-10 17:37:18 +04:00
char buffer [ BDEVNAME_SIZE ] ;
2014-02-13 08:58:15 +04:00
if ( metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING )
2013-05-10 17:37:18 +04:00
DMWARN ( " Metadata device %s is larger than %u sectors: excess space will not be used. " ,
bdevname ( bdev , buffer ) , THIN_METADATA_MAX_SECTORS ) ;
2014-02-13 08:58:15 +04:00
}
static sector_t get_metadata_dev_size ( struct block_device * bdev )
{
sector_t metadata_dev_size = get_dev_size ( bdev ) ;
if ( metadata_dev_size > THIN_METADATA_MAX_SECTORS )
metadata_dev_size = THIN_METADATA_MAX_SECTORS ;
2013-05-10 17:37:18 +04:00
return metadata_dev_size ;
}
2013-05-10 17:37:19 +04:00
static dm_block_t get_metadata_dev_size_in_blocks ( struct block_device * bdev )
{
sector_t metadata_dev_size = get_metadata_dev_size ( bdev ) ;
2014-02-13 08:58:15 +04:00
sector_div ( metadata_dev_size , THIN_METADATA_BLOCK_SIZE ) ;
2013-05-10 17:37:19 +04:00
return metadata_dev_size ;
}
2013-05-10 17:37:21 +04:00
/*
* When a metadata threshold is crossed a dm event is triggered , and
* userland should respond by growing the metadata device . We could let
* userland set the threshold , like we do with the data threshold , but I ' m
* not sure they know enough to do this well .
*/
static dm_block_t calc_metadata_threshold ( struct pool_c * pt )
{
/*
* 4M is ample for all ops with the possible exception of thin
* device deletion, which is harmless if it fails (just retry the
* delete after you've grown the device).
*/
dm_block_t quarter = get_metadata_dev_size_in_blocks ( pt - > metadata_dev - > bdev ) / 4 ;
return min ( ( dm_block_t ) 1024ULL /* 4M */ , quarter ) ;
}
2011-11-01 00:21:18 +04:00
/*
* thin - pool < metadata dev > < data dev >
* < data block size ( sectors ) >
* < low water mark ( blocks ) >
* [ < # feature args > [ < arg > ] * ]
*
* Optional feature arguments are :
* skip_block_zeroing : skips the zeroing of newly - provisioned blocks .
2012-03-28 21:41:29 +04:00
* ignore_discard : disable discard
* no_discard_passdown : don ' t pass discards down to the data device
2013-12-07 01:21:43 +04:00
* read_only : Don ' t allow any changes to be made to the pool metadata .
* error_if_no_space : error IOs , instead of queueing , if no space .
2011-11-01 00:21:18 +04:00
*/
static int pool_ctr ( struct dm_target * ti , unsigned argc , char * * argv )
{
2012-03-28 21:41:29 +04:00
int r , pool_created = 0 ;
2011-11-01 00:21:18 +04:00
struct pool_c * pt ;
struct pool * pool ;
struct pool_features pf ;
struct dm_arg_set as ;
struct dm_dev * data_dev ;
unsigned long block_size ;
dm_block_t low_water_blocks ;
struct dm_dev * metadata_dev ;
2013-05-10 17:37:19 +04:00
fmode_t metadata_mode ;
2011-11-01 00:21:18 +04:00
/*
* FIXME Remove validation from scope of lock .
*/
mutex_lock ( & dm_thin_pool_table . mutex ) ;
if ( argc < 4 ) {
ti - > error = " Invalid argument count " ;
r = - EINVAL ;
goto out_unlock ;
}
2013-05-10 17:37:19 +04:00
2011-11-01 00:21:18 +04:00
as . argc = argc ;
as . argv = argv ;
2013-05-10 17:37:19 +04:00
/*
* Set default pool features .
*/
pool_features_init ( & pf ) ;
dm_consume_args ( & as , 4 ) ;
r = parse_pool_features ( & as , & pf , ti ) ;
if ( r )
goto out_unlock ;
metadata_mode = FMODE_READ | ( ( pf . mode = = PM_READ_ONLY ) ? 0 : FMODE_WRITE ) ;
r = dm_get_device ( ti , argv [ 0 ] , metadata_mode , & metadata_dev ) ;
2011-11-01 00:21:18 +04:00
if ( r ) {
ti - > error = " Error opening metadata block device " ;
goto out_unlock ;
}
2014-02-13 08:58:15 +04:00
warn_if_metadata_device_too_big ( metadata_dev - > bdev ) ;
2011-11-01 00:21:18 +04:00
r = dm_get_device ( ti , argv [ 1 ] , FMODE_READ | FMODE_WRITE , & data_dev ) ;
if ( r ) {
ti - > error = " Error getting data device " ;
goto out_metadata ;
}
if ( kstrtoul ( argv [ 2 ] , 10 , & block_size ) | | ! block_size | |
block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS | |
block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS | |
2012-07-27 18:08:02 +04:00
block_size & ( DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1 ) ) {
2011-11-01 00:21:18 +04:00
ti - > error = " Invalid block size " ;
r = - EINVAL ;
goto out ;
}
if ( kstrtoull ( argv [ 3 ] , 10 , ( unsigned long long * ) & low_water_blocks ) ) {
ti - > error = " Invalid low water mark " ;
r = - EINVAL ;
goto out ;
}
pt = kzalloc ( sizeof ( * pt ) , GFP_KERNEL ) ;
if ( ! pt ) {
r = - ENOMEM ;
goto out ;
}
pool = __pool_find ( dm_table_get_md ( ti - > table ) , metadata_dev - > bdev ,
2012-07-27 18:08:16 +04:00
block_size , pf . mode = = PM_READ_ONLY , & ti - > error , & pool_created ) ;
2011-11-01 00:21:18 +04:00
if ( IS_ERR ( pool ) ) {
r = PTR_ERR ( pool ) ;
goto out_free_pt ;
}
2012-03-28 21:41:29 +04:00
/*
* ' pool_created ' reflects whether this is the first table load .
* Top level discard support is not allowed to be changed after
* initial load . This would require a pool reload to trigger thin
* device changes .
*/
if ( ! pool_created & & pf . discard_enabled ! = pool - > pf . discard_enabled ) {
ti - > error = " Discard support cannot be disabled once enabled " ;
r = - EINVAL ;
goto out_flags_changed ;
}
2011-11-01 00:21:18 +04:00
pt - > pool = pool ;
pt - > ti = ti ;
pt - > metadata_dev = metadata_dev ;
pt - > data_dev = data_dev ;
pt - > low_water_blocks = low_water_blocks ;
2012-09-27 02:45:47 +04:00
pt - > adjusted_pf = pt - > requested_pf = pf ;
2013-03-02 02:45:47 +04:00
ti - > num_flush_bios = 1 ;
2012-09-27 02:45:46 +04:00
2012-03-28 21:41:29 +04:00
/*
* Only need to enable discards if the pool should pass
* them down to the data device . The thin device ' s discard
* processing will cause mappings to be removed from the btree .
*/
2013-09-20 02:49:11 +04:00
ti - > discard_zeroes_data_unsupported = true ;
2012-03-28 21:41:29 +04:00
if ( pf . discard_enabled & & pf . discard_passdown ) {
2013-03-02 02:45:47 +04:00
ti - > num_discard_bios = 1 ;
2012-09-27 02:45:46 +04:00
2012-03-28 21:41:29 +04:00
/*
* Setting ' discards_supported ' circumvents the normal
* stacking of discard limits ( this keeps the pool and
* thin devices ' discard limits consistent ) .
*/
2012-07-27 18:08:08 +04:00
ti - > discards_supported = true ;
2012-03-28 21:41:29 +04:00
}
2011-11-01 00:21:18 +04:00
ti - > private = pt ;
2013-05-10 17:37:21 +04:00
r = dm_pool_register_metadata_threshold(pt->pool->pmd,
					calc_metadata_threshold(pt),
					metadata_low_callback,
					pool);
if (r)
	goto out_flags_changed;
2013-05-10 17:37:21 +04:00
2011-11-01 00:21:18 +04:00
pt - > callbacks . congested_fn = pool_is_congested ;
dm_table_add_target_callbacks ( ti - > table , & pt - > callbacks ) ;
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
return 0 ;
2012-03-28 21:41:29 +04:00
out_flags_changed :
__pool_dec ( pool ) ;
2011-11-01 00:21:18 +04:00
out_free_pt :
kfree ( pt ) ;
out :
dm_put_device ( ti , data_dev ) ;
out_metadata :
dm_put_device ( ti , metadata_dev ) ;
out_unlock :
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
return r ;
}
2012-12-22 00:23:41 +04:00
static int pool_map ( struct dm_target * ti , struct bio * bio )
2011-11-01 00:21:18 +04:00
{
int r ;
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
unsigned long flags ;
/*
* As this is a singleton target , ti - > begin is always zero .
*/
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio - > bi_bdev = pt - > data_dev - > bdev ;
r = DM_MAPIO_REMAPPED ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
return r ;
}
2013-05-10 17:37:18 +04:00
static int maybe_resize_data_dev ( struct dm_target * ti , bool * need_commit )
2011-11-01 00:21:18 +04:00
{
int r ;
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
2012-07-27 18:08:02 +04:00
sector_t data_size = ti - > len ;
dm_block_t sb_data_size ;
2011-11-01 00:21:18 +04:00
2013-05-10 17:37:18 +04:00
* need_commit = false ;
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:02 +04:00
( void ) sector_div ( data_size , pool - > sectors_per_block ) ;
2011-11-01 00:21:18 +04:00
r = dm_pool_get_data_dev_size ( pool - > pmd , & sb_data_size ) ;
if ( r ) {
2013-08-22 01:30:40 +04:00
DMERR ( " %s: failed to retrieve data device size " ,
dm_device_name ( pool - > pool_md ) ) ;
2011-11-01 00:21:18 +04:00
return r ;
}
if (data_size < sb_data_size) {
	DMERR("%s: pool target (%llu blocks) too small: expected %llu",
	      dm_device_name(pool->pool_md),
	      (unsigned long long)data_size, sb_data_size);
	return -EINVAL;
} else if ( data_size > sb_data_size ) {
2014-02-14 20:58:41 +04:00
if ( dm_pool_metadata_needs_check ( pool - > pmd ) ) {
DMERR ( " %s: unable to grow the data device until repaired. " ,
dm_device_name ( pool - > pool_md ) ) ;
return 0 ;
}
2013-12-04 19:25:53 +04:00
if ( sb_data_size )
DMINFO ( " %s: growing the data device from %llu to %llu blocks " ,
dm_device_name ( pool - > pool_md ) ,
sb_data_size , ( unsigned long long ) data_size ) ;
2011-11-01 00:21:18 +04:00
r = dm_pool_resize_data_dev ( pool - > pmd , data_size ) ;
if ( r ) {
2013-12-05 04:51:33 +04:00
metadata_operation_failed ( pool , " dm_pool_resize_data_dev " , r ) ;
2011-11-01 00:21:18 +04:00
return r ;
}
2013-05-10 17:37:18 +04:00
* need_commit = true ;
2011-11-01 00:21:18 +04:00
}
return 0 ;
}
2013-05-10 17:37:19 +04:00
static int maybe_resize_metadata_dev ( struct dm_target * ti , bool * need_commit )
{
int r ;
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
dm_block_t metadata_dev_size , sb_metadata_dev_size ;
* need_commit = false ;
2013-05-19 21:57:50 +04:00
metadata_dev_size = get_metadata_dev_size_in_blocks ( pool - > md_dev ) ;
2013-05-10 17:37:19 +04:00
r = dm_pool_get_metadata_dev_size ( pool - > pmd , & sb_metadata_dev_size ) ;
if ( r ) {
2013-08-22 01:30:40 +04:00
DMERR ( " %s: failed to retrieve metadata device size " ,
dm_device_name ( pool - > pool_md ) ) ;
2013-05-10 17:37:19 +04:00
return r ;
}
if (metadata_dev_size < sb_metadata_dev_size) {
	DMERR("%s: metadata device (%llu blocks) too small: expected %llu",
	      dm_device_name(pool->pool_md),
	      metadata_dev_size, sb_metadata_dev_size);
	return -EINVAL;
} else if ( metadata_dev_size > sb_metadata_dev_size ) {
2014-02-14 20:58:41 +04:00
if ( dm_pool_metadata_needs_check ( pool - > pmd ) ) {
DMERR ( " %s: unable to grow the metadata device until repaired. " ,
dm_device_name ( pool - > pool_md ) ) ;
return 0 ;
}
2014-02-13 08:58:15 +04:00
warn_if_metadata_device_too_big ( pool - > md_dev ) ;
2013-12-04 19:25:53 +04:00
DMINFO ( " %s: growing the metadata device from %llu to %llu blocks " ,
dm_device_name ( pool - > pool_md ) ,
sb_metadata_dev_size , metadata_dev_size ) ;
2013-05-10 17:37:19 +04:00
r = dm_pool_resize_metadata_dev ( pool - > pmd , metadata_dev_size ) ;
if ( r ) {
2013-12-05 04:51:33 +04:00
metadata_operation_failed ( pool , " dm_pool_resize_metadata_dev " , r ) ;
2013-05-10 17:37:19 +04:00
return r ;
}
* need_commit = true ;
}
return 0 ;
}
2013-05-10 17:37:18 +04:00
/*
* Retrieves the number of blocks of the data device from
* the superblock and compares it to the actual device size ,
* thus resizing the data device in case it has grown .
*
* This both copes with opening preallocated data devices in the ctr
* being followed by a resume
* - and -
* calling the resume method individually after userspace has
* grown the data device in reaction to a table event .
*/
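/*
 * Illustrative userspace flow (assuming dmsetup-managed tables): after the
 * underlying data device has been extended, reloading the pool table with
 * the larger length and resuming the pool brings control through
 * pool_preresume(), which grows the pool to match.
 */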
static int pool_preresume ( struct dm_target * ti )
{
int r ;
2013-05-10 17:37:19 +04:00
bool need_commit1 , need_commit2 ;
2013-05-10 17:37:18 +04:00
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
/*
* Take control of the pool object .
*/
r = bind_control_target ( pool , ti ) ;
if ( r )
return r ;
r = maybe_resize_data_dev ( ti , & need_commit1 ) ;
if ( r )
return r ;
2013-05-10 17:37:19 +04:00
r = maybe_resize_metadata_dev ( ti , & need_commit2 ) ;
if ( r )
return r ;
if (need_commit1 || need_commit2)
	(void) commit(pool);
2013-05-10 17:37:18 +04:00
return 0 ;
}
2014-10-29 03:58:45 +03:00
static void pool_suspend_active_thins ( struct pool * pool )
{
struct thin_c * tc ;
/* Suspend all active thin devices */
tc = get_first_thin ( pool ) ;
while ( tc ) {
dm_internal_suspend_noflush ( tc - > thin_md ) ;
tc = get_next_thin ( pool , tc ) ;
}
}
static void pool_resume_active_thins ( struct pool * pool )
{
struct thin_c * tc ;
/* Resume all active thin devices */
tc = get_first_thin ( pool ) ;
while ( tc ) {
dm_internal_resume ( tc - > thin_md ) ;
tc = get_next_thin ( pool , tc ) ;
}
}
2011-11-01 00:21:18 +04:00
static void pool_resume ( struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
unsigned long flags ;
2014-10-29 03:58:45 +03:00
/*
* Must requeue active_thins ' bios and then resume
* active_thins _before_ clearing ' suspend ' flag .
*/
requeue_bios ( pool ) ;
pool_resume_active_thins ( pool ) ;
2011-11-01 00:21:18 +04:00
spin_lock_irqsave ( & pool - > lock , flags ) ;
2013-12-05 05:16:12 +04:00
pool - > low_water_triggered = false ;
2014-11-07 23:09:46 +03:00
pool - > suspended = false ;
2011-11-01 00:21:18 +04:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2014-11-07 23:09:46 +03:00
2012-03-28 21:41:27 +04:00
do_waker ( & pool - > waker . work ) ;
2011-11-01 00:21:18 +04:00
}
2014-11-07 23:09:46 +03:00
static void pool_presuspend ( struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
unsigned long flags ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
pool - > suspended = true ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2014-10-29 03:58:45 +03:00
pool_suspend_active_thins ( pool ) ;
2014-11-07 23:09:46 +03:00
}
static void pool_presuspend_undo ( struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
unsigned long flags ;
2014-10-29 03:58:45 +03:00
pool_resume_active_thins ( pool ) ;
2014-11-07 23:09:46 +03:00
spin_lock_irqsave ( & pool - > lock , flags ) ;
pool - > suspended = false ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
}
2011-11-01 00:21:18 +04:00
static void pool_postsuspend ( struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
2015-12-17 19:03:35 +03:00
cancel_delayed_work_sync ( & pool - > waker ) ;
cancel_delayed_work_sync ( & pool - > no_space_timeout ) ;
2011-11-01 00:21:18 +04:00
flush_workqueue ( pool - > wq ) ;
2013-12-05 00:05:36 +04:00
( void ) commit ( pool ) ;
2011-11-01 00:21:18 +04:00
}
static int check_arg_count ( unsigned argc , unsigned args_required )
{
if ( argc ! = args_required ) {
DMWARN ( " Message received with %u arguments instead of %u. " ,
argc , args_required ) ;
return - EINVAL ;
}
return 0 ;
}
static int read_dev_id ( char * arg , dm_thin_id * dev_id , int warning )
{
if ( ! kstrtoull ( arg , 10 , ( unsigned long long * ) dev_id ) & &
* dev_id < = MAX_DEV_ID )
return 0 ;
if ( warning )
DMWARN ( " Message received with invalid device id: %s " , arg ) ;
return - EINVAL ;
}
static int process_create_thin_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
dm_thin_id dev_id ;
int r ;
r = check_arg_count ( argc , 2 ) ;
if ( r )
return r ;
r = read_dev_id ( argv [ 1 ] , & dev_id , 1 ) ;
if ( r )
return r ;
r = dm_pool_create_thin ( pool - > pmd , dev_id ) ;
if ( r ) {
DMWARN ( " Creation of new thinly-provisioned device with id %s failed. " ,
argv [ 1 ] ) ;
return r ;
}
return 0 ;
}
static int process_create_snap_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
dm_thin_id dev_id ;
dm_thin_id origin_dev_id ;
int r ;
r = check_arg_count ( argc , 3 ) ;
if ( r )
return r ;
r = read_dev_id ( argv [ 1 ] , & dev_id , 1 ) ;
if ( r )
return r ;
r = read_dev_id ( argv [ 2 ] , & origin_dev_id , 1 ) ;
if ( r )
return r ;
r = dm_pool_create_snap ( pool - > pmd , dev_id , origin_dev_id ) ;
if ( r ) {
DMWARN ( " Creation of new snapshot %s of device %s failed. " ,
argv [ 1 ] , argv [ 2 ] ) ;
return r ;
}
return 0 ;
}
static int process_delete_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
dm_thin_id dev_id ;
int r ;
r = check_arg_count ( argc , 2 ) ;
if ( r )
return r ;
r = read_dev_id ( argv [ 1 ] , & dev_id , 1 ) ;
if ( r )
return r ;
r = dm_pool_delete_thin_device ( pool - > pmd , dev_id ) ;
if ( r )
DMWARN ( " Deletion of thin device %s failed. " , argv [ 1 ] ) ;
return r ;
}
static int process_set_transaction_id_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
dm_thin_id old_id , new_id ;
int r ;
r = check_arg_count ( argc , 3 ) ;
if ( r )
return r ;
if ( kstrtoull ( argv [ 1 ] , 10 , ( unsigned long long * ) & old_id ) ) {
DMWARN ( " set_transaction_id message: Unrecognised id %s. " , argv [ 1 ] ) ;
return - EINVAL ;
}
if ( kstrtoull ( argv [ 2 ] , 10 , ( unsigned long long * ) & new_id ) ) {
DMWARN ( " set_transaction_id message: Unrecognised new id %s. " , argv [ 2 ] ) ;
return - EINVAL ;
}
r = dm_pool_set_metadata_transaction_id ( pool - > pmd , old_id , new_id ) ;
if ( r ) {
DMWARN ( " Failed to change transaction id from %s to %s. " ,
argv [ 1 ] , argv [ 2 ] ) ;
return r ;
}
return 0 ;
}
2012-06-03 03:30:01 +04:00
static int process_reserve_metadata_snap_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
int r ;
r = check_arg_count ( argc , 1 ) ;
if ( r )
return r ;
2013-12-05 00:05:36 +04:00
( void ) commit ( pool ) ;
2012-07-03 15:55:31 +04:00
2012-06-03 03:30:01 +04:00
r = dm_pool_reserve_metadata_snap ( pool - > pmd ) ;
if ( r )
DMWARN ( " reserve_metadata_snap message failed. " ) ;
return r ;
}
static int process_release_metadata_snap_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
int r ;
r = check_arg_count ( argc , 1 ) ;
if ( r )
return r ;
r = dm_pool_release_metadata_snap ( pool - > pmd ) ;
if ( r )
DMWARN ( " release_metadata_snap message failed. " ) ;
return r ;
}
2011-11-01 00:21:18 +04:00
/*
* Messages supported :
* create_thin < dev_id >
* create_snap < dev_id > < origin_id >
* delete < dev_id >
* set_transaction_id < current_trans_id > < new_trans_id >
2012-06-03 03:30:01 +04:00
* reserve_metadata_snap
* release_metadata_snap
2011-11-01 00:21:18 +04:00
*/
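/*
 * Illustrative usage from userspace (device name is an example only):
 *
 *   dmsetup message /dev/mapper/pool 0 "create_thin 0"
 *   dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
 */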
static int pool_message ( struct dm_target * ti , unsigned argc , char * * argv )
{
int r = - EINVAL ;
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
2015-01-26 14:38:21 +03:00
if ( get_pool_mode ( pool ) > = PM_READ_ONLY ) {
DMERR ( " %s: unable to service pool target messages in READ_ONLY or FAIL mode " ,
dm_device_name ( pool - > pool_md ) ) ;
2015-06-09 19:31:26 +03:00
return - EOPNOTSUPP ;
2015-01-26 14:38:21 +03:00
}
2011-11-01 00:21:18 +04:00
if ( ! strcasecmp ( argv [ 0 ] , " create_thin " ) )
r = process_create_thin_mesg ( argc , argv , pool ) ;
else if ( ! strcasecmp ( argv [ 0 ] , " create_snap " ) )
r = process_create_snap_mesg ( argc , argv , pool ) ;
else if ( ! strcasecmp ( argv [ 0 ] , " delete " ) )
r = process_delete_mesg ( argc , argv , pool ) ;
else if ( ! strcasecmp ( argv [ 0 ] , " set_transaction_id " ) )
r = process_set_transaction_id_mesg ( argc , argv , pool ) ;
2012-06-03 03:30:01 +04:00
else if ( ! strcasecmp ( argv [ 0 ] , " reserve_metadata_snap " ) )
r = process_reserve_metadata_snap_mesg ( argc , argv , pool ) ;
else if ( ! strcasecmp ( argv [ 0 ] , " release_metadata_snap " ) )
r = process_release_metadata_snap_mesg ( argc , argv , pool ) ;
2011-11-01 00:21:18 +04:00
else
DMWARN ( " Unrecognised thin pool target message received: %s " , argv [ 0 ] ) ;
2012-07-27 18:08:16 +04:00
if (!r)
	(void) commit(pool);
2011-11-01 00:21:18 +04:00
return r ;
}
2012-07-27 18:08:16 +04:00
static void emit_flags ( struct pool_features * pf , char * result ,
unsigned sz , unsigned maxlen )
{
unsigned count = !pf->zero_new_blocks + !pf->discard_enabled +
		 !pf->discard_passdown + (pf->mode == PM_READ_ONLY) +
		 pf->error_if_no_space;
2012-07-27 18:08:16 +04:00
DMEMIT ( " %u " , count ) ;
if ( ! pf - > zero_new_blocks )
DMEMIT ( " skip_block_zeroing " ) ;
if ( ! pf - > discard_enabled )
DMEMIT ( " ignore_discard " ) ;
if ( ! pf - > discard_passdown )
DMEMIT ( " no_discard_passdown " ) ;
if ( pf - > mode = = PM_READ_ONLY )
DMEMIT ( " read_only " ) ;
2013-12-07 01:21:43 +04:00
if ( pf - > error_if_no_space )
DMEMIT ( " error_if_no_space " ) ;
2012-07-27 18:08:16 +04:00
}
2011-11-01 00:21:18 +04:00
/*
 * Status line is:
 *    <transaction id> <used metadata blocks>/<total metadata blocks>
 *    <used data blocks>/<total data blocks> <held metadata root>
 *    <pool mode> <discard config> <no space config> <needs_check>
 */
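/*
 * A purely illustrative STATUSTYPE_INFO line (all numbers invented):
 *
 *   0 983/4161600 19669/4194304 - rw discard_passdown queue_if_no_space -
 */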
2013-03-02 02:45:44 +04:00
static void pool_status ( struct dm_target * ti , status_type_t type ,
unsigned status_flags , char * result , unsigned maxlen )
2011-11-01 00:21:18 +04:00
{
2012-07-27 18:08:16 +04:00
int r ;
2011-11-01 00:21:18 +04:00
unsigned sz = 0 ;
uint64_t transaction_id ;
dm_block_t nr_free_blocks_data ;
dm_block_t nr_free_blocks_metadata ;
dm_block_t nr_blocks_data ;
dm_block_t nr_blocks_metadata ;
dm_block_t held_root ;
char buf [ BDEVNAME_SIZE ] ;
char buf2 [ BDEVNAME_SIZE ] ;
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
switch ( type ) {
case STATUSTYPE_INFO :
2012-07-27 18:08:16 +04:00
if ( get_pool_mode ( pool ) = = PM_FAIL ) {
DMEMIT ( " Fail " ) ;
break ;
}
2012-07-27 18:08:16 +04:00
/* Commit to ensure statistics aren't out-of-date */
if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
	(void) commit(pool);
2012-07-27 18:08:16 +04:00
2013-03-02 02:45:44 +04:00
r = dm_pool_get_metadata_transaction_id ( pool - > pmd , & transaction_id ) ;
if ( r ) {
2013-08-22 01:30:40 +04:00
DMERR ( " %s: dm_pool_get_metadata_transaction_id returned %d " ,
dm_device_name ( pool - > pool_md ) , r ) ;
2013-03-02 02:45:44 +04:00
goto err ;
}
2011-11-01 00:21:18 +04:00
2013-03-02 02:45:44 +04:00
r = dm_pool_get_free_metadata_block_count ( pool - > pmd , & nr_free_blocks_metadata ) ;
if ( r ) {
2013-08-22 01:30:40 +04:00
DMERR ( " %s: dm_pool_get_free_metadata_block_count returned %d " ,
dm_device_name ( pool - > pool_md ) , r ) ;
2013-03-02 02:45:44 +04:00
goto err ;
}
2011-11-01 00:21:18 +04:00
r = dm_pool_get_metadata_dev_size ( pool - > pmd , & nr_blocks_metadata ) ;
2013-03-02 02:45:44 +04:00
if ( r ) {
2013-08-22 01:30:40 +04:00
DMERR ( " %s: dm_pool_get_metadata_dev_size returned %d " ,
dm_device_name ( pool - > pool_md ) , r ) ;
2013-03-02 02:45:44 +04:00
goto err ;
}
2011-11-01 00:21:18 +04:00
2013-03-02 02:45:44 +04:00
r = dm_pool_get_free_block_count ( pool - > pmd , & nr_free_blocks_data ) ;
if ( r ) {
2013-08-22 01:30:40 +04:00
DMERR ( " %s: dm_pool_get_free_block_count returned %d " ,
dm_device_name ( pool - > pool_md ) , r ) ;
2013-03-02 02:45:44 +04:00
goto err ;
}
2011-11-01 00:21:18 +04:00
r = dm_pool_get_data_dev_size ( pool - > pmd , & nr_blocks_data ) ;
2013-03-02 02:45:44 +04:00
if ( r ) {
2013-08-22 01:30:40 +04:00
DMERR ( " %s: dm_pool_get_data_dev_size returned %d " ,
dm_device_name ( pool - > pool_md ) , r ) ;
2013-03-02 02:45:44 +04:00
goto err ;
}
2011-11-01 00:21:18 +04:00
2012-06-03 03:30:01 +04:00
r = dm_pool_get_metadata_snap ( pool - > pmd , & held_root ) ;
2013-03-02 02:45:44 +04:00
if ( r ) {
2013-08-22 01:30:40 +04:00
DMERR ( " %s: dm_pool_get_metadata_snap returned %d " ,
dm_device_name ( pool - > pool_md ) , r ) ;
2013-03-02 02:45:44 +04:00
goto err ;
}
2011-11-01 00:21:18 +04:00
DMEMIT ( " %llu %llu/%llu %llu/%llu " ,
( unsigned long long ) transaction_id ,
( unsigned long long ) ( nr_blocks_metadata - nr_free_blocks_metadata ) ,
( unsigned long long ) nr_blocks_metadata ,
( unsigned long long ) ( nr_blocks_data - nr_free_blocks_data ) ,
( unsigned long long ) nr_blocks_data ) ;
if (held_root)
	DMEMIT("%llu ", held_root);
else
	DMEMIT("- ");
2014-03-03 20:03:26 +04:00
if (pool->pf.mode == PM_OUT_OF_DATA_SPACE)
	DMEMIT("out_of_data_space ");
else if (pool->pf.mode == PM_READ_ONLY)
	DMEMIT("ro ");
else
	DMEMIT("rw ");
2012-12-22 00:23:32 +04:00
if (!pool->pf.discard_enabled)
	DMEMIT("ignore_discard ");
else if (pool->pf.discard_passdown)
	DMEMIT("discard_passdown ");
else
	DMEMIT("no_discard_passdown ");

if (pool->pf.error_if_no_space)
	DMEMIT("error_if_no_space ");
else
	DMEMIT("queue_if_no_space ");
2011-11-01 00:21:18 +04:00
2015-07-15 18:40:24 +03:00
if ( dm_pool_metadata_needs_check ( pool - > pmd ) )
DMEMIT ( " needs_check " ) ;
else
DMEMIT ( " - " ) ;
2011-11-01 00:21:18 +04:00
break ;
case STATUSTYPE_TABLE :
DMEMIT ( " %s %s %lu %llu " ,
format_dev_t ( buf , pt - > metadata_dev - > bdev - > bd_dev ) ,
format_dev_t ( buf2 , pt - > data_dev - > bdev - > bd_dev ) ,
( unsigned long ) pool - > sectors_per_block ,
( unsigned long long ) pt - > low_water_blocks ) ;
2012-09-27 02:45:47 +04:00
emit_flags ( & pt - > requested_pf , result , sz , maxlen ) ;
2011-11-01 00:21:18 +04:00
break ;
}
2013-03-02 02:45:44 +04:00
return ;
2011-11-01 00:21:18 +04:00
2013-03-02 02:45:44 +04:00
err :
DMEMIT ( " Error " ) ;
2011-11-01 00:21:18 +04:00
}
static int pool_iterate_devices ( struct dm_target * ti ,
iterate_devices_callout_fn fn , void * data )
{
struct pool_c * pt = ti - > private ;
return fn ( ti , pt - > data_dev , 0 , ti - > len , data ) ;
}
static void pool_io_hints ( struct dm_target * ti , struct queue_limits * limits )
{
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
2014-10-10 02:43:25 +04:00
sector_t io_opt_sectors = limits - > io_opt > > SECTOR_SHIFT ;
/*
 * If max_sectors is smaller than pool->sectors_per_block adjust it
 * to the highest possible power-of-2 factor of pool->sectors_per_block.
 * This is especially beneficial when the pool's data device is a RAID
 * device that has a full stripe width that matches pool->sectors_per_block
 * -- because even though partial RAID stripe-sized IOs will be issued to a
 *    single RAID stripe; when aggregated they will end on a full RAID stripe
 *    boundary.. which avoids additional partial RAID stripe writes cascading
 */
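/*
 * Worked example (values assumed): with pool->sectors_per_block = 384
 * (a 192KiB block) and a stacked max_sectors of 256, the loop below
 * settles on 128, the largest power of two that divides 384.
 */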
if ( limits - > max_sectors < pool - > sectors_per_block ) {
while ( ! is_factor ( pool - > sectors_per_block , limits - > max_sectors ) ) {
if ( ( limits - > max_sectors & ( limits - > max_sectors - 1 ) ) = = 0 )
limits - > max_sectors - - ;
limits - > max_sectors = rounddown_pow_of_two ( limits - > max_sectors ) ;
}
}
2011-11-01 00:21:18 +04:00
2013-08-20 23:02:41 +04:00
/*
* If the system - determined stacked limits are compatible with the
* pool ' s blocksize ( io_opt is a factor ) do not override them .
*/
if (io_opt_sectors < pool->sectors_per_block ||
    !is_factor(io_opt_sectors, pool->sectors_per_block)) {
if ( is_factor ( pool - > sectors_per_block , limits - > max_sectors ) )
blk_limits_io_min ( limits , limits - > max_sectors < < SECTOR_SHIFT ) ;
else
blk_limits_io_min ( limits , pool - > sectors_per_block < < SECTOR_SHIFT ) ;
2013-08-20 23:02:41 +04:00
blk_limits_io_opt ( limits , pool - > sectors_per_block < < SECTOR_SHIFT ) ;
}
2012-09-27 02:45:47 +04:00
/*
* pt - > adjusted_pf is a staging area for the actual features to use .
* They get transferred to the live pool in bind_control_target ( )
* called from pool_preresume ( ) .
*/
2013-09-20 02:49:11 +04:00
if ( ! pt - > adjusted_pf . discard_enabled ) {
/*
* Must explicitly disallow stacking discard limits otherwise the
* block layer will stack them if pool ' s data device has support .
* QUEUE_FLAG_DISCARD wouldn ' t be set but there is no way for the
* user to see that , so make sure to set all discard limits to 0.
*/
limits - > discard_granularity = 0 ;
2012-09-27 02:45:47 +04:00
return ;
2013-09-20 02:49:11 +04:00
}
2012-09-27 02:45:47 +04:00
disable_passdown_if_not_supported ( pt ) ;
2015-04-16 14:58:35 +03:00
/*
* The pool uses the same discard limits as the underlying data
* device . DM core has already set this up .
*/
2011-11-01 00:21:18 +04:00
}
static struct target_type pool_target = {
. name = " thin-pool " ,
. features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE ,
2016-03-10 19:31:35 +03:00
. version = { 1 , 18 , 0 } ,
2011-11-01 00:21:18 +04:00
. module = THIS_MODULE ,
. ctr = pool_ctr ,
. dtr = pool_dtr ,
. map = pool_map ,
2014-11-07 23:09:46 +03:00
. presuspend = pool_presuspend ,
. presuspend_undo = pool_presuspend_undo ,
2011-11-01 00:21:18 +04:00
. postsuspend = pool_postsuspend ,
. preresume = pool_preresume ,
. resume = pool_resume ,
. message = pool_message ,
. status = pool_status ,
. iterate_devices = pool_iterate_devices ,
. io_hints = pool_io_hints ,
} ;
/*----------------------------------------------------------------
* Thin target methods
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2014-04-08 14:29:01 +04:00
static void thin_get ( struct thin_c * tc )
{
atomic_inc ( & tc - > refcount ) ;
}
static void thin_put ( struct thin_c * tc )
{
if ( atomic_dec_and_test ( & tc - > refcount ) )
complete ( & tc - > can_destroy ) ;
}
2011-11-01 00:21:18 +04:00
static void thin_dtr ( struct dm_target * ti )
{
struct thin_c * tc = ti - > private ;
2014-03-21 05:17:14 +04:00
unsigned long flags ;
spin_lock_irqsave ( & tc - > pool - > lock , flags ) ;
list_del_rcu ( & tc - > list ) ;
spin_unlock_irqrestore ( & tc - > pool - > lock , flags ) ;
synchronize_rcu ( ) ;
2011-11-01 00:21:18 +04:00
2014-11-06 01:00:13 +03:00
thin_put ( tc ) ;
wait_for_completion ( & tc - > can_destroy ) ;
2011-11-01 00:21:18 +04:00
mutex_lock ( & dm_thin_pool_table . mutex ) ;
__pool_dec ( tc - > pool ) ;
dm_pool_close_thin_device ( tc - > td ) ;
dm_put_device ( ti , tc - > pool_dev ) ;
2012-03-28 21:41:28 +04:00
if ( tc - > origin_dev )
dm_put_device ( ti , tc - > origin_dev ) ;
2011-11-01 00:21:18 +04:00
kfree ( tc ) ;
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
}
/*
* Thin target parameters :
*
2012-03-28 21:41:28 +04:00
* < pool_dev > < dev_id > [ origin_dev ]
2011-11-01 00:21:18 +04:00
*
* pool_dev : the path to the pool ( eg , / dev / mapper / my_pool )
* dev_id : the internal device identifier
2012-03-28 21:41:28 +04:00
* origin_dev : a device external to the pool that should act as the origin
2012-03-28 21:41:29 +04:00
*
* If the pool device has discards disabled , they get disabled for the thin
* device as well .
2011-11-01 00:21:18 +04:00
*/
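/*
 * Illustrative table lines (device names are examples only):
 *
 *   0 2097152 thin /dev/mapper/pool 0
 *   0 2097152 thin /dev/mapper/pool 1 /dev/mapper/external_origin
 */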
static int thin_ctr ( struct dm_target * ti , unsigned argc , char * * argv )
{
int r ;
struct thin_c * tc ;
2012-03-28 21:41:28 +04:00
struct dm_dev * pool_dev , * origin_dev ;
2011-11-01 00:21:18 +04:00
struct mapped_device * pool_md ;
2014-04-08 14:08:41 +04:00
unsigned long flags ;
2011-11-01 00:21:18 +04:00
mutex_lock ( & dm_thin_pool_table . mutex ) ;
2012-03-28 21:41:28 +04:00
if ( argc ! = 2 & & argc ! = 3 ) {
2011-11-01 00:21:18 +04:00
ti - > error = " Invalid argument count " ;
r = - EINVAL ;
goto out_unlock ;
}
tc = ti - > private = kzalloc ( sizeof ( * tc ) , GFP_KERNEL ) ;
if ( ! tc ) {
ti - > error = " Out of memory " ;
r = - ENOMEM ;
goto out_unlock ;
}
2014-10-29 03:58:45 +03:00
tc - > thin_md = dm_table_get_md ( ti - > table ) ;
2014-03-21 05:17:14 +04:00
spin_lock_init ( & tc - > lock ) ;
2014-10-10 16:43:14 +04:00
INIT_LIST_HEAD ( & tc - > deferred_cells ) ;
2014-03-21 05:17:14 +04:00
bio_list_init ( & tc - > deferred_bio_list ) ;
bio_list_init ( & tc - > retry_on_resume_list ) ;
2014-03-22 02:33:41 +04:00
tc - > sort_bio_list = RB_ROOT ;
2011-11-01 00:21:18 +04:00
2012-03-28 21:41:28 +04:00
if ( argc = = 3 ) {
r = dm_get_device ( ti , argv [ 2 ] , FMODE_READ , & origin_dev ) ;
if ( r ) {
ti - > error = " Error opening origin device " ;
goto bad_origin_dev ;
}
tc - > origin_dev = origin_dev ;
}
2011-11-01 00:21:18 +04:00
r = dm_get_device ( ti , argv [ 0 ] , dm_table_get_mode ( ti - > table ) , & pool_dev ) ;
if ( r ) {
ti - > error = " Error opening pool device " ;
goto bad_pool_dev ;
}
tc - > pool_dev = pool_dev ;
if ( read_dev_id ( argv [ 1 ] , ( unsigned long long * ) & tc - > dev_id , 0 ) ) {
ti - > error = " Invalid device id " ;
r = - EINVAL ;
goto bad_common ;
}
pool_md = dm_get_md ( tc - > pool_dev - > bdev - > bd_dev ) ;
if ( ! pool_md ) {
ti - > error = " Couldn't get pool mapped device " ;
r = - EINVAL ;
goto bad_common ;
}
tc - > pool = __pool_table_lookup ( pool_md ) ;
if ( ! tc - > pool ) {
ti - > error = " Couldn't find pool object " ;
r = - EINVAL ;
goto bad_pool_lookup ;
}
__pool_inc ( tc - > pool ) ;
2012-07-27 18:08:16 +04:00
if ( get_pool_mode ( tc - > pool ) = = PM_FAIL ) {
ti - > error = " Couldn't open thin device, Pool is in fail mode " ;
2014-02-20 05:32:33 +04:00
r = - EINVAL ;
2014-11-07 23:09:46 +03:00
goto bad_pool ;
2012-07-27 18:08:16 +04:00
}
2011-11-01 00:21:18 +04:00
r = dm_pool_open_thin_device ( tc - > pool - > pmd , tc - > dev_id , & tc - > td ) ;
if ( r ) {
ti - > error = " Couldn't open thin internal device " ;
2014-11-07 23:09:46 +03:00
goto bad_pool ;
2011-11-01 00:21:18 +04:00
}
2012-07-27 18:08:00 +04:00
r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block);
if (r)
	goto bad;
2012-07-27 18:08:00 +04:00
2013-03-02 02:45:47 +04:00
ti - > num_flush_bios = 1 ;
2012-07-27 18:08:07 +04:00
ti - > flush_supported = true ;
2016-01-31 21:28:26 +03:00
ti - > per_io_data_size = sizeof ( struct dm_thin_endio_hook ) ;
2012-03-28 21:41:29 +04:00
/* In case the pool supports discards, pass them on. */
2013-09-20 02:49:11 +04:00
ti - > discard_zeroes_data_unsupported = true ;
2012-03-28 21:41:29 +04:00
if ( tc - > pool - > pf . discard_enabled ) {
2012-07-27 18:08:08 +04:00
ti - > discards_supported = true ;
2013-03-02 02:45:47 +04:00
ti - > num_discard_bios = 1 ;
2015-04-16 14:58:35 +03:00
ti - > split_discard_bios = false ;
2012-03-28 21:41:29 +04:00
}
2011-11-01 00:21:18 +04:00
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
2014-04-08 14:08:41 +04:00
spin_lock_irqsave ( & tc - > pool - > lock , flags ) ;
2014-11-07 23:09:46 +03:00
if ( tc - > pool - > suspended ) {
spin_unlock_irqrestore ( & tc - > pool - > lock , flags ) ;
mutex_lock ( & dm_thin_pool_table . mutex ) ; /* reacquire for __pool_dec */
ti - > error = " Unable to activate thin device while pool is suspended " ;
r = - EINVAL ;
goto bad ;
}
2014-12-17 15:59:59 +03:00
atomic_set ( & tc - > refcount , 1 ) ;
init_completion ( & tc - > can_destroy ) ;
2014-03-21 05:17:14 +04:00
list_add_tail_rcu ( & tc - > list , & tc - > pool - > active_thins ) ;
2014-04-08 14:08:41 +04:00
spin_unlock_irqrestore ( & tc - > pool - > lock , flags ) ;
2014-03-21 05:17:14 +04:00
/*
* This synchronize_rcu ( ) call is needed here otherwise we risk a
* wake_worker ( ) call finding no bios to process ( because the newly
* added tc isn ' t yet visible ) . So this reduces latency since we
* aren ' t then dependent on the periodic commit to wake_worker ( ) .
*/
synchronize_rcu ( ) ;
2014-11-07 23:09:46 +03:00
dm_put ( pool_md ) ;
2011-11-01 00:21:18 +04:00
return 0 ;
2014-11-07 23:09:46 +03:00
bad :
2014-02-20 05:32:33 +04:00
dm_pool_close_thin_device ( tc - > td ) ;
2014-11-07 23:09:46 +03:00
bad_pool :
2011-11-01 00:21:18 +04:00
__pool_dec ( tc - > pool ) ;
bad_pool_lookup :
dm_put ( pool_md ) ;
bad_common :
dm_put_device ( ti , tc - > pool_dev ) ;
bad_pool_dev :
2012-03-28 21:41:28 +04:00
if ( tc - > origin_dev )
dm_put_device ( ti , tc - > origin_dev ) ;
bad_origin_dev :
2011-11-01 00:21:18 +04:00
kfree ( tc ) ;
out_unlock :
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
return r ;
}
2012-12-22 00:23:41 +04:00
static int thin_map ( struct dm_target * ti , struct bio * bio )
2011-11-01 00:21:18 +04:00
{
2013-10-12 02:44:27 +04:00
bio - > bi_iter . bi_sector = dm_target_offset ( ti , bio - > bi_iter . bi_sector ) ;
2011-11-01 00:21:18 +04:00
2012-12-22 00:23:41 +04:00
return thin_bio_map ( ti , bio ) ;
2011-11-01 00:21:18 +04:00
}
2012-12-22 00:23:41 +04:00
static int thin_endio ( struct dm_target * ti , struct bio * bio , int err )
2012-03-28 21:41:28 +04:00
{
unsigned long flags ;
2012-12-22 00:23:40 +04:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-03-28 21:41:28 +04:00
struct list_head work ;
2012-06-03 03:30:00 +04:00
struct dm_thin_new_mapping * m , * tmp ;
2012-03-28 21:41:28 +04:00
struct pool * pool = h - > tc - > pool ;
if ( h - > shared_read_entry ) {
INIT_LIST_HEAD ( & work ) ;
2012-10-13 00:02:10 +04:00
dm_deferred_entry_dec ( h - > shared_read_entry , & work ) ;
2012-03-28 21:41:28 +04:00
spin_lock_irqsave ( & pool - > lock , flags ) ;
list_for_each_entry_safe ( m , tmp , & work , list ) {
list_del ( & m - > list ) ;
2014-06-13 16:57:09 +04:00
__complete_mapping_preparation ( m ) ;
2012-03-28 21:41:28 +04:00
}
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
}
2012-03-28 21:41:28 +04:00
if ( h - > all_io_entry ) {
INIT_LIST_HEAD ( & work ) ;
2012-10-13 00:02:10 +04:00
dm_deferred_entry_dec ( h - > all_io_entry , & work ) ;
2012-12-22 00:23:31 +04:00
if ( ! list_empty ( & work ) ) {
spin_lock_irqsave ( & pool - > lock , flags ) ;
list_for_each_entry_safe ( m , tmp , & work , list )
2013-12-11 23:01:20 +04:00
list_add_tail ( & m - > list , & pool - > prepared_discards ) ;
2012-12-22 00:23:31 +04:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
wake_worker ( pool ) ;
}
2012-03-28 21:41:28 +04:00
}
2015-04-16 14:58:35 +03:00
if ( h - > cell )
cell_defer_no_holder ( h - > tc , h - > cell ) ;
2012-03-28 21:41:28 +04:00
return 0 ;
}
2014-03-03 19:52:28 +04:00
static void thin_presuspend ( struct dm_target * ti )
2011-11-01 00:21:18 +04:00
{
2014-03-03 19:52:28 +04:00
struct thin_c * tc = ti - > private ;
2011-11-01 00:21:18 +04:00
if (dm_noflush_suspending(ti))
	noflush_work(tc, do_noflush_start);
}
static void thin_postsuspend ( struct dm_target * ti )
{
struct thin_c * tc = ti - > private ;
/*
* The dm_noflush_suspending flag has been cleared by now , so
* unfortunately we must always run this .
*/
noflush_work ( tc , do_noflush_stop ) ;
2011-11-01 00:21:18 +04:00
}
2014-06-13 17:47:24 +04:00
static int thin_preresume ( struct dm_target * ti )
{
struct thin_c * tc = ti - > private ;
if ( tc - > origin_dev )
tc - > origin_size = get_dev_size ( tc - > origin_dev - > bdev ) ;
return 0 ;
}
2011-11-01 00:21:18 +04:00
/*
* < nr mapped sectors > < highest mapped sector >
*/
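/*
 * Worked example (values assumed): a thin device with a single 512KiB
 * block (1024 sectors) mapped at block 0 would report "1024 1023".
 */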
2013-03-02 02:45:44 +04:00
static void thin_status ( struct dm_target * ti , status_type_t type ,
unsigned status_flags , char * result , unsigned maxlen )
2011-11-01 00:21:18 +04:00
{
int r ;
ssize_t sz = 0 ;
dm_block_t mapped , highest ;
char buf [ BDEVNAME_SIZE ] ;
struct thin_c * tc = ti - > private ;
2012-07-27 18:08:16 +04:00
if ( get_pool_mode ( tc - > pool ) = = PM_FAIL ) {
DMEMIT ( " Fail " ) ;
2013-03-02 02:45:44 +04:00
return ;
2012-07-27 18:08:16 +04:00
}
2011-11-01 00:21:18 +04:00
if ( ! tc - > td )
DMEMIT ( " - " ) ;
else {
switch ( type ) {
case STATUSTYPE_INFO :
r = dm_thin_get_mapped_count ( tc - > td , & mapped ) ;
2013-03-02 02:45:44 +04:00
if ( r ) {
DMERR ( " dm_thin_get_mapped_count returned %d " , r ) ;
goto err ;
}
2011-11-01 00:21:18 +04:00
r = dm_thin_get_highest_mapped_block ( tc - > td , & highest ) ;
2013-03-02 02:45:44 +04:00
if ( r < 0 ) {
DMERR ( " dm_thin_get_highest_mapped_block returned %d " , r ) ;
goto err ;
}
2011-11-01 00:21:18 +04:00
DMEMIT ( " %llu " , mapped * tc - > pool - > sectors_per_block ) ;
if ( r )
DMEMIT ( " %llu " , ( ( highest + 1 ) *
tc - > pool - > sectors_per_block ) - 1 ) ;
else
DMEMIT ( " - " ) ;
break ;
case STATUSTYPE_TABLE :
DMEMIT ( " %s %lu " ,
format_dev_t ( buf , tc - > pool_dev - > bdev - > bd_dev ) ,
( unsigned long ) tc - > dev_id ) ;
2012-03-28 21:41:28 +04:00
if ( tc - > origin_dev )
DMEMIT ( " %s " , format_dev_t ( buf , tc - > origin_dev - > bdev - > bd_dev ) ) ;
2011-11-01 00:21:18 +04:00
break ;
}
}
2013-03-02 02:45:44 +04:00
return ;
err :
DMEMIT ( " Error " ) ;
2011-11-01 00:21:18 +04:00
}
static int thin_iterate_devices ( struct dm_target * ti ,
iterate_devices_callout_fn fn , void * data )
{
2012-07-27 18:08:02 +04:00
sector_t blocks ;
2011-11-01 00:21:18 +04:00
struct thin_c * tc = ti - > private ;
2012-07-27 18:08:02 +04:00
struct pool * pool = tc - > pool ;
2011-11-01 00:21:18 +04:00
/*
* We can ' t call dm_pool_get_data_dev_size ( ) since that blocks . So
* we follow a more convoluted path through to the pool ' s target .
*/
2012-07-27 18:08:02 +04:00
if ( ! pool - > ti )
2011-11-01 00:21:18 +04:00
return 0 ; /* nothing is bound */
2012-07-27 18:08:02 +04:00
blocks = pool - > ti - > len ;
( void ) sector_div ( blocks , pool - > sectors_per_block ) ;
2011-11-01 00:21:18 +04:00
if ( blocks )
2012-07-27 18:08:02 +04:00
return fn ( ti , tc - > pool_dev , 0 , pool - > sectors_per_block * blocks , data ) ;
2011-11-01 00:21:18 +04:00
return 0 ;
}
2015-04-16 14:58:35 +03:00
static void thin_io_hints ( struct dm_target * ti , struct queue_limits * limits )
{
struct thin_c * tc = ti - > private ;
struct pool * pool = tc - > pool ;
2015-09-08 15:56:13 +03:00
2015-11-23 21:44:38 +03:00
if ( ! pool - > pf . discard_enabled )
return ;
2015-04-16 14:58:35 +03:00
limits - > discard_granularity = pool - > sectors_per_block < < SECTOR_SHIFT ;
limits - > max_discard_sectors = 2048 * 1024 * 16 ; /* 16G */
}
2011-11-01 00:21:18 +04:00
static struct target_type thin_target = {
. name = " thin " ,
2016-03-10 19:31:35 +03:00
. version = { 1 , 18 , 0 } ,
2011-11-01 00:21:18 +04:00
. module = THIS_MODULE ,
. ctr = thin_ctr ,
. dtr = thin_dtr ,
. map = thin_map ,
2012-03-28 21:41:28 +04:00
. end_io = thin_endio ,
2014-06-13 17:47:24 +04:00
. preresume = thin_preresume ,
2014-03-03 19:52:28 +04:00
. presuspend = thin_presuspend ,
2011-11-01 00:21:18 +04:00
. postsuspend = thin_postsuspend ,
. status = thin_status ,
. iterate_devices = thin_iterate_devices ,
2015-04-16 14:58:35 +03:00
. io_hints = thin_io_hints ,
2011-11-01 00:21:18 +04:00
} ;
/*----------------------------------------------------------------*/
static int __init dm_thin_init ( void )
{
int r ;
pool_table_init ( ) ;
r = dm_register_target ( & thin_target ) ;
if ( r )
return r ;
r = dm_register_target ( & pool_target ) ;
if ( r )
2012-06-03 03:30:00 +04:00
goto bad_pool_target ;
r = - ENOMEM ;
_new_mapping_cache = KMEM_CACHE ( dm_thin_new_mapping , 0 ) ;
if ( ! _new_mapping_cache )
goto bad_new_mapping_cache ;
return 0 ;
bad_new_mapping_cache :
dm_unregister_target ( & pool_target ) ;
bad_pool_target :
dm_unregister_target ( & thin_target ) ;
2011-11-01 00:21:18 +04:00
return r ;
}
static void dm_thin_exit ( void )
{
dm_unregister_target ( & thin_target ) ;
dm_unregister_target ( & pool_target ) ;
2012-06-03 03:30:00 +04:00
kmem_cache_destroy ( _new_mapping_cache ) ;
2011-11-01 00:21:18 +04:00
}
module_init ( dm_thin_init ) ;
module_exit ( dm_thin_exit ) ;
2014-05-20 21:38:33 +04:00
module_param_named ( no_space_timeout , no_space_timeout_secs , uint , S_IRUGO | S_IWUSR ) ;
MODULE_PARM_DESC ( no_space_timeout , " Out of data space queue IO timeout in seconds " ) ;
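/*
 * Illustrative runtime override (path assumes the module is built as
 * dm_thin_pool):
 *
 *   echo 120 > /sys/module/dm_thin_pool/parameters/no_space_timeout
 */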
2012-05-12 04:43:19 +04:00
MODULE_DESCRIPTION ( DM_NAME " thin provisioning target " ) ;
2011-11-01 00:21:18 +04:00
MODULE_AUTHOR ( " Joe Thornber <dm-devel@redhat.com> " ) ;
MODULE_LICENSE ( " GPL " ) ;