2011-10-31 20:21:18 +00:00
/*
2012-07-27 15:08:16 +01:00
* Copyright ( C ) 2011 - 2012 Red Hat UK .
2011-10-31 20:21:18 +00:00
*
* This file is released under the GPL .
*/
# include "dm-thin-metadata.h"
2012-10-12 21:02:13 +01:00
# include "dm-bio-prison.h"
2012-07-27 15:08:16 +01:00
# include "dm.h"
2011-10-31 20:21:18 +00:00
# include <linux/device-mapper.h>
# include <linux/dm-io.h>
# include <linux/dm-kcopyd.h>
# include <linux/list.h>
# include <linux/init.h>
# include <linux/module.h>
# include <linux/slab.h>
# define DM_MSG_PREFIX "thin"
/*
* Tunable constants
*/
2012-07-27 15:07:57 +01:00
#define ENDIO_HOOK_POOL_SIZE 1024
#define MAPPING_POOL_SIZE 1024
#define PRISON_CELLS 1024
#define COMMIT_PERIOD HZ	/* HZ jiffies, i.e. one second */
2011-10-31 20:21:18 +00:00
/*
* The block size of the device holding pool data must be
* between 64 KB and 1 GB .
*/
/* 64KB and 1GB respectively, expressed in sectors. */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
/*
* Device id is restricted to 24 bits .
*/
#define MAX_DEV_ID ((1 << 24) - 1)	/* largest value that fits in 24 bits */
/*
* How do we handle breaking sharing of data blocks ?
* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
*
* We use a standard copy - on - write btree to store the mappings for the
* devices ( note I ' m talking about copy - on - write of the metadata here , not
* the data ) . When you take an internal snapshot you clone the root node
* of the origin btree . After this there is no concept of an origin or a
* snapshot . They are just two device trees that happen to point to the
* same data blocks .
*
* When we get a write in we decide if it ' s to a shared data block using
* some timestamp magic . If it is , we have to break sharing .
*
* Let ' s say we write to a shared block in what was the origin . The
* steps are :
*
* i ) plug io further to this physical block . ( see bio_prison code ) .
*
* ii ) quiesce any read io to that shared data block . Obviously
2012-10-12 21:02:10 +01:00
* including all devices that share this block . ( see dm_deferred_set code )
2011-10-31 20:21:18 +00:00
*
 * iii) copy the data block to a newly allocated block.  This step can be
* missed out if the io covers the block . ( schedule_copy ) .
*
* iv ) insert the new mapping into the origin ' s btree
2012-03-28 18:41:24 +01:00
* ( process_prepared_mapping ) . This act of inserting breaks some
2011-10-31 20:21:18 +00:00
* sharing of btree nodes between the two devices . Breaking sharing only
 * affects the btree of that specific device.  Btrees for the other
* devices that share the block never change . The btree for the origin
* device as it was after the last commit is untouched , ie . we ' re using
* persistent data structures in the functional programming sense .
*
* v ) unplug io to this physical block , including the io that triggered
* the breaking of sharing .
*
* Steps ( ii ) and ( iii ) occur in parallel .
*
* The metadata _doesn ' t_ need to be committed before the io continues . We
* get away with this because the io is always written to a _new_ block .
* If there ' s a crash , then :
*
* - The origin mapping will point to the old origin block ( the shared
* one ) . This will contain the data as it was before the io that triggered
* the breaking of sharing came in .
*
* - The snap mapping still points to the old block . As it would after
* the commit .
*
* The downside of this scheme is the timestamp magic isn ' t perfect , and
* will continue to think that data block in the snapshot device is shared
* even after the write to the origin has broken sharing . I suspect data
* blocks will typically be shared by many different devices , so we ' re
* breaking sharing n + 1 times , rather than n , where n is the number of
* devices that reference this data block . At the moment I think the
* benefits far , far outweigh the disadvantages .
*/
/*----------------------------------------------------------------*/
/*
* Key building .
*/
static void build_data_key ( struct dm_thin_device * td ,
2012-10-12 21:02:10 +01:00
dm_block_t b , struct dm_cell_key * key )
2011-10-31 20:21:18 +00:00
{
key - > virtual = 0 ;
key - > dev = dm_thin_dev_id ( td ) ;
key - > block = b ;
}
/*
 * Fill in @key for block @b of the device identified by
 * dm_thin_dev_id(@td).  The key is flagged as virtual (virtual == 1).
 */
static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
			      struct dm_cell_key *key)
{
	key->block = b;
	key->dev = dm_thin_dev_id(td);
	key->virtual = 1;
}
/*----------------------------------------------------------------*/
/*
* A pool device ties together a metadata device and a data device . It
* also provides the interface for creating and destroying internal
* devices .
*/
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping ;
2012-03-28 18:41:29 +01:00
2012-07-27 15:08:16 +01:00
/*
* The pool runs in 3 modes . Ordered in degraded order for comparisons .
*/
enum pool_mode {
	PM_WRITE,		/* metadata may be changed */
	PM_READ_ONLY,		/* metadata may not be changed */
	PM_FAIL,		/* all I/O fails */
};
2012-03-28 18:41:29 +01:00
struct pool_features {
2012-07-27 15:08:16 +01:00
enum pool_mode mode ;
2012-09-26 23:45:46 +01:00
bool zero_new_blocks : 1 ;
bool discard_enabled : 1 ;
bool discard_passdown : 1 ;
2012-03-28 18:41:29 +01:00
} ;
2012-07-27 15:08:16 +01:00
struct thin_c;

/* Handler invoked for a bio belonging to thin device @tc. */
typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);

/* Handler invoked for a prepared mapping @m. */
typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);
2011-10-31 20:21:18 +00:00
struct pool {
struct list_head list ;
struct dm_target * ti ; /* Only set if a pool target is bound */
struct mapped_device * pool_md ;
struct block_device * md_dev ;
struct dm_pool_metadata * pmd ;
dm_block_t low_water_blocks ;
2012-07-27 15:08:02 +01:00
uint32_t sectors_per_block ;
2012-07-27 15:08:03 +01:00
int sectors_per_block_shift ;
2011-10-31 20:21:18 +00:00
2012-03-28 18:41:29 +01:00
struct pool_features pf ;
2011-10-31 20:21:18 +00:00
unsigned low_water_triggered : 1 ; /* A dm event has been sent */
unsigned no_free_space : 1 ; /* A -ENOSPC warning has been issued */
2012-10-12 21:02:10 +01:00
struct dm_bio_prison * prison ;
2011-10-31 20:21:18 +00:00
struct dm_kcopyd_client * copier ;
struct workqueue_struct * wq ;
struct work_struct worker ;
2012-03-28 18:41:27 +01:00
struct delayed_work waker ;
2011-10-31 20:21:18 +00:00
2012-03-28 18:41:27 +01:00
unsigned long last_commit_jiffies ;
2012-07-27 15:08:02 +01:00
unsigned ref_count ;
2011-10-31 20:21:18 +00:00
spinlock_t lock ;
struct bio_list deferred_bios ;
struct bio_list deferred_flush_bios ;
struct list_head prepared_mappings ;
2012-03-28 18:41:28 +01:00
struct list_head prepared_discards ;
2011-10-31 20:21:18 +00:00
struct bio_list retry_on_resume_list ;
2012-10-12 21:02:10 +01:00
struct dm_deferred_set * shared_read_ds ;
struct dm_deferred_set * all_io_ds ;
2011-10-31 20:21:18 +00:00
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping * next_mapping ;
2011-10-31 20:21:18 +00:00
mempool_t * mapping_pool ;
2012-07-27 15:08:16 +01:00
process_bio_fn process_bio ;
process_bio_fn process_discard ;
process_mapping_fn process_prepared_mapping ;
process_mapping_fn process_prepared_discard ;
2011-10-31 20:21:18 +00:00
} ;
2012-07-27 15:08:16 +01:00
/* Forward declarations; the definitions appear later in the file. */
static enum pool_mode get_pool_mode(struct pool *pool);
static void set_pool_mode(struct pool *pool, enum pool_mode mode);
2011-10-31 20:21:18 +00:00
/*
* Target context for a pool .
*/
struct pool_c {
struct dm_target * ti ;
struct pool * pool ;
struct dm_dev * data_dev ;
struct dm_dev * metadata_dev ;
struct dm_target_callbacks callbacks ;
dm_block_t low_water_blocks ;
2012-09-26 23:45:47 +01:00
struct pool_features requested_pf ; /* Features requested during table load */
struct pool_features adjusted_pf ; /* Features used after adjusting for constituent devices */
2011-10-31 20:21:18 +00:00
} ;
/*
* Target context for a thin .
*/
struct thin_c {
struct dm_dev * pool_dev ;
2012-03-28 18:41:28 +01:00
struct dm_dev * origin_dev ;
2011-10-31 20:21:18 +00:00
dm_thin_id dev_id ;
struct pool * pool ;
struct dm_thin_device * td ;
} ;
/*----------------------------------------------------------------*/
/*
* A global list of pools that uses a struct mapped_device as a key .
*/
static struct dm_thin_pool_table {
	struct mutex mutex;		/* must be held by the __pool_table_*() helpers */
	struct list_head pools;		/* struct pool entries, linked through pool->list */
} dm_thin_pool_table;
/*
 * Prepare the global pool table for use: an empty list and an
 * initialised mutex.
 */
static void pool_table_init(void)
{
	INIT_LIST_HEAD(&dm_thin_pool_table.pools);
	mutex_init(&dm_thin_pool_table.mutex);
}
static void __pool_table_insert ( struct pool * pool )
{
BUG_ON ( ! mutex_is_locked ( & dm_thin_pool_table . mutex ) ) ;
list_add ( & pool - > list , & dm_thin_pool_table . pools ) ;
}
static void __pool_table_remove ( struct pool * pool )
{
BUG_ON ( ! mutex_is_locked ( & dm_thin_pool_table . mutex ) ) ;
list_del ( & pool - > list ) ;
}
static struct pool * __pool_table_lookup ( struct mapped_device * md )
{
struct pool * pool = NULL , * tmp ;
BUG_ON ( ! mutex_is_locked ( & dm_thin_pool_table . mutex ) ) ;
list_for_each_entry ( tmp , & dm_thin_pool_table . pools , list ) {
if ( tmp - > pool_md = = md ) {
pool = tmp ;
break ;
}
}
return pool ;
}
static struct pool * __pool_table_lookup_metadata_dev ( struct block_device * md_dev )
{
struct pool * pool = NULL , * tmp ;
BUG_ON ( ! mutex_is_locked ( & dm_thin_pool_table . mutex ) ) ;
list_for_each_entry ( tmp , & dm_thin_pool_table . pools , list ) {
if ( tmp - > md_dev = = md_dev ) {
pool = tmp ;
break ;
}
}
return pool ;
}
/*----------------------------------------------------------------*/
2012-06-03 00:30:00 +01:00
/*
 * Per-bio state, obtained with dm_per_bio_data().
 */
struct dm_thin_endio_hook {
	struct thin_c *tc;				/* thin device the bio belongs to */
	struct dm_deferred_entry *shared_read_entry;
	struct dm_deferred_entry *all_io_entry;		/* set by inc_all_io_entry() */
	struct dm_thin_new_mapping *overwrite_mapping;	/* read back by overwrite_endio() */
};
2011-10-31 20:21:18 +00:00
static void __requeue_bio_list ( struct thin_c * tc , struct bio_list * master )
{
struct bio * bio ;
struct bio_list bios ;
bio_list_init ( & bios ) ;
bio_list_merge ( & bios , master ) ;
bio_list_init ( master ) ;
while ( ( bio = bio_list_pop ( & bios ) ) ) {
2012-12-21 20:23:40 +00:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-06-03 00:30:00 +01:00
2012-03-28 18:41:28 +01:00
if ( h - > tc = = tc )
2011-10-31 20:21:18 +00:00
bio_endio ( bio , DM_ENDIO_REQUEUE ) ;
else
bio_list_add ( master , bio ) ;
}
}
static void requeue_io ( struct thin_c * tc )
{
struct pool * pool = tc - > pool ;
unsigned long flags ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
__requeue_bio_list ( tc , & pool - > deferred_bios ) ;
__requeue_bio_list ( tc , & pool - > retry_on_resume_list ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
}
/*
* This section of code contains the logic for processing a thin device ' s IO .
* Much of the code depends on pool object resources ( lists , workqueues , etc )
* but most is exclusively called from the thin target rather than the thin - pool
* target .
*/
static dm_block_t get_bio_block ( struct thin_c * tc , struct bio * bio )
{
2012-07-27 15:08:02 +01:00
sector_t block_nr = bio - > bi_sector ;
2012-07-27 15:08:03 +01:00
if ( tc - > pool - > sectors_per_block_shift < 0 )
( void ) sector_div ( block_nr , tc - > pool - > sectors_per_block ) ;
else
block_nr > > = tc - > pool - > sectors_per_block_shift ;
2012-07-27 15:08:02 +01:00
return block_nr ;
2011-10-31 20:21:18 +00:00
}
/*
 * Redirect @bio to the pool device.  The new sector is the start of
 * @block plus the bio's offset within its original block.
 */
static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
{
	struct pool *pool = tc->pool;
	sector_t bi_sector = bio->bi_sector;

	bio->bi_bdev = tc->pool_dev->bdev;

	if (pool->sectors_per_block_shift >= 0) {
		bio->bi_sector = (block << pool->sectors_per_block_shift) |
				 (bi_sector & (pool->sectors_per_block - 1));
	} else {
		/* sector_div() returns the remainder, i.e. the offset in the block. */
		bio->bi_sector = (block * pool->sectors_per_block) +
				 sector_div(bi_sector, pool->sectors_per_block);
	}
}
2012-03-28 18:41:28 +01:00
static void remap_to_origin ( struct thin_c * tc , struct bio * bio )
{
bio - > bi_bdev = tc - > origin_dev - > bdev ;
}
2012-07-27 15:08:14 +01:00
static int bio_triggers_commit ( struct thin_c * tc , struct bio * bio )
{
return ( bio - > bi_rw & ( REQ_FLUSH | REQ_FUA ) ) & &
dm_thin_changed_this_transaction ( tc - > td ) ;
}
2012-12-21 20:23:31 +00:00
static void inc_all_io_entry ( struct pool * pool , struct bio * bio )
{
struct dm_thin_endio_hook * h ;
if ( bio - > bi_rw & REQ_DISCARD )
return ;
2012-12-21 20:23:40 +00:00
h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-12-21 20:23:31 +00:00
h - > all_io_entry = dm_deferred_entry_inc ( pool - > all_io_ds ) ;
}
2012-03-28 18:41:28 +01:00
static void issue ( struct thin_c * tc , struct bio * bio )
2011-10-31 20:21:18 +00:00
{
struct pool * pool = tc - > pool ;
unsigned long flags ;
2012-07-27 15:08:16 +01:00
if ( ! bio_triggers_commit ( tc , bio ) ) {
generic_make_request ( bio ) ;
return ;
}
2011-10-31 20:21:18 +00:00
/*
2012-07-27 15:08:16 +01:00
* Complete bio with an error if earlier I / O caused changes to
* the metadata that can ' t be committed e . g , due to I / O errors
* on the metadata device .
2011-10-31 20:21:18 +00:00
*/
2012-07-27 15:08:16 +01:00
if ( dm_thin_aborted_changes ( tc - > td ) ) {
bio_io_error ( bio ) ;
return ;
}
/*
* Batch together any bios that trigger commits and then issue a
* single commit for them in process_deferred_bios ( ) .
*/
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio_list_add ( & pool - > deferred_flush_bios , bio ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2011-10-31 20:21:18 +00:00
}
2012-03-28 18:41:28 +01:00
/*
 * Point @bio at the origin device, then hand it to issue().
 */
static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
{
	remap_to_origin(tc, bio);

	issue(tc, bio);
}
/*
 * Point @bio at @block on the pool device, then hand it to issue().
 */
static void remap_and_issue(struct thin_c *tc, struct bio *bio,
			    dm_block_t block)
{
	remap(tc, bio, block);

	issue(tc, bio);
}
2011-10-31 20:21:18 +00:00
/*
* wake_worker ( ) is used when new work is queued and when pool_resume is
* ready to continue deferred IO processing .
*/
static void wake_worker ( struct pool * pool )
{
queue_work ( pool - > wq , & pool - > worker ) ;
}
/*----------------------------------------------------------------*/
/*
* Bio endio functions .
*/
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping {
2011-10-31 20:21:18 +00:00
struct list_head list ;
2012-03-28 18:41:28 +01:00
unsigned quiesced : 1 ;
unsigned prepared : 1 ;
2012-03-28 18:41:28 +01:00
unsigned pass_discard : 1 ;
2011-10-31 20:21:18 +00:00
struct thin_c * tc ;
dm_block_t virt_block ;
dm_block_t data_block ;
2012-06-03 00:30:00 +01:00
struct dm_bio_prison_cell * cell , * cell2 ;
2011-10-31 20:21:18 +00:00
int err ;
/*
* If the bio covers the whole area of a block then we can avoid
* zeroing or copying . Instead this bio is hooked . The bio will
* still be in the cell , so care has to be taken to avoid issuing
* the bio twice .
*/
struct bio * bio ;
bio_end_io_t * saved_bi_end_io ;
} ;
2012-06-03 00:30:00 +01:00
static void __maybe_add_mapping ( struct dm_thin_new_mapping * m )
2011-10-31 20:21:18 +00:00
{
struct pool * pool = m - > tc - > pool ;
2012-03-28 18:41:28 +01:00
if ( m - > quiesced & & m - > prepared ) {
2011-10-31 20:21:18 +00:00
list_add ( & m - > list , & pool - > prepared_mappings ) ;
wake_worker ( pool ) ;
}
}
static void copy_complete ( int read_err , unsigned long write_err , void * context )
{
unsigned long flags ;
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping * m = context ;
2011-10-31 20:21:18 +00:00
struct pool * pool = m - > tc - > pool ;
m - > err = read_err | | write_err ? - EIO : 0 ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
m - > prepared = 1 ;
__maybe_add_mapping ( m ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
}
static void overwrite_endio ( struct bio * bio , int err )
{
unsigned long flags ;
2012-12-21 20:23:40 +00:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping * m = h - > overwrite_mapping ;
2011-10-31 20:21:18 +00:00
struct pool * pool = m - > tc - > pool ;
m - > err = err ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
m - > prepared = 1 ;
__maybe_add_mapping ( m ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
}
/*----------------------------------------------------------------*/
/*
* Workqueue .
*/
/*
* Prepared mapping jobs .
*/
/*
* This sends the bios in the cell back to the deferred_bios list .
*/
2012-12-21 20:23:33 +00:00
static void cell_defer ( struct thin_c * tc , struct dm_bio_prison_cell * cell )
2011-10-31 20:21:18 +00:00
{
struct pool * pool = tc - > pool ;
unsigned long flags ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
2012-10-12 21:02:10 +01:00
dm_cell_release ( cell , & pool - > deferred_bios ) ;
2011-10-31 20:21:18 +00:00
spin_unlock_irqrestore ( & tc - > pool - > lock , flags ) ;
wake_worker ( pool ) ;
}
/*
2012-12-21 20:23:31 +00:00
* Same as cell_defer except it omits the original holder of the cell .
2011-10-31 20:21:18 +00:00
*/
2012-12-21 20:23:33 +00:00
static void cell_defer_no_holder ( struct thin_c * tc , struct dm_bio_prison_cell * cell )
2011-10-31 20:21:18 +00:00
{
struct pool * pool = tc - > pool ;
unsigned long flags ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
2012-10-12 21:02:10 +01:00
dm_cell_release_no_holder ( cell , & pool - > deferred_bios ) ;
2011-10-31 20:21:18 +00:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
wake_worker ( pool ) ;
}
2012-07-27 15:08:16 +01:00
static void process_prepared_mapping_fail ( struct dm_thin_new_mapping * m )
{
if ( m - > bio )
m - > bio - > bi_end_io = m - > saved_bi_end_io ;
2012-10-12 21:02:10 +01:00
dm_cell_error ( m - > cell ) ;
2012-07-27 15:08:16 +01:00
list_del ( & m - > list ) ;
mempool_free ( m , m - > tc - > pool - > mapping_pool ) ;
}
2012-06-03 00:30:00 +01:00
static void process_prepared_mapping ( struct dm_thin_new_mapping * m )
2011-10-31 20:21:18 +00:00
{
struct thin_c * tc = m - > tc ;
struct bio * bio ;
int r ;
bio = m - > bio ;
if ( bio )
bio - > bi_end_io = m - > saved_bi_end_io ;
if ( m - > err ) {
2012-10-12 21:02:10 +01:00
dm_cell_error ( m - > cell ) ;
2012-07-27 15:08:05 +01:00
goto out ;
2011-10-31 20:21:18 +00:00
}
/*
* Commit the prepared block into the mapping btree .
* Any I / O for this block arriving after this point will get
* remapped to it directly .
*/
r = dm_thin_insert_block ( tc - > td , m - > virt_block , m - > data_block ) ;
if ( r ) {
2012-12-21 20:23:34 +00:00
DMERR_LIMIT ( " dm_thin_insert_block() failed " ) ;
2012-10-12 21:02:10 +01:00
dm_cell_error ( m - > cell ) ;
2012-07-27 15:08:05 +01:00
goto out ;
2011-10-31 20:21:18 +00:00
}
/*
* Release any bios held while the block was being provisioned .
* If we are processing a write bio that completely covers the block ,
* we already processed it so can ignore it now when processing
* the bios in the cell .
*/
if ( bio ) {
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , m - > cell ) ;
2011-10-31 20:21:18 +00:00
bio_endio ( bio , 0 ) ;
} else
2012-12-21 20:23:33 +00:00
cell_defer ( tc , m - > cell ) ;
2011-10-31 20:21:18 +00:00
2012-07-27 15:08:05 +01:00
out :
2011-10-31 20:21:18 +00:00
list_del ( & m - > list ) ;
mempool_free ( m , tc - > pool - > mapping_pool ) ;
}
2012-07-27 15:08:16 +01:00
static void process_prepared_discard_fail ( struct dm_thin_new_mapping * m )
2012-03-28 18:41:28 +01:00
{
struct thin_c * tc = m - > tc ;
2012-07-27 15:08:16 +01:00
bio_io_error ( m - > bio ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , m - > cell ) ;
cell_defer_no_holder ( tc , m - > cell2 ) ;
2012-07-27 15:08:16 +01:00
mempool_free ( m , tc - > pool - > mapping_pool ) ;
}
static void process_prepared_discard_passdown ( struct dm_thin_new_mapping * m )
{
struct thin_c * tc = m - > tc ;
2012-03-28 18:41:28 +01:00
2012-12-21 20:23:31 +00:00
inc_all_io_entry ( tc - > pool , m - > bio ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , m - > cell ) ;
cell_defer_no_holder ( tc , m - > cell2 ) ;
2012-12-21 20:23:31 +00:00
2012-03-28 18:41:28 +01:00
if ( m - > pass_discard )
remap_and_issue ( tc , m - > bio , m - > data_block ) ;
else
bio_endio ( m - > bio , 0 ) ;
mempool_free ( m , tc - > pool - > mapping_pool ) ;
}
2012-07-27 15:08:16 +01:00
static void process_prepared_discard ( struct dm_thin_new_mapping * m )
{
int r ;
struct thin_c * tc = m - > tc ;
r = dm_thin_remove_block ( tc - > td , m - > virt_block ) ;
if ( r )
2012-12-21 20:23:34 +00:00
DMERR_LIMIT ( " dm_thin_remove_block() failed " ) ;
2012-07-27 15:08:16 +01:00
process_prepared_discard_passdown ( m ) ;
}
2012-03-28 18:41:28 +01:00
static void process_prepared ( struct pool * pool , struct list_head * head ,
2012-07-27 15:08:16 +01:00
process_mapping_fn * fn )
2011-10-31 20:21:18 +00:00
{
unsigned long flags ;
struct list_head maps ;
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping * m , * tmp ;
2011-10-31 20:21:18 +00:00
INIT_LIST_HEAD ( & maps ) ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
2012-03-28 18:41:28 +01:00
list_splice_init ( head , & maps ) ;
2011-10-31 20:21:18 +00:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
list_for_each_entry_safe ( m , tmp , & maps , list )
2012-07-27 15:08:16 +01:00
( * fn ) ( m ) ;
2011-10-31 20:21:18 +00:00
}
/*
* Deferred bio jobs .
*/
2012-03-28 18:41:28 +01:00
static int io_overlaps_block ( struct pool * pool , struct bio * bio )
2011-10-31 20:21:18 +00:00
{
2012-07-27 15:08:03 +01:00
return bio - > bi_size = = ( pool - > sectors_per_block < < SECTOR_SHIFT ) ;
2012-03-28 18:41:28 +01:00
}
/*
 * Returns non-zero when @bio is a write whose size is exactly one pool
 * block.
 */
static int io_overwrites_block(struct pool *pool, struct bio *bio)
{
	if (bio_data_dir(bio) != WRITE)
		return 0;

	return io_overlaps_block(pool, bio);
}
/*
 * Store the bio's current end-io handler in *@save and install @fn in
 * its place.
 */
static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
			       bio_end_io_t *fn)
{
	bio_end_io_t *previous = bio->bi_end_io;

	*save = previous;
	bio->bi_end_io = fn;
}
static int ensure_next_mapping ( struct pool * pool )
{
if ( pool - > next_mapping )
return 0 ;
pool - > next_mapping = mempool_alloc ( pool - > mapping_pool , GFP_ATOMIC ) ;
return pool - > next_mapping ? 0 : - ENOMEM ;
}
2012-06-03 00:30:00 +01:00
static struct dm_thin_new_mapping * get_next_mapping ( struct pool * pool )
2011-10-31 20:21:18 +00:00
{
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping * r = pool - > next_mapping ;
2011-10-31 20:21:18 +00:00
BUG_ON ( ! pool - > next_mapping ) ;
pool - > next_mapping = NULL ;
return r ;
}
/*
 * Prepare a new mapping of @virt_block to @data_dest whose contents come
 * from block @data_origin of @origin.
 *
 * The mapping is taken from pool->next_mapping, so that slot must be
 * filled before calling.  @cell is stored in the mapping and is errored
 * if the copy cannot be started.
 */
static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
			  struct dm_dev *origin, dm_block_t data_origin,
			  dm_block_t data_dest,
			  struct dm_bio_prison_cell *cell, struct bio *bio)
{
	struct pool *pool = tc->pool;
	struct dm_thin_new_mapping *m = get_next_mapping(pool);
	struct dm_io_region from, to;
	int r;

	/* Fully initialise the mapping before it is added to the deferred set. */
	INIT_LIST_HEAD(&m->list);
	m->tc = tc;
	m->virt_block = virt_block;
	m->data_block = data_dest;
	m->cell = cell;
	m->bio = NULL;
	m->err = 0;
	m->prepared = 0;
	m->quiesced = 0;

	if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
		m->quiesced = 1;

	/*
	 * IO to pool_dev remaps to the pool target's data_dev.
	 *
	 * If the whole block of data is being overwritten, we can issue the
	 * bio immediately.  Otherwise we use kcopyd to clone the data first.
	 */
	if (io_overwrites_block(pool, bio)) {
		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(*h));

		h->overwrite_mapping = m;
		m->bio = bio;
		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
		inc_all_io_entry(pool, bio);
		remap_and_issue(tc, bio, data_dest);
		return;
	}

	from.bdev = origin->bdev;
	from.sector = data_origin * pool->sectors_per_block;
	from.count = pool->sectors_per_block;

	to.bdev = tc->pool_dev->bdev;
	to.sector = data_dest * pool->sectors_per_block;
	to.count = pool->sectors_per_block;

	r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
			   0, copy_complete, m);
	if (r < 0) {
		mempool_free(m, pool->mapping_pool);
		DMERR_LIMIT(" dm_kcopyd_copy() failed ");
		dm_cell_error(cell);
	}
}
2012-03-28 18:41:28 +01:00
/*
 * Schedule a copy whose source is block @data_origin on the pool device.
 */
static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
				   dm_block_t data_origin, dm_block_t data_dest,
				   struct dm_bio_prison_cell *cell, struct bio *bio)
{
	struct dm_dev *source = tc->pool_dev;

	schedule_copy(tc, virt_block, source,
		      data_origin, data_dest, cell, bio);
}
/*
 * Schedule a copy whose source is the origin device.  The source block
 * number is @virt_block itself.
 */
static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
				   dm_block_t data_dest,
				   struct dm_bio_prison_cell *cell, struct bio *bio)
{
	struct dm_dev *source = tc->origin_dev;

	schedule_copy(tc, virt_block, source,
		      virt_block, data_dest, cell, bio);
}
2011-10-31 20:21:18 +00:00
/*
 * Prepare a new mapping of @virt_block to @data_block for a block with
 * no previous contents.
 *
 * The mapping is taken from pool->next_mapping, so that slot must be
 * filled before calling.  @cell is stored in the mapping and is errored
 * if zeroing cannot be started.
 */
static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
			  dm_block_t data_block, struct dm_bio_prison_cell *cell,
			  struct bio *bio)
{
	struct pool *pool = tc->pool;
	struct dm_thin_new_mapping *m = get_next_mapping(pool);
	struct dm_io_region to;
	int r;

	INIT_LIST_HEAD(&m->list);
	m->tc = tc;
	m->virt_block = virt_block;
	m->data_block = data_block;
	m->cell = cell;
	m->bio = NULL;
	m->err = 0;
	m->prepared = 0;
	m->quiesced = 1;

	/*
	 * If the whole block of data is being overwritten or we are not
	 * zeroing pre-existing data, we can issue the bio immediately.
	 * Otherwise we use kcopyd to zero the data first.
	 */
	if (!pool->pf.zero_new_blocks) {
		process_prepared_mapping(m);
		return;
	}

	if (io_overwrites_block(pool, bio)) {
		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(*h));

		h->overwrite_mapping = m;
		m->bio = bio;
		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
		inc_all_io_entry(pool, bio);
		remap_and_issue(tc, bio, data_block);
		return;
	}

	to.bdev = tc->pool_dev->bdev;
	to.sector = data_block * pool->sectors_per_block;
	to.count = pool->sectors_per_block;

	r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
	if (r < 0) {
		mempool_free(m, pool->mapping_pool);
		DMERR_LIMIT(" dm_kcopyd_zero() failed ");
		dm_cell_error(cell);
	}
}
2012-07-27 15:08:16 +01:00
static int commit ( struct pool * pool )
{
int r ;
r = dm_pool_commit_metadata ( pool - > pmd ) ;
if ( r )
2012-12-21 20:23:34 +00:00
DMERR_LIMIT ( " commit failed: error = %d " , r ) ;
2012-07-27 15:08:16 +01:00
return r ;
}
/*
* A non - zero return indicates read_only or fail_io mode .
* Many callers don ' t care about the return value .
*/
static int commit_or_fallback ( struct pool * pool )
{
int r ;
if ( get_pool_mode ( pool ) ! = PM_WRITE )
return - EINVAL ;
r = commit ( pool ) ;
if ( r )
set_pool_mode ( pool , PM_READ_ONLY ) ;
return r ;
}
2011-10-31 20:21:18 +00:00
/*
 * Allocate a data block from the pool and store it in *@result.
 *
 * A dm event is raised the first time the free block count is at or
 * below the low water mark.  When no blocks are free, one commit is
 * attempted to release space before giving up.
 *
 * Returns 0 on success, -ENOSPC when the pool has no free blocks, or the
 * error returned by the metadata layer.
 */
static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
{
	struct pool *pool = tc->pool;
	dm_block_t free_blocks;
	unsigned long flags;
	int r;

	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
	if (r)
		return r;

	if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
		DMWARN(" %s: reached low water mark, sending event. ",
		       dm_device_name(pool->pool_md));
		spin_lock_irqsave(&pool->lock, flags);
		pool->low_water_triggered = 1;
		spin_unlock_irqrestore(&pool->lock, flags);
		dm_table_event(pool->ti->table);
	}

	if (!free_blocks) {
		/* Already known to be full: skip the checks below. */
		if (pool->no_free_space)
			return -ENOSPC;

		/*
		 * Try to commit to see if that will free up some
		 * more space.
		 */
		(void) commit_or_fallback(pool);

		r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
		if (r)
			return r;

		/*
		 * If we still have no space we set a flag to avoid
		 * doing all this checking and return -ENOSPC.
		 */
		if (!free_blocks) {
			DMWARN(" %s: no free space available. ",
			       dm_device_name(pool->pool_md));
			spin_lock_irqsave(&pool->lock, flags);
			pool->no_free_space = 1;
			spin_unlock_irqrestore(&pool->lock, flags);
			return -ENOSPC;
		}
	}

	return dm_pool_alloc_data_block(pool->pmd, result);
}
/*
* If we have run out of space , queue bios until the device is
* resumed , presumably after having been reloaded with more space .
*/
static void retry_on_resume ( struct bio * bio )
{
2012-12-21 20:23:40 +00:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-03-28 18:41:28 +01:00
struct thin_c * tc = h - > tc ;
2011-10-31 20:21:18 +00:00
struct pool * pool = tc - > pool ;
unsigned long flags ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio_list_add ( & pool - > retry_on_resume_list , bio ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
}
2012-06-03 00:30:00 +01:00
static void no_space ( struct dm_bio_prison_cell * cell )
2011-10-31 20:21:18 +00:00
{
struct bio * bio ;
struct bio_list bios ;
bio_list_init ( & bios ) ;
2012-10-12 21:02:10 +01:00
dm_cell_release ( cell , & bios ) ;
2011-10-31 20:21:18 +00:00
while ( ( bio = bio_list_pop ( & bios ) ) )
retry_on_resume ( bio ) ;
}
2012-03-28 18:41:28 +01:00
static void process_discard ( struct thin_c * tc , struct bio * bio )
{
int r ;
2012-05-12 01:43:16 +01:00
unsigned long flags ;
2012-03-28 18:41:28 +01:00
struct pool * pool = tc - > pool ;
2012-06-03 00:30:00 +01:00
struct dm_bio_prison_cell * cell , * cell2 ;
2012-10-12 21:02:10 +01:00
struct dm_cell_key key , key2 ;
2012-03-28 18:41:28 +01:00
dm_block_t block = get_bio_block ( tc , bio ) ;
struct dm_thin_lookup_result lookup_result ;
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping * m ;
2012-03-28 18:41:28 +01:00
build_virtual_key ( tc - > td , block , & key ) ;
2012-10-12 21:02:10 +01:00
if ( dm_bio_detain ( tc - > pool - > prison , & key , bio , & cell ) )
2012-03-28 18:41:28 +01:00
return ;
r = dm_thin_find_block ( tc - > td , block , 1 , & lookup_result ) ;
switch ( r ) {
case 0 :
/*
* Check nobody is fiddling with this pool block . This can
* happen if someone ' s in the process of breaking sharing
* on this block .
*/
build_data_key ( tc - > td , lookup_result . block , & key2 ) ;
2012-10-12 21:02:10 +01:00
if ( dm_bio_detain ( tc - > pool - > prison , & key2 , bio , & cell2 ) ) {
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell ) ;
2012-03-28 18:41:28 +01:00
break ;
}
if ( io_overlaps_block ( pool , bio ) ) {
/*
* IO may still be going to the destination block . We must
* quiesce before we can do the removal .
*/
m = get_next_mapping ( pool ) ;
m - > tc = tc ;
2012-07-27 15:07:57 +01:00
m - > pass_discard = ( ! lookup_result . shared ) & & pool - > pf . discard_passdown ;
2012-03-28 18:41:28 +01:00
m - > virt_block = block ;
m - > data_block = lookup_result . block ;
m - > cell = cell ;
m - > cell2 = cell2 ;
m - > err = 0 ;
m - > bio = bio ;
2012-10-12 21:02:10 +01:00
if ( ! dm_deferred_set_add_work ( pool - > all_io_ds , & m - > list ) ) {
2012-05-12 01:43:16 +01:00
spin_lock_irqsave ( & pool - > lock , flags ) ;
2012-03-28 18:41:28 +01:00
list_add ( & m - > list , & pool - > prepared_discards ) ;
2012-05-12 01:43:16 +01:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2012-03-28 18:41:28 +01:00
wake_worker ( pool ) ;
}
} else {
2012-12-21 20:23:31 +00:00
inc_all_io_entry ( pool , bio ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell ) ;
cell_defer_no_holder ( tc , cell2 ) ;
2012-12-21 20:23:31 +00:00
2012-03-28 18:41:28 +01:00
/*
2012-07-27 15:08:03 +01:00
* The DM core makes sure that the discard doesn ' t span
* a block boundary . So we submit the discard of a
* partial block appropriately .
2012-03-28 18:41:28 +01:00
*/
2012-07-20 14:25:05 +01:00
if ( ( ! lookup_result . shared ) & & pool - > pf . discard_passdown )
remap_and_issue ( tc , bio , lookup_result . block ) ;
else
bio_endio ( bio , 0 ) ;
2012-03-28 18:41:28 +01:00
}
break ;
case - ENODATA :
/*
* It isn ' t provisioned , just forget it .
*/
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell ) ;
2012-03-28 18:41:28 +01:00
bio_endio ( bio , 0 ) ;
break ;
default :
2012-12-21 20:23:34 +00:00
DMERR_LIMIT ( " %s: dm_thin_find_block() failed: error = %d " ,
__func__ , r ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell ) ;
2012-03-28 18:41:28 +01:00
bio_io_error ( bio ) ;
break ;
}
}
2011-10-31 20:21:18 +00:00
/*
 * Break sharing of the data block that @block currently maps to.
 *
 * A fresh data block is allocated and a copy from the shared block
 * (lookup_result->block) to it is scheduled; @cell and @bio are handed
 * to the copy.
 *
 * @key is not used by this function; it is kept for the callers.
 *
 * Failure handling:
 *  - -ENOSPC: the bios held in @cell are queued for retry on resume.
 *  - any other error: the pool is switched to read-only mode, matching
 *    the same failure path in provision_block(), and the cell is
 *    errored.
 */
static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
			  struct dm_cell_key *key,
			  struct dm_thin_lookup_result *lookup_result,
			  struct dm_bio_prison_cell *cell)
{
	int r;
	dm_block_t data_block;

	r = alloc_data_block(tc, &data_block);
	switch (r) {
	case 0:
		schedule_internal_copy(tc, block, lookup_result->block,
				       data_block, cell, bio);
		break;

	case -ENOSPC:
		no_space(cell);
		break;

	default:
		DMERR_LIMIT(" %s: alloc_data_block() failed: error = %d ",
			    __func__, r);
		/* Stop further writes after an unexpected allocation failure. */
		set_pool_mode(tc->pool, PM_READ_ONLY);
		dm_cell_error(cell);
		break;
	}
}
/*
 * Handle a bio aimed at a data block that the lookup reported as shared.
 *
 * Non-empty writes break sharing; everything else is registered as a
 * shared read and issued to the existing data block.
 */
static void process_shared_bio(struct thin_c *tc, struct bio *bio,
			       dm_block_t block,
			       struct dm_thin_lookup_result *lookup_result)
{
	struct pool *pool = tc->pool;
	struct dm_bio_prison_cell *data_cell;
	struct dm_cell_key data_key;
	struct dm_thin_endio_hook *hook;

	/*
	 * If cell is already occupied, then sharing is already in the process
	 * of being broken so we have nothing further to do here.
	 */
	build_data_key(tc->td, lookup_result->block, &data_key);
	if (dm_bio_detain(pool->prison, &data_key, bio, &data_cell))
		return;

	if (bio_data_dir(bio) == WRITE && bio->bi_size) {
		break_sharing(tc, bio, block, &data_key, lookup_result, data_cell);
		return;
	}

	hook = dm_per_bio_data(bio, sizeof(*hook));
	hook->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
	inc_all_io_entry(pool, bio);
	cell_defer_no_holder(tc, data_cell);

	remap_and_issue(tc, bio, lookup_result->block);
}
/*
 * Deal with a bio that targets an unprovisioned virtual block.
 *
 * Empty bios are remapped straight away and reads are zero-filled;
 * only writes with data cause a new data block to be allocated.
 */
static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block,
			    struct dm_bio_prison_cell *cell)
{
	int rc;
	dm_block_t new_block;

	/*
	 * Remap empty bios (flushes) immediately, without provisioning.
	 */
	if (!bio->bi_size) {
		inc_all_io_entry(tc->pool, bio);
		cell_defer_no_holder(tc, cell);

		remap_and_issue(tc, bio, 0);
		return;
	}

	/*
	 * Fill read bios with zeroes and complete them immediately.
	 */
	if (bio_data_dir(bio) == READ) {
		zero_fill_bio(bio);
		cell_defer_no_holder(tc, cell);
		bio_endio(bio, 0);
		return;
	}

	rc = alloc_data_block(tc, &new_block);

	if (rc == -ENOSPC) {
		no_space(cell);
		return;
	}

	if (rc) {
		DMERR_LIMIT(" %s: alloc_data_block() failed: error = %d ",
			    __func__, rc);
		set_pool_mode(tc->pool, PM_READ_ONLY);
		dm_cell_error(cell);
		return;
	}

	/* With an origin device the new block is copied from it, else zeroed. */
	if (tc->origin_dev)
		schedule_external_copy(tc, block, new_block, cell, bio);
	else
		schedule_zero(tc, block, new_block, cell, bio);
}
static void process_bio ( struct thin_c * tc , struct bio * bio )
{
int r ;
dm_block_t block = get_bio_block ( tc , bio ) ;
2012-06-03 00:30:00 +01:00
struct dm_bio_prison_cell * cell ;
2012-10-12 21:02:10 +01:00
struct dm_cell_key key ;
2011-10-31 20:21:18 +00:00
struct dm_thin_lookup_result lookup_result ;
/*
* If cell is already occupied , then the block is already
* being provisioned so we have nothing further to do here .
*/
build_virtual_key ( tc - > td , block , & key ) ;
2012-10-12 21:02:10 +01:00
if ( dm_bio_detain ( tc - > pool - > prison , & key , bio , & cell ) )
2011-10-31 20:21:18 +00:00
return ;
r = dm_thin_find_block ( tc - > td , block , 1 , & lookup_result ) ;
switch ( r ) {
case 0 :
2012-12-21 20:23:31 +00:00
if ( lookup_result . shared ) {
2011-10-31 20:21:18 +00:00
process_shared_bio ( tc , bio , block , & lookup_result ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell ) ;
2012-12-21 20:23:31 +00:00
} else {
inc_all_io_entry ( tc - > pool , bio ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell ) ;
2012-12-21 20:23:31 +00:00
2011-10-31 20:21:18 +00:00
remap_and_issue ( tc , bio , lookup_result . block ) ;
2012-12-21 20:23:31 +00:00
}
2011-10-31 20:21:18 +00:00
break ;
case - ENODATA :
2012-03-28 18:41:28 +01:00
if ( bio_data_dir ( bio ) = = READ & & tc - > origin_dev ) {
2012-12-21 20:23:31 +00:00
inc_all_io_entry ( tc - > pool , bio ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell ) ;
2012-12-21 20:23:31 +00:00
2012-03-28 18:41:28 +01:00
remap_to_origin_and_issue ( tc , bio ) ;
} else
provision_block ( tc , bio , block , cell ) ;
2011-10-31 20:21:18 +00:00
break ;
default :
2012-12-21 20:23:34 +00:00
DMERR_LIMIT ( " %s: dm_thin_find_block() failed: error = %d " ,
__func__ , r ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell ) ;
2011-10-31 20:21:18 +00:00
bio_io_error ( bio ) ;
break ;
}
}
2012-07-27 15:08:16 +01:00
static void process_bio_read_only ( struct thin_c * tc , struct bio * bio )
{
int r ;
int rw = bio_data_dir ( bio ) ;
dm_block_t block = get_bio_block ( tc , bio ) ;
struct dm_thin_lookup_result lookup_result ;
r = dm_thin_find_block ( tc - > td , block , 1 , & lookup_result ) ;
switch ( r ) {
case 0 :
if ( lookup_result . shared & & ( rw = = WRITE ) & & bio - > bi_size )
bio_io_error ( bio ) ;
2012-12-21 20:23:31 +00:00
else {
inc_all_io_entry ( tc - > pool , bio ) ;
2012-07-27 15:08:16 +01:00
remap_and_issue ( tc , bio , lookup_result . block ) ;
2012-12-21 20:23:31 +00:00
}
2012-07-27 15:08:16 +01:00
break ;
case - ENODATA :
if ( rw ! = READ ) {
bio_io_error ( bio ) ;
break ;
}
if ( tc - > origin_dev ) {
2012-12-21 20:23:31 +00:00
inc_all_io_entry ( tc - > pool , bio ) ;
2012-07-27 15:08:16 +01:00
remap_to_origin_and_issue ( tc , bio ) ;
break ;
}
zero_fill_bio ( bio ) ;
bio_endio ( bio , 0 ) ;
break ;
default :
2012-12-21 20:23:34 +00:00
DMERR_LIMIT ( " %s: dm_thin_find_block() failed: error = %d " ,
__func__ , r ) ;
2012-07-27 15:08:16 +01:00
bio_io_error ( bio ) ;
break ;
}
}
/*
 * Bio handler installed by set_pool_mode() for PM_FAIL: every bio is
 * completed with an I/O error.  @tc is unused.
 */
static void process_bio_fail(struct thin_c *tc, struct bio *bio)
{
	bio_io_error(bio);
}
2012-03-28 18:41:27 +01:00
static int need_commit_due_to_time ( struct pool * pool )
{
return jiffies < pool - > last_commit_jiffies | |
jiffies > pool - > last_commit_jiffies + COMMIT_PERIOD ;
}
2011-10-31 20:21:18 +00:00
static void process_deferred_bios ( struct pool * pool )
{
unsigned long flags ;
struct bio * bio ;
struct bio_list bios ;
bio_list_init ( & bios ) ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio_list_merge ( & bios , & pool - > deferred_bios ) ;
bio_list_init ( & pool - > deferred_bios ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
while ( ( bio = bio_list_pop ( & bios ) ) ) {
2012-12-21 20:23:40 +00:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-03-28 18:41:28 +01:00
struct thin_c * tc = h - > tc ;
2011-10-31 20:21:18 +00:00
/*
* If we ' ve got no free new_mapping structs , and processing
* this bio might require one , we pause until there are some
* prepared mappings to process .
*/
if ( ensure_next_mapping ( pool ) ) {
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio_list_merge ( & pool - > deferred_bios , & bios ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
break ;
}
2012-03-28 18:41:28 +01:00
if ( bio - > bi_rw & REQ_DISCARD )
2012-07-27 15:08:16 +01:00
pool - > process_discard ( tc , bio ) ;
2012-03-28 18:41:28 +01:00
else
2012-07-27 15:08:16 +01:00
pool - > process_bio ( tc , bio ) ;
2011-10-31 20:21:18 +00:00
}
/*
* If there are any deferred flush bios , we must commit
* the metadata before issuing them .
*/
bio_list_init ( & bios ) ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio_list_merge ( & bios , & pool - > deferred_flush_bios ) ;
bio_list_init ( & pool - > deferred_flush_bios ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2012-03-28 18:41:27 +01:00
if ( bio_list_empty ( & bios ) & & ! need_commit_due_to_time ( pool ) )
2011-10-31 20:21:18 +00:00
return ;
2012-07-27 15:08:16 +01:00
if ( commit_or_fallback ( pool ) ) {
2011-10-31 20:21:18 +00:00
while ( ( bio = bio_list_pop ( & bios ) ) )
bio_io_error ( bio ) ;
return ;
}
2012-03-28 18:41:27 +01:00
pool - > last_commit_jiffies = jiffies ;
2011-10-31 20:21:18 +00:00
while ( ( bio = bio_list_pop ( & bios ) ) )
generic_make_request ( bio ) ;
}
static void do_worker ( struct work_struct * ws )
{
struct pool * pool = container_of ( ws , struct pool , worker ) ;
2012-07-27 15:08:16 +01:00
process_prepared ( pool , & pool - > prepared_mappings , & pool - > process_prepared_mapping ) ;
process_prepared ( pool , & pool - > prepared_discards , & pool - > process_prepared_discard ) ;
2011-10-31 20:21:18 +00:00
process_deferred_bios ( pool ) ;
}
2012-03-28 18:41:27 +01:00
/*
* We want to commit periodically so that not too much
* unwritten data builds up .
*/
static void do_waker ( struct work_struct * ws )
{
struct pool * pool = container_of ( to_delayed_work ( ws ) , struct pool , waker ) ;
wake_worker ( pool ) ;
queue_delayed_work ( pool - > wq , & pool - > waker , COMMIT_PERIOD ) ;
}
2011-10-31 20:21:18 +00:00
/*----------------------------------------------------------------*/
2012-07-27 15:08:16 +01:00
static enum pool_mode get_pool_mode ( struct pool * pool )
{
return pool - > pf . mode ;
}
static void set_pool_mode ( struct pool * pool , enum pool_mode mode )
{
int r ;
pool - > pf . mode = mode ;
switch ( mode ) {
case PM_FAIL :
DMERR ( " switching pool to failure mode " ) ;
pool - > process_bio = process_bio_fail ;
pool - > process_discard = process_bio_fail ;
pool - > process_prepared_mapping = process_prepared_mapping_fail ;
pool - > process_prepared_discard = process_prepared_discard_fail ;
break ;
case PM_READ_ONLY :
DMERR ( " switching pool to read-only mode " ) ;
r = dm_pool_abort_metadata ( pool - > pmd ) ;
if ( r ) {
DMERR ( " aborting transaction failed " ) ;
set_pool_mode ( pool , PM_FAIL ) ;
} else {
dm_pool_metadata_read_only ( pool - > pmd ) ;
pool - > process_bio = process_bio_read_only ;
pool - > process_discard = process_discard ;
pool - > process_prepared_mapping = process_prepared_mapping_fail ;
pool - > process_prepared_discard = process_prepared_discard_passdown ;
}
break ;
case PM_WRITE :
pool - > process_bio = process_bio ;
pool - > process_discard = process_discard ;
pool - > process_prepared_mapping = process_prepared_mapping ;
pool - > process_prepared_discard = process_prepared_discard ;
break ;
}
}
/*----------------------------------------------------------------*/
2011-10-31 20:21:18 +00:00
/*
* Mapping functions .
*/
/*
* Called only while mapping a thin bio to hand it over to the workqueue .
*/
static void thin_defer_bio ( struct thin_c * tc , struct bio * bio )
{
unsigned long flags ;
struct pool * pool = tc - > pool ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio_list_add ( & pool - > deferred_bios , bio ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
wake_worker ( pool ) ;
}
2012-12-21 20:23:40 +00:00
static void thin_hook_bio ( struct thin_c * tc , struct bio * bio )
2012-03-28 18:41:28 +01:00
{
2012-12-21 20:23:40 +00:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-03-28 18:41:28 +01:00
h - > tc = tc ;
h - > shared_read_entry = NULL ;
2012-12-21 20:23:31 +00:00
h - > all_io_entry = NULL ;
2012-03-28 18:41:28 +01:00
h - > overwrite_mapping = NULL ;
}
2011-10-31 20:21:18 +00:00
/*
* Non - blocking function called from the thin target ' s map function .
*/
2012-12-21 20:23:41 +00:00
static int thin_bio_map ( struct dm_target * ti , struct bio * bio )
2011-10-31 20:21:18 +00:00
{
int r ;
struct thin_c * tc = ti - > private ;
dm_block_t block = get_bio_block ( tc , bio ) ;
struct dm_thin_device * td = tc - > td ;
struct dm_thin_lookup_result result ;
2012-12-21 20:23:31 +00:00
struct dm_bio_prison_cell * cell1 , * cell2 ;
struct dm_cell_key key ;
2011-10-31 20:21:18 +00:00
2012-12-21 20:23:40 +00:00
thin_hook_bio ( tc , bio ) ;
2012-07-27 15:08:16 +01:00
if ( get_pool_mode ( tc - > pool ) = = PM_FAIL ) {
bio_io_error ( bio ) ;
return DM_MAPIO_SUBMITTED ;
}
2012-03-28 18:41:28 +01:00
if ( bio - > bi_rw & ( REQ_DISCARD | REQ_FLUSH | REQ_FUA ) ) {
2011-10-31 20:21:18 +00:00
thin_defer_bio ( tc , bio ) ;
return DM_MAPIO_SUBMITTED ;
}
r = dm_thin_find_block ( td , block , 0 , & result ) ;
/*
* Note that we defer readahead too .
*/
switch ( r ) {
case 0 :
if ( unlikely ( result . shared ) ) {
/*
* We have a race condition here between the
* result . shared value returned by the lookup and
* snapshot creation , which may cause new
* sharing .
*
* To avoid this always quiesce the origin before
* taking the snap . You want to do this anyway to
* ensure a consistent application view
* ( i . e . lockfs ) .
*
* More distant ancestors are irrelevant . The
* shared flag will be set in their case .
*/
thin_defer_bio ( tc , bio ) ;
2012-12-21 20:23:31 +00:00
return DM_MAPIO_SUBMITTED ;
2011-10-31 20:21:18 +00:00
}
2012-12-21 20:23:31 +00:00
build_virtual_key ( tc - > td , block , & key ) ;
if ( dm_bio_detain ( tc - > pool - > prison , & key , bio , & cell1 ) )
return DM_MAPIO_SUBMITTED ;
build_data_key ( tc - > td , result . block , & key ) ;
if ( dm_bio_detain ( tc - > pool - > prison , & key , bio , & cell2 ) ) {
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell1 ) ;
2012-12-21 20:23:31 +00:00
return DM_MAPIO_SUBMITTED ;
}
inc_all_io_entry ( tc - > pool , bio ) ;
2012-12-21 20:23:33 +00:00
cell_defer_no_holder ( tc , cell2 ) ;
cell_defer_no_holder ( tc , cell1 ) ;
2012-12-21 20:23:31 +00:00
remap ( tc , bio , result . block ) ;
return DM_MAPIO_REMAPPED ;
2011-10-31 20:21:18 +00:00
case - ENODATA :
2012-07-27 15:08:16 +01:00
if ( get_pool_mode ( tc - > pool ) = = PM_READ_ONLY ) {
/*
* This block isn ' t provisioned , and we have no way
* of doing so . Just error it .
*/
bio_io_error ( bio ) ;
2012-12-21 20:23:33 +00:00
return DM_MAPIO_SUBMITTED ;
2012-07-27 15:08:16 +01:00
}
/* fall through */
case - EWOULDBLOCK :
2011-10-31 20:21:18 +00:00
/*
* In future , the failed dm_thin_find_block above could
* provide the hint to load the metadata into cache .
*/
thin_defer_bio ( tc , bio ) ;
2012-12-21 20:23:33 +00:00
return DM_MAPIO_SUBMITTED ;
2012-07-27 15:08:16 +01:00
default :
/*
* Must always call bio_io_error on failure .
* dm_thin_find_block can fail with - EINVAL if the
* pool is switched to fail - io mode .
*/
bio_io_error ( bio ) ;
2012-12-21 20:23:33 +00:00
return DM_MAPIO_SUBMITTED ;
2011-10-31 20:21:18 +00:00
}
}
static int pool_is_congested ( struct dm_target_callbacks * cb , int bdi_bits )
{
int r ;
unsigned long flags ;
struct pool_c * pt = container_of ( cb , struct pool_c , callbacks ) ;
spin_lock_irqsave ( & pt - > pool - > lock , flags ) ;
r = ! bio_list_empty ( & pt - > pool - > retry_on_resume_list ) ;
spin_unlock_irqrestore ( & pt - > pool - > lock , flags ) ;
if ( ! r ) {
struct request_queue * q = bdev_get_queue ( pt - > data_dev - > bdev ) ;
r = bdi_congested ( & q - > backing_dev_info , bdi_bits ) ;
}
return r ;
}
static void __requeue_bios ( struct pool * pool )
{
bio_list_merge ( & pool - > deferred_bios , & pool - > retry_on_resume_list ) ;
bio_list_init ( & pool - > retry_on_resume_list ) ;
}
/*----------------------------------------------------------------
* Binding of control targets to a pool object
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2012-09-26 23:45:46 +01:00
static bool data_dev_supports_discard ( struct pool_c * pt )
{
struct request_queue * q = bdev_get_queue ( pt - > data_dev - > bdev ) ;
return q & & blk_queue_discard ( q ) ;
}
/*
* If discard_passdown was enabled verify that the data device
2012-09-26 23:45:47 +01:00
* supports discards . Disable discard_passdown if not .
2012-09-26 23:45:46 +01:00
*/
2012-09-26 23:45:47 +01:00
static void disable_passdown_if_not_supported ( struct pool_c * pt )
2012-09-26 23:45:46 +01:00
{
2012-09-26 23:45:47 +01:00
struct pool * pool = pt - > pool ;
struct block_device * data_bdev = pt - > data_dev - > bdev ;
struct queue_limits * data_limits = & bdev_get_queue ( data_bdev ) - > limits ;
sector_t block_size = pool - > sectors_per_block < < SECTOR_SHIFT ;
const char * reason = NULL ;
2012-09-26 23:45:46 +01:00
char buf [ BDEVNAME_SIZE ] ;
2012-09-26 23:45:47 +01:00
if ( ! pt - > adjusted_pf . discard_passdown )
2012-09-26 23:45:46 +01:00
return ;
2012-09-26 23:45:47 +01:00
if ( ! data_dev_supports_discard ( pt ) )
reason = " discard unsupported " ;
else if ( data_limits - > max_discard_sectors < pool - > sectors_per_block )
reason = " max discard sectors smaller than a block " ;
2012-09-26 23:45:46 +01:00
2012-09-26 23:45:47 +01:00
else if ( data_limits - > discard_granularity > block_size )
reason = " discard granularity larger than a block " ;
else if ( block_size & ( data_limits - > discard_granularity - 1 ) )
reason = " discard granularity not a factor of block size " ;
if ( reason ) {
DMWARN ( " Data device (%s) %s: Disabling discard passdown. " , bdevname ( data_bdev , buf ) , reason ) ;
pt - > adjusted_pf . discard_passdown = false ;
}
2012-09-26 23:45:46 +01:00
}
2011-10-31 20:21:18 +00:00
static int bind_control_target ( struct pool * pool , struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
2012-07-27 15:08:16 +01:00
/*
* We want to make sure that degraded pools are never upgraded .
*/
enum pool_mode old_mode = pool - > pf . mode ;
2012-09-26 23:45:47 +01:00
enum pool_mode new_mode = pt - > adjusted_pf . mode ;
2012-07-27 15:08:16 +01:00
if ( old_mode > new_mode )
new_mode = old_mode ;
2011-10-31 20:21:18 +00:00
pool - > ti = ti ;
pool - > low_water_blocks = pt - > low_water_blocks ;
2012-09-26 23:45:47 +01:00
pool - > pf = pt - > adjusted_pf ;
2011-10-31 20:21:18 +00:00
2012-09-26 23:45:46 +01:00
set_pool_mode ( pool , new_mode ) ;
2012-05-19 01:01:01 +01:00
2011-10-31 20:21:18 +00:00
return 0 ;
}
static void unbind_control_target ( struct pool * pool , struct dm_target * ti )
{
if ( pool - > ti = = ti )
pool - > ti = NULL ;
}
/*----------------------------------------------------------------
* Pool creation
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2012-03-28 18:41:29 +01:00
/* Initialize pool features. */
static void pool_features_init ( struct pool_features * pf )
{
2012-07-27 15:08:16 +01:00
pf - > mode = PM_WRITE ;
2012-09-26 23:45:46 +01:00
pf - > zero_new_blocks = true ;
pf - > discard_enabled = true ;
pf - > discard_passdown = true ;
2012-03-28 18:41:29 +01:00
}
2011-10-31 20:21:18 +00:00
static void __pool_destroy ( struct pool * pool )
{
__pool_table_remove ( pool ) ;
if ( dm_pool_metadata_close ( pool - > pmd ) < 0 )
DMWARN ( " %s: dm_pool_metadata_close() failed. " , __func__ ) ;
2012-10-12 21:02:10 +01:00
dm_bio_prison_destroy ( pool - > prison ) ;
2011-10-31 20:21:18 +00:00
dm_kcopyd_client_destroy ( pool - > copier ) ;
if ( pool - > wq )
destroy_workqueue ( pool - > wq ) ;
if ( pool - > next_mapping )
mempool_free ( pool - > next_mapping , pool - > mapping_pool ) ;
mempool_destroy ( pool - > mapping_pool ) ;
2012-10-12 21:02:10 +01:00
dm_deferred_set_destroy ( pool - > shared_read_ds ) ;
dm_deferred_set_destroy ( pool - > all_io_ds ) ;
2011-10-31 20:21:18 +00:00
kfree ( pool ) ;
}
2012-06-03 00:30:00 +01:00
/* Slab cache that pool_create() passes to mempool_create_slab_pool() for each pool's mapping_pool. */
static struct kmem_cache * _new_mapping_cache ;
2011-10-31 20:21:18 +00:00
static struct pool * pool_create ( struct mapped_device * pool_md ,
struct block_device * metadata_dev ,
2012-07-27 15:08:16 +01:00
unsigned long block_size ,
int read_only , char * * error )
2011-10-31 20:21:18 +00:00
{
int r ;
void * err_p ;
struct pool * pool ;
struct dm_pool_metadata * pmd ;
2012-07-27 15:08:16 +01:00
bool format_device = read_only ? false : true ;
2011-10-31 20:21:18 +00:00
2012-07-27 15:08:16 +01:00
pmd = dm_pool_metadata_open ( metadata_dev , block_size , format_device ) ;
2011-10-31 20:21:18 +00:00
if ( IS_ERR ( pmd ) ) {
* error = " Error creating metadata object " ;
return ( struct pool * ) pmd ;
}
pool = kmalloc ( sizeof ( * pool ) , GFP_KERNEL ) ;
if ( ! pool ) {
* error = " Error allocating memory for pool " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_pool ;
}
pool - > pmd = pmd ;
pool - > sectors_per_block = block_size ;
2012-07-27 15:08:03 +01:00
if ( block_size & ( block_size - 1 ) )
pool - > sectors_per_block_shift = - 1 ;
else
pool - > sectors_per_block_shift = __ffs ( block_size ) ;
2011-10-31 20:21:18 +00:00
pool - > low_water_blocks = 0 ;
2012-03-28 18:41:29 +01:00
pool_features_init ( & pool - > pf ) ;
2012-10-12 21:02:10 +01:00
pool - > prison = dm_bio_prison_create ( PRISON_CELLS ) ;
2011-10-31 20:21:18 +00:00
if ( ! pool - > prison ) {
* error = " Error creating pool's bio prison " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_prison ;
}
pool - > copier = dm_kcopyd_client_create ( ) ;
if ( IS_ERR ( pool - > copier ) ) {
r = PTR_ERR ( pool - > copier ) ;
* error = " Error creating pool's kcopyd client " ;
err_p = ERR_PTR ( r ) ;
goto bad_kcopyd_client ;
}
/*
* Create singlethreaded workqueue that will service all devices
* that use this metadata .
*/
pool - > wq = alloc_ordered_workqueue ( " dm- " DM_MSG_PREFIX , WQ_MEM_RECLAIM ) ;
if ( ! pool - > wq ) {
* error = " Error creating pool's workqueue " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_wq ;
}
INIT_WORK ( & pool - > worker , do_worker ) ;
2012-03-28 18:41:27 +01:00
INIT_DELAYED_WORK ( & pool - > waker , do_waker ) ;
2011-10-31 20:21:18 +00:00
spin_lock_init ( & pool - > lock ) ;
bio_list_init ( & pool - > deferred_bios ) ;
bio_list_init ( & pool - > deferred_flush_bios ) ;
INIT_LIST_HEAD ( & pool - > prepared_mappings ) ;
2012-03-28 18:41:28 +01:00
INIT_LIST_HEAD ( & pool - > prepared_discards ) ;
2011-10-31 20:21:18 +00:00
pool - > low_water_triggered = 0 ;
pool - > no_free_space = 0 ;
bio_list_init ( & pool - > retry_on_resume_list ) ;
2012-10-12 21:02:10 +01:00
pool - > shared_read_ds = dm_deferred_set_create ( ) ;
if ( ! pool - > shared_read_ds ) {
* error = " Error creating pool's shared read deferred set " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_shared_read_ds ;
}
pool - > all_io_ds = dm_deferred_set_create ( ) ;
if ( ! pool - > all_io_ds ) {
* error = " Error creating pool's all io deferred set " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_all_io_ds ;
}
2011-10-31 20:21:18 +00:00
pool - > next_mapping = NULL ;
2012-06-03 00:30:00 +01:00
pool - > mapping_pool = mempool_create_slab_pool ( MAPPING_POOL_SIZE ,
_new_mapping_cache ) ;
2011-10-31 20:21:18 +00:00
if ( ! pool - > mapping_pool ) {
* error = " Error creating pool's mapping mempool " ;
err_p = ERR_PTR ( - ENOMEM ) ;
goto bad_mapping_pool ;
}
pool - > ref_count = 1 ;
2012-03-28 18:41:27 +01:00
pool - > last_commit_jiffies = jiffies ;
2011-10-31 20:21:18 +00:00
pool - > pool_md = pool_md ;
pool - > md_dev = metadata_dev ;
__pool_table_insert ( pool ) ;
return pool ;
bad_mapping_pool :
2012-10-12 21:02:10 +01:00
dm_deferred_set_destroy ( pool - > all_io_ds ) ;
bad_all_io_ds :
dm_deferred_set_destroy ( pool - > shared_read_ds ) ;
bad_shared_read_ds :
2011-10-31 20:21:18 +00:00
destroy_workqueue ( pool - > wq ) ;
bad_wq :
dm_kcopyd_client_destroy ( pool - > copier ) ;
bad_kcopyd_client :
2012-10-12 21:02:10 +01:00
dm_bio_prison_destroy ( pool - > prison ) ;
2011-10-31 20:21:18 +00:00
bad_prison :
kfree ( pool ) ;
bad_pool :
if ( dm_pool_metadata_close ( pmd ) )
DMWARN ( " %s: dm_pool_metadata_close() failed. " , __func__ ) ;
return err_p ;
}
static void __pool_inc ( struct pool * pool )
{
BUG_ON ( ! mutex_is_locked ( & dm_thin_pool_table . mutex ) ) ;
pool - > ref_count + + ;
}
static void __pool_dec ( struct pool * pool )
{
BUG_ON ( ! mutex_is_locked ( & dm_thin_pool_table . mutex ) ) ;
BUG_ON ( ! pool - > ref_count ) ;
if ( ! - - pool - > ref_count )
__pool_destroy ( pool ) ;
}
static struct pool * __pool_find ( struct mapped_device * pool_md ,
struct block_device * metadata_dev ,
2012-07-27 15:08:16 +01:00
unsigned long block_size , int read_only ,
char * * error , int * created )
2011-10-31 20:21:18 +00:00
{
struct pool * pool = __pool_table_lookup_metadata_dev ( metadata_dev ) ;
if ( pool ) {
2012-07-27 15:07:59 +01:00
if ( pool - > pool_md ! = pool_md ) {
* error = " metadata device already in use by a pool " ;
2011-10-31 20:21:18 +00:00
return ERR_PTR ( - EBUSY ) ;
2012-07-27 15:07:59 +01:00
}
2011-10-31 20:21:18 +00:00
__pool_inc ( pool ) ;
} else {
pool = __pool_table_lookup ( pool_md ) ;
if ( pool ) {
2012-07-27 15:07:59 +01:00
if ( pool - > md_dev ! = metadata_dev ) {
* error = " different pool cannot replace a pool " ;
2011-10-31 20:21:18 +00:00
return ERR_PTR ( - EINVAL ) ;
2012-07-27 15:07:59 +01:00
}
2011-10-31 20:21:18 +00:00
__pool_inc ( pool ) ;
2012-03-28 18:41:29 +01:00
} else {
2012-07-27 15:08:16 +01:00
pool = pool_create ( pool_md , metadata_dev , block_size , read_only , error ) ;
2012-03-28 18:41:29 +01:00
* created = 1 ;
}
2011-10-31 20:21:18 +00:00
}
return pool ;
}
/*----------------------------------------------------------------
* Pool target methods
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void pool_dtr ( struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
mutex_lock ( & dm_thin_pool_table . mutex ) ;
unbind_control_target ( pt - > pool , ti ) ;
__pool_dec ( pt - > pool ) ;
dm_put_device ( ti , pt - > metadata_dev ) ;
dm_put_device ( ti , pt - > data_dev ) ;
kfree ( pt ) ;
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
}
static int parse_pool_features ( struct dm_arg_set * as , struct pool_features * pf ,
struct dm_target * ti )
{
int r ;
unsigned argc ;
const char * arg_name ;
static struct dm_arg _args [ ] = {
2012-03-28 18:41:29 +01:00
{ 0 , 3 , " Invalid number of pool feature arguments " } ,
2011-10-31 20:21:18 +00:00
} ;
/*
* No feature arguments supplied .
*/
if ( ! as - > argc )
return 0 ;
r = dm_read_arg_group ( _args , as , & argc , & ti - > error ) ;
if ( r )
return - EINVAL ;
while ( argc & & ! r ) {
arg_name = dm_shift_arg ( as ) ;
argc - - ;
2012-07-27 15:08:16 +01:00
if ( ! strcasecmp ( arg_name , " skip_block_zeroing " ) )
2012-09-26 23:45:46 +01:00
pf - > zero_new_blocks = false ;
2012-07-27 15:08:16 +01:00
else if ( ! strcasecmp ( arg_name , " ignore_discard " ) )
2012-09-26 23:45:46 +01:00
pf - > discard_enabled = false ;
2012-07-27 15:08:16 +01:00
else if ( ! strcasecmp ( arg_name , " no_discard_passdown " ) )
2012-09-26 23:45:46 +01:00
pf - > discard_passdown = false ;
2011-10-31 20:21:18 +00:00
2012-07-27 15:08:16 +01:00
else if ( ! strcasecmp ( arg_name , " read_only " ) )
pf - > mode = PM_READ_ONLY ;
else {
ti - > error = " Unrecognised pool feature requested " ;
r = - EINVAL ;
break ;
}
2011-10-31 20:21:18 +00:00
}
return r ;
}
/*
 * thin-pool <metadata dev> <data dev>
 *	     <data block size (sectors)>
 *	     <low water mark (blocks)>
 *	     [<#feature args> [<arg>]*]
 *
 * Optional feature arguments are:
 *	     skip_block_zeroing: skips the zeroing of newly-provisioned blocks.
 *	     ignore_discard: disable discard
 *	     no_discard_passdown: don't pass discards down to the data device
 *	     read_only: start the pool in read-only mode
 */
static int pool_ctr ( struct dm_target * ti , unsigned argc , char * * argv )
{
2012-03-28 18:41:29 +01:00
int r , pool_created = 0 ;
2011-10-31 20:21:18 +00:00
struct pool_c * pt ;
struct pool * pool ;
struct pool_features pf ;
struct dm_arg_set as ;
struct dm_dev * data_dev ;
unsigned long block_size ;
dm_block_t low_water_blocks ;
struct dm_dev * metadata_dev ;
sector_t metadata_dev_size ;
2012-03-28 18:41:28 +01:00
char b [ BDEVNAME_SIZE ] ;
2011-10-31 20:21:18 +00:00
/*
* FIXME Remove validation from scope of lock .
*/
mutex_lock ( & dm_thin_pool_table . mutex ) ;
if ( argc < 4 ) {
ti - > error = " Invalid argument count " ;
r = - EINVAL ;
goto out_unlock ;
}
as . argc = argc ;
as . argv = argv ;
r = dm_get_device ( ti , argv [ 0 ] , FMODE_READ | FMODE_WRITE , & metadata_dev ) ;
if ( r ) {
ti - > error = " Error opening metadata block device " ;
goto out_unlock ;
}
metadata_dev_size = i_size_read ( metadata_dev - > bdev - > bd_inode ) > > SECTOR_SHIFT ;
2012-03-28 18:41:28 +01:00
if ( metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING )
DMWARN ( " Metadata device %s is larger than %u sectors: excess space will not be used. " ,
bdevname ( metadata_dev - > bdev , b ) , THIN_METADATA_MAX_SECTORS ) ;
2011-10-31 20:21:18 +00:00
r = dm_get_device ( ti , argv [ 1 ] , FMODE_READ | FMODE_WRITE , & data_dev ) ;
if ( r ) {
ti - > error = " Error getting data device " ;
goto out_metadata ;
}
if ( kstrtoul ( argv [ 2 ] , 10 , & block_size ) | | ! block_size | |
block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS | |
block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS | |
2012-07-27 15:08:02 +01:00
block_size & ( DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1 ) ) {
2011-10-31 20:21:18 +00:00
ti - > error = " Invalid block size " ;
r = - EINVAL ;
goto out ;
}
if ( kstrtoull ( argv [ 3 ] , 10 , ( unsigned long long * ) & low_water_blocks ) ) {
ti - > error = " Invalid low water mark " ;
r = - EINVAL ;
goto out ;
}
/*
* Set default pool features .
*/
2012-03-28 18:41:29 +01:00
pool_features_init ( & pf ) ;
2011-10-31 20:21:18 +00:00
dm_consume_args ( & as , 4 ) ;
r = parse_pool_features ( & as , & pf , ti ) ;
if ( r )
goto out ;
pt = kzalloc ( sizeof ( * pt ) , GFP_KERNEL ) ;
if ( ! pt ) {
r = - ENOMEM ;
goto out ;
}
pool = __pool_find ( dm_table_get_md ( ti - > table ) , metadata_dev - > bdev ,
2012-07-27 15:08:16 +01:00
block_size , pf . mode = = PM_READ_ONLY , & ti - > error , & pool_created ) ;
2011-10-31 20:21:18 +00:00
if ( IS_ERR ( pool ) ) {
r = PTR_ERR ( pool ) ;
goto out_free_pt ;
}
2012-03-28 18:41:29 +01:00
/*
* ' pool_created ' reflects whether this is the first table load .
* Top level discard support is not allowed to be changed after
* initial load . This would require a pool reload to trigger thin
* device changes .
*/
if ( ! pool_created & & pf . discard_enabled ! = pool - > pf . discard_enabled ) {
ti - > error = " Discard support cannot be disabled once enabled " ;
r = - EINVAL ;
goto out_flags_changed ;
}
2011-10-31 20:21:18 +00:00
pt - > pool = pool ;
pt - > ti = ti ;
pt - > metadata_dev = metadata_dev ;
pt - > data_dev = data_dev ;
pt - > low_water_blocks = low_water_blocks ;
2012-09-26 23:45:47 +01:00
pt - > adjusted_pf = pt - > requested_pf = pf ;
2011-10-31 20:21:18 +00:00
ti - > num_flush_requests = 1 ;
2012-09-26 23:45:46 +01:00
2012-03-28 18:41:29 +01:00
/*
* Only need to enable discards if the pool should pass
* them down to the data device . The thin device ' s discard
* processing will cause mappings to be removed from the btree .
*/
if ( pf . discard_enabled & & pf . discard_passdown ) {
ti - > num_discard_requests = 1 ;
2012-09-26 23:45:46 +01:00
2012-03-28 18:41:29 +01:00
/*
* Setting ' discards_supported ' circumvents the normal
* stacking of discard limits ( this keeps the pool and
* thin devices ' discard limits consistent ) .
*/
2012-07-27 15:08:08 +01:00
ti - > discards_supported = true ;
2012-09-26 23:45:39 +01:00
ti - > discard_zeroes_data_unsupported = true ;
2012-03-28 18:41:29 +01:00
}
2011-10-31 20:21:18 +00:00
ti - > private = pt ;
pt - > callbacks . congested_fn = pool_is_congested ;
dm_table_add_target_callbacks ( ti - > table , & pt - > callbacks ) ;
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
return 0 ;
2012-03-28 18:41:29 +01:00
out_flags_changed :
__pool_dec ( pool ) ;
2011-10-31 20:21:18 +00:00
out_free_pt :
kfree ( pt ) ;
out :
dm_put_device ( ti , data_dev ) ;
out_metadata :
dm_put_device ( ti , metadata_dev ) ;
out_unlock :
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
return r ;
}
2012-12-21 20:23:41 +00:00
static int pool_map ( struct dm_target * ti , struct bio * bio )
2011-10-31 20:21:18 +00:00
{
int r ;
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
unsigned long flags ;
/*
* As this is a singleton target , ti - > begin is always zero .
*/
spin_lock_irqsave ( & pool - > lock , flags ) ;
bio - > bi_bdev = pt - > data_dev - > bdev ;
r = DM_MAPIO_REMAPPED ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
return r ;
}
/*
* Retrieves the number of blocks of the data device from
* the superblock and compares it to the actual device size ,
* thus resizing the data device in case it has grown .
*
* This both copes with opening preallocated data devices in the ctr
* being followed by a resume
* - and -
* calling the resume method individually after userspace has
* grown the data device in reaction to a table event .
*/
static int pool_preresume ( struct dm_target * ti )
{
int r ;
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
2012-07-27 15:08:02 +01:00
sector_t data_size = ti - > len ;
dm_block_t sb_data_size ;
2011-10-31 20:21:18 +00:00
/*
* Take control of the pool object .
*/
r = bind_control_target ( pool , ti ) ;
if ( r )
return r ;
2012-07-27 15:08:02 +01:00
( void ) sector_div ( data_size , pool - > sectors_per_block ) ;
2011-10-31 20:21:18 +00:00
r = dm_pool_get_data_dev_size ( pool - > pmd , & sb_data_size ) ;
if ( r ) {
DMERR ( " failed to retrieve data device size " ) ;
return r ;
}
if ( data_size < sb_data_size ) {
DMERR ( " pool target too small, is %llu blocks (expected %llu) " ,
2012-07-27 15:08:02 +01:00
( unsigned long long ) data_size , sb_data_size ) ;
2011-10-31 20:21:18 +00:00
return - EINVAL ;
} else if ( data_size > sb_data_size ) {
r = dm_pool_resize_data_dev ( pool - > pmd , data_size ) ;
if ( r ) {
DMERR ( " failed to resize data device " ) ;
2012-07-27 15:08:16 +01:00
/* FIXME Stricter than necessary: Rollback transaction instead here */
set_pool_mode ( pool , PM_READ_ONLY ) ;
2011-10-31 20:21:18 +00:00
return r ;
}
2012-07-27 15:08:16 +01:00
( void ) commit_or_fallback ( pool ) ;
2011-10-31 20:21:18 +00:00
}
return 0 ;
}
static void pool_resume ( struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
unsigned long flags ;
spin_lock_irqsave ( & pool - > lock , flags ) ;
pool - > low_water_triggered = 0 ;
pool - > no_free_space = 0 ;
__requeue_bios ( pool ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2012-03-28 18:41:27 +01:00
do_waker ( & pool - > waker . work ) ;
2011-10-31 20:21:18 +00:00
}
static void pool_postsuspend ( struct dm_target * ti )
{
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
2012-03-28 18:41:27 +01:00
cancel_delayed_work ( & pool - > waker ) ;
2011-10-31 20:21:18 +00:00
flush_workqueue ( pool - > wq ) ;
2012-07-27 15:08:16 +01:00
( void ) commit_or_fallback ( pool ) ;
2011-10-31 20:21:18 +00:00
}
/*
 * Returns 0 when a message carries exactly args_required arguments;
 * otherwise logs a warning and returns -EINVAL.
 */
static int check_arg_count(unsigned argc, unsigned args_required)
{
	if (argc == args_required)
		return 0;

	DMWARN(" Message received with %u arguments instead of %u. ",
	       argc, args_required);

	return -EINVAL;
}
/*
 * Parses a decimal device id from arg into *dev_id.
 *
 * Returns 0 when the string parses and the value does not exceed
 * MAX_DEV_ID, -EINVAL otherwise.  A warning is logged on failure only when
 * 'warning' is non-zero.
 */
static int read_dev_id(char *arg, dm_thin_id *dev_id, int warning)
{
	int parse_failed = kstrtoull(arg, 10, (unsigned long long *)dev_id);

	if (parse_failed || *dev_id > MAX_DEV_ID) {
		if (warning)
			DMWARN(" Message received with invalid device id: %s ", arg);
		return -EINVAL;
	}

	return 0;
}
static int process_create_thin_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
dm_thin_id dev_id ;
int r ;
r = check_arg_count ( argc , 2 ) ;
if ( r )
return r ;
r = read_dev_id ( argv [ 1 ] , & dev_id , 1 ) ;
if ( r )
return r ;
r = dm_pool_create_thin ( pool - > pmd , dev_id ) ;
if ( r ) {
DMWARN ( " Creation of new thinly-provisioned device with id %s failed. " ,
argv [ 1 ] ) ;
return r ;
}
return 0 ;
}
static int process_create_snap_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
dm_thin_id dev_id ;
dm_thin_id origin_dev_id ;
int r ;
r = check_arg_count ( argc , 3 ) ;
if ( r )
return r ;
r = read_dev_id ( argv [ 1 ] , & dev_id , 1 ) ;
if ( r )
return r ;
r = read_dev_id ( argv [ 2 ] , & origin_dev_id , 1 ) ;
if ( r )
return r ;
r = dm_pool_create_snap ( pool - > pmd , dev_id , origin_dev_id ) ;
if ( r ) {
DMWARN ( " Creation of new snapshot %s of device %s failed. " ,
argv [ 1 ] , argv [ 2 ] ) ;
return r ;
}
return 0 ;
}
static int process_delete_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
dm_thin_id dev_id ;
int r ;
r = check_arg_count ( argc , 2 ) ;
if ( r )
return r ;
r = read_dev_id ( argv [ 1 ] , & dev_id , 1 ) ;
if ( r )
return r ;
r = dm_pool_delete_thin_device ( pool - > pmd , dev_id ) ;
if ( r )
DMWARN ( " Deletion of thin device %s failed. " , argv [ 1 ] ) ;
return r ;
}
static int process_set_transaction_id_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
dm_thin_id old_id , new_id ;
int r ;
r = check_arg_count ( argc , 3 ) ;
if ( r )
return r ;
if ( kstrtoull ( argv [ 1 ] , 10 , ( unsigned long long * ) & old_id ) ) {
DMWARN ( " set_transaction_id message: Unrecognised id %s. " , argv [ 1 ] ) ;
return - EINVAL ;
}
if ( kstrtoull ( argv [ 2 ] , 10 , ( unsigned long long * ) & new_id ) ) {
DMWARN ( " set_transaction_id message: Unrecognised new id %s. " , argv [ 2 ] ) ;
return - EINVAL ;
}
r = dm_pool_set_metadata_transaction_id ( pool - > pmd , old_id , new_id ) ;
if ( r ) {
DMWARN ( " Failed to change transaction id from %s to %s. " ,
argv [ 1 ] , argv [ 2 ] ) ;
return r ;
}
return 0 ;
}
2012-06-03 00:30:01 +01:00
static int process_reserve_metadata_snap_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
int r ;
r = check_arg_count ( argc , 1 ) ;
if ( r )
return r ;
2012-07-27 15:08:16 +01:00
( void ) commit_or_fallback ( pool ) ;
2012-07-03 12:55:31 +01:00
2012-06-03 00:30:01 +01:00
r = dm_pool_reserve_metadata_snap ( pool - > pmd ) ;
if ( r )
DMWARN ( " reserve_metadata_snap message failed. " ) ;
return r ;
}
static int process_release_metadata_snap_mesg ( unsigned argc , char * * argv , struct pool * pool )
{
int r ;
r = check_arg_count ( argc , 1 ) ;
if ( r )
return r ;
r = dm_pool_release_metadata_snap ( pool - > pmd ) ;
if ( r )
DMWARN ( " release_metadata_snap message failed. " ) ;
return r ;
}
2011-10-31 20:21:18 +00:00
/*
* Messages supported :
* create_thin < dev_id >
* create_snap < dev_id > < origin_id >
* delete < dev_id >
* trim <dev_id> <new_size_in_sectors>
*	(listed here, but pool_message() has no handler for it and rejects
*	it as an unrecognised message)
* set_transaction_id < current_trans_id > < new_trans_id >
2012-06-03 00:30:01 +01:00
* reserve_metadata_snap
* release_metadata_snap
2011-10-31 20:21:18 +00:00
*/
static int pool_message ( struct dm_target * ti , unsigned argc , char * * argv )
{
int r = - EINVAL ;
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
if ( ! strcasecmp ( argv [ 0 ] , " create_thin " ) )
r = process_create_thin_mesg ( argc , argv , pool ) ;
else if ( ! strcasecmp ( argv [ 0 ] , " create_snap " ) )
r = process_create_snap_mesg ( argc , argv , pool ) ;
else if ( ! strcasecmp ( argv [ 0 ] , " delete " ) )
r = process_delete_mesg ( argc , argv , pool ) ;
else if ( ! strcasecmp ( argv [ 0 ] , " set_transaction_id " ) )
r = process_set_transaction_id_mesg ( argc , argv , pool ) ;
2012-06-03 00:30:01 +01:00
else if ( ! strcasecmp ( argv [ 0 ] , " reserve_metadata_snap " ) )
r = process_reserve_metadata_snap_mesg ( argc , argv , pool ) ;
else if ( ! strcasecmp ( argv [ 0 ] , " release_metadata_snap " ) )
r = process_release_metadata_snap_mesg ( argc , argv , pool ) ;
2011-10-31 20:21:18 +00:00
else
DMWARN ( " Unrecognised thin pool target message received: %s " , argv [ 0 ] ) ;
2012-07-27 15:08:16 +01:00
if ( ! r )
( void ) commit_or_fallback ( pool ) ;
2011-10-31 20:21:18 +00:00
return r ;
}
2012-07-27 15:08:16 +01:00
static void emit_flags ( struct pool_features * pf , char * result ,
unsigned sz , unsigned maxlen )
{
unsigned count = ! pf - > zero_new_blocks + ! pf - > discard_enabled +
! pf - > discard_passdown + ( pf - > mode = = PM_READ_ONLY ) ;
DMEMIT ( " %u " , count ) ;
if ( ! pf - > zero_new_blocks )
DMEMIT ( " skip_block_zeroing " ) ;
if ( ! pf - > discard_enabled )
DMEMIT ( " ignore_discard " ) ;
if ( ! pf - > discard_passdown )
DMEMIT ( " no_discard_passdown " ) ;
if ( pf - > mode = = PM_READ_ONLY )
DMEMIT ( " read_only " ) ;
}
2011-10-31 20:21:18 +00:00
/*
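* Status line (STATUSTYPE_INFO) is:
*    <transaction id> <used metadata blocks>/<total metadata blocks>
*    <used data blocks>/<total data blocks> <held metadata root | ->
*    <ro | rw> <ignore_discard | discard_passdown | no_discard_passdown>
*
* or the single word "Fail" when the pool is in PM_FAIL mode.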
*/
/*
 * Status callback for the pool target.
 *
 * STATUSTYPE_INFO: emits "Fail" for a failed pool; otherwise optionally
 * commits, then reports the transaction id, metadata and data usage, the
 * held metadata root, the pool mode and the discard configuration.
 * STATUSTYPE_TABLE: emits the table line the target was loaded with.
 *
 * Returns 0, or the error of the first metadata query that fails.
 * The local 'sz' keeps its name because DMEMIT appears to use it.
 */
static int pool_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	int r;
	unsigned sz = 0;
	uint64_t transaction_id;
	dm_block_t nr_free_blocks_data;
	dm_block_t nr_free_blocks_metadata;
	dm_block_t nr_blocks_data;
	dm_block_t nr_blocks_metadata;
	dm_block_t held_root;
	char buf[BDEVNAME_SIZE];
	char buf2[BDEVNAME_SIZE];
	struct pool_c *pt = ti->private;
	struct pool *pool = pt->pool;

	switch (type) {
	case STATUSTYPE_INFO:
		if (get_pool_mode(pool) == PM_FAIL) {
			DMEMIT(" Fail ");
			break;
		}

		/* Commit to ensure statistics aren't out-of-date */
		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
			(void) commit_or_fallback(pool);

		/* Run the queries in order, stopping at the first failure. */
		r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
		if (!r)
			r = dm_pool_get_free_metadata_block_count(pool->pmd,
								  &nr_free_blocks_metadata);
		if (!r)
			r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
		if (!r)
			r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
		if (!r)
			r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
		if (!r)
			r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
		if (r)
			return r;

		DMEMIT(" %llu %llu/%llu %llu/%llu ",
		       (unsigned long long)transaction_id,
		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
		       (unsigned long long)nr_blocks_metadata,
		       (unsigned long long)(nr_blocks_data - nr_free_blocks_data),
		       (unsigned long long)nr_blocks_data);

		if (!held_root)
			DMEMIT(" - ");
		else
			DMEMIT(" %llu ", held_root);

		if (pool->pf.mode != PM_READ_ONLY)
			DMEMIT(" rw ");
		else
			DMEMIT(" ro ");

		if (pool->pf.discard_enabled) {
			if (pool->pf.discard_passdown)
				DMEMIT(" discard_passdown ");
			else
				DMEMIT(" no_discard_passdown ");
		} else {
			DMEMIT(" ignore_discard ");
		}

		break;

	case STATUSTYPE_TABLE:
		DMEMIT(" %s %s %lu %llu ",
		       format_dev_t(buf, pt->metadata_dev->bdev->bd_dev),
		       format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
		       (unsigned long)pool->sectors_per_block,
		       (unsigned long long)pt->low_water_blocks);
		/* Report the features as requested, not as adjusted. */
		emit_flags(&pt->requested_pf, result, sz, maxlen);
		break;
	}

	return 0;
}
/*
 * Invokes fn once for the pool's data device, covering the whole target
 * (offset 0, length ti->len), and returns whatever fn returns.
 */
static int pool_iterate_devices(struct dm_target *ti,
				iterate_devices_callout_fn fn, void *data)
{
	struct pool_c *pt = ti->private;
	struct dm_dev *data_dev = pt->data_dev;

	return fn(ti, data_dev, 0, ti->len, data);
}
static int pool_merge ( struct dm_target * ti , struct bvec_merge_data * bvm ,
struct bio_vec * biovec , int max_size )
{
struct pool_c * pt = ti - > private ;
struct request_queue * q = bdev_get_queue ( pt - > data_dev - > bdev ) ;
if ( ! q - > merge_bvec_fn )
return max_size ;
bvm - > bi_bdev = pt - > data_dev - > bdev ;
return min ( max_size , q - > merge_bvec_fn ( q , bvm , biovec ) ) ;
}
2012-10-12 21:02:07 +01:00
static bool block_size_is_power_of_two ( struct pool * pool )
{
return pool - > sectors_per_block_shift > = 0 ;
}
2012-09-26 23:45:47 +01:00
static void set_discard_limits ( struct pool_c * pt , struct queue_limits * limits )
2012-03-28 18:41:28 +01:00
{
2012-09-26 23:45:47 +01:00
struct pool * pool = pt - > pool ;
struct queue_limits * data_limits ;
2012-03-28 18:41:28 +01:00
limits - > max_discard_sectors = pool - > sectors_per_block ;
/*
2012-09-26 23:45:47 +01:00
* discard_granularity is just a hint , and not enforced .
2012-03-28 18:41:28 +01:00
*/
2012-09-26 23:45:47 +01:00
if ( pt - > adjusted_pf . discard_passdown ) {
data_limits = & bdev_get_queue ( pt - > data_dev - > bdev ) - > limits ;
limits - > discard_granularity = data_limits - > discard_granularity ;
2012-10-12 21:02:07 +01:00
} else if ( block_size_is_power_of_two ( pool ) )
2012-09-26 23:45:47 +01:00
limits - > discard_granularity = pool - > sectors_per_block < < SECTOR_SHIFT ;
2012-10-12 21:02:07 +01:00
else
/*
* Use largest power of 2 that is a factor of sectors_per_block
* but at least DATA_DEV_BLOCK_SIZE_MIN_SECTORS .
*/
limits - > discard_granularity = max ( 1 < < ( ffs ( pool - > sectors_per_block ) - 1 ) ,
DATA_DEV_BLOCK_SIZE_MIN_SECTORS ) < < SECTOR_SHIFT ;
2012-03-28 18:41:28 +01:00
}
2011-10-31 20:21:18 +00:00
static void pool_io_hints ( struct dm_target * ti , struct queue_limits * limits )
{
struct pool_c * pt = ti - > private ;
struct pool * pool = pt - > pool ;
blk_limits_io_min ( limits , 0 ) ;
blk_limits_io_opt ( limits , pool - > sectors_per_block < < SECTOR_SHIFT ) ;
2012-09-26 23:45:47 +01:00
/*
* pt - > adjusted_pf is a staging area for the actual features to use .
* They get transferred to the live pool in bind_control_target ( )
* called from pool_preresume ( ) .
*/
if ( ! pt - > adjusted_pf . discard_enabled )
return ;
disable_passdown_if_not_supported ( pt ) ;
set_discard_limits ( pt , limits ) ;
2011-10-31 20:21:18 +00:00
}
/*
 * Target type descriptor for the thin-pool target.  It names the target,
 * sets its feature flags and version, and wires up the pool_* callbacks
 * (constructor, destructor, map, suspend/resume hooks, message, status,
 * merge, iterate_devices and io_hints).
 */
static struct target_type pool_target = {
. name = " thin-pool " ,
. features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE ,
2012-12-21 20:23:32 +00:00
. version = { 1 , 6 , 0 } ,
2011-10-31 20:21:18 +00:00
. module = THIS_MODULE ,
. ctr = pool_ctr ,
. dtr = pool_dtr ,
. map = pool_map ,
. postsuspend = pool_postsuspend ,
. preresume = pool_preresume ,
. resume = pool_resume ,
. message = pool_message ,
. status = pool_status ,
. merge = pool_merge ,
. iterate_devices = pool_iterate_devices ,
. io_hints = pool_io_hints ,
} ;
/*----------------------------------------------------------------
* Thin target methods
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void thin_dtr ( struct dm_target * ti )
{
struct thin_c * tc = ti - > private ;
mutex_lock ( & dm_thin_pool_table . mutex ) ;
__pool_dec ( tc - > pool ) ;
dm_pool_close_thin_device ( tc - > td ) ;
dm_put_device ( ti , tc - > pool_dev ) ;
2012-03-28 18:41:28 +01:00
if ( tc - > origin_dev )
dm_put_device ( ti , tc - > origin_dev ) ;
2011-10-31 20:21:18 +00:00
kfree ( tc ) ;
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
}
/*
* Thin target parameters :
*
2012-03-28 18:41:28 +01:00
* < pool_dev > < dev_id > [ origin_dev ]
2011-10-31 20:21:18 +00:00
*
* pool_dev : the path to the pool ( eg , / dev / mapper / my_pool )
* dev_id : the internal device identifier
2012-03-28 18:41:28 +01:00
* origin_dev : a device external to the pool that should act as the origin
2012-03-28 18:41:29 +01:00
*
* If the pool device has discards disabled , they get disabled for the thin
* device as well .
2011-10-31 20:21:18 +00:00
*/
static int thin_ctr ( struct dm_target * ti , unsigned argc , char * * argv )
{
int r ;
struct thin_c * tc ;
2012-03-28 18:41:28 +01:00
struct dm_dev * pool_dev , * origin_dev ;
2011-10-31 20:21:18 +00:00
struct mapped_device * pool_md ;
mutex_lock ( & dm_thin_pool_table . mutex ) ;
2012-03-28 18:41:28 +01:00
if ( argc ! = 2 & & argc ! = 3 ) {
2011-10-31 20:21:18 +00:00
ti - > error = " Invalid argument count " ;
r = - EINVAL ;
goto out_unlock ;
}
tc = ti - > private = kzalloc ( sizeof ( * tc ) , GFP_KERNEL ) ;
if ( ! tc ) {
ti - > error = " Out of memory " ;
r = - ENOMEM ;
goto out_unlock ;
}
2012-03-28 18:41:28 +01:00
if ( argc = = 3 ) {
r = dm_get_device ( ti , argv [ 2 ] , FMODE_READ , & origin_dev ) ;
if ( r ) {
ti - > error = " Error opening origin device " ;
goto bad_origin_dev ;
}
tc - > origin_dev = origin_dev ;
}
2011-10-31 20:21:18 +00:00
r = dm_get_device ( ti , argv [ 0 ] , dm_table_get_mode ( ti - > table ) , & pool_dev ) ;
if ( r ) {
ti - > error = " Error opening pool device " ;
goto bad_pool_dev ;
}
tc - > pool_dev = pool_dev ;
if ( read_dev_id ( argv [ 1 ] , ( unsigned long long * ) & tc - > dev_id , 0 ) ) {
ti - > error = " Invalid device id " ;
r = - EINVAL ;
goto bad_common ;
}
pool_md = dm_get_md ( tc - > pool_dev - > bdev - > bd_dev ) ;
if ( ! pool_md ) {
ti - > error = " Couldn't get pool mapped device " ;
r = - EINVAL ;
goto bad_common ;
}
tc - > pool = __pool_table_lookup ( pool_md ) ;
if ( ! tc - > pool ) {
ti - > error = " Couldn't find pool object " ;
r = - EINVAL ;
goto bad_pool_lookup ;
}
__pool_inc ( tc - > pool ) ;
2012-07-27 15:08:16 +01:00
if ( get_pool_mode ( tc - > pool ) = = PM_FAIL ) {
ti - > error = " Couldn't open thin device, Pool is in fail mode " ;
goto bad_thin_open ;
}
2011-10-31 20:21:18 +00:00
r = dm_pool_open_thin_device ( tc - > pool - > pmd , tc - > dev_id , & tc - > td ) ;
if ( r ) {
ti - > error = " Couldn't open thin internal device " ;
goto bad_thin_open ;
}
2012-07-27 15:08:00 +01:00
r = dm_set_target_max_io_len ( ti , tc - > pool - > sectors_per_block ) ;
if ( r )
goto bad_thin_open ;
2011-10-31 20:21:18 +00:00
ti - > num_flush_requests = 1 ;
2012-07-27 15:08:07 +01:00
ti - > flush_supported = true ;
2012-12-21 20:23:40 +00:00
ti - > per_bio_data_size = sizeof ( struct dm_thin_endio_hook ) ;
2012-03-28 18:41:29 +01:00
/* In case the pool supports discards, pass them on. */
if ( tc - > pool - > pf . discard_enabled ) {
2012-07-27 15:08:08 +01:00
ti - > discards_supported = true ;
2012-03-28 18:41:29 +01:00
ti - > num_discard_requests = 1 ;
2012-07-27 15:08:08 +01:00
ti - > discard_zeroes_data_unsupported = true ;
2012-07-27 15:08:03 +01:00
/* Discard requests must be split on a block boundary */
2012-07-27 15:08:08 +01:00
ti - > split_discard_requests = true ;
2012-03-28 18:41:29 +01:00
}
2011-10-31 20:21:18 +00:00
dm_put ( pool_md ) ;
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
return 0 ;
bad_thin_open :
__pool_dec ( tc - > pool ) ;
bad_pool_lookup :
dm_put ( pool_md ) ;
bad_common :
dm_put_device ( ti , tc - > pool_dev ) ;
bad_pool_dev :
2012-03-28 18:41:28 +01:00
if ( tc - > origin_dev )
dm_put_device ( ti , tc - > origin_dev ) ;
bad_origin_dev :
2011-10-31 20:21:18 +00:00
kfree ( tc ) ;
out_unlock :
mutex_unlock ( & dm_thin_pool_table . mutex ) ;
return r ;
}
2012-12-21 20:23:41 +00:00
static int thin_map ( struct dm_target * ti , struct bio * bio )
2011-10-31 20:21:18 +00:00
{
2012-03-28 18:41:28 +01:00
bio - > bi_sector = dm_target_offset ( ti , bio - > bi_sector ) ;
2011-10-31 20:21:18 +00:00
2012-12-21 20:23:41 +00:00
return thin_bio_map ( ti , bio ) ;
2011-10-31 20:21:18 +00:00
}
2012-12-21 20:23:41 +00:00
static int thin_endio ( struct dm_target * ti , struct bio * bio , int err )
2012-03-28 18:41:28 +01:00
{
unsigned long flags ;
2012-12-21 20:23:40 +00:00
struct dm_thin_endio_hook * h = dm_per_bio_data ( bio , sizeof ( struct dm_thin_endio_hook ) ) ;
2012-03-28 18:41:28 +01:00
struct list_head work ;
2012-06-03 00:30:00 +01:00
struct dm_thin_new_mapping * m , * tmp ;
2012-03-28 18:41:28 +01:00
struct pool * pool = h - > tc - > pool ;
if ( h - > shared_read_entry ) {
INIT_LIST_HEAD ( & work ) ;
2012-10-12 21:02:10 +01:00
dm_deferred_entry_dec ( h - > shared_read_entry , & work ) ;
2012-03-28 18:41:28 +01:00
spin_lock_irqsave ( & pool - > lock , flags ) ;
list_for_each_entry_safe ( m , tmp , & work , list ) {
list_del ( & m - > list ) ;
m - > quiesced = 1 ;
__maybe_add_mapping ( m ) ;
}
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
}
2012-03-28 18:41:28 +01:00
if ( h - > all_io_entry ) {
INIT_LIST_HEAD ( & work ) ;
2012-10-12 21:02:10 +01:00
dm_deferred_entry_dec ( h - > all_io_entry , & work ) ;
2012-12-21 20:23:31 +00:00
if ( ! list_empty ( & work ) ) {
spin_lock_irqsave ( & pool - > lock , flags ) ;
list_for_each_entry_safe ( m , tmp , & work , list )
list_add ( & m - > list , & pool - > prepared_discards ) ;
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
wake_worker ( pool ) ;
}
2012-03-28 18:41:28 +01:00
}
2012-03-28 18:41:28 +01:00
return 0 ;
}
2011-10-31 20:21:18 +00:00
static void thin_postsuspend ( struct dm_target * ti )
{
if ( dm_noflush_suspending ( ti ) )
requeue_io ( ( struct thin_c * ) ti - > private ) ;
}
/*
* < nr mapped sectors > < highest mapped sector >
*/
/*
 * Status callback for the thin target.
 *
 * Emits "Fail" when the pool is in PM_FAIL mode and "-" when no thin
 * device is open.  Otherwise STATUSTYPE_INFO reports the mapped sector
 * count and the highest mapped sector (or "-" when
 * dm_thin_get_highest_mapped_block() returns 0), and STATUSTYPE_TABLE
 * reports the pool device, the device id and the optional origin device.
 *
 * Returns 0, or the error code of a failing metadata query.
 * The local 'sz' keeps its name because DMEMIT appears to use it.
 */
static int thin_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	int r;
	ssize_t sz = 0;
	dm_block_t mapped, highest;
	char buf[BDEVNAME_SIZE];
	struct thin_c *tc = ti->private;

	if (get_pool_mode(tc->pool) == PM_FAIL) {
		DMEMIT(" Fail ");
		return 0;
	}

	if (!tc->td) {
		DMEMIT(" - ");
		return 0;
	}

	switch (type) {
	case STATUSTYPE_INFO:
		r = dm_thin_get_mapped_count(tc->td, &mapped);
		if (r)
			return r;

		/* Negative is an error; zero leads to the "-" output below. */
		r = dm_thin_get_highest_mapped_block(tc->td, &highest);
		if (r < 0)
			return r;

		DMEMIT(" %llu ", mapped * tc->pool->sectors_per_block);
		if (!r)
			DMEMIT(" - ");
		else
			DMEMIT(" %llu ", ((highest + 1) *
					  tc->pool->sectors_per_block) - 1);
		break;

	case STATUSTYPE_TABLE:
		DMEMIT(" %s %lu ",
		       format_dev_t(buf, tc->pool_dev->bdev->bd_dev),
		       (unsigned long)tc->dev_id);
		if (tc->origin_dev)
			DMEMIT(" %s ", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
		break;
	}

	return 0;
}
/*
 * Invokes fn for the pool device over the whole-block part of the bound
 * pool target.  Returns 0 without calling fn when no pool target is bound
 * or when the pool target is shorter than one block.
 */
static int thin_iterate_devices(struct dm_target *ti,
				iterate_devices_callout_fn fn, void *data)
{
	struct thin_c *tc = ti->private;
	struct pool *pool = tc->pool;
	sector_t blocks;

	/*
	 * We can't call dm_pool_get_data_dev_size() since that blocks.  So
	 * we follow a more convoluted path through to the pool's target.
	 */
	if (!pool->ti)
		return 0;	/* nothing is bound */

	blocks = pool->ti->len;
	(void) sector_div(blocks, pool->sectors_per_block);
	if (!blocks)
		return 0;

	return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
}
/*
 * Target type descriptor for the thin target.  It names the target, sets
 * its version, and wires up the thin_* callbacks (constructor, destructor,
 * map, end_io, postsuspend, status and iterate_devices).
 */
static struct target_type thin_target = {
. name = " thin " ,
2013-01-31 14:11:14 +00:00
. version = { 1 , 7 , 0 } ,
2011-10-31 20:21:18 +00:00
. module = THIS_MODULE ,
. ctr = thin_ctr ,
. dtr = thin_dtr ,
. map = thin_map ,
2012-03-28 18:41:28 +01:00
. end_io = thin_endio ,
2011-10-31 20:21:18 +00:00
. postsuspend = thin_postsuspend ,
. status = thin_status ,
. iterate_devices = thin_iterate_devices ,
} ;
/*----------------------------------------------------------------*/
/*
 * Module init: initialises the pool table, registers the thin and
 * thin-pool targets and creates the slab cache for new mappings.
 *
 * Returns 0 on success.  On failure the targets registered so far are
 * unregistered again and the error code is returned (-ENOMEM when the
 * cache cannot be created).
 */
static int __init dm_thin_init(void)
{
	int r;

	pool_table_init();

	r = dm_register_target(&thin_target);
	if (r)
		return r;

	r = dm_register_target(&pool_target);
	if (r)
		goto bad_pool_target;

	_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
	if (_new_mapping_cache)
		return 0;

	/* Cache creation failed: undo both registrations. */
	r = -ENOMEM;
	dm_unregister_target(&pool_target);
bad_pool_target:
	dm_unregister_target(&thin_target);

	return r;
}
static void dm_thin_exit ( void )
{
dm_unregister_target ( & thin_target ) ;
dm_unregister_target ( & pool_target ) ;
2012-06-03 00:30:00 +01:00
kmem_cache_destroy ( _new_mapping_cache ) ;
2011-10-31 20:21:18 +00:00
}
/*
 * Module entry/exit hooks and module metadata (description, author,
 * licence).
 */
module_init ( dm_thin_init ) ;
module_exit ( dm_thin_exit ) ;
2012-05-12 01:43:19 +01:00
MODULE_DESCRIPTION ( DM_NAME " thin provisioning target " ) ;
2011-10-31 20:21:18 +00:00
MODULE_AUTHOR ( " Joe Thornber <dm-devel@redhat.com> " ) ;
MODULE_LICENSE ( " GPL " ) ;