2011-11-01 00:21:18 +04:00
/*
2012-07-27 18:08:15 +04:00
* Copyright ( C ) 2011 - 2012 Red Hat , Inc .
2011-11-01 00:21:18 +04:00
*
* This file is released under the GPL .
*/
# include "dm-thin-metadata.h"
# include "persistent-data/dm-btree.h"
# include "persistent-data/dm-space-map.h"
# include "persistent-data/dm-space-map-disk.h"
# include "persistent-data/dm-transaction-manager.h"
# include <linux/list.h>
# include <linux/device-mapper.h>
# include <linux/workqueue.h>
/*--------------------------------------------------------------------------
* As far as the metadata goes , there is :
*
* - A superblock in block zero , taking up fewer than 512 bytes for
* atomic writes .
*
* - A space map managing the metadata blocks .
*
* - A space map managing the data blocks .
*
* - A btree mapping our internal thin dev ids onto struct disk_device_details .
*
 * - A hierarchical btree, with 2 levels which effectively maps (thin
 *   dev id, virtual block) -> block_time.  Block time is a 64-bit
 *   field holding the time in the low 24 bits, and block in the top 40
 *   bits.
*
* BTrees consist solely of btree_nodes , that fill a block . Some are
* internal nodes , as such their values are a __le64 pointing to other
* nodes . Leaf nodes can store data of any reasonable size ( ie . much
* smaller than the block size ) . The nodes consist of the header ,
* followed by an array of keys , followed by an array of values . We have
* to binary search on the keys so they ' re all held together to help the
* cpu cache .
*
* Space maps have 2 btrees :
*
* - One maps a uint64_t onto a struct index_entry . Which points to a
* bitmap block , and has some details about how many free entries there
* are etc .
*
* - The bitmap blocks have a header ( for the checksum ) . Then the rest
* of the block is pairs of bits . With the meaning being :
*
* 0 - ref count is 0
* 1 - ref count is 1
* 2 - ref count is 2
* 3 - ref count is higher than 2
*
* - If the count is higher than 2 then the ref count is entered in a
* second btree that directly maps the block_address to a uint32_t ref
* count .
*
* The space map metadata variant doesn ' t have a bitmaps btree . Instead
* it has one single blocks worth of index_entries . This avoids
* recursive issues with the bitmap btree needing to allocate space in
 * order to insert.  With a small data block size such as 64k the
 * metadata can support data devices that are hundreds of terabytes.
*
* The space maps allocate space linearly from front to back . Space that
* is freed in a transaction is never recycled within that transaction .
* To try and avoid fragmenting _free_ space the allocator always goes
* back and fills in gaps .
*
* All metadata io is in THIN_METADATA_BLOCK_SIZE sized / aligned chunks
* from the block manager .
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
# define DM_MSG_PREFIX "thin metadata"
# define THIN_SUPERBLOCK_MAGIC 27022010
# define THIN_SUPERBLOCK_LOCATION 0
2014-02-14 20:58:41 +04:00
# define THIN_VERSION 2
2011-11-01 00:21:18 +04:00
# define SECTOR_TO_BLOCK_SHIFT 3
2012-07-27 18:07:58 +04:00
/*
 * For btree insert:
 *  3 for btree insert +
 *  2 for btree lookup used within space map
 * For btree remove:
 *  2 for shadow spine +
 *  4 for rebalance 3 child node
 */
2017-12-12 13:21:40 +03:00
# define THIN_MAX_CONCURRENT_LOCKS 6
2012-07-27 18:07:58 +04:00
2011-11-01 00:21:18 +04:00
/* This should be plenty */
# define SPACE_MAP_ROOT_SIZE 128
/*
 * On-disk superblock.  Every multi-byte field is little endian and the
 * structure is packed, so neither the order nor the width of any field
 * may change.
 */
struct thin_disk_superblock {
	__le32 csum;	/* Checksum of superblock except for this field. */
	__le32 flags;
	__le64 blocknr;	/* This block number, dm_block_t. */

	__u8 uuid[16];
	__le64 magic;
	__le32 version;
	__le32 time;

	__le64 trans_id;

	/* Root held by userspace transactions. */
	__le64 held_root;

	/* Serialised roots of the two space maps. */
	__u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* 2-level btree mapping (dev_id, (dev block, time)) -> data block */
	__le64 data_mapping_root;

	/* Device detail root mapping dev_id -> device_details */
	__le64 device_details_root;

	__le32 data_block_size;		/* In 512-byte sectors. */

	__le32 metadata_block_size;	/* In 512-byte sectors. */
	__le64 metadata_nr_blocks;

	__le32 compat_flags;
	__le32 compat_ro_flags;
	__le32 incompat_flags;
} __packed;
/*
 * On-disk value of the device details btree (thin dev id -> details).
 * Little endian, packed.
 */
struct disk_device_details {
	__le64 mapped_blocks;
	__le64 transaction_id;		/* When created. */
	__le32 creation_time;
	__le32 snapshotted_time;
} __packed;
struct dm_pool_metadata {
struct hlist_node hash ;
struct block_device * bdev ;
struct dm_block_manager * bm ;
struct dm_space_map * metadata_sm ;
struct dm_space_map * data_sm ;
struct dm_transaction_manager * tm ;
struct dm_transaction_manager * nb_tm ;
/*
* Two - level btree .
* First level holds thin_dev_t .
* Second level holds mappings .
*/
struct dm_btree_info info ;
/*
* Non - blocking version of the above .
*/
struct dm_btree_info nb_info ;
/*
* Just the top level for deleting whole devices .
*/
struct dm_btree_info tl_info ;
/*
* Just the bottom level for creating new devices .
*/
struct dm_btree_info bl_info ;
/*
* Describes the device details btree .
*/
struct dm_btree_info details_info ;
struct rw_semaphore root_lock ;
uint32_t time ;
dm_block_t root ;
dm_block_t details_root ;
struct list_head thin_devices ;
uint64_t trans_id ;
unsigned long flags ;
sector_t data_block_size ;
2012-07-27 18:08:15 +04:00
2019-12-04 17:07:41 +03:00
/*
* Pre - commit callback .
*
* This allows the thin provisioning target to run a callback before
* the metadata are committed .
*/
dm_pool_pre_commit_fn pre_commit_fn ;
void * pre_commit_context ;
2018-09-10 18:50:09 +03:00
/*
* We reserve a section of the metadata for commit overhead .
* All reported space does * not * include this .
*/
dm_block_t metadata_reserve ;
2012-07-27 18:08:15 +04:00
/*
* Set if a transaction has to be aborted but the attempt to roll back
* to the previous ( good ) transaction failed . The only pool metadata
* operation possible in this state is the closing of the device .
*/
bool fail_io : 1 ;
2014-03-27 18:13:23 +04:00
2019-04-18 17:29:48 +03:00
/*
* Set once a thin - pool has been accessed through one of the interfaces
* that imply the pool is in - service ( e . g . thin devices created / deleted ,
* thin - pool message , metadata snapshots , etc ) .
*/
bool in_service : 1 ;
2014-03-27 18:13:23 +04:00
/*
* Reading the space map roots can fail , so we read it into these
* buffers before the superblock is locked and updated .
*/
__u8 data_space_map_root [ SPACE_MAP_ROOT_SIZE ] ;
__u8 metadata_space_map_root [ SPACE_MAP_ROOT_SIZE ] ;
2011-11-01 00:21:18 +04:00
} ;
struct dm_thin_device {
struct list_head list ;
struct dm_pool_metadata * pmd ;
dm_thin_id id ;
int open_count ;
2012-07-27 18:08:15 +04:00
bool changed : 1 ;
bool aborted_with_changes : 1 ;
2011-11-01 00:21:18 +04:00
uint64_t mapped_blocks ;
uint64_t transaction_id ;
uint32_t creation_time ;
uint32_t snapshotted_time ;
} ;
/*----------------------------------------------------------------
* superblock validator
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
# define SUPERBLOCK_CSUM_XOR 160774
/*
 * 'prepare_for_write' hook of the superblock validator: record the block's
 * own location in it, then checksum everything that follows the csum field.
 */
static void sb_prepare_for_write(struct dm_block_validator *v,
				 struct dm_block *b,
				 size_t block_size)
{
	struct thin_disk_superblock *sb = dm_block_data(b);
	size_t csum_len = block_size - sizeof(__le32);

	sb->blocknr = cpu_to_le64(dm_block_location(b));
	sb->csum = cpu_to_le32(dm_bm_checksum(&sb->flags, csum_len,
					      SUPERBLOCK_CSUM_XOR));
}
/*
 * 'check' hook of the superblock validator.
 *
 * Returns 0 if the block is a plausible superblock, -ENOTBLK if the block
 * number stored in it does not match where it was read from, and -EILSEQ
 * if the magic or the checksum is wrong.
 */
static int sb_check(struct dm_block_validator *v,
		    struct dm_block *b,
		    size_t block_size)
{
	struct thin_disk_superblock *sb = dm_block_data(b);
	__le32 computed_le;

	if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) {
		DMERR("sb_check failed: blocknr %llu: "
		      "wanted %llu", le64_to_cpu(sb->blocknr),
		      (unsigned long long)dm_block_location(b));
		return -ENOTBLK;
	}

	if (le64_to_cpu(sb->magic) != THIN_SUPERBLOCK_MAGIC) {
		DMERR("sb_check failed: magic %llu: "
		      "wanted %llu", le64_to_cpu(sb->magic),
		      (unsigned long long)THIN_SUPERBLOCK_MAGIC);
		return -EILSEQ;
	}

	/* The checksum covers the block from 'flags' onwards. */
	computed_le = cpu_to_le32(dm_bm_checksum(&sb->flags,
						 block_size - sizeof(__le32),
						 SUPERBLOCK_CSUM_XOR));
	if (computed_le == sb->csum)
		return 0;

	DMERR("sb_check failed: csum %u: wanted %u",
	      le32_to_cpu(computed_le), le32_to_cpu(sb->csum));
	return -EILSEQ;
}
/* Validator handed to the block manager for every superblock access. */
static struct dm_block_validator sb_validator = {
	.name			= "superblock",
	.prepare_for_write	= sb_prepare_for_write,
	.check			= sb_check,
};
/*----------------------------------------------------------------
* Methods for the btree value types
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/*
 * Build a 64-bit btree value from a data block number and a time: the
 * block is shifted above the low 24 bits, which carry the time.
 */
static uint64_t pack_block_time(dm_block_t b, uint32_t t)
{
	uint64_t packed = b << 24;

	packed |= t;
	return packed;
}
/*
 * Inverse of pack_block_time(): the low 24 bits of @v are returned in @t,
 * the remaining upper bits in @b.
 */
static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
{
	*t = v & ((1 << 24) - 1);
	*b = v >> 24;
}
2021-04-13 13:03:45 +03:00
/*
 * It's more efficient to call dm_sm_{inc,dec}_blocks as few times as
 * possible.  'with_runs' walks @count packed block/time values, groups
 * consecutive block numbers into half-open runs [begin, end) and calls
 * @fn once per run.  The return value of @fn is not examined.
 */
typedef int (*run_fn)(struct dm_space_map *, dm_block_t, dm_block_t);

static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned count, run_fn fn)
{
	uint64_t b, begin, end;
	uint32_t t;
	bool in_run = false;
	unsigned i;

	for (i = 0; i < count; i++, value_le++) {
		/* We know value_le is 8 byte aligned */
		unpack_block_time(le64_to_cpu(*value_le), &b, &t);

		/* Block continues the current run: just grow it. */
		if (in_run && b == end) {
			end++;
			continue;
		}

		/* Otherwise flush the run in progress, if any ... */
		if (in_run)
			fn(sm, begin, end);

		/* ... and start a fresh one at this block. */
		in_run = true;
		begin = b;
		end = b + 1;
	}

	if (in_run)
		fn(sm, begin, end);
}
2021-04-13 13:03:45 +03:00
static void data_block_inc ( void * context , const void * value_le , unsigned count )
2011-11-01 00:21:18 +04:00
{
2021-04-13 13:03:45 +03:00
with_runs ( ( struct dm_space_map * ) context ,
( const __le64 * ) value_le , count , dm_sm_inc_blocks ) ;
}
2011-11-01 00:21:18 +04:00
2021-04-13 13:03:45 +03:00
/*
 * btree value-type 'dec' method for data mappings: @context is the data
 * space map, @value_le an array of @count packed block/time values.
 */
static void data_block_dec(void *context, const void *value_le, unsigned count)
{
	struct dm_space_map *sm = context;
	const __le64 *values = value_le;

	with_runs(sm, values, count, dm_sm_dec_blocks);
}
2013-03-02 02:45:47 +04:00
static int data_block_equal ( void * context , const void * value1_le , const void * value2_le )
2011-11-01 00:21:18 +04:00
{
__le64 v1_le , v2_le ;
uint64_t b1 , b2 ;
uint32_t t ;
memcpy ( & v1_le , value1_le , sizeof ( v1_le ) ) ;
memcpy ( & v2_le , value2_le , sizeof ( v2_le ) ) ;
unpack_block_time ( le64_to_cpu ( v1_le ) , & b1 , & t ) ;
unpack_block_time ( le64_to_cpu ( v2_le ) , & b2 , & t ) ;
return b1 = = b2 ;
}
2021-04-13 13:03:45 +03:00
static void subtree_inc ( void * context , const void * value , unsigned count )
2011-11-01 00:21:18 +04:00
{
struct dm_btree_info * info = context ;
2021-04-13 13:03:45 +03:00
const __le64 * root_le = value ;
unsigned i ;
2011-11-01 00:21:18 +04:00
2021-04-13 13:03:45 +03:00
for ( i = 0 ; i < count ; i + + , root_le + + )
dm_tm_inc ( info - > tm , le64_to_cpu ( * root_le ) ) ;
2011-11-01 00:21:18 +04:00
}
2021-04-13 13:03:45 +03:00
static void subtree_dec ( void * context , const void * value , unsigned count )
2011-11-01 00:21:18 +04:00
{
struct dm_btree_info * info = context ;
2021-04-13 13:03:45 +03:00
const __le64 * root_le = value ;
unsigned i ;
2011-11-01 00:21:18 +04:00
2021-04-13 13:03:45 +03:00
for ( i = 0 ; i < count ; i + + , root_le + + )
if ( dm_btree_del ( info , le64_to_cpu ( * root_le ) ) )
DMERR ( " btree delete failed " ) ;
2011-11-01 00:21:18 +04:00
}
2013-03-02 02:45:47 +04:00
static int subtree_equal ( void * context , const void * value1_le , const void * value2_le )
2011-11-01 00:21:18 +04:00
{
__le64 v1_le , v2_le ;
memcpy ( & v1_le , value1_le , sizeof ( v1_le ) ) ;
memcpy ( & v2_le , value2_le , sizeof ( v2_le ) ) ;
return v1_le = = v2_le ;
}
/*----------------------------------------------------------------*/
2019-04-18 17:29:48 +03:00
/*
* Variant that is used for in - core only changes or code that
* shouldn ' t put the pool in service on its own ( e . g . commit ) .
*/
2020-01-13 19:18:51 +03:00
static inline void pmd_write_lock_in_core ( struct dm_pool_metadata * pmd )
2019-04-15 23:54:36 +03:00
__acquires ( pmd - > root_lock )
{
down_write ( & pmd - > root_lock ) ;
}
static inline void pmd_write_lock ( struct dm_pool_metadata * pmd )
{
2020-01-13 19:18:51 +03:00
pmd_write_lock_in_core ( pmd ) ;
2019-04-18 17:29:48 +03:00
if ( unlikely ( ! pmd - > in_service ) )
pmd - > in_service = true ;
2019-04-15 23:54:36 +03:00
}
/* Drop the write hold on root_lock taken by one of the lock helpers. */
static inline void pmd_write_unlock(struct dm_pool_metadata *pmd)
	__releases(pmd->root_lock)
{
	up_write(&pmd->root_lock);
}
/*----------------------------------------------------------------*/
2012-07-27 18:08:09 +04:00
static int superblock_lock_zero ( struct dm_pool_metadata * pmd ,
struct dm_block * * sblock )
{
return dm_bm_write_lock_zero ( pmd - > bm , THIN_SUPERBLOCK_LOCATION ,
& sb_validator , sblock ) ;
}
static int superblock_lock ( struct dm_pool_metadata * pmd ,
struct dm_block * * sblock )
{
return dm_bm_write_lock ( pmd - > bm , THIN_SUPERBLOCK_LOCATION ,
& sb_validator , sblock ) ;
}
2012-07-27 18:08:10 +04:00
static int __superblock_all_zeroes ( struct dm_block_manager * bm , int * result )
2011-11-01 00:21:18 +04:00
{
int r ;
unsigned i ;
struct dm_block * b ;
__le64 * data_le , zero = cpu_to_le64 ( 0 ) ;
unsigned block_size = dm_bm_block_size ( bm ) / sizeof ( __le64 ) ;
/*
* We can ' t use a validator here - it may be all zeroes .
*/
r = dm_bm_read_lock ( bm , THIN_SUPERBLOCK_LOCATION , NULL , & b ) ;
if ( r )
return r ;
data_le = dm_block_data ( b ) ;
* result = 1 ;
for ( i = 0 ; i < block_size ; i + + ) {
if ( data_le [ i ] ! = zero ) {
* result = 0 ;
break ;
}
}
2015-10-22 23:46:59 +03:00
dm_bm_unlock ( b ) ;
return 0 ;
2011-11-01 00:21:18 +04:00
}
2012-07-27 18:08:08 +04:00
static void __setup_btree_details ( struct dm_pool_metadata * pmd )
{
pmd - > info . tm = pmd - > tm ;
pmd - > info . levels = 2 ;
pmd - > info . value_type . context = pmd - > data_sm ;
pmd - > info . value_type . size = sizeof ( __le64 ) ;
pmd - > info . value_type . inc = data_block_inc ;
pmd - > info . value_type . dec = data_block_dec ;
pmd - > info . value_type . equal = data_block_equal ;
memcpy ( & pmd - > nb_info , & pmd - > info , sizeof ( pmd - > nb_info ) ) ;
pmd - > nb_info . tm = pmd - > nb_tm ;
pmd - > tl_info . tm = pmd - > tm ;
pmd - > tl_info . levels = 1 ;
2012-12-22 00:23:32 +04:00
pmd - > tl_info . value_type . context = & pmd - > bl_info ;
2012-07-27 18:08:08 +04:00
pmd - > tl_info . value_type . size = sizeof ( __le64 ) ;
pmd - > tl_info . value_type . inc = subtree_inc ;
pmd - > tl_info . value_type . dec = subtree_dec ;
pmd - > tl_info . value_type . equal = subtree_equal ;
pmd - > bl_info . tm = pmd - > tm ;
pmd - > bl_info . levels = 1 ;
pmd - > bl_info . value_type . context = pmd - > data_sm ;
pmd - > bl_info . value_type . size = sizeof ( __le64 ) ;
pmd - > bl_info . value_type . inc = data_block_inc ;
pmd - > bl_info . value_type . dec = data_block_dec ;
pmd - > bl_info . value_type . equal = data_block_equal ;
pmd - > details_info . tm = pmd - > tm ;
pmd - > details_info . levels = 1 ;
pmd - > details_info . value_type . context = NULL ;
pmd - > details_info . value_type . size = sizeof ( struct disk_device_details ) ;
pmd - > details_info . value_type . inc = NULL ;
pmd - > details_info . value_type . dec = NULL ;
pmd - > details_info . value_type . equal = NULL ;
}
2014-03-27 18:13:23 +04:00
static int save_sm_roots ( struct dm_pool_metadata * pmd )
{
int r ;
size_t len ;
r = dm_sm_root_size ( pmd - > metadata_sm , & len ) ;
if ( r < 0 )
return r ;
r = dm_sm_copy_root ( pmd - > metadata_sm , & pmd - > metadata_space_map_root , len ) ;
if ( r < 0 )
return r ;
r = dm_sm_root_size ( pmd - > data_sm , & len ) ;
if ( r < 0 )
return r ;
return dm_sm_copy_root ( pmd - > data_sm , & pmd - > data_space_map_root , len ) ;
}
static void copy_sm_roots ( struct dm_pool_metadata * pmd ,
struct thin_disk_superblock * disk )
{
memcpy ( & disk - > metadata_space_map_root ,
& pmd - > metadata_space_map_root ,
sizeof ( pmd - > metadata_space_map_root ) ) ;
memcpy ( & disk - > data_space_map_root ,
& pmd - > data_space_map_root ,
sizeof ( pmd - > data_space_map_root ) ) ;
}
2012-07-27 18:08:10 +04:00
static int __write_initial_superblock ( struct dm_pool_metadata * pmd )
{
int r ;
struct dm_block * sblock ;
struct thin_disk_superblock * disk_super ;
2021-10-18 13:11:05 +03:00
sector_t bdev_size = bdev_nr_sectors ( pmd - > bdev ) ;
2012-07-27 18:08:10 +04:00
if ( bdev_size > THIN_METADATA_MAX_SECTORS )
bdev_size = THIN_METADATA_MAX_SECTORS ;
2014-03-27 18:13:23 +04:00
r = dm_sm_commit ( pmd - > data_sm ) ;
2012-07-27 18:08:11 +04:00
if ( r < 0 )
return r ;
2017-05-15 16:43:05 +03:00
r = dm_tm_pre_commit ( pmd - > tm ) ;
2012-07-27 18:08:11 +04:00
if ( r < 0 )
return r ;
2017-05-15 16:43:05 +03:00
r = save_sm_roots ( pmd ) ;
2012-07-27 18:08:11 +04:00
if ( r < 0 )
return r ;
2012-07-27 18:08:10 +04:00
r = superblock_lock_zero ( pmd , & sblock ) ;
if ( r )
return r ;
disk_super = dm_block_data ( sblock ) ;
2012-07-27 18:08:11 +04:00
disk_super - > flags = 0 ;
2012-07-27 18:08:11 +04:00
memset ( disk_super - > uuid , 0 , sizeof ( disk_super - > uuid ) ) ;
2012-07-27 18:08:10 +04:00
disk_super - > magic = cpu_to_le64 ( THIN_SUPERBLOCK_MAGIC ) ;
disk_super - > version = cpu_to_le32 ( THIN_VERSION ) ;
disk_super - > time = 0 ;
2012-07-27 18:08:11 +04:00
disk_super - > trans_id = 0 ;
disk_super - > held_root = 0 ;
2014-03-27 18:13:23 +04:00
copy_sm_roots ( pmd , disk_super ) ;
2012-07-27 18:08:11 +04:00
disk_super - > data_mapping_root = cpu_to_le64 ( pmd - > root ) ;
disk_super - > device_details_root = cpu_to_le64 ( pmd - > details_root ) ;
2014-02-13 08:58:15 +04:00
disk_super - > metadata_block_size = cpu_to_le32 ( THIN_METADATA_BLOCK_SIZE ) ;
2012-07-27 18:08:10 +04:00
disk_super - > metadata_nr_blocks = cpu_to_le64 ( bdev_size > > SECTOR_TO_BLOCK_SHIFT ) ;
disk_super - > data_block_size = cpu_to_le32 ( pmd - > data_block_size ) ;
2012-07-27 18:08:11 +04:00
return dm_tm_commit ( pmd - > tm , sblock ) ;
2012-07-27 18:08:10 +04:00
}
2012-07-27 18:08:12 +04:00
static int __format_metadata ( struct dm_pool_metadata * pmd )
2011-11-01 00:21:18 +04:00
{
int r ;
2012-07-27 18:08:09 +04:00
2012-07-27 18:08:12 +04:00
r = dm_tm_create_with_sm ( pmd - > bm , THIN_SUPERBLOCK_LOCATION ,
& pmd - > tm , & pmd - > metadata_sm ) ;
if ( r < 0 ) {
DMERR ( " tm_create_with_sm failed " ) ;
return r ;
}
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:12 +04:00
pmd - > data_sm = dm_sm_disk_create ( pmd - > tm , 0 ) ;
2012-07-27 18:08:12 +04:00
if ( IS_ERR ( pmd - > data_sm ) ) {
DMERR ( " sm_disk_create failed " ) ;
r = PTR_ERR ( pmd - > data_sm ) ;
2012-07-27 18:08:14 +04:00
goto bad_cleanup_tm ;
2011-11-01 00:21:18 +04:00
}
2012-07-27 18:08:12 +04:00
pmd - > nb_tm = dm_tm_create_non_blocking_clone ( pmd - > tm ) ;
2011-11-01 00:21:18 +04:00
if ( ! pmd - > nb_tm ) {
2012-07-27 18:08:14 +04:00
DMERR ( " could not create non-blocking clone tm " ) ;
2011-11-01 00:21:18 +04:00
r = - ENOMEM ;
2012-07-27 18:08:14 +04:00
goto bad_cleanup_data_sm ;
2011-11-01 00:21:18 +04:00
}
2012-07-27 18:08:08 +04:00
__setup_btree_details ( pmd ) ;
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:10 +04:00
r = dm_btree_empty ( & pmd - > info , & pmd - > root ) ;
if ( r < 0 )
2012-07-27 18:08:14 +04:00
goto bad_cleanup_nb_tm ;
2012-07-27 18:08:10 +04:00
r = dm_btree_empty ( & pmd - > details_info , & pmd - > details_root ) ;
if ( r < 0 ) {
DMERR ( " couldn't create devices root " ) ;
2012-07-27 18:08:14 +04:00
goto bad_cleanup_nb_tm ;
2012-07-27 18:08:10 +04:00
}
r = __write_initial_superblock ( pmd ) ;
if ( r )
2012-07-27 18:08:14 +04:00
goto bad_cleanup_nb_tm ;
2012-07-27 18:08:10 +04:00
2011-11-01 00:21:18 +04:00
return 0 ;
2012-07-27 18:08:14 +04:00
bad_cleanup_nb_tm :
dm_tm_destroy ( pmd - > nb_tm ) ;
bad_cleanup_data_sm :
2012-07-27 18:08:12 +04:00
dm_sm_destroy ( pmd - > data_sm ) ;
2012-07-27 18:08:14 +04:00
bad_cleanup_tm :
2012-07-27 18:08:12 +04:00
dm_tm_destroy ( pmd - > tm ) ;
dm_sm_destroy ( pmd - > metadata_sm ) ;
2011-11-01 00:21:18 +04:00
return r ;
}
2012-07-27 18:08:13 +04:00
static int __check_incompat_features ( struct thin_disk_superblock * disk_super ,
struct dm_pool_metadata * pmd )
{
uint32_t features ;
features = le32_to_cpu ( disk_super - > incompat_flags ) & ~ THIN_FEATURE_INCOMPAT_SUPP ;
if ( features ) {
DMERR ( " could not access metadata due to unsupported optional features (%lx). " ,
( unsigned long ) features ) ;
return - EINVAL ;
}
/*
* Check for read - only metadata to skip the following RDWR checks .
*/
2021-01-09 13:42:49 +03:00
if ( bdev_read_only ( pmd - > bdev ) )
2012-07-27 18:08:13 +04:00
return 0 ;
features = le32_to_cpu ( disk_super - > compat_ro_flags ) & ~ THIN_FEATURE_COMPAT_RO_SUPP ;
if ( features ) {
DMERR ( " could not access metadata RDWR due to unsupported optional features (%lx). " ,
( unsigned long ) features ) ;
return - EINVAL ;
}
return 0 ;
}
2012-07-27 18:08:12 +04:00
static int __open_metadata ( struct dm_pool_metadata * pmd )
{
int r ;
struct dm_block * sblock ;
struct thin_disk_superblock * disk_super ;
r = dm_bm_read_lock ( pmd - > bm , THIN_SUPERBLOCK_LOCATION ,
& sb_validator , & sblock ) ;
if ( r < 0 ) {
DMERR ( " couldn't read superblock " ) ;
return r ;
}
disk_super = dm_block_data ( sblock ) ;
2012-07-27 18:08:13 +04:00
2014-07-15 00:35:54 +04:00
/* Verify the data block size hasn't changed */
if ( le32_to_cpu ( disk_super - > data_block_size ) ! = pmd - > data_block_size ) {
DMERR ( " changing the data block size (from %u to %llu) is not supported " ,
le32_to_cpu ( disk_super - > data_block_size ) ,
( unsigned long long ) pmd - > data_block_size ) ;
r = - EINVAL ;
goto bad_unlock_sblock ;
}
2012-07-27 18:08:13 +04:00
r = __check_incompat_features ( disk_super , pmd ) ;
2012-07-27 18:08:14 +04:00
if ( r < 0 )
goto bad_unlock_sblock ;
2012-07-27 18:08:13 +04:00
2012-07-27 18:08:12 +04:00
r = dm_tm_open_with_sm ( pmd - > bm , THIN_SUPERBLOCK_LOCATION ,
disk_super - > metadata_space_map_root ,
sizeof ( disk_super - > metadata_space_map_root ) ,
& pmd - > tm , & pmd - > metadata_sm ) ;
if ( r < 0 ) {
DMERR ( " tm_open_with_sm failed " ) ;
2012-07-27 18:08:14 +04:00
goto bad_unlock_sblock ;
2012-07-27 18:08:12 +04:00
}
pmd - > data_sm = dm_sm_disk_open ( pmd - > tm , disk_super - > data_space_map_root ,
sizeof ( disk_super - > data_space_map_root ) ) ;
if ( IS_ERR ( pmd - > data_sm ) ) {
DMERR ( " sm_disk_open failed " ) ;
r = PTR_ERR ( pmd - > data_sm ) ;
2012-07-27 18:08:14 +04:00
goto bad_cleanup_tm ;
2012-07-27 18:08:12 +04:00
}
pmd - > nb_tm = dm_tm_create_non_blocking_clone ( pmd - > tm ) ;
if ( ! pmd - > nb_tm ) {
2012-07-27 18:08:14 +04:00
DMERR ( " could not create non-blocking clone tm " ) ;
2012-07-27 18:08:12 +04:00
r = - ENOMEM ;
2012-07-27 18:08:14 +04:00
goto bad_cleanup_data_sm ;
2012-07-27 18:08:12 +04:00
}
dm thin: Use last transaction's pmd->root when commit failed
Recently we found a softlock up problem in dm thin pool btree lookup
code due to corrupted metadata:
Kernel panic - not syncing: softlockup: hung tasks
CPU: 7 PID: 2669225 Comm: kworker/u16:3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
Workqueue: dm-thin do_worker [dm_thin_pool]
Call Trace:
<IRQ>
dump_stack+0x9c/0xd3
panic+0x35d/0x6b9
watchdog_timer_fn.cold+0x16/0x25
__run_hrtimer+0xa2/0x2d0
</IRQ>
RIP: 0010:__relink_lru+0x102/0x220 [dm_bufio]
__bufio_new+0x11f/0x4f0 [dm_bufio]
new_read+0xa3/0x1e0 [dm_bufio]
dm_bm_read_lock+0x33/0xd0 [dm_persistent_data]
ro_step+0x63/0x100 [dm_persistent_data]
btree_lookup_raw.constprop.0+0x44/0x220 [dm_persistent_data]
dm_btree_lookup+0x16f/0x210 [dm_persistent_data]
dm_thin_find_block+0x12c/0x210 [dm_thin_pool]
__process_bio_read_only+0xc5/0x400 [dm_thin_pool]
process_thin_deferred_bios+0x1a4/0x4a0 [dm_thin_pool]
process_one_work+0x3c5/0x730
Following process may generate a broken btree mixed with fresh and
stale btree nodes, which could get dm thin trapped in an infinite loop
while looking up data block:
Transaction 1: pmd->root = A, A->B->C // One path in btree
pmd->root = X, X->Y->Z // Copy-up
Transaction 2: X,Z is updated on disk, Y write failed.
// Commit failed, dm thin becomes read-only.
process_bio_read_only
dm_thin_find_block
__find_block
dm_btree_lookup(pmd->root)
The pmd->root points to a broken btree, Y may contain stale node
pointing to any block, for example X, which gets dm thin trapped into
a dead loop while looking up Z.
Fix this by setting pmd->root in __open_metadata(), so that dm thin
will use the last transaction's pmd->root if commit failed.
Fetch a reproducer in [Link].
Linke: https://bugzilla.kernel.org/show_bug.cgi?id=216790
Cc: stable@vger.kernel.org
Fixes: 991d9fa02da0 ("dm: add thin provisioning target")
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Acked-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
2022-12-08 17:28:02 +03:00
/*
* For pool metadata opening process , root setting is redundant
* because it will be set again in __begin_transaction ( ) . But dm
* pool aborting process really needs to get last transaction ' s
* root to avoid accessing broken btree .
*/
pmd - > root = le64_to_cpu ( disk_super - > data_mapping_root ) ;
pmd - > details_root = le64_to_cpu ( disk_super - > device_details_root ) ;
2012-07-27 18:08:12 +04:00
__setup_btree_details ( pmd ) ;
2015-10-22 23:46:59 +03:00
dm_bm_unlock ( sblock ) ;
return 0 ;
2012-07-27 18:08:12 +04:00
2012-07-27 18:08:14 +04:00
bad_cleanup_data_sm :
2012-07-27 18:08:12 +04:00
dm_sm_destroy ( pmd - > data_sm ) ;
2012-07-27 18:08:14 +04:00
bad_cleanup_tm :
2012-07-27 18:08:12 +04:00
dm_tm_destroy ( pmd - > tm ) ;
dm_sm_destroy ( pmd - > metadata_sm ) ;
2012-07-27 18:08:14 +04:00
bad_unlock_sblock :
dm_bm_unlock ( sblock ) ;
2012-07-27 18:08:12 +04:00
return r ;
}
2012-07-27 18:08:14 +04:00
/*
 * Open the metadata if the superblock holds anything but zeroes.  A blank
 * device is formatted when @format_device is set and refused with -EPERM
 * otherwise.
 */
static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device)
{
	int r, unformatted;

	r = __superblock_all_zeroes(pmd->bm, &unformatted);
	if (r)
		return r;

	if (!unformatted)
		return __open_metadata(pmd);

	if (!format_device)
		return -EPERM;

	return __format_metadata(pmd);
}
2012-07-27 18:08:14 +04:00
/*
 * Create the block manager on pmd->bdev and then open (or format) the
 * metadata through it.
 *
 * On any failure pmd->bm is left NULL and a negative errno is returned.
 */
static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device)
{
	int r;

	pmd->bm = dm_block_manager_create(pmd->bdev,
					  THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
					  THIN_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(pmd->bm)) {
		DMERR("could not create block manager");
		r = PTR_ERR(pmd->bm);
		pmd->bm = NULL;
		return r;
	}

	r = __open_or_format_metadata(pmd, format_device);
	if (!r)
		return 0;

	/* Don't leave a block manager behind that nothing is using. */
	dm_block_manager_destroy(pmd->bm);
	pmd->bm = NULL;

	return r;
}
2022-11-30 16:31:34 +03:00
/*
 * Destroy both space maps and both transaction managers; the block
 * manager is destroyed too only when @destroy_bm is set.
 */
static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
					      bool destroy_bm)
{
	dm_sm_destroy(pmd->data_sm);
	dm_sm_destroy(pmd->metadata_sm);
	dm_tm_destroy(pmd->nb_tm);
	dm_tm_destroy(pmd->tm);

	if (!destroy_bm)
		return;

	dm_block_manager_destroy(pmd->bm);
}
2011-11-01 00:21:18 +04:00
static int __begin_transaction ( struct dm_pool_metadata * pmd )
{
int r ;
struct thin_disk_superblock * disk_super ;
struct dm_block * sblock ;
/*
* We re - read the superblock every time . Shouldn ' t need to do this
* really .
*/
r = dm_bm_read_lock ( pmd - > bm , THIN_SUPERBLOCK_LOCATION ,
& sb_validator , & sblock ) ;
if ( r )
return r ;
disk_super = dm_block_data ( sblock ) ;
pmd - > time = le32_to_cpu ( disk_super - > time ) ;
pmd - > root = le64_to_cpu ( disk_super - > data_mapping_root ) ;
pmd - > details_root = le64_to_cpu ( disk_super - > device_details_root ) ;
pmd - > trans_id = le64_to_cpu ( disk_super - > trans_id ) ;
pmd - > flags = le32_to_cpu ( disk_super - > flags ) ;
pmd - > data_block_size = le32_to_cpu ( disk_super - > data_block_size ) ;
dm_bm_unlock ( sblock ) ;
2012-07-27 18:08:13 +04:00
return 0 ;
2011-11-01 00:21:18 +04:00
}
static int __write_changed_details ( struct dm_pool_metadata * pmd )
{
int r ;
struct dm_thin_device * td , * tmp ;
struct disk_device_details details ;
uint64_t key ;
list_for_each_entry_safe ( td , tmp , & pmd - > thin_devices , list ) {
if ( ! td - > changed )
continue ;
key = td - > id ;
details . mapped_blocks = cpu_to_le64 ( td - > mapped_blocks ) ;
details . transaction_id = cpu_to_le64 ( td - > transaction_id ) ;
details . creation_time = cpu_to_le32 ( td - > creation_time ) ;
details . snapshotted_time = cpu_to_le32 ( td - > snapshotted_time ) ;
__dm_bless_for_disk ( & details ) ;
r = dm_btree_insert ( & pmd - > details_info , pmd - > details_root ,
& key , & details , & pmd - > details_root ) ;
if ( r )
return r ;
if ( td - > open_count )
2019-12-24 09:38:03 +03:00
td - > changed = false ;
2011-11-01 00:21:18 +04:00
else {
list_del ( & td - > list ) ;
kfree ( td ) ;
}
}
return 0 ;
}
static int __commit_transaction ( struct dm_pool_metadata * pmd )
{
int r ;
struct thin_disk_superblock * disk_super ;
struct dm_block * sblock ;
/*
* We need to know if the thin_disk_superblock exceeds a 512 - byte sector .
*/
BUILD_BUG_ON ( sizeof ( struct thin_disk_superblock ) > 512 ) ;
2020-01-13 19:18:51 +03:00
BUG_ON ( ! rwsem_is_locked ( & pmd - > root_lock ) ) ;
2011-11-01 00:21:18 +04:00
2019-04-18 17:29:48 +03:00
if ( unlikely ( ! pmd - > in_service ) )
return 0 ;
2019-12-04 17:07:41 +03:00
if ( pmd - > pre_commit_fn ) {
r = pmd - > pre_commit_fn ( pmd - > pre_commit_context ) ;
if ( r < 0 ) {
DMERR ( " pre-commit callback failed " ) ;
return r ;
}
}
2011-11-01 00:21:18 +04:00
r = __write_changed_details ( pmd ) ;
if ( r < 0 )
2012-07-27 18:07:58 +04:00
return r ;
2011-11-01 00:21:18 +04:00
r = dm_sm_commit ( pmd - > data_sm ) ;
if ( r < 0 )
2012-07-27 18:07:58 +04:00
return r ;
2011-11-01 00:21:18 +04:00
r = dm_tm_pre_commit ( pmd - > tm ) ;
if ( r < 0 )
2012-07-27 18:07:58 +04:00
return r ;
2011-11-01 00:21:18 +04:00
2014-03-27 18:13:23 +04:00
r = save_sm_roots ( pmd ) ;
if ( r < 0 )
return r ;
2012-07-27 18:08:09 +04:00
r = superblock_lock ( pmd , & sblock ) ;
2011-11-01 00:21:18 +04:00
if ( r )
2012-07-27 18:07:58 +04:00
return r ;
2011-11-01 00:21:18 +04:00
disk_super = dm_block_data ( sblock ) ;
disk_super - > time = cpu_to_le32 ( pmd - > time ) ;
disk_super - > data_mapping_root = cpu_to_le64 ( pmd - > root ) ;
disk_super - > device_details_root = cpu_to_le64 ( pmd - > details_root ) ;
disk_super - > trans_id = cpu_to_le64 ( pmd - > trans_id ) ;
disk_super - > flags = cpu_to_le32 ( pmd - > flags ) ;
2014-03-27 18:13:23 +04:00
copy_sm_roots ( pmd , disk_super ) ;
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:08 +04:00
return dm_tm_commit ( pmd - > tm , sblock ) ;
2011-11-01 00:21:18 +04:00
}
2018-09-10 18:50:09 +03:00
static void __set_metadata_reserve ( struct dm_pool_metadata * pmd )
{
int r ;
dm_block_t total ;
dm_block_t max_blocks = 4096 ; /* 16M */
r = dm_sm_get_nr_blocks ( pmd - > metadata_sm , & total ) ;
if ( r ) {
DMERR ( " could not get size of metadata device " ) ;
pmd - > metadata_reserve = max_blocks ;
2018-09-14 04:16:20 +03:00
} else
pmd - > metadata_reserve = min ( max_blocks , div_u64 ( total , 10 ) ) ;
2018-09-10 18:50:09 +03:00
}
2011-11-01 00:21:18 +04:00
struct dm_pool_metadata * dm_pool_metadata_open ( struct block_device * bdev ,
2012-07-27 18:08:14 +04:00
sector_t data_block_size ,
bool format_device )
2011-11-01 00:21:18 +04:00
{
int r ;
struct dm_pool_metadata * pmd ;
pmd = kmalloc ( sizeof ( * pmd ) , GFP_KERNEL ) ;
if ( ! pmd ) {
DMERR ( " could not allocate metadata struct " ) ;
return ERR_PTR ( - ENOMEM ) ;
}
2012-07-27 18:08:10 +04:00
init_rwsem ( & pmd - > root_lock ) ;
pmd - > time = 0 ;
INIT_LIST_HEAD ( & pmd - > thin_devices ) ;
2012-07-27 18:08:15 +04:00
pmd - > fail_io = false ;
2019-04-18 17:29:48 +03:00
pmd - > in_service = false ;
2012-07-27 18:08:10 +04:00
pmd - > bdev = bdev ;
2012-07-27 18:08:10 +04:00
pmd - > data_block_size = data_block_size ;
2019-12-04 17:07:41 +03:00
pmd - > pre_commit_fn = NULL ;
pmd - > pre_commit_context = NULL ;
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:14 +04:00
r = __create_persistent_data_objects ( pmd , format_device ) ;
2011-11-01 00:21:18 +04:00
if ( r ) {
kfree ( pmd ) ;
return ERR_PTR ( r ) ;
}
2012-07-27 18:08:11 +04:00
r = __begin_transaction ( pmd ) ;
if ( r < 0 ) {
if ( dm_pool_metadata_close ( pmd ) < 0 )
DMWARN ( " %s: dm_pool_metadata_close() failed. " , __func__ ) ;
return ERR_PTR ( r ) ;
2011-11-01 00:21:18 +04:00
}
2018-09-10 18:50:09 +03:00
__set_metadata_reserve ( pmd ) ;
2011-11-01 00:21:18 +04:00
return pmd ;
}
int dm_pool_metadata_close ( struct dm_pool_metadata * pmd )
{
int r ;
unsigned open_devices = 0 ;
struct dm_thin_device * td , * tmp ;
down_read ( & pmd - > root_lock ) ;
list_for_each_entry_safe ( td , tmp , & pmd - > thin_devices , list ) {
if ( td - > open_count )
open_devices + + ;
else {
list_del ( & td - > list ) ;
kfree ( td ) ;
}
}
up_read ( & pmd - > root_lock ) ;
if ( open_devices ) {
DMERR ( " attempt to close pmd when %u device(s) are still open " ,
open_devices ) ;
return - EBUSY ;
}
2020-01-13 19:18:51 +03:00
pmd_write_lock_in_core ( pmd ) ;
2020-09-01 09:25:44 +03:00
if ( ! pmd - > fail_io & & ! dm_bm_is_read_only ( pmd - > bm ) ) {
2012-07-27 18:08:15 +04:00
r = __commit_transaction ( pmd ) ;
if ( r < 0 )
DMWARN ( " %s: __commit_transaction() failed, error = %d " ,
__func__ , r ) ;
}
2020-02-23 22:54:58 +03:00
pmd_write_unlock ( pmd ) ;
2012-07-27 18:08:15 +04:00
if ( ! pmd - > fail_io )
2022-11-30 16:31:34 +03:00
__destroy_persistent_data_objects ( pmd , true ) ;
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:15 +04:00
kfree ( pmd ) ;
2011-11-01 00:21:18 +04:00
return 0 ;
}
2012-03-07 23:09:41 +04:00
/*
* __open_device : Returns @ td corresponding to device with id @ dev ,
* creating it if @ create is set and incrementing @ td - > open_count .
* On failure , @ td is undefined .
*/
2011-11-01 00:21:18 +04:00
/*
 * Look up (or, with @create, set up) the in-core dm_thin_device for @dev
 * and take a reference on it via open_count.
 *
 * Returns 0 with *@td set on success.  Returns -EEXIST when @create is
 * set but the device is already on pmd->thin_devices, -ENOMEM when the
 * allocation fails, or the dm_btree_lookup() error (including -ENODATA
 * for a missing device when @create is not set).
 */
static int __open_device(struct dm_pool_metadata *pmd,
			 dm_thin_id dev, int create,
			 struct dm_thin_device **td)
{
	struct disk_device_details details;
	struct dm_thin_device *cur, *fresh;
	uint64_t key = dev;
	int is_new = 0;
	int ret;

	/*
	 * Fast path: the device already has an in-core entry.
	 */
	list_for_each_entry(cur, &pmd->thin_devices, list) {
		if (cur->id != dev)
			continue;

		/*
		 * An already-open device cannot be created again.
		 */
		if (create)
			return -EEXIST;

		cur->open_count++;
		*td = cur;
		return 0;
	}

	/*
	 * Fetch the on-disk details, or synthesise them for a new device.
	 */
	ret = dm_btree_lookup(&pmd->details_info, pmd->details_root,
			      &key, &details);
	if (ret) {
		if (!(ret == -ENODATA && create))
			return ret;

		is_new = 1;
		details.mapped_blocks = 0;
		details.transaction_id = cpu_to_le64(pmd->trans_id);
		details.creation_time = cpu_to_le32(pmd->time);
		details.snapshotted_time = cpu_to_le32(pmd->time);
	}

	fresh = kmalloc(sizeof(*fresh), GFP_NOIO);
	*td = fresh;
	if (!fresh)
		return -ENOMEM;

	fresh->pmd = pmd;
	fresh->id = dev;
	fresh->open_count = 1;
	fresh->changed = is_new;
	fresh->aborted_with_changes = false;
	fresh->mapped_blocks = le64_to_cpu(details.mapped_blocks);
	fresh->transaction_id = le64_to_cpu(details.transaction_id);
	fresh->creation_time = le32_to_cpu(details.creation_time);
	fresh->snapshotted_time = le32_to_cpu(details.snapshotted_time);

	list_add(&fresh->list, &pmd->thin_devices);

	return 0;
}
static void __close_device ( struct dm_thin_device * td )
{
- - td - > open_count ;
}
/*
 * Create a new, empty thin device with id @dev.
 *
 * Returns -EEXIST if the details tree already has an entry for @dev,
 * otherwise 0 or the error from the first failing btree/open step.  Work
 * done before a failure is undone on the way out.
 */
static int __create_thin(struct dm_pool_metadata *pmd,
			 dm_thin_id dev)
{
	struct dm_thin_device *td;
	dm_block_t mapping_root;
	uint64_t key = dev;
	__le64 root_le;
	int err;

	/* A successful lookup means the id is already taken. */
	err = dm_btree_lookup(&pmd->details_info, pmd->details_root,
			      &key, NULL);
	if (!err)
		return -EEXIST;

	/*
	 * Start with an empty bottom-level mapping tree.
	 */
	err = dm_btree_empty(&pmd->bl_info, &mapping_root);
	if (err)
		return err;

	/*
	 * Hook the new tree into the top-level tree under @dev.
	 */
	root_le = cpu_to_le64(mapping_root);
	__dm_bless_for_disk(&root_le);
	err = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &root_le, &pmd->root);
	if (err)
		goto err_del_tree;

	/* Opening with create set builds the in-core device entry. */
	err = __open_device(pmd, dev, 1, &td);
	if (err)
		goto err_unhook;

	__close_device(td);

	return 0;

err_unhook:
	dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
err_del_tree:
	dm_btree_del(&pmd->bl_info, mapping_root);
	return err;
}
/*
 * Locked entry point for __create_thin().
 *
 * The call is bracketed by pmd_write_lock()/pmd_write_unlock().  Returns
 * -EINVAL without calling __create_thin() when pmd->fail_io is set,
 * otherwise whatever __create_thin() returns.
 */
int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev)
{
	int ret;

	pmd_write_lock(pmd);
	ret = pmd->fail_io ? -EINVAL : __create_thin(pmd, dev);
	pmd_write_unlock(pmd);

	return ret;
}
/*
 * Record a snapshot taken at @time on both the origin and @snap.
 *
 * The origin is opened temporarily, flagged as changed and stamped with
 * @time; @snap inherits the origin's mapped_blocks and the same
 * snapshotted_time.  Returns 0, or the __open_device() error.
 */
static int __set_snapshot_details(struct dm_pool_metadata *pmd,
				  struct dm_thin_device *snap,
				  dm_thin_id origin, uint32_t time)
{
	struct dm_thin_device *origin_td;
	int err;

	err = __open_device(pmd, origin, 0, &origin_td);
	if (err)
		return err;

	origin_td->changed = true;
	origin_td->snapshotted_time = time;

	snap->mapped_blocks = origin_td->mapped_blocks;
	snap->snapshotted_time = time;

	__close_device(origin_td);

	return 0;
}
/*
 * Create thin device @dev as a snapshot of @origin.
 *
 * The snapshot shares the origin's mapping tree: the tree's root gets an
 * extra reference and is inserted into the top-level tree under @dev.
 * pmd->time is advanced and the snapshot details are recorded on both
 * devices.
 *
 * Returns -EEXIST if @dev already has a details entry, otherwise 0 or
 * the error from the first failing step.
 */
static int __create_snap(struct dm_pool_metadata *pmd,
			 dm_thin_id dev, dm_thin_id origin)
{
	uint64_t origin_key = origin, dev_key = dev;
	struct dm_thin_device *td;
	dm_block_t shared_root;
	__le64 root_le;
	int err;

	/* The new id must not be in use. */
	err = dm_btree_lookup(&pmd->details_info, pmd->details_root,
			      &dev_key, NULL);
	if (!err)
		return -EEXIST;

	/* Locate the origin's mapping tree. */
	err = dm_btree_lookup(&pmd->tl_info, pmd->root, &origin_key, &root_le);
	if (err)
		return err;
	shared_root = le64_to_cpu(root_le);

	/* Cloning the origin only needs a reference count bump. */
	dm_tm_inc(pmd->tm, shared_root);

	/* Publish the shared root under the snapshot's id. */
	root_le = cpu_to_le64(shared_root);
	__dm_bless_for_disk(&root_le);
	err = dm_btree_insert(&pmd->tl_info, pmd->root, &dev_key, &root_le,
			      &pmd->root);
	if (err) {
		dm_tm_dec(pmd->tm, shared_root);
		return err;
	}

	pmd->time++;

	err = __open_device(pmd, dev, 1, &td);
	if (!err) {
		err = __set_snapshot_details(pmd, td, origin, pmd->time);
		__close_device(td);
	}
	if (!err)
		return 0;

	/* Failure: remove @dev from both trees again. */
	dm_btree_remove(&pmd->tl_info, pmd->root, &dev_key, &pmd->root);
	dm_btree_remove(&pmd->details_info, pmd->details_root,
			&dev_key, &pmd->details_root);
	return err;
}
/*
 * Locked entry point for __create_snap().
 *
 * The call is bracketed by pmd_write_lock()/pmd_write_unlock().  Returns
 * -EINVAL without calling __create_snap() when pmd->fail_io is set,
 * otherwise whatever __create_snap() returns.
 */
int dm_pool_create_snap(struct dm_pool_metadata *pmd,
			dm_thin_id dev,
			dm_thin_id origin)
{
	int ret;

	pmd_write_lock(pmd);
	ret = pmd->fail_io ? -EINVAL : __create_snap(pmd, dev, origin);
	pmd_write_unlock(pmd);

	return ret;
}
/*
 * Delete thin device @dev.
 *
 * The device is opened first; if anybody else holds it open (open_count
 * above our own reference) the reference is dropped and -EBUSY returned.
 * Otherwise the in-core entry is freed and @dev is removed from the
 * details tree and then from the top-level mapping tree.
 *
 * Returns 0, -EBUSY, or the error from __open_device()/dm_btree_remove().
 */
static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev)
{
	struct dm_thin_device *td;
	uint64_t key = dev;
	int err;

	/* TODO: failure should mark the transaction invalid */
	err = __open_device(pmd, dev, 0, &td);
	if (err)
		return err;

	if (td->open_count > 1) {
		__close_device(td);
		return -EBUSY;
	}

	list_del(&td->list);
	kfree(td);

	err = dm_btree_remove(&pmd->details_info, pmd->details_root,
			      &key, &pmd->details_root);
	if (err)
		return err;

	return dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
}
/*
 * Locked entry point for __delete_device().
 *
 * The call is bracketed by pmd_write_lock()/pmd_write_unlock().  Returns
 * -EINVAL without calling __delete_device() when pmd->fail_io is set,
 * otherwise whatever __delete_device() returns.
 */
int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd,
			       dm_thin_id dev)
{
	int ret;

	pmd_write_lock(pmd);
	ret = pmd->fail_io ? -EINVAL : __delete_device(pmd, dev);
	pmd_write_unlock(pmd);

	return ret;
}
/*
 * Replace the pool's transaction id with @new_id, provided the current
 * id equals @current_id.
 *
 * Returns 0 on success.  Returns -EINVAL when pmd->fail_io is set or
 * when @current_id does not match (the mismatch is also logged).
 */
int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd,
					uint64_t current_id,
					uint64_t new_id)
{
	int ret = -EINVAL;

	pmd_write_lock(pmd);

	if (!pmd->fail_io) {
		if (pmd->trans_id == current_id) {
			pmd->trans_id = new_id;
			ret = 0;
		} else {
			DMERR(" mismatched transaction id ");
		}
	}

	pmd_write_unlock(pmd);

	return ret;
}
/*
 * Read the pool's transaction id into *@result while holding root_lock
 * for reading.
 *
 * Returns 0 on success, or -EINVAL (leaving *@result untouched) when
 * pmd->fail_io is set.
 */
int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
					uint64_t *result)
{
	int ret = 0;

	down_read(&pmd->root_lock);
	if (pmd->fail_io)
		ret = -EINVAL;
	else
		*result = pmd->trans_id;
	up_read(&pmd->root_lock);

	return ret;
}
2012-06-03 03:30:01 +04:00
static int __reserve_metadata_snap ( struct dm_pool_metadata * pmd )
{
int r , inc ;
struct thin_disk_superblock * disk_super ;
struct dm_block * copy , * sblock ;
dm_block_t held_root ;
2015-12-09 19:23:24 +03:00
/*
* We commit to ensure the btree roots which we increment in a
* moment are up to date .
*/
2019-04-15 23:40:08 +03:00
r = __commit_transaction ( pmd ) ;
if ( r < 0 ) {
DMWARN ( " %s: __commit_transaction() failed, error = %d " ,
__func__ , r ) ;
return r ;
}
2015-12-09 19:23:24 +03:00
2012-06-03 03:30:01 +04:00
/*
* Copy the superblock .
*/
dm_sm_inc_block ( pmd - > metadata_sm , THIN_SUPERBLOCK_LOCATION ) ;
r = dm_tm_shadow_block ( pmd - > tm , THIN_SUPERBLOCK_LOCATION ,
& sb_validator , & copy , & inc ) ;
if ( r )
return r ;
BUG_ON ( ! inc ) ;
held_root = dm_block_location ( copy ) ;
disk_super = dm_block_data ( copy ) ;
if ( le64_to_cpu ( disk_super - > held_root ) ) {
DMWARN ( " Pool metadata snapshot already exists: release this before taking another. " ) ;
dm_tm_dec ( pmd - > tm , held_root ) ;
dm_tm_unlock ( pmd - > tm , copy ) ;
return - EBUSY ;
}
/*
* Wipe the spacemap since we ' re not publishing this .
*/
memset ( & disk_super - > data_space_map_root , 0 ,
sizeof ( disk_super - > data_space_map_root ) ) ;
memset ( & disk_super - > metadata_space_map_root , 0 ,
sizeof ( disk_super - > metadata_space_map_root ) ) ;
/*
* Increment the data structures that need to be preserved .
*/
dm_tm_inc ( pmd - > tm , le64_to_cpu ( disk_super - > data_mapping_root ) ) ;
dm_tm_inc ( pmd - > tm , le64_to_cpu ( disk_super - > device_details_root ) ) ;
dm_tm_unlock ( pmd - > tm , copy ) ;
/*
* Write the held root into the superblock .
*/
2012-07-27 18:08:09 +04:00
r = superblock_lock ( pmd , & sblock ) ;
2012-06-03 03:30:01 +04:00
if ( r ) {
dm_tm_dec ( pmd - > tm , held_root ) ;
return r ;
}
disk_super = dm_block_data ( sblock ) ;
disk_super - > held_root = cpu_to_le64 ( held_root ) ;
dm_bm_unlock ( sblock ) ;
return 0 ;
}
int dm_pool_reserve_metadata_snap ( struct dm_pool_metadata * pmd )
{
2012-07-27 18:08:15 +04:00
int r = - EINVAL ;
2012-06-03 03:30:01 +04:00
2019-04-15 23:54:36 +03:00
pmd_write_lock ( pmd ) ;
2012-07-27 18:08:15 +04:00
if ( ! pmd - > fail_io )
r = __reserve_metadata_snap ( pmd ) ;
2019-04-15 23:54:36 +03:00
pmd_write_unlock ( pmd ) ;
2012-06-03 03:30:01 +04:00
return r ;
}
static int __release_metadata_snap ( struct dm_pool_metadata * pmd )
2011-11-01 00:21:18 +04:00
{
int r ;
struct thin_disk_superblock * disk_super ;
2012-06-03 03:30:01 +04:00
struct dm_block * sblock , * copy ;
dm_block_t held_root ;
2011-11-01 00:21:18 +04:00
2012-07-27 18:08:09 +04:00
r = superblock_lock ( pmd , & sblock ) ;
2011-11-01 00:21:18 +04:00
if ( r )
return r ;
2012-06-03 03:30:01 +04:00
disk_super = dm_block_data ( sblock ) ;
held_root = le64_to_cpu ( disk_super - > held_root ) ;
disk_super - > held_root = cpu_to_le64 ( 0 ) ;
dm_bm_unlock ( sblock ) ;
if ( ! held_root ) {
DMWARN ( " No pool metadata snapshot found: nothing to release. " ) ;
return - EINVAL ;
}
r = dm_tm_read_lock ( pmd - > tm , held_root , & sb_validator , & copy ) ;
if ( r )
return r ;
disk_super = dm_block_data ( copy ) ;
2015-08-12 17:10:21 +03:00
dm_btree_del ( & pmd - > info , le64_to_cpu ( disk_super - > data_mapping_root ) ) ;
dm_btree_del ( & pmd - > details_info , le64_to_cpu ( disk_super - > device_details_root ) ) ;
2012-06-03 03:30:01 +04:00
dm_sm_dec_block ( pmd - > metadata_sm , held_root ) ;
2015-10-22 23:46:59 +03:00
dm_tm_unlock ( pmd - > tm , copy ) ;
return 0 ;
2012-06-03 03:30:01 +04:00
}
int dm_pool_release_metadata_snap ( struct dm_pool_metadata * pmd )
{
2012-07-27 18:08:15 +04:00
int r = - EINVAL ;
2012-06-03 03:30:01 +04:00
2019-04-15 23:54:36 +03:00
pmd_write_lock ( pmd ) ;
2012-07-27 18:08:15 +04:00
if ( ! pmd - > fail_io )
r = __release_metadata_snap ( pmd ) ;
2019-04-15 23:54:36 +03:00
pmd_write_unlock ( pmd ) ;
2012-06-03 03:30:01 +04:00
return r ;
}
/*
 * Read the superblock's held_root field into *@result.
 *
 * The superblock is read-locked through the block manager for the
 * duration of the read.  Returns 0, or the dm_bm_read_lock() error.
 */
static int __get_metadata_snap(struct dm_pool_metadata *pmd,
			       dm_block_t *result)
{
	struct thin_disk_superblock *sb;
	struct dm_block *sb_blk;
	int err;

	err = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
			      &sb_validator, &sb_blk);
	if (err)
		return err;

	sb = dm_block_data(sb_blk);
	*result = le64_to_cpu(sb->held_root);

	dm_bm_unlock(sb_blk);

	return 0;
}
2012-06-03 03:30:01 +04:00
/*
 * Fetch the held metadata snapshot root into *@result, holding
 * root_lock for reading.
 *
 * Returns -EINVAL without touching *@result when pmd->fail_io is set,
 * otherwise whatever __get_metadata_snap() returns.
 */
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
			      dm_block_t *result)
{
	int ret;

	down_read(&pmd->root_lock);
	ret = pmd->fail_io ? -EINVAL : __get_metadata_snap(pmd, result);
	up_read(&pmd->root_lock);

	return ret;
}
/*
 * Open existing thin device @dev and return it through @td.
 *
 * Uses pmd_write_lock_in_core()/pmd_write_unlock() around a non-creating
 * __open_device() call.  Returns -EINVAL when pmd->fail_io is set,
 * otherwise whatever __open_device() returns.
 */
int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev,
			     struct dm_thin_device **td)
{
	int ret;

	pmd_write_lock_in_core(pmd);
	ret = pmd->fail_io ? -EINVAL : __open_device(pmd, dev, 0, td);
	pmd_write_unlock(pmd);

	return ret;
}
int dm_pool_close_thin_device ( struct dm_thin_device * td )
{
2019-04-15 23:54:36 +03:00
pmd_write_lock_in_core ( td - > pmd ) ;
2011-11-01 00:21:18 +04:00
__close_device ( td ) ;
2019-04-15 23:54:36 +03:00
pmd_write_unlock ( td - > pmd ) ;
2011-11-01 00:21:18 +04:00
return 0 ;
}
dm_thin_id dm_thin_dev_id ( struct dm_thin_device * td )
{
return td - > id ;
}
2013-12-17 21:09:40 +04:00
/*
* Check whether @ time ( of block creation ) is older than @ td ' s last snapshot .
* If so then the associated block is shared with the last snapshot device .
* Any block on a device created * after * the device last got snapshotted is
* necessarily not shared .
*/
2012-07-27 18:07:57 +04:00
/*
 * True when @td's snapshotted_time is strictly later than @time.
 */
static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time)
{
	return time < td->snapshotted_time;
}
2015-11-05 18:10:11 +03:00
/*
 * Decode an on-disk mapping value into @result.
 *
 * @value is converted from little-endian and split by
 * unpack_block_time() into a block and a time.  result->block receives
 * the block; result->shared is set when @td was snapshotted after that
 * time.
 */
static void unpack_lookup_result(struct dm_thin_device *td, __le64 value,
				 struct dm_thin_lookup_result *result)
{
	dm_block_t mapped_block;
	uint32_t mapped_time;

	unpack_block_time(le64_to_cpu(value), &mapped_block, &mapped_time);

	result->block = mapped_block;
	result->shared = __snapshotted_since(td, mapped_time);
}
2015-12-07 17:48:04 +03:00
/*
 * Look up virtual block @block of @td in the two-level mapping tree.
 *
 * @can_issue_io selects which btree info is used: pmd->info when set,
 * pmd->nb_info otherwise (presumably the non-blocking variant; confirm
 * against its definition).  On success @result is filled in.
 *
 * Returns 0 or the dm_btree_lookup() error; @result is untouched on
 * failure.
 */
static int __find_block(struct dm_thin_device *td, dm_block_t block,
			int can_issue_io, struct dm_thin_lookup_result *result)
{
	struct dm_pool_metadata *pmd = td->pmd;
	struct dm_btree_info *info = can_issue_io ? &pmd->info : &pmd->nb_info;
	dm_block_t keys[2] = { td->id, block };
	__le64 value;
	int ret;

	ret = dm_btree_lookup(info, pmd->root, keys, &value);
	if (ret)
		return ret;

	unpack_lookup_result(td, value, result);

	return 0;
}
2015-12-07 17:48:04 +03:00
/*
 * Look up the mapping for @block of @td, holding root_lock for reading.
 *
 * Returns -EINVAL when pmd->fail_io is set, otherwise whatever
 * __find_block() returns.
 */
int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
		       int can_issue_io, struct dm_thin_lookup_result *result)
{
	struct dm_pool_metadata *pmd = td->pmd;
	int ret;

	down_read(&pmd->root_lock);
	ret = pmd->fail_io ? -EINVAL
			   : __find_block(td, block, can_issue_io, result);
	up_read(&pmd->root_lock);

	return ret;
}
/*
 * Wrapper around dm_btree_lookup_next() for the (@td->id, @block) key.
 *
 * The key reported by the btree is written to *@vblock and, on success,
 * the value is decoded into @result.  Returns 0 or the btree error.
 */
static int __find_next_mapped_block(struct dm_thin_device *td, dm_block_t block,
				    dm_block_t *vblock,
				    struct dm_thin_lookup_result *result)
{
	struct dm_pool_metadata *pmd = td->pmd;
	dm_block_t keys[2] = { td->id, block };
	__le64 value;
	int ret;

	ret = dm_btree_lookup_next(&pmd->info, pmd->root, keys, vblock, &value);
	if (ret)
		return ret;

	unpack_lookup_result(td, value, result);

	return 0;
}
2015-12-07 17:48:04 +03:00
/*
 * Find the first run of mapped blocks within [@begin, @end).
 *
 * The run starts at the first mapped block found from @begin and grows
 * while each following virtual block is mapped to the next consecutive
 * pool block with the same shared flag.  On success the run is reported
 * as [*@thin_begin, *@thin_end), with *@pool_begin its first pool block
 * and *@maybe_shared its shared flag.
 *
 * Returns 0 on success, -ENODATA if @end < @begin or no mapping is found
 * before @end, or another lookup error.
 */
static int __find_mapped_range(struct dm_thin_device *td,
			       dm_block_t begin, dm_block_t end,
			       dm_block_t *thin_begin, dm_block_t *thin_end,
			       dm_block_t *pool_begin, bool *maybe_shared)
{
	struct dm_thin_lookup_result lookup;
	dm_block_t next_pool;
	int ret;

	if (end < begin)
		return -ENODATA;

	/* Advance @begin to the first mapped block at or after it. */
	ret = __find_next_mapped_block(td, begin, &begin, &lookup);
	if (ret)
		return ret;

	if (begin >= end)
		return -ENODATA;

	*thin_begin = begin;
	*pool_begin = lookup.block;
	*maybe_shared = lookup.shared;

	begin++;
	next_pool = *pool_begin + 1;

	/* Extend the run one block at a time. */
	for (; begin != end; begin++, next_pool++) {
		ret = __find_block(td, begin, true, &lookup);
		if (ret == -ENODATA)
			break;
		if (ret)
			return ret;

		if (lookup.block != next_pool ||
		    lookup.shared != *maybe_shared)
			break;
	}

	*thin_end = begin;
	return 0;
}
2015-12-07 17:48:04 +03:00
/*
 * Locked entry point for __find_mapped_range(); root_lock is held for
 * reading.
 *
 * Returns -EINVAL when pmd->fail_io is set, otherwise whatever
 * __find_mapped_range() returns.
 */
int dm_thin_find_mapped_range(struct dm_thin_device *td,
			      dm_block_t begin, dm_block_t end,
			      dm_block_t *thin_begin, dm_block_t *thin_end,
			      dm_block_t *pool_begin, bool *maybe_shared)
{
	struct dm_pool_metadata *pmd = td->pmd;
	int ret;

	down_read(&pmd->root_lock);
	if (pmd->fail_io)
		ret = -EINVAL;
	else
		ret = __find_mapped_range(td, begin, end, thin_begin, thin_end,
					  pool_begin, maybe_shared);
	up_read(&pmd->root_lock);

	return ret;
}
2011-11-01 00:21:18 +04:00
/*
 * Map virtual block @block of @td to @data_block.
 *
 * The stored value packs @data_block together with the current
 * pmd->time.  On success @td is flagged as changed, and mapped_blocks is
 * incremented when dm_btree_insert_notify() sets its inserted flag.
 *
 * Returns 0 or the btree error.
 */
static int __insert(struct dm_thin_device *td, dm_block_t block,
		    dm_block_t data_block)
{
	struct dm_pool_metadata *pmd = td->pmd;
	dm_block_t keys[2] = { td->id, block };
	__le64 packed;
	int ret, is_new;

	packed = cpu_to_le64(pack_block_time(data_block, pmd->time));
	__dm_bless_for_disk(&packed);

	ret = dm_btree_insert_notify(&pmd->info, pmd->root, keys, &packed,
				     &pmd->root, &is_new);
	if (ret)
		return ret;

	td->changed = true;
	if (is_new)
		td->mapped_blocks++;

	return 0;
}
/*
 * Locked entry point for __insert().
 *
 * The call is bracketed by pmd_write_lock()/pmd_write_unlock() on
 * td->pmd.  Returns -EINVAL when fail_io is set, otherwise whatever
 * __insert() returns.
 */
int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block,
			 dm_block_t data_block)
{
	struct dm_pool_metadata *pmd = td->pmd;
	int ret;

	pmd_write_lock(pmd);
	ret = pmd->fail_io ? -EINVAL : __insert(td, block, data_block);
	pmd_write_unlock(pmd);

	return ret;
}
2015-04-13 11:45:25 +03:00
/*
 * Remove the mappings of @td in the range [@begin, @end).
 *
 * The device's bottom-level tree is detached from the top-level tree
 * (with an extra reference so it survives), its leaves in the range are
 * removed, and the possibly relocated root is inserted back.
 * mapped_blocks is reduced by the number of entries removed and @td is
 * flagged as changed.
 *
 * Returns 0 or the error from the first failing btree call.
 */
static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end)
{
	struct dm_pool_metadata *pmd = td->pmd;
	dm_block_t keys[1] = { td->id };
	dm_block_t dev_root;
	unsigned removed, removed_total = 0;
	__le64 value;
	int err;

	/*
	 * Locate this device's mapping tree.
	 */
	err = dm_btree_lookup(&pmd->tl_info, pmd->root, keys, &value);
	if (err)
		return err;

	/*
	 * Detach it from the top-level tree.  The reference is bumped
	 * first so that the removal does not delete the tree.
	 */
	dev_root = le64_to_cpu(value);
	dm_tm_inc(pmd->tm, dev_root);
	err = dm_btree_remove(&pmd->tl_info, pmd->root, keys, &pmd->root);
	if (err)
		return err;

	/*
	 * dm_btree_remove_leaves() stops at the first unmapped entry, so
	 * keep hopping to the next mapped key until the range is done.
	 */
	while (begin < end) {
		err = dm_btree_lookup_next(&pmd->bl_info, dev_root, &begin, &begin, &value);
		if (err == -ENODATA)
			break;

		if (err)
			return err;

		if (begin >= end)
			break;

		err = dm_btree_remove_leaves(&pmd->bl_info, dev_root, &begin, end, &dev_root, &removed);
		if (err)
			return err;

		removed_total += removed;
	}

	td->mapped_blocks -= removed_total;
	td->changed = true;

	/*
	 * Put the mapping tree back under the device's key.
	 */
	value = cpu_to_le64(dev_root);
	__dm_bless_for_disk(&value);
	return dm_btree_insert(&pmd->tl_info, pmd->root, keys, &value, &pmd->root);
}
/*
 * Locked entry point for __remove_range().
 *
 * The call is bracketed by pmd_write_lock()/pmd_write_unlock() on
 * td->pmd.  Returns -EINVAL when fail_io is set, otherwise whatever
 * __remove_range() returns.
 */
int dm_thin_remove_range(struct dm_thin_device *td,
			 dm_block_t begin, dm_block_t end)
{
	struct dm_pool_metadata *pmd = td->pmd;
	int ret;

	pmd_write_lock(pmd);
	ret = pmd->fail_io ? -EINVAL : __remove_range(td, begin, end);
	pmd_write_unlock(pmd);

	return ret;
}
2019-01-15 21:27:01 +03:00
/*
 * Report whether data block @b has a reference count above one.
 *
 * root_lock is held for reading.  On success *@result is set to true
 * when the data space map's count for @b is greater than 1.
 *
 * Returns 0 on success, -EINVAL when pmd->fail_io is set, or the
 * dm_sm_get_count() error.  *@result is only written on success.
 */
int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
{
	int r = -EINVAL;
	uint32_t ref_count;

	down_read(&pmd->root_lock);
	/*
	 * dm_pool_abort_metadata() destroys the persistent-data objects
	 * and sets fail_io when they cannot be rebuilt, so pmd->data_sm
	 * must not be used once fail_io is set.
	 */
	if (!pmd->fail_io) {
		r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
		if (!r)
			*result = (ref_count > 1);
	}
	up_read(&pmd->root_lock);

	return r;
}
2016-07-01 16:00:02 +03:00
/*
 * Increment the data space map reference counts for the range @b..@e
 * (passed straight through to dm_sm_inc_blocks()).
 *
 * The call is bracketed by pmd_write_lock()/pmd_write_unlock().
 * Returns -EINVAL when pmd->fail_io is set, otherwise the
 * dm_sm_inc_blocks() result.
 */
int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
{
	int r = -EINVAL;

	pmd_write_lock(pmd);
	/*
	 * dm_pool_abort_metadata() destroys the persistent-data objects
	 * and sets fail_io when they cannot be rebuilt, so pmd->data_sm
	 * must not be used once fail_io is set.
	 */
	if (!pmd->fail_io)
		r = dm_sm_inc_blocks(pmd->data_sm, b, e);
	pmd_write_unlock(pmd);

	return r;
}
/*
 * Decrement the data space map reference counts for the range @b..@e
 * (passed straight through to dm_sm_dec_blocks()).
 *
 * The call is bracketed by pmd_write_lock()/pmd_write_unlock().
 * Returns -EINVAL when pmd->fail_io is set, otherwise the
 * dm_sm_dec_blocks() result.
 */
int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
{
	int r = -EINVAL;

	pmd_write_lock(pmd);
	/*
	 * dm_pool_abort_metadata() destroys the persistent-data objects
	 * and sets fail_io when they cannot be rebuilt, so pmd->data_sm
	 * must not be used once fail_io is set.
	 */
	if (!pmd->fail_io)
		r = dm_sm_dec_blocks(pmd->data_sm, b, e);
	pmd_write_unlock(pmd);

	return r;
}
2012-07-27 18:08:14 +04:00
bool dm_thin_changed_this_transaction ( struct dm_thin_device * td )
{
int r ;
down_read ( & td - > pmd - > root_lock ) ;
r = td - > changed ;
up_read ( & td - > pmd - > root_lock ) ;
return r ;
}
2014-02-06 15:08:56 +04:00
bool dm_pool_changed_this_transaction ( struct dm_pool_metadata * pmd )
{
bool r = false ;
struct dm_thin_device * td , * tmp ;
down_read ( & pmd - > root_lock ) ;
list_for_each_entry_safe ( td , tmp , & pmd - > thin_devices , list ) {
if ( td - > changed ) {
r = td - > changed ;
break ;
}
}
up_read ( & pmd - > root_lock ) ;
return r ;
}
2012-07-27 18:08:15 +04:00
bool dm_thin_aborted_changes ( struct dm_thin_device * td )
{
bool r ;
down_read ( & td - > pmd - > root_lock ) ;
r = td - > aborted_with_changes ;
up_read ( & td - > pmd - > root_lock ) ;
return r ;
}
2011-11-01 00:21:18 +04:00
/*
 * Allocate a block from the data space map into *@result.
 *
 * The call is bracketed by pmd_write_lock()/pmd_write_unlock().  Returns
 * -EINVAL when pmd->fail_io is set, otherwise the dm_sm_new_block()
 * result.
 */
int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result)
{
	int ret;

	pmd_write_lock(pmd);
	ret = pmd->fail_io ? -EINVAL : dm_sm_new_block(pmd->data_sm, result);
	pmd_write_unlock(pmd);

	return ret;
}
int dm_pool_commit_metadata ( struct dm_pool_metadata * pmd )
{
2012-07-27 18:08:15 +04:00
int r = - EINVAL ;
2011-11-01 00:21:18 +04:00
2019-04-18 17:29:48 +03:00
/*
* Care is taken to not have commit be what
* triggers putting the thin - pool in - service .
*/
2020-01-13 19:18:51 +03:00
pmd_write_lock_in_core ( pmd ) ;
2012-07-27 18:08:15 +04:00
if ( pmd - > fail_io )
goto out ;
2011-11-01 00:21:18 +04:00
r = __commit_transaction ( pmd ) ;
2019-04-18 17:29:48 +03:00
if ( r < 0 )
2011-11-01 00:21:18 +04:00
goto out ;
/*
* Open the next transaction .
*/
r = __begin_transaction ( pmd ) ;
out :
2019-04-15 23:54:36 +03:00
pmd_write_unlock ( pmd ) ;
2011-11-01 00:21:18 +04:00
return r ;
}
2012-07-27 18:08:15 +04:00
static void __set_abort_with_changes_flags ( struct dm_pool_metadata * pmd )
{
struct dm_thin_device * td ;
list_for_each_entry ( td , & pmd - > thin_devices , list )
td - > aborted_with_changes = td - > changed ;
}
int dm_pool_abort_metadata ( struct dm_pool_metadata * pmd )
{
int r = - EINVAL ;
2022-11-30 16:31:34 +03:00
struct dm_block_manager * old_bm = NULL , * new_bm = NULL ;
/* fail_io is double-checked with pmd->root_lock held below */
if ( unlikely ( pmd - > fail_io ) )
return r ;
/*
* Replacement block manager ( new_bm ) is created and old_bm destroyed outside of
* pmd root_lock to avoid ABBA deadlock that would result ( due to life - cycle of
* shrinker associated with the block manager ' s bufio client vs pmd root_lock ) .
* - must take shrinker_rwsem without holding pmd - > root_lock
*/
new_bm = dm_block_manager_create ( pmd - > bdev , THIN_METADATA_BLOCK_SIZE < < SECTOR_SHIFT ,
THIN_MAX_CONCURRENT_LOCKS ) ;
2012-07-27 18:08:15 +04:00
2019-04-15 23:54:36 +03:00
pmd_write_lock ( pmd ) ;
2022-11-30 16:31:34 +03:00
if ( pmd - > fail_io ) {
pmd_write_unlock ( pmd ) ;
2012-07-27 18:08:15 +04:00
goto out ;
2022-11-30 16:31:34 +03:00
}
2012-07-27 18:08:15 +04:00
__set_abort_with_changes_flags ( pmd ) ;
2022-11-30 16:31:34 +03:00
__destroy_persistent_data_objects ( pmd , false ) ;
old_bm = pmd - > bm ;
if ( IS_ERR ( new_bm ) ) {
DMERR ( " could not create block manager during abort " ) ;
pmd - > bm = NULL ;
r = PTR_ERR ( new_bm ) ;
goto out_unlock ;
}
pmd - > bm = new_bm ;
r = __open_or_format_metadata ( pmd , false ) ;
if ( r ) {
pmd - > bm = NULL ;
goto out_unlock ;
}
new_bm = NULL ;
out_unlock :
2012-07-27 18:08:15 +04:00
if ( r )
pmd - > fail_io = true ;
2019-04-15 23:54:36 +03:00
pmd_write_unlock ( pmd ) ;
2022-11-30 16:31:34 +03:00
dm_block_manager_destroy ( old_bm ) ;
out :
if ( new_bm & & ! IS_ERR ( new_bm ) )
dm_block_manager_destroy ( new_bm ) ;
2012-07-27 18:08:15 +04:00
return r ;
}
2011-11-01 00:21:18 +04:00
/*
 * Query the data space map's free block count into *@result, holding
 * root_lock for reading.
 *
 * Returns -EINVAL when pmd->fail_io is set, otherwise the
 * dm_sm_get_nr_free() result.
 */
int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result)
{
	int ret;

	down_read(&pmd->root_lock);
	ret = pmd->fail_io ? -EINVAL : dm_sm_get_nr_free(pmd->data_sm, result);
	up_read(&pmd->root_lock);

	return ret;
}
/*
 * Query the metadata space map's free block count, less the metadata
 * reserve, into *@result.  root_lock is held for reading.
 *
 * The reported value is clamped at 0 when fewer free blocks remain than
 * pmd->metadata_reserve.  Returns -EINVAL when pmd->fail_io is set,
 * otherwise the dm_sm_get_nr_free() result.
 */
int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
					  dm_block_t *result)
{
	int ret = -EINVAL;

	down_read(&pmd->root_lock);
	if (!pmd->fail_io) {
		ret = dm_sm_get_nr_free(pmd->metadata_sm, result);
		if (!ret)
			*result = (*result < pmd->metadata_reserve) ?
				  0 : *result - pmd->metadata_reserve;
	}
	up_read(&pmd->root_lock);

	return ret;
}
/*
 * Query the metadata space map's total block count into *@result,
 * holding root_lock for reading.
 *
 * Returns -EINVAL when pmd->fail_io is set, otherwise the
 * dm_sm_get_nr_blocks() result.
 */
int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
				  dm_block_t *result)
{
	int ret;

	down_read(&pmd->root_lock);
	ret = pmd->fail_io ? -EINVAL
			   : dm_sm_get_nr_blocks(pmd->metadata_sm, result);
	up_read(&pmd->root_lock);

	return ret;
}
/*
 * Query the data space map's total block count into *@result, holding
 * root_lock for reading.
 *
 * Returns -EINVAL when pmd->fail_io is set, otherwise the
 * dm_sm_get_nr_blocks() result.
 */
int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result)
{
	int ret;

	down_read(&pmd->root_lock);
	ret = pmd->fail_io ? -EINVAL
			   : dm_sm_get_nr_blocks(pmd->data_sm, result);
	up_read(&pmd->root_lock);

	return ret;
}
/*
 * Copy @td's in-core mapped_blocks count into *@result, holding
 * root_lock for reading.
 *
 * Returns 0 on success, or -EINVAL (leaving *@result untouched) when
 * fail_io is set.
 */
int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result)
{
	struct dm_pool_metadata *pmd = td->pmd;
	int ret = 0;

	down_read(&pmd->root_lock);
	if (pmd->fail_io)
		ret = -EINVAL;
	else
		*result = td->mapped_blocks;
	up_read(&pmd->root_lock);

	return ret;
}
/*
 * Find the highest key in @td's bottom-level mapping tree.
 *
 * The device's tree root is looked up in the top-level tree, then
 * dm_btree_find_highest_key() is run on it with @result as output.
 * Returns the lookup error, or the dm_btree_find_highest_key() result.
 */
static int __highest_block(struct dm_thin_device *td, dm_block_t *result)
{
	struct dm_pool_metadata *pmd = td->pmd;
	dm_block_t dev_root;
	__le64 root_le;
	int ret;

	ret = dm_btree_lookup(&pmd->tl_info, pmd->root, &td->id, &root_le);
	if (ret)
		return ret;

	dev_root = le64_to_cpu(root_le);

	return dm_btree_find_highest_key(&pmd->bl_info, dev_root, result);
}
/*
 * Locked entry point for __highest_block(); root_lock is held for
 * reading.
 *
 * Returns -EINVAL when fail_io is set, otherwise whatever
 * __highest_block() returns.
 */
int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
				     dm_block_t *result)
{
	struct dm_pool_metadata *pmd = td->pmd;
	int ret;

	down_read(&pmd->root_lock);
	ret = pmd->fail_io ? -EINVAL : __highest_block(td, result);
	up_read(&pmd->root_lock);

	return ret;
}
2013-05-10 17:37:18 +04:00
/*
 * Grow space map @sm to @new_count blocks.
 *
 * Returns 0 when the size is already @new_count, -EINVAL (with an error
 * logged) when @new_count is smaller than the current size, the
 * dm_sm_get_nr_blocks() error if the size cannot be read, or otherwise
 * the dm_sm_extend() result.
 */
static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count)
{
	dm_block_t cur_count;
	int ret;

	ret = dm_sm_get_nr_blocks(sm, &cur_count);
	if (ret)
		return ret;

	if (new_count < cur_count) {
		DMERR(" cannot reduce size of space map ");
		return -EINVAL;
	}

	if (new_count == cur_count)
		return 0;

	return dm_sm_extend(sm, new_count - cur_count);
}
/*
 * Resize the pool's data space map to new_count blocks.
 *
 * Runs __resize_space_map() on pmd->data_sm between pmd_write_lock() and
 * pmd_write_unlock().  If pmd->fail_io is set the resize is not attempted
 * and -EINVAL is returned; otherwise the helper's return value is passed
 * back to the caller.
 */
int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
{
	int err;

	pmd_write_lock(pmd);
	if (pmd->fail_io)
		err = -EINVAL;
	else
		err = __resize_space_map(pmd->data_sm, new_count);
	pmd_write_unlock(pmd);

	return err;
}
2012-07-27 18:08:15 +04:00
2013-05-10 17:37:19 +04:00
/*
 * Resize the pool's metadata space map to new_count blocks.
 *
 * Performed between pmd_write_lock() and pmd_write_unlock().  If
 * pmd->fail_io is set nothing is attempted and -EINVAL is returned.
 * Otherwise __resize_space_map() is run on pmd->metadata_sm and, only if
 * it returned 0, __set_metadata_reserve() is called afterwards.  The
 * return value is that of __resize_space_map().
 */
int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
{
	int err = -EINVAL;

	pmd_write_lock(pmd);
	if (pmd->fail_io)
		goto unlock;

	err = __resize_space_map(pmd->metadata_sm, new_count);
	if (err == 0)
		__set_metadata_reserve(pmd);

unlock:
	pmd_write_unlock(pmd);
	return err;
}
2012-07-27 18:08:15 +04:00
void dm_pool_metadata_read_only ( struct dm_pool_metadata * pmd )
{
2019-04-15 23:54:36 +03:00
pmd_write_lock_in_core ( pmd ) ;
2012-07-27 18:08:15 +04:00
dm_bm_set_read_only ( pmd - > bm ) ;
2019-04-15 23:54:36 +03:00
pmd_write_unlock ( pmd ) ;
2012-07-27 18:08:15 +04:00
}
2013-05-10 17:37:21 +04:00
2013-12-05 01:58:19 +04:00
void dm_pool_metadata_read_write ( struct dm_pool_metadata * pmd )
{
2019-04-15 23:54:36 +03:00
pmd_write_lock_in_core ( pmd ) ;
2013-12-05 01:58:19 +04:00
dm_bm_set_read_write ( pmd - > bm ) ;
2019-04-15 23:54:36 +03:00
pmd_write_unlock ( pmd ) ;
2013-12-05 01:58:19 +04:00
}
2013-05-10 17:37:21 +04:00
/*
 * Register a threshold callback on the pool's metadata space map.
 *
 * threshold, fn and context are forwarded unchanged to
 * dm_sm_register_threshold_callback() for pmd->metadata_sm.  The call is
 * made between pmd_write_lock_in_core() and pmd_write_unlock().
 *
 * Returns -EINVAL without registering anything if pmd->fail_io is set,
 * otherwise the return value of dm_sm_register_threshold_callback().
 */
int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
					dm_block_t threshold,
					dm_sm_threshold_fn fn,
					void *context)
{
	int err;

	pmd_write_lock_in_core(pmd);
	if (pmd->fail_io)
		err = -EINVAL;
	else
		err = dm_sm_register_threshold_callback(pmd->metadata_sm,
							threshold, fn, context);
	pmd_write_unlock(pmd);

	return err;
}
2014-02-14 20:58:41 +04:00
2019-12-04 17:07:41 +03:00
/*
 * Record a pre-commit callback and its context in the pool metadata.
 *
 * Stores fn in pmd->pre_commit_fn and context in pmd->pre_commit_context,
 * both between pmd_write_lock_in_core() and pmd_write_unlock().  Any values
 * previously stored in those fields are overwritten.
 */
void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd,
					  dm_pool_pre_commit_fn fn,
					  void *context)
{
	pmd_write_lock_in_core(pmd);

	pmd->pre_commit_context = context;
	pmd->pre_commit_fn = fn;

	pmd_write_unlock(pmd);
}
2014-02-14 20:58:41 +04:00
int dm_pool_metadata_set_needs_check ( struct dm_pool_metadata * pmd )
{
2019-07-02 22:50:08 +03:00
int r = - EINVAL ;
2014-02-14 20:58:41 +04:00
struct dm_block * sblock ;
struct thin_disk_superblock * disk_super ;
2019-04-15 23:54:36 +03:00
pmd_write_lock ( pmd ) ;
2019-07-02 22:50:08 +03:00
if ( pmd - > fail_io )
goto out ;
2014-02-14 20:58:41 +04:00
pmd - > flags | = THIN_METADATA_NEEDS_CHECK_FLAG ;
r = superblock_lock ( pmd , & sblock ) ;
if ( r ) {
2019-07-02 22:50:08 +03:00
DMERR ( " couldn't lock superblock " ) ;
2014-02-14 20:58:41 +04:00
goto out ;
}
disk_super = dm_block_data ( sblock ) ;
disk_super - > flags = cpu_to_le32 ( pmd - > flags ) ;
dm_bm_unlock ( sblock ) ;
out :
2019-04-15 23:54:36 +03:00
pmd_write_unlock ( pmd ) ;
2014-02-14 20:58:41 +04:00
return r ;
}
bool dm_pool_metadata_needs_check ( struct dm_pool_metadata * pmd )
{
bool needs_check ;
down_read ( & pmd - > root_lock ) ;
needs_check = pmd - > flags & THIN_METADATA_NEEDS_CHECK_FLAG ;
up_read ( & pmd - > root_lock ) ;
return needs_check ;
}
2014-10-06 18:28:30 +04:00
void dm_pool_issue_prefetches ( struct dm_pool_metadata * pmd )
{
2016-03-01 13:58:44 +03:00
down_read ( & pmd - > root_lock ) ;
if ( ! pmd - > fail_io )
dm_tm_issue_prefetches ( pmd - > tm ) ;
up_read ( & pmd - > root_lock ) ;
2014-10-06 18:28:30 +04:00
}