// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#include "dm-zoned.h"

#include <linux/module.h>

#define DM_MSG_PREFIX		"zoned"

#define DMZ_MIN_BIOS		8192
#define DMZ_MAX_DEVS		2

/*
 * Zone BIO context.
 */
struct dmz_bioctx {
	struct dmz_dev		*dev;
	struct dm_zone		*zone;
	struct bio		*bio;
	refcount_t		ref;
};

/*
 * Chunk work descriptor.
 */
struct dm_chunk_work {
	struct work_struct	work;
	refcount_t		refcount;
	struct dmz_target	*target;
	unsigned int		chunk;
	struct bio_list		bio_list;
};

/*
 * Target descriptor.
 */
struct dmz_target {
	struct dm_dev		*ddev[DMZ_MAX_DEVS];

	unsigned long		flags;

	/* Zoned block device information */
	struct dmz_dev		*dev;

	/* For metadata handling */
	struct dmz_metadata	*metadata;

	/* For reclaim */
	struct dmz_reclaim	*reclaim;

	/* For chunk work */
	struct radix_tree_root	chunk_rxtree;
	struct workqueue_struct *chunk_wq;
	struct mutex		chunk_lock;

	/* For cloned BIOs to zones */
	struct bio_set		bio_set;

	/* For flush */
	spinlock_t		flush_lock;
	struct bio_list		flush_list;
	struct delayed_work	flush_work;
	struct workqueue_struct *flush_wq;
};

/*
 * Flush intervals (seconds).
 */
#define DMZ_FLUSH_PERIOD	(10 * HZ)

/*
 * Target BIO completion.
 */
static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));

	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
		bio->bi_status = status;
	if (bioctx->dev && bio->bi_status != BLK_STS_OK)
		bioctx->dev->flags |= DMZ_CHECK_BDEV;

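	/* The target BIO completes only when the last clone reference is dropped */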
	if (refcount_dec_and_test(&bioctx->ref)) {
		struct dm_zone *zone = bioctx->zone;

		if (zone) {
			if (bio->bi_status != BLK_STS_OK &&
			    bio_op(bio) == REQ_OP_WRITE &&
			    dmz_is_seq(zone))
				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
			dmz_deactivate_zone(zone);
		}
		bio_endio(bio);
	}
}

/*
 * Completion callback for an internally cloned target BIO. This terminates the
 * target BIO when there are no more references to its context.
 */
static void dmz_clone_endio(struct bio *clone)
{
	struct dmz_bioctx *bioctx = clone->bi_private;
	blk_status_t status = clone->bi_status;

	bio_put(clone);
	dmz_bio_endio(bioctx->bio, status);
}

/*
 * Issue a clone of a target BIO. The clone may only partially process the
 * original target BIO.
 */
static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
			  struct bio *bio, sector_t chunk_block,
			  unsigned int nr_blocks)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	struct dmz_dev *dev = dmz_zone_to_dev(dmz->metadata, zone);
	struct bio *clone;

	if (dev->flags & DMZ_BDEV_DYING)
		return -EIO;

	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
	if (!clone)
		return -ENOMEM;

	bio_set_dev(clone, dev->bdev);
	bioctx->dev = dev;
	clone->bi_iter.bi_sector =
		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
	clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
	clone->bi_end_io = dmz_clone_endio;
	clone->bi_private = bioctx;

	bio_advance(bio, clone->bi_iter.bi_size);

	refcount_inc(&bioctx->ref);
	generic_make_request(clone);

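	/* Writing to a sequential zone advances its write pointer */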
	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
		zone->wp_block += nr_blocks;

	return 0;
}

/*
 * Zero out pages of discarded blocks accessed by a read BIO.
 */
static void dmz_handle_read_zero(struct dmz_target *dmz, struct bio *bio,
				 sector_t chunk_block, unsigned int nr_blocks)
{
	unsigned int size = nr_blocks << DMZ_BLOCK_SHIFT;

	/* Clear nr_blocks: temporarily shrink the BIO to the range to zero */
	swap(bio->bi_iter.bi_size, size);
	zero_fill_bio(bio);
	swap(bio->bi_iter.bi_size, size);

	bio_advance(bio, size);
}

/*
 * Process a read BIO.
 */
static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
			   struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t chunk_block = dmz_chunk_block(zmd, dmz_bio_block(bio));
	unsigned int nr_blocks = dmz_bio_blocks(bio);
	sector_t end_block = chunk_block + nr_blocks;
	struct dm_zone *rzone, *bzone;
	int ret;

	/* Reads into unmapped chunks need only zero the BIO buffer */
	if (!zone) {
		zero_fill_bio(bio);
		return 0;
	}

	DMDEBUG("(%s): READ chunk %llu -> %s zone %u, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(dmz_is_rnd(zone) ? "RND" : "SEQ"),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	/* Check block validity to determine the read location */
	bzone = zone->bzone;
	while (chunk_block < end_block) {
		nr_blocks = 0;
		if (dmz_is_rnd(zone) || chunk_block < zone->wp_block) {
			/* Test block validity in the data zone */
			ret = dmz_block_valid(zmd, zone, chunk_block);
			if (ret < 0)
				return ret;
			if (ret > 0) {
				/* Read data zone blocks */
				nr_blocks = ret;
				rzone = zone;
			}
		}

		/*
		 * No valid blocks found in the data zone.
		 * Check the buffer zone, if there is one.
		 */
		if (!nr_blocks && bzone) {
			ret = dmz_block_valid(zmd, bzone, chunk_block);
			if (ret < 0)
				return ret;
			if (ret > 0) {
				/* Read buffer zone blocks */
				nr_blocks = ret;
				rzone = bzone;
			}
		}

		if (nr_blocks) {
			/* Valid blocks found: read them */
			nr_blocks = min_t(unsigned int, nr_blocks,
					  end_block - chunk_block);
			ret = dmz_submit_bio(dmz, rzone, bio,
					     chunk_block, nr_blocks);
			if (ret)
				return ret;
			chunk_block += nr_blocks;
		} else {
			/* No valid blocks: zero out the current BIO block */
			dmz_handle_read_zero(dmz, bio, chunk_block, 1);
			chunk_block++;
		}
	}

	return 0;
}

/*
 * Write blocks directly in a data zone, at the write pointer.
 * If a buffer zone is assigned, invalidate the blocks written
 * in place.
 */
static int dmz_handle_direct_write(struct dmz_target *dmz,
				   struct dm_zone *zone, struct bio *bio,
				   sector_t chunk_block,
				   unsigned int nr_blocks)
{
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *bzone = zone->bzone;
	int ret;

	if (dmz_is_readonly(zone))
		return -EROFS;

	/* Submit write */
	ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
	if (ret)
		return ret;

	/*
	 * Validate the blocks in the data zone and invalidate
	 * in the buffer zone, if there is one.
	 */
	ret = dmz_validate_blocks(zmd, zone, chunk_block, nr_blocks);
	if (ret == 0 && bzone)
		ret = dmz_invalidate_blocks(zmd, bzone, chunk_block, nr_blocks);

	return ret;
}

/*
 * Write blocks in the buffer zone of @zone.
 * If no buffer zone is assigned yet, get one.
 * Called with @zone write locked.
 */
static int dmz_handle_buffered_write(struct dmz_target *dmz,
				     struct dm_zone *zone, struct bio *bio,
				     sector_t chunk_block,
				     unsigned int nr_blocks)
{
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *bzone;
	int ret;

	/* Get the buffer zone. One will be allocated if needed */
	bzone = dmz_get_chunk_buffer(zmd, zone);
	if (IS_ERR(bzone))
		return PTR_ERR(bzone);

	if (dmz_is_readonly(bzone))
		return -EROFS;

	/* Submit write */
	ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
	if (ret)
		return ret;

	/*
	 * Validate the blocks in the buffer zone
	 * and invalidate in the data zone.
	 */
	ret = dmz_validate_blocks(zmd, bzone, chunk_block, nr_blocks);
	if (ret == 0 && chunk_block < zone->wp_block)
		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);

	return ret;
}

/*
 * Process a write BIO.
 */
static int dmz_handle_write(struct dmz_target *dmz, struct dm_zone *zone,
			    struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t chunk_block = dmz_chunk_block(zmd, dmz_bio_block(bio));
	unsigned int nr_blocks = dmz_bio_blocks(bio);

	if (!zone)
		return -ENOSPC;

	DMDEBUG("(%s): WRITE chunk %llu -> %s zone %u, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(dmz_is_rnd(zone) ? "RND" : "SEQ"),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	if (dmz_is_rnd(zone) || chunk_block == zone->wp_block) {
		/*
		 * The zone is a random zone, or it is a sequential zone
		 * and the BIO is aligned to the zone write pointer:
		 * write directly to the zone.
		 */
		return dmz_handle_direct_write(dmz, zone, bio,
					       chunk_block, nr_blocks);
	}

	/*
	 * This is an unaligned write in a sequential zone:
	 * use a buffered write.
	 */
	return dmz_handle_buffered_write(dmz, zone, bio, chunk_block, nr_blocks);
}

/*
 * Process a discard BIO.
 */
static int dmz_handle_discard(struct dmz_target *dmz, struct dm_zone *zone,
			      struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t block = dmz_bio_block(bio);
	unsigned int nr_blocks = dmz_bio_blocks(bio);
	sector_t chunk_block = dmz_chunk_block(zmd, block);
	int ret = 0;

	/* For unmapped chunks, there is nothing to do */
	if (!zone)
		return 0;

	if (dmz_is_readonly(zone))
		return -EROFS;

	DMDEBUG("(%s): DISCARD chunk %llu -> zone %u, block %llu, %u blocks",
		dmz_metadata_label(dmz->metadata),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	/*
	 * Invalidate blocks in the data zone and its
	 * buffer zone if one is mapped.
	 */
	if (dmz_is_rnd(zone) || chunk_block < zone->wp_block)
		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);
	if (ret == 0 && zone->bzone)
		ret = dmz_invalidate_blocks(zmd, zone->bzone,
					    chunk_block, nr_blocks);
	return ret;
}

/*
 * Process a BIO.
 */
static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
			   struct bio *bio)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *zone;
	int ret;

	/*
	 * A write may trigger a zone allocation, so make sure the
	 * allocation can succeed.
	 */
	if (bio_op(bio) == REQ_OP_WRITE)
		dmz_schedule_reclaim(dmz->reclaim);

	dmz_lock_metadata(zmd);

	/*
	 * Get the data zone mapping the chunk. There may be no
	 * mapping for read and discard. If a mapping is obtained,
	 * the zone returned will be set to the active state.
	 */
	zone = dmz_get_chunk_mapping(zmd, dmz_bio_chunk(zmd, bio),
				     bio_op(bio));
	if (IS_ERR(zone)) {
		ret = PTR_ERR(zone);
		goto out;
	}

	/* Process the BIO */
	if (zone) {
		dmz_activate_zone(zone);
		bioctx->zone = zone;
	}

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		ret = dmz_handle_read(dmz, zone, bio);
		break;
	case REQ_OP_WRITE:
		ret = dmz_handle_write(dmz, zone, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		ret = dmz_handle_discard(dmz, zone, bio);
		break;
	default:
		DMERR("(%s): Unsupported BIO operation 0x%x",
		      dmz_metadata_label(dmz->metadata), bio_op(bio));
		ret = -EIO;
	}

	/*
	 * Release the chunk mapping. This will check that the mapping
	 * is still valid, that is, that the zone used still has valid blocks.
	 */
	if (zone)
		dmz_put_chunk_mapping(zmd, zone);
out:
	dmz_bio_endio(bio, errno_to_blk_status(ret));

	dmz_unlock_metadata(zmd);
}

/*
 * Increment a chunk work reference counter.
 */
static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
{
	refcount_inc(&cw->refcount);
}

/*
 * Decrement a chunk work reference count and
 * free it if it becomes 0.
 */
static void dmz_put_chunk_work(struct dm_chunk_work *cw)
{
	if (refcount_dec_and_test(&cw->refcount)) {
		WARN_ON(!bio_list_empty(&cw->bio_list));
		radix_tree_delete(&cw->target->chunk_rxtree, cw->chunk);
		kfree(cw);
	}
}

/*
 * Chunk BIO work function.
 */
static void dmz_chunk_work(struct work_struct *work)
{
	struct dm_chunk_work *cw = container_of(work, struct dm_chunk_work, work);
	struct dmz_target *dmz = cw->target;
	struct bio *bio;

	mutex_lock(&dmz->chunk_lock);

	/* Process the chunk BIOs */
	while ((bio = bio_list_pop(&cw->bio_list))) {
		mutex_unlock(&dmz->chunk_lock);
		dmz_handle_bio(dmz, cw, bio);
		mutex_lock(&dmz->chunk_lock);
		dmz_put_chunk_work(cw);
	}

	/* Queueing the work incremented the work refcount */
	dmz_put_chunk_work(cw);

	mutex_unlock(&dmz->chunk_lock);
}

/*
 * Flush work.
 */
static void dmz_flush_work(struct work_struct *work)
{
	struct dmz_target *dmz = container_of(work, struct dmz_target, flush_work.work);
	struct bio *bio;
	int ret;

	/* Flush dirty metadata blocks */
	ret = dmz_flush_metadata(dmz->metadata);
	if (ret)
		DMDEBUG("(%s): Metadata flush failed, rc=%d",
			dmz_metadata_label(dmz->metadata), ret);

	/* Process queued flush requests */
	while (1) {
		spin_lock(&dmz->flush_lock);
		bio = bio_list_pop(&dmz->flush_list);
		spin_unlock(&dmz->flush_lock);

		if (!bio)
			break;

		dmz_bio_endio(bio, errno_to_blk_status(ret));
	}

	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
}

/*
 * Get a chunk work and start it to process a new BIO.
 * If the BIO chunk has no work yet, create one.
 */
static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
{
	unsigned int chunk = dmz_bio_chunk(dmz->metadata, bio);
	struct dm_chunk_work *cw;
	int ret = 0;

	mutex_lock(&dmz->chunk_lock);

	/* Get the BIO chunk work. If one is not active yet, create one */
	cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
	if (cw) {
		dmz_get_chunk_work(cw);
	} else {
		/* Create a new chunk work */
		cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
		if (unlikely(!cw)) {
			ret = -ENOMEM;
			goto out;
		}

		INIT_WORK(&cw->work, dmz_chunk_work);
		refcount_set(&cw->refcount, 1);
		cw->target = dmz;
		cw->chunk = chunk;
		bio_list_init(&cw->bio_list);

		ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw);
		if (unlikely(ret)) {
			kfree(cw);
			goto out;
		}
	}

	bio_list_add(&cw->bio_list, bio);

	dmz_reclaim_bio_acc(dmz->reclaim);
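	/* If the work was not already queued, take a reference that the work function drops */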
	if (queue_work(dmz->chunk_wq, &cw->work))
		dmz_get_chunk_work(cw);
out:
	mutex_unlock(&dmz->chunk_lock);

	return ret;
}

/*
 * Check if the backing device is being removed. If it's on the way out,
 * start failing I/O. Reclaim and metadata components also call this
 * function to cleanly abort operation in the event of such failure.
 */
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
{
	if (dmz_dev->flags & DMZ_BDEV_DYING)
		return true;

	if (dmz_dev->flags & DMZ_CHECK_BDEV)
		return !dmz_check_bdev(dmz_dev);

	if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
		dmz_dev_warn(dmz_dev, "Backing device queue dying");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return dmz_dev->flags & DMZ_BDEV_DYING;
}

/*
 * Check the backing device availability. This detects events such as the
 * backing device going offline due to errors or media removal. This check
 * is less efficient than dmz_bdev_is_dying() and should only be performed
 * as part of error handling.
 */
bool dmz_check_bdev(struct dmz_dev *dmz_dev)
{
	struct gendisk *disk;

	dmz_dev->flags &= ~DMZ_CHECK_BDEV;
	if (dmz_bdev_is_dying(dmz_dev))
		return false;

	disk = dmz_dev->bdev->bd_disk;
	if (disk->fops->check_events &&
	    disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) {
		dmz_dev_warn(dmz_dev, "Backing device offline");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return !(dmz_dev->flags & DMZ_BDEV_DYING);
}

/*
 * Process a new BIO.
 */
static int dmz_map(struct dm_target *ti, struct bio *bio)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_metadata *zmd = dmz->metadata;
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	sector_t sector = bio->bi_iter.bi_sector;
	unsigned int nr_sectors = bio_sectors(bio);
	sector_t chunk_sector;
	int ret;

	if (dmz_dev_is_dying(zmd))
		return DM_MAPIO_KILL;

	DMDEBUG("(%s): BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		bio_op(bio), (unsigned long long)sector, nr_sectors,
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(unsigned long long)dmz_chunk_block(zmd, dmz_bio_block(bio)),
		(unsigned int)dmz_bio_blocks(bio));

	if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE)
		return DM_MAPIO_REMAPPED;

	/* The BIO should be block aligned */
	if ((nr_sectors & DMZ_BLOCK_SECTORS_MASK) || (sector & DMZ_BLOCK_SECTORS_MASK))
		return DM_MAPIO_KILL;

	/* Initialize the BIO context */
	bioctx->dev = NULL;
	bioctx->zone = NULL;
	bioctx->bio = bio;
	refcount_set(&bioctx->ref, 1);

	/* Set the BIO pending in the flush list */
	if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
		spin_lock(&dmz->flush_lock);
		bio_list_add(&dmz->flush_list, bio);
		spin_unlock(&dmz->flush_lock);
		mod_delayed_work(dmz->flush_wq, &dmz->flush_work, 0);
		return DM_MAPIO_SUBMITTED;
	}

	/* Split zone BIOs to fit entirely into a zone */
	chunk_sector = sector & (dmz_zone_nr_sectors(zmd) - 1);
	if (chunk_sector + nr_sectors > dmz_zone_nr_sectors(zmd))
		dm_accept_partial_bio(bio, dmz_zone_nr_sectors(zmd) - chunk_sector);

	/* Now ready to handle this BIO */
	ret = dmz_queue_chunk_work(dmz, bio);
	if (ret) {
		DMDEBUG("(%s): BIO op %d, can't process chunk %llu, err %i",
			dmz_metadata_label(zmd),
			bio_op(bio), (u64)dmz_bio_chunk(zmd, bio),
			ret);
		return DM_MAPIO_REQUEUE;
	}

	return DM_MAPIO_SUBMITTED;
}

/*
 * Get zoned device information.
 */
static int dmz_get_zoned_device(struct dm_target *ti, char *path,
				int idx, int nr_devs)
{
	struct dmz_target *dmz = ti->private;
	struct dm_dev *ddev;
	struct dmz_dev *dev;
	int ret;
	struct block_device *bdev;

	/* Get the target device */
	ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &ddev);
	if (ret) {
		ti->error = "Get target device failed";
		return ret;
	}

	bdev = ddev->bdev;
	if (bdev_zoned_model(bdev) == BLK_ZONED_NONE) {
		if (nr_devs == 1) {
			ti->error = "Invalid regular device";
			goto err;
		}
		if (idx != 0) {
			ti->error = "First device must be a regular device";
			goto err;
		}
		if (dmz->ddev[0]) {
			ti->error = "Too many regular devices";
			goto err;
		}
		dev = &dmz->dev[idx];
		dev->flags = DMZ_BDEV_REGULAR;
	} else {
		if (dmz->ddev[idx]) {
			ti->error = "Too many zoned devices";
			goto err;
		}
		if (nr_devs > 1 && idx == 0) {
			ti->error = "First device must be a regular device";
			goto err;
		}
		dev = &dmz->dev[idx];
	}
	dev->bdev = bdev;
	(void)bdevname(dev->bdev, dev->name);

	dev->capacity = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
	if (ti->begin) {
		ti->error = "Partial mapping is not supported";
		goto err;
	}

	dmz->ddev[idx] = ddev;

	return 0;
err:
	dm_put_device(ti, ddev);
	return -EINVAL;
}

/*
 * Cleanup zoned device information.
 */
static void dmz_put_zoned_device(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	for (i = 0; i < DMZ_MAX_DEVS; i++) {
		if (dmz->ddev[i]) {
			dm_put_device(ti, dmz->ddev[i]);
			dmz->ddev[i] = NULL;
		}
	}
}

static int dmz_fixup_devices(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_dev *reg_dev, *zoned_dev;
	struct request_queue *q;

	/*
	 * When we have two devices, the first one must be a regular block
	 * device and the second a zoned block device.
	 */
	if (dmz->ddev[0] && dmz->ddev[1]) {
		reg_dev = &dmz->dev[0];
		if (!(reg_dev->flags & DMZ_BDEV_REGULAR)) {
			ti->error = "Primary disk is not a regular device";
			return -EINVAL;
		}
		zoned_dev = &dmz->dev[1];
		if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
			ti->error = "Secondary disk is not a zoned device";
			return -EINVAL;
		}
	} else {
		reg_dev = NULL;
		zoned_dev = &dmz->dev[0];
		if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
			ti->error = "Disk is not a zoned device";
			return -EINVAL;
		}
	}

	q = bdev_get_queue(zoned_dev->bdev);
	zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q);
	zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk);

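	/*
	 * With a regular device present, carve it into emulated zones of the
	 * same size as the zoned device zones and place them first, offsetting
	 * the zoned device zones right after them.
	 */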
	if (reg_dev) {
		reg_dev->zone_nr_sectors = zoned_dev->zone_nr_sectors;
		reg_dev->nr_zones =
			DIV_ROUND_UP_SECTOR_T(reg_dev->capacity,
					      reg_dev->zone_nr_sectors);
		zoned_dev->zone_offset = reg_dev->nr_zones;
	}

	return 0;
}

/*
 * Setup target.
 */
static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct dmz_target *dmz;
	int ret;

	/* Check arguments */
	if (argc < 1 || argc > 2) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	/* Allocate and initialize the target descriptor */
	dmz = kzalloc(sizeof(struct dmz_target), GFP_KERNEL);
	if (!dmz) {
		ti->error = "Unable to allocate the zoned target descriptor";
		return -ENOMEM;
	}
	dmz->dev = kcalloc(2, sizeof(struct dmz_dev), GFP_KERNEL);
	if (!dmz->dev) {
		ti->error = "Unable to allocate the zoned device descriptors";
		kfree(dmz);
		return -ENOMEM;
	}
	ti->private = dmz;

	/* Get the target zoned block device */
	ret = dmz_get_zoned_device(ti, argv[0], 0, argc);
	if (ret)
		goto err;

	if (argc == 2) {
		ret = dmz_get_zoned_device(ti, argv[1], 1, argc);
		if (ret) {
			dmz_put_zoned_device(ti);
			goto err;
		}
	}
	ret = dmz_fixup_devices(ti);
	if (ret) {
		dmz_put_zoned_device(ti);
		goto err;
	}

	/* Initialize metadata */
	ret = dmz_ctr_metadata(dmz->dev, argc, &dmz->metadata,
			       dm_table_device_name(ti->table));
	if (ret) {
		ti->error = "Metadata initialization failed";
		goto err_dev;
	}

	/* Set target (no write same support) */
	ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9;
	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_write_zeroes_bios = 1;
	ti->per_io_data_size = sizeof(struct dmz_bioctx);
	ti->flush_supported = true;
	ti->discards_supported = true;

	/* The exposed capacity is the number of chunks that can be mapped */
	ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) <<
		dmz_zone_nr_sectors_shift(dmz->metadata);

	/* Zone BIO */
	ret = bioset_init(&dmz->bio_set, DMZ_MIN_BIOS, 0, 0);
	if (ret) {
		ti->error = "Create BIO set failed";
		goto err_meta;
	}

	/* Chunk BIO work */
	mutex_init(&dmz->chunk_lock);
	INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO);
	dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 0,
					dmz_metadata_label(dmz->metadata));
	if (!dmz->chunk_wq) {
		ti->error = "Create chunk workqueue failed";
		ret = -ENOMEM;
		goto err_bio;
	}

	/* Flush work */
	spin_lock_init(&dmz->flush_lock);
	bio_list_init(&dmz->flush_list);
	INIT_DELAYED_WORK(&dmz->flush_work, dmz_flush_work);
	dmz->flush_wq = alloc_ordered_workqueue("dmz_fwq_%s", WQ_MEM_RECLAIM,
						dmz_metadata_label(dmz->metadata));
	if (!dmz->flush_wq) {
		ti->error = "Create flush workqueue failed";
		ret = -ENOMEM;
		goto err_cwq;
	}
	mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);

	/* Initialize reclaim */
	ret = dmz_ctr_reclaim(dmz->metadata, &dmz->reclaim);
	if (ret) {
		ti->error = "Zone reclaim initialization failed";
		goto err_fwq;
	}

	DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu blocks)",
	       dmz_metadata_label(dmz->metadata),
	       (unsigned long long)ti->len,
	       (unsigned long long)dmz_sect2blk(ti->len));

	return 0;
err_fwq:
	destroy_workqueue(dmz->flush_wq);
err_cwq:
	destroy_workqueue(dmz->chunk_wq);
err_bio:
	mutex_destroy(&dmz->chunk_lock);
	bioset_exit(&dmz->bio_set);
err_meta:
	dmz_dtr_metadata(dmz->metadata);
err_dev:
	dmz_put_zoned_device(ti);
err:
	kfree(dmz->dev);
	kfree(dmz);

	return ret;
}

/*
 * Cleanup target.
 */
static void dmz_dtr(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;

	flush_workqueue(dmz->chunk_wq);
	destroy_workqueue(dmz->chunk_wq);

	dmz_dtr_reclaim(dmz->reclaim);

	cancel_delayed_work_sync(&dmz->flush_work);
	destroy_workqueue(dmz->flush_wq);

	(void) dmz_flush_metadata(dmz->metadata);

	dmz_dtr_metadata(dmz->metadata);

	bioset_exit(&dmz->bio_set);

	dmz_put_zoned_device(ti);

	mutex_destroy(&dmz->chunk_lock);

	kfree(dmz->dev);
	kfree(dmz);
}

/*
 * Setup target request queue limits.
 */
static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct dmz_target *dmz = ti->private;
	unsigned int chunk_sectors = dmz_zone_nr_sectors(dmz->metadata);

	limits->logical_block_size = DMZ_BLOCK_SIZE;
	limits->physical_block_size = DMZ_BLOCK_SIZE;

	blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
	blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);

	limits->discard_alignment = DMZ_BLOCK_SIZE;
	limits->discard_granularity = DMZ_BLOCK_SIZE;
	limits->max_discard_sectors = chunk_sectors;
	limits->max_hw_discard_sectors = chunk_sectors;
	limits->max_write_zeroes_sectors = chunk_sectors;

	/* FS hint to try to align to the device zone size */
	limits->chunk_sectors = chunk_sectors;
	limits->max_sectors = chunk_sectors;

	/* We are exposing a drive-managed zoned block device */
	limits->zoned = BLK_ZONED_NONE;
}

/*
 * Pass on ioctl to the backend device.
 */
static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_dev *dev = &dmz->dev[0];

	if (!dmz_check_bdev(dev))
		return -EIO;

	*bdev = dev->bdev;

	return 0;
}

/*
 * Stop works on suspend.
 */
static void dmz_suspend(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;

	flush_workqueue(dmz->chunk_wq);
	dmz_suspend_reclaim(dmz->reclaim);
	cancel_delayed_work_sync(&dmz->flush_work);
}

/*
 * Restart works on resume or if suspend failed.
 */
static void dmz_resume(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;

	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
	dmz_resume_reclaim(dmz->reclaim);
}

static int dmz_iterate_devices(struct dm_target *ti,
			       iterate_devices_callout_fn fn, void *data)
{
	struct dmz_target *dmz = ti->private;
	unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata);
	sector_t capacity;
	int r;

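	/* Report only the zone-aligned part of each device capacity */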
	capacity = dmz->dev[0].capacity & ~(zone_nr_sectors - 1);
	r = fn(ti, dmz->ddev[0], 0, capacity, data);
	if (!r && dmz->ddev[1]) {
		capacity = dmz->dev[1].capacity & ~(zone_nr_sectors - 1);
		r = fn(ti, dmz->ddev[1], 0, capacity, data);
	}
	return r;
}

static void dmz_status(struct dm_target *ti, status_type_t type,
		       unsigned int status_flags, char *result,
		       unsigned int maxlen)
{
	struct dmz_target *dmz = ti->private;
	ssize_t sz = 0;
	char buf[BDEVNAME_SIZE];
	struct dmz_dev *dev;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%u zones %u/%u random %u/%u sequential",
		       dmz_nr_zones(dmz->metadata),
		       dmz_nr_unmap_rnd_zones(dmz->metadata),
		       dmz_nr_rnd_zones(dmz->metadata),
		       dmz_nr_unmap_seq_zones(dmz->metadata),
		       dmz_nr_seq_zones(dmz->metadata));
		break;
	case STATUSTYPE_TABLE:
		dev = &dmz->dev[0];
		format_dev_t(buf, dev->bdev->bd_dev);
		DMEMIT("%s", buf);
		if (dmz->dev[1].bdev) {
			dev = &dmz->dev[1];
			format_dev_t(buf, dev->bdev->bd_dev);
			DMEMIT(" %s", buf);
		}
		break;
	}
	return;
}

static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
		       char *result, unsigned int maxlen)
{
	struct dmz_target *dmz = ti->private;
	int r = -EINVAL;

	if (!strcasecmp(argv[0], "reclaim")) {
		dmz_schedule_reclaim(dmz->reclaim);
		r = 0;
	} else
		DMERR("unrecognized message %s", argv[0]);
	return r;
}

static struct target_type dmz_type = {
	.name		 = "zoned",
	.version	 = {2, 0, 0},
	.features	 = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
	.module		 = THIS_MODULE,
	.ctr		 = dmz_ctr,
	.dtr		 = dmz_dtr,
	.map		 = dmz_map,
	.io_hints	 = dmz_io_hints,
	.prepare_ioctl	 = dmz_prepare_ioctl,
	.postsuspend	 = dmz_suspend,
	.resume		 = dmz_resume,
	.iterate_devices = dmz_iterate_devices,
	.status		 = dmz_status,
	.message	 = dmz_message,
};

static int __init dmz_init(void)
{
	return dm_register_target(&dmz_type);
}

static void __exit dmz_exit(void)
{
	dm_unregister_target(&dmz_type);
}

module_init(dmz_init);
module_exit(dmz_exit);

MODULE_DESCRIPTION(DM_NAME " target for zoned block devices");
MODULE_AUTHOR("Damien Le Moal <damien.lemoal@wdc.com>");
MODULE_LICENSE("GPL");