/*
 * Copyright (C) 2020 Red Hat GmbH
 *
 * This file is released under the GPL.
 *
 * Device-mapper target to emulate smaller logical block
 * size on backing devices exposing (natively) larger ones.
 *
 * E.g. 512 byte sector emulation on 4K native disks.
 */

#include "dm.h"
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/dm-bufio.h>

#define DM_MSG_PREFIX "ebs"

static void ebs_dtr(struct dm_target *ti);

/* Emulated block size context. */
struct ebs_c {
	struct dm_dev *dev;		/* Underlying device to emulate block size on. */
	struct dm_bufio_client *bufio;	/* Use dm-bufio for read and read-modify-write processing. */
	struct workqueue_struct *wq;	/* Workqueue for ^ processing of bios. */
	struct work_struct ws;		/* Work item used for ^. */
	struct bio_list bios_in;	/* Worker bios input list. */
	spinlock_t lock;		/* Guard bios input list above. */
	sector_t start;			/* <start> table line argument, see ebs_ctr below. */
	unsigned int e_bs;		/* Emulated block size in sectors exposed to upper layer. */
	unsigned int u_bs;		/* Underlying block size in sectors retrieved from/set on lower layer device. */
	unsigned char block_shift;	/* Bitshift sectors -> blocks used in dm-bufio API. */
	bool u_bs_set:1;		/* Flag to indicate underlying block size is set on table line. */
};

static inline sector_t __sector_to_block(struct ebs_c *ec, sector_t sector)
{
	return sector >> ec->block_shift;
}

static inline sector_t __block_mod(sector_t sector, unsigned int bs)
{
	return sector & (bs - 1);
}

/* Return number of blocks for a bio, accounting for misalignment of start and end sectors. */
static inline unsigned int __nr_blocks(struct ebs_c *ec, struct bio *bio)
{
	sector_t end_sector = __block_mod(bio->bi_iter.bi_sector, ec->u_bs) + bio_sectors(bio);

	return __sector_to_block(ec, end_sector) + (__block_mod(end_sector, ec->u_bs) ? 1 : 0);
}
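
/*
 * Worked example (values assumed for illustration): with ec->u_bs = 8
 * (4KiB blocks), a bio starting at relative sector 3 and spanning 30
 * sectors yields end_sector = 3 + 30 = 33, so __nr_blocks() returns
 * (33 >> 3) + (33 & 7 ? 1 : 0) = 4 + 1 = 5: the five underlying
 * blocks covering relative sectors 0..39 that the bio touches.
 */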

static inline bool __ebs_check_bs(unsigned int bs)
{
	return bs && is_power_of_2(bs);
}

/*
 * READ/WRITE:
 *
 * copy blocks between bufio blocks and bio vector's (partial/overlapping) pages.
 */
static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bvec_iter *iter)
{
	int r = 0;
	unsigned char *ba, *pa;
	unsigned int cur_len;
	unsigned int bv_len = bv->bv_len;
	unsigned int buf_off = to_bytes(__block_mod(iter->bi_sector, ec->u_bs));
	sector_t block = __sector_to_block(ec, iter->bi_sector);
	struct dm_buffer *b;

	if (unlikely(!bv->bv_page || !bv_len))
		return -EIO;

	pa = page_address(bv->bv_page) + bv->bv_offset;

	/* Handle overlapping page <-> blocks */
	while (bv_len) {
		cur_len = min(dm_bufio_get_block_size(ec->bufio) - buf_off, bv_len);

		/* Avoid reading for writes in case bio vector's page overwrites block completely. */
		if (rw == READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio))
			ba = dm_bufio_read(ec->bufio, block, &b);
		else
			ba = dm_bufio_new(ec->bufio, block, &b);

		if (IS_ERR(ba)) {
			/*
			 * Carry on with next buffer, if any, to issue all possible
			 * data but return error.
			 */
			r = PTR_ERR(ba);
		} else {
			/* Copy data to/from bio to buffer if read/new was successful above. */
			ba += buf_off;
			if (rw == READ) {
				memcpy(pa, ba, cur_len);
				flush_dcache_page(bv->bv_page);
			} else {
				flush_dcache_page(bv->bv_page);
				memcpy(ba, pa, cur_len);
				dm_bufio_mark_partial_buffer_dirty(b, buf_off, buf_off + cur_len);
			}

			dm_bufio_release(b);
		}

		pa += cur_len;
		bv_len -= cur_len;
		buf_off = 0;
		block++;
	}

	return r;
}
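
/*
 * Illustrative walk-through (sizes assumed for the example): a 4KiB
 * bio_vec landing at byte offset 512 into a 4KiB underlying block is
 * handled in two iterations of the loop above: 3584 bytes are copied
 * to/from block N, then buf_off resets to 0 and the remaining 512
 * bytes are copied to/from block N + 1.
 */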

/* READ/WRITE: iterate bio vector's copying between (partial) pages and bufio blocks. */
static int __ebs_rw_bio(struct ebs_c *ec, int rw, struct bio *bio)
{
	int r = 0, rr;
	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_bvec(bv, bio, iter) {
		rr = __ebs_rw_bvec(ec, rw, &bv, &iter);
		if (rr)
			r = rr;
	}

	return r;
}

/*
 * Discard bio's blocks, i.e. pass discards down.
 *
 * Avoid discarding partial blocks at beginning and end;
 * return 0 in case no blocks can be discarded as a result.
 */
static int __ebs_discard_bio(struct ebs_c *ec, struct bio *bio)
{
	sector_t block, blocks, sector = bio->bi_iter.bi_sector;

	block = __sector_to_block(ec, sector);
	blocks = __nr_blocks(ec, bio);

	/*
	 * Partial first underlying block (__nr_blocks() may have
	 * resulted in one block).
	 */
	if (__block_mod(sector, ec->u_bs)) {
		block++;
		blocks--;
	}

	/* Partial last underlying block if any. */
	if (blocks && __block_mod(bio_end_sector(bio), ec->u_bs))
		blocks--;

	return blocks ? dm_bufio_issue_discard(ec->bufio, block, blocks) : 0;
}

/* Release the bio's blocks from the bufio cache. */
static void __ebs_forget_bio(struct ebs_c *ec, struct bio *bio)
{
	sector_t blocks, sector = bio->bi_iter.bi_sector;

	blocks = __nr_blocks(ec, bio);

	dm_bufio_forget_buffers(ec->bufio, __sector_to_block(ec, sector), blocks);
}

/* Worker function to process incoming bios. */
static void __ebs_process_bios(struct work_struct *ws)
{
	int r;
	bool write = false;
	sector_t block1, block2;
	struct ebs_c *ec = container_of(ws, struct ebs_c, ws);
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	spin_lock_irq(&ec->lock);
	bios = ec->bios_in;
	bio_list_init(&ec->bios_in);
	spin_unlock_irq(&ec->lock);

	/* Prefetch all read and any mis-aligned write buffers */
	bio_list_for_each(bio, &bios) {
		block1 = __sector_to_block(ec, bio->bi_iter.bi_sector);
		if (bio_op(bio) == REQ_OP_READ)
			dm_bufio_prefetch(ec->bufio, block1, __nr_blocks(ec, bio));
		else if (bio_op(bio) == REQ_OP_WRITE && !(bio->bi_opf & REQ_PREFLUSH)) {
			block2 = __sector_to_block(ec, bio_end_sector(bio));
			if (__block_mod(bio->bi_iter.bi_sector, ec->u_bs))
				dm_bufio_prefetch(ec->bufio, block1, 1);
			if (__block_mod(bio_end_sector(bio), ec->u_bs) && block2 != block1)
				dm_bufio_prefetch(ec->bufio, block2, 1);
		}
	}
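
	/*
	 * E.g. (geometry assumed): with u_bs = 8, a write of relative
	 * sectors 4..19 prefetches only the first block (sectors 0..7)
	 * and the last block (16..23) for read-modify-write above; the
	 * fully overwritten middle block (8..15) needs no prefetch.
	 */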

	bio_list_for_each(bio, &bios) {
		r = -EIO;
		if (bio_op(bio) == REQ_OP_READ)
			r = __ebs_rw_bio(ec, READ, bio);
		else if (bio_op(bio) == REQ_OP_WRITE) {
			write = true;
			r = __ebs_rw_bio(ec, WRITE, bio);
		} else if (bio_op(bio) == REQ_OP_DISCARD) {
			__ebs_forget_bio(ec, bio);
			r = __ebs_discard_bio(ec, bio);
		}

		if (r < 0)
			bio->bi_status = errno_to_blk_status(r);
	}

	/*
	 * We write dirty buffers after processing I/O on them
	 * but before we endio, thus addressing REQ_FUA/REQ_SYNC.
	 */
	r = write ? dm_bufio_write_dirty_buffers(ec->bufio) : 0;

	while ((bio = bio_list_pop(&bios))) {
		/* Any other request is endioed. */
		if (unlikely(r && bio_op(bio) == REQ_OP_WRITE))
			bio_io_error(bio);
		else
			bio_endio(bio);
	}
}

/*
 * Construct an emulated block size mapping: <dev_path> <offset> <ebs> [<ubs>]
 *
 * <dev_path>: path of the underlying device
 * <offset>: offset in 512 byte sectors into <dev_path>
 * <ebs>: emulated block size in units of 512 bytes exposed to the upper layer
 * [<ubs>]: underlying block size in units of 512 bytes imposed on the lower layer;
 *	    optional, if not supplied, retrieve logical block size from underlying device
 */
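
/*
 * Example mapping (hypothetical device and sizes): expose 512-byte
 * logical blocks on a 4KiB-native /dev/sdX of 2097152 sectors:
 *
 *   dmsetup create ebs0 --table "0 2097152 ebs /dev/sdX 0 1 8"
 *
 * Here <ebs> = 1 (512 bytes) and <ubs> = 8 (4096 bytes).
 */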

static int ebs_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int r;
	unsigned short tmp1;
	unsigned long long tmp;
	char dummy;
	struct ebs_c *ec;

	if (argc < 3 || argc > 4) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	ec = ti->private = kzalloc(sizeof(*ec), GFP_KERNEL);
	if (!ec) {
		ti->error = "Cannot allocate ebs context";
		return -ENOMEM;
	}

	r = -EINVAL;
	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 ||
	    tmp != (sector_t)tmp ||
	    (sector_t)tmp >= ti->len) {
		ti->error = "Invalid device offset sector";
		goto bad;
	}
	ec->start = tmp;

	if (sscanf(argv[2], "%hu%c", &tmp1, &dummy) != 1 ||
	    !__ebs_check_bs(tmp1) ||
	    to_bytes(tmp1) > PAGE_SIZE) {
		ti->error = "Invalid emulated block size";
		goto bad;
	}
	ec->e_bs = tmp1;

	if (argc > 3) {
		if (sscanf(argv[3], "%hu%c", &tmp1, &dummy) != 1 || !__ebs_check_bs(tmp1)) {
			ti->error = "Invalid underlying block size";
			goto bad;
		}
		ec->u_bs = tmp1;
		ec->u_bs_set = true;
	} else
		ec->u_bs_set = false;

	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ec->dev);
	if (r) {
		ti->error = "Device lookup failed";
		ec->dev = NULL;
		goto bad;
	}

	r = -EINVAL;
	if (!ec->u_bs_set) {
		ec->u_bs = to_sector(bdev_logical_block_size(ec->dev->bdev));
		if (!__ebs_check_bs(ec->u_bs)) {
			ti->error = "Invalid retrieved underlying block size";
			goto bad;
		}
	}

	if (!ec->u_bs_set && ec->e_bs == ec->u_bs)
		DMINFO("Emulation superfluous: emulated equal to underlying block size");

	if (__block_mod(ec->start, ec->u_bs)) {
		ti->error = "Device offset must be multiple of underlying block size";
		goto bad;
	}

	ec->bufio = dm_bufio_client_create(ec->dev->bdev, to_bytes(ec->u_bs), 1, 0, NULL, NULL);
	if (IS_ERR(ec->bufio)) {
		ti->error = "Cannot create dm bufio client";
		r = PTR_ERR(ec->bufio);
		ec->bufio = NULL;
		goto bad;
	}

	ec->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!ec->wq) {
		ti->error = "Cannot create dm-" DM_MSG_PREFIX " workqueue";
		r = -ENOMEM;
		goto bad;
	}

	ec->block_shift = __ffs(ec->u_bs);
	INIT_WORK(&ec->ws, &__ebs_process_bios);
	bio_list_init(&ec->bios_in);
	spin_lock_init(&ec->lock);

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_secure_erase_bios = 0;
	ti->num_write_same_bios = 0;
	ti->num_write_zeroes_bios = 0;
	return 0;
bad:
	ebs_dtr(ti);
	return r;
}

static void ebs_dtr(struct dm_target *ti)
{
	struct ebs_c *ec = ti->private;

	if (ec->wq)
		destroy_workqueue(ec->wq);
	if (ec->bufio)
		dm_bufio_client_destroy(ec->bufio);
	if (ec->dev)
		dm_put_device(ti, ec->dev);
	kfree(ec);
}

static int ebs_map(struct dm_target *ti, struct bio *bio)
{
	struct ebs_c *ec = ti->private;

	bio_set_dev(bio, ec->dev->bdev);
	bio->bi_iter.bi_sector = ec->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	if (unlikely(bio_op(bio) == REQ_OP_FLUSH))
		return DM_MAPIO_REMAPPED;

	/*
	 * Only queue for bufio processing in case of partial or overlapping buffers
	 * -or-
	 * emulation with ebs == ubs aiming for tests of dm-bufio overhead.
	 */
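	/*
	 * E.g. (sizes assumed): with e_bs = 1 and u_bs = 8, a 4KiB bio
	 * starting at sector 8 is block-aligned and remapped straight to
	 * the backing device, whereas one starting at sector 9 takes the
	 * queued read-modify-write path below.
	 */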
	if (likely(__block_mod(bio->bi_iter.bi_sector, ec->u_bs) ||
		   __block_mod(bio_end_sector(bio), ec->u_bs) ||
		   ec->e_bs == ec->u_bs)) {
		spin_lock_irq(&ec->lock);
		bio_list_add(&ec->bios_in, bio);
		spin_unlock_irq(&ec->lock);

		queue_work(ec->wq, &ec->ws);

		return DM_MAPIO_SUBMITTED;
	}

	/* Forget any buffer content relative to this direct backing device I/O. */
	__ebs_forget_bio(ec, bio);

	return DM_MAPIO_REMAPPED;
}

static void ebs_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	struct ebs_c *ec = ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		*result = '\0';
		break;
	case STATUSTYPE_TABLE:
		snprintf(result, maxlen, ec->u_bs_set ? "%s %llu %u %u" : "%s %llu %u",
			 ec->dev->name, (unsigned long long) ec->start, ec->e_bs, ec->u_bs);
		break;
	}
}

static int ebs_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct ebs_c *ec = ti->private;
	struct dm_dev *dev = ec->dev;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	*bdev = dev->bdev;
	return !!(ec->start || ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT);
}

static void ebs_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct ebs_c *ec = ti->private;

	limits->logical_block_size = to_bytes(ec->e_bs);
	limits->physical_block_size = to_bytes(ec->u_bs);
	limits->alignment_offset = limits->physical_block_size;
	blk_limits_io_min(limits, limits->logical_block_size);
}
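
/*
 * E.g. (sizes assumed): with e_bs = 1 and u_bs = 8 the limits above
 * advertise a 512-byte logical and a 4096-byte physical block size
 * to the layers stacked on top of this target.
 */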

static int ebs_iterate_devices(struct dm_target *ti,
			       iterate_devices_callout_fn fn, void *data)
{
	struct ebs_c *ec = ti->private;

	return fn(ti, ec->dev, ec->start, ti->len, data);
}

static struct target_type ebs_target = {
	.name = "ebs",
	.version = {1, 0, 1},
	.features = DM_TARGET_PASSES_INTEGRITY,
	.module = THIS_MODULE,
	.ctr = ebs_ctr,
	.dtr = ebs_dtr,
	.map = ebs_map,
	.status = ebs_status,
	.io_hints = ebs_io_hints,
	.prepare_ioctl = ebs_prepare_ioctl,
	.iterate_devices = ebs_iterate_devices,
};

static int __init dm_ebs_init(void)
{
	int r = dm_register_target(&ebs_target);

	if (r < 0)
		DMERR("register failed %d", r);

	return r;
}

static void dm_ebs_exit(void)
{
	dm_unregister_target(&ebs_target);
}

module_init(dm_ebs_init);
module_exit(dm_ebs_exit);

MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_DESCRIPTION(DM_NAME " emulated block size target");
MODULE_LICENSE("GPL");