2019-04-30 14:42:43 -04:00
// SPDX-License-Identifier: GPL-2.0
2016-10-18 15:40:33 +09:00
/*
* Zoned block device handling
*
* Copyright ( c ) 2015 , Hannes Reinecke
* Copyright ( c ) 2015 , SUSE Linux GmbH
*
* Copyright ( c ) 2016 , Damien Le Moal
* Copyright ( c ) 2016 , Western Digital
*/
# include <linux/kernel.h>
# include <linux/module.h>
# include <linux/rbtree.h>
# include <linux/blkdev.h>
2018-10-12 19:08:50 +09:00
# include <linux/blk-mq.h>
2019-07-01 14:09:18 +09:00
# include <linux/mm.h>
# include <linux/vmalloc.h>
2019-07-01 14:09:16 +09:00
# include <linux/sched/mm.h>
2016-10-18 15:40:33 +09:00
2018-10-12 19:08:47 +09:00
# include "blk.h"
2020-03-25 10:49:54 -07:00
/*
 * Printable names of the zone conditions, indexed by BLK_ZONE_COND_XXX value.
 * Each entry is the XXX suffix of the matching enumerator. Indices that have
 * no initializer are left NULL.
 */
static const char *const zone_cond_name[] = {
	[BLK_ZONE_COND_NOT_WP]		= "NOT_WP",
	[BLK_ZONE_COND_EMPTY]		= "EMPTY",
	[BLK_ZONE_COND_IMP_OPEN]	= "IMP_OPEN",
	[BLK_ZONE_COND_EXP_OPEN]	= "EXP_OPEN",
	[BLK_ZONE_COND_CLOSED]		= "CLOSED",
	[BLK_ZONE_COND_READONLY]	= "READONLY",
	[BLK_ZONE_COND_FULL]		= "FULL",
	[BLK_ZONE_COND_OFFLINE]		= "OFFLINE",
};
/**
* blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX .
* @ zone_cond : BLK_ZONE_COND_XXX .
*
* Description : Centralize block layer function to convert BLK_ZONE_COND_XXX
* into string format . Useful in the debugging and tracing zone conditions . For
* invalid BLK_ZONE_COND_XXX it returns string " UNKNOWN " .
*/
const char * blk_zone_cond_str ( enum blk_zone_cond zone_cond )
{
static const char * zone_cond_str = " UNKNOWN " ;
if ( zone_cond < ARRAY_SIZE ( zone_cond_name ) & & zone_cond_name [ zone_cond ] )
zone_cond_str = zone_cond_name [ zone_cond ] ;
return zone_cond_str ;
}
EXPORT_SYMBOL_GPL ( blk_zone_cond_str ) ;
2017-12-21 15:43:38 +09:00
/*
* Return true if a request is a write requests that needs zone write locking .
*/
bool blk_req_needs_zone_write_lock ( struct request * rq )
{
2022-07-06 09:03:50 +02:00
if ( blk_rq_is_passthrough ( rq ) )
2017-12-21 15:43:38 +09:00
return false ;
2022-07-06 09:03:50 +02:00
if ( ! rq - > q - > disk - > seq_zones_wlock )
2017-12-21 15:43:38 +09:00
return false ;
2022-09-29 09:47:44 +02:00
if ( bdev_op_is_zoned_write ( rq - > q - > disk - > part0 , req_op ( rq ) ) )
2017-12-21 15:43:38 +09:00
return blk_rq_zone_is_seq ( rq ) ;
2022-09-29 09:47:44 +02:00
return false ;
2017-12-21 15:43:38 +09:00
}
EXPORT_SYMBOL_GPL ( blk_req_needs_zone_write_lock ) ;
2020-05-12 17:55:48 +09:00
bool blk_req_zone_write_trylock ( struct request * rq )
{
unsigned int zno = blk_rq_zone_no ( rq ) ;
2022-07-06 09:03:50 +02:00
if ( test_and_set_bit ( zno , rq - > q - > disk - > seq_zones_wlock ) )
2020-05-12 17:55:48 +09:00
return false ;
WARN_ON_ONCE ( rq - > rq_flags & RQF_ZONE_WRITE_LOCKED ) ;
rq - > rq_flags | = RQF_ZONE_WRITE_LOCKED ;
return true ;
}
EXPORT_SYMBOL_GPL ( blk_req_zone_write_trylock ) ;
2017-12-21 15:43:38 +09:00
void __blk_req_zone_write_lock ( struct request * rq )
{
if ( WARN_ON_ONCE ( test_and_set_bit ( blk_rq_zone_no ( rq ) ,
2022-07-06 09:03:50 +02:00
rq - > q - > disk - > seq_zones_wlock ) ) )
2017-12-21 15:43:38 +09:00
return ;
WARN_ON_ONCE ( rq - > rq_flags & RQF_ZONE_WRITE_LOCKED ) ;
rq - > rq_flags | = RQF_ZONE_WRITE_LOCKED ;
}
EXPORT_SYMBOL_GPL ( __blk_req_zone_write_lock ) ;
void __blk_req_zone_write_unlock ( struct request * rq )
{
rq - > rq_flags & = ~ RQF_ZONE_WRITE_LOCKED ;
2022-07-06 09:03:50 +02:00
if ( rq - > q - > disk - > seq_zones_wlock )
2017-12-21 15:43:38 +09:00
WARN_ON_ONCE ( ! test_and_clear_bit ( blk_rq_zone_no ( rq ) ,
2022-07-06 09:03:50 +02:00
rq - > q - > disk - > seq_zones_wlock ) ) ;
2017-12-21 15:43:38 +09:00
}
EXPORT_SYMBOL_GPL ( __blk_req_zone_write_unlock ) ;
2018-10-12 19:08:43 +09:00
/**
2022-07-06 09:03:45 +02:00
* bdev_nr_zones - Get number of zones
* @ bdev : Target device
2018-10-12 19:08:43 +09:00
*
2019-12-03 10:39:04 +01:00
* Return the total number of zones of a zoned block device . For a block
* device without zone capabilities , the number of zones is always 0.
2018-10-12 19:08:43 +09:00
*/
2022-07-06 09:03:45 +02:00
unsigned int bdev_nr_zones ( struct block_device * bdev )
2018-10-12 19:08:43 +09:00
{
2022-07-06 09:03:45 +02:00
sector_t zone_sectors = bdev_zone_sectors ( bdev ) ;
2018-10-12 19:08:43 +09:00
2022-07-06 09:03:45 +02:00
if ( ! bdev_is_zoned ( bdev ) )
2018-10-12 19:08:43 +09:00
return 0 ;
2022-07-06 09:03:45 +02:00
return ( bdev_nr_sectors ( bdev ) + zone_sectors - 1 ) > >
ilog2 ( zone_sectors ) ;
2018-10-12 19:08:43 +09:00
}
2022-07-06 09:03:45 +02:00
EXPORT_SYMBOL_GPL ( bdev_nr_zones ) ;
2018-10-12 19:08:43 +09:00
2016-10-18 15:40:33 +09:00
/**
* blkdev_report_zones - Get zones information
* @ bdev : Target block device
* @ sector : Sector from which to report zones
2019-11-11 11:39:30 +09:00
* @ nr_zones : Maximum number of zones to report
* @ cb : Callback function called for each reported zone
* @ data : Private data for the callback
2016-10-18 15:40:33 +09:00
*
* Description :
2019-11-11 11:39:30 +09:00
* Get zone information starting from the zone containing @ sector for at most
* @ nr_zones , and call @ cb for each zone reported by the device .
* To report all zones in a device starting from @ sector , the BLK_ALL_ZONES
* constant can be passed to @ nr_zones .
* Returns the number of zones reported by the device , or a negative errno
* value in case of failure .
*
* Note : The caller must use memalloc_noXX_save / restore ( ) calls to control
* memory allocations done within this function .
2016-10-18 15:40:33 +09:00
*/
2018-10-12 19:08:49 +09:00
int blkdev_report_zones ( struct block_device * bdev , sector_t sector ,
2019-11-11 11:39:30 +09:00
unsigned int nr_zones , report_zones_cb cb , void * data )
2016-10-18 15:40:33 +09:00
{
2019-11-11 11:39:24 +09:00
struct gendisk * disk = bdev - > bd_disk ;
2019-11-11 11:39:25 +09:00
sector_t capacity = get_capacity ( disk ) ;
2016-10-18 15:40:33 +09:00
2022-07-06 09:03:37 +02:00
if ( ! bdev_is_zoned ( bdev ) | | WARN_ON_ONCE ( ! disk - > fops - > report_zones ) )
2018-10-12 19:08:49 +09:00
return - EOPNOTSUPP ;
2016-10-18 15:40:33 +09:00
2019-11-11 11:39:30 +09:00
if ( ! nr_zones | | sector > = capacity )
2016-10-18 15:40:33 +09:00
return 0 ;
2019-11-11 11:39:30 +09:00
return disk - > fops - > report_zones ( disk , sector , nr_zones , cb , data ) ;
2016-10-18 15:40:33 +09:00
}
EXPORT_SYMBOL_GPL ( blkdev_report_zones ) ;
2021-05-26 06:24:51 +09:00
static inline unsigned long * blk_alloc_zone_bitmap ( int node ,
unsigned int nr_zones )
2019-08-01 10:26:36 -07:00
{
2021-05-26 06:24:51 +09:00
return kcalloc_node ( BITS_TO_LONGS ( nr_zones ) , sizeof ( unsigned long ) ,
GFP_NOIO , node ) ;
}
2019-08-01 10:26:36 -07:00
2021-05-26 06:24:51 +09:00
static int blk_zone_need_reset_cb ( struct blk_zone * zone , unsigned int idx ,
void * data )
{
2019-08-01 10:26:36 -07:00
/*
2021-05-26 06:24:51 +09:00
* For an all - zones reset , ignore conventional , empty , read - only
* and offline zones .
2019-08-01 10:26:36 -07:00
*/
2021-05-26 06:24:51 +09:00
switch ( zone - > cond ) {
case BLK_ZONE_COND_NOT_WP :
case BLK_ZONE_COND_EMPTY :
case BLK_ZONE_COND_READONLY :
case BLK_ZONE_COND_OFFLINE :
return 0 ;
default :
set_bit ( idx , ( unsigned long * ) data ) ;
return 0 ;
}
}
/*
 * Emulate a reset of all zones with one REQ_OP_ZONE_RESET bio per zone.
 *
 * A full zone report first fills a bitmap of the zones that need a reset
 * (see blk_zone_need_reset_cb()). A chain of reset bios, allocated with
 * @gfp_mask, is then built for those zones only and waited for.
 *
 * Returns 0 on success (including when no zone needs a reset), -ENOMEM if
 * the bitmap cannot be allocated, the negative error of the zone report, or
 * the result of submit_bio_wait().
 */
static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
					  gfp_t gfp_mask)
{
	struct gendisk *disk = bdev->bd_disk;
	sector_t capacity = bdev_nr_sectors(bdev);
	sector_t zone_sectors = bdev_zone_sectors(bdev);
	unsigned long *need_reset;
	struct bio *chain = NULL;
	sector_t pos;
	int ret;

	need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones);
	if (!need_reset)
		return -ENOMEM;

	ret = disk->fops->report_zones(disk, 0, disk->nr_zones,
				       blk_zone_need_reset_cb, need_reset);
	if (ret >= 0) {
		/* The number of reported zones is of no interest here. */
		ret = 0;

		for (pos = 0; pos < capacity; pos += zone_sectors) {
			if (!test_bit(disk_zone_no(disk, pos), need_reset))
				continue;

			chain = blk_next_bio(chain, bdev, 0,
					     REQ_OP_ZONE_RESET | REQ_SYNC,
					     gfp_mask);
			chain->bi_iter.bi_sector = pos;

			/* This may take a while, so be nice to others */
			cond_resched();
		}

		if (chain) {
			ret = submit_bio_wait(chain);
			bio_put(chain);
		}
	}

	kfree(need_reset);
	return ret;
}
/*
 * Reset all zones of @bdev with a single REQ_OP_ZONE_RESET_ALL bio.
 *
 * The bio lives on the stack and carries no data, so @gfp_mask is not used.
 * Returns the result of submit_bio_wait().
 */
static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
{
	struct bio reset_all;
	int ret;

	bio_init(&reset_all, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
	ret = submit_bio_wait(&reset_all);
	return ret;
}
2016-10-18 15:40:33 +09:00
/**
2019-10-27 23:05:45 +09:00
* blkdev_zone_mgmt - Execute a zone management operation on a range of zones
2016-10-18 15:40:33 +09:00
* @ bdev : Target block device
2019-10-27 23:05:45 +09:00
* @ op : Operation to be performed on the zones
* @ sector : Start sector of the first zone to operate on
* @ nr_sectors : Number of sectors , should be at least the length of one zone and
* must be zone size aligned .
2016-10-18 15:40:33 +09:00
* @ gfp_mask : Memory allocation flags ( for bio_alloc )
*
* Description :
2019-10-27 23:05:45 +09:00
* Perform the specified operation on the range of zones specified by
2016-10-18 15:40:33 +09:00
* @ sector . . @ sector + @ nr_sectors . Specifying the entire disk sector range
* is valid , but the specified range should not contain conventional zones .
2019-10-27 23:05:45 +09:00
* The operation to execute on each zone can be a zone reset , open , close
* or finish request .
2016-10-18 15:40:33 +09:00
*/
2022-07-14 11:06:27 -07:00
int blkdev_zone_mgmt ( struct block_device * bdev , enum req_op op ,
sector_t sector , sector_t nr_sectors , gfp_t gfp_mask )
2016-10-18 15:40:33 +09:00
{
struct request_queue * q = bdev_get_queue ( bdev ) ;
2022-07-06 09:03:46 +02:00
sector_t zone_sectors = bdev_zone_sectors ( bdev ) ;
sector_t capacity = bdev_nr_sectors ( bdev ) ;
2016-10-18 15:40:33 +09:00
sector_t end_sector = sector + nr_sectors ;
2018-10-12 19:08:47 +09:00
struct bio * bio = NULL ;
2021-05-26 06:24:51 +09:00
int ret = 0 ;
2016-10-18 15:40:33 +09:00
2022-07-06 09:03:37 +02:00
if ( ! bdev_is_zoned ( bdev ) )
2016-10-18 15:40:33 +09:00
return - EOPNOTSUPP ;
2018-10-12 19:08:47 +09:00
if ( bdev_read_only ( bdev ) )
return - EPERM ;
2019-10-27 23:05:45 +09:00
if ( ! op_is_zone_mgmt ( op ) )
return - EOPNOTSUPP ;
2020-02-12 20:40:27 +03:00
if ( end_sector < = sector | | end_sector > capacity )
2016-10-18 15:40:33 +09:00
/* Out of range */
return - EINVAL ;
/* Check alignment (handle eventual smaller last zone) */
if ( sector & ( zone_sectors - 1 ) )
return - EINVAL ;
2019-11-11 11:39:25 +09:00
if ( ( nr_sectors & ( zone_sectors - 1 ) ) & & end_sector ! = capacity )
2016-10-18 15:40:33 +09:00
return - EINVAL ;
2021-05-26 06:24:51 +09:00
/*
* In the case of a zone reset operation over all zones ,
* REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
* command . For other devices , we emulate this command behavior by
* identifying the zones needing a reset .
*/
if ( op = = REQ_OP_ZONE_RESET & & sector = = 0 & & nr_sectors = = capacity ) {
if ( ! blk_queue_zone_resetall ( q ) )
return blkdev_zone_reset_all_emulated ( bdev , gfp_mask ) ;
return blkdev_zone_reset_all ( bdev , gfp_mask ) ;
}
2016-10-18 15:40:33 +09:00
while ( sector < end_sector ) {
2022-01-24 10:11:02 +01:00
bio = blk_next_bio ( bio , bdev , 0 , op | REQ_SYNC , gfp_mask ) ;
2019-10-27 23:05:43 +09:00
bio - > bi_iter . bi_sector = sector ;
2016-10-18 15:40:33 +09:00
sector + = zone_sectors ;
/* This may take a while, so be nice to others */
cond_resched ( ) ;
}
2018-10-12 19:08:47 +09:00
ret = submit_bio_wait ( bio ) ;
bio_put ( bio ) ;
return ret ;
2016-10-18 15:40:33 +09:00
}
2019-10-27 23:05:45 +09:00
EXPORT_SYMBOL_GPL ( blkdev_zone_mgmt ) ;
2016-10-18 15:40:35 +09:00
2019-11-11 11:39:30 +09:00
/*
 * Private data passed to blkdev_copy_zone_to_user() while serving the
 * BLKREPORTZONE ioctl.
 */
struct zone_report_args {
/* User-space array that receives the reported zones, indexed by zone report index. */
struct blk_zone __user * zones ;
} ;
static int blkdev_copy_zone_to_user ( struct blk_zone * zone , unsigned int idx ,
void * data )
{
struct zone_report_args * args = data ;
if ( copy_to_user ( & args - > zones [ idx ] , zone , sizeof ( struct blk_zone ) ) )
return - EFAULT ;
return 0 ;
}
2018-03-08 15:28:50 -08:00
/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 *
 * @arg points to a user-space struct blk_zone_report header. The reported
 * zones are written to user memory directly after that header. On success
 * the header is copied back with nr_zones set to the number of zones
 * reported and flags set to BLK_ZONE_REP_CAPACITY.
 *
 * Returns 0 on success, -EINVAL for a NULL @arg or a request for zero zones,
 * -ENXIO if the device has no queue, -ENOTTY if it is not zoned, -EFAULT on a
 * failed user copy, or the error of blkdev_report_zones().
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct zone_report_args args;
	struct blk_zone_report rep;
	int nr_reported;

	if (!argp)
		return -EINVAL;

	if (!bdev_get_queue(bdev))
		return -ENXIO;

	if (!bdev_is_zoned(bdev))
		return -ENOTTY;

	if (copy_from_user(&rep, argp, sizeof(rep)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	/* The zone array starts right after the report header. */
	args.zones = argp + sizeof(rep);
	nr_reported = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
					  blkdev_copy_zone_to_user, &args);
	if (nr_reported < 0)
		return nr_reported;

	rep.nr_zones = nr_reported;
	rep.flags = BLK_ZONE_REP_CAPACITY;
	if (copy_to_user(argp, &rep, sizeof(rep)))
		return -EFAULT;

	return 0;
}
2021-03-11 16:25:46 +09:00
/*
 * Truncate the byte range of @bdev that corresponds to the sector range in
 * @zrange.
 *
 * Returns -EINVAL if the sector range is empty, wraps around, or extends past
 * the disk capacity; otherwise the result of truncate_bdev_range().
 */
static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode,
				      const struct blk_zone_range *zrange)
{
	loff_t first_byte, last_byte;

	/* Out of range: empty or wrapping sector range */
	if (zrange->sector + zrange->nr_sectors <= zrange->sector)
		return -EINVAL;

	/* Out of range: past the end of the disk */
	if (zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
		return -EINVAL;

	/* Convert sectors to an inclusive byte range. */
	first_byte = zrange->sector << SECTOR_SHIFT;
	last_byte = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;

	return truncate_bdev_range(bdev, mode, first_byte, last_byte);
}
2018-03-08 15:28:50 -08:00
/*
2019-10-27 23:05:46 +09:00
* BLKRESETZONE , BLKOPENZONE , BLKCLOSEZONE and BLKFINISHZONE ioctl processing .
2016-10-18 15:40:35 +09:00
* Called from blkdev_ioctl .
*/
2019-10-27 23:05:46 +09:00
int blkdev_zone_mgmt_ioctl ( struct block_device * bdev , fmode_t mode ,
unsigned int cmd , unsigned long arg )
2016-10-18 15:40:35 +09:00
{
void __user * argp = ( void __user * ) arg ;
struct request_queue * q ;
struct blk_zone_range zrange ;
2022-07-14 11:06:27 -07:00
enum req_op op ;
2021-03-11 16:25:46 +09:00
int ret ;
2016-10-18 15:40:35 +09:00
if ( ! argp )
return - EINVAL ;
q = bdev_get_queue ( bdev ) ;
if ( ! q )
return - ENXIO ;
2022-07-06 09:03:37 +02:00
if ( ! bdev_is_zoned ( bdev ) )
2016-10-18 15:40:35 +09:00
return - ENOTTY ;
if ( ! ( mode & FMODE_WRITE ) )
return - EBADF ;
if ( copy_from_user ( & zrange , argp , sizeof ( struct blk_zone_range ) ) )
return - EFAULT ;
2019-10-27 23:05:46 +09:00
switch ( cmd ) {
case BLKRESETZONE :
op = REQ_OP_ZONE_RESET ;
2021-03-11 16:25:46 +09:00
/* Invalidate the page cache, including dirty pages. */
2021-11-11 17:52:38 +09:00
filemap_invalidate_lock ( bdev - > bd_inode - > i_mapping ) ;
2021-03-11 16:25:46 +09:00
ret = blkdev_truncate_zone_range ( bdev , mode , & zrange ) ;
if ( ret )
2021-11-11 17:52:38 +09:00
goto fail ;
2019-10-27 23:05:46 +09:00
break ;
case BLKOPENZONE :
op = REQ_OP_ZONE_OPEN ;
break ;
case BLKCLOSEZONE :
op = REQ_OP_ZONE_CLOSE ;
break ;
case BLKFINISHZONE :
op = REQ_OP_ZONE_FINISH ;
break ;
default :
return - ENOTTY ;
}
2021-03-11 16:25:46 +09:00
ret = blkdev_zone_mgmt ( bdev , op , zrange . sector , zrange . nr_sectors ,
GFP_KERNEL ) ;
2021-11-11 17:52:38 +09:00
fail :
if ( cmd = = BLKRESETZONE )
filemap_invalidate_unlock ( bdev - > bd_inode - > i_mapping ) ;
2021-03-11 16:25:46 +09:00
return ret ;
2016-10-18 15:40:35 +09:00
}
2018-10-12 19:08:50 +09:00
2022-07-06 09:03:42 +02:00
void disk_free_zone_bitmaps ( struct gendisk * disk )
2018-10-12 19:08:50 +09:00
{
2022-07-06 09:03:50 +02:00
kfree ( disk - > conv_zones_bitmap ) ;
disk - > conv_zones_bitmap = NULL ;
kfree ( disk - > seq_zones_wlock ) ;
disk - > seq_zones_wlock = NULL ;
2018-10-12 19:08:50 +09:00
}
2019-11-11 11:39:30 +09:00
struct blk_revalidate_zone_args {
struct gendisk * disk ;
2019-12-03 10:39:05 +01:00
unsigned long * conv_zones_bitmap ;
2019-11-11 11:39:30 +09:00
unsigned long * seq_zones_wlock ;
2019-12-03 10:39:06 +01:00
unsigned int nr_zones ;
2019-12-03 10:39:08 +01:00
sector_t zone_sectors ;
2019-11-11 11:39:30 +09:00
sector_t sector ;
} ;
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
/*
* Helper function to check the validity of zones of a zoned block device .
*/
2019-11-11 11:39:30 +09:00
static int blk_revalidate_zone_cb ( struct blk_zone * zone , unsigned int idx ,
void * data )
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
{
2019-11-11 11:39:30 +09:00
struct blk_revalidate_zone_args * args = data ;
struct gendisk * disk = args - > disk ;
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
struct request_queue * q = disk - > queue ;
sector_t capacity = get_capacity ( disk ) ;
/*
* All zones must have the same size , with the exception on an eventual
* smaller last zone .
*/
2019-12-03 10:39:08 +01:00
if ( zone - > start = = 0 ) {
if ( zone - > len = = 0 | | ! is_power_of_2 ( zone - > len ) ) {
pr_warn ( " %s: Invalid zoned device with non power of two zone size (%llu) \n " ,
disk - > disk_name , zone - > len ) ;
return - ENODEV ;
}
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
2019-12-03 10:39:08 +01:00
args - > zone_sectors = zone - > len ;
args - > nr_zones = ( capacity + zone - > len - 1 ) > > ilog2 ( zone - > len ) ;
} else if ( zone - > start + args - > zone_sectors < capacity ) {
if ( zone - > len ! = args - > zone_sectors ) {
pr_warn ( " %s: Invalid zoned device with non constant zone size \n " ,
disk - > disk_name ) ;
return - ENODEV ;
}
} else {
if ( zone - > len > args - > zone_sectors ) {
pr_warn ( " %s: Invalid zoned device with larger last zone size \n " ,
disk - > disk_name ) ;
return - ENODEV ;
}
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
}
/* Check for holes in the zone report */
2019-11-11 11:39:30 +09:00
if ( zone - > start ! = args - > sector ) {
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
pr_warn ( " %s: Zone gap at sectors %llu..%llu \n " ,
2019-11-11 11:39:30 +09:00
disk - > disk_name , args - > sector , zone - > start ) ;
return - ENODEV ;
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
}
/* Check zone type */
switch ( zone - > type ) {
case BLK_ZONE_TYPE_CONVENTIONAL :
2019-12-03 10:39:06 +01:00
if ( ! args - > conv_zones_bitmap ) {
args - > conv_zones_bitmap =
blk_alloc_zone_bitmap ( q - > node , args - > nr_zones ) ;
if ( ! args - > conv_zones_bitmap )
return - ENOMEM ;
}
set_bit ( idx , args - > conv_zones_bitmap ) ;
break ;
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
reports.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
case BLK_ZONE_TYPE_SEQWRITE_REQ :
case BLK_ZONE_TYPE_SEQWRITE_PREF :
2019-12-03 10:39:06 +01:00
if ( ! args - > seq_zones_wlock ) {
args - > seq_zones_wlock =
blk_alloc_zone_bitmap ( q - > node , args - > nr_zones ) ;
if ( ! args - > seq_zones_wlock )
return - ENOMEM ;
}
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
reports.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
break ;
default :
pr_warn ( " %s: Invalid zone type 0x%x at sectors %llu \n " ,
disk - > disk_name , ( int ) zone - > type , zone - > start ) ;
2019-11-11 11:39:30 +09:00
return - ENODEV ;
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
reports.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 11:39:22 +09:00
}
2019-11-11 11:39:30 +09:00
args - > sector + = zone - > len ;
return 0 ;
}
2018-10-12 19:08:50 +09:00
/**
* blk_revalidate_disk_zones - ( re ) allocate and initialize zone bitmaps
* @ disk : Target disk
2020-05-12 17:55:49 +09:00
* @ update_driver_data : Callback to update driver data on the frozen disk
2018-10-12 19:08:50 +09:00
*
* Helper function for low - level device drivers to ( re ) allocate and initialize
* a disk request queue zone bitmaps . This function should normally be called
2019-12-03 10:39:07 +01:00
* within the disk - > revalidate method for blk - mq based drivers . For BIO based
* drivers only disk - > nr_zones needs to be updated so that the sysfs exposed value
* is correct .
2020-05-12 17:55:49 +09:00
* If the @ update_driver_data callback function is not NULL , the callback is
* executed with the device request queue frozen after all zones have been
* checked .
2018-10-12 19:08:50 +09:00
*
* Return: 0 on success. -EIO if the queue is not zoned, is not a blk-mq
* queue, or the disk capacity is zero. -ENODEV if the zone report returned
* no zones or the reported zones do not end exactly at the disk capacity.
* Any negative value returned by the ->report_zones() method is passed
* through unchanged.
*/
2020-05-12 17:55:49 +09:00
int blk_revalidate_disk_zones ( struct gendisk * disk ,
void ( * update_driver_data ) ( struct gendisk * disk ) )
2018-10-12 19:08:50 +09:00
{
struct request_queue * q = disk - > queue ;
2019-12-03 10:39:06 +01:00
struct blk_revalidate_zone_args args = {
. disk = disk ,
} ;
2019-12-03 10:39:08 +01:00
unsigned int noio_flag ;
int ret ;
2018-10-12 19:08:50 +09:00
2019-11-11 11:39:23 +09:00
if ( WARN_ON_ONCE ( ! blk_queue_is_zoned ( q ) ) )
return - EIO ;
2019-12-03 10:39:07 +01:00
if ( WARN_ON_ONCE ( ! queue_is_mq ( q ) ) )
return - EIO ;
2018-10-12 19:08:50 +09:00
2020-07-30 20:25:17 +09:00
if ( ! get_capacity ( disk ) )
return - EIO ;
2019-12-03 10:39:06 +01:00
/*
2019-12-03 10:39:08 +01:00
* Ensure that all memory allocations in this context are done as if
* GFP_NOIO was specified .
2019-12-03 10:39:06 +01:00
*/
2019-12-03 10:39:08 +01:00
noio_flag = memalloc_noio_save ( ) ;
/*
* Run the driver zone report with blk_revalidate_zone_cb as the callback
* and args as its private data. From here on, a positive ret means
* success, zero means no zones were reported, negative is an error.
*/
ret = disk - > fops - > report_zones ( disk , 0 , UINT_MAX ,
blk_revalidate_zone_cb , & args ) ;
2020-11-11 16:36:06 +09:00
if ( ! ret ) {
pr_warn ( " %s: No zones reported \n " , disk - > disk_name ) ;
ret = - ENODEV ;
}
2019-12-03 10:39:08 +01:00
memalloc_noio_restore ( noio_flag ) ;
2018-10-12 19:08:50 +09:00
2020-11-11 16:36:06 +09:00
/*
* If zones were reported , make sure that the entire disk capacity
* has been checked : args.sector must equal the disk capacity once the
* report is complete .
*/
if ( ret > 0 & & args . sector ! = get_capacity ( disk ) ) {
pr_warn ( " %s: Missing zones from sector %llu \n " ,
disk - > disk_name , args . sector ) ;
ret = - ENODEV ;
}
2018-10-12 19:08:50 +09:00
/*
2019-12-03 10:39:08 +01:00
* Install the new bitmaps and update nr_zones only once the queue is
* stopped and all I / Os are completed ( i . e . a scheduler is not
* referencing the bitmaps ) .
2018-10-12 19:08:50 +09:00
*/
blk_mq_freeze_queue ( q ) ;
2020-11-11 16:36:06 +09:00
if ( ret > 0 ) {
2019-12-03 10:39:08 +01:00
blk_queue_chunk_sectors ( q , args . zone_sectors ) ;
2022-07-06 09:03:50 +02:00
disk - > nr_zones = args . nr_zones ;
/* After the swaps, args holds the bitmaps previously set on the disk. */
swap ( disk - > seq_zones_wlock , args . seq_zones_wlock ) ;
swap ( disk - > conv_zones_bitmap , args . conv_zones_bitmap ) ;
2020-05-12 17:55:49 +09:00
if ( update_driver_data )
update_driver_data ( disk ) ;
2019-11-11 11:39:30 +09:00
/* Callers get 0 on success, not the positive report_zones value. */
ret = 0 ;
} else {
2018-10-12 19:08:50 +09:00
pr_warn ( " %s: failed to revalidate zones \n " , disk - > disk_name ) ;
2022-07-06 09:03:42 +02:00
disk_free_zone_bitmaps ( disk ) ;
2018-10-12 19:08:50 +09:00
}
2019-11-11 11:39:30 +09:00
blk_mq_unfreeze_queue ( q ) ;
2018-10-12 19:08:50 +09:00
2019-11-11 11:39:30 +09:00
/*
* Free whatever args still holds: the disk's previous bitmaps after a
* successful swap, or bitmaps that were never installed on failure.
*/
kfree ( args . seq_zones_wlock ) ;
2019-12-03 10:39:05 +01:00
kfree ( args . conv_zones_bitmap ) ;
2018-10-12 19:08:50 +09:00
return ret ;
}
EXPORT_SYMBOL_GPL ( blk_revalidate_disk_zones ) ;
2021-01-28 13:47:32 +09:00
2022-07-06 09:03:41 +02:00
/*
* disk_clear_zone_settings - reset the zone related settings of a disk
* @disk: Target disk
*
* With the disk request queue frozen, call disk_free_zone_bitmaps() on the
* disk, clear QUEUE_FLAG_ZONE_RESETALL and the ELEVATOR_F_ZBD_SEQ_WRITE
* required elevator feature on the queue, and zero the disk zone counts
* (nr_zones, max_open_zones, max_active_zones) and the queue limits
* chunk_sectors, zone_write_granularity and max_zone_append_sectors.
*/
void disk_clear_zone_settings ( struct gendisk * disk )
2021-01-28 13:47:32 +09:00
{
2022-07-06 09:03:41 +02:00
struct request_queue * q = disk - > queue ;
2021-01-28 13:47:32 +09:00
/* All updates below are made between queue freeze and unfreeze. */
blk_mq_freeze_queue ( q ) ;
2022-07-06 09:03:42 +02:00
disk_free_zone_bitmaps ( disk ) ;
2021-01-28 13:47:32 +09:00
blk_queue_flag_clear ( QUEUE_FLAG_ZONE_RESETALL , q ) ;
q - > required_elevator_features & = ~ ELEVATOR_F_ZBD_SEQ_WRITE ;
2022-07-06 09:03:50 +02:00
disk - > nr_zones = 0 ;
disk - > max_open_zones = 0 ;
disk - > max_active_zones = 0 ;
2021-01-28 13:47:32 +09:00
q - > limits . chunk_sectors = 0 ;
q - > limits . zone_write_granularity = 0 ;
q - > limits . max_zone_append_sectors = 0 ;
blk_mq_unfreeze_queue ( q ) ;
}