2019-04-30 21:42:43 +03:00
// SPDX-License-Identifier: GPL-2.0
2016-10-18 09:40:33 +03:00
/*
* Zoned block device handling
*
* Copyright ( c ) 2015 , Hannes Reinecke
* Copyright ( c ) 2015 , SUSE Linux GmbH
*
* Copyright ( c ) 2016 , Damien Le Moal
* Copyright ( c ) 2016 , Western Digital
*/
# include <linux/kernel.h>
# include <linux/module.h>
# include <linux/rbtree.h>
# include <linux/blkdev.h>
2018-10-12 13:08:50 +03:00
# include <linux/blk-mq.h>
2019-07-01 08:09:18 +03:00
# include <linux/mm.h>
# include <linux/vmalloc.h>
2019-07-01 08:09:16 +03:00
# include <linux/sched/mm.h>
2016-10-18 09:40:33 +03:00
2018-10-12 13:08:47 +03:00
# include "blk.h"
2016-10-18 09:40:33 +03:00
/*
 * Return the first sector of the zone containing @sector.
 *
 * The offset of @sector inside its zone is extracted with a bit mask, which
 * assumes that the zone size is a power of two number of sectors.
 */
static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t in_zone_mask = blk_queue_zone_sectors(q) - 1;

	/* Drop the in-zone offset to land on the zone start. */
	return sector - (sector & in_zone_mask);
}
2017-12-21 09:43:38 +03:00
/*
* Return true if a request is a write requests that needs zone write locking .
*/
bool blk_req_needs_zone_write_lock ( struct request * rq )
{
if ( ! rq - > q - > seq_zones_wlock )
return false ;
if ( blk_rq_is_passthrough ( rq ) )
return false ;
switch ( req_op ( rq ) ) {
case REQ_OP_WRITE_ZEROES :
case REQ_OP_WRITE_SAME :
case REQ_OP_WRITE :
return blk_rq_zone_is_seq ( rq ) ;
default :
return false ;
}
}
EXPORT_SYMBOL_GPL ( blk_req_needs_zone_write_lock ) ;
void __blk_req_zone_write_lock ( struct request * rq )
{
if ( WARN_ON_ONCE ( test_and_set_bit ( blk_rq_zone_no ( rq ) ,
rq - > q - > seq_zones_wlock ) ) )
return ;
WARN_ON_ONCE ( rq - > rq_flags & RQF_ZONE_WRITE_LOCKED ) ;
rq - > rq_flags | = RQF_ZONE_WRITE_LOCKED ;
}
EXPORT_SYMBOL_GPL ( __blk_req_zone_write_lock ) ;
void __blk_req_zone_write_unlock ( struct request * rq )
{
rq - > rq_flags & = ~ RQF_ZONE_WRITE_LOCKED ;
if ( rq - > q - > seq_zones_wlock )
WARN_ON_ONCE ( ! test_and_clear_bit ( blk_rq_zone_no ( rq ) ,
rq - > q - > seq_zones_wlock ) ) ;
}
EXPORT_SYMBOL_GPL ( __blk_req_zone_write_unlock ) ;
2018-10-12 13:08:43 +03:00
/*
 * Return the number of zones needed to cover @nr_sectors sectors. A trailing
 * partial zone counts as one zone. The division is done with a shift by
 * ilog2() of the zone size, so the zone size is assumed to be a power of two.
 */
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
					     sector_t nr_sectors)
{
	sector_t zsz = blk_queue_zone_sectors(q);
	sector_t rounded_up = nr_sectors + zsz - 1;

	return rounded_up >> ilog2(zsz);
}
/**
* blkdev_nr_zones - Get number of zones
* @ bdev : Target block device
*
* Description :
* Return the total number of zones of a zoned block device .
* For a regular block device , the number of zones is always 0.
*/
unsigned int blkdev_nr_zones ( struct block_device * bdev )
{
struct request_queue * q = bdev_get_queue ( bdev ) ;
if ( ! blk_queue_is_zoned ( q ) )
return 0 ;
return __blkdev_nr_zones ( q , bdev - > bd_part - > nr_sects ) ;
}
EXPORT_SYMBOL_GPL ( blkdev_nr_zones ) ;
2016-10-18 09:40:33 +03:00
/*
2018-10-12 13:08:49 +03:00
* Check that a zone report belongs to this partition , and if yes , fix its start
* sector and write pointer and return true . Return false otherwise .
2016-10-18 09:40:33 +03:00
*/
2018-10-12 13:08:49 +03:00
static bool blkdev_report_zone ( struct block_device * bdev , struct blk_zone * rep )
2016-10-18 09:40:33 +03:00
{
sector_t offset = get_start_sect ( bdev ) ;
if ( rep - > start < offset )
return false ;
rep - > start - = offset ;
if ( rep - > start + rep - > len > bdev - > bd_part - > nr_sects )
return false ;
if ( rep - > type = = BLK_ZONE_TYPE_CONVENTIONAL )
rep - > wp = rep - > start + rep - > len ;
else
rep - > wp - = offset ;
return true ;
}
2018-10-12 13:08:49 +03:00
/*
 * Fill @zones with up to *@nr_zones zone descriptors, starting at @sector,
 * by calling the disk report_zones method as many times as needed.
 *
 * The loop stops when enough zones were obtained, when the disk capacity is
 * reached or when the driver reports no more zones. On success, return 0 and
 * store the number of zones obtained in *@nr_zones. On failure, return the
 * error reported by the driver and leave *@nr_zones unchanged.
 */
static int blk_report_zones(struct gendisk *disk, sector_t sector,
			    struct blk_zone *zones, unsigned int *nr_zones)
{
	struct request_queue *q = disk->queue;
	sector_t disk_end = get_capacity(disk);
	unsigned int wanted = *nr_zones;
	unsigned int done = 0;

	while (done < wanted && sector < disk_end) {
		unsigned int got = wanted - done;
		int err;

		err = disk->fops->report_zones(disk, sector, zones + done,
					       &got);
		if (err)
			return err;
		if (got == 0)
			break;

		/* Resume right after the last zone just reported. */
		sector += blk_queue_zone_sectors(q) * got;
		done += got;
	}

	WARN_ON(done > *nr_zones);
	*nr_zones = done;

	return 0;
}
2016-10-18 09:40:33 +03:00
/**
* blkdev_report_zones - Get zones information
* @ bdev : Target block device
* @ sector : Sector from which to report zones
* @ zones : Array of zone structures where to return the zones information
* @ nr_zones : Number of zone structures in the zone array
*
* Description :
* Get zone information starting from the zone containing @ sector .
* The number of zone information reported may be less than the number
* requested by @ nr_zones . The number of zones actually reported is
* returned in @ nr_zones .
2019-07-01 08:09:16 +03:00
* The caller must use memalloc_noXX_save / restore ( ) calls to control
* memory allocations done within this function ( zone array and command
* buffer allocation by the device driver ) .
2016-10-18 09:40:33 +03:00
*/
2018-10-12 13:08:49 +03:00
int blkdev_report_zones ( struct block_device * bdev , sector_t sector ,
2019-07-01 08:09:16 +03:00
struct blk_zone * zones , unsigned int * nr_zones )
2016-10-18 09:40:33 +03:00
{
struct request_queue * q = bdev_get_queue ( bdev ) ;
2018-10-12 13:08:49 +03:00
unsigned int i , nrz ;
2016-10-21 18:42:33 +03:00
int ret ;
2016-10-18 09:40:33 +03:00
if ( ! blk_queue_is_zoned ( q ) )
return - EOPNOTSUPP ;
2018-10-12 13:08:49 +03:00
/*
* A block device that advertized itself as zoned must have a
* report_zones method . If it does not have one defined , the device
* driver has a bug . So warn about that .
*/
if ( WARN_ON_ONCE ( ! bdev - > bd_disk - > fops - > report_zones ) )
return - EOPNOTSUPP ;
2016-10-18 09:40:33 +03:00
2018-10-12 13:08:49 +03:00
if ( ! * nr_zones | | sector > = bdev - > bd_part - > nr_sects ) {
2016-10-18 09:40:33 +03:00
* nr_zones = 0 ;
return 0 ;
}
2018-10-12 13:08:49 +03:00
nrz = min ( * nr_zones ,
__blkdev_nr_zones ( q , bdev - > bd_part - > nr_sects - sector ) ) ;
ret = blk_report_zones ( bdev - > bd_disk , get_start_sect ( bdev ) + sector ,
2019-07-01 08:09:16 +03:00
zones , & nrz ) ;
2016-10-18 09:40:33 +03:00
if ( ret )
2018-10-12 13:08:49 +03:00
return ret ;
2016-10-18 09:40:33 +03:00
2018-10-12 13:08:49 +03:00
for ( i = 0 ; i < nrz ; i + + ) {
if ( ! blkdev_report_zone ( bdev , zones ) )
2016-10-18 09:40:33 +03:00
break ;
2018-10-12 13:08:49 +03:00
zones + + ;
2016-10-18 09:40:33 +03:00
}
2018-10-12 13:08:49 +03:00
* nr_zones = i ;
2016-10-18 09:40:33 +03:00
2018-10-12 13:08:49 +03:00
return 0 ;
2016-10-18 09:40:33 +03:00
}
EXPORT_SYMBOL_GPL ( blkdev_report_zones ) ;
2019-08-01 20:26:36 +03:00
static inline bool blkdev_allow_reset_all_zones ( struct block_device * bdev ,
2019-10-27 17:05:43 +03:00
sector_t sector ,
2019-08-01 20:26:36 +03:00
sector_t nr_sectors )
{
if ( ! blk_queue_zone_resetall ( bdev_get_queue ( bdev ) ) )
return false ;
2019-10-27 17:05:43 +03:00
if ( sector | | nr_sectors ! = part_nr_sects_read ( bdev - > bd_part ) )
2019-08-01 20:26:36 +03:00
return false ;
/*
* REQ_OP_ZONE_RESET_ALL can be executed only if the block device is
* the entire disk , that is , if the blocks device start offset is 0 and
* its capacity is the same as the entire disk .
*/
return get_start_sect ( bdev ) = = 0 & &
part_nr_sects_read ( bdev - > bd_part ) = = get_capacity ( bdev - > bd_disk ) ;
}
2016-10-18 09:40:33 +03:00
/**
2019-10-27 17:05:45 +03:00
* blkdev_zone_mgmt - Execute a zone management operation on a range of zones
2016-10-18 09:40:33 +03:00
* @ bdev : Target block device
2019-10-27 17:05:45 +03:00
* @ op : Operation to be performed on the zones
* @ sector : Start sector of the first zone to operate on
* @ nr_sectors : Number of sectors , should be at least the length of one zone and
* must be zone size aligned .
2016-10-18 09:40:33 +03:00
* @ gfp_mask : Memory allocation flags ( for bio_alloc )
*
* Description :
2019-10-27 17:05:45 +03:00
* Perform the specified operation on the range of zones specified by
2016-10-18 09:40:33 +03:00
* @ sector . . @ sector + @ nr_sectors . Specifying the entire disk sector range
* is valid , but the specified range should not contain conventional zones .
2019-10-27 17:05:45 +03:00
* The operation to execute on each zone can be a zone reset , open , close
* or finish request .
2016-10-18 09:40:33 +03:00
*/
2019-10-27 17:05:45 +03:00
int blkdev_zone_mgmt ( struct block_device * bdev , enum req_opf op ,
sector_t sector , sector_t nr_sectors ,
gfp_t gfp_mask )
2016-10-18 09:40:33 +03:00
{
struct request_queue * q = bdev_get_queue ( bdev ) ;
2019-10-27 17:05:45 +03:00
sector_t zone_sectors = blk_queue_zone_sectors ( q ) ;
2016-10-18 09:40:33 +03:00
sector_t end_sector = sector + nr_sectors ;
2018-10-12 13:08:47 +03:00
struct bio * bio = NULL ;
2016-10-18 09:40:33 +03:00
int ret ;
if ( ! blk_queue_is_zoned ( q ) )
return - EOPNOTSUPP ;
2018-10-12 13:08:47 +03:00
if ( bdev_read_only ( bdev ) )
return - EPERM ;
2019-10-27 17:05:45 +03:00
if ( ! op_is_zone_mgmt ( op ) )
return - EOPNOTSUPP ;
2018-10-12 13:08:47 +03:00
if ( ! nr_sectors | | end_sector > bdev - > bd_part - > nr_sects )
2016-10-18 09:40:33 +03:00
/* Out of range */
return - EINVAL ;
/* Check alignment (handle eventual smaller last zone) */
if ( sector & ( zone_sectors - 1 ) )
return - EINVAL ;
if ( ( nr_sectors & ( zone_sectors - 1 ) ) & &
end_sector ! = bdev - > bd_part - > nr_sects )
return - EINVAL ;
while ( sector < end_sector ) {
2018-10-12 13:08:47 +03:00
bio = blk_next_bio ( bio , 0 , gfp_mask ) ;
2017-08-23 20:10:32 +03:00
bio_set_dev ( bio , bdev ) ;
2016-10-18 09:40:33 +03:00
2019-10-27 17:05:43 +03:00
/*
* Special case for the zone reset operation that reset all
* zones , this is useful for applications like mkfs .
*/
2019-10-27 17:05:45 +03:00
if ( op = = REQ_OP_ZONE_RESET & &
blkdev_allow_reset_all_zones ( bdev , sector , nr_sectors ) ) {
2019-10-27 17:05:43 +03:00
bio - > bi_opf = REQ_OP_ZONE_RESET_ALL ;
break ;
}
2019-10-27 17:05:45 +03:00
bio - > bi_opf = op ;
2019-10-27 17:05:43 +03:00
bio - > bi_iter . bi_sector = sector ;
2016-10-18 09:40:33 +03:00
sector + = zone_sectors ;
/* This may take a while, so be nice to others */
cond_resched ( ) ;
}
2018-10-12 13:08:47 +03:00
ret = submit_bio_wait ( bio ) ;
bio_put ( bio ) ;
return ret ;
2016-10-18 09:40:33 +03:00
}
2019-10-27 17:05:45 +03:00
EXPORT_SYMBOL_GPL ( blkdev_zone_mgmt ) ;
2016-10-18 09:40:35 +03:00
2018-03-09 02:28:50 +03:00
/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 *
 * The user buffer at @arg starts with a struct blk_zone_report header,
 * immediately followed by room for the zone descriptors. The number of zones
 * requested is capped to the number of zones of @bdev. On success, the
 * header is copied back with the number of zones actually reported, followed
 * by that many zone descriptors.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_report rep;
	struct blk_zone *zones;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (copy_from_user(&rep, argp, sizeof(rep)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);

	zones = kvmalloc_array(rep.nr_zones, sizeof(*zones),
			       GFP_KERNEL | __GFP_ZERO);
	if (!zones)
		return -ENOMEM;

	ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones);
	if (ret)
		goto free_zones;

	/* Report header first: it tells the user how many zones follow. */
	if (copy_to_user(argp, &rep, sizeof(rep))) {
		ret = -EFAULT;
		goto free_zones;
	}

	if (rep.nr_zones &&
	    copy_to_user(argp + sizeof(rep), zones,
			 sizeof(*zones) * rep.nr_zones))
		ret = -EFAULT;

free_zones:
	kvfree(zones);

	return ret;
}
2018-03-09 02:28:50 +03:00
/*
2019-10-27 17:05:46 +03:00
* BLKRESETZONE , BLKOPENZONE , BLKCLOSEZONE and BLKFINISHZONE ioctl processing .
2016-10-18 09:40:35 +03:00
* Called from blkdev_ioctl .
*/
2019-10-27 17:05:46 +03:00
int blkdev_zone_mgmt_ioctl ( struct block_device * bdev , fmode_t mode ,
unsigned int cmd , unsigned long arg )
2016-10-18 09:40:35 +03:00
{
void __user * argp = ( void __user * ) arg ;
struct request_queue * q ;
struct blk_zone_range zrange ;
2019-10-27 17:05:46 +03:00
enum req_opf op ;
2016-10-18 09:40:35 +03:00
if ( ! argp )
return - EINVAL ;
q = bdev_get_queue ( bdev ) ;
if ( ! q )
return - ENXIO ;
if ( ! blk_queue_is_zoned ( q ) )
return - ENOTTY ;
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EACCES ;
if ( ! ( mode & FMODE_WRITE ) )
return - EBADF ;
if ( copy_from_user ( & zrange , argp , sizeof ( struct blk_zone_range ) ) )
return - EFAULT ;
2019-10-27 17:05:46 +03:00
switch ( cmd ) {
case BLKRESETZONE :
op = REQ_OP_ZONE_RESET ;
break ;
case BLKOPENZONE :
op = REQ_OP_ZONE_OPEN ;
break ;
case BLKCLOSEZONE :
op = REQ_OP_ZONE_CLOSE ;
break ;
case BLKFINISHZONE :
op = REQ_OP_ZONE_FINISH ;
break ;
default :
return - ENOTTY ;
}
return blkdev_zone_mgmt ( bdev , op , zrange . sector , zrange . nr_sectors ,
GFP_KERNEL ) ;
2016-10-18 09:40:35 +03:00
}
2018-10-12 13:08:50 +03:00
/*
 * Allocate on @node a zeroed bitmap with one bit per zone for @nr_zones
 * zones. The allocation uses GFP_NOIO. Return NULL on failure.
 */
static inline unsigned long *blk_alloc_zone_bitmap(int node,
						   unsigned int nr_zones)
{
	size_t nr_longs = BITS_TO_LONGS(nr_zones);

	return kcalloc_node(nr_longs, sizeof(unsigned long), GFP_NOIO, node);
}
/*
* Allocate an array of struct blk_zone to get nr_zones zone information .
* The allocated array may be smaller than nr_zones .
*/
2019-07-01 08:09:18 +03:00
static struct blk_zone * blk_alloc_zones ( unsigned int * nr_zones )
2018-10-12 13:08:50 +03:00
{
2019-07-01 08:09:18 +03:00
struct blk_zone * zones ;
size_t nrz = min ( * nr_zones , BLK_ZONED_REPORT_MAX_ZONES ) ;
/*
* GFP_KERNEL here is meaningless as the caller task context has
* the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones ( )
* with memalloc_noio_save ( ) .
*/
zones = kvcalloc ( nrz , sizeof ( struct blk_zone ) , GFP_KERNEL ) ;
if ( ! zones ) {
* nr_zones = 0 ;
return NULL ;
2018-10-12 13:08:50 +03:00
}
2019-07-01 08:09:18 +03:00
* nr_zones = nrz ;
return zones ;
2018-10-12 13:08:50 +03:00
}
void blk_queue_free_zone_bitmaps ( struct request_queue * q )
{
kfree ( q - > seq_zones_bitmap ) ;
q - > seq_zones_bitmap = NULL ;
kfree ( q - > seq_zones_wlock ) ;
q - > seq_zones_wlock = NULL ;
}
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
reports.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentially large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device drivers, which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 05:39:22 +03:00
/*
 * Helper function to check the validity of one zone of a zoned block device.
 *
 * @sector is the sector at which @zone is expected to start. If the zone is
 * valid, @sector is advanced by the zone length and true is returned.
 * Otherwise a warning is printed, @sector is left unchanged and false is
 * returned.
 */
static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone,
			   sector_t *sector)
{
	sector_t zsz = blk_queue_zone_sectors(disk->queue);
	sector_t disk_end = get_capacity(disk);
	bool known_type;

	/*
	 * All zones must have the same size, with the exception of an
	 * eventual smaller last zone.
	 */
	if (zone->start + zsz < disk_end && zone->len != zsz) {
		pr_warn(" %s: Invalid zoned device with non constant zone size \n ",
			disk->disk_name);
		return false;
	}

	if (zone->start + zone->len >= disk_end && zone->len > zsz) {
		pr_warn(" %s: Invalid zoned device with larger last zone size \n ",
			disk->disk_name);
		return false;
	}

	/* Check for holes in the zone report */
	if (zone->start != *sector) {
		pr_warn(" %s: Zone gap at sectors %llu..%llu \n ",
			disk->disk_name, *sector, zone->start);
		return false;
	}

	/* Check zone type */
	known_type = zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
		     zone->type == BLK_ZONE_TYPE_SEQWRITE_REQ ||
		     zone->type == BLK_ZONE_TYPE_SEQWRITE_PREF;
	if (!known_type) {
		pr_warn(" %s: Invalid zone type 0x%x at sectors %llu \n ",
			disk->disk_name, (int)zone->type, zone->start);
		return false;
	}

	*sector += zone->len;

	return true;
}
2018-10-12 13:08:50 +03:00
/**
* blk_revalidate_disk_zones - ( re ) allocate and initialize zone bitmaps
* @ disk : Target disk
*
* Helper function for low - level device drivers to ( re ) allocate and initialize
* a disk request queue zone bitmaps . This functions should normally be called
* within the disk - > revalidate method . For BIO based queues , no zone bitmap
* is allocated .
*/
int blk_revalidate_disk_zones ( struct gendisk * disk )
{
struct request_queue * q = disk - > queue ;
unsigned int nr_zones = __blkdev_nr_zones ( q , get_capacity ( disk ) ) ;
unsigned long * seq_zones_wlock = NULL , * seq_zones_bitmap = NULL ;
unsigned int i , rep_nr_zones = 0 , z = 0 , nrz ;
struct blk_zone * zones = NULL ;
2019-07-01 08:09:16 +03:00
unsigned int noio_flag ;
2018-10-12 13:08:50 +03:00
sector_t sector = 0 ;
int ret = 0 ;
/*
* BIO based queues do not use a scheduler so only q - > nr_zones
* needs to be updated so that the sysfs exposed value is correct .
*/
2018-11-15 22:22:51 +03:00
if ( ! queue_is_mq ( q ) ) {
2018-10-12 13:08:50 +03:00
q - > nr_zones = nr_zones ;
return 0 ;
}
2019-07-01 08:09:16 +03:00
/*
* Ensure that all memory allocations in this context are done as
* if GFP_NOIO was specified .
*/
noio_flag = memalloc_noio_save ( ) ;
2018-10-12 13:08:50 +03:00
if ( ! blk_queue_is_zoned ( q ) | | ! nr_zones ) {
nr_zones = 0 ;
goto update ;
}
/* Allocate bitmaps */
ret = - ENOMEM ;
seq_zones_wlock = blk_alloc_zone_bitmap ( q - > node , nr_zones ) ;
if ( ! seq_zones_wlock )
goto out ;
seq_zones_bitmap = blk_alloc_zone_bitmap ( q - > node , nr_zones ) ;
if ( ! seq_zones_bitmap )
goto out ;
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentailly large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 05:39:22 +03:00
/*
* Get zone information to check the zones and initialize
* seq_zones_bitmap .
*/
2018-10-12 13:08:50 +03:00
rep_nr_zones = nr_zones ;
2019-07-01 08:09:18 +03:00
zones = blk_alloc_zones ( & rep_nr_zones ) ;
2018-10-12 13:08:50 +03:00
if ( ! zones )
goto out ;
while ( z < nr_zones ) {
nrz = min ( nr_zones - z , rep_nr_zones ) ;
2019-07-01 08:09:16 +03:00
ret = blk_report_zones ( disk , sector , zones , & nrz ) ;
2018-10-12 13:08:50 +03:00
if ( ret )
goto out ;
if ( ! nrz )
break ;
for ( i = 0 ; i < nrz ; i + + ) {
block: Enhance blk_revalidate_disk_zones()
For ZBC and ZAC zoned devices, the scsi driver revalidation processing
implemented by sd_revalidate_disk() includes a call to
sd_zbc_read_zones() which executes a full disk zone report used to
check that all zones of the disk are the same size. This processing is
followed by a call to blk_revalidate_disk_zones(), used to initialize
the device request queue zone bitmaps (zone type and zone write lock
bitmaps). To do so, blk_revalidate_disk_zones() also executes a full
device zone report to obtain zone types. As a result, the entire
zoned block device revalidation process includes two full device zone
report.
By moving the zone size checks into blk_revalidate_disk_zones(), this
process can be optimized to a single full device zone report, leading to
shorter device scan and revalidation times. This patch implements this
optimization, reducing the original full device zone report implemented
in sd_zbc_check_zones() to a single, small, report zones command
execution to obtain the size of the first zone of the device. Checks
whether all zones of the device are the same size as the first zone
size are moved to the generic blk_check_zone() function called from
blk_revalidate_disk_zones().
This optimization also has the following benefits:
1) fewer memory allocations in the scsi layer during disk revalidation
as the potentailly large buffer for zone report execution is not
needed.
2) Implement zone checks in a generic manner, reducing the burden on
device driver which only need to obtain the zone size and check that
this size is a power of 2 number of LBAs. Any new type of zoned
block device will benefit from this.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-11-11 05:39:22 +03:00
if ( ! blk_zone_valid ( disk , & zones [ i ] , & sector ) ) {
ret = - ENODEV ;
goto out ;
}
2018-10-12 13:08:50 +03:00
if ( zones [ i ] . type ! = BLK_ZONE_TYPE_CONVENTIONAL )
set_bit ( z , seq_zones_bitmap ) ;
z + + ;
}
}
if ( WARN_ON ( z ! = nr_zones ) ) {
ret = - EIO ;
goto out ;
}
update :
/*
* Install the new bitmaps , making sure the queue is stopped and
* all I / Os are completed ( i . e . a scheduler is not referencing the
* bitmaps ) .
*/
blk_mq_freeze_queue ( q ) ;
q - > nr_zones = nr_zones ;
swap ( q - > seq_zones_wlock , seq_zones_wlock ) ;
swap ( q - > seq_zones_bitmap , seq_zones_bitmap ) ;
blk_mq_unfreeze_queue ( q ) ;
out :
2019-07-01 08:09:16 +03:00
memalloc_noio_restore ( noio_flag ) ;
2019-07-01 08:09:18 +03:00
kvfree ( zones ) ;
2018-10-12 13:08:50 +03:00
kfree ( seq_zones_wlock ) ;
kfree ( seq_zones_bitmap ) ;
if ( ret ) {
pr_warn ( " %s: failed to revalidate zones \n " , disk - > disk_name ) ;
blk_mq_freeze_queue ( q ) ;
blk_queue_free_zone_bitmaps ( q ) ;
blk_mq_unfreeze_queue ( q ) ;
}
return ret ;
}
EXPORT_SYMBOL_GPL ( blk_revalidate_disk_zones ) ;