/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>
static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

	return sector & ~zone_mask;
}
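/*
 * Illustrative sketch (not part of the original source): zone sizes are
 * required to be a power-of-two number of sectors, which is what the mask
 * arithmetic above relies on. For example, with 256 MiB zones
 * (zone_sectors == 524288):
 *
 *	blk_zone_start(q, 600000) == 600000 & ~(524288 - 1) == 524288
 */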
/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
	if (!rq->q->seq_zones_wlock)
		return false;

	if (blk_rq_is_passthrough(rq))
		return false;

	switch (req_op(rq)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE:
		return blk_rq_zone_is_seq(rq);
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
void __blk_req_zone_write_lock(struct request *rq)
{
	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
					  rq->q->seq_zones_wlock)))
		return;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
void __blk_req_zone_write_unlock(struct request *rq)
{
	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
	if (rq->q->seq_zones_wlock)
		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
						 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
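/*
 * Illustrative sketch (not part of the original source): an I/O scheduler
 * such as mq-deadline pairs these helpers around dispatch roughly as
 * below, via the inline wrappers in blkdev.h; rq is a dispatch candidate
 * and returning NULL means "try another request".
 *
 *	if (blk_req_needs_zone_write_lock(rq)) {
 *		if (blk_req_zone_is_write_locked(rq))
 *			return NULL;
 *		blk_req_zone_write_lock(rq);
 *	}
 *
 * The zone is unlocked with blk_req_zone_write_unlock() when the request
 * completes or is requeued.
 */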
/*
 * Check that a zone report belongs to this partition.
 * If yes, fix its start sector and write pointer, copy it into the
 * zone information array and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev,
			       struct blk_zone *rep,
			       struct blk_zone *zone)
{
	sector_t offset = get_start_sect(bdev);

	if (rep->start < offset)
		return false;

	rep->start -= offset;
	if (rep->start + rep->len > bdev->bd_part->nr_sects)
		return false;

	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
		rep->wp = rep->start + rep->len;
	else
		rep->wp -= offset;
	memcpy(zone, rep, sizeof(struct blk_zone));

	return true;
}
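/*
 * Illustrative sketch (not part of the original source): for a partition
 * starting at device sector 524288, a device-relative zone reported at
 * start 786432 is remapped to a partition-relative start of
 * 786432 - 524288 = 262144, and its write pointer is shifted by the
 * same offset.
 */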
/**
 * blkdev_report_zones - Get zones information
 * @bdev:	Target block device
 * @sector:	Sector from which to report zones
 * @zones:	Array of zone structures where to return the zones information
 * @nr_zones:	Number of zone structures in the zone array
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zone information reported may be less than the number
 *    requested by @nr_zones. The number of zones actually reported is
 *    returned in @nr_zones.
 */
int blkdev_report_zones(struct block_device *bdev,
			sector_t sector,
			struct blk_zone *zones,
			unsigned int *nr_zones,
			gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_zone_report_hdr *hdr;
	unsigned int nrz = *nr_zones;
	struct page *page;
	unsigned int nr_rep;
	size_t rep_bytes;
	unsigned int nr_pages;
	struct bio *bio;
	struct bio_vec *bv;
	unsigned int i, n, nz;
	unsigned int ofst;
	void *addr;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (!nrz)
		return 0;

	if (sector > bdev->bd_part->nr_sects) {
		*nr_zones = 0;
		return 0;
	}
	/*
	 * The zone report has a header. So make room for it in the
	 * payload. Also make sure that the report fits in a single BIO
	 * that will not be split down the stack.
	 */
	rep_bytes = sizeof(struct blk_zone_report_hdr) +
		sizeof(struct blk_zone) * nrz;
	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
	if (rep_bytes > (queue_max_sectors(q) << 9))
		rep_bytes = queue_max_sectors(q) << 9;

	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
			 rep_bytes >> PAGE_SHIFT);
	nr_pages = min_t(unsigned int, nr_pages,
			 queue_max_segments(q));

	bio = bio_alloc(gfp_mask, nr_pages);
	if (!bio)
		return -ENOMEM;

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp_mask);
		if (!page) {
			ret = -ENOMEM;
			goto out;
		}
		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
			__free_page(page);
			break;
		}
	}

	if (i == 0)
		ret = -ENOMEM;
	else
		ret = submit_bio_wait(bio);
	if (ret)
		goto out;
	/*
	 * Process the report result: skip the header and go through the
	 * reported zones to fix up the zone information for partitions.
	 * At the same time, return the zone information into the zone array.
	 */
	n = 0;
	nz = 0;
	nr_rep = 0;
	bio_for_each_segment_all(bv, bio, i) {

		if (!bv->bv_page)
			break;

		addr = kmap_atomic(bv->bv_page);

		/* Get header in the first page */
		ofst = 0;
		if (!nr_rep) {
			hdr = addr;
			nr_rep = hdr->nr_zones;
			ofst = sizeof(struct blk_zone_report_hdr);
		}

		/* Fixup and report zones */
		while (ofst < bv->bv_len &&
		       n < nr_rep && nz < nrz) {
			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
				nz++;
			ofst += sizeof(struct blk_zone);
			n++;
		}

		kunmap_atomic(addr);

		if (n >= nr_rep || nz >= nrz)
			break;

	}

	*nr_zones = nz;
out:
	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
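/*
 * Illustrative sketch (not part of the original source): a caller walks
 * all zones by advancing the start sector past the last zone returned.
 * The on-stack array size of 16 is an arbitrary choice for the example.
 *
 *	struct blk_zone zones[16];
 *	sector_t sector = 0, capacity = get_capacity(bdev->bd_disk);
 *	unsigned int nr;
 *	int ret;
 *
 *	while (sector < capacity) {
 *		nr = ARRAY_SIZE(zones);
 *		ret = blkdev_report_zones(bdev, sector, zones, &nr,
 *					  GFP_KERNEL);
 *		if (ret || !nr)
 *			break;
 *		sector = zones[nr - 1].start + zones[nr - 1].len;
 *	}
 */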
/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev:	Target block device
 * @sector:	Start sector of the first zone to reset
 * @nr_sectors:	Number of sectors, at least the length of one zone
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
		       sector_t sector, sector_t nr_sectors,
		       gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors;
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (end_sector > bdev->bd_part->nr_sects)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle eventual smaller last zone) */
	zone_sectors = blk_queue_zone_sectors(q);
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) &&
	    end_sector != bdev->bd_part->nr_sects)
		return -EINVAL;

	while (sector < end_sector) {

		bio = bio_alloc(gfp_mask, 0);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

		ret = submit_bio_wait(bio);
		bio_put(bio);

		if (ret)
			return ret;

		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();
	}

	return 0;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
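/*
 * Illustrative sketch (not part of the original source): resetting the
 * single zone containing a given sector, assuming the caller already
 * holds a reference on bdev.
 *
 *	sector_t zone_sectors = blk_queue_zone_sectors(bdev_get_queue(bdev));
 *	sector_t zone_start = sector & ~(zone_sectors - 1);
 *	int ret;
 *
 *	ret = blkdev_reset_zones(bdev, zone_start, zone_sectors, GFP_KERNEL);
 *	if (ret)
 *		pr_warn("zone reset failed %d\n", ret);
 */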
/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_report rep;
	struct blk_zone *zones;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
		return -ERANGE;
	zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
			       GFP_KERNEL | __GFP_ZERO);
	if (!zones)
		return -ENOMEM;

	ret = blkdev_report_zones(bdev, rep.sector,
				  zones, &rep.nr_zones,
				  GFP_KERNEL);
	if (ret)
		goto out;

	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
		ret = -EFAULT;
		goto out;
	}

	if (rep.nr_zones) {
		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
				 sizeof(struct blk_zone) * rep.nr_zones))
			ret = -EFAULT;
	}

out:
	kvfree(zones);

	return ret;
}
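/*
 * Illustrative sketch (not part of the original source): calling
 * BLKREPORTZONE from user space. struct blk_zone_report is followed in
 * memory by the zone array, so both are allocated together.
 *
 *	struct blk_zone_report *rep;
 *	unsigned int i, nr = 16;
 *
 *	rep = calloc(1, sizeof(*rep) + nr * sizeof(struct blk_zone));
 *	rep->sector = 0;
 *	rep->nr_zones = nr;
 *	if (ioctl(fd, BLKREPORTZONE, rep) == 0) {
 *		for (i = 0; i < rep->nr_zones; i++)
 *			printf("zone %u: start %llu, wp %llu\n", i,
 *			       (unsigned long long)rep->zones[i].start,
 *			       (unsigned long long)rep->zones[i].wp);
 *	}
 *	free(rep);
 */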
/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_range zrange;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
				  GFP_KERNEL);
}
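/*
 * Illustrative sketch (not part of the original source): resetting one
 * zone from user space with BLKRESETZONE. The device path is an example;
 * zone_start and zone_sectors are assumed known to the caller. The device
 * must be opened with write access to pass the FMODE_WRITE check above.
 *
 *	struct blk_zone_range zrange = {
 *		.sector = zone_start,
 *		.nr_sectors = zone_sectors,
 *	};
 *	int fd = open("/dev/sdb", O_RDWR);
 *
 *	if (ioctl(fd, BLKRESETZONE, &zrange) < 0)
 *		perror("BLKRESETZONE");
 *	close(fd);
 */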