for-5.20/block-2022-08-04
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmLsRfkQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpj43EADBydQhe7nQHH65gecqvttnio2GqEmcbozt
lKFQlPPd3SHGMAJjSdR1dIwqtPsJ8q6xZXH+TjHhLXb2kgVu+TQ31krNHIqBwE14
s7SsgGRgvopA46lSf/ls18/8sh6Yz1NgI39YcMVPjvkbLaVFK7zRkL9OSp4RQCwH
u/IIHJmV415EeF6QNTgABBel/gEIPBLsvwOxTBIkzDOyUohtExZPYj83MDm7jdr3
jsTUd2MiumNMh7ziMJIp1iN32nQOtIKtwWZaMHDCzfU/IUnBSmh2nj9oXr3+vcwo
IsBMDUfUj9Eig5QQ/XcVIrFezi0GnunpBhScXPqL+dxPN812lzxNjkx6PsC+rPn8
mWmXoaeK1ayoyotdHJlmINNmWUSCkOMwVnA2r1c4Hp4cQS5vRUtkKcpNLTpMhk4I
OwQ3bjt9mA//WlH+apbhJqXqxjcoBwCwMoveJ4mHVtku9lo+JJAKVGdUs17QjZkC
NxACP1MtBcXy1hurNQf14oH5C0Hyg4TBJShPauKmrqGtOFnbOAdX2qIhldvyNfH1
l9cOvGNSgbQ6FLD6MVto6dC/KYOEM3LelVxgNB/80GbSmGwj88Kd/nzQLYFP89JJ
0Wkt14mSkm82gabOvNqXGG8P8hLb/+v6sp4qZv0mf+op0xmb4FB5eaZvoceptVzM
3Z+hmT7MfA==
=pgNf
-----END PGP SIGNATURE-----

Merge tag 'for-5.20/block-2022-08-04' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:

 - NVMe pull requests via Christoph:
      - add support for In-Band authentication (Hannes Reinecke)
      - handle the persistent internal error AER (Michael Kelley)
      - use in-capsule data for TCP I/O queue connect (Caleb Sander)
      - remove timeout for getting RDMA-CM established event (Israel
        Rukshin)
      - misc cleanups (Joel Granados, Sagi Grimberg, Chaitanya Kulkarni,
        Guixin Liu, Xiang wangx)
      - use command_id instead of req->tag in trace_nvme_complete_rq()
        (Bean Huo)
      - various fixes for the new authentication code (Lukas Bulwahn,
        Dan Carpenter, Colin Ian King, Chaitanya Kulkarni, Hannes
        Reinecke)
      - small cleanups (Liu Song, Christoph Hellwig)
      - restore compat_ioctl support (Nick Bowler)
      - make an nvmet-tcp workqueue lockdep-safe (Sagi Grimberg)
      - enable generic interface (/dev/ngXnY) for unknown command sets
        (Joel Granados, Christoph Hellwig)
      - don't always build constants.o (Christoph Hellwig)
      - print the command name of aborted commands (Christoph Hellwig)

 - MD pull requests via Song:
      - Improve raid5 lock contention, by Logan Gunthorpe.
      - Misc fixes to raid5, by Logan Gunthorpe.
      - Fix race condition with md_reap_sync_thread(), by Guoqing Jiang.
      - Fix potential deadlock with raid5_quiesce and
        raid5_get_active_stripe, by Logan Gunthorpe.
      - Refactoring md_alloc(), by Christoph.
      - Fix md disk_name lifetime problems, by Christoph Hellwig.
      - Convert prepare_to_wait() to wait_woken() api, by Logan
        Gunthorpe.
      - Fix sectors_to_do bitmap issue, by Logan Gunthorpe.

 - Work on unifying the null_blk module parameters and configfs API
   (Vincent)

 - drbd bitmap IO error fix (Lars)

 - Set of rnbd fixes (Guoqing, Md Haris)

 - Remove experimental marker on bcache async device registration
   (Coly)

 - Series cleaning up the bio splitting (Christoph)

 - Removal of the sx8 block driver. This hardware never really became
   widespread, and it didn't receive a lot of attention after the
   initial merge of it back in 2005 (Christoph)

 - A few fixes for s390 dasd (Eric, Jiang)

 - Followup set of fixes for ublk (Ming)

 - Support for UBLK_IO_NEED_GET_DATA for ublk (ZiyangZhang)

 - Fixes for the dio dma alignment (Keith)

 - Misc fixes and cleanups (Ming, Yu, Dan, Christophe)

* tag 'for-5.20/block-2022-08-04' of git://git.kernel.dk/linux-block: (136 commits)
  s390/dasd: Establish DMA alignment
  s390/dasd: drop unexpected word 'for' in comments
  ublk_drv: add support for UBLK_IO_NEED_GET_DATA
  ublk_cmd.h: add one new ublk command: UBLK_IO_NEED_GET_DATA
  ublk_drv: cleanup ublksrv_ctrl_dev_info
  ublk_drv: add SET_PARAMS/GET_PARAMS control command
  ublk_drv: fix ublk device leak in case that add_disk fails
  ublk_drv: cancel device even though disk isn't up
  block: fix leaking page ref on truncated direct io
  block: ensure bio_iov_add_page can't fail
  block: ensure iov_iter advances for added pages
  drivers:md:fix a potential use-after-free bug
  md/raid5: Ensure batch_last is released before sleeping for quiesce
  md/raid5: Move stripe_request_ctx up
  md/raid5: Drop unnecessary call to r5c_check_stripe_cache_usage()
  md/raid5: Make is_inactive_blocked() helper
  md/raid5: Refactor raid5_get_active_stripe()
  block: pass struct queue_limits to the bio splitting helpers
  block: move bio_allowed_max_sectors to blk-merge.c
  block: move the call to get_max_io_size out of blk_bio_segment_split
  ...
commit fa9db655d0
@@ -72,6 +72,28 @@ submit_queues=[1..nr_cpus]: Default: 1
 hw_queue_depth=[0..qdepth]: Default: 64
   The hardware queue depth of the device.
 
+memory_backed=[0/1]: Default: 0
+  Whether or not to use a memory buffer to respond to IO requests
+
+  = =============================================
+  0 Transfer no data in response to IO requests
+  1 Use a memory buffer to respond to IO requests
+  = =============================================
+
+discard=[0/1]: Default: 0
+  Support discard operations (requires memory-backed null_blk device).
+
+  = =====================================
+  0 Do not support discard operations
+  1 Enable support for discard operations
+  = =====================================
+
+cache_size=[Size in MB]: Default: 0
+  Cache size in MB for memory-backed device.
+
+mbps=[Maximum bandwidth in MB/s]: Default: 0 (no limit)
+  Bandwidth limit for device performance.
+
 Multi-queue specific parameters
 -------------------------------
@@ -14507,7 +14507,8 @@ S:	Supported
 W:	http://git.infradead.org/nvme.git
 T:	git://git.infradead.org/nvme.git
 F:	drivers/nvme/host/
-F:	include/linux/nvme.h
+F:	drivers/nvme/common/
+F:	include/linux/nvme*
 F:	include/uapi/linux/nvme_ioctl.h
 
 NVM EXPRESS FC TRANSPORT DRIVERS
@@ -18838,6 +18839,7 @@ SOFTWARE RAID (Multiple Disks) SUPPORT
 M:	Song Liu <song@kernel.org>
 L:	linux-raid@vger.kernel.org
 S:	Supported
+Q:	https://patchwork.kernel.org/project/linux-raid/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/song/md.git
 F:	drivers/md/Kconfig
 F:	drivers/md/Makefile
@@ -134,7 +134,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 	iv = bip->bip_vec + bip->bip_vcnt;
 
 	if (bip->bip_vcnt &&
-	    bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev),
+	    bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
 			     &bip->bip_vec[bip->bip_vcnt - 1], offset))
 		return 0;
 
block/bio.c

@@ -965,7 +965,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		 * would create a gap, disallow it.
 		 */
 		bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
-		if (bvec_gap_to_prev(q, bvec, offset))
+		if (bvec_gap_to_prev(&q->limits, bvec, offset))
 			return 0;
 	}
 
@@ -1151,22 +1151,12 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
 	bio_set_flag(bio, BIO_CLONED);
 }
 
-static void bio_put_pages(struct page **pages, size_t size, size_t off)
-{
-	size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
-
-	for (i = 0; i < nr; i++)
-		put_page(pages[i]);
-}
-
 static int bio_iov_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int offset)
 {
 	bool same_page = false;
 
 	if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
-		if (WARN_ON_ONCE(bio_full(bio, len)))
-			return -EINVAL;
 		__bio_add_page(bio, page, len, offset);
 		return 0;
 	}
@@ -1209,8 +1199,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
 	ssize_t size, left;
-	unsigned len, i;
+	unsigned len, i = 0;
 	size_t offset;
+	int ret = 0;
 
 	/*
 	 * Move page array up in the allocated memory for the bio vecs as far as
@@ -1227,32 +1218,40 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	 * result to ensure the bio's total size is correct. The remainder of
 	 * the iov data will be picked up in the next bio iteration.
 	 */
-	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
-	if (size > 0)
+	size = iov_iter_get_pages(iter, pages, UINT_MAX - bio->bi_iter.bi_size,
+				  nr_pages, &offset);
+	if (size > 0) {
+		nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
 		size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
-	if (unlikely(size <= 0))
-		return size ? size : -EFAULT;
+	} else
+		nr_pages = 0;
+
+	if (unlikely(size <= 0)) {
+		ret = size ? size : -EFAULT;
+		goto out;
+	}
 
 	for (left = size, i = 0; left > 0; left -= len, i++) {
 		struct page *page = pages[i];
-		int ret;
 
 		len = min_t(size_t, PAGE_SIZE - offset, left);
-		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
 			ret = bio_iov_add_zone_append_page(bio, page, len,
 					offset);
-		else
-			ret = bio_iov_add_page(bio, page, len, offset);
+			if (ret)
+				break;
+		} else
+			bio_iov_add_page(bio, page, len, offset);
 
-		if (ret) {
-			bio_put_pages(pages + i, left, offset);
-			return ret;
-		}
 		offset = 0;
 	}
 
-	iov_iter_advance(iter, size);
-	return 0;
+	iov_iter_advance(iter, size - left);
+out:
+	while (i < nr_pages)
+		put_page(pages[i++]);
+
+	return ret;
 }
 
 /**
@@ -377,7 +377,6 @@ static void blk_timeout_work(struct work_struct *work)
 struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
 {
 	struct request_queue *q;
-	int ret;
 
 	q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
 			GFP_KERNEL | __GFP_ZERO, node_id);
@@ -396,13 +395,9 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
 	if (q->id < 0)
 		goto fail_srcu;
 
-	ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, 0);
-	if (ret)
-		goto fail_id;
-
 	q->stats = blk_alloc_queue_stats();
 	if (!q->stats)
-		goto fail_split;
+		goto fail_id;
 
 	q->node = node_id;
 
@@ -439,8 +434,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
 
 fail_stats:
 	blk_free_queue_stats(q->stats);
-fail_split:
-	bioset_exit(&q->bio_split);
 fail_id:
 	ida_free(&blk_queue_ida, q->id);
 fail_srcu:
@@ -82,7 +82,7 @@ static inline bool bio_will_gap(struct request_queue *q,
 	bio_get_first_bvec(next, &nb);
 	if (biovec_phys_mergeable(q, &pb, &nb))
 		return false;
-	return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
 }
 
 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
@@ -95,23 +95,30 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
 	return bio_will_gap(req->q, NULL, bio, req->bio);
 }
 
-static struct bio *blk_bio_discard_split(struct request_queue *q,
-					 struct bio *bio,
-					 struct bio_set *bs,
-					 unsigned *nsegs)
+/*
+ * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size
+ * is defined as 'unsigned int', meantime it has to be aligned to with the
+ * logical block size, which is the minimum accepted unit by hardware.
+ */
+static unsigned int bio_allowed_max_sectors(struct queue_limits *lim)
+{
+	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
+}
+
+static struct bio *bio_split_discard(struct bio *bio, struct queue_limits *lim,
+		unsigned *nsegs, struct bio_set *bs)
 {
 	unsigned int max_discard_sectors, granularity;
-	int alignment;
 	sector_t tmp;
 	unsigned split_sectors;
 
 	*nsegs = 1;
 
 	/* Zero-sector (unknown) and one-sector granularities are the same. */
-	granularity = max(q->limits.discard_granularity >> 9, 1U);
+	granularity = max(lim->discard_granularity >> 9, 1U);
 
-	max_discard_sectors = min(q->limits.max_discard_sectors,
-			bio_allowed_max_sectors(q));
+	max_discard_sectors =
+		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
 	max_discard_sectors -= max_discard_sectors % granularity;
 
 	if (unlikely(!max_discard_sectors)) {
@@ -128,9 +135,8 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
 	 * If the next starting sector would be misaligned, stop the discard at
 	 * the previous aligned sector.
 	 */
-	alignment = (q->limits.discard_alignment >> 9) % granularity;
-
-	tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
+	tmp = bio->bi_iter.bi_sector + split_sectors -
+		((lim->discard_alignment >> 9) % granularity);
 	tmp = sector_div(tmp, granularity);
 
 	if (split_sectors > tmp)
@@ -139,18 +145,15 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
 	return bio_split(bio, split_sectors, GFP_NOIO, bs);
 }
 
-static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
-		struct bio *bio, struct bio_set *bs, unsigned *nsegs)
+static struct bio *bio_split_write_zeroes(struct bio *bio,
+		struct queue_limits *lim, unsigned *nsegs, struct bio_set *bs)
 {
 	*nsegs = 0;
-
-	if (!q->limits.max_write_zeroes_sectors)
+	if (!lim->max_write_zeroes_sectors)
 		return NULL;
-
-	if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
+	if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
 		return NULL;
-
-	return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
+	return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
 }
 
 /*
@@ -161,17 +164,17 @@ static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
  * requests that are submitted to a block device if the start of a bio is not
  * aligned to a physical block boundary.
  */
-static inline unsigned get_max_io_size(struct request_queue *q,
-				       struct bio *bio)
+static inline unsigned get_max_io_size(struct bio *bio,
+		struct queue_limits *lim)
 {
-	unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
-	unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
-	unsigned max_sectors = queue_max_sectors(q), start, end;
+	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
+	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
+	unsigned max_sectors = lim->max_sectors, start, end;
 
-	if (q->limits.chunk_sectors) {
+	if (lim->chunk_sectors) {
 		max_sectors = min(max_sectors,
 			blk_chunk_sectors_left(bio->bi_iter.bi_sector,
-					q->limits.chunk_sectors));
+					lim->chunk_sectors));
 	}
 
 	start = bio->bi_iter.bi_sector & (pbs - 1);
@@ -181,11 +184,10 @@ static inline unsigned get_max_io_size(struct request_queue *q,
 	return max_sectors & ~(lbs - 1);
 }
 
-static inline unsigned get_max_segment_size(const struct request_queue *q,
-					    struct page *start_page,
-					    unsigned long offset)
+static inline unsigned get_max_segment_size(struct queue_limits *lim,
+		struct page *start_page, unsigned long offset)
 {
-	unsigned long mask = queue_segment_boundary(q);
+	unsigned long mask = lim->seg_boundary_mask;
 
 	offset = mask & (page_to_phys(start_page) + offset);
 
@@ -194,12 +196,12 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
 	 * on 32bit arch, use queue's max segment size when that happens.
 	 */
 	return min_not_zero(mask - offset + 1,
-			(unsigned long)queue_max_segment_size(q));
+			(unsigned long)lim->max_segment_size);
 }
 
 /**
  * bvec_split_segs - verify whether or not a bvec should be split in the middle
- * @q:        [in] request queue associated with the bio associated with @bv
+ * @lim:      [in] queue limits to split based on
  * @bv:       [in] bvec to examine
  * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
  *            by the number of segments from @bv that may be appended to that
@@ -217,9 +219,8 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
 * *@nsegs segments and *@sectors sectors would make that bio unacceptable for
 * the block driver.
 */
-static bool bvec_split_segs(const struct request_queue *q,
-			    const struct bio_vec *bv, unsigned *nsegs,
-			    unsigned *bytes, unsigned max_segs,
+static bool bvec_split_segs(struct queue_limits *lim, const struct bio_vec *bv,
+		unsigned *nsegs, unsigned *bytes, unsigned max_segs,
 		unsigned max_bytes)
 {
 	unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
@@ -228,7 +229,7 @@ static bool bvec_split_segs(const struct request_queue *q,
 	unsigned seg_size = 0;
 
 	while (len && *nsegs < max_segs) {
-		seg_size = get_max_segment_size(q, bv->bv_page,
+		seg_size = get_max_segment_size(lim, bv->bv_page,
 						bv->bv_offset + total_len);
 		seg_size = min(seg_size, len);
 
@@ -236,7 +237,7 @@ static bool bvec_split_segs(const struct request_queue *q,
 		total_len += seg_size;
 		len -= seg_size;
 
-		if ((bv->bv_offset + total_len) & queue_virt_boundary(q))
+		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
 			break;
 	}
 
@@ -247,16 +248,17 @@ static bool bvec_split_segs(const struct request_queue *q,
 }
 
 /**
- * blk_bio_segment_split - split a bio in two bios
- * @q:    [in] request queue pointer
+ * bio_split_rw - split a bio in two bios
  * @bio:  [in] bio to be split
- * @bs:	  [in] bio set to allocate the clone from
+ * @lim:  [in] queue limits to split based on
 * @segs: [out] number of segments in the bio with the first half of the sectors
+ * @bs:	  [in] bio set to allocate the clone from
+ * @max_bytes: [in] maximum number of bytes per bio
 *
 * Clone @bio, update the bi_iter of the clone to represent the first sectors
 * of @bio and update @bio->bi_iter to represent the remaining sectors. The
 * following is guaranteed for the cloned bio:
- * - That it has at most get_max_io_size(@q, @bio) sectors.
+ * - That it has at most @max_bytes worth of data
 * - That it has at most queue_max_segments(@q) segments.
 *
 * Except for discard requests the cloned bio will point at the bi_io_vec of
@@ -265,32 +267,29 @@ static bool bvec_split_segs(const struct request_queue *q,
 * responsible for ensuring that @bs is only destroyed after processing of the
 * split bio has finished.
 */
-static struct bio *blk_bio_segment_split(struct request_queue *q,
-					 struct bio *bio,
-					 struct bio_set *bs,
-					 unsigned *segs)
+static struct bio *bio_split_rw(struct bio *bio, struct queue_limits *lim,
+		unsigned *segs, struct bio_set *bs, unsigned max_bytes)
 {
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
 	unsigned nsegs = 0, bytes = 0;
-	const unsigned max_bytes = get_max_io_size(q, bio) << 9;
-	const unsigned max_segs = queue_max_segments(q);
 
 	bio_for_each_bvec(bv, bio, iter) {
 		/*
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */
-		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
+		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
 			goto split;
 
-		if (nsegs < max_segs &&
+		if (nsegs < lim->max_segments &&
 		    bytes + bv.bv_len <= max_bytes &&
 		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
 			nsegs++;
 			bytes += bv.bv_len;
-		} else if (bvec_split_segs(q, &bv, &nsegs, &bytes, max_segs,
-				max_bytes)) {
+		} else {
+			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
+					lim->max_segments, max_bytes))
 				goto split;
 		}
 
@@ -308,7 +307,7 @@ split:
 	 * split size so that each bio is properly block size aligned, even if
 	 * we do not use the full hardware limits.
 	 */
-	bytes = ALIGN_DOWN(bytes, queue_logical_block_size(q));
+	bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
 
 	/*
 	 * Bio splitting may cause subtle trouble such as hang when doing sync
@@ -320,34 +319,35 @@ split:
 }
 
 /**
- * __blk_queue_split - split a bio and submit the second half
- * @q:       [in] request_queue new bio is being queued at
- * @bio:     [in, out] bio to be split
- * @nr_segs: [out] number of segments in the first bio
+ * __bio_split_to_limits - split a bio to fit the queue limits
+ * @bio:     bio to be split
+ * @lim:     queue limits to split based on
+ * @nr_segs: returns the number of segments in the returned bio
 *
- * Split a bio into two bios, chain the two bios, submit the second half and
- * store a pointer to the first half in *@bio. If the second bio is still too
- * big it will be split by a recursive call to this function. Since this
- * function may allocate a new bio from q->bio_split, it is the responsibility
- * of the caller to ensure that q->bio_split is only released after processing
- * of the split bio has finished.
+ * Check if @bio needs splitting based on the queue limits, and if so split off
+ * a bio fitting the limits from the beginning of @bio and return it. @bio is
+ * shortened to the remainder and re-submitted.
+ *
+ * The split bio is allocated from @q->bio_split, which is provided by the
+ * block layer.
 */
-void __blk_queue_split(struct request_queue *q, struct bio **bio,
-		       unsigned int *nr_segs)
+struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim,
+		       unsigned int *nr_segs)
 {
-	struct bio *split = NULL;
+	struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
+	struct bio *split;
 
-	switch (bio_op(*bio)) {
+	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-		split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs);
+		split = bio_split_discard(bio, lim, nr_segs, bs);
 		break;
 	case REQ_OP_WRITE_ZEROES:
-		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
-				nr_segs);
+		split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
 		break;
 	default:
-		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
+		split = bio_split_rw(bio, lim, nr_segs, bs,
+				get_max_io_size(bio, lim) << SECTOR_SHIFT);
 		break;
 	}
 
@@ -356,32 +356,35 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
 		split->bi_opf |= REQ_NOMERGE;
 
 		blkcg_bio_issue_init(split);
-		bio_chain(split, *bio);
-		trace_block_split(split, (*bio)->bi_iter.bi_sector);
-		submit_bio_noacct(*bio);
-		*bio = split;
+		bio_chain(split, bio);
+		trace_block_split(split, bio->bi_iter.bi_sector);
+		submit_bio_noacct(bio);
+		return split;
 	}
+	return bio;
 }
 
 /**
- * blk_queue_split - split a bio and submit the second half
- * @bio: [in, out] bio to be split
+ * bio_split_to_limits - split a bio to fit the queue limits
+ * @bio: bio to be split
 *
- * Split a bio into two bios, chains the two bios, submit the second half and
- * store a pointer to the first half in *@bio. Since this function may allocate
- * a new bio from q->bio_split, it is the responsibility of the caller to ensure
- * that q->bio_split is only released after processing of the split bio has
- * finished.
+ * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
+ * if so split off a bio fitting the limits from the beginning of @bio and
+ * return it. @bio is shortened to the remainder and re-submitted.
+ *
+ * The split bio is allocated from @q->bio_split, which is provided by the
+ * block layer.
 */
-void blk_queue_split(struct bio **bio)
+struct bio *bio_split_to_limits(struct bio *bio)
 {
-	struct request_queue *q = bdev_get_queue((*bio)->bi_bdev);
+	struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
 	unsigned int nr_segs;
 
-	if (blk_may_split(q, *bio))
-		__blk_queue_split(q, bio, &nr_segs);
+	if (bio_may_exceed_limits(bio, lim))
+		return __bio_split_to_limits(bio, lim, &nr_segs);
+	return bio;
 }
-EXPORT_SYMBOL(blk_queue_split);
+EXPORT_SYMBOL(bio_split_to_limits);
 
 unsigned int blk_recalc_rq_segments(struct request *rq)
 {
@@ -411,7 +414,7 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
 	}
 
 	rq_for_each_bvec(bv, rq, iter)
-		bvec_split_segs(rq->q, &bv, &nr_phys_segs, &bytes,
+		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
 				UINT_MAX, UINT_MAX);
 	return nr_phys_segs;
 }
@@ -442,8 +445,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
 
 	while (nbytes > 0) {
 		unsigned offset = bvec->bv_offset + total;
-		unsigned len = min(get_max_segment_size(q, bvec->bv_page,
-					offset), nbytes);
+		unsigned len = min(get_max_segment_size(&q->limits,
+				bvec->bv_page, offset), nbytes);
 		struct page *page = bvec->bv_page;
 
 		/*
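
The hunks above replace the old __blk_queue_split()/blk_queue_split() entry points with __bio_split_to_limits()/bio_split_to_limits(), which return the bio to use instead of updating a struct bio ** in place. A minimal sketch of how a bio-based driver's ->submit_bio handler calls the new interface (the drbd hunk later in this diff makes exactly this change; the driver name and helper below are hypothetical, not part of the diff):

/*
 * Hedged sketch only: "mydrv" and mydrv_handle_bio() are illustrative
 * placeholders.  bio_split_to_limits() splits off and returns a front
 * portion that fits bio->bi_bdev's queue limits and re-submits the
 * remainder of the original bio itself.
 */
static void mydrv_submit_bio(struct bio *bio)
{
	bio = bio_split_to_limits(bio);

	/* driver-specific processing of the (possibly shortened) bio */
	mydrv_handle_bio(bio);
}
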
@@ -2815,9 +2815,9 @@ void blk_mq_submit_bio(struct bio *bio)
 	unsigned int nr_segs = 1;
 	blk_status_t ret;
 
-	blk_queue_bounce(q, &bio);
-	if (blk_may_split(q, bio))
-		__blk_queue_split(q, &bio, &nr_segs);
+	bio = blk_queue_bounce(bio, q);
+	if (bio_may_exceed_limits(bio, &q->limits))
+		bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
 
 	if (!bio_integrity_prep(bio))
 		return;
@@ -779,8 +779,6 @@ static void blk_release_queue(struct kobject *kobj)
 	if (queue_is_mq(q))
 		blk_mq_release(q);
 
-	bioset_exit(&q->bio_split);
-
 	if (blk_queue_has_srcu(q))
 		cleanup_srcu_struct(q->srcu);
 
block/blk.h

@@ -97,23 +97,23 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
 	return true;
 }
 
-static inline bool __bvec_gap_to_prev(struct request_queue *q,
+static inline bool __bvec_gap_to_prev(struct queue_limits *lim,
 		struct bio_vec *bprv, unsigned int offset)
 {
-	return (offset & queue_virt_boundary(q)) ||
-		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+	return (offset & lim->virt_boundary_mask) ||
+		((bprv->bv_offset + bprv->bv_len) & lim->virt_boundary_mask);
 }
 
 /*
 * Check if adding a bio_vec after bprv with offset would create a gap in
 * the SG list. Most drivers don't care about this, but some do.
 */
-static inline bool bvec_gap_to_prev(struct request_queue *q,
+static inline bool bvec_gap_to_prev(struct queue_limits *lim,
 		struct bio_vec *bprv, unsigned int offset)
 {
-	if (!queue_virt_boundary(q))
+	if (!lim->virt_boundary_mask)
 		return false;
-	return __bvec_gap_to_prev(q, bprv, offset);
+	return __bvec_gap_to_prev(lim, bprv, offset);
 }
 
 static inline bool rq_mergeable(struct request *rq)
@@ -189,7 +189,8 @@ static inline bool integrity_req_gap_back_merge(struct request *req,
 	struct bio_integrity_payload *bip = bio_integrity(req->bio);
 	struct bio_integrity_payload *bip_next = bio_integrity(next);
 
-	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
+	return bvec_gap_to_prev(&req->q->limits,
+				&bip->bip_vec[bip->bip_vcnt - 1],
 				bip_next->bip_vec[0].bv_offset);
 }
 
@@ -199,7 +200,8 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_integrity_payload *bip_next = bio_integrity(req->bio);
 
-	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
+	return bvec_gap_to_prev(&req->q->limits,
+				&bip->bip_vec[bip->bip_vcnt - 1],
 				bip_next->bip_vec[0].bv_offset);
 }
 
@@ -288,7 +290,8 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
 				const char *, size_t);
 
-static inline bool blk_may_split(struct request_queue *q, struct bio *bio)
+static inline bool bio_may_exceed_limits(struct bio *bio,
+		struct queue_limits *lim)
 {
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
@@ -307,11 +310,11 @@ static inline bool blk_may_split(struct request_queue *q, struct bio *bio)
 	 * to the performance impact of cloned bios themselves the loop below
 	 * doesn't matter anyway.
 	 */
-	return q->limits.chunk_sectors || bio->bi_vcnt != 1 ||
+	return lim->chunk_sectors || bio->bi_vcnt != 1 ||
 		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
 }
 
-void __blk_queue_split(struct request_queue *q, struct bio **bio,
-		unsigned int *nr_segs);
+struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim,
+		unsigned int *nr_segs);
 int ll_back_merge_fn(struct request *req, struct bio *bio,
 		unsigned int nr_segs);
@@ -344,16 +347,6 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
 	q->last_merge = NULL;
 }
 
-/*
- * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size
- * is defined as 'unsigned int', meantime it has to aligned to with logical
- * block size which is the minimum accepted unit by hardware.
- */
-static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
-{
-	return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
-}
-
 /*
 * Internal io_context interface
 */
@@ -378,7 +371,7 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
 static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
 #endif
 
-void __blk_queue_bounce(struct request_queue *q, struct bio **bio);
+struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);
 
 static inline bool blk_queue_may_bounce(struct request_queue *q)
 {
@@ -387,10 +380,12 @@ static inline bool blk_queue_may_bounce(struct request_queue *q)
 		max_low_pfn >= max_pfn;
 }
 
-static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
+static inline struct bio *blk_queue_bounce(struct bio *bio,
+		struct request_queue *q)
 {
-	if (unlikely(blk_queue_may_bounce(q) && bio_has_data(*bio)))
-		__blk_queue_bounce(q, bio);
+	if (unlikely(blk_queue_may_bounce(q) && bio_has_data(bio)))
+		return __blk_queue_bounce(bio, q);
+	return bio;
 }
 
 #ifdef CONFIG_BLK_CGROUP_IOLATENCY
@@ -199,24 +199,24 @@ err_put:
 	return NULL;
 }
 
-void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
+struct bio *__blk_queue_bounce(struct bio *bio_orig, struct request_queue *q)
 {
 	struct bio *bio;
-	int rw = bio_data_dir(*bio_orig);
+	int rw = bio_data_dir(bio_orig);
 	struct bio_vec *to, from;
 	struct bvec_iter iter;
 	unsigned i = 0, bytes = 0;
 	bool bounce = false;
 	int sectors;
 
-	bio_for_each_segment(from, *bio_orig, iter) {
+	bio_for_each_segment(from, bio_orig, iter) {
 		if (i++ < BIO_MAX_VECS)
 			bytes += from.bv_len;
 		if (PageHighMem(from.bv_page))
 			bounce = true;
 	}
 	if (!bounce)
-		return;
+		return bio_orig;
 
 	/*
 	 * Individual bvecs might not be logical block aligned. Round down
@@ -225,13 +225,13 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	 */
 	sectors = ALIGN_DOWN(bytes, queue_logical_block_size(q)) >>
 			SECTOR_SHIFT;
-	if (sectors < bio_sectors(*bio_orig)) {
-		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
-		bio_chain(bio, *bio_orig);
-		submit_bio_noacct(*bio_orig);
-		*bio_orig = bio;
+	if (sectors < bio_sectors(bio_orig)) {
+		bio = bio_split(bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
+		bio_chain(bio, bio_orig);
+		submit_bio_noacct(bio_orig);
+		bio_orig = bio;
 	}
-	bio = bounce_clone_bio(*bio_orig);
+	bio = bounce_clone_bio(bio_orig);
 
 	/*
 	 * Bvec table can't be updated by bio_for_each_segment_all(),
@@ -254,7 +254,7 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 		to->bv_page = bounce_page;
 	}
 
-	trace_block_bio_bounce(*bio_orig);
+	trace_block_bio_bounce(bio_orig);
 
 	bio->bi_flags |= (1 << BIO_BOUNCED);
 
@@ -263,6 +263,6 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	else
 		bio->bi_end_io = bounce_end_io_write;
 
-	bio->bi_private = *bio_orig;
-	*bio_orig = bio;
+	bio->bi_private = bio_orig;
+	return bio;
 }
@@ -1151,6 +1151,7 @@ static void disk_release(struct device *dev)
 		blk_mq_exit_queue(disk->queue);
 
 	blkcg_exit_queue(disk->queue);
+	bioset_exit(&disk->bio_split);
 
 	disk_release_events(disk);
 	kfree(disk->random);
@@ -1342,9 +1343,12 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 	if (!disk)
 		goto out_put_queue;
 
+	if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0))
+		goto out_free_disk;
+
 	disk->bdi = bdi_alloc(node_id);
 	if (!disk->bdi)
-		goto out_free_disk;
+		goto out_free_bioset;
 
 	/* bdev_alloc() might need the queue, set before the first call */
 	disk->queue = q;
@@ -1382,6 +1386,8 @@ out_destroy_part_tbl:
 	iput(disk->part0->bd_inode);
 out_free_bdi:
 	bdi_put(disk->bdi);
+out_free_bioset:
+	bioset_exit(&disk->bio_split);
 out_free_disk:
 	kfree(disk);
 out_put_queue:
@@ -104,6 +104,12 @@ int crypto_grab_kpp(struct crypto_kpp_spawn *spawn,
 }
 EXPORT_SYMBOL_GPL(crypto_grab_kpp);
 
+int crypto_has_kpp(const char *alg_name, u32 type, u32 mask)
+{
+	return crypto_type_has_alg(alg_name, &crypto_kpp_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_has_kpp);
+
 static void kpp_prepare_alg(struct kpp_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
@@ -521,6 +521,12 @@ struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_shash);
 
+int crypto_has_shash(const char *alg_name, u32 type, u32 mask)
+{
+	return crypto_type_has_alg(alg_name, &crypto_shash_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_has_shash);
+
 static int shash_prepare_alg(struct shash_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
@@ -248,15 +248,6 @@ config BLK_DEV_NBD
 
 	  If unsure, say N.
 
-config BLK_DEV_SX8
-	tristate "Promise SATA SX8 support"
-	depends on PCI
-	help
-	  Saying Y or M here will enable support for the
-	  Promise SATA SX8 controllers.
-
-	  Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
-
 config BLK_DEV_RAM
 	tristate "RAM block device support"
 	help
@@ -26,8 +26,6 @@ obj-$(CONFIG_SUNVDC) += sunvdc.o
 obj-$(CONFIG_BLK_DEV_NBD)	+= nbd.o
 obj-$(CONFIG_VIRTIO_BLK)	+= virtio_blk.o
 
-obj-$(CONFIG_BLK_DEV_SX8)	+= sx8.o
-
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= xen-blkback/
 obj-$(CONFIG_BLK_DEV_DRBD)	+= drbd/
@@ -974,25 +974,58 @@ static void drbd_bm_endio(struct bio *bio)
 	}
 }
 
+/* For the layout, see comment above drbd_md_set_sector_offsets(). */
+static inline sector_t drbd_md_last_bitmap_sector(struct drbd_backing_dev *bdev)
+{
+	switch (bdev->md.meta_dev_idx) {
+	case DRBD_MD_INDEX_INTERNAL:
+	case DRBD_MD_INDEX_FLEX_INT:
+		return bdev->md.md_offset + bdev->md.al_offset -1;
+	case DRBD_MD_INDEX_FLEX_EXT:
+	default:
+		return bdev->md.md_offset + bdev->md.md_size_sect -1;
+	}
+}
+
 static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
 {
 	struct drbd_device *device = ctx->device;
 	enum req_op op = ctx->flags & BM_AIO_READ ? REQ_OP_READ : REQ_OP_WRITE;
-	struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op,
-					   GFP_NOIO, &drbd_md_io_bio_set);
 	struct drbd_bitmap *b = device->bitmap;
+	struct bio *bio;
 	struct page *page;
+	sector_t last_bm_sect;
+	sector_t first_bm_sect;
+	sector_t on_disk_sector;
 	unsigned int len;
 
-	sector_t on_disk_sector =
-		device->ldev->md.md_offset + device->ldev->md.bm_offset;
-	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
+	first_bm_sect = device->ldev->md.md_offset + device->ldev->md.bm_offset;
+	on_disk_sector = first_bm_sect + (((sector_t)page_nr) << (PAGE_SHIFT-SECTOR_SHIFT));
 
 	/* this might happen with very small
 	 * flexible external meta data device,
 	 * or with PAGE_SIZE > 4k */
-	len = min_t(unsigned int, PAGE_SIZE,
-		(drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
+	last_bm_sect = drbd_md_last_bitmap_sector(device->ldev);
+	if (first_bm_sect <= on_disk_sector && last_bm_sect >= on_disk_sector) {
+		sector_t len_sect = last_bm_sect - on_disk_sector + 1;
+		if (len_sect < PAGE_SIZE/SECTOR_SIZE)
+			len = (unsigned int)len_sect*SECTOR_SIZE;
+		else
+			len = PAGE_SIZE;
+	} else {
+		if (__ratelimit(&drbd_ratelimit_state)) {
+			drbd_err(device, "Invalid offset during on-disk bitmap access: "
+				 "page idx %u, sector %llu\n", page_nr, on_disk_sector);
+		}
+		ctx->error = -EIO;
+		bm_set_page_io_err(b->bm_pages[page_nr]);
+		if (atomic_dec_and_test(&ctx->in_flight)) {
+			ctx->done = 1;
+			wake_up(&device->misc_wait);
+			kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
+		}
+		return;
+	}
 
 	/* serialize IO on this page */
 	bm_page_lock_io(device, page_nr);
@@ -1007,6 +1040,8 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
 		bm_store_page_idx(page, page_nr);
 	} else
 		page = b->bm_pages[page_nr];
+	bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, GFP_NOIO,
+			&drbd_md_io_bio_set);
 	bio->bi_iter.bi_sector = on_disk_sector;
 	/* bio_add_page of a single page to an empty bio will always succeed,
 	 * according to api. Do we want to assert that? */
@@ -1608,7 +1608,7 @@ void drbd_submit_bio(struct bio *bio)
 {
 	struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
 
-	blk_queue_split(&bio);
+	bio = bio_split_to_limits(bio);
 
 	/*
 	 * what we "blindly" assume:
@@ -11,6 +11,8 @@
 * (part of code stolen from loop.c)
 */
 
+#define pr_fmt(fmt) "nbd: " fmt
+
 #include <linux/major.h>
 
 #include <linux/blkdev.h>
@@ -1950,7 +1952,7 @@ again:
 		     test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
 		    !refcount_inc_not_zero(&nbd->refs)) {
 			mutex_unlock(&nbd_index_mutex);
-			pr_err("nbd: device at index %d is going down\n",
+			pr_err("device at index %d is going down\n",
 				index);
 			return -EINVAL;
 		}
@@ -1961,7 +1963,7 @@ again:
 	if (!nbd) {
 		nbd = nbd_dev_add(index, 2);
 		if (IS_ERR(nbd)) {
-			pr_err("nbd: failed to add new device\n");
+			pr_err("failed to add new device\n");
 			return PTR_ERR(nbd);
 		}
 	}
@@ -201,6 +201,22 @@ static bool g_use_per_node_hctx;
 module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
 MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
 
+static bool g_memory_backed;
+module_param_named(memory_backed, g_memory_backed, bool, 0444);
+MODULE_PARM_DESC(memory_backed, "Create a memory-backed block device. Default: false");
+
+static bool g_discard;
+module_param_named(discard, g_discard, bool, 0444);
+MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed null_blk device). Default: false");
+
+static unsigned long g_cache_size;
+module_param_named(cache_size, g_cache_size, ulong, 0444);
+MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+
+static unsigned int g_mbps;
+module_param_named(mbps, g_mbps, uint, 0444);
+MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
+
 static bool g_zoned;
 module_param_named(zoned, g_zoned, bool, S_IRUGO);
 MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
@@ -409,6 +425,8 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
 NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
 NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
 NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
+NULLB_DEVICE_ATTR(no_sched, bool, NULL);
+NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
 
 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
 {
@@ -532,6 +550,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
 	&nullb_device_attr_zone_max_open,
 	&nullb_device_attr_zone_max_active,
 	&nullb_device_attr_virt_boundary,
+	&nullb_device_attr_no_sched,
+	&nullb_device_attr_shared_tag_bitmap,
 	NULL,
 };
 
@@ -588,7 +608,13 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
 static ssize_t memb_group_features_show(struct config_item *item, char *page)
 {
 	return snprintf(page, PAGE_SIZE,
-			"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n");
+			"badblocks,blocking,blocksize,cache_size,"
+			"completion_nsec,discard,home_node,hw_queue_depth,"
+			"irqmode,max_sectors,mbps,memory_backed,no_sched,"
+			"poll_queues,power,queue_mode,shared_tag_bitmap,size,"
+			"submit_queues,use_per_node_hctx,virt_boundary,zoned,"
+			"zone_capacity,zone_max_active,zone_max_open,"
+			"zone_nr_conv,zone_size\n");
 }
 
 CONFIGFS_ATTR_RO(memb_group_, features);
@@ -650,6 +676,10 @@ static struct nullb_device *null_alloc_dev(void)
 	dev->irqmode = g_irqmode;
 	dev->hw_queue_depth = g_hw_queue_depth;
 	dev->blocking = g_blocking;
+	dev->memory_backed = g_memory_backed;
+	dev->discard = g_discard;
+	dev->cache_size = g_cache_size;
+	dev->mbps = g_mbps;
 	dev->use_per_node_hctx = g_use_per_node_hctx;
 	dev->zoned = g_zoned;
 	dev->zone_size = g_zone_size;
@@ -658,6 +688,8 @@ static struct nullb_device *null_alloc_dev(void)
 	dev->zone_max_open = g_zone_max_open;
 	dev->zone_max_active = g_zone_max_active;
 	dev->virt_boundary = g_virt_boundary;
+	dev->no_sched = g_no_sched;
+	dev->shared_tag_bitmap = g_shared_tag_bitmap;
 	return dev;
 }
 
@@ -1655,7 +1687,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static void cleanup_queue(struct nullb_queue *nq)
 {
-	kfree(nq->tag_map);
+	bitmap_free(nq->tag_map);
 	kfree(nq->cmds);
 }
 
@@ -1782,14 +1814,13 @@ static const struct block_device_operations null_rq_ops = {
 static int setup_commands(struct nullb_queue *nq)
 {
 	struct nullb_cmd *cmd;
-	int i, tag_size;
+	int i;
 
 	nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
 	if (!nq->cmds)
 		return -ENOMEM;
 
-	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
-	nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
+	nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL);
 	if (!nq->tag_map) {
 		kfree(nq->cmds);
 		return -ENOMEM;
@@ -1866,31 +1897,48 @@ static int null_gendisk_register(struct nullb *nullb)
 
 static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
 {
+	unsigned int flags = BLK_MQ_F_SHOULD_MERGE;
+	int hw_queues, numa_node;
+	unsigned int queue_depth;
 	int poll_queues;
 
-	set->ops = &null_mq_ops;
-	set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
-				g_submit_queues;
-	poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues;
-	if (poll_queues)
-		set->nr_hw_queues += poll_queues;
-	set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
-				g_hw_queue_depth;
-	set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
+	if (nullb) {
+		hw_queues = nullb->dev->submit_queues;
+		poll_queues = nullb->dev->poll_queues;
+		queue_depth = nullb->dev->hw_queue_depth;
+		numa_node = nullb->dev->home_node;
+		if (nullb->dev->no_sched)
+			flags |= BLK_MQ_F_NO_SCHED;
+		if (nullb->dev->shared_tag_bitmap)
+			flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||||
set->cmd_size = sizeof(struct nullb_cmd);
|
if (nullb->dev->blocking)
|
||||||
set->flags = BLK_MQ_F_SHOULD_MERGE;
|
flags |= BLK_MQ_F_BLOCKING;
|
||||||
|
} else {
|
||||||
|
hw_queues = g_submit_queues;
|
||||||
|
poll_queues = g_poll_queues;
|
||||||
|
queue_depth = g_hw_queue_depth;
|
||||||
|
numa_node = g_home_node;
|
||||||
if (g_no_sched)
|
if (g_no_sched)
|
||||||
set->flags |= BLK_MQ_F_NO_SCHED;
|
flags |= BLK_MQ_F_NO_SCHED;
|
||||||
if (g_shared_tag_bitmap)
|
if (g_shared_tag_bitmap)
|
||||||
set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||||
set->driver_data = nullb;
|
if (g_blocking)
|
||||||
if (poll_queues)
|
flags |= BLK_MQ_F_BLOCKING;
|
||||||
set->nr_maps = 3;
|
}
|
||||||
else
|
|
||||||
set->nr_maps = 1;
|
|
||||||
|
|
||||||
if ((nullb && nullb->dev->blocking) || g_blocking)
|
set->ops = &null_mq_ops;
|
||||||
set->flags |= BLK_MQ_F_BLOCKING;
|
set->cmd_size = sizeof(struct nullb_cmd);
|
||||||
|
set->flags = flags;
|
||||||
|
set->driver_data = nullb;
|
||||||
|
set->nr_hw_queues = hw_queues;
|
||||||
|
set->queue_depth = queue_depth;
|
||||||
|
set->numa_node = numa_node;
|
||||||
|
if (poll_queues) {
|
||||||
|
set->nr_hw_queues += poll_queues;
|
||||||
|
set->nr_maps = 3;
|
||||||
|
} else {
|
||||||
|
set->nr_maps = 1;
|
||||||
|
}
|
||||||
|
|
||||||
return blk_mq_alloc_tag_set(set);
|
return blk_mq_alloc_tag_set(set);
|
||||||
}
|
}
|
||||||
@ -2042,8 +2090,13 @@ static int null_add_dev(struct nullb_device *dev)
|
|||||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
|
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
|
||||||
|
|
||||||
mutex_lock(&lock);
|
mutex_lock(&lock);
|
||||||
nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
|
rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
|
||||||
dev->index = nullb->index;
|
if (rv < 0) {
|
||||||
|
mutex_unlock(&lock);
|
||||||
|
goto out_cleanup_zone;
|
||||||
|
}
|
||||||
|
nullb->index = rv;
|
||||||
|
dev->index = rv;
|
||||||
mutex_unlock(&lock);
|
mutex_unlock(&lock);
|
||||||
|
|
||||||
blk_queue_logical_block_size(nullb->q, dev->blocksize);
|
blk_queue_logical_block_size(nullb->q, dev->blocksize);
|
||||||
@ -2069,7 +2122,7 @@ static int null_add_dev(struct nullb_device *dev)
|
|||||||
|
|
||||||
rv = null_gendisk_register(nullb);
|
rv = null_gendisk_register(nullb);
|
||||||
if (rv)
|
if (rv)
|
||||||
goto out_cleanup_zone;
|
goto out_ida_free;
|
||||||
|
|
||||||
mutex_lock(&lock);
|
mutex_lock(&lock);
|
||||||
list_add_tail(&nullb->list, &nullb_list);
|
list_add_tail(&nullb->list, &nullb_list);
|
||||||
@ -2078,6 +2131,9 @@ static int null_add_dev(struct nullb_device *dev)
|
|||||||
pr_info("disk %s created\n", nullb->disk_name);
|
pr_info("disk %s created\n", nullb->disk_name);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
out_ida_free:
|
||||||
|
ida_free(&nullb_indexes, nullb->index);
|
||||||
out_cleanup_zone:
|
out_cleanup_zone:
|
||||||
null_free_zoned_dev(dev);
|
null_free_zoned_dev(dev);
|
||||||
out_cleanup_disk:
|
out_cleanup_disk:
|
||||||
|
@@ -113,6 +113,8 @@ struct nullb_device {
 bool discard; /* if support discard */
 bool zoned; /* if device is zoned */
 bool virt_boundary; /* virtual boundary on/off for the device */
+bool no_sched; /* no IO scheduler for the device */
+bool shared_tag_bitmap; /* use hostwide shared tags */
 };
 
 struct nullb {
@@ -2399,7 +2399,7 @@ static void pkt_submit_bio(struct bio *bio)
 struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
 struct bio *split;
 
-blk_queue_split(&bio);
+bio = bio_split_to_limits(bio);
 
 pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
 (unsigned long long)bio->bi_iter.bi_sector,
@@ -586,7 +586,7 @@ static void ps3vram_submit_bio(struct bio *bio)
 
 dev_dbg(&dev->core, "%s\n", __func__);
 
-blk_queue_split(&bio);
+bio = bio_split_to_limits(bio);
 
 spin_lock_irq(&priv->lock);
 busy = !bio_list_empty(&priv->list);
@@ -376,7 +376,7 @@ static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj,
 if (ret)
 return ret;
 
-ret = rnbd_clt_resize_disk(dev, (size_t)sectors);
+ret = rnbd_clt_resize_disk(dev, sectors);
 if (ret)
 return ret;
 
@@ -68,39 +68,18 @@ static inline bool rnbd_clt_get_dev(struct rnbd_clt_dev *dev)
 return refcount_inc_not_zero(&dev->refcount);
 }
 
-static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
+static void rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
-const struct rnbd_msg_open_rsp *rsp)
+sector_t new_nsectors)
 {
-struct rnbd_clt_session *sess = dev->sess;
+if (get_capacity(dev->gd) == new_nsectors)
+return;
 
-if (!rsp->logical_block_size)
+/*
-return -EINVAL;
+* If the size changed, we need to revalidate it
+*/
-dev->device_id = le32_to_cpu(rsp->device_id);
+rnbd_clt_info(dev, "Device size changed from %llu to %llu sectors\n",
-dev->nsectors = le64_to_cpu(rsp->nsectors);
+get_capacity(dev->gd), new_nsectors);
-dev->logical_block_size = le16_to_cpu(rsp->logical_block_size);
+set_capacity_and_notify(dev->gd, new_nsectors);
-dev->physical_block_size = le16_to_cpu(rsp->physical_block_size);
-dev->max_discard_sectors = le32_to_cpu(rsp->max_discard_sectors);
-dev->discard_granularity = le32_to_cpu(rsp->discard_granularity);
-dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
-dev->secure_discard = le16_to_cpu(rsp->secure_discard);
-dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
-dev->fua = !!(rsp->cache_policy & RNBD_FUA);
-
-dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
-dev->max_segments = sess->max_segments;
-
-return 0;
-}
-
-static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
-size_t new_nsectors)
-{
-rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n",
-dev->nsectors, new_nsectors);
-dev->nsectors = new_nsectors;
-set_capacity_and_notify(dev->gd, dev->nsectors);
-return 0;
 }
 
 static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
@@ -119,19 +98,16 @@ static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
 if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) {
 u64 nsectors = le64_to_cpu(rsp->nsectors);
 
-/*
-* If the device was remapped and the size changed in the
-* meantime we need to revalidate it
-*/
-if (dev->nsectors != nsectors)
 rnbd_clt_change_capacity(dev, nsectors);
 gd_kobj = &disk_to_dev(dev->gd)->kobj;
 kobject_uevent(gd_kobj, KOBJ_ONLINE);
 rnbd_clt_info(dev, "Device online, device remapped successfully\n");
 }
-err = rnbd_clt_set_dev_attr(dev, rsp);
+if (!rsp->logical_block_size) {
-if (err)
+err = -EINVAL;
 goto out;
+}
+dev->device_id = le32_to_cpu(rsp->device_id);
 dev->dev_state = DEV_STATE_MAPPED;
 
 out:
@@ -140,7 +116,7 @@ out:
 return err;
 }
 
-int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
+int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, sector_t newsize)
 {
 int ret = 0;
 
@@ -150,7 +126,7 @@ int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
 ret = -ENOENT;
 goto out;
 }
-ret = rnbd_clt_change_capacity(dev, newsize);
+rnbd_clt_change_capacity(dev, newsize);
 
 out:
 mutex_unlock(&dev->lock);
@@ -507,6 +483,11 @@ static void msg_open_conf(struct work_struct *work)
 struct rnbd_msg_open_rsp *rsp = iu->buf;
 struct rnbd_clt_dev *dev = iu->dev;
 int errno = iu->errno;
+bool from_map = false;
+
+/* INIT state is only triggered from rnbd_clt_map_device */
+if (dev->dev_state == DEV_STATE_INIT)
+from_map = true;
 
 if (errno) {
 rnbd_clt_err(dev,
@@ -523,6 +504,8 @@ static void msg_open_conf(struct work_struct *work)
 send_msg_close(dev, device_id, RTRS_PERMIT_NOWAIT);
 }
 }
+/* We free rsp in rnbd_clt_map_device for map scenario */
+if (!from_map)
 kfree(rsp);
 wake_up_iu_comp(iu, errno);
 rnbd_put_iu(dev->sess, iu);
@@ -942,7 +925,7 @@ static int rnbd_client_open(struct block_device *block_device, fmode_t mode)
 {
 struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
 
-if (dev->read_only && (mode & FMODE_WRITE))
+if (get_disk_ro(dev->gd) && (mode & FMODE_WRITE))
 return -EPERM;
 
 if (dev->dev_state == DEV_STATE_UNMAPPED ||
@@ -963,10 +946,10 @@ static int rnbd_client_getgeo(struct block_device *block_device,
 struct hd_geometry *geo)
 {
 u64 size;
-struct rnbd_clt_dev *dev;
+struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
+struct queue_limits *limit = &dev->queue->limits;
 
-dev = block_device->bd_disk->private_data;
+size = dev->size * (limit->logical_block_size / SECTOR_SIZE);
-size = dev->size * (dev->logical_block_size / SECTOR_SIZE);
 geo->cylinders = size >> 6; /* size/64 */
 geo->heads = 4;
 geo->sectors = 16;
@@ -1350,11 +1333,15 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
 }
 }
 
-static void setup_request_queue(struct rnbd_clt_dev *dev)
+static void setup_request_queue(struct rnbd_clt_dev *dev,
+struct rnbd_msg_open_rsp *rsp)
 {
-blk_queue_logical_block_size(dev->queue, dev->logical_block_size);
+blk_queue_logical_block_size(dev->queue,
-blk_queue_physical_block_size(dev->queue, dev->physical_block_size);
+le16_to_cpu(rsp->logical_block_size));
-blk_queue_max_hw_sectors(dev->queue, dev->max_hw_sectors);
+blk_queue_physical_block_size(dev->queue,
+le16_to_cpu(rsp->physical_block_size));
+blk_queue_max_hw_sectors(dev->queue,
+dev->sess->max_io_size / SECTOR_SIZE);
 
 /*
 * we don't support discards to "discontiguous" segments
@@ -1362,21 +1349,27 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
 */
 blk_queue_max_discard_segments(dev->queue, 1);
 
-blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
+blk_queue_max_discard_sectors(dev->queue,
-dev->queue->limits.discard_granularity = dev->discard_granularity;
+le32_to_cpu(rsp->max_discard_sectors));
-dev->queue->limits.discard_alignment = dev->discard_alignment;
+dev->queue->limits.discard_granularity =
-if (dev->secure_discard)
+le32_to_cpu(rsp->discard_granularity);
+dev->queue->limits.discard_alignment =
+le32_to_cpu(rsp->discard_alignment);
+if (le16_to_cpu(rsp->secure_discard))
 blk_queue_max_secure_erase_sectors(dev->queue,
-dev->max_discard_sectors);
+le32_to_cpu(rsp->max_discard_sectors));
 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
 blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
-blk_queue_max_segments(dev->queue, dev->max_segments);
+blk_queue_max_segments(dev->queue, dev->sess->max_segments);
 blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
 blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
-blk_queue_write_cache(dev->queue, dev->wc, dev->fua);
+blk_queue_write_cache(dev->queue,
+!!(rsp->cache_policy & RNBD_WRITEBACK),
+!!(rsp->cache_policy & RNBD_FUA));
 }
 
-static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
+static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
+struct rnbd_msg_open_rsp *rsp, int idx)
 {
 int err;
 
@@ -1388,19 +1381,15 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
 dev->gd->private_data = dev;
 snprintf(dev->gd->disk_name, sizeof(dev->gd->disk_name), "rnbd%d",
 idx);
-pr_debug("disk_name=%s, capacity=%zu\n",
+pr_debug("disk_name=%s, capacity=%llu\n",
 dev->gd->disk_name,
-dev->nsectors * (dev->logical_block_size / SECTOR_SIZE)
+le64_to_cpu(rsp->nsectors) *
-);
+(le16_to_cpu(rsp->logical_block_size) / SECTOR_SIZE));
 
-set_capacity(dev->gd, dev->nsectors);
+set_capacity(dev->gd, le64_to_cpu(rsp->nsectors));
 
-if (dev->access_mode == RNBD_ACCESS_RO) {
+if (dev->access_mode == RNBD_ACCESS_RO)
-dev->read_only = true;
 set_disk_ro(dev->gd, true);
-} else {
-dev->read_only = false;
-}
 
 /*
 * Network device does not need rotational
@@ -1413,11 +1402,13 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
 return err;
 }
 
-static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
+static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
+struct rnbd_msg_open_rsp *rsp)
 {
 int idx = dev->clt_device_id;
 
-dev->size = dev->nsectors * dev->logical_block_size;
+dev->size = le64_to_cpu(rsp->nsectors) *
+le16_to_cpu(rsp->logical_block_size);
 
 dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
 if (IS_ERR(dev->gd))
@@ -1425,8 +1416,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
 dev->queue = dev->gd->queue;
 rnbd_init_mq_hw_queues(dev);
 
-setup_request_queue(dev);
+setup_request_queue(dev, rsp);
-return rnbd_clt_setup_gen_disk(dev, idx);
+return rnbd_clt_setup_gen_disk(dev, rsp, idx);
 }
 
 static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
@@ -1562,7 +1553,14 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
 {
 struct rnbd_clt_session *sess;
 struct rnbd_clt_dev *dev;
-int ret;
+int ret, errno;
+struct rnbd_msg_open_rsp *rsp;
+struct rnbd_msg_open msg;
+struct rnbd_iu *iu;
+struct kvec vec = {
+.iov_base = &msg,
+.iov_len = sizeof(msg)
+};
 
 if (exists_devpath(pathname, sessname))
 return ERR_PTR(-EEXIST);
@@ -1582,17 +1580,47 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
 ret = -EEXIST;
 goto put_dev;
 }
-ret = send_msg_open(dev, RTRS_PERMIT_WAIT);
+rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+if (!rsp) {
+ret = -ENOMEM;
+goto del_dev;
+}
+
+iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT);
+if (!iu) {
+ret = -ENOMEM;
+kfree(rsp);
+goto del_dev;
+}
+iu->buf = rsp;
+iu->dev = dev;
+sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));
+
+msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN);
+msg.access_mode = dev->access_mode;
+strscpy(msg.dev_name, dev->pathname, sizeof(msg.dev_name));
+
+WARN_ON(!rnbd_clt_get_dev(dev));
+ret = send_usr_msg(sess->rtrs, READ, iu,
+&vec, sizeof(*rsp), iu->sgt.sgl, 1,
+msg_open_conf, &errno, RTRS_PERMIT_WAIT);
+if (ret) {
+rnbd_clt_put_dev(dev);
+rnbd_put_iu(sess, iu);
+} else {
+ret = errno;
+}
 if (ret) {
 rnbd_clt_err(dev,
 "map_device: failed, can't open remote device, err: %d\n",
 ret);
-goto del_dev;
+goto put_iu;
 }
 mutex_lock(&dev->lock);
 pr_debug("Opened remote device: session=%s, path='%s'\n",
 sess->sessname, pathname);
-ret = rnbd_client_setup_device(dev);
+ret = rnbd_client_setup_device(dev, rsp);
 if (ret) {
 rnbd_clt_err(dev,
 "map_device: Failed to configure device, err: %d\n",
@@ -1602,21 +1630,30 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
 }
 
 rnbd_clt_info(dev,
-"map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
+"map_device: Device mapped as %s (nsectors: %llu, logical_block_size: %d, physical_block_size: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
-dev->gd->disk_name, dev->nsectors,
+dev->gd->disk_name, le64_to_cpu(rsp->nsectors),
-dev->logical_block_size, dev->physical_block_size,
+le16_to_cpu(rsp->logical_block_size),
-dev->max_discard_sectors,
+le16_to_cpu(rsp->physical_block_size),
-dev->discard_granularity, dev->discard_alignment,
+le32_to_cpu(rsp->max_discard_sectors),
-dev->secure_discard, dev->max_segments,
+le32_to_cpu(rsp->discard_granularity),
-dev->max_hw_sectors, dev->wc, dev->fua);
+le32_to_cpu(rsp->discard_alignment),
+le16_to_cpu(rsp->secure_discard),
+sess->max_segments, sess->max_io_size / SECTOR_SIZE,
+!!(rsp->cache_policy & RNBD_WRITEBACK),
+!!(rsp->cache_policy & RNBD_FUA));
 
 mutex_unlock(&dev->lock);
+kfree(rsp);
+rnbd_put_iu(sess, iu);
 rnbd_clt_put_sess(sess);
 
 return dev;
 
 send_close:
 send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT);
+put_iu:
+kfree(rsp);
+rnbd_put_iu(sess, iu);
 del_dev:
 delete_dev(dev);
 put_dev:
@@ -106,6 +106,7 @@ struct rnbd_queue {
 };
 
 struct rnbd_clt_dev {
+struct kobject kobj;
 struct rnbd_clt_session *sess;
 struct request_queue *queue;
 struct rnbd_queue *hw_queues;
@@ -114,27 +115,14 @@ struct rnbd_clt_dev {
 u32 clt_device_id;
 struct mutex lock;
 enum rnbd_clt_dev_state dev_state;
+refcount_t refcount;
 char *pathname;
 enum rnbd_access_mode access_mode;
 u32 nr_poll_queues;
-bool read_only;
-bool wc;
-bool fua;
-u32 max_hw_sectors;
-u32 max_discard_sectors;
-u32 discard_granularity;
-u32 discard_alignment;
-u16 secure_discard;
-u16 physical_block_size;
-u16 logical_block_size;
-u16 max_segments;
-size_t nsectors;
 u64 size; /* device size in bytes */
 struct list_head list;
 struct gendisk *gd;
-struct kobject kobj;
 char *blk_symlink_name;
-refcount_t refcount;
 struct work_struct unmap_on_rmmod_work;
 };
 
@@ -150,7 +138,7 @@ int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
 const struct attribute *sysfs_self);
 
 int rnbd_clt_remap_device(struct rnbd_clt_dev *dev);
-int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize);
+int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, sector_t newsize);
 
 /* rnbd-clt-sysfs.c */
 
@@ -224,7 +224,6 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
 wait_for_completion(&dc); /* wait for inflights to drop to zero */
 
 rnbd_dev_close(sess_dev->rnbd_dev);
-list_del(&sess_dev->sess_list);
 mutex_lock(&sess_dev->dev->lock);
 list_del(&sess_dev->dev_list);
 if (sess_dev->open_flags & FMODE_WRITE)
@@ -239,14 +238,14 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
 
 static void destroy_sess(struct rnbd_srv_session *srv_sess)
 {
-struct rnbd_srv_sess_dev *sess_dev, *tmp;
+struct rnbd_srv_sess_dev *sess_dev;
+unsigned long index;
 
-if (list_empty(&srv_sess->sess_dev_list))
+if (xa_empty(&srv_sess->index_idr))
 goto out;
 
 mutex_lock(&srv_sess->lock);
-list_for_each_entry_safe(sess_dev, tmp, &srv_sess->sess_dev_list,
+xa_for_each(&srv_sess->index_idr, index, sess_dev)
-sess_list)
 rnbd_srv_destroy_dev_session_sysfs(sess_dev);
 mutex_unlock(&srv_sess->lock);
 
@@ -281,7 +280,6 @@ static int create_sess(struct rtrs_srv_sess *rtrs)
 
 srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs);
 xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC);
-INIT_LIST_HEAD(&srv_sess->sess_dev_list);
 mutex_init(&srv_sess->lock);
 mutex_lock(&sess_lock);
 list_add(&srv_sess->list, &sess_list);
@@ -323,10 +321,11 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
 {
 struct rnbd_srv_session *sess = sess_dev->sess;
 
-sess_dev->keep_id = true;
 /* It is already started to close by client's close message. */
 if (!mutex_trylock(&sess->lock))
 return;
 
+sess_dev->keep_id = true;
 /* first remove sysfs itself to avoid deadlock */
 sysfs_remove_file_self(&sess_dev->kobj, &attr->attr);
 rnbd_srv_destroy_dev_session_sysfs(sess_dev);
@@ -666,11 +665,12 @@ static struct rnbd_srv_sess_dev *
 find_srv_sess_dev(struct rnbd_srv_session *srv_sess, const char *dev_name)
 {
 struct rnbd_srv_sess_dev *sess_dev;
+unsigned long index;
 
-if (list_empty(&srv_sess->sess_dev_list))
+if (xa_empty(&srv_sess->index_idr))
 return NULL;
 
-list_for_each_entry(sess_dev, &srv_sess->sess_dev_list, sess_list)
+xa_for_each(&srv_sess->index_idr, index, sess_dev)
 if (!strcmp(sess_dev->pathname, dev_name))
 return sess_dev;
 
@@ -780,8 +780,6 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
 list_add(&srv_sess_dev->dev_list, &srv_dev->sess_dev_list);
 mutex_unlock(&srv_dev->lock);
 
-list_add(&srv_sess_dev->sess_list, &srv_sess->sess_dev_list);
 
 rnbd_srv_info(srv_sess_dev, "Opened device '%s'\n", srv_dev->id);
 
 kfree(full_path);
@@ -25,8 +25,6 @@ struct rnbd_srv_session {
 int queue_depth;
 
 struct xarray index_idr;
-/* List of struct rnbd_srv_sess_dev */
-struct list_head sess_dev_list;
 struct mutex lock;
 u8 ver;
 };
@@ -48,8 +46,6 @@ struct rnbd_srv_dev {
 struct rnbd_srv_sess_dev {
 /* Entry inside rnbd_srv_dev struct */
 struct list_head dev_list;
-/* Entry inside rnbd_srv_session struct */
-struct list_head sess_list;
 struct rnbd_dev *rnbd_dev;
 struct rnbd_srv_session *sess;
 struct rnbd_srv_dev *dev;
drivers/block/sx8.c: 1582 lines removed (file diff suppressed because it is too large)
@@ -47,7 +47,12 @@
 #define UBLK_MINORS (1U << MINORBITS)
 
 /* All UBLK_F_* have to be included into UBLK_F_ALL */
-#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_URING_CMD_COMP_IN_TASK)
+#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
+| UBLK_F_URING_CMD_COMP_IN_TASK \
+| UBLK_F_NEED_GET_DATA)
+
+/* All UBLK_PARAM_TYPE_* should be included here */
+#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
 
 struct ublk_rq_data {
 struct callback_head work;
@@ -86,6 +91,15 @@ struct ublk_uring_cmd_pdu {
 */
 #define UBLK_IO_FLAG_ABORTED 0x04
 
+/*
+* UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires
+* get data buffer address from ublksrv.
+*
+* Then, bio data could be copied into this data buffer for a WRITE request
+* after the IO command is issued again and UBLK_IO_FLAG_NEED_GET_DATA is unset.
+*/
+#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+
 struct ublk_io {
 /* userspace buffer address from io cmd */
 __u64 addr;
@@ -119,7 +133,6 @@ struct ublk_device {
 char *__queues;
 
 unsigned short queue_size;
-unsigned short bs_shift;
 struct ublksrv_ctrl_dev_info dev_info;
 
 struct blk_mq_tag_set tag_set;
@@ -137,6 +150,8 @@ struct ublk_device {
 spinlock_t mm_lock;
 struct mm_struct *mm;
 
+struct ublk_params params;
+
 struct completion completion;
 unsigned int nr_queues_ready;
 atomic_t nr_aborted_queues;
@@ -149,6 +164,12 @@ struct ublk_device {
 struct work_struct stop_work;
 };
 
+/* header of ublk_params */
+struct ublk_params_header {
+__u32 len;
+__u32 types;
+};
+
 static dev_t ublk_chr_devt;
 static struct class *ublk_chr_class;
 
@@ -160,6 +181,90 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
 
 static struct miscdevice ublk_misc;
 
+static void ublk_dev_param_basic_apply(struct ublk_device *ub)
+{
+struct request_queue *q = ub->ub_disk->queue;
+const struct ublk_param_basic *p = &ub->params.basic;
+
+blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
+blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
+blk_queue_io_min(q, 1 << p->io_min_shift);
+blk_queue_io_opt(q, 1 << p->io_opt_shift);
+
+blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
+p->attrs & UBLK_ATTR_FUA);
+if (p->attrs & UBLK_ATTR_ROTATIONAL)
+blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+else
+blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+
+blk_queue_max_hw_sectors(q, p->max_sectors);
+blk_queue_chunk_sectors(q, p->chunk_sectors);
+blk_queue_virt_boundary(q, p->virt_boundary_mask);
+
+if (p->attrs & UBLK_ATTR_READ_ONLY)
+set_disk_ro(ub->ub_disk, true);
+
+set_capacity(ub->ub_disk, p->dev_sectors);
+}
+
+static void ublk_dev_param_discard_apply(struct ublk_device *ub)
+{
+struct request_queue *q = ub->ub_disk->queue;
+const struct ublk_param_discard *p = &ub->params.discard;
+
+q->limits.discard_alignment = p->discard_alignment;
+q->limits.discard_granularity = p->discard_granularity;
+blk_queue_max_discard_sectors(q, p->max_discard_sectors);
+blk_queue_max_write_zeroes_sectors(q,
+p->max_write_zeroes_sectors);
+blk_queue_max_discard_segments(q, p->max_discard_segments);
+}
+
+static int ublk_validate_params(const struct ublk_device *ub)
+{
+/* basic param is the only one which must be set */
+if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
+const struct ublk_param_basic *p = &ub->params.basic;
+
+if (p->logical_bs_shift > PAGE_SHIFT)
+return -EINVAL;
+
+if (p->logical_bs_shift > p->physical_bs_shift)
+return -EINVAL;
+
+if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
+return -EINVAL;
+} else
+return -EINVAL;
+
+if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) {
+const struct ublk_param_discard *p = &ub->params.discard;
+
+/* So far, only support single segment discard */
+if (p->max_discard_sectors && p->max_discard_segments != 1)
+return -EINVAL;
+
+if (!p->discard_granularity)
+return -EINVAL;
+}
+
+return 0;
+}
+
+static int ublk_apply_params(struct ublk_device *ub)
+{
+if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
+return -EINVAL;
+
+ublk_dev_param_basic_apply(ub);
+
+if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
+ublk_dev_param_discard_apply(ub);
+
+return 0;
+}
+
 static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
 {
 if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
@@ -168,6 +273,13 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
 return false;
 }
 
+static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
+{
+if (ubq->flags & UBLK_F_NEED_GET_DATA)
+return true;
+return false;
+}
+
 static struct ublk_device *ublk_get_device(struct ublk_device *ub)
 {
 if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
@@ -509,6 +621,21 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req)
 }
 }
 
+static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+{
+/* mark this cmd owned by ublksrv */
+io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
+
+/*
+* clear ACTIVE since we are done with this sqe/cmd slot
+* We can only accept io cmd in case of being not active.
+*/
+io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+
+/* tell ublksrv one io request is coming */
+io_uring_cmd_done(io->cmd, res, 0);
+}
+
 #define UBLK_REQUEUE_DELAY_MS 3
 
 static inline void __ublk_rq_task_work(struct request *req)
@@ -531,6 +658,30 @@ static inline void __ublk_rq_task_work(struct request *req)
 return;
 }
 
+if (ublk_need_get_data(ubq) &&
+(req_op(req) == REQ_OP_WRITE ||
+req_op(req) == REQ_OP_FLUSH)) {
+/*
+* We have not handled UBLK_IO_NEED_GET_DATA command yet,
+* so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
+* and notify it.
+*/
+if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
+io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
+pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
+__func__, io->cmd->cmd_op, ubq->q_id,
+req->tag, io->flags);
+ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
+return;
+}
+/*
+* We have handled UBLK_IO_NEED_GET_DATA command,
+* so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
+* do the copy work.
+*/
+io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
+}
+
 mapped_bytes = ublk_map_io(ubq, req, io);
 
 /* partially mapped, update io descriptor */
@@ -553,17 +704,7 @@ static inline void __ublk_rq_task_work(struct request *req)
 mapped_bytes >> 9;
 }
 
-/* mark this cmd owned by ublksrv */
+ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
-io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
-
-/*
-* clear ACTIVE since we are done with this sqe/cmd slot
-* We can only accept io cmd in case of being not active.
-*/
-io->flags &= ~UBLK_IO_FLAG_ACTIVE;
-
-/* tell ublksrv one io request is coming */
-io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0);
 }
 
 static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
@@ -788,16 +929,27 @@ static void ublk_daemon_monitor_work(struct work_struct *work)
 UBLK_DAEMON_MONITOR_PERIOD);
 }
 
+static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+{
+return ubq->nr_io_ready == ubq->q_depth;
+}
+
 static void ublk_cancel_queue(struct ublk_queue *ubq)
 {
 int i;
 
+if (!ublk_queue_ready(ubq))
+return;
+
 for (i = 0; i < ubq->q_depth; i++) {
 struct ublk_io *io = &ubq->ios[i];
 
 if (io->flags & UBLK_IO_FLAG_ACTIVE)
 io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
 }
 
+/* all io commands are canceled */
+ubq->nr_io_ready = 0;
 }
 
 /* Cancel all pending commands, must be called after del_gendisk() returns */
@@ -818,19 +970,14 @@ static void ublk_stop_dev(struct ublk_device *ub)
 del_gendisk(ub->ub_disk);
 ub->dev_info.state = UBLK_S_DEV_DEAD;
 ub->dev_info.ublksrv_pid = -1;
-ublk_cancel_dev(ub);
 put_disk(ub->ub_disk);
 ub->ub_disk = NULL;
 unlock:
+ublk_cancel_dev(ub);
 mutex_unlock(&ub->mutex);
 cancel_delayed_work_sync(&ub->monitor_work);
 }
 
-static inline bool ublk_queue_ready(struct ublk_queue *ubq)
-{
-return ubq->nr_io_ready == ubq->q_depth;
-}
-
 /* device can only be started after all IOs are ready */
 static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
 {
@@ -846,6 +993,25 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
 mutex_unlock(&ub->mutex);
 }
 
+static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
+int tag, struct io_uring_cmd *cmd)
+{
+struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
+
+if (ublk_can_use_task_work(ubq)) {
+struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+/* should not fail since we call it just in ubq->ubq_daemon */
+task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
+} else {
+struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+pdu->req = req;
+io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+}
+}
+
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
 struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
@@ -884,6 +1050,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 goto out;
 }
 
+/*
+* ensure that the user issues UBLK_IO_NEED_GET_DATA
+* iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
+*/
+if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
+^ (cmd_op == UBLK_IO_NEED_GET_DATA))
+goto out;
+
 switch (cmd_op) {
 case UBLK_IO_FETCH_REQ:
 /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
@@ -917,6 +1091,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 io->cmd = cmd;
 ublk_commit_completion(ub, ub_cmd);
 break;
+case UBLK_IO_NEED_GET_DATA:
+if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+goto out;
+io->addr = ub_cmd->addr;
+io->cmd = cmd;
+io->flags |= UBLK_IO_FLAG_ACTIVE;
+ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
+break;
 default:
 goto out;
 }
@@ -1083,13 +1265,13 @@ static void ublk_stop_work_fn(struct work_struct *work)
 ublk_stop_dev(ub);
 }
 
-/* align maximum I/O size to PAGE_SIZE */
+/* align max io buffer size with PAGE_SIZE */
 static void ublk_align_max_io_size(struct ublk_device *ub)
 {
-unsigned int max_rq_bytes = ub->dev_info.rq_max_blocks << ub->bs_shift;
+unsigned int max_io_bytes = ub->dev_info.max_io_buf_bytes;
 
-ub->dev_info.rq_max_blocks =
+ub->dev_info.max_io_buf_bytes =
-round_down(max_rq_bytes, PAGE_SIZE) >> ub->bs_shift;
+round_down(max_io_bytes, PAGE_SIZE);
 }
 
 static int ublk_add_tag_set(struct ublk_device *ub)
@@ -1132,7 +1314,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
 {
 struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
 int ublksrv_pid = (int)header->data[0];
-unsigned long dev_blocks = header->data[1];
 struct ublk_device *ub;
 struct gendisk *disk;
 int ret = -EINVAL;
@@ -1155,10 +1336,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
 goto out_unlock;
 }
 
-/* We may get disk size updated */
-if (dev_blocks)
-ub->dev_info.dev_blocks = dev_blocks;
-
 disk = blk_mq_alloc_disk(&ub->tag_set, ub);
 if (IS_ERR(disk)) {
 ret = PTR_ERR(disk);
@@ -1168,27 +1345,28 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
 disk->fops = &ub_fops;
 disk->private_data = ub;
 
-blk_queue_logical_block_size(disk->queue, ub->dev_info.block_size);
-blk_queue_physical_block_size(disk->queue, ub->dev_info.block_size);
-blk_queue_io_min(disk->queue, ub->dev_info.block_size);
-blk_queue_max_hw_sectors(disk->queue,
-ub->dev_info.rq_max_blocks << (ub->bs_shift - 9));
-disk->queue->limits.discard_granularity = PAGE_SIZE;
-blk_queue_max_discard_sectors(disk->queue, UINT_MAX >> 9);
-blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX >> 9);
-
-set_capacity(disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9));
-
 ub->dev_info.ublksrv_pid = ublksrv_pid;
 ub->ub_disk = disk;
 
+ret = ublk_apply_params(ub);
+if (ret)
+goto out_put_disk;
+
 get_device(&ub->cdev_dev);
 ret = add_disk(disk);
 if (ret) {
-put_disk(disk);
+/*
-goto out_unlock;
+* Has to drop the reference since ->free_disk won't be
+* called in case of add_disk failure.
+*/
+ublk_put_device(ub);
+goto out_put_disk;
 }
 set_bit(UB_STATE_USED, &ub->state);
 ub->dev_info.state = UBLK_S_DEV_LIVE;
+out_put_disk:
+if (ret)
+put_disk(disk);
 out_unlock:
 mutex_unlock(&ub->mutex);
 ublk_put_device(ub);
@@ -1250,9 +1428,8 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
 {
 pr_devel("%s: dev id %d flags %llx\n", __func__,
 info->dev_id, info->flags);
-pr_devel("\t nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
+pr_devel("\t nr_hw_queues %d queue_depth %d\n",
-info->nr_hw_queues, info->queue_depth,
+info->nr_hw_queues, info->queue_depth);
-info->block_size, info->dev_blocks);
 }
 
 static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
@@ -1312,7 +1489,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 /* We are not ready to support zero copy */
 ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
 
-ub->bs_shift = ilog2(ub->dev_info.block_size);
 ub->dev_info.nr_hw_queues = min_t(unsigned int,
 ub->dev_info.nr_hw_queues, nr_cpu_ids);
 ublk_align_max_io_size(ub);
@@ -1436,6 +1612,82 @@ static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd)
 return ret;
 }
 
+static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
+{
+struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+void __user *argp = (void __user *)(unsigned long)header->addr;
+struct ublk_params_header ph;
+struct ublk_device *ub;
+int ret;
+
+if (header->len <= sizeof(ph) || !header->addr)
+return -EINVAL;
+
+if (copy_from_user(&ph, argp, sizeof(ph)))
+return -EFAULT;
+
+if (ph.len > header->len || !ph.len)
+return -EINVAL;
+
+if (ph.len > sizeof(struct ublk_params))
+ph.len = sizeof(struct ublk_params);
+
+ub = ublk_get_device_from_id(header->dev_id);
+if (!ub)
+return -EINVAL;
+
+mutex_lock(&ub->mutex);
+if (copy_to_user(argp, &ub->params, ph.len))
+ret = -EFAULT;
+else
+ret = 0;
+mutex_unlock(&ub->mutex);
+
+ublk_put_device(ub);
+return ret;
+}
+
+static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
+{
+struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+void __user *argp = (void __user *)(unsigned long)header->addr;
+struct ublk_params_header ph;
+struct ublk_device *ub;
+int ret = -EFAULT;
+
+if (header->len <= sizeof(ph) || !header->addr)
+return -EINVAL;
+
+if (copy_from_user(&ph, argp, sizeof(ph)))
+return -EFAULT;
+
+if (ph.len > header->len || !ph.len || !ph.types)
+return -EINVAL;
+
+if (ph.len > sizeof(struct ublk_params))
+ph.len = sizeof(struct ublk_params);
+
+ub = ublk_get_device_from_id(header->dev_id);
+if (!ub)
+return -EINVAL;
+
+/* parameters can only be changed when device isn't live */
+mutex_lock(&ub->mutex);
+if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
+ret = -EACCES;
+} else if (copy_from_user(&ub->params, argp, ph.len)) {
+ret = -EFAULT;
+} else {
+/* clear all we don't support yet */
+ub->params.types &= UBLK_PARAM_TYPE_ALL;
+ret = ublk_validate_params(ub);
+}
+mutex_unlock(&ub->mutex);
+ublk_put_device(ub);
+
+return ret;
+}
+
 static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
 unsigned int issue_flags)
 {
@@ -1471,6 +1723,12 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
 case UBLK_CMD_GET_QUEUE_AFFINITY:
 ret = ublk_ctrl_get_queue_affinity(cmd);
 break;
+case UBLK_CMD_GET_PARAMS:
+ret = ublk_ctrl_get_params(cmd);
+break;
+case UBLK_CMD_SET_PARAMS:
+ret = ublk_ctrl_set_params(cmd);
+break;
 default:
 break;
 }
@@ -29,7 +29,7 @@ config BCACHE_CLOSURES_DEBUG
 	  operations that get stuck.
 
 config BCACHE_ASYNC_REGISTRATION
-	bool "Asynchronous device registration (EXPERIMENTAL)"
+	bool "Asynchronous device registration"
 	depends on BCACHE
 	help
 	  Add a sysfs file /sys/fs/bcache/register_async. Writing registering
@@ -3728,6 +3728,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
 	if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
 		if (mddev->sync_thread) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+			md_unregister_thread(&mddev->sync_thread);
 			md_reap_sync_thread(mddev);
 		}
 	} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
@@ -1016,7 +1016,7 @@ static void dm_wq_requeue_work(struct work_struct *work)
 	while (io) {
 		struct dm_io *next = io->next;
 
-		dm_io_rewind(io, &md->queue->bio_split);
+		dm_io_rewind(io, &md->disk->bio_split);
 
 		io->next = NULL;
 		__dm_io_complete(io, false);
@@ -1181,7 +1181,7 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector)
 	 * Does the target need to split IO even further?
 	 * - varied (per target) IO splitting is a tenet of DM; this
 	 *   explains why stacked chunk_sectors based splitting via
-	 *   blk_queue_split() isn't possible here.
+	 *   bio_split_to_limits() isn't possible here.
 	 */
 	if (!ti->max_io_len)
 		return len;
@@ -1751,10 +1751,10 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 	is_abnormal = is_abnormal_io(bio);
 	if (unlikely(is_abnormal)) {
 		/*
-		 * Use blk_queue_split() for abnormal IO (e.g. discard, etc)
+		 * Use bio_split_to_limits() for abnormal IO (e.g. discard, etc)
 		 * otherwise associated queue_limits won't be imposed.
 		 */
-		blk_queue_split(&bio);
+		bio = bio_split_to_limits(bio);
 	}
 
 	init_clone_info(&ci, md, map, bio, is_abnormal);
@@ -125,7 +125,6 @@ static void __init md_setup_drive(struct md_setup_args *args)
 	char *devname = args->device_names;
 	dev_t devices[MD_SB_DISKS + 1], mdev;
 	struct mdu_array_info_s ainfo = { };
-	struct block_device *bdev;
 	struct mddev *mddev;
 	int err = 0, i;
 	char name[16];
@@ -169,24 +168,16 @@ static void __init md_setup_drive(struct md_setup_args *args)
 
 	pr_info("md: Loading %s: %s\n", name, args->device_names);
 
-	bdev = blkdev_get_by_dev(mdev, FMODE_READ, NULL);
-	if (IS_ERR(bdev)) {
-		pr_err("md: open failed - cannot start array %s\n", name);
+	mddev = md_alloc(mdev, name);
+	if (IS_ERR(mddev)) {
+		pr_err("md: md_alloc failed - cannot start array %s\n", name);
 		return;
 	}
 
-	err = -EIO;
-	if (WARN(bdev->bd_disk->fops != &md_fops,
-		 "Opening block device %x resulted in non-md device\n",
-		 mdev))
-		goto out_blkdev_put;
-
-	mddev = bdev->bd_disk->private_data;
-
 	err = mddev_lock(mddev);
 	if (err) {
 		pr_err("md: failed to lock array %s\n", name);
-		goto out_blkdev_put;
+		goto out_mddev_put;
 	}
 
 	if (!list_empty(&mddev->disks) || mddev->raid_disks) {
@@ -230,8 +221,8 @@ static void __init md_setup_drive(struct md_setup_args *args)
 		pr_warn("md: starting %s failed\n", name);
 out_unlock:
 	mddev_unlock(mddev);
-out_blkdev_put:
-	blkdev_put(bdev, FMODE_READ);
+out_mddev_put:
+	mddev_put(mddev);
 }
 
 static int __init raid_setup(char *str)
@@ -40,7 +40,7 @@ struct resync_info {
 
 /* Lock the send communication. This is done through
  * bit manipulation as opposed to a mutex in order to
- * accomodate lock and hold. See next comment.
+ * accommodate lock and hold. See next comment.
  */
 #define MD_CLUSTER_SEND_LOCK 4
 /* If cluster operations (such as adding a disk) must lock the
@@ -689,7 +689,7 @@ static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked)
 	/*
 	 * If resync thread run after raid1d thread, then process_metadata_update
 	 * could not continue if raid1d held reconfig_mutex (and raid1d is blocked
-	 * since another node already got EX on Token and waitting the EX of Ack),
+	 * since another node already got EX on Token and waiting the EX of Ack),
 	 * so let resync wake up thread in case flag is set.
 	 */
 	if (mddev_locked && !test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD,
drivers/md/md.c
@@ -368,28 +368,6 @@ EXPORT_SYMBOL_GPL(md_new_event);
 static LIST_HEAD(all_mddevs);
 static DEFINE_SPINLOCK(all_mddevs_lock);
 
-/*
- * iterates through all used mddevs in the system.
- * We take care to grab the all_mddevs_lock whenever navigating
- * the list, and to always hold a refcount when unlocked.
- * Any code which breaks out of this loop while own
- * a reference to the current mddev and must mddev_put it.
- */
-#define for_each_mddev(_mddev,_tmp) \
- \
-	for (({ spin_lock(&all_mddevs_lock); \
-		_tmp = all_mddevs.next; \
-		_mddev = NULL;}); \
-	     ({ if (_tmp != &all_mddevs) \
-			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
-		spin_unlock(&all_mddevs_lock); \
-		if (_mddev) mddev_put(_mddev); \
-		_mddev = list_entry(_tmp, struct mddev, all_mddevs); \
-		_tmp != &all_mddevs;}); \
-	     ({ spin_lock(&all_mddevs_lock); \
-		_tmp = _tmp->next;}) \
-		)
-
 /* Rather than calling directly into the personality make_request function,
  * IO requests come here first so that we can check if the device is
  * being suspended pending a reconfiguration.
@@ -464,7 +442,7 @@ static void md_submit_bio(struct bio *bio)
 		return;
 	}
 
-	blk_queue_split(&bio);
+	bio = bio_split_to_limits(bio);
 
 	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
 		if (bio_sectors(bio) != 0)
@@ -647,13 +625,17 @@ EXPORT_SYMBOL(md_flush_request);
 
 static inline struct mddev *mddev_get(struct mddev *mddev)
 {
+	lockdep_assert_held(&all_mddevs_lock);
+
+	if (test_bit(MD_DELETED, &mddev->flags))
+		return NULL;
 	atomic_inc(&mddev->active);
 	return mddev;
 }
 
 static void mddev_delayed_delete(struct work_struct *ws);
 
-static void mddev_put(struct mddev *mddev)
+void mddev_put(struct mddev *mddev)
 {
 	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
 		return;
@@ -661,7 +643,7 @@ static void mddev_put(struct mddev *mddev)
 	    mddev->ctime == 0 && !mddev->hold_active) {
 		/* Array is not configured at all, and not held active,
 		 * so destroy it */
-		list_del_init(&mddev->all_mddevs);
+		set_bit(MD_DELETED, &mddev->flags);
 
 		/*
 		 * Call queue_work inside the spinlock so that
@@ -678,7 +660,6 @@ static void md_safemode_timeout(struct timer_list *t);
 
 void mddev_init(struct mddev *mddev)
 {
-	kobject_init(&mddev->kobj, &md_ktype);
 	mutex_init(&mddev->open_mutex);
 	mutex_init(&mddev->reconfig_mutex);
 	mutex_init(&mddev->bitmap_info.mutex);
@@ -733,22 +714,6 @@ static dev_t mddev_alloc_unit(void)
 	return dev;
 }
 
-static struct mddev *mddev_find(dev_t unit)
-{
-	struct mddev *mddev;
-
-	if (MAJOR(unit) != MD_MAJOR)
-		unit &= ~((1 << MdpMinorShift) - 1);
-
-	spin_lock(&all_mddevs_lock);
-	mddev = mddev_find_locked(unit);
-	if (mddev)
-		mddev_get(mddev);
-	spin_unlock(&all_mddevs_lock);
-
-	return mddev;
-}
-
 static struct mddev *mddev_alloc(dev_t unit)
 {
 	struct mddev *new;
@@ -791,6 +756,15 @@ out_free_new:
 	return ERR_PTR(error);
 }
 
+static void mddev_free(struct mddev *mddev)
+{
+	spin_lock(&all_mddevs_lock);
+	list_del(&mddev->all_mddevs);
+	spin_unlock(&all_mddevs_lock);
+
+	kfree(mddev);
+}
+
 static const struct attribute_group md_redundancy_group;
 
 void mddev_unlock(struct mddev *mddev)
@@ -3335,14 +3309,35 @@ rdev_size_show(struct md_rdev *rdev, char *page)
 	return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
 }
 
-static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
+static int md_rdevs_overlap(struct md_rdev *a, struct md_rdev *b)
 {
 	/* check if two start/length pairs overlap */
-	if (s1+l1 <= s2)
-		return 0;
-	if (s2+l2 <= s1)
-		return 0;
-	return 1;
+	if (a->data_offset + a->sectors <= b->data_offset)
+		return false;
+	if (b->data_offset + b->sectors <= a->data_offset)
+		return false;
+	return true;
+}
+
+static bool md_rdev_overlaps(struct md_rdev *rdev)
+{
+	struct mddev *mddev;
+	struct md_rdev *rdev2;
+
+	spin_lock(&all_mddevs_lock);
+	list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
+		if (test_bit(MD_DELETED, &mddev->flags))
+			continue;
+		rdev_for_each(rdev2, mddev) {
+			if (rdev != rdev2 && rdev->bdev == rdev2->bdev &&
+			    md_rdevs_overlap(rdev, rdev2)) {
+				spin_unlock(&all_mddevs_lock);
+				return true;
+			}
+		}
+	}
+	spin_unlock(&all_mddevs_lock);
+	return false;
 }
 
 static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
@@ -3394,47 +3389,22 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
 		return -EINVAL; /* component must fit device */
 
 	rdev->sectors = sectors;
-	if (sectors > oldsectors && my_mddev->external) {
-		/* Need to check that all other rdevs with the same
-		 * ->bdev do not overlap. 'rcu' is sufficient to walk
-		 * the rdev lists safely.
-		 * This check does not provide a hard guarantee, it
-		 * just helps avoid dangerous mistakes.
+	/*
+	 * Check that all other rdevs with the same bdev do not overlap. This
+	 * check does not provide a hard guarantee, it just helps avoid
+	 * dangerous mistakes.
 	 */
-		struct mddev *mddev;
-		int overlap = 0;
-		struct list_head *tmp;
-
-		rcu_read_lock();
-		for_each_mddev(mddev, tmp) {
-			struct md_rdev *rdev2;
-
-			rdev_for_each(rdev2, mddev)
-				if (rdev->bdev == rdev2->bdev &&
-				    rdev != rdev2 &&
-				    overlaps(rdev->data_offset, rdev->sectors,
-					     rdev2->data_offset,
-					     rdev2->sectors)) {
-					overlap = 1;
-					break;
-				}
-			if (overlap) {
-				mddev_put(mddev);
-				break;
-			}
-		}
-		rcu_read_unlock();
-		if (overlap) {
-			/* Someone else could have slipped in a size
-			 * change here, but doing so is just silly.
-			 * We put oldsectors back because we *know* it is
-			 * safe, and trust userspace not to race with
-			 * itself
+	if (sectors > oldsectors && my_mddev->external &&
+	    md_rdev_overlaps(rdev)) {
+		/*
+		 * Someone else could have slipped in a size change here, but
+		 * doing so is just silly. We put oldsectors back because we
+		 * know it is safe, and trust userspace not to race with itself.
 		 */
-			rdev->sectors = oldsectors;
-			return -EBUSY;
-		}
+		rdev->sectors = oldsectors;
+		return -EBUSY;
 	}
 	return len;
 }
@@ -4830,6 +4800,19 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 			if (work_pending(&mddev->del_work))
 				flush_workqueue(md_misc_wq);
 			if (mddev->sync_thread) {
+				sector_t save_rp = mddev->reshape_position;
+
+				mddev_unlock(mddev);
+				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+				md_unregister_thread(&mddev->sync_thread);
+				mddev_lock_nointr(mddev);
+				/*
+				 * set RECOVERY_INTR again and restore reshape
+				 * position in case others changed them after
+				 * got lock, eg, reshape_position_store and
+				 * md_check_recovery.
+				 */
+				mddev->reshape_position = save_rp;
 				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 				md_reap_sync_thread(mddev);
 			}
@@ -5001,7 +4984,7 @@ static ssize_t
 sync_speed_show(struct mddev *mddev, char *page)
 {
 	unsigned long resync, dt, db;
-	if (mddev->curr_resync == 0)
+	if (mddev->curr_resync == MD_RESYNC_NONE)
 		return sprintf(page, "none\n");
 	resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
 	dt = (jiffies - mddev->resync_mark) / HZ;
@@ -5020,8 +5003,8 @@ sync_completed_show(struct mddev *mddev, char *page)
 	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		return sprintf(page, "none\n");
 
-	if (mddev->curr_resync == 1 ||
-	    mddev->curr_resync == 2)
+	if (mddev->curr_resync == MD_RESYNC_YIELDED ||
+	    mddev->curr_resync == MD_RESYNC_DELAYED)
 		return sprintf(page, "delayed\n");
 
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
@@ -5532,11 +5515,10 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	if (!entry->show)
 		return -EIO;
 	spin_lock(&all_mddevs_lock);
-	if (list_empty(&mddev->all_mddevs)) {
+	if (!mddev_get(mddev)) {
 		spin_unlock(&all_mddevs_lock);
 		return -EBUSY;
 	}
-	mddev_get(mddev);
 	spin_unlock(&all_mddevs_lock);
 
 	rv = entry->show(mddev, page);
@@ -5557,18 +5539,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 	spin_lock(&all_mddevs_lock);
-	if (list_empty(&mddev->all_mddevs)) {
+	if (!mddev_get(mddev)) {
 		spin_unlock(&all_mddevs_lock);
 		return -EBUSY;
 	}
-	mddev_get(mddev);
 	spin_unlock(&all_mddevs_lock);
 	rv = entry->store(mddev, page, length);
 	mddev_put(mddev);
 	return rv;
 }
 
-static void md_free(struct kobject *ko)
+static void md_kobj_release(struct kobject *ko)
 {
 	struct mddev *mddev = container_of(ko, struct mddev, kobj);
 
@@ -5577,15 +5558,8 @@ static void md_free(struct kobject *ko)
 	if (mddev->sysfs_level)
 		sysfs_put(mddev->sysfs_level);
 
-	if (mddev->gendisk) {
-		del_gendisk(mddev->gendisk);
-		put_disk(mddev->gendisk);
-	}
-	percpu_ref_exit(&mddev->writes_pending);
-
-	bioset_exit(&mddev->bio_set);
-	bioset_exit(&mddev->sync_set);
-	kfree(mddev);
+	del_gendisk(mddev->gendisk);
+	put_disk(mddev->gendisk);
 }
 
 static const struct sysfs_ops md_sysfs_ops = {
@@ -5593,7 +5567,7 @@ static const struct sysfs_ops md_sysfs_ops = {
 	.store = md_attr_store,
 };
 static struct kobj_type md_ktype = {
-	.release = md_free,
+	.release = md_kobj_release,
 	.sysfs_ops = &md_sysfs_ops,
 	.default_groups = md_attr_groups,
 };
@@ -5604,7 +5578,6 @@ static void mddev_delayed_delete(struct work_struct *ws)
 {
 	struct mddev *mddev = container_of(ws, struct mddev, del_work);
 
-	kobject_del(&mddev->kobj);
 	kobject_put(&mddev->kobj);
 }
 
@@ -5623,7 +5596,7 @@ int mddev_init_writes_pending(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
 
-static int md_alloc(dev_t dev, char *name)
+struct mddev *md_alloc(dev_t dev, char *name)
 {
 	/*
 	 * If dev is zero, name is the name of a device to allocate with
@@ -5651,8 +5624,8 @@ static int md_alloc(dev_t dev, char *name)
 	mutex_lock(&disks_mutex);
 	mddev = mddev_alloc(dev);
 	if (IS_ERR(mddev)) {
-		mutex_unlock(&disks_mutex);
-		return PTR_ERR(mddev);
+		error = PTR_ERR(mddev);
+		goto out_unlock;
 	}
 
 	partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
@@ -5670,7 +5643,7 @@ static int md_alloc(dev_t dev, char *name)
 			    strcmp(mddev2->gendisk->disk_name, name) == 0) {
 				spin_unlock(&all_mddevs_lock);
 				error = -EEXIST;
-				goto out_unlock_disks_mutex;
+				goto out_free_mddev;
 			}
 		spin_unlock(&all_mddevs_lock);
 	}
@@ -5683,7 +5656,7 @@ static int md_alloc(dev_t dev, char *name)
 	error = -ENOMEM;
 	disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
-		goto out_unlock_disks_mutex;
+		goto out_free_mddev;
 
 	disk->major = MAJOR(mddev->unit);
 	disk->first_minor = unit << shift;
@@ -5704,25 +5677,45 @@ static int md_alloc(dev_t dev, char *name)
 	mddev->gendisk = disk;
 	error = add_disk(disk);
 	if (error)
-		goto out_cleanup_disk;
+		goto out_put_disk;
 
+	kobject_init(&mddev->kobj, &md_ktype);
 	error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
-	if (error)
-		goto out_del_gendisk;
+	if (error) {
+		/*
+		 * The disk is already live at this point. Clear the hold flag
+		 * and let mddev_put take care of the deletion, as it isn't any
+		 * different from a normal close on last release now.
+		 */
+		mddev->hold_active = 0;
+		mutex_unlock(&disks_mutex);
+		mddev_put(mddev);
+		return ERR_PTR(error);
+	}
 
 	kobject_uevent(&mddev->kobj, KOBJ_ADD);
 	mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
 	mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
-	goto out_unlock_disks_mutex;
-
-out_del_gendisk:
-	del_gendisk(disk);
-out_cleanup_disk:
-	put_disk(disk);
-out_unlock_disks_mutex:
 	mutex_unlock(&disks_mutex);
+	return mddev;
+
+out_put_disk:
+	put_disk(disk);
+out_free_mddev:
+	mddev_free(mddev);
+out_unlock:
+	mutex_unlock(&disks_mutex);
+	return ERR_PTR(error);
+}
+
+static int md_alloc_and_put(dev_t dev, char *name)
+{
+	struct mddev *mddev = md_alloc(dev, name);
+
+	if (IS_ERR(mddev))
+		return PTR_ERR(mddev);
 	mddev_put(mddev);
-	return error;
+	return 0;
 }
 
 static void md_probe(dev_t dev)
@@ -5730,7 +5723,7 @@ static void md_probe(dev_t dev)
 	if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
 		return;
 	if (create_on_open)
-		md_alloc(dev, NULL);
+		md_alloc_and_put(dev, NULL);
 }
 
 static int add_named_array(const char *val, const struct kernel_param *kp)
@@ -5752,12 +5745,12 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
 		return -E2BIG;
 	strscpy(buf, val, len+1);
 	if (strncmp(buf, "md_", 3) == 0)
-		return md_alloc(0, buf);
+		return md_alloc_and_put(0, buf);
 	if (strncmp(buf, "md", 2) == 0 &&
 	    isdigit(buf[2]) &&
 	    kstrtoul(buf+2, 10, &devnum) == 0 &&
 	    devnum <= MINORMASK)
-		return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
+		return md_alloc_and_put(MKDEV(MD_MAJOR, devnum), NULL);
 
 	return -EINVAL;
 }
@@ -6197,6 +6190,7 @@ static void __md_stop_writes(struct mddev *mddev)
 		flush_workqueue(md_misc_wq);
 	if (mddev->sync_thread) {
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+		md_unregister_thread(&mddev->sync_thread);
 		md_reap_sync_thread(mddev);
 	}
 
@@ -6244,11 +6238,11 @@ static void mddev_detach(struct mddev *mddev)
 static void __md_stop(struct mddev *mddev)
 {
 	struct md_personality *pers = mddev->pers;
-	md_bitmap_destroy(mddev);
 	mddev_detach(mddev);
 	/* Ensure ->event_work is done */
 	if (mddev->event_work.func)
 		flush_workqueue(md_misc_wq);
+	md_bitmap_destroy(mddev);
 	spin_lock(&mddev->lock);
 	mddev->pers = NULL;
 	spin_unlock(&mddev->lock);
@@ -6497,9 +6491,8 @@ static void autorun_devices(int part)
 			break;
 		}
 
-		md_probe(dev);
-		mddev = mddev_find(dev);
-		if (!mddev)
+		mddev = md_alloc(dev, NULL);
+		if (IS_ERR(mddev))
 			break;
 
 		if (mddev_lock(mddev))
@@ -7782,44 +7775,32 @@ out_unlock:
 
 static int md_open(struct block_device *bdev, fmode_t mode)
 {
-	/*
-	 * Succeed if we can lock the mddev, which confirms that
-	 * it isn't being stopped right now.
-	 */
-	struct mddev *mddev = mddev_find(bdev->bd_dev);
+	struct mddev *mddev;
 	int err;
 
+	spin_lock(&all_mddevs_lock);
+	mddev = mddev_get(bdev->bd_disk->private_data);
+	spin_unlock(&all_mddevs_lock);
 	if (!mddev)
 		return -ENODEV;
 
-	if (mddev->gendisk != bdev->bd_disk) {
-		/* we are racing with mddev_put which is discarding this
-		 * bd_disk.
-		 */
-		mddev_put(mddev);
-		/* Wait until bdev->bd_disk is definitely gone */
-		if (work_pending(&mddev->del_work))
-			flush_workqueue(md_misc_wq);
-		return -EBUSY;
-	}
-	BUG_ON(mddev != bdev->bd_disk->private_data);
-
-	if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
+	err = mutex_lock_interruptible(&mddev->open_mutex);
+	if (err)
 		goto out;
 
-	if (test_bit(MD_CLOSING, &mddev->flags)) {
-		mutex_unlock(&mddev->open_mutex);
-		err = -ENODEV;
-		goto out;
-	}
+	err = -ENODEV;
+	if (test_bit(MD_CLOSING, &mddev->flags))
+		goto out_unlock;
 
-	err = 0;
 	atomic_inc(&mddev->openers);
 	mutex_unlock(&mddev->open_mutex);
 
 	bdev_check_media_change(bdev);
-out:
-	if (err)
-		mddev_put(mddev);
+	return 0;
+
+out_unlock:
+	mutex_unlock(&mddev->open_mutex);
+out:
+	mddev_put(mddev);
 	return err;
 }
@@ -7844,6 +7825,17 @@ static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
 	return ret;
 }
 
+static void md_free_disk(struct gendisk *disk)
+{
+	struct mddev *mddev = disk->private_data;
+
+	percpu_ref_exit(&mddev->writes_pending);
+	bioset_exit(&mddev->bio_set);
+	bioset_exit(&mddev->sync_set);
+
+	mddev_free(mddev);
+}
+
 const struct block_device_operations md_fops =
 {
 	.owner = THIS_MODULE,
@@ -7857,6 +7849,7 @@ const struct block_device_operations md_fops =
 	.getgeo = md_getgeo,
 	.check_events = md_check_events,
 	.set_read_only = md_set_read_only,
+	.free_disk = md_free_disk,
 };
 
 static int md_thread(void *arg)
@@ -8018,16 +8011,26 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
 		max_sectors = mddev->dev_sectors;
 
 	resync = mddev->curr_resync;
-	if (resync <= 3) {
+	if (resync < MD_RESYNC_ACTIVE) {
 		if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
 			/* Still cleaning up */
 			resync = max_sectors;
-	} else if (resync > max_sectors)
+	} else if (resync > max_sectors) {
 		resync = max_sectors;
-	else
+	} else {
 		resync -= atomic_read(&mddev->recovery_active);
+		if (resync < MD_RESYNC_ACTIVE) {
+			/*
+			 * Resync has started, but the subtraction has
+			 * yielded one of the special values. Force it
+			 * to active to ensure the status reports an
+			 * active resync.
+			 */
+			resync = MD_RESYNC_ACTIVE;
+		}
+	}
 
-	if (resync == 0) {
+	if (resync == MD_RESYNC_NONE) {
 		if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
 			struct md_rdev *rdev;
 
@@ -8051,7 +8054,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
 		}
 		return 0;
 	}
-	if (resync < 3) {
+	if (resync < MD_RESYNC_ACTIVE) {
 		seq_printf(seq, "\tresync=DELAYED");
 		return 1;
 	}
@@ -8152,6 +8155,8 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!l--) {
 			mddev = list_entry(tmp, struct mddev, all_mddevs);
 			mddev_get(mddev);
+			if (!mddev_get(mddev))
+				continue;
 			spin_unlock(&all_mddevs_lock);
 			return mddev;
 		}
@@ -8165,25 +8170,35 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct list_head *tmp;
 	struct mddev *next_mddev, *mddev = v;
+	struct mddev *to_put = NULL;
 
 	++*pos;
 	if (v == (void*)2)
 		return NULL;
 
 	spin_lock(&all_mddevs_lock);
-	if (v == (void*)1)
+	if (v == (void*)1) {
 		tmp = all_mddevs.next;
-	else
+	} else {
+		to_put = mddev;
 		tmp = mddev->all_mddevs.next;
-	if (tmp != &all_mddevs)
-		next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
-	else {
-		next_mddev = (void*)2;
-		*pos = 0x10000;
+	}
+	for (;;) {
+		if (tmp == &all_mddevs) {
+			next_mddev = (void*)2;
+			*pos = 0x10000;
+			break;
+		}
+		next_mddev = list_entry(tmp, struct mddev, all_mddevs);
+		if (mddev_get(next_mddev))
+			break;
+		mddev = next_mddev;
+		tmp = mddev->all_mddevs.next;
 	}
 	spin_unlock(&all_mddevs_lock);
 
-	if (v != (void*)1)
+	if (to_put)
 		mddev_put(mddev);
 	return next_mddev;
@@ -8682,7 +8697,6 @@ void md_do_sync(struct md_thread *thread)
 	unsigned long update_time;
 	sector_t mark_cnt[SYNC_MARKS];
 	int last_mark,m;
-	struct list_head *tmp;
 	sector_t last_check;
 	int skipped = 0;
 	struct md_rdev *rdev;
@@ -8729,13 +8743,7 @@ void md_do_sync(struct md_thread *thread)
 
 	mddev->last_sync_action = action ?: desc;
 
-	/* we overload curr_resync somewhat here.
-	 * 0 == not engaged in resync at all
-	 * 2 == checking that there is no conflict with another sync
-	 * 1 == like 2, but have yielded to allow conflicting resync to
-	 *    commence
-	 * other == active in resync - this many blocks
-	 *
+	/*
 	 * Before starting a resync we must have set curr_resync to
 	 * 2, and then checked that every "conflicting" array has curr_resync
 	 * less than ours. When we find one that is the same or higher
@@ -8747,24 +8755,29 @@ void md_do_sync(struct md_thread *thread)
 
 	do {
 		int mddev2_minor = -1;
-		mddev->curr_resync = 2;
+		mddev->curr_resync = MD_RESYNC_DELAYED;
 
 	try_again:
 		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 			goto skip;
-		for_each_mddev(mddev2, tmp) {
+		spin_lock(&all_mddevs_lock);
+		list_for_each_entry(mddev2, &all_mddevs, all_mddevs) {
+			if (test_bit(MD_DELETED, &mddev2->flags))
+				continue;
 			if (mddev2 == mddev)
 				continue;
 			if (!mddev->parallel_resync
 			&&  mddev2->curr_resync
 			&&  match_mddev_units(mddev, mddev2)) {
 				DEFINE_WAIT(wq);
-				if (mddev < mddev2 && mddev->curr_resync == 2) {
+				if (mddev < mddev2 &&
+				    mddev->curr_resync == MD_RESYNC_DELAYED) {
 					/* arbitrarily yield */
-					mddev->curr_resync = 1;
+					mddev->curr_resync = MD_RESYNC_YIELDED;
 					wake_up(&resync_wait);
 				}
-				if (mddev > mddev2 && mddev->curr_resync == 1)
+				if (mddev > mddev2 &&
+				    mddev->curr_resync == MD_RESYNC_YIELDED)
 					/* no need to wait here, we can wait the next
 					 * time 'round when curr_resync == 2
 					 */
@@ -8782,7 +8795,8 @@ void md_do_sync(struct md_thread *thread)
 						 desc, mdname(mddev),
 						 mdname(mddev2));
 				}
-				mddev_put(mddev2);
+				spin_unlock(&all_mddevs_lock);
+
 				if (signal_pending(current))
 					flush_signals(current);
 				schedule();
@@ -8792,7 +8806,8 @@ void md_do_sync(struct md_thread *thread)
 				finish_wait(&resync_wait, &wq);
 			}
 		}
-	} while (mddev->curr_resync < 2);
+		spin_unlock(&all_mddevs_lock);
+	} while (mddev->curr_resync < MD_RESYNC_DELAYED);
 
 	j = 0;
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
@@ -8876,7 +8891,7 @@ void md_do_sync(struct md_thread *thread)
 			 desc, mdname(mddev));
 		mddev->curr_resync = j;
 	} else
-		mddev->curr_resync = 3; /* no longer delayed */
+		mddev->curr_resync = MD_RESYNC_ACTIVE; /* no longer delayed */
 	mddev->curr_resync_completed = j;
 	sysfs_notify_dirent_safe(mddev->sysfs_completed);
 	md_new_event();
@@ -9011,14 +9026,14 @@ void md_do_sync(struct md_thread *thread)
 
 	if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-	    mddev->curr_resync > 3) {
+	    mddev->curr_resync >= MD_RESYNC_ACTIVE) {
 		mddev->curr_resync_completed = mddev->curr_resync;
 		sysfs_notify_dirent_safe(mddev->sysfs_completed);
 	}
 	mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
 	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
-	    mddev->curr_resync > 3) {
+	    mddev->curr_resync >= MD_RESYNC_ACTIVE) {
 		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 				if (mddev->curr_resync >= mddev->recovery_cp) {
@@ -9082,7 +9097,7 @@ void md_do_sync(struct md_thread *thread)
 	} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 		mddev->resync_min = mddev->curr_resync_completed;
 	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
-	mddev->curr_resync = 0;
+	mddev->curr_resync = MD_RESYNC_NONE;
 	spin_unlock(&mddev->lock);
 
 	wake_up(&resync_wait);
@@ -9303,6 +9318,7 @@ void md_check_recovery(struct mddev *mddev)
 				 * ->spare_active and clear saved_raid_disk
 				 */
 				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+				md_unregister_thread(&mddev->sync_thread);
 				md_reap_sync_thread(mddev);
 				clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 				clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -9338,6 +9354,7 @@ void md_check_recovery(struct mddev *mddev)
 			goto unlock;
 		}
 		if (mddev->sync_thread) {
+			md_unregister_thread(&mddev->sync_thread);
 			md_reap_sync_thread(mddev);
 			goto unlock;
 		}
@@ -9417,8 +9434,7 @@ void md_reap_sync_thread(struct mddev *mddev)
 	sector_t old_dev_sectors = mddev->dev_sectors;
 	bool is_reshaped = false;
 
-	/* resync has finished, collect result */
-	md_unregister_thread(&mddev->sync_thread);
+	/* sync_thread should be unregistered, collect result */
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
 	    mddev->degraded != mddev->raid_disks) {
@@ -9466,6 +9482,7 @@ void md_reap_sync_thread(struct mddev *mddev)
 	wake_up(&resync_wait);
 	/* flag recovery needed just to double check */
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	sysfs_notify_dirent_safe(mddev->sysfs_completed);
 	sysfs_notify_dirent_safe(mddev->sysfs_action);
 	md_new_event();
 	if (mddev->event_work.func)
@@ -9544,11 +9561,14 @@ EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
 static int md_notify_reboot(struct notifier_block *this,
 			    unsigned long code, void *x)
 {
-	struct list_head *tmp;
-	struct mddev *mddev;
+	struct mddev *mddev, *n;
 	int need_delay = 0;
 
-	for_each_mddev(mddev, tmp) {
+	spin_lock(&all_mddevs_lock);
+	list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
+		if (!mddev_get(mddev))
+			continue;
+		spin_unlock(&all_mddevs_lock);
 		if (mddev_trylock(mddev)) {
 			if (mddev->pers)
 				__md_stop_writes(mddev);
@@ -9557,7 +9577,11 @@ static int md_notify_reboot(struct notifier_block *this,
 			mddev_unlock(mddev);
 		}
 		need_delay = 1;
+		mddev_put(mddev);
+		spin_lock(&all_mddevs_lock);
 	}
+	spin_unlock(&all_mddevs_lock);
 
 	/*
 	 * certain more exotic SCSI devices are known to be
 	 * volatile wrt too early system reboots. While the
@@ -9876,8 +9900,7 @@ void md_autostart_arrays(int part)
 
 static __exit void md_exit(void)
 {
-	struct mddev *mddev;
-	struct list_head *tmp;
+	struct mddev *mddev, *n;
 	int delay = 1;
 
 	unregister_blkdev(MD_MAJOR,"md");
@@ -9897,17 +9920,24 @@ static __exit void md_exit(void)
 	}
 	remove_proc_entry("mdstat", NULL);
 
-	for_each_mddev(mddev, tmp) {
+	spin_lock(&all_mddevs_lock);
+	list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
+		if (!mddev_get(mddev))
+			continue;
+		spin_unlock(&all_mddevs_lock);
 		export_array(mddev);
 		mddev->ctime = 0;
 		mddev->hold_active = 0;
 		/*
-		 * for_each_mddev() will call mddev_put() at the end of each
-		 * iteration. As the mddev is now fully clear, this will
-		 * schedule the mddev for destruction by a workqueue, and the
+		 * As the mddev is now fully clear, mddev_put will schedule
+		 * the mddev for destruction by a workqueue, and the
 		 * destroy_workqueue() below will wait for that to complete.
 		 */
+		mddev_put(mddev);
+		spin_lock(&all_mddevs_lock);
 	}
+	spin_unlock(&all_mddevs_lock);
 
 	destroy_workqueue(md_rdev_misc_wq);
 	destroy_workqueue(md_misc_wq);
 	destroy_workqueue(md_wq);
@@ -254,6 +254,7 @@ struct md_cluster_info;
 * @MD_NOT_READY: do_md_run() is active, so 'array_state', ust not report that
 *	array is ready yet.
 * @MD_BROKEN: This is used to stop writes and mark array as failed.
+* @MD_DELETED: This device is being deleted
 *
 * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
 */
@@ -270,6 +271,7 @@ enum mddev_flags {
 	MD_UPDATING_SB,
 	MD_NOT_READY,
 	MD_BROKEN,
+	MD_DELETED,
 };
 
 enum mddev_sb_flags {
@@ -288,6 +290,21 @@ struct serial_info {
 	sector_t _subtree_last; /* highest sector in subtree of rb node */
 };
 
+/*
+ * mddev->curr_resync stores the current sector of the resync but
+ * also has some overloaded values.
+ */
+enum {
+	/* No resync in progress */
+	MD_RESYNC_NONE = 0,
+	/* Yielded to allow another conflicting resync to commence */
+	MD_RESYNC_YIELDED = 1,
+	/* Delayed to check that there is no conflict with another sync */
+	MD_RESYNC_DELAYED = 2,
+	/* Any value greater than or equal to this is in an active resync */
+	MD_RESYNC_ACTIVE = 3,
+};
+
 struct mddev {
 	void *private;
 	struct md_personality *pers;
@@ -750,6 +767,8 @@ extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
 extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
 
 extern void mddev_init(struct mddev *mddev);
+struct mddev *md_alloc(dev_t dev, char *name);
+void mddev_put(struct mddev *mddev);
 extern int md_run(struct mddev *mddev);
 extern int md_start(struct mddev *mddev);
 extern void md_stop(struct mddev *mddev);
|
@ -2167,9 +2167,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||||||
int err = 0;
|
int err = 0;
|
||||||
int number = rdev->raid_disk;
|
int number = rdev->raid_disk;
|
||||||
struct md_rdev **rdevp;
|
struct md_rdev **rdevp;
|
||||||
struct raid10_info *p = conf->mirrors + number;
|
struct raid10_info *p;
|
||||||
|
|
||||||
print_conf(conf);
|
print_conf(conf);
|
||||||
|
if (unlikely(number >= mddev->raid_disks))
|
||||||
|
return 0;
|
||||||
|
p = conf->mirrors + number;
|
||||||
if (rdev == p->rdev)
|
if (rdev == p->rdev)
|
||||||
rdevp = &p->rdev;
|
rdevp = &p->rdev;
|
||||||
else if (rdev == p->replacement)
|
else if (rdev == p->replacement)
|
||||||
|
@@ -1590,18 +1590,13 @@ void r5l_quiesce(struct r5l_log *log, int quiesce)
 
 bool r5l_log_disk_error(struct r5conf *conf)
 {
-	struct r5l_log *log;
-	bool ret;
-	/* don't allow write if journal disk is missing */
-	rcu_read_lock();
-	log = rcu_dereference(conf->log);
+	struct r5l_log *log = conf->log;
 
+	/* don't allow write if journal disk is missing */
 	if (!log)
-		ret = test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
+		return test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
 	else
-		ret = test_bit(Faulty, &log->rdev->flags);
-	rcu_read_unlock();
-	return ret;
+		return test_bit(Faulty, &log->rdev->flags);
 }
 
 #define R5L_RECOVERY_PAGE_POOL_SIZE 256
@@ -2534,12 +2529,13 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
 	struct r5conf *conf;
 	int ret;
 
-	spin_lock(&mddev->lock);
+	ret = mddev_lock(mddev);
+	if (ret)
+		return ret;
+
 	conf = mddev->private;
-	if (!conf || !conf->log) {
-		spin_unlock(&mddev->lock);
-		return 0;
-	}
+	if (!conf || !conf->log)
+		goto out_unlock;
 
 	switch (conf->log->r5c_journal_mode) {
 	case R5C_JOURNAL_MODE_WRITE_THROUGH:
@@ -2557,7 +2553,9 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
 	default:
 		ret = 0;
 	}
-	spin_unlock(&mddev->lock);
+
+out_unlock:
+	mddev_unlock(mddev);
 	return ret;
 }
 
@@ -2639,7 +2637,7 @@ int r5c_try_caching_write(struct r5conf *conf,
 	int i;
 	struct r5dev *dev;
 	int to_cache = 0;
-	void **pslot;
+	void __rcu **pslot;
 	sector_t tree_index;
 	int ret;
 	uintptr_t refcount;
@@ -2806,7 +2804,7 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 	int i;
 	int do_wakeup = 0;
 	sector_t tree_index;
-	void **pslot;
+	void __rcu **pslot;
 	uintptr_t refcount;
 
 	if (!log || !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
@@ -3145,7 +3143,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	spin_lock_init(&log->stripe_in_journal_lock);
 	atomic_set(&log->stripe_in_journal_count, 0);
 
-	rcu_assign_pointer(conf->log, log);
+	conf->log = log;
 
 	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
 	return 0;
@@ -3167,13 +3165,13 @@ void r5l_exit_log(struct r5conf *conf)
 {
 	struct r5l_log *log = conf->log;
 
-	conf->log = NULL;
-	synchronize_rcu();
-
 	/* Ensure disable_writeback_work wakes up and exits */
 	wake_up(&conf->mddev->sb_wait);
 	flush_work(&log->disable_writeback_work);
 	md_unregister_thread(&log->reclaim_thread);
 
+	conf->log = NULL;
+
 	mempool_exit(&log->meta_pool);
 	bioset_exit(&log->bs);
 	mempool_exit(&log->io_pool);
@@ -2,49 +2,46 @@
 #ifndef _RAID5_LOG_H
 #define _RAID5_LOG_H
 
-extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev);
-extern void r5l_exit_log(struct r5conf *conf);
-extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh);
-extern void r5l_write_stripe_run(struct r5l_log *log);
-extern void r5l_flush_stripe_to_raid(struct r5l_log *log);
-extern void r5l_stripe_write_finished(struct stripe_head *sh);
-extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
-extern void r5l_quiesce(struct r5l_log *log, int quiesce);
-extern bool r5l_log_disk_error(struct r5conf *conf);
-extern bool r5c_is_writeback(struct r5l_log *log);
-extern int
-r5c_try_caching_write(struct r5conf *conf, struct stripe_head *sh,
+int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev);
+void r5l_exit_log(struct r5conf *conf);
+int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh);
+void r5l_write_stripe_run(struct r5l_log *log);
+void r5l_flush_stripe_to_raid(struct r5l_log *log);
+void r5l_stripe_write_finished(struct stripe_head *sh);
+int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
+void r5l_quiesce(struct r5l_log *log, int quiesce);
+bool r5l_log_disk_error(struct r5conf *conf);
+bool r5c_is_writeback(struct r5l_log *log);
+int r5c_try_caching_write(struct r5conf *conf, struct stripe_head *sh,
 			  struct stripe_head_state *s, int disks);
-extern void
-r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
+void r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
 			    struct stripe_head_state *s);
-extern void r5c_release_extra_page(struct stripe_head *sh);
-extern void r5c_use_extra_page(struct stripe_head *sh);
-extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
-extern void r5c_handle_cached_data_endio(struct r5conf *conf,
+void r5c_release_extra_page(struct stripe_head *sh);
+void r5c_use_extra_page(struct stripe_head *sh);
+void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
+void r5c_handle_cached_data_endio(struct r5conf *conf,
 	struct stripe_head *sh, int disks);
-extern int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh);
-extern void r5c_make_stripe_write_out(struct stripe_head *sh);
-extern void r5c_flush_cache(struct r5conf *conf, int num);
-extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
-extern void r5c_check_cached_full_stripe(struct r5conf *conf);
+int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh);
+void r5c_make_stripe_write_out(struct stripe_head *sh);
+void r5c_flush_cache(struct r5conf *conf, int num);
+void r5c_check_stripe_cache_usage(struct r5conf *conf);
+void r5c_check_cached_full_stripe(struct r5conf *conf);
 extern struct md_sysfs_entry r5c_journal_mode;
-extern void r5c_update_on_rdev_error(struct mddev *mddev,
-				     struct md_rdev *rdev);
-extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
-extern int r5l_start(struct r5l_log *log);
+void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev);
+bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
+int r5l_start(struct r5l_log *log);
 
-extern struct dma_async_tx_descriptor *
+struct dma_async_tx_descriptor *
 ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 		       struct dma_async_tx_descriptor *tx);
-extern int ppl_init_log(struct r5conf *conf);
-extern void ppl_exit_log(struct r5conf *conf);
-extern int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh);
-extern void ppl_write_stripe_run(struct r5conf *conf);
-extern void ppl_stripe_write_finished(struct stripe_head *sh);
-extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
-extern void ppl_quiesce(struct r5conf *conf, int quiesce);
-extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
+int ppl_init_log(struct r5conf *conf);
+void ppl_exit_log(struct r5conf *conf);
+int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh);
+void ppl_write_stripe_run(struct r5conf *conf);
+void ppl_stripe_write_finished(struct stripe_head *sh);
+int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
+void ppl_quiesce(struct r5conf *conf, int quiesce);
+int ppl_handle_flush_request(struct bio *bio);
 extern struct md_sysfs_entry ppl_write_hint;
 
 static inline bool raid5_has_log(struct r5conf *conf)
@@ -111,7 +108,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
 	if (conf->log)
 		ret = r5l_handle_flush_request(conf->log, bio);
 	else if (raid5_has_ppl(conf))
-		ret = ppl_handle_flush_request(conf->log, bio);
+		ret = ppl_handle_flush_request(bio);
 
 	return ret;
 }
@@ -679,7 +679,7 @@ void ppl_quiesce(struct r5conf *conf, int quiesce)
 	}
 }
 
-int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
+int ppl_handle_flush_request(struct bio *bio)
 {
 	if (bio->bi_iter.bi_size == 0) {
 		bio_endio(bio);
@@ -61,6 +61,8 @@
 #define cpu_to_group(cpu) cpu_to_node(cpu)
 #define ANY_GROUP NUMA_NO_NODE
 
+#define RAID5_MAX_REQ_STRIPES 256
+
 static bool devices_handle_discard_safely = false;
 module_param(devices_handle_discard_safely, bool, 0644);
 MODULE_PARM_DESC(devices_handle_discard_safely,
@@ -624,6 +626,49 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
 	return NULL;
 }
 
+static struct stripe_head *find_get_stripe(struct r5conf *conf,
+		sector_t sector, short generation, int hash)
+{
+	int inc_empty_inactive_list_flag;
+	struct stripe_head *sh;
+
+	sh = __find_stripe(conf, sector, generation);
+	if (!sh)
+		return NULL;
+
+	if (atomic_inc_not_zero(&sh->count))
+		return sh;
+
+	/*
+	 * Slow path. The reference count is zero which means the stripe must
+	 * be on a list (sh->lru). Must remove the stripe from the list that
+	 * references it with the device_lock held.
+	 */
+
+	spin_lock(&conf->device_lock);
+	if (!atomic_read(&sh->count)) {
+		if (!test_bit(STRIPE_HANDLE, &sh->state))
+			atomic_inc(&conf->active_stripes);
+		BUG_ON(list_empty(&sh->lru) &&
+		       !test_bit(STRIPE_EXPANDING, &sh->state));
+		inc_empty_inactive_list_flag = 0;
+		if (!list_empty(conf->inactive_list + hash))
+			inc_empty_inactive_list_flag = 1;
+		list_del_init(&sh->lru);
+		if (list_empty(conf->inactive_list + hash) &&
+		    inc_empty_inactive_list_flag)
+			atomic_inc(&conf->empty_inactive_list_nr);
+		if (sh->group) {
+			sh->group->stripes_cnt--;
+			sh->group = NULL;
+		}
+	}
+	atomic_inc(&sh->count);
+	spin_unlock(&conf->device_lock);
+
+	return sh;
+}
+
 /*
  * Need to check if array has failed when deciding whether to:
  *  - start an array
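Side note for readers of the hunk above: find_get_stripe() first tries atomic_inc_not_zero() so that a stripe that is already in use can be pinned without taking device_lock, and only falls back to the locked path when the count is zero. Below is a minimal, standalone C11 sketch of that "take a reference only if one already exists" pattern; struct obj and obj_get_fast() are illustrative names, not kernel code.

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical refcounted object, for illustration only. */
struct obj {
	atomic_int count;
};

/* Lockless fast path: succeed only if the count is already non-zero,
 * mirroring atomic_inc_not_zero() in the hunk above. */
static bool obj_get_fast(struct obj *o)
{
	int c = atomic_load(&o->count);

	while (c != 0) {
		/* on failure, c is reloaded and the loop retries */
		if (atomic_compare_exchange_weak(&o->count, &c, c + 1))
			return true;
	}
	return false;	/* count was zero: caller must take the slow, locked path */
}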
@@ -710,80 +755,121 @@ static bool has_failed(struct r5conf *conf)
 	return degraded > conf->max_degraded;
 }
 
-struct stripe_head *
-raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
-			int previous, int noblock, int noquiesce)
+enum stripe_result {
+	STRIPE_SUCCESS = 0,
+	STRIPE_RETRY,
+	STRIPE_SCHEDULE_AND_RETRY,
+	STRIPE_FAIL,
+};
+
+struct stripe_request_ctx {
+	/* a reference to the last stripe_head for batching */
+	struct stripe_head *batch_last;
+
+	/* first sector in the request */
+	sector_t first_sector;
+
+	/* last sector in the request */
+	sector_t last_sector;
+
+	/*
+	 * bitmap to track stripe sectors that have been added to stripes
+	 * add one to account for unaligned requests
+	 */
+	DECLARE_BITMAP(sectors_to_do, RAID5_MAX_REQ_STRIPES + 1);
+
+	/* the request had REQ_PREFLUSH, cleared after the first stripe_head */
+	bool do_flush;
+};
+
+/*
+ * Block until another thread clears R5_INACTIVE_BLOCKED or
+ * there are fewer than 3/4 the maximum number of active stripes
+ * and there is an inactive stripe available.
+ */
+static bool is_inactive_blocked(struct r5conf *conf, int hash)
+{
+	int active = atomic_read(&conf->active_stripes);
+
+	if (list_empty(conf->inactive_list + hash))
+		return false;
+
+	if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
+		return true;
+
+	return active < (conf->max_nr_stripes * 3 / 4);
+}
+
+static struct stripe_head *__raid5_get_active_stripe(struct r5conf *conf,
+		struct stripe_request_ctx *ctx, sector_t sector,
+		bool previous, bool noblock, bool noquiesce)
 {
 	struct stripe_head *sh;
 	int hash = stripe_hash_locks_hash(conf, sector);
-	int inc_empty_inactive_list_flag;
 
 	pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
 	spin_lock_irq(conf->hash_locks + hash);
 
-	do {
-		wait_event_lock_irq(conf->wait_for_quiescent,
-				    conf->quiesce == 0 || noquiesce,
-				    *(conf->hash_locks + hash));
-		sh = __find_stripe(conf, sector, conf->generation - previous);
-		if (!sh) {
-			if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
-				sh = get_free_stripe(conf, hash);
-				if (!sh && !test_bit(R5_DID_ALLOC,
-						     &conf->cache_state))
-					set_bit(R5_ALLOC_MORE,
-						&conf->cache_state);
-			}
-			if (noblock && sh == NULL)
-				break;
-
-			r5c_check_stripe_cache_usage(conf);
-			if (!sh) {
-				set_bit(R5_INACTIVE_BLOCKED,
-					&conf->cache_state);
-				r5l_wake_reclaim(conf->log, 0);
-				wait_event_lock_irq(
-					conf->wait_for_stripe,
-					!list_empty(conf->inactive_list + hash) &&
-					(atomic_read(&conf->active_stripes)
-					 < (conf->max_nr_stripes * 3 / 4)
-					 || !test_bit(R5_INACTIVE_BLOCKED,
-						      &conf->cache_state)),
-					*(conf->hash_locks + hash));
-				clear_bit(R5_INACTIVE_BLOCKED,
-					  &conf->cache_state);
-			} else {
-				init_stripe(sh, sector, previous);
-				atomic_inc(&sh->count);
-			}
-		} else if (!atomic_inc_not_zero(&sh->count)) {
-			spin_lock(&conf->device_lock);
-			if (!atomic_read(&sh->count)) {
-				if (!test_bit(STRIPE_HANDLE, &sh->state))
-					atomic_inc(&conf->active_stripes);
-				BUG_ON(list_empty(&sh->lru) &&
-				       !test_bit(STRIPE_EXPANDING, &sh->state));
-				inc_empty_inactive_list_flag = 0;
-				if (!list_empty(conf->inactive_list + hash))
-					inc_empty_inactive_list_flag = 1;
-				list_del_init(&sh->lru);
-				if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
-					atomic_inc(&conf->empty_inactive_list_nr);
-				if (sh->group) {
-					sh->group->stripes_cnt--;
-					sh->group = NULL;
-				}
-			}
-			atomic_inc(&sh->count);
-			spin_unlock(&conf->device_lock);
-		}
-	} while (sh == NULL);
+retry:
+	if (!noquiesce && conf->quiesce) {
+		/*
+		 * Must release the reference to batch_last before waiting,
+		 * on quiesce, otherwise the batch_last will hold a reference
+		 * to a stripe and raid5_quiesce() will deadlock waiting for
+		 * active_stripes to go to zero.
+		 */
+		if (ctx && ctx->batch_last) {
+			raid5_release_stripe(ctx->batch_last);
+			ctx->batch_last = NULL;
+		}
+
+		wait_event_lock_irq(conf->wait_for_quiescent, !conf->quiesce,
+				    *(conf->hash_locks + hash));
+	}
+
+	sh = find_get_stripe(conf, sector, conf->generation - previous, hash);
+	if (sh)
+		goto out;
+
+	if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
+		goto wait_for_stripe;
+
+	sh = get_free_stripe(conf, hash);
+	if (sh) {
+		r5c_check_stripe_cache_usage(conf);
+		init_stripe(sh, sector, previous);
+		atomic_inc(&sh->count);
+		goto out;
+	}
+
+	if (!test_bit(R5_DID_ALLOC, &conf->cache_state))
+		set_bit(R5_ALLOC_MORE, &conf->cache_state);
+
+wait_for_stripe:
+	if (noblock)
+		goto out;
+
+	set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
+	r5l_wake_reclaim(conf->log, 0);
+	wait_event_lock_irq(conf->wait_for_stripe,
+			    is_inactive_blocked(conf, hash),
+			    *(conf->hash_locks + hash));
+	clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
+	goto retry;
 
+out:
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
 
+struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
+		sector_t sector, bool previous, bool noblock, bool noquiesce)
+{
+	return __raid5_get_active_stripe(conf, NULL, sector, previous, noblock,
+					 noquiesce);
+}
+
 static bool is_full_stripe_write(struct stripe_head *sh)
 {
 	BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
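The wait_for_stripe label above sleeps until is_inactive_blocked() reports that progress is possible (an inactive stripe exists and the cache is below the 3/4 watermark, or nobody is throttling). A rough userspace analogue of that "wait until a predicate holds, then retry" shape, using a condition variable in place of wait_event_lock_irq(), is sketched below; it simplifies the real predicate and all names are hypothetical.

#include <pthread.h>
#include <stdbool.h>

/* Illustrative analogue of the stripe cache bookkeeping; not kernel code. */
struct cache {
	pthread_mutex_t lock;
	pthread_cond_t  wait_for_item;
	int active, max_items, inactive_count;
};

static bool can_make_progress(struct cache *c)
{
	/* simplified version of the 3/4 watermark check in is_inactive_blocked() */
	return c->inactive_count > 0 && c->active < c->max_items * 3 / 4;
}

/* Called with c->lock held, comparable to waiting under the hash lock above. */
static void wait_for_item(struct cache *c)
{
	while (!can_make_progress(c))
		pthread_cond_wait(&c->wait_for_item, &c->lock);
}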
@@ -824,13 +910,13 @@ static bool stripe_can_batch(struct stripe_head *sh)
 }
 
 /* we only do back search */
-static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
+static void stripe_add_to_batch_list(struct r5conf *conf,
+		struct stripe_head *sh, struct stripe_head *last_sh)
 {
 	struct stripe_head *head;
 	sector_t head_sector, tmp_sec;
 	int hash;
 	int dd_idx;
-	int inc_empty_inactive_list_flag;
 
 	/* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
 	tmp_sec = sh->sector;
@@ -838,36 +924,20 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
 		return;
 	head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
 
+	if (last_sh && head_sector == last_sh->sector) {
+		head = last_sh;
+		atomic_inc(&head->count);
+	} else {
 	hash = stripe_hash_locks_hash(conf, head_sector);
 	spin_lock_irq(conf->hash_locks + hash);
-	head = __find_stripe(conf, head_sector, conf->generation);
-	if (head && !atomic_inc_not_zero(&head->count)) {
-		spin_lock(&conf->device_lock);
-		if (!atomic_read(&head->count)) {
-			if (!test_bit(STRIPE_HANDLE, &head->state))
-				atomic_inc(&conf->active_stripes);
-			BUG_ON(list_empty(&head->lru) &&
-			       !test_bit(STRIPE_EXPANDING, &head->state));
-			inc_empty_inactive_list_flag = 0;
-			if (!list_empty(conf->inactive_list + hash))
-				inc_empty_inactive_list_flag = 1;
-			list_del_init(&head->lru);
-			if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
-				atomic_inc(&conf->empty_inactive_list_nr);
-			if (head->group) {
-				head->group->stripes_cnt--;
-				head->group = NULL;
-			}
-		}
-		atomic_inc(&head->count);
-		spin_unlock(&conf->device_lock);
-	}
+	head = find_get_stripe(conf, head_sector, conf->generation,
+			       hash);
 	spin_unlock_irq(conf->hash_locks + hash);
 
 	if (!head)
 		return;
 	if (!stripe_can_batch(head))
 		goto out;
+	}
 
 	lock_two_stripes(head, sh);
 	/* clear_batch_ready clear the flag */
@@ -2882,10 +2952,10 @@ static void raid5_end_write_request(struct bio *bi)
 	if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
 		clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
-	raid5_release_stripe(sh);
 
 	if (sh->batch_head && sh != sh->batch_head)
 		raid5_release_stripe(sh->batch_head);
+	raid5_release_stripe(sh);
 }
 
 static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
@@ -3413,39 +3483,32 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 			 s->locked, s->ops_request);
 }
 
-/*
- * Each stripe/dev can have one or more bion attached.
- * toread/towrite point to the first in a chain.
- * The bi_next chain must be in order.
- */
-static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
-			  int forwrite, int previous)
+static bool stripe_bio_overlaps(struct stripe_head *sh, struct bio *bi,
+				int dd_idx, int forwrite)
 {
-	struct bio **bip;
 	struct r5conf *conf = sh->raid_conf;
-	int firstwrite=0;
+	struct bio **bip;
 
-	pr_debug("adding bi b#%llu to stripe s#%llu\n",
-		 (unsigned long long)bi->bi_iter.bi_sector,
-		 (unsigned long long)sh->sector);
+	pr_debug("checking bi b#%llu to stripe s#%llu\n",
+		 bi->bi_iter.bi_sector, sh->sector);
 
-	spin_lock_irq(&sh->stripe_lock);
 	/* Don't allow new IO added to stripes in batch list */
 	if (sh->batch_head)
-		goto overlap;
-	if (forwrite) {
+		return true;
+
+	if (forwrite)
 		bip = &sh->dev[dd_idx].towrite;
-		if (*bip == NULL)
-			firstwrite = 1;
-	} else
+	else
 		bip = &sh->dev[dd_idx].toread;
 
 	while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
 		if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
-			goto overlap;
-		bip = & (*bip)->bi_next;
+			return true;
+		bip = &(*bip)->bi_next;
 	}
 
 	if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
-		goto overlap;
+		return true;
 
 	if (forwrite && raid5_has_ppl(conf)) {
 		/*
@@ -3474,9 +3537,30 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
 		}
 
 		if (first + conf->chunk_sectors * (count - 1) != last)
-			goto overlap;
+			return true;
 	}
 
+	return false;
+}
+
+static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
+			     int dd_idx, int forwrite, int previous)
+{
+	struct r5conf *conf = sh->raid_conf;
+	struct bio **bip;
+	int firstwrite = 0;
+
+	if (forwrite) {
+		bip = &sh->dev[dd_idx].towrite;
+		if (!*bip)
+			firstwrite = 1;
+	} else {
+		bip = &sh->dev[dd_idx].toread;
+	}
+
+	while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector)
+		bip = &(*bip)->bi_next;
+
 	if (!forwrite || previous)
 		clear_bit(STRIPE_BATCH_READY, &sh->state);
 
@@ -3502,9 +3586,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
 		sh->overwrite_disks++;
 	}
 
-	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
-		 (unsigned long long)(*bip)->bi_iter.bi_sector,
-		 (unsigned long long)sh->sector, dd_idx);
+	pr_debug("added bi b#%llu to stripe s#%llu, disk %d, logical %llu\n",
+		 (*bip)->bi_iter.bi_sector, sh->sector, dd_idx,
+		 sh->dev[dd_idx].sector);
 
 	if (conf->mddev->bitmap && firstwrite) {
 		/* Cannot hold spinlock over bitmap_startwrite,
@@ -3512,7 +3596,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
 		 * we have added to the bitmap and set bm_seq.
 		 * So set STRIPE_BITMAP_PENDING to prevent
 		 * batching.
-		 * If multiple add_stripe_bio() calls race here they
+		 * If multiple __add_stripe_bio() calls race here they
 		 * much all set STRIPE_BITMAP_PENDING. So only the first one
 		 * to complete "bitmap_startwrite" gets to set
 		 * STRIPE_BIT_DELAY. This is important as once a stripe
@@ -3530,16 +3614,27 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
 			set_bit(STRIPE_BIT_DELAY, &sh->state);
 		}
 	}
-	spin_unlock_irq(&sh->stripe_lock);
+}
 
-	if (stripe_can_batch(sh))
-		stripe_add_to_batch_list(conf, sh);
-	return 1;
-
-overlap:
-	set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
-	spin_unlock_irq(&sh->stripe_lock);
-	return 0;
+/*
+ * Each stripe/dev can have one or more bios attached.
+ * toread/towrite point to the first in a chain.
+ * The bi_next chain must be in order.
+ */
+static bool add_stripe_bio(struct stripe_head *sh, struct bio *bi,
+			   int dd_idx, int forwrite, int previous)
+{
+	spin_lock_irq(&sh->stripe_lock);
+
+	if (stripe_bio_overlaps(sh, bi, dd_idx, forwrite)) {
+		set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
+		spin_unlock_irq(&sh->stripe_lock);
+		return false;
+	}
+
+	__add_stripe_bio(sh, bi, dd_idx, forwrite, previous);
+	spin_unlock_irq(&sh->stripe_lock);
+	return true;
 }
 
 static void end_reshape(struct r5conf *conf);
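The split above separates the read-only overlap check (stripe_bio_overlaps) from the part that actually links the bio into the sorted towrite/toread chain (__add_stripe_bio). A small, standalone C sketch of the same idea on a toy sorted range list is shown below; struct range and its helpers are illustrative only, not the kernel's bio structures.

#include <stdbool.h>
#include <stddef.h>

/* Toy model of the per-device bio chain: ranges sorted by start,
 * linked through ->next the way bios are linked through ->bi_next. */
struct range {
	unsigned long long start, end;	/* [start, end) */
	struct range *next;
};

/* Check-only pass, analogous to stripe_bio_overlaps(). */
static bool range_overlaps(struct range **head, struct range *r)
{
	struct range **pos = head;

	while (*pos && (*pos)->start < r->start) {
		if ((*pos)->end > r->start)
			return true;
		pos = &(*pos)->next;
	}
	return *pos && (*pos)->start < r->end;
}

/* Commit pass, analogous to __add_stripe_bio(): only runs once the
 * check pass said there is no overlap. */
static bool range_add(struct range **head, struct range *r)
{
	struct range **pos = head;

	if (range_overlaps(head, r))
		return false;	/* caller would set R5_Overlap and back off */

	while (*pos && (*pos)->start < r->start)
		pos = &(*pos)->next;
	r->next = *pos;
	*pos = r;
	return true;
}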
@@ -5785,17 +5880,215 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 	bio_endio(bi);
 }
 
+static bool ahead_of_reshape(struct mddev *mddev, sector_t sector,
+			     sector_t reshape_sector)
+{
+	return mddev->reshape_backwards ? sector < reshape_sector :
+					  sector >= reshape_sector;
+}
+
+static bool range_ahead_of_reshape(struct mddev *mddev, sector_t min,
+				   sector_t max, sector_t reshape_sector)
+{
+	return mddev->reshape_backwards ? max < reshape_sector :
+					  min >= reshape_sector;
+}
+
+static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf,
+				    struct stripe_head *sh)
+{
+	sector_t max_sector = 0, min_sector = MaxSector;
+	bool ret = false;
+	int dd_idx;
+
+	for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
+		if (dd_idx == sh->pd_idx)
+			continue;
+
+		min_sector = min(min_sector, sh->dev[dd_idx].sector);
+		max_sector = min(max_sector, sh->dev[dd_idx].sector);
+	}
+
+	spin_lock_irq(&conf->device_lock);
+
+	if (!range_ahead_of_reshape(mddev, min_sector, max_sector,
+				    conf->reshape_progress))
+		/* mismatch, need to try again */
+		ret = true;
+
+	spin_unlock_irq(&conf->device_lock);
+
+	return ret;
+}
+
+static int add_all_stripe_bios(struct r5conf *conf,
+		struct stripe_request_ctx *ctx, struct stripe_head *sh,
+		struct bio *bi, int forwrite, int previous)
+{
+	int dd_idx;
+	int ret = 1;
+
+	spin_lock_irq(&sh->stripe_lock);
+
+	for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
+		struct r5dev *dev = &sh->dev[dd_idx];
+
+		if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
+			continue;
+
+		if (dev->sector < ctx->first_sector ||
+		    dev->sector >= ctx->last_sector)
+			continue;
+
+		if (stripe_bio_overlaps(sh, bi, dd_idx, forwrite)) {
+			set_bit(R5_Overlap, &dev->flags);
+			ret = 0;
+			continue;
+		}
+	}
+
+	if (!ret)
+		goto out;
+
+	for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
+		struct r5dev *dev = &sh->dev[dd_idx];
+
+		if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
+			continue;
+
+		if (dev->sector < ctx->first_sector ||
+		    dev->sector >= ctx->last_sector)
+			continue;
+
+		__add_stripe_bio(sh, bi, dd_idx, forwrite, previous);
+		clear_bit((dev->sector - ctx->first_sector) >>
+			  RAID5_STRIPE_SHIFT(conf), ctx->sectors_to_do);
+	}
+
+out:
+	spin_unlock_irq(&sh->stripe_lock);
+	return ret;
+}
+
+static enum stripe_result make_stripe_request(struct mddev *mddev,
+		struct r5conf *conf, struct stripe_request_ctx *ctx,
+		sector_t logical_sector, struct bio *bi)
+{
+	const int rw = bio_data_dir(bi);
+	enum stripe_result ret;
+	struct stripe_head *sh;
+	sector_t new_sector;
+	int previous = 0;
+	int seq, dd_idx;
+
+	seq = read_seqcount_begin(&conf->gen_lock);
+
+	if (unlikely(conf->reshape_progress != MaxSector)) {
+		/*
+		 * Spinlock is needed as reshape_progress may be
+		 * 64bit on a 32bit platform, and so it might be
+		 * possible to see a half-updated value
+		 * Of course reshape_progress could change after
+		 * the lock is dropped, so once we get a reference
+		 * to the stripe that we think it is, we will have
+		 * to check again.
+		 */
+		spin_lock_irq(&conf->device_lock);
+		if (ahead_of_reshape(mddev, logical_sector,
+				     conf->reshape_progress)) {
+			previous = 1;
+		} else {
+			if (ahead_of_reshape(mddev, logical_sector,
+					     conf->reshape_safe)) {
+				spin_unlock_irq(&conf->device_lock);
+				return STRIPE_SCHEDULE_AND_RETRY;
+			}
+		}
+		spin_unlock_irq(&conf->device_lock);
+	}
+
+	new_sector = raid5_compute_sector(conf, logical_sector, previous,
+					  &dd_idx, NULL);
+	pr_debug("raid456: %s, sector %llu logical %llu\n", __func__,
+		 new_sector, logical_sector);
+
+	sh = __raid5_get_active_stripe(conf, ctx, new_sector, previous,
+				       (bi->bi_opf & REQ_RAHEAD), 0);
+	if (unlikely(!sh)) {
+		/* cannot get stripe, just give-up */
+		bi->bi_status = BLK_STS_IOERR;
+		return STRIPE_FAIL;
+	}
+
+	if (unlikely(previous) &&
+	    stripe_ahead_of_reshape(mddev, conf, sh)) {
+		/*
+		 * Expansion moved on while waiting for a stripe.
+		 * Expansion could still move past after this
+		 * test, but as we are holding a reference to
+		 * 'sh', we know that if that happens,
+		 * STRIPE_EXPANDING will get set and the expansion
+		 * won't proceed until we finish with the stripe.
+		 */
+		ret = STRIPE_SCHEDULE_AND_RETRY;
+		goto out_release;
+	}
+
+	if (read_seqcount_retry(&conf->gen_lock, seq)) {
+		/* Might have got the wrong stripe_head by accident */
+		ret = STRIPE_RETRY;
+		goto out_release;
+	}
+
+	if (test_bit(STRIPE_EXPANDING, &sh->state) ||
+	    !add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) {
+		/*
+		 * Stripe is busy expanding or add failed due to
+		 * overlap. Flush everything and wait a while.
+		 */
+		md_wakeup_thread(mddev->thread);
+		ret = STRIPE_SCHEDULE_AND_RETRY;
+		goto out_release;
+	}
+
+	if (stripe_can_batch(sh)) {
+		stripe_add_to_batch_list(conf, sh, ctx->batch_last);
+		if (ctx->batch_last)
+			raid5_release_stripe(ctx->batch_last);
+		atomic_inc(&sh->count);
+		ctx->batch_last = sh;
+	}
+
+	if (ctx->do_flush) {
+		set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
+		/* we only need flush for one stripe */
+		ctx->do_flush = false;
+	}
+
+	set_bit(STRIPE_HANDLE, &sh->state);
+	clear_bit(STRIPE_DELAYED, &sh->state);
+	if ((!sh->batch_head || sh == sh->batch_head) &&
+	    (bi->bi_opf & REQ_SYNC) &&
+	    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+		atomic_inc(&conf->preread_active_stripes);
+
+	release_stripe_plug(mddev, sh);
+	return STRIPE_SUCCESS;
+
+out_release:
+	raid5_release_stripe(sh);
+	return ret;
+}
+
 static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 {
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	struct r5conf *conf = mddev->private;
-	int dd_idx;
-	sector_t new_sector;
-	sector_t logical_sector, last_sector;
-	struct stripe_head *sh;
+	sector_t logical_sector;
+	struct stripe_request_ctx ctx = {};
 	const int rw = bio_data_dir(bi);
-	DEFINE_WAIT(w);
-	bool do_prepare;
-	bool do_flush = false;
+	enum stripe_result res;
+	int s, stripe_cnt;
 
 	if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
 		int ret = log_handle_flush_request(conf, bi);
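The new ahead_of_reshape() helper above collapses the open-coded "?:" checks on mddev->reshape_backwards into one predicate: it answers whether a sector lies in the region the reshape has not reached yet (and therefore still maps through the old geometry). A standalone C sketch that exercises both directions is shown below; the typedef and the main() cases are illustrative, not part of the kernel sources.

#include <assert.h>
#include <stdbool.h>

typedef unsigned long long sector_t;

/* Same predicate as the ahead_of_reshape() helper added above. */
static bool ahead_of_reshape(bool reshape_backwards, sector_t sector,
			     sector_t reshape_sector)
{
	return reshape_backwards ? sector < reshape_sector
				 : sector >= reshape_sector;
}

int main(void)
{
	/* Forward reshape: progress climbs, sectors at or past it are
	 * still in the old layout. */
	assert(ahead_of_reshape(false, 1000, 1000));
	assert(!ahead_of_reshape(false, 999, 1000));

	/* Backward reshape: progress descends, so the untouched region
	 * is everything strictly below it. */
	assert(ahead_of_reshape(true, 999, 1000));
	assert(!ahead_of_reshape(true, 1000, 1000));
	return 0;
}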
@@ -5811,7 +6104,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 		 * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
 		 * we need to flush journal device
 		 */
-		do_flush = bi->bi_opf & REQ_PREFLUSH;
+		ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
 	}
 
 	if (!md_write_start(mddev, bi))
@@ -5835,134 +6128,68 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	}
 
 	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
-	last_sector = bio_end_sector(bi);
+	ctx.first_sector = logical_sector;
+	ctx.last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 
+	stripe_cnt = DIV_ROUND_UP_SECTOR_T(ctx.last_sector - logical_sector,
+					   RAID5_STRIPE_SECTORS(conf));
+	bitmap_set(ctx.sectors_to_do, 0, stripe_cnt);
+
+	pr_debug("raid456: %s, logical %llu to %llu\n", __func__,
+		 bi->bi_iter.bi_sector, ctx.last_sector);
+
 	/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
 	if ((bi->bi_opf & REQ_NOWAIT) &&
 	    (conf->reshape_progress != MaxSector) &&
-	    (mddev->reshape_backwards
-	     ? (logical_sector > conf->reshape_progress && logical_sector <= conf->reshape_safe)
-	     : (logical_sector >= conf->reshape_safe && logical_sector < conf->reshape_progress))) {
+	    !ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
+	    ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
 		bio_wouldblock_error(bi);
 		if (rw == WRITE)
 			md_write_end(mddev);
 		return true;
 	}
 	md_account_bio(mddev, &bi);
-	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
-	for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
-		int previous;
-		int seq;
 
-		do_prepare = false;
-	retry:
-		seq = read_seqcount_begin(&conf->gen_lock);
-		previous = 0;
-		if (do_prepare)
-			prepare_to_wait(&conf->wait_for_overlap, &w,
-				TASK_UNINTERRUPTIBLE);
-		if (unlikely(conf->reshape_progress != MaxSector)) {
-			/* spinlock is needed as reshape_progress may be
-			 * 64bit on a 32bit platform, and so it might be
-			 * possible to see a half-updated value
-			 * Of course reshape_progress could change after
-			 * the lock is dropped, so once we get a reference
-			 * to the stripe that we think it is, we will have
-			 * to check again.
-			 */
-			spin_lock_irq(&conf->device_lock);
-			if (mddev->reshape_backwards
-			    ? logical_sector < conf->reshape_progress
-			    : logical_sector >= conf->reshape_progress) {
-				previous = 1;
-			} else {
-				if (mddev->reshape_backwards
-				    ? logical_sector < conf->reshape_safe
-				    : logical_sector >= conf->reshape_safe) {
-					spin_unlock_irq(&conf->device_lock);
-					schedule();
-					do_prepare = true;
-					goto retry;
-				}
-			}
-			spin_unlock_irq(&conf->device_lock);
-		}
-
-		new_sector = raid5_compute_sector(conf, logical_sector,
-						  previous,
-						  &dd_idx, NULL);
-		pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
-			(unsigned long long)new_sector,
-			(unsigned long long)logical_sector);
-
-		sh = raid5_get_active_stripe(conf, new_sector, previous,
-				       (bi->bi_opf & REQ_RAHEAD), 0);
-		if (sh) {
-			if (unlikely(previous)) {
-				/* expansion might have moved on while waiting for a
-				 * stripe, so we must do the range check again.
-				 * Expansion could still move past after this
-				 * test, but as we are holding a reference to
-				 * 'sh', we know that if that happens,
-				 * STRIPE_EXPANDING will get set and the expansion
-				 * won't proceed until we finish with the stripe.
-				 */
-				int must_retry = 0;
-				spin_lock_irq(&conf->device_lock);
-				if (mddev->reshape_backwards
-				    ? logical_sector >= conf->reshape_progress
-				    : logical_sector < conf->reshape_progress)
-					/* mismatch, need to try again */
-					must_retry = 1;
-				spin_unlock_irq(&conf->device_lock);
-				if (must_retry) {
-					raid5_release_stripe(sh);
-					schedule();
-					do_prepare = true;
-					goto retry;
-				}
-			}
-			if (read_seqcount_retry(&conf->gen_lock, seq)) {
-				/* Might have got the wrong stripe_head
-				 * by accident
-				 */
-				raid5_release_stripe(sh);
-				goto retry;
-			}
-
-			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
-			    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
-				/* Stripe is busy expanding or
-				 * add failed due to overlap. Flush everything
-				 * and wait a while
-				 */
-				md_wakeup_thread(mddev->thread);
-				raid5_release_stripe(sh);
-				schedule();
-				do_prepare = true;
-				goto retry;
-			}
-			if (do_flush) {
-				set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
-				/* we only need flush for one stripe */
-				do_flush = false;
-			}
-
-			set_bit(STRIPE_HANDLE, &sh->state);
-			clear_bit(STRIPE_DELAYED, &sh->state);
-			if ((!sh->batch_head || sh == sh->batch_head) &&
-			    (bi->bi_opf & REQ_SYNC) &&
-			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-				atomic_inc(&conf->preread_active_stripes);
-			release_stripe_plug(mddev, sh);
-		} else {
-			/* cannot get stripe for read-ahead, just give-up */
-			bi->bi_status = BLK_STS_IOERR;
+	add_wait_queue(&conf->wait_for_overlap, &wait);
+	while (1) {
+		res = make_stripe_request(mddev, conf, &ctx, logical_sector,
+					  bi);
+		if (res == STRIPE_FAIL)
 			break;
+
+		if (res == STRIPE_RETRY)
+			continue;
+
+		if (res == STRIPE_SCHEDULE_AND_RETRY) {
+			/*
+			 * Must release the reference to batch_last before
+			 * scheduling and waiting for work to be done,
+			 * otherwise the batch_last stripe head could prevent
+			 * raid5_activate_delayed() from making progress
+			 * and thus deadlocking.
+			 */
+			if (ctx.batch_last) {
+				raid5_release_stripe(ctx.batch_last);
+				ctx.batch_last = NULL;
+			}
+
+			wait_woken(&wait, TASK_UNINTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+			continue;
 		}
+
+		s = find_first_bit(ctx.sectors_to_do, stripe_cnt);
+		if (s == stripe_cnt)
+			break;
+
+		logical_sector = ctx.first_sector +
+			(s << RAID5_STRIPE_SHIFT(conf));
 	}
-	finish_wait(&conf->wait_for_overlap, &w);
+	remove_wait_queue(&conf->wait_for_overlap, &wait);
+
+	if (ctx.batch_last)
+		raid5_release_stripe(ctx.batch_last);
 
 	if (rw == WRITE)
 		md_write_end(mddev);
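In the rewritten loop above, ctx.sectors_to_do tracks which stripes of the request still need a bio attached: every bit starts set, add_all_stripe_bios() clears a bit per stripe it handled, and find_first_bit() picks the sector to resume from after a retry. A small standalone C sketch of that bookkeeping is below; it assumes the default 4KiB stripe granularity (8 sectors per stripe, shift of 3) purely for illustration and uses a byte array in place of the kernel bitmap API.

#include <assert.h>
#include <string.h>

typedef unsigned long long sector_t;

#define STRIPE_SHIFT	3			/* illustrative: 4KiB stripes */
#define STRIPE_SECTORS	(1ULL << STRIPE_SHIFT)
#define MAX_REQ_STRIPES	256

static unsigned char sectors_to_do[MAX_REQ_STRIPES + 1];

int main(void)
{
	sector_t first = 0, last = 100;		/* an unaligned 100-sector request */
	int stripe_cnt = (int)((last - first + STRIPE_SECTORS - 1) / STRIPE_SECTORS);
	int s;

	/* bitmap_set(ctx.sectors_to_do, 0, stripe_cnt) equivalent */
	memset(sectors_to_do, 1, stripe_cnt);

	/* pretend stripes 0..2 were attached successfully */
	memset(sectors_to_do, 0, 3);

	/* find_first_bit() equivalent: locate the first unfinished stripe */
	for (s = 0; s < stripe_cnt && !sectors_to_do[s]; s++)
		;
	assert(s == 3);
	assert(first + ((sector_t)s << STRIPE_SHIFT) == 24);	/* resume here */
	return 0;
}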
@@ -7815,7 +8042,15 @@ static int raid5_run(struct mddev *mddev)
 		    mddev->queue->limits.discard_granularity < stripe)
 			blk_queue_max_discard_sectors(mddev->queue, 0);
 
-		blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
+		/*
+		 * Requests require having a bitmap for each stripe.
+		 * Limit the max sectors based on this.
+		 */
+		blk_queue_max_hw_sectors(mddev->queue,
+			RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf));
+
+		/* No restrictions on the number of segments in the request */
+		blk_queue_max_segments(mddev->queue, USHRT_MAX);
 	}
 
 	if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
@@ -8066,8 +8301,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	 * find the disk ... but prefer rdev->saved_raid_disk
 	 * if possible.
 	 */
-	if (rdev->saved_raid_disk >= 0 &&
-	    rdev->saved_raid_disk >= first &&
+	if (rdev->saved_raid_disk >= first &&
 	    rdev->saved_raid_disk <= last &&
 	    conf->disks[rdev->saved_raid_disk].rdev == NULL)
 		first = rdev->saved_raid_disk;
@@ -8704,8 +8938,11 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 			err = log_init(conf, NULL, true);
 			if (!err) {
 				err = resize_stripes(conf, conf->pool_size);
-				if (err)
+				if (err) {
+					mddev_suspend(mddev);
 					log_exit(conf);
+					mddev_resume(mddev);
+				}
 			}
 		} else
 			err = -EINVAL;
@@ -812,7 +812,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
 				     struct stripe_head *sh);
 extern struct stripe_head *
 raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
-			int previous, int noblock, int noquiesce);
+			bool previous, bool noblock, bool noquiesce);
 extern int raid5_calc_degraded(struct r5conf *conf);
 extern int r5c_journal_mode_set(struct mddev *mddev, int journal_mode);
 #endif
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menu "NVME Support"
 
+source "drivers/nvme/common/Kconfig"
 source "drivers/nvme/host/Kconfig"
 source "drivers/nvme/target/Kconfig"
 
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
+obj-$(CONFIG_NVME_COMMON) += common/
 obj-y += host/
 obj-y += target/
drivers/nvme/common/Kconfig (new file, 4 lines)
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config NVME_COMMON
+	tristate

drivers/nvme/common/Makefile (new file, 7 lines)
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y += -I$(src)
+
+obj-$(CONFIG_NVME_COMMON) += nvme-common.o
+
+nvme-common-y += auth.o

drivers/nvme/common/auth.c (new file, 483 lines)
@@ -0,0 +1,483 @@
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2020 Hannes Reinecke, SUSE Linux
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/crc32.h>
|
||||||
|
#include <linux/base64.h>
|
||||||
|
#include <linux/prandom.h>
|
||||||
|
#include <linux/scatterlist.h>
|
||||||
|
#include <asm/unaligned.h>
|
||||||
|
#include <crypto/hash.h>
|
||||||
|
#include <crypto/dh.h>
|
||||||
|
#include <linux/nvme.h>
|
||||||
|
#include <linux/nvme-auth.h>
|
||||||
|
|
||||||
|
static u32 nvme_dhchap_seqnum;
|
||||||
|
static DEFINE_MUTEX(nvme_dhchap_mutex);
|
||||||
|
|
||||||
|
u32 nvme_auth_get_seqnum(void)
|
||||||
|
{
|
||||||
|
u32 seqnum;
|
||||||
|
|
||||||
|
mutex_lock(&nvme_dhchap_mutex);
|
||||||
|
if (!nvme_dhchap_seqnum)
|
||||||
|
nvme_dhchap_seqnum = prandom_u32();
|
||||||
|
else {
|
||||||
|
nvme_dhchap_seqnum++;
|
||||||
|
if (!nvme_dhchap_seqnum)
|
||||||
|
nvme_dhchap_seqnum++;
|
||||||
|
}
|
||||||
|
seqnum = nvme_dhchap_seqnum;
|
||||||
|
mutex_unlock(&nvme_dhchap_mutex);
|
||||||
|
return seqnum;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_get_seqnum);
|
||||||
|
|
||||||
|
static struct nvme_auth_dhgroup_map {
|
||||||
|
const char name[16];
|
||||||
|
const char kpp[16];
|
||||||
|
} dhgroup_map[] = {
|
||||||
|
[NVME_AUTH_DHGROUP_NULL] = {
|
||||||
|
.name = "null", .kpp = "null" },
|
||||||
|
[NVME_AUTH_DHGROUP_2048] = {
|
||||||
|
.name = "ffdhe2048", .kpp = "ffdhe2048(dh)" },
|
||||||
|
[NVME_AUTH_DHGROUP_3072] = {
|
||||||
|
.name = "ffdhe3072", .kpp = "ffdhe3072(dh)" },
|
||||||
|
[NVME_AUTH_DHGROUP_4096] = {
|
||||||
|
.name = "ffdhe4096", .kpp = "ffdhe4096(dh)" },
|
||||||
|
[NVME_AUTH_DHGROUP_6144] = {
|
||||||
|
.name = "ffdhe6144", .kpp = "ffdhe6144(dh)" },
|
||||||
|
[NVME_AUTH_DHGROUP_8192] = {
|
||||||
|
.name = "ffdhe8192", .kpp = "ffdhe8192(dh)" },
|
||||||
|
};
|
||||||
|
|
||||||
|
const char *nvme_auth_dhgroup_name(u8 dhgroup_id)
|
||||||
|
{
|
||||||
|
if (dhgroup_id >= ARRAY_SIZE(dhgroup_map))
|
||||||
|
return NULL;
|
||||||
|
return dhgroup_map[dhgroup_id].name;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_dhgroup_name);
|
||||||
|
|
||||||
|
const char *nvme_auth_dhgroup_kpp(u8 dhgroup_id)
|
||||||
|
{
|
||||||
|
if (dhgroup_id >= ARRAY_SIZE(dhgroup_map))
|
||||||
|
return NULL;
|
||||||
|
return dhgroup_map[dhgroup_id].kpp;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_dhgroup_kpp);
|
||||||
|
|
||||||
|
u8 nvme_auth_dhgroup_id(const char *dhgroup_name)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (!dhgroup_name || !strlen(dhgroup_name))
|
||||||
|
return NVME_AUTH_DHGROUP_INVALID;
|
||||||
|
for (i = 0; i < ARRAY_SIZE(dhgroup_map); i++) {
|
||||||
|
if (!strlen(dhgroup_map[i].name))
|
||||||
|
continue;
|
||||||
|
if (!strncmp(dhgroup_map[i].name, dhgroup_name,
|
||||||
|
strlen(dhgroup_map[i].name)))
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
return NVME_AUTH_DHGROUP_INVALID;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_dhgroup_id);
|
||||||
|
|
||||||
|
static struct nvme_dhchap_hash_map {
|
||||||
|
int len;
|
||||||
|
const char hmac[15];
|
||||||
|
const char digest[8];
|
||||||
|
} hash_map[] = {
|
||||||
|
[NVME_AUTH_HASH_SHA256] = {
|
||||||
|
.len = 32,
|
||||||
|
.hmac = "hmac(sha256)",
|
||||||
|
.digest = "sha256",
|
||||||
|
},
|
||||||
|
[NVME_AUTH_HASH_SHA384] = {
|
||||||
|
.len = 48,
|
||||||
|
.hmac = "hmac(sha384)",
|
||||||
|
.digest = "sha384",
|
||||||
|
},
|
||||||
|
[NVME_AUTH_HASH_SHA512] = {
|
||||||
|
.len = 64,
|
||||||
|
.hmac = "hmac(sha512)",
|
||||||
|
.digest = "sha512",
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const char *nvme_auth_hmac_name(u8 hmac_id)
|
||||||
|
{
|
||||||
|
if (hmac_id >= ARRAY_SIZE(hash_map))
|
||||||
|
return NULL;
|
||||||
|
return hash_map[hmac_id].hmac;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_hmac_name);
|
||||||
|
|
||||||
|
const char *nvme_auth_digest_name(u8 hmac_id)
|
||||||
|
{
|
||||||
|
if (hmac_id >= ARRAY_SIZE(hash_map))
|
||||||
|
return NULL;
|
||||||
|
return hash_map[hmac_id].digest;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_digest_name);
|
||||||
|
|
||||||
|
u8 nvme_auth_hmac_id(const char *hmac_name)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (!hmac_name || !strlen(hmac_name))
|
||||||
|
return NVME_AUTH_HASH_INVALID;
|
||||||
|
|
||||||
|
for (i = 0; i < ARRAY_SIZE(hash_map); i++) {
|
||||||
|
if (!strlen(hash_map[i].hmac))
|
||||||
|
continue;
|
||||||
|
if (!strncmp(hash_map[i].hmac, hmac_name,
|
||||||
|
strlen(hash_map[i].hmac)))
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
return NVME_AUTH_HASH_INVALID;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_hmac_id);
|
||||||
|
|
||||||
|
size_t nvme_auth_hmac_hash_len(u8 hmac_id)
|
||||||
|
{
|
||||||
|
if (hmac_id >= ARRAY_SIZE(hash_map))
|
||||||
|
return 0;
|
||||||
|
return hash_map[hmac_id].len;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_hmac_hash_len);
|
||||||
|
|
||||||
|
struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
|
||||||
|
u8 key_hash)
|
||||||
|
{
|
||||||
|
struct nvme_dhchap_key *key;
|
||||||
|
unsigned char *p;
|
||||||
|
u32 crc;
|
||||||
|
int ret, key_len;
|
||||||
|
size_t allocated_len = strlen(secret);
|
||||||
|
|
||||||
|
/* Secret might be affixed with a ':' */
|
||||||
|
p = strrchr(secret, ':');
|
||||||
|
if (p)
|
||||||
|
allocated_len = p - secret;
|
||||||
|
key = kzalloc(sizeof(*key), GFP_KERNEL);
|
||||||
|
if (!key)
|
||||||
|
return ERR_PTR(-ENOMEM);
|
||||||
|
key->key = kzalloc(allocated_len, GFP_KERNEL);
|
||||||
|
if (!key->key) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out_free_key;
|
||||||
|
}
|
||||||
|
|
||||||
|
key_len = base64_decode(secret, allocated_len, key->key);
|
||||||
|
if (key_len < 0) {
|
||||||
|
pr_debug("base64 key decoding error %d\n",
|
||||||
|
key_len);
|
||||||
|
ret = key_len;
|
||||||
|
goto out_free_secret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (key_len != 36 && key_len != 52 &&
|
||||||
|
key_len != 68) {
|
||||||
|
pr_err("Invalid key len %d\n", key_len);
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out_free_secret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (key_hash > 0 &&
|
||||||
|
(key_len - 4) != nvme_auth_hmac_hash_len(key_hash)) {
|
||||||
|
pr_err("Mismatched key len %d for %s\n", key_len,
|
||||||
|
nvme_auth_hmac_name(key_hash));
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out_free_secret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The last four bytes is the CRC in little-endian format */
|
||||||
|
key_len -= 4;
|
||||||
|
/*
|
||||||
|
* The linux implementation doesn't do pre- and post-increments,
|
||||||
|
* so we have to do it manually.
|
||||||
|
*/
|
||||||
|
crc = ~crc32(~0, key->key, key_len);
|
||||||
|
|
||||||
|
if (get_unaligned_le32(key->key + key_len) != crc) {
|
||||||
|
pr_err("key crc mismatch (key %08x, crc %08x)\n",
|
||||||
|
get_unaligned_le32(key->key + key_len), crc);
|
||||||
|
ret = -EKEYREJECTED;
|
||||||
|
goto out_free_secret;
|
||||||
|
}
|
||||||
|
key->len = key_len;
|
||||||
|
key->hash = key_hash;
|
||||||
|
return key;
|
||||||
|
out_free_secret:
|
||||||
|
kfree_sensitive(key->key);
|
||||||
|
out_free_key:
|
||||||
|
kfree(key);
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_extract_key);
|
||||||
|
|
||||||
|
void nvme_auth_free_key(struct nvme_dhchap_key *key)
|
||||||
|
{
|
||||||
|
if (!key)
|
||||||
|
return;
|
||||||
|
kfree_sensitive(key->key);
|
||||||
|
kfree(key);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nvme_auth_free_key);
|
||||||
|
|
||||||
|
u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn)
|
||||||
|
{
|
||||||
|
const char *hmac_name;
|
||||||
|
struct crypto_shash *key_tfm;
|
||||||
|
struct shash_desc *shash;
|
||||||
|
u8 *transformed_key;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!key || !key->key) {
|
||||||
|
pr_warn("No key specified\n");
|
||||||
|
return ERR_PTR(-ENOKEY);
|
||||||
|
}
|
||||||
|
if (key->hash == 0) {
|
||||||
|
transformed_key = kmemdup(key->key, key->len, GFP_KERNEL);
|
||||||
|
return transformed_key ? transformed_key : ERR_PTR(-ENOMEM);
|
||||||
|
}
|
||||||
|
hmac_name = nvme_auth_hmac_name(key->hash);
|
||||||
|
if (!hmac_name) {
|
||||||
|
pr_warn("Invalid key hash id %d\n", key->hash);
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
key_tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||||
|
if (IS_ERR(key_tfm))
|
||||||
|
return (u8 *)key_tfm;
|
||||||
|
|
||||||
|
shash = kmalloc(sizeof(struct shash_desc) +
|
||||||
|
crypto_shash_descsize(key_tfm),
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!shash) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out_free_key;
|
||||||
|
}
|
||||||
|
|
||||||
|
transformed_key = kzalloc(crypto_shash_digestsize(key_tfm), GFP_KERNEL);
|
||||||
|
if (!transformed_key) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out_free_shash;
|
||||||
|
}
|
||||||
|
|
||||||
|
shash->tfm = key_tfm;
|
||||||
|
ret = crypto_shash_setkey(key_tfm, key->key, key->len);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out_free_transformed_key;
|
||||||
|
ret = crypto_shash_init(shash);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out_free_transformed_key;
|
||||||
|
        ret = crypto_shash_update(shash, nqn, strlen(nqn));
        if (ret < 0)
                goto out_free_transformed_key;
        ret = crypto_shash_update(shash, "NVMe-over-Fabrics", 17);
        if (ret < 0)
                goto out_free_transformed_key;
        ret = crypto_shash_final(shash, transformed_key);
        if (ret < 0)
                goto out_free_transformed_key;

        kfree(shash);
        crypto_free_shash(key_tfm);

        return transformed_key;

out_free_transformed_key:
        kfree_sensitive(transformed_key);
out_free_shash:
        kfree(shash);
out_free_key:
        crypto_free_shash(key_tfm);

        return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(nvme_auth_transform_key);

static int nvme_auth_hash_skey(int hmac_id, u8 *skey, size_t skey_len, u8 *hkey)
{
        const char *digest_name;
        struct crypto_shash *tfm;
        int ret;

        digest_name = nvme_auth_digest_name(hmac_id);
        if (!digest_name) {
                pr_debug("%s: failed to get digest for %d\n", __func__,
                         hmac_id);
                return -EINVAL;
        }
        tfm = crypto_alloc_shash(digest_name, 0, 0);
        if (IS_ERR(tfm))
                return -ENOMEM;

        ret = crypto_shash_tfm_digest(tfm, skey, skey_len, hkey);
        if (ret < 0)
                pr_debug("%s: Failed to hash digest len %zu\n", __func__,
                         skey_len);

        crypto_free_shash(tfm);
        return ret;
}

int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len,
                u8 *challenge, u8 *aug, size_t hlen)
{
        struct crypto_shash *tfm;
        struct shash_desc *desc;
        u8 *hashed_key;
        const char *hmac_name;
        int ret;

        hashed_key = kmalloc(hlen, GFP_KERNEL);
        if (!hashed_key)
                return -ENOMEM;

        ret = nvme_auth_hash_skey(hmac_id, skey,
                                  skey_len, hashed_key);
        if (ret < 0)
                goto out_free_key;

        hmac_name = nvme_auth_hmac_name(hmac_id);
        if (!hmac_name) {
                pr_warn("%s: invalid hash algorithm %d\n",
                        __func__, hmac_id);
                ret = -EINVAL;
                goto out_free_key;
        }

        tfm = crypto_alloc_shash(hmac_name, 0, 0);
        if (IS_ERR(tfm)) {
                ret = PTR_ERR(tfm);
                goto out_free_key;
        }

        desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm),
                       GFP_KERNEL);
        if (!desc) {
                ret = -ENOMEM;
                goto out_free_hash;
        }
        desc->tfm = tfm;

        ret = crypto_shash_setkey(tfm, hashed_key, hlen);
        if (ret)
                goto out_free_desc;

        ret = crypto_shash_init(desc);
        if (ret)
                goto out_free_desc;

        ret = crypto_shash_update(desc, challenge, hlen);
        if (ret)
                goto out_free_desc;

        ret = crypto_shash_final(desc, aug);
out_free_desc:
        kfree_sensitive(desc);
out_free_hash:
        crypto_free_shash(tfm);
out_free_key:
        kfree_sensitive(hashed_key);
        return ret;
}
EXPORT_SYMBOL_GPL(nvme_auth_augmented_challenge);

int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid)
{
        int ret;

        ret = crypto_kpp_set_secret(dh_tfm, NULL, 0);
        if (ret)
                pr_debug("failed to set private key, error %d\n", ret);

        return ret;
}
EXPORT_SYMBOL_GPL(nvme_auth_gen_privkey);

int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm,
                u8 *host_key, size_t host_key_len)
{
        struct kpp_request *req;
        struct crypto_wait wait;
        struct scatterlist dst;
        int ret;

        req = kpp_request_alloc(dh_tfm, GFP_KERNEL);
        if (!req)
                return -ENOMEM;

        crypto_init_wait(&wait);
        kpp_request_set_input(req, NULL, 0);
        sg_init_one(&dst, host_key, host_key_len);
        kpp_request_set_output(req, &dst, host_key_len);
        kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                 crypto_req_done, &wait);

        ret = crypto_wait_req(crypto_kpp_generate_public_key(req), &wait);
        kpp_request_free(req);
        return ret;
}
EXPORT_SYMBOL_GPL(nvme_auth_gen_pubkey);

int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
                u8 *ctrl_key, size_t ctrl_key_len,
                u8 *sess_key, size_t sess_key_len)
{
        struct kpp_request *req;
        struct crypto_wait wait;
        struct scatterlist src, dst;
        int ret;

        req = kpp_request_alloc(dh_tfm, GFP_KERNEL);
        if (!req)
                return -ENOMEM;

        crypto_init_wait(&wait);
        sg_init_one(&src, ctrl_key, ctrl_key_len);
        kpp_request_set_input(req, &src, ctrl_key_len);
        sg_init_one(&dst, sess_key, sess_key_len);
        kpp_request_set_output(req, &dst, sess_key_len);
        kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                 crypto_req_done, &wait);

        ret = crypto_wait_req(crypto_kpp_compute_shared_secret(req), &wait);

        kpp_request_free(req);
        return ret;
}
EXPORT_SYMBOL_GPL(nvme_auth_gen_shared_secret);

int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key)
{
        struct nvme_dhchap_key *key;
        u8 key_hash;

        if (!secret) {
                *ret_key = NULL;
                return 0;
        }

        if (sscanf(secret, "DHHC-1:%hhd:%*s:", &key_hash) != 1)
                return -EINVAL;

        /* Pass in the secret without the 'DHHC-1:XX:' prefix */
        key = nvme_auth_extract_key(secret + 10, key_hash);
        if (IS_ERR(key)) {
                *ret_key = NULL;
                return PTR_ERR(key);
        }

        *ret_key = key;
        return 0;
}
EXPORT_SYMBOL_GPL(nvme_auth_generate_key);

MODULE_LICENSE("GPL v2");
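Note on the secret format used above: nvme_auth_generate_key() only accepts secrets in the DH-CHAP transport format "DHHC-1:<key hash>:<base64 key>:" and strips the ten-character "DHHC-1:XX:" prefix before handing the rest to nvme_auth_extract_key(). A minimal stand-alone sketch of that same prefix parsing is shown below; the check_dhchap_secret() helper and the sample secret are made up for illustration and are not part of the kernel code (the kernel scans the hash id with "%hhd" into a u8; the sketch uses "%hhu" for a plain unsigned char).

#include <stdio.h>

/*
 * Hypothetical user-space re-implementation of the prefix parsing done by
 * nvme_auth_generate_key(): a secret must look like
 * "DHHC-1:<hash id>:<base64 key>:", and the encoded key starts at offset 10.
 */
static int check_dhchap_secret(const char *secret)
{
        unsigned char key_hash;

        if (sscanf(secret, "DHHC-1:%hhu:%*s:", &key_hash) != 1)
                return -1;      /* not a DH-CHAP transport secret */

        /* skip the "DHHC-1:XX:" prefix, just like the kernel does */
        printf("key hash id %u, encoded key \"%s\"\n",
               (unsigned int)key_hash, secret + 10);
        return 0;
}

int main(void)
{
        /* made-up secret in the expected format, for illustration only */
        return check_dhchap_secret(
                "DHHC-1:00:aGVsbG8gbnZtZSBhdXRoIGV4YW1wbGUga2V5IGJ5dGVzCg==:");
}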
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -92,6 +92,21 @@ config NVME_TCP
 
 	  If unsure, say N.
 
+config NVME_AUTH
+	bool "NVM Express over Fabrics In-Band Authentication"
+	depends on NVME_CORE
+	select NVME_COMMON
+	select CRYPTO
+	select CRYPTO_HMAC
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	select CRYPTO_DH
+	select CRYPTO_DH_RFC7919_GROUPS
+	help
+	  This provides support for NVMe over Fabrics In-Band Authentication.
+
+	  If unsure, say N.
+
 config NVME_APPLE
 	tristate "Apple ANS2 NVM Express host driver"
 	depends on OF && BLOCK
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -10,12 +10,14 @@ obj-$(CONFIG_NVME_FC) += nvme-fc.o
 obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
 obj-$(CONFIG_NVME_APPLE) += nvme-apple.o
 
-nvme-core-y := core.o ioctl.o constants.o
+nvme-core-y += core.o ioctl.o
+nvme-core-$(CONFIG_NVME_VERBOSE_ERRORS) += constants.o
 nvme-core-$(CONFIG_TRACING) += trace.o
 nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
 nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
 nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
 nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
+nvme-core-$(CONFIG_NVME_AUTH) += auth.o
 
 nvme-y += pci.o
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -845,11 +845,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
 	apple_nvme_handle_cq(&anv->adminq, true);
 	spin_unlock_irqrestore(&anv->lock, flags);
 
-	blk_mq_tagset_busy_iter(&anv->tagset, nvme_cancel_request, &anv->ctrl);
-	blk_mq_tagset_busy_iter(&anv->admin_tagset, nvme_cancel_request,
-				&anv->ctrl);
-	blk_mq_tagset_wait_completed_request(&anv->tagset);
-	blk_mq_tagset_wait_completed_request(&anv->admin_tagset);
+	nvme_cancel_tagset(&anv->ctrl);
+	nvme_cancel_admin_tagset(&anv->ctrl);
 
 	/*
 	 * The driver will not be starting up queues again if shutting down so
@@ -1222,6 +1219,11 @@ static void apple_nvme_async_probe(void *data, async_cookie_t cookie)
 	nvme_put_ctrl(&anv->ctrl);
 }
 
+static void devm_apple_nvme_put_tag_set(void *data)
+{
+	blk_mq_free_tag_set(data);
+}
+
 static int apple_nvme_alloc_tagsets(struct apple_nvme *anv)
 {
 	int ret;
@@ -1238,8 +1240,7 @@ static int apple_nvme_alloc_tagsets(struct apple_nvme *anv)
 	ret = blk_mq_alloc_tag_set(&anv->admin_tagset);
 	if (ret)
 		return ret;
-	ret = devm_add_action_or_reset(anv->dev,
-				       (void (*)(void *))blk_mq_free_tag_set,
+	ret = devm_add_action_or_reset(anv->dev, devm_apple_nvme_put_tag_set,
 				       &anv->admin_tagset);
 	if (ret)
 		return ret;
@@ -1263,8 +1264,8 @@ static int apple_nvme_alloc_tagsets(struct apple_nvme *anv)
 	ret = blk_mq_alloc_tag_set(&anv->tagset);
 	if (ret)
 		return ret;
-	ret = devm_add_action_or_reset(
-		anv->dev, (void (*)(void *))blk_mq_free_tag_set, &anv->tagset);
+	ret = devm_add_action_or_reset(anv->dev, devm_apple_nvme_put_tag_set,
+				       &anv->tagset);
 	if (ret)
 		return ret;
 
@@ -1365,6 +1366,11 @@ static int apple_nvme_attach_genpd(struct apple_nvme *anv)
 	return 0;
 }
 
+static void devm_apple_nvme_mempool_destroy(void *data)
+{
+	mempool_destroy(data);
+}
+
 static int apple_nvme_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -1462,8 +1468,8 @@ static int apple_nvme_probe(struct platform_device *pdev)
 		ret = -ENOMEM;
 		goto put_dev;
 	}
-	ret = devm_add_action_or_reset(
-		anv->dev, (void (*)(void *))mempool_destroy, anv->iod_mempool);
+	ret = devm_add_action_or_reset(anv->dev,
+			devm_apple_nvme_mempool_destroy, anv->iod_mempool);
 	if (ret)
 		goto put_dev;
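The apple.c hunks above replace "(void (*)(void *))" casts of blk_mq_free_tag_set() and mempool_destroy() with small typed wrapper functions before registering them with devm_add_action_or_reset(), presumably to avoid calling the library functions through a mismatched pointer type (which is undefined behaviour and defeats control-flow-integrity style type checks); the diff itself does not state the motivation. The same pattern in a stand-alone sketch, with a hypothetical register_cleanup() standing in for devm_add_action_or_reset():

#include <stdio.h>

/* Hypothetical stand-in for devm_add_action_or_reset(): it only accepts
 * cleanup callbacks with the exact signature void (*)(void *). */
typedef void (*cleanup_fn)(void *);

static int register_cleanup(cleanup_fn fn, void *data)
{
        /* A real implementation would store fn/data; here we just run it. */
        fn(data);
        return 0;
}

struct tag_set {
        int nr_tags;
};

/* Library function with a richer prototype, like blk_mq_free_tag_set(). */
static void tag_set_free(struct tag_set *set)
{
        printf("freeing tag set with %d tags\n", set->nr_tags);
}

/*
 * Typed wrapper, mirroring devm_apple_nvme_put_tag_set(): it already has the
 * callback signature, so no function-pointer cast is needed.
 */
static void put_tag_set(void *data)
{
        tag_set_free(data);
}

int main(void)
{
        struct tag_set set = { .nr_tags = 32 };

        /* What the old code did (do not do this): casting the function
         * pointer compiles, but calls the function through the wrong type:
         * register_cleanup((cleanup_fn)tag_set_free, &set); */

        /* What the new code does: go through a typed wrapper. */
        return register_cleanup(put_tag_set, &set);
}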
drivers/nvme/host/auth.c: new file, 1017 lines (the original page suppressed this diff because it is too large to display).
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -6,7 +6,6 @@
 
 #include "nvme.h"
 
-#ifdef CONFIG_NVME_VERBOSE_ERRORS
 static const char * const nvme_ops[] = {
 	[nvme_cmd_flush] = "Flush",
 	[nvme_cmd_write] = "Write",
@@ -178,6 +177,7 @@ const unsigned char *nvme_get_opcode_str(u8 opcode)
 		return nvme_ops[opcode];
 	return "Unknown";
 }
+EXPORT_SYMBOL_GPL(nvme_get_opcode_str);
 
 const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
 {
@@ -185,4 +185,3 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
 		return nvme_admin_ops[opcode];
 	return "Unknown";
 }
-#endif /* CONFIG_NVME_VERBOSE_ERRORS */
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -24,12 +24,22 @@
 
 #include "nvme.h"
 #include "fabrics.h"
+#include <linux/nvme-auth.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
 #define NVME_MINORS		(1U << MINORBITS)
 
+struct nvme_ns_info {
+	struct nvme_ns_ids ids;
+	u32 nsid;
+	__le32 anagrpid;
+	bool is_shared;
+	bool is_readonly;
+	bool is_ready;
+};
+
 unsigned int admin_timeout = 60;
 module_param(admin_timeout, uint, 0644);
 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
@@ -330,6 +340,7 @@ enum nvme_disposition {
 	COMPLETE,
 	RETRY,
 	FAILOVER,
+	AUTHENTICATE,
 };
 
 static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
@@ -337,6 +348,9 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
 	if (likely(nvme_req(req)->status == 0))
 		return COMPLETE;
 
+	if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED)
+		return AUTHENTICATE;
+
 	if (blk_noretry_request(req) ||
 	    (nvme_req(req)->status & NVME_SC_DNR) ||
 	    nvme_req(req)->retries >= nvme_max_retries)
@@ -375,11 +389,13 @@ static inline void nvme_end_req(struct request *req)
 
 void nvme_complete_rq(struct request *req)
 {
+	struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
+
 	trace_nvme_complete_rq(req);
 	nvme_cleanup_cmd(req);
 
-	if (nvme_req(req)->ctrl->kas)
-		nvme_req(req)->ctrl->comp_seen = true;
+	if (ctrl->kas)
+		ctrl->comp_seen = true;
 
 	switch (nvme_decide_disposition(req)) {
 	case COMPLETE:
@@ -391,6 +407,14 @@ void nvme_complete_rq(struct request *req)
 	case FAILOVER:
 		nvme_failover_req(req);
 		return;
+	case AUTHENTICATE:
+#ifdef CONFIG_NVME_AUTH
+		queue_work(nvme_wq, &ctrl->dhchap_auth_work);
+		nvme_retry_req(req);
+#else
+		nvme_end_req(req);
+#endif
+		return;
 	}
 }
 EXPORT_SYMBOL_GPL(nvme_complete_rq);
@@ -702,7 +726,9 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 	switch (ctrl->state) {
 	case NVME_CTRL_CONNECTING:
 		if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
-		    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
+		    (req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
+		     req->cmd->fabrics.fctype == nvme_fabrics_type_auth_send ||
+		     req->cmd->fabrics.fctype == nvme_fabrics_type_auth_receive))
 			return true;
 		break;
 	default:
@@ -990,8 +1016,7 @@ static int nvme_execute_rq(struct request *rq, bool at_head)
  */
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		union nvme_result *result, void *buffer, unsigned bufflen,
-		unsigned timeout, int qid, int at_head,
-		blk_mq_req_flags_t flags)
+		int qid, int at_head, blk_mq_req_flags_t flags)
 {
 	struct request *req;
 	int ret;
@@ -1000,15 +1025,12 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
 	else
 		req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
-						qid ? qid - 1 : 0);
+						qid - 1);
 
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	nvme_init_request(req, cmd);
 
-	if (timeout)
-		req->timeout = timeout;
-
 	if (buffer && bufflen) {
 		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
 		if (ret)
@@ -1028,7 +1050,7 @@ EXPORT_SYMBOL_GPL(__nvme_submit_sync_cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buffer, unsigned bufflen)
 {
-	return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0,
+	return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen,
 			NVME_QID_ANY, 0, 0);
 }
 EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
@@ -1329,8 +1351,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
 	}
 }
 
-static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
-		struct nvme_ns_ids *ids)
+static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl,
+		struct nvme_ns_info *info)
 {
 	struct nvme_command c = { };
 	bool csi_seen = false;
@@ -1343,7 +1365,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
 		return 0;
 
 	c.identify.opcode = nvme_admin_identify;
-	c.identify.nsid = cpu_to_le32(nsid);
+	c.identify.nsid = cpu_to_le32(info->nsid);
 	c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
 
 	data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
@@ -1355,7 +1377,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
 	if (status) {
 		dev_warn(ctrl->device,
 			"Identify Descriptors failed (nsid=%u, status=0x%x)\n",
-			nsid, status);
+			info->nsid, status);
 		goto free_data;
 	}
 
@@ -1365,7 +1387,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
 		if (cur->nidl == 0)
 			break;
 
-		len = nvme_process_ns_desc(ctrl, ids, cur, &csi_seen);
+		len = nvme_process_ns_desc(ctrl, &info->ids, cur, &csi_seen);
 		if (len < 0)
 			break;
 
@@ -1374,7 +1396,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
 
 	if (nvme_multi_css(ctrl) && !csi_seen) {
 		dev_warn(ctrl->device, "Command set not reported for nsid:%d\n",
-			 nsid);
+			 info->nsid);
 		status = -EINVAL;
 	}
 
@@ -1384,7 +1406,7 @@ free_data:
 }
 
 static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
-		struct nvme_ns_ids *ids, struct nvme_id_ns **id)
+		struct nvme_id_ns **id)
 {
 	struct nvme_command c = { };
 	int error;
@@ -1407,20 +1429,6 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	error = NVME_SC_INVALID_NS | NVME_SC_DNR;
 	if ((*id)->ncap == 0) /* namespace not allocated or attached */
 		goto out_free_id;
-
-	if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
-		dev_info(ctrl->device,
-			 "Ignoring bogus Namespace Identifiers\n");
-	} else {
-		if (ctrl->vs >= NVME_VS(1, 1, 0) &&
-		    !memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
-			memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64));
-		if (ctrl->vs >= NVME_VS(1, 2, 0) &&
-		    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
-			memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid));
-	}
-
 	return 0;
 
 out_free_id:
@@ -1428,30 +1436,59 @@ out_free_id:
 	return error;
 }
 
-static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
-		struct nvme_id_ns_cs_indep **id)
+static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
+		struct nvme_ns_info *info)
 {
+	struct nvme_ns_ids *ids = &info->ids;
+	struct nvme_id_ns *id;
+	int ret;
+
+	ret = nvme_identify_ns(ctrl, info->nsid, &id);
+	if (ret)
+		return ret;
+	info->anagrpid = id->anagrpid;
+	info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
+	info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
+	info->is_ready = true;
+	if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
+		dev_info(ctrl->device,
+			 "Ignoring bogus Namespace Identifiers\n");
+	} else {
+		if (ctrl->vs >= NVME_VS(1, 1, 0) &&
+		    !memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
+			memcpy(ids->eui64, id->eui64, sizeof(ids->eui64));
+		if (ctrl->vs >= NVME_VS(1, 2, 0) &&
+		    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
+			memcpy(ids->nguid, id->nguid, sizeof(ids->nguid));
+	}
+	kfree(id);
+	return 0;
+}
+
+static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
+		struct nvme_ns_info *info)
+{
+	struct nvme_id_ns_cs_indep *id;
 	struct nvme_command c = {
 		.identify.opcode	= nvme_admin_identify,
-		.identify.nsid		= cpu_to_le32(nsid),
+		.identify.nsid		= cpu_to_le32(info->nsid),
 		.identify.cns		= NVME_ID_CNS_NS_CS_INDEP,
 	};
 	int ret;
 
-	*id = kmalloc(sizeof(**id), GFP_KERNEL);
-	if (!*id)
+	id = kmalloc(sizeof(*id), GFP_KERNEL);
+	if (!id)
 		return -ENOMEM;
 
-	ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
-	if (ret) {
-		dev_warn(ctrl->device,
-			 "Identify namespace (CS independent) failed (%d)\n",
-			 ret);
-		kfree(*id);
-		return ret;
+	ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
+	if (!ret) {
+		info->anagrpid = id->anagrpid;
+		info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
+		info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
+		info->is_ready = id->nstat & NVME_NSTAT_NRDY;
 	}
-
-	return 0;
+	kfree(id);
+	return ret;
 }
 
 static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
@@ -1466,7 +1503,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
 	c.features.dword11 = cpu_to_le32(dword11);
 
 	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res,
-			buffer, buflen, 0, NVME_QID_ANY, 0, 0);
+			buffer, buflen, NVME_QID_ANY, 0, 0);
 	if (ret >= 0 && result)
 		*result = le32_to_cpu(res.u32);
 	return ret;
@@ -1875,6 +1912,11 @@ static void nvme_update_disk_info(struct gendisk *disk,
 					   ns->ctrl->max_zeroes_sectors);
 }
 
+static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info)
+{
+	return info->is_readonly || test_bit(NVME_NS_FORCE_RO, &ns->flags);
+}
+
 static inline bool nvme_first_scan(struct gendisk *disk)
 {
 	/* nvme_alloc_ns() scans the disk prior to adding it */
@@ -1912,12 +1954,44 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
 		blk_queue_chunk_sectors(ns->queue, iob);
 }
 
-static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
+static int nvme_update_ns_info_generic(struct nvme_ns *ns,
+		struct nvme_ns_info *info)
 {
-	unsigned lbaf = nvme_lbaf_index(id->flbas);
+	blk_mq_freeze_queue(ns->disk->queue);
+	nvme_set_queue_limits(ns->ctrl, ns->queue);
+	set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
+	blk_mq_unfreeze_queue(ns->disk->queue);
+
+	if (nvme_ns_head_multipath(ns->head)) {
+		blk_mq_freeze_queue(ns->head->disk->queue);
+		set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
+		nvme_mpath_revalidate_paths(ns);
+		blk_stack_limits(&ns->head->disk->queue->limits,
+				 &ns->queue->limits, 0);
+		ns->head->disk->flags |= GENHD_FL_HIDDEN;
+		blk_mq_unfreeze_queue(ns->head->disk->queue);
+	}
+
+	/* Hide the block-interface for these devices */
+	ns->disk->flags |= GENHD_FL_HIDDEN;
+	set_bit(NVME_NS_READY, &ns->flags);
+
+	return 0;
+}
+
+static int nvme_update_ns_info_block(struct nvme_ns *ns,
+		struct nvme_ns_info *info)
+{
+	struct nvme_id_ns *id;
+	unsigned lbaf;
 	int ret;
 
+	ret = nvme_identify_ns(ns->ctrl, info->nsid, &id);
+	if (ret)
+		return ret;
+
 	blk_mq_freeze_queue(ns->disk->queue);
+	lbaf = nvme_lbaf_index(id->flbas);
 	ns->lba_shift = id->lbaf[lbaf].ds;
 	nvme_set_queue_limits(ns->ctrl, ns->queue);
 
@@ -1927,36 +2001,35 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
 
 	if (ns->head->ids.csi == NVME_CSI_ZNS) {
 		ret = nvme_update_zone_info(ns, lbaf);
-		if (ret)
-			goto out_unfreeze;
+		if (ret) {
+			blk_mq_unfreeze_queue(ns->disk->queue);
+			goto out;
+		}
 	}
 
-	set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) ||
-		test_bit(NVME_NS_FORCE_RO, &ns->flags));
+	set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
 	set_bit(NVME_NS_READY, &ns->flags);
 	blk_mq_unfreeze_queue(ns->disk->queue);
 
 	if (blk_queue_is_zoned(ns->queue)) {
 		ret = nvme_revalidate_zones(ns);
 		if (ret && !nvme_first_scan(ns->disk))
-			return ret;
+			goto out;
 	}
 
 	if (nvme_ns_head_multipath(ns->head)) {
 		blk_mq_freeze_queue(ns->head->disk->queue);
 		nvme_update_disk_info(ns->head->disk, ns, id);
-		set_disk_ro(ns->head->disk,
-			    (id->nsattr & NVME_NS_ATTR_RO) ||
-				    test_bit(NVME_NS_FORCE_RO, &ns->flags));
+		set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
 		nvme_mpath_revalidate_paths(ns);
 		blk_stack_limits(&ns->head->disk->queue->limits,
 				 &ns->queue->limits, 0);
 		disk_update_readahead(ns->head->disk);
 		blk_mq_unfreeze_queue(ns->head->disk->queue);
 	}
-	return 0;
 
-out_unfreeze:
+	ret = 0;
+out:
 	/*
 	 * If probing fails due an unsupported feature, hide the block device,
 	 * but still allow other access.
@@ -1966,10 +2039,31 @@ out_unfreeze:
 		set_bit(NVME_NS_READY, &ns->flags);
 		ret = 0;
 	}
-	blk_mq_unfreeze_queue(ns->disk->queue);
+	kfree(id);
 	return ret;
 }
 
+static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
+{
+	switch (info->ids.csi) {
+	case NVME_CSI_ZNS:
+		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+			dev_info(ns->ctrl->device,
+	"block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
+				info->nsid);
+			return nvme_update_ns_info_generic(ns, info);
+		}
+		return nvme_update_ns_info_block(ns, info);
+	case NVME_CSI_NVM:
+		return nvme_update_ns_info_block(ns, info);
+	default:
+		dev_info(ns->ctrl->device,
+			"block device for nsid %u not supported (csi %u)\n",
+			info->nsid, info->ids.csi);
+		return nvme_update_ns_info_generic(ns, info);
+	}
+}
+
 static char nvme_pr_type(enum pr_type type)
 {
 	switch (type) {
@@ -2103,7 +2197,7 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 	cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
 	cmd.common.cdw11 = cpu_to_le32(len);
 
-	return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, 0,
+	return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
 			NVME_QID_ANY, 1, 0);
 }
 EXPORT_SYMBOL_GPL(nvme_sec_submit);
@@ -2123,6 +2217,7 @@ static int nvme_report_zones(struct gendisk *disk, sector_t sector,
 static const struct block_device_operations nvme_bdev_ops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= nvme_ioctl,
+	.compat_ioctl	= blkdev_compat_ptr_ioctl,
 	.open		= nvme_open,
 	.release	= nvme_release,
 	.getgeo		= nvme_getgeo,
@@ -3613,6 +3708,108 @@ static ssize_t dctype_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(dctype);
 
+#ifdef CONFIG_NVME_AUTH
+static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	struct nvmf_ctrl_options *opts = ctrl->opts;
+
+	if (!opts->dhchap_secret)
+		return sysfs_emit(buf, "none\n");
+	return sysfs_emit(buf, "%s\n", opts->dhchap_secret);
+}
+
+static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	struct nvmf_ctrl_options *opts = ctrl->opts;
+	char *dhchap_secret;
+
+	if (!ctrl->opts->dhchap_secret)
+		return -EINVAL;
+	if (count < 7)
+		return -EINVAL;
+	if (memcmp(buf, "DHHC-1:", 7))
+		return -EINVAL;
+
+	dhchap_secret = kzalloc(count + 1, GFP_KERNEL);
+	if (!dhchap_secret)
+		return -ENOMEM;
+	memcpy(dhchap_secret, buf, count);
+	nvme_auth_stop(ctrl);
+	if (strcmp(dhchap_secret, opts->dhchap_secret)) {
+		int ret;
+
+		ret = nvme_auth_generate_key(dhchap_secret, &ctrl->host_key);
+		if (ret)
+			return ret;
+		kfree(opts->dhchap_secret);
+		opts->dhchap_secret = dhchap_secret;
+		/* Key has changed; re-authentication with new key */
+		nvme_auth_reset(ctrl);
+	}
+	/* Start re-authentication */
+	dev_info(ctrl->device, "re-authenticating controller\n");
+	queue_work(nvme_wq, &ctrl->dhchap_auth_work);
+
+	return count;
+}
+static DEVICE_ATTR(dhchap_secret, S_IRUGO | S_IWUSR,
+	nvme_ctrl_dhchap_secret_show, nvme_ctrl_dhchap_secret_store);
+
+static ssize_t nvme_ctrl_dhchap_ctrl_secret_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	struct nvmf_ctrl_options *opts = ctrl->opts;
+
+	if (!opts->dhchap_ctrl_secret)
+		return sysfs_emit(buf, "none\n");
+	return sysfs_emit(buf, "%s\n", opts->dhchap_ctrl_secret);
+}
+
+static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	struct nvmf_ctrl_options *opts = ctrl->opts;
+	char *dhchap_secret;
+
+	if (!ctrl->opts->dhchap_ctrl_secret)
+		return -EINVAL;
+	if (count < 7)
+		return -EINVAL;
+	if (memcmp(buf, "DHHC-1:", 7))
+		return -EINVAL;
+
+	dhchap_secret = kzalloc(count + 1, GFP_KERNEL);
+	if (!dhchap_secret)
+		return -ENOMEM;
+	memcpy(dhchap_secret, buf, count);
+	nvme_auth_stop(ctrl);
+	if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) {
+		int ret;
+
+		ret = nvme_auth_generate_key(dhchap_secret, &ctrl->ctrl_key);
+		if (ret)
+			return ret;
+		kfree(opts->dhchap_ctrl_secret);
+		opts->dhchap_ctrl_secret = dhchap_secret;
+		/* Key has changed; re-authentication with new key */
+		nvme_auth_reset(ctrl);
+	}
+	/* Start re-authentication */
+	dev_info(ctrl->device, "re-authenticating controller\n");
+	queue_work(nvme_wq, &ctrl->dhchap_auth_work);
+
+	return count;
+}
+static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR,
+	nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store);
+#endif
+
 static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_reset_controller.attr,
 	&dev_attr_rescan_controller.attr,
@@ -3636,6 +3833,10 @@ static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_kato.attr,
 	&dev_attr_cntrltype.attr,
 	&dev_attr_dctype.attr,
+#ifdef CONFIG_NVME_AUTH
+	&dev_attr_dhchap_secret.attr,
+	&dev_attr_dhchap_ctrl_secret.attr,
+#endif
 	NULL
 };
 
@@ -3659,6 +3860,12 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
 		return 0;
 	if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts)
 		return 0;
+#ifdef CONFIG_NVME_AUTH
+	if (a == &dev_attr_dhchap_secret.attr && !ctrl->opts)
+		return 0;
+	if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts)
+		return 0;
+#endif
 
 	return a->mode;
 }
@@ -3786,7 +3993,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns)
 }
 
 static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
-		unsigned nsid, struct nvme_ns_ids *ids, bool is_shared)
+		struct nvme_ns_info *info)
 {
 	struct nvme_ns_head *head;
 	size_t size = sizeof(*head);
@@ -3808,9 +4015,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	if (ret)
 		goto out_ida_remove;
 	head->subsys = ctrl->subsys;
-	head->ns_id = nsid;
-	head->ids = *ids;
-	head->shared = is_shared;
+	head->ns_id = info->nsid;
+	head->ids = info->ids;
+	head->shared = info->is_shared;
 	kref_init(&head->ref);
 
 	if (head->ids.csi) {
@@ -3867,54 +4074,54 @@ static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this,
 	return ret;
 }
 
-static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
-		struct nvme_ns_ids *ids, bool is_shared)
+static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_ns_head *head = NULL;
 	int ret;
 
-	ret = nvme_global_check_duplicate_ids(ctrl->subsys, ids);
+	ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids);
 	if (ret) {
 		dev_err(ctrl->device,
-			"globally duplicate IDs for nsid %d\n", nsid);
+			"globally duplicate IDs for nsid %d\n", info->nsid);
 		nvme_print_device_info(ctrl);
 		return ret;
 	}
 
 	mutex_lock(&ctrl->subsys->lock);
-	head = nvme_find_ns_head(ctrl, nsid);
+	head = nvme_find_ns_head(ctrl, info->nsid);
 	if (!head) {
-		ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids);
+		ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &info->ids);
 		if (ret) {
 			dev_err(ctrl->device,
 				"duplicate IDs in subsystem for nsid %d\n",
-				nsid);
+				info->nsid);
 			goto out_unlock;
 		}
-		head = nvme_alloc_ns_head(ctrl, nsid, ids, is_shared);
+		head = nvme_alloc_ns_head(ctrl, info);
 		if (IS_ERR(head)) {
 			ret = PTR_ERR(head);
 			goto out_unlock;
 		}
 	} else {
 		ret = -EINVAL;
-		if (!is_shared || !head->shared) {
+		if (!info->is_shared || !head->shared) {
 			dev_err(ctrl->device,
-				"Duplicate unshared namespace %d\n", nsid);
+				"Duplicate unshared namespace %d\n",
+				info->nsid);
 			goto out_put_ns_head;
 		}
-		if (!nvme_ns_ids_equal(&head->ids, ids)) {
+		if (!nvme_ns_ids_equal(&head->ids, &info->ids)) {
 			dev_err(ctrl->device,
 				"IDs don't match for shared namespace %d\n",
-					nsid);
+					info->nsid);
 			goto out_put_ns_head;
 		}
 
 		if (!multipath && !list_empty(&head->list)) {
 			dev_warn(ctrl->device,
 				"Found shared namespace %d, but multipathing not supported.\n",
-				nsid);
+				info->nsid);
 			dev_warn_once(ctrl->device,
 				"Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0\n.");
 		}
@@ -3968,20 +4175,15 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
 	list_add(&ns->list, &ns->ctrl->namespaces);
 }
 
-static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
-		struct nvme_ns_ids *ids)
+static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 {
 	struct nvme_ns *ns;
 	struct gendisk *disk;
-	struct nvme_id_ns *id;
 	int node = ctrl->numa_node;
 
-	if (nvme_identify_ns(ctrl, nsid, ids, &id))
-		return;
-
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
-		goto out_free_id;
+		return;
 
 	disk = blk_mq_alloc_disk(ctrl->tagset, ns);
 	if (IS_ERR(disk))
@@ -4002,7 +4204,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	ns->ctrl = ctrl;
 	kref_init(&ns->kref);
 
-	if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED))
+	if (nvme_init_ns_head(ns, info))
 		goto out_cleanup_disk;
 
 	/*
@@ -4028,7 +4230,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 			ns->head->instance);
 	}
 
-	if (nvme_update_ns_info(ns, id))
+	if (nvme_update_ns_info(ns, info))
 		goto out_unlink_ns;
 
 	down_write(&ctrl->namespaces_rwsem);
@@ -4042,9 +4244,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	if (!nvme_ns_head_multipath(ns->head))
 		nvme_add_ns_cdev(ns);
 
-	nvme_mpath_add_disk(ns, id);
+	nvme_mpath_add_disk(ns, info->anagrpid);
 	nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
-	kfree(id);
 
 	return;
 
@@ -4064,8 +4265,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	put_disk(disk);
  out_free_ns:
 	kfree(ns);
- out_free_id:
-	kfree(id);
 }
 
 static void nvme_ns_remove(struct nvme_ns *ns)
@@ -4123,29 +4322,21 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
 	}
 }
 
-static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
+static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
 {
-	struct nvme_id_ns *id;
 	int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
 
 	if (test_bit(NVME_NS_DEAD, &ns->flags))
 		goto out;
 
-	ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, ids, &id);
-	if (ret)
-		goto out;
-
 	ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
-	if (!nvme_ns_ids_equal(&ns->head->ids, ids)) {
+	if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids)) {
 		dev_err(ns->ctrl->device,
 			"identifiers changed for nsid %d\n", ns->head->ns_id);
-		goto out_free_id;
+		goto out;
 	}
 
-	ret = nvme_update_ns_info(ns, id);
-
-out_free_id:
-	kfree(id);
+	ret = nvme_update_ns_info(ns, info);
 out:
 	/*
 	 * Only remove the namespace if we got a fatal error back from the
@@ -4157,59 +4348,47 @@ out:
 		nvme_ns_remove(ns);
 }
 
-static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
-	struct nvme_ns_ids ids = { };
-	struct nvme_id_ns_cs_indep *id;
+	struct nvme_ns_info info = { .nsid = nsid };
 	struct nvme_ns *ns;
-	bool ready = true;
 
-	if (nvme_identify_ns_descs(ctrl, nsid, &ids))
+	if (nvme_identify_ns_descs(ctrl, &info))
 		return;
 
-	/*
-	 * Check if the namespace is ready. If not ignore it, we will get an
-	 * AEN once it becomes ready and restart the scan.
-	 */
-	if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
-	    !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
-		ready = id->nstat & NVME_NSTAT_NRDY;
-		kfree(id);
+	if (info.ids.csi != NVME_CSI_NVM && !nvme_multi_css(ctrl)) {
+		dev_warn(ctrl->device,
+			"command set not reported for nsid: %d\n", nsid);
+		return;
 	}
 
-	if (!ready)
+	/*
+	 * If available try to use the Command Set Idependent Identify Namespace
+	 * data structure to find all the generic information that is needed to
+	 * set up a namespace.  If not fall back to the legacy version.
+	 */
+	if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
+	    (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS)) {
+		if (nvme_ns_info_from_id_cs_indep(ctrl, &info))
+			return;
+	} else {
+		if (nvme_ns_info_from_identify(ctrl, &info))
+			return;
+	}
+
+	/*
+	 * Ignore the namespace if it is not ready. We will get an AEN once it
+	 * becomes ready and restart the scan.
+	 */
+	if (!info.is_ready)
 		return;
 
 	ns = nvme_find_get_ns(ctrl, nsid);
 	if (ns) {
-		nvme_validate_ns(ns, &ids);
+		nvme_validate_ns(ns, &info);
 		nvme_put_ns(ns);
-		return;
-	}
-
-	switch (ids.csi) {
-	case NVME_CSI_NVM:
-		nvme_alloc_ns(ctrl, nsid, &ids);
-		break;
-	case NVME_CSI_ZNS:
-		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
-			dev_warn(ctrl->device,
-				"nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
-				nsid);
-			break;
-		}
-		if (!nvme_multi_css(ctrl)) {
-			dev_warn(ctrl->device,
-				"command set not reported for nsid: %d\n",
-				nsid);
-			break;
-		}
-		nvme_alloc_ns(ctrl, nsid, &ids);
-		break;
-	default:
-		dev_warn(ctrl->device, "unknown csi %u for nsid %u\n",
-			ids.csi, nsid);
-		break;
-	}
+	} else {
+		nvme_alloc_ns(ctrl, &info);
+	}
 }
@@ -4265,7 +4444,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
 
 		if (!nsid)	/* end of the list? */
 			goto out;
-		nvme_validate_or_alloc_ns(ctrl, nsid);
+		nvme_scan_ns(ctrl, nsid);
 		while (++prev < nsid)
 			nvme_ns_remove_by_nsid(ctrl, prev);
 	}
@@ -4288,7 +4467,7 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl)
 	kfree(id);
 
 	for (i = 1; i <= nn; i++)
-		nvme_validate_or_alloc_ns(ctrl, i);
+		nvme_scan_ns(ctrl, i);
 
 	nvme_remove_invalid_namespaces(ctrl, nn);
 }
@@ -4525,9 +4704,19 @@ static void nvme_fw_act_work(struct work_struct *work)
 	nvme_get_fw_slot_info(ctrl);
 }
 
+static u32 nvme_aer_type(u32 result)
+{
+	return result & 0x7;
+}
+
+static u32 nvme_aer_subtype(u32 result)
+{
+	return (result & 0xff00) >> 8;
+}
+
 static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 {
-	u32 aer_notice_type = (result & 0xff00) >> 8;
+	u32 aer_notice_type = nvme_aer_subtype(result);
 
 	trace_nvme_async_event(ctrl, aer_notice_type);
 
@@ -4542,8 +4731,10 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 		 * recovery actions from interfering with the controller's
 		 * firmware activation.
 		 */
-		if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+		if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
+			nvme_auth_stop(ctrl);
 			queue_work(nvme_wq, &ctrl->fw_act_work);
+		}
 		break;
 #ifdef CONFIG_NVME_MULTIPATH
 	case NVME_AER_NOTICE_ANA:
@@ -4560,11 +4751,19 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 	}
 }
 
+static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
+{
+	trace_nvme_async_event(ctrl, NVME_AER_ERROR);
+	dev_warn(ctrl->device, "resetting controller due to AER\n");
+	nvme_reset_ctrl(ctrl);
+}
+
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		volatile union nvme_result *res)
 {
 	u32 result = le32_to_cpu(res->u32);
-	u32 aer_type = result & 0x07;
+	u32 aer_type = nvme_aer_type(result);
+	u32 aer_subtype = nvme_aer_subtype(result);
 
 	if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
 		return;
@@ -4574,6 +4773,15 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		nvme_handle_aen_notice(ctrl, result);
 		break;
 	case NVME_AER_ERROR:
+		/*
+		 * For a persistent internal error, don't run async_event_work
+		 * to submit a new AER. The controller reset will do it.
+		 */
+		if (aer_subtype == NVME_AER_ERROR_PERSIST_INT_ERR) {
+			nvme_handle_aer_persistent_error(ctrl);
+			return;
+		}
+		fallthrough;
 	case NVME_AER_SMART:
 	case NVME_AER_CSS:
 	case NVME_AER_VS:
@@ -4590,6 +4798,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
 	nvme_mpath_stop(ctrl);
+	nvme_auth_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
 	nvme_stop_failfast_work(ctrl);
 	flush_work(&ctrl->async_event_work);
@@ -4649,6 +4858,8 @@ static void nvme_free_ctrl(struct device *dev)
 
 	nvme_free_cels(ctrl);
 	nvme_mpath_uninit(ctrl);
+	nvme_auth_stop(ctrl);
+	nvme_auth_free(ctrl);
 	__free_page(ctrl->discard_page);
 
 	if (subsys) {
@@ -4739,6 +4950,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
 	nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
 	nvme_mpath_init_ctrl(ctrl);
+	nvme_auth_init_ctrl(ctrl);
 
 	return 0;
 out_free_name:
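The core.c changes above add nvme_aer_type()/nvme_aer_subtype() and, for a persistent internal error AER, reset the controller instead of re-arming the event. The type and subtype are just bit fields of the 32-bit AER result dword (bits 2:0 and bits 15:8, matching the masks in the diff); a tiny stand-alone sketch of the same extraction is shown below, with a made-up sample value:

#include <stdio.h>

/* Same bit extraction as nvme_aer_type()/nvme_aer_subtype() above:
 * the AER result dword carries the event type in bits 2:0 and the
 * event subtype/information in bits 15:8. */
static unsigned int aer_type(unsigned int result)
{
        return result & 0x7;
}

static unsigned int aer_subtype(unsigned int result)
{
        return (result & 0xff00) >> 8;
}

int main(void)
{
        unsigned int result = 0x00000300;       /* made-up completion dword */

        printf("type %u, subtype %u\n", aer_type(result), aer_subtype(result));
        return 0;
}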
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -152,7 +152,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
 	cmd.prop_get.offset = cpu_to_le32(off);
 
-	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
+	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
 			NVME_QID_ANY, 0, 0);
 
 	if (ret >= 0)
@@ -198,7 +198,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 	cmd.prop_get.attrib = 1;
 	cmd.prop_get.offset = cpu_to_le32(off);
 
-	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
+	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
 			NVME_QID_ANY, 0, 0);
 
 	if (ret >= 0)
@@ -243,7 +243,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
 	cmd.prop_set.offset = cpu_to_le32(off);
 	cmd.prop_set.value = cpu_to_le64(val);
 
-	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, 0,
+	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0,
 			NVME_QID_ANY, 0, 0);
 	if (unlikely(ret))
 		dev_err(ctrl->device,
@@ -331,6 +331,10 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 		dev_err(ctrl->device,
 			"Connect command failed: host path error\n");
 		break;
+	case NVME_SC_AUTH_REQUIRED:
+		dev_err(ctrl->device,
+			"Connect command failed: authentication required\n");
+		break;
 	default:
 		dev_err(ctrl->device,
 			"Connect command failed, error wo/DNR bit: %d\n",
@@ -365,6 +369,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	union nvme_result res;
 	struct nvmf_connect_data *data;
 	int ret;
+	u32 result;
 
 	cmd.connect.opcode = nvme_fabrics_command;
 	cmd.connect.fctype = nvme_fabrics_type_connect;
@@ -389,7 +394,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
 
 	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
-			data, sizeof(*data), 0, NVME_QID_ANY, 1,
+			data, sizeof(*data), NVME_QID_ANY, 1,
 			BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
 	if (ret) {
 		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
@@ -397,8 +402,25 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 		goto out_free_data;
 	}
 
-	ctrl->cntlid = le16_to_cpu(res.u16);
+	result = le32_to_cpu(res.u32);
+	ctrl->cntlid = result & 0xFFFF;
+	if ((result >> 16) & 0x3) {
+		/* Authentication required */
+		ret = nvme_auth_negotiate(ctrl, 0);
+		if (ret) {
+			dev_warn(ctrl->device,
+				 "qid 0: authentication setup failed\n");
+			ret = NVME_SC_AUTH_REQUIRED;
+			goto out_free_data;
+		}
+		ret = nvme_auth_wait(ctrl, 0);
+		if (ret)
+			dev_warn(ctrl->device,
+				 "qid 0: authentication failed\n");
+		else
+			dev_info(ctrl->device,
+				 "qid 0: authenticated\n");
+	}
 out_free_data:
 	kfree(data);
 	return ret;
@@ -431,6 +453,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
 	struct nvmf_connect_data *data;
 	union nvme_result res;
 	int ret;
+	u32 result;
 
 	cmd.connect.opcode = nvme_fabrics_command;
 	cmd.connect.fctype = nvme_fabrics_type_connect;
@@ -450,12 +473,27 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
 
 	ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
-			data, sizeof(*data), 0, qid, 1,
+			data, sizeof(*data), qid, 1,
 			BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
 	if (ret) {
 		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
 				       &cmd, data);
 	}
+	result = le32_to_cpu(res.u32);
+	if ((result >> 16) & 2) {
+		/* Authentication required */
+		ret = nvme_auth_negotiate(ctrl, qid);
+		if (ret) {
+			dev_warn(ctrl->device,
+				 "qid %d: authentication setup failed\n", qid);
+			ret = NVME_SC_AUTH_REQUIRED;
+		} else {
+			ret = nvme_auth_wait(ctrl, qid);
+			if (ret)
+				dev_warn(ctrl->device,
+					 "qid %u: authentication failed\n", qid);
+		}
+	}
 	kfree(data);
 	return ret;
 }
@@ -548,6 +586,8 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_TOS, "tos=%d" },
 	{ NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" },
 	{ NVMF_OPT_DISCOVERY, "discovery" },
+	{ NVMF_OPT_DHCHAP_SECRET, "dhchap_secret=%s" },
+	{ NVMF_OPT_DHCHAP_CTRL_SECRET, "dhchap_ctrl_secret=%s" },
 	{ NVMF_OPT_ERR, NULL }
 };
 
@@ -829,6 +869,34 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		case NVMF_OPT_DISCOVERY:
 			opts->discovery_nqn = true;
 			break;
+		case NVMF_OPT_DHCHAP_SECRET:
+			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			if (strlen(p) < 11 || strncmp(p, "DHHC-1:", 7)) {
+				pr_err("Invalid DH-CHAP secret %s\n", p);
+				ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
kfree(opts->dhchap_secret);
|
||||||
|
opts->dhchap_secret = p;
|
||||||
|
break;
|
||||||
|
case NVMF_OPT_DHCHAP_CTRL_SECRET:
|
||||||
|
p = match_strdup(args);
|
||||||
|
if (!p) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
if (strlen(p) < 11 || strncmp(p, "DHHC-1:", 7)) {
|
||||||
|
pr_err("Invalid DH-CHAP secret %s\n", p);
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
kfree(opts->dhchap_ctrl_secret);
|
||||||
|
opts->dhchap_ctrl_secret = p;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
|
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
|
||||||
p);
|
p);
|
||||||
@ -947,6 +1015,8 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
|
|||||||
kfree(opts->subsysnqn);
|
kfree(opts->subsysnqn);
|
||||||
kfree(opts->host_traddr);
|
kfree(opts->host_traddr);
|
||||||
kfree(opts->host_iface);
|
kfree(opts->host_iface);
|
||||||
|
kfree(opts->dhchap_secret);
|
||||||
|
kfree(opts->dhchap_ctrl_secret);
|
||||||
kfree(opts);
|
kfree(opts);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(nvmf_free_options);
|
EXPORT_SYMBOL_GPL(nvmf_free_options);
|
||||||
@ -956,7 +1026,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
|
|||||||
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
|
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
|
||||||
NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
|
NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
|
||||||
NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\
|
NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\
|
||||||
NVMF_OPT_FAIL_FAST_TMO)
|
NVMF_OPT_FAIL_FAST_TMO | NVMF_OPT_DHCHAP_SECRET |\
|
||||||
|
NVMF_OPT_DHCHAP_CTRL_SECRET)
|
||||||
|
|
||||||
static struct nvme_ctrl *
|
static struct nvme_ctrl *
|
||||||
nvmf_create_ctrl(struct device *dev, const char *buf)
|
nvmf_create_ctrl(struct device *dev, const char *buf)
|
||||||
@ -1192,7 +1263,14 @@ static void __exit nvmf_exit(void)
|
|||||||
BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
|
BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
|
||||||
BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
|
BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
|
||||||
BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);
|
BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);
|
||||||
|
BUILD_BUG_ON(sizeof(struct nvmf_auth_send_command) != 64);
|
||||||
|
BUILD_BUG_ON(sizeof(struct nvmf_auth_receive_command) != 64);
|
||||||
BUILD_BUG_ON(sizeof(struct nvmf_connect_data) != 1024);
|
BUILD_BUG_ON(sizeof(struct nvmf_connect_data) != 1024);
|
||||||
|
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_negotiate_data) != 8);
|
||||||
|
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_challenge_data) != 16);
|
||||||
|
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_reply_data) != 16);
|
||||||
|
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_success1_data) != 16);
|
||||||
|
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_success2_data) != 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
MODULE_LICENSE("GPL v2");
|
MODULE_LICENSE("GPL v2");
|
||||||
|
@@ -68,6 +68,8 @@ enum {
 NVMF_OPT_FAIL_FAST_TMO = 1 << 20,
 NVMF_OPT_HOST_IFACE = 1 << 21,
 NVMF_OPT_DISCOVERY = 1 << 22,
+NVMF_OPT_DHCHAP_SECRET = 1 << 23,
+NVMF_OPT_DHCHAP_CTRL_SECRET = 1 << 24,
 };

 /**
@@ -97,6 +99,9 @@ enum {
 * @max_reconnects: maximum number of allowed reconnect attempts before removing
 * the controller, (-1) means reconnect forever, zero means remove
 * immediately;
+* @dhchap_secret: DH-HMAC-CHAP secret
+* @dhchap_ctrl_secret: DH-HMAC-CHAP controller secret for bi-directional
+* authentication
 * @disable_sqflow: disable controller sq flow control
 * @hdr_digest: generate/verify header digest (TCP)
 * @data_digest: generate/verify data digest (TCP)
@@ -121,6 +126,8 @@ struct nvmf_ctrl_options {
 unsigned int kato;
 struct nvmf_host *host;
 int max_reconnects;
+char *dhchap_secret;
+char *dhchap_ctrl_secret;
 bool disable_sqflow;
 bool hdr_digest;
 bool data_digest;
@@ -346,7 +346,7 @@ static void nvme_ns_head_submit_bio(struct bio *bio)
 * different queue via blk_steal_bios(), so we need to use the bio_split
 * pool from the original queue to allocate the bvecs from.
 */
-blk_queue_split(&bio);
+bio = bio_split_to_limits(bio);

 srcu_idx = srcu_read_lock(&head->srcu);
 ns = nvme_find_path(head);
@@ -408,6 +408,7 @@ const struct block_device_operations nvme_ns_head_ops = {
 .open = nvme_ns_head_open,
 .release = nvme_ns_head_release,
 .ioctl = nvme_ns_head_ioctl,
+.compat_ioctl = blkdev_compat_ptr_ioctl,
 .getgeo = nvme_getgeo,
 .report_zones = nvme_ns_head_report_zones,
 .pr_ops = &nvme_pr_ops,
@@ -800,16 +801,16 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
 return -ENXIO; /* just break out of the loop */
 }

-void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
+void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
 {
 if (nvme_ctrl_use_ana(ns->ctrl)) {
 struct nvme_ana_group_desc desc = {
-.grpid = id->anagrpid,
+.grpid = anagrpid,
 .state = 0,
 };

 mutex_lock(&ns->ctrl->ana_lock);
-ns->ana_grpid = le32_to_cpu(id->anagrpid);
+ns->ana_grpid = le32_to_cpu(anagrpid);
 nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
 mutex_unlock(&ns->ctrl->ana_lock);
 if (desc.state) {
@@ -140,7 +140,7 @@ enum nvme_quirks {
 NVME_QUIRK_DMA_ADDRESS_BITS_48 = (1 << 16),

 /*
-* The controller requires the command_id value be be limited, so skip
+* The controller requires the command_id value be limited, so skip
 * encoding the generation sequence number.
 */
 NVME_QUIRK_SKIP_CID_GEN = (1 << 17),
@@ -328,6 +328,15 @@ struct nvme_ctrl {
 struct work_struct ana_work;
 #endif

+#ifdef CONFIG_NVME_AUTH
+struct work_struct dhchap_auth_work;
+struct list_head dhchap_auth_list;
+struct mutex dhchap_auth_mutex;
+struct nvme_dhchap_key *host_key;
+struct nvme_dhchap_key *ctrl_key;
+u16 transaction;
+#endif
+
 /* Power saving configuration */
 u64 ps_max_latency_us;
 bool apst_enabled;
@@ -781,7 +790,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 void *buf, unsigned bufflen);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 union nvme_result *result, void *buffer, unsigned bufflen,
-unsigned timeout, int qid, int at_head,
+int qid, int at_head,
 blk_mq_req_flags_t flags);
 int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
 unsigned int dword11, void *buffer, size_t buflen,
@@ -837,7 +846,7 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
 void nvme_failover_req(struct request *req);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
-void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
+void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
 int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
 void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
@@ -879,8 +888,7 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
 {
 return 0;
 }
-static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
-struct nvme_id_ns *id)
+static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
 {
 }
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -992,6 +1000,27 @@ static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
 return ctrl->sgls & ((1 << 0) | (1 << 1));
 }

+#ifdef CONFIG_NVME_AUTH
+void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl);
+void nvme_auth_stop(struct nvme_ctrl *ctrl);
+int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid);
+int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid);
+void nvme_auth_reset(struct nvme_ctrl *ctrl);
+void nvme_auth_free(struct nvme_ctrl *ctrl);
+#else
+static inline void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) {};
+static inline void nvme_auth_stop(struct nvme_ctrl *ctrl) {};
+static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
+{
+return -EPROTONOSUPPORT;
+}
+static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
+{
+return NVME_SC_AUTH_REQUIRED;
+}
+static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {};
+#endif
+
 u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 u8 opcode);
 int nvme_execute_passthru_rq(struct request *rq);
@@ -670,7 +670,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,

 prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
 if (!prp_list) {
-iod->first_dma = dma_addr;
 iod->npages = -1;
 return BLK_STS_RESOURCE;
 }
@@ -1435,8 +1434,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 cmd.abort.sqid = cpu_to_le16(nvmeq->qid);

 dev_warn(nvmeq->dev->ctrl.device,
-"I/O %d QID %d timeout, aborting\n",
-req->tag, nvmeq->qid);
+"I/O %d (%s) QID %d timeout, aborting\n",
+req->tag,
+nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode),
+nvmeq->qid);

 abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
 BLK_MQ_REQ_NOWAIT);
@@ -1765,26 +1766,27 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
 }
 }

-static int nvme_alloc_admin_tags(struct nvme_dev *dev)
+static int nvme_pci_alloc_admin_tag_set(struct nvme_dev *dev)
 {
-if (!dev->ctrl.admin_q) {
-dev->admin_tagset.ops = &nvme_mq_admin_ops;
-dev->admin_tagset.nr_hw_queues = 1;
+struct blk_mq_tag_set *set = &dev->admin_tagset;

-dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-dev->admin_tagset.timeout = NVME_ADMIN_TIMEOUT;
-dev->admin_tagset.numa_node = dev->ctrl.numa_node;
-dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
-dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
-dev->admin_tagset.driver_data = dev;
+set->ops = &nvme_mq_admin_ops;
+set->nr_hw_queues = 1;

-if (blk_mq_alloc_tag_set(&dev->admin_tagset))
+set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
+set->timeout = NVME_ADMIN_TIMEOUT;
+set->numa_node = dev->ctrl.numa_node;
+set->cmd_size = sizeof(struct nvme_iod);
+set->flags = BLK_MQ_F_NO_SCHED;
+set->driver_data = dev;
+
+if (blk_mq_alloc_tag_set(set))
 return -ENOMEM;
-dev->ctrl.admin_tagset = &dev->admin_tagset;
+dev->ctrl.admin_tagset = set;

-dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
+dev->ctrl.admin_q = blk_mq_init_queue(set);
 if (IS_ERR(dev->ctrl.admin_q)) {
-blk_mq_free_tag_set(&dev->admin_tagset);
+blk_mq_free_tag_set(set);
 dev->ctrl.admin_q = NULL;
 return -ENOMEM;
 }
@@ -1793,9 +1795,6 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 dev->ctrl.admin_q = NULL;
 return -ENODEV;
 }
-} else
-nvme_start_admin_queue(&dev->ctrl);

 return 0;
 }

@@ -2534,23 +2533,22 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
 return true;
 }

-static void nvme_dev_add(struct nvme_dev *dev)
+static void nvme_pci_alloc_tag_set(struct nvme_dev *dev)
 {
+struct blk_mq_tag_set * set = &dev->tagset;
 int ret;

-if (!dev->ctrl.tagset) {
-dev->tagset.ops = &nvme_mq_ops;
-dev->tagset.nr_hw_queues = dev->online_queues - 1;
-dev->tagset.nr_maps = 2; /* default + read */
+set->ops = &nvme_mq_ops;
+set->nr_hw_queues = dev->online_queues - 1;
+set->nr_maps = 2; /* default + read */
 if (dev->io_queues[HCTX_TYPE_POLL])
-dev->tagset.nr_maps++;
-dev->tagset.timeout = NVME_IO_TIMEOUT;
-dev->tagset.numa_node = dev->ctrl.numa_node;
-dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth,
-BLK_MQ_MAX_DEPTH) - 1;
-dev->tagset.cmd_size = sizeof(struct nvme_iod);
-dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
-dev->tagset.driver_data = dev;
+set->nr_maps++;
+set->timeout = NVME_IO_TIMEOUT;
+set->numa_node = dev->ctrl.numa_node;
+set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
+set->cmd_size = sizeof(struct nvme_iod);
+set->flags = BLK_MQ_F_SHOULD_MERGE;
+set->driver_data = dev;

 /*
 * Some Apple controllers requires tags to be unique
@@ -2558,23 +2556,22 @@ static void nvme_dev_add(struct nvme_dev *dev)
 * tags of the IO queue.
 */
 if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
-dev->tagset.reserved_tags = NVME_AQ_DEPTH;
+set->reserved_tags = NVME_AQ_DEPTH;

-ret = blk_mq_alloc_tag_set(&dev->tagset);
+ret = blk_mq_alloc_tag_set(set);
 if (ret) {
 dev_warn(dev->ctrl.device,
 "IO queues tagset allocation failed %d\n", ret);
 return;
 }
-dev->ctrl.tagset = &dev->tagset;
-} else {
+dev->ctrl.tagset = set;
+}

+static void nvme_pci_update_nr_queues(struct nvme_dev *dev)
+{
 blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
-/* Free previously allocated queues that are no longer usable */
+/* free previously allocated queues that are no longer usable */
 nvme_free_queues(dev, dev->online_queues);
-}

-nvme_dbbuf_set(dev);
 }

 static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2725,10 +2722,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 nvme_pci_disable(dev);
 nvme_reap_pending_cqes(dev);

-blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
-blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
-blk_mq_tagset_wait_completed_request(&dev->tagset);
-blk_mq_tagset_wait_completed_request(&dev->admin_tagset);
+nvme_cancel_tagset(&dev->ctrl);
+nvme_cancel_admin_tagset(&dev->ctrl);

 /*
 * The driver will not be starting up queues again if shutting down so
@@ -2842,9 +2837,13 @@ static void nvme_reset_work(struct work_struct *work)
 if (result)
 goto out_unlock;

-result = nvme_alloc_admin_tags(dev);
+if (!dev->ctrl.admin_q) {
+result = nvme_pci_alloc_admin_tag_set(dev);
 if (result)
 goto out_unlock;
+} else {
+nvme_start_admin_queue(&dev->ctrl);
+}

 /*
 * Limit the max command size to prevent iod->sg allocations going
@@ -2923,7 +2922,11 @@ static void nvme_reset_work(struct work_struct *work)
 } else {
 nvme_start_queues(&dev->ctrl);
 nvme_wait_freeze(&dev->ctrl);
-nvme_dev_add(dev);
+if (!dev->ctrl.tagset)
+nvme_pci_alloc_tag_set(dev);
+else
+nvme_pci_update_nr_queues(dev);
+nvme_dbbuf_set(dev);
 nvme_unfreeze(&dev->ctrl);
 }

@@ -29,7 +29,7 @@
 #include "fabrics.h"


-#define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */
+#define NVME_RDMA_CM_TIMEOUT_MS 3000 /* 3 second */

 #define NVME_RDMA_MAX_SEGMENTS 256

@@ -248,12 +248,9 @@ static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue)
 {
 int ret;

-ret = wait_for_completion_interruptible_timeout(&queue->cm_done,
-msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1);
-if (ret < 0)
+ret = wait_for_completion_interruptible(&queue->cm_done);
+if (ret)
 return ret;
-if (ret == 0)
-return -ETIMEDOUT;
 WARN_ON_ONCE(queue->cm_error > 0);
 return queue->cm_error;
 }
@@ -612,7 +609,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 queue->cm_error = -ETIMEDOUT;
 ret = rdma_resolve_addr(queue->cm_id, src_addr,
 (struct sockaddr *)&ctrl->addr,
-NVME_RDMA_CONNECT_TIMEOUT_MS);
+NVME_RDMA_CM_TIMEOUT_MS);
 if (ret) {
 dev_info(ctrl->ctrl.device,
 "rdma_resolve_addr failed (%d).\n", ret);
@@ -790,15 +787,12 @@ out_free_queues:
 return ret;
 }

-static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
-bool admin)
+static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl)
 {
 struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-struct blk_mq_tag_set *set;
+struct blk_mq_tag_set *set = &ctrl->admin_tag_set;
 int ret;

-if (admin) {
-set = &ctrl->admin_tag_set;
 memset(set, 0, sizeof(*set));
 set->ops = &nvme_rdma_admin_mq_ops;
 set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
@@ -810,8 +804,18 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 set->nr_hw_queues = 1;
 set->timeout = NVME_ADMIN_TIMEOUT;
 set->flags = BLK_MQ_F_NO_SCHED;
-} else {
-set = &ctrl->tag_set;
+ret = blk_mq_alloc_tag_set(set);
+if (!ret)
+ctrl->ctrl.admin_tagset = set;
+return ret;
+}
+
+static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl)
+{
+struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+struct blk_mq_tag_set *set = &ctrl->tag_set;
+int ret;
+
 memset(set, 0, sizeof(*set));
 set->ops = &nvme_rdma_mq_ops;
 set->queue_depth = nctrl->sqsize + 1;
@@ -827,13 +831,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 set->nr_hw_queues = nctrl->queue_count - 1;
 set->timeout = NVME_IO_TIMEOUT;
 set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
-}

 ret = blk_mq_alloc_tag_set(set);
-if (ret)
-return ERR_PTR(ret);
-return set;
+if (!ret)
+ctrl->ctrl.tagset = set;
+return ret;
 }

 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
@@ -885,11 +886,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 goto out_free_queue;

 if (new) {
-ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
-if (IS_ERR(ctrl->ctrl.admin_tagset)) {
-error = PTR_ERR(ctrl->ctrl.admin_tagset);
+error = nvme_rdma_alloc_admin_tag_set(&ctrl->ctrl);
+if (error)
 goto out_free_async_qe;
-}

 ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
 if (IS_ERR(ctrl->ctrl.fabrics_q)) {
@@ -972,11 +971,9 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 return ret;

 if (new) {
-ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
-if (IS_ERR(ctrl->ctrl.tagset)) {
-ret = PTR_ERR(ctrl->ctrl.tagset);
+ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl);
+if (ret)
 goto out_free_io_queues;
-}

 ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
 if (ret)
@@ -1205,6 +1202,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 struct nvme_rdma_ctrl *ctrl = container_of(work,
 struct nvme_rdma_ctrl, err_work);

+nvme_auth_stop(&ctrl->ctrl);
 nvme_stop_keep_alive(&ctrl->ctrl);
 flush_work(&ctrl->ctrl.async_event_work);
 nvme_rdma_teardown_io_queues(ctrl, false);
@@ -1894,7 +1892,7 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)

 if (ctrl->opts->tos >= 0)
 rdma_set_service_type(queue->cm_id, ctrl->opts->tos);
-ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
+ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CM_TIMEOUT_MS);
 if (ret) {
 dev_err(ctrl->device, "rdma_resolve_route failed (%d).\n",
 queue->cm_error);
@@ -209,9 +209,11 @@ static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
 return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
 }

-static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
+static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req)
 {
-return queue->cmnd_capsule_len - sizeof(struct nvme_command);
+if (nvme_is_fabrics(req->req.cmd))
+return NVME_TCP_ADMIN_CCSZ;
+return req->queue->cmnd_capsule_len - sizeof(struct nvme_command);
 }

 static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
@@ -229,7 +231,7 @@ static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
 rq = blk_mq_rq_from_pdu(req);

 return rq_data_dir(rq) == WRITE && req->data_len &&
-req->data_len <= nvme_tcp_inline_data_size(req->queue);
+req->data_len <= nvme_tcp_inline_data_size(req);
 }

 static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
@@ -1685,15 +1687,12 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
 return ret;
 }

-static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
-bool admin)
+static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *nctrl)
 {
 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
-struct blk_mq_tag_set *set;
+struct blk_mq_tag_set *set = &ctrl->admin_tag_set;
 int ret;

-if (admin) {
-set = &ctrl->admin_tag_set;
 memset(set, 0, sizeof(*set));
 set->ops = &nvme_tcp_admin_mq_ops;
 set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
@@ -1704,8 +1703,18 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 set->driver_data = ctrl;
 set->nr_hw_queues = 1;
 set->timeout = NVME_ADMIN_TIMEOUT;
-} else {
-set = &ctrl->tag_set;
+ret = blk_mq_alloc_tag_set(set);
+if (!ret)
+nctrl->admin_tagset = set;
+return ret;
+}
+
+static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *nctrl)
+{
+struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+struct blk_mq_tag_set *set = &ctrl->tag_set;
+int ret;
+
 memset(set, 0, sizeof(*set));
 set->ops = &nvme_tcp_mq_ops;
 set->queue_depth = nctrl->sqsize + 1;
@@ -1717,13 +1726,10 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 set->nr_hw_queues = nctrl->queue_count - 1;
 set->timeout = NVME_IO_TIMEOUT;
 set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
-}

 ret = blk_mq_alloc_tag_set(set);
-if (ret)
-return ERR_PTR(ret);
-return set;
+if (!ret)
+nctrl->tagset = set;
+return ret;
 }

 static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
@@ -1899,11 +1905,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
 return ret;

 if (new) {
-ctrl->tagset = nvme_tcp_alloc_tagset(ctrl, false);
-if (IS_ERR(ctrl->tagset)) {
-ret = PTR_ERR(ctrl->tagset);
+ret = nvme_tcp_alloc_tag_set(ctrl);
+if (ret)
 goto out_free_io_queues;
-}

 ret = nvme_ctrl_init_connect_q(ctrl);
 if (ret)
@@ -1968,11 +1972,9 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
 return error;

 if (new) {
-ctrl->admin_tagset = nvme_tcp_alloc_tagset(ctrl, true);
-if (IS_ERR(ctrl->admin_tagset)) {
-error = PTR_ERR(ctrl->admin_tagset);
+error = nvme_tcp_alloc_admin_tag_set(ctrl);
+if (error)
 goto out_free_queue;
-}

 ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset);
 if (IS_ERR(ctrl->fabrics_q)) {
@@ -2173,6 +2175,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 struct nvme_tcp_ctrl, err_work);
 struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;

+nvme_auth_stop(ctrl);
 nvme_stop_keep_alive(ctrl);
 flush_work(&ctrl->async_event_work);
 nvme_tcp_teardown_io_queues(ctrl, false);
@@ -2371,7 +2374,7 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
 if (!blk_rq_nr_phys_segments(rq))
 nvme_tcp_set_sg_null(c);
 else if (rq_data_dir(rq) == WRITE &&
-req->data_len <= nvme_tcp_inline_data_size(queue))
+req->data_len <= nvme_tcp_inline_data_size(req))
 nvme_tcp_set_sg_inline(queue, c, req->data_len);
 else
 nvme_tcp_set_sg_host_data(c, req->data_len);
@@ -2406,7 +2409,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 nvme_tcp_init_iter(req, rq_data_dir(rq));

 if (rq_data_dir(rq) == WRITE &&
-req->data_len <= nvme_tcp_inline_data_size(queue))
+req->data_len <= nvme_tcp_inline_data_size(req))
 req->pdu_len = req->data_len;

 pdu->hdr.type = nvme_tcp_cmd;
@@ -287,6 +287,34 @@ static const char *nvme_trace_fabrics_property_get(struct trace_seq *p, u8 *spc)
 return ret;
 }

+static const char *nvme_trace_fabrics_auth_send(struct trace_seq *p, u8 *spc)
+{
+const char *ret = trace_seq_buffer_ptr(p);
+u8 spsp0 = spc[1];
+u8 spsp1 = spc[2];
+u8 secp = spc[3];
+u32 tl = get_unaligned_le32(spc + 4);
+
+trace_seq_printf(p, "spsp0=%02x, spsp1=%02x, secp=%02x, tl=%u",
+spsp0, spsp1, secp, tl);
+trace_seq_putc(p, 0);
+return ret;
+}
+
+static const char *nvme_trace_fabrics_auth_receive(struct trace_seq *p, u8 *spc)
+{
+const char *ret = trace_seq_buffer_ptr(p);
+u8 spsp0 = spc[1];
+u8 spsp1 = spc[2];
+u8 secp = spc[3];
+u32 al = get_unaligned_le32(spc + 4);
+
+trace_seq_printf(p, "spsp0=%02x, spsp1=%02x, secp=%02x, al=%u",
+spsp0, spsp1, secp, al);
+trace_seq_putc(p, 0);
+return ret;
+}
+
 static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc)
 {
 const char *ret = trace_seq_buffer_ptr(p);
@@ -306,6 +334,10 @@ const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p,
 return nvme_trace_fabrics_connect(p, spc);
 case nvme_fabrics_type_property_get:
 return nvme_trace_fabrics_property_get(p, spc);
+case nvme_fabrics_type_auth_send:
+return nvme_trace_fabrics_auth_send(p, spc);
+case nvme_fabrics_type_auth_receive:
+return nvme_trace_fabrics_auth_receive(p, spc);
 default:
 return nvme_trace_fabrics_common(p, spc);
 }
@@ -98,7 +98,7 @@ TRACE_EVENT(nvme_complete_rq,
 TP_fast_assign(
 __entry->ctrl_id = nvme_req(req)->ctrl->instance;
 __entry->qid = nvme_req_qid(req);
-__entry->cid = req->tag;
+__entry->cid = nvme_req(req)->cmd->common.command_id;
 __entry->result = le64_to_cpu(nvme_req(req)->result.u64);
 __entry->retries = nvme_req(req)->retries;
 __entry->flags = nvme_req(req)->flags;
@@ -83,3 +83,18 @@ config NVME_TARGET_TCP
 devices over TCP.

 If unsure, say N.
+
+config NVME_TARGET_AUTH
+bool "NVMe over Fabrics In-band Authentication support"
+depends on NVME_TARGET
+select NVME_COMMON
+select CRYPTO
+select CRYPTO_HMAC
+select CRYPTO_SHA256
+select CRYPTO_SHA512
+select CRYPTO_DH
+select CRYPTO_DH_RFC7919_GROUPS
+help
+This enables support for NVMe over Fabrics In-band Authentication
+
+If unsure, say N.
@@ -13,6 +13,7 @@ nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
 discovery.o io-cmd-file.o io-cmd-bdev.o
 nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
 nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
+nvmet-$(CONFIG_NVME_TARGET_AUTH) += fabrics-cmd-auth.o auth.o
 nvme-loop-y += loop.o
 nvmet-rdma-y += rdma.o
 nvmet-fc-y += fc.o
@@ -1017,7 +1017,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 u16 ret;

 if (nvme_is_fabrics(cmd))
-return nvmet_parse_fabrics_cmd(req);
+return nvmet_parse_fabrics_admin_cmd(req);
+if (unlikely(!nvmet_check_auth_status(req)))
+return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
 if (nvmet_is_disc_subsys(nvmet_req_subsys(req)))
 return nvmet_parse_discovery_cmd(req);

drivers/nvme/target/auth.c (new file, 525 lines)
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics DH-HMAC-CHAP authentication.
+ * Copyright (c) 2020 Hannes Reinecke, SUSE Software Solutions.
+ * All rights reserved.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <crypto/hash.h>
+#include <linux/crc32.h>
+#include <linux/base64.h>
+#include <linux/ctype.h>
+#include <linux/random.h>
+#include <linux/nvme-auth.h>
+#include <asm/unaligned.h>
+
+#include "nvmet.h"
+
+int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
+bool set_ctrl)
+{
+unsigned char key_hash;
+char *dhchap_secret;
+
+if (sscanf(secret, "DHHC-1:%hhd:%*s", &key_hash) != 1)
+return -EINVAL;
+if (key_hash > 3) {
+pr_warn("Invalid DH-HMAC-CHAP hash id %d\n",
+key_hash);
+return -EINVAL;
+}
+if (key_hash > 0) {
+/* Validate selected hash algorithm */
+const char *hmac = nvme_auth_hmac_name(key_hash);
+
+if (!crypto_has_shash(hmac, 0, 0)) {
+pr_err("DH-HMAC-CHAP hash %s unsupported\n", hmac);
+return -ENOTSUPP;
+}
+}
+dhchap_secret = kstrdup(secret, GFP_KERNEL);
+if (!dhchap_secret)
+return -ENOMEM;
+if (set_ctrl) {
+host->dhchap_ctrl_secret = strim(dhchap_secret);
+host->dhchap_ctrl_key_hash = key_hash;
+} else {
+host->dhchap_secret = strim(dhchap_secret);
+host->dhchap_key_hash = key_hash;
+}
+return 0;
+}
+
+int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
+{
+const char *dhgroup_kpp;
+int ret = 0;
+
+pr_debug("%s: ctrl %d selecting dhgroup %d\n",
+__func__, ctrl->cntlid, dhgroup_id);
+
+if (ctrl->dh_tfm) {
+if (ctrl->dh_gid == dhgroup_id) {
+pr_debug("%s: ctrl %d reuse existing DH group %d\n",
+__func__, ctrl->cntlid, dhgroup_id);
+return 0;
+}
+crypto_free_kpp(ctrl->dh_tfm);
+ctrl->dh_tfm = NULL;
+ctrl->dh_gid = 0;
+}
+
+if (dhgroup_id == NVME_AUTH_DHGROUP_NULL)
+return 0;
+
+dhgroup_kpp = nvme_auth_dhgroup_kpp(dhgroup_id);
+if (!dhgroup_kpp) {
+pr_debug("%s: ctrl %d invalid DH group %d\n",
+__func__, ctrl->cntlid, dhgroup_id);
+return -EINVAL;
+}
+ctrl->dh_tfm = crypto_alloc_kpp(dhgroup_kpp, 0, 0);
+if (IS_ERR(ctrl->dh_tfm)) {
+pr_debug("%s: ctrl %d failed to setup DH group %d, err %ld\n",
+__func__, ctrl->cntlid, dhgroup_id,
+PTR_ERR(ctrl->dh_tfm));
+ret = PTR_ERR(ctrl->dh_tfm);
+ctrl->dh_tfm = NULL;
+ctrl->dh_gid = 0;
+} else {
+ctrl->dh_gid = dhgroup_id;
+pr_debug("%s: ctrl %d setup DH group %d\n",
+__func__, ctrl->cntlid, ctrl->dh_gid);
+ret = nvme_auth_gen_privkey(ctrl->dh_tfm, ctrl->dh_gid);
+if (ret < 0) {
+pr_debug("%s: ctrl %d failed to generate private key, err %d\n",
+__func__, ctrl->cntlid, ret);
+kfree_sensitive(ctrl->dh_key);
+return ret;
+}
+ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm);
+kfree_sensitive(ctrl->dh_key);
+ctrl->dh_key = kzalloc(ctrl->dh_keysize, GFP_KERNEL);
+if (!ctrl->dh_key) {
+pr_warn("ctrl %d failed to allocate public key\n",
+ctrl->cntlid);
+return -ENOMEM;
+}
+ret = nvme_auth_gen_pubkey(ctrl->dh_tfm, ctrl->dh_key,
+ctrl->dh_keysize);
+if (ret < 0) {
+pr_warn("ctrl %d failed to generate public key\n",
+ctrl->cntlid);
+kfree(ctrl->dh_key);
+ctrl->dh_key = NULL;
+}
+}
+
+return ret;
+}
+
+int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
+{
+int ret = 0;
+struct nvmet_host_link *p;
+struct nvmet_host *host = NULL;
+const char *hash_name;
+
+down_read(&nvmet_config_sem);
+if (nvmet_is_disc_subsys(ctrl->subsys))
+goto out_unlock;
+
+if (ctrl->subsys->allow_any_host)
+goto out_unlock;
+
+list_for_each_entry(p, &ctrl->subsys->hosts, entry) {
+pr_debug("check %s\n", nvmet_host_name(p->host));
+if (strcmp(nvmet_host_name(p->host), ctrl->hostnqn))
+continue;
+host = p->host;
+break;
+}
+if (!host) {
+pr_debug("host %s not found\n", ctrl->hostnqn);
+ret = -EPERM;
+goto out_unlock;
+}
+
+ret = nvmet_setup_dhgroup(ctrl, host->dhchap_dhgroup_id);
+if (ret < 0)
+pr_warn("Failed to setup DH group");
+
+if (!host->dhchap_secret) {
+pr_debug("No authentication provided\n");
+goto out_unlock;
+}
+
+if (host->dhchap_hash_id == ctrl->shash_id) {
+pr_debug("Re-use existing hash ID %d\n",
+ctrl->shash_id);
+} else {
+hash_name = nvme_auth_hmac_name(host->dhchap_hash_id);
+if (!hash_name) {
+pr_warn("Hash ID %d invalid\n", host->dhchap_hash_id);
+ret = -EINVAL;
+goto out_unlock;
+}
+ctrl->shash_id = host->dhchap_hash_id;
+}
+
+/* Skip the 'DHHC-1:XX:' prefix */
+nvme_auth_free_key(ctrl->host_key);
+ctrl->host_key = nvme_auth_extract_key(host->dhchap_secret + 10,
+host->dhchap_key_hash);
+if (IS_ERR(ctrl->host_key)) {
+ret = PTR_ERR(ctrl->host_key);
+ctrl->host_key = NULL;
+goto out_free_hash;
+}
+pr_debug("%s: using hash %s key %*ph\n", __func__,
+ctrl->host_key->hash > 0 ?
+nvme_auth_hmac_name(ctrl->host_key->hash) : "none",
+(int)ctrl->host_key->len, ctrl->host_key->key);
+
+nvme_auth_free_key(ctrl->ctrl_key);
+if (!host->dhchap_ctrl_secret) {
+ctrl->ctrl_key = NULL;
+goto out_unlock;
+}
+
+ctrl->ctrl_key = nvme_auth_extract_key(host->dhchap_ctrl_secret + 10,
+host->dhchap_ctrl_key_hash);
+if (IS_ERR(ctrl->ctrl_key)) {
+ret = PTR_ERR(ctrl->ctrl_key);
+ctrl->ctrl_key = NULL;
+}
+pr_debug("%s: using ctrl hash %s key %*ph\n", __func__,
+ctrl->ctrl_key->hash > 0 ?
+nvme_auth_hmac_name(ctrl->ctrl_key->hash) : "none",
+(int)ctrl->ctrl_key->len, ctrl->ctrl_key->key);
+
+out_free_hash:
+if (ret) {
+if (ctrl->host_key) {
+nvme_auth_free_key(ctrl->host_key);
+ctrl->host_key = NULL;
+}
+ctrl->shash_id = 0;
+}
+out_unlock:
+up_read(&nvmet_config_sem);
+
+return ret;
+}
+
+void nvmet_auth_sq_free(struct nvmet_sq *sq)
+{
+cancel_delayed_work(&sq->auth_expired_work);
+kfree(sq->dhchap_c1);
+sq->dhchap_c1 = NULL;
+kfree(sq->dhchap_c2);
+sq->dhchap_c2 = NULL;
+kfree(sq->dhchap_skey);
+sq->dhchap_skey = NULL;
+}
+
+void nvmet_destroy_auth(struct nvmet_ctrl *ctrl)
+{
+ctrl->shash_id = 0;
+
+if (ctrl->dh_tfm) {
+crypto_free_kpp(ctrl->dh_tfm);
+ctrl->dh_tfm = NULL;
+ctrl->dh_gid = 0;
+}
+kfree_sensitive(ctrl->dh_key);
+ctrl->dh_key = NULL;
+
+if (ctrl->host_key) {
+nvme_auth_free_key(ctrl->host_key);
+ctrl->host_key = NULL;
+}
+if (ctrl->ctrl_key) {
+nvme_auth_free_key(ctrl->ctrl_key);
+ctrl->ctrl_key = NULL;
+}
+}
+
+bool nvmet_check_auth_status(struct nvmet_req *req)
+{
+if (req->sq->ctrl->host_key &&
+!req->sq->authenticated)
+return false;
+return true;
+}
+
+int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
+unsigned int shash_len)
+{
+struct crypto_shash *shash_tfm;
+struct shash_desc *shash;
+struct nvmet_ctrl *ctrl = req->sq->ctrl;
+const char *hash_name;
+u8 *challenge = req->sq->dhchap_c1, *host_response;
+u8 buf[4];
+int ret;
+
+hash_name = nvme_auth_hmac_name(ctrl->shash_id);
+if (!hash_name) {
+pr_warn("Hash ID %d invalid\n", ctrl->shash_id);
+return -EINVAL;
+}
+
+shash_tfm = crypto_alloc_shash(hash_name, 0, 0);
+if (IS_ERR(shash_tfm)) {
+pr_err("failed to allocate shash %s\n", hash_name);
+return PTR_ERR(shash_tfm);
+}
+
+if (shash_len != crypto_shash_digestsize(shash_tfm)) {
+pr_debug("%s: hash len mismatch (len %d digest %d)\n",
+__func__, shash_len,
+crypto_shash_digestsize(shash_tfm));
+ret = -EINVAL;
+goto out_free_tfm;
+}
+
+host_response = nvme_auth_transform_key(ctrl->host_key, ctrl->hostnqn);
+if (IS_ERR(host_response)) {
+ret = PTR_ERR(host_response);
+goto out_free_tfm;
+}
+
+ret = crypto_shash_setkey(shash_tfm, host_response,
+ctrl->host_key->len);
+if (ret)
+goto out_free_response;
+
+if (ctrl->dh_gid != NVME_AUTH_DHGROUP_NULL) {
+challenge = kmalloc(shash_len, GFP_KERNEL);
+if (!challenge) {
+ret = -ENOMEM;
+goto out_free_response;
+}
+ret = nvme_auth_augmented_challenge(ctrl->shash_id,
+req->sq->dhchap_skey,
+req->sq->dhchap_skey_len,
+req->sq->dhchap_c1,
+challenge, shash_len);
+if (ret)
+goto out_free_response;
+}
+
+pr_debug("ctrl %d qid %d host response seq %u transaction %d\n",
+ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1,
+req->sq->dhchap_tid);
+
+shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm),
+GFP_KERNEL);
+if (!shash) {
+ret = -ENOMEM;
+goto out_free_response;
+}
+shash->tfm = shash_tfm;
+ret = crypto_shash_init(shash);
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, challenge, shash_len);
+if (ret)
+goto out;
+put_unaligned_le32(req->sq->dhchap_s1, buf);
+ret = crypto_shash_update(shash, buf, 4);
+if (ret)
+goto out;
+put_unaligned_le16(req->sq->dhchap_tid, buf);
+ret = crypto_shash_update(shash, buf, 2);
+if (ret)
+goto out;
+memset(buf, 0, 4);
+ret = crypto_shash_update(shash, buf, 1);
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, "HostHost", 8);
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, ctrl->hostnqn, strlen(ctrl->hostnqn));
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, buf, 1);
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, ctrl->subsysnqn,
+strlen(ctrl->subsysnqn));
+if (ret)
+goto out;
+ret = crypto_shash_final(shash, response);
+out:
+if (challenge != req->sq->dhchap_c1)
+kfree(challenge);
+kfree(shash);
+out_free_response:
+kfree_sensitive(host_response);
+out_free_tfm:
+crypto_free_shash(shash_tfm);
+return 0;
+}
+
+int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
+unsigned int shash_len)
+{
+struct crypto_shash *shash_tfm;
+struct shash_desc *shash;
+struct nvmet_ctrl *ctrl = req->sq->ctrl;
+const char *hash_name;
+u8 *challenge = req->sq->dhchap_c2, *ctrl_response;
+u8 buf[4];
+int ret;
+
+hash_name = nvme_auth_hmac_name(ctrl->shash_id);
+if (!hash_name) {
+pr_warn("Hash ID %d invalid\n", ctrl->shash_id);
+return -EINVAL;
+}
+
+shash_tfm = crypto_alloc_shash(hash_name, 0, 0);
+if (IS_ERR(shash_tfm)) {
+pr_err("failed to allocate shash %s\n", hash_name);
+return PTR_ERR(shash_tfm);
+}
+
+if (shash_len != crypto_shash_digestsize(shash_tfm)) {
+pr_debug("%s: hash len mismatch (len %d digest %d)\n",
+__func__, shash_len,
+crypto_shash_digestsize(shash_tfm));
+ret = -EINVAL;
+goto out_free_tfm;
+}
+
+ctrl_response = nvme_auth_transform_key(ctrl->ctrl_key,
+ctrl->subsysnqn);
+if (IS_ERR(ctrl_response)) {
+ret = PTR_ERR(ctrl_response);
+goto out_free_tfm;
+}
+
+ret = crypto_shash_setkey(shash_tfm, ctrl_response,
+ctrl->ctrl_key->len);
+if (ret)
+goto out_free_response;
+
+if (ctrl->dh_gid != NVME_AUTH_DHGROUP_NULL) {
+challenge = kmalloc(shash_len, GFP_KERNEL);
+if (!challenge) {
+ret = -ENOMEM;
+goto out_free_response;
+}
+ret = nvme_auth_augmented_challenge(ctrl->shash_id,
+req->sq->dhchap_skey,
+req->sq->dhchap_skey_len,
+req->sq->dhchap_c2,
+challenge, shash_len);
+if (ret)
+goto out_free_response;
+}
+
+shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm),
+GFP_KERNEL);
+if (!shash) {
+ret = -ENOMEM;
+goto out_free_response;
+}
+shash->tfm = shash_tfm;
+
+ret = crypto_shash_init(shash);
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, challenge, shash_len);
+if (ret)
+goto out;
+put_unaligned_le32(req->sq->dhchap_s2, buf);
+ret = crypto_shash_update(shash, buf, 4);
+if (ret)
+goto out;
+put_unaligned_le16(req->sq->dhchap_tid, buf);
+ret = crypto_shash_update(shash, buf, 2);
+if (ret)
+goto out;
+memset(buf, 0, 4);
+ret = crypto_shash_update(shash, buf, 1);
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, "Controller", 10);
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, ctrl->subsysnqn,
+strlen(ctrl->subsysnqn));
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, buf, 1);
+if (ret)
+goto out;
+ret = crypto_shash_update(shash, ctrl->hostnqn, strlen(ctrl->hostnqn));
+if (ret)
+goto out;
+ret = crypto_shash_final(shash, response);
+out:
+if (challenge != req->sq->dhchap_c2)
+kfree(challenge);
+kfree(shash);
+out_free_response:
+kfree_sensitive(ctrl_response);
+out_free_tfm:
+crypto_free_shash(shash_tfm);
+return 0;
+}
+
+int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
+u8 *buf, int buf_size)
+{
+struct nvmet_ctrl *ctrl = req->sq->ctrl;
+int ret = 0;
+
+if (!ctrl->dh_key) {
+pr_warn("ctrl %d no DH public key!\n", ctrl->cntlid);
+return -ENOKEY;
+}
+if (buf_size != ctrl->dh_keysize) {
+pr_warn("ctrl %d DH public key size mismatch, need %zu is %d\n",
+ctrl->cntlid, ctrl->dh_keysize, buf_size);
+ret = -EINVAL;
+} else {
+memcpy(buf, ctrl->dh_key, buf_size);
+pr_debug("%s: ctrl %d public key %*ph\n", __func__,
+ctrl->cntlid, (int)buf_size, buf);
+}
+
+return ret;
+}
+
+int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
+u8 *pkey, int pkey_size)
+{
+struct nvmet_ctrl *ctrl = req->sq->ctrl;
+int ret;
+
+req->sq->dhchap_skey_len = ctrl->dh_keysize;
+req->sq->dhchap_skey = kzalloc(req->sq->dhchap_skey_len, GFP_KERNEL);
+if (!req->sq->dhchap_skey)
+return -ENOMEM;
+ret = nvme_auth_gen_shared_secret(ctrl->dh_tfm,
+pkey, pkey_size,
+req->sq->dhchap_skey,
|
||||||
|
req->sq->dhchap_skey_len);
|
||||||
|
if (ret)
|
||||||
|
pr_debug("failed to compute shared secret, err %d\n", ret);
|
||||||
|
else
|
||||||
|
pr_debug("%s: shared secret %*ph\n", __func__,
|
||||||
|
(int)req->sq->dhchap_skey_len,
|
||||||
|
req->sq->dhchap_skey);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
@@ -11,6 +11,11 @@
 #include <linux/ctype.h>
 #include <linux/pci.h>
 #include <linux/pci-p2pdma.h>
+#ifdef CONFIG_NVME_TARGET_AUTH
+#include <linux/nvme-auth.h>
+#endif
+#include <crypto/hash.h>
+#include <crypto/kpp.h>
 
 #include "nvmet.h"
 
@@ -1680,10 +1685,133 @@ static const struct config_item_type nvmet_ports_type = {
 static struct config_group nvmet_subsystems_group;
 static struct config_group nvmet_ports_group;
 
+#ifdef CONFIG_NVME_TARGET_AUTH
+static ssize_t nvmet_host_dhchap_key_show(struct config_item *item,
+		char *page)
+{
+	u8 *dhchap_secret = to_host(item)->dhchap_secret;
+
+	if (!dhchap_secret)
+		return sprintf(page, "\n");
+	return sprintf(page, "%s\n", dhchap_secret);
+}
+
+static ssize_t nvmet_host_dhchap_key_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_host *host = to_host(item);
+	int ret;
+
+	ret = nvmet_auth_set_key(host, page, false);
+	/*
+	 * Re-authentication is a soft state, so keep the
+	 * current authentication valid until the host
+	 * requests re-authentication.
+	 */
+	return ret < 0 ? ret : count;
+}
+
+CONFIGFS_ATTR(nvmet_host_, dhchap_key);
+
+static ssize_t nvmet_host_dhchap_ctrl_key_show(struct config_item *item,
+		char *page)
+{
+	u8 *dhchap_secret = to_host(item)->dhchap_ctrl_secret;
+
+	if (!dhchap_secret)
+		return sprintf(page, "\n");
+	return sprintf(page, "%s\n", dhchap_secret);
+}
+
+static ssize_t nvmet_host_dhchap_ctrl_key_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_host *host = to_host(item);
+	int ret;
+
+	ret = nvmet_auth_set_key(host, page, true);
+	/*
+	 * Re-authentication is a soft state, so keep the
+	 * current authentication valid until the host
+	 * requests re-authentication.
+	 */
+	return ret < 0 ? ret : count;
+}
+
+CONFIGFS_ATTR(nvmet_host_, dhchap_ctrl_key);
+
+static ssize_t nvmet_host_dhchap_hash_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_host *host = to_host(item);
+	const char *hash_name = nvme_auth_hmac_name(host->dhchap_hash_id);
+
+	return sprintf(page, "%s\n", hash_name ? hash_name : "none");
+}
+
+static ssize_t nvmet_host_dhchap_hash_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_host *host = to_host(item);
+	u8 hmac_id;
+
+	hmac_id = nvme_auth_hmac_id(page);
+	if (hmac_id == NVME_AUTH_HASH_INVALID)
+		return -EINVAL;
+	if (!crypto_has_shash(nvme_auth_hmac_name(hmac_id), 0, 0))
+		return -ENOTSUPP;
+	host->dhchap_hash_id = hmac_id;
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_host_, dhchap_hash);
+
+static ssize_t nvmet_host_dhchap_dhgroup_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_host *host = to_host(item);
+	const char *dhgroup = nvme_auth_dhgroup_name(host->dhchap_dhgroup_id);
+
+	return sprintf(page, "%s\n", dhgroup ? dhgroup : "none");
+}
+
+static ssize_t nvmet_host_dhchap_dhgroup_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_host *host = to_host(item);
+	int dhgroup_id;
+
+	dhgroup_id = nvme_auth_dhgroup_id(page);
+	if (dhgroup_id == NVME_AUTH_DHGROUP_INVALID)
+		return -EINVAL;
+	if (dhgroup_id != NVME_AUTH_DHGROUP_NULL) {
+		const char *kpp = nvme_auth_dhgroup_kpp(dhgroup_id);
+
+		if (!crypto_has_kpp(kpp, 0, 0))
+			return -EINVAL;
+	}
+	host->dhchap_dhgroup_id = dhgroup_id;
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_host_, dhchap_dhgroup);
+
+static struct configfs_attribute *nvmet_host_attrs[] = {
+	&nvmet_host_attr_dhchap_key,
+	&nvmet_host_attr_dhchap_ctrl_key,
+	&nvmet_host_attr_dhchap_hash,
+	&nvmet_host_attr_dhchap_dhgroup,
+	NULL,
+};
+#endif /* CONFIG_NVME_TARGET_AUTH */
+
 static void nvmet_host_release(struct config_item *item)
 {
 	struct nvmet_host *host = to_host(item);
 
+#ifdef CONFIG_NVME_TARGET_AUTH
+	kfree(host->dhchap_secret);
+#endif
 	kfree(host);
 }
 
@@ -1693,6 +1821,9 @@ static struct configfs_item_operations nvmet_host_item_ops = {
 
 static const struct config_item_type nvmet_host_type = {
 	.ct_item_ops		= &nvmet_host_item_ops,
+#ifdef CONFIG_NVME_TARGET_AUTH
+	.ct_attrs		= nvmet_host_attrs,
+#endif
 	.ct_owner		= THIS_MODULE,
 };
 
@@ -1705,6 +1836,11 @@ static struct config_group *nvmet_hosts_make_group(struct config_group *group,
 	if (!host)
 		return ERR_PTR(-ENOMEM);
 
+#ifdef CONFIG_NVME_TARGET_AUTH
+	/* Default to SHA256 */
+	host->dhchap_hash_id = NVME_AUTH_HASH_SHA256;
+#endif
+
 	config_group_init_type_name(&host->group, name, &nvmet_host_type);
 
 	return &host->group;
@@ -795,6 +795,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
 	wait_for_completion(&sq->confirm_done);
 	wait_for_completion(&sq->free_done);
 	percpu_ref_exit(&sq->ref);
+	nvmet_auth_sq_free(sq);
 
 	if (ctrl) {
 		/*
@@ -865,8 +866,15 @@ static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
 
 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 {
+	struct nvme_command *cmd = req->cmd;
 	u16 ret;
 
+	if (nvme_is_fabrics(cmd))
+		return nvmet_parse_fabrics_io_cmd(req);
+
+	if (unlikely(!nvmet_check_auth_status(req)))
+		return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
+
 	ret = nvmet_check_ctrl_status(req);
 	if (unlikely(ret))
 		return ret;
@@ -1271,6 +1279,11 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req)
 			req->cmd->common.opcode, req->sq->qid);
 		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 	}
+
+	if (unlikely(!nvmet_check_auth_status(req))) {
+		pr_warn("qid %d not authenticated\n", req->sq->qid);
+		return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
+	}
 	return 0;
 }
 
@@ -1467,6 +1480,8 @@ static void nvmet_ctrl_free(struct kref *ref)
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fatal_err_work);
 
+	nvmet_destroy_auth(ctrl);
+
 	ida_free(&cntlid_ida, ctrl->cntlid);
 
 	nvmet_async_events_free(ctrl);

 drivers/nvme/target/fabrics-cmd-auth.c (new file, 544 lines)
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics DH-HMAC-CHAP authentication command handling.
+ * Copyright (c) 2020 Hannes Reinecke, SUSE Software Solutions.
+ * All rights reserved.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/random.h>
+#include <linux/nvme-auth.h>
+#include <crypto/hash.h>
+#include <crypto/kpp.h>
+#include "nvmet.h"
+
+static void nvmet_auth_expired_work(struct work_struct *work)
+{
+	struct nvmet_sq *sq = container_of(to_delayed_work(work),
+			struct nvmet_sq, auth_expired_work);
+
+	pr_debug("%s: ctrl %d qid %d transaction %u expired, resetting\n",
+		 __func__, sq->ctrl->cntlid, sq->qid, sq->dhchap_tid);
+	sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
+	sq->dhchap_tid = -1;
+}
+
+void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
+{
+	u32 result = le32_to_cpu(req->cqe->result.u32);
+
+	/* Initialize in-band authentication */
+	INIT_DELAYED_WORK(&req->sq->auth_expired_work,
+			  nvmet_auth_expired_work);
+	req->sq->authenticated = false;
+	req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
+	result |= (u32)NVME_CONNECT_AUTHREQ_ATR << 16;
+	req->cqe->result.u32 = cpu_to_le32(result);
+}
+
+static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmf_auth_dhchap_negotiate_data *data = d;
+	int i, hash_id = 0, fallback_hash_id = 0, dhgid, fallback_dhgid;
+
+	pr_debug("%s: ctrl %d qid %d: data sc_d %d napd %d authid %d halen %d dhlen %d\n",
+		 __func__, ctrl->cntlid, req->sq->qid,
+		 data->sc_c, data->napd, data->auth_protocol[0].dhchap.authid,
+		 data->auth_protocol[0].dhchap.halen,
+		 data->auth_protocol[0].dhchap.dhlen);
+	req->sq->dhchap_tid = le16_to_cpu(data->t_id);
+	if (data->sc_c)
+		return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH;
+
+	if (data->napd != 1)
+		return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
+
+	if (data->auth_protocol[0].dhchap.authid !=
+	    NVME_AUTH_DHCHAP_AUTH_ID)
+		return NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
+
+	for (i = 0; i < data->auth_protocol[0].dhchap.halen; i++) {
+		u8 host_hmac_id = data->auth_protocol[0].dhchap.idlist[i];
+
+		if (!fallback_hash_id &&
+		    crypto_has_shash(nvme_auth_hmac_name(host_hmac_id), 0, 0))
+			fallback_hash_id = host_hmac_id;
+		if (ctrl->shash_id != host_hmac_id)
+			continue;
+		hash_id = ctrl->shash_id;
+		break;
+	}
+	if (hash_id == 0) {
+		if (fallback_hash_id == 0) {
+			pr_debug("%s: ctrl %d qid %d: no usable hash found\n",
+				 __func__, ctrl->cntlid, req->sq->qid);
+			return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
+		}
+		pr_debug("%s: ctrl %d qid %d: no usable hash found, falling back to %s\n",
+			 __func__, ctrl->cntlid, req->sq->qid,
+			 nvme_auth_hmac_name(fallback_hash_id));
+		ctrl->shash_id = fallback_hash_id;
+	}
+
+	dhgid = -1;
+	fallback_dhgid = -1;
+	for (i = 0; i < data->auth_protocol[0].dhchap.dhlen; i++) {
+		int tmp_dhgid = data->auth_protocol[0].dhchap.idlist[i + 30];
+
+		if (tmp_dhgid != ctrl->dh_gid) {
+			dhgid = tmp_dhgid;
+			break;
+		}
+		if (fallback_dhgid < 0) {
+			const char *kpp = nvme_auth_dhgroup_kpp(tmp_dhgid);
+
+			if (crypto_has_kpp(kpp, 0, 0))
+				fallback_dhgid = tmp_dhgid;
+		}
+	}
+	if (dhgid < 0) {
+		if (fallback_dhgid < 0) {
+			pr_debug("%s: ctrl %d qid %d: no usable DH group found\n",
+				 __func__, ctrl->cntlid, req->sq->qid);
+			return NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
+		}
+		pr_debug("%s: ctrl %d qid %d: configured DH group %s not found\n",
+			 __func__, ctrl->cntlid, req->sq->qid,
+			 nvme_auth_dhgroup_name(fallback_dhgid));
+		ctrl->dh_gid = fallback_dhgid;
+	}
+	pr_debug("%s: ctrl %d qid %d: selected DH group %s (%d)\n",
+		 __func__, ctrl->cntlid, req->sq->qid,
+		 nvme_auth_dhgroup_name(ctrl->dh_gid), ctrl->dh_gid);
+	return 0;
+}
+
+static u16 nvmet_auth_reply(struct nvmet_req *req, void *d)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmf_auth_dhchap_reply_data *data = d;
+	u16 dhvlen = le16_to_cpu(data->dhvlen);
+	u8 *response;
+
+	pr_debug("%s: ctrl %d qid %d: data hl %d cvalid %d dhvlen %u\n",
+		 __func__, ctrl->cntlid, req->sq->qid,
+		 data->hl, data->cvalid, dhvlen);
+
+	if (dhvlen) {
+		if (!ctrl->dh_tfm)
+			return NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
+		if (nvmet_auth_ctrl_sesskey(req, data->rval + 2 * data->hl,
+					    dhvlen) < 0)
+			return NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
+	}
+
+	response = kmalloc(data->hl, GFP_KERNEL);
+	if (!response)
+		return NVME_AUTH_DHCHAP_FAILURE_FAILED;
+
+	if (!ctrl->host_key) {
+		pr_warn("ctrl %d qid %d no host key\n",
+			ctrl->cntlid, req->sq->qid);
+		kfree(response);
+		return NVME_AUTH_DHCHAP_FAILURE_FAILED;
+	}
+	if (nvmet_auth_host_hash(req, response, data->hl) < 0) {
+		pr_debug("ctrl %d qid %d host hash failed\n",
+			 ctrl->cntlid, req->sq->qid);
+		kfree(response);
+		return NVME_AUTH_DHCHAP_FAILURE_FAILED;
+	}
+
+	if (memcmp(data->rval, response, data->hl)) {
+		pr_info("ctrl %d qid %d host response mismatch\n",
+			ctrl->cntlid, req->sq->qid);
+		kfree(response);
+		return NVME_AUTH_DHCHAP_FAILURE_FAILED;
+	}
+	kfree(response);
+	pr_debug("%s: ctrl %d qid %d host authenticated\n",
+		 __func__, ctrl->cntlid, req->sq->qid);
+	if (data->cvalid) {
+		req->sq->dhchap_c2 = kmalloc(data->hl, GFP_KERNEL);
+		if (!req->sq->dhchap_c2)
+			return NVME_AUTH_DHCHAP_FAILURE_FAILED;
+		memcpy(req->sq->dhchap_c2, data->rval + data->hl, data->hl);
+
+		pr_debug("%s: ctrl %d qid %d challenge %*ph\n",
+			 __func__, ctrl->cntlid, req->sq->qid, data->hl,
+			 req->sq->dhchap_c2);
+		req->sq->dhchap_s2 = le32_to_cpu(data->seqnum);
+	} else {
+		req->sq->authenticated = true;
+		req->sq->dhchap_c2 = NULL;
+	}
+
+	return 0;
+}
+
+static u16 nvmet_auth_failure2(struct nvmet_req *req, void *d)
+{
+	struct nvmf_auth_dhchap_failure_data *data = d;
+
+	return data->rescode_exp;
+}
+
+void nvmet_execute_auth_send(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmf_auth_dhchap_success2_data *data;
+	void *d;
+	u32 tl;
+	u16 status = 0;
+
+	if (req->cmd->auth_send.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		req->error_loc =
+			offsetof(struct nvmf_auth_send_command, secp);
+		goto done;
+	}
+	if (req->cmd->auth_send.spsp0 != 0x01) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		req->error_loc =
+			offsetof(struct nvmf_auth_send_command, spsp0);
+		goto done;
+	}
+	if (req->cmd->auth_send.spsp1 != 0x01) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		req->error_loc =
+			offsetof(struct nvmf_auth_send_command, spsp1);
+		goto done;
+	}
+	tl = le32_to_cpu(req->cmd->auth_send.tl);
+	if (!tl) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		req->error_loc =
+			offsetof(struct nvmf_auth_send_command, tl);
+		goto done;
+	}
+	if (!nvmet_check_transfer_len(req, tl)) {
+		pr_debug("%s: transfer length mismatch (%u)\n", __func__, tl);
+		return;
+	}
+
+	d = kmalloc(tl, GFP_KERNEL);
+	if (!d) {
+		status = NVME_SC_INTERNAL;
+		goto done;
+	}
+
+	status = nvmet_copy_from_sgl(req, 0, d, tl);
+	if (status) {
+		kfree(d);
+		goto done;
+	}
+
+	data = d;
+	pr_debug("%s: ctrl %d qid %d type %d id %d step %x\n", __func__,
+		 ctrl->cntlid, req->sq->qid, data->auth_type, data->auth_id,
+		 req->sq->dhchap_step);
+	if (data->auth_type != NVME_AUTH_COMMON_MESSAGES &&
+	    data->auth_type != NVME_AUTH_DHCHAP_MESSAGES)
+		goto done_failure1;
+	if (data->auth_type == NVME_AUTH_COMMON_MESSAGES) {
+		if (data->auth_id == NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE) {
+			/* Restart negotiation */
+			pr_debug("%s: ctrl %d qid %d reset negotiation\n", __func__,
+				 ctrl->cntlid, req->sq->qid);
+			if (!req->sq->qid) {
+				if (nvmet_setup_auth(ctrl) < 0) {
+					status = NVME_SC_INTERNAL;
+					pr_err("ctrl %d qid 0 failed to setup"
+					       "re-authentication",
+					       ctrl->cntlid);
+					goto done_failure1;
+				}
+			}
+			req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
+		} else if (data->auth_id != req->sq->dhchap_step)
+			goto done_failure1;
+		/* Validate negotiation parameters */
+		status = nvmet_auth_negotiate(req, d);
+		if (status == 0)
+			req->sq->dhchap_step =
+				NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE;
+		else {
+			req->sq->dhchap_step =
+				NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
+			req->sq->dhchap_status = status;
+			status = 0;
+		}
+		goto done_kfree;
+	}
+	if (data->auth_id != req->sq->dhchap_step) {
+		pr_debug("%s: ctrl %d qid %d step mismatch (%d != %d)\n",
+			 __func__, ctrl->cntlid, req->sq->qid,
+			 data->auth_id, req->sq->dhchap_step);
+		goto done_failure1;
+	}
+	if (le16_to_cpu(data->t_id) != req->sq->dhchap_tid) {
+		pr_debug("%s: ctrl %d qid %d invalid transaction %d (expected %d)\n",
+			 __func__, ctrl->cntlid, req->sq->qid,
+			 le16_to_cpu(data->t_id),
+			 req->sq->dhchap_tid);
+		req->sq->dhchap_step =
+			NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
+		req->sq->dhchap_status =
+			NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
+		goto done_kfree;
+	}
+
+	switch (data->auth_id) {
+	case NVME_AUTH_DHCHAP_MESSAGE_REPLY:
+		status = nvmet_auth_reply(req, d);
+		if (status == 0)
+			req->sq->dhchap_step =
+				NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1;
+		else {
+			req->sq->dhchap_step =
+				NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
+			req->sq->dhchap_status = status;
+			status = 0;
+		}
+		goto done_kfree;
+		break;
+	case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2:
+		req->sq->authenticated = true;
+		pr_debug("%s: ctrl %d qid %d ctrl authenticated\n",
+			 __func__, ctrl->cntlid, req->sq->qid);
+		goto done_kfree;
+		break;
+	case NVME_AUTH_DHCHAP_MESSAGE_FAILURE2:
+		status = nvmet_auth_failure2(req, d);
+		if (status) {
+			pr_warn("ctrl %d qid %d: authentication failed (%d)\n",
+				ctrl->cntlid, req->sq->qid, status);
+			req->sq->dhchap_status = status;
+			req->sq->authenticated = false;
+			status = 0;
+		}
+		goto done_kfree;
+		break;
+	default:
+		req->sq->dhchap_status =
+			NVME_AUTH_DHCHAP_FAILURE_INCORRECT_MESSAGE;
+		req->sq->dhchap_step =
+			NVME_AUTH_DHCHAP_MESSAGE_FAILURE2;
+		req->sq->authenticated = false;
+		goto done_kfree;
+		break;
+	}
+done_failure1:
+	req->sq->dhchap_status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_MESSAGE;
+	req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_FAILURE2;
+
+done_kfree:
+	kfree(d);
+done:
+	pr_debug("%s: ctrl %d qid %d dhchap status %x step %x\n", __func__,
+		 ctrl->cntlid, req->sq->qid,
+		 req->sq->dhchap_status, req->sq->dhchap_step);
+	if (status)
+		pr_debug("%s: ctrl %d qid %d nvme status %x error loc %d\n",
+			 __func__, ctrl->cntlid, req->sq->qid,
+			 status, req->error_loc);
+	req->cqe->result.u64 = 0;
+	nvmet_req_complete(req, status);
+	if (req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2 &&
+	    req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) {
+		unsigned long auth_expire_secs = ctrl->kato ? ctrl->kato : 120;
+
+		mod_delayed_work(system_wq, &req->sq->auth_expired_work,
+				 auth_expire_secs * HZ);
+		return;
+	}
+	/* Final states, clear up variables */
+	nvmet_auth_sq_free(req->sq);
+	if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE2)
+		nvmet_ctrl_fatal_error(ctrl);
+}
+
+static int nvmet_auth_challenge(struct nvmet_req *req, void *d, int al)
+{
+	struct nvmf_auth_dhchap_challenge_data *data = d;
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	int ret = 0;
+	int hash_len = nvme_auth_hmac_hash_len(ctrl->shash_id);
+	int data_size = sizeof(*d) + hash_len;
+
+	if (ctrl->dh_tfm)
+		data_size += ctrl->dh_keysize;
+	if (al < data_size) {
+		pr_debug("%s: buffer too small (al %d need %d)\n", __func__,
+			 al, data_size);
+		return -EINVAL;
+	}
+	memset(data, 0, data_size);
+	req->sq->dhchap_s1 = nvme_auth_get_seqnum();
+	data->auth_type = NVME_AUTH_DHCHAP_MESSAGES;
+	data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE;
+	data->t_id = cpu_to_le16(req->sq->dhchap_tid);
+	data->hashid = ctrl->shash_id;
+	data->hl = hash_len;
+	data->seqnum = cpu_to_le32(req->sq->dhchap_s1);
+	req->sq->dhchap_c1 = kmalloc(data->hl, GFP_KERNEL);
+	if (!req->sq->dhchap_c1)
+		return -ENOMEM;
+	get_random_bytes(req->sq->dhchap_c1, data->hl);
+	memcpy(data->cval, req->sq->dhchap_c1, data->hl);
+	if (ctrl->dh_tfm) {
+		data->dhgid = ctrl->dh_gid;
+		data->dhvlen = cpu_to_le16(ctrl->dh_keysize);
+		ret = nvmet_auth_ctrl_exponential(req, data->cval + data->hl,
+						  ctrl->dh_keysize);
+	}
+	pr_debug("%s: ctrl %d qid %d seq %d transaction %d hl %d dhvlen %zu\n",
+		 __func__, ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1,
+		 req->sq->dhchap_tid, data->hl, ctrl->dh_keysize);
+	return ret;
+}
+
+static int nvmet_auth_success1(struct nvmet_req *req, void *d, int al)
+{
+	struct nvmf_auth_dhchap_success1_data *data = d;
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	int hash_len = nvme_auth_hmac_hash_len(ctrl->shash_id);
+
+	WARN_ON(al < sizeof(*data));
+	memset(data, 0, sizeof(*data));
+	data->auth_type = NVME_AUTH_DHCHAP_MESSAGES;
+	data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1;
+	data->t_id = cpu_to_le16(req->sq->dhchap_tid);
+	data->hl = hash_len;
+	if (req->sq->dhchap_c2) {
+		if (!ctrl->ctrl_key) {
+			pr_warn("ctrl %d qid %d no ctrl key\n",
+				ctrl->cntlid, req->sq->qid);
+			return NVME_AUTH_DHCHAP_FAILURE_FAILED;
+		}
+		if (nvmet_auth_ctrl_hash(req, data->rval, data->hl))
+			return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
+		data->rvalid = 1;
+		pr_debug("ctrl %d qid %d response %*ph\n",
+			 ctrl->cntlid, req->sq->qid, data->hl, data->rval);
+	}
+	return 0;
+}
+
+static void nvmet_auth_failure1(struct nvmet_req *req, void *d, int al)
+{
+	struct nvmf_auth_dhchap_failure_data *data = d;
+
+	WARN_ON(al < sizeof(*data));
+	data->auth_type = NVME_AUTH_COMMON_MESSAGES;
+	data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
+	data->t_id = cpu_to_le16(req->sq->dhchap_tid);
+	data->rescode = NVME_AUTH_DHCHAP_FAILURE_REASON_FAILED;
+	data->rescode_exp = req->sq->dhchap_status;
+}
+
+void nvmet_execute_auth_receive(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	void *d;
+	u32 al;
+	u16 status = 0;
+
+	if (req->cmd->auth_receive.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		req->error_loc =
+			offsetof(struct nvmf_auth_receive_command, secp);
+		goto done;
+	}
+	if (req->cmd->auth_receive.spsp0 != 0x01) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		req->error_loc =
+			offsetof(struct nvmf_auth_receive_command, spsp0);
+		goto done;
+	}
+	if (req->cmd->auth_receive.spsp1 != 0x01) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		req->error_loc =
+			offsetof(struct nvmf_auth_receive_command, spsp1);
+		goto done;
+	}
+	al = le32_to_cpu(req->cmd->auth_receive.al);
+	if (!al) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		req->error_loc =
+			offsetof(struct nvmf_auth_receive_command, al);
+		goto done;
+	}
+	if (!nvmet_check_transfer_len(req, al)) {
+		pr_debug("%s: transfer length mismatch (%u)\n", __func__, al);
+		return;
+	}
+
+	d = kmalloc(al, GFP_KERNEL);
+	if (!d) {
+		status = NVME_SC_INTERNAL;
+		goto done;
+	}
+	pr_debug("%s: ctrl %d qid %d step %x\n", __func__,
+		 ctrl->cntlid, req->sq->qid, req->sq->dhchap_step);
+	switch (req->sq->dhchap_step) {
+	case NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE:
+		if (nvmet_auth_challenge(req, d, al) < 0) {
+			pr_warn("ctrl %d qid %d: challenge error (%d)\n",
+				ctrl->cntlid, req->sq->qid, status);
+			status = NVME_SC_INTERNAL;
+			break;
+		}
+		if (status) {
+			req->sq->dhchap_status = status;
+			nvmet_auth_failure1(req, d, al);
+			pr_warn("ctrl %d qid %d: challenge status (%x)\n",
+				ctrl->cntlid, req->sq->qid,
+				req->sq->dhchap_status);
+			status = 0;
+			break;
+		}
+		req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_REPLY;
+		break;
+	case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1:
+		status = nvmet_auth_success1(req, d, al);
+		if (status) {
+			req->sq->dhchap_status = status;
+			req->sq->authenticated = false;
+			nvmet_auth_failure1(req, d, al);
+			pr_warn("ctrl %d qid %d: success1 status (%x)\n",
+				ctrl->cntlid, req->sq->qid,
+				req->sq->dhchap_status);
+			break;
+		}
+		req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2;
+		break;
+	case NVME_AUTH_DHCHAP_MESSAGE_FAILURE1:
+		req->sq->authenticated = false;
+		nvmet_auth_failure1(req, d, al);
+		pr_warn("ctrl %d qid %d failure1 (%x)\n",
+			ctrl->cntlid, req->sq->qid, req->sq->dhchap_status);
+		break;
+	default:
+		pr_warn("ctrl %d qid %d unhandled step (%d)\n",
+			ctrl->cntlid, req->sq->qid, req->sq->dhchap_step);
+		req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
+		req->sq->dhchap_status = NVME_AUTH_DHCHAP_FAILURE_FAILED;
+		nvmet_auth_failure1(req, d, al);
+		status = 0;
+		break;
+	}
+
+	status = nvmet_copy_to_sgl(req, 0, d, al);
+	kfree(d);
+done:
+	req->cqe->result.u64 = 0;
+	nvmet_req_complete(req, status);
+	if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2)
+		nvmet_auth_sq_free(req->sq);
+	else if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) {
+		nvmet_auth_sq_free(req->sq);
+		nvmet_ctrl_fatal_error(ctrl);
+	}
+}
@@ -82,7 +82,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
 	nvmet_req_complete(req, status);
 }
 
-u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
+u16 nvmet_parse_fabrics_admin_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
 
@@ -93,6 +93,37 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
 	case nvme_fabrics_type_property_get:
 		req->execute = nvmet_execute_prop_get;
 		break;
+#ifdef CONFIG_NVME_TARGET_AUTH
+	case nvme_fabrics_type_auth_send:
+		req->execute = nvmet_execute_auth_send;
+		break;
+	case nvme_fabrics_type_auth_receive:
+		req->execute = nvmet_execute_auth_receive;
+		break;
+#endif
+	default:
+		pr_debug("received unknown capsule type 0x%x\n",
+			cmd->fabrics.fctype);
+		req->error_loc = offsetof(struct nvmf_common_command, fctype);
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+
+	return 0;
+}
+
+u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+
+	switch (cmd->fabrics.fctype) {
+#ifdef CONFIG_NVME_TARGET_AUTH
+	case nvme_fabrics_type_auth_send:
+		req->execute = nvmet_execute_auth_send;
+		break;
+	case nvme_fabrics_type_auth_receive:
+		req->execute = nvmet_execute_auth_receive;
+		break;
+#endif
 	default:
 		pr_debug("received unknown capsule type 0x%x\n",
 			cmd->fabrics.fctype);
@@ -173,6 +204,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 	struct nvmf_connect_data *d;
 	struct nvmet_ctrl *ctrl = NULL;
 	u16 status = 0;
+	int ret;
 
 	if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
 		return;
@@ -215,18 +247,32 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 
 	uuid_copy(&ctrl->hostid, &d->hostid);
 
+	ret = nvmet_setup_auth(ctrl);
+	if (ret < 0) {
+		pr_err("Failed to setup authentication, error %d\n", ret);
+		nvmet_ctrl_put(ctrl);
+		if (ret == -EPERM)
+			status = (NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR);
+		else
+			status = NVME_SC_INTERNAL;
+		goto out;
+	}
+
 	status = nvmet_install_queue(ctrl, req);
 	if (status) {
 		nvmet_ctrl_put(ctrl);
 		goto out;
 	}
 
-	pr_info("creating %s controller %d for subsystem %s for NQN %s%s.\n",
+	pr_info("creating %s controller %d for subsystem %s for NQN %s%s%s.\n",
 		nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm",
 		ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
-		ctrl->pi_support ? " T10-PI is enabled" : "");
+		ctrl->pi_support ? " T10-PI is enabled" : "",
+		nvmet_has_auth(ctrl) ? " with DH-HMAC-CHAP" : "");
 	req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
 
+	if (nvmet_has_auth(ctrl))
+		nvmet_init_auth(ctrl, req);
 out:
 	kfree(d);
 complete:
@@ -286,6 +332,9 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
 
 	pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
+	req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
+	if (nvmet_has_auth(ctrl))
+		nvmet_init_auth(ctrl, req);
 
 out:
 	kfree(d);
@@ -424,9 +424,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set,
-					nvme_cancel_request, &ctrl->ctrl);
-		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
+		nvme_cancel_tagset(&ctrl->ctrl);
 		nvme_loop_destroy_io_queues(ctrl);
 	}
 
@@ -434,9 +432,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-				nvme_cancel_request, &ctrl->ctrl);
-	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
+	nvme_cancel_admin_tagset(&ctrl->ctrl);
 	nvme_loop_destroy_admin_queue(ctrl);
 }
 
@@ -108,6 +108,19 @@ struct nvmet_sq {
 	u16			size;
 	u32			sqhd;
 	bool			sqhd_disabled;
+#ifdef CONFIG_NVME_TARGET_AUTH
+	struct delayed_work	auth_expired_work;
+	bool			authenticated;
+	u16			dhchap_tid;
+	u16			dhchap_status;
+	int			dhchap_step;
+	u8			*dhchap_c1;
+	u8			*dhchap_c2;
+	u32			dhchap_s1;
+	u32			dhchap_s2;
+	u8			*dhchap_skey;
+	int			dhchap_skey_len;
+#endif
 	struct completion	free_done;
 	struct completion	confirm_done;
 };
@@ -209,6 +222,15 @@ struct nvmet_ctrl {
 	u64			err_counter;
 	struct nvme_error_slot	slots[NVMET_ERROR_LOG_SLOTS];
 	bool			pi_support;
+#ifdef CONFIG_NVME_TARGET_AUTH
+	struct nvme_dhchap_key	*host_key;
+	struct nvme_dhchap_key	*ctrl_key;
+	u8			shash_id;
+	struct crypto_kpp	*dh_tfm;
+	u8			dh_gid;
+	u8			*dh_key;
+	size_t			dh_keysize;
+#endif
 };
 
 struct nvmet_subsys {
@@ -271,6 +293,12 @@ static inline struct nvmet_subsys *namespaces_to_subsys(
 
 struct nvmet_host {
 	struct config_group	group;
+	u8			*dhchap_secret;
+	u8			*dhchap_ctrl_secret;
+	u8			dhchap_key_hash;
+	u8			dhchap_ctrl_key_hash;
+	u8			dhchap_hash_id;
+	u8			dhchap_dhgroup_id;
 };
 
 static inline struct nvmet_host *to_host(struct config_item *item)
@@ -420,7 +448,8 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
 u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req);
 u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
 u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
-u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
+u16 nvmet_parse_fabrics_admin_cmd(struct nvmet_req *req);
+u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req);
 
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
@@ -668,4 +697,48 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio)
 		bio_put(bio);
 }
 
+#ifdef CONFIG_NVME_TARGET_AUTH
+void nvmet_execute_auth_send(struct nvmet_req *req);
+void nvmet_execute_auth_receive(struct nvmet_req *req);
+int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
+		       bool set_ctrl);
+int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash);
+int nvmet_setup_auth(struct nvmet_ctrl *ctrl);
+void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req);
+void nvmet_destroy_auth(struct nvmet_ctrl *ctrl);
+void nvmet_auth_sq_free(struct nvmet_sq *sq);
+int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id);
+bool nvmet_check_auth_status(struct nvmet_req *req);
+int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
+			 unsigned int hash_len);
+int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
+			 unsigned int hash_len);
+static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl)
+{
+	return ctrl->host_key != NULL;
+}
+int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
+				u8 *buf, int buf_size);
+int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
+			    u8 *buf, int buf_size);
+#else
+static inline int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
+{
+	return 0;
+}
+static inline void nvmet_init_auth(struct nvmet_ctrl *ctrl,
+				   struct nvmet_req *req) {};
+static inline void nvmet_destroy_auth(struct nvmet_ctrl *ctrl) {};
+static inline void nvmet_auth_sq_free(struct nvmet_sq *sq) {};
+static inline bool nvmet_check_auth_status(struct nvmet_req *req)
+{
+	return true;
+}
+static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl)
+{
+	return false;
+}
+static inline const char *nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; }
+#endif
+
 #endif /* _NVMET_H */
@@ -1839,7 +1839,8 @@ static int __init nvmet_tcp_init(void)
 {
 	int ret;
 
-	nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0);
+	nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq",
+				WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
 	if (!nvmet_tcp_wq)
 		return -ENOMEM;
 
@@ -1725,7 +1725,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
 		dasd_put_device(device);
 	}
 
-	/* check for for attention message */
+	/* check for attention message */
 	if (scsw_dstat(&irb->scsw) & DEV_STAT_ATTENTION) {
 		device = dasd_device_from_cdev_locked(cdev);
 		if (!IS_ERR(device)) {
@@ -639,6 +639,7 @@ static void dasd_diag_setup_blk_queue(struct dasd_block *block)
 	/* With page sized segments each segment can be translated into one idaw/tidaw */
 	blk_queue_max_segment_size(q, PAGE_SIZE);
 	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+	blk_queue_dma_alignment(q, PAGE_SIZE - 1);
 }
 
 static int dasd_diag_pe_handler(struct dasd_device *device,
@@ -6626,6 +6626,7 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block)
 	/* With page sized segments each segment can be translated into one idaw/tidaw */
 	blk_queue_max_segment_size(q, PAGE_SIZE);
 	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+	blk_queue_dma_alignment(q, PAGE_SIZE - 1);
 }
 
 static struct ccw_driver dasd_eckd_driver = {
@@ -863,7 +863,7 @@ dcssblk_submit_bio(struct bio *bio)
 	unsigned long source_addr;
 	unsigned long bytes_done;
 
-	blk_queue_split(&bio);
+	bio = bio_split_to_limits(bio);
 
 	bytes_done = 0;
 	dev_info = bio->bi_bdev->bd_disk->private_data;
@@ -718,6 +718,8 @@ static inline void ahash_request_set_crypt(struct ahash_request *req,
 struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
 					u32 mask);
 
+int crypto_has_shash(const char *alg_name, u32 type, u32 mask);
+
 static inline struct crypto_tfm *crypto_shash_tfm(struct crypto_shash *tfm)
 {
 	return &tfm->base;
@@ -104,6 +104,8 @@ struct kpp_alg {
  */
 struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask);
 
+int crypto_has_kpp(const char *alg_name, u32 type, u32 mask);
+
 static inline struct crypto_tfm *crypto_kpp_tfm(struct crypto_kpp *tfm)
 {
 	return &tfm->base;

 include/linux/base64.h (new file, 16 lines)
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * base64 encoding, lifted from fs/crypto/fname.c.
+ */
+
+#ifndef _LINUX_BASE64_H
+#define _LINUX_BASE64_H
+
+#include <linux/types.h>
+
+#define BASE64_CHARS(nbytes)	DIV_ROUND_UP((nbytes) * 4, 3)
+
+int base64_encode(const u8 *src, int len, char *dst);
+int base64_decode(const char *src, int len, u8 *dst);
+
+#endif /* _LINUX_BASE64_H */
@@ -140,6 +140,8 @@ struct gendisk {
 	struct request_queue *queue;
 	void *private_data;
 
+	struct bio_set bio_split;
+
 	int flags;
 	unsigned long state;
 #define GD_NEED_PART_SCAN		0
@@ -531,7 +533,6 @@ struct request_queue {
 
 	struct blk_mq_tag_set	*tag_set;
 	struct list_head	tag_set_list;
-	struct bio_set		bio_split;
 
 	struct dentry		*debugfs_dir;
 	struct dentry		*sched_debugfs_dir;
@@ -864,9 +865,9 @@ void blk_request_module(dev_t devt);
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 void submit_bio_noacct(struct bio *bio);
+struct bio *bio_split_to_limits(struct bio *bio);
 
 extern int blk_lld_busy(struct request_queue *q);
-extern void blk_queue_split(struct bio **);
 extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
 extern void blk_queue_exit(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);

 include/linux/nvme-auth.h (new file, 41 lines)
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 Hannes Reinecke, SUSE Software Solutions
+ */
+
+#ifndef _NVME_AUTH_H
+#define _NVME_AUTH_H
+
+#include <crypto/kpp.h>
+
+struct nvme_dhchap_key {
+	u8 *key;
+	size_t len;
+	u8 hash;
+};
+
+u32 nvme_auth_get_seqnum(void);
+const char *nvme_auth_dhgroup_name(u8 dhgroup_id);
+const char *nvme_auth_dhgroup_kpp(u8 dhgroup_id);
+u8 nvme_auth_dhgroup_id(const char *dhgroup_name);
+
+const char *nvme_auth_hmac_name(u8 hmac_id);
+const char *nvme_auth_digest_name(u8 hmac_id);
+size_t nvme_auth_hmac_hash_len(u8 hmac_id);
+u8 nvme_auth_hmac_id(const char *hmac_name);
+
+struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
+					      u8 key_hash);
+void nvme_auth_free_key(struct nvme_dhchap_key *key);
+u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn);
+int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key);
+int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len,
+				  u8 *challenge, u8 *aug, size_t hlen);
+int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid);
+int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm,
+			 u8 *host_key, size_t host_key_len);
+int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
+				u8 *ctrl_key, size_t ctrl_key_len,
+				u8 *sess_key, size_t sess_key_len);
+
+#endif /* _NVME_AUTH_H */
@ -19,6 +19,7 @@
|
|||||||
#define NVMF_TRSVCID_SIZE 32
|
#define NVMF_TRSVCID_SIZE 32
|
||||||
#define NVMF_TRADDR_SIZE 256
|
#define NVMF_TRADDR_SIZE 256
|
||||||
#define NVMF_TSAS_SIZE 256
|
#define NVMF_TSAS_SIZE 256
|
||||||
|
#define NVMF_AUTH_HASH_LEN 64
|
||||||
|
|
||||||
#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
|
#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
|
||||||
|
|
||||||
@ -711,6 +712,10 @@ enum {
|
|||||||
NVME_AER_VS = 7,
|
NVME_AER_VS = 7,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
NVME_AER_ERROR_PERSIST_INT_ERR = 0x03,
|
||||||
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
NVME_AER_NOTICE_NS_CHANGED = 0x00,
|
NVME_AER_NOTICE_NS_CHANGED = 0x00,
|
||||||
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
|
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
|
||||||
@ -1369,6 +1374,8 @@ enum nvmf_capsule_command {
|
|||||||
nvme_fabrics_type_property_set = 0x00,
|
nvme_fabrics_type_property_set = 0x00,
|
||||||
nvme_fabrics_type_connect = 0x01,
|
nvme_fabrics_type_connect = 0x01,
|
||||||
nvme_fabrics_type_property_get = 0x04,
|
nvme_fabrics_type_property_get = 0x04,
|
||||||
|
nvme_fabrics_type_auth_send = 0x05,
|
||||||
|
nvme_fabrics_type_auth_receive = 0x06,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define nvme_fabrics_type_name(type) { type, #type }
|
#define nvme_fabrics_type_name(type) { type, #type }
|
||||||
@ -1376,7 +1383,9 @@ enum nvmf_capsule_command {
|
|||||||
__print_symbolic(type, \
|
__print_symbolic(type, \
|
||||||
nvme_fabrics_type_name(nvme_fabrics_type_property_set), \
|
nvme_fabrics_type_name(nvme_fabrics_type_property_set), \
|
||||||
nvme_fabrics_type_name(nvme_fabrics_type_connect), \
|
nvme_fabrics_type_name(nvme_fabrics_type_connect), \
|
||||||
nvme_fabrics_type_name(nvme_fabrics_type_property_get))
|
nvme_fabrics_type_name(nvme_fabrics_type_property_get), \
|
||||||
|
nvme_fabrics_type_name(nvme_fabrics_type_auth_send), \
|
||||||
|
nvme_fabrics_type_name(nvme_fabrics_type_auth_receive))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If not fabrics command, fctype will be ignored.
|
* If not fabrics command, fctype will be ignored.
|
||||||
@ -1472,6 +1481,11 @@ struct nvmf_connect_command {
|
|||||||
__u8 resv4[12];
|
__u8 resv4[12];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
NVME_CONNECT_AUTHREQ_ASCR = (1 << 2),
|
||||||
|
NVME_CONNECT_AUTHREQ_ATR = (1 << 1),
|
||||||
|
};
|
||||||
|
|
||||||
struct nvmf_connect_data {
|
struct nvmf_connect_data {
|
||||||
uuid_t hostid;
|
uuid_t hostid;
|
||||||
__le16 cntlid;
|
__le16 cntlid;
|
||||||
@@ -1506,6 +1520,200 @@ struct nvmf_property_get_command {
         __u8 resv4[16];
 };
 
+struct nvmf_auth_common_command {
+        __u8 opcode;
+        __u8 resv1;
+        __u16 command_id;
+        __u8 fctype;
+        __u8 resv2[19];
+        union nvme_data_ptr dptr;
+        __u8 resv3;
+        __u8 spsp0;
+        __u8 spsp1;
+        __u8 secp;
+        __le32 al_tl;
+        __u8 resv4[16];
+};
+
+struct nvmf_auth_send_command {
+        __u8 opcode;
+        __u8 resv1;
+        __u16 command_id;
+        __u8 fctype;
+        __u8 resv2[19];
+        union nvme_data_ptr dptr;
+        __u8 resv3;
+        __u8 spsp0;
+        __u8 spsp1;
+        __u8 secp;
+        __le32 tl;
+        __u8 resv4[16];
+};
+
+struct nvmf_auth_receive_command {
+        __u8 opcode;
+        __u8 resv1;
+        __u16 command_id;
+        __u8 fctype;
+        __u8 resv2[19];
+        union nvme_data_ptr dptr;
+        __u8 resv3;
+        __u8 spsp0;
+        __u8 spsp1;
+        __u8 secp;
+        __le32 al;
+        __u8 resv4[16];
+};
+
+/* Value for secp */
+enum {
+        NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER = 0xe9,
+};
+
+/* Defined value for auth_type */
+enum {
+        NVME_AUTH_COMMON_MESSAGES = 0x00,
+        NVME_AUTH_DHCHAP_MESSAGES = 0x01,
+};
+
+/* Defined messages for auth_id */
+enum {
+        NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE = 0x00,
+        NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE = 0x01,
+        NVME_AUTH_DHCHAP_MESSAGE_REPLY = 0x02,
+        NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1 = 0x03,
+        NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2 = 0x04,
+        NVME_AUTH_DHCHAP_MESSAGE_FAILURE2 = 0xf0,
+        NVME_AUTH_DHCHAP_MESSAGE_FAILURE1 = 0xf1,
+};
+
+struct nvmf_auth_dhchap_protocol_descriptor {
+        __u8 authid;
+        __u8 rsvd;
+        __u8 halen;
+        __u8 dhlen;
+        __u8 idlist[60];
+};
+
+enum {
+        NVME_AUTH_DHCHAP_AUTH_ID = 0x01,
+};
+
+/* Defined hash functions for DH-HMAC-CHAP authentication */
+enum {
+        NVME_AUTH_HASH_SHA256 = 0x01,
+        NVME_AUTH_HASH_SHA384 = 0x02,
+        NVME_AUTH_HASH_SHA512 = 0x03,
+        NVME_AUTH_HASH_INVALID = 0xff,
+};
+
+/* Defined Diffie-Hellman group identifiers for DH-HMAC-CHAP authentication */
+enum {
+        NVME_AUTH_DHGROUP_NULL = 0x00,
+        NVME_AUTH_DHGROUP_2048 = 0x01,
+        NVME_AUTH_DHGROUP_3072 = 0x02,
+        NVME_AUTH_DHGROUP_4096 = 0x03,
+        NVME_AUTH_DHGROUP_6144 = 0x04,
+        NVME_AUTH_DHGROUP_8192 = 0x05,
+        NVME_AUTH_DHGROUP_INVALID = 0xff,
+};
+
+union nvmf_auth_protocol {
+        struct nvmf_auth_dhchap_protocol_descriptor dhchap;
+};
+
+struct nvmf_auth_dhchap_negotiate_data {
+        __u8 auth_type;
+        __u8 auth_id;
+        __le16 rsvd;
+        __le16 t_id;
+        __u8 sc_c;
+        __u8 napd;
+        union nvmf_auth_protocol auth_protocol[];
+};
+
+struct nvmf_auth_dhchap_challenge_data {
+        __u8 auth_type;
+        __u8 auth_id;
+        __u16 rsvd1;
+        __le16 t_id;
+        __u8 hl;
+        __u8 rsvd2;
+        __u8 hashid;
+        __u8 dhgid;
+        __le16 dhvlen;
+        __le32 seqnum;
+        /* 'hl' bytes of challenge value */
+        __u8 cval[];
+        /* followed by 'dhvlen' bytes of DH value */
+};
+
+struct nvmf_auth_dhchap_reply_data {
+        __u8 auth_type;
+        __u8 auth_id;
+        __le16 rsvd1;
+        __le16 t_id;
+        __u8 hl;
+        __u8 rsvd2;
+        __u8 cvalid;
+        __u8 rsvd3;
+        __le16 dhvlen;
+        __le32 seqnum;
+        /* 'hl' bytes of response data */
+        __u8 rval[];
+        /* followed by 'hl' bytes of Challenge value */
+        /* followed by 'dhvlen' bytes of DH value */
+};
+
+enum {
+        NVME_AUTH_DHCHAP_RESPONSE_VALID = (1 << 0),
+};
+
+struct nvmf_auth_dhchap_success1_data {
+        __u8 auth_type;
+        __u8 auth_id;
+        __le16 rsvd1;
+        __le16 t_id;
+        __u8 hl;
+        __u8 rsvd2;
+        __u8 rvalid;
+        __u8 rsvd3[7];
+        /* 'hl' bytes of response value if 'rvalid' is set */
+        __u8 rval[];
+};
+
+struct nvmf_auth_dhchap_success2_data {
+        __u8 auth_type;
+        __u8 auth_id;
+        __le16 rsvd1;
+        __le16 t_id;
+        __u8 rsvd2[10];
+};
+
+struct nvmf_auth_dhchap_failure_data {
+        __u8 auth_type;
+        __u8 auth_id;
+        __le16 rsvd1;
+        __le16 t_id;
+        __u8 rescode;
+        __u8 rescode_exp;
+};
+
+enum {
+        NVME_AUTH_DHCHAP_FAILURE_REASON_FAILED = 0x01,
+};
+
+enum {
+        NVME_AUTH_DHCHAP_FAILURE_FAILED = 0x01,
+        NVME_AUTH_DHCHAP_FAILURE_NOT_USABLE = 0x02,
+        NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH = 0x03,
+        NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE = 0x04,
+        NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE = 0x05,
+        NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD = 0x06,
+        NVME_AUTH_DHCHAP_FAILURE_INCORRECT_MESSAGE = 0x07,
+};
+
+
 struct nvme_dbbuf {
         __u8 opcode;
         __u8 flags;
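The structures and enums added above define the DH-HMAC-CHAP wire messages end to end. As a rough illustration of how they compose, the sketch below (not part of this diff) fills an AUTHENTICATION_NEGOTIATE payload offering SHA-256 and the NULL DH group. The helper name is hypothetical, and the idlist layout (hash identifiers first, DH group identifiers after them) is an assumption; the real host code lives in drivers/nvme/host/auth.c.

/* Illustrative sketch only: build a DH-HMAC-CHAP negotiate payload. */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/nvme.h>

static int fill_dhchap_negotiate(void *buf, size_t buf_size, u16 transaction)
{
	struct nvmf_auth_dhchap_negotiate_data *data = buf;
	size_t size = sizeof(*data) + sizeof(union nvmf_auth_protocol);

	if (buf_size < size)
		return -EINVAL;

	memset(buf, 0, size);
	data->auth_type = NVME_AUTH_COMMON_MESSAGES;
	data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
	data->t_id = cpu_to_le16(transaction);
	data->sc_c = 0;		/* no secure channel concatenation */
	data->napd = 1;		/* one protocol descriptor follows */

	data->auth_protocol[0].dhchap.authid = NVME_AUTH_DHCHAP_AUTH_ID;
	data->auth_protocol[0].dhchap.halen = 1;	/* one hash offered */
	data->auth_protocol[0].dhchap.dhlen = 1;	/* one DH group offered */
	/* assumed layout: hash ids first, then DH group ids */
	data->auth_protocol[0].dhchap.idlist[0] = NVME_AUTH_HASH_SHA256;
	data->auth_protocol[0].dhchap.idlist[1] = NVME_AUTH_DHGROUP_NULL;

	return size;
}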
@@ -1549,6 +1757,9 @@ struct nvme_command {
                 struct nvmf_connect_command connect;
                 struct nvmf_property_set_command prop_set;
                 struct nvmf_property_get_command prop_get;
+                struct nvmf_auth_common_command auth_common;
+                struct nvmf_auth_send_command auth_send;
+                struct nvmf_auth_receive_command auth_receive;
                 struct nvme_dbbuf dbbuf;
                 struct nvme_directive_cmd directive;
         };
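With the auth commands added to the nvme_command union, a DH-HMAC-CHAP payload is carried by an AUTH_SEND fabrics command whose secp and spsp fields select the security protocol. A minimal sketch follows (hypothetical helper, not the driver's code; the SPSP0/SPSP1 value of 0x01 is an assumption based on how the host fabrics code uses these fields, and command submission is omitted):

/* Illustrative sketch only: initialise an AUTH_SEND fabrics command. */
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/nvme.h>

static void init_auth_send_cmd(struct nvme_command *cmd, size_t payload_len)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->auth_send.opcode = nvme_fabrics_command;
	cmd->auth_send.fctype = nvme_fabrics_type_auth_send;
	cmd->auth_send.secp = NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER;
	cmd->auth_send.spsp0 = 0x01;	/* assumed DH-HMAC-CHAP SPSP value */
	cmd->auth_send.spsp1 = 0x01;
	cmd->auth_send.tl = cpu_to_le32(payload_len);	/* transfer length */
}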
@@ -15,6 +15,8 @@
 #define UBLK_CMD_DEL_DEV 0x05
 #define UBLK_CMD_START_DEV 0x06
 #define UBLK_CMD_STOP_DEV 0x07
+#define UBLK_CMD_SET_PARAMS 0x08
+#define UBLK_CMD_GET_PARAMS 0x09
 
 /*
  * IO commands, issued by ublk server, and handled by ublk driver.
@@ -28,12 +30,21 @@
  *      this IO request, request's handling result is committed to ublk
  *      driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be
  *      handled before completing io request.
+ *
+ * NEED_GET_DATA: only used for write requests to set io addr and copy data
+ *      When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA
+ *      command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA.
+ *
+ *      It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag
+ *      while starting a ublk device.
  */
 #define UBLK_IO_FETCH_REQ 0x20
 #define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21
+#define UBLK_IO_NEED_GET_DATA 0x22
 
 /* only ABORT means that no re-fetch */
 #define UBLK_IO_RES_OK 0
+#define UBLK_IO_RES_NEED_GET_DATA 1
 #define UBLK_IO_RES_ABORT (-ENODEV)
 
 #define UBLKSRV_CMD_BUF_OFFSET 0
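The comment above describes the new server-side handshake: a write request completes the fetch command with UBLK_IO_RES_NEED_GET_DATA, the server sets the io buffer address and issues UBLK_IO_NEED_GET_DATA, and only after the driver has copied the data does normal handling and COMMIT_AND_FETCH_REQ proceed. A schematic sketch of that decision is below (hypothetical helper; building and submitting the actual io_uring command with struct ublksrv_io_cmd is omitted):

/* Illustrative sketch only: pick the next ublk io command op. */
#include <errno.h>
#include <linux/ublk_cmd.h>

static unsigned int ublksrv_next_cmd_op(int cqe_res)
{
	if (cqe_res == UBLK_IO_RES_NEED_GET_DATA)
		/* set ublksrv_io_cmd.addr to the io buffer, then ask the
		 * driver to copy the write data into it */
		return UBLK_IO_NEED_GET_DATA;

	if (cqe_res == UBLK_IO_RES_ABORT)
		/* device is going away: do not re-fetch this tag */
		return 0;

	/* UBLK_IO_RES_OK: handle the io, then commit the result and
	 * fetch the next request with one command */
	return UBLK_IO_COMMIT_AND_FETCH_REQ;
}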
@@ -54,6 +65,15 @@
  */
 #define UBLK_F_URING_CMD_COMP_IN_TASK (1ULL << 1)
 
+/*
+ * User should issue io cmd again for write requests to
+ * set io buffer address and copy data from bio vectors
+ * to the userspace io buffer.
+ *
+ * In this mode, task_work is not used.
+ */
+#define UBLK_F_NEED_GET_DATA (1UL << 2)
+
 /* device state */
 #define UBLK_S_DEV_DEAD 0
 #define UBLK_S_DEV_LIVE 1
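UBLK_F_NEED_GET_DATA is opted into by the ublk server when the device is created. A minimal sketch of setting it in the control dev_info follows (hypothetical helper with illustrative values; sending the UBLK_CMD_ADD_DEV control command that carries this structure is omitted):

/* Illustrative sketch only: request NEED_GET_DATA mode at device creation. */
#include <string.h>
#include <linux/ublk_cmd.h>

static void ublksrv_init_dev_info(struct ublksrv_ctrl_dev_info *info)
{
	memset(info, 0, sizeof(*info));
	info->nr_hw_queues = 1;
	info->queue_depth = 128;
	/* driver will ask the server for data buffers on write requests */
	info->flags = UBLK_F_NEED_GET_DATA;
}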
@@ -78,22 +98,23 @@ struct ublksrv_ctrl_cmd {
 struct ublksrv_ctrl_dev_info {
         __u16 nr_hw_queues;
         __u16 queue_depth;
-        __u16 block_size;
         __u16 state;
+        __u16 pad0;
 
-        __u32 rq_max_blocks;
+        __u32 max_io_buf_bytes;
         __u32 dev_id;
 
-        __u64 dev_blocks;
-
         __s32 ublksrv_pid;
-        __s32 reserved0;
+        __u32 pad1;
+
         __u64 flags;
-        __u64 flags_reserved;
 
         /* For ublksrv internal use, invisible to ublk driver */
         __u64 ublksrv_flags;
-        __u64 reserved1[9];
+
+        __u64 reserved0;
+        __u64 reserved1;
+        __u64 reserved2;
 };
 
 #define UBLK_IO_OP_READ 0
@@ -158,4 +179,49 @@ struct ublksrv_io_cmd {
         __u64 addr;
 };
 
+struct ublk_param_basic {
+#define UBLK_ATTR_READ_ONLY (1 << 0)
+#define UBLK_ATTR_ROTATIONAL (1 << 1)
+#define UBLK_ATTR_VOLATILE_CACHE (1 << 2)
+#define UBLK_ATTR_FUA (1 << 3)
+        __u32 attrs;
+        __u8 logical_bs_shift;
+        __u8 physical_bs_shift;
+        __u8 io_opt_shift;
+        __u8 io_min_shift;
+
+        __u32 max_sectors;
+        __u32 chunk_sectors;
+
+        __u64 dev_sectors;
+        __u64 virt_boundary_mask;
+};
+
+struct ublk_param_discard {
+        __u32 discard_alignment;
+
+        __u32 discard_granularity;
+        __u32 max_discard_sectors;
+
+        __u32 max_write_zeroes_sectors;
+        __u16 max_discard_segments;
+        __u16 reserved0;
+};
+
+struct ublk_params {
+        /*
+         * Total length of parameters, userspace has to set 'len' for both
+         * SET_PARAMS and GET_PARAMS command, and driver may update len
+         * if two sides use different version of 'ublk_params', same with
+         * 'types' fields.
+         */
+        __u32 len;
+#define UBLK_PARAM_TYPE_BASIC (1 << 0)
+#define UBLK_PARAM_TYPE_DISCARD (1 << 1)
+        __u32 types; /* types of parameter included */
+
+        struct ublk_param_basic basic;
+        struct ublk_param_discard discard;
+};
+
 #endif
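The ublk_params comment above spells out the contract: userspace sets len so driver and server can detect layout mismatches across versions, and types selects which parameter groups are valid. A sketch of preparing a basic-parameters payload before issuing UBLK_CMD_SET_PARAMS (hypothetical helper, illustrative geometry values; sending the control command is omitted):

/* Illustrative sketch only: fill struct ublk_params for SET_PARAMS. */
#include <string.h>
#include <linux/ublk_cmd.h>

static void ublksrv_init_params(struct ublk_params *p,
				unsigned long long dev_sectors)
{
	memset(p, 0, sizeof(*p));
	p->len = sizeof(*p);			/* lets the driver detect layout version */
	p->types = UBLK_PARAM_TYPE_BASIC;	/* only the basic group is valid here */

	p->basic.logical_bs_shift = 9;		/* 512-byte logical blocks */
	p->basic.physical_bs_shift = 12;	/* 4 KiB physical blocks */
	p->basic.io_min_shift = 9;
	p->basic.io_opt_shift = 12;
	p->basic.max_sectors = 1024;		/* 512 KiB max io size */
	p->basic.dev_sectors = dev_sectors;
	p->basic.attrs = UBLK_ATTR_VOLATILE_CACHE;
}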
@@ -46,7 +46,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
 	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
 	 list_sort.o uuid.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o rhashtable.o \
+	 percpu-refcount.o rhashtable.o base64.o \
 	 once.o refcount.o usercopy.o errseq.o bucket_locks.o \
 	 generic-radix-tree.o
 obj-$(CONFIG_STRING_SELFTEST) += test_string.o
new file: lib/base64.c (103 lines)
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * base64.c - RFC4648-compliant base64 encoding
+ *
+ * Copyright (c) 2020 Hannes Reinecke, SUSE
+ *
+ * Based on the base64url routines from fs/crypto/fname.c
+ * (which are using the URL-safe base64 encoding),
+ * modified to use the standard coding table from RFC4648 section 4.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/base64.h>
+
+static const char base64_table[65] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/**
+ * base64_encode() - base64-encode some binary data
+ * @src: the binary data to encode
+ * @srclen: the length of @src in bytes
+ * @dst: (output) the base64-encoded string. Not NUL-terminated.
+ *
+ * Encodes data using base64 encoding, i.e. the "Base 64 Encoding" specified
+ * by RFC 4648, including the '='-padding.
+ *
+ * Return: the length of the resulting base64-encoded string in bytes.
+ */
+int base64_encode(const u8 *src, int srclen, char *dst)
+{
+        u32 ac = 0;
+        int bits = 0;
+        int i;
+        char *cp = dst;
+
+        for (i = 0; i < srclen; i++) {
+                ac = (ac << 8) | src[i];
+                bits += 8;
+                do {
+                        bits -= 6;
+                        *cp++ = base64_table[(ac >> bits) & 0x3f];
+                } while (bits >= 6);
+        }
+        if (bits) {
+                *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
+                bits -= 6;
+        }
+        while (bits < 0) {
+                *cp++ = '=';
+                bits += 2;
+        }
+        return cp - dst;
+}
+EXPORT_SYMBOL_GPL(base64_encode);
+
+/**
+ * base64_decode() - base64-decode a string
+ * @src: the string to decode. Doesn't need to be NUL-terminated.
+ * @srclen: the length of @src in bytes
+ * @dst: (output) the decoded binary data
+ *
+ * Decodes a string using base64 encoding, i.e. the "Base 64 Encoding"
+ * specified by RFC 4648, including the '='-padding.
+ *
+ * This implementation hasn't been optimized for performance.
+ *
+ * Return: the length of the resulting decoded binary data in bytes,
+ *         or -1 if the string isn't a valid base64 string.
+ */
+int base64_decode(const char *src, int srclen, u8 *dst)
+{
+        u32 ac = 0;
+        int bits = 0;
+        int i;
+        u8 *bp = dst;
+
+        for (i = 0; i < srclen; i++) {
+                const char *p = strchr(base64_table, src[i]);
+
+                if (src[i] == '=') {
+                        ac = (ac << 6);
+                        bits += 6;
+                        if (bits >= 8)
+                                bits -= 8;
+                        continue;
+                }
+                if (p == NULL || src[i] == 0)
+                        return -1;
+                ac = (ac << 6) | (p - base64_table);
+                bits += 6;
+                if (bits >= 8) {
+                        bits -= 8;
+                        *bp++ = (u8)(ac >> bits);
+                }
+        }
+        if (ac & ((1 << bits) - 1))
+                return -1;
+        return bp - dst;
+}
+EXPORT_SYMBOL_GPL(base64_decode);