for-5.20/block-2022-08-04
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmLsRfkQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpj43EADBydQhe7nQHH65gecqvttnio2GqEmcbozt lKFQlPPd3SHGMAJjSdR1dIwqtPsJ8q6xZXH+TjHhLXb2kgVu+TQ31krNHIqBwE14 s7SsgGRgvopA46lSf/ls18/8sh6Yz1NgI39YcMVPjvkbLaVFK7zRkL9OSp4RQCwH u/IIHJmV415EeF6QNTgABBel/gEIPBLsvwOxTBIkzDOyUohtExZPYj83MDm7jdr3 jsTUd2MiumNMh7ziMJIp1iN32nQOtIKtwWZaMHDCzfU/IUnBSmh2nj9oXr3+vcwo IsBMDUfUj9Eig5QQ/XcVIrFezi0GnunpBhScXPqL+dxPN812lzxNjkx6PsC+rPn8 mWmXoaeK1ayoyotdHJlmINNmWUSCkOMwVnA2r1c4Hp4cQS5vRUtkKcpNLTpMhk4I OwQ3bjt9mA//WlH+apbhJqXqxjcoBwCwMoveJ4mHVtku9lo+JJAKVGdUs17QjZkC NxACP1MtBcXy1hurNQf14oH5C0Hyg4TBJShPauKmrqGtOFnbOAdX2qIhldvyNfH1 l9cOvGNSgbQ6FLD6MVto6dC/KYOEM3LelVxgNB/80GbSmGwj88Kd/nzQLYFP89JJ 0Wkt14mSkm82gabOvNqXGG8P8hLb/+v6sp4qZv0mf+op0xmb4FB5eaZvoceptVzM 3Z+hmT7MfA== =pgNf -----END PGP SIGNATURE----- Merge tag 'for-5.20/block-2022-08-04' of git://git.kernel.dk/linux-block Pull block driver updates from Jens Axboe: - NVMe pull requests via Christoph: - add support for In-Band authentication (Hannes Reinecke) - handle the persistent internal error AER (Michael Kelley) - use in-capsule data for TCP I/O queue connect (Caleb Sander) - remove timeout for getting RDMA-CM established event (Israel Rukshin) - misc cleanups (Joel Granados, Sagi Grimberg, Chaitanya Kulkarni, Guixin Liu, Xiang wangx) - use command_id instead of req->tag in trace_nvme_complete_rq() (Bean Huo) - various fixes for the new authentication code (Lukas Bulwahn, Dan Carpenter, Colin Ian King, Chaitanya Kulkarni, Hannes Reinecke) - small cleanups (Liu Song, Christoph Hellwig) - restore compat_ioctl support (Nick Bowler) - make a nvmet-tcp workqueue lockdep-safe (Sagi Grimberg) - enable generic interface (/dev/ngXnY) for unknown command sets (Joel Granados, Christoph Hellwig) - don't always build constants.o (Christoph Hellwig) - print the command name of aborted commands (Christoph Hellwig) - MD pull requests via Song: - Improve raid5 lock contention, by 
Logan Gunthorpe. - Misc fixes to raid5, by Logan Gunthorpe. - Fix race condition with md_reap_sync_thread(), by Guoqing Jiang. - Fix potential deadlock with raid5_quiesce and raid5_get_active_stripe, by Logan Gunthorpe. - Refactoring md_alloc(), by Christoph. - Fix md disk_name lifetime problems, by Christoph Hellwig - Convert prepare_to_wait() to wait_woken() api, by Logan Gunthorpe; - Fix sectors_to_do bitmap issue, by Logan Gunthorpe. - Work on unifying the null_blk module parameters and configfs API (Vincent) - drbd bitmap IO error fix (Lars) - Set of rnbd fixes (Guoqing, Md Haris) - Remove experimental marker on bcache async device registration (Coly) - Series cleaning up the bio splitting (Christoph) - Removal of the sx8 block driver. This hardware was never really widespread, and it didn't receive a lot of attention after the initial merge of it back in 2005 (Christoph) - A few fixes for s390 dasd (Eric, Jiang) - Followup set of fixes for ublk (Ming) - Support for UBLK_IO_NEED_GET_DATA for ublk (ZiyangZhang) - Fixes for the dio dma alignment (Keith) - Misc fixes and cleanups (Ming, Yu, Dan, Christophe) * tag 'for-5.20/block-2022-08-04' of git://git.kernel.dk/linux-block: (136 commits) s390/dasd: Establish DMA alignment s390/dasd: drop unexpected word 'for' in comments ublk_drv: add support for UBLK_IO_NEED_GET_DATA ublk_cmd.h: add one new ublk command: UBLK_IO_NEED_GET_DATA ublk_drv: cleanup ublksrv_ctrl_dev_info ublk_drv: add SET_PARAMS/GET_PARAMS control command ublk_drv: fix ublk device leak in case that add_disk fails ublk_drv: cancel device even though disk isn't up block: fix leaking page ref on truncated direct io block: ensure bio_iov_add_page can't fail block: ensure iov_iter advances for added pages drivers:md:fix a potential use-after-free bug md/raid5: Ensure batch_last is released before sleeping for quiesce md/raid5: Move stripe_request_ctx up md/raid5: Drop unnecessary call to r5c_check_stripe_cache_usage() md/raid5: Make is_inactive_blocked() 
helper md/raid5: Refactor raid5_get_active_stripe() block: pass struct queue_limits to the bio splitting helpers block: move bio_allowed_max_sectors to blk-merge.c block: move the call to get_max_io_size out of blk_bio_segment_split ...
This commit is contained in:
commit
fa9db655d0
@ -72,6 +72,28 @@ submit_queues=[1..nr_cpus]: Default: 1
|
||||
hw_queue_depth=[0..qdepth]: Default: 64
|
||||
The hardware queue depth of the device.
|
||||
|
||||
memory_backed=[0/1]: Default: 0
|
||||
Whether or not to use a memory buffer to respond to IO requests
|
||||
|
||||
= =============================================
|
||||
0 Transfer no data in response to IO requests
|
||||
1 Use a memory buffer to respond to IO requests
|
||||
= =============================================
|
||||
|
||||
discard=[0/1]: Default: 0
|
||||
Support discard operations (requires memory-backed null_blk device).
|
||||
|
||||
= =====================================
|
||||
0 Do not support discard operations
|
||||
1 Enable support for discard operations
|
||||
= =====================================
|
||||
|
||||
cache_size=[Size in MB]: Default: 0
|
||||
Cache size in MB for memory-backed device.
|
||||
|
||||
mbps=[Maximum bandwidth in MB/s]: Default: 0 (no limit)
|
||||
Bandwidth limit for device performance.
|
||||
|
||||
Multi-queue specific parameters
|
||||
-------------------------------
|
||||
|
||||
|
@ -14507,7 +14507,8 @@ S: Supported
|
||||
W: http://git.infradead.org/nvme.git
|
||||
T: git://git.infradead.org/nvme.git
|
||||
F: drivers/nvme/host/
|
||||
F: include/linux/nvme.h
|
||||
F: drivers/nvme/common/
|
||||
F: include/linux/nvme*
|
||||
F: include/uapi/linux/nvme_ioctl.h
|
||||
|
||||
NVM EXPRESS FC TRANSPORT DRIVERS
|
||||
@ -18838,6 +18839,7 @@ SOFTWARE RAID (Multiple Disks) SUPPORT
|
||||
M: Song Liu <song@kernel.org>
|
||||
L: linux-raid@vger.kernel.org
|
||||
S: Supported
|
||||
Q: https://patchwork.kernel.org/project/linux-raid/list/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/song/md.git
|
||||
F: drivers/md/Kconfig
|
||||
F: drivers/md/Makefile
|
||||
|
@ -134,7 +134,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
|
||||
iv = bip->bip_vec + bip->bip_vcnt;
|
||||
|
||||
if (bip->bip_vcnt &&
|
||||
bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev),
|
||||
bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
|
||||
&bip->bip_vec[bip->bip_vcnt - 1], offset))
|
||||
return 0;
|
||||
|
||||
|
51
block/bio.c
51
block/bio.c
@ -965,7 +965,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
* would create a gap, disallow it.
|
||||
*/
|
||||
bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
if (bvec_gap_to_prev(q, bvec, offset))
|
||||
if (bvec_gap_to_prev(&q->limits, bvec, offset))
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1151,22 +1151,12 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
|
||||
bio_set_flag(bio, BIO_CLONED);
|
||||
}
|
||||
|
||||
static void bio_put_pages(struct page **pages, size_t size, size_t off)
|
||||
{
|
||||
size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
put_page(pages[i]);
|
||||
}
|
||||
|
||||
static int bio_iov_add_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int offset)
|
||||
{
|
||||
bool same_page = false;
|
||||
|
||||
if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
|
||||
if (WARN_ON_ONCE(bio_full(bio, len)))
|
||||
return -EINVAL;
|
||||
__bio_add_page(bio, page, len, offset);
|
||||
return 0;
|
||||
}
|
||||
@ -1209,8 +1199,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
|
||||
struct page **pages = (struct page **)bv;
|
||||
ssize_t size, left;
|
||||
unsigned len, i;
|
||||
unsigned len, i = 0;
|
||||
size_t offset;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Move page array up in the allocated memory for the bio vecs as far as
|
||||
@ -1227,32 +1218,40 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
* result to ensure the bio's total size is correct. The remainder of
|
||||
* the iov data will be picked up in the next bio iteration.
|
||||
*/
|
||||
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
|
||||
if (size > 0)
|
||||
size = iov_iter_get_pages(iter, pages, UINT_MAX - bio->bi_iter.bi_size,
|
||||
nr_pages, &offset);
|
||||
if (size > 0) {
|
||||
nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
|
||||
size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
|
||||
if (unlikely(size <= 0))
|
||||
return size ? size : -EFAULT;
|
||||
} else
|
||||
nr_pages = 0;
|
||||
|
||||
if (unlikely(size <= 0)) {
|
||||
ret = size ? size : -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (left = size, i = 0; left > 0; left -= len, i++) {
|
||||
struct page *page = pages[i];
|
||||
int ret;
|
||||
|
||||
len = min_t(size_t, PAGE_SIZE - offset, left);
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
ret = bio_iov_add_zone_append_page(bio, page, len,
|
||||
offset);
|
||||
else
|
||||
ret = bio_iov_add_page(bio, page, len, offset);
|
||||
if (ret)
|
||||
break;
|
||||
} else
|
||||
bio_iov_add_page(bio, page, len, offset);
|
||||
|
||||
if (ret) {
|
||||
bio_put_pages(pages + i, left, offset);
|
||||
return ret;
|
||||
}
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
iov_iter_advance(iter, size);
|
||||
return 0;
|
||||
iov_iter_advance(iter, size - left);
|
||||
out:
|
||||
while (i < nr_pages)
|
||||
put_page(pages[i++]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -377,7 +377,6 @@ static void blk_timeout_work(struct work_struct *work)
|
||||
struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
|
||||
q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
|
||||
GFP_KERNEL | __GFP_ZERO, node_id);
|
||||
@ -396,13 +395,9 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
|
||||
if (q->id < 0)
|
||||
goto fail_srcu;
|
||||
|
||||
ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, 0);
|
||||
if (ret)
|
||||
goto fail_id;
|
||||
|
||||
q->stats = blk_alloc_queue_stats();
|
||||
if (!q->stats)
|
||||
goto fail_split;
|
||||
goto fail_id;
|
||||
|
||||
q->node = node_id;
|
||||
|
||||
@ -439,8 +434,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
|
||||
|
||||
fail_stats:
|
||||
blk_free_queue_stats(q->stats);
|
||||
fail_split:
|
||||
bioset_exit(&q->bio_split);
|
||||
fail_id:
|
||||
ida_free(&blk_queue_ida, q->id);
|
||||
fail_srcu:
|
||||
|
@ -82,7 +82,7 @@ static inline bool bio_will_gap(struct request_queue *q,
|
||||
bio_get_first_bvec(next, &nb);
|
||||
if (biovec_phys_mergeable(q, &pb, &nb))
|
||||
return false;
|
||||
return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
|
||||
return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
|
||||
}
|
||||
|
||||
static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
|
||||
@ -95,23 +95,30 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
|
||||
return bio_will_gap(req->q, NULL, bio, req->bio);
|
||||
}
|
||||
|
||||
static struct bio *blk_bio_discard_split(struct request_queue *q,
|
||||
struct bio *bio,
|
||||
struct bio_set *bs,
|
||||
unsigned *nsegs)
|
||||
/*
|
||||
* The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
|
||||
* is defined as 'unsigned int'; meanwhile it has to be aligned with the
|
||||
* logical block size, which is the minimum accepted unit by hardware.
|
||||
*/
|
||||
static unsigned int bio_allowed_max_sectors(struct queue_limits *lim)
|
||||
{
|
||||
return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
static struct bio *bio_split_discard(struct bio *bio, struct queue_limits *lim,
|
||||
unsigned *nsegs, struct bio_set *bs)
|
||||
{
|
||||
unsigned int max_discard_sectors, granularity;
|
||||
int alignment;
|
||||
sector_t tmp;
|
||||
unsigned split_sectors;
|
||||
|
||||
*nsegs = 1;
|
||||
|
||||
/* Zero-sector (unknown) and one-sector granularities are the same. */
|
||||
granularity = max(q->limits.discard_granularity >> 9, 1U);
|
||||
granularity = max(lim->discard_granularity >> 9, 1U);
|
||||
|
||||
max_discard_sectors = min(q->limits.max_discard_sectors,
|
||||
bio_allowed_max_sectors(q));
|
||||
max_discard_sectors =
|
||||
min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
|
||||
max_discard_sectors -= max_discard_sectors % granularity;
|
||||
|
||||
if (unlikely(!max_discard_sectors)) {
|
||||
@ -128,9 +135,8 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
|
||||
* If the next starting sector would be misaligned, stop the discard at
|
||||
* the previous aligned sector.
|
||||
*/
|
||||
alignment = (q->limits.discard_alignment >> 9) % granularity;
|
||||
|
||||
tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
|
||||
tmp = bio->bi_iter.bi_sector + split_sectors -
|
||||
((lim->discard_alignment >> 9) % granularity);
|
||||
tmp = sector_div(tmp, granularity);
|
||||
|
||||
if (split_sectors > tmp)
|
||||
@ -139,18 +145,15 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
|
||||
return bio_split(bio, split_sectors, GFP_NOIO, bs);
|
||||
}
|
||||
|
||||
static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
|
||||
struct bio *bio, struct bio_set *bs, unsigned *nsegs)
|
||||
static struct bio *bio_split_write_zeroes(struct bio *bio,
|
||||
struct queue_limits *lim, unsigned *nsegs, struct bio_set *bs)
|
||||
{
|
||||
*nsegs = 0;
|
||||
|
||||
if (!q->limits.max_write_zeroes_sectors)
|
||||
if (!lim->max_write_zeroes_sectors)
|
||||
return NULL;
|
||||
|
||||
if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
|
||||
if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
|
||||
return NULL;
|
||||
|
||||
return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
|
||||
return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -161,17 +164,17 @@ static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
|
||||
* requests that are submitted to a block device if the start of a bio is not
|
||||
* aligned to a physical block boundary.
|
||||
*/
|
||||
static inline unsigned get_max_io_size(struct request_queue *q,
|
||||
struct bio *bio)
|
||||
static inline unsigned get_max_io_size(struct bio *bio,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
|
||||
unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
|
||||
unsigned max_sectors = queue_max_sectors(q), start, end;
|
||||
unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
|
||||
unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
|
||||
unsigned max_sectors = lim->max_sectors, start, end;
|
||||
|
||||
if (q->limits.chunk_sectors) {
|
||||
if (lim->chunk_sectors) {
|
||||
max_sectors = min(max_sectors,
|
||||
blk_chunk_sectors_left(bio->bi_iter.bi_sector,
|
||||
q->limits.chunk_sectors));
|
||||
lim->chunk_sectors));
|
||||
}
|
||||
|
||||
start = bio->bi_iter.bi_sector & (pbs - 1);
|
||||
@ -181,11 +184,10 @@ static inline unsigned get_max_io_size(struct request_queue *q,
|
||||
return max_sectors & ~(lbs - 1);
|
||||
}
|
||||
|
||||
static inline unsigned get_max_segment_size(const struct request_queue *q,
|
||||
struct page *start_page,
|
||||
unsigned long offset)
|
||||
static inline unsigned get_max_segment_size(struct queue_limits *lim,
|
||||
struct page *start_page, unsigned long offset)
|
||||
{
|
||||
unsigned long mask = queue_segment_boundary(q);
|
||||
unsigned long mask = lim->seg_boundary_mask;
|
||||
|
||||
offset = mask & (page_to_phys(start_page) + offset);
|
||||
|
||||
@ -194,12 +196,12 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
|
||||
* on 32bit arch, use queue's max segment size when that happens.
|
||||
*/
|
||||
return min_not_zero(mask - offset + 1,
|
||||
(unsigned long)queue_max_segment_size(q));
|
||||
(unsigned long)lim->max_segment_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* bvec_split_segs - verify whether or not a bvec should be split in the middle
|
||||
* @q: [in] request queue associated with the bio associated with @bv
|
||||
* @lim: [in] queue limits to split based on
|
||||
* @bv: [in] bvec to examine
|
||||
* @nsegs: [in,out] Number of segments in the bio being built. Incremented
|
||||
* by the number of segments from @bv that may be appended to that
|
||||
@ -217,10 +219,9 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
|
||||
* *@nsegs segments and *@sectors sectors would make that bio unacceptable for
|
||||
* the block driver.
|
||||
*/
|
||||
static bool bvec_split_segs(const struct request_queue *q,
|
||||
const struct bio_vec *bv, unsigned *nsegs,
|
||||
unsigned *bytes, unsigned max_segs,
|
||||
unsigned max_bytes)
|
||||
static bool bvec_split_segs(struct queue_limits *lim, const struct bio_vec *bv,
|
||||
unsigned *nsegs, unsigned *bytes, unsigned max_segs,
|
||||
unsigned max_bytes)
|
||||
{
|
||||
unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
|
||||
unsigned len = min(bv->bv_len, max_len);
|
||||
@ -228,7 +229,7 @@ static bool bvec_split_segs(const struct request_queue *q,
|
||||
unsigned seg_size = 0;
|
||||
|
||||
while (len && *nsegs < max_segs) {
|
||||
seg_size = get_max_segment_size(q, bv->bv_page,
|
||||
seg_size = get_max_segment_size(lim, bv->bv_page,
|
||||
bv->bv_offset + total_len);
|
||||
seg_size = min(seg_size, len);
|
||||
|
||||
@ -236,7 +237,7 @@ static bool bvec_split_segs(const struct request_queue *q,
|
||||
total_len += seg_size;
|
||||
len -= seg_size;
|
||||
|
||||
if ((bv->bv_offset + total_len) & queue_virt_boundary(q))
|
||||
if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
|
||||
break;
|
||||
}
|
||||
|
||||
@ -247,16 +248,17 @@ static bool bvec_split_segs(const struct request_queue *q,
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_bio_segment_split - split a bio in two bios
|
||||
* @q: [in] request queue pointer
|
||||
* bio_split_rw - split a bio in two bios
|
||||
* @bio: [in] bio to be split
|
||||
* @bs: [in] bio set to allocate the clone from
|
||||
* @lim: [in] queue limits to split based on
|
||||
* @segs: [out] number of segments in the bio with the first half of the sectors
|
||||
* @bs: [in] bio set to allocate the clone from
|
||||
* @max_bytes: [in] maximum number of bytes per bio
|
||||
*
|
||||
* Clone @bio, update the bi_iter of the clone to represent the first sectors
|
||||
* of @bio and update @bio->bi_iter to represent the remaining sectors. The
|
||||
* following is guaranteed for the cloned bio:
|
||||
* - That it has at most get_max_io_size(@q, @bio) sectors.
|
||||
* - That it has at most @max_bytes worth of data
|
||||
* - That it has at most @lim->max_segments segments.
|
||||
*
|
||||
* Except for discard requests the cloned bio will point at the bi_io_vec of
|
||||
@ -265,33 +267,30 @@ static bool bvec_split_segs(const struct request_queue *q,
|
||||
* responsible for ensuring that @bs is only destroyed after processing of the
|
||||
* split bio has finished.
|
||||
*/
|
||||
static struct bio *blk_bio_segment_split(struct request_queue *q,
|
||||
struct bio *bio,
|
||||
struct bio_set *bs,
|
||||
unsigned *segs)
|
||||
static struct bio *bio_split_rw(struct bio *bio, struct queue_limits *lim,
|
||||
unsigned *segs, struct bio_set *bs, unsigned max_bytes)
|
||||
{
|
||||
struct bio_vec bv, bvprv, *bvprvp = NULL;
|
||||
struct bvec_iter iter;
|
||||
unsigned nsegs = 0, bytes = 0;
|
||||
const unsigned max_bytes = get_max_io_size(q, bio) << 9;
|
||||
const unsigned max_segs = queue_max_segments(q);
|
||||
|
||||
bio_for_each_bvec(bv, bio, iter) {
|
||||
/*
|
||||
* If the queue doesn't support SG gaps and adding this
|
||||
* offset would create a gap, disallow it.
|
||||
*/
|
||||
if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
|
||||
if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
|
||||
goto split;
|
||||
|
||||
if (nsegs < max_segs &&
|
||||
if (nsegs < lim->max_segments &&
|
||||
bytes + bv.bv_len <= max_bytes &&
|
||||
bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
|
||||
nsegs++;
|
||||
bytes += bv.bv_len;
|
||||
} else if (bvec_split_segs(q, &bv, &nsegs, &bytes, max_segs,
|
||||
max_bytes)) {
|
||||
goto split;
|
||||
} else {
|
||||
if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
|
||||
lim->max_segments, max_bytes))
|
||||
goto split;
|
||||
}
|
||||
|
||||
bvprv = bv;
|
||||
@ -308,7 +307,7 @@ split:
|
||||
* split size so that each bio is properly block size aligned, even if
|
||||
* we do not use the full hardware limits.
|
||||
*/
|
||||
bytes = ALIGN_DOWN(bytes, queue_logical_block_size(q));
|
||||
bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
|
||||
|
||||
/*
|
||||
* Bio splitting may cause subtle trouble such as hang when doing sync
|
||||
@ -320,34 +319,35 @@ split:
|
||||
}
|
||||
|
||||
/**
|
||||
* __blk_queue_split - split a bio and submit the second half
|
||||
* @q: [in] request_queue new bio is being queued at
|
||||
* @bio: [in, out] bio to be split
|
||||
* @nr_segs: [out] number of segments in the first bio
|
||||
* __bio_split_to_limits - split a bio to fit the queue limits
|
||||
* @bio: bio to be split
|
||||
* @lim: queue limits to split based on
|
||||
* @nr_segs: returns the number of segments in the returned bio
|
||||
*
|
||||
* Split a bio into two bios, chain the two bios, submit the second half and
|
||||
* store a pointer to the first half in *@bio. If the second bio is still too
|
||||
* big it will be split by a recursive call to this function. Since this
|
||||
* function may allocate a new bio from q->bio_split, it is the responsibility
|
||||
* of the caller to ensure that q->bio_split is only released after processing
|
||||
* of the split bio has finished.
|
||||
* Check if @bio needs splitting based on the queue limits, and if so split off
|
||||
* a bio fitting the limits from the beginning of @bio and return it. @bio is
|
||||
* shortened to the remainder and re-submitted.
|
||||
*
|
||||
* The split bio is allocated from @bio->bi_bdev->bd_disk->bio_split, which is provided by the
|
||||
* block layer.
|
||||
*/
|
||||
void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim,
|
||||
unsigned int *nr_segs)
|
||||
{
|
||||
struct bio *split = NULL;
|
||||
struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
|
||||
struct bio *split;
|
||||
|
||||
switch (bio_op(*bio)) {
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_DISCARD:
|
||||
case REQ_OP_SECURE_ERASE:
|
||||
split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs);
|
||||
split = bio_split_discard(bio, lim, nr_segs, bs);
|
||||
break;
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
|
||||
nr_segs);
|
||||
split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
|
||||
break;
|
||||
default:
|
||||
split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
|
||||
split = bio_split_rw(bio, lim, nr_segs, bs,
|
||||
get_max_io_size(bio, lim) << SECTOR_SHIFT);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -356,32 +356,35 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
split->bi_opf |= REQ_NOMERGE;
|
||||
|
||||
blkcg_bio_issue_init(split);
|
||||
bio_chain(split, *bio);
|
||||
trace_block_split(split, (*bio)->bi_iter.bi_sector);
|
||||
submit_bio_noacct(*bio);
|
||||
*bio = split;
|
||||
bio_chain(split, bio);
|
||||
trace_block_split(split, bio->bi_iter.bi_sector);
|
||||
submit_bio_noacct(bio);
|
||||
return split;
|
||||
}
|
||||
return bio;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_queue_split - split a bio and submit the second half
|
||||
* @bio: [in, out] bio to be split
|
||||
* bio_split_to_limits - split a bio to fit the queue limits
|
||||
* @bio: bio to be split
|
||||
*
|
||||
* Split a bio into two bios, chains the two bios, submit the second half and
|
||||
* store a pointer to the first half in *@bio. Since this function may allocate
|
||||
* a new bio from q->bio_split, it is the responsibility of the caller to ensure
|
||||
* that q->bio_split is only released after processing of the split bio has
|
||||
* finished.
|
||||
* Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
|
||||
* if so split off a bio fitting the limits from the beginning of @bio and
|
||||
* return it. @bio is shortened to the remainder and re-submitted.
|
||||
*
|
||||
* The split bio is allocated from @bio->bi_bdev->bd_disk->bio_split, which is provided by the
|
||||
* block layer.
|
||||
*/
|
||||
void blk_queue_split(struct bio **bio)
|
||||
struct bio *bio_split_to_limits(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue((*bio)->bi_bdev);
|
||||
struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
|
||||
unsigned int nr_segs;
|
||||
|
||||
if (blk_may_split(q, *bio))
|
||||
__blk_queue_split(q, bio, &nr_segs);
|
||||
if (bio_may_exceed_limits(bio, lim))
|
||||
return __bio_split_to_limits(bio, lim, &nr_segs);
|
||||
return bio;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_split);
|
||||
EXPORT_SYMBOL(bio_split_to_limits);
|
||||
|
||||
unsigned int blk_recalc_rq_segments(struct request *rq)
|
||||
{
|
||||
@ -411,7 +414,7 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
|
||||
}
|
||||
|
||||
rq_for_each_bvec(bv, rq, iter)
|
||||
bvec_split_segs(rq->q, &bv, &nr_phys_segs, &bytes,
|
||||
bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
|
||||
UINT_MAX, UINT_MAX);
|
||||
return nr_phys_segs;
|
||||
}
|
||||
@ -442,8 +445,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
|
||||
|
||||
while (nbytes > 0) {
|
||||
unsigned offset = bvec->bv_offset + total;
|
||||
unsigned len = min(get_max_segment_size(q, bvec->bv_page,
|
||||
offset), nbytes);
|
||||
unsigned len = min(get_max_segment_size(&q->limits,
|
||||
bvec->bv_page, offset), nbytes);
|
||||
struct page *page = bvec->bv_page;
|
||||
|
||||
/*
|
||||
|
@ -2815,9 +2815,9 @@ void blk_mq_submit_bio(struct bio *bio)
|
||||
unsigned int nr_segs = 1;
|
||||
blk_status_t ret;
|
||||
|
||||
blk_queue_bounce(q, &bio);
|
||||
if (blk_may_split(q, bio))
|
||||
__blk_queue_split(q, &bio, &nr_segs);
|
||||
bio = blk_queue_bounce(bio, q);
|
||||
if (bio_may_exceed_limits(bio, &q->limits))
|
||||
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
|
||||
|
||||
if (!bio_integrity_prep(bio))
|
||||
return;
|
||||
|
@ -779,8 +779,6 @@ static void blk_release_queue(struct kobject *kobj)
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_release(q);
|
||||
|
||||
bioset_exit(&q->bio_split);
|
||||
|
||||
if (blk_queue_has_srcu(q))
|
||||
cleanup_srcu_struct(q->srcu);
|
||||
|
||||
|
47
block/blk.h
47
block/blk.h
@ -97,23 +97,23 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool __bvec_gap_to_prev(struct request_queue *q,
|
||||
static inline bool __bvec_gap_to_prev(struct queue_limits *lim,
|
||||
struct bio_vec *bprv, unsigned int offset)
|
||||
{
|
||||
return (offset & queue_virt_boundary(q)) ||
|
||||
((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
|
||||
return (offset & lim->virt_boundary_mask) ||
|
||||
((bprv->bv_offset + bprv->bv_len) & lim->virt_boundary_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if adding a bio_vec after bprv with offset would create a gap in
|
||||
* the SG list. Most drivers don't care about this, but some do.
|
||||
*/
|
||||
static inline bool bvec_gap_to_prev(struct request_queue *q,
|
||||
static inline bool bvec_gap_to_prev(struct queue_limits *lim,
|
||||
struct bio_vec *bprv, unsigned int offset)
|
||||
{
|
||||
if (!queue_virt_boundary(q))
|
||||
if (!lim->virt_boundary_mask)
|
||||
return false;
|
||||
return __bvec_gap_to_prev(q, bprv, offset);
|
||||
return __bvec_gap_to_prev(lim, bprv, offset);
|
||||
}
|
||||
|
||||
static inline bool rq_mergeable(struct request *rq)
|
||||
@ -189,7 +189,8 @@ static inline bool integrity_req_gap_back_merge(struct request *req,
|
||||
struct bio_integrity_payload *bip = bio_integrity(req->bio);
|
||||
struct bio_integrity_payload *bip_next = bio_integrity(next);
|
||||
|
||||
return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
|
||||
return bvec_gap_to_prev(&req->q->limits,
|
||||
&bip->bip_vec[bip->bip_vcnt - 1],
|
||||
bip_next->bip_vec[0].bv_offset);
|
||||
}
|
||||
|
||||
@ -199,7 +200,8 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct bio_integrity_payload *bip_next = bio_integrity(req->bio);
|
||||
|
||||
return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
|
||||
return bvec_gap_to_prev(&req->q->limits,
|
||||
&bip->bip_vec[bip->bip_vcnt - 1],
|
||||
bip_next->bip_vec[0].bv_offset);
|
||||
}
|
||||
|
||||
@ -288,7 +290,8 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
|
||||
ssize_t part_timeout_store(struct device *, struct device_attribute *,
|
||||
const char *, size_t);
|
||||
|
||||
static inline bool blk_may_split(struct request_queue *q, struct bio *bio)
|
||||
static inline bool bio_may_exceed_limits(struct bio *bio,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_DISCARD:
|
||||
@ -307,12 +310,12 @@ static inline bool blk_may_split(struct request_queue *q, struct bio *bio)
|
||||
* to the performance impact of cloned bios themselves the loop below
|
||||
* doesn't matter anyway.
|
||||
*/
|
||||
return q->limits.chunk_sectors || bio->bi_vcnt != 1 ||
|
||||
return lim->chunk_sectors || bio->bi_vcnt != 1 ||
|
||||
bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
|
||||
}
|
||||
|
||||
void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
unsigned int *nr_segs);
|
||||
struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim,
|
||||
unsigned int *nr_segs);
|
||||
int ll_back_merge_fn(struct request *req, struct bio *bio,
|
||||
unsigned int nr_segs);
|
||||
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
|
||||
@ -344,16 +347,6 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
|
||||
q->last_merge = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
|
||||
* is defined as 'unsigned int'; meanwhile it has to be aligned with the logical
|
||||
* block size which is the minimum accepted unit by hardware.
|
||||
*/
|
||||
static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
|
||||
{
|
||||
return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal io_context interface
|
||||
*/
|
||||
@ -378,7 +371,7 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
|
||||
static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
|
||||
#endif
|
||||
|
||||
void __blk_queue_bounce(struct request_queue *q, struct bio **bio);
|
||||
struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);
|
||||
|
||||
static inline bool blk_queue_may_bounce(struct request_queue *q)
|
||||
{
|
||||
@ -387,10 +380,12 @@ static inline bool blk_queue_may_bounce(struct request_queue *q)
|
||||
max_low_pfn >= max_pfn;
|
||||
}
|
||||
|
||||
static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
|
||||
static inline struct bio *blk_queue_bounce(struct bio *bio,
|
||||
struct request_queue *q)
|
||||
{
|
||||
if (unlikely(blk_queue_may_bounce(q) && bio_has_data(*bio)))
|
||||
__blk_queue_bounce(q, bio);
|
||||
if (unlikely(blk_queue_may_bounce(q) && bio_has_data(bio)))
|
||||
return __blk_queue_bounce(bio, q);
|
||||
return bio;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP_IOLATENCY
|
||||
|
@ -199,24 +199,24 @@ err_put:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
|
||||
struct bio *__blk_queue_bounce(struct bio *bio_orig, struct request_queue *q)
|
||||
{
|
||||
struct bio *bio;
|
||||
int rw = bio_data_dir(*bio_orig);
|
||||
int rw = bio_data_dir(bio_orig);
|
||||
struct bio_vec *to, from;
|
||||
struct bvec_iter iter;
|
||||
unsigned i = 0, bytes = 0;
|
||||
bool bounce = false;
|
||||
int sectors;
|
||||
|
||||
bio_for_each_segment(from, *bio_orig, iter) {
|
||||
bio_for_each_segment(from, bio_orig, iter) {
|
||||
if (i++ < BIO_MAX_VECS)
|
||||
bytes += from.bv_len;
|
||||
if (PageHighMem(from.bv_page))
|
||||
bounce = true;
|
||||
}
|
||||
if (!bounce)
|
||||
return;
|
||||
return bio_orig;
|
||||
|
||||
/*
|
||||
* Individual bvecs might not be logical block aligned. Round down
|
||||
@ -225,13 +225,13 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
|
||||
*/
|
||||
sectors = ALIGN_DOWN(bytes, queue_logical_block_size(q)) >>
|
||||
SECTOR_SHIFT;
|
||||
if (sectors < bio_sectors(*bio_orig)) {
|
||||
bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
|
||||
bio_chain(bio, *bio_orig);
|
||||
submit_bio_noacct(*bio_orig);
|
||||
*bio_orig = bio;
|
||||
if (sectors < bio_sectors(bio_orig)) {
|
||||
bio = bio_split(bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
|
||||
bio_chain(bio, bio_orig);
|
||||
submit_bio_noacct(bio_orig);
|
||||
bio_orig = bio;
|
||||
}
|
||||
bio = bounce_clone_bio(*bio_orig);
|
||||
bio = bounce_clone_bio(bio_orig);
|
||||
|
||||
/*
|
||||
* Bvec table can't be updated by bio_for_each_segment_all(),
|
||||
@ -254,7 +254,7 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
|
||||
to->bv_page = bounce_page;
|
||||
}
|
||||
|
||||
trace_block_bio_bounce(*bio_orig);
|
||||
trace_block_bio_bounce(bio_orig);
|
||||
|
||||
bio->bi_flags |= (1 << BIO_BOUNCED);
|
||||
|
||||
@ -263,6 +263,6 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
|
||||
else
|
||||
bio->bi_end_io = bounce_end_io_write;
|
||||
|
||||
bio->bi_private = *bio_orig;
|
||||
*bio_orig = bio;
|
||||
bio->bi_private = bio_orig;
|
||||
return bio;
|
||||
}
|
||||
|
@ -1151,6 +1151,7 @@ static void disk_release(struct device *dev)
|
||||
blk_mq_exit_queue(disk->queue);
|
||||
|
||||
blkcg_exit_queue(disk->queue);
|
||||
bioset_exit(&disk->bio_split);
|
||||
|
||||
disk_release_events(disk);
|
||||
kfree(disk->random);
|
||||
@ -1342,9 +1343,12 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
|
||||
if (!disk)
|
||||
goto out_put_queue;
|
||||
|
||||
if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0))
|
||||
goto out_free_disk;
|
||||
|
||||
disk->bdi = bdi_alloc(node_id);
|
||||
if (!disk->bdi)
|
||||
goto out_free_disk;
|
||||
goto out_free_bioset;
|
||||
|
||||
/* bdev_alloc() might need the queue, set before the first call */
|
||||
disk->queue = q;
|
||||
@ -1382,6 +1386,8 @@ out_destroy_part_tbl:
|
||||
iput(disk->part0->bd_inode);
|
||||
out_free_bdi:
|
||||
bdi_put(disk->bdi);
|
||||
out_free_bioset:
|
||||
bioset_exit(&disk->bio_split);
|
||||
out_free_disk:
|
||||
kfree(disk);
|
||||
out_put_queue:
|
||||
|
@ -104,6 +104,12 @@ int crypto_grab_kpp(struct crypto_kpp_spawn *spawn,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_grab_kpp);
|
||||
|
||||
int crypto_has_kpp(const char *alg_name, u32 type, u32 mask)
|
||||
{
|
||||
return crypto_type_has_alg(alg_name, &crypto_kpp_type, type, mask);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_has_kpp);
|
||||
|
||||
static void kpp_prepare_alg(struct kpp_alg *alg)
|
||||
{
|
||||
struct crypto_alg *base = &alg->base;
|
||||
|
@ -521,6 +521,12 @@ struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_alloc_shash);
|
||||
|
||||
int crypto_has_shash(const char *alg_name, u32 type, u32 mask)
|
||||
{
|
||||
return crypto_type_has_alg(alg_name, &crypto_shash_type, type, mask);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_has_shash);
|
||||
|
||||
static int shash_prepare_alg(struct shash_alg *alg)
|
||||
{
|
||||
struct crypto_alg *base = &alg->base;
|
||||
|
@ -248,15 +248,6 @@ config BLK_DEV_NBD
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config BLK_DEV_SX8
|
||||
tristate "Promise SATA SX8 support"
|
||||
depends on PCI
|
||||
help
|
||||
Saying Y or M here will enable support for the
|
||||
Promise SATA SX8 controllers.
|
||||
|
||||
Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
|
||||
|
||||
config BLK_DEV_RAM
|
||||
tristate "RAM block device support"
|
||||
help
|
||||
|
@ -26,8 +26,6 @@ obj-$(CONFIG_SUNVDC) += sunvdc.o
|
||||
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
|
||||
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
|
||||
|
||||
obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
|
||||
|
||||
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
|
||||
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
|
||||
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
|
||||
|
@ -974,25 +974,58 @@ static void drbd_bm_endio(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
/* For the layout, see comment above drbd_md_set_sector_offsets(). */
|
||||
static inline sector_t drbd_md_last_bitmap_sector(struct drbd_backing_dev *bdev)
|
||||
{
|
||||
switch (bdev->md.meta_dev_idx) {
|
||||
case DRBD_MD_INDEX_INTERNAL:
|
||||
case DRBD_MD_INDEX_FLEX_INT:
|
||||
return bdev->md.md_offset + bdev->md.al_offset -1;
|
||||
case DRBD_MD_INDEX_FLEX_EXT:
|
||||
default:
|
||||
return bdev->md.md_offset + bdev->md.md_size_sect -1;
|
||||
}
|
||||
}
|
||||
|
||||
static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
|
||||
{
|
||||
struct drbd_device *device = ctx->device;
|
||||
enum req_op op = ctx->flags & BM_AIO_READ ? REQ_OP_READ : REQ_OP_WRITE;
|
||||
struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op,
|
||||
GFP_NOIO, &drbd_md_io_bio_set);
|
||||
struct drbd_bitmap *b = device->bitmap;
|
||||
struct bio *bio;
|
||||
struct page *page;
|
||||
sector_t last_bm_sect;
|
||||
sector_t first_bm_sect;
|
||||
sector_t on_disk_sector;
|
||||
unsigned int len;
|
||||
|
||||
sector_t on_disk_sector =
|
||||
device->ldev->md.md_offset + device->ldev->md.bm_offset;
|
||||
on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
|
||||
first_bm_sect = device->ldev->md.md_offset + device->ldev->md.bm_offset;
|
||||
on_disk_sector = first_bm_sect + (((sector_t)page_nr) << (PAGE_SHIFT-SECTOR_SHIFT));
|
||||
|
||||
/* this might happen with very small
|
||||
* flexible external meta data device,
|
||||
* or with PAGE_SIZE > 4k */
|
||||
len = min_t(unsigned int, PAGE_SIZE,
|
||||
(drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
|
||||
last_bm_sect = drbd_md_last_bitmap_sector(device->ldev);
|
||||
if (first_bm_sect <= on_disk_sector && last_bm_sect >= on_disk_sector) {
|
||||
sector_t len_sect = last_bm_sect - on_disk_sector + 1;
|
||||
if (len_sect < PAGE_SIZE/SECTOR_SIZE)
|
||||
len = (unsigned int)len_sect*SECTOR_SIZE;
|
||||
else
|
||||
len = PAGE_SIZE;
|
||||
} else {
|
||||
if (__ratelimit(&drbd_ratelimit_state)) {
|
||||
drbd_err(device, "Invalid offset during on-disk bitmap access: "
|
||||
"page idx %u, sector %llu\n", page_nr, on_disk_sector);
|
||||
}
|
||||
ctx->error = -EIO;
|
||||
bm_set_page_io_err(b->bm_pages[page_nr]);
|
||||
if (atomic_dec_and_test(&ctx->in_flight)) {
|
||||
ctx->done = 1;
|
||||
wake_up(&device->misc_wait);
|
||||
kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* serialize IO on this page */
|
||||
bm_page_lock_io(device, page_nr);
|
||||
@ -1007,6 +1040,8 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
|
||||
bm_store_page_idx(page, page_nr);
|
||||
} else
|
||||
page = b->bm_pages[page_nr];
|
||||
bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, GFP_NOIO,
|
||||
&drbd_md_io_bio_set);
|
||||
bio->bi_iter.bi_sector = on_disk_sector;
|
||||
/* bio_add_page of a single page to an empty bio will always succeed,
|
||||
* according to api. Do we want to assert that? */
|
||||
|
@ -1608,7 +1608,7 @@ void drbd_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
|
||||
|
||||
blk_queue_split(&bio);
|
||||
bio = bio_split_to_limits(bio);
|
||||
|
||||
/*
|
||||
* what we "blindly" assume:
|
||||
|
@ -11,6 +11,8 @@
|
||||
* (part of code stolen from loop.c)
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "nbd: " fmt
|
||||
|
||||
#include <linux/major.h>
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
@ -1950,7 +1952,7 @@ again:
|
||||
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
|
||||
!refcount_inc_not_zero(&nbd->refs)) {
|
||||
mutex_unlock(&nbd_index_mutex);
|
||||
pr_err("nbd: device at index %d is going down\n",
|
||||
pr_err("device at index %d is going down\n",
|
||||
index);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -1961,7 +1963,7 @@ again:
|
||||
if (!nbd) {
|
||||
nbd = nbd_dev_add(index, 2);
|
||||
if (IS_ERR(nbd)) {
|
||||
pr_err("nbd: failed to add new device\n");
|
||||
pr_err("failed to add new device\n");
|
||||
return PTR_ERR(nbd);
|
||||
}
|
||||
}
|
||||
|
@ -201,6 +201,22 @@ static bool g_use_per_node_hctx;
|
||||
module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
|
||||
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
|
||||
|
||||
static bool g_memory_backed;
|
||||
module_param_named(memory_backed, g_memory_backed, bool, 0444);
|
||||
MODULE_PARM_DESC(memory_backed, "Create a memory-backed block device. Default: false");
|
||||
|
||||
static bool g_discard;
|
||||
module_param_named(discard, g_discard, bool, 0444);
|
||||
MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed null_blk device). Default: false");
|
||||
|
||||
static unsigned long g_cache_size;
|
||||
module_param_named(cache_size, g_cache_size, ulong, 0444);
|
||||
MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
|
||||
|
||||
static unsigned int g_mbps;
|
||||
module_param_named(mbps, g_mbps, uint, 0444);
|
||||
MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
|
||||
|
||||
static bool g_zoned;
|
||||
module_param_named(zoned, g_zoned, bool, S_IRUGO);
|
||||
MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
|
||||
@ -409,6 +425,8 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
|
||||
|
||||
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
|
||||
{
|
||||
@ -532,6 +550,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
|
||||
&nullb_device_attr_zone_max_open,
|
||||
&nullb_device_attr_zone_max_active,
|
||||
&nullb_device_attr_virt_boundary,
|
||||
&nullb_device_attr_no_sched,
|
||||
&nullb_device_attr_shared_tag_bitmap,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -588,7 +608,13 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
|
||||
static ssize_t memb_group_features_show(struct config_item *item, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE,
|
||||
"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n");
|
||||
"badblocks,blocking,blocksize,cache_size,"
|
||||
"completion_nsec,discard,home_node,hw_queue_depth,"
|
||||
"irqmode,max_sectors,mbps,memory_backed,no_sched,"
|
||||
"poll_queues,power,queue_mode,shared_tag_bitmap,size,"
|
||||
"submit_queues,use_per_node_hctx,virt_boundary,zoned,"
|
||||
"zone_capacity,zone_max_active,zone_max_open,"
|
||||
"zone_nr_conv,zone_size\n");
|
||||
}
|
||||
|
||||
CONFIGFS_ATTR_RO(memb_group_, features);
|
||||
@ -650,6 +676,10 @@ static struct nullb_device *null_alloc_dev(void)
|
||||
dev->irqmode = g_irqmode;
|
||||
dev->hw_queue_depth = g_hw_queue_depth;
|
||||
dev->blocking = g_blocking;
|
||||
dev->memory_backed = g_memory_backed;
|
||||
dev->discard = g_discard;
|
||||
dev->cache_size = g_cache_size;
|
||||
dev->mbps = g_mbps;
|
||||
dev->use_per_node_hctx = g_use_per_node_hctx;
|
||||
dev->zoned = g_zoned;
|
||||
dev->zone_size = g_zone_size;
|
||||
@ -658,6 +688,8 @@ static struct nullb_device *null_alloc_dev(void)
|
||||
dev->zone_max_open = g_zone_max_open;
|
||||
dev->zone_max_active = g_zone_max_active;
|
||||
dev->virt_boundary = g_virt_boundary;
|
||||
dev->no_sched = g_no_sched;
|
||||
dev->shared_tag_bitmap = g_shared_tag_bitmap;
|
||||
return dev;
|
||||
}
|
||||
|
||||
@ -1655,7 +1687,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
static void cleanup_queue(struct nullb_queue *nq)
|
||||
{
|
||||
kfree(nq->tag_map);
|
||||
bitmap_free(nq->tag_map);
|
||||
kfree(nq->cmds);
|
||||
}
|
||||
|
||||
@ -1782,14 +1814,13 @@ static const struct block_device_operations null_rq_ops = {
|
||||
static int setup_commands(struct nullb_queue *nq)
|
||||
{
|
||||
struct nullb_cmd *cmd;
|
||||
int i, tag_size;
|
||||
int i;
|
||||
|
||||
nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
|
||||
if (!nq->cmds)
|
||||
return -ENOMEM;
|
||||
|
||||
tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
|
||||
nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
|
||||
nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL);
|
||||
if (!nq->tag_map) {
|
||||
kfree(nq->cmds);
|
||||
return -ENOMEM;
|
||||
@ -1866,31 +1897,48 @@ static int null_gendisk_register(struct nullb *nullb)
|
||||
|
||||
static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
|
||||
{
|
||||
unsigned int flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
int hw_queues, numa_node;
|
||||
unsigned int queue_depth;
|
||||
int poll_queues;
|
||||
|
||||
set->ops = &null_mq_ops;
|
||||
set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
|
||||
g_submit_queues;
|
||||
poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues;
|
||||
if (poll_queues)
|
||||
set->nr_hw_queues += poll_queues;
|
||||
set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
|
||||
g_hw_queue_depth;
|
||||
set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
|
||||
set->cmd_size = sizeof(struct nullb_cmd);
|
||||
set->flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
if (g_no_sched)
|
||||
set->flags |= BLK_MQ_F_NO_SCHED;
|
||||
if (g_shared_tag_bitmap)
|
||||
set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
set->driver_data = nullb;
|
||||
if (poll_queues)
|
||||
set->nr_maps = 3;
|
||||
else
|
||||
set->nr_maps = 1;
|
||||
if (nullb) {
|
||||
hw_queues = nullb->dev->submit_queues;
|
||||
poll_queues = nullb->dev->poll_queues;
|
||||
queue_depth = nullb->dev->hw_queue_depth;
|
||||
numa_node = nullb->dev->home_node;
|
||||
if (nullb->dev->no_sched)
|
||||
flags |= BLK_MQ_F_NO_SCHED;
|
||||
if (nullb->dev->shared_tag_bitmap)
|
||||
flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
if (nullb->dev->blocking)
|
||||
flags |= BLK_MQ_F_BLOCKING;
|
||||
} else {
|
||||
hw_queues = g_submit_queues;
|
||||
poll_queues = g_poll_queues;
|
||||
queue_depth = g_hw_queue_depth;
|
||||
numa_node = g_home_node;
|
||||
if (g_no_sched)
|
||||
flags |= BLK_MQ_F_NO_SCHED;
|
||||
if (g_shared_tag_bitmap)
|
||||
flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
if (g_blocking)
|
||||
flags |= BLK_MQ_F_BLOCKING;
|
||||
}
|
||||
|
||||
if ((nullb && nullb->dev->blocking) || g_blocking)
|
||||
set->flags |= BLK_MQ_F_BLOCKING;
|
||||
set->ops = &null_mq_ops;
|
||||
set->cmd_size = sizeof(struct nullb_cmd);
|
||||
set->flags = flags;
|
||||
set->driver_data = nullb;
|
||||
set->nr_hw_queues = hw_queues;
|
||||
set->queue_depth = queue_depth;
|
||||
set->numa_node = numa_node;
|
||||
if (poll_queues) {
|
||||
set->nr_hw_queues += poll_queues;
|
||||
set->nr_maps = 3;
|
||||
} else {
|
||||
set->nr_maps = 1;
|
||||
}
|
||||
|
||||
return blk_mq_alloc_tag_set(set);
|
||||
}
|
||||
@ -2042,8 +2090,13 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
|
||||
|
||||
mutex_lock(&lock);
|
||||
nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
|
||||
dev->index = nullb->index;
|
||||
rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
|
||||
if (rv < 0) {
|
||||
mutex_unlock(&lock);
|
||||
goto out_cleanup_zone;
|
||||
}
|
||||
nullb->index = rv;
|
||||
dev->index = rv;
|
||||
mutex_unlock(&lock);
|
||||
|
||||
blk_queue_logical_block_size(nullb->q, dev->blocksize);
|
||||
@ -2069,7 +2122,7 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
|
||||
rv = null_gendisk_register(nullb);
|
||||
if (rv)
|
||||
goto out_cleanup_zone;
|
||||
goto out_ida_free;
|
||||
|
||||
mutex_lock(&lock);
|
||||
list_add_tail(&nullb->list, &nullb_list);
|
||||
@ -2078,6 +2131,9 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
pr_info("disk %s created\n", nullb->disk_name);
|
||||
|
||||
return 0;
|
||||
|
||||
out_ida_free:
|
||||
ida_free(&nullb_indexes, nullb->index);
|
||||
out_cleanup_zone:
|
||||
null_free_zoned_dev(dev);
|
||||
out_cleanup_disk:
|
||||
|
@ -113,6 +113,8 @@ struct nullb_device {
|
||||
bool discard; /* if support discard */
|
||||
bool zoned; /* if device is zoned */
|
||||
bool virt_boundary; /* virtual boundary on/off for the device */
|
||||
bool no_sched; /* no IO scheduler for the device */
|
||||
bool shared_tag_bitmap; /* use hostwide shared tags */
|
||||
};
|
||||
|
||||
struct nullb {
|
||||
|
@ -2399,7 +2399,7 @@ static void pkt_submit_bio(struct bio *bio)
|
||||
struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
|
||||
struct bio *split;
|
||||
|
||||
blk_queue_split(&bio);
|
||||
bio = bio_split_to_limits(bio);
|
||||
|
||||
pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
|
@ -586,7 +586,7 @@ static void ps3vram_submit_bio(struct bio *bio)
|
||||
|
||||
dev_dbg(&dev->core, "%s\n", __func__);
|
||||
|
||||
blk_queue_split(&bio);
|
||||
bio = bio_split_to_limits(bio);
|
||||
|
||||
spin_lock_irq(&priv->lock);
|
||||
busy = !bio_list_empty(&priv->list);
|
||||
|
@ -376,7 +376,7 @@ static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = rnbd_clt_resize_disk(dev, (size_t)sectors);
|
||||
ret = rnbd_clt_resize_disk(dev, sectors);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -68,39 +68,18 @@ static inline bool rnbd_clt_get_dev(struct rnbd_clt_dev *dev)
|
||||
return refcount_inc_not_zero(&dev->refcount);
|
||||
}
|
||||
|
||||
static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
|
||||
const struct rnbd_msg_open_rsp *rsp)
|
||||
static void rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
|
||||
sector_t new_nsectors)
|
||||
{
|
||||
struct rnbd_clt_session *sess = dev->sess;
|
||||
if (get_capacity(dev->gd) == new_nsectors)
|
||||
return;
|
||||
|
||||
if (!rsp->logical_block_size)
|
||||
return -EINVAL;
|
||||
|
||||
dev->device_id = le32_to_cpu(rsp->device_id);
|
||||
dev->nsectors = le64_to_cpu(rsp->nsectors);
|
||||
dev->logical_block_size = le16_to_cpu(rsp->logical_block_size);
|
||||
dev->physical_block_size = le16_to_cpu(rsp->physical_block_size);
|
||||
dev->max_discard_sectors = le32_to_cpu(rsp->max_discard_sectors);
|
||||
dev->discard_granularity = le32_to_cpu(rsp->discard_granularity);
|
||||
dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
|
||||
dev->secure_discard = le16_to_cpu(rsp->secure_discard);
|
||||
dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
|
||||
dev->fua = !!(rsp->cache_policy & RNBD_FUA);
|
||||
|
||||
dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
|
||||
dev->max_segments = sess->max_segments;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
|
||||
size_t new_nsectors)
|
||||
{
|
||||
rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n",
|
||||
dev->nsectors, new_nsectors);
|
||||
dev->nsectors = new_nsectors;
|
||||
set_capacity_and_notify(dev->gd, dev->nsectors);
|
||||
return 0;
|
||||
/*
|
||||
* If the size changed, we need to revalidate it
|
||||
*/
|
||||
rnbd_clt_info(dev, "Device size changed from %llu to %llu sectors\n",
|
||||
get_capacity(dev->gd), new_nsectors);
|
||||
set_capacity_and_notify(dev->gd, new_nsectors);
|
||||
}
|
||||
|
||||
static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
|
||||
@ -119,19 +98,16 @@ static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
|
||||
if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) {
|
||||
u64 nsectors = le64_to_cpu(rsp->nsectors);
|
||||
|
||||
/*
|
||||
* If the device was remapped and the size changed in the
|
||||
* meantime we need to revalidate it
|
||||
*/
|
||||
if (dev->nsectors != nsectors)
|
||||
rnbd_clt_change_capacity(dev, nsectors);
|
||||
rnbd_clt_change_capacity(dev, nsectors);
|
||||
gd_kobj = &disk_to_dev(dev->gd)->kobj;
|
||||
kobject_uevent(gd_kobj, KOBJ_ONLINE);
|
||||
rnbd_clt_info(dev, "Device online, device remapped successfully\n");
|
||||
}
|
||||
err = rnbd_clt_set_dev_attr(dev, rsp);
|
||||
if (err)
|
||||
if (!rsp->logical_block_size) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
dev->device_id = le32_to_cpu(rsp->device_id);
|
||||
dev->dev_state = DEV_STATE_MAPPED;
|
||||
|
||||
out:
|
||||
@ -140,7 +116,7 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
|
||||
int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, sector_t newsize)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@ -150,7 +126,7 @@ int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
ret = rnbd_clt_change_capacity(dev, newsize);
|
||||
rnbd_clt_change_capacity(dev, newsize);
|
||||
|
||||
out:
|
||||
mutex_unlock(&dev->lock);
|
||||
@ -507,6 +483,11 @@ static void msg_open_conf(struct work_struct *work)
|
||||
struct rnbd_msg_open_rsp *rsp = iu->buf;
|
||||
struct rnbd_clt_dev *dev = iu->dev;
|
||||
int errno = iu->errno;
|
||||
bool from_map = false;
|
||||
|
||||
/* INIT state is only triggered from rnbd_clt_map_device */
|
||||
if (dev->dev_state == DEV_STATE_INIT)
|
||||
from_map = true;
|
||||
|
||||
if (errno) {
|
||||
rnbd_clt_err(dev,
|
||||
@ -523,7 +504,9 @@ static void msg_open_conf(struct work_struct *work)
|
||||
send_msg_close(dev, device_id, RTRS_PERMIT_NOWAIT);
|
||||
}
|
||||
}
|
||||
kfree(rsp);
|
||||
/* We free rsp in rnbd_clt_map_device for map scenario */
|
||||
if (!from_map)
|
||||
kfree(rsp);
|
||||
wake_up_iu_comp(iu, errno);
|
||||
rnbd_put_iu(dev->sess, iu);
|
||||
rnbd_clt_put_dev(dev);
|
||||
@ -942,7 +925,7 @@ static int rnbd_client_open(struct block_device *block_device, fmode_t mode)
|
||||
{
|
||||
struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
|
||||
|
||||
if (dev->read_only && (mode & FMODE_WRITE))
|
||||
if (get_disk_ro(dev->gd) && (mode & FMODE_WRITE))
|
||||
return -EPERM;
|
||||
|
||||
if (dev->dev_state == DEV_STATE_UNMAPPED ||
|
||||
@ -963,10 +946,10 @@ static int rnbd_client_getgeo(struct block_device *block_device,
|
||||
struct hd_geometry *geo)
|
||||
{
|
||||
u64 size;
|
||||
struct rnbd_clt_dev *dev;
|
||||
struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
|
||||
struct queue_limits *limit = &dev->queue->limits;
|
||||
|
||||
dev = block_device->bd_disk->private_data;
|
||||
size = dev->size * (dev->logical_block_size / SECTOR_SIZE);
|
||||
size = dev->size * (limit->logical_block_size / SECTOR_SIZE);
|
||||
geo->cylinders = size >> 6; /* size/64 */
|
||||
geo->heads = 4;
|
||||
geo->sectors = 16;
|
||||
@ -1350,11 +1333,15 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
|
||||
}
|
||||
}
|
||||
|
||||
static void setup_request_queue(struct rnbd_clt_dev *dev)
|
||||
static void setup_request_queue(struct rnbd_clt_dev *dev,
|
||||
struct rnbd_msg_open_rsp *rsp)
|
||||
{
|
||||
blk_queue_logical_block_size(dev->queue, dev->logical_block_size);
|
||||
blk_queue_physical_block_size(dev->queue, dev->physical_block_size);
|
||||
blk_queue_max_hw_sectors(dev->queue, dev->max_hw_sectors);
|
||||
blk_queue_logical_block_size(dev->queue,
|
||||
le16_to_cpu(rsp->logical_block_size));
|
||||
blk_queue_physical_block_size(dev->queue,
|
||||
le16_to_cpu(rsp->physical_block_size));
|
||||
blk_queue_max_hw_sectors(dev->queue,
|
||||
dev->sess->max_io_size / SECTOR_SIZE);
|
||||
|
||||
/*
|
||||
* we don't support discards to "discontiguous" segments
|
||||
@ -1362,21 +1349,27 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
|
||||
*/
|
||||
blk_queue_max_discard_segments(dev->queue, 1);
|
||||
|
||||
blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
|
||||
dev->queue->limits.discard_granularity = dev->discard_granularity;
|
||||
dev->queue->limits.discard_alignment = dev->discard_alignment;
|
||||
if (dev->secure_discard)
|
||||
blk_queue_max_discard_sectors(dev->queue,
|
||||
le32_to_cpu(rsp->max_discard_sectors));
|
||||
dev->queue->limits.discard_granularity =
|
||||
le32_to_cpu(rsp->discard_granularity);
|
||||
dev->queue->limits.discard_alignment =
|
||||
le32_to_cpu(rsp->discard_alignment);
|
||||
if (le16_to_cpu(rsp->secure_discard))
|
||||
blk_queue_max_secure_erase_sectors(dev->queue,
|
||||
dev->max_discard_sectors);
|
||||
le32_to_cpu(rsp->max_discard_sectors));
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
|
||||
blk_queue_max_segments(dev->queue, dev->max_segments);
|
||||
blk_queue_max_segments(dev->queue, dev->sess->max_segments);
|
||||
blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
|
||||
blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
|
||||
blk_queue_write_cache(dev->queue, dev->wc, dev->fua);
|
||||
blk_queue_write_cache(dev->queue,
|
||||
!!(rsp->cache_policy & RNBD_WRITEBACK),
|
||||
!!(rsp->cache_policy & RNBD_FUA));
|
||||
}
|
||||
|
||||
static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
|
||||
static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
|
||||
struct rnbd_msg_open_rsp *rsp, int idx)
|
||||
{
|
||||
int err;
|
||||
|
||||
@ -1388,19 +1381,15 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
|
||||
dev->gd->private_data = dev;
|
||||
snprintf(dev->gd->disk_name, sizeof(dev->gd->disk_name), "rnbd%d",
|
||||
idx);
|
||||
pr_debug("disk_name=%s, capacity=%zu\n",
|
||||
pr_debug("disk_name=%s, capacity=%llu\n",
|
||||
dev->gd->disk_name,
|
||||
dev->nsectors * (dev->logical_block_size / SECTOR_SIZE)
|
||||
);
|
||||
le64_to_cpu(rsp->nsectors) *
|
||||
(le16_to_cpu(rsp->logical_block_size) / SECTOR_SIZE));
|
||||
|
||||
set_capacity(dev->gd, dev->nsectors);
|
||||
set_capacity(dev->gd, le64_to_cpu(rsp->nsectors));
|
||||
|
||||
if (dev->access_mode == RNBD_ACCESS_RO) {
|
||||
dev->read_only = true;
|
||||
if (dev->access_mode == RNBD_ACCESS_RO)
|
||||
set_disk_ro(dev->gd, true);
|
||||
} else {
|
||||
dev->read_only = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Network device does not need rotational
|
||||
@ -1413,11 +1402,13 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
|
||||
static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
|
||||
struct rnbd_msg_open_rsp *rsp)
|
||||
{
|
||||
int idx = dev->clt_device_id;
|
||||
|
||||
dev->size = dev->nsectors * dev->logical_block_size;
|
||||
dev->size = le64_to_cpu(rsp->nsectors) *
|
||||
le16_to_cpu(rsp->logical_block_size);
|
||||
|
||||
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
|
||||
if (IS_ERR(dev->gd))
|
||||
@ -1425,8 +1416,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
|
||||
dev->queue = dev->gd->queue;
|
||||
rnbd_init_mq_hw_queues(dev);
|
||||
|
||||
setup_request_queue(dev);
|
||||
return rnbd_clt_setup_gen_disk(dev, idx);
|
||||
setup_request_queue(dev, rsp);
|
||||
return rnbd_clt_setup_gen_disk(dev, rsp, idx);
|
||||
}
|
||||
|
||||
static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
|
||||
@ -1562,7 +1553,14 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
|
||||
{
|
||||
struct rnbd_clt_session *sess;
|
||||
struct rnbd_clt_dev *dev;
|
||||
int ret;
|
||||
int ret, errno;
|
||||
struct rnbd_msg_open_rsp *rsp;
|
||||
struct rnbd_msg_open msg;
|
||||
struct rnbd_iu *iu;
|
||||
struct kvec vec = {
|
||||
.iov_base = &msg,
|
||||
.iov_len = sizeof(msg)
|
||||
};
|
||||
|
||||
if (exists_devpath(pathname, sessname))
|
||||
return ERR_PTR(-EEXIST);
|
||||
@ -1582,17 +1580,47 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
|
||||
ret = -EEXIST;
|
||||
goto put_dev;
|
||||
}
|
||||
ret = send_msg_open(dev, RTRS_PERMIT_WAIT);
|
||||
|
||||
rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
|
||||
if (!rsp) {
|
||||
ret = -ENOMEM;
|
||||
goto del_dev;
|
||||
}
|
||||
|
||||
iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT);
|
||||
if (!iu) {
|
||||
ret = -ENOMEM;
|
||||
kfree(rsp);
|
||||
goto del_dev;
|
||||
}
|
||||
iu->buf = rsp;
|
||||
iu->dev = dev;
|
||||
sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));
|
||||
|
||||
msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN);
|
||||
msg.access_mode = dev->access_mode;
|
||||
strscpy(msg.dev_name, dev->pathname, sizeof(msg.dev_name));
|
||||
|
||||
WARN_ON(!rnbd_clt_get_dev(dev));
|
||||
ret = send_usr_msg(sess->rtrs, READ, iu,
|
||||
&vec, sizeof(*rsp), iu->sgt.sgl, 1,
|
||||
msg_open_conf, &errno, RTRS_PERMIT_WAIT);
|
||||
if (ret) {
|
||||
rnbd_clt_put_dev(dev);
|
||||
rnbd_put_iu(sess, iu);
|
||||
} else {
|
||||
ret = errno;
|
||||
}
|
||||
if (ret) {
|
||||
rnbd_clt_err(dev,
|
||||
"map_device: failed, can't open remote device, err: %d\n",
|
||||
ret);
|
||||
goto del_dev;
|
||||
goto put_iu;
|
||||
}
|
||||
mutex_lock(&dev->lock);
|
||||
pr_debug("Opened remote device: session=%s, path='%s'\n",
|
||||
sess->sessname, pathname);
|
||||
ret = rnbd_client_setup_device(dev);
|
||||
ret = rnbd_client_setup_device(dev, rsp);
|
||||
if (ret) {
|
||||
rnbd_clt_err(dev,
|
||||
"map_device: Failed to configure device, err: %d\n",
|
||||
@ -1602,21 +1630,30 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
|
||||
}
|
||||
|
||||
rnbd_clt_info(dev,
|
||||
"map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
|
||||
dev->gd->disk_name, dev->nsectors,
|
||||
dev->logical_block_size, dev->physical_block_size,
|
||||
dev->max_discard_sectors,
|
||||
dev->discard_granularity, dev->discard_alignment,
|
||||
dev->secure_discard, dev->max_segments,
|
||||
dev->max_hw_sectors, dev->wc, dev->fua);
|
||||
"map_device: Device mapped as %s (nsectors: %llu, logical_block_size: %d, physical_block_size: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
|
||||
dev->gd->disk_name, le64_to_cpu(rsp->nsectors),
|
||||
le16_to_cpu(rsp->logical_block_size),
|
||||
le16_to_cpu(rsp->physical_block_size),
|
||||
le32_to_cpu(rsp->max_discard_sectors),
|
||||
le32_to_cpu(rsp->discard_granularity),
|
||||
le32_to_cpu(rsp->discard_alignment),
|
||||
le16_to_cpu(rsp->secure_discard),
|
||||
sess->max_segments, sess->max_io_size / SECTOR_SIZE,
|
||||
!!(rsp->cache_policy & RNBD_WRITEBACK),
|
||||
!!(rsp->cache_policy & RNBD_FUA));
|
||||
|
||||
mutex_unlock(&dev->lock);
|
||||
kfree(rsp);
|
||||
rnbd_put_iu(sess, iu);
|
||||
rnbd_clt_put_sess(sess);
|
||||
|
||||
return dev;
|
||||
|
||||
send_close:
|
||||
send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT);
|
||||
put_iu:
|
||||
kfree(rsp);
|
||||
rnbd_put_iu(sess, iu);
|
||||
del_dev:
|
||||
delete_dev(dev);
|
||||
put_dev:
|
||||
|
@ -106,6 +106,7 @@ struct rnbd_queue {
|
||||
};
|
||||
|
||||
struct rnbd_clt_dev {
|
||||
struct kobject kobj;
|
||||
struct rnbd_clt_session *sess;
|
||||
struct request_queue *queue;
|
||||
struct rnbd_queue *hw_queues;
|
||||
@ -114,27 +115,14 @@ struct rnbd_clt_dev {
|
||||
u32 clt_device_id;
|
||||
struct mutex lock;
|
||||
enum rnbd_clt_dev_state dev_state;
|
||||
refcount_t refcount;
|
||||
char *pathname;
|
||||
enum rnbd_access_mode access_mode;
|
||||
u32 nr_poll_queues;
|
||||
bool read_only;
|
||||
bool wc;
|
||||
bool fua;
|
||||
u32 max_hw_sectors;
|
||||
u32 max_discard_sectors;
|
||||
u32 discard_granularity;
|
||||
u32 discard_alignment;
|
||||
u16 secure_discard;
|
||||
u16 physical_block_size;
|
||||
u16 logical_block_size;
|
||||
u16 max_segments;
|
||||
size_t nsectors;
|
||||
u64 size; /* device size in bytes */
|
||||
struct list_head list;
|
||||
struct gendisk *gd;
|
||||
struct kobject kobj;
|
||||
char *blk_symlink_name;
|
||||
refcount_t refcount;
|
||||
struct work_struct unmap_on_rmmod_work;
|
||||
};
|
||||
|
||||
@ -150,7 +138,7 @@ int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
|
||||
const struct attribute *sysfs_self);
|
||||
|
||||
int rnbd_clt_remap_device(struct rnbd_clt_dev *dev);
|
||||
int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize);
|
||||
int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, sector_t newsize);
|
||||
|
||||
/* rnbd-clt-sysfs.c */
|
||||
|
||||
|
@ -224,7 +224,6 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
|
||||
wait_for_completion(&dc); /* wait for inflights to drop to zero */
|
||||
|
||||
rnbd_dev_close(sess_dev->rnbd_dev);
|
||||
list_del(&sess_dev->sess_list);
|
||||
mutex_lock(&sess_dev->dev->lock);
|
||||
list_del(&sess_dev->dev_list);
|
||||
if (sess_dev->open_flags & FMODE_WRITE)
|
||||
@ -239,14 +238,14 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
|
||||
|
||||
static void destroy_sess(struct rnbd_srv_session *srv_sess)
|
||||
{
|
||||
struct rnbd_srv_sess_dev *sess_dev, *tmp;
|
||||
struct rnbd_srv_sess_dev *sess_dev;
|
||||
unsigned long index;
|
||||
|
||||
if (list_empty(&srv_sess->sess_dev_list))
|
||||
if (xa_empty(&srv_sess->index_idr))
|
||||
goto out;
|
||||
|
||||
mutex_lock(&srv_sess->lock);
|
||||
list_for_each_entry_safe(sess_dev, tmp, &srv_sess->sess_dev_list,
|
||||
sess_list)
|
||||
xa_for_each(&srv_sess->index_idr, index, sess_dev)
|
||||
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
|
||||
mutex_unlock(&srv_sess->lock);
|
||||
|
||||
@ -281,7 +280,6 @@ static int create_sess(struct rtrs_srv_sess *rtrs)
|
||||
|
||||
srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs);
|
||||
xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC);
|
||||
INIT_LIST_HEAD(&srv_sess->sess_dev_list);
|
||||
mutex_init(&srv_sess->lock);
|
||||
mutex_lock(&sess_lock);
|
||||
list_add(&srv_sess->list, &sess_list);
|
||||
@ -323,10 +321,11 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
|
||||
{
|
||||
struct rnbd_srv_session *sess = sess_dev->sess;
|
||||
|
||||
sess_dev->keep_id = true;
|
||||
/* It is already started to close by client's close message. */
|
||||
if (!mutex_trylock(&sess->lock))
|
||||
return;
|
||||
|
||||
sess_dev->keep_id = true;
|
||||
/* first remove sysfs itself to avoid deadlock */
|
||||
sysfs_remove_file_self(&sess_dev->kobj, &attr->attr);
|
||||
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
|
||||
@ -666,11 +665,12 @@ static struct rnbd_srv_sess_dev *
|
||||
find_srv_sess_dev(struct rnbd_srv_session *srv_sess, const char *dev_name)
|
||||
{
|
||||
struct rnbd_srv_sess_dev *sess_dev;
|
||||
unsigned long index;
|
||||
|
||||
if (list_empty(&srv_sess->sess_dev_list))
|
||||
if (xa_empty(&srv_sess->index_idr))
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry(sess_dev, &srv_sess->sess_dev_list, sess_list)
|
||||
xa_for_each(&srv_sess->index_idr, index, sess_dev)
|
||||
if (!strcmp(sess_dev->pathname, dev_name))
|
||||
return sess_dev;
|
||||
|
||||
@ -780,8 +780,6 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
|
||||
list_add(&srv_sess_dev->dev_list, &srv_dev->sess_dev_list);
|
||||
mutex_unlock(&srv_dev->lock);
|
||||
|
||||
list_add(&srv_sess_dev->sess_list, &srv_sess->sess_dev_list);
|
||||
|
||||
rnbd_srv_info(srv_sess_dev, "Opened device '%s'\n", srv_dev->id);
|
||||
|
||||
kfree(full_path);
|
||||
|
@ -25,8 +25,6 @@ struct rnbd_srv_session {
|
||||
int queue_depth;
|
||||
|
||||
struct xarray index_idr;
|
||||
/* List of struct rnbd_srv_sess_dev */
|
||||
struct list_head sess_dev_list;
|
||||
struct mutex lock;
|
||||
u8 ver;
|
||||
};
|
||||
@ -48,8 +46,6 @@ struct rnbd_srv_dev {
|
||||
struct rnbd_srv_sess_dev {
|
||||
/* Entry inside rnbd_srv_dev struct */
|
||||
struct list_head dev_list;
|
||||
/* Entry inside rnbd_srv_session struct */
|
||||
struct list_head sess_list;
|
||||
struct rnbd_dev *rnbd_dev;
|
||||
struct rnbd_srv_session *sess;
|
||||
struct rnbd_srv_dev *dev;
|
||||
|
1582
drivers/block/sx8.c
1582
drivers/block/sx8.c
File diff suppressed because it is too large
Load Diff
@ -47,7 +47,12 @@
|
||||
#define UBLK_MINORS (1U << MINORBITS)
|
||||
|
||||
/* All UBLK_F_* have to be included into UBLK_F_ALL */
|
||||
#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_URING_CMD_COMP_IN_TASK)
|
||||
#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
|
||||
| UBLK_F_URING_CMD_COMP_IN_TASK \
|
||||
| UBLK_F_NEED_GET_DATA)
|
||||
|
||||
/* All UBLK_PARAM_TYPE_* should be included here */
|
||||
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
|
||||
|
||||
struct ublk_rq_data {
|
||||
struct callback_head work;
|
||||
@ -86,6 +91,15 @@ struct ublk_uring_cmd_pdu {
|
||||
*/
|
||||
#define UBLK_IO_FLAG_ABORTED 0x04
|
||||
|
||||
/*
|
||||
* UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires
|
||||
* get data buffer address from ublksrv.
|
||||
*
|
||||
* Then, bio data could be copied into this data buffer for a WRITE request
|
||||
* after the IO command is issued again and UBLK_IO_FLAG_NEED_GET_DATA is unset.
|
||||
*/
|
||||
#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
|
||||
|
||||
struct ublk_io {
|
||||
/* userspace buffer address from io cmd */
|
||||
__u64 addr;
|
||||
@ -119,7 +133,6 @@ struct ublk_device {
|
||||
char *__queues;
|
||||
|
||||
unsigned short queue_size;
|
||||
unsigned short bs_shift;
|
||||
struct ublksrv_ctrl_dev_info dev_info;
|
||||
|
||||
struct blk_mq_tag_set tag_set;
|
||||
@ -137,6 +150,8 @@ struct ublk_device {
|
||||
spinlock_t mm_lock;
|
||||
struct mm_struct *mm;
|
||||
|
||||
struct ublk_params params;
|
||||
|
||||
struct completion completion;
|
||||
unsigned int nr_queues_ready;
|
||||
atomic_t nr_aborted_queues;
|
||||
@ -149,6 +164,12 @@ struct ublk_device {
|
||||
struct work_struct stop_work;
|
||||
};
|
||||
|
||||
/*
 * header of ublk_params
 *
 * The GET_PARAMS/SET_PARAMS control handlers copy just this much from the
 * user buffer first, then use it to size the full parameter copy.
 * NOTE(review): presumably mirrors the leading fields of struct ublk_params;
 * confirm against the uapi header.
 */
|
||||
struct ublk_params_header {
|
||||
__u32 len;	/* blob length claimed by userspace; handlers reject 0 and values above the command's len, and clamp to sizeof(struct ublk_params) */
|
||||
__u32 types;	/* parameter type bits; the set-params handler rejects 0 */
|
||||
};
|
||||
|
||||
static dev_t ublk_chr_devt;
|
||||
static struct class *ublk_chr_class;
|
||||
|
||||
@ -160,6 +181,90 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
|
||||
|
||||
static struct miscdevice ublk_misc;
|
||||
|
||||
static void ublk_dev_param_basic_apply(struct ublk_device *ub)
|
||||
{
|
||||
struct request_queue *q = ub->ub_disk->queue;
|
||||
const struct ublk_param_basic *p = &ub->params.basic;
|
||||
|
||||
blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
|
||||
blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
|
||||
blk_queue_io_min(q, 1 << p->io_min_shift);
|
||||
blk_queue_io_opt(q, 1 << p->io_opt_shift);
|
||||
|
||||
blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
|
||||
p->attrs & UBLK_ATTR_FUA);
|
||||
if (p->attrs & UBLK_ATTR_ROTATIONAL)
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
|
||||
else
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
|
||||
blk_queue_max_hw_sectors(q, p->max_sectors);
|
||||
blk_queue_chunk_sectors(q, p->chunk_sectors);
|
||||
blk_queue_virt_boundary(q, p->virt_boundary_mask);
|
||||
|
||||
if (p->attrs & UBLK_ATTR_READ_ONLY)
|
||||
set_disk_ro(ub->ub_disk, true);
|
||||
|
||||
set_capacity(ub->ub_disk, p->dev_sectors);
|
||||
}
|
||||
|
||||
static void ublk_dev_param_discard_apply(struct ublk_device *ub)
|
||||
{
|
||||
struct request_queue *q = ub->ub_disk->queue;
|
||||
const struct ublk_param_discard *p = &ub->params.discard;
|
||||
|
||||
q->limits.discard_alignment = p->discard_alignment;
|
||||
q->limits.discard_granularity = p->discard_granularity;
|
||||
blk_queue_max_discard_sectors(q, p->max_discard_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(q,
|
||||
p->max_write_zeroes_sectors);
|
||||
blk_queue_max_discard_segments(q, p->max_discard_segments);
|
||||
}
|
||||
|
||||
static int ublk_validate_params(const struct ublk_device *ub)
|
||||
{
|
||||
/* basic param is the only one which must be set */
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
|
||||
const struct ublk_param_basic *p = &ub->params.basic;
|
||||
|
||||
if (p->logical_bs_shift > PAGE_SHIFT)
|
||||
return -EINVAL;
|
||||
|
||||
if (p->logical_bs_shift > p->physical_bs_shift)
|
||||
return -EINVAL;
|
||||
|
||||
if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
|
||||
return -EINVAL;
|
||||
} else
|
||||
return -EINVAL;
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) {
|
||||
const struct ublk_param_discard *p = &ub->params.discard;
|
||||
|
||||
/* So far, only support single segment discard */
|
||||
if (p->max_discard_sectors && p->max_discard_segments != 1)
|
||||
return -EINVAL;
|
||||
|
||||
if (!p->discard_granularity)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ublk_apply_params(struct ublk_device *ub)
|
||||
{
|
||||
if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
|
||||
return -EINVAL;
|
||||
|
||||
ublk_dev_param_basic_apply(ub);
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
|
||||
ublk_dev_param_discard_apply(ub);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
|
||||
{
|
||||
if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
|
||||
@ -168,6 +273,13 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
|
||||
{
|
||||
if (ubq->flags & UBLK_F_NEED_GET_DATA)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct ublk_device *ublk_get_device(struct ublk_device *ub)
|
||||
{
|
||||
if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
|
||||
@ -509,6 +621,21 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req)
|
||||
}
|
||||
}
|
||||
|
||||
static void ubq_complete_io_cmd(struct ublk_io *io, int res)
|
||||
{
|
||||
/* mark this cmd owned by ublksrv */
|
||||
io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
|
||||
|
||||
/*
|
||||
* clear ACTIVE since we are done with this sqe/cmd slot
|
||||
* We can only accept io cmd in case of being not active.
|
||||
*/
|
||||
io->flags &= ~UBLK_IO_FLAG_ACTIVE;
|
||||
|
||||
/* tell ublksrv one io request is coming */
|
||||
io_uring_cmd_done(io->cmd, res, 0);
|
||||
}
|
||||
|
||||
#define UBLK_REQUEUE_DELAY_MS 3
|
||||
|
||||
static inline void __ublk_rq_task_work(struct request *req)
|
||||
@ -531,6 +658,30 @@ static inline void __ublk_rq_task_work(struct request *req)
|
||||
return;
|
||||
}
|
||||
|
||||
if (ublk_need_get_data(ubq) &&
|
||||
(req_op(req) == REQ_OP_WRITE ||
|
||||
req_op(req) == REQ_OP_FLUSH)) {
|
||||
/*
|
||||
* We have not handled UBLK_IO_NEED_GET_DATA command yet,
|
||||
* so immediately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
|
||||
* and notify it.
|
||||
*/
|
||||
if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
|
||||
io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
|
||||
pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
|
||||
__func__, io->cmd->cmd_op, ubq->q_id,
|
||||
req->tag, io->flags);
|
||||
ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* We have handled UBLK_IO_NEED_GET_DATA command,
|
||||
* so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
|
||||
* do the copy work.
|
||||
*/
|
||||
io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
|
||||
}
|
||||
|
||||
mapped_bytes = ublk_map_io(ubq, req, io);
|
||||
|
||||
/* partially mapped, update io descriptor */
|
||||
@ -553,17 +704,7 @@ static inline void __ublk_rq_task_work(struct request *req)
|
||||
mapped_bytes >> 9;
|
||||
}
|
||||
|
||||
/* mark this cmd owned by ublksrv */
|
||||
io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
|
||||
|
||||
/*
|
||||
* clear ACTIVE since we are done with this sqe/cmd slot
|
||||
* We can only accept io cmd in case of being not active.
|
||||
*/
|
||||
io->flags &= ~UBLK_IO_FLAG_ACTIVE;
|
||||
|
||||
/* tell ublksrv one io request is coming */
|
||||
io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0);
|
||||
ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
|
||||
}
|
||||
|
||||
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
|
||||
@ -788,16 +929,27 @@ static void ublk_daemon_monitor_work(struct work_struct *work)
|
||||
UBLK_DAEMON_MONITOR_PERIOD);
|
||||
}
|
||||
|
||||
static inline bool ublk_queue_ready(struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->nr_io_ready == ubq->q_depth;
|
||||
}
|
||||
|
||||
static void ublk_cancel_queue(struct ublk_queue *ubq)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!ublk_queue_ready(ubq))
|
||||
return;
|
||||
|
||||
for (i = 0; i < ubq->q_depth; i++) {
|
||||
struct ublk_io *io = &ubq->ios[i];
|
||||
|
||||
if (io->flags & UBLK_IO_FLAG_ACTIVE)
|
||||
io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
|
||||
}
|
||||
|
||||
/* all io commands are canceled */
|
||||
ubq->nr_io_ready = 0;
|
||||
}
|
||||
|
||||
/* Cancel all pending commands, must be called after del_gendisk() returns */
|
||||
@ -818,19 +970,14 @@ static void ublk_stop_dev(struct ublk_device *ub)
|
||||
del_gendisk(ub->ub_disk);
|
||||
ub->dev_info.state = UBLK_S_DEV_DEAD;
|
||||
ub->dev_info.ublksrv_pid = -1;
|
||||
ublk_cancel_dev(ub);
|
||||
put_disk(ub->ub_disk);
|
||||
ub->ub_disk = NULL;
|
||||
unlock:
|
||||
ublk_cancel_dev(ub);
|
||||
mutex_unlock(&ub->mutex);
|
||||
cancel_delayed_work_sync(&ub->monitor_work);
|
||||
}
|
||||
|
||||
static inline bool ublk_queue_ready(struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->nr_io_ready == ubq->q_depth;
|
||||
}
|
||||
|
||||
/* device can only be started after all IOs are ready */
|
||||
static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
{
|
||||
@ -846,6 +993,25 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
mutex_unlock(&ub->mutex);
|
||||
}
|
||||
|
||||
static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
|
||||
int tag, struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
|
||||
struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
|
||||
|
||||
if (ublk_can_use_task_work(ubq)) {
|
||||
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
|
||||
|
||||
/* should not fail since we call it just in ubq->ubq_daemon */
|
||||
task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
|
||||
} else {
|
||||
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
|
||||
|
||||
pdu->req = req;
|
||||
io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
|
||||
}
|
||||
}
|
||||
|
||||
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
|
||||
@ -884,6 +1050,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* ensure that the user issues UBLK_IO_NEED_GET_DATA
|
||||
* iff the driver has set UBLK_IO_FLAG_NEED_GET_DATA.
|
||||
*/
|
||||
if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
|
||||
^ (cmd_op == UBLK_IO_NEED_GET_DATA))
|
||||
goto out;
|
||||
|
||||
switch (cmd_op) {
|
||||
case UBLK_IO_FETCH_REQ:
|
||||
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
|
||||
@ -917,6 +1091,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
io->cmd = cmd;
|
||||
ublk_commit_completion(ub, ub_cmd);
|
||||
break;
|
||||
case UBLK_IO_NEED_GET_DATA:
|
||||
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
|
||||
goto out;
|
||||
io->addr = ub_cmd->addr;
|
||||
io->cmd = cmd;
|
||||
io->flags |= UBLK_IO_FLAG_ACTIVE;
|
||||
ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
|
||||
break;
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
@ -1083,13 +1265,13 @@ static void ublk_stop_work_fn(struct work_struct *work)
|
||||
ublk_stop_dev(ub);
|
||||
}
|
||||
|
||||
/* align maximum I/O size to PAGE_SIZE */
|
||||
/* align max io buffer size with PAGE_SIZE */
|
||||
static void ublk_align_max_io_size(struct ublk_device *ub)
|
||||
{
|
||||
unsigned int max_rq_bytes = ub->dev_info.rq_max_blocks << ub->bs_shift;
|
||||
unsigned int max_io_bytes = ub->dev_info.max_io_buf_bytes;
|
||||
|
||||
ub->dev_info.rq_max_blocks =
|
||||
round_down(max_rq_bytes, PAGE_SIZE) >> ub->bs_shift;
|
||||
ub->dev_info.max_io_buf_bytes =
|
||||
round_down(max_io_bytes, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static int ublk_add_tag_set(struct ublk_device *ub)
|
||||
@ -1132,7 +1314,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
int ublksrv_pid = (int)header->data[0];
|
||||
unsigned long dev_blocks = header->data[1];
|
||||
struct ublk_device *ub;
|
||||
struct gendisk *disk;
|
||||
int ret = -EINVAL;
|
||||
@ -1155,10 +1336,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* We may get disk size updated */
|
||||
if (dev_blocks)
|
||||
ub->dev_info.dev_blocks = dev_blocks;
|
||||
|
||||
disk = blk_mq_alloc_disk(&ub->tag_set, ub);
|
||||
if (IS_ERR(disk)) {
|
||||
ret = PTR_ERR(disk);
|
||||
@ -1168,27 +1345,28 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
|
||||
disk->fops = &ub_fops;
|
||||
disk->private_data = ub;
|
||||
|
||||
blk_queue_logical_block_size(disk->queue, ub->dev_info.block_size);
|
||||
blk_queue_physical_block_size(disk->queue, ub->dev_info.block_size);
|
||||
blk_queue_io_min(disk->queue, ub->dev_info.block_size);
|
||||
blk_queue_max_hw_sectors(disk->queue,
|
||||
ub->dev_info.rq_max_blocks << (ub->bs_shift - 9));
|
||||
disk->queue->limits.discard_granularity = PAGE_SIZE;
|
||||
blk_queue_max_discard_sectors(disk->queue, UINT_MAX >> 9);
|
||||
blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX >> 9);
|
||||
|
||||
set_capacity(disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9));
|
||||
|
||||
ub->dev_info.ublksrv_pid = ublksrv_pid;
|
||||
ub->ub_disk = disk;
|
||||
|
||||
ret = ublk_apply_params(ub);
|
||||
if (ret)
|
||||
goto out_put_disk;
|
||||
|
||||
get_device(&ub->cdev_dev);
|
||||
ret = add_disk(disk);
|
||||
if (ret) {
|
||||
put_disk(disk);
|
||||
goto out_unlock;
|
||||
/*
|
||||
* Has to drop the reference since ->free_disk won't be
|
||||
* called in case of add_disk failure.
|
||||
*/
|
||||
ublk_put_device(ub);
|
||||
goto out_put_disk;
|
||||
}
|
||||
set_bit(UB_STATE_USED, &ub->state);
|
||||
ub->dev_info.state = UBLK_S_DEV_LIVE;
|
||||
out_put_disk:
|
||||
if (ret)
|
||||
put_disk(disk);
|
||||
out_unlock:
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_put_device(ub);
|
||||
@ -1250,9 +1428,8 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
|
||||
{
|
||||
pr_devel("%s: dev id %d flags %llx\n", __func__,
|
||||
info->dev_id, info->flags);
|
||||
pr_devel("\t nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
|
||||
info->nr_hw_queues, info->queue_depth,
|
||||
info->block_size, info->dev_blocks);
|
||||
pr_devel("\t nr_hw_queues %d queue_depth %d\n",
|
||||
info->nr_hw_queues, info->queue_depth);
|
||||
}
|
||||
|
||||
static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
|
||||
@ -1312,7 +1489,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
|
||||
/* We are not ready to support zero copy */
|
||||
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
|
||||
|
||||
ub->bs_shift = ilog2(ub->dev_info.block_size);
|
||||
ub->dev_info.nr_hw_queues = min_t(unsigned int,
|
||||
ub->dev_info.nr_hw_queues, nr_cpu_ids);
|
||||
ublk_align_max_io_size(ub);
|
||||
@ -1436,6 +1612,82 @@ static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
struct ublk_params_header ph;
|
||||
struct ublk_device *ub;
|
||||
int ret;
|
||||
|
||||
if (header->len <= sizeof(ph) || !header->addr)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&ph, argp, sizeof(ph)))
|
||||
return -EFAULT;
|
||||
|
||||
if (ph.len > header->len || !ph.len)
|
||||
return -EINVAL;
|
||||
|
||||
if (ph.len > sizeof(struct ublk_params))
|
||||
ph.len = sizeof(struct ublk_params);
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&ub->mutex);
|
||||
if (copy_to_user(argp, &ub->params, ph.len))
|
||||
ret = -EFAULT;
|
||||
else
|
||||
ret = 0;
|
||||
mutex_unlock(&ub->mutex);
|
||||
|
||||
ublk_put_device(ub);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
struct ublk_params_header ph;
|
||||
struct ublk_device *ub;
|
||||
int ret = -EFAULT;
|
||||
|
||||
if (header->len <= sizeof(ph) || !header->addr)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&ph, argp, sizeof(ph)))
|
||||
return -EFAULT;
|
||||
|
||||
if (ph.len > header->len || !ph.len || !ph.types)
|
||||
return -EINVAL;
|
||||
|
||||
if (ph.len > sizeof(struct ublk_params))
|
||||
ph.len = sizeof(struct ublk_params);
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return -EINVAL;
|
||||
|
||||
/* parameters can only be changed when device isn't live */
|
||||
mutex_lock(&ub->mutex);
|
||||
if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
|
||||
ret = -EACCES;
|
||||
} else if (copy_from_user(&ub->params, argp, ph.len)) {
|
||||
ret = -EFAULT;
|
||||
} else {
|
||||
/* clear all we don't support yet */
|
||||
ub->params.types &= UBLK_PARAM_TYPE_ALL;
|
||||
ret = ublk_validate_params(ub);
|
||||
}
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_put_device(ub);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
@ -1471,6 +1723,12 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
||||
case UBLK_CMD_GET_QUEUE_AFFINITY:
|
||||
ret = ublk_ctrl_get_queue_affinity(cmd);
|
||||
break;
|
||||
case UBLK_CMD_GET_PARAMS:
|
||||
ret = ublk_ctrl_get_params(cmd);
|
||||
break;
|
||||
case UBLK_CMD_SET_PARAMS:
|
||||
ret = ublk_ctrl_set_params(cmd);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ config BCACHE_CLOSURES_DEBUG
|
||||
operations that get stuck.
|
||||
|
||||
config BCACHE_ASYNC_REGISTRATION
|
||||
bool "Asynchronous device registration (EXPERIMENTAL)"
|
||||
bool "Asynchronous device registration"
|
||||
depends on BCACHE
|
||||
help
|
||||
Add a sysfs file /sys/fs/bcache/register_async. Writing registering
|
||||
|
@ -3728,6 +3728,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
|
||||
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
|
||||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
|
||||
|
@ -1016,7 +1016,7 @@ static void dm_wq_requeue_work(struct work_struct *work)
|
||||
while (io) {
|
||||
struct dm_io *next = io->next;
|
||||
|
||||
dm_io_rewind(io, &md->queue->bio_split);
|
||||
dm_io_rewind(io, &md->disk->bio_split);
|
||||
|
||||
io->next = NULL;
|
||||
__dm_io_complete(io, false);
|
||||
@ -1181,7 +1181,7 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector)
|
||||
* Does the target need to split IO even further?
|
||||
* - varied (per target) IO splitting is a tenet of DM; this
|
||||
* explains why stacked chunk_sectors based splitting via
|
||||
* blk_queue_split() isn't possible here.
|
||||
* bio_split_to_limits() isn't possible here.
|
||||
*/
|
||||
if (!ti->max_io_len)
|
||||
return len;
|
||||
@ -1751,10 +1751,10 @@ static void dm_split_and_process_bio(struct mapped_device *md,
|
||||
is_abnormal = is_abnormal_io(bio);
|
||||
if (unlikely(is_abnormal)) {
|
||||
/*
|
||||
* Use blk_queue_split() for abnormal IO (e.g. discard, etc)
|
||||
* Use bio_split_to_limits() for abnormal IO (e.g. discard, etc)
|
||||
* otherwise associated queue_limits won't be imposed.
|
||||
*/
|
||||
blk_queue_split(&bio);
|
||||
bio = bio_split_to_limits(bio);
|
||||
}
|
||||
|
||||
init_clone_info(&ci, md, map, bio, is_abnormal);
|
||||
|
@ -125,7 +125,6 @@ static void __init md_setup_drive(struct md_setup_args *args)
|
||||
char *devname = args->device_names;
|
||||
dev_t devices[MD_SB_DISKS + 1], mdev;
|
||||
struct mdu_array_info_s ainfo = { };
|
||||
struct block_device *bdev;
|
||||
struct mddev *mddev;
|
||||
int err = 0, i;
|
||||
char name[16];
|
||||
@ -169,24 +168,16 @@ static void __init md_setup_drive(struct md_setup_args *args)
|
||||
|
||||
pr_info("md: Loading %s: %s\n", name, args->device_names);
|
||||
|
||||
bdev = blkdev_get_by_dev(mdev, FMODE_READ, NULL);
|
||||
if (IS_ERR(bdev)) {
|
||||
pr_err("md: open failed - cannot start array %s\n", name);
|
||||
mddev = md_alloc(mdev, name);
|
||||
if (IS_ERR(mddev)) {
|
||||
pr_err("md: md_alloc failed - cannot start array %s\n", name);
|
||||
return;
|
||||
}
|
||||
|
||||
err = -EIO;
|
||||
if (WARN(bdev->bd_disk->fops != &md_fops,
|
||||
"Opening block device %x resulted in non-md device\n",
|
||||
mdev))
|
||||
goto out_blkdev_put;
|
||||
|
||||
mddev = bdev->bd_disk->private_data;
|
||||
|
||||
err = mddev_lock(mddev);
|
||||
if (err) {
|
||||
pr_err("md: failed to lock array %s\n", name);
|
||||
goto out_blkdev_put;
|
||||
goto out_mddev_put;
|
||||
}
|
||||
|
||||
if (!list_empty(&mddev->disks) || mddev->raid_disks) {
|
||||
@ -230,8 +221,8 @@ static void __init md_setup_drive(struct md_setup_args *args)
|
||||
pr_warn("md: starting %s failed\n", name);
|
||||
out_unlock:
|
||||
mddev_unlock(mddev);
|
||||
out_blkdev_put:
|
||||
blkdev_put(bdev, FMODE_READ);
|
||||
out_mddev_put:
|
||||
mddev_put(mddev);
|
||||
}
|
||||
|
||||
static int __init raid_setup(char *str)
|
||||
|
@ -40,7 +40,7 @@ struct resync_info {
|
||||
|
||||
/* Lock the send communication. This is done through
|
||||
* bit manipulation as opposed to a mutex in order to
|
||||
* accomodate lock and hold. See next comment.
|
||||
* accommodate lock and hold. See next comment.
|
||||
*/
|
||||
#define MD_CLUSTER_SEND_LOCK 4
|
||||
/* If cluster operations (such as adding a disk) must lock the
|
||||
@ -689,7 +689,7 @@ static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked)
|
||||
/*
|
||||
* If resync thread run after raid1d thread, then process_metadata_update
|
||||
* could not continue if raid1d held reconfig_mutex (and raid1d is blocked
|
||||
* since another node already got EX on Token and waitting the EX of Ack),
|
||||
* since another node already got EX on Token and waiting the EX of Ack),
|
||||
* so let resync wake up thread in case flag is set.
|
||||
*/
|
||||
if (mddev_locked && !test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD,
|
||||
|
428
drivers/md/md.c
428
drivers/md/md.c
@ -368,28 +368,6 @@ EXPORT_SYMBOL_GPL(md_new_event);
|
||||
static LIST_HEAD(all_mddevs);
|
||||
static DEFINE_SPINLOCK(all_mddevs_lock);
|
||||
|
||||
/*
|
||||
* iterates through all used mddevs in the system.
|
||||
* We take care to grab the all_mddevs_lock whenever navigating
|
||||
* the list, and to always hold a refcount when unlocked.
|
||||
* Any code which breaks out of this loop while own
|
||||
* a reference to the current mddev and must mddev_put it.
|
||||
*/
|
||||
#define for_each_mddev(_mddev,_tmp) \
|
||||
\
|
||||
for (({ spin_lock(&all_mddevs_lock); \
|
||||
_tmp = all_mddevs.next; \
|
||||
_mddev = NULL;}); \
|
||||
({ if (_tmp != &all_mddevs) \
|
||||
mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
|
||||
spin_unlock(&all_mddevs_lock); \
|
||||
if (_mddev) mddev_put(_mddev); \
|
||||
_mddev = list_entry(_tmp, struct mddev, all_mddevs); \
|
||||
_tmp != &all_mddevs;}); \
|
||||
({ spin_lock(&all_mddevs_lock); \
|
||||
_tmp = _tmp->next;}) \
|
||||
)
|
||||
|
||||
/* Rather than calling directly into the personality make_request function,
|
||||
* IO requests come here first so that we can check if the device is
|
||||
* being suspended pending a reconfiguration.
|
||||
@ -464,7 +442,7 @@ static void md_submit_bio(struct bio *bio)
|
||||
return;
|
||||
}
|
||||
|
||||
blk_queue_split(&bio);
|
||||
bio = bio_split_to_limits(bio);
|
||||
|
||||
if (mddev->ro == 1 && unlikely(rw == WRITE)) {
|
||||
if (bio_sectors(bio) != 0)
|
||||
@ -647,13 +625,17 @@ EXPORT_SYMBOL(md_flush_request);
|
||||
|
||||
static inline struct mddev *mddev_get(struct mddev *mddev)
|
||||
{
|
||||
lockdep_assert_held(&all_mddevs_lock);
|
||||
|
||||
if (test_bit(MD_DELETED, &mddev->flags))
|
||||
return NULL;
|
||||
atomic_inc(&mddev->active);
|
||||
return mddev;
|
||||
}
|
||||
|
||||
static void mddev_delayed_delete(struct work_struct *ws);
|
||||
|
||||
static void mddev_put(struct mddev *mddev)
|
||||
void mddev_put(struct mddev *mddev)
|
||||
{
|
||||
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
|
||||
return;
|
||||
@ -661,7 +643,7 @@ static void mddev_put(struct mddev *mddev)
|
||||
mddev->ctime == 0 && !mddev->hold_active) {
|
||||
/* Array is not configured at all, and not held active,
|
||||
* so destroy it */
|
||||
list_del_init(&mddev->all_mddevs);
|
||||
set_bit(MD_DELETED, &mddev->flags);
|
||||
|
||||
/*
|
||||
* Call queue_work inside the spinlock so that
|
||||
@ -678,7 +660,6 @@ static void md_safemode_timeout(struct timer_list *t);
|
||||
|
||||
void mddev_init(struct mddev *mddev)
|
||||
{
|
||||
kobject_init(&mddev->kobj, &md_ktype);
|
||||
mutex_init(&mddev->open_mutex);
|
||||
mutex_init(&mddev->reconfig_mutex);
|
||||
mutex_init(&mddev->bitmap_info.mutex);
|
||||
@ -733,22 +714,6 @@ static dev_t mddev_alloc_unit(void)
|
||||
return dev;
|
||||
}
|
||||
|
||||
static struct mddev *mddev_find(dev_t unit)
|
||||
{
|
||||
struct mddev *mddev;
|
||||
|
||||
if (MAJOR(unit) != MD_MAJOR)
|
||||
unit &= ~((1 << MdpMinorShift) - 1);
|
||||
|
||||
spin_lock(&all_mddevs_lock);
|
||||
mddev = mddev_find_locked(unit);
|
||||
if (mddev)
|
||||
mddev_get(mddev);
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
return mddev;
|
||||
}
|
||||
|
||||
static struct mddev *mddev_alloc(dev_t unit)
|
||||
{
|
||||
struct mddev *new;
|
||||
@ -791,6 +756,15 @@ out_free_new:
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
static void mddev_free(struct mddev *mddev)
|
||||
{
|
||||
spin_lock(&all_mddevs_lock);
|
||||
list_del(&mddev->all_mddevs);
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
kfree(mddev);
|
||||
}
|
||||
|
||||
static const struct attribute_group md_redundancy_group;
|
||||
|
||||
void mddev_unlock(struct mddev *mddev)
|
||||
@ -3335,14 +3309,35 @@ rdev_size_show(struct md_rdev *rdev, char *page)
|
||||
return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
|
||||
}
|
||||
|
||||
static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
|
||||
static int md_rdevs_overlap(struct md_rdev *a, struct md_rdev *b)
|
||||
{
|
||||
/* check if two start/length pairs overlap */
|
||||
if (s1+l1 <= s2)
|
||||
return 0;
|
||||
if (s2+l2 <= s1)
|
||||
return 0;
|
||||
return 1;
|
||||
if (a->data_offset + a->sectors <= b->data_offset)
|
||||
return false;
|
||||
if (b->data_offset + b->sectors <= a->data_offset)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool md_rdev_overlaps(struct md_rdev *rdev)
|
||||
{
|
||||
struct mddev *mddev;
|
||||
struct md_rdev *rdev2;
|
||||
|
||||
spin_lock(&all_mddevs_lock);
|
||||
list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
|
||||
if (test_bit(MD_DELETED, &mddev->flags))
|
||||
continue;
|
||||
rdev_for_each(rdev2, mddev) {
|
||||
if (rdev != rdev2 && rdev->bdev == rdev2->bdev &&
|
||||
md_rdevs_overlap(rdev, rdev2)) {
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
|
||||
@ -3394,46 +3389,21 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||
return -EINVAL; /* component must fit device */
|
||||
|
||||
rdev->sectors = sectors;
|
||||
if (sectors > oldsectors && my_mddev->external) {
|
||||
/* Need to check that all other rdevs with the same
|
||||
* ->bdev do not overlap. 'rcu' is sufficient to walk
|
||||
* the rdev lists safely.
|
||||
* This check does not provide a hard guarantee, it
|
||||
* just helps avoid dangerous mistakes.
|
||||
|
||||
/*
|
||||
* Check that all other rdevs with the same bdev do not overlap. This
|
||||
* check does not provide a hard guarantee, it just helps avoid
|
||||
* dangerous mistakes.
|
||||
*/
|
||||
if (sectors > oldsectors && my_mddev->external &&
|
||||
md_rdev_overlaps(rdev)) {
|
||||
/*
|
||||
* Someone else could have slipped in a size change here, but
|
||||
* doing so is just silly. We put oldsectors back because we
|
||||
* know it is safe, and trust userspace not to race with itself.
|
||||
*/
|
||||
struct mddev *mddev;
|
||||
int overlap = 0;
|
||||
struct list_head *tmp;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_mddev(mddev, tmp) {
|
||||
struct md_rdev *rdev2;
|
||||
|
||||
rdev_for_each(rdev2, mddev)
|
||||
if (rdev->bdev == rdev2->bdev &&
|
||||
rdev != rdev2 &&
|
||||
overlaps(rdev->data_offset, rdev->sectors,
|
||||
rdev2->data_offset,
|
||||
rdev2->sectors)) {
|
||||
overlap = 1;
|
||||
break;
|
||||
}
|
||||
if (overlap) {
|
||||
mddev_put(mddev);
|
||||
break;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (overlap) {
|
||||
/* Someone else could have slipped in a size
|
||||
* change here, but doing so is just silly.
|
||||
* We put oldsectors back because we *know* it is
|
||||
* safe, and trust userspace not to race with
|
||||
* itself
|
||||
*/
|
||||
rdev->sectors = oldsectors;
|
||||
return -EBUSY;
|
||||
}
|
||||
rdev->sectors = oldsectors;
|
||||
return -EBUSY;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
@ -4830,6 +4800,19 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
if (work_pending(&mddev->del_work))
|
||||
flush_workqueue(md_misc_wq);
|
||||
if (mddev->sync_thread) {
|
||||
sector_t save_rp = mddev->reshape_position;
|
||||
|
||||
mddev_unlock(mddev);
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
mddev_lock_nointr(mddev);
|
||||
/*
|
||||
* set RECOVERY_INTR again and restore reshape
|
||||
* position in case others changed them after
|
||||
* they got the lock, e.g. reshape_position_store and
|
||||
* md_check_recovery.
|
||||
*/
|
||||
mddev->reshape_position = save_rp;
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
@ -5001,7 +4984,7 @@ static ssize_t
|
||||
sync_speed_show(struct mddev *mddev, char *page)
|
||||
{
|
||||
unsigned long resync, dt, db;
|
||||
if (mddev->curr_resync == 0)
|
||||
if (mddev->curr_resync == MD_RESYNC_NONE)
|
||||
return sprintf(page, "none\n");
|
||||
resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
|
||||
dt = (jiffies - mddev->resync_mark) / HZ;
|
||||
@ -5020,8 +5003,8 @@ sync_completed_show(struct mddev *mddev, char *page)
|
||||
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
return sprintf(page, "none\n");
|
||||
|
||||
if (mddev->curr_resync == 1 ||
|
||||
mddev->curr_resync == 2)
|
||||
if (mddev->curr_resync == MD_RESYNC_YIELDED ||
|
||||
mddev->curr_resync == MD_RESYNC_DELAYED)
|
||||
return sprintf(page, "delayed\n");
|
||||
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
||||
@ -5532,11 +5515,10 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
|
||||
if (!entry->show)
|
||||
return -EIO;
|
||||
spin_lock(&all_mddevs_lock);
|
||||
if (list_empty(&mddev->all_mddevs)) {
|
||||
if (!mddev_get(mddev)) {
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
mddev_get(mddev);
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
rv = entry->show(mddev, page);
|
||||
@ -5557,18 +5539,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
spin_lock(&all_mddevs_lock);
|
||||
if (list_empty(&mddev->all_mddevs)) {
|
||||
if (!mddev_get(mddev)) {
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
mddev_get(mddev);
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
rv = entry->store(mddev, page, length);
|
||||
mddev_put(mddev);
|
||||
return rv;
|
||||
}
|
||||
|
||||
static void md_free(struct kobject *ko)
|
||||
static void md_kobj_release(struct kobject *ko)
|
||||
{
|
||||
struct mddev *mddev = container_of(ko, struct mddev, kobj);
|
||||
|
||||
@ -5577,15 +5558,8 @@ static void md_free(struct kobject *ko)
|
||||
if (mddev->sysfs_level)
|
||||
sysfs_put(mddev->sysfs_level);
|
||||
|
||||
if (mddev->gendisk) {
|
||||
del_gendisk(mddev->gendisk);
|
||||
put_disk(mddev->gendisk);
|
||||
}
|
||||
percpu_ref_exit(&mddev->writes_pending);
|
||||
|
||||
bioset_exit(&mddev->bio_set);
|
||||
bioset_exit(&mddev->sync_set);
|
||||
kfree(mddev);
|
||||
del_gendisk(mddev->gendisk);
|
||||
put_disk(mddev->gendisk);
|
||||
}
|
||||
|
||||
static const struct sysfs_ops md_sysfs_ops = {
|
||||
@ -5593,7 +5567,7 @@ static const struct sysfs_ops md_sysfs_ops = {
|
||||
.store = md_attr_store,
|
||||
};
|
||||
static struct kobj_type md_ktype = {
|
||||
.release = md_free,
|
||||
.release = md_kobj_release,
|
||||
.sysfs_ops = &md_sysfs_ops,
|
||||
.default_groups = md_attr_groups,
|
||||
};
|
||||
@ -5604,7 +5578,6 @@ static void mddev_delayed_delete(struct work_struct *ws)
|
||||
{
|
||||
struct mddev *mddev = container_of(ws, struct mddev, del_work);
|
||||
|
||||
kobject_del(&mddev->kobj);
|
||||
kobject_put(&mddev->kobj);
|
||||
}
|
||||
|
||||
@ -5623,7 +5596,7 @@ int mddev_init_writes_pending(struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
|
||||
|
||||
static int md_alloc(dev_t dev, char *name)
|
||||
struct mddev *md_alloc(dev_t dev, char *name)
|
||||
{
|
||||
/*
|
||||
* If dev is zero, name is the name of a device to allocate with
|
||||
@ -5651,8 +5624,8 @@ static int md_alloc(dev_t dev, char *name)
|
||||
mutex_lock(&disks_mutex);
|
||||
mddev = mddev_alloc(dev);
|
||||
if (IS_ERR(mddev)) {
|
||||
mutex_unlock(&disks_mutex);
|
||||
return PTR_ERR(mddev);
|
||||
error = PTR_ERR(mddev);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
|
||||
@ -5670,7 +5643,7 @@ static int md_alloc(dev_t dev, char *name)
|
||||
strcmp(mddev2->gendisk->disk_name, name) == 0) {
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
error = -EEXIST;
|
||||
goto out_unlock_disks_mutex;
|
||||
goto out_free_mddev;
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
}
|
||||
@ -5683,7 +5656,7 @@ static int md_alloc(dev_t dev, char *name)
|
||||
error = -ENOMEM;
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
goto out_unlock_disks_mutex;
|
||||
goto out_free_mddev;
|
||||
|
||||
disk->major = MAJOR(mddev->unit);
|
||||
disk->first_minor = unit << shift;
|
||||
@ -5704,25 +5677,45 @@ static int md_alloc(dev_t dev, char *name)
|
||||
mddev->gendisk = disk;
|
||||
error = add_disk(disk);
|
||||
if (error)
|
||||
goto out_cleanup_disk;
|
||||
goto out_put_disk;
|
||||
|
||||
kobject_init(&mddev->kobj, &md_ktype);
|
||||
error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
|
||||
if (error)
|
||||
goto out_del_gendisk;
|
||||
if (error) {
|
||||
/*
|
||||
* The disk is already live at this point. Clear the hold flag
|
||||
* and let mddev_put take care of the deletion, as it isn't any
|
||||
* different from a normal close on last release now.
|
||||
*/
|
||||
mddev->hold_active = 0;
|
||||
mutex_unlock(&disks_mutex);
|
||||
mddev_put(mddev);
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
kobject_uevent(&mddev->kobj, KOBJ_ADD);
|
||||
mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
|
||||
mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
|
||||
goto out_unlock_disks_mutex;
|
||||
|
||||
out_del_gendisk:
|
||||
del_gendisk(disk);
|
||||
out_cleanup_disk:
|
||||
put_disk(disk);
|
||||
out_unlock_disks_mutex:
|
||||
mutex_unlock(&disks_mutex);
|
||||
return mddev;
|
||||
|
||||
out_put_disk:
|
||||
put_disk(disk);
|
||||
out_free_mddev:
|
||||
mddev_free(mddev);
|
||||
out_unlock:
|
||||
mutex_unlock(&disks_mutex);
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
static int md_alloc_and_put(dev_t dev, char *name)
|
||||
{
|
||||
struct mddev *mddev = md_alloc(dev, name);
|
||||
|
||||
if (IS_ERR(mddev))
|
||||
return PTR_ERR(mddev);
|
||||
mddev_put(mddev);
|
||||
return error;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void md_probe(dev_t dev)
|
||||
@ -5730,7 +5723,7 @@ static void md_probe(dev_t dev)
|
||||
if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
|
||||
return;
|
||||
if (create_on_open)
|
||||
md_alloc(dev, NULL);
|
||||
md_alloc_and_put(dev, NULL);
|
||||
}
|
||||
|
||||
static int add_named_array(const char *val, const struct kernel_param *kp)
|
||||
@ -5752,12 +5745,12 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
|
||||
return -E2BIG;
|
||||
strscpy(buf, val, len+1);
|
||||
if (strncmp(buf, "md_", 3) == 0)
|
||||
return md_alloc(0, buf);
|
||||
return md_alloc_and_put(0, buf);
|
||||
if (strncmp(buf, "md", 2) == 0 &&
|
||||
isdigit(buf[2]) &&
|
||||
kstrtoul(buf+2, 10, &devnum) == 0 &&
|
||||
devnum <= MINORMASK)
|
||||
return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
|
||||
return md_alloc_and_put(MKDEV(MD_MAJOR, devnum), NULL);
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -6197,6 +6190,7 @@ static void __md_stop_writes(struct mddev *mddev)
|
||||
flush_workqueue(md_misc_wq);
|
||||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
|
||||
@ -6244,11 +6238,11 @@ static void mddev_detach(struct mddev *mddev)
|
||||
static void __md_stop(struct mddev *mddev)
|
||||
{
|
||||
struct md_personality *pers = mddev->pers;
|
||||
md_bitmap_destroy(mddev);
|
||||
mddev_detach(mddev);
|
||||
/* Ensure ->event_work is done */
|
||||
if (mddev->event_work.func)
|
||||
flush_workqueue(md_misc_wq);
|
||||
md_bitmap_destroy(mddev);
|
||||
spin_lock(&mddev->lock);
|
||||
mddev->pers = NULL;
|
||||
spin_unlock(&mddev->lock);
|
||||
@ -6497,9 +6491,8 @@ static void autorun_devices(int part)
|
||||
break;
|
||||
}
|
||||
|
||||
md_probe(dev);
|
||||
mddev = mddev_find(dev);
|
||||
if (!mddev)
|
||||
mddev = md_alloc(dev, NULL);
|
||||
if (IS_ERR(mddev))
|
||||
break;
|
||||
|
||||
if (mddev_lock(mddev))
|
||||
@ -7782,45 +7775,33 @@ out_unlock:
|
||||
|
||||
static int md_open(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
/*
|
||||
* Succeed if we can lock the mddev, which confirms that
|
||||
* it isn't being stopped right now.
|
||||
*/
|
||||
struct mddev *mddev = mddev_find(bdev->bd_dev);
|
||||
struct mddev *mddev;
|
||||
int err;
|
||||
|
||||
spin_lock(&all_mddevs_lock);
|
||||
mddev = mddev_get(bdev->bd_disk->private_data);
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
if (!mddev)
|
||||
return -ENODEV;
|
||||
|
||||
if (mddev->gendisk != bdev->bd_disk) {
|
||||
/* we are racing with mddev_put which is discarding this
|
||||
* bd_disk.
|
||||
*/
|
||||
mddev_put(mddev);
|
||||
/* Wait until bdev->bd_disk is definitely gone */
|
||||
if (work_pending(&mddev->del_work))
|
||||
flush_workqueue(md_misc_wq);
|
||||
return -EBUSY;
|
||||
}
|
||||
BUG_ON(mddev != bdev->bd_disk->private_data);
|
||||
|
||||
if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
|
||||
err = mutex_lock_interruptible(&mddev->open_mutex);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (test_bit(MD_CLOSING, &mddev->flags)) {
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
err = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
err = -ENODEV;
|
||||
if (test_bit(MD_CLOSING, &mddev->flags))
|
||||
goto out_unlock;
|
||||
|
||||
err = 0;
|
||||
atomic_inc(&mddev->openers);
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
|
||||
bdev_check_media_change(bdev);
|
||||
out:
|
||||
if (err)
|
||||
mddev_put(mddev);
|
||||
return 0;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
out:
|
||||
mddev_put(mddev);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -7844,6 +7825,17 @@ static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void md_free_disk(struct gendisk *disk)
|
||||
{
|
||||
struct mddev *mddev = disk->private_data;
|
||||
|
||||
percpu_ref_exit(&mddev->writes_pending);
|
||||
bioset_exit(&mddev->bio_set);
|
||||
bioset_exit(&mddev->sync_set);
|
||||
|
||||
mddev_free(mddev);
|
||||
}
|
||||
|
||||
const struct block_device_operations md_fops =
|
||||
{
|
||||
.owner = THIS_MODULE,
|
||||
@ -7857,6 +7849,7 @@ const struct block_device_operations md_fops =
|
||||
.getgeo = md_getgeo,
|
||||
.check_events = md_check_events,
|
||||
.set_read_only = md_set_read_only,
|
||||
.free_disk = md_free_disk,
|
||||
};
|
||||
|
||||
static int md_thread(void *arg)
|
||||
@ -8018,16 +8011,26 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
|
||||
max_sectors = mddev->dev_sectors;
|
||||
|
||||
resync = mddev->curr_resync;
|
||||
if (resync <= 3) {
|
||||
if (resync < MD_RESYNC_ACTIVE) {
|
||||
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
|
||||
/* Still cleaning up */
|
||||
resync = max_sectors;
|
||||
} else if (resync > max_sectors)
|
||||
} else if (resync > max_sectors) {
|
||||
resync = max_sectors;
|
||||
else
|
||||
} else {
|
||||
resync -= atomic_read(&mddev->recovery_active);
|
||||
if (resync < MD_RESYNC_ACTIVE) {
|
||||
/*
|
||||
* Resync has started, but the subtraction has
|
||||
* yielded one of the special values. Force it
|
||||
* to active to ensure the status reports an
|
||||
* active resync.
|
||||
*/
|
||||
resync = MD_RESYNC_ACTIVE;
|
||||
}
|
||||
}
|
||||
|
||||
if (resync == 0) {
|
||||
if (resync == MD_RESYNC_NONE) {
|
||||
if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
|
||||
struct md_rdev *rdev;
|
||||
|
||||
@ -8051,7 +8054,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (resync < 3) {
|
||||
if (resync < MD_RESYNC_ACTIVE) {
|
||||
seq_printf(seq, "\tresync=DELAYED");
|
||||
return 1;
|
||||
}
|
||||
@ -8152,6 +8155,8 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
if (!l--) {
|
||||
mddev = list_entry(tmp, struct mddev, all_mddevs);
|
||||
mddev_get(mddev);
|
||||
if (!mddev_get(mddev))
|
||||
continue;
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
return mddev;
|
||||
}
|
||||
@ -8165,25 +8170,35 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
||||
{
|
||||
struct list_head *tmp;
|
||||
struct mddev *next_mddev, *mddev = v;
|
||||
struct mddev *to_put = NULL;
|
||||
|
||||
++*pos;
|
||||
if (v == (void*)2)
|
||||
return NULL;
|
||||
|
||||
spin_lock(&all_mddevs_lock);
|
||||
if (v == (void*)1)
|
||||
if (v == (void*)1) {
|
||||
tmp = all_mddevs.next;
|
||||
else
|
||||
} else {
|
||||
to_put = mddev;
|
||||
tmp = mddev->all_mddevs.next;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
if (tmp == &all_mddevs) {
|
||||
next_mddev = (void*)2;
|
||||
*pos = 0x10000;
|
||||
break;
|
||||
}
|
||||
next_mddev = list_entry(tmp, struct mddev, all_mddevs);
|
||||
if (mddev_get(next_mddev))
|
||||
break;
|
||||
mddev = next_mddev;
|
||||
tmp = mddev->all_mddevs.next;
|
||||
if (tmp != &all_mddevs)
|
||||
next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
|
||||
else {
|
||||
next_mddev = (void*)2;
|
||||
*pos = 0x10000;
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
if (v != (void*)1)
|
||||
if (to_put)
|
||||
mddev_put(mddev);
|
||||
return next_mddev;
|
||||
|
||||
@ -8682,7 +8697,6 @@ void md_do_sync(struct md_thread *thread)
|
||||
unsigned long update_time;
|
||||
sector_t mark_cnt[SYNC_MARKS];
|
||||
int last_mark,m;
|
||||
struct list_head *tmp;
|
||||
sector_t last_check;
|
||||
int skipped = 0;
|
||||
struct md_rdev *rdev;
|
||||
@ -8729,13 +8743,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
|
||||
mddev->last_sync_action = action ?: desc;
|
||||
|
||||
/* we overload curr_resync somewhat here.
|
||||
* 0 == not engaged in resync at all
|
||||
* 2 == checking that there is no conflict with another sync
|
||||
* 1 == like 2, but have yielded to allow conflicting resync to
|
||||
* commence
|
||||
* other == active in resync - this many blocks
|
||||
*
|
||||
/*
|
||||
* Before starting a resync we must have set curr_resync to
|
||||
* 2, and then checked that every "conflicting" array has curr_resync
|
||||
* less than ours. When we find one that is the same or higher
|
||||
@ -8747,24 +8755,29 @@ void md_do_sync(struct md_thread *thread)
|
||||
|
||||
do {
|
||||
int mddev2_minor = -1;
|
||||
mddev->curr_resync = 2;
|
||||
mddev->curr_resync = MD_RESYNC_DELAYED;
|
||||
|
||||
try_again:
|
||||
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||
goto skip;
|
||||
for_each_mddev(mddev2, tmp) {
|
||||
spin_lock(&all_mddevs_lock);
|
||||
list_for_each_entry(mddev2, &all_mddevs, all_mddevs) {
|
||||
if (test_bit(MD_DELETED, &mddev2->flags))
|
||||
continue;
|
||||
if (mddev2 == mddev)
|
||||
continue;
|
||||
if (!mddev->parallel_resync
|
||||
&& mddev2->curr_resync
|
||||
&& match_mddev_units(mddev, mddev2)) {
|
||||
DEFINE_WAIT(wq);
|
||||
if (mddev < mddev2 && mddev->curr_resync == 2) {
|
||||
if (mddev < mddev2 &&
|
||||
mddev->curr_resync == MD_RESYNC_DELAYED) {
|
||||
/* arbitrarily yield */
|
||||
mddev->curr_resync = 1;
|
||||
mddev->curr_resync = MD_RESYNC_YIELDED;
|
||||
wake_up(&resync_wait);
|
||||
}
|
||||
if (mddev > mddev2 && mddev->curr_resync == 1)
|
||||
if (mddev > mddev2 &&
|
||||
mddev->curr_resync == MD_RESYNC_YIELDED)
|
||||
/* no need to wait here, we can wait the next
|
||||
* time 'round when curr_resync == MD_RESYNC_DELAYED
|
||||
*/
|
||||
@ -8782,7 +8795,8 @@ void md_do_sync(struct md_thread *thread)
|
||||
desc, mdname(mddev),
|
||||
mdname(mddev2));
|
||||
}
|
||||
mddev_put(mddev2);
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
if (signal_pending(current))
|
||||
flush_signals(current);
|
||||
schedule();
|
||||
@ -8792,7 +8806,8 @@ void md_do_sync(struct md_thread *thread)
|
||||
finish_wait(&resync_wait, &wq);
|
||||
}
|
||||
}
|
||||
} while (mddev->curr_resync < 2);
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
} while (mddev->curr_resync < MD_RESYNC_DELAYED);
|
||||
|
||||
j = 0;
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
@ -8876,7 +8891,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
desc, mdname(mddev));
|
||||
mddev->curr_resync = j;
|
||||
} else
|
||||
mddev->curr_resync = 3; /* no longer delayed */
|
||||
mddev->curr_resync = MD_RESYNC_ACTIVE; /* no longer delayed */
|
||||
mddev->curr_resync_completed = j;
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_completed);
|
||||
md_new_event();
|
||||
@ -9011,14 +9026,14 @@ void md_do_sync(struct md_thread *thread)
|
||||
|
||||
if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||
mddev->curr_resync > 3) {
|
||||
mddev->curr_resync >= MD_RESYNC_ACTIVE) {
|
||||
mddev->curr_resync_completed = mddev->curr_resync;
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_completed);
|
||||
}
|
||||
mddev->pers->sync_request(mddev, max_sectors, &skipped);
|
||||
|
||||
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
|
||||
mddev->curr_resync > 3) {
|
||||
mddev->curr_resync >= MD_RESYNC_ACTIVE) {
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
if (mddev->curr_resync >= mddev->recovery_cp) {
|
||||
@ -9082,7 +9097,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
mddev->resync_min = mddev->curr_resync_completed;
|
||||
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
|
||||
mddev->curr_resync = 0;
|
||||
mddev->curr_resync = MD_RESYNC_NONE;
|
||||
spin_unlock(&mddev->lock);
|
||||
|
||||
wake_up(&resync_wait);
|
||||
@ -9303,6 +9318,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||
* ->spare_active and clear saved_raid_disk
|
||||
*/
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
@ -9338,6 +9354,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||
goto unlock;
|
||||
}
|
||||
if (mddev->sync_thread) {
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
goto unlock;
|
||||
}
|
||||
@ -9417,8 +9434,7 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
sector_t old_dev_sectors = mddev->dev_sectors;
|
||||
bool is_reshaped = false;
|
||||
|
||||
/* resync has finished, collect result */
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
/* sync_thread should be unregistered, collect result */
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
||||
mddev->degraded != mddev->raid_disks) {
|
||||
@ -9466,6 +9482,7 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
wake_up(&resync_wait);
|
||||
/* flag recovery needed just to double check */
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_completed);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
||||
md_new_event();
|
||||
if (mddev->event_work.func)
|
||||
@ -9544,11 +9561,14 @@ EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
|
||||
static int md_notify_reboot(struct notifier_block *this,
|
||||
unsigned long code, void *x)
|
||||
{
|
||||
struct list_head *tmp;
|
||||
struct mddev *mddev;
|
||||
struct mddev *mddev, *n;
|
||||
int need_delay = 0;
|
||||
|
||||
for_each_mddev(mddev, tmp) {
|
||||
spin_lock(&all_mddevs_lock);
|
||||
list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
|
||||
if (!mddev_get(mddev))
|
||||
continue;
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
if (mddev_trylock(mddev)) {
|
||||
if (mddev->pers)
|
||||
__md_stop_writes(mddev);
|
||||
@ -9557,7 +9577,11 @@ static int md_notify_reboot(struct notifier_block *this,
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
need_delay = 1;
|
||||
mddev_put(mddev);
|
||||
spin_lock(&all_mddevs_lock);
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
/*
|
||||
* certain more exotic SCSI devices are known to be
|
||||
* volatile wrt too early system reboots. While the
|
||||
@ -9876,8 +9900,7 @@ void md_autostart_arrays(int part)
|
||||
|
||||
static __exit void md_exit(void)
|
||||
{
|
||||
struct mddev *mddev;
|
||||
struct list_head *tmp;
|
||||
struct mddev *mddev, *n;
|
||||
int delay = 1;
|
||||
|
||||
unregister_blkdev(MD_MAJOR,"md");
|
||||
@ -9897,17 +9920,24 @@ static __exit void md_exit(void)
|
||||
}
|
||||
remove_proc_entry("mdstat", NULL);
|
||||
|
||||
for_each_mddev(mddev, tmp) {
|
||||
spin_lock(&all_mddevs_lock);
|
||||
list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
|
||||
if (!mddev_get(mddev))
|
||||
continue;
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
export_array(mddev);
|
||||
mddev->ctime = 0;
|
||||
mddev->hold_active = 0;
|
||||
/*
|
||||
* for_each_mddev() will call mddev_put() at the end of each
|
||||
* iteration. As the mddev is now fully clear, this will
|
||||
* schedule the mddev for destruction by a workqueue, and the
|
||||
* As the mddev is now fully clear, mddev_put will schedule
|
||||
* the mddev for destruction by a workqueue, and the
|
||||
* destroy_workqueue() below will wait for that to complete.
|
||||
*/
|
||||
mddev_put(mddev);
|
||||
spin_lock(&all_mddevs_lock);
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
destroy_workqueue(md_rdev_misc_wq);
|
||||
destroy_workqueue(md_misc_wq);
|
||||
destroy_workqueue(md_wq);
|
||||
|
@ -254,6 +254,7 @@ struct md_cluster_info;
|
||||
* @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
|
||||
* array is ready yet.
|
||||
* @MD_BROKEN: This is used to stop writes and mark array as failed.
|
||||
* @MD_DELETED: This device is being deleted
|
||||
*
|
||||
* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
|
||||
*/
|
||||
@ -270,6 +271,7 @@ enum mddev_flags {
|
||||
MD_UPDATING_SB,
|
||||
MD_NOT_READY,
|
||||
MD_BROKEN,
|
||||
MD_DELETED,
|
||||
};
|
||||
|
||||
enum mddev_sb_flags {
|
||||
@ -288,6 +290,21 @@ struct serial_info {
|
||||
sector_t _subtree_last; /* highest sector in subtree of rb node */
|
||||
};
|
||||
|
||||
/*
|
||||
* mddev->curr_resync stores the current sector of the resync but
|
||||
* also has some overloaded values.
|
||||
*/
|
||||
enum {
|
||||
/* No resync in progress */
|
||||
MD_RESYNC_NONE = 0,
|
||||
/* Yielded to allow another conflicting resync to commence */
|
||||
MD_RESYNC_YIELDED = 1,
|
||||
/* Delayed to check that there is no conflict with another sync */
|
||||
MD_RESYNC_DELAYED = 2,
|
||||
/* Any value greater than or equal to this is in an active resync */
|
||||
MD_RESYNC_ACTIVE = 3,
|
||||
};
|
||||
|
||||
struct mddev {
|
||||
void *private;
|
||||
struct md_personality *pers;
|
||||
@ -750,6 +767,8 @@ extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
|
||||
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
|
||||
|
||||
extern void mddev_init(struct mddev *mddev);
|
||||
struct mddev *md_alloc(dev_t dev, char *name);
|
||||
void mddev_put(struct mddev *mddev);
|
||||
extern int md_run(struct mddev *mddev);
|
||||
extern int md_start(struct mddev *mddev);
|
||||
extern void md_stop(struct mddev *mddev);
|
||||
|
@ -2167,9 +2167,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
int err = 0;
|
||||
int number = rdev->raid_disk;
|
||||
struct md_rdev **rdevp;
|
||||
struct raid10_info *p = conf->mirrors + number;
|
||||
struct raid10_info *p;
|
||||
|
||||
print_conf(conf);
|
||||
if (unlikely(number >= mddev->raid_disks))
|
||||
return 0;
|
||||
p = conf->mirrors + number;
|
||||
if (rdev == p->rdev)
|
||||
rdevp = &p->rdev;
|
||||
else if (rdev == p->replacement)
|
||||
|
@ -1590,18 +1590,13 @@ void r5l_quiesce(struct r5l_log *log, int quiesce)
|
||||
|
||||
bool r5l_log_disk_error(struct r5conf *conf)
|
||||
{
|
||||
struct r5l_log *log;
|
||||
bool ret;
|
||||
/* don't allow write if journal disk is missing */
|
||||
rcu_read_lock();
|
||||
log = rcu_dereference(conf->log);
|
||||
struct r5l_log *log = conf->log;
|
||||
|
||||
/* don't allow write if journal disk is missing */
|
||||
if (!log)
|
||||
ret = test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
|
||||
return test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
|
||||
else
|
||||
ret = test_bit(Faulty, &log->rdev->flags);
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
return test_bit(Faulty, &log->rdev->flags);
|
||||
}
|
||||
|
||||
#define R5L_RECOVERY_PAGE_POOL_SIZE 256
|
||||
@ -2534,12 +2529,13 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
|
||||
struct r5conf *conf;
|
||||
int ret;
|
||||
|
||||
spin_lock(&mddev->lock);
|
||||
ret = mddev_lock(mddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
conf = mddev->private;
|
||||
if (!conf || !conf->log) {
|
||||
spin_unlock(&mddev->lock);
|
||||
return 0;
|
||||
}
|
||||
if (!conf || !conf->log)
|
||||
goto out_unlock;
|
||||
|
||||
switch (conf->log->r5c_journal_mode) {
|
||||
case R5C_JOURNAL_MODE_WRITE_THROUGH:
|
||||
@ -2557,7 +2553,9 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
|
||||
default:
|
||||
ret = 0;
|
||||
}
|
||||
spin_unlock(&mddev->lock);
|
||||
|
||||
out_unlock:
|
||||
mddev_unlock(mddev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2639,7 +2637,7 @@ int r5c_try_caching_write(struct r5conf *conf,
|
||||
int i;
|
||||
struct r5dev *dev;
|
||||
int to_cache = 0;
|
||||
void **pslot;
|
||||
void __rcu **pslot;
|
||||
sector_t tree_index;
|
||||
int ret;
|
||||
uintptr_t refcount;
|
||||
@ -2806,7 +2804,7 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
|
||||
int i;
|
||||
int do_wakeup = 0;
|
||||
sector_t tree_index;
|
||||
void **pslot;
|
||||
void __rcu **pslot;
|
||||
uintptr_t refcount;
|
||||
|
||||
if (!log || !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
|
||||
@ -3145,7 +3143,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
|
||||
spin_lock_init(&log->stripe_in_journal_lock);
|
||||
atomic_set(&log->stripe_in_journal_count, 0);
|
||||
|
||||
rcu_assign_pointer(conf->log, log);
|
||||
conf->log = log;
|
||||
|
||||
set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
|
||||
return 0;
|
||||
@ -3167,13 +3165,13 @@ void r5l_exit_log(struct r5conf *conf)
|
||||
{
|
||||
struct r5l_log *log = conf->log;
|
||||
|
||||
conf->log = NULL;
|
||||
synchronize_rcu();
|
||||
|
||||
/* Ensure disable_writeback_work wakes up and exits */
|
||||
wake_up(&conf->mddev->sb_wait);
|
||||
flush_work(&log->disable_writeback_work);
|
||||
md_unregister_thread(&log->reclaim_thread);
|
||||
|
||||
conf->log = NULL;
|
||||
|
||||
mempool_exit(&log->meta_pool);
|
||||
bioset_exit(&log->bs);
|
||||
mempool_exit(&log->io_pool);
|
||||
|
@ -2,49 +2,46 @@
|
||||
#ifndef _RAID5_LOG_H
|
||||
#define _RAID5_LOG_H
|
||||
|
||||
extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev);
|
||||
extern void r5l_exit_log(struct r5conf *conf);
|
||||
extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh);
|
||||
extern void r5l_write_stripe_run(struct r5l_log *log);
|
||||
extern void r5l_flush_stripe_to_raid(struct r5l_log *log);
|
||||
extern void r5l_stripe_write_finished(struct stripe_head *sh);
|
||||
extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
|
||||
extern void r5l_quiesce(struct r5l_log *log, int quiesce);
|
||||
extern bool r5l_log_disk_error(struct r5conf *conf);
|
||||
extern bool r5c_is_writeback(struct r5l_log *log);
|
||||
extern int
|
||||
r5c_try_caching_write(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct stripe_head_state *s, int disks);
|
||||
extern void
|
||||
r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct stripe_head_state *s);
|
||||
extern void r5c_release_extra_page(struct stripe_head *sh);
|
||||
extern void r5c_use_extra_page(struct stripe_head *sh);
|
||||
extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
|
||||
extern void r5c_handle_cached_data_endio(struct r5conf *conf,
|
||||
struct stripe_head *sh, int disks);
|
||||
extern int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh);
|
||||
extern void r5c_make_stripe_write_out(struct stripe_head *sh);
|
||||
extern void r5c_flush_cache(struct r5conf *conf, int num);
|
||||
extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
|
||||
extern void r5c_check_cached_full_stripe(struct r5conf *conf);
|
||||
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev);
|
||||
void r5l_exit_log(struct r5conf *conf);
|
||||
int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh);
|
||||
void r5l_write_stripe_run(struct r5l_log *log);
|
||||
void r5l_flush_stripe_to_raid(struct r5l_log *log);
|
||||
void r5l_stripe_write_finished(struct stripe_head *sh);
|
||||
int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
|
||||
void r5l_quiesce(struct r5l_log *log, int quiesce);
|
||||
bool r5l_log_disk_error(struct r5conf *conf);
|
||||
bool r5c_is_writeback(struct r5l_log *log);
|
||||
int r5c_try_caching_write(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct stripe_head_state *s, int disks);
|
||||
void r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct stripe_head_state *s);
|
||||
void r5c_release_extra_page(struct stripe_head *sh);
|
||||
void r5c_use_extra_page(struct stripe_head *sh);
|
||||
void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
|
||||
void r5c_handle_cached_data_endio(struct r5conf *conf,
|
||||
struct stripe_head *sh, int disks);
|
||||
int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh);
|
||||
void r5c_make_stripe_write_out(struct stripe_head *sh);
|
||||
void r5c_flush_cache(struct r5conf *conf, int num);
|
||||
void r5c_check_stripe_cache_usage(struct r5conf *conf);
|
||||
void r5c_check_cached_full_stripe(struct r5conf *conf);
|
||||
extern struct md_sysfs_entry r5c_journal_mode;
|
||||
extern void r5c_update_on_rdev_error(struct mddev *mddev,
|
||||
struct md_rdev *rdev);
|
||||
extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
|
||||
extern int r5l_start(struct r5l_log *log);
|
||||
void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev);
|
||||
bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
|
||||
int r5l_start(struct r5l_log *log);
|
||||
|
||||
extern struct dma_async_tx_descriptor *
|
||||
struct dma_async_tx_descriptor *
|
||||
ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
|
||||
struct dma_async_tx_descriptor *tx);
|
||||
extern int ppl_init_log(struct r5conf *conf);
|
||||
extern void ppl_exit_log(struct r5conf *conf);
|
||||
extern int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh);
|
||||
extern void ppl_write_stripe_run(struct r5conf *conf);
|
||||
extern void ppl_stripe_write_finished(struct stripe_head *sh);
|
||||
extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
|
||||
extern void ppl_quiesce(struct r5conf *conf, int quiesce);
|
||||
extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
|
||||
int ppl_init_log(struct r5conf *conf);
|
||||
void ppl_exit_log(struct r5conf *conf);
|
||||
int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh);
|
||||
void ppl_write_stripe_run(struct r5conf *conf);
|
||||
void ppl_stripe_write_finished(struct stripe_head *sh);
|
||||
int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
|
||||
void ppl_quiesce(struct r5conf *conf, int quiesce);
|
||||
int ppl_handle_flush_request(struct bio *bio);
|
||||
extern struct md_sysfs_entry ppl_write_hint;
|
||||
|
||||
static inline bool raid5_has_log(struct r5conf *conf)
|
||||
@ -111,7 +108,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
|
||||
if (conf->log)
|
||||
ret = r5l_handle_flush_request(conf->log, bio);
|
||||
else if (raid5_has_ppl(conf))
|
||||
ret = ppl_handle_flush_request(conf->log, bio);
|
||||
ret = ppl_handle_flush_request(bio);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -679,7 +679,7 @@ void ppl_quiesce(struct r5conf *conf, int quiesce)
|
||||
}
|
||||
}
|
||||
|
||||
int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
|
||||
int ppl_handle_flush_request(struct bio *bio)
|
||||
{
|
||||
if (bio->bi_iter.bi_size == 0) {
|
||||
bio_endio(bio);
|
||||
|
@ -61,6 +61,8 @@
|
||||
#define cpu_to_group(cpu) cpu_to_node(cpu)
|
||||
#define ANY_GROUP NUMA_NO_NODE
|
||||
|
||||
#define RAID5_MAX_REQ_STRIPES 256
|
||||
|
||||
static bool devices_handle_discard_safely = false;
|
||||
module_param(devices_handle_discard_safely, bool, 0644);
|
||||
MODULE_PARM_DESC(devices_handle_discard_safely,
|
||||
@ -624,6 +626,49 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct stripe_head *find_get_stripe(struct r5conf *conf,
|
||||
sector_t sector, short generation, int hash)
|
||||
{
|
||||
int inc_empty_inactive_list_flag;
|
||||
struct stripe_head *sh;
|
||||
|
||||
sh = __find_stripe(conf, sector, generation);
|
||||
if (!sh)
|
||||
return NULL;
|
||||
|
||||
if (atomic_inc_not_zero(&sh->count))
|
||||
return sh;
|
||||
|
||||
/*
|
||||
* Slow path. The reference count is zero which means the stripe must
|
||||
* be on a list (sh->lru). Must remove the stripe from the list that
|
||||
* references it with the device_lock held.
|
||||
*/
|
||||
|
||||
spin_lock(&conf->device_lock);
|
||||
if (!atomic_read(&sh->count)) {
|
||||
if (!test_bit(STRIPE_HANDLE, &sh->state))
|
||||
atomic_inc(&conf->active_stripes);
|
||||
BUG_ON(list_empty(&sh->lru) &&
|
||||
!test_bit(STRIPE_EXPANDING, &sh->state));
|
||||
inc_empty_inactive_list_flag = 0;
|
||||
if (!list_empty(conf->inactive_list + hash))
|
||||
inc_empty_inactive_list_flag = 1;
|
||||
list_del_init(&sh->lru);
|
||||
if (list_empty(conf->inactive_list + hash) &&
|
||||
inc_empty_inactive_list_flag)
|
||||
atomic_inc(&conf->empty_inactive_list_nr);
|
||||
if (sh->group) {
|
||||
sh->group->stripes_cnt--;
|
||||
sh->group = NULL;
|
||||
}
|
||||
}
|
||||
atomic_inc(&sh->count);
|
||||
spin_unlock(&conf->device_lock);
|
||||
|
||||
return sh;
|
||||
}
|
||||
|
||||
/*
|
||||
* Need to check if array has failed when deciding whether to:
|
||||
* - start an array
|
||||
@ -710,80 +755,121 @@ static bool has_failed(struct r5conf *conf)
|
||||
return degraded > conf->max_degraded;
|
||||
}
|
||||
|
||||
struct stripe_head *
|
||||
raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
|
||||
int previous, int noblock, int noquiesce)
|
||||
enum stripe_result {
|
||||
STRIPE_SUCCESS = 0,
|
||||
STRIPE_RETRY,
|
||||
STRIPE_SCHEDULE_AND_RETRY,
|
||||
STRIPE_FAIL,
|
||||
};
|
||||
|
||||
struct stripe_request_ctx {
|
||||
/* a reference to the last stripe_head for batching */
|
||||
struct stripe_head *batch_last;
|
||||
|
||||
/* first sector in the request */
|
||||
sector_t first_sector;
|
||||
|
||||
/* last sector in the request */
|
||||
sector_t last_sector;
|
||||
|
||||
/*
|
||||
* bitmap to track stripe sectors that have been added to stripes
|
||||
* add one to account for unaligned requests
|
||||
*/
|
||||
DECLARE_BITMAP(sectors_to_do, RAID5_MAX_REQ_STRIPES + 1);
|
||||
|
||||
/* the request had REQ_PREFLUSH, cleared after the first stripe_head */
|
||||
bool do_flush;
|
||||
};
|
||||
|
||||
/*
|
||||
* Block until another thread clears R5_INACTIVE_BLOCKED or
|
||||
* there are fewer than 3/4 the maximum number of active stripes
|
||||
* and there is an inactive stripe available.
|
||||
*/
|
||||
static bool is_inactive_blocked(struct r5conf *conf, int hash)
|
||||
{
|
||||
int active = atomic_read(&conf->active_stripes);
|
||||
|
||||
if (list_empty(conf->inactive_list + hash))
|
||||
return false;
|
||||
|
||||
if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
|
||||
return true;
|
||||
|
||||
return active < (conf->max_nr_stripes * 3 / 4);
|
||||
}
|
||||
|
||||
static struct stripe_head *__raid5_get_active_stripe(struct r5conf *conf,
|
||||
struct stripe_request_ctx *ctx, sector_t sector,
|
||||
bool previous, bool noblock, bool noquiesce)
|
||||
{
|
||||
struct stripe_head *sh;
|
||||
int hash = stripe_hash_locks_hash(conf, sector);
|
||||
int inc_empty_inactive_list_flag;
|
||||
|
||||
pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
|
||||
|
||||
spin_lock_irq(conf->hash_locks + hash);
|
||||
|
||||
do {
|
||||
wait_event_lock_irq(conf->wait_for_quiescent,
|
||||
conf->quiesce == 0 || noquiesce,
|
||||
*(conf->hash_locks + hash));
|
||||
sh = __find_stripe(conf, sector, conf->generation - previous);
|
||||
if (!sh) {
|
||||
if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
|
||||
sh = get_free_stripe(conf, hash);
|
||||
if (!sh && !test_bit(R5_DID_ALLOC,
|
||||
&conf->cache_state))
|
||||
set_bit(R5_ALLOC_MORE,
|
||||
&conf->cache_state);
|
||||
}
|
||||
if (noblock && sh == NULL)
|
||||
break;
|
||||
|
||||
r5c_check_stripe_cache_usage(conf);
|
||||
if (!sh) {
|
||||
set_bit(R5_INACTIVE_BLOCKED,
|
||||
&conf->cache_state);
|
||||
r5l_wake_reclaim(conf->log, 0);
|
||||
wait_event_lock_irq(
|
||||
conf->wait_for_stripe,
|
||||
!list_empty(conf->inactive_list + hash) &&
|
||||
(atomic_read(&conf->active_stripes)
|
||||
< (conf->max_nr_stripes * 3 / 4)
|
||||
|| !test_bit(R5_INACTIVE_BLOCKED,
|
||||
&conf->cache_state)),
|
||||
*(conf->hash_locks + hash));
|
||||
clear_bit(R5_INACTIVE_BLOCKED,
|
||||
&conf->cache_state);
|
||||
} else {
|
||||
init_stripe(sh, sector, previous);
|
||||
atomic_inc(&sh->count);
|
||||
}
|
||||
} else if (!atomic_inc_not_zero(&sh->count)) {
|
||||
spin_lock(&conf->device_lock);
|
||||
if (!atomic_read(&sh->count)) {
|
||||
if (!test_bit(STRIPE_HANDLE, &sh->state))
|
||||
atomic_inc(&conf->active_stripes);
|
||||
BUG_ON(list_empty(&sh->lru) &&
|
||||
!test_bit(STRIPE_EXPANDING, &sh->state));
|
||||
inc_empty_inactive_list_flag = 0;
|
||||
if (!list_empty(conf->inactive_list + hash))
|
||||
inc_empty_inactive_list_flag = 1;
|
||||
list_del_init(&sh->lru);
|
||||
if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
|
||||
atomic_inc(&conf->empty_inactive_list_nr);
|
||||
if (sh->group) {
|
||||
sh->group->stripes_cnt--;
|
||||
sh->group = NULL;
|
||||
}
|
||||
}
|
||||
atomic_inc(&sh->count);
|
||||
spin_unlock(&conf->device_lock);
|
||||
retry:
|
||||
if (!noquiesce && conf->quiesce) {
|
||||
/*
|
||||
* Must release the reference to batch_last before waiting
|
||||
* on quiesce, otherwise the batch_last will hold a reference
|
||||
* to a stripe and raid5_quiesce() will deadlock waiting for
|
||||
* active_stripes to go to zero.
|
||||
*/
|
||||
if (ctx && ctx->batch_last) {
|
||||
raid5_release_stripe(ctx->batch_last);
|
||||
ctx->batch_last = NULL;
|
||||
}
|
||||
} while (sh == NULL);
|
||||
|
||||
wait_event_lock_irq(conf->wait_for_quiescent, !conf->quiesce,
|
||||
*(conf->hash_locks + hash));
|
||||
}
|
||||
|
||||
sh = find_get_stripe(conf, sector, conf->generation - previous, hash);
|
||||
if (sh)
|
||||
goto out;
|
||||
|
||||
if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
|
||||
goto wait_for_stripe;
|
||||
|
||||
sh = get_free_stripe(conf, hash);
|
||||
if (sh) {
|
||||
r5c_check_stripe_cache_usage(conf);
|
||||
init_stripe(sh, sector, previous);
|
||||
atomic_inc(&sh->count);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!test_bit(R5_DID_ALLOC, &conf->cache_state))
|
||||
set_bit(R5_ALLOC_MORE, &conf->cache_state);
|
||||
|
||||
wait_for_stripe:
|
||||
if (noblock)
|
||||
goto out;
|
||||
|
||||
set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
|
||||
r5l_wake_reclaim(conf->log, 0);
|
||||
wait_event_lock_irq(conf->wait_for_stripe,
|
||||
is_inactive_blocked(conf, hash),
|
||||
*(conf->hash_locks + hash));
|
||||
clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
|
||||
goto retry;
|
||||
|
||||
out:
|
||||
spin_unlock_irq(conf->hash_locks + hash);
|
||||
return sh;
|
||||
}
|
||||
|
||||
struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
|
||||
sector_t sector, bool previous, bool noblock, bool noquiesce)
|
||||
{
|
||||
return __raid5_get_active_stripe(conf, NULL, sector, previous, noblock,
|
||||
noquiesce);
|
||||
}
|
||||
|
||||
static bool is_full_stripe_write(struct stripe_head *sh)
|
||||
{
|
||||
BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
|
||||
@ -824,13 +910,13 @@ static bool stripe_can_batch(struct stripe_head *sh)
|
||||
}
|
||||
|
||||
/* we only do back search */
|
||||
static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
|
||||
static void stripe_add_to_batch_list(struct r5conf *conf,
|
||||
struct stripe_head *sh, struct stripe_head *last_sh)
|
||||
{
|
||||
struct stripe_head *head;
|
||||
sector_t head_sector, tmp_sec;
|
||||
int hash;
|
||||
int dd_idx;
|
||||
int inc_empty_inactive_list_flag;
|
||||
|
||||
/* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
|
||||
tmp_sec = sh->sector;
|
||||
@ -838,36 +924,20 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
|
||||
return;
|
||||
head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
|
||||
|
||||
hash = stripe_hash_locks_hash(conf, head_sector);
|
||||
spin_lock_irq(conf->hash_locks + hash);
|
||||
head = __find_stripe(conf, head_sector, conf->generation);
|
||||
if (head && !atomic_inc_not_zero(&head->count)) {
|
||||
spin_lock(&conf->device_lock);
|
||||
if (!atomic_read(&head->count)) {
|
||||
if (!test_bit(STRIPE_HANDLE, &head->state))
|
||||
atomic_inc(&conf->active_stripes);
|
||||
BUG_ON(list_empty(&head->lru) &&
|
||||
!test_bit(STRIPE_EXPANDING, &head->state));
|
||||
inc_empty_inactive_list_flag = 0;
|
||||
if (!list_empty(conf->inactive_list + hash))
|
||||
inc_empty_inactive_list_flag = 1;
|
||||
list_del_init(&head->lru);
|
||||
if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
|
||||
atomic_inc(&conf->empty_inactive_list_nr);
|
||||
if (head->group) {
|
||||
head->group->stripes_cnt--;
|
||||
head->group = NULL;
|
||||
}
|
||||
}
|
||||
if (last_sh && head_sector == last_sh->sector) {
|
||||
head = last_sh;
|
||||
atomic_inc(&head->count);
|
||||
spin_unlock(&conf->device_lock);
|
||||
} else {
|
||||
hash = stripe_hash_locks_hash(conf, head_sector);
|
||||
spin_lock_irq(conf->hash_locks + hash);
|
||||
head = find_get_stripe(conf, head_sector, conf->generation,
|
||||
hash);
|
||||
spin_unlock_irq(conf->hash_locks + hash);
|
||||
if (!head)
|
||||
return;
|
||||
if (!stripe_can_batch(head))
|
||||
goto out;
|
||||
}
|
||||
spin_unlock_irq(conf->hash_locks + hash);
|
||||
|
||||
if (!head)
|
||||
return;
|
||||
if (!stripe_can_batch(head))
|
||||
goto out;
|
||||
|
||||
lock_two_stripes(head, sh);
|
||||
/* clear_batch_ready clear the flag */
|
||||
@ -2882,10 +2952,10 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
|
||||
clear_bit(R5_LOCKED, &sh->dev[i].flags);
|
||||
set_bit(STRIPE_HANDLE, &sh->state);
|
||||
raid5_release_stripe(sh);
|
||||
|
||||
if (sh->batch_head && sh != sh->batch_head)
|
||||
raid5_release_stripe(sh->batch_head);
|
||||
raid5_release_stripe(sh);
|
||||
}
|
||||
|
||||
static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
@ -3413,39 +3483,32 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
|
||||
s->locked, s->ops_request);
|
||||
}
|
||||
|
||||
/*
|
||||
* Each stripe/dev can have one or more bion attached.
|
||||
* toread/towrite point to the first in a chain.
|
||||
* The bi_next chain must be in order.
|
||||
*/
|
||||
static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
|
||||
int forwrite, int previous)
|
||||
static bool stripe_bio_overlaps(struct stripe_head *sh, struct bio *bi,
|
||||
int dd_idx, int forwrite)
|
||||
{
|
||||
struct bio **bip;
|
||||
struct r5conf *conf = sh->raid_conf;
|
||||
int firstwrite=0;
|
||||
struct bio **bip;
|
||||
|
||||
pr_debug("adding bi b#%llu to stripe s#%llu\n",
|
||||
(unsigned long long)bi->bi_iter.bi_sector,
|
||||
(unsigned long long)sh->sector);
|
||||
pr_debug("checking bi b#%llu to stripe s#%llu\n",
|
||||
bi->bi_iter.bi_sector, sh->sector);
|
||||
|
||||
spin_lock_irq(&sh->stripe_lock);
|
||||
/* Don't allow new IO added to stripes in batch list */
|
||||
if (sh->batch_head)
|
||||
goto overlap;
|
||||
if (forwrite) {
|
||||
return true;
|
||||
|
||||
if (forwrite)
|
||||
bip = &sh->dev[dd_idx].towrite;
|
||||
if (*bip == NULL)
|
||||
firstwrite = 1;
|
||||
} else
|
||||
else
|
||||
bip = &sh->dev[dd_idx].toread;
|
||||
|
||||
while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
|
||||
if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
|
||||
goto overlap;
|
||||
bip = & (*bip)->bi_next;
|
||||
return true;
|
||||
bip = &(*bip)->bi_next;
|
||||
}
|
||||
|
||||
if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
|
||||
goto overlap;
|
||||
return true;
|
||||
|
||||
if (forwrite && raid5_has_ppl(conf)) {
|
||||
/*
|
||||
@ -3474,9 +3537,30 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
|
||||
}
|
||||
|
||||
if (first + conf->chunk_sectors * (count - 1) != last)
|
||||
goto overlap;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
|
||||
int dd_idx, int forwrite, int previous)
|
||||
{
|
||||
struct r5conf *conf = sh->raid_conf;
|
||||
struct bio **bip;
|
||||
int firstwrite = 0;
|
||||
|
||||
if (forwrite) {
|
||||
bip = &sh->dev[dd_idx].towrite;
|
||||
if (!*bip)
|
||||
firstwrite = 1;
|
||||
} else {
|
||||
bip = &sh->dev[dd_idx].toread;
|
||||
}
|
||||
|
||||
while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector)
|
||||
bip = &(*bip)->bi_next;
|
||||
|
||||
if (!forwrite || previous)
|
||||
clear_bit(STRIPE_BATCH_READY, &sh->state);
|
||||
|
||||
@ -3502,9 +3586,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
|
||||
sh->overwrite_disks++;
|
||||
}
|
||||
|
||||
pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
|
||||
(unsigned long long)(*bip)->bi_iter.bi_sector,
|
||||
(unsigned long long)sh->sector, dd_idx);
|
||||
pr_debug("added bi b#%llu to stripe s#%llu, disk %d, logical %llu\n",
|
||||
(*bip)->bi_iter.bi_sector, sh->sector, dd_idx,
|
||||
sh->dev[dd_idx].sector);
|
||||
|
||||
if (conf->mddev->bitmap && firstwrite) {
|
||||
/* Cannot hold spinlock over bitmap_startwrite,
|
||||
@ -3512,7 +3596,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
|
||||
* we have added to the bitmap and set bm_seq.
|
||||
* So set STRIPE_BITMAP_PENDING to prevent
|
||||
* batching.
|
||||
* If multiple add_stripe_bio() calls race here they
|
||||
* If multiple __add_stripe_bio() calls race here they
|
||||
* must all set STRIPE_BITMAP_PENDING. So only the first one
|
||||
* to complete "bitmap_startwrite" gets to set
|
||||
* STRIPE_BIT_DELAY. This is important as once a stripe
|
||||
@ -3530,16 +3614,27 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
|
||||
set_bit(STRIPE_BIT_DELAY, &sh->state);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
}
|
||||
|
||||
if (stripe_can_batch(sh))
|
||||
stripe_add_to_batch_list(conf, sh);
|
||||
return 1;
|
||||
/*
|
||||
* Each stripe/dev can have one or more bios attached.
|
||||
* toread/towrite point to the first in a chain.
|
||||
* The bi_next chain must be in order.
|
||||
*/
|
||||
static bool add_stripe_bio(struct stripe_head *sh, struct bio *bi,
|
||||
int dd_idx, int forwrite, int previous)
|
||||
{
|
||||
spin_lock_irq(&sh->stripe_lock);
|
||||
|
||||
overlap:
|
||||
set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
|
||||
if (stripe_bio_overlaps(sh, bi, dd_idx, forwrite)) {
|
||||
set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
__add_stripe_bio(sh, bi, dd_idx, forwrite, previous);
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void end_reshape(struct r5conf *conf);
|
||||
@ -5785,17 +5880,215 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
|
||||
bio_endio(bi);
|
||||
}
|
||||
|
||||
/*
 * Tell whether @sector lies ahead of @reshape_sector in the direction the
 * reshape is travelling: below it for a backwards reshape, at or above it
 * otherwise.
 */
static bool ahead_of_reshape(struct mddev *mddev, sector_t sector,
			     sector_t reshape_sector)
{
	if (mddev->reshape_backwards)
		return sector < reshape_sector;

	return sector >= reshape_sector;
}
|
||||
|
||||
/*
 * Range variant of ahead_of_reshape(): true when the whole range is ahead of
 * @reshape_sector.  For a backwards reshape the upper bound @max decides,
 * otherwise the lower bound @min does.
 */
static bool range_ahead_of_reshape(struct mddev *mddev, sector_t min,
				   sector_t max, sector_t reshape_sector)
{
	if (mddev->reshape_backwards)
		return max < reshape_sector;

	return min >= reshape_sector;
}
|
||||
|
||||
static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf,
|
||||
struct stripe_head *sh)
|
||||
{
|
||||
sector_t max_sector = 0, min_sector = MaxSector;
|
||||
bool ret = false;
|
||||
int dd_idx;
|
||||
|
||||
for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
|
||||
if (dd_idx == sh->pd_idx)
|
||||
continue;
|
||||
|
||||
min_sector = min(min_sector, sh->dev[dd_idx].sector);
|
||||
max_sector = min(max_sector, sh->dev[dd_idx].sector);
|
||||
}
|
||||
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
|
||||
if (!range_ahead_of_reshape(mddev, min_sector, max_sector,
|
||||
conf->reshape_progress))
|
||||
/* mismatch, need to try again */
|
||||
ret = true;
|
||||
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int add_all_stripe_bios(struct r5conf *conf,
|
||||
struct stripe_request_ctx *ctx, struct stripe_head *sh,
|
||||
struct bio *bi, int forwrite, int previous)
|
||||
{
|
||||
int dd_idx;
|
||||
int ret = 1;
|
||||
|
||||
spin_lock_irq(&sh->stripe_lock);
|
||||
|
||||
for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
|
||||
struct r5dev *dev = &sh->dev[dd_idx];
|
||||
|
||||
if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
|
||||
continue;
|
||||
|
||||
if (dev->sector < ctx->first_sector ||
|
||||
dev->sector >= ctx->last_sector)
|
||||
continue;
|
||||
|
||||
if (stripe_bio_overlaps(sh, bi, dd_idx, forwrite)) {
|
||||
set_bit(R5_Overlap, &dev->flags);
|
||||
ret = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
goto out;
|
||||
|
||||
for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
|
||||
struct r5dev *dev = &sh->dev[dd_idx];
|
||||
|
||||
if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
|
||||
continue;
|
||||
|
||||
if (dev->sector < ctx->first_sector ||
|
||||
dev->sector >= ctx->last_sector)
|
||||
continue;
|
||||
|
||||
__add_stripe_bio(sh, bi, dd_idx, forwrite, previous);
|
||||
clear_bit((dev->sector - ctx->first_sector) >>
|
||||
RAID5_STRIPE_SHIFT(conf), ctx->sectors_to_do);
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
struct r5conf *conf, struct stripe_request_ctx *ctx,
|
||||
sector_t logical_sector, struct bio *bi)
|
||||
{
|
||||
const int rw = bio_data_dir(bi);
|
||||
enum stripe_result ret;
|
||||
struct stripe_head *sh;
|
||||
sector_t new_sector;
|
||||
int previous = 0;
|
||||
int seq, dd_idx;
|
||||
|
||||
seq = read_seqcount_begin(&conf->gen_lock);
|
||||
|
||||
if (unlikely(conf->reshape_progress != MaxSector)) {
|
||||
/*
|
||||
* Spinlock is needed as reshape_progress may be
|
||||
* 64bit on a 32bit platform, and so it might be
|
||||
* possible to see a half-updated value
|
||||
* Of course reshape_progress could change after
|
||||
* the lock is dropped, so once we get a reference
|
||||
* to the stripe that we think it is, we will have
|
||||
* to check again.
|
||||
*/
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
if (ahead_of_reshape(mddev, logical_sector,
|
||||
conf->reshape_progress)) {
|
||||
previous = 1;
|
||||
} else {
|
||||
if (ahead_of_reshape(mddev, logical_sector,
|
||||
conf->reshape_safe)) {
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
return STRIPE_SCHEDULE_AND_RETRY;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
}
|
||||
|
||||
new_sector = raid5_compute_sector(conf, logical_sector, previous,
|
||||
&dd_idx, NULL);
|
||||
pr_debug("raid456: %s, sector %llu logical %llu\n", __func__,
|
||||
new_sector, logical_sector);
|
||||
|
||||
sh = __raid5_get_active_stripe(conf, ctx, new_sector, previous,
|
||||
(bi->bi_opf & REQ_RAHEAD), 0);
|
||||
if (unlikely(!sh)) {
|
||||
/* cannot get stripe, just give-up */
|
||||
bi->bi_status = BLK_STS_IOERR;
|
||||
return STRIPE_FAIL;
|
||||
}
|
||||
|
||||
if (unlikely(previous) &&
|
||||
stripe_ahead_of_reshape(mddev, conf, sh)) {
|
||||
/*
|
||||
* Expansion moved on while waiting for a stripe.
|
||||
* Expansion could still move past after this
|
||||
* test, but as we are holding a reference to
|
||||
* 'sh', we know that if that happens,
|
||||
* STRIPE_EXPANDING will get set and the expansion
|
||||
* won't proceed until we finish with the stripe.
|
||||
*/
|
||||
ret = STRIPE_SCHEDULE_AND_RETRY;
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
if (read_seqcount_retry(&conf->gen_lock, seq)) {
|
||||
/* Might have got the wrong stripe_head by accident */
|
||||
ret = STRIPE_RETRY;
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
if (test_bit(STRIPE_EXPANDING, &sh->state) ||
|
||||
!add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) {
|
||||
/*
|
||||
* Stripe is busy expanding or add failed due to
|
||||
* overlap. Flush everything and wait a while.
|
||||
*/
|
||||
md_wakeup_thread(mddev->thread);
|
||||
ret = STRIPE_SCHEDULE_AND_RETRY;
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
if (stripe_can_batch(sh)) {
|
||||
stripe_add_to_batch_list(conf, sh, ctx->batch_last);
|
||||
if (ctx->batch_last)
|
||||
raid5_release_stripe(ctx->batch_last);
|
||||
atomic_inc(&sh->count);
|
||||
ctx->batch_last = sh;
|
||||
}
|
||||
|
||||
if (ctx->do_flush) {
|
||||
set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
|
||||
/* we only need flush for one stripe */
|
||||
ctx->do_flush = false;
|
||||
}
|
||||
|
||||
set_bit(STRIPE_HANDLE, &sh->state);
|
||||
clear_bit(STRIPE_DELAYED, &sh->state);
|
||||
if ((!sh->batch_head || sh == sh->batch_head) &&
|
||||
(bi->bi_opf & REQ_SYNC) &&
|
||||
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
|
||||
atomic_inc(&conf->preread_active_stripes);
|
||||
|
||||
release_stripe_plug(mddev, sh);
|
||||
return STRIPE_SUCCESS;
|
||||
|
||||
out_release:
|
||||
raid5_release_stripe(sh);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
||||
{
|
||||
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
||||
struct r5conf *conf = mddev->private;
|
||||
int dd_idx;
|
||||
sector_t new_sector;
|
||||
sector_t logical_sector, last_sector;
|
||||
struct stripe_head *sh;
|
||||
sector_t logical_sector;
|
||||
struct stripe_request_ctx ctx = {};
|
||||
const int rw = bio_data_dir(bi);
|
||||
DEFINE_WAIT(w);
|
||||
bool do_prepare;
|
||||
bool do_flush = false;
|
||||
enum stripe_result res;
|
||||
int s, stripe_cnt;
|
||||
|
||||
if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
|
||||
int ret = log_handle_flush_request(conf, bi);
|
||||
@ -5811,7 +6104,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
||||
* if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
|
||||
* we need to flush journal device
|
||||
*/
|
||||
do_flush = bi->bi_opf & REQ_PREFLUSH;
|
||||
ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
|
||||
}
|
||||
|
||||
if (!md_write_start(mddev, bi))
|
||||
@ -5835,134 +6128,68 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
||||
}
|
||||
|
||||
logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
|
||||
last_sector = bio_end_sector(bi);
|
||||
ctx.first_sector = logical_sector;
|
||||
ctx.last_sector = bio_end_sector(bi);
|
||||
bi->bi_next = NULL;
|
||||
|
||||
stripe_cnt = DIV_ROUND_UP_SECTOR_T(ctx.last_sector - logical_sector,
|
||||
RAID5_STRIPE_SECTORS(conf));
|
||||
bitmap_set(ctx.sectors_to_do, 0, stripe_cnt);
|
||||
|
||||
pr_debug("raid456: %s, logical %llu to %llu\n", __func__,
|
||||
bi->bi_iter.bi_sector, ctx.last_sector);
|
||||
|
||||
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
|
||||
if ((bi->bi_opf & REQ_NOWAIT) &&
|
||||
(conf->reshape_progress != MaxSector) &&
|
||||
(mddev->reshape_backwards
|
||||
? (logical_sector > conf->reshape_progress && logical_sector <= conf->reshape_safe)
|
||||
: (logical_sector >= conf->reshape_safe && logical_sector < conf->reshape_progress))) {
|
||||
!ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
|
||||
ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
|
||||
bio_wouldblock_error(bi);
|
||||
if (rw == WRITE)
|
||||
md_write_end(mddev);
|
||||
return true;
|
||||
}
|
||||
md_account_bio(mddev, &bi);
|
||||
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
|
||||
for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
|
||||
int previous;
|
||||
int seq;
|
||||
|
||||
do_prepare = false;
|
||||
retry:
|
||||
seq = read_seqcount_begin(&conf->gen_lock);
|
||||
previous = 0;
|
||||
if (do_prepare)
|
||||
prepare_to_wait(&conf->wait_for_overlap, &w,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
if (unlikely(conf->reshape_progress != MaxSector)) {
|
||||
/* spinlock is needed as reshape_progress may be
|
||||
* 64bit on a 32bit platform, and so it might be
|
||||
* possible to see a half-updated value
|
||||
* Of course reshape_progress could change after
|
||||
* the lock is dropped, so once we get a reference
|
||||
* to the stripe that we think it is, we will have
|
||||
* to check again.
|
||||
*/
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
if (mddev->reshape_backwards
|
||||
? logical_sector < conf->reshape_progress
|
||||
: logical_sector >= conf->reshape_progress) {
|
||||
previous = 1;
|
||||
} else {
|
||||
if (mddev->reshape_backwards
|
||||
? logical_sector < conf->reshape_safe
|
||||
: logical_sector >= conf->reshape_safe) {
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
schedule();
|
||||
do_prepare = true;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
}
|
||||
|
||||
new_sector = raid5_compute_sector(conf, logical_sector,
|
||||
previous,
|
||||
&dd_idx, NULL);
|
||||
pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
|
||||
(unsigned long long)new_sector,
|
||||
(unsigned long long)logical_sector);
|
||||
|
||||
sh = raid5_get_active_stripe(conf, new_sector, previous,
|
||||
(bi->bi_opf & REQ_RAHEAD), 0);
|
||||
if (sh) {
|
||||
if (unlikely(previous)) {
|
||||
/* expansion might have moved on while waiting for a
|
||||
* stripe, so we must do the range check again.
|
||||
* Expansion could still move past after this
|
||||
* test, but as we are holding a reference to
|
||||
* 'sh', we know that if that happens,
|
||||
* STRIPE_EXPANDING will get set and the expansion
|
||||
* won't proceed until we finish with the stripe.
|
||||
*/
|
||||
int must_retry = 0;
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
if (mddev->reshape_backwards
|
||||
? logical_sector >= conf->reshape_progress
|
||||
: logical_sector < conf->reshape_progress)
|
||||
/* mismatch, need to try again */
|
||||
must_retry = 1;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
if (must_retry) {
|
||||
raid5_release_stripe(sh);
|
||||
schedule();
|
||||
do_prepare = true;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
if (read_seqcount_retry(&conf->gen_lock, seq)) {
|
||||
/* Might have got the wrong stripe_head
|
||||
* by accident
|
||||
*/
|
||||
raid5_release_stripe(sh);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (test_bit(STRIPE_EXPANDING, &sh->state) ||
|
||||
!add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
|
||||
/* Stripe is busy expanding or
|
||||
* add failed due to overlap. Flush everything
|
||||
* and wait a while
|
||||
*/
|
||||
md_wakeup_thread(mddev->thread);
|
||||
raid5_release_stripe(sh);
|
||||
schedule();
|
||||
do_prepare = true;
|
||||
goto retry;
|
||||
}
|
||||
if (do_flush) {
|
||||
set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
|
||||
/* we only need flush for one stripe */
|
||||
do_flush = false;
|
||||
}
|
||||
|
||||
set_bit(STRIPE_HANDLE, &sh->state);
|
||||
clear_bit(STRIPE_DELAYED, &sh->state);
|
||||
if ((!sh->batch_head || sh == sh->batch_head) &&
|
||||
(bi->bi_opf & REQ_SYNC) &&
|
||||
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
|
||||
atomic_inc(&conf->preread_active_stripes);
|
||||
release_stripe_plug(mddev, sh);
|
||||
} else {
|
||||
/* cannot get stripe for read-ahead, just give-up */
|
||||
bi->bi_status = BLK_STS_IOERR;
|
||||
add_wait_queue(&conf->wait_for_overlap, &wait);
|
||||
while (1) {
|
||||
res = make_stripe_request(mddev, conf, &ctx, logical_sector,
|
||||
bi);
|
||||
if (res == STRIPE_FAIL)
|
||||
break;
|
||||
|
||||
if (res == STRIPE_RETRY)
|
||||
continue;
|
||||
|
||||
if (res == STRIPE_SCHEDULE_AND_RETRY) {
|
||||
/*
|
||||
* Must release the reference to batch_last before
|
||||
* scheduling and waiting for work to be done,
|
||||
* otherwise the batch_last stripe head could prevent
|
||||
* raid5_activate_delayed() from making progress
|
||||
* and thus deadlocking.
|
||||
*/
|
||||
if (ctx.batch_last) {
|
||||
raid5_release_stripe(ctx.batch_last);
|
||||
ctx.batch_last = NULL;
|
||||
}
|
||||
|
||||
wait_woken(&wait, TASK_UNINTERRUPTIBLE,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
continue;
|
||||
}
|
||||
|
||||
s = find_first_bit(ctx.sectors_to_do, stripe_cnt);
|
||||
if (s == stripe_cnt)
|
||||
break;
|
||||
|
||||
logical_sector = ctx.first_sector +
|
||||
(s << RAID5_STRIPE_SHIFT(conf));
|
||||
}
|
||||
finish_wait(&conf->wait_for_overlap, &w);
|
||||
remove_wait_queue(&conf->wait_for_overlap, &wait);
|
||||
|
||||
if (ctx.batch_last)
|
||||
raid5_release_stripe(ctx.batch_last);
|
||||
|
||||
if (rw == WRITE)
|
||||
md_write_end(mddev);
|
||||
@ -7815,7 +8042,15 @@ static int raid5_run(struct mddev *mddev)
|
||||
mddev->queue->limits.discard_granularity < stripe)
|
||||
blk_queue_max_discard_sectors(mddev->queue, 0);
|
||||
|
||||
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
|
||||
/*
|
||||
* Requests require having a bitmap for each stripe.
|
||||
* Limit the max sectors based on this.
|
||||
*/
|
||||
blk_queue_max_hw_sectors(mddev->queue,
|
||||
RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf));
|
||||
|
||||
/* No restrictions on the number of segments in the request */
|
||||
blk_queue_max_segments(mddev->queue, USHRT_MAX);
|
||||
}
|
||||
|
||||
if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
|
||||
@ -8066,8 +8301,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
* find the disk ... but prefer rdev->saved_raid_disk
|
||||
* if possible.
|
||||
*/
|
||||
if (rdev->saved_raid_disk >= 0 &&
|
||||
rdev->saved_raid_disk >= first &&
|
||||
if (rdev->saved_raid_disk >= first &&
|
||||
rdev->saved_raid_disk <= last &&
|
||||
conf->disks[rdev->saved_raid_disk].rdev == NULL)
|
||||
first = rdev->saved_raid_disk;
|
||||
@ -8704,8 +8938,11 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
|
||||
err = log_init(conf, NULL, true);
|
||||
if (!err) {
|
||||
err = resize_stripes(conf, conf->pool_size);
|
||||
if (err)
|
||||
if (err) {
|
||||
mddev_suspend(mddev);
|
||||
log_exit(conf);
|
||||
mddev_resume(mddev);
|
||||
}
|
||||
}
|
||||
} else
|
||||
err = -EINVAL;
|
||||
|
@ -812,7 +812,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
|
||||
struct stripe_head *sh);
|
||||
extern struct stripe_head *
|
||||
raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
|
||||
int previous, int noblock, int noquiesce);
|
||||
bool previous, bool noblock, bool noquiesce);
|
||||
extern int raid5_calc_degraded(struct r5conf *conf);
|
||||
extern int r5c_journal_mode_set(struct mddev *mddev, int journal_mode);
|
||||
#endif
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
menu "NVME Support"
|
||||
|
||||
source "drivers/nvme/common/Kconfig"
|
||||
source "drivers/nvme/host/Kconfig"
|
||||
source "drivers/nvme/target/Kconfig"
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
obj-$(CONFIG_NVME_COMMON) += common/
|
||||
obj-y += host/
|
||||
obj-y += target/
|
||||
|
4
drivers/nvme/common/Kconfig
Normal file
4
drivers/nvme/common/Kconfig
Normal file
@ -0,0 +1,4 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
config NVME_COMMON
|
||||
tristate
|
7
drivers/nvme/common/Makefile
Normal file
7
drivers/nvme/common/Makefile
Normal file
@ -0,0 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
ccflags-y += -I$(src)
|
||||
|
||||
obj-$(CONFIG_NVME_COMMON) += nvme-common.o
|
||||
|
||||
nvme-common-y += auth.o
|
483
drivers/nvme/common/auth.c
Normal file
483
drivers/nvme/common/auth.c
Normal file
@ -0,0 +1,483 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2020 Hannes Reinecke, SUSE Linux
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/base64.h>
|
||||
#include <linux/prandom.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/dh.h>
|
||||
#include <linux/nvme.h>
|
||||
#include <linux/nvme-auth.h>
|
||||
|
||||
static u32 nvme_dhchap_seqnum;
|
||||
static DEFINE_MUTEX(nvme_dhchap_mutex);
|
||||
|
||||
u32 nvme_auth_get_seqnum(void)
|
||||
{
|
||||
u32 seqnum;
|
||||
|
||||
mutex_lock(&nvme_dhchap_mutex);
|
||||
if (!nvme_dhchap_seqnum)
|
||||
nvme_dhchap_seqnum = prandom_u32();
|
||||
else {
|
||||
nvme_dhchap_seqnum++;
|
||||
if (!nvme_dhchap_seqnum)
|
||||
nvme_dhchap_seqnum++;
|
||||
}
|
||||
seqnum = nvme_dhchap_seqnum;
|
||||
mutex_unlock(&nvme_dhchap_mutex);
|
||||
return seqnum;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_get_seqnum);
|
||||
|
||||
static struct nvme_auth_dhgroup_map {
|
||||
const char name[16];
|
||||
const char kpp[16];
|
||||
} dhgroup_map[] = {
|
||||
[NVME_AUTH_DHGROUP_NULL] = {
|
||||
.name = "null", .kpp = "null" },
|
||||
[NVME_AUTH_DHGROUP_2048] = {
|
||||
.name = "ffdhe2048", .kpp = "ffdhe2048(dh)" },
|
||||
[NVME_AUTH_DHGROUP_3072] = {
|
||||
.name = "ffdhe3072", .kpp = "ffdhe3072(dh)" },
|
||||
[NVME_AUTH_DHGROUP_4096] = {
|
||||
.name = "ffdhe4096", .kpp = "ffdhe4096(dh)" },
|
||||
[NVME_AUTH_DHGROUP_6144] = {
|
||||
.name = "ffdhe6144", .kpp = "ffdhe6144(dh)" },
|
||||
[NVME_AUTH_DHGROUP_8192] = {
|
||||
.name = "ffdhe8192", .kpp = "ffdhe8192(dh)" },
|
||||
};
|
||||
|
||||
/*
 * nvme_auth_dhgroup_name - map a DH group identifier to its name
 * @dhgroup_id: index into dhgroup_map
 *
 * Return: the @name string of the table entry, or NULL when @dhgroup_id
 * lies beyond the end of the table.
 */
const char *nvme_auth_dhgroup_name(u8 dhgroup_id)
{
	if (dhgroup_id < ARRAY_SIZE(dhgroup_map))
		return dhgroup_map[dhgroup_id].name;

	return NULL;
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_dhgroup_name);
|
||||
|
||||
/*
 * nvme_auth_dhgroup_kpp - map a DH group identifier to its kpp string
 * @dhgroup_id: index into dhgroup_map
 *
 * Return: the @kpp string of the table entry, or NULL when @dhgroup_id
 * lies beyond the end of the table.
 */
const char *nvme_auth_dhgroup_kpp(u8 dhgroup_id)
{
	return dhgroup_id < ARRAY_SIZE(dhgroup_map) ?
		dhgroup_map[dhgroup_id].kpp : NULL;
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_dhgroup_kpp);
|
||||
|
||||
u8 nvme_auth_dhgroup_id(const char *dhgroup_name)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!dhgroup_name || !strlen(dhgroup_name))
|
||||
return NVME_AUTH_DHGROUP_INVALID;
|
||||
for (i = 0; i < ARRAY_SIZE(dhgroup_map); i++) {
|
||||
if (!strlen(dhgroup_map[i].name))
|
||||
continue;
|
||||
if (!strncmp(dhgroup_map[i].name, dhgroup_name,
|
||||
strlen(dhgroup_map[i].name)))
|
||||
return i;
|
||||
}
|
||||
return NVME_AUTH_DHGROUP_INVALID;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_dhgroup_id);
|
||||
|
||||
static struct nvme_dhchap_hash_map {
|
||||
int len;
|
||||
const char hmac[15];
|
||||
const char digest[8];
|
||||
} hash_map[] = {
|
||||
[NVME_AUTH_HASH_SHA256] = {
|
||||
.len = 32,
|
||||
.hmac = "hmac(sha256)",
|
||||
.digest = "sha256",
|
||||
},
|
||||
[NVME_AUTH_HASH_SHA384] = {
|
||||
.len = 48,
|
||||
.hmac = "hmac(sha384)",
|
||||
.digest = "sha384",
|
||||
},
|
||||
[NVME_AUTH_HASH_SHA512] = {
|
||||
.len = 64,
|
||||
.hmac = "hmac(sha512)",
|
||||
.digest = "sha512",
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * nvme_auth_hmac_name - map a hash identifier to its HMAC algorithm name
 * @hmac_id: index into hash_map
 *
 * Return: the @hmac string of the table entry, or NULL when @hmac_id lies
 * beyond the end of the table.
 */
const char *nvme_auth_hmac_name(u8 hmac_id)
{
	if (hmac_id < ARRAY_SIZE(hash_map))
		return hash_map[hmac_id].hmac;

	return NULL;
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_hmac_name);
|
||||
|
||||
/*
 * nvme_auth_digest_name - map a hash identifier to its digest name
 * @hmac_id: index into hash_map
 *
 * Return: the @digest string of the table entry, or NULL when @hmac_id
 * lies beyond the end of the table.
 */
const char *nvme_auth_digest_name(u8 hmac_id)
{
	return hmac_id < ARRAY_SIZE(hash_map) ?
		hash_map[hmac_id].digest : NULL;
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_digest_name);
|
||||
|
||||
u8 nvme_auth_hmac_id(const char *hmac_name)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!hmac_name || !strlen(hmac_name))
|
||||
return NVME_AUTH_HASH_INVALID;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(hash_map); i++) {
|
||||
if (!strlen(hash_map[i].hmac))
|
||||
continue;
|
||||
if (!strncmp(hash_map[i].hmac, hmac_name,
|
||||
strlen(hash_map[i].hmac)))
|
||||
return i;
|
||||
}
|
||||
return NVME_AUTH_HASH_INVALID;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_hmac_id);
|
||||
|
||||
/*
 * nvme_auth_hmac_hash_len - hash length recorded for a hash identifier
 * @hmac_id: index into hash_map
 *
 * Return: the @len field of the table entry, or 0 when @hmac_id lies
 * beyond the end of the table.
 */
size_t nvme_auth_hmac_hash_len(u8 hmac_id)
{
	return hmac_id < ARRAY_SIZE(hash_map) ? hash_map[hmac_id].len : 0;
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_hmac_hash_len);
|
||||
|
||||
struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
|
||||
u8 key_hash)
|
||||
{
|
||||
struct nvme_dhchap_key *key;
|
||||
unsigned char *p;
|
||||
u32 crc;
|
||||
int ret, key_len;
|
||||
size_t allocated_len = strlen(secret);
|
||||
|
||||
/* Secret might be affixed with a ':' */
|
||||
p = strrchr(secret, ':');
|
||||
if (p)
|
||||
allocated_len = p - secret;
|
||||
key = kzalloc(sizeof(*key), GFP_KERNEL);
|
||||
if (!key)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
key->key = kzalloc(allocated_len, GFP_KERNEL);
|
||||
if (!key->key) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_key;
|
||||
}
|
||||
|
||||
key_len = base64_decode(secret, allocated_len, key->key);
|
||||
if (key_len < 0) {
|
||||
pr_debug("base64 key decoding error %d\n",
|
||||
key_len);
|
||||
ret = key_len;
|
||||
goto out_free_secret;
|
||||
}
|
||||
|
||||
if (key_len != 36 && key_len != 52 &&
|
||||
key_len != 68) {
|
||||
pr_err("Invalid key len %d\n", key_len);
|
||||
ret = -EINVAL;
|
||||
goto out_free_secret;
|
||||
}
|
||||
|
||||
if (key_hash > 0 &&
|
||||
(key_len - 4) != nvme_auth_hmac_hash_len(key_hash)) {
|
||||
pr_err("Mismatched key len %d for %s\n", key_len,
|
||||
nvme_auth_hmac_name(key_hash));
|
||||
ret = -EINVAL;
|
||||
goto out_free_secret;
|
||||
}
|
||||
|
||||
/* The last four bytes is the CRC in little-endian format */
|
||||
key_len -= 4;
|
||||
/*
|
||||
* The linux implementation doesn't do pre- and post-increments,
|
||||
* so we have to do it manually.
|
||||
*/
|
||||
crc = ~crc32(~0, key->key, key_len);
|
||||
|
||||
if (get_unaligned_le32(key->key + key_len) != crc) {
|
||||
pr_err("key crc mismatch (key %08x, crc %08x)\n",
|
||||
get_unaligned_le32(key->key + key_len), crc);
|
||||
ret = -EKEYREJECTED;
|
||||
goto out_free_secret;
|
||||
}
|
||||
key->len = key_len;
|
||||
key->hash = key_hash;
|
||||
return key;
|
||||
out_free_secret:
|
||||
kfree_sensitive(key->key);
|
||||
out_free_key:
|
||||
kfree(key);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_extract_key);
|
||||
|
||||
void nvme_auth_free_key(struct nvme_dhchap_key *key)
|
||||
{
|
||||
if (!key)
|
||||
return;
|
||||
kfree_sensitive(key->key);
|
||||
kfree(key);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_free_key);
|
||||
|
||||
u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn)
|
||||
{
|
||||
const char *hmac_name;
|
||||
struct crypto_shash *key_tfm;
|
||||
struct shash_desc *shash;
|
||||
u8 *transformed_key;
|
||||
int ret;
|
||||
|
||||
if (!key || !key->key) {
|
||||
pr_warn("No key specified\n");
|
||||
return ERR_PTR(-ENOKEY);
|
||||
}
|
||||
if (key->hash == 0) {
|
||||
transformed_key = kmemdup(key->key, key->len, GFP_KERNEL);
|
||||
return transformed_key ? transformed_key : ERR_PTR(-ENOMEM);
|
||||
}
|
||||
hmac_name = nvme_auth_hmac_name(key->hash);
|
||||
if (!hmac_name) {
|
||||
pr_warn("Invalid key hash id %d\n", key->hash);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
key_tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(key_tfm))
|
||||
return (u8 *)key_tfm;
|
||||
|
||||
shash = kmalloc(sizeof(struct shash_desc) +
|
||||
crypto_shash_descsize(key_tfm),
|
||||
GFP_KERNEL);
|
||||
if (!shash) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_key;
|
||||
}
|
||||
|
||||
transformed_key = kzalloc(crypto_shash_digestsize(key_tfm), GFP_KERNEL);
|
||||
if (!transformed_key) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_shash;
|
||||
}
|
||||
|
||||
shash->tfm = key_tfm;
|
||||
ret = crypto_shash_setkey(key_tfm, key->key, key->len);
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
ret = crypto_shash_update(shash, nqn, strlen(nqn));
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
ret = crypto_shash_update(shash, "NVMe-over-Fabrics", 17);
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
ret = crypto_shash_final(shash, transformed_key);
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
|
||||
kfree(shash);
|
||||
crypto_free_shash(key_tfm);
|
||||
|
||||
return transformed_key;
|
||||
|
||||
out_free_transformed_key:
|
||||
kfree_sensitive(transformed_key);
|
||||
out_free_shash:
|
||||
kfree(shash);
|
||||
out_free_key:
|
||||
crypto_free_shash(key_tfm);
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_transform_key);
|
||||
|
||||
/*
 * nvme_auth_hash_skey - hash a session key with the plain digest
 * @hmac_id:  hash identifier selecting the digest via
 *            nvme_auth_digest_name()
 * @skey:     session key to hash
 * @skey_len: length of @skey in bytes
 * @hkey:     output buffer; receives one full digest, so the caller must
 *            size it for the selected algorithm
 *
 * Return: 0 on success, -EINVAL when @hmac_id is outside the hash table,
 * or the error reported by the crypto layer (including the reason the
 * digest could not be allocated).
 */
static int nvme_auth_hash_skey(int hmac_id, u8 *skey, size_t skey_len, u8 *hkey)
{
	const char *digest_name;
	struct crypto_shash *tfm;
	int ret;

	digest_name = nvme_auth_digest_name(hmac_id);
	if (!digest_name) {
		pr_debug("%s: failed to get digest for %d\n", __func__,
			 hmac_id);
		return -EINVAL;
	}
	tfm = crypto_alloc_shash(digest_name, 0, 0);
	if (IS_ERR(tfm))
		/* Propagate the real cause rather than a blanket -ENOMEM. */
		return PTR_ERR(tfm);

	ret = crypto_shash_tfm_digest(tfm, skey, skey_len, hkey);
	if (ret < 0)
		pr_debug("%s: Failed to hash digest len %zu\n", __func__,
			 skey_len);

	crypto_free_shash(tfm);
	return ret;
}
|
||||
|
||||
/*
 * nvme_auth_augmented_challenge - compute the augmented challenge
 * @hmac_id:   hash identifier selecting both the digest and the HMAC
 * @skey:      session key
 * @skey_len:  length of @skey in bytes
 * @challenge: challenge to augment, @hlen bytes
 * @aug:       output buffer for the HMAC result
 * @hlen:      length of @challenge and of the hashed session key; must be
 *             at least the hash length of @hmac_id
 *
 * The session key is first hashed with the plain digest; that hash is
 * then used as the HMAC key over @challenge and the result is written to
 * @aug.
 *
 * Return: 0 on success, -EINVAL for a too small @hlen or an unknown hash,
 * -ENOMEM on allocation failure, or the error reported by the crypto
 * layer.
 */
int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len,
		u8 *challenge, u8 *aug, size_t hlen)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	u8 *hashed_key;
	const char *hmac_name;
	int ret;

	/*
	 * nvme_auth_hash_skey() writes a complete digest into hashed_key
	 * without knowing its size; refuse buffers that would overflow.
	 */
	if (hlen < nvme_auth_hmac_hash_len(hmac_id))
		return -EINVAL;

	hashed_key = kmalloc(hlen, GFP_KERNEL);
	if (!hashed_key)
		return -ENOMEM;

	ret = nvme_auth_hash_skey(hmac_id, skey,
				  skey_len, hashed_key);
	if (ret < 0)
		goto out_free_key;

	hmac_name = nvme_auth_hmac_name(hmac_id);
	if (!hmac_name) {
		pr_warn("%s: invalid hash algorithm %d\n",
			__func__, hmac_id);
		ret = -EINVAL;
		goto out_free_key;
	}

	tfm = crypto_alloc_shash(hmac_name, 0, 0);
	if (IS_ERR(tfm)) {
		ret = PTR_ERR(tfm);
		goto out_free_key;
	}

	desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm),
		       GFP_KERNEL);
	if (!desc) {
		ret = -ENOMEM;
		goto out_free_hash;
	}
	desc->tfm = tfm;

	ret = crypto_shash_setkey(tfm, hashed_key, hlen);
	if (ret)
		goto out_free_desc;

	ret = crypto_shash_init(desc);
	if (ret)
		goto out_free_desc;

	ret = crypto_shash_update(desc, challenge, hlen);
	if (ret)
		goto out_free_desc;

	ret = crypto_shash_final(desc, aug);
	/* Success falls through the cleanup labels with ret == 0. */
out_free_desc:
	kfree_sensitive(desc);
out_free_hash:
	crypto_free_shash(tfm);
out_free_key:
	kfree_sensitive(hashed_key);
	return ret;
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_augmented_challenge);
|
||||
|
||||
/*
 * nvme_auth_gen_privkey - set up the private key of a DH transform
 * @dh_tfm: key-agreement transform to configure
 * @dh_gid: DH group identifier; not used by this implementation
 *
 * Calls crypto_kpp_set_secret() with a NULL, zero-length secret.
 * NOTE(review): presumably this asks the KPP implementation to generate
 * the private key itself - confirm against the crypto API.
 *
 * Return: 0 on success or the error from crypto_kpp_set_secret().
 */
int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid)
{
	int err = crypto_kpp_set_secret(dh_tfm, NULL, 0);

	if (err)
		pr_debug("failed to set private key, error %d\n", err);

	return err;
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_gen_privkey);
|
||||
|
||||
int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm,
|
||||
u8 *host_key, size_t host_key_len)
|
||||
{
|
||||
struct kpp_request *req;
|
||||
struct crypto_wait wait;
|
||||
struct scatterlist dst;
|
||||
int ret;
|
||||
|
||||
req = kpp_request_alloc(dh_tfm, GFP_KERNEL);
|
||||
if (!req)
|
||||
return -ENOMEM;
|
||||
|
||||
crypto_init_wait(&wait);
|
||||
kpp_request_set_input(req, NULL, 0);
|
||||
sg_init_one(&dst, host_key, host_key_len);
|
||||
kpp_request_set_output(req, &dst, host_key_len);
|
||||
kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
|
||||
crypto_req_done, &wait);
|
||||
|
||||
ret = crypto_wait_req(crypto_kpp_generate_public_key(req), &wait);
|
||||
kpp_request_free(req);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_gen_pubkey);
|
||||
|
||||
int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
|
||||
u8 *ctrl_key, size_t ctrl_key_len,
|
||||
u8 *sess_key, size_t sess_key_len)
|
||||
{
|
||||
struct kpp_request *req;
|
||||
struct crypto_wait wait;
|
||||
struct scatterlist src, dst;
|
||||
int ret;
|
||||
|
||||
req = kpp_request_alloc(dh_tfm, GFP_KERNEL);
|
||||
if (!req)
|
||||
return -ENOMEM;
|
||||
|
||||
crypto_init_wait(&wait);
|
||||
sg_init_one(&src, ctrl_key, ctrl_key_len);
|
||||
kpp_request_set_input(req, &src, ctrl_key_len);
|
||||
sg_init_one(&dst, sess_key, sess_key_len);
|
||||
kpp_request_set_output(req, &dst, sess_key_len);
|
||||
kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
|
||||
crypto_req_done, &wait);
|
||||
|
||||
ret = crypto_wait_req(crypto_kpp_compute_shared_secret(req), &wait);
|
||||
|
||||
kpp_request_free(req);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_gen_shared_secret);
|
||||
|
||||
/*
 * nvme_auth_generate_key - parse a "DHHC-1:XX:<base64>:" secret
 * @secret:  NUL-terminated secret string, or NULL for "no key"
 * @ret_key: where to store the resulting key
 *
 * The hash identifier is read from the field following "DHHC-1:" and the
 * remainder after the fixed 10 character "DHHC-1:XX:" prefix is handed to
 * nvme_auth_extract_key().
 *
 * Return: 0 on success (with *@ret_key set to the new key, or to NULL
 * when @secret is NULL), -EINVAL when the prefix cannot be parsed or the
 * string is shorter than the prefix, or the error from
 * nvme_auth_extract_key() (in which case *@ret_key is set to NULL).
 */
int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key)
{
	struct nvme_dhchap_key *key;
	u8 key_hash;

	if (!secret) {
		*ret_key = NULL;
		return 0;
	}

	if (sscanf(secret, "DHHC-1:%hhu:%*s:", &key_hash) != 1)
		return -EINVAL;

	/*
	 * sscanf() reports success as soon as the hash id is converted, even
	 * if the string ends right there.  Make sure the fixed-size prefix
	 * is really present before skipping over it.
	 */
	if (strnlen(secret, 10) < 10)
		return -EINVAL;

	/* Pass in the secret without the 'DHHC-1:XX:' prefix */
	key = nvme_auth_extract_key(secret + 10, key_hash);
	if (IS_ERR(key)) {
		*ret_key = NULL;
		return PTR_ERR(key);
	}

	*ret_key = key;
	return 0;
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_generate_key);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
@ -92,6 +92,21 @@ config NVME_TCP
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config NVME_AUTH
|
||||
bool "NVM Express over Fabrics In-Band Authentication"
|
||||
depends on NVME_CORE
|
||||
select NVME_COMMON
|
||||
select CRYPTO
|
||||
select CRYPTO_HMAC
|
||||
select CRYPTO_SHA256
|
||||
select CRYPTO_SHA512
|
||||
select CRYPTO_DH
|
||||
select CRYPTO_DH_RFC7919_GROUPS
|
||||
help
|
||||
This provides support for NVMe over Fabrics In-Band Authentication.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config NVME_APPLE
|
||||
tristate "Apple ANS2 NVM Express host driver"
|
||||
depends on OF && BLOCK
|
||||
|
@ -10,12 +10,14 @@ obj-$(CONFIG_NVME_FC) += nvme-fc.o
|
||||
obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
|
||||
obj-$(CONFIG_NVME_APPLE) += nvme-apple.o
|
||||
|
||||
nvme-core-y := core.o ioctl.o constants.o
|
||||
nvme-core-y += core.o ioctl.o
|
||||
nvme-core-$(CONFIG_NVME_VERBOSE_ERRORS) += constants.o
|
||||
nvme-core-$(CONFIG_TRACING) += trace.o
|
||||
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
|
||||
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
|
||||
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
|
||||
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
|
||||
nvme-core-$(CONFIG_NVME_AUTH) += auth.o
|
||||
|
||||
nvme-y += pci.o
|
||||
|
||||
|
@ -845,11 +845,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
|
||||
apple_nvme_handle_cq(&anv->adminq, true);
|
||||
spin_unlock_irqrestore(&anv->lock, flags);
|
||||
|
||||
blk_mq_tagset_busy_iter(&anv->tagset, nvme_cancel_request, &anv->ctrl);
|
||||
blk_mq_tagset_busy_iter(&anv->admin_tagset, nvme_cancel_request,
|
||||
&anv->ctrl);
|
||||
blk_mq_tagset_wait_completed_request(&anv->tagset);
|
||||
blk_mq_tagset_wait_completed_request(&anv->admin_tagset);
|
||||
nvme_cancel_tagset(&anv->ctrl);
|
||||
nvme_cancel_admin_tagset(&anv->ctrl);
|
||||
|
||||
/*
|
||||
* The driver will not be starting up queues again if shutting down so
|
||||
@ -1222,6 +1219,11 @@ static void apple_nvme_async_probe(void *data, async_cookie_t cookie)
|
||||
nvme_put_ctrl(&anv->ctrl);
|
||||
}
|
||||
|
||||
static void devm_apple_nvme_put_tag_set(void *data)
|
||||
{
|
||||
blk_mq_free_tag_set(data);
|
||||
}
|
||||
|
||||
static int apple_nvme_alloc_tagsets(struct apple_nvme *anv)
|
||||
{
|
||||
int ret;
|
||||
@ -1238,8 +1240,7 @@ static int apple_nvme_alloc_tagsets(struct apple_nvme *anv)
|
||||
ret = blk_mq_alloc_tag_set(&anv->admin_tagset);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = devm_add_action_or_reset(anv->dev,
|
||||
(void (*)(void *))blk_mq_free_tag_set,
|
||||
ret = devm_add_action_or_reset(anv->dev, devm_apple_nvme_put_tag_set,
|
||||
&anv->admin_tagset);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -1263,8 +1264,8 @@ static int apple_nvme_alloc_tagsets(struct apple_nvme *anv)
|
||||
ret = blk_mq_alloc_tag_set(&anv->tagset);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = devm_add_action_or_reset(
|
||||
anv->dev, (void (*)(void *))blk_mq_free_tag_set, &anv->tagset);
|
||||
ret = devm_add_action_or_reset(anv->dev, devm_apple_nvme_put_tag_set,
|
||||
&anv->tagset);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -1365,6 +1366,11 @@ static int apple_nvme_attach_genpd(struct apple_nvme *anv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void devm_apple_nvme_mempool_destroy(void *data)
|
||||
{
|
||||
mempool_destroy(data);
|
||||
}
|
||||
|
||||
static int apple_nvme_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
@ -1462,8 +1468,8 @@ static int apple_nvme_probe(struct platform_device *pdev)
|
||||
ret = -ENOMEM;
|
||||
goto put_dev;
|
||||
}
|
||||
ret = devm_add_action_or_reset(
|
||||
anv->dev, (void (*)(void *))mempool_destroy, anv->iod_mempool);
|
||||
ret = devm_add_action_or_reset(anv->dev,
|
||||
devm_apple_nvme_mempool_destroy, anv->iod_mempool);
|
||||
if (ret)
|
||||
goto put_dev;
|
||||
|
||||
|
1017
drivers/nvme/host/auth.c
Normal file
1017
drivers/nvme/host/auth.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,6 @@
|
||||
|
||||
#include "nvme.h"
|
||||
|
||||
#ifdef CONFIG_NVME_VERBOSE_ERRORS
|
||||
static const char * const nvme_ops[] = {
|
||||
[nvme_cmd_flush] = "Flush",
|
||||
[nvme_cmd_write] = "Write",
|
||||
@ -178,6 +177,7 @@ const unsigned char *nvme_get_opcode_str(u8 opcode)
|
||||
return nvme_ops[opcode];
|
||||
return "Unknown";
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_get_opcode_str);
|
||||
|
||||
const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
|
||||
{
|
||||
@ -185,4 +185,3 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
|
||||
return nvme_admin_ops[opcode];
|
||||
return "Unknown";
|
||||
}
|
||||
#endif /* CONFIG_NVME_VERBOSE_ERRORS */
|
||||
|
@ -24,12 +24,22 @@
|
||||
|
||||
#include "nvme.h"
|
||||
#include "fabrics.h"
|
||||
#include <linux/nvme-auth.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
|
||||
#define NVME_MINORS (1U << MINORBITS)
|
||||
|
||||
struct nvme_ns_info {
|
||||
struct nvme_ns_ids ids;
|
||||
u32 nsid;
|
||||
__le32 anagrpid;
|
||||
bool is_shared;
|
||||
bool is_readonly;
|
||||
bool is_ready;
|
||||
};
|
||||
|
||||
unsigned int admin_timeout = 60;
|
||||
module_param(admin_timeout, uint, 0644);
|
||||
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
|
||||
@ -330,6 +340,7 @@ enum nvme_disposition {
|
||||
COMPLETE,
|
||||
RETRY,
|
||||
FAILOVER,
|
||||
AUTHENTICATE,
|
||||
};
|
||||
|
||||
static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
|
||||
@ -337,6 +348,9 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
|
||||
if (likely(nvme_req(req)->status == 0))
|
||||
return COMPLETE;
|
||||
|
||||
if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED)
|
||||
return AUTHENTICATE;
|
||||
|
||||
if (blk_noretry_request(req) ||
|
||||
(nvme_req(req)->status & NVME_SC_DNR) ||
|
||||
nvme_req(req)->retries >= nvme_max_retries)
|
||||
@ -375,11 +389,13 @@ static inline void nvme_end_req(struct request *req)
|
||||
|
||||
void nvme_complete_rq(struct request *req)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
|
||||
|
||||
trace_nvme_complete_rq(req);
|
||||
nvme_cleanup_cmd(req);
|
||||
|
||||
if (nvme_req(req)->ctrl->kas)
|
||||
nvme_req(req)->ctrl->comp_seen = true;
|
||||
if (ctrl->kas)
|
||||
ctrl->comp_seen = true;
|
||||
|
||||
switch (nvme_decide_disposition(req)) {
|
||||
case COMPLETE:
|
||||
@ -391,6 +407,14 @@ void nvme_complete_rq(struct request *req)
|
||||
case FAILOVER:
|
||||
nvme_failover_req(req);
|
||||
return;
|
||||
case AUTHENTICATE:
|
||||
#ifdef CONFIG_NVME_AUTH
|
||||
queue_work(nvme_wq, &ctrl->dhchap_auth_work);
|
||||
nvme_retry_req(req);
|
||||
#else
|
||||
nvme_end_req(req);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_complete_rq);
|
||||
@ -702,7 +726,9 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
|
||||
switch (ctrl->state) {
|
||||
case NVME_CTRL_CONNECTING:
|
||||
if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
|
||||
req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
|
||||
(req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
|
||||
req->cmd->fabrics.fctype == nvme_fabrics_type_auth_send ||
|
||||
req->cmd->fabrics.fctype == nvme_fabrics_type_auth_receive))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
@ -990,8 +1016,7 @@ static int nvme_execute_rq(struct request *rq, bool at_head)
|
||||
*/
|
||||
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
|
||||
union nvme_result *result, void *buffer, unsigned bufflen,
|
||||
unsigned timeout, int qid, int at_head,
|
||||
blk_mq_req_flags_t flags)
|
||||
int qid, int at_head, blk_mq_req_flags_t flags)
|
||||
{
|
||||
struct request *req;
|
||||
int ret;
|
||||
@ -1000,15 +1025,12 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
|
||||
req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
|
||||
else
|
||||
req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
|
||||
qid ? qid - 1 : 0);
|
||||
qid - 1);
|
||||
|
||||
if (IS_ERR(req))
|
||||
return PTR_ERR(req);
|
||||
nvme_init_request(req, cmd);
|
||||
|
||||
if (timeout)
|
||||
req->timeout = timeout;
|
||||
|
||||
if (buffer && bufflen) {
|
||||
ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
|
||||
if (ret)
|
||||
@ -1028,7 +1050,7 @@ EXPORT_SYMBOL_GPL(__nvme_submit_sync_cmd);
|
||||
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
|
||||
void *buffer, unsigned bufflen)
|
||||
{
|
||||
return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0,
|
||||
return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen,
|
||||
NVME_QID_ANY, 0, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
|
||||
@ -1329,8 +1351,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
|
||||
}
|
||||
}
|
||||
|
||||
static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
struct nvme_ns_ids *ids)
|
||||
static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl,
|
||||
struct nvme_ns_info *info)
|
||||
{
|
||||
struct nvme_command c = { };
|
||||
bool csi_seen = false;
|
||||
@ -1343,7 +1365,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
return 0;
|
||||
|
||||
c.identify.opcode = nvme_admin_identify;
|
||||
c.identify.nsid = cpu_to_le32(nsid);
|
||||
c.identify.nsid = cpu_to_le32(info->nsid);
|
||||
c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
|
||||
|
||||
data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
|
||||
@ -1355,7 +1377,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
if (status) {
|
||||
dev_warn(ctrl->device,
|
||||
"Identify Descriptors failed (nsid=%u, status=0x%x)\n",
|
||||
nsid, status);
|
||||
info->nsid, status);
|
||||
goto free_data;
|
||||
}
|
||||
|
||||
@ -1365,7 +1387,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
if (cur->nidl == 0)
|
||||
break;
|
||||
|
||||
len = nvme_process_ns_desc(ctrl, ids, cur, &csi_seen);
|
||||
len = nvme_process_ns_desc(ctrl, &info->ids, cur, &csi_seen);
|
||||
if (len < 0)
|
||||
break;
|
||||
|
||||
@ -1374,7 +1396,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
|
||||
if (nvme_multi_css(ctrl) && !csi_seen) {
|
||||
dev_warn(ctrl->device, "Command set not reported for nsid:%d\n",
|
||||
nsid);
|
||||
info->nsid);
|
||||
status = -EINVAL;
|
||||
}
|
||||
|
||||
@ -1384,7 +1406,7 @@ free_data:
|
||||
}
|
||||
|
||||
static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
struct nvme_ns_ids *ids, struct nvme_id_ns **id)
|
||||
struct nvme_id_ns **id)
|
||||
{
|
||||
struct nvme_command c = { };
|
||||
int error;
|
||||
@ -1407,20 +1429,6 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
error = NVME_SC_INVALID_NS | NVME_SC_DNR;
|
||||
if ((*id)->ncap == 0) /* namespace not allocated or attached */
|
||||
goto out_free_id;
|
||||
|
||||
|
||||
if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
|
||||
dev_info(ctrl->device,
|
||||
"Ignoring bogus Namespace Identifiers\n");
|
||||
} else {
|
||||
if (ctrl->vs >= NVME_VS(1, 1, 0) &&
|
||||
!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
|
||||
memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64));
|
||||
if (ctrl->vs >= NVME_VS(1, 2, 0) &&
|
||||
!memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
|
||||
memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid));
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_free_id:
|
||||
@ -1428,30 +1436,59 @@ out_free_id:
|
||||
return error;
|
||||
}
|
||||
|
||||
static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
struct nvme_id_ns_cs_indep **id)
|
||||
static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
|
||||
struct nvme_ns_info *info)
|
||||
{
|
||||
struct nvme_ns_ids *ids = &info->ids;
|
||||
struct nvme_id_ns *id;
|
||||
int ret;
|
||||
|
||||
ret = nvme_identify_ns(ctrl, info->nsid, &id);
|
||||
if (ret)
|
||||
return ret;
|
||||
info->anagrpid = id->anagrpid;
|
||||
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
|
||||
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
|
||||
info->is_ready = true;
|
||||
if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
|
||||
dev_info(ctrl->device,
|
||||
"Ignoring bogus Namespace Identifiers\n");
|
||||
} else {
|
||||
if (ctrl->vs >= NVME_VS(1, 1, 0) &&
|
||||
!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
|
||||
memcpy(ids->eui64, id->eui64, sizeof(ids->eui64));
|
||||
if (ctrl->vs >= NVME_VS(1, 2, 0) &&
|
||||
!memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
|
||||
memcpy(ids->nguid, id->nguid, sizeof(ids->nguid));
|
||||
}
|
||||
kfree(id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
|
||||
struct nvme_ns_info *info)
|
||||
{
|
||||
struct nvme_id_ns_cs_indep *id;
|
||||
struct nvme_command c = {
|
||||
.identify.opcode = nvme_admin_identify,
|
||||
.identify.nsid = cpu_to_le32(nsid),
|
||||
.identify.nsid = cpu_to_le32(info->nsid),
|
||||
.identify.cns = NVME_ID_CNS_NS_CS_INDEP,
|
||||
};
|
||||
int ret;
|
||||
|
||||
*id = kmalloc(sizeof(**id), GFP_KERNEL);
|
||||
if (!*id)
|
||||
id = kmalloc(sizeof(*id), GFP_KERNEL);
|
||||
if (!id)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
|
||||
if (ret) {
|
||||
dev_warn(ctrl->device,
|
||||
"Identify namespace (CS independent) failed (%d)\n",
|
||||
ret);
|
||||
kfree(*id);
|
||||
return ret;
|
||||
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
|
||||
if (!ret) {
|
||||
info->anagrpid = id->anagrpid;
|
||||
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
|
||||
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
|
||||
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
|
||||
}
|
||||
|
||||
return 0;
|
||||
kfree(id);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
|
||||
@ -1466,7 +1503,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
|
||||
c.features.dword11 = cpu_to_le32(dword11);
|
||||
|
||||
ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res,
|
||||
buffer, buflen, 0, NVME_QID_ANY, 0, 0);
|
||||
buffer, buflen, NVME_QID_ANY, 0, 0);
|
||||
if (ret >= 0 && result)
|
||||
*result = le32_to_cpu(res.u32);
|
||||
return ret;
|
||||
@ -1875,6 +1912,11 @@ static void nvme_update_disk_info(struct gendisk *disk,
|
||||
ns->ctrl->max_zeroes_sectors);
|
||||
}
|
||||
|
||||
static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info)
|
||||
{
|
||||
return info->is_readonly || test_bit(NVME_NS_FORCE_RO, &ns->flags);
|
||||
}
|
||||
|
||||
static inline bool nvme_first_scan(struct gendisk *disk)
|
||||
{
|
||||
/* nvme_alloc_ns() scans the disk prior to adding it */
|
||||
@ -1912,12 +1954,44 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
blk_queue_chunk_sectors(ns->queue, iob);
|
||||
}
|
||||
|
||||
static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
static int nvme_update_ns_info_generic(struct nvme_ns *ns,
|
||||
struct nvme_ns_info *info)
|
||||
{
|
||||
unsigned lbaf = nvme_lbaf_index(id->flbas);
|
||||
blk_mq_freeze_queue(ns->disk->queue);
|
||||
nvme_set_queue_limits(ns->ctrl, ns->queue);
|
||||
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
|
||||
if (nvme_ns_head_multipath(ns->head)) {
|
||||
blk_mq_freeze_queue(ns->head->disk->queue);
|
||||
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
|
||||
nvme_mpath_revalidate_paths(ns);
|
||||
blk_stack_limits(&ns->head->disk->queue->limits,
|
||||
&ns->queue->limits, 0);
|
||||
ns->head->disk->flags |= GENHD_FL_HIDDEN;
|
||||
blk_mq_unfreeze_queue(ns->head->disk->queue);
|
||||
}
|
||||
|
||||
/* Hide the block-interface for these devices */
|
||||
ns->disk->flags |= GENHD_FL_HIDDEN;
|
||||
set_bit(NVME_NS_READY, &ns->flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
struct nvme_ns_info *info)
|
||||
{
|
||||
struct nvme_id_ns *id;
|
||||
unsigned lbaf;
|
||||
int ret;
|
||||
|
||||
ret = nvme_identify_ns(ns->ctrl, info->nsid, &id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
blk_mq_freeze_queue(ns->disk->queue);
|
||||
lbaf = nvme_lbaf_index(id->flbas);
|
||||
ns->lba_shift = id->lbaf[lbaf].ds;
|
||||
nvme_set_queue_limits(ns->ctrl, ns->queue);
|
||||
|
||||
@ -1927,36 +2001,35 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
|
||||
if (ns->head->ids.csi == NVME_CSI_ZNS) {
|
||||
ret = nvme_update_zone_info(ns, lbaf);
|
||||
if (ret)
|
||||
goto out_unfreeze;
|
||||
if (ret) {
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) ||
|
||||
test_bit(NVME_NS_FORCE_RO, &ns->flags));
|
||||
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
|
||||
set_bit(NVME_NS_READY, &ns->flags);
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
|
||||
if (blk_queue_is_zoned(ns->queue)) {
|
||||
ret = nvme_revalidate_zones(ns);
|
||||
if (ret && !nvme_first_scan(ns->disk))
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (nvme_ns_head_multipath(ns->head)) {
|
||||
blk_mq_freeze_queue(ns->head->disk->queue);
|
||||
nvme_update_disk_info(ns->head->disk, ns, id);
|
||||
set_disk_ro(ns->head->disk,
|
||||
(id->nsattr & NVME_NS_ATTR_RO) ||
|
||||
test_bit(NVME_NS_FORCE_RO, &ns->flags));
|
||||
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
|
||||
nvme_mpath_revalidate_paths(ns);
|
||||
blk_stack_limits(&ns->head->disk->queue->limits,
|
||||
&ns->queue->limits, 0);
|
||||
disk_update_readahead(ns->head->disk);
|
||||
blk_mq_unfreeze_queue(ns->head->disk->queue);
|
||||
}
|
||||
return 0;
|
||||
|
||||
out_unfreeze:
|
||||
ret = 0;
|
||||
out:
|
||||
/*
|
||||
* If probing fails due to an unsupported feature, hide the block device,
|
||||
* but still allow other access.
|
||||
@ -1966,10 +2039,31 @@ out_unfreeze:
|
||||
set_bit(NVME_NS_READY, &ns->flags);
|
||||
ret = 0;
|
||||
}
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
kfree(id);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
|
||||
{
|
||||
switch (info->ids.csi) {
|
||||
case NVME_CSI_ZNS:
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
dev_info(ns->ctrl->device,
|
||||
"block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
|
||||
info->nsid);
|
||||
return nvme_update_ns_info_generic(ns, info);
|
||||
}
|
||||
return nvme_update_ns_info_block(ns, info);
|
||||
case NVME_CSI_NVM:
|
||||
return nvme_update_ns_info_block(ns, info);
|
||||
default:
|
||||
dev_info(ns->ctrl->device,
|
||||
"block device for nsid %u not supported (csi %u)\n",
|
||||
info->nsid, info->ids.csi);
|
||||
return nvme_update_ns_info_generic(ns, info);
|
||||
}
|
||||
}
|
||||
|
||||
static char nvme_pr_type(enum pr_type type)
|
||||
{
|
||||
switch (type) {
|
||||
@ -2103,7 +2197,7 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
|
||||
cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
|
||||
cmd.common.cdw11 = cpu_to_le32(len);
|
||||
|
||||
return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, 0,
|
||||
return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
|
||||
NVME_QID_ANY, 1, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_sec_submit);
|
||||
@ -2123,6 +2217,7 @@ static int nvme_report_zones(struct gendisk *disk, sector_t sector,
|
||||
static const struct block_device_operations nvme_bdev_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.ioctl = nvme_ioctl,
|
||||
.compat_ioctl = blkdev_compat_ptr_ioctl,
|
||||
.open = nvme_open,
|
||||
.release = nvme_release,
|
||||
.getgeo = nvme_getgeo,
|
||||
@ -3613,6 +3708,108 @@ static ssize_t dctype_show(struct device *dev,
|
||||
}
|
||||
static DEVICE_ATTR_RO(dctype);
|
||||
|
||||
#ifdef CONFIG_NVME_AUTH
|
||||
static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
|
||||
struct nvmf_ctrl_options *opts = ctrl->opts;
|
||||
|
||||
if (!opts->dhchap_secret)
|
||||
return sysfs_emit(buf, "none\n");
|
||||
return sysfs_emit(buf, "%s\n", opts->dhchap_secret);
|
||||
}
|
||||
|
||||
static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
|
||||
struct nvmf_ctrl_options *opts = ctrl->opts;
|
||||
char *dhchap_secret;
|
||||
|
||||
if (!ctrl->opts->dhchap_secret)
|
||||
return -EINVAL;
|
||||
if (count < 7)
|
||||
return -EINVAL;
|
||||
if (memcmp(buf, "DHHC-1:", 7))
|
||||
return -EINVAL;
|
||||
|
||||
dhchap_secret = kzalloc(count + 1, GFP_KERNEL);
|
||||
if (!dhchap_secret)
|
||||
return -ENOMEM;
|
||||
memcpy(dhchap_secret, buf, count);
|
||||
nvme_auth_stop(ctrl);
|
||||
if (strcmp(dhchap_secret, opts->dhchap_secret)) {
|
||||
int ret;
|
||||
|
||||
ret = nvme_auth_generate_key(dhchap_secret, &ctrl->host_key);
|
||||
if (ret)
|
||||
return ret;
|
||||
kfree(opts->dhchap_secret);
|
||||
opts->dhchap_secret = dhchap_secret;
|
||||
/* Key has changed; re-authentication with new key */
|
||||
nvme_auth_reset(ctrl);
|
||||
}
|
||||
/* Start re-authentication */
|
||||
dev_info(ctrl->device, "re-authenticating controller\n");
|
||||
queue_work(nvme_wq, &ctrl->dhchap_auth_work);
|
||||
|
||||
return count;
|
||||
}
|
||||
static DEVICE_ATTR(dhchap_secret, S_IRUGO | S_IWUSR,
|
||||
nvme_ctrl_dhchap_secret_show, nvme_ctrl_dhchap_secret_store);
|
||||
|
||||
static ssize_t nvme_ctrl_dhchap_ctrl_secret_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
|
||||
struct nvmf_ctrl_options *opts = ctrl->opts;
|
||||
|
||||
if (!opts->dhchap_ctrl_secret)
|
||||
return sysfs_emit(buf, "none\n");
|
||||
return sysfs_emit(buf, "%s\n", opts->dhchap_ctrl_secret);
|
||||
}
|
||||
|
||||
static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
|
||||
struct nvmf_ctrl_options *opts = ctrl->opts;
|
||||
char *dhchap_secret;
|
||||
|
||||
if (!ctrl->opts->dhchap_ctrl_secret)
|
||||
return -EINVAL;
|
||||
if (count < 7)
|
||||
return -EINVAL;
|
||||
if (memcmp(buf, "DHHC-1:", 7))
|
||||
return -EINVAL;
|
||||
|
||||
dhchap_secret = kzalloc(count + 1, GFP_KERNEL);
|
||||
if (!dhchap_secret)
|
||||
return -ENOMEM;
|
||||
memcpy(dhchap_secret, buf, count);
|
||||
nvme_auth_stop(ctrl);
|
||||
if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) {
|
||||
int ret;
|
||||
|
||||
ret = nvme_auth_generate_key(dhchap_secret, &ctrl->ctrl_key);
|
||||
if (ret)
|
||||
return ret;
|
||||
kfree(opts->dhchap_ctrl_secret);
|
||||
opts->dhchap_ctrl_secret = dhchap_secret;
|
||||
/* Key has changed; re-authentication with new key */
|
||||
nvme_auth_reset(ctrl);
|
||||
}
|
||||
/* Start re-authentication */
|
||||
dev_info(ctrl->device, "re-authenticating controller\n");
|
||||
queue_work(nvme_wq, &ctrl->dhchap_auth_work);
|
||||
|
||||
return count;
|
||||
}
|
||||
static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR,
|
||||
nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store);
|
||||
#endif
|
||||
|
||||
static struct attribute *nvme_dev_attrs[] = {
|
||||
&dev_attr_reset_controller.attr,
|
||||
&dev_attr_rescan_controller.attr,
|
||||
@ -3636,6 +3833,10 @@ static struct attribute *nvme_dev_attrs[] = {
|
||||
&dev_attr_kato.attr,
|
||||
&dev_attr_cntrltype.attr,
|
||||
&dev_attr_dctype.attr,
|
||||
#ifdef CONFIG_NVME_AUTH
|
||||
&dev_attr_dhchap_secret.attr,
|
||||
&dev_attr_dhchap_ctrl_secret.attr,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -3659,6 +3860,12 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
|
||||
return 0;
|
||||
if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts)
|
||||
return 0;
|
||||
#ifdef CONFIG_NVME_AUTH
|
||||
if (a == &dev_attr_dhchap_secret.attr && !ctrl->opts)
|
||||
return 0;
|
||||
if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts)
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
return a->mode;
|
||||
}
|
||||
@ -3786,7 +3993,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns)
|
||||
}
|
||||
|
||||
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
|
||||
unsigned nsid, struct nvme_ns_ids *ids, bool is_shared)
|
||||
struct nvme_ns_info *info)
|
||||
{
|
||||
struct nvme_ns_head *head;
|
||||
size_t size = sizeof(*head);
|
||||
@ -3808,9 +4015,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
|
||||
if (ret)
|
||||
goto out_ida_remove;
|
||||
head->subsys = ctrl->subsys;
|
||||
head->ns_id = nsid;
|
||||
head->ids = *ids;
|
||||
head->shared = is_shared;
|
||||
head->ns_id = info->nsid;
|
||||
head->ids = info->ids;
|
||||
head->shared = info->is_shared;
|
||||
kref_init(&head->ref);
|
||||
|
||||
if (head->ids.csi) {
|
||||
@ -3867,54 +4074,54 @@ static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
|
||||
struct nvme_ns_ids *ids, bool is_shared)
|
||||
static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = ns->ctrl;
|
||||
struct nvme_ns_head *head = NULL;
|
||||
int ret;
|
||||
|
||||
ret = nvme_global_check_duplicate_ids(ctrl->subsys, ids);
|
||||
ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids);
|
||||
if (ret) {
|
||||
dev_err(ctrl->device,
|
||||
"globally duplicate IDs for nsid %d\n", nsid);
|
||||
"globally duplicate IDs for nsid %d\n", info->nsid);
|
||||
nvme_print_device_info(ctrl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
mutex_lock(&ctrl->subsys->lock);
|
||||
head = nvme_find_ns_head(ctrl, nsid);
|
||||
head = nvme_find_ns_head(ctrl, info->nsid);
|
||||
if (!head) {
|
||||
ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids);
|
||||
ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &info->ids);
|
||||
if (ret) {
|
||||
dev_err(ctrl->device,
|
||||
"duplicate IDs in subsystem for nsid %d\n",
|
||||
nsid);
|
||||
info->nsid);
|
||||
goto out_unlock;
|
||||
}
|
||||
head = nvme_alloc_ns_head(ctrl, nsid, ids, is_shared);
|
||||
head = nvme_alloc_ns_head(ctrl, info);
|
||||
if (IS_ERR(head)) {
|
||||
ret = PTR_ERR(head);
|
||||
goto out_unlock;
|
||||
}
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
if (!is_shared || !head->shared) {
|
||||
if (!info->is_shared || !head->shared) {
|
||||
dev_err(ctrl->device,
|
||||
"Duplicate unshared namespace %d\n", nsid);
|
||||
"Duplicate unshared namespace %d\n",
|
||||
info->nsid);
|
||||
goto out_put_ns_head;
|
||||
}
|
||||
if (!nvme_ns_ids_equal(&head->ids, ids)) {
|
||||
if (!nvme_ns_ids_equal(&head->ids, &info->ids)) {
|
||||
dev_err(ctrl->device,
|
||||
"IDs don't match for shared namespace %d\n",
|
||||
nsid);
|
||||
info->nsid);
|
||||
goto out_put_ns_head;
|
||||
}
|
||||
|
||||
if (!multipath && !list_empty(&head->list)) {
|
||||
dev_warn(ctrl->device,
|
||||
"Found shared namespace %d, but multipathing not supported.\n",
|
||||
nsid);
|
||||
info->nsid);
|
||||
dev_warn_once(ctrl->device,
|
||||
"Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0\n.");
|
||||
}
|
||||
@ -3968,20 +4175,15 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
|
||||
list_add(&ns->list, &ns->ctrl->namespaces);
|
||||
}
|
||||
|
||||
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
struct nvme_ns_ids *ids)
|
||||
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
|
||||
{
|
||||
struct nvme_ns *ns;
|
||||
struct gendisk *disk;
|
||||
struct nvme_id_ns *id;
|
||||
int node = ctrl->numa_node;
|
||||
|
||||
if (nvme_identify_ns(ctrl, nsid, ids, &id))
|
||||
return;
|
||||
|
||||
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
|
||||
if (!ns)
|
||||
goto out_free_id;
|
||||
return;
|
||||
|
||||
disk = blk_mq_alloc_disk(ctrl->tagset, ns);
|
||||
if (IS_ERR(disk))
|
||||
@ -4002,7 +4204,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
ns->ctrl = ctrl;
|
||||
kref_init(&ns->kref);
|
||||
|
||||
if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED))
|
||||
if (nvme_init_ns_head(ns, info))
|
||||
goto out_cleanup_disk;
|
||||
|
||||
/*
|
||||
@ -4028,7 +4230,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
ns->head->instance);
|
||||
}
|
||||
|
||||
if (nvme_update_ns_info(ns, id))
|
||||
if (nvme_update_ns_info(ns, info))
|
||||
goto out_unlink_ns;
|
||||
|
||||
down_write(&ctrl->namespaces_rwsem);
|
||||
@ -4042,9 +4244,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
if (!nvme_ns_head_multipath(ns->head))
|
||||
nvme_add_ns_cdev(ns);
|
||||
|
||||
nvme_mpath_add_disk(ns, id);
|
||||
nvme_mpath_add_disk(ns, info->anagrpid);
|
||||
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
|
||||
kfree(id);
|
||||
|
||||
return;
|
||||
|
||||
@ -4064,8 +4265,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
put_disk(disk);
|
||||
out_free_ns:
|
||||
kfree(ns);
|
||||
out_free_id:
|
||||
kfree(id);
|
||||
}
|
||||
|
||||
static void nvme_ns_remove(struct nvme_ns *ns)
|
||||
@ -4123,29 +4322,21 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
|
||||
}
|
||||
}
|
||||
|
||||
static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
|
||||
static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
|
||||
{
|
||||
struct nvme_id_ns *id;
|
||||
int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
|
||||
|
||||
if (test_bit(NVME_NS_DEAD, &ns->flags))
|
||||
goto out;
|
||||
|
||||
ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, ids, &id);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
|
||||
if (!nvme_ns_ids_equal(&ns->head->ids, ids)) {
|
||||
if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids)) {
|
||||
dev_err(ns->ctrl->device,
|
||||
"identifiers changed for nsid %d\n", ns->head->ns_id);
|
||||
goto out_free_id;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = nvme_update_ns_info(ns, id);
|
||||
|
||||
out_free_id:
|
||||
kfree(id);
|
||||
ret = nvme_update_ns_info(ns, info);
|
||||
out:
|
||||
/*
|
||||
* Only remove the namespace if we got a fatal error back from the
|
||||
@ -4157,59 +4348,47 @@ out:
|
||||
nvme_ns_remove(ns);
|
||||
}
|
||||
|
||||
static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
||||
static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
||||
{
|
||||
struct nvme_ns_ids ids = { };
|
||||
struct nvme_id_ns_cs_indep *id;
|
||||
struct nvme_ns_info info = { .nsid = nsid };
|
||||
struct nvme_ns *ns;
|
||||
bool ready = true;
|
||||
|
||||
if (nvme_identify_ns_descs(ctrl, nsid, &ids))
|
||||
if (nvme_identify_ns_descs(ctrl, &info))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Check if the namespace is ready. If not ignore it, we will get an
|
||||
* AEN once it becomes ready and restart the scan.
|
||||
*/
|
||||
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
|
||||
!nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
|
||||
ready = id->nstat & NVME_NSTAT_NRDY;
|
||||
kfree(id);
|
||||
if (info.ids.csi != NVME_CSI_NVM && !nvme_multi_css(ctrl)) {
|
||||
dev_warn(ctrl->device,
|
||||
"command set not reported for nsid: %d\n", nsid);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ready)
|
||||
/*
|
||||
* If available try to use the Command Set Independent Identify Namespace
|
||||
* data structure to find all the generic information that is needed to
|
||||
* set up a namespace. If not fall back to the legacy version.
|
||||
*/
|
||||
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
|
||||
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS)) {
|
||||
if (nvme_ns_info_from_id_cs_indep(ctrl, &info))
|
||||
return;
|
||||
} else {
|
||||
if (nvme_ns_info_from_identify(ctrl, &info))
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore the namespace if it is not ready. We will get an AEN once it
|
||||
* becomes ready and restart the scan.
|
||||
*/
|
||||
if (!info.is_ready)
|
||||
return;
|
||||
|
||||
ns = nvme_find_get_ns(ctrl, nsid);
|
||||
if (ns) {
|
||||
nvme_validate_ns(ns, &ids);
|
||||
nvme_validate_ns(ns, &info);
|
||||
nvme_put_ns(ns);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (ids.csi) {
|
||||
case NVME_CSI_NVM:
|
||||
nvme_alloc_ns(ctrl, nsid, &ids);
|
||||
break;
|
||||
case NVME_CSI_ZNS:
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
dev_warn(ctrl->device,
|
||||
"nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
|
||||
nsid);
|
||||
break;
|
||||
}
|
||||
if (!nvme_multi_css(ctrl)) {
|
||||
dev_warn(ctrl->device,
|
||||
"command set not reported for nsid: %d\n",
|
||||
nsid);
|
||||
break;
|
||||
}
|
||||
nvme_alloc_ns(ctrl, nsid, &ids);
|
||||
break;
|
||||
default:
|
||||
dev_warn(ctrl->device, "unknown csi %u for nsid %u\n",
|
||||
ids.csi, nsid);
|
||||
break;
|
||||
} else {
|
||||
nvme_alloc_ns(ctrl, &info);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4265,7 +4444,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
|
||||
|
||||
if (!nsid) /* end of the list? */
|
||||
goto out;
|
||||
nvme_validate_or_alloc_ns(ctrl, nsid);
|
||||
nvme_scan_ns(ctrl, nsid);
|
||||
while (++prev < nsid)
|
||||
nvme_ns_remove_by_nsid(ctrl, prev);
|
||||
}
|
||||
@ -4288,7 +4467,7 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl)
|
||||
kfree(id);
|
||||
|
||||
for (i = 1; i <= nn; i++)
|
||||
nvme_validate_or_alloc_ns(ctrl, i);
|
||||
nvme_scan_ns(ctrl, i);
|
||||
|
||||
nvme_remove_invalid_namespaces(ctrl, nn);
|
||||
}
|
||||
@ -4525,9 +4704,19 @@ static void nvme_fw_act_work(struct work_struct *work)
|
||||
nvme_get_fw_slot_info(ctrl);
|
||||
}
|
||||
|
||||
static u32 nvme_aer_type(u32 result)
|
||||
{
|
||||
return result & 0x7;
|
||||
}
|
||||
|
||||
static u32 nvme_aer_subtype(u32 result)
|
||||
{
|
||||
return (result & 0xff00) >> 8;
|
||||
}
|
||||
|
||||
static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
|
||||
{
|
||||
u32 aer_notice_type = (result & 0xff00) >> 8;
|
||||
u32 aer_notice_type = nvme_aer_subtype(result);
|
||||
|
||||
trace_nvme_async_event(ctrl, aer_notice_type);
|
||||
|
||||
@ -4542,8 +4731,10 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
|
||||
* recovery actions from interfering with the controller's
|
||||
* firmware activation.
|
||||
*/
|
||||
if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
|
||||
if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
|
||||
nvme_auth_stop(ctrl);
|
||||
queue_work(nvme_wq, &ctrl->fw_act_work);
|
||||
}
|
||||
break;
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
case NVME_AER_NOTICE_ANA:
|
||||
@ -4560,11 +4751,19 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
|
||||
}
|
||||
}
|
||||
|
||||
static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
trace_nvme_async_event(ctrl, NVME_AER_ERROR);
|
||||
dev_warn(ctrl->device, "resetting controller due to AER\n");
|
||||
nvme_reset_ctrl(ctrl);
|
||||
}
|
||||
|
||||
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
|
||||
volatile union nvme_result *res)
|
||||
{
|
||||
u32 result = le32_to_cpu(res->u32);
|
||||
u32 aer_type = result & 0x07;
|
||||
u32 aer_type = nvme_aer_type(result);
|
||||
u32 aer_subtype = nvme_aer_subtype(result);
|
||||
|
||||
if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
|
||||
return;
|
||||
@ -4574,6 +4773,15 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
|
||||
nvme_handle_aen_notice(ctrl, result);
|
||||
break;
|
||||
case NVME_AER_ERROR:
|
||||
/*
|
||||
* For a persistent internal error, don't run async_event_work
|
||||
* to submit a new AER. The controller reset will do it.
|
||||
*/
|
||||
if (aer_subtype == NVME_AER_ERROR_PERSIST_INT_ERR) {
|
||||
nvme_handle_aer_persistent_error(ctrl);
|
||||
return;
|
||||
}
|
||||
fallthrough;
|
||||
case NVME_AER_SMART:
|
||||
case NVME_AER_CSS:
|
||||
case NVME_AER_VS:
|
||||
@ -4590,6 +4798,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
|
||||
void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
nvme_mpath_stop(ctrl);
|
||||
nvme_auth_stop(ctrl);
|
||||
nvme_stop_keep_alive(ctrl);
|
||||
nvme_stop_failfast_work(ctrl);
|
||||
flush_work(&ctrl->async_event_work);
|
||||
@ -4649,6 +4858,8 @@ static void nvme_free_ctrl(struct device *dev)
|
||||
|
||||
nvme_free_cels(ctrl);
|
||||
nvme_mpath_uninit(ctrl);
|
||||
nvme_auth_stop(ctrl);
|
||||
nvme_auth_free(ctrl);
|
||||
__free_page(ctrl->discard_page);
|
||||
|
||||
if (subsys) {
|
||||
@ -4739,6 +4950,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
|
||||
|
||||
nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
|
||||
nvme_mpath_init_ctrl(ctrl);
|
||||
nvme_auth_init_ctrl(ctrl);
|
||||
|
||||
return 0;
|
||||
out_free_name:
|
||||
|
@ -152,7 +152,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
|
||||
cmd.prop_get.fctype = nvme_fabrics_type_property_get;
|
||||
cmd.prop_get.offset = cpu_to_le32(off);
|
||||
|
||||
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
|
||||
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
|
||||
NVME_QID_ANY, 0, 0);
|
||||
|
||||
if (ret >= 0)
|
||||
@ -198,7 +198,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
|
||||
cmd.prop_get.attrib = 1;
|
||||
cmd.prop_get.offset = cpu_to_le32(off);
|
||||
|
||||
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
|
||||
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
|
||||
NVME_QID_ANY, 0, 0);
|
||||
|
||||
if (ret >= 0)
|
||||
@ -243,7 +243,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
|
||||
cmd.prop_set.offset = cpu_to_le32(off);
|
||||
cmd.prop_set.value = cpu_to_le64(val);
|
||||
|
||||
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, 0,
|
||||
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0,
|
||||
NVME_QID_ANY, 0, 0);
|
||||
if (unlikely(ret))
|
||||
dev_err(ctrl->device,
|
||||
@ -331,6 +331,10 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
|
||||
dev_err(ctrl->device,
|
||||
"Connect command failed: host path error\n");
|
||||
break;
|
||||
case NVME_SC_AUTH_REQUIRED:
|
||||
dev_err(ctrl->device,
|
||||
"Connect command failed: authentication required\n");
|
||||
break;
|
||||
default:
|
||||
dev_err(ctrl->device,
|
||||
"Connect command failed, error wo/DNR bit: %d\n",
|
||||
@ -365,6 +369,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
|
||||
union nvme_result res;
|
||||
struct nvmf_connect_data *data;
|
||||
int ret;
|
||||
u32 result;
|
||||
|
||||
cmd.connect.opcode = nvme_fabrics_command;
|
||||
cmd.connect.fctype = nvme_fabrics_type_connect;
|
||||
@ -389,7 +394,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
|
||||
strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
|
||||
|
||||
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
|
||||
data, sizeof(*data), 0, NVME_QID_ANY, 1,
|
||||
data, sizeof(*data), NVME_QID_ANY, 1,
|
||||
BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
|
||||
if (ret) {
|
||||
nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
|
||||
@ -397,8 +402,25 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
|
||||
goto out_free_data;
|
||||
}
|
||||
|
||||
ctrl->cntlid = le16_to_cpu(res.u16);
|
||||
|
||||
result = le32_to_cpu(res.u32);
|
||||
ctrl->cntlid = result & 0xFFFF;
|
||||
if ((result >> 16) & 0x3) {
|
||||
/* Authentication required */
|
||||
ret = nvme_auth_negotiate(ctrl, 0);
|
||||
if (ret) {
|
||||
dev_warn(ctrl->device,
|
||||
"qid 0: authentication setup failed\n");
|
||||
ret = NVME_SC_AUTH_REQUIRED;
|
||||
goto out_free_data;
|
||||
}
|
||||
ret = nvme_auth_wait(ctrl, 0);
|
||||
if (ret)
|
||||
dev_warn(ctrl->device,
|
||||
"qid 0: authentication failed\n");
|
||||
else
|
||||
dev_info(ctrl->device,
|
||||
"qid 0: authenticated\n");
|
||||
}
|
||||
out_free_data:
|
||||
kfree(data);
|
||||
return ret;
|
||||
@ -431,6 +453,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
|
||||
struct nvmf_connect_data *data;
|
||||
union nvme_result res;
|
||||
int ret;
|
||||
u32 result;
|
||||
|
||||
cmd.connect.opcode = nvme_fabrics_command;
|
||||
cmd.connect.fctype = nvme_fabrics_type_connect;
|
||||
@ -450,12 +473,27 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
|
||||
strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
|
||||
|
||||
ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
|
||||
data, sizeof(*data), 0, qid, 1,
|
||||
data, sizeof(*data), qid, 1,
|
||||
BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
|
||||
if (ret) {
|
||||
nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
|
||||
&cmd, data);
|
||||
}
|
||||
result = le32_to_cpu(res.u32);
|
||||
if ((result >> 16) & 2) {
|
||||
/* Authentication required */
|
||||
ret = nvme_auth_negotiate(ctrl, qid);
|
||||
if (ret) {
|
||||
dev_warn(ctrl->device,
|
||||
"qid %d: authentication setup failed\n", qid);
|
||||
ret = NVME_SC_AUTH_REQUIRED;
|
||||
} else {
|
||||
ret = nvme_auth_wait(ctrl, qid);
|
||||
if (ret)
|
||||
dev_warn(ctrl->device,
|
||||
"qid %u: authentication failed\n", qid);
|
||||
}
|
||||
}
|
||||
kfree(data);
|
||||
return ret;
|
||||
}
|
||||
@ -548,6 +586,8 @@ static const match_table_t opt_tokens = {
|
||||
{ NVMF_OPT_TOS, "tos=%d" },
|
||||
{ NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" },
|
||||
{ NVMF_OPT_DISCOVERY, "discovery" },
|
||||
{ NVMF_OPT_DHCHAP_SECRET, "dhchap_secret=%s" },
|
||||
{ NVMF_OPT_DHCHAP_CTRL_SECRET, "dhchap_ctrl_secret=%s" },
|
||||
{ NVMF_OPT_ERR, NULL }
|
||||
};
|
||||
|
||||
@ -829,6 +869,34 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
|
||||
case NVMF_OPT_DISCOVERY:
|
||||
opts->discovery_nqn = true;
|
||||
break;
|
||||
case NVMF_OPT_DHCHAP_SECRET:
|
||||
p = match_strdup(args);
|
||||
if (!p) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
if (strlen(p) < 11 || strncmp(p, "DHHC-1:", 7)) {
|
||||
pr_err("Invalid DH-CHAP secret %s\n", p);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
kfree(opts->dhchap_secret);
|
||||
opts->dhchap_secret = p;
|
||||
break;
|
||||
case NVMF_OPT_DHCHAP_CTRL_SECRET:
|
||||
p = match_strdup(args);
|
||||
if (!p) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
if (strlen(p) < 11 || strncmp(p, "DHHC-1:", 7)) {
|
||||
pr_err("Invalid DH-CHAP secret %s\n", p);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
kfree(opts->dhchap_ctrl_secret);
|
||||
opts->dhchap_ctrl_secret = p;
|
||||
break;
|
||||
default:
|
||||
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
|
||||
p);
|
||||
@ -947,6 +1015,8 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
|
||||
kfree(opts->subsysnqn);
|
||||
kfree(opts->host_traddr);
|
||||
kfree(opts->host_iface);
|
||||
kfree(opts->dhchap_secret);
|
||||
kfree(opts->dhchap_ctrl_secret);
|
||||
kfree(opts);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvmf_free_options);
|
||||
@ -956,7 +1026,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
|
||||
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
|
||||
NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
|
||||
NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\
|
||||
NVMF_OPT_FAIL_FAST_TMO)
|
||||
NVMF_OPT_FAIL_FAST_TMO | NVMF_OPT_DHCHAP_SECRET |\
|
||||
NVMF_OPT_DHCHAP_CTRL_SECRET)
|
||||
|
||||
static struct nvme_ctrl *
|
||||
nvmf_create_ctrl(struct device *dev, const char *buf)
|
||||
@ -1192,7 +1263,14 @@ static void __exit nvmf_exit(void)
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_auth_send_command) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_auth_receive_command) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_connect_data) != 1024);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_negotiate_data) != 8);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_challenge_data) != 16);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_reply_data) != 16);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_success1_data) != 16);
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_success2_data) != 16);
|
||||
}
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
@ -68,6 +68,8 @@ enum {
|
||||
NVMF_OPT_FAIL_FAST_TMO = 1 << 20,
|
||||
NVMF_OPT_HOST_IFACE = 1 << 21,
|
||||
NVMF_OPT_DISCOVERY = 1 << 22,
|
||||
NVMF_OPT_DHCHAP_SECRET = 1 << 23,
|
||||
NVMF_OPT_DHCHAP_CTRL_SECRET = 1 << 24,
|
||||
};
|
||||
|
||||
/**
|
||||
@ -97,6 +99,9 @@ enum {
|
||||
* @max_reconnects: maximum number of allowed reconnect attempts before removing
|
||||
* the controller, (-1) means reconnect forever, zero means remove
|
||||
* immediately;
|
||||
* @dhchap_secret: DH-HMAC-CHAP secret
|
||||
* @dhchap_ctrl_secret: DH-HMAC-CHAP controller secret for bi-directional
|
||||
* authentication
|
||||
* @disable_sqflow: disable controller sq flow control
|
||||
* @hdr_digest: generate/verify header digest (TCP)
|
||||
* @data_digest: generate/verify data digest (TCP)
|
||||
@ -121,6 +126,8 @@ struct nvmf_ctrl_options {
|
||||
unsigned int kato;
|
||||
struct nvmf_host *host;
|
||||
int max_reconnects;
|
||||
char *dhchap_secret;
|
||||
char *dhchap_ctrl_secret;
|
||||
bool disable_sqflow;
|
||||
bool hdr_digest;
|
||||
bool data_digest;
|
||||
|
@ -346,7 +346,7 @@ static void nvme_ns_head_submit_bio(struct bio *bio)
|
||||
* different queue via blk_steal_bios(), so we need to use the bio_split
|
||||
* pool from the original queue to allocate the bvecs from.
|
||||
*/
|
||||
blk_queue_split(&bio);
|
||||
bio = bio_split_to_limits(bio);
|
||||
|
||||
srcu_idx = srcu_read_lock(&head->srcu);
|
||||
ns = nvme_find_path(head);
|
||||
@ -408,6 +408,7 @@ const struct block_device_operations nvme_ns_head_ops = {
|
||||
.open = nvme_ns_head_open,
|
||||
.release = nvme_ns_head_release,
|
||||
.ioctl = nvme_ns_head_ioctl,
|
||||
.compat_ioctl = blkdev_compat_ptr_ioctl,
|
||||
.getgeo = nvme_getgeo,
|
||||
.report_zones = nvme_ns_head_report_zones,
|
||||
.pr_ops = &nvme_pr_ops,
|
||||
@ -800,16 +801,16 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
|
||||
return -ENXIO; /* just break out of the loop */
|
||||
}
|
||||
|
||||
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
|
||||
{
|
||||
if (nvme_ctrl_use_ana(ns->ctrl)) {
|
||||
struct nvme_ana_group_desc desc = {
|
||||
.grpid = id->anagrpid,
|
||||
.grpid = anagrpid,
|
||||
.state = 0,
|
||||
};
|
||||
|
||||
mutex_lock(&ns->ctrl->ana_lock);
|
||||
ns->ana_grpid = le32_to_cpu(id->anagrpid);
|
||||
ns->ana_grpid = le32_to_cpu(anagrpid);
|
||||
nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
|
||||
mutex_unlock(&ns->ctrl->ana_lock);
|
||||
if (desc.state) {
|
||||
|
@ -140,7 +140,7 @@ enum nvme_quirks {
|
||||
NVME_QUIRK_DMA_ADDRESS_BITS_48 = (1 << 16),
|
||||
|
||||
/*
|
||||
* The controller requires the command_id value be be limited, so skip
|
||||
* The controller requires the command_id value be limited, so skip
|
||||
* encoding the generation sequence number.
|
||||
*/
|
||||
NVME_QUIRK_SKIP_CID_GEN = (1 << 17),
|
||||
@ -328,6 +328,15 @@ struct nvme_ctrl {
|
||||
struct work_struct ana_work;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVME_AUTH
|
||||
struct work_struct dhchap_auth_work;
|
||||
struct list_head dhchap_auth_list;
|
||||
struct mutex dhchap_auth_mutex;
|
||||
struct nvme_dhchap_key *host_key;
|
||||
struct nvme_dhchap_key *ctrl_key;
|
||||
u16 transaction;
|
||||
#endif
|
||||
|
||||
/* Power saving configuration */
|
||||
u64 ps_max_latency_us;
|
||||
bool apst_enabled;
|
||||
@ -781,7 +790,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
|
||||
void *buf, unsigned bufflen);
|
||||
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
|
||||
union nvme_result *result, void *buffer, unsigned bufflen,
|
||||
unsigned timeout, int qid, int at_head,
|
||||
int qid, int at_head,
|
||||
blk_mq_req_flags_t flags);
|
||||
int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
|
||||
unsigned int dword11, void *buffer, size_t buflen,
|
||||
@ -837,7 +846,7 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
|
||||
void nvme_failover_req(struct request *req);
|
||||
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
|
||||
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
|
||||
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
|
||||
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
|
||||
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
|
||||
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
|
||||
void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
|
||||
@ -879,8 +888,7 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
|
||||
struct nvme_id_ns *id)
|
||||
static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
|
||||
{
|
||||
}
|
||||
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
||||
@ -992,6 +1000,27 @@ static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
|
||||
return ctrl->sgls & ((1 << 0) | (1 << 1));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVME_AUTH
|
||||
void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl);
|
||||
void nvme_auth_stop(struct nvme_ctrl *ctrl);
|
||||
int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid);
|
||||
int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid);
|
||||
void nvme_auth_reset(struct nvme_ctrl *ctrl);
|
||||
void nvme_auth_free(struct nvme_ctrl *ctrl);
|
||||
#else
|
||||
static inline void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) {};
|
||||
static inline void nvme_auth_stop(struct nvme_ctrl *ctrl) {};
|
||||
static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
|
||||
{
|
||||
return -EPROTONOSUPPORT;
|
||||
}
|
||||
static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
|
||||
{
|
||||
return NVME_SC_AUTH_REQUIRED;
|
||||
}
|
||||
static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {};
|
||||
#endif
|
||||
|
||||
u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
u8 opcode);
|
||||
int nvme_execute_passthru_rq(struct request *rq);
|
||||
|
@ -670,7 +670,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
|
||||
|
||||
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
|
||||
if (!prp_list) {
|
||||
iod->first_dma = dma_addr;
|
||||
iod->npages = -1;
|
||||
return BLK_STS_RESOURCE;
|
||||
}
|
||||
@ -1435,8 +1434,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
|
||||
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
|
||||
|
||||
dev_warn(nvmeq->dev->ctrl.device,
|
||||
"I/O %d QID %d timeout, aborting\n",
|
||||
req->tag, nvmeq->qid);
|
||||
"I/O %d (%s) QID %d timeout, aborting\n",
|
||||
req->tag,
|
||||
nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode),
|
||||
nvmeq->qid);
|
||||
|
||||
abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
|
||||
BLK_MQ_REQ_NOWAIT);
|
||||
@ -1765,37 +1766,35 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
|
||||
}
|
||||
}
|
||||
|
||||
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
|
||||
static int nvme_pci_alloc_admin_tag_set(struct nvme_dev *dev)
|
||||
{
|
||||
if (!dev->ctrl.admin_q) {
|
||||
dev->admin_tagset.ops = &nvme_mq_admin_ops;
|
||||
dev->admin_tagset.nr_hw_queues = 1;
|
||||
struct blk_mq_tag_set *set = &dev->admin_tagset;
|
||||
|
||||
dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
dev->admin_tagset.timeout = NVME_ADMIN_TIMEOUT;
|
||||
dev->admin_tagset.numa_node = dev->ctrl.numa_node;
|
||||
dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
|
||||
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
|
||||
dev->admin_tagset.driver_data = dev;
|
||||
set->ops = &nvme_mq_admin_ops;
|
||||
set->nr_hw_queues = 1;
|
||||
|
||||
if (blk_mq_alloc_tag_set(&dev->admin_tagset))
|
||||
return -ENOMEM;
|
||||
dev->ctrl.admin_tagset = &dev->admin_tagset;
|
||||
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
set->timeout = NVME_ADMIN_TIMEOUT;
|
||||
set->numa_node = dev->ctrl.numa_node;
|
||||
set->cmd_size = sizeof(struct nvme_iod);
|
||||
set->flags = BLK_MQ_F_NO_SCHED;
|
||||
set->driver_data = dev;
|
||||
|
||||
dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
|
||||
if (IS_ERR(dev->ctrl.admin_q)) {
|
||||
blk_mq_free_tag_set(&dev->admin_tagset);
|
||||
dev->ctrl.admin_q = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!blk_get_queue(dev->ctrl.admin_q)) {
|
||||
nvme_dev_remove_admin(dev);
|
||||
dev->ctrl.admin_q = NULL;
|
||||
return -ENODEV;
|
||||
}
|
||||
} else
|
||||
nvme_start_admin_queue(&dev->ctrl);
|
||||
if (blk_mq_alloc_tag_set(set))
|
||||
return -ENOMEM;
|
||||
dev->ctrl.admin_tagset = set;
|
||||
|
||||
dev->ctrl.admin_q = blk_mq_init_queue(set);
|
||||
if (IS_ERR(dev->ctrl.admin_q)) {
|
||||
blk_mq_free_tag_set(set);
|
||||
dev->ctrl.admin_q = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!blk_get_queue(dev->ctrl.admin_q)) {
|
||||
nvme_dev_remove_admin(dev);
|
||||
dev->ctrl.admin_q = NULL;
|
||||
return -ENODEV;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2534,47 +2533,45 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void nvme_dev_add(struct nvme_dev *dev)
|
||||
static void nvme_pci_alloc_tag_set(struct nvme_dev *dev)
|
||||
{
|
||||
struct blk_mq_tag_set * set = &dev->tagset;
|
||||
int ret;
|
||||
|
||||
if (!dev->ctrl.tagset) {
|
||||
dev->tagset.ops = &nvme_mq_ops;
|
||||
dev->tagset.nr_hw_queues = dev->online_queues - 1;
|
||||
dev->tagset.nr_maps = 2; /* default + read */
|
||||
if (dev->io_queues[HCTX_TYPE_POLL])
|
||||
dev->tagset.nr_maps++;
|
||||
dev->tagset.timeout = NVME_IO_TIMEOUT;
|
||||
dev->tagset.numa_node = dev->ctrl.numa_node;
|
||||
dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth,
|
||||
BLK_MQ_MAX_DEPTH) - 1;
|
||||
dev->tagset.cmd_size = sizeof(struct nvme_iod);
|
||||
dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
dev->tagset.driver_data = dev;
|
||||
set->ops = &nvme_mq_ops;
|
||||
set->nr_hw_queues = dev->online_queues - 1;
|
||||
set->nr_maps = 2; /* default + read */
|
||||
if (dev->io_queues[HCTX_TYPE_POLL])
|
||||
set->nr_maps++;
|
||||
set->timeout = NVME_IO_TIMEOUT;
|
||||
set->numa_node = dev->ctrl.numa_node;
|
||||
set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
|
||||
set->cmd_size = sizeof(struct nvme_iod);
|
||||
set->flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
set->driver_data = dev;
|
||||
|
||||
/*
|
||||
* Some Apple controllers requires tags to be unique
|
||||
* across admin and IO queue, so reserve the first 32
|
||||
* tags of the IO queue.
|
||||
*/
|
||||
if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
|
||||
dev->tagset.reserved_tags = NVME_AQ_DEPTH;
|
||||
/*
|
||||
* Some Apple controllers requires tags to be unique
|
||||
* across admin and IO queue, so reserve the first 32
|
||||
* tags of the IO queue.
|
||||
*/
|
||||
if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
|
||||
set->reserved_tags = NVME_AQ_DEPTH;
|
||||
|
||||
ret = blk_mq_alloc_tag_set(&dev->tagset);
|
||||
if (ret) {
|
||||
dev_warn(dev->ctrl.device,
|
||||
"IO queues tagset allocation failed %d\n", ret);
|
||||
return;
|
||||
}
|
||||
dev->ctrl.tagset = &dev->tagset;
|
||||
} else {
|
||||
blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
|
||||
|
||||
/* Free previously allocated queues that are no longer usable */
|
||||
nvme_free_queues(dev, dev->online_queues);
|
||||
ret = blk_mq_alloc_tag_set(set);
|
||||
if (ret) {
|
||||
dev_warn(dev->ctrl.device,
|
||||
"IO queues tagset allocation failed %d\n", ret);
|
||||
return;
|
||||
}
|
||||
dev->ctrl.tagset = set;
|
||||
}
|
||||
|
||||
nvme_dbbuf_set(dev);
|
||||
static void nvme_pci_update_nr_queues(struct nvme_dev *dev)
|
||||
{
|
||||
blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
|
||||
/* free previously allocated queues that are no longer usable */
|
||||
nvme_free_queues(dev, dev->online_queues);
|
||||
}
|
||||
|
||||
static int nvme_pci_enable(struct nvme_dev *dev)
|
||||
@ -2725,10 +2722,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
|
||||
nvme_pci_disable(dev);
|
||||
nvme_reap_pending_cqes(dev);
|
||||
|
||||
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
|
||||
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
|
||||
blk_mq_tagset_wait_completed_request(&dev->tagset);
|
||||
blk_mq_tagset_wait_completed_request(&dev->admin_tagset);
|
||||
nvme_cancel_tagset(&dev->ctrl);
|
||||
nvme_cancel_admin_tagset(&dev->ctrl);
|
||||
|
||||
/*
|
||||
* The driver will not be starting up queues again if shutting down so
|
||||
@ -2842,9 +2837,13 @@ static void nvme_reset_work(struct work_struct *work)
|
||||
if (result)
|
||||
goto out_unlock;
|
||||
|
||||
result = nvme_alloc_admin_tags(dev);
|
||||
if (result)
|
||||
goto out_unlock;
|
||||
if (!dev->ctrl.admin_q) {
|
||||
result = nvme_pci_alloc_admin_tag_set(dev);
|
||||
if (result)
|
||||
goto out_unlock;
|
||||
} else {
|
||||
nvme_start_admin_queue(&dev->ctrl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Limit the max command size to prevent iod->sg allocations going
|
||||
@ -2923,7 +2922,11 @@ static void nvme_reset_work(struct work_struct *work)
|
||||
} else {
|
||||
nvme_start_queues(&dev->ctrl);
|
||||
nvme_wait_freeze(&dev->ctrl);
|
||||
nvme_dev_add(dev);
|
||||
if (!dev->ctrl.tagset)
|
||||
nvme_pci_alloc_tag_set(dev);
|
||||
else
|
||||
nvme_pci_update_nr_queues(dev);
|
||||
nvme_dbbuf_set(dev);
|
||||
nvme_unfreeze(&dev->ctrl);
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,7 @@
|
||||
#include "fabrics.h"
|
||||
|
||||
|
||||
#define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */
|
||||
#define NVME_RDMA_CM_TIMEOUT_MS 3000 /* 3 second */
|
||||
|
||||
#define NVME_RDMA_MAX_SEGMENTS 256
|
||||
|
||||
@ -248,12 +248,9 @@ static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = wait_for_completion_interruptible_timeout(&queue->cm_done,
|
||||
msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1);
|
||||
if (ret < 0)
|
||||
ret = wait_for_completion_interruptible(&queue->cm_done);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (ret == 0)
|
||||
return -ETIMEDOUT;
|
||||
WARN_ON_ONCE(queue->cm_error > 0);
|
||||
return queue->cm_error;
|
||||
}
|
||||
@ -612,7 +609,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
|
||||
queue->cm_error = -ETIMEDOUT;
|
||||
ret = rdma_resolve_addr(queue->cm_id, src_addr,
|
||||
(struct sockaddr *)&ctrl->addr,
|
||||
NVME_RDMA_CONNECT_TIMEOUT_MS);
|
||||
NVME_RDMA_CM_TIMEOUT_MS);
|
||||
if (ret) {
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"rdma_resolve_addr failed (%d).\n", ret);
|
||||
@ -790,50 +787,54 @@ out_free_queues:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
|
||||
bool admin)
|
||||
static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
|
||||
struct blk_mq_tag_set *set;
|
||||
struct blk_mq_tag_set *set = &ctrl->admin_tag_set;
|
||||
int ret;
|
||||
|
||||
if (admin) {
|
||||
set = &ctrl->admin_tag_set;
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = &nvme_rdma_admin_mq_ops;
|
||||
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
set->reserved_tags = NVMF_RESERVED_TAGS;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->cmd_size = sizeof(struct nvme_rdma_request) +
|
||||
NVME_RDMA_DATA_SGL_SIZE;
|
||||
set->driver_data = ctrl;
|
||||
set->nr_hw_queues = 1;
|
||||
set->timeout = NVME_ADMIN_TIMEOUT;
|
||||
set->flags = BLK_MQ_F_NO_SCHED;
|
||||
} else {
|
||||
set = &ctrl->tag_set;
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = &nvme_rdma_mq_ops;
|
||||
set->queue_depth = nctrl->sqsize + 1;
|
||||
set->reserved_tags = NVMF_RESERVED_TAGS;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
set->cmd_size = sizeof(struct nvme_rdma_request) +
|
||||
NVME_RDMA_DATA_SGL_SIZE;
|
||||
if (nctrl->max_integrity_segments)
|
||||
set->cmd_size += sizeof(struct nvme_rdma_sgl) +
|
||||
NVME_RDMA_METADATA_SGL_SIZE;
|
||||
set->driver_data = ctrl;
|
||||
set->nr_hw_queues = nctrl->queue_count - 1;
|
||||
set->timeout = NVME_IO_TIMEOUT;
|
||||
set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
|
||||
}
|
||||
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = &nvme_rdma_admin_mq_ops;
|
||||
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
set->reserved_tags = NVMF_RESERVED_TAGS;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->cmd_size = sizeof(struct nvme_rdma_request) +
|
||||
NVME_RDMA_DATA_SGL_SIZE;
|
||||
set->driver_data = ctrl;
|
||||
set->nr_hw_queues = 1;
|
||||
set->timeout = NVME_ADMIN_TIMEOUT;
|
||||
set->flags = BLK_MQ_F_NO_SCHED;
|
||||
ret = blk_mq_alloc_tag_set(set);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
if (!ret)
|
||||
ctrl->ctrl.admin_tagset = set;
|
||||
return ret;
|
||||
}
|
||||
|
||||
return set;
|
||||
static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
|
||||
struct blk_mq_tag_set *set = &ctrl->tag_set;
|
||||
int ret;
|
||||
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = &nvme_rdma_mq_ops;
|
||||
set->queue_depth = nctrl->sqsize + 1;
|
||||
set->reserved_tags = NVMF_RESERVED_TAGS;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
set->cmd_size = sizeof(struct nvme_rdma_request) +
|
||||
NVME_RDMA_DATA_SGL_SIZE;
|
||||
if (nctrl->max_integrity_segments)
|
||||
set->cmd_size += sizeof(struct nvme_rdma_sgl) +
|
||||
NVME_RDMA_METADATA_SGL_SIZE;
|
||||
set->driver_data = ctrl;
|
||||
set->nr_hw_queues = nctrl->queue_count - 1;
|
||||
set->timeout = NVME_IO_TIMEOUT;
|
||||
set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
|
||||
ret = blk_mq_alloc_tag_set(set);
|
||||
if (!ret)
|
||||
ctrl->ctrl.tagset = set;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
|
||||
@ -885,11 +886,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
|
||||
goto out_free_queue;
|
||||
|
||||
if (new) {
|
||||
ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
|
||||
if (IS_ERR(ctrl->ctrl.admin_tagset)) {
|
||||
error = PTR_ERR(ctrl->ctrl.admin_tagset);
|
||||
error = nvme_rdma_alloc_admin_tag_set(&ctrl->ctrl);
|
||||
if (error)
|
||||
goto out_free_async_qe;
|
||||
}
|
||||
|
||||
ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
|
||||
if (IS_ERR(ctrl->ctrl.fabrics_q)) {
|
||||
@ -972,11 +971,9 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
|
||||
return ret;
|
||||
|
||||
if (new) {
|
||||
ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
|
||||
if (IS_ERR(ctrl->ctrl.tagset)) {
|
||||
ret = PTR_ERR(ctrl->ctrl.tagset);
|
||||
ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl);
|
||||
if (ret)
|
||||
goto out_free_io_queues;
|
||||
}
|
||||
|
||||
ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
|
||||
if (ret)
|
||||
@ -1205,6 +1202,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
|
||||
struct nvme_rdma_ctrl *ctrl = container_of(work,
|
||||
struct nvme_rdma_ctrl, err_work);
|
||||
|
||||
nvme_auth_stop(&ctrl->ctrl);
|
||||
nvme_stop_keep_alive(&ctrl->ctrl);
|
||||
flush_work(&ctrl->ctrl.async_event_work);
|
||||
nvme_rdma_teardown_io_queues(ctrl, false);
|
||||
@ -1894,7 +1892,7 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
|
||||
|
||||
if (ctrl->opts->tos >= 0)
|
||||
rdma_set_service_type(queue->cm_id, ctrl->opts->tos);
|
||||
ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
|
||||
ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CM_TIMEOUT_MS);
|
||||
if (ret) {
|
||||
dev_err(ctrl->device, "rdma_resolve_route failed (%d).\n",
|
||||
queue->cm_error);
|
||||
|
@ -209,9 +209,11 @@ static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
|
||||
return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
|
||||
}
|
||||
|
||||
static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
|
||||
static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req)
|
||||
{
|
||||
return queue->cmnd_capsule_len - sizeof(struct nvme_command);
|
||||
if (nvme_is_fabrics(req->req.cmd))
|
||||
return NVME_TCP_ADMIN_CCSZ;
|
||||
return req->queue->cmnd_capsule_len - sizeof(struct nvme_command);
|
||||
}
|
||||
|
||||
static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
|
||||
@ -229,7 +231,7 @@ static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
|
||||
rq = blk_mq_rq_from_pdu(req);
|
||||
|
||||
return rq_data_dir(rq) == WRITE && req->data_len &&
|
||||
req->data_len <= nvme_tcp_inline_data_size(req->queue);
|
||||
req->data_len <= nvme_tcp_inline_data_size(req);
|
||||
}
|
||||
|
||||
static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
|
||||
@ -1685,45 +1687,49 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
|
||||
bool admin)
|
||||
static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *nctrl)
|
||||
{
|
||||
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
|
||||
struct blk_mq_tag_set *set;
|
||||
struct blk_mq_tag_set *set = &ctrl->admin_tag_set;
|
||||
int ret;
|
||||
|
||||
if (admin) {
|
||||
set = &ctrl->admin_tag_set;
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = &nvme_tcp_admin_mq_ops;
|
||||
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
set->reserved_tags = NVMF_RESERVED_TAGS;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->flags = BLK_MQ_F_BLOCKING;
|
||||
set->cmd_size = sizeof(struct nvme_tcp_request);
|
||||
set->driver_data = ctrl;
|
||||
set->nr_hw_queues = 1;
|
||||
set->timeout = NVME_ADMIN_TIMEOUT;
|
||||
} else {
|
||||
set = &ctrl->tag_set;
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = &nvme_tcp_mq_ops;
|
||||
set->queue_depth = nctrl->sqsize + 1;
|
||||
set->reserved_tags = NVMF_RESERVED_TAGS;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
|
||||
set->cmd_size = sizeof(struct nvme_tcp_request);
|
||||
set->driver_data = ctrl;
|
||||
set->nr_hw_queues = nctrl->queue_count - 1;
|
||||
set->timeout = NVME_IO_TIMEOUT;
|
||||
set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
|
||||
}
|
||||
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = &nvme_tcp_admin_mq_ops;
|
||||
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
set->reserved_tags = NVMF_RESERVED_TAGS;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->flags = BLK_MQ_F_BLOCKING;
|
||||
set->cmd_size = sizeof(struct nvme_tcp_request);
|
||||
set->driver_data = ctrl;
|
||||
set->nr_hw_queues = 1;
|
||||
set->timeout = NVME_ADMIN_TIMEOUT;
|
||||
ret = blk_mq_alloc_tag_set(set);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
if (!ret)
|
||||
nctrl->admin_tagset = set;
|
||||
return ret;
|
||||
}
|
||||
|
||||
return set;
|
||||
static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *nctrl)
|
||||
{
|
||||
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
|
||||
struct blk_mq_tag_set *set = &ctrl->tag_set;
|
||||
int ret;
|
||||
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = &nvme_tcp_mq_ops;
|
||||
set->queue_depth = nctrl->sqsize + 1;
|
||||
set->reserved_tags = NVMF_RESERVED_TAGS;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
|
||||
set->cmd_size = sizeof(struct nvme_tcp_request);
|
||||
set->driver_data = ctrl;
|
||||
set->nr_hw_queues = nctrl->queue_count - 1;
|
||||
set->timeout = NVME_IO_TIMEOUT;
|
||||
set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
|
||||
ret = blk_mq_alloc_tag_set(set);
|
||||
if (!ret)
|
||||
nctrl->tagset = set;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
|
||||
@ -1899,11 +1905,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
|
||||
return ret;
|
||||
|
||||
if (new) {
|
||||
ctrl->tagset = nvme_tcp_alloc_tagset(ctrl, false);
|
||||
if (IS_ERR(ctrl->tagset)) {
|
||||
ret = PTR_ERR(ctrl->tagset);
|
||||
ret = nvme_tcp_alloc_tag_set(ctrl);
|
||||
if (ret)
|
||||
goto out_free_io_queues;
|
||||
}
|
||||
|
||||
ret = nvme_ctrl_init_connect_q(ctrl);
|
||||
if (ret)
|
||||
@ -1968,11 +1972,9 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
|
||||
return error;
|
||||
|
||||
if (new) {
|
||||
ctrl->admin_tagset = nvme_tcp_alloc_tagset(ctrl, true);
|
||||
if (IS_ERR(ctrl->admin_tagset)) {
|
||||
error = PTR_ERR(ctrl->admin_tagset);
|
||||
error = nvme_tcp_alloc_admin_tag_set(ctrl);
|
||||
if (error)
|
||||
goto out_free_queue;
|
||||
}
|
||||
|
||||
ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset);
|
||||
if (IS_ERR(ctrl->fabrics_q)) {
|
||||
@ -2173,6 +2175,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
|
||||
struct nvme_tcp_ctrl, err_work);
|
||||
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
|
||||
|
||||
nvme_auth_stop(ctrl);
|
||||
nvme_stop_keep_alive(ctrl);
|
||||
flush_work(&ctrl->async_event_work);
|
||||
nvme_tcp_teardown_io_queues(ctrl, false);
|
||||
@ -2371,7 +2374,7 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
|
||||
if (!blk_rq_nr_phys_segments(rq))
|
||||
nvme_tcp_set_sg_null(c);
|
||||
else if (rq_data_dir(rq) == WRITE &&
|
||||
req->data_len <= nvme_tcp_inline_data_size(queue))
|
||||
req->data_len <= nvme_tcp_inline_data_size(req))
|
||||
nvme_tcp_set_sg_inline(queue, c, req->data_len);
|
||||
else
|
||||
nvme_tcp_set_sg_host_data(c, req->data_len);
|
||||
@ -2406,7 +2409,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
|
||||
nvme_tcp_init_iter(req, rq_data_dir(rq));
|
||||
|
||||
if (rq_data_dir(rq) == WRITE &&
|
||||
req->data_len <= nvme_tcp_inline_data_size(queue))
|
||||
req->data_len <= nvme_tcp_inline_data_size(req))
|
||||
req->pdu_len = req->data_len;
|
||||
|
||||
pdu->hdr.type = nvme_tcp_cmd;
|
||||
|
@ -287,6 +287,34 @@ static const char *nvme_trace_fabrics_property_get(struct trace_seq *p, u8 *spc)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvme_trace_fabrics_auth_send(struct trace_seq *p, u8 *spc)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u8 spsp0 = spc[1];
|
||||
u8 spsp1 = spc[2];
|
||||
u8 secp = spc[3];
|
||||
u32 tl = get_unaligned_le32(spc + 4);
|
||||
|
||||
trace_seq_printf(p, "spsp0=%02x, spsp1=%02x, secp=%02x, tl=%u",
|
||||
spsp0, spsp1, secp, tl);
|
||||
trace_seq_putc(p, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvme_trace_fabrics_auth_receive(struct trace_seq *p, u8 *spc)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u8 spsp0 = spc[1];
|
||||
u8 spsp1 = spc[2];
|
||||
u8 secp = spc[3];
|
||||
u32 al = get_unaligned_le32(spc + 4);
|
||||
|
||||
trace_seq_printf(p, "spsp0=%02x, spsp1=%02x, secp=%02x, al=%u",
|
||||
spsp0, spsp1, secp, al);
|
||||
trace_seq_putc(p, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
@ -306,6 +334,10 @@ const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p,
|
||||
return nvme_trace_fabrics_connect(p, spc);
|
||||
case nvme_fabrics_type_property_get:
|
||||
return nvme_trace_fabrics_property_get(p, spc);
|
||||
case nvme_fabrics_type_auth_send:
|
||||
return nvme_trace_fabrics_auth_send(p, spc);
|
||||
case nvme_fabrics_type_auth_receive:
|
||||
return nvme_trace_fabrics_auth_receive(p, spc);
|
||||
default:
|
||||
return nvme_trace_fabrics_common(p, spc);
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ TRACE_EVENT(nvme_complete_rq,
|
||||
TP_fast_assign(
|
||||
__entry->ctrl_id = nvme_req(req)->ctrl->instance;
|
||||
__entry->qid = nvme_req_qid(req);
|
||||
__entry->cid = req->tag;
|
||||
__entry->cid = nvme_req(req)->cmd->common.command_id;
|
||||
__entry->result = le64_to_cpu(nvme_req(req)->result.u64);
|
||||
__entry->retries = nvme_req(req)->retries;
|
||||
__entry->flags = nvme_req(req)->flags;
|
||||
|
@ -83,3 +83,18 @@ config NVME_TARGET_TCP
|
||||
devices over TCP.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config NVME_TARGET_AUTH
|
||||
bool "NVMe over Fabrics In-band Authentication support"
|
||||
depends on NVME_TARGET
|
||||
select NVME_COMMON
|
||||
select CRYPTO
|
||||
select CRYPTO_HMAC
|
||||
select CRYPTO_SHA256
|
||||
select CRYPTO_SHA512
|
||||
select CRYPTO_DH
|
||||
select CRYPTO_DH_RFC7919_GROUPS
|
||||
help
|
||||
This enables support for NVMe over Fabrics In-band Authentication
|
||||
|
||||
If unsure, say N.
|
||||
|
@ -13,6 +13,7 @@ nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
|
||||
discovery.o io-cmd-file.o io-cmd-bdev.o
|
||||
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
|
||||
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
|
||||
nvmet-$(CONFIG_NVME_TARGET_AUTH) += fabrics-cmd-auth.o auth.o
|
||||
nvme-loop-y += loop.o
|
||||
nvmet-rdma-y += rdma.o
|
||||
nvmet-fc-y += fc.o
|
||||
|
@ -1017,7 +1017,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
|
||||
u16 ret;
|
||||
|
||||
if (nvme_is_fabrics(cmd))
|
||||
return nvmet_parse_fabrics_cmd(req);
|
||||
return nvmet_parse_fabrics_admin_cmd(req);
|
||||
if (unlikely(!nvmet_check_auth_status(req)))
|
||||
return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
|
||||
if (nvmet_is_disc_subsys(nvmet_req_subsys(req)))
|
||||
return nvmet_parse_discovery_cmd(req);
|
||||
|
||||
|
525
drivers/nvme/target/auth.c
Normal file
525
drivers/nvme/target/auth.c
Normal file
@ -0,0 +1,525 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NVMe over Fabrics DH-HMAC-CHAP authentication.
|
||||
* Copyright (c) 2020 Hannes Reinecke, SUSE Software Solutions.
|
||||
* All rights reserved.
|
||||
*/
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/err.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/base64.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/nvme-auth.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
#include "nvmet.h"
|
||||
|
||||
int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
|
||||
bool set_ctrl)
|
||||
{
|
||||
unsigned char key_hash;
|
||||
char *dhchap_secret;
|
||||
|
||||
if (sscanf(secret, "DHHC-1:%hhd:%*s", &key_hash) != 1)
|
||||
return -EINVAL;
|
||||
if (key_hash > 3) {
|
||||
pr_warn("Invalid DH-HMAC-CHAP hash id %d\n",
|
||||
key_hash);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (key_hash > 0) {
|
||||
/* Validate selected hash algorithm */
|
||||
const char *hmac = nvme_auth_hmac_name(key_hash);
|
||||
|
||||
if (!crypto_has_shash(hmac, 0, 0)) {
|
||||
pr_err("DH-HMAC-CHAP hash %s unsupported\n", hmac);
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
}
|
||||
dhchap_secret = kstrdup(secret, GFP_KERNEL);
|
||||
if (!dhchap_secret)
|
||||
return -ENOMEM;
|
||||
if (set_ctrl) {
|
||||
host->dhchap_ctrl_secret = strim(dhchap_secret);
|
||||
host->dhchap_ctrl_key_hash = key_hash;
|
||||
} else {
|
||||
host->dhchap_secret = strim(dhchap_secret);
|
||||
host->dhchap_key_hash = key_hash;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Select DH group @dhgroup_id for @ctrl and generate its key pair.
 *
 * If a transform for the same group already exists it is reused. Otherwise
 * any existing transform is released, and for a non-NULL group a new KPP
 * transform is allocated, a private key is generated and the public key is
 * stored in ctrl->dh_key (ctrl->dh_keysize bytes).
 *
 * Returns 0 on success (including the NULL group), -EINVAL for an unknown
 * group, -ENOMEM, or the error returned by the crypto layer.
 */
int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
{
	const char *dhgroup_kpp;
	int ret = 0;

	pr_debug("%s: ctrl %d selecting dhgroup %d\n",
		 __func__, ctrl->cntlid, dhgroup_id);

	if (ctrl->dh_tfm) {
		if (ctrl->dh_gid == dhgroup_id) {
			pr_debug("%s: ctrl %d reuse existing DH group %d\n",
				 __func__, ctrl->cntlid, dhgroup_id);
			return 0;
		}
		crypto_free_kpp(ctrl->dh_tfm);
		ctrl->dh_tfm = NULL;
		ctrl->dh_gid = 0;
	}

	if (dhgroup_id == NVME_AUTH_DHGROUP_NULL)
		return 0;

	dhgroup_kpp = nvme_auth_dhgroup_kpp(dhgroup_id);
	if (!dhgroup_kpp) {
		pr_debug("%s: ctrl %d invalid DH group %d\n",
			 __func__, ctrl->cntlid, dhgroup_id);
		return -EINVAL;
	}
	ctrl->dh_tfm = crypto_alloc_kpp(dhgroup_kpp, 0, 0);
	if (IS_ERR(ctrl->dh_tfm)) {
		pr_debug("%s: ctrl %d failed to setup DH group %d, err %ld\n",
			 __func__, ctrl->cntlid, dhgroup_id,
			 PTR_ERR(ctrl->dh_tfm));
		ret = PTR_ERR(ctrl->dh_tfm);
		ctrl->dh_tfm = NULL;
		ctrl->dh_gid = 0;
		return ret;
	}

	ctrl->dh_gid = dhgroup_id;
	pr_debug("%s: ctrl %d setup DH group %d\n",
		 __func__, ctrl->cntlid, ctrl->dh_gid);
	ret = nvme_auth_gen_privkey(ctrl->dh_tfm, ctrl->dh_gid);
	if (ret < 0) {
		pr_debug("%s: ctrl %d failed to generate private key, err %d\n",
			 __func__, ctrl->cntlid, ret);
		kfree_sensitive(ctrl->dh_key);
		/*
		 * Clear the pointer: nvmet_destroy_auth() frees ctrl->dh_key
		 * again, which would be a double free if it stayed dangling.
		 */
		ctrl->dh_key = NULL;
		return ret;
	}
	ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm);
	/* Replace any public key left over from a previous group. */
	kfree_sensitive(ctrl->dh_key);
	ctrl->dh_key = kzalloc(ctrl->dh_keysize, GFP_KERNEL);
	if (!ctrl->dh_key) {
		pr_warn("ctrl %d failed to allocate public key\n",
			ctrl->cntlid);
		return -ENOMEM;
	}
	ret = nvme_auth_gen_pubkey(ctrl->dh_tfm, ctrl->dh_key,
				   ctrl->dh_keysize);
	if (ret < 0) {
		pr_warn("ctrl %d failed to generate public key\n",
			ctrl->cntlid);
		kfree(ctrl->dh_key);
		ctrl->dh_key = NULL;
	}

	return ret;
}
|
||||
|
||||
int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
|
||||
{
|
||||
int ret = 0;
|
||||
struct nvmet_host_link *p;
|
||||
struct nvmet_host *host = NULL;
|
||||
const char *hash_name;
|
||||
|
||||
down_read(&nvmet_config_sem);
|
||||
if (nvmet_is_disc_subsys(ctrl->subsys))
|
||||
goto out_unlock;
|
||||
|
||||
if (ctrl->subsys->allow_any_host)
|
||||
goto out_unlock;
|
||||
|
||||
list_for_each_entry(p, &ctrl->subsys->hosts, entry) {
|
||||
pr_debug("check %s\n", nvmet_host_name(p->host));
|
||||
if (strcmp(nvmet_host_name(p->host), ctrl->hostnqn))
|
||||
continue;
|
||||
host = p->host;
|
||||
break;
|
||||
}
|
||||
if (!host) {
|
||||
pr_debug("host %s not found\n", ctrl->hostnqn);
|
||||
ret = -EPERM;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = nvmet_setup_dhgroup(ctrl, host->dhchap_dhgroup_id);
|
||||
if (ret < 0)
|
||||
pr_warn("Failed to setup DH group");
|
||||
|
||||
if (!host->dhchap_secret) {
|
||||
pr_debug("No authentication provided\n");
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (host->dhchap_hash_id == ctrl->shash_id) {
|
||||
pr_debug("Re-use existing hash ID %d\n",
|
||||
ctrl->shash_id);
|
||||
} else {
|
||||
hash_name = nvme_auth_hmac_name(host->dhchap_hash_id);
|
||||
if (!hash_name) {
|
||||
pr_warn("Hash ID %d invalid\n", host->dhchap_hash_id);
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
ctrl->shash_id = host->dhchap_hash_id;
|
||||
}
|
||||
|
||||
/* Skip the 'DHHC-1:XX:' prefix */
|
||||
nvme_auth_free_key(ctrl->host_key);
|
||||
ctrl->host_key = nvme_auth_extract_key(host->dhchap_secret + 10,
|
||||
host->dhchap_key_hash);
|
||||
if (IS_ERR(ctrl->host_key)) {
|
||||
ret = PTR_ERR(ctrl->host_key);
|
||||
ctrl->host_key = NULL;
|
||||
goto out_free_hash;
|
||||
}
|
||||
pr_debug("%s: using hash %s key %*ph\n", __func__,
|
||||
ctrl->host_key->hash > 0 ?
|
||||
nvme_auth_hmac_name(ctrl->host_key->hash) : "none",
|
||||
(int)ctrl->host_key->len, ctrl->host_key->key);
|
||||
|
||||
nvme_auth_free_key(ctrl->ctrl_key);
|
||||
if (!host->dhchap_ctrl_secret) {
|
||||
ctrl->ctrl_key = NULL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ctrl->ctrl_key = nvme_auth_extract_key(host->dhchap_ctrl_secret + 10,
|
||||
host->dhchap_ctrl_key_hash);
|
||||
if (IS_ERR(ctrl->ctrl_key)) {
|
||||
ret = PTR_ERR(ctrl->ctrl_key);
|
||||
ctrl->ctrl_key = NULL;
|
||||
}
|
||||
pr_debug("%s: using ctrl hash %s key %*ph\n", __func__,
|
||||
ctrl->ctrl_key->hash > 0 ?
|
||||
nvme_auth_hmac_name(ctrl->ctrl_key->hash) : "none",
|
||||
(int)ctrl->ctrl_key->len, ctrl->ctrl_key->key);
|
||||
|
||||
out_free_hash:
|
||||
if (ret) {
|
||||
if (ctrl->host_key) {
|
||||
nvme_auth_free_key(ctrl->host_key);
|
||||
ctrl->host_key = NULL;
|
||||
}
|
||||
ctrl->shash_id = 0;
|
||||
}
|
||||
out_unlock:
|
||||
up_read(&nvmet_config_sem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void nvmet_auth_sq_free(struct nvmet_sq *sq)
|
||||
{
|
||||
cancel_delayed_work(&sq->auth_expired_work);
|
||||
kfree(sq->dhchap_c1);
|
||||
sq->dhchap_c1 = NULL;
|
||||
kfree(sq->dhchap_c2);
|
||||
sq->dhchap_c2 = NULL;
|
||||
kfree(sq->dhchap_skey);
|
||||
sq->dhchap_skey = NULL;
|
||||
}
|
||||
|
||||
void nvmet_destroy_auth(struct nvmet_ctrl *ctrl)
|
||||
{
|
||||
ctrl->shash_id = 0;
|
||||
|
||||
if (ctrl->dh_tfm) {
|
||||
crypto_free_kpp(ctrl->dh_tfm);
|
||||
ctrl->dh_tfm = NULL;
|
||||
ctrl->dh_gid = 0;
|
||||
}
|
||||
kfree_sensitive(ctrl->dh_key);
|
||||
ctrl->dh_key = NULL;
|
||||
|
||||
if (ctrl->host_key) {
|
||||
nvme_auth_free_key(ctrl->host_key);
|
||||
ctrl->host_key = NULL;
|
||||
}
|
||||
if (ctrl->ctrl_key) {
|
||||
nvme_auth_free_key(ctrl->ctrl_key);
|
||||
ctrl->ctrl_key = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool nvmet_check_auth_status(struct nvmet_req *req)
|
||||
{
|
||||
if (req->sq->ctrl->host_key &&
|
||||
!req->sq->authenticated)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Compute the DH-HMAC-CHAP response expected from the host.
 *
 * @req:       request whose queue carries the challenge c1, sequence
 *             number s1 and transaction id
 * @response:  output buffer, @shash_len bytes
 * @shash_len: must equal the digest size of the controller's hash
 *
 * The HMAC is keyed with the transformed host key and fed, in order:
 * the challenge (augmented with the session key when a DH group is in
 * use), s1 as little-endian 32 bit, the transaction id as little-endian
 * 16 bit, one zero byte, "HostHost", the host NQN, one zero byte and the
 * subsystem NQN.
 *
 * Returns 0 on success or a negative errno. Callers test the result for
 * failure, so every error must be propagated.
 */
int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
			 unsigned int shash_len)
{
	struct crypto_shash *shash_tfm;
	struct shash_desc *shash;
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	const char *hash_name;
	u8 *challenge = req->sq->dhchap_c1, *host_response;
	u8 buf[4];
	int ret;

	hash_name = nvme_auth_hmac_name(ctrl->shash_id);
	if (!hash_name) {
		pr_warn("Hash ID %d invalid\n", ctrl->shash_id);
		return -EINVAL;
	}

	shash_tfm = crypto_alloc_shash(hash_name, 0, 0);
	if (IS_ERR(shash_tfm)) {
		pr_err("failed to allocate shash %s\n", hash_name);
		return PTR_ERR(shash_tfm);
	}

	if (shash_len != crypto_shash_digestsize(shash_tfm)) {
		pr_debug("%s: hash len mismatch (len %d digest %d)\n",
			 __func__, shash_len,
			 crypto_shash_digestsize(shash_tfm));
		ret = -EINVAL;
		goto out_free_tfm;
	}

	host_response = nvme_auth_transform_key(ctrl->host_key, ctrl->hostnqn);
	if (IS_ERR(host_response)) {
		ret = PTR_ERR(host_response);
		goto out_free_tfm;
	}

	ret = crypto_shash_setkey(shash_tfm, host_response,
				  ctrl->host_key->len);
	if (ret)
		goto out_free_response;

	if (ctrl->dh_gid != NVME_AUTH_DHGROUP_NULL) {
		/* With a DH group the challenge is augmented into a copy. */
		challenge = kmalloc(shash_len, GFP_KERNEL);
		if (!challenge) {
			ret = -ENOMEM;
			goto out_free_response;
		}
		ret = nvme_auth_augmented_challenge(ctrl->shash_id,
						    req->sq->dhchap_skey,
						    req->sq->dhchap_skey_len,
						    req->sq->dhchap_c1,
						    challenge, shash_len);
		if (ret)
			goto out_free_challenge;
	}

	pr_debug("ctrl %d qid %d host response seq %u transaction %d\n",
		 ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1,
		 req->sq->dhchap_tid);

	shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm),
			GFP_KERNEL);
	if (!shash) {
		ret = -ENOMEM;
		goto out_free_challenge;
	}
	shash->tfm = shash_tfm;
	ret = crypto_shash_init(shash);
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, challenge, shash_len);
	if (ret)
		goto out;
	put_unaligned_le32(req->sq->dhchap_s1, buf);
	ret = crypto_shash_update(shash, buf, 4);
	if (ret)
		goto out;
	put_unaligned_le16(req->sq->dhchap_tid, buf);
	ret = crypto_shash_update(shash, buf, 2);
	if (ret)
		goto out;
	/* buf now supplies the single zero bytes used as separators. */
	memset(buf, 0, 4);
	ret = crypto_shash_update(shash, buf, 1);
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, "HostHost", 8);
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, ctrl->hostnqn, strlen(ctrl->hostnqn));
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, buf, 1);
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, ctrl->subsysnqn,
				  strlen(ctrl->subsysnqn));
	if (ret)
		goto out;
	ret = crypto_shash_final(shash, response);
out:
	kfree(shash);
out_free_challenge:
	/* Only free the augmented copy, never the queue's own c1. */
	if (challenge != req->sq->dhchap_c1)
		kfree(challenge);
out_free_response:
	kfree_sensitive(host_response);
out_free_tfm:
	crypto_free_shash(shash_tfm);
	return ret;
}
|
||||
|
||||
/*
 * Compute the controller's DH-HMAC-CHAP response to the host challenge.
 *
 * @req:       request whose queue carries the challenge c2, sequence
 *             number s2 and transaction id
 * @response:  output buffer, @shash_len bytes
 * @shash_len: must equal the digest size of the controller's hash
 *
 * The HMAC is keyed with the transformed controller key and fed, in order:
 * the challenge (augmented with the session key when a DH group is in
 * use), s2 as little-endian 32 bit, the transaction id as little-endian
 * 16 bit, one zero byte, "Controller", the subsystem NQN, one zero byte
 * and the host NQN.
 *
 * Returns 0 on success or a negative errno; errors are propagated rather
 * than reported as success.
 */
int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
			 unsigned int shash_len)
{
	struct crypto_shash *shash_tfm;
	struct shash_desc *shash;
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	const char *hash_name;
	u8 *challenge = req->sq->dhchap_c2, *ctrl_response;
	u8 buf[4];
	int ret;

	hash_name = nvme_auth_hmac_name(ctrl->shash_id);
	if (!hash_name) {
		pr_warn("Hash ID %d invalid\n", ctrl->shash_id);
		return -EINVAL;
	}

	shash_tfm = crypto_alloc_shash(hash_name, 0, 0);
	if (IS_ERR(shash_tfm)) {
		pr_err("failed to allocate shash %s\n", hash_name);
		return PTR_ERR(shash_tfm);
	}

	if (shash_len != crypto_shash_digestsize(shash_tfm)) {
		pr_debug("%s: hash len mismatch (len %d digest %d)\n",
			 __func__, shash_len,
			 crypto_shash_digestsize(shash_tfm));
		ret = -EINVAL;
		goto out_free_tfm;
	}

	ctrl_response = nvme_auth_transform_key(ctrl->ctrl_key,
						ctrl->subsysnqn);
	if (IS_ERR(ctrl_response)) {
		ret = PTR_ERR(ctrl_response);
		goto out_free_tfm;
	}

	ret = crypto_shash_setkey(shash_tfm, ctrl_response,
				  ctrl->ctrl_key->len);
	if (ret)
		goto out_free_response;

	if (ctrl->dh_gid != NVME_AUTH_DHGROUP_NULL) {
		/* With a DH group the challenge is augmented into a copy. */
		challenge = kmalloc(shash_len, GFP_KERNEL);
		if (!challenge) {
			ret = -ENOMEM;
			goto out_free_response;
		}
		ret = nvme_auth_augmented_challenge(ctrl->shash_id,
						    req->sq->dhchap_skey,
						    req->sq->dhchap_skey_len,
						    req->sq->dhchap_c2,
						    challenge, shash_len);
		if (ret)
			goto out_free_challenge;
	}

	shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm),
			GFP_KERNEL);
	if (!shash) {
		ret = -ENOMEM;
		goto out_free_challenge;
	}
	shash->tfm = shash_tfm;

	ret = crypto_shash_init(shash);
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, challenge, shash_len);
	if (ret)
		goto out;
	put_unaligned_le32(req->sq->dhchap_s2, buf);
	ret = crypto_shash_update(shash, buf, 4);
	if (ret)
		goto out;
	put_unaligned_le16(req->sq->dhchap_tid, buf);
	ret = crypto_shash_update(shash, buf, 2);
	if (ret)
		goto out;
	/* buf now supplies the single zero bytes used as separators. */
	memset(buf, 0, 4);
	ret = crypto_shash_update(shash, buf, 1);
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, "Controller", 10);
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, ctrl->subsysnqn,
				  strlen(ctrl->subsysnqn));
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, buf, 1);
	if (ret)
		goto out;
	ret = crypto_shash_update(shash, ctrl->hostnqn, strlen(ctrl->hostnqn));
	if (ret)
		goto out;
	ret = crypto_shash_final(shash, response);
out:
	kfree(shash);
out_free_challenge:
	/* Only free the augmented copy, never the queue's own c2. */
	if (challenge != req->sq->dhchap_c2)
		kfree(challenge);
out_free_response:
	kfree_sensitive(ctrl_response);
out_free_tfm:
	crypto_free_shash(shash_tfm);
	return ret;
}
|
||||
|
||||
int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
|
||||
u8 *buf, int buf_size)
|
||||
{
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
int ret = 0;
|
||||
|
||||
if (!ctrl->dh_key) {
|
||||
pr_warn("ctrl %d no DH public key!\n", ctrl->cntlid);
|
||||
return -ENOKEY;
|
||||
}
|
||||
if (buf_size != ctrl->dh_keysize) {
|
||||
pr_warn("ctrl %d DH public key size mismatch, need %zu is %d\n",
|
||||
ctrl->cntlid, ctrl->dh_keysize, buf_size);
|
||||
ret = -EINVAL;
|
||||
} else {
|
||||
memcpy(buf, ctrl->dh_key, buf_size);
|
||||
pr_debug("%s: ctrl %d public key %*ph\n", __func__,
|
||||
ctrl->cntlid, (int)buf_size, buf);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
|
||||
u8 *pkey, int pkey_size)
|
||||
{
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
int ret;
|
||||
|
||||
req->sq->dhchap_skey_len = ctrl->dh_keysize;
|
||||
req->sq->dhchap_skey = kzalloc(req->sq->dhchap_skey_len, GFP_KERNEL);
|
||||
if (!req->sq->dhchap_skey)
|
||||
return -ENOMEM;
|
||||
ret = nvme_auth_gen_shared_secret(ctrl->dh_tfm,
|
||||
pkey, pkey_size,
|
||||
req->sq->dhchap_skey,
|
||||
req->sq->dhchap_skey_len);
|
||||
if (ret)
|
||||
pr_debug("failed to compute shared secret, err %d\n", ret);
|
||||
else
|
||||
pr_debug("%s: shared secret %*ph\n", __func__,
|
||||
(int)req->sq->dhchap_skey_len,
|
||||
req->sq->dhchap_skey);
|
||||
|
||||
return ret;
|
||||
}
|
@ -11,6 +11,11 @@
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/pci-p2pdma.h>
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
#include <linux/nvme-auth.h>
|
||||
#endif
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/kpp.h>
|
||||
|
||||
#include "nvmet.h"
|
||||
|
||||
@ -1680,10 +1685,133 @@ static const struct config_item_type nvmet_ports_type = {
|
||||
static struct config_group nvmet_subsystems_group;
|
||||
static struct config_group nvmet_ports_group;
|
||||
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
static ssize_t nvmet_host_dhchap_key_show(struct config_item *item,
|
||||
char *page)
|
||||
{
|
||||
u8 *dhchap_secret = to_host(item)->dhchap_secret;
|
||||
|
||||
if (!dhchap_secret)
|
||||
return sprintf(page, "\n");
|
||||
return sprintf(page, "%s\n", dhchap_secret);
|
||||
}
|
||||
|
||||
/*
 * configfs store handler for the host's DH-HMAC-CHAP secret.
 *
 * Returns @count on success or the negative error from
 * nvmet_auth_set_key().
 */
static ssize_t nvmet_host_dhchap_key_store(struct config_item *item,
		const char *page, size_t count)
{
	int err = nvmet_auth_set_key(to_host(item), page, false);

	/*
	 * Re-authentication is a soft state, so keep the
	 * current authentication valid until the host
	 * requests re-authentication.
	 */
	if (err < 0)
		return err;
	return count;
}
|
||||
|
||||
CONFIGFS_ATTR(nvmet_host_, dhchap_key);
|
||||
|
||||
static ssize_t nvmet_host_dhchap_ctrl_key_show(struct config_item *item,
|
||||
char *page)
|
||||
{
|
||||
u8 *dhchap_secret = to_host(item)->dhchap_ctrl_secret;
|
||||
|
||||
if (!dhchap_secret)
|
||||
return sprintf(page, "\n");
|
||||
return sprintf(page, "%s\n", dhchap_secret);
|
||||
}
|
||||
|
||||
/*
 * configfs store handler for the controller's DH-HMAC-CHAP secret.
 *
 * Returns @count on success or the negative error from
 * nvmet_auth_set_key().
 */
static ssize_t nvmet_host_dhchap_ctrl_key_store(struct config_item *item,
		const char *page, size_t count)
{
	int err = nvmet_auth_set_key(to_host(item), page, true);

	/*
	 * Re-authentication is a soft state, so keep the
	 * current authentication valid until the host
	 * requests re-authentication.
	 */
	if (err < 0)
		return err;
	return count;
}
|
||||
|
||||
CONFIGFS_ATTR(nvmet_host_, dhchap_ctrl_key);
|
||||
|
||||
static ssize_t nvmet_host_dhchap_hash_show(struct config_item *item,
|
||||
char *page)
|
||||
{
|
||||
struct nvmet_host *host = to_host(item);
|
||||
const char *hash_name = nvme_auth_hmac_name(host->dhchap_hash_id);
|
||||
|
||||
return sprintf(page, "%s\n", hash_name ? hash_name : "none");
|
||||
}
|
||||
|
||||
/*
 * configfs store handler for the hash selection.
 *
 * Returns @count on success, -EINVAL when @page does not name a known
 * hash, or -ENOTSUPP when the hash is known but not available.
 */
static ssize_t nvmet_host_dhchap_hash_store(struct config_item *item,
		const char *page, size_t count)
{
	u8 id = nvme_auth_hmac_id(page);

	if (id == NVME_AUTH_HASH_INVALID)
		return -EINVAL;
	if (!crypto_has_shash(nvme_auth_hmac_name(id), 0, 0))
		return -ENOTSUPP;

	to_host(item)->dhchap_hash_id = id;
	return count;
}
|
||||
|
||||
CONFIGFS_ATTR(nvmet_host_, dhchap_hash);
|
||||
|
||||
static ssize_t nvmet_host_dhchap_dhgroup_show(struct config_item *item,
|
||||
char *page)
|
||||
{
|
||||
struct nvmet_host *host = to_host(item);
|
||||
const char *dhgroup = nvme_auth_dhgroup_name(host->dhchap_dhgroup_id);
|
||||
|
||||
return sprintf(page, "%s\n", dhgroup ? dhgroup : "none");
|
||||
}
|
||||
|
||||
/*
 * configfs store handler for the DH group selection.
 *
 * The NULL group is always accepted; any other group must have a KPP
 * implementation available.
 *
 * Returns @count on success or -EINVAL for an unknown or unavailable
 * group.
 */
static ssize_t nvmet_host_dhchap_dhgroup_store(struct config_item *item,
		const char *page, size_t count)
{
	int gid = nvme_auth_dhgroup_id(page);

	if (gid == NVME_AUTH_DHGROUP_INVALID)
		return -EINVAL;
	if (gid != NVME_AUTH_DHGROUP_NULL &&
	    !crypto_has_kpp(nvme_auth_dhgroup_kpp(gid), 0, 0))
		return -EINVAL;

	to_host(item)->dhchap_dhgroup_id = gid;
	return count;
}
|
||||
|
||||
CONFIGFS_ATTR(nvmet_host_, dhchap_dhgroup);
|
||||
|
||||
/*
 * NULL-terminated table of the per-host DH-HMAC-CHAP configfs attributes
 * (host key, controller key, hash and DH group). Each entry is the
 * attribute object generated by the matching CONFIGFS_ATTR(nvmet_host_, ...)
 * line; the table is installed as nvmet_host_type.ct_attrs.
 */
static struct configfs_attribute *nvmet_host_attrs[] = {
|
||||
&nvmet_host_attr_dhchap_key,
|
||||
&nvmet_host_attr_dhchap_ctrl_key,
|
||||
&nvmet_host_attr_dhchap_hash,
|
||||
&nvmet_host_attr_dhchap_dhgroup,
|
||||
NULL,
|
||||
};
|
||||
#endif /* CONFIG_NVME_TARGET_AUTH */
|
||||
|
||||
/*
 * configfs release callback for a host entry: frees the stored
 * DH-HMAC-CHAP secrets (when authentication support is built in) and the
 * host structure itself.
 */
static void nvmet_host_release(struct config_item *item)
{
	struct nvmet_host *host = to_host(item);

#ifdef CONFIG_NVME_TARGET_AUTH
	/*
	 * nvmet_auth_set_key() stores separately allocated copies of both
	 * secrets, so both must be released; freeing only the host secret
	 * leaked the controller secret.
	 */
	kfree(host->dhchap_secret);
	kfree(host->dhchap_ctrl_secret);
#endif
	kfree(host);
}
|
||||
|
||||
@ -1693,6 +1821,9 @@ static struct configfs_item_operations nvmet_host_item_ops = {
|
||||
|
||||
static const struct config_item_type nvmet_host_type = {
|
||||
.ct_item_ops = &nvmet_host_item_ops,
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
.ct_attrs = nvmet_host_attrs,
|
||||
#endif
|
||||
.ct_owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
@ -1705,6 +1836,11 @@ static struct config_group *nvmet_hosts_make_group(struct config_group *group,
|
||||
if (!host)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
/* Default to SHA256 */
|
||||
host->dhchap_hash_id = NVME_AUTH_HASH_SHA256;
|
||||
#endif
|
||||
|
||||
config_group_init_type_name(&host->group, name, &nvmet_host_type);
|
||||
|
||||
return &host->group;
|
||||
|
@ -795,6 +795,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
|
||||
wait_for_completion(&sq->confirm_done);
|
||||
wait_for_completion(&sq->free_done);
|
||||
percpu_ref_exit(&sq->ref);
|
||||
nvmet_auth_sq_free(sq);
|
||||
|
||||
if (ctrl) {
|
||||
/*
|
||||
@ -865,8 +866,15 @@ static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
|
||||
|
||||
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_command *cmd = req->cmd;
|
||||
u16 ret;
|
||||
|
||||
if (nvme_is_fabrics(cmd))
|
||||
return nvmet_parse_fabrics_io_cmd(req);
|
||||
|
||||
if (unlikely(!nvmet_check_auth_status(req)))
|
||||
return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
|
||||
|
||||
ret = nvmet_check_ctrl_status(req);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
@ -1271,6 +1279,11 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req)
|
||||
req->cmd->common.opcode, req->sq->qid);
|
||||
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
|
||||
}
|
||||
|
||||
if (unlikely(!nvmet_check_auth_status(req))) {
|
||||
pr_warn("qid %d not authenticated\n", req->sq->qid);
|
||||
return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1467,6 +1480,8 @@ static void nvmet_ctrl_free(struct kref *ref)
|
||||
flush_work(&ctrl->async_event_work);
|
||||
cancel_work_sync(&ctrl->fatal_err_work);
|
||||
|
||||
nvmet_destroy_auth(ctrl);
|
||||
|
||||
ida_free(&cntlid_ida, ctrl->cntlid);
|
||||
|
||||
nvmet_async_events_free(ctrl);
|
||||
|
544
drivers/nvme/target/fabrics-cmd-auth.c
Normal file
544
drivers/nvme/target/fabrics-cmd-auth.c
Normal file
@ -0,0 +1,544 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NVMe over Fabrics DH-HMAC-CHAP authentication command handling.
|
||||
* Copyright (c) 2020 Hannes Reinecke, SUSE Software Solutions.
|
||||
* All rights reserved.
|
||||
*/
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/nvme-auth.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include "nvmet.h"
|
||||
|
||||
static void nvmet_auth_expired_work(struct work_struct *work)
|
||||
{
|
||||
struct nvmet_sq *sq = container_of(to_delayed_work(work),
|
||||
struct nvmet_sq, auth_expired_work);
|
||||
|
||||
pr_debug("%s: ctrl %d qid %d transaction %u expired, resetting\n",
|
||||
__func__, sq->ctrl->cntlid, sq->qid, sq->dhchap_tid);
|
||||
sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
|
||||
sq->dhchap_tid = -1;
|
||||
}
|
||||
|
||||
void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
|
||||
{
|
||||
u32 result = le32_to_cpu(req->cqe->result.u32);
|
||||
|
||||
/* Initialize in-band authentication */
|
||||
INIT_DELAYED_WORK(&req->sq->auth_expired_work,
|
||||
nvmet_auth_expired_work);
|
||||
req->sq->authenticated = false;
|
||||
req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
|
||||
result |= (u32)NVME_CONNECT_AUTHREQ_ATR << 16;
|
||||
req->cqe->result.u32 = cpu_to_le32(result);
|
||||
}
|
||||
|
||||
static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d)
|
||||
{
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
struct nvmf_auth_dhchap_negotiate_data *data = d;
|
||||
int i, hash_id = 0, fallback_hash_id = 0, dhgid, fallback_dhgid;
|
||||
|
||||
pr_debug("%s: ctrl %d qid %d: data sc_d %d napd %d authid %d halen %d dhlen %d\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid,
|
||||
data->sc_c, data->napd, data->auth_protocol[0].dhchap.authid,
|
||||
data->auth_protocol[0].dhchap.halen,
|
||||
data->auth_protocol[0].dhchap.dhlen);
|
||||
req->sq->dhchap_tid = le16_to_cpu(data->t_id);
|
||||
if (data->sc_c)
|
||||
return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH;
|
||||
|
||||
if (data->napd != 1)
|
||||
return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
|
||||
|
||||
if (data->auth_protocol[0].dhchap.authid !=
|
||||
NVME_AUTH_DHCHAP_AUTH_ID)
|
||||
return NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
|
||||
|
||||
for (i = 0; i < data->auth_protocol[0].dhchap.halen; i++) {
|
||||
u8 host_hmac_id = data->auth_protocol[0].dhchap.idlist[i];
|
||||
|
||||
if (!fallback_hash_id &&
|
||||
crypto_has_shash(nvme_auth_hmac_name(host_hmac_id), 0, 0))
|
||||
fallback_hash_id = host_hmac_id;
|
||||
if (ctrl->shash_id != host_hmac_id)
|
||||
continue;
|
||||
hash_id = ctrl->shash_id;
|
||||
break;
|
||||
}
|
||||
if (hash_id == 0) {
|
||||
if (fallback_hash_id == 0) {
|
||||
pr_debug("%s: ctrl %d qid %d: no usable hash found\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid);
|
||||
return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
|
||||
}
|
||||
pr_debug("%s: ctrl %d qid %d: no usable hash found, falling back to %s\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid,
|
||||
nvme_auth_hmac_name(fallback_hash_id));
|
||||
ctrl->shash_id = fallback_hash_id;
|
||||
}
|
||||
|
||||
dhgid = -1;
|
||||
fallback_dhgid = -1;
|
||||
for (i = 0; i < data->auth_protocol[0].dhchap.dhlen; i++) {
|
||||
int tmp_dhgid = data->auth_protocol[0].dhchap.idlist[i + 30];
|
||||
|
||||
if (tmp_dhgid != ctrl->dh_gid) {
|
||||
dhgid = tmp_dhgid;
|
||||
break;
|
||||
}
|
||||
if (fallback_dhgid < 0) {
|
||||
const char *kpp = nvme_auth_dhgroup_kpp(tmp_dhgid);
|
||||
|
||||
if (crypto_has_kpp(kpp, 0, 0))
|
||||
fallback_dhgid = tmp_dhgid;
|
||||
}
|
||||
}
|
||||
if (dhgid < 0) {
|
||||
if (fallback_dhgid < 0) {
|
||||
pr_debug("%s: ctrl %d qid %d: no usable DH group found\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid);
|
||||
return NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
|
||||
}
|
||||
pr_debug("%s: ctrl %d qid %d: configured DH group %s not found\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid,
|
||||
nvme_auth_dhgroup_name(fallback_dhgid));
|
||||
ctrl->dh_gid = fallback_dhgid;
|
||||
}
|
||||
pr_debug("%s: ctrl %d qid %d: selected DH group %s (%d)\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid,
|
||||
nvme_auth_dhgroup_name(ctrl->dh_gid), ctrl->dh_gid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u16 nvmet_auth_reply(struct nvmet_req *req, void *d)
|
||||
{
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
struct nvmf_auth_dhchap_reply_data *data = d;
|
||||
u16 dhvlen = le16_to_cpu(data->dhvlen);
|
||||
u8 *response;
|
||||
|
||||
pr_debug("%s: ctrl %d qid %d: data hl %d cvalid %d dhvlen %u\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid,
|
||||
data->hl, data->cvalid, dhvlen);
|
||||
|
||||
if (dhvlen) {
|
||||
if (!ctrl->dh_tfm)
|
||||
return NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
|
||||
if (nvmet_auth_ctrl_sesskey(req, data->rval + 2 * data->hl,
|
||||
dhvlen) < 0)
|
||||
return NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
|
||||
}
|
||||
|
||||
response = kmalloc(data->hl, GFP_KERNEL);
|
||||
if (!response)
|
||||
return NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
|
||||
if (!ctrl->host_key) {
|
||||
pr_warn("ctrl %d qid %d no host key\n",
|
||||
ctrl->cntlid, req->sq->qid);
|
||||
kfree(response);
|
||||
return NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
}
|
||||
if (nvmet_auth_host_hash(req, response, data->hl) < 0) {
|
||||
pr_debug("ctrl %d qid %d host hash failed\n",
|
||||
ctrl->cntlid, req->sq->qid);
|
||||
kfree(response);
|
||||
return NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
}
|
||||
|
||||
if (memcmp(data->rval, response, data->hl)) {
|
||||
pr_info("ctrl %d qid %d host response mismatch\n",
|
||||
ctrl->cntlid, req->sq->qid);
|
||||
kfree(response);
|
||||
return NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
}
|
||||
kfree(response);
|
||||
pr_debug("%s: ctrl %d qid %d host authenticated\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid);
|
||||
if (data->cvalid) {
|
||||
req->sq->dhchap_c2 = kmalloc(data->hl, GFP_KERNEL);
|
||||
if (!req->sq->dhchap_c2)
|
||||
return NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
memcpy(req->sq->dhchap_c2, data->rval + data->hl, data->hl);
|
||||
|
||||
pr_debug("%s: ctrl %d qid %d challenge %*ph\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid, data->hl,
|
||||
req->sq->dhchap_c2);
|
||||
req->sq->dhchap_s2 = le32_to_cpu(data->seqnum);
|
||||
} else {
|
||||
req->sq->authenticated = true;
|
||||
req->sq->dhchap_c2 = NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u16 nvmet_auth_failure2(struct nvmet_req *req, void *d)
|
||||
{
|
||||
struct nvmf_auth_dhchap_failure_data *data = d;
|
||||
|
||||
return data->rescode_exp;
|
||||
}
|
||||
|
||||
void nvmet_execute_auth_send(struct nvmet_req *req)
|
||||
{
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
struct nvmf_auth_dhchap_success2_data *data;
|
||||
void *d;
|
||||
u32 tl;
|
||||
u16 status = 0;
|
||||
|
||||
if (req->cmd->auth_send.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc =
|
||||
offsetof(struct nvmf_auth_send_command, secp);
|
||||
goto done;
|
||||
}
|
||||
if (req->cmd->auth_send.spsp0 != 0x01) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc =
|
||||
offsetof(struct nvmf_auth_send_command, spsp0);
|
||||
goto done;
|
||||
}
|
||||
if (req->cmd->auth_send.spsp1 != 0x01) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc =
|
||||
offsetof(struct nvmf_auth_send_command, spsp1);
|
||||
goto done;
|
||||
}
|
||||
tl = le32_to_cpu(req->cmd->auth_send.tl);
|
||||
if (!tl) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc =
|
||||
offsetof(struct nvmf_auth_send_command, tl);
|
||||
goto done;
|
||||
}
|
||||
if (!nvmet_check_transfer_len(req, tl)) {
|
||||
pr_debug("%s: transfer length mismatch (%u)\n", __func__, tl);
|
||||
return;
|
||||
}
|
||||
|
||||
d = kmalloc(tl, GFP_KERNEL);
|
||||
if (!d) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
status = nvmet_copy_from_sgl(req, 0, d, tl);
|
||||
if (status) {
|
||||
kfree(d);
|
||||
goto done;
|
||||
}
|
||||
|
||||
data = d;
|
||||
pr_debug("%s: ctrl %d qid %d type %d id %d step %x\n", __func__,
|
||||
ctrl->cntlid, req->sq->qid, data->auth_type, data->auth_id,
|
||||
req->sq->dhchap_step);
|
||||
if (data->auth_type != NVME_AUTH_COMMON_MESSAGES &&
|
||||
data->auth_type != NVME_AUTH_DHCHAP_MESSAGES)
|
||||
goto done_failure1;
|
||||
if (data->auth_type == NVME_AUTH_COMMON_MESSAGES) {
|
||||
if (data->auth_id == NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE) {
|
||||
/* Restart negotiation */
|
||||
pr_debug("%s: ctrl %d qid %d reset negotiation\n", __func__,
|
||||
ctrl->cntlid, req->sq->qid);
|
||||
if (!req->sq->qid) {
|
||||
if (nvmet_setup_auth(ctrl) < 0) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
pr_err("ctrl %d qid 0 failed to setup"
|
||||
"re-authentication",
|
||||
ctrl->cntlid);
|
||||
goto done_failure1;
|
||||
}
|
||||
}
|
||||
req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
|
||||
} else if (data->auth_id != req->sq->dhchap_step)
|
||||
goto done_failure1;
|
||||
/* Validate negotiation parameters */
|
||||
status = nvmet_auth_negotiate(req, d);
|
||||
if (status == 0)
|
||||
req->sq->dhchap_step =
|
||||
NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE;
|
||||
else {
|
||||
req->sq->dhchap_step =
|
||||
NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
|
||||
req->sq->dhchap_status = status;
|
||||
status = 0;
|
||||
}
|
||||
goto done_kfree;
|
||||
}
|
||||
if (data->auth_id != req->sq->dhchap_step) {
|
||||
pr_debug("%s: ctrl %d qid %d step mismatch (%d != %d)\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid,
|
||||
data->auth_id, req->sq->dhchap_step);
|
||||
goto done_failure1;
|
||||
}
|
||||
if (le16_to_cpu(data->t_id) != req->sq->dhchap_tid) {
|
||||
pr_debug("%s: ctrl %d qid %d invalid transaction %d (expected %d)\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid,
|
||||
le16_to_cpu(data->t_id),
|
||||
req->sq->dhchap_tid);
|
||||
req->sq->dhchap_step =
|
||||
NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
|
||||
req->sq->dhchap_status =
|
||||
NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
|
||||
goto done_kfree;
|
||||
}
|
||||
|
||||
switch (data->auth_id) {
|
||||
case NVME_AUTH_DHCHAP_MESSAGE_REPLY:
|
||||
status = nvmet_auth_reply(req, d);
|
||||
if (status == 0)
|
||||
req->sq->dhchap_step =
|
||||
NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1;
|
||||
else {
|
||||
req->sq->dhchap_step =
|
||||
NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
|
||||
req->sq->dhchap_status = status;
|
||||
status = 0;
|
||||
}
|
||||
goto done_kfree;
|
||||
break;
|
||||
case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2:
|
||||
req->sq->authenticated = true;
|
||||
pr_debug("%s: ctrl %d qid %d ctrl authenticated\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid);
|
||||
goto done_kfree;
|
||||
break;
|
||||
case NVME_AUTH_DHCHAP_MESSAGE_FAILURE2:
|
||||
status = nvmet_auth_failure2(req, d);
|
||||
if (status) {
|
||||
pr_warn("ctrl %d qid %d: authentication failed (%d)\n",
|
||||
ctrl->cntlid, req->sq->qid, status);
|
||||
req->sq->dhchap_status = status;
|
||||
req->sq->authenticated = false;
|
||||
status = 0;
|
||||
}
|
||||
goto done_kfree;
|
||||
break;
|
||||
default:
|
||||
req->sq->dhchap_status =
|
||||
NVME_AUTH_DHCHAP_FAILURE_INCORRECT_MESSAGE;
|
||||
req->sq->dhchap_step =
|
||||
NVME_AUTH_DHCHAP_MESSAGE_FAILURE2;
|
||||
req->sq->authenticated = false;
|
||||
goto done_kfree;
|
||||
break;
|
||||
}
|
||||
done_failure1:
|
||||
req->sq->dhchap_status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_MESSAGE;
|
||||
req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_FAILURE2;
|
||||
|
||||
done_kfree:
|
||||
kfree(d);
|
||||
done:
|
||||
pr_debug("%s: ctrl %d qid %d dhchap status %x step %x\n", __func__,
|
||||
ctrl->cntlid, req->sq->qid,
|
||||
req->sq->dhchap_status, req->sq->dhchap_step);
|
||||
if (status)
|
||||
pr_debug("%s: ctrl %d qid %d nvme status %x error loc %d\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid,
|
||||
status, req->error_loc);
|
||||
req->cqe->result.u64 = 0;
|
||||
nvmet_req_complete(req, status);
|
||||
if (req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2 &&
|
||||
req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) {
|
||||
unsigned long auth_expire_secs = ctrl->kato ? ctrl->kato : 120;
|
||||
|
||||
mod_delayed_work(system_wq, &req->sq->auth_expired_work,
|
||||
auth_expire_secs * HZ);
|
||||
return;
|
||||
}
|
||||
/* Final states, clear up variables */
|
||||
nvmet_auth_sq_free(req->sq);
|
||||
if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE2)
|
||||
nvmet_ctrl_fatal_error(ctrl);
|
||||
}
|
||||
|
||||
static int nvmet_auth_challenge(struct nvmet_req *req, void *d, int al)
|
||||
{
|
||||
struct nvmf_auth_dhchap_challenge_data *data = d;
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
int ret = 0;
|
||||
int hash_len = nvme_auth_hmac_hash_len(ctrl->shash_id);
|
||||
int data_size = sizeof(*d) + hash_len;
|
||||
|
||||
if (ctrl->dh_tfm)
|
||||
data_size += ctrl->dh_keysize;
|
||||
if (al < data_size) {
|
||||
pr_debug("%s: buffer too small (al %d need %d)\n", __func__,
|
||||
al, data_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
memset(data, 0, data_size);
|
||||
req->sq->dhchap_s1 = nvme_auth_get_seqnum();
|
||||
data->auth_type = NVME_AUTH_DHCHAP_MESSAGES;
|
||||
data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE;
|
||||
data->t_id = cpu_to_le16(req->sq->dhchap_tid);
|
||||
data->hashid = ctrl->shash_id;
|
||||
data->hl = hash_len;
|
||||
data->seqnum = cpu_to_le32(req->sq->dhchap_s1);
|
||||
req->sq->dhchap_c1 = kmalloc(data->hl, GFP_KERNEL);
|
||||
if (!req->sq->dhchap_c1)
|
||||
return -ENOMEM;
|
||||
get_random_bytes(req->sq->dhchap_c1, data->hl);
|
||||
memcpy(data->cval, req->sq->dhchap_c1, data->hl);
|
||||
if (ctrl->dh_tfm) {
|
||||
data->dhgid = ctrl->dh_gid;
|
||||
data->dhvlen = cpu_to_le16(ctrl->dh_keysize);
|
||||
ret = nvmet_auth_ctrl_exponential(req, data->cval + data->hl,
|
||||
ctrl->dh_keysize);
|
||||
}
|
||||
pr_debug("%s: ctrl %d qid %d seq %d transaction %d hl %d dhvlen %zu\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1,
|
||||
req->sq->dhchap_tid, data->hl, ctrl->dh_keysize);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvmet_auth_success1(struct nvmet_req *req, void *d, int al)
|
||||
{
|
||||
struct nvmf_auth_dhchap_success1_data *data = d;
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
int hash_len = nvme_auth_hmac_hash_len(ctrl->shash_id);
|
||||
|
||||
WARN_ON(al < sizeof(*data));
|
||||
memset(data, 0, sizeof(*data));
|
||||
data->auth_type = NVME_AUTH_DHCHAP_MESSAGES;
|
||||
data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1;
|
||||
data->t_id = cpu_to_le16(req->sq->dhchap_tid);
|
||||
data->hl = hash_len;
|
||||
if (req->sq->dhchap_c2) {
|
||||
if (!ctrl->ctrl_key) {
|
||||
pr_warn("ctrl %d qid %d no ctrl key\n",
|
||||
ctrl->cntlid, req->sq->qid);
|
||||
return NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
}
|
||||
if (nvmet_auth_ctrl_hash(req, data->rval, data->hl))
|
||||
return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
|
||||
data->rvalid = 1;
|
||||
pr_debug("ctrl %d qid %d response %*ph\n",
|
||||
ctrl->cntlid, req->sq->qid, data->hl, data->rval);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nvmet_auth_failure1(struct nvmet_req *req, void *d, int al)
|
||||
{
|
||||
struct nvmf_auth_dhchap_failure_data *data = d;
|
||||
|
||||
WARN_ON(al < sizeof(*data));
|
||||
data->auth_type = NVME_AUTH_COMMON_MESSAGES;
|
||||
data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
|
||||
data->t_id = cpu_to_le16(req->sq->dhchap_tid);
|
||||
data->rescode = NVME_AUTH_DHCHAP_FAILURE_REASON_FAILED;
|
||||
data->rescode_exp = req->sq->dhchap_status;
|
||||
}
|
||||
|
||||
void nvmet_execute_auth_receive(struct nvmet_req *req)
|
||||
{
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
void *d;
|
||||
u32 al;
|
||||
u16 status = 0;
|
||||
|
||||
if (req->cmd->auth_receive.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc =
|
||||
offsetof(struct nvmf_auth_receive_command, secp);
|
||||
goto done;
|
||||
}
|
||||
if (req->cmd->auth_receive.spsp0 != 0x01) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc =
|
||||
offsetof(struct nvmf_auth_receive_command, spsp0);
|
||||
goto done;
|
||||
}
|
||||
if (req->cmd->auth_receive.spsp1 != 0x01) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc =
|
||||
offsetof(struct nvmf_auth_receive_command, spsp1);
|
||||
goto done;
|
||||
}
|
||||
al = le32_to_cpu(req->cmd->auth_receive.al);
|
||||
if (!al) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc =
|
||||
offsetof(struct nvmf_auth_receive_command, al);
|
||||
goto done;
|
||||
}
|
||||
if (!nvmet_check_transfer_len(req, al)) {
|
||||
pr_debug("%s: transfer length mismatch (%u)\n", __func__, al);
|
||||
return;
|
||||
}
|
||||
|
||||
d = kmalloc(al, GFP_KERNEL);
|
||||
if (!d) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto done;
|
||||
}
|
||||
pr_debug("%s: ctrl %d qid %d step %x\n", __func__,
|
||||
ctrl->cntlid, req->sq->qid, req->sq->dhchap_step);
|
||||
switch (req->sq->dhchap_step) {
|
||||
case NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE:
|
||||
if (nvmet_auth_challenge(req, d, al) < 0) {
|
||||
pr_warn("ctrl %d qid %d: challenge error (%d)\n",
|
||||
ctrl->cntlid, req->sq->qid, status);
|
||||
status = NVME_SC_INTERNAL;
|
||||
break;
|
||||
}
|
||||
if (status) {
|
||||
req->sq->dhchap_status = status;
|
||||
nvmet_auth_failure1(req, d, al);
|
||||
pr_warn("ctrl %d qid %d: challenge status (%x)\n",
|
||||
ctrl->cntlid, req->sq->qid,
|
||||
req->sq->dhchap_status);
|
||||
status = 0;
|
||||
break;
|
||||
}
|
||||
req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_REPLY;
|
||||
break;
|
||||
case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1:
|
||||
status = nvmet_auth_success1(req, d, al);
|
||||
if (status) {
|
||||
req->sq->dhchap_status = status;
|
||||
req->sq->authenticated = false;
|
||||
nvmet_auth_failure1(req, d, al);
|
||||
pr_warn("ctrl %d qid %d: success1 status (%x)\n",
|
||||
ctrl->cntlid, req->sq->qid,
|
||||
req->sq->dhchap_status);
|
||||
break;
|
||||
}
|
||||
req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2;
|
||||
break;
|
||||
case NVME_AUTH_DHCHAP_MESSAGE_FAILURE1:
|
||||
req->sq->authenticated = false;
|
||||
nvmet_auth_failure1(req, d, al);
|
||||
pr_warn("ctrl %d qid %d failure1 (%x)\n",
|
||||
ctrl->cntlid, req->sq->qid, req->sq->dhchap_status);
|
||||
break;
|
||||
default:
|
||||
pr_warn("ctrl %d qid %d unhandled step (%d)\n",
|
||||
ctrl->cntlid, req->sq->qid, req->sq->dhchap_step);
|
||||
req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
|
||||
req->sq->dhchap_status = NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
nvmet_auth_failure1(req, d, al);
|
||||
status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
status = nvmet_copy_to_sgl(req, 0, d, al);
|
||||
kfree(d);
|
||||
done:
|
||||
req->cqe->result.u64 = 0;
|
||||
nvmet_req_complete(req, status);
|
||||
if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2)
|
||||
nvmet_auth_sq_free(req->sq);
|
||||
else if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) {
|
||||
nvmet_auth_sq_free(req->sq);
|
||||
nvmet_ctrl_fatal_error(ctrl);
|
||||
}
|
||||
}
|
@ -82,7 +82,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
|
||||
u16 nvmet_parse_fabrics_admin_cmd(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_command *cmd = req->cmd;
|
||||
|
||||
@ -93,6 +93,37 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
|
||||
case nvme_fabrics_type_property_get:
|
||||
req->execute = nvmet_execute_prop_get;
|
||||
break;
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
case nvme_fabrics_type_auth_send:
|
||||
req->execute = nvmet_execute_auth_send;
|
||||
break;
|
||||
case nvme_fabrics_type_auth_receive:
|
||||
req->execute = nvmet_execute_auth_receive;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
pr_debug("received unknown capsule type 0x%x\n",
|
||||
cmd->fabrics.fctype);
|
||||
req->error_loc = offsetof(struct nvmf_common_command, fctype);
|
||||
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_command *cmd = req->cmd;
|
||||
|
||||
switch (cmd->fabrics.fctype) {
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
case nvme_fabrics_type_auth_send:
|
||||
req->execute = nvmet_execute_auth_send;
|
||||
break;
|
||||
case nvme_fabrics_type_auth_receive:
|
||||
req->execute = nvmet_execute_auth_receive;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
pr_debug("received unknown capsule type 0x%x\n",
|
||||
cmd->fabrics.fctype);
|
||||
@ -173,6 +204,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
|
||||
struct nvmf_connect_data *d;
|
||||
struct nvmet_ctrl *ctrl = NULL;
|
||||
u16 status = 0;
|
||||
int ret;
|
||||
|
||||
if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
|
||||
return;
|
||||
@ -215,18 +247,32 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
|
||||
|
||||
uuid_copy(&ctrl->hostid, &d->hostid);
|
||||
|
||||
ret = nvmet_setup_auth(ctrl);
|
||||
if (ret < 0) {
|
||||
pr_err("Failed to setup authentication, error %d\n", ret);
|
||||
nvmet_ctrl_put(ctrl);
|
||||
if (ret == -EPERM)
|
||||
status = (NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR);
|
||||
else
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = nvmet_install_queue(ctrl, req);
|
||||
if (status) {
|
||||
nvmet_ctrl_put(ctrl);
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_info("creating %s controller %d for subsystem %s for NQN %s%s.\n",
|
||||
pr_info("creating %s controller %d for subsystem %s for NQN %s%s%s.\n",
|
||||
nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm",
|
||||
ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
|
||||
ctrl->pi_support ? " T10-PI is enabled" : "");
|
||||
ctrl->pi_support ? " T10-PI is enabled" : "",
|
||||
nvmet_has_auth(ctrl) ? " with DH-HMAC-CHAP" : "");
|
||||
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
|
||||
|
||||
if (nvmet_has_auth(ctrl))
|
||||
nvmet_init_auth(ctrl, req);
|
||||
out:
|
||||
kfree(d);
|
||||
complete:
|
||||
@ -286,6 +332,9 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
|
||||
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
|
||||
|
||||
pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
|
||||
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
|
||||
if (nvmet_has_auth(ctrl))
|
||||
nvmet_init_auth(ctrl, req);
|
||||
|
||||
out:
|
||||
kfree(d);
|
||||
|
@ -424,9 +424,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
|
||||
{
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
nvme_stop_queues(&ctrl->ctrl);
|
||||
blk_mq_tagset_busy_iter(&ctrl->tag_set,
|
||||
nvme_cancel_request, &ctrl->ctrl);
|
||||
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
|
||||
nvme_cancel_tagset(&ctrl->ctrl);
|
||||
nvme_loop_destroy_io_queues(ctrl);
|
||||
}
|
||||
|
||||
@ -434,9 +432,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
|
||||
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
|
||||
nvme_shutdown_ctrl(&ctrl->ctrl);
|
||||
|
||||
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
||||
nvme_cancel_request, &ctrl->ctrl);
|
||||
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
|
||||
nvme_cancel_admin_tagset(&ctrl->ctrl);
|
||||
nvme_loop_destroy_admin_queue(ctrl);
|
||||
}
|
||||
|
||||
|
@ -108,6 +108,19 @@ struct nvmet_sq {
|
||||
u16 size;
|
||||
u32 sqhd;
|
||||
bool sqhd_disabled;
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
struct delayed_work auth_expired_work;
|
||||
bool authenticated;
|
||||
u16 dhchap_tid;
|
||||
u16 dhchap_status;
|
||||
int dhchap_step;
|
||||
u8 *dhchap_c1;
|
||||
u8 *dhchap_c2;
|
||||
u32 dhchap_s1;
|
||||
u32 dhchap_s2;
|
||||
u8 *dhchap_skey;
|
||||
int dhchap_skey_len;
|
||||
#endif
|
||||
struct completion free_done;
|
||||
struct completion confirm_done;
|
||||
};
|
||||
@ -209,6 +222,15 @@ struct nvmet_ctrl {
|
||||
u64 err_counter;
|
||||
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
|
||||
bool pi_support;
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
struct nvme_dhchap_key *host_key;
|
||||
struct nvme_dhchap_key *ctrl_key;
|
||||
u8 shash_id;
|
||||
struct crypto_kpp *dh_tfm;
|
||||
u8 dh_gid;
|
||||
u8 *dh_key;
|
||||
size_t dh_keysize;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct nvmet_subsys {
|
||||
@ -271,6 +293,12 @@ static inline struct nvmet_subsys *namespaces_to_subsys(
|
||||
|
||||
struct nvmet_host {
|
||||
struct config_group group;
|
||||
u8 *dhchap_secret;
|
||||
u8 *dhchap_ctrl_secret;
|
||||
u8 dhchap_key_hash;
|
||||
u8 dhchap_ctrl_key_hash;
|
||||
u8 dhchap_hash_id;
|
||||
u8 dhchap_dhgroup_id;
|
||||
};
|
||||
|
||||
static inline struct nvmet_host *to_host(struct config_item *item)
|
||||
@ -420,7 +448,8 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_parse_fabrics_admin_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req);
|
||||
|
||||
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
|
||||
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
|
||||
@ -668,4 +697,48 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio)
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVME_TARGET_AUTH
|
||||
void nvmet_execute_auth_send(struct nvmet_req *req);
|
||||
void nvmet_execute_auth_receive(struct nvmet_req *req);
|
||||
int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
|
||||
bool set_ctrl);
|
||||
int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash);
|
||||
int nvmet_setup_auth(struct nvmet_ctrl *ctrl);
|
||||
void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req);
|
||||
void nvmet_destroy_auth(struct nvmet_ctrl *ctrl);
|
||||
void nvmet_auth_sq_free(struct nvmet_sq *sq);
|
||||
int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id);
|
||||
bool nvmet_check_auth_status(struct nvmet_req *req);
|
||||
int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
|
||||
unsigned int hash_len);
|
||||
int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
|
||||
unsigned int hash_len);
|
||||
static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl)
|
||||
{
|
||||
return ctrl->host_key != NULL;
|
||||
}
|
||||
int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
|
||||
u8 *buf, int buf_size);
|
||||
int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
|
||||
u8 *buf, int buf_size);
|
||||
#else
|
||||
static inline int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void nvmet_init_auth(struct nvmet_ctrl *ctrl,
|
||||
struct nvmet_req *req) {};
|
||||
static inline void nvmet_destroy_auth(struct nvmet_ctrl *ctrl) {};
|
||||
static inline void nvmet_auth_sq_free(struct nvmet_sq *sq) {};
|
||||
static inline bool nvmet_check_auth_status(struct nvmet_req *req)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline const char *nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; }
|
||||
#endif
|
||||
|
||||
#endif /* _NVMET_H */
|
||||
|
@ -1839,7 +1839,8 @@ static int __init nvmet_tcp_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0);
|
||||
nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq",
|
||||
WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
|
||||
if (!nvmet_tcp_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -1725,7 +1725,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
|
||||
dasd_put_device(device);
|
||||
}
|
||||
|
||||
/* check for for attention message */
|
||||
/* check for attention message */
|
||||
if (scsw_dstat(&irb->scsw) & DEV_STAT_ATTENTION) {
|
||||
device = dasd_device_from_cdev_locked(cdev);
|
||||
if (!IS_ERR(device)) {
|
||||
|
@ -639,6 +639,7 @@ static void dasd_diag_setup_blk_queue(struct dasd_block *block)
|
||||
/* With page sized segments each segment can be translated into one idaw/tidaw */
|
||||
blk_queue_max_segment_size(q, PAGE_SIZE);
|
||||
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
|
||||
blk_queue_dma_alignment(q, PAGE_SIZE - 1);
|
||||
}
|
||||
|
||||
static int dasd_diag_pe_handler(struct dasd_device *device,
|
||||
|
@ -6626,6 +6626,7 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block)
|
||||
/* With page sized segments each segment can be translated into one idaw/tidaw */
|
||||
blk_queue_max_segment_size(q, PAGE_SIZE);
|
||||
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
|
||||
blk_queue_dma_alignment(q, PAGE_SIZE - 1);
|
||||
}
|
||||
|
||||
static struct ccw_driver dasd_eckd_driver = {
|
||||
|
@ -863,7 +863,7 @@ dcssblk_submit_bio(struct bio *bio)
|
||||
unsigned long source_addr;
|
||||
unsigned long bytes_done;
|
||||
|
||||
blk_queue_split(&bio);
|
||||
bio = bio_split_to_limits(bio);
|
||||
|
||||
bytes_done = 0;
|
||||
dev_info = bio->bi_bdev->bd_disk->private_data;
|
||||
|
@ -718,6 +718,8 @@ static inline void ahash_request_set_crypt(struct ahash_request *req,
|
||||
struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
|
||||
u32 mask);
|
||||
|
||||
int crypto_has_shash(const char *alg_name, u32 type, u32 mask);
|
||||
|
||||
static inline struct crypto_tfm *crypto_shash_tfm(struct crypto_shash *tfm)
|
||||
{
|
||||
return &tfm->base;
|
||||
|
@ -104,6 +104,8 @@ struct kpp_alg {
|
||||
*/
|
||||
struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask);
|
||||
|
||||
int crypto_has_kpp(const char *alg_name, u32 type, u32 mask);
|
||||
|
||||
static inline struct crypto_tfm *crypto_kpp_tfm(struct crypto_kpp *tfm)
|
||||
{
|
||||
return &tfm->base;
|
||||
|
16
include/linux/base64.h
Normal file
16
include/linux/base64.h
Normal file
@ -0,0 +1,16 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* base64 encoding, lifted from fs/crypto/fname.c.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_BASE64_H
|
||||
#define _LINUX_BASE64_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
|
||||
|
||||
int base64_encode(const u8 *src, int len, char *dst);
|
||||
int base64_decode(const char *src, int len, u8 *dst);
|
||||
|
||||
#endif /* _LINUX_BASE64_H */
|
@ -140,6 +140,8 @@ struct gendisk {
|
||||
struct request_queue *queue;
|
||||
void *private_data;
|
||||
|
||||
struct bio_set bio_split;
|
||||
|
||||
int flags;
|
||||
unsigned long state;
|
||||
#define GD_NEED_PART_SCAN 0
|
||||
@ -531,7 +533,6 @@ struct request_queue {
|
||||
|
||||
struct blk_mq_tag_set *tag_set;
|
||||
struct list_head tag_set_list;
|
||||
struct bio_set bio_split;
|
||||
|
||||
struct dentry *debugfs_dir;
|
||||
struct dentry *sched_debugfs_dir;
|
||||
@ -864,9 +865,9 @@ void blk_request_module(dev_t devt);
|
||||
extern int blk_register_queue(struct gendisk *disk);
|
||||
extern void blk_unregister_queue(struct gendisk *disk);
|
||||
void submit_bio_noacct(struct bio *bio);
|
||||
struct bio *bio_split_to_limits(struct bio *bio);
|
||||
|
||||
extern int blk_lld_busy(struct request_queue *q);
|
||||
extern void blk_queue_split(struct bio **);
|
||||
extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
|
||||
extern void blk_queue_exit(struct request_queue *q);
|
||||
extern void blk_sync_queue(struct request_queue *q);
|
||||
|
41
include/linux/nvme-auth.h
Normal file
41
include/linux/nvme-auth.h
Normal file
@ -0,0 +1,41 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (c) 2021 Hannes Reinecke, SUSE Software Solutions
|
||||
*/
|
||||
|
||||
#ifndef _NVME_AUTH_H
|
||||
#define _NVME_AUTH_H
|
||||
|
||||
#include <crypto/kpp.h>
|
||||
|
||||
struct nvme_dhchap_key {
|
||||
u8 *key;
|
||||
size_t len;
|
||||
u8 hash;
|
||||
};
|
||||
|
||||
u32 nvme_auth_get_seqnum(void);
|
||||
const char *nvme_auth_dhgroup_name(u8 dhgroup_id);
|
||||
const char *nvme_auth_dhgroup_kpp(u8 dhgroup_id);
|
||||
u8 nvme_auth_dhgroup_id(const char *dhgroup_name);
|
||||
|
||||
const char *nvme_auth_hmac_name(u8 hmac_id);
|
||||
const char *nvme_auth_digest_name(u8 hmac_id);
|
||||
size_t nvme_auth_hmac_hash_len(u8 hmac_id);
|
||||
u8 nvme_auth_hmac_id(const char *hmac_name);
|
||||
|
||||
struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
|
||||
u8 key_hash);
|
||||
void nvme_auth_free_key(struct nvme_dhchap_key *key);
|
||||
u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn);
|
||||
int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key);
|
||||
int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len,
|
||||
u8 *challenge, u8 *aug, size_t hlen);
|
||||
int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid);
|
||||
int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm,
|
||||
u8 *host_key, size_t host_key_len);
|
||||
int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
|
||||
u8 *ctrl_key, size_t ctrl_key_len,
|
||||
u8 *sess_key, size_t sess_key_len);
|
||||
|
||||
#endif /* _NVME_AUTH_H */
|
@ -19,6 +19,7 @@
|
||||
#define NVMF_TRSVCID_SIZE 32
|
||||
#define NVMF_TRADDR_SIZE 256
|
||||
#define NVMF_TSAS_SIZE 256
|
||||
#define NVMF_AUTH_HASH_LEN 64
|
||||
|
||||
#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
|
||||
|
||||
@ -711,6 +712,10 @@ enum {
|
||||
NVME_AER_VS = 7,
|
||||
};
|
||||
|
||||
enum {
|
||||
NVME_AER_ERROR_PERSIST_INT_ERR = 0x03,
|
||||
};
|
||||
|
||||
enum {
|
||||
NVME_AER_NOTICE_NS_CHANGED = 0x00,
|
||||
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
|
||||
@ -1369,6 +1374,8 @@ enum nvmf_capsule_command {
|
||||
nvme_fabrics_type_property_set = 0x00,
|
||||
nvme_fabrics_type_connect = 0x01,
|
||||
nvme_fabrics_type_property_get = 0x04,
|
||||
nvme_fabrics_type_auth_send = 0x05,
|
||||
nvme_fabrics_type_auth_receive = 0x06,
|
||||
};
|
||||
|
||||
#define nvme_fabrics_type_name(type) { type, #type }
|
||||
@ -1376,7 +1383,9 @@ enum nvmf_capsule_command {
|
||||
__print_symbolic(type, \
|
||||
nvme_fabrics_type_name(nvme_fabrics_type_property_set), \
|
||||
nvme_fabrics_type_name(nvme_fabrics_type_connect), \
|
||||
nvme_fabrics_type_name(nvme_fabrics_type_property_get))
|
||||
nvme_fabrics_type_name(nvme_fabrics_type_property_get), \
|
||||
nvme_fabrics_type_name(nvme_fabrics_type_auth_send), \
|
||||
nvme_fabrics_type_name(nvme_fabrics_type_auth_receive))
|
||||
|
||||
/*
|
||||
* If not fabrics command, fctype will be ignored.
|
||||
@ -1472,6 +1481,11 @@ struct nvmf_connect_command {
|
||||
__u8 resv4[12];
|
||||
};
|
||||
|
||||
enum {
|
||||
NVME_CONNECT_AUTHREQ_ASCR = (1 << 2),
|
||||
NVME_CONNECT_AUTHREQ_ATR = (1 << 1),
|
||||
};
|
||||
|
||||
struct nvmf_connect_data {
|
||||
uuid_t hostid;
|
||||
__le16 cntlid;
|
||||
@ -1506,6 +1520,200 @@ struct nvmf_property_get_command {
|
||||
__u8 resv4[16];
|
||||
};
|
||||
|
||||
struct nvmf_auth_common_command {
|
||||
__u8 opcode;
|
||||
__u8 resv1;
|
||||
__u16 command_id;
|
||||
__u8 fctype;
|
||||
__u8 resv2[19];
|
||||
union nvme_data_ptr dptr;
|
||||
__u8 resv3;
|
||||
__u8 spsp0;
|
||||
__u8 spsp1;
|
||||
__u8 secp;
|
||||
__le32 al_tl;
|
||||
__u8 resv4[16];
|
||||
};
|
||||
|
||||
struct nvmf_auth_send_command {
|
||||
__u8 opcode;
|
||||
__u8 resv1;
|
||||
__u16 command_id;
|
||||
__u8 fctype;
|
||||
__u8 resv2[19];
|
||||
union nvme_data_ptr dptr;
|
||||
__u8 resv3;
|
||||
__u8 spsp0;
|
||||
__u8 spsp1;
|
||||
__u8 secp;
|
||||
__le32 tl;
|
||||
__u8 resv4[16];
|
||||
};
|
||||
|
||||
struct nvmf_auth_receive_command {
|
||||
__u8 opcode;
|
||||
__u8 resv1;
|
||||
__u16 command_id;
|
||||
__u8 fctype;
|
||||
__u8 resv2[19];
|
||||
union nvme_data_ptr dptr;
|
||||
__u8 resv3;
|
||||
__u8 spsp0;
|
||||
__u8 spsp1;
|
||||
__u8 secp;
|
||||
__le32 al;
|
||||
__u8 resv4[16];
|
||||
};
|
||||
|
||||
/* Value for secp */
|
||||
enum {
|
||||
NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER = 0xe9,
|
||||
};
|
||||
|
||||
/* Defined value for auth_type */
|
||||
enum {
|
||||
NVME_AUTH_COMMON_MESSAGES = 0x00,
|
||||
NVME_AUTH_DHCHAP_MESSAGES = 0x01,
|
||||
};
|
||||
|
||||
/* Defined messages for auth_id */
|
||||
enum {
|
||||
NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE = 0x00,
|
||||
NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE = 0x01,
|
||||
NVME_AUTH_DHCHAP_MESSAGE_REPLY = 0x02,
|
||||
NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1 = 0x03,
|
||||
NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2 = 0x04,
|
||||
NVME_AUTH_DHCHAP_MESSAGE_FAILURE2 = 0xf0,
|
||||
NVME_AUTH_DHCHAP_MESSAGE_FAILURE1 = 0xf1,
|
||||
};
|
||||
|
||||
struct nvmf_auth_dhchap_protocol_descriptor {
|
||||
__u8 authid;
|
||||
__u8 rsvd;
|
||||
__u8 halen;
|
||||
__u8 dhlen;
|
||||
__u8 idlist[60];
|
||||
};
|
||||
|
||||
enum {
|
||||
NVME_AUTH_DHCHAP_AUTH_ID = 0x01,
|
||||
};
|
||||
|
||||
/* Defined hash functions for DH-HMAC-CHAP authentication */
|
||||
enum {
|
||||
NVME_AUTH_HASH_SHA256 = 0x01,
|
||||
NVME_AUTH_HASH_SHA384 = 0x02,
|
||||
NVME_AUTH_HASH_SHA512 = 0x03,
|
||||
NVME_AUTH_HASH_INVALID = 0xff,
|
||||
};
|
||||
|
||||
/* Defined Diffie-Hellman group identifiers for DH-HMAC-CHAP authentication */
|
||||
enum {
|
||||
NVME_AUTH_DHGROUP_NULL = 0x00,
|
||||
NVME_AUTH_DHGROUP_2048 = 0x01,
|
||||
NVME_AUTH_DHGROUP_3072 = 0x02,
|
||||
NVME_AUTH_DHGROUP_4096 = 0x03,
|
||||
NVME_AUTH_DHGROUP_6144 = 0x04,
|
||||
NVME_AUTH_DHGROUP_8192 = 0x05,
|
||||
NVME_AUTH_DHGROUP_INVALID = 0xff,
|
||||
};
|
||||
|
||||
union nvmf_auth_protocol {
|
||||
struct nvmf_auth_dhchap_protocol_descriptor dhchap;
|
||||
};
|
||||
|
||||
struct nvmf_auth_dhchap_negotiate_data {
|
||||
__u8 auth_type;
|
||||
__u8 auth_id;
|
||||
__le16 rsvd;
|
||||
__le16 t_id;
|
||||
__u8 sc_c;
|
||||
__u8 napd;
|
||||
union nvmf_auth_protocol auth_protocol[];
|
||||
};
|
||||
|
||||
struct nvmf_auth_dhchap_challenge_data {
|
||||
__u8 auth_type;
|
||||
__u8 auth_id;
|
||||
__u16 rsvd1;
|
||||
__le16 t_id;
|
||||
__u8 hl;
|
||||
__u8 rsvd2;
|
||||
__u8 hashid;
|
||||
__u8 dhgid;
|
||||
__le16 dhvlen;
|
||||
__le32 seqnum;
|
||||
/* 'hl' bytes of challenge value */
|
||||
__u8 cval[];
|
||||
/* followed by 'dhvlen' bytes of DH value */
|
||||
};
|
||||
|
||||
struct nvmf_auth_dhchap_reply_data {
|
||||
__u8 auth_type;
|
||||
__u8 auth_id;
|
||||
__le16 rsvd1;
|
||||
__le16 t_id;
|
||||
__u8 hl;
|
||||
__u8 rsvd2;
|
||||
__u8 cvalid;
|
||||
__u8 rsvd3;
|
||||
__le16 dhvlen;
|
||||
__le32 seqnum;
|
||||
/* 'hl' bytes of response data */
|
||||
__u8 rval[];
|
||||
/* followed by 'hl' bytes of Challenge value */
|
||||
/* followed by 'dhvlen' bytes of DH value */
|
||||
};
|
||||
|
||||
/*
 * Bit 0 flag.
 * NOTE(review): presumably tested against the cvalid/rvalid message
 * fields - confirm against the users of this constant.
 */
enum {
	NVME_AUTH_DHCHAP_RESPONSE_VALID	= (1 << 0),
};
|
||||
|
||||
struct nvmf_auth_dhchap_success1_data {
|
||||
__u8 auth_type;
|
||||
__u8 auth_id;
|
||||
__le16 rsvd1;
|
||||
__le16 t_id;
|
||||
__u8 hl;
|
||||
__u8 rsvd2;
|
||||
__u8 rvalid;
|
||||
__u8 rsvd3[7];
|
||||
/* 'hl' bytes of response value if 'rvalid' is set */
|
||||
__u8 rval[];
|
||||
};
|
||||
|
||||
struct nvmf_auth_dhchap_success2_data {
|
||||
__u8 auth_type;
|
||||
__u8 auth_id;
|
||||
__le16 rsvd1;
|
||||
__le16 t_id;
|
||||
__u8 rsvd2[10];
|
||||
};
|
||||
|
||||
struct nvmf_auth_dhchap_failure_data {
|
||||
__u8 auth_type;
|
||||
__u8 auth_id;
|
||||
__le16 rsvd1;
|
||||
__le16 t_id;
|
||||
__u8 rescode;
|
||||
__u8 rescode_exp;
|
||||
};
|
||||
|
||||
/* Failure reason code; "failed" is the only reason declared here. */
enum {
	NVME_AUTH_DHCHAP_FAILURE_REASON_FAILED	= 0x01,
};
|
||||
|
||||
/*
 * DH-HMAC-CHAP failure codes.
 * NOTE(review): presumably the values of the 'rescode_exp' field of
 * struct nvmf_auth_dhchap_failure_data - confirm.
 */
enum {
	NVME_AUTH_DHCHAP_FAILURE_FAILED			= 0x01,
	NVME_AUTH_DHCHAP_FAILURE_NOT_USABLE		= 0x02,
	NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH	= 0x03,
	NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE		= 0x04,
	NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE	= 0x05,
	NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD	= 0x06,
	NVME_AUTH_DHCHAP_FAILURE_INCORRECT_MESSAGE	= 0x07,
};
|
||||
|
||||
|
||||
struct nvme_dbbuf {
|
||||
__u8 opcode;
|
||||
__u8 flags;
|
||||
@ -1549,6 +1757,9 @@ struct nvme_command {
|
||||
struct nvmf_connect_command connect;
|
||||
struct nvmf_property_set_command prop_set;
|
||||
struct nvmf_property_get_command prop_get;
|
||||
struct nvmf_auth_common_command auth_common;
|
||||
struct nvmf_auth_send_command auth_send;
|
||||
struct nvmf_auth_receive_command auth_receive;
|
||||
struct nvme_dbbuf dbbuf;
|
||||
struct nvme_directive_cmd directive;
|
||||
};
|
||||
|
@ -15,6 +15,8 @@
|
||||
/* ublk control command opcodes (tail of the UBLK_CMD_* list) */
#define	UBLK_CMD_DEL_DEV		0x05
#define	UBLK_CMD_START_DEV	0x06
#define	UBLK_CMD_STOP_DEV	0x07
#define	UBLK_CMD_SET_PARAMS	0x08
#define	UBLK_CMD_GET_PARAMS	0x09
|
||||
|
||||
/*
|
||||
* IO commands, issued by ublk server, and handled by ublk driver.
|
||||
@ -28,12 +30,21 @@
|
||||
* this IO request, request's handling result is committed to ublk
|
||||
* driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be
|
||||
* handled before completing io request.
|
||||
*
|
||||
* NEED_GET_DATA: only used for write requests to set io addr and copy data
|
||||
* When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA
|
||||
* command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA.
|
||||
*
|
||||
* It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag
|
||||
* while starting a ublk device.
|
||||
*/
|
||||
/* IO command opcodes issued by the ublk server */
#define	UBLK_IO_FETCH_REQ		0x20
#define	UBLK_IO_COMMIT_AND_FETCH_REQ	0x21
#define	UBLK_IO_NEED_GET_DATA	0x22

/* only ABORT means that no re-fetch */
#define UBLK_IO_RES_OK			0
#define UBLK_IO_RES_NEED_GET_DATA	1
#define UBLK_IO_RES_ABORT		(-ENODEV)

#define UBLKSRV_CMD_BUF_OFFSET	0
|
||||
@ -54,6 +65,15 @@
|
||||
*/
|
||||
#define UBLK_F_URING_CMD_COMP_IN_TASK	(1ULL << 1)

/*
 * User should issue io cmd again for write requests to
 * set io buffer address and copy data from bio vectors
 * to the userspace io buffer.
 *
 * In this mode, task_work is not used.
 *
 * The constant is 64 bits wide (ULL), like the other UBLK_F_* flag, so
 * that masking a 64-bit flags word with ~UBLK_F_NEED_GET_DATA keeps the
 * upper 32 bits even where 'unsigned long' is 32 bits.
 */
#define UBLK_F_NEED_GET_DATA (1ULL << 2)
|
||||
|
||||
/* device state */
#define UBLK_S_DEV_DEAD	0
#define UBLK_S_DEV_LIVE	1
|
||||
@ -78,22 +98,23 @@ struct ublksrv_ctrl_cmd {
|
||||
struct ublksrv_ctrl_dev_info {
|
||||
__u16 nr_hw_queues;
|
||||
__u16 queue_depth;
|
||||
__u16 block_size;
|
||||
__u16 state;
|
||||
__u16 pad0;
|
||||
|
||||
__u32 rq_max_blocks;
|
||||
__u32 max_io_buf_bytes;
|
||||
__u32 dev_id;
|
||||
|
||||
__u64 dev_blocks;
|
||||
|
||||
__s32 ublksrv_pid;
|
||||
__s32 reserved0;
|
||||
__u32 pad1;
|
||||
|
||||
__u64 flags;
|
||||
__u64 flags_reserved;
|
||||
|
||||
/* For ublksrv internal use, invisible to ublk driver */
|
||||
__u64 ublksrv_flags;
|
||||
__u64 reserved1[9];
|
||||
|
||||
__u64 reserved0;
|
||||
__u64 reserved1;
|
||||
__u64 reserved2;
|
||||
};
|
||||
|
||||
#define UBLK_IO_OP_READ 0
|
||||
@ -158,4 +179,49 @@ struct ublksrv_io_cmd {
|
||||
__u64 addr;
|
||||
};
|
||||
|
||||
struct ublk_param_basic {
|
||||
#define UBLK_ATTR_READ_ONLY (1 << 0)
|
||||
#define UBLK_ATTR_ROTATIONAL (1 << 1)
|
||||
#define UBLK_ATTR_VOLATILE_CACHE (1 << 2)
|
||||
#define UBLK_ATTR_FUA (1 << 3)
|
||||
__u32 attrs;
|
||||
__u8 logical_bs_shift;
|
||||
__u8 physical_bs_shift;
|
||||
__u8 io_opt_shift;
|
||||
__u8 io_min_shift;
|
||||
|
||||
__u32 max_sectors;
|
||||
__u32 chunk_sectors;
|
||||
|
||||
__u64 dev_sectors;
|
||||
__u64 virt_boundary_mask;
|
||||
};
|
||||
|
||||
struct ublk_param_discard {
|
||||
__u32 discard_alignment;
|
||||
|
||||
__u32 discard_granularity;
|
||||
__u32 max_discard_sectors;
|
||||
|
||||
__u32 max_write_zeroes_sectors;
|
||||
__u16 max_discard_segments;
|
||||
__u16 reserved0;
|
||||
};
|
||||
|
||||
struct ublk_params {
|
||||
/*
|
||||
* Total length of parameters, userspace has to set 'len' for both
|
||||
* SET_PARAMS and GET_PARAMS command, and driver may update len
|
||||
* if two sides use different version of 'ublk_params', same with
|
||||
* 'types' fields.
|
||||
*/
|
||||
__u32 len;
|
||||
#define UBLK_PARAM_TYPE_BASIC (1 << 0)
|
||||
#define UBLK_PARAM_TYPE_DISCARD (1 << 1)
|
||||
__u32 types; /* types of parameter included */
|
||||
|
||||
struct ublk_param_basic basic;
|
||||
struct ublk_param_discard discard;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -46,7 +46,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
|
||||
bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
|
||||
list_sort.o uuid.o iov_iter.o clz_ctz.o \
|
||||
bsearch.o find_bit.o llist.o memweight.o kfifo.o \
|
||||
percpu-refcount.o rhashtable.o \
|
||||
percpu-refcount.o rhashtable.o base64.o \
|
||||
once.o refcount.o usercopy.o errseq.o bucket_locks.o \
|
||||
generic-radix-tree.o
|
||||
obj-$(CONFIG_STRING_SELFTEST) += test_string.o
|
||||
|
103
lib/base64.c
Normal file
103
lib/base64.c
Normal file
@ -0,0 +1,103 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* base64.c - RFC4648-compliant base64 encoding
|
||||
*
|
||||
* Copyright (c) 2020 Hannes Reinecke, SUSE
|
||||
*
|
||||
* Based on the base64url routines from fs/crypto/fname.c
|
||||
* (which are using the URL-safe base64 encoding),
|
||||
* modified to use the standard coding table from RFC4648 section 4.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/base64.h>
|
||||
|
||||
/*
 * Standard base64 alphabet; the index of a character is its 6-bit value.
 * 64 characters plus the terminating NUL give the array size of 65.
 */
static const char base64_table[65] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
|
||||
/**
|
||||
* base64_encode() - base64-encode some binary data
|
||||
* @src: the binary data to encode
|
||||
* @srclen: the length of @src in bytes
|
||||
* @dst: (output) the base64-encoded string. Not NUL-terminated.
|
||||
*
|
||||
* Encodes data using base64 encoding, i.e. the "Base 64 Encoding" specified
|
||||
* by RFC 4648, including the '='-padding.
|
||||
*
|
||||
* Return: the length of the resulting base64-encoded string in bytes.
|
||||
*/
|
||||
int base64_encode(const u8 *src, int srclen, char *dst)
|
||||
{
|
||||
u32 ac = 0;
|
||||
int bits = 0;
|
||||
int i;
|
||||
char *cp = dst;
|
||||
|
||||
for (i = 0; i < srclen; i++) {
|
||||
ac = (ac << 8) | src[i];
|
||||
bits += 8;
|
||||
do {
|
||||
bits -= 6;
|
||||
*cp++ = base64_table[(ac >> bits) & 0x3f];
|
||||
} while (bits >= 6);
|
||||
}
|
||||
if (bits) {
|
||||
*cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
|
||||
bits -= 6;
|
||||
}
|
||||
while (bits < 0) {
|
||||
*cp++ = '=';
|
||||
bits += 2;
|
||||
}
|
||||
return cp - dst;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(base64_encode);
|
||||
|
||||
/**
|
||||
* base64_decode() - base64-decode a string
|
||||
* @src: the string to decode. Doesn't need to be NUL-terminated.
|
||||
* @srclen: the length of @src in bytes
|
||||
* @dst: (output) the decoded binary data
|
||||
*
|
||||
* Decodes a string using base64 encoding, i.e. the "Base 64 Encoding"
|
||||
* specified by RFC 4648, including the '='-padding.
|
||||
*
|
||||
* This implementation hasn't been optimized for performance.
|
||||
*
|
||||
* Return: the length of the resulting decoded binary data in bytes,
|
||||
* or -1 if the string isn't a valid base64 string.
|
||||
*/
|
||||
int base64_decode(const char *src, int srclen, u8 *dst)
|
||||
{
|
||||
u32 ac = 0;
|
||||
int bits = 0;
|
||||
int i;
|
||||
u8 *bp = dst;
|
||||
|
||||
for (i = 0; i < srclen; i++) {
|
||||
const char *p = strchr(base64_table, src[i]);
|
||||
|
||||
if (src[i] == '=') {
|
||||
ac = (ac << 6);
|
||||
bits += 6;
|
||||
if (bits >= 8)
|
||||
bits -= 8;
|
||||
continue;
|
||||
}
|
||||
if (p == NULL || src[i] == 0)
|
||||
return -1;
|
||||
ac = (ac << 6) | (p - base64_table);
|
||||
bits += 6;
|
||||
if (bits >= 8) {
|
||||
bits -= 8;
|
||||
*bp++ = (u8)(ac >> bits);
|
||||
}
|
||||
}
|
||||
if (ac & ((1 << bits) - 1))
|
||||
return -1;
|
||||
return bp - dst;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(base64_decode);
|
Loading…
x
Reference in New Issue
Block a user