block: bio-integrity: directly map user buffers
Passthrough commands that utilize metadata currently need to bounce the user space buffer through the kernel. Add support for mapping user space directly so that we can avoid this costly overhead. This is similar to how the normal bio data payload utilizes user addresses with bio_map_user_iov(). If the user address can't directly be used for reason, like too many segments or address unalignement, fallback to a copy of the user vec while keeping the user address pinned for the IO duration so that it can safely be copied on completion in any process context. Signed-off-by: Keith Busch <kbusch@kernel.org> Link: https://lore.kernel.org/r/20231130215309.2923568-2-kbusch@meta.com [axboe: fold in fix from Kanchan Joshi] Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
668bfeeabb
commit
492c5d4559
@ -69,15 +69,15 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
||||
|
||||
memset(bip, 0, sizeof(*bip));
|
||||
|
||||
if (nr_vecs > inline_vecs) {
|
||||
/* always report as many vecs as asked explicitly, not inline vecs */
|
||||
bip->bip_max_vcnt = nr_vecs;
|
||||
if (nr_vecs > inline_vecs) {
|
||||
bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
|
||||
&bip->bip_max_vcnt, gfp_mask);
|
||||
if (!bip->bip_vec)
|
||||
goto err;
|
||||
} else {
|
||||
bip->bip_vec = bip->bip_inline_vecs;
|
||||
bip->bip_max_vcnt = inline_vecs;
|
||||
}
|
||||
|
||||
bip->bip_bio = bio;
|
||||
@ -91,6 +91,47 @@ err:
|
||||
}
|
||||
EXPORT_SYMBOL(bio_integrity_alloc);
|
||||
|
||||
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
|
||||
bool dirty)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_vecs; i++) {
|
||||
if (dirty && !PageCompound(bv[i].bv_page))
|
||||
set_page_dirty_lock(bv[i].bv_page);
|
||||
unpin_user_page(bv[i].bv_page);
|
||||
}
|
||||
}
|
||||
|
||||
static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
|
||||
{
|
||||
unsigned short nr_vecs = bip->bip_max_vcnt - 1;
|
||||
struct bio_vec *copy = &bip->bip_vec[1];
|
||||
size_t bytes = bip->bip_iter.bi_size;
|
||||
struct iov_iter iter;
|
||||
int ret;
|
||||
|
||||
iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
|
||||
ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
|
||||
WARN_ON_ONCE(ret != bytes);
|
||||
|
||||
bio_integrity_unpin_bvec(copy, nr_vecs, true);
|
||||
}
|
||||
|
||||
static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
|
||||
{
|
||||
bool dirty = bio_data_dir(bip->bip_bio) == READ;
|
||||
|
||||
if (bip->bip_flags & BIP_COPY_USER) {
|
||||
if (dirty)
|
||||
bio_integrity_uncopy_user(bip);
|
||||
kfree(bvec_virt(bip->bip_vec));
|
||||
return;
|
||||
}
|
||||
|
||||
bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, dirty);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_integrity_free - Free bio integrity payload
|
||||
* @bio: bio containing bip to be freed
|
||||
@ -105,6 +146,8 @@ void bio_integrity_free(struct bio *bio)
|
||||
|
||||
if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
|
||||
kfree(bvec_virt(bip->bip_vec));
|
||||
else if (bip->bip_flags & BIP_INTEGRITY_USER)
|
||||
bio_integrity_unmap_user(bip);
|
||||
|
||||
__bio_integrity_free(bs, bip);
|
||||
bio->bi_integrity = NULL;
|
||||
@ -160,6 +203,177 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
|
||||
}
|
||||
EXPORT_SYMBOL(bio_integrity_add_page);
|
||||
|
||||
static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
|
||||
int nr_vecs, unsigned int len,
|
||||
unsigned int direction, u32 seed)
|
||||
{
|
||||
bool write = direction == ITER_SOURCE;
|
||||
struct bio_integrity_payload *bip;
|
||||
struct iov_iter iter;
|
||||
void *buf;
|
||||
int ret;
|
||||
|
||||
buf = kmalloc(len, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
if (write) {
|
||||
iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
|
||||
if (!copy_from_iter_full(buf, len, &iter)) {
|
||||
ret = -EFAULT;
|
||||
goto free_buf;
|
||||
}
|
||||
|
||||
bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
|
||||
} else {
|
||||
memset(buf, 0, len);
|
||||
|
||||
/*
|
||||
* We need to preserve the original bvec and the number of vecs
|
||||
* in it for completion handling
|
||||
*/
|
||||
bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
|
||||
}
|
||||
|
||||
if (IS_ERR(bip)) {
|
||||
ret = PTR_ERR(bip);
|
||||
goto free_buf;
|
||||
}
|
||||
|
||||
if (write)
|
||||
bio_integrity_unpin_bvec(bvec, nr_vecs, false);
|
||||
else
|
||||
memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));
|
||||
|
||||
ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
|
||||
offset_in_page(buf));
|
||||
if (ret != len) {
|
||||
ret = -ENOMEM;
|
||||
goto free_bip;
|
||||
}
|
||||
|
||||
bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
|
||||
bip->bip_iter.bi_sector = seed;
|
||||
return 0;
|
||||
free_bip:
|
||||
bio_integrity_free(bio);
|
||||
free_buf:
|
||||
kfree(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
|
||||
int nr_vecs, unsigned int len, u32 seed)
|
||||
{
|
||||
struct bio_integrity_payload *bip;
|
||||
|
||||
bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);
|
||||
if (IS_ERR(bip))
|
||||
return PTR_ERR(bip);
|
||||
|
||||
memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
|
||||
bip->bip_flags |= BIP_INTEGRITY_USER;
|
||||
bip->bip_iter.bi_sector = seed;
|
||||
bip->bip_iter.bi_size = len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
|
||||
int nr_vecs, ssize_t bytes, ssize_t offset)
|
||||
{
|
||||
unsigned int nr_bvecs = 0;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < nr_vecs; i = j) {
|
||||
size_t size = min_t(size_t, bytes, PAGE_SIZE - offset);
|
||||
struct folio *folio = page_folio(pages[i]);
|
||||
|
||||
bytes -= size;
|
||||
for (j = i + 1; j < nr_vecs; j++) {
|
||||
size_t next = min_t(size_t, PAGE_SIZE, bytes);
|
||||
|
||||
if (page_folio(pages[j]) != folio ||
|
||||
pages[j] != pages[j - 1] + 1)
|
||||
break;
|
||||
unpin_user_page(pages[j]);
|
||||
size += next;
|
||||
bytes -= next;
|
||||
}
|
||||
|
||||
bvec_set_page(&bvec[nr_bvecs], pages[i], size, offset);
|
||||
offset = 0;
|
||||
nr_bvecs++;
|
||||
}
|
||||
|
||||
return nr_bvecs;
|
||||
}
|
||||
|
||||
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
|
||||
u32 seed)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
|
||||
unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
|
||||
struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
|
||||
struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
|
||||
unsigned int direction, nr_bvecs;
|
||||
struct iov_iter iter;
|
||||
int ret, nr_vecs;
|
||||
size_t offset;
|
||||
bool copy;
|
||||
|
||||
if (bio_integrity(bio))
|
||||
return -EINVAL;
|
||||
if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
|
||||
return -E2BIG;
|
||||
|
||||
if (bio_data_dir(bio) == READ)
|
||||
direction = ITER_DEST;
|
||||
else
|
||||
direction = ITER_SOURCE;
|
||||
|
||||
iov_iter_ubuf(&iter, direction, ubuf, bytes);
|
||||
nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
|
||||
if (nr_vecs > BIO_MAX_VECS)
|
||||
return -E2BIG;
|
||||
if (nr_vecs > UIO_FASTIOV) {
|
||||
bvec = kcalloc(sizeof(*bvec), nr_vecs, GFP_KERNEL);
|
||||
if (!bvec)
|
||||
return -ENOMEM;
|
||||
pages = NULL;
|
||||
}
|
||||
|
||||
copy = !iov_iter_is_aligned(&iter, align, align);
|
||||
ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
|
||||
if (unlikely(ret < 0))
|
||||
goto free_bvec;
|
||||
|
||||
nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
|
||||
if (pages != stack_pages)
|
||||
kvfree(pages);
|
||||
if (nr_bvecs > queue_max_integrity_segments(q))
|
||||
copy = true;
|
||||
|
||||
if (copy)
|
||||
ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
|
||||
direction, seed);
|
||||
else
|
||||
ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);
|
||||
if (ret)
|
||||
goto release_pages;
|
||||
if (bvec != stack_vec)
|
||||
kfree(bvec);
|
||||
|
||||
return 0;
|
||||
|
||||
release_pages:
|
||||
bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
|
||||
free_bvec:
|
||||
if (bvec != stack_vec)
|
||||
kfree(bvec);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_integrity_map_user);
|
||||
|
||||
/**
|
||||
* bio_integrity_process - Process integrity metadata for a bio
|
||||
* @bio: bio to generate/verify integrity metadata for
|
||||
|
@ -324,6 +324,8 @@ enum bip_flags {
|
||||
BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
|
||||
BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
|
||||
BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
|
||||
BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */
|
||||
BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -718,6 +720,7 @@ static inline bool bioset_initialized(struct bio_set *bs)
|
||||
for_each_bio(_bio) \
|
||||
bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
|
||||
|
||||
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
|
||||
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
|
||||
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
|
||||
extern bool bio_integrity_prep(struct bio *);
|
||||
@ -789,6 +792,12 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
|
||||
ssize_t len, u32 seed)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_BLK_DEV_INTEGRITY */
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user