Merge branch 'for-3.20/core' of git://git.kernel.dk/linux-block
Pull core block IO changes from Jens Axboe: "This contains: - A series from Christoph that cleans up and refactors various parts of the REQ_BLOCK_PC handling. Contributions in that series from Dongsu Park and Kent Overstreet as well. - CFQ: - A bug fix for cfq for realtime IO scheduling from Jeff Moyer. - A stable patch fixing a potential crash in CFQ in OOM situations. From Konstantin Khlebnikov. - blk-mq: - Add support for tag allocation policies, from Shaohua. This is a prep patch enabling libata (and other SCSI parts) to use the blk-mq tagging, instead of rolling their own. - Various little tweaks from Keith and Mike, in preparation for DM blk-mq support. - Minor little fixes or tweaks from me. - A double free error fix from Tony Battersby. - The partition 4k issue fixes from Matthew and Boaz. - Add support for zero+unprovision for blkdev_issue_zeroout() from Martin" * 'for-3.20/core' of git://git.kernel.dk/linux-block: (27 commits) block: remove unused function blk_bio_map_sg block: handle the null_mapped flag correctly in blk_rq_map_user_iov blk-mq: fix double-free in error path block: prevent request-to-request merging with gaps if not allowed blk-mq: make blk_mq_run_queues() static dm: fix multipath regression due to initializing wrong request cfq-iosched: handle failure of cfq group allocation block: Quiesce zeroout wrapper block: rewrite and split __bio_copy_iov() block: merge __bio_map_user_iov into bio_map_user_iov block: merge __bio_map_kern into bio_map_kern block: pass iov_iter to the BLOCK_PC mapping functions block: add a helper to free bio bounce buffer pages block: use blk_rq_map_user_iov to implement blk_rq_map_user block: simplify bio_map_kern block: mark blk-mq devices as stackable block: keep established cmd_flags when cloning into a blk-mq request block: add blk-mq support to blk_insert_cloned_request() block: require blk_rq_prep_clone() be given an initialized clone request blk-mq: add tag allocation policy ...
This commit is contained in:
commit
3e12cefbe1
@ -28,12 +28,15 @@ Implementation
|
||||
Execute-in-place is implemented in three steps: block device operation,
|
||||
address space operation, and file operations.
|
||||
|
||||
A block device operation named direct_access is used to retrieve a
|
||||
reference (pointer) to a block on-disk. The reference is supposed to be
|
||||
cpu-addressable, physical address and remain valid until the release operation
|
||||
is performed. A struct block_device reference is used to address the device,
|
||||
and a sector_t argument is used to identify the individual block. As an
|
||||
alternative, memory technology devices can be used for this.
|
||||
A block device operation named direct_access is used to translate the
|
||||
block device sector number to a page frame number (pfn) that identifies
|
||||
the physical page for the memory. It also returns a kernel virtual
|
||||
address that can be used to access the memory.
|
||||
|
||||
The direct_access method takes a 'size' parameter that indicates the
|
||||
number of bytes being requested. The function should return the number
|
||||
of bytes that can be contiguously accessed at that offset. It may also
|
||||
return a negative errno if an error occurs.
|
||||
|
||||
The block device operation is optional; these block devices support it as of
|
||||
today:
|
||||
|
@ -139,26 +139,17 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
|
||||
* axon_ram_direct_access - direct_access() method for block device
|
||||
* @device, @sector, @data: see block_device_operations method
|
||||
*/
|
||||
static int
|
||||
static long
|
||||
axon_ram_direct_access(struct block_device *device, sector_t sector,
|
||||
void **kaddr, unsigned long *pfn)
|
||||
void **kaddr, unsigned long *pfn, long size)
|
||||
{
|
||||
struct axon_ram_bank *bank = device->bd_disk->private_data;
|
||||
loff_t offset;
|
||||
|
||||
offset = sector;
|
||||
if (device->bd_part != NULL)
|
||||
offset += device->bd_part->start_sect;
|
||||
offset <<= AXON_RAM_SECTOR_SHIFT;
|
||||
if (offset >= bank->size) {
|
||||
dev_err(&bank->device->dev, "Access outside of address space\n");
|
||||
return -ERANGE;
|
||||
}
|
||||
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
|
||||
|
||||
*kaddr = (void *)(bank->ph_addr + offset);
|
||||
*pfn = virt_to_phys(kaddr) >> PAGE_SHIFT;
|
||||
|
||||
return 0;
|
||||
return bank->size - offset;
|
||||
}
|
||||
|
||||
static const struct block_device_operations axon_ram_devops = {
|
||||
|
440
block/bio.c
440
block/bio.c
@ -28,7 +28,6 @@
|
||||
#include <linux/mempool.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <scsi/sg.h> /* for struct sg_iovec */
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
@ -1022,21 +1021,11 @@ void bio_copy_data(struct bio *dst, struct bio *src)
|
||||
EXPORT_SYMBOL(bio_copy_data);
|
||||
|
||||
struct bio_map_data {
|
||||
int nr_sgvecs;
|
||||
int is_our_pages;
|
||||
struct sg_iovec sgvecs[];
|
||||
struct iov_iter iter;
|
||||
struct iovec iov[];
|
||||
};
|
||||
|
||||
static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
|
||||
const struct sg_iovec *iov, int iov_count,
|
||||
int is_our_pages)
|
||||
{
|
||||
memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
|
||||
bmd->nr_sgvecs = iov_count;
|
||||
bmd->is_our_pages = is_our_pages;
|
||||
bio->bi_private = bmd;
|
||||
}
|
||||
|
||||
static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
@ -1044,85 +1033,101 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
|
||||
return NULL;
|
||||
|
||||
return kmalloc(sizeof(struct bio_map_data) +
|
||||
sizeof(struct sg_iovec) * iov_count, gfp_mask);
|
||||
sizeof(struct iovec) * iov_count, gfp_mask);
|
||||
}
|
||||
|
||||
static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
|
||||
int to_user, int from_user, int do_free_page)
|
||||
/**
|
||||
* bio_copy_from_iter - copy all pages from iov_iter to bio
|
||||
* @bio: The &struct bio which describes the I/O as destination
|
||||
* @iter: iov_iter as source
|
||||
*
|
||||
* Copy all pages from iov_iter to bio.
|
||||
* Returns 0 on success, or error on failure.
|
||||
*/
|
||||
static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
|
||||
{
|
||||
int ret = 0, i;
|
||||
int i;
|
||||
struct bio_vec *bvec;
|
||||
int iov_idx = 0;
|
||||
unsigned int iov_off = 0;
|
||||
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
char *bv_addr = page_address(bvec->bv_page);
|
||||
unsigned int bv_len = bvec->bv_len;
|
||||
ssize_t ret;
|
||||
|
||||
while (bv_len && iov_idx < iov_count) {
|
||||
unsigned int bytes;
|
||||
char __user *iov_addr;
|
||||
ret = copy_page_from_iter(bvec->bv_page,
|
||||
bvec->bv_offset,
|
||||
bvec->bv_len,
|
||||
&iter);
|
||||
|
||||
bytes = min_t(unsigned int,
|
||||
iov[iov_idx].iov_len - iov_off, bv_len);
|
||||
iov_addr = iov[iov_idx].iov_base + iov_off;
|
||||
if (!iov_iter_count(&iter))
|
||||
break;
|
||||
|
||||
if (!ret) {
|
||||
if (to_user)
|
||||
ret = copy_to_user(iov_addr, bv_addr,
|
||||
bytes);
|
||||
|
||||
if (from_user)
|
||||
ret = copy_from_user(bv_addr, iov_addr,
|
||||
bytes);
|
||||
|
||||
if (ret)
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
bv_len -= bytes;
|
||||
bv_addr += bytes;
|
||||
iov_addr += bytes;
|
||||
iov_off += bytes;
|
||||
|
||||
if (iov[iov_idx].iov_len == iov_off) {
|
||||
iov_idx++;
|
||||
iov_off = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (do_free_page)
|
||||
__free_page(bvec->bv_page);
|
||||
if (ret < bvec->bv_len)
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_copy_to_iter - copy all pages from bio to iov_iter
|
||||
* @bio: The &struct bio which describes the I/O as source
|
||||
* @iter: iov_iter as destination
|
||||
*
|
||||
* Copy all pages from bio to iov_iter.
|
||||
* Returns 0 on success, or error on failure.
|
||||
*/
|
||||
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
|
||||
{
|
||||
int i;
|
||||
struct bio_vec *bvec;
|
||||
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
ssize_t ret;
|
||||
|
||||
ret = copy_page_to_iter(bvec->bv_page,
|
||||
bvec->bv_offset,
|
||||
bvec->bv_len,
|
||||
&iter);
|
||||
|
||||
if (!iov_iter_count(&iter))
|
||||
break;
|
||||
|
||||
if (ret < bvec->bv_len)
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bio_free_pages(struct bio *bio)
|
||||
{
|
||||
struct bio_vec *bvec;
|
||||
int i;
|
||||
|
||||
bio_for_each_segment_all(bvec, bio, i)
|
||||
__free_page(bvec->bv_page);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_uncopy_user - finish previously mapped bio
|
||||
* @bio: bio being terminated
|
||||
*
|
||||
* Free pages allocated from bio_copy_user() and write back data
|
||||
* Free pages allocated from bio_copy_user_iov() and write back data
|
||||
* to user space in case of a read.
|
||||
*/
|
||||
int bio_uncopy_user(struct bio *bio)
|
||||
{
|
||||
struct bio_map_data *bmd = bio->bi_private;
|
||||
struct bio_vec *bvec;
|
||||
int ret = 0, i;
|
||||
int ret = 0;
|
||||
|
||||
if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
|
||||
/*
|
||||
* if we're in a workqueue, the request is orphaned, so
|
||||
* don't copy into a random user address space, just free.
|
||||
*/
|
||||
if (current->mm)
|
||||
ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
|
||||
bio_data_dir(bio) == READ,
|
||||
0, bmd->is_our_pages);
|
||||
else if (bmd->is_our_pages)
|
||||
bio_for_each_segment_all(bvec, bio, i)
|
||||
__free_page(bvec->bv_page);
|
||||
if (current->mm && bio_data_dir(bio) == READ)
|
||||
ret = bio_copy_to_iter(bio, bmd->iter);
|
||||
if (bmd->is_our_pages)
|
||||
bio_free_pages(bio);
|
||||
}
|
||||
kfree(bmd);
|
||||
bio_put(bio);
|
||||
@ -1132,12 +1137,10 @@ EXPORT_SYMBOL(bio_uncopy_user);
|
||||
|
||||
/**
|
||||
* bio_copy_user_iov - copy user data to bio
|
||||
* @q: destination block queue
|
||||
* @map_data: pointer to the rq_map_data holding pages (if necessary)
|
||||
* @iov: the iovec.
|
||||
* @iov_count: number of elements in the iovec
|
||||
* @write_to_vm: bool indicating writing to pages or not
|
||||
* @gfp_mask: memory allocation flags
|
||||
* @q: destination block queue
|
||||
* @map_data: pointer to the rq_map_data holding pages (if necessary)
|
||||
* @iter: iovec iterator
|
||||
* @gfp_mask: memory allocation flags
|
||||
*
|
||||
* Prepares and returns a bio for indirect user io, bouncing data
|
||||
* to/from kernel pages as necessary. Must be paired with
|
||||
@ -1145,25 +1148,25 @@ EXPORT_SYMBOL(bio_uncopy_user);
|
||||
*/
|
||||
struct bio *bio_copy_user_iov(struct request_queue *q,
|
||||
struct rq_map_data *map_data,
|
||||
const struct sg_iovec *iov, int iov_count,
|
||||
int write_to_vm, gfp_t gfp_mask)
|
||||
const struct iov_iter *iter,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct bio_map_data *bmd;
|
||||
struct bio_vec *bvec;
|
||||
struct page *page;
|
||||
struct bio *bio;
|
||||
int i, ret;
|
||||
int nr_pages = 0;
|
||||
unsigned int len = 0;
|
||||
unsigned int len = iter->count;
|
||||
unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
|
||||
|
||||
for (i = 0; i < iov_count; i++) {
|
||||
for (i = 0; i < iter->nr_segs; i++) {
|
||||
unsigned long uaddr;
|
||||
unsigned long end;
|
||||
unsigned long start;
|
||||
|
||||
uaddr = (unsigned long)iov[i].iov_base;
|
||||
end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
uaddr = (unsigned long) iter->iov[i].iov_base;
|
||||
end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
|
||||
>> PAGE_SHIFT;
|
||||
start = uaddr >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
@ -1173,22 +1176,31 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
nr_pages += end - start;
|
||||
len += iov[i].iov_len;
|
||||
}
|
||||
|
||||
if (offset)
|
||||
nr_pages++;
|
||||
|
||||
bmd = bio_alloc_map_data(iov_count, gfp_mask);
|
||||
bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
|
||||
if (!bmd)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/*
|
||||
* We need to do a deep copy of the iov_iter including the iovecs.
|
||||
* The caller provided iov might point to an on-stack or otherwise
|
||||
* shortlived one.
|
||||
*/
|
||||
bmd->is_our_pages = map_data ? 0 : 1;
|
||||
memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
|
||||
iov_iter_init(&bmd->iter, iter->type, bmd->iov,
|
||||
iter->nr_segs, iter->count);
|
||||
|
||||
ret = -ENOMEM;
|
||||
bio = bio_kmalloc(gfp_mask, nr_pages);
|
||||
if (!bio)
|
||||
goto out_bmd;
|
||||
|
||||
if (!write_to_vm)
|
||||
if (iter->type & WRITE)
|
||||
bio->bi_rw |= REQ_WRITE;
|
||||
|
||||
ret = 0;
|
||||
@ -1236,20 +1248,18 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
|
||||
/*
|
||||
* success
|
||||
*/
|
||||
if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
|
||||
if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
|
||||
(map_data && map_data->from_user)) {
|
||||
ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
|
||||
ret = bio_copy_from_iter(bio, *iter);
|
||||
if (ret)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
|
||||
bio->bi_private = bmd;
|
||||
return bio;
|
||||
cleanup:
|
||||
if (!map_data)
|
||||
bio_for_each_segment_all(bvec, bio, i)
|
||||
__free_page(bvec->bv_page);
|
||||
|
||||
bio_free_pages(bio);
|
||||
bio_put(bio);
|
||||
out_bmd:
|
||||
kfree(bmd);
|
||||
@ -1257,46 +1267,30 @@ out_bmd:
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_copy_user - copy user data to bio
|
||||
* @q: destination block queue
|
||||
* @map_data: pointer to the rq_map_data holding pages (if necessary)
|
||||
* @uaddr: start of user address
|
||||
* @len: length in bytes
|
||||
* @write_to_vm: bool indicating writing to pages or not
|
||||
* @gfp_mask: memory allocation flags
|
||||
* bio_map_user_iov - map user iovec into bio
|
||||
* @q: the struct request_queue for the bio
|
||||
* @iter: iovec iterator
|
||||
* @gfp_mask: memory allocation flags
|
||||
*
|
||||
* Prepares and returns a bio for indirect user io, bouncing data
|
||||
* to/from kernel pages as necessary. Must be paired with
|
||||
* call bio_uncopy_user() on io completion.
|
||||
* Map the user space address into a bio suitable for io to a block
|
||||
* device. Returns an error pointer in case of error.
|
||||
*/
|
||||
struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
|
||||
unsigned long uaddr, unsigned int len,
|
||||
int write_to_vm, gfp_t gfp_mask)
|
||||
struct bio *bio_map_user_iov(struct request_queue *q,
|
||||
const struct iov_iter *iter,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct sg_iovec iov;
|
||||
|
||||
iov.iov_base = (void __user *)uaddr;
|
||||
iov.iov_len = len;
|
||||
|
||||
return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_copy_user);
|
||||
|
||||
static struct bio *__bio_map_user_iov(struct request_queue *q,
|
||||
struct block_device *bdev,
|
||||
const struct sg_iovec *iov, int iov_count,
|
||||
int write_to_vm, gfp_t gfp_mask)
|
||||
{
|
||||
int i, j;
|
||||
int j;
|
||||
int nr_pages = 0;
|
||||
struct page **pages;
|
||||
struct bio *bio;
|
||||
int cur_page = 0;
|
||||
int ret, offset;
|
||||
struct iov_iter i;
|
||||
struct iovec iov;
|
||||
|
||||
for (i = 0; i < iov_count; i++) {
|
||||
unsigned long uaddr = (unsigned long)iov[i].iov_base;
|
||||
unsigned long len = iov[i].iov_len;
|
||||
iov_for_each(iov, i, *iter) {
|
||||
unsigned long uaddr = (unsigned long) iov.iov_base;
|
||||
unsigned long len = iov.iov_len;
|
||||
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
unsigned long start = uaddr >> PAGE_SHIFT;
|
||||
|
||||
@ -1326,16 +1320,17 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
|
||||
if (!pages)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < iov_count; i++) {
|
||||
unsigned long uaddr = (unsigned long)iov[i].iov_base;
|
||||
unsigned long len = iov[i].iov_len;
|
||||
iov_for_each(iov, i, *iter) {
|
||||
unsigned long uaddr = (unsigned long) iov.iov_base;
|
||||
unsigned long len = iov.iov_len;
|
||||
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
unsigned long start = uaddr >> PAGE_SHIFT;
|
||||
const int local_nr_pages = end - start;
|
||||
const int page_limit = cur_page + local_nr_pages;
|
||||
|
||||
ret = get_user_pages_fast(uaddr, local_nr_pages,
|
||||
write_to_vm, &pages[cur_page]);
|
||||
(iter->type & WRITE) != WRITE,
|
||||
&pages[cur_page]);
|
||||
if (ret < local_nr_pages) {
|
||||
ret = -EFAULT;
|
||||
goto out_unmap;
|
||||
@ -1375,72 +1370,10 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
|
||||
/*
|
||||
* set data direction, and check if mapped pages need bouncing
|
||||
*/
|
||||
if (!write_to_vm)
|
||||
if (iter->type & WRITE)
|
||||
bio->bi_rw |= REQ_WRITE;
|
||||
|
||||
bio->bi_bdev = bdev;
|
||||
bio->bi_flags |= (1 << BIO_USER_MAPPED);
|
||||
return bio;
|
||||
|
||||
out_unmap:
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
if(!pages[i])
|
||||
break;
|
||||
page_cache_release(pages[i]);
|
||||
}
|
||||
out:
|
||||
kfree(pages);
|
||||
bio_put(bio);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_map_user - map user address into bio
|
||||
* @q: the struct request_queue for the bio
|
||||
* @bdev: destination block device
|
||||
* @uaddr: start of user address
|
||||
* @len: length in bytes
|
||||
* @write_to_vm: bool indicating writing to pages or not
|
||||
* @gfp_mask: memory allocation flags
|
||||
*
|
||||
* Map the user space address into a bio suitable for io to a block
|
||||
* device. Returns an error pointer in case of error.
|
||||
*/
|
||||
struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
|
||||
unsigned long uaddr, unsigned int len, int write_to_vm,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct sg_iovec iov;
|
||||
|
||||
iov.iov_base = (void __user *)uaddr;
|
||||
iov.iov_len = len;
|
||||
|
||||
return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_map_user);
|
||||
|
||||
/**
|
||||
* bio_map_user_iov - map user sg_iovec table into bio
|
||||
* @q: the struct request_queue for the bio
|
||||
* @bdev: destination block device
|
||||
* @iov: the iovec.
|
||||
* @iov_count: number of elements in the iovec
|
||||
* @write_to_vm: bool indicating writing to pages or not
|
||||
* @gfp_mask: memory allocation flags
|
||||
*
|
||||
* Map the user space address into a bio suitable for io to a block
|
||||
* device. Returns an error pointer in case of error.
|
||||
*/
|
||||
struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
|
||||
const struct sg_iovec *iov, int iov_count,
|
||||
int write_to_vm, gfp_t gfp_mask)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
|
||||
gfp_mask);
|
||||
if (IS_ERR(bio))
|
||||
return bio;
|
||||
|
||||
/*
|
||||
* subtle -- if __bio_map_user() ended up bouncing a bio,
|
||||
@ -1449,8 +1382,18 @@ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
|
||||
* reference to it
|
||||
*/
|
||||
bio_get(bio);
|
||||
|
||||
return bio;
|
||||
|
||||
out_unmap:
|
||||
for (j = 0; j < nr_pages; j++) {
|
||||
if (!pages[j])
|
||||
break;
|
||||
page_cache_release(pages[j]);
|
||||
}
|
||||
out:
|
||||
kfree(pages);
|
||||
bio_put(bio);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static void __bio_unmap_user(struct bio *bio)
|
||||
@ -1492,8 +1435,18 @@ static void bio_map_kern_endio(struct bio *bio, int err)
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static struct bio *__bio_map_kern(struct request_queue *q, void *data,
|
||||
unsigned int len, gfp_t gfp_mask)
|
||||
/**
|
||||
* bio_map_kern - map kernel address into bio
|
||||
* @q: the struct request_queue for the bio
|
||||
* @data: pointer to buffer to map
|
||||
* @len: length in bytes
|
||||
* @gfp_mask: allocation flags for bio allocation
|
||||
*
|
||||
* Map the kernel address into a bio suitable for io to a block
|
||||
* device. Returns an error pointer in case of error.
|
||||
*/
|
||||
struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
unsigned long kaddr = (unsigned long)data;
|
||||
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
@ -1517,8 +1470,11 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
|
||||
bytes = len;
|
||||
|
||||
if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
|
||||
offset) < bytes)
|
||||
break;
|
||||
offset) < bytes) {
|
||||
/* we don't support partial mappings */
|
||||
bio_put(bio);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
data += bytes;
|
||||
len -= bytes;
|
||||
@ -1528,57 +1484,26 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
|
||||
bio->bi_end_io = bio_map_kern_endio;
|
||||
return bio;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_map_kern - map kernel address into bio
|
||||
* @q: the struct request_queue for the bio
|
||||
* @data: pointer to buffer to map
|
||||
* @len: length in bytes
|
||||
* @gfp_mask: allocation flags for bio allocation
|
||||
*
|
||||
* Map the kernel address into a bio suitable for io to a block
|
||||
* device. Returns an error pointer in case of error.
|
||||
*/
|
||||
struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
bio = __bio_map_kern(q, data, len, gfp_mask);
|
||||
if (IS_ERR(bio))
|
||||
return bio;
|
||||
|
||||
if (bio->bi_iter.bi_size == len)
|
||||
return bio;
|
||||
|
||||
/*
|
||||
* Don't support partial mappings.
|
||||
*/
|
||||
bio_put(bio);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_map_kern);
|
||||
|
||||
static void bio_copy_kern_endio(struct bio *bio, int err)
|
||||
{
|
||||
bio_free_pages(bio);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static void bio_copy_kern_endio_read(struct bio *bio, int err)
|
||||
{
|
||||
char *p = bio->bi_private;
|
||||
struct bio_vec *bvec;
|
||||
const int read = bio_data_dir(bio) == READ;
|
||||
struct bio_map_data *bmd = bio->bi_private;
|
||||
int i;
|
||||
char *p = bmd->sgvecs[0].iov_base;
|
||||
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
char *addr = page_address(bvec->bv_page);
|
||||
|
||||
if (read)
|
||||
memcpy(p, addr, bvec->bv_len);
|
||||
|
||||
__free_page(bvec->bv_page);
|
||||
memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
|
||||
p += bvec->bv_len;
|
||||
}
|
||||
|
||||
kfree(bmd);
|
||||
bio_put(bio);
|
||||
bio_copy_kern_endio(bio, err);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1595,28 +1520,59 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
|
||||
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
|
||||
gfp_t gfp_mask, int reading)
|
||||
{
|
||||
unsigned long kaddr = (unsigned long)data;
|
||||
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
unsigned long start = kaddr >> PAGE_SHIFT;
|
||||
struct bio *bio;
|
||||
struct bio_vec *bvec;
|
||||
int i;
|
||||
void *p = data;
|
||||
int nr_pages = 0;
|
||||
|
||||
bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
|
||||
if (IS_ERR(bio))
|
||||
return bio;
|
||||
/*
|
||||
* Overflow, abort
|
||||
*/
|
||||
if (end < start)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (!reading) {
|
||||
void *p = data;
|
||||
nr_pages = end - start;
|
||||
bio = bio_kmalloc(gfp_mask, nr_pages);
|
||||
if (!bio)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
char *addr = page_address(bvec->bv_page);
|
||||
while (len) {
|
||||
struct page *page;
|
||||
unsigned int bytes = PAGE_SIZE;
|
||||
|
||||
memcpy(addr, p, bvec->bv_len);
|
||||
p += bvec->bv_len;
|
||||
}
|
||||
if (bytes > len)
|
||||
bytes = len;
|
||||
|
||||
page = alloc_page(q->bounce_gfp | gfp_mask);
|
||||
if (!page)
|
||||
goto cleanup;
|
||||
|
||||
if (!reading)
|
||||
memcpy(page_address(page), p, bytes);
|
||||
|
||||
if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
|
||||
break;
|
||||
|
||||
len -= bytes;
|
||||
p += bytes;
|
||||
}
|
||||
|
||||
bio->bi_end_io = bio_copy_kern_endio;
|
||||
if (reading) {
|
||||
bio->bi_end_io = bio_copy_kern_endio_read;
|
||||
bio->bi_private = data;
|
||||
} else {
|
||||
bio->bi_end_io = bio_copy_kern_endio;
|
||||
bio->bi_rw |= REQ_WRITE;
|
||||
}
|
||||
|
||||
return bio;
|
||||
|
||||
cleanup:
|
||||
bio_free_pages(bio);
|
||||
bio_put(bio);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_copy_kern);
|
||||
|
||||
|
@ -2048,6 +2048,13 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
|
||||
should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
|
||||
return -EIO;
|
||||
|
||||
if (q->mq_ops) {
|
||||
if (blk_queue_io_stat(q))
|
||||
blk_account_io_start(rq, true);
|
||||
blk_mq_insert_request(rq, false, true, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
if (unlikely(blk_queue_dying(q))) {
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
@ -2907,7 +2914,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
|
||||
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
|
||||
{
|
||||
dst->cpu = src->cpu;
|
||||
dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
|
||||
dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
|
||||
dst->cmd_type = src->cmd_type;
|
||||
dst->__sector = blk_rq_pos(src);
|
||||
dst->__data_len = blk_rq_bytes(src);
|
||||
@ -2945,8 +2952,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
|
||||
if (!bs)
|
||||
bs = fs_bio_set;
|
||||
|
||||
blk_rq_init(NULL, rq);
|
||||
|
||||
__rq_for_each_bio(bio_src, rq_src) {
|
||||
bio = bio_clone_fast(bio_src, gfp_mask, bs);
|
||||
if (!bio)
|
||||
|
@ -283,24 +283,34 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||
* @sector: start sector
|
||||
* @nr_sects: number of sectors to write
|
||||
* @gfp_mask: memory allocation flags (for bio_alloc)
|
||||
* @discard: whether to discard the block range
|
||||
*
|
||||
* Description:
|
||||
* Generate and issue number of bios with zerofiled pages.
|
||||
* Zero-fill a block range. If the discard flag is set and the block
|
||||
* device guarantees that subsequent READ operations to the block range
|
||||
* in question will return zeroes, the blocks will be discarded. Should
|
||||
* the discard request fail, if the discard flag is not set, or if
|
||||
* discard_zeroes_data is not supported, this function will resort to
|
||||
* zeroing the blocks manually, thus provisioning (allocating,
|
||||
* anchoring) them. If the block device supports the WRITE SAME command
|
||||
* blkdev_issue_zeroout() will use it to optimize the process of
|
||||
* clearing the block range. Otherwise the zeroing will be performed
|
||||
* using regular WRITE calls.
|
||||
*/
|
||||
|
||||
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask)
|
||||
sector_t nr_sects, gfp_t gfp_mask, bool discard)
|
||||
{
|
||||
if (bdev_write_same(bdev)) {
|
||||
unsigned char bdn[BDEVNAME_SIZE];
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
|
||||
ZERO_PAGE(0)))
|
||||
return 0;
|
||||
if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data &&
|
||||
blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0)
|
||||
return 0;
|
||||
|
||||
bdevname(bdev, bdn);
|
||||
pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
|
||||
}
|
||||
if (bdev_write_same(bdev) &&
|
||||
blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
|
||||
ZERO_PAGE(0)) == 0)
|
||||
return 0;
|
||||
|
||||
return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
|
||||
}
|
||||
|
172
block/blk-map.c
172
block/blk-map.c
@ -5,7 +5,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <scsi/sg.h> /* for struct sg_iovec */
|
||||
#include <linux/uio.h>
|
||||
|
||||
#include "blk.h"
|
||||
|
||||
@ -39,138 +39,12 @@ static int __blk_rq_unmap_user(struct bio *bio)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
|
||||
struct rq_map_data *map_data, void __user *ubuf,
|
||||
unsigned int len, gfp_t gfp_mask)
|
||||
{
|
||||
unsigned long uaddr;
|
||||
struct bio *bio, *orig_bio;
|
||||
int reading, ret;
|
||||
|
||||
reading = rq_data_dir(rq) == READ;
|
||||
|
||||
/*
|
||||
* if alignment requirement is satisfied, map in user pages for
|
||||
* direct dma. else, set up kernel bounce buffers
|
||||
*/
|
||||
uaddr = (unsigned long) ubuf;
|
||||
if (blk_rq_aligned(q, uaddr, len) && !map_data)
|
||||
bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
|
||||
else
|
||||
bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
|
||||
|
||||
if (IS_ERR(bio))
|
||||
return PTR_ERR(bio);
|
||||
|
||||
if (map_data && map_data->null_mapped)
|
||||
bio->bi_flags |= (1 << BIO_NULL_MAPPED);
|
||||
|
||||
orig_bio = bio;
|
||||
blk_queue_bounce(q, &bio);
|
||||
|
||||
/*
|
||||
* We link the bounce buffer in and could have to traverse it
|
||||
* later so we have to get a ref to prevent it from being freed
|
||||
*/
|
||||
bio_get(bio);
|
||||
|
||||
ret = blk_rq_append_bio(q, rq, bio);
|
||||
if (!ret)
|
||||
return bio->bi_iter.bi_size;
|
||||
|
||||
/* if it was boucned we must call the end io function */
|
||||
bio_endio(bio, 0);
|
||||
__blk_rq_unmap_user(orig_bio);
|
||||
bio_put(bio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
|
||||
* @q: request queue where request should be inserted
|
||||
* @rq: request structure to fill
|
||||
* @map_data: pointer to the rq_map_data holding pages (if necessary)
|
||||
* @ubuf: the user buffer
|
||||
* @len: length of user data
|
||||
* @gfp_mask: memory allocation flags
|
||||
*
|
||||
* Description:
|
||||
* Data will be mapped directly for zero copy I/O, if possible. Otherwise
|
||||
* a kernel bounce buffer is used.
|
||||
*
|
||||
* A matching blk_rq_unmap_user() must be issued at the end of I/O, while
|
||||
* still in process context.
|
||||
*
|
||||
* Note: The mapped bio may need to be bounced through blk_queue_bounce()
|
||||
* before being submitted to the device, as pages mapped may be out of
|
||||
* reach. It's the callers responsibility to make sure this happens. The
|
||||
* original bio must be passed back in to blk_rq_unmap_user() for proper
|
||||
* unmapping.
|
||||
*/
|
||||
int blk_rq_map_user(struct request_queue *q, struct request *rq,
|
||||
struct rq_map_data *map_data, void __user *ubuf,
|
||||
unsigned long len, gfp_t gfp_mask)
|
||||
{
|
||||
unsigned long bytes_read = 0;
|
||||
struct bio *bio = NULL;
|
||||
int ret;
|
||||
|
||||
if (len > (queue_max_hw_sectors(q) << 9))
|
||||
return -EINVAL;
|
||||
if (!len)
|
||||
return -EINVAL;
|
||||
|
||||
if (!ubuf && (!map_data || !map_data->null_mapped))
|
||||
return -EINVAL;
|
||||
|
||||
while (bytes_read != len) {
|
||||
unsigned long map_len, end, start;
|
||||
|
||||
map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
|
||||
end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
|
||||
>> PAGE_SHIFT;
|
||||
start = (unsigned long)ubuf >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* A bad offset could cause us to require BIO_MAX_PAGES + 1
|
||||
* pages. If this happens we just lower the requested
|
||||
* mapping len by a page so that we can fit
|
||||
*/
|
||||
if (end - start > BIO_MAX_PAGES)
|
||||
map_len -= PAGE_SIZE;
|
||||
|
||||
ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
|
||||
gfp_mask);
|
||||
if (ret < 0)
|
||||
goto unmap_rq;
|
||||
if (!bio)
|
||||
bio = rq->bio;
|
||||
bytes_read += ret;
|
||||
ubuf += ret;
|
||||
|
||||
if (map_data)
|
||||
map_data->offset += ret;
|
||||
}
|
||||
|
||||
if (!bio_flagged(bio, BIO_USER_MAPPED))
|
||||
rq->cmd_flags |= REQ_COPY_USER;
|
||||
|
||||
return 0;
|
||||
unmap_rq:
|
||||
blk_rq_unmap_user(bio);
|
||||
rq->bio = NULL;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_map_user);
|
||||
|
||||
/**
|
||||
* blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
|
||||
* @q: request queue where request should be inserted
|
||||
* @rq: request to map data to
|
||||
* @map_data: pointer to the rq_map_data holding pages (if necessary)
|
||||
* @iov: pointer to the iovec
|
||||
* @iov_count: number of elements in the iovec
|
||||
* @len: I/O byte count
|
||||
* @iter: iovec iterator
|
||||
* @gfp_mask: memory allocation flags
|
||||
*
|
||||
* Description:
|
||||
@ -187,20 +61,21 @@ EXPORT_SYMBOL(blk_rq_map_user);
|
||||
* unmapping.
|
||||
*/
|
||||
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
|
||||
struct rq_map_data *map_data, const struct sg_iovec *iov,
|
||||
int iov_count, unsigned int len, gfp_t gfp_mask)
|
||||
struct rq_map_data *map_data,
|
||||
const struct iov_iter *iter, gfp_t gfp_mask)
|
||||
{
|
||||
struct bio *bio;
|
||||
int i, read = rq_data_dir(rq) == READ;
|
||||
int unaligned = 0;
|
||||
struct iov_iter i;
|
||||
struct iovec iov;
|
||||
|
||||
if (!iov || iov_count <= 0)
|
||||
if (!iter || !iter->count)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < iov_count; i++) {
|
||||
unsigned long uaddr = (unsigned long)iov[i].iov_base;
|
||||
iov_for_each(iov, i, *iter) {
|
||||
unsigned long uaddr = (unsigned long) iov.iov_base;
|
||||
|
||||
if (!iov[i].iov_len)
|
||||
if (!iov.iov_len)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
@ -210,16 +85,18 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
|
||||
unaligned = 1;
|
||||
}
|
||||
|
||||
if (unaligned || (q->dma_pad_mask & len) || map_data)
|
||||
bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
|
||||
gfp_mask);
|
||||
if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
|
||||
bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
|
||||
else
|
||||
bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
|
||||
bio = bio_map_user_iov(q, iter, gfp_mask);
|
||||
|
||||
if (IS_ERR(bio))
|
||||
return PTR_ERR(bio);
|
||||
|
||||
if (bio->bi_iter.bi_size != len) {
|
||||
if (map_data && map_data->null_mapped)
|
||||
bio->bi_flags |= (1 << BIO_NULL_MAPPED);
|
||||
|
||||
if (bio->bi_iter.bi_size != iter->count) {
|
||||
/*
|
||||
* Grab an extra reference to this bio, as bio_unmap_user()
|
||||
* expects to be able to drop it twice as it happens on the
|
||||
@ -241,6 +118,21 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_map_user_iov);
|
||||
|
||||
int blk_rq_map_user(struct request_queue *q, struct request *rq,
|
||||
struct rq_map_data *map_data, void __user *ubuf,
|
||||
unsigned long len, gfp_t gfp_mask)
|
||||
{
|
||||
struct iovec iov;
|
||||
struct iov_iter i;
|
||||
|
||||
iov.iov_base = ubuf;
|
||||
iov.iov_len = len;
|
||||
iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len);
|
||||
|
||||
return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_map_user);
|
||||
|
||||
/**
|
||||
* blk_rq_unmap_user - unmap a request with user data
|
||||
* @bio: start of bio list
|
||||
|
@ -283,35 +283,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_map_sg);
|
||||
|
||||
/**
|
||||
* blk_bio_map_sg - map a bio to a scatterlist
|
||||
* @q: request_queue in question
|
||||
* @bio: bio being mapped
|
||||
* @sglist: scatterlist being mapped
|
||||
*
|
||||
* Note:
|
||||
* Caller must make sure sg can hold bio->bi_phys_segments entries
|
||||
*
|
||||
* Will return the number of sg entries setup
|
||||
*/
|
||||
int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
|
||||
struct scatterlist *sglist)
|
||||
{
|
||||
struct scatterlist *sg = NULL;
|
||||
int nsegs;
|
||||
struct bio *next = bio->bi_next;
|
||||
bio->bi_next = NULL;
|
||||
|
||||
nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
|
||||
bio->bi_next = next;
|
||||
if (sg)
|
||||
sg_mark_end(sg);
|
||||
|
||||
BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);
|
||||
return nsegs;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_bio_map_sg);
|
||||
|
||||
static inline int ll_new_hw_segment(struct request_queue *q,
|
||||
struct request *req,
|
||||
struct bio *bio)
|
||||
@ -385,6 +356,14 @@ static bool req_no_special_merge(struct request *req)
|
||||
return !q->mq_ops && req->special;
|
||||
}
|
||||
|
||||
static int req_gap_to_prev(struct request *req, struct request *next)
|
||||
{
|
||||
struct bio *prev = req->biotail;
|
||||
|
||||
return bvec_gap_to_prev(&prev->bi_io_vec[prev->bi_vcnt - 1],
|
||||
next->bio->bi_io_vec[0].bv_offset);
|
||||
}
|
||||
|
||||
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
|
||||
struct request *next)
|
||||
{
|
||||
@ -399,6 +378,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
|
||||
if (req_no_special_merge(req) || req_no_special_merge(next))
|
||||
return 0;
|
||||
|
||||
if (test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags) &&
|
||||
req_gap_to_prev(req, next))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Will it become too large?
|
||||
*/
|
||||
|
@ -140,35 +140,39 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
|
||||
return atomic_read(&hctx->nr_active) < depth;
|
||||
}
|
||||
|
||||
static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
|
||||
static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
|
||||
bool nowrap)
|
||||
{
|
||||
int tag, org_last_tag, end;
|
||||
bool wrap = last_tag != 0;
|
||||
int tag, org_last_tag = last_tag;
|
||||
|
||||
org_last_tag = last_tag;
|
||||
end = bm->depth;
|
||||
do {
|
||||
restart:
|
||||
tag = find_next_zero_bit(&bm->word, end, last_tag);
|
||||
if (unlikely(tag >= end)) {
|
||||
while (1) {
|
||||
tag = find_next_zero_bit(&bm->word, bm->depth, last_tag);
|
||||
if (unlikely(tag >= bm->depth)) {
|
||||
/*
|
||||
* We started with an offset, start from 0 to
|
||||
* We started with an offset, and we didn't reset the
|
||||
* offset to 0 in a failure case, so start from 0 to
|
||||
* exhaust the map.
|
||||
*/
|
||||
if (wrap) {
|
||||
wrap = false;
|
||||
end = org_last_tag;
|
||||
last_tag = 0;
|
||||
goto restart;
|
||||
if (org_last_tag && last_tag && !nowrap) {
|
||||
last_tag = org_last_tag = 0;
|
||||
continue;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!test_and_set_bit(tag, &bm->word))
|
||||
break;
|
||||
|
||||
last_tag = tag + 1;
|
||||
} while (test_and_set_bit(tag, &bm->word));
|
||||
if (last_tag >= bm->depth - 1)
|
||||
last_tag = 0;
|
||||
}
|
||||
|
||||
return tag;
|
||||
}
|
||||
|
||||
#define BT_ALLOC_RR(tags) (tags->alloc_policy == BLK_TAG_ALLOC_RR)
|
||||
|
||||
/*
|
||||
* Straightforward bitmap tag implementation, where each bit is a tag
|
||||
* (cleared == free, and set == busy). The small twist is using per-cpu
|
||||
@ -181,7 +185,7 @@ restart:
|
||||
* until the map is exhausted.
|
||||
*/
|
||||
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
|
||||
unsigned int *tag_cache)
|
||||
unsigned int *tag_cache, struct blk_mq_tags *tags)
|
||||
{
|
||||
unsigned int last_tag, org_last_tag;
|
||||
int index, i, tag;
|
||||
@ -193,15 +197,24 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
|
||||
index = TAG_TO_INDEX(bt, last_tag);
|
||||
|
||||
for (i = 0; i < bt->map_nr; i++) {
|
||||
tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag));
|
||||
tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
|
||||
BT_ALLOC_RR(tags));
|
||||
if (tag != -1) {
|
||||
tag += (index << bt->bits_per_word);
|
||||
goto done;
|
||||
}
|
||||
|
||||
last_tag = 0;
|
||||
if (++index >= bt->map_nr)
|
||||
/*
|
||||
* Jump to next index, and reset the last tag to be the
|
||||
* first tag of that index
|
||||
*/
|
||||
index++;
|
||||
last_tag = (index << bt->bits_per_word);
|
||||
|
||||
if (index >= bt->map_nr) {
|
||||
index = 0;
|
||||
last_tag = 0;
|
||||
}
|
||||
}
|
||||
|
||||
*tag_cache = 0;
|
||||
@ -212,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
|
||||
* up using the specific cached tag.
|
||||
*/
|
||||
done:
|
||||
if (tag == org_last_tag) {
|
||||
if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
|
||||
last_tag = tag + 1;
|
||||
if (last_tag >= bt->depth - 1)
|
||||
last_tag = 0;
|
||||
@ -241,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
|
||||
static int bt_get(struct blk_mq_alloc_data *data,
|
||||
struct blk_mq_bitmap_tags *bt,
|
||||
struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int *last_tag)
|
||||
unsigned int *last_tag, struct blk_mq_tags *tags)
|
||||
{
|
||||
struct bt_wait_state *bs;
|
||||
DEFINE_WAIT(wait);
|
||||
int tag;
|
||||
|
||||
tag = __bt_get(hctx, bt, last_tag);
|
||||
tag = __bt_get(hctx, bt, last_tag, tags);
|
||||
if (tag != -1)
|
||||
return tag;
|
||||
|
||||
@ -258,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
|
||||
do {
|
||||
prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
|
||||
tag = __bt_get(hctx, bt, last_tag);
|
||||
tag = __bt_get(hctx, bt, last_tag, tags);
|
||||
if (tag != -1)
|
||||
break;
|
||||
|
||||
@ -273,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
|
||||
* Retry tag allocation after running the hardware queue,
|
||||
* as running the queue may also have found completions.
|
||||
*/
|
||||
tag = __bt_get(hctx, bt, last_tag);
|
||||
tag = __bt_get(hctx, bt, last_tag, tags);
|
||||
if (tag != -1)
|
||||
break;
|
||||
|
||||
@ -304,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
||||
int tag;
|
||||
|
||||
tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
|
||||
&data->ctx->last_tag);
|
||||
&data->ctx->last_tag, data->hctx->tags);
|
||||
if (tag >= 0)
|
||||
return tag + data->hctx->tags->nr_reserved_tags;
|
||||
|
||||
@ -320,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
|
||||
return BLK_MQ_TAG_FAIL;
|
||||
}
|
||||
|
||||
tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero);
|
||||
tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
|
||||
data->hctx->tags);
|
||||
if (tag < 0)
|
||||
return BLK_MQ_TAG_FAIL;
|
||||
|
||||
@ -392,7 +406,8 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
|
||||
|
||||
BUG_ON(real_tag >= tags->nr_tags);
|
||||
bt_clear_tag(&tags->bitmap_tags, real_tag);
|
||||
*last_tag = real_tag;
|
||||
if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
|
||||
*last_tag = real_tag;
|
||||
} else {
|
||||
BUG_ON(tag >= tags->nr_reserved_tags);
|
||||
bt_clear_tag(&tags->breserved_tags, tag);
|
||||
@ -509,6 +524,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
|
||||
bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
|
||||
if (!bt->bs) {
|
||||
kfree(bt->map);
|
||||
bt->map = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -529,10 +545,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
|
||||
}
|
||||
|
||||
static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
|
||||
int node)
|
||||
int node, int alloc_policy)
|
||||
{
|
||||
unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
|
||||
|
||||
tags->alloc_policy = alloc_policy;
|
||||
|
||||
if (bt_alloc(&tags->bitmap_tags, depth, node, false))
|
||||
goto enomem;
|
||||
if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
|
||||
@ -546,7 +564,8 @@ enomem:
|
||||
}
|
||||
|
||||
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
|
||||
unsigned int reserved_tags, int node)
|
||||
unsigned int reserved_tags,
|
||||
int node, int alloc_policy)
|
||||
{
|
||||
struct blk_mq_tags *tags;
|
||||
|
||||
@ -562,7 +581,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
|
||||
tags->nr_tags = total_tags;
|
||||
tags->nr_reserved_tags = reserved_tags;
|
||||
|
||||
return blk_mq_init_bitmap_tags(tags, node);
|
||||
return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
|
||||
}
|
||||
|
||||
void blk_mq_free_tags(struct blk_mq_tags *tags)
|
||||
|
@ -42,10 +42,12 @@ struct blk_mq_tags {
|
||||
|
||||
struct request **rqs;
|
||||
struct list_head page_list;
|
||||
|
||||
int alloc_policy;
|
||||
};
|
||||
|
||||
|
||||
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
|
||||
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
|
||||
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
|
||||
|
||||
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
|
||||
|
@ -33,6 +33,7 @@ static DEFINE_MUTEX(all_q_mutex);
|
||||
static LIST_HEAD(all_q_list);
|
||||
|
||||
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
|
||||
static void blk_mq_run_queues(struct request_queue *q);
|
||||
|
||||
/*
|
||||
* Check if any of the ctx's have pending work in this hardware queue
|
||||
@ -117,7 +118,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
|
||||
|
||||
if (freeze) {
|
||||
percpu_ref_kill(&q->mq_usage_counter);
|
||||
blk_mq_run_queues(q, false);
|
||||
blk_mq_run_queues(q);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
|
||||
@ -136,6 +137,7 @@ void blk_mq_freeze_queue(struct request_queue *q)
|
||||
blk_mq_freeze_queue_start(q);
|
||||
blk_mq_freeze_queue_wait(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
|
||||
|
||||
void blk_mq_unfreeze_queue(struct request_queue *q)
|
||||
{
|
||||
@ -902,7 +904,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
&hctx->run_work, 0);
|
||||
}
|
||||
|
||||
void blk_mq_run_queues(struct request_queue *q, bool async)
|
||||
static void blk_mq_run_queues(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
@ -913,10 +915,9 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
|
||||
test_bit(BLK_MQ_S_STOPPED, &hctx->state))
|
||||
continue;
|
||||
|
||||
blk_mq_run_hw_queue(hctx, async);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_run_queues);
|
||||
|
||||
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
@ -954,7 +955,6 @@ void blk_mq_start_hw_queues(struct request_queue *q)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_start_hw_queues);
|
||||
|
||||
|
||||
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
@ -1423,7 +1423,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
|
||||
size_t rq_size, left;
|
||||
|
||||
tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
|
||||
set->numa_node);
|
||||
set->numa_node,
|
||||
BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
|
||||
if (!tags)
|
||||
return NULL;
|
||||
|
||||
|
@ -119,7 +119,7 @@ fail:
|
||||
}
|
||||
|
||||
static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
|
||||
int depth)
|
||||
int depth, int alloc_policy)
|
||||
{
|
||||
struct blk_queue_tag *tags;
|
||||
|
||||
@ -131,6 +131,8 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
|
||||
goto fail;
|
||||
|
||||
atomic_set(&tags->refcnt, 1);
|
||||
tags->alloc_policy = alloc_policy;
|
||||
tags->next_tag = 0;
|
||||
return tags;
|
||||
fail:
|
||||
kfree(tags);
|
||||
@ -140,10 +142,11 @@ fail:
|
||||
/**
|
||||
* blk_init_tags - initialize the tag info for an external tag map
|
||||
* @depth: the maximum queue depth supported
|
||||
* @alloc_policy: tag allocation policy
|
||||
**/
|
||||
struct blk_queue_tag *blk_init_tags(int depth)
|
||||
struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy)
|
||||
{
|
||||
return __blk_queue_init_tags(NULL, depth);
|
||||
return __blk_queue_init_tags(NULL, depth, alloc_policy);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_init_tags);
|
||||
|
||||
@ -152,19 +155,20 @@ EXPORT_SYMBOL(blk_init_tags);
|
||||
* @q: the request queue for the device
|
||||
* @depth: the maximum queue depth supported
|
||||
* @tags: the tag to use
|
||||
* @alloc_policy: tag allocation policy
|
||||
*
|
||||
* Queue lock must be held here if the function is called to resize an
|
||||
* existing map.
|
||||
**/
|
||||
int blk_queue_init_tags(struct request_queue *q, int depth,
|
||||
struct blk_queue_tag *tags)
|
||||
struct blk_queue_tag *tags, int alloc_policy)
|
||||
{
|
||||
int rc;
|
||||
|
||||
BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
|
||||
|
||||
if (!tags && !q->queue_tags) {
|
||||
tags = __blk_queue_init_tags(q, depth);
|
||||
tags = __blk_queue_init_tags(q, depth, alloc_policy);
|
||||
|
||||
if (!tags)
|
||||
return -ENOMEM;
|
||||
@ -344,9 +348,21 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
|
||||
}
|
||||
|
||||
do {
|
||||
tag = find_first_zero_bit(bqt->tag_map, max_depth);
|
||||
if (tag >= max_depth)
|
||||
return 1;
|
||||
if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) {
|
||||
tag = find_first_zero_bit(bqt->tag_map, max_depth);
|
||||
if (tag >= max_depth)
|
||||
return 1;
|
||||
} else {
|
||||
int start = bqt->next_tag;
|
||||
int size = min_t(int, bqt->max_depth, max_depth + start);
|
||||
tag = find_next_zero_bit(bqt->tag_map, size, start);
|
||||
if (tag >= size && start + size > bqt->max_depth) {
|
||||
size = start + size - bqt->max_depth;
|
||||
tag = find_first_zero_bit(bqt->tag_map, size);
|
||||
}
|
||||
if (tag >= size)
|
||||
return 1;
|
||||
}
|
||||
|
||||
} while (test_and_set_bit_lock(tag, bqt->tag_map));
|
||||
/*
|
||||
@ -354,6 +370,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
|
||||
* See blk_queue_end_tag for details.
|
||||
*/
|
||||
|
||||
bqt->next_tag = (tag + 1) % bqt->max_depth;
|
||||
rq->cmd_flags |= REQ_QUEUED;
|
||||
rq->tag = tag;
|
||||
bqt->tag_index[tag] = rq;
|
||||
|
@ -3590,6 +3590,11 @@ retry:
|
||||
|
||||
blkcg = bio_blkcg(bio);
|
||||
cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
|
||||
if (!cfqg) {
|
||||
cfqq = &cfqd->oom_cfqq;
|
||||
goto out;
|
||||
}
|
||||
|
||||
cfqq = cic_to_cfqq(cic, is_sync);
|
||||
|
||||
/*
|
||||
@ -3626,7 +3631,7 @@ retry:
|
||||
} else
|
||||
cfqq = &cfqd->oom_cfqq;
|
||||
}
|
||||
|
||||
out:
|
||||
if (new_cfqq)
|
||||
kmem_cache_free(cfq_pool, new_cfqq);
|
||||
|
||||
@ -3656,12 +3661,17 @@ static struct cfq_queue *
|
||||
cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
|
||||
struct bio *bio, gfp_t gfp_mask)
|
||||
{
|
||||
const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
|
||||
const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
|
||||
int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
|
||||
int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
|
||||
struct cfq_queue **async_cfqq = NULL;
|
||||
struct cfq_queue *cfqq = NULL;
|
||||
|
||||
if (!is_sync) {
|
||||
if (!ioprio_valid(cic->ioprio)) {
|
||||
struct task_struct *tsk = current;
|
||||
ioprio = task_nice_ioprio(tsk);
|
||||
ioprio_class = task_nice_ioclass(tsk);
|
||||
}
|
||||
async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
|
||||
cfqq = *async_cfqq;
|
||||
}
|
||||
|
@ -198,7 +198,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
|
||||
if (start + len > (i_size_read(bdev->bd_inode) >> 9))
|
||||
return -EINVAL;
|
||||
|
||||
return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
|
||||
return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false);
|
||||
}
|
||||
|
||||
static int put_ushort(unsigned long arg, unsigned short val)
|
||||
|
@ -184,12 +184,12 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
|
||||
if (err)
|
||||
/* The partition is unrecognized. So report I/O errors if there were any */
|
||||
res = err;
|
||||
if (!res)
|
||||
strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE);
|
||||
else if (warn_no_part)
|
||||
strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE);
|
||||
|
||||
printk(KERN_INFO "%s", state->pp_buf);
|
||||
if (res) {
|
||||
if (warn_no_part)
|
||||
strlcat(state->pp_buf,
|
||||
" unable to read partition table\n", PAGE_SIZE);
|
||||
printk(KERN_INFO "%s", state->pp_buf);
|
||||
}
|
||||
|
||||
free_page((unsigned long)state->pp_buf);
|
||||
free_partitions(state);
|
||||
|
@ -332,7 +332,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
|
||||
|
||||
ret = 0;
|
||||
if (hdr->iovec_count) {
|
||||
size_t iov_data_len;
|
||||
struct iov_iter i;
|
||||
struct iovec *iov = NULL;
|
||||
|
||||
ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
|
||||
@ -342,20 +342,11 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
|
||||
goto out_free_cdb;
|
||||
}
|
||||
|
||||
iov_data_len = ret;
|
||||
ret = 0;
|
||||
|
||||
/* SG_IO howto says that the shorter of the two wins */
|
||||
if (hdr->dxfer_len < iov_data_len) {
|
||||
hdr->iovec_count = iov_shorten(iov,
|
||||
hdr->iovec_count,
|
||||
hdr->dxfer_len);
|
||||
iov_data_len = hdr->dxfer_len;
|
||||
}
|
||||
iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count,
|
||||
min_t(unsigned, ret, hdr->dxfer_len));
|
||||
|
||||
ret = blk_rq_map_user_iov(q, rq, NULL, (struct sg_iovec *) iov,
|
||||
hdr->iovec_count,
|
||||
iov_data_len, GFP_KERNEL);
|
||||
ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
|
||||
kfree(iov);
|
||||
} else if (hdr->dxfer_len)
|
||||
ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
|
||||
|
@ -370,25 +370,25 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_XIP
|
||||
static int brd_direct_access(struct block_device *bdev, sector_t sector,
|
||||
void **kaddr, unsigned long *pfn)
|
||||
static long brd_direct_access(struct block_device *bdev, sector_t sector,
|
||||
void **kaddr, unsigned long *pfn, long size)
|
||||
{
|
||||
struct brd_device *brd = bdev->bd_disk->private_data;
|
||||
struct page *page;
|
||||
|
||||
if (!brd)
|
||||
return -ENODEV;
|
||||
if (sector & (PAGE_SECTORS-1))
|
||||
return -EINVAL;
|
||||
if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk))
|
||||
return -ERANGE;
|
||||
page = brd_insert_page(brd, sector);
|
||||
if (!page)
|
||||
return -ENOSPC;
|
||||
*kaddr = page_address(page);
|
||||
*pfn = page_to_pfn(page);
|
||||
|
||||
return 0;
|
||||
/*
|
||||
* TODO: If size > PAGE_SIZE, we could look to see if the next page in
|
||||
* the file happens to be mapped to the next page of physical RAM.
|
||||
*/
|
||||
return PAGE_SIZE;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1388,7 +1388,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
|
||||
list_add_tail(&peer_req->w.list, &device->active_ee);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
if (blkdev_issue_zeroout(device->ldev->backing_bdev,
|
||||
sector, data_size >> 9, GFP_NOIO))
|
||||
sector, data_size >> 9, GFP_NOIO, false))
|
||||
peer_req->flags |= EE_WAS_ERROR;
|
||||
drbd_endio_write_sec_final(peer_req);
|
||||
return 0;
|
||||
|
@ -423,7 +423,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
|
||||
}
|
||||
|
||||
/* switch queue to TCQ mode; allocate tag map */
|
||||
rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
|
||||
rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO);
|
||||
if (rc) {
|
||||
blk_cleanup_queue(q);
|
||||
put_disk(disk);
|
||||
|
@ -1722,6 +1722,7 @@ static int setup_clone(struct request *clone, struct request *rq,
|
||||
{
|
||||
int r;
|
||||
|
||||
blk_rq_init(NULL, clone);
|
||||
r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
|
||||
dm_rq_bio_constructor, tio);
|
||||
if (r)
|
||||
|
@ -28,8 +28,8 @@
|
||||
static int dcssblk_open(struct block_device *bdev, fmode_t mode);
|
||||
static void dcssblk_release(struct gendisk *disk, fmode_t mode);
|
||||
static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
|
||||
static int dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
|
||||
void **kaddr, unsigned long *pfn);
|
||||
static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
|
||||
void **kaddr, unsigned long *pfn, long size);
|
||||
|
||||
static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
|
||||
|
||||
@ -877,25 +877,22 @@ fail:
|
||||
bio_io_error(bio);
|
||||
}
|
||||
|
||||
static int
|
||||
static long
|
||||
dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
|
||||
void **kaddr, unsigned long *pfn)
|
||||
void **kaddr, unsigned long *pfn, long size)
|
||||
{
|
||||
struct dcssblk_dev_info *dev_info;
|
||||
unsigned long pgoff;
|
||||
unsigned long offset, dev_sz;
|
||||
|
||||
dev_info = bdev->bd_disk->private_data;
|
||||
if (!dev_info)
|
||||
return -ENODEV;
|
||||
if (secnum % (PAGE_SIZE/512))
|
||||
return -EINVAL;
|
||||
pgoff = secnum / (PAGE_SIZE / 512);
|
||||
if ((pgoff+1)*PAGE_SIZE-1 > dev_info->end - dev_info->start)
|
||||
return -ERANGE;
|
||||
*kaddr = (void *) (dev_info->start+pgoff*PAGE_SIZE);
|
||||
dev_sz = dev_info->end - dev_info->start;
|
||||
offset = secnum * 512;
|
||||
*kaddr = (void *) (dev_info->start + offset);
|
||||
*pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT;
|
||||
|
||||
return 0;
|
||||
return dev_sz - offset;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2197,6 +2197,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
|
||||
shost->tag_set.cmd_size = cmd_size;
|
||||
shost->tag_set.numa_node = NUMA_NO_NODE;
|
||||
shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
|
||||
shost->tag_set.flags |=
|
||||
BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
|
||||
shost->tag_set.driver_data = shost;
|
||||
|
||||
return blk_mq_alloc_tag_set(&shost->tag_set);
|
||||
|
@ -277,7 +277,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
|
||||
if (!shost_use_blk_mq(sdev->host) &&
|
||||
(shost->bqt || shost->hostt->use_blk_tags)) {
|
||||
blk_queue_init_tags(sdev->request_queue,
|
||||
sdev->host->cmd_per_lun, shost->bqt);
|
||||
sdev->host->cmd_per_lun, shost->bqt,
|
||||
shost->hostt->tag_alloc_policy);
|
||||
}
|
||||
scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun);
|
||||
|
||||
|
@ -1719,22 +1719,19 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
|
||||
}
|
||||
|
||||
if (iov_count) {
|
||||
int len, size = sizeof(struct sg_iovec) * iov_count;
|
||||
int size = sizeof(struct iovec) * iov_count;
|
||||
struct iovec *iov;
|
||||
struct iov_iter i;
|
||||
|
||||
iov = memdup_user(hp->dxferp, size);
|
||||
if (IS_ERR(iov))
|
||||
return PTR_ERR(iov);
|
||||
|
||||
len = iov_length(iov, iov_count);
|
||||
if (hp->dxfer_len < len) {
|
||||
iov_count = iov_shorten(iov, iov_count, hp->dxfer_len);
|
||||
len = hp->dxfer_len;
|
||||
}
|
||||
iov_iter_init(&i, rw, iov, iov_count,
|
||||
min_t(size_t, hp->dxfer_len,
|
||||
iov_length(iov, iov_count)));
|
||||
|
||||
res = blk_rq_map_user_iov(q, rq, md, (struct sg_iovec *)iov,
|
||||
iov_count,
|
||||
len, GFP_ATOMIC);
|
||||
res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC);
|
||||
kfree(iov);
|
||||
} else
|
||||
res = blk_rq_map_user(q, rq, md, hp->dxferp,
|
||||
|
@ -421,6 +421,46 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_write_page);
|
||||
|
||||
/**
|
||||
* bdev_direct_access() - Get the address for directly-accessible memory
|
||||
* @bdev: The device containing the memory
|
||||
* @sector: The offset within the device
|
||||
* @addr: Where to put the address of the memory
|
||||
* @pfn: The Page Frame Number for the memory
|
||||
* @size: The number of bytes requested
|
||||
*
|
||||
* If a block device is made up of directly addressable memory, this function
|
||||
* will tell the caller the PFN and the address of the memory. The address
|
||||
* may be directly dereferenced within the kernel without the need to call
|
||||
* ioremap(), kmap() or similar. The PFN is suitable for inserting into
|
||||
* page tables.
|
||||
*
|
||||
* Return: negative errno if an error occurs, otherwise the number of bytes
|
||||
* accessible at this address.
|
||||
*/
|
||||
long bdev_direct_access(struct block_device *bdev, sector_t sector,
|
||||
void **addr, unsigned long *pfn, long size)
|
||||
{
|
||||
long avail;
|
||||
const struct block_device_operations *ops = bdev->bd_disk->fops;
|
||||
|
||||
if (size < 0)
|
||||
return size;
|
||||
if (!ops->direct_access)
|
||||
return -EOPNOTSUPP;
|
||||
if ((sector + DIV_ROUND_UP(size, 512)) >
|
||||
part_nr_sects_read(bdev->bd_part))
|
||||
return -ERANGE;
|
||||
sector += get_start_sect(bdev);
|
||||
if (sector % (PAGE_SIZE / 512))
|
||||
return -EINVAL;
|
||||
avail = ops->direct_access(bdev, sector, addr, pfn, size);
|
||||
if (!avail)
|
||||
return -ERANGE;
|
||||
return min(avail, size);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_direct_access);
|
||||
|
||||
/*
|
||||
* pseudo-fs
|
||||
*/
|
||||
|
@ -13,18 +13,12 @@
|
||||
#include "ext2.h"
|
||||
#include "xip.h"
|
||||
|
||||
static inline int
|
||||
__inode_direct_access(struct inode *inode, sector_t block,
|
||||
void **kaddr, unsigned long *pfn)
|
||||
static inline long __inode_direct_access(struct inode *inode, sector_t block,
|
||||
void **kaddr, unsigned long *pfn, long size)
|
||||
{
|
||||
struct block_device *bdev = inode->i_sb->s_bdev;
|
||||
const struct block_device_operations *ops = bdev->bd_disk->fops;
|
||||
sector_t sector;
|
||||
|
||||
sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */
|
||||
|
||||
BUG_ON(!ops->direct_access);
|
||||
return ops->direct_access(bdev, sector, kaddr, pfn);
|
||||
sector_t sector = block * (PAGE_SIZE / 512);
|
||||
return bdev_direct_access(bdev, sector, kaddr, pfn, size);
|
||||
}
|
||||
|
||||
static inline int
|
||||
@ -53,12 +47,13 @@ ext2_clear_xip_target(struct inode *inode, sector_t block)
|
||||
{
|
||||
void *kaddr;
|
||||
unsigned long pfn;
|
||||
int rc;
|
||||
long size;
|
||||
|
||||
rc = __inode_direct_access(inode, block, &kaddr, &pfn);
|
||||
if (!rc)
|
||||
clear_page(kaddr);
|
||||
return rc;
|
||||
size = __inode_direct_access(inode, block, &kaddr, &pfn, PAGE_SIZE);
|
||||
if (size < 0)
|
||||
return size;
|
||||
clear_page(kaddr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ext2_xip_verify_sb(struct super_block *sb)
|
||||
@ -77,7 +72,7 @@ void ext2_xip_verify_sb(struct super_block *sb)
|
||||
int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create,
|
||||
void **kmem, unsigned long *pfn)
|
||||
{
|
||||
int rc;
|
||||
long rc;
|
||||
sector_t block;
|
||||
|
||||
/* first, retrieve the sector number */
|
||||
@ -86,6 +81,6 @@ int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create,
|
||||
return rc;
|
||||
|
||||
/* retrieve address of the target data */
|
||||
rc = __inode_direct_access(mapping->host, block, kmem, pfn);
|
||||
return rc;
|
||||
rc = __inode_direct_access(mapping->host, block, kmem, pfn, PAGE_SIZE);
|
||||
return (rc < 0) ? rc : 0;
|
||||
}
|
||||
|
@ -428,13 +428,9 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
|
||||
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
|
||||
unsigned int, unsigned int);
|
||||
extern int bio_get_nr_vecs(struct block_device *);
|
||||
extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
|
||||
unsigned long, unsigned int, int, gfp_t);
|
||||
struct sg_iovec;
|
||||
struct rq_map_data;
|
||||
extern struct bio *bio_map_user_iov(struct request_queue *,
|
||||
struct block_device *,
|
||||
const struct sg_iovec *, int, int, gfp_t);
|
||||
const struct iov_iter *, gfp_t);
|
||||
extern void bio_unmap_user(struct bio *);
|
||||
extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
|
||||
gfp_t);
|
||||
@ -462,12 +458,10 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
|
||||
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
||||
extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
|
||||
|
||||
extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
|
||||
unsigned long, unsigned int, int, gfp_t);
|
||||
extern struct bio *bio_copy_user_iov(struct request_queue *,
|
||||
struct rq_map_data *,
|
||||
const struct sg_iovec *,
|
||||
int, int, gfp_t);
|
||||
const struct iov_iter *,
|
||||
gfp_t);
|
||||
extern int bio_uncopy_user(struct bio *);
|
||||
void zero_fill_bio(struct bio *bio);
|
||||
extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
|
||||
|
@ -146,6 +146,8 @@ enum {
|
||||
BLK_MQ_F_SG_MERGE = 1 << 2,
|
||||
BLK_MQ_F_SYSFS_UP = 1 << 3,
|
||||
BLK_MQ_F_DEFER_ISSUE = 1 << 4,
|
||||
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
|
||||
BLK_MQ_F_ALLOC_POLICY_BITS = 1,
|
||||
|
||||
BLK_MQ_S_STOPPED = 0,
|
||||
BLK_MQ_S_TAG_ACTIVE = 1,
|
||||
@ -154,6 +156,12 @@ enum {
|
||||
|
||||
BLK_MQ_CPU_WORK_BATCH = 8,
|
||||
};
|
||||
/*
 * The tag allocation policy is stored in the blk-mq flags word as a field
 * BLK_MQ_F_ALLOC_POLICY_BITS wide, starting at bit
 * BLK_MQ_F_ALLOC_POLICY_START_BIT.
 *
 * BLK_MQ_FLAG_TO_ALLOC_POLICY() extracts that field from a flags value;
 * BLK_ALLOC_POLICY_TO_MQ_FLAG() masks a policy value to the field width and
 * shifts it into position so it can be OR-ed into a flags value.
 *
 * The macro arguments are parenthesized so that compound expressions
 * (e.g. "a | b") are shifted/masked as a whole rather than being split up
 * by operator precedence.
 */
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	(((flags) >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	(((policy) & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
|
||||
|
||||
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
|
||||
void blk_mq_finish_init(struct request_queue *q);
|
||||
@ -166,7 +174,6 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
|
||||
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
|
||||
|
||||
void blk_mq_insert_request(struct request *, bool, bool, bool);
|
||||
void blk_mq_run_queues(struct request_queue *q, bool async);
|
||||
void blk_mq_free_request(struct request *rq);
|
||||
void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
|
||||
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
|
||||
@ -214,6 +221,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
|
||||
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
|
||||
void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
|
||||
void *priv);
|
||||
void blk_mq_freeze_queue(struct request_queue *q);
|
||||
void blk_mq_unfreeze_queue(struct request_queue *q);
|
||||
void blk_mq_freeze_queue_start(struct request_queue *q);
|
||||
|
||||
|
@ -272,7 +272,11 @@ struct blk_queue_tag {
|
||||
int max_depth; /* what we will send to device */
|
||||
int real_max_depth; /* what the array can hold */
|
||||
atomic_t refcnt; /* map can be shared */
|
||||
int alloc_policy; /* tag allocation policy */
|
||||
int next_tag; /* next tag */
|
||||
};
|
||||
#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
|
||||
#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
|
||||
|
||||
#define BLK_SCSI_MAX_CMDS (256)
|
||||
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
|
||||
@ -516,6 +520,7 @@ struct request_queue {
|
||||
(1 << QUEUE_FLAG_ADD_RANDOM))
|
||||
|
||||
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
|
||||
(1 << QUEUE_FLAG_STACKABLE) | \
|
||||
(1 << QUEUE_FLAG_SAME_COMP))
|
||||
|
||||
static inline void queue_lockdep_assert_held(struct request_queue *q)
|
||||
@ -850,8 +855,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *,
|
||||
extern int blk_rq_unmap_user(struct bio *);
|
||||
extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
|
||||
extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
|
||||
struct rq_map_data *, const struct sg_iovec *,
|
||||
int, unsigned int, gfp_t);
|
||||
struct rq_map_data *, const struct iov_iter *,
|
||||
gfp_t);
|
||||
extern int blk_execute_rq(struct request_queue *, struct gendisk *,
|
||||
struct request *, int);
|
||||
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
|
||||
@ -1044,8 +1049,6 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
|
||||
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
|
||||
|
||||
extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
|
||||
extern int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
|
||||
struct scatterlist *sglist);
|
||||
extern void blk_dump_rq_flags(struct request *, char *);
|
||||
extern long nr_blockdev_pages(void);
|
||||
|
||||
@ -1139,11 +1142,11 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
|
||||
extern int blk_queue_start_tag(struct request_queue *, struct request *);
|
||||
extern struct request *blk_queue_find_tag(struct request_queue *, int);
|
||||
extern void blk_queue_end_tag(struct request_queue *, struct request *);
|
||||
extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
|
||||
extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
|
||||
extern void blk_queue_free_tags(struct request_queue *);
|
||||
extern int blk_queue_resize_tags(struct request_queue *, int);
|
||||
extern void blk_queue_invalidate_tags(struct request_queue *);
|
||||
extern struct blk_queue_tag *blk_init_tags(int);
|
||||
extern struct blk_queue_tag *blk_init_tags(int, int);
|
||||
extern void blk_free_tags(struct blk_queue_tag *);
|
||||
|
||||
static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
|
||||
@ -1162,7 +1165,7 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
|
||||
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask);
|
||||
sector_t nr_sects, gfp_t gfp_mask, bool discard);
|
||||
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
|
||||
sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
|
||||
{
|
||||
@ -1176,7 +1179,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
|
||||
return blkdev_issue_zeroout(sb->s_bdev,
|
||||
block << (sb->s_blocksize_bits - 9),
|
||||
nr_blocks << (sb->s_blocksize_bits - 9),
|
||||
gfp_mask);
|
||||
gfp_mask, true);
|
||||
}
|
||||
|
||||
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
|
||||
@ -1601,8 +1604,8 @@ struct block_device_operations {
|
||||
int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
|
||||
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
|
||||
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
|
||||
int (*direct_access) (struct block_device *, sector_t,
|
||||
void **, unsigned long *);
|
||||
long (*direct_access)(struct block_device *, sector_t,
|
||||
void **, unsigned long *pfn, long size);
|
||||
unsigned int (*check_events) (struct gendisk *disk,
|
||||
unsigned int clearing);
|
||||
/* ->media_changed() is DEPRECATED, use ->check_events() instead */
|
||||
@ -1620,6 +1623,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
|
||||
extern int bdev_read_page(struct block_device *, sector_t, struct page *);
|
||||
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
|
||||
struct writeback_control *);
|
||||
extern long bdev_direct_access(struct block_device *, sector_t, void **addr,
|
||||
unsigned long *pfn, long size);
|
||||
#else /* CONFIG_BLOCK */
|
||||
|
||||
struct block_device;
|
||||
|
@ -402,6 +402,9 @@ struct scsi_host_template {
|
||||
*/
|
||||
unsigned char present;
|
||||
|
||||
/* If the block layer is used to manage tags, this is the tag allocation policy */
|
||||
int tag_alloc_policy;
|
||||
|
||||
/*
|
||||
* Let the block layer assign tags to all commands.
|
||||
*/
|
||||
|
@ -66,7 +66,8 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
|
||||
* devices on the shared host (for libata)
|
||||
*/
|
||||
if (!shost->bqt) {
|
||||
shost->bqt = blk_init_tags(depth);
|
||||
shost->bqt = blk_init_tags(depth,
|
||||
shost->hostt->tag_alloc_policy);
|
||||
if (!shost->bqt)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user